├── Dockerfile ├── LICENSE ├── README.md ├── base-config.yaml ├── covbot ├── __init__.py ├── bot.py └── data.py ├── maubot.yaml └── screenshot.png /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM dock.mau.dev/maubot/maubot 2 | 3 | VOLUME /src 4 | CMD mbc build /src && cp /opt/maubot/*.mbp /src -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Peter Roberts 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # covbot 2 | 3 | CovBot has served his time and now been shut down. 
The code here is deprecated and has not been updated in quite some time. 4 | -------------------------------------------------------------------------------- /base-config.yaml: -------------------------------------------------------------------------------- 1 | # Users allowed to perform admin actions 2 | # CHANGE THIS 3 | admins: 4 | - '@user:example.com' -------------------------------------------------------------------------------- /covbot/__init__.py: -------------------------------------------------------------------------------- 1 | from .bot import CovBot -------------------------------------------------------------------------------- /covbot/bot.py: -------------------------------------------------------------------------------- 1 | from mautrix.types import EventType 2 | from maubot import Plugin, MessageEvent 3 | from maubot.matrix import parse_formatted 4 | from maubot.handlers import event, command 5 | from mautrix.util.config import BaseProxyConfig, ConfigUpdateHelper 6 | 7 | import os 8 | import csv 9 | import datetime 10 | import asyncio 11 | import math 12 | import whoosh 13 | import time 14 | import pycountry 15 | import traceback 16 | from whoosh.fields import Schema, TEXT 17 | from whoosh.index import create_in, FileIndex 18 | from whoosh.qparser import QueryParser 19 | from tabulate import tabulate 20 | from mautrix.types import TextMessageEventContent, MessageType 21 | from mautrix.client import MembershipEventDispatcher, InternalEventType 22 | from mautrix.errors.request import MLimitExceeded 23 | 24 | from .data import DataSource 25 | 26 | RATE_LIMIT_BACKOFF_SECONDS = 10 27 | UPDATE_INTERVAL_SECONDS = 15 * 60 28 | ROOM_PRUNE_INTERVAL_SECONDS = 60 * 60 29 | 30 | # command: ( usage, description ) 31 | HELP = { 32 | 'cases': ( 33 | '!cases location', 34 | 'Get up to date info on cases, optionally in a specific location.' 35 | ' You can give a country code, country, state, county, region or city.' 36 | ' E.g. 
!cases china' 37 | ), 38 | 'compare': ( 39 | '!compare locations', 40 | 'Compare up to date info on cases in multiple locations.' 41 | 'If it looks bad on mobile try rotating into landscape mode. ' 42 | ' Separate the locations with semicolons (;).' 43 | ' You can give a country codes, countries, states, counties, regions or cities.' 44 | ' E.g. !compare cn;us;uk;it;de' 45 | ), 46 | 'risk': ('!risk age', 'For a person of the given age, what is is the risk to them if they become sick with COVID-19?'), 47 | 'source': ('!source', 'Find out about my data sources and developers.'), 48 | 'help': ('!help', 'Get a reminder what I can do for you.'), 49 | } 50 | 51 | 52 | class Config(BaseProxyConfig): 53 | def do_update(self, helper: ConfigUpdateHelper) -> None: 54 | helper.copy("admins") 55 | 56 | 57 | class CovBot(Plugin): 58 | next_update_at: datetime.datetime = None 59 | _rooms_joined = {} 60 | 61 | async def _handle_rate_limit(self, api_call_wrapper): 62 | while True: 63 | try: 64 | return await api_call_wrapper() 65 | except MLimitExceeded: 66 | self.log.warning( 67 | 'API rate limit exceepted so backing off for %s seconds.', RATE_LIMIT_BACKOFF_SECONDS) 68 | await asyncio.sleep(RATE_LIMIT_BACKOFF_SECONDS) 69 | except Exception: # ignore other errors but give up 70 | tb = traceback.format_exc() 71 | self.log.warning('%s', tb) 72 | return 73 | 74 | async def _prune_dead_rooms(self): 75 | while True: 76 | self.log.info('Tidying up empty rooms.') 77 | users = set() 78 | rooms = await self._handle_rate_limit(lambda: self.client.get_joined_rooms()) 79 | 80 | left = 0 81 | for r in rooms: 82 | members = await self._handle_rate_limit(lambda: self.client.get_joined_members(r)) 83 | 84 | if len(members) == 1: 85 | self.log.debug('Leaving empty room %s.', r) 86 | await self._handle_rate_limit(lambda: self.client.leave_room(r)) 87 | left += 1 88 | else: 89 | for m in members: 90 | users.add(m) 91 | 92 | self.log.debug('I am in %s rooms.', len(rooms) - left) 93 | self.log.debug('I 
reach %s unique users.', 94 | len(users) - 1) # ignore myself 95 | await asyncio.sleep(ROOM_PRUNE_INTERVAL_SECONDS) 96 | 97 | async def _update_data(self): 98 | while True: 99 | try: 100 | await self.data.update() 101 | except Exception: 102 | tb = traceback.format_exc() 103 | self.log.warn( 104 | 'Failed to update data: %s.', tb) 105 | 106 | await asyncio.sleep(UPDATE_INTERVAL_SECONDS) 107 | 108 | @classmethod 109 | def get_config_class(cls) -> BaseProxyConfig: 110 | return Config 111 | 112 | async def start(self): 113 | await super().start() 114 | self.config.load_and_update() 115 | # So we can get room join events. 116 | self.client.add_dispatcher(MembershipEventDispatcher) 117 | self.data = DataSource(self.log, self.http) 118 | self._data_update_task = asyncio.create_task(self._update_data()) 119 | self._room_prune_task = asyncio.create_task(self._prune_dead_rooms()) 120 | 121 | async def stop(self): 122 | await super().stop() 123 | self.client.remove_dispatcher(MembershipEventDispatcher) 124 | self._room_prune_task.cancel() 125 | self._data_update_task.cancel() 126 | 127 | def _short_location(self, location: str, length=int(12)) -> str: 128 | """Returns a shortened location name. 129 | 130 | If exactly matches a country code, return that. (1) 131 | 132 | If shorter/equal than length, return intact. (2) 133 | 134 | Logic done in that order so that if someone passes a list 135 | of countries, they get the codes back, rather than a mix 136 | of codes and country names. 137 | 138 | If longer, split on commas and replace the final part with 139 | a country code if that matches. (3) 140 | 141 | If still too long, strip out 'middle' to get desired length. 
142 | 143 | TODO: - consider stripping out ", City of," 144 | - consider simple truncation 145 | 146 | Example (length=12): 147 | United States → US 148 | Manchester, GB → Manch..r ,GB 149 | """ 150 | 151 | self.log.debug('Shortening %s.', location) 152 | 153 | # Exact country case (1) 154 | try: 155 | return pycountry.countries.lookup(location).alpha_2 156 | except LookupError: 157 | pass 158 | 159 | # It fits already (2) 160 | if len(location) <= length: 161 | return location 162 | 163 | # If there's commas, try to replace the last bit with a 164 | # country code (3) 165 | if "," in location: 166 | loc_parts = [s.strip() for s in location.split(",")] 167 | if pycountry.countries.lookup(loc_parts[-1]): 168 | loc_parts[-1] = pycountry.countries.lookup( 169 | loc_parts[-1]).alpha_2 170 | location = " ,".join(loc_parts) 171 | 172 | # If what we have is still longer, cut out the middle (4) 173 | if len(location) <= length: 174 | return location 175 | else: 176 | return "..".join([ 177 | location[:int((length-2)/2)], 178 | location[-int((length-2)/2):] 179 | ]) 180 | 181 | async def _locations_table(self, event: MessageEvent, data: dict, 182 | tabletype=str("text"), 183 | length=str("long")) -> str: 184 | """Build a table of locations to respond to. 185 | 186 | Uses tabulate module to tabulate data. 187 | 188 | Can be: 189 | - tabletype: text (default) or html 190 | - length: long (default), short or tiny 191 | 192 | Missing data (eg PHE) is handled and replaced 193 | by '---'; although this throws off tabulate's 194 | auto-alignment of numerical data. 195 | 196 | Tables by default report in following columns: 197 | - Location 198 | - Cases 199 | - Sick (%) 200 | - Recovered (%) 201 | - Deaths (%) 202 | 203 | Short table limits 'Location' to <= 12 chars 204 | and renames 'Recovered' to "matchesRec'd". 205 | 206 | Tiny table only outputs Loction and Cases columns. 
207 | 208 | Table includes a 'Total' row, even where this makes 209 | no meaningful sense (eg countries + world data). 210 | """ 211 | MISSINGDATA = "---" 212 | 213 | self.log.debug('Building table for %s', data.keys()) 214 | columns = ["Location", "Cases"] 215 | 216 | if [v for v in data.values() if "recoveries" in v]: 217 | # At least one of the results has recovery data 218 | columns.extend(["Recovered", "%"]) 219 | 220 | if [v for v in data.values() if "deaths" in v]: 221 | # At least one of the results has deaths data 222 | columns.extend(["Deaths", "%"]) 223 | 224 | if "Recovered" in columns and "Deaths" in columns: 225 | # L - C - S - R - D 226 | columns.insert(2, "Sick") 227 | columns.insert(3, "%") 228 | 229 | # TODO: sort by cases descending 230 | tabledata = [] 231 | for location, data in data.items(): 232 | rowdata = [] 233 | cases = data['cases'] 234 | 235 | # Location 236 | if length == "short": 237 | rowdata.extend([self._short_location(location)]) 238 | else: 239 | rowdata.extend([location]) 240 | 241 | # Cases 242 | rowdata.extend([f'{cases:,}']) 243 | 244 | # TODO: decide if eliding % columns 245 | if "recoveries" in data: 246 | recs = data['recoveries'] 247 | per_rec = 0 if cases == 0 else \ 248 | int(recs) / int(cases) * 100 249 | 250 | rowdata.extend([f'{recs:,}', f"{per_rec:.1f}"]) 251 | else: 252 | rowdata.extend([MISSINGDATA, MISSINGDATA]) 253 | 254 | if "deaths" in data: 255 | deaths = data['deaths'] 256 | per_dead = 0 if cases == 0 else \ 257 | int(deaths) / int(cases) * 100 258 | 259 | rowdata.extend([f'{deaths:,}', f"{per_dead:.1f}"]) 260 | else: 261 | rowdata.extend([MISSINGDATA, MISSINGDATA]) 262 | 263 | if "recoveries" in data and "deaths" in data: 264 | sick = cases - int(data['recoveries']) - data['deaths'] 265 | per_sick = 100 - per_rec - per_dead 266 | 267 | rowdata.insert(2, f'{sick:,}') 268 | rowdata.insert(3, f"{per_sick:.1f}") 269 | else: 270 | rowdata.extend([MISSINGDATA, MISSINGDATA]) 271 | 272 | # Trim data for which there 
are no columns 273 | rowdata = rowdata[:len(columns)] 274 | 275 | tabledata.append(rowdata) 276 | 277 | # Shorten columns if needed 278 | if length == "short": 279 | columns = [w.replace("Recovered", "Rec'd") for w in columns] 280 | # Minimal- cases only: 281 | if length == "tiny": 282 | columns = columns[:2] 283 | tabledata = [row[:2] for row in tabledata] 284 | 285 | # Build table 286 | if tabletype == "html": 287 | tablefmt = "html" 288 | else: 289 | tablefmt = "presto" 290 | 291 | table = tabulate(tabledata, headers=columns, 292 | tablefmt=tablefmt, floatfmt=".1f") 293 | 294 | if data: 295 | return table 296 | 297 | async def _respond(self, e: MessageEvent, m: str) -> None: 298 | # IRC people don't like notices. 299 | if '@appservice-irc:matrix.org' in await self.client.get_joined_members(e.room_id): 300 | t = MessageType.TEXT 301 | else: # But matrix people do. 302 | t = MessageType.NOTICE 303 | 304 | c = TextMessageEventContent(msgtype=t, body=m) 305 | await self._handle_rate_limit(lambda: e.respond(c)) 306 | 307 | async def _respond_formatted(self, e: MessageEvent, m: str) -> None: 308 | """Respond with formatted message in m.text matrix format, 309 | not m.notice. 310 | 311 | This is needed as mobile clients (Riot 0.9.10, RiotX) currently 312 | do not seem to render markdown / HTML in m.notice events 313 | which are conventionally send by bots. 314 | 315 | Desktop/web Riot.im does render MD/HTML in m.notice, however. 316 | """ 317 | # IRC people don't like notices. 318 | if '@appservice-irc:matrix.org' in await self.client.get_joined_members(e.room_id): 319 | t = MessageType.TEXT 320 | else: # But matrix people do. 
321 | t = MessageType.NOTICE 322 | 323 | c = TextMessageEventContent( 324 | msgtype=t, formatted_body=m, format="org.matrix.custom.html") 325 | c.body, c.formatted_body = parse_formatted(m, allow_html=True) 326 | await e.respond(c, markdown=True, allow_html=True) 327 | 328 | # source : https://www.desmos.com/calculator/v0zif7tflm 329 | @command.new('risk', help=HELP['risk'][1]) 330 | @command.argument("age", pass_raw=True, required=True) 331 | async def risks_handler(self, event: MessageEvent, age: str) -> None: 332 | self.log.info( 333 | "Responding to !risk request for age %s from %s.", age, event.sender) 334 | 335 | try: 336 | age = int(age) 337 | except ValueError: 338 | self.log.warn( 339 | "Age %s is not an int, letting %s know.", age, event.sender) 340 | await self._respond(event, f'{age} does not look like a number to me.') 341 | return 342 | 343 | if age < 0 or age > 110: 344 | self.log.warn( 345 | '%s is out of the age range of the risk model, letting %s know.', age, event.sender) 346 | await self._respond(event, "The risk model only handles ages between 0 and 110.") 347 | return 348 | 349 | # Maths that Peter doesn't really understand! 350 | death_rate = max(0, -0.00186807 + 0.00000351867 * 351 | age ** 2 + (2.7595 * 10 ** -15) * age ** 7) 352 | ic_rate = max(0, -0.0572602 - -0.0027617 * age) 353 | h_rate = max(0, -0.0730827 - age * -0.00628289) 354 | survival_rate = 1 - death_rate 355 | 356 | s = ( 357 | f"I estimate a {age} year old patient sick with COVID-19 has a {survival_rate:.1%} chance of survival," 358 | f" a {h_rate:.1%} likelihood of needing to go to hospital, a {ic_rate:.1%} risk of needing intensive care there" 359 | f" and a {death_rate:.1%} chance of death." 
360 | ) 361 | 362 | await self._respond(event, s) 363 | 364 | @command.new('cases', help=HELP['cases'][1]) 365 | @command.argument("location", pass_raw=True, required=False) 366 | async def cases_handler(self, event: MessageEvent, location: str) -> None: 367 | if location == "": 368 | location = "World" 369 | 370 | self.log.info('Responding to !cases request for %s from %s.', 371 | location, event.sender) 372 | matches = self.data.get(location) 373 | 374 | if len(matches) == 0: 375 | self.log.debug( 376 | 'No matches found for %s, letting %s know.', location, event.sender) 377 | await self._respond( 378 | event, 379 | f'My data doesn\'t seem to include {location}.' 380 | ' It might be under a different name, data on it might not be available or there could even be no cases.' 381 | ' You may have more luck if you try a less specific location, like the country it\'s in.' 382 | f' \n\nIf you think I should have data on it you can open an issue at https://github.com/pwr22/covbot/issues and Peter will take a look.' 383 | ) 384 | return 385 | elif len(matches) > 5: 386 | self.log.debug( 387 | "Too many results for %s, advising %s to be more specific.", location, event.sender) 388 | await self._respond(event, f'I found a lot of matches for {location}. Please could you be more specific?') 389 | return 390 | elif len(matches) > 1: 391 | self.log.debug( 392 | "Found multiple results for %s, providing them to %sr so they can try again.", location, event.sender) 393 | ms = "\n".join(m[0] for m in matches) 394 | await self._respond(event, f"Which of these did you mean?\n\n{ms}") 395 | return 396 | 397 | m_loc, data = matches[0] 398 | cases, last_update = data['cases'], data['last_update'] 399 | s = f'In {m_loc} there have been a total of {cases:,} cases as of {last_update} UTC.' 
400 | 401 | # some data is more detailed 402 | if 'recoveries' in data and 'deaths' in data: 403 | recoveries, deaths = data['recoveries'], data['deaths'] 404 | sick = cases - recoveries - deaths 405 | 406 | per_rec = 0 if cases == 0 else int(recoveries) / int(cases) * 100 407 | per_dead = 0 if cases == 0 else int(deaths) / int(cases) * 100 408 | per_sick = 100 - per_rec - per_dead 409 | 410 | s += ( 411 | f' Of these {sick:,} ({per_sick:.1f}%) are still sick or may have recovered without being recorded,' 412 | f' {recoveries:,} ({per_rec:.1f}%) have definitely recovered' 413 | f' and {deaths:,} ({per_dead:.1f}%) have died.' 414 | ) 415 | 416 | await self._respond( 417 | event, 418 | s 419 | ) 420 | 421 | @command.new('compare', help=HELP["compare"][1]) 422 | @command.argument("locations", pass_raw=True, required=True) 423 | async def table_handler(self, event: MessageEvent, locations: str) -> None: 424 | self.log.info( 425 | "Responding to !compare request for %s from %s.", locations, event.sender) 426 | 427 | results = {} 428 | for loc in locations.split(";"): 429 | matches = self.data.get(loc) 430 | 431 | if len(matches) == 0: 432 | self.log.debug( 433 | "No matches found for %s, letting %s know.", loc, event.sender) 434 | await self._respond(event, 435 | f"I cannot find a match for {loc}") 436 | return 437 | elif len(matches) > 5: 438 | self.log.debug( 439 | "Too many results for %s, advising %s to be more specific.", loc, event.sender) 440 | await self._respond(event, f'I found a lot of matches for {loc}. Please could you be more specific?') 441 | return {} 442 | elif len(matches) > 1: 443 | self.log.debug( 444 | "Found multiple results for %s, providing them to %s so they can try again.", loc, event.sender) 445 | ms = " - ".join(m[0] for m in matches) 446 | await self._respond(event, 447 | f"Multiple results for {loc}: {ms}. 
" 448 | "Please provide one.") 449 | return {} 450 | 451 | m = matches.pop() # there's only one 452 | loc, data = m 453 | results[loc] = data 454 | 455 | t = await self._locations_table(event, data=results, 456 | tabletype="text", 457 | length="long") 458 | if t: 459 | await self._respond_formatted(event, f'
{t}
')
460 |
@command.new('source', help=HELP['source'][1])
async def source_handler(self, event: MessageEvent) -> None:
    """Reply to ``!source`` with authorship, licence and data-source details.

    The list of data sources is taken from ``self.data.get_sources()``.
    """
    self.log.info('Responding to !source request from %s.', event.sender)

    sources = self.data.get_sources()
    reply = ''.join((
        'I was created by Peter Roberts and MIT licensed on Github at https://github.com/pwr22/covbot.',
        f' I fetch new data every 15 minutes from {sources}.',
        ' Risk estimates are based on the model at https://www.desmos.com/calculator/v0zif7tflm.',
    ))
    await self._respond(event, reply)
470 |
@command.new('help', help=HELP['help'][1])
async def help_handler(self, event: MessageEvent) -> None:
    """Reply to ``!help`` with one "usage - description" entry per command.

    The entries come from the module-level ``HELP`` table and are sent to
    the room the request was made in.
    """
    self.log.info('Responding to !help request from %s.', event.sender)

    entries = [f'{usage} - {desc}' for usage, desc in HELP.values()]
    text = 'You can message me any of these commands:\n\n' + '\n\n'.join(entries)
    await self._message(event.room_id, text)
479 |
async def _message(self, room_id, m: str) -> None:
    """Send the plain-text message *m* to *room_id*.

    The message is sent as ``m.text`` when the matrix.org IRC appservice
    user is a member of the room and as ``m.notice`` otherwise.

    Both API calls go through ``_handle_rate_limit`` so that a rate limit
    on the membership lookup is retried in the same way as one on the
    send itself.
    """
    members = await self._handle_rate_limit(
        lambda: self.client.get_joined_members(room_id))

    # _handle_rate_limit returns None when it gives up on a call; in that
    # case fall back to the default (notice) rather than failing the send.
    # IRC people don't like notices.
    if members and '@appservice-irc:matrix.org' in members:
        t = MessageType.TEXT
    else:  # But matrix people do.
        t = MessageType.NOTICE

    c = TextMessageEventContent(msgtype=t, body=m)
    await self._handle_rate_limit(
        lambda: self.client.send_message(room_id=room_id, content=c))
489 |
@command.new('announce', help='Send broadcast a message to all rooms.')
@command.argument("message", pass_raw=True, required=True)
async def announce_handler(self, event: MessageEvent, message: str) -> None:
    """Broadcast *message* to every room the bot has joined.

    Only senders listed under ``admins`` in the plugin config may use this
    command; anyone else is refused and the attempt is logged.
    """
    if event.sender not in self.config['admins']:
        self.log.warning(
            'User %s tried to send an announcement but only admins are authorised to do so.'
            ' They tried to send %s.',
            event.sender, message
        )
        await self._respond(event, 'You do not have permission to !announce.')
        return None

    rooms = await self._handle_rate_limit(lambda: self.client.get_joined_rooms())

    # _handle_rate_limit returns None when the call failed for a reason
    # other than rate limiting, so there is nothing to iterate over.
    if rooms is None:
        self.log.warning(
            'Could not list joined rooms so announcement %s was not sent.', message)
        await self._respond(
            event, 'I could not fetch my list of rooms so the announcement was not sent.')
        return None

    self.log.info('Sending announcement %s to all %s rooms',
                  message, len(rooms))

    for r in rooms:
        await self._message(r, message)
509 |
@event.on(InternalEventType.JOIN)
async def join_handler(self, event: InternalEventType.JOIN) -> None:
    """Greet a room with the command list the first time the bot joins it.

    Join events from other users are ignored, as are repeated join events
    for a room already recorded in ``self._rooms_joined``.
    """
    me = await self._handle_rate_limit(lambda: self.client.whoami())

    # Ignore all joins but mine.
    if event.sender != me:
        return

    room = event.room_id
    if room in self._rooms_joined:
        self.log.warning('Duplicate join event for room %s.', room)
        return

    # work around duplicate joins
    self._rooms_joined[room] = True
    self.log.info('Sending unsolicited help on join to room %s.', room)

    command_lines = [f'{usage} - {desc}' for usage, desc in HELP.values()]
    greeting = (
        'Hi, I am a bot that tracks SARS-COV-2 infection statistics for you. You can message me any of these commands:\n\n'
        + '\n'.join(command_lines)
    )
    await self._message(room, greeting)
531 |
--------------------------------------------------------------------------------
/covbot/data.py:
--------------------------------------------------------------------------------
1 | import os
2 | import csv
3 | import datetime
4 | import asyncio
5 | import math
6 | import whoosh
7 | import time
8 | import pycountry
9 | from whoosh.fields import Schema, TEXT
10 | from whoosh.index import create_in, FileIndex
11 | from whoosh.qparser import QueryParser
12 | from tabulate import tabulate
13 | from mautrix.types import TextMessageEventContent, MessageType
14 | from mautrix.client import MembershipEventDispatcher, InternalEventType
15 | from mautrix.errors.request import MLimitExceeded
16 |
# Upstream data endpoints.
# The case data is fetched over HTTPS, like the groups file served from the
# same host and directory, so it cannot be altered in transit.
OFFLOOP_CASES_URL = 'https://offloop.net/covid19h/unconfirmed.csv'
OFFLOOP_GROUPS_URL = 'https://offloop.net/covid19h/groups.txt'
NHS_URL = 'https://www.arcgis.com/sharing/rest/content/items/ca796627a2294c51926865748c4a56e8/data'
UK_URL = 'https://www.arcgis.com/sharing/rest/content/items/b684319181f94875a6879bbc833ca3a6/data'
FINLAND_URL = 'https://w3qa5ydb4l.execute-api.eu-west-1.amazonaws.com/prod/finnishCoronaData/v2'
UK_COUNTRIES_URL = 'https://raw.githubusercontent.com/tomwhite/covid-19-uk-data/master/data/covid-19-indicators-uk.csv'
UK_REGIONS_URL = 'https://raw.githubusercontent.com/tomwhite/covid-19-uk-data/master/data/covid-19-cases-uk.csv'

# Country names as they appear in the offloop case data, mapped to the name
# the data is stored under.
COUNTRY_RENAMES = {
    'US': 'United States',
    'DRC': 'Democratic Republic of the Congo',
    'UAE': 'United Arab Emirates',
    "U.S. Virgin Islands": "United States Virgin Islands",
}

# UK constituent countries with their nominal data update times
UK_COUNTRIES = {
    "Wales": "1200 GMT",
    "Scotland": "1400 GMT",
    "England": "1800 GMT",
    "Northern Ireland": "1400 GMT",
}
35 |
# Search index schema: every document stores its country, its area (set
# only for sub-country entries) and the full display name of the location.
SCHEMA = Schema(
    country=TEXT(stored=True),
    area=TEXT(stored=True),
    location=TEXT(stored=True),
)
38 |
39 |
class DataSource:
    """Fetches case data from the upstream sources and answers lookups.

    ``cases`` maps a country name to a dict holding ``'areas'`` (area name
    -> record) and, when the source provides one, ``'totals'`` (a record
    for the whole country). A record is a dict with at least ``cases`` and
    ``last_update``; offloop records also carry ``deaths`` and
    ``recoveries``.
    """

    def __init__(self, log, http):
        # TODO create our own logger
        self.log, self.http = log, http
        self.cases = {}
        self.groups = {}
        # Search index over location names. Stays None until
        # _update_index() has run for the first time.
        self.index = None

    async def _fetch_text(self, url: str) -> str:
        """Download *url* and return the response body as text."""
        self.log.debug("Fetching %s.", url)
        async with self.http.get(url) as r:
            return await r.text()

    async def _get_total_cases_csv(self, url: str, name_field: str) -> dict:
        """Return ``{row[name_field]: TotalCases}`` for the CSV at *url*.

        Thousands separators in ``TotalCases`` are removed before parsing.
        """
        text = await self._fetch_text(url)
        return {
            row[name_field]: int(row['TotalCases'].replace(',', ''))
            for row in csv.DictReader(text.splitlines())
        }

    @staticmethod
    def _latest_rows(rows: list, country: str):
        """Return ``(date, rows)`` for the latest date *country* has data.

        Dates are compared as strings, so they must sort chronologically
        (the callers parse them as ``%Y-%m-%d``). Returns None when there
        are no rows for *country* at all.
        """
        country_rows = [r for r in rows if r["Country"] == country]
        if not country_rows:
            return None

        latest = max(r["Date"] for r in country_rows)
        return latest, [r for r in country_rows if r["Date"] == latest]

    @staticmethod
    def _uk_timestamp(date: str, update_time: str) -> datetime.datetime:
        """Combine a ``%Y-%m-%d`` date with a nominal ``%H%M %Z`` time."""
        return datetime.datetime.strptime(
            f"{date} {update_time}", "%Y-%m-%d %H%M %Z")

    async def _get_offloop_groups(self):
        """Return ``{group: [area, ...]}`` parsed from the groups file."""
        groups = {}
        text = await self._fetch_text(OFFLOOP_GROUPS_URL)

        # group;country_1;country_2 ...
        for row in csv.reader(text.splitlines(), delimiter=';'):
            if not row:  # a blank line yields an empty row
                continue
            group, *areas = row
            groups[group] = areas

        return groups

    async def _get_nhs(self):
        """Return ``{NHS region name: total cases}``."""
        # GSS_CD, NHSRNm, TotalCases
        return await self._get_total_cases_csv(NHS_URL, 'NHSRNm')

    async def _get_uk(self):
        """Return ``{area name: total cases}`` from the UK-wide CSV."""
        # GSS_CD, GSS_NM, TotalCases
        return await self._get_total_cases_csv(UK_URL, 'GSS_NM')

    async def _get_uk_countries(self) -> dict:
        """Get UK constituent countries: WAL/SCO/ENG/NI

        Returns ``{country: record}`` where the record holds the latest
        value of every indicator (lower-cased, with ``confirmedcases``
        renamed to ``cases``) plus ``last_update``. Countries with no rows,
        or with no case count on their latest date, are left out.
        """
        text = await self._fetch_text(UK_COUNTRIES_URL)
        rows = list(csv.DictReader(text.splitlines()))

        # GB/UK data is processed elsewhere
        countries_data = {}
        for country, update_time in UK_COUNTRIES.items():
            latest = self._latest_rows(rows, country)
            if latest is None:
                self.log.warning('No indicator data found for %s.', country)
                continue

            date, latest_rows = latest
            # Pivot data to covbot format
            record = {r["Indicator"].lower(): int(r["Value"])
                      for r in latest_rows}
            # Rename confirmedcases → cases
            if "confirmedcases" in record:
                record["cases"] = record.pop("confirmedcases")

            # Consumers index records by 'cases', so one without it is
            # unusable.
            if "cases" not in record:
                self.log.warning(
                    'No case count for %s on %s.', country, date)
                continue

            record["last_update"] = self._uk_timestamp(date, update_time)
            countries_data[country] = record

        return countries_data

    async def _get_uk_regions(self) -> dict:
        """Return dict of UK region data

        Maps each area name to ``{'cases': ..., 'last_update': ...}`` using
        the latest date available for the area's country.
        """
        text = await self._fetch_text(UK_REGIONS_URL)
        rows = list(csv.DictReader(text.splitlines()))

        uk_region_data = {}
        for country, update_time in UK_COUNTRIES.items():
            latest = self._latest_rows(rows, country)
            if latest is None:
                self.log.warning('No region data found for %s.', country)
                continue

            date, latest_rows = latest
            last_update = self._uk_timestamp(date, update_time)
            for r in latest_rows:
                # Fix for GJNH data 2020-04-22 (blank)
                total = r["TotalCases"]
                uk_region_data[r["Area"]] = {
                    "cases": 0 if total == '' else int(total),
                    "last_update": last_update,
                }

        return uk_region_data

    async def _get_offloop_cases(self):
        """Return the offloop data as ``{country: {'areas': ..., 'totals': ...}}``.

        A row whose province is blank or equal to the country name is that
        country's total; every other row is stored as an area. Blank counts
        are treated as 0 and a blank timestamp as the time of the fetch.
        """
        countries = {}
        now = time.time() * 1000  # millis to match the data

        text = await self._fetch_text(OFFLOOP_CASES_URL)

        def to_int(value):
            # handle missing data
            return 0 if value == '' else int(value)

        # Country;Province;Confirmed;Deaths;Recovered;LastUpdated
        for row in csv.DictReader(text.splitlines(), delimiter=';'):
            country = COUNTRY_RENAMES.get(row['Country'], row['Country'])
            entry = countries.setdefault(country, {'areas': {}})

            ts_msec = now if row['LastUpdated'] == '' else int(
                row['LastUpdated'])
            record = {
                'cases': to_int(row['Confirmed']),
                'deaths': to_int(row['Deaths']),
                'recoveries': to_int(row['Recovered']),
                'last_update': datetime.datetime.utcfromtimestamp(
                    ts_msec // 1000),
            }

            area = row['Province']
            # Do we have a total?
            # area for totals can be either blank or matching the country
            if area == '' or area.lower() == country.lower():
                if 'totals' in entry:
                    self.log.warning('Duplicate totals for %s.', country)

                # TODO take the max for each value
                entry['totals'] = record
            else:  # or an area?
                entry['areas'][area] = record

        return countries

    async def _get_finland(self):
        """Return ``{health care district: number of confirmed cases}``."""
        districts = {}

        self.log.debug("Fetching %s.", FINLAND_URL)
        async with self.http.get(FINLAND_URL) as r:
            j = await r.json()

        for case in j['confirmed']:
            d = case['healthCareDistrict']
            if d is None or d == '':  # skip missing data
                continue

            districts[d] = districts.get(d, 0) + 1

        return districts

    def _update_index(self):
        """Rebuild the location search index from ``self.cases``.

        One document is added per country and one per area, the latter
        with the location name ``"<area>, <country>"``.
        """
        # create a new index
        d = '/tmp/covbotindex'
        self.log.debug('Updating index in %s.', d)
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(d, exist_ok=True)

        self.index = create_in(d, SCHEMA)
        idx_w = self.index.writer()

        # add all the documents
        for c, c_data in self.cases.items():
            # TODO should this be conditional on a record existing?
            idx_w.add_document(country=c, location=c)
            for a in c_data['areas']:
                idx_w.add_document(country=c, area=a, location=f'{a}, {c}')

        idx_w.commit()

    async def update(self):
        """Fetch all sources, merge them into ``self.cases`` and reindex.

        Finnish districts and UK countries/regions are merged in as areas
        of 'Finland' and 'United Kingdom'. An exception from any of the
        fetches propagates to the caller and leaves ``self.cases``
        untouched.
        """
        now = datetime.datetime.utcfromtimestamp(int(time.time()))

        self.log.info('Updating data.')
        # NOTE: _get_nhs() and _get_uk() are not part of the merge.
        offloop, finland, uk_countries, uk_regions = await asyncio.gather(
            self._get_offloop_cases(),
            self._get_finland(),
            self._get_uk_countries(),
            self._get_uk_regions(),
        )

        # setdefault: do not assume offloop lists the country at all.
        finland_areas = offloop.setdefault('Finland', {'areas': {}})['areas']
        for area, cases in finland.items():
            finland_areas[area] = {'cases': cases, 'last_update': now}

        uk_areas = offloop.setdefault(
            'United Kingdom', {'areas': {}})['areas']
        uk_areas.update(uk_countries)
        # Regions are applied last so they win on a name clash.
        uk_areas.update(uk_regions)

        self.cases = offloop
        await asyncio.get_running_loop().run_in_executor(
            None, self._update_index)

    def _exact_country_code_match(self, query: str) -> list:
        """Look *query* up as an alpha-2 or alpha-3 country code.

        Returns ``[(country name, totals)]``, or None when the code is
        unknown or there are no totals for that country.
        """
        self.log.debug('Trying an exact country code match on %s.', query)
        cc = query.upper()

        # TODO generalise.
        # Handle UK alias.
        if cc == 'UK':
            cc = 'GB'

        c = pycountry.countries.get(
            alpha_2=cc) or pycountry.countries.get(alpha_3=cc)
        if c is None:
            return None

        self.log.debug('Country code %s is %s.', cc, c.name)

        d = self.cases.get(c.name)
        if d is None:
            self.log.warning('No data for %s.', c.name)
            return None

        if 'totals' not in d:
            self.log.debug('No totals found for %s.', c.name)
            return None

        return [(c.name, d['totals'])]

    def _exact_country_match(self, query: str) -> list:
        """Match *query* case-insensitively against the country names.

        Returns ``[(country, totals)]``, or None when no country matches
        or the matching country has no totals.
        """
        self.log.debug('Trying an exact country match on %s.', query)
        wanted = query.lower()
        for country, entry in self.cases.items():
            if country.lower() != wanted:
                continue

            self.log.debug('Got an exact country match on %s.', query)
            if 'totals' not in entry:
                self.log.debug('No totals found for %s.', country)
                return None

            return [(country, entry['totals'])]

        return None

    def _exact_region_match(self, query: str) -> list:
        """Return ``("<area>, <country>", record)`` for every area whose
        name equals *query*, ignoring case."""
        self.log.debug('Trying an exact region match on %s.', query)
        wanted = query.lower()
        regions = [
            (f'{area}, {country}', record)
            for country, entry in self.cases.items()
            for area, record in entry['areas'].items()
            if area.lower() == wanted
        ]

        if regions:
            self.log.debug(
                'Got exact region matches on %s: %s.', query, regions)

        return regions

    def _wildcard_location_match(self, query: str) -> list:
        """Return ``(location, record)`` for indexed locations matching
        ``*query*``.

        Returns an empty list when no index has been built yet. Hits with
        no corresponding record in ``self.cases`` (a country without
        totals, or an index that is out of step with the data) are
        skipped.
        """
        self.log.debug('Trying a wildcard location match on %s.', query)
        if self.index is None:
            self.log.debug('No index has been built yet.')
            return []

        locs = []
        with self.index.searcher() as s:
            q = QueryParser("location", SCHEMA).parse(f'*{query}*')
            for m in s.search(q, limit=None):
                entry = self.cases.get(m['country'], {})
                if 'area' in m:
                    d = entry.get('areas', {}).get(m['area'])
                else:
                    d = entry.get('totals')

                if d is None:
                    continue

                locs.append((m['location'], d))

        if locs:
            self.log.debug(
                'Found wildcard location matches on %s: %s.', query, locs)

        return locs

    def get(self, query: str) -> list:
        """Return the ``(location, record)`` matches for *query*.

        Tries, in order: country code, exact country name, exact area name
        and finally a wildcard search; the first strategy that finds
        anything wins. Surrounding whitespace is ignored and a blank query
        matches nothing.
        """
        query = query.strip()
        self.log.info('Looking up data for %s.', query)
        if not query:
            return []

        for exact in (self._exact_country_code_match,
                      self._exact_country_match):
            m = exact(query)
            if m is not None:
                return m

        areas = self._exact_region_match(query)
        if areas:
            return areas

        return self._wildcard_location_match(query)

    def get_mult(self, *queries: str) -> list:
        """Return the result of ``get()`` for each query, in order."""
        return [self.get(q) for q in queries]

    @classmethod
    def get_sources(cls) -> str:
        """Return a human-readable list of the URLs that update() fetches."""
        return (f"{OFFLOOP_CASES_URL}, {FINLAND_URL}, {UK_COUNTRIES_URL}"
                f" and {UK_REGIONS_URL}")
391 |
--------------------------------------------------------------------------------
/maubot.yaml:
--------------------------------------------------------------------------------
1 | maubot: 0.1.0
2 | id: dev.shortestpath.covbot
3 | version: 0.1.5
4 | license: MIT
5 | modules:
6 | - covbot
7 | main_class: CovBot
8 | dependencies:
9 | - whoosh
10 | - pycountry
11 | - tabulate
12 | database: false
13 | extra_files:
14 | - LICENSE
15 | - base-config.yaml
16 |
--------------------------------------------------------------------------------
/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pwr22/covbot/1f3b7de36ad72ce3583a576a392e1b750420654f/screenshot.png
--------------------------------------------------------------------------------