├── Dockerfile ├── LICENSE ├── README.md ├── base-config.yaml ├── covbot ├── __init__.py ├── bot.py └── data.py ├── maubot.yaml └── screenshot.png /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM dock.mau.dev/maubot/maubot 2 | 3 | VOLUME /src 4 | CMD mbc build /src && cp /opt/maubot/*.mbp /src -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Peter Roberts 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # covbot 2 | 3 | CovBot has served his time and now been shut down. The code here is deprecated and has not been updated in quite some time. 4 | -------------------------------------------------------------------------------- /base-config.yaml: -------------------------------------------------------------------------------- 1 | # Users allowed to perform admin actions 2 | # CHANGE THIS 3 | admins: 4 | - '@user:example.com' -------------------------------------------------------------------------------- /covbot/__init__.py: -------------------------------------------------------------------------------- 1 | from .bot import CovBot -------------------------------------------------------------------------------- /covbot/bot.py: -------------------------------------------------------------------------------- 1 | from mautrix.types import EventType 2 | from maubot import Plugin, MessageEvent 3 | from maubot.matrix import parse_formatted 4 | from maubot.handlers import event, command 5 | from mautrix.util.config import BaseProxyConfig, ConfigUpdateHelper 6 | 7 | import os 8 | import csv 9 | import datetime 10 | import asyncio 11 | import math 12 | import whoosh 13 | import time 14 | import pycountry 15 | import traceback 16 | from whoosh.fields import Schema, TEXT 17 | from whoosh.index import create_in, FileIndex 18 | from whoosh.qparser import QueryParser 19 | from tabulate import tabulate 20 | from mautrix.types import TextMessageEventContent, MessageType 21 | from mautrix.client import MembershipEventDispatcher, InternalEventType 22 | from mautrix.errors.request import MLimitExceeded 23 | 24 | from .data import DataSource 25 | 26 | RATE_LIMIT_BACKOFF_SECONDS = 10 27 | UPDATE_INTERVAL_SECONDS = 15 * 60 28 | ROOM_PRUNE_INTERVAL_SECONDS = 60 * 60 29 | 30 | # command: ( usage, description ) 31 | HELP = { 32 | 'cases': ( 33 | '!cases location', 34 | 'Get up to date info on cases, optionally in a specific location.' 35 | ' You can give a country code, country, state, county, region or city.' 36 | ' E.g. !cases china' 37 | ), 38 | 'compare': ( 39 | '!compare locations', 40 | 'Compare up to date info on cases in multiple locations.' 41 | 'If it looks bad on mobile try rotating into landscape mode. ' 42 | ' Separate the locations with semicolons (;).' 43 | ' You can give a country codes, countries, states, counties, regions or cities.' 44 | ' E.g. !compare cn;us;uk;it;de' 45 | ), 46 | 'risk': ('!risk age', 'For a person of the given age, what is is the risk to them if they become sick with COVID-19?'), 47 | 'source': ('!source', 'Find out about my data sources and developers.'), 48 | 'help': ('!help', 'Get a reminder what I can do for you.'), 49 | } 50 | 51 | 52 | class Config(BaseProxyConfig): 53 | def do_update(self, helper: ConfigUpdateHelper) -> None: 54 | helper.copy("admins") 55 | 56 | 57 | class CovBot(Plugin): 58 | next_update_at: datetime.datetime = None 59 | _rooms_joined = {} 60 | 61 | async def _handle_rate_limit(self, api_call_wrapper): 62 | while True: 63 | try: 64 | return await api_call_wrapper() 65 | except MLimitExceeded: 66 | self.log.warning( 67 | 'API rate limit exceepted so backing off for %s seconds.', RATE_LIMIT_BACKOFF_SECONDS) 68 | await asyncio.sleep(RATE_LIMIT_BACKOFF_SECONDS) 69 | except Exception: # ignore other errors but give up 70 | tb = traceback.format_exc() 71 | self.log.warning('%s', tb) 72 | return 73 | 74 | async def _prune_dead_rooms(self): 75 | while True: 76 | self.log.info('Tidying up empty rooms.') 77 | users = set() 78 | rooms = await self._handle_rate_limit(lambda: self.client.get_joined_rooms()) 79 | 80 | left = 0 81 | for r in rooms: 82 | members = await self._handle_rate_limit(lambda: self.client.get_joined_members(r)) 83 | 84 | if len(members) == 1: 85 | self.log.debug('Leaving empty room %s.', r) 86 | await self._handle_rate_limit(lambda: self.client.leave_room(r)) 87 | left += 1 88 | else: 89 | for m in members: 90 | users.add(m) 91 | 92 | self.log.debug('I am in %s rooms.', len(rooms) - left) 93 | self.log.debug('I reach %s unique users.', 94 | len(users) - 1) # ignore myself 95 | await asyncio.sleep(ROOM_PRUNE_INTERVAL_SECONDS) 96 | 97 | async def _update_data(self): 98 | while True: 99 | try: 100 | await self.data.update() 101 | except Exception: 102 | tb = traceback.format_exc() 103 | self.log.warn( 104 | 'Failed to update data: %s.', tb) 105 | 106 | await asyncio.sleep(UPDATE_INTERVAL_SECONDS) 107 | 108 | @classmethod 109 | def get_config_class(cls) -> BaseProxyConfig: 110 | return Config 111 | 112 | async def start(self): 113 | await super().start() 114 | self.config.load_and_update() 115 | # So we can get room join events. 116 | self.client.add_dispatcher(MembershipEventDispatcher) 117 | self.data = DataSource(self.log, self.http) 118 | self._data_update_task = asyncio.create_task(self._update_data()) 119 | self._room_prune_task = asyncio.create_task(self._prune_dead_rooms()) 120 | 121 | async def stop(self): 122 | await super().stop() 123 | self.client.remove_dispatcher(MembershipEventDispatcher) 124 | self._room_prune_task.cancel() 125 | self._data_update_task.cancel() 126 | 127 | def _short_location(self, location: str, length=int(12)) -> str: 128 | """Returns a shortened location name. 129 | 130 | If exactly matches a country code, return that. (1) 131 | 132 | If shorter/equal than length, return intact. (2) 133 | 134 | Logic done in that order so that if someone passes a list 135 | of countries, they get the codes back, rather than a mix 136 | of codes and country names. 137 | 138 | If longer, split on commas and replace the final part with 139 | a country code if that matches. (3) 140 | 141 | If still too long, strip out 'middle' to get desired length. 142 | 143 | TODO: - consider stripping out ", City of," 144 | - consider simple truncation 145 | 146 | Example (length=12): 147 | United States → US 148 | Manchester, GB → Manch..r ,GB 149 | """ 150 | 151 | self.log.debug('Shortening %s.', location) 152 | 153 | # Exact country case (1) 154 | try: 155 | return pycountry.countries.lookup(location).alpha_2 156 | except LookupError: 157 | pass 158 | 159 | # It fits already (2) 160 | if len(location) <= length: 161 | return location 162 | 163 | # If there's commas, try to replace the last bit with a 164 | # country code (3) 165 | if "," in location: 166 | loc_parts = [s.strip() for s in location.split(",")] 167 | if pycountry.countries.lookup(loc_parts[-1]): 168 | loc_parts[-1] = pycountry.countries.lookup( 169 | loc_parts[-1]).alpha_2 170 | location = " ,".join(loc_parts) 171 | 172 | # If what we have is still longer, cut out the middle (4) 173 | if len(location) <= length: 174 | return location 175 | else: 176 | return "..".join([ 177 | location[:int((length-2)/2)], 178 | location[-int((length-2)/2):] 179 | ]) 180 | 181 | async def _locations_table(self, event: MessageEvent, data: dict, 182 | tabletype=str("text"), 183 | length=str("long")) -> str: 184 | """Build a table of locations to respond to. 185 | 186 | Uses tabulate module to tabulate data. 187 | 188 | Can be: 189 | - tabletype: text (default) or html 190 | - length: long (default), short or tiny 191 | 192 | Missing data (eg PHE) is handled and replaced 193 | by '---'; although this throws off tabulate's 194 | auto-alignment of numerical data. 195 | 196 | Tables by default report in following columns: 197 | - Location 198 | - Cases 199 | - Sick (%) 200 | - Recovered (%) 201 | - Deaths (%) 202 | 203 | Short table limits 'Location' to <= 12 chars 204 | and renames 'Recovered' to "matchesRec'd". 205 | 206 | Tiny table only outputs Loction and Cases columns. 207 | 208 | Table includes a 'Total' row, even where this makes 209 | no meaningful sense (eg countries + world data). 210 | """ 211 | MISSINGDATA = "---" 212 | 213 | self.log.debug('Building table for %s', data.keys()) 214 | columns = ["Location", "Cases"] 215 | 216 | if [v for v in data.values() if "recoveries" in v]: 217 | # At least one of the results has recovery data 218 | columns.extend(["Recovered", "%"]) 219 | 220 | if [v for v in data.values() if "deaths" in v]: 221 | # At least one of the results has deaths data 222 | columns.extend(["Deaths", "%"]) 223 | 224 | if "Recovered" in columns and "Deaths" in columns: 225 | # L - C - S - R - D 226 | columns.insert(2, "Sick") 227 | columns.insert(3, "%") 228 | 229 | # TODO: sort by cases descending 230 | tabledata = [] 231 | for location, data in data.items(): 232 | rowdata = [] 233 | cases = data['cases'] 234 | 235 | # Location 236 | if length == "short": 237 | rowdata.extend([self._short_location(location)]) 238 | else: 239 | rowdata.extend([location]) 240 | 241 | # Cases 242 | rowdata.extend([f'{cases:,}']) 243 | 244 | # TODO: decide if eliding % columns 245 | if "recoveries" in data: 246 | recs = data['recoveries'] 247 | per_rec = 0 if cases == 0 else \ 248 | int(recs) / int(cases) * 100 249 | 250 | rowdata.extend([f'{recs:,}', f"{per_rec:.1f}"]) 251 | else: 252 | rowdata.extend([MISSINGDATA, MISSINGDATA]) 253 | 254 | if "deaths" in data: 255 | deaths = data['deaths'] 256 | per_dead = 0 if cases == 0 else \ 257 | int(deaths) / int(cases) * 100 258 | 259 | rowdata.extend([f'{deaths:,}', f"{per_dead:.1f}"]) 260 | else: 261 | rowdata.extend([MISSINGDATA, MISSINGDATA]) 262 | 263 | if "recoveries" in data and "deaths" in data: 264 | sick = cases - int(data['recoveries']) - data['deaths'] 265 | per_sick = 100 - per_rec - per_dead 266 | 267 | rowdata.insert(2, f'{sick:,}') 268 | rowdata.insert(3, f"{per_sick:.1f}") 269 | else: 270 | rowdata.extend([MISSINGDATA, MISSINGDATA]) 271 | 272 | # Trim data for which there are no columns 273 | rowdata = rowdata[:len(columns)] 274 | 275 | tabledata.append(rowdata) 276 | 277 | # Shorten columns if needed 278 | if length == "short": 279 | columns = [w.replace("Recovered", "Rec'd") for w in columns] 280 | # Minimal- cases only: 281 | if length == "tiny": 282 | columns = columns[:2] 283 | tabledata = [row[:2] for row in tabledata] 284 | 285 | # Build table 286 | if tabletype == "html": 287 | tablefmt = "html" 288 | else: 289 | tablefmt = "presto" 290 | 291 | table = tabulate(tabledata, headers=columns, 292 | tablefmt=tablefmt, floatfmt=".1f") 293 | 294 | if data: 295 | return table 296 | 297 | async def _respond(self, e: MessageEvent, m: str) -> None: 298 | # IRC people don't like notices. 299 | if '@appservice-irc:matrix.org' in await self.client.get_joined_members(e.room_id): 300 | t = MessageType.TEXT 301 | else: # But matrix people do. 302 | t = MessageType.NOTICE 303 | 304 | c = TextMessageEventContent(msgtype=t, body=m) 305 | await self._handle_rate_limit(lambda: e.respond(c)) 306 | 307 | async def _respond_formatted(self, e: MessageEvent, m: str) -> None: 308 | """Respond with formatted message in m.text matrix format, 309 | not m.notice. 310 | 311 | This is needed as mobile clients (Riot 0.9.10, RiotX) currently 312 | do not seem to render markdown / HTML in m.notice events 313 | which are conventionally send by bots. 314 | 315 | Desktop/web Riot.im does render MD/HTML in m.notice, however. 316 | """ 317 | # IRC people don't like notices. 318 | if '@appservice-irc:matrix.org' in await self.client.get_joined_members(e.room_id): 319 | t = MessageType.TEXT 320 | else: # But matrix people do. 321 | t = MessageType.NOTICE 322 | 323 | c = TextMessageEventContent( 324 | msgtype=t, formatted_body=m, format="org.matrix.custom.html") 325 | c.body, c.formatted_body = parse_formatted(m, allow_html=True) 326 | await e.respond(c, markdown=True, allow_html=True) 327 | 328 | # source : https://www.desmos.com/calculator/v0zif7tflm 329 | @command.new('risk', help=HELP['risk'][1]) 330 | @command.argument("age", pass_raw=True, required=True) 331 | async def risks_handler(self, event: MessageEvent, age: str) -> None: 332 | self.log.info( 333 | "Responding to !risk request for age %s from %s.", age, event.sender) 334 | 335 | try: 336 | age = int(age) 337 | except ValueError: 338 | self.log.warn( 339 | "Age %s is not an int, letting %s know.", age, event.sender) 340 | await self._respond(event, f'{age} does not look like a number to me.') 341 | return 342 | 343 | if age < 0 or age > 110: 344 | self.log.warn( 345 | '%s is out of the age range of the risk model, letting %s know.', age, event.sender) 346 | await self._respond(event, "The risk model only handles ages between 0 and 110.") 347 | return 348 | 349 | # Maths that Peter doesn't really understand! 350 | death_rate = max(0, -0.00186807 + 0.00000351867 * 351 | age ** 2 + (2.7595 * 10 ** -15) * age ** 7) 352 | ic_rate = max(0, -0.0572602 - -0.0027617 * age) 353 | h_rate = max(0, -0.0730827 - age * -0.00628289) 354 | survival_rate = 1 - death_rate 355 | 356 | s = ( 357 | f"I estimate a {age} year old patient sick with COVID-19 has a {survival_rate:.1%} chance of survival," 358 | f" a {h_rate:.1%} likelihood of needing to go to hospital, a {ic_rate:.1%} risk of needing intensive care there" 359 | f" and a {death_rate:.1%} chance of death." 360 | ) 361 | 362 | await self._respond(event, s) 363 | 364 | @command.new('cases', help=HELP['cases'][1]) 365 | @command.argument("location", pass_raw=True, required=False) 366 | async def cases_handler(self, event: MessageEvent, location: str) -> None: 367 | if location == "": 368 | location = "World" 369 | 370 | self.log.info('Responding to !cases request for %s from %s.', 371 | location, event.sender) 372 | matches = self.data.get(location) 373 | 374 | if len(matches) == 0: 375 | self.log.debug( 376 | 'No matches found for %s, letting %s know.', location, event.sender) 377 | await self._respond( 378 | event, 379 | f'My data doesn\'t seem to include {location}.' 380 | ' It might be under a different name, data on it might not be available or there could even be no cases.' 381 | ' You may have more luck if you try a less specific location, like the country it\'s in.' 382 | f' \n\nIf you think I should have data on it you can open an issue at https://github.com/pwr22/covbot/issues and Peter will take a look.' 383 | ) 384 | return 385 | elif len(matches) > 5: 386 | self.log.debug( 387 | "Too many results for %s, advising %s to be more specific.", location, event.sender) 388 | await self._respond(event, f'I found a lot of matches for {location}. Please could you be more specific?') 389 | return 390 | elif len(matches) > 1: 391 | self.log.debug( 392 | "Found multiple results for %s, providing them to %sr so they can try again.", location, event.sender) 393 | ms = "\n".join(m[0] for m in matches) 394 | await self._respond(event, f"Which of these did you mean?\n\n{ms}") 395 | return 396 | 397 | m_loc, data = matches[0] 398 | cases, last_update = data['cases'], data['last_update'] 399 | s = f'In {m_loc} there have been a total of {cases:,} cases as of {last_update} UTC.' 400 | 401 | # some data is more detailed 402 | if 'recoveries' in data and 'deaths' in data: 403 | recoveries, deaths = data['recoveries'], data['deaths'] 404 | sick = cases - recoveries - deaths 405 | 406 | per_rec = 0 if cases == 0 else int(recoveries) / int(cases) * 100 407 | per_dead = 0 if cases == 0 else int(deaths) / int(cases) * 100 408 | per_sick = 100 - per_rec - per_dead 409 | 410 | s += ( 411 | f' Of these {sick:,} ({per_sick:.1f}%) are still sick or may have recovered without being recorded,' 412 | f' {recoveries:,} ({per_rec:.1f}%) have definitely recovered' 413 | f' and {deaths:,} ({per_dead:.1f}%) have died.' 414 | ) 415 | 416 | await self._respond( 417 | event, 418 | s 419 | ) 420 | 421 | @command.new('compare', help=HELP["compare"][1]) 422 | @command.argument("locations", pass_raw=True, required=True) 423 | async def table_handler(self, event: MessageEvent, locations: str) -> None: 424 | self.log.info( 425 | "Responding to !compare request for %s from %s.", locations, event.sender) 426 | 427 | results = {} 428 | for loc in locations.split(";"): 429 | matches = self.data.get(loc) 430 | 431 | if len(matches) == 0: 432 | self.log.debug( 433 | "No matches found for %s, letting %s know.", loc, event.sender) 434 | await self._respond(event, 435 | f"I cannot find a match for {loc}") 436 | return 437 | elif len(matches) > 5: 438 | self.log.debug( 439 | "Too many results for %s, advising %s to be more specific.", loc, event.sender) 440 | await self._respond(event, f'I found a lot of matches for {loc}. Please could you be more specific?') 441 | return {} 442 | elif len(matches) > 1: 443 | self.log.debug( 444 | "Found multiple results for %s, providing them to %s so they can try again.", loc, event.sender) 445 | ms = " - ".join(m[0] for m in matches) 446 | await self._respond(event, 447 | f"Multiple results for {loc}: {ms}. " 448 | "Please provide one.") 449 | return {} 450 | 451 | m = matches.pop() # there's only one 452 | loc, data = m 453 | results[loc] = data 454 | 455 | t = await self._locations_table(event, data=results, 456 | tabletype="text", 457 | length="long") 458 | if t: 459 | await self._respond_formatted(event, f'
{t}
') 460 | 461 | @command.new('source', help=HELP['source'][1]) 462 | async def source_handler(self, event: MessageEvent) -> None: 463 | self.log.info('Responding to !source request from %s.', event.sender) 464 | await self._respond( 465 | event, 466 | 'I was created by Peter Roberts and MIT licensed on Github at https://github.com/pwr22/covbot.' 467 | f' I fetch new data every 15 minutes from {self.data.get_sources()}.' 468 | f' Risk estimates are based on the model at https://www.desmos.com/calculator/v0zif7tflm.' 469 | ) 470 | 471 | @command.new('help', help=HELP['help'][1]) 472 | async def help_handler(self, event: MessageEvent) -> None: 473 | self.log.info('Responding to !help request from %s.', event.sender) 474 | 475 | s = 'You can message me any of these commands:\n\n' 476 | s += '\n\n'.join(f'{usage} - {desc}' for (usage, 477 | desc) in HELP.values()) 478 | await self._message(event.room_id, s) 479 | 480 | async def _message(self, room_id, m: str) -> None: 481 | # IRC people don't like notices. 482 | if '@appservice-irc:matrix.org' in await self.client.get_joined_members(room_id): 483 | t = MessageType.TEXT 484 | else: # But matrix people do. 485 | t = MessageType.NOTICE 486 | 487 | c = TextMessageEventContent(msgtype=t, body=m) 488 | await self._handle_rate_limit(lambda: self.client.send_message(room_id=room_id, content=c)) 489 | 490 | @command.new('announce', help='Send broadcast a message to all rooms.') 491 | @command.argument("message", pass_raw=True, required=True) 492 | async def announce_handler(self, event: MessageEvent, message: str) -> None: 493 | 494 | if event.sender not in self.config['admins']: 495 | self.log.warn( 496 | 'User %s tried to send an announcement but only admins are authorised to do so.' 497 | ' They tried to send %s.', 498 | event.sender, message 499 | ) 500 | await self._respond(event, 'You do not have permission to !announce.') 501 | return None 502 | 503 | rooms = await self._handle_rate_limit(lambda: self.client.get_joined_rooms()) 504 | self.log.info('Sending announcement %s to all %s rooms', 505 | message, len(rooms)) 506 | 507 | for r in rooms: 508 | await self._message(r, message) 509 | 510 | @event.on(InternalEventType.JOIN) 511 | async def join_handler(self, event: InternalEventType.JOIN) -> None: 512 | me = await self._handle_rate_limit(lambda: self.client.whoami()) 513 | 514 | # Ignore all joins but mine. 515 | if event.sender != me: 516 | return 517 | 518 | if event.room_id in self._rooms_joined: 519 | self.log.warning( 520 | 'Duplicate join event for room %s.', event.room_id) 521 | return 522 | 523 | # work around duplicate joins 524 | self._rooms_joined[event.room_id] = True 525 | self.log.info( 526 | 'Sending unsolicited help on join to room %s.', event.room_id) 527 | 528 | s = 'Hi, I am a bot that tracks SARS-COV-2 infection statistics for you. You can message me any of these commands:\n\n' 529 | s += '\n'.join(f'{usage} - {desc}' for (usage, desc) in HELP.values()) 530 | await self._message(event.room_id, s) 531 | -------------------------------------------------------------------------------- /covbot/data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | import datetime 4 | import asyncio 5 | import math 6 | import whoosh 7 | import time 8 | import pycountry 9 | from whoosh.fields import Schema, TEXT 10 | from whoosh.index import create_in, FileIndex 11 | from whoosh.qparser import QueryParser 12 | from tabulate import tabulate 13 | from mautrix.types import TextMessageEventContent, MessageType 14 | from mautrix.client import MembershipEventDispatcher, InternalEventType 15 | from mautrix.errors.request import MLimitExceeded 16 | 17 | OFFLOOP_CASES_URL = 'http://offloop.net/covid19h/unconfirmed.csv' 18 | OFFLOOP_GROUPS_URL = 'https://offloop.net/covid19h/groups.txt' 19 | NHS_URL = 'https://www.arcgis.com/sharing/rest/content/items/ca796627a2294c51926865748c4a56e8/data' 20 | UK_URL = 'https://www.arcgis.com/sharing/rest/content/items/b684319181f94875a6879bbc833ca3a6/data' 21 | FINLAND_URL = 'https://w3qa5ydb4l.execute-api.eu-west-1.amazonaws.com/prod/finnishCoronaData/v2' 22 | UK_COUNTRIES_URL = 'https://raw.githubusercontent.com/tomwhite/covid-19-uk-data/master/data/covid-19-indicators-uk.csv' 23 | UK_REGIONS_URL = 'https://raw.githubusercontent.com/tomwhite/covid-19-uk-data/master/data/covid-19-cases-uk.csv' 24 | 25 | COUNTRY_RENAMES = { 26 | 'US': 'United States', 27 | 'DRC': 'Democratic Republic of the Congo', 28 | 'UAE': 'United Arab Emirates', 29 | "U.S. Virgin Islands": "United States Virgin Islands" 30 | } 31 | 32 | # UK constituent countries with their nominal data update times 33 | UK_COUNTRIES = {"Wales": "1200 GMT", "Scotland": "1400 GMT", 34 | "England": "1800 GMT", "Northern Ireland": "1400 GMT"} 35 | 36 | SCHEMA = Schema(country=TEXT(stored=True), area=TEXT( 37 | stored=True), location=TEXT(stored=True)) 38 | 39 | 40 | class DataSource: 41 | def __init__(self, log, http): 42 | # TODO create our own logger 43 | self.log, self.http = log, http 44 | self.cases = {} 45 | self.groups = {} 46 | 47 | async def _get_offloop_groups(self): 48 | groups = {} 49 | 50 | self.log.debug("Fetching %s.", OFFLOOP_GROUPS_URL) 51 | async with self.http.get(OFFLOOP_GROUPS_URL) as r: 52 | t = await r.text() 53 | 54 | # group;country_1;country_2 ... 55 | cr = csv.reader(t.splitlines(), delimiter=';') 56 | for group, *areas in cr: 57 | groups[group] = areas 58 | 59 | return groups 60 | 61 | async def _get_nhs(self): 62 | regions = {} 63 | 64 | self.log.debug("Fetching %s.", NHS_URL) 65 | async with self.http.get(NHS_URL) as r: 66 | t = await r.text() 67 | l = t.splitlines() 68 | 69 | # GSS_CD, NHSRNm, TotalCases 70 | cr = csv.DictReader(l) 71 | for row in cr: 72 | regions[row['NHSRNm']] = int(row['TotalCases'].replace(',', '')) 73 | 74 | return regions 75 | 76 | async def _get_uk(self): 77 | regions = {} 78 | 79 | self.log.debug("Fetching %s.", UK_URL) 80 | async with self.http.get(UK_URL) as r: 81 | t = await r.text() 82 | l = t.splitlines() 83 | 84 | # GSS_CD, GSS_NM, TotalCases 85 | cr = csv.DictReader(l) 86 | for row in cr: 87 | regions[row['GSS_NM']] = int(row['TotalCases'].replace(',', '')) 88 | 89 | return regions 90 | 91 | async def _get_uk_countries(self) -> dict: 92 | """Get UK constituent countries: WAL/SCO/ENG/NI 93 | 94 | Data is processed by _process_uk_countries() 95 | before being returned as a dict 96 | """ 97 | 98 | async def _process_uk_countries(uk_countries_data: list) -> dict: 99 | """Process to covbot format: 100 | 101 | {Country1: {data1}, Country2: {data2}, ...} 102 | """ 103 | # GB/UK data is processed elsewhere 104 | countries_data = {} 105 | for country, update_time in UK_COUNTRIES.items(): 106 | # Filter data to country (ie Wales/Scotland/England/NI) 107 | country_data = [r for r in uk_countries_data 108 | if r["Country"] == country] 109 | # Find latest (= maximum) date and use that 110 | maxidate = max([r["Date"] for r in country_data]) 111 | latest_country_data = [r for r in country_data 112 | if r["Date"] == maxidate] 113 | latest_data_d = {} 114 | # Pivot data to covbot format 115 | for r in latest_country_data: 116 | latest_data_d[r["Indicator"].lower()] = int(r["Value"]) 117 | # Rename confirmedcases → cases 118 | if "confirmedcases" in latest_data_d: 119 | latest_data_d["cases"] = int(latest_data_d.pop( 120 | "confirmedcases")) 121 | latest_data_d["last_update"] = datetime.datetime.\ 122 | strptime(f"{maxidate} {update_time}", 123 | "%Y-%m-%d %H%M %Z") 124 | countries_data[country] = latest_data_d 125 | 126 | return countries_data 127 | 128 | async with self.http.get(UK_COUNTRIES_URL) as r: 129 | t = await r.text() 130 | lines = t.splitlines() 131 | 132 | cr = list(csv.DictReader(lines)) 133 | 134 | uk_country_data = await _process_uk_countries(cr) 135 | 136 | return uk_country_data 137 | 138 | async def _get_uk_regions(self) -> dict: 139 | """Return dict of UK region data""" 140 | async def _process_uk_regions(regions_data: list) -> dict: 141 | """Filter region data and process to covbot format""" 142 | uk_region_data = {} 143 | 144 | for country in UK_COUNTRIES.keys(): 145 | country_regions = [r for r in regions_data if r["Country"] == country] 146 | # Get latest data 147 | maxidate = max([r["Date"] for r in country_regions]) 148 | region_data = [r for r in country_regions if r["Date"] == maxidate] 149 | for r in region_data: 150 | # Fix for GJNH data 2020-04-22 (blank) 151 | if r["TotalCases"] == '': 152 | r["TotalCases"] = 0 153 | uk_region_data[r["Area"]] = { 154 | "cases": int(r["TotalCases"]), "last_update": 155 | datetime.datetime.strptime( 156 | f"{maxidate} {UK_COUNTRIES[country]}", 157 | "%Y-%m-%d %H%M %Z")} 158 | 159 | return uk_region_data 160 | 161 | async with self.http.get(UK_REGIONS_URL) as r: 162 | t = await r.text() 163 | lines = t.splitlines() 164 | 165 | cr = list(csv.DictReader(lines)) 166 | 167 | uk_region_data = await _process_uk_regions(cr) 168 | 169 | return uk_region_data 170 | 171 | async def _get_offloop_cases(self): 172 | countries = {} 173 | now = time.time() * 1000 # millis to match the data 174 | 175 | self.log.debug("Fetching %s.", OFFLOOP_CASES_URL) 176 | async with self.http.get(OFFLOOP_CASES_URL) as r: 177 | t = await r.text() 178 | l = t.splitlines() 179 | 180 | # Country;Province;Confirmed;Deaths;Recovered;LastUpdated 181 | cr = csv.DictReader(l, delimiter=';') 182 | for row in cr: 183 | country = row['Country'] 184 | if country in COUNTRY_RENAMES: 185 | country = COUNTRY_RENAMES[country] 186 | 187 | if not country in countries: 188 | countries[country] = {'areas': {}} 189 | # 190 | # handle missing data 191 | cases = 0 if row['Confirmed'] == '' else int(row['Confirmed']) 192 | deaths = 0 if row['Deaths'] == '' else int(row['Deaths']) 193 | recoveries = 0 if row['Recovered'] == '' else int(row['Recovered']) 194 | ts_msec = now if row['LastUpdated'] == '' else int( 195 | row['LastUpdated']) 196 | 197 | ts = ts_msec // 1000 198 | last_update = datetime.datetime.utcfromtimestamp(ts) 199 | 200 | area = row['Province'] 201 | # Do we have a total? 202 | # area for totals can be either blank or matching the country 203 | if area == '' or area.lower() == country.lower(): 204 | if 'totals' in countries[country]: 205 | self.log.warning('Duplicate totals for %s.', country) 206 | 207 | d = {'cases': cases, 'deaths': deaths, 208 | 'recoveries': recoveries, 'last_update': last_update} 209 | # TODO take the max for each value 210 | countries[country]['totals'] = d 211 | else: # or an area? 212 | d = {'cases': cases, 'deaths': deaths, 213 | 'recoveries': recoveries, 'last_update': last_update} 214 | countries[country]['areas'][area] = d 215 | 216 | return countries 217 | 218 | async def _get_finland(self): 219 | districts = {} 220 | 221 | self.log.debug("Fetching %s.", UK_URL) 222 | async with self.http.get(FINLAND_URL) as r: 223 | j = await r.json() 224 | 225 | for case in j['confirmed']: 226 | d = case['healthCareDistrict'] 227 | if d == None or d == '': # skip missing data 228 | continue 229 | 230 | if d not in districts: 231 | districts[d] = 1 232 | else: 233 | districts[d] += 1 234 | 235 | return districts 236 | 237 | def _update_index(self): 238 | # create a new index 239 | d = '/tmp/covbotindex' 240 | self.log.debug('Updating index in %s.', d) 241 | if not os.path.exists(d): 242 | os.mkdir(d) 243 | 244 | self.index = create_in(d, SCHEMA) 245 | idx_w = self.index.writer() 246 | 247 | # add all the documents 248 | for c, c_data in self.cases.items(): 249 | # TODO should this be conditional on a record existing? 250 | idx_w.add_document(country=c, location=c) 251 | for a in c_data['areas']: 252 | l = f'{a}, {c}' 253 | idx_w.add_document(country=c, area=a, location=l) 254 | 255 | idx_w.commit() 256 | 257 | async def update(self): 258 | now = datetime.datetime.utcfromtimestamp(int(time.time())) 259 | 260 | self.log.info('Updating data.') 261 | # offloop, nhs, uk, finland = await asyncio.gather(self._get_offloop_cases(), self._get_nhs(), self._get_uk(), self._get_finland()) 262 | offloop, finland, uk_countries, uk_regions = await asyncio.gather(self._get_offloop_cases(), self._get_finland(), self._get_uk_countries(), self._get_uk_regions()) 263 | 264 | # TODO take the max value 265 | # for area, cases in nhs.items(): 266 | # offloop['United Kingdom']['areas'][area] = { 267 | # 'cases': cases, 'last_update': now} 268 | # for area, cases in uk.items(): 269 | # offloop['United Kingdom']['areas'][area] = { 270 | # 'cases': cases, 'last_update': now} 271 | for area, cases in finland.items(): 272 | offloop['Finland']['areas'][area] = { 273 | 'cases': cases, 'last_update': now} 274 | 275 | for r, ukdata in uk_countries.items(): 276 | offloop['United Kingdom']['areas'][r] = ukdata 277 | 278 | for r, regiondata in uk_regions.items(): 279 | offloop['United Kingdom']['areas'][r] = regiondata 280 | 281 | self.cases = offloop 282 | await asyncio.get_running_loop().run_in_executor(None, self._update_index) 283 | 284 | def _exact_country_code_match(self, query: str) -> list: 285 | self.log.debug('Trying an exact country code match on %s.', query) 286 | cc = query.upper() 287 | 288 | # TODO generalise. 289 | # Handle UK alias. 290 | if cc == 'UK': 291 | cc = 'GB' 292 | 293 | c = pycountry.countries.get( 294 | alpha_2=cc) or pycountry.countries.get(alpha_3=cc) 295 | if c != None: 296 | self.log.debug('Country code %s is %s.', cc, c.name) 297 | 298 | if c.name not in self.cases: 299 | self.log.warn('No data for %s.', c.name) 300 | return None 301 | 302 | d = self.cases[c.name] 303 | 304 | if not 'totals' in d: 305 | self.log.debug('No totals found for %s.', c.name) 306 | return None 307 | 308 | return [(c.name, d['totals'])] 309 | 310 | return None 311 | 312 | def _exact_country_match(self, query: str) -> list: 313 | self.log.debug('Trying an exact country match on %s.', query) 314 | for country in self.cases: 315 | if country.lower() == query.lower(): 316 | self.log.debug('Got an exact country match on %s.', query) 317 | 318 | if 'totals' not in self.cases[country]: 319 | self.log.debug('No totals found for %s.', country) 320 | return None 321 | 322 | return [(country, self.cases[country]['totals'])] 323 | 324 | return None 325 | 326 | def _exact_region_match(self, query: str) -> list: 327 | self.log.debug('Trying an exact region match on %s.', query) 328 | regions = [] 329 | for country, data in self.cases.items(): 330 | for area, data in data['areas'].items(): 331 | if area.lower() == query.lower(): 332 | regions.append((f'{area}, {country}', data)) 333 | 334 | if len(regions) > 0: 335 | self.log.debug( 336 | 'Got exact region matches on %s: %s.', query, regions) 337 | 338 | return regions 339 | 340 | def _wildcard_location_match(self, query: str) -> list: 341 | self.log.debug('Trying a wildcard location match on %s.', query) 342 | with self.index.searcher() as s: 343 | qs = f'*{query}*' 344 | q = QueryParser("location", SCHEMA).parse(qs) 345 | matches = s.search(q, limit=None) 346 | 347 | locs = [] 348 | for m in matches: 349 | c, l = m['country'], m['location'] 350 | 351 | if 'area' in m: 352 | d = self.cases[c]['areas'][m['area']] 353 | else: 354 | d = self.cases[c]['totals'] 355 | 356 | locs.append((l, d)) 357 | 358 | if len(locs) > 0: 359 | self.log.debug( 360 | 'Found wildcard location matches on %s: %s.', query, locs) 361 | 362 | return locs 363 | 364 | def get(self, query: str) -> list: 365 | self.log.info('Looking up data for %s.', query) 366 | 367 | m = self._exact_country_code_match(query) 368 | if m != None: 369 | return m 370 | 371 | m = self._exact_country_match(query) 372 | if m != None: 373 | return m 374 | 375 | areas = self._exact_region_match(query) 376 | if len(areas) > 0: 377 | return areas 378 | 379 | locs = self._wildcard_location_match(query) 380 | if len(locs) > 0: 381 | return locs 382 | 383 | return [] 384 | 385 | def get_mult(self, *queries: list) -> list: 386 | return [self.get(q) for q in queries] 387 | 388 | @classmethod 389 | def get_sources(cls) -> str: 390 | return f"{OFFLOOP_CASES_URL}, {NHS_URL}, {UK_URL} and {FINLAND_URL}" 391 | -------------------------------------------------------------------------------- /maubot.yaml: -------------------------------------------------------------------------------- 1 | maubot: 0.1.0 2 | id: dev.shortestpath.covbot 3 | version: 0.1.5 4 | license: MIT 5 | modules: 6 | - covbot 7 | main_class: CovBot 8 | dependencies: 9 | - whoosh 10 | - pycountry 11 | - tabulate 12 | database: false 13 | extra_files: 14 | - LICENSE 15 | - base-config.yaml 16 | -------------------------------------------------------------------------------- /screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwr22/covbot/1f3b7de36ad72ce3583a576a392e1b750420654f/screenshot.png --------------------------------------------------------------------------------