├── .github └── workflows │ └── pythonapp.yml ├── .gitignore ├── README.md ├── aiess ├── .env ├── __init__.py ├── common.py ├── database.py ├── errors.py ├── event_types.py ├── logger.py ├── objects.py ├── reader.py ├── settings.py ├── tests │ ├── __init__.py │ ├── mocks │ │ ├── __init__.py │ │ └── api │ │ │ ├── __init__.py │ │ │ ├── beatmap.py │ │ │ └── old_beatmap.py │ ├── test_api.py │ ├── test_apiv2.py │ ├── test_database.py │ ├── test_objects.py │ ├── test_reader.py │ └── test_timestamp.py ├── timestamp.py └── web │ ├── __init__.py │ ├── api.py │ ├── apiv2.py │ └── ratelimiter.py ├── batch ├── all.bat ├── bnplanner.bat ├── bnsite.bat ├── bot.bat └── scraper.bat ├── bnplanner ├── __init__.py ├── interface.py └── main.py ├── bnsite ├── __init__.py ├── api.py ├── converter.py ├── interface.py ├── main.py └── tests │ ├── __init__.py │ ├── test_api.py │ └── test_interface.py ├── bot ├── .env ├── __init__.py ├── activity.py ├── cmd_modules_deprecated │ └── __init__.py ├── cmdcommon.py ├── cogs │ ├── general_commands.py │ ├── ready_events.py │ └── sub_commands.py ├── database.py ├── filterer.py ├── filterers │ ├── event_filterer.py │ └── perms_filterer.py ├── formatter.py ├── logic.py ├── main.py ├── objects.py ├── settings.py ├── subscriber.py └── tests │ ├── __init__.py │ ├── test_activity.py │ ├── test_cmdcommon.py │ ├── test_database.py │ ├── test_filterer.py │ ├── test_formatter.py │ ├── test_logic.py │ ├── test_objects.py │ └── test_subscriber.py ├── codecov.yml ├── schema.sql ├── scraper ├── .env ├── __init__.py ├── crawler.py ├── main.py ├── parsers │ ├── __init__.py │ ├── beatmapset_event_parser.py │ ├── discussion_event_parser.py │ ├── discussion_parser.py │ ├── event_parser.py │ ├── group_parser.py │ └── news_parser.py ├── populator.py ├── requester.py └── tests │ ├── __init__.py │ ├── last_datetime-test.txt │ ├── mocks │ ├── __init__.py │ ├── discussion_diff_and_tabs.py │ ├── discussion_events_json.py │ ├── discussion_jsons │ │ ├── additional_details.py │ │ ├── crawler_json.py │ │ └── nomination_comment.py │ ├── events │ │ ├── __init__.py │ │ ├── faulty │ │ │ ├── __init__.py │ │ │ ├── beatmapset_events.py │ │ │ ├── discussion_events.py │ │ │ ├── kudosu_deleted_beatmap.py │ │ │ ├── no_events.py │ │ │ └── resolve_deleted_beatmap.py │ │ ├── issue_resolve.py │ │ ├── news.py │ │ ├── problem.py │ │ └── reply.py │ ├── events_json.py │ ├── events_json_deleted_mapset.py │ ├── events_json_lang_genre.py │ ├── events_json_nominate.py │ ├── groups.py │ └── requester.py │ ├── parsers │ ├── __init__.py │ ├── test_beatmapset_event_parser.py │ ├── test_discussion_event_parser.py │ ├── test_discussion_parser.py │ ├── test_event_parser.py │ ├── test_group_parser.py │ └── test_news_parser.py │ ├── test_crawler.py │ ├── test_populator.py │ └── test_requester.py └── settings.json /.github/workflows/pythonapp.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | 7 | jobs: 8 | build: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | services: 13 | mysql: 14 | image: mysql:8.0 15 | env: 16 | MYSQL_ROOT_PASSWORD: ${{ secrets.DB_PASSWORD }} 17 | ports: 18 | - 3306 19 | options: --health-cmd="mysqladmin ping" --health-interval=10s --health-timeout=5s --health-retries=3 20 | 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Set up Python 3.8 24 | uses: actions/setup-python@v1 25 | with: 26 | python-version: 3.8 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip 
install flake8 pytest mock pytest-cov pytest-asyncio requests BeautifulSoup4 mysql-connector-python pymongo
31 |         pip install -U py-cord --pre
32 |         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
33 |     - name: Setup database
34 |       run: |
35 |         mysql -h127.0.0.1 -P${{ job.services.mysql.ports['3306'] }} -uroot -p${{ secrets.DB_PASSWORD }} < /home/runner/work/Aiess/Aiess/schema.sql
36 |     - name: Lint with flake8
37 |       run: |
38 |         # stop the build if there are Python syntax errors or undefined names
39 |         flake8 . --count --select=E9,F63,F7,F82 --extend-ignore=F722 --show-source --statistics
40 |         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
41 |         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
42 |     - name: Test with pytest & generate coverage
43 |       env:
44 |         BNSITE_HEADER_USERNAME: ${{ secrets.BNSITE_HEADER_USERNAME }}
45 |         BNSITE_HEADER_SECRET: ${{ secrets.BNSITE_HEADER_SECRET }}
46 |         OSU_API_KEY: ${{ secrets.OSU_API_KEY }}
47 |         DB_PASSWORD: ${{ secrets.DB_PASSWORD }}
48 |         DB_PORT: ${{ job.services.mysql.ports['3306'] }}
49 |         ROOT_PATH: /home/runner/work/Aiess/Aiess/
50 |       run: |
51 |         pytest --cov=./ --cov-report=xml
52 |     - name: Upload coverage to Codecov
53 |       uses: codecov/codecov-action@v1.0.7
54 |       with:
55 |         token: ${{ secrets.CODECOV_TOKEN }}
56 |         file: ./coverage.xml
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Caches
2 | __pycache__/
3 | .pytest_cache/
4 |
5 | # Coverage reports
6 | htmlcov/
7 | .coverage
8 |
9 | # Specific instance details
10 | logs/
11 | time/
12 | .env
13 |
14 | # VS Code config
15 | .vscode
16 |
17 | # Temporary scripts & caches
18 | stats/
19 | temp/
20 | *.pk
21 |
22 | # Assets
23 | assets/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ![](https://i.imgur.com/RR3937R.jpg)
2 | # Aiess
3 | ![tests](https://github.com/Naxesss/Aiess/workflows/tests/badge.svg) [![codecov](https://codecov.io/gh/Naxesss/Aiess/branch/master/graph/badge.svg)](https://codecov.io/gh/Naxesss/Aiess) [![CodeFactor](https://www.codefactor.io/repository/github/naxesss/aiess/badge)](https://www.codefactor.io/repository/github/naxesss/aiess) [![Discord](https://img.shields.io/discord/420015424365789184.svg?label=&logo=discord&logoColor=ffffff&color=7389D8&labelColor=6A7EC2)](https://discord.gg/2XV5dcW)
4 |
5 | Aiess gathers mapping-related events in osu! (e.g. ranks, qualifications, nominations, suggestions, newsposts, group changes, etc.), and provides the ability to subscribe to these events through a Discord bot.
6 |
7 | ## Discord Bot (`./bot`) | **[Invite](https://discord.com/api/oauth2/authorize?client_id=680467769573244928&permissions=0&scope=bot%20applications.commands)**
8 | Written in [Python](https://www.python.org/) using [Pycord](https://docs.pycord.dev/en/master/).
9 |
10 | ### **Example Commands**
11 | - `/subscribe <filter>` Subscribes this channel to events matching `<filter>`
12 |   - e.g. `/subscribe type:ranked`
13 |
14 |   ![](https://i.imgur.com/VDgSBPu.png)
15 | - `/recent [filter]` Returns the most recent event gathered, optionally matching `[filter]`
16 |   - e.g. `/recent type:ranked and creator:vinxis`
17 |
18 |   ![](https://i.imgur.com/gNzQTgn.png)
19 |
20 | ### **Filtering**
21 | Supports
22 | - logical operators (`and`/`or`/`not`, case-insensitive).
23 | - parentheses (e.g. `type:(nominate or qualify)`).
24 | - aliases for types and groups (e.g. `type:nomination-reset` = `type:reset`).
25 | - quoting to escape spaces (e.g. `user:"name with spaces"`).
26 | - basic wildcards (e.g. `content:%needle%` for any event with "needle" in its text).
27 |
28 |
29 | Here are a few useful filters:
30 | - **#mapfeed** `type:(nom or qual or dq or pop or rank or love) and not user:banchobot`
31 | - **#mapfeed-osu** `type:(nom or qual or dq or pop or rank or love) and not user:banchobot and mode:osu`
32 | - **#groupfeed** `type:(add or remove)`
33 | - **#groupfeed-bns** `group:bns`
34 | - **#newsfeed** `type:news`
35 | - **#newsfeed-fa** `type:news and news-title:"%featured artist%"`
36 |
37 | ### **Permissions**
38 | Commands can be disabled in **Server Settings > Integrations > Aiess**. By default:
39 | - `/subscribe` and `/unsubscribe` require the `Manage Channel` permission.
40 | - `/info`, `/ping`, etc. are public, but only visible to the caller.
41 |
42 |   ![](https://i.imgur.com/lqmIQp0.png)
43 | - `/recent` is public.
44 |
45 | ## Event Gatherer (`./scraper`)
46 | Gathers event data from [osu!apiv2](https://osu.ppy.sh/docs/index.html) and the [osu! website](https://osu.ppy.sh) into a [MySQL](https://www.mysql.com/) database.
47 |
48 | ### **Rate Limits**
49 | | Request | Rate Limit |
50 | |:-|:-
51 | | Page | 1 / 60 seconds |
52 | | API | 1 / 1 second |
53 |
54 | ### **Scraping**
55 | | Route(s) | Event type(s) yielded |
56 | |-----------------|-----------------------|
57 | |**[/beatmapsets/events](https://osu.ppy.sh/beatmapsets/events)**|`Nominated`, `Qualified`, `Nomination Reset`, `Disqualified`, `Ranked`, `Loved`, `Unloved`, `Resolved`, `Reopened`, `Kudosu Given`, `Kudosu Removed`, `Kudosu Allowed`, `Kudosu Denied`, `Genre Edit`, `Language Edit`|
58 | |**[/beatmapsets/discussions](https://osu.ppy.sh/beatmapsets/discussions)**|`Suggestion`, `Problem`, `Note`, `Praise`, `Hype`|
59 | |**[/beatmapsets/discussions/posts](https://osu.ppy.sh/beatmapsets/discussions/posts)**|`Reply`|
60 | |**[/home/news](https://osu.ppy.sh/home/news)**|`News`|
61 | |**[/groups/28](https://osu.ppy.sh/groups/28)** (full bns), **[/groups/32](https://osu.ppy.sh/groups/32)** (probation bns), **[/groups/7](https://osu.ppy.sh/groups/7)** (nat), **[/groups/4](https://osu.ppy.sh/groups/4)** (gmt), **[/groups/16](https://osu.ppy.sh/groups/16)** (alumni), **[/groups/11](https://osu.ppy.sh/groups/11)** (devs), **[/groups/22](https://osu.ppy.sh/groups/22)** (support), **[/groups/31](https://osu.ppy.sh/groups/31)** (project loved)|`Added`, `Removed`|
62 |
63 | ## [BN Website](https://github.com/pishifat/qat/) Interface (`./bnsite`) | [bn.mappersguild.com](https://bn.mappersguild.com/)
64 |
65 | Handles activity, performance, and application evaluations of [Beatmap Nominators](https://osu.ppy.sh/help/wiki/People/The_Team/Beatmap_Nominators).
66 |
67 | | Events | Types |
68 | |:-|:-
69 | | Forwarded | `Nominated`, `Qualified`, `Nomination Reset`, `Disqualified`, `Ranked` |
70 | | Retrieved | `Added`, `Removed` |
71 |
72 | ## [BN Planner](https://github.com/Darius-Wattimena/bnplanner) Interface (`./bnplanner`)
73 |
74 | Handles planning of nominations for [Beatmap Nominators](https://osu.ppy.sh/help/wiki/People/The_Team/Beatmap_Nominators) in osu!catch.
75 |
76 | | Events | Types |
77 | |:-|:-
78 | | Forwarded | `Nominated`, `Qualified`, `Nomination Reset`, `Disqualified`, `Ranked`, `Add`, `Remove` |
--------------------------------------------------------------------------------
/aiess/.env:
--------------------------------------------------------------------------------
1 | PYTHONPATH=./
--------------------------------------------------------------------------------
/aiess/__init__.py:
--------------------------------------------------------------------------------
1 | from .objects import User, Beatmapset, Beatmap, Discussion, Usergroup, NewsPost, Event
2 | from .errors import ParsingError, DeletedContextError
3 | from .reader import Reader
4 | from .database import Database
--------------------------------------------------------------------------------
/aiess/common.py:
--------------------------------------------------------------------------------
1 | from typing import Generator, TypeVar, List
2 | from itertools import tee
3 | from types import GeneratorType
4 |
5 | T = TypeVar("T")
6 | async def anext(async_generator: Generator[T, None, None], default_value: T=None):
7 |     try:
8 |         return await async_generator.__anext__()
9 |     except StopAsyncIteration:
10 |         return default_value
11 |
12 | # Memoization decorator (caches results), works with generators too.
13 | Tee = tee([], 1)[0].__class__
14 | def memoized(f):
15 |     cache = {}
16 |     def ret(*args):
17 |         if args not in cache:
18 |             cache[args] = f(*args)
19 |         if isinstance(cache[args], (GeneratorType, Tee)):
20 |             cache[args], r = tee(cache[args])
21 |             return r
22 |         return cache[args]
23 |     return ret
24 |
25 | def __make_hashable(*args):
26 |     for arg in args:
27 |         if isinstance(arg, List):
28 |             yield tuple(arg)
29 |         else:
30 |             yield arg
--------------------------------------------------------------------------------
/aiess/errors.py:
--------------------------------------------------------------------------------
1 | class ParsingError(Exception):
2 |     """For use in cases where parsing some portion of an event failed and should stop the program."""
3 |
4 | class DeletedContextError(Exception):
5 |     """For use in cases where some portion of an event is missing due to its context being deleted.
6 |     This should not stop the program, but instead simply delete the event the error was raised from."""
--------------------------------------------------------------------------------
/aiess/event_types.py:
--------------------------------------------------------------------------------
1 | # Future-proofing: in case these names ever change, we only need to change them in one place.
2 | # Also prevents mistakenly using the wrong name (e.g. "resolve" instead of "issue_resolve").
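#
# Illustrative usage (this mirrors how `aiess/reader.py` builds SQL targets from these constants):
#
#     from aiess import event_types as types
#     news_target = f"type=\"{types.NEWS}\""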
3 |
4 | RANK = "rank"
5 | LOVE = "love"
6 | UNLOVE = "remove_from_loved"
7 | QUALIFY = "qualify"
8 | DISQUALIFY = "disqualify"
9 | NOMINATE = "nominate"
10 | RESET = "nomination_reset"
11 |
12 | SUGGESTION = "suggestion"
13 | PROBLEM = "problem"
14 | NOTE = "mapper_note"
15 | PRAISE = "praise"
16 | HYPE = "hype"
17 | REPLY = "reply"
18 |
19 | RESOLVE = "issue_resolve"
20 | REOPEN = "issue_reopen"
21 |
22 | KUDOSU_GAIN = "kudosu_gain"
23 | KUDOSU_LOSS = "kudosu_lost"
24 | KUDOSU_ALLOW = "kudosu_allow"
25 | KUDOSU_DENY = "kudosu_deny"
26 |
27 | LANGUAGE_EDIT = "language_edit"
28 | GENRE_EDIT = "genre_edit"
29 |
30 | DISCUSSION_DELETE = "discussion_delete"
31 | DISCUSSION_RESTORE = "discussion_restore"
32 | REPLY_DELETE = "discussion_post_delete"
33 | REPLY_RESTORE = "discussion_post_restore"
34 |
35 | NEWS = "news"
36 |
37 | ADD = "add"
38 | REMOVE = "remove"
39 |
40 | SEV = "sev"
--------------------------------------------------------------------------------
/aiess/logger.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 |
3 | from aiess import timestamp
4 | from aiess.settings import ROOT_PATH
5 |
6 | PATH_PREFIX = ROOT_PATH + "logs/"
7 |
8 | time_str = None
9 |
10 | ESC = "\033"
11 |
12 | # These are ANSI escape sequences, see http://ascii-table.com/ansi-escape-sequences.php
13 | class colors:
14 |     CLEAR = f"{ESC}[0m"
15 |
16 |     LOADING_FILLED = f"{ESC}[47m"
17 |     LOADING_EMPTY = f"{ESC}[44m"
18 |
19 |     EVENT = f"{ESC}[33m"
20 |     AUTHOR = f"{ESC}[32m"
21 |     CONTEXT = f"{ESC}[34m"
22 |
23 |     RED = f"{ESC}[31m"
24 |     GREEN = f"{ESC}[32m"
25 |     YELLOW = f"{ESC}[33m"
26 |
27 | def fmt(string: str, esc_seq: str):
28 |     return f"{esc_seq}{string}{colors.CLEAR}"
29 |
30 | def init(_time_str: str=None):
31 |     """Sets the time string for the log file (i.e. "log-{time_str}.txt")."""
32 |     global time_str
33 |     if _time_str: time_str = _time_str
34 |     else: time_str = datetime.utcnow().strftime("%Y%m%d-%H%M%S")
35 |
36 | def log(_obj="", newline: bool=True, postfix: str="") -> None:
37 |     """Takes the given object as a string with the current timestamp,
38 |     prints it, and appends it to the current log file."""
39 |     message = timestamped_str(_obj) if newline else str(_obj)
40 |     try:
41 |         print(message, end="")
42 |     except OSError as error:
43 |         # Occurs when the message includes character codes which the python terminal doesn't recognize.
44 |         print(f"OSError: {error}\n")
45 |
46 |     write(_obj, newline, postfix)
47 |
48 | def write(_obj, newline: bool=True, postfix: str="") -> None:
49 |     """Takes the given object as a string and appends it to the current log file,
50 |     with a given postfix, if specified."""
51 |     global time_str
52 |     if not time_str:
53 |         print("WARNING | Logger.write() called before Logger.init(), no logs created!")
54 |         return  # Tests do not initialize the time string.
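    # Note: log files are appended under "{ROOT_PATH}logs/"; e.g. `log_err` below ends up
    # writing to both "log-{time_str}.txt" and "log-err-{time_str}.txt" in that directory.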
55 |
56 |     message = timestamped_str(_obj) if newline else str(_obj)
57 |     with open(f"{PATH_PREFIX}log{postfix}-{time_str}.txt", "a", encoding="utf-8") as _file:
58 |         _file.write(message)
59 |
60 | def timestamped_str(_obj) -> str:
61 |     """Returns the given object as a string, prefixes it by a timestamp (unless the object evaluates to False),
62 |     and postfixes it by a newline character."""
63 |     if not _obj:
64 |         return "\n"
65 |
66 |     stamp = timestamp.to_string(datetime.utcnow())
67 |     return f"{stamp} | {str(_obj)}\n"
68 |
69 | def log_err(err: Exception) -> None:
70 |     """Takes the given exception as a string with the current timestamp,
71 |     and appends it to both the current log and log-err files."""
72 |     log(err)
73 |     write(err, postfix="-err")
--------------------------------------------------------------------------------
/aiess/reader.py:
--------------------------------------------------------------------------------
1 | from typing import Generator, List, Iterable
2 | from datetime import datetime
3 | import itertools
4 | import asyncio
5 | import copy
6 |
7 | from aiess import Event
8 | from aiess.database import Database
9 | from aiess import timestamp
10 | from aiess import event_types as types
11 |
12 | # The former element takes the type of the latter, which is removed.
13 | MERGABLE_TYPES = [
14 |     (types.NOMINATE, types.QUALIFY),
15 |     (types.PROBLEM, types.RESET),
16 |     (types.PROBLEM, types.DISQUALIFY),
17 |     (types.REPLY, types.RESOLVE),
18 |     (types.REPLY, types.REOPEN)
19 | ]
20 |
21 | class Scope():
22 |     """Determines which events should be read in a Reader. The `sql_target` WHERE clause is used
23 |     whenever events are retrieved using this scope."""
24 |     def __init__(self, name: str, sql_target: str="TRUE"):
25 |         self.name = name
26 |         self.sql_target = sql_target
27 |
28 | class Reader():
29 |     """This has an async method `run`, which starts a loop that reads Aiess events every 10 seconds.
30 |
31 |     If an event is found that is after an internal timestamp (initially the current time on first run),
32 |     then `on_event` is called with it; basically called for every new event.
33 |
34 |     For each of these reads, `on_event_batch` is called, regardless of whether any new events were found.
35 |
36 |     Use this by creating a class that inherits Reader and overriding the above methods with custom functionality (see the sketch in `run`)."""
37 |     def __init__(self, reader_id: str, db_name: str):
38 |         self.reader_id = reader_id
39 |         self.database = Database(db_name)
40 |         self.running = False
41 |         self.latest_event_time = None
42 |
43 |     async def run(self) -> None:
44 |         """A blocking method which initiates a loop looking through events in the database.
45 |         This is where `on_event` gets called from, for each new event found.
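
        A minimal usage sketch (illustrative; `MyReader` is a hypothetical subclass, wired up the same
        way as `bnplanner/main.py`):

            class MyReader(Reader):
                async def on_event(self, event: Event):
                    print(event)

            reader = MyReader("my-reader-id", db_name=SCRAPER_DB_NAME)
            asyncio.get_event_loop().run_until_complete(reader.run())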
46 |
47 |         Being a blocking call, any statement after calling this method will not be executed,
48 |         so place this after any setup code."""
49 |         if self.running:
50 |             raise ValueError("Reader is already running.")
51 |
52 |         self.running = True
53 |         while True:
54 |             await self.__push_all_new_events()
55 |             await asyncio.sleep(10)
56 |
57 |     async def __push_all_new_events(self) -> None:
58 |         """Triggers the on_event method for each new event since the last stored datetime for each scope."""
59 |         news_target = f"type=\"{types.NEWS}\""
60 |         groups_target = f"type=\"{types.ADD}\" OR type=\"{types.REMOVE}\""
61 |         sev_target = f"type=\"{types.SEV}\""
62 |
63 |         await self.__push_new_events(Scope("mapset", sql_target=f"NOT ({news_target}) AND NOT ({groups_target}) AND NOT ({sev_target})"))
64 |         await self.__push_new_events(Scope("news", sql_target=news_target))
65 |         await self.__push_new_events(Scope("groups", sql_target=groups_target))
66 |         await self.__push_new_events(Scope("sev", sql_target=sev_target))
67 |
68 |     async def __push_new_events(self, scope: Scope) -> None:
69 |         """Triggers the on_event method for each new event since the last stored datetime for the given scope."""
70 |         last_time = timestamp.get_last(self.__time_id(scope))
71 |         await self.__push_events_between(last_time, datetime.utcnow(), scope)
72 |
73 |     async def __push_events_between(self, last_time: datetime, current_time: datetime, scope: Scope) -> datetime:
74 |         """Triggers the on_event method for each event between the two datetimes.
75 |         Updates the last stored datetime after each on_event call."""
76 |         await self.on_event_batch()
77 |         async for event in await self.events_between(last_time, current_time, scope.sql_target):
78 |             await self.on_event(event)
79 |             timestamp.set_last(event.time, self.__time_id(scope))
80 |
81 |             if self.latest_event_time is None or event.time > self.latest_event_time:
82 |                 self.latest_event_time = event.time
83 |
84 |     def __time_id(self, scope: Scope):
85 |         """Returns the identifier of the file the reader creates to keep track of the last time for this scope.
86 |         This is based on the identifier supplied to the reader on initialization."""
87 |         return f"reader-{self.reader_id}-{scope.name}"
88 |
89 |     async def events_between(self, _from: datetime, to: datetime, sql_target: str="TRUE") -> Generator[Event, None, None]:
90 |         """Yields each event found in the database, from (and excluding) the earlier time up to (and including) the later time.
91 |         Optionally only retrieves events matching the `sql_target` WHERE clause."""
92 |         return self.database.retrieve_events(
93 |             where = f"({sql_target}) AND time > %s AND time <= %s",
94 |             where_values = (_from, to),
95 |             order_by = "time ASC"
96 |         )
97 |
98 |     async def on_event_batch(self) -> None:
99 |         """Called for each new event batch found in the running loop of the reader.
100 |         This happens before on_event is called for each event."""
101 |
102 |     async def on_event(self, event: Event) -> None:
103 |         """Called for each new event found in the running loop of the reader."""
104 |
105 | def merge_concurrent(events: Iterable[Event]) -> List[Event]:
106 |     """Returns a list of events where certain concurrent events are merged
107 |     (e.g. user nominates + system qualifies -> user qualifies)."""
108 |     # `dict.fromkeys` removes duplicates from the db results, as keys in a dictionary are unique. We essentially merge identical events.
109 |     # This is copied such that any modification we make to this list won't affect the original references.
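    # Illustrative example of the merge: a user's "nominate" at 12:00:00 and the system's "qualify"
    # at 12:00:01 (within the 1-second leniency below) collapse into one "qualify" event keeping the user.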
110 | new_events = list(dict.fromkeys(copy.deepcopy(list(events)))) 111 | new_events = list(sorted(new_events, key=lambda event: event.time)) 112 | merged_events = [] 113 | 114 | # `reversed(new_events)` is to go through events closer to each other first. 115 | for event, other_event in itertools.permutations(reversed(new_events), 2): 116 | # The system event is rarely 1 second late, hence the leniency. 117 | if abs((event.time - other_event.time).total_seconds()) > 1: 118 | continue 119 | 120 | if event.beatmapset != other_event.beatmapset: 121 | continue 122 | 123 | if (event.type, other_event.type) in MERGABLE_TYPES: 124 | if event in merged_events or other_event in merged_events: 125 | # This would result in lost information (e.g. overriding a user attribute). 126 | continue 127 | 128 | # Ensure that we have not already merged this event (e.g. nominate 1s <- nominate 0s <- qualify 0s). 129 | if other_event in new_events: 130 | # Former event has all properties the second does and more, 131 | # and is represented better having the type of the latter. 132 | event.type = other_event.type 133 | merged_events.append(event) 134 | new_events.remove(other_event) 135 | 136 | return new_events -------------------------------------------------------------------------------- /aiess/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | with open("settings.json", mode="r") as settings_file: 5 | json_str = settings_file.read() 6 | json_str = os.path.expandvars(json_str) 7 | settings = json.loads(json_str) 8 | 9 | # WEB 10 | API_KEY = settings["api-key"] 11 | API_RATE_LIMIT = settings["api-rate-limit"] 12 | PAGE_RATE_LIMIT = settings["page-rate-limit"] 13 | BNSITE_RATE_LIMIT = settings["bnsite-rate-limit"] 14 | 15 | # APIV2 16 | APIV2_CLIENT_ID = settings["apiv2-client-id"] 17 | APIV2_CLIENT_SECRET = settings["apiv2-client-secret"] 18 | 19 | # STORAGE 20 | ROOT_PATH = settings["root-path"] 21 | DB_CONFIG = settings["db-config"] 22 | 23 | # 3RD PARTY 24 | BNSITE_MONGODB_URI = settings["bnsite-mongodb-uri"] 25 | BNSITE_HEADERS = settings["bnsite-headers"] 26 | BNSTATS_HEADERS = settings["bnstats-headers"] 27 | BNPLANNER_HEADERS = settings["bnplanner-headers"] 28 | -------------------------------------------------------------------------------- /aiess/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Naxesss/Aiess/f7485b3ecc75ca1369960c5652036c4a7865be21/aiess/tests/__init__.py -------------------------------------------------------------------------------- /aiess/tests/mocks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Naxesss/Aiess/f7485b3ecc75ca1369960c5652036c4a7865be21/aiess/tests/mocks/__init__.py -------------------------------------------------------------------------------- /aiess/tests/mocks/api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Naxesss/Aiess/f7485b3ecc75ca1369960c5652036c4a7865be21/aiess/tests/mocks/api/__init__.py -------------------------------------------------------------------------------- /aiess/tests/mocks/api/beatmap.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | JSON = json.loads(""" 4 | [ 5 | { 6 | "beatmapset_id": "1001546", 7 | "beatmap_id": "2096611", 8 | "approved": "1", 9 | 
"total_length": "177", 10 | "hit_length": "158", 11 | "version": "Expert", 12 | "file_md5": "d23686d24f57f9cac6fc205c62b38e90", 13 | "diff_size": "5.3", 14 | "diff_overall": "8", 15 | "diff_approach": "7.5", 16 | "diff_drain": "5.5", 17 | "mode": "0", 18 | "count_normal": "393", 19 | "count_slider": "194", 20 | "count_spinner": "2", 21 | "submit_date": "2019-07-11 12:43:49", 22 | "approved_date": "2019-10-31 00:43:44", 23 | "last_update": "2019-10-27 12:39:02", 24 | "artist": "Carpool Tunnel", 25 | "artist_unicode": "Carpool Tunnel", 26 | "title": "Afterlight", 27 | "title_unicode": "Afterlight", 28 | "creator": "_Epreus", 29 | "creator_id": "7342798", 30 | "bpm": "178", 31 | "source": "", 32 | "tags": "osu! featured artist indie rock squirrelpascals mappers' guild mapper's", 33 | "genre_id": "1", 34 | "language_id": "1", 35 | "favourite_count": "28", 36 | "rating": "8.7033", 37 | "download_unavailable": "0", 38 | "audio_unavailable": "0", 39 | "playcount": "3438", 40 | "passcount": "189", 41 | "packs": "A54,S833", 42 | "max_combo": "785", 43 | "diff_aim": "2.64899", 44 | "diff_speed": "2.67291", 45 | "difficultyrating": "5.33386" 46 | }, 47 | { 48 | "beatmapset_id": "1001546", 49 | "beatmap_id": "2105912", 50 | "approved": "1", 51 | "total_length": "177", 52 | "hit_length": "143", 53 | "version": "Normal", 54 | "file_md5": "f6a4dde40fd76228747b47d614f5acad", 55 | "diff_size": "4", 56 | "diff_overall": "4", 57 | "diff_approach": "5", 58 | "diff_drain": "4", 59 | "mode": "0", 60 | "count_normal": "102", 61 | "count_slider": "167", 62 | "count_spinner": "1", 63 | "submit_date": "2019-07-11 12:43:49", 64 | "approved_date": "2019-10-31 00:43:44", 65 | "last_update": "2019-10-27 12:39:02", 66 | "artist": "Carpool Tunnel", 67 | "artist_unicode": "Carpool Tunnel", 68 | "title": "Afterlight", 69 | "title_unicode": "Afterlight", 70 | "creator": "_Epreus", 71 | "creator_id": "7342798", 72 | "bpm": "178", 73 | "source": "", 74 | "tags": "osu! featured artist indie rock squirrelpascals mappers' guild mapper's", 75 | "genre_id": "1", 76 | "language_id": "1", 77 | "favourite_count": "28", 78 | "rating": "8.7033", 79 | "download_unavailable": "0", 80 | "audio_unavailable": "0", 81 | "playcount": "4383", 82 | "passcount": "1701", 83 | "packs": "A54,S833", 84 | "max_combo": "480", 85 | "diff_aim": "1.2248", 86 | "diff_speed": "1.04308", 87 | "difficultyrating": "2.35874" 88 | }, 89 | { 90 | "beatmapset_id": "1001546", 91 | "beatmap_id": "2105913", 92 | "approved": "1", 93 | "total_length": "177", 94 | "hit_length": "167", 95 | "version": "squirrelp's Traffic Jam Dreams", 96 | "file_md5": "d9bbe460a4e0e07684f09da5f1d57982", 97 | "diff_size": "5.4", 98 | "diff_overall": "8", 99 | "diff_approach": "9.3", 100 | "diff_drain": "5", 101 | "mode": "0", 102 | "count_normal": "329", 103 | "count_slider": "279", 104 | "count_spinner": "1", 105 | "submit_date": "2019-07-11 12:43:49", 106 | "approved_date": "2019-10-31 00:43:44", 107 | "last_update": "2019-10-27 12:39:02", 108 | "artist": "Carpool Tunnel", 109 | "artist_unicode": "Carpool Tunnel", 110 | "title": "Afterlight", 111 | "title_unicode": "Afterlight", 112 | "creator": "_Epreus", 113 | "creator_id": "7342798", 114 | "bpm": "178", 115 | "source": "", 116 | "tags": "osu! 
featured artist indie rock squirrelpascals mappers' guild mapper's", 117 | "genre_id": "1", 118 | "language_id": "1", 119 | "favourite_count": "28", 120 | "rating": "8.7033", 121 | "download_unavailable": "0", 122 | "audio_unavailable": "0", 123 | "playcount": "5220", 124 | "passcount": "711", 125 | "packs": "A54,S833", 126 | "max_combo": "951", 127 | "diff_aim": "2.82876", 128 | "diff_speed": "2.29884", 129 | "difficultyrating": "5.39256" 130 | }, 131 | { 132 | "beatmapset_id": "1001546", 133 | "beatmap_id": "2126008", 134 | "approved": "1", 135 | "total_length": "177", 136 | "hit_length": "141", 137 | "version": "Hard", 138 | "file_md5": "51c4690ed10728ac98618712b79694a7", 139 | "diff_size": "4.5", 140 | "diff_overall": "6", 141 | "diff_approach": "8", 142 | "diff_drain": "4", 143 | "mode": "0", 144 | "count_normal": "131", 145 | "count_slider": "236", 146 | "count_spinner": "2", 147 | "submit_date": "2019-07-11 12:43:49", 148 | "approved_date": "2019-10-31 00:43:44", 149 | "last_update": "2019-10-27 12:39:02", 150 | "artist": "Carpool Tunnel", 151 | "artist_unicode": "Carpool Tunnel", 152 | "title": "Afterlight", 153 | "title_unicode": "Afterlight", 154 | "creator": "_Epreus", 155 | "creator_id": "7342798", 156 | "bpm": "178", 157 | "source": "", 158 | "tags": "osu! featured artist indie rock squirrelpascals mappers' guild mapper's", 159 | "genre_id": "1", 160 | "language_id": "1", 161 | "favourite_count": "28", 162 | "rating": "8.7033", 163 | "download_unavailable": "0", 164 | "audio_unavailable": "0", 165 | "playcount": "7875", 166 | "passcount": "2862", 167 | "packs": "A54,S833", 168 | "max_combo": "651", 169 | "diff_aim": "1.64468", 170 | "diff_speed": "1.57871", 171 | "difficultyrating": "3.25638" 172 | }, 173 | { 174 | "beatmapset_id": "1001546", 175 | "beatmap_id": "2162331", 176 | "approved": "1", 177 | "total_length": "177", 178 | "hit_length": "142", 179 | "version": "Insane", 180 | "file_md5": "91fd2a9bc140432bf32d2946d7d611c4", 181 | "diff_size": "5", 182 | "diff_overall": "7", 183 | "diff_approach": "8", 184 | "diff_drain": "5", 185 | "mode": "0", 186 | "count_normal": "237", 187 | "count_slider": "235", 188 | "count_spinner": "2", 189 | "submit_date": "2019-07-11 12:43:49", 190 | "approved_date": "2019-10-31 00:43:44", 191 | "last_update": "2019-10-27 12:39:02", 192 | "artist": "Carpool Tunnel", 193 | "artist_unicode": "Carpool Tunnel", 194 | "title": "Afterlight", 195 | "title_unicode": "Afterlight", 196 | "creator": "_Epreus", 197 | "creator_id": "7342798", 198 | "bpm": "178", 199 | "source": "", 200 | "tags": "osu! 
featured artist indie rock squirrelpascals mappers' guild mapper's", 201 | "genre_id": "1", 202 | "language_id": "1", 203 | "favourite_count": "28", 204 | "rating": "8.7033", 205 | "download_unavailable": "0", 206 | "audio_unavailable": "0", 207 | "playcount": "4230", 208 | "passcount": "306", 209 | "packs": "A54,S833", 210 | "max_combo": "735", 211 | "diff_aim": "2.17076", 212 | "diff_speed": "2.19787", 213 | "difficultyrating": "4.38218" 214 | } 215 | ] 216 | """) -------------------------------------------------------------------------------- /aiess/tests/mocks/api/old_beatmap.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | JSON = json.loads(""" 4 | [ 5 | { 6 | "beatmapset_id": "41823", 7 | "beatmap_id": "131891", 8 | "approved": "2", 9 | "total_length": "139", 10 | "hit_length": "112", 11 | "version": "WHO'S AFRAID OF THE BIG BLACK", 12 | "file_md5": "2d687e5ee79f3862ad0c60651471cdcc", 13 | "diff_size": "4", 14 | "diff_overall": "7", 15 | "diff_approach": "10", 16 | "diff_drain": "5", 17 | "mode": "0", 18 | "count_normal": "410", 19 | "count_slider": "334", 20 | "count_spinner": "2", 21 | "submit_date": "2011-12-24 00:34:33", 22 | "approved_date": "2012-02-19 05:51:54", 23 | "last_update": "2012-02-19 05:05:41", 24 | "artist": "The Quick Brown Fox", 25 | "title": "The Big Black", 26 | "creator": "Blue Dragon", 27 | "creator_id": "19048", 28 | "bpm": "360.3", 29 | "source": "", 30 | "tags": "onosakihito speedcore renard lapfox", 31 | "genre_id": "10", 32 | "language_id": "2", 33 | "favourite_count": "4611", 34 | "rating": "9.27135", 35 | "download_unavailable": "0", 36 | "audio_unavailable": "0", 37 | "playcount": "18821004", 38 | "passcount": "2066674", 39 | "max_combo": "1337", 40 | "diff_aim": "3.54972", 41 | "diff_speed": "2.95511", 42 | "difficultyrating": "6.80213" 43 | }, 44 | { 45 | "beatmapset_id": "41823", 46 | "beatmap_id": "132889", 47 | "approved": "2", 48 | "total_length": "140", 49 | "hit_length": "133", 50 | "version": "Ono's Taiko Oni", 51 | "file_md5": "87c7ba3ab20dd56ca52e6c2488de4824", 52 | "diff_size": "5", 53 | "diff_overall": "6", 54 | "diff_approach": "5", 55 | "diff_drain": "6", 56 | "mode": "1", 57 | "count_normal": "947", 58 | "count_slider": "2", 59 | "count_spinner": "0", 60 | "submit_date": "2011-12-24 00:34:33", 61 | "approved_date": "2012-02-19 05:51:54", 62 | "last_update": "2012-02-19 05:05:41", 63 | "artist": "The Quick Brown Fox", 64 | "title": "The Big Black", 65 | "creator": "Blue Dragon", 66 | "creator_id": "19048", 67 | "bpm": "360.3", 68 | "source": "", 69 | "tags": "onosakihito speedcore renard lapfox", 70 | "genre_id": "10", 71 | "language_id": "2", 72 | "favourite_count": "4611", 73 | "rating": "9.27135", 74 | "download_unavailable": "0", 75 | "audio_unavailable": "0", 76 | "playcount": "616270", 77 | "passcount": "66210", 78 | "max_combo": null, 79 | "diff_aim": null, 80 | "diff_speed": null, 81 | "difficultyrating": "4.99009" 82 | } 83 | ] 84 | """) -------------------------------------------------------------------------------- /aiess/tests/test_api.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from aiess.web import api 4 | 5 | def test_cache(): 6 | api.cache.clear() 7 | assert not api.cache 8 | 9 | beatmapset_response = api.request_beatmapset(1) 10 | user_response = api.request_user(2) 11 | assert beatmapset_response 12 | assert user_response 13 | 14 | assert api.cache 15 | assert 
api.cache["/get_beatmaps?s=1"]
16 |     assert api.cache["/get_user?u=2"]
17 |
18 | def test_cache_timing():
19 |     api.cache.clear()
20 |
21 |     # Not cached, needs to be retrieved with API rate limit.
22 |     time = datetime.utcnow()
23 |     api.request_beatmapset(1)
24 |     api.request_user(2)
25 |     delta_time = datetime.utcnow() - time
26 |
27 |     assert delta_time.total_seconds() > 1
28 |
29 |     # Cached, can simply be read from a dictionary, should be pretty much instant.
30 |     time = datetime.utcnow()
31 |     api.request_beatmapset(1)
32 |     api.request_user(2)
33 |     delta_time = datetime.utcnow() - time
34 |
35 |     assert delta_time.total_seconds() < 0.01
36 |
37 | def test_request_beatmapset():
38 |     beatmapset_response = api.request_beatmapset(1)
39 |
40 |     assert beatmapset_response[0]["title"] == "DISCO PRINCE"
41 |     assert beatmapset_response[0]["artist"] == "Kenji Ninuma"
42 |
43 | def test_request_user():
44 |     user_response = api.request_user(2)
45 |
46 |     assert user_response["username"] == "peppy"
47 |
48 | def test_request_user_escaped_name():
49 |     user_response = api.request_user("-Mo- & Ephemeral")
50 |
51 |     assert user_response is None
--------------------------------------------------------------------------------
/aiess/tests/test_apiv2.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import asyncio
3 | from datetime import datetime
4 |
5 | from aiess.web import apiv2
6 |
7 | def test_request_token():
8 |     token = apiv2._request_token()
9 |
10 |     assert token
11 |     assert token.bearer
12 |     assert token.expires_at > datetime.utcnow()
13 |     assert token.access_token
14 |
15 | @pytest.mark.asyncio
16 | async def test_request_token_cached():
17 |     apiv2.cached_token = None
18 |
19 |     token_1 = apiv2.get_or_request_token()
20 |     token_2 = apiv2.get_or_request_token()
21 |
22 |     assert apiv2.cached_token == token_1
23 |     assert token_2 == token_1
24 |
25 | def test_request_discussions():
26 |     response = apiv2.request_discussions()
27 |
28 |     assert response
29 |     assert response["discussions"]
30 |     assert response["users"]
31 |
32 | def test_request_discussion_posts():
33 |     response = apiv2.request_discussion_posts()
34 |
35 |     assert response
36 |     assert response["posts"]
37 |     assert response["users"]
--------------------------------------------------------------------------------
/aiess/tests/test_timestamp.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pytest
3 | from datetime import datetime
4 | from contextlib import suppress
5 |
6 | from aiess import timestamp
7 |
8 | def test_get_missing_created():
9 |     time_id = "test_missing"
10 |     expected_path = timestamp.get_path(time_id)
11 |     with suppress(OSError):
12 |         os.remove(expected_path)
13 |
14 |     assert not timestamp.exists(time_id)
15 |     assert not os.path.exists(expected_path)
16 |
17 |     time_empty = timestamp.get_last(time_id)
18 |     time_delta = datetime.utcnow() - time_empty
19 |
20 |     assert timestamp.exists(time_id)
21 |     assert os.path.exists(expected_path)
22 |     os.remove(expected_path)
23 |
24 |     assert not timestamp.exists(time_id)
25 |     assert time_delta.total_seconds() < 1
26 |
27 | def test_get_set():
28 |     new_time = datetime.utcnow()
29 |     assert timestamp.get_last("test") < new_time
30 |
31 |     timestamp.set_last(new_time, "test")
32 |     assert abs((new_time - timestamp.get_last("test")).total_seconds()) < 1
33 |
34 | def test_exists():
35 |     assert not timestamp.exists("test_missing")
36 |     assert timestamp.exists("test")
37 |
38 | def
test_from_string(): 39 | result = timestamp.from_string("2020-01-12 05:00:00") 40 | assert result 41 | assert result.year == 2020 42 | assert result.month == 1 43 | assert result.day == 12 44 | assert result.hour == 5 45 | assert result.minute == 0 46 | assert result.second == 0 47 | 48 | def test_from_string_wrong_format(): 49 | with pytest.raises(ValueError) as err: 50 | timestamp.from_string("this isn't a datetime") 51 | 52 | assert "could not parse" in str(err).lower() 53 | 54 | def test_from_string_tz(): 55 | assert timestamp.from_string("2020-01-12T05:00:00+00:00") == timestamp.from_string("2020-01-12 05:00:00") 56 | 57 | def test_from_string_tz_different_timezone(): 58 | assert timestamp.from_string("2020-01-12T05:00:00+02:00") == timestamp.from_string("2020-01-12 07:00:00") 59 | 60 | def test_from_string_tz2(): 61 | assert timestamp.from_string("2020-01-12T05:00:00.000Z") == timestamp.from_string("2020-01-12 05:00:00") -------------------------------------------------------------------------------- /aiess/timestamp.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from contextlib import suppress 3 | import os 4 | import tempfile 5 | 6 | from aiess.settings import ROOT_PATH 7 | 8 | PATH_PREFIX = ROOT_PATH + "time/" 9 | 10 | TIME_FORMAT = "%Y-%m-%d %H:%M:%S" # e.g. "2020-01-12 05:00:00" 11 | TIME_FORMAT_TZ = "%Y-%m-%dT%H:%M:%S%z" # e.g. "2020-01-12T05:00:00+00:00" 12 | TIME_FORMAT_TZ2 = "%Y-%m-%dT%H:%M:%S.%fZ" # e.g. "2020-01-12T05:00:00.302Z" 13 | 14 | FILE_NAME_PREFIX = "last_datetime-" 15 | FILE_NAME_POSTFIX = ".txt" 16 | 17 | def get_last(_id: str=None) -> datetime: 18 | """Returns the last datetime we're done with for this id. If the file doesn't exist, one will be created 19 | with the current time in UTC as datetime.""" 20 | # Only in cases where the file does not already exist do we want to create and initialize one. 21 | # In any other case we should throw an exception if data is missing (e.g. corruption due to power loss), 22 | # to prevent silent failure. 23 | if not os.path.exists(get_dir_path(_id)): 24 | os.makedirs(get_dir_path(_id)) 25 | 26 | path = get_path(_id) 27 | if not exists(_id): 28 | with open(path, "w") as _file: 29 | _file.write(to_string(datetime.utcnow())) 30 | 31 | with open(path, "r") as _file: 32 | last_datetime_text = _file.read() 33 | if not last_datetime_text: 34 | raise ValueError(f"{path} has no contents.") 35 | 36 | # Will only raise an exception if the file already exists, but has an invalid datetime format (e.g. empty). 37 | last_datetime = datetime.strptime(last_datetime_text, TIME_FORMAT) 38 | return last_datetime 39 | 40 | def set_last(new_datetime: datetime, _id: str=None) -> None: 41 | """Sets the last datetime we're done with for this id. 
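    Writes atomically (to a temp file, then `os.replace`) so that a crash or power loss cannot leave a partially written file.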
Creates the respective file if it does not exist."""
42 |     if not os.path.exists(get_dir_path(_id)):
43 |         os.makedirs(get_dir_path(_id))
44 |
45 |     with tempfile.NamedTemporaryFile("w", dir=get_dir_path(_id), delete=False) as temp_file:
46 |         temp_file.write(to_string(new_datetime))
47 |         temp_file.flush()
48 |         os.fsync(temp_file.fileno())
49 |
50 |     os.replace(temp_file.name, get_path(_id))
51 |
52 | def exists(_id: str=None) -> bool:
53 |     """Returns whether a datetime file for this id exists."""
54 |     return os.path.exists(get_path(_id))
55 |
56 | def get_path(_id: str=None) -> str:
57 |     """Returns the path to the event time file with the given identifier."""
58 |     global PATH_PREFIX, FILE_NAME_PREFIX, FILE_NAME_POSTFIX
59 |     return f"{PATH_PREFIX}{FILE_NAME_PREFIX}{_id}{FILE_NAME_POSTFIX}"
60 |
61 | def get_dir_path(_id: str=None) -> str:
62 |     """Returns the path to the directory in which the event time files are stored."""
63 |     global PATH_PREFIX
64 |     return f"{PATH_PREFIX}"
65 |
66 | def to_string(_datetime: datetime) -> str:
67 |     """Returns the ISO 8601 format (except timezone and microsecond values) of the given datetime."""
68 |     return _datetime.strftime(TIME_FORMAT)
69 |
70 | def from_string(string: str) -> datetime:
71 |     """Returns the datetime of the given ISO 8601 formatted string, normalized to naive UTC
72 |     (timezone offsets are applied and stripped), otherwise raises ValueError (e.g. wrong format)."""
73 |     time = None
74 |     with suppress(ValueError): time = datetime.strptime(string, TIME_FORMAT)
75 |     with suppress(ValueError): time = datetime.strptime(string, TIME_FORMAT_TZ)
76 |     with suppress(ValueError): time = datetime.strptime(string, TIME_FORMAT_TZ2)
77 |
78 |     if not time:
79 |         raise ValueError(f"Could not parse \"{string}\" as an ISO 8601 formatted datetime.")
80 |
81 |     if not time.tzinfo:
82 |         return time
83 |
84 |     return time.replace(tzinfo=None) + time.tzinfo.utcoffset(time)
--------------------------------------------------------------------------------
/aiess/web/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Naxesss/Aiess/f7485b3ecc75ca1369960c5652036c4a7865be21/aiess/web/__init__.py
--------------------------------------------------------------------------------
/aiess/web/api.py:
--------------------------------------------------------------------------------
1 | import json
2 | from collections import defaultdict
3 |
4 | from urllib.parse import quote
5 |
6 | from aiess.web.ratelimiter import request_with_rate_limit
7 | from aiess.settings import API_KEY, API_RATE_LIMIT
8 |
9 | MODES = {
10 |     "0": "osu",
11 |     "1": "taiko",
12 |     "2": "catch",
13 |     "3": "mania"
14 | }
15 | GENRES = {
16 |     "0": "Any",
17 |     "1": "Unspecified",
18 |     "2": "Video Game",
19 |     "3": "Anime",
20 |     "4": "Rock",
21 |     "5": "Pop",
22 |     "6": "Other",
23 |     "7": "Novelty",
24 |     # There is apparently no 8.
25 |     "9": "Hip Hop",
26 |     "10": "Electronic",
27 |     "11": "Metal", # API docs excluded 11, 12, and 14, so "Metal", "Classical", and "Jazz" are guesses.
28 |     "12": "Classical",
29 |     "13": "Folk",
30 |     "14": "Jazz"
31 | }
32 | LANGUAGES = {
33 |     "0": "Any",
34 |     "1": "Unspecified", # API docs claim this is "Other", but it is actually "Unspecified"; "Other" is id 14 instead.
35 |     "2": "English",
36 |     "3": "Japanese",
37 |     "4": "Chinese",
38 |     "5": "Instrumental",
39 |     "6": "Korean",
40 |     "7": "French",
41 |     "8": "German",
42 |     "9": "Swedish",
43 |     "10": "Spanish",
44 |     "11": "Italian",
45 |     "12": "Russian",
46 |     "13": "Polish",
47 |     "14": "Other"
48 | }
49 |
50 | cache = {}
51 | def request_api(request_type: str, query: str) -> object:
52 |     """Requests a json object from the v1 osu!api, where the api key is supplied."""
53 |     request = f"https://osu.ppy.sh/api/{request_type}?{query}&k={API_KEY}"
54 |
55 |     cache_line = f"/{request_type}?{query}"
56 |     if cache_line in cache:
57 |         return cache[cache_line]
58 |
59 |     response = request_with_rate_limit(request, API_RATE_LIMIT, "api", timeout = 10)
60 |     try:
61 |         json_response = json.loads(response.text)
62 |         if "error" in json_response:
63 |             # e.g. "Please provide a valid API key." if it's incorrect.
64 |             error_str = json_response["error"]
65 |             raise ValueError(f"The osu! api responded with an error \"{error_str}\"")
66 |
67 |         cache[cache_line] = json_response
68 |         return json_response
69 |     except json.decoder.JSONDecodeError:
70 |         # The response text is not valid json (e.g. empty).
71 |         return None
72 |
73 | def request_beatmapset(beatmapset_id: str) -> object:
74 |     """Requests a json object of the given beatmapset id.
75 |     Caches any response gotten until cleared manually."""
76 |     beatmapset_json = request_api("get_beatmaps", f"s={quote(str(beatmapset_id))}")
77 |     return beatmapset_json
78 |
79 | def request_user(user_id: str) -> object:
80 |     """Requests a json object of the given user id.
81 |     Caches any response gotten until cleared manually."""
82 |     user_json = request_api("get_user", f"u={quote(str(user_id))}")
83 |     if user_json and len(user_json) > 0:
84 |         return user_json[0]
85 |     # The user is restricted (or the response was invalid).
86 |     return None
--------------------------------------------------------------------------------
/aiess/web/apiv2.py:
--------------------------------------------------------------------------------
1 | import json
2 | from datetime import datetime, timedelta
3 |
4 | from aiess.web.ratelimiter import request_with_rate_limit
5 | from aiess.settings import API_RATE_LIMIT, APIV2_CLIENT_ID, APIV2_CLIENT_SECRET
6 |
7 | cached_token = None
8 |
9 | class Token():
10 |     def __init__(self, bearer: str, expires_in: int, access_token: str):
11 |         self.bearer = bearer
12 |         self.expires_at = datetime.utcnow() + timedelta(seconds=expires_in)
13 |         self.access_token = access_token
14 |
15 |     def alive(self):
16 |         return datetime.utcnow() < self.expires_at
17 |
18 | def _request_token():
19 |     response = request_with_rate_limit(
20 |         request_url = "https://osu.ppy.sh/oauth/token",
21 |         rate_limit = API_RATE_LIMIT,
22 |         rate_limit_id = "api",
23 |         timeout = 10,
24 |         method = "POST",
25 |         data = {
26 |             "client_id": APIV2_CLIENT_ID,
27 |             "client_secret": APIV2_CLIENT_SECRET,
28 |             "grant_type": "client_credentials",
29 |             "scope": "public"
30 |         }
31 |     )
32 |
33 |     json_obj = json.loads(response.text)
34 |
35 |     bearer = json_obj["token_type"]
36 |     expires_in = json_obj["expires_in"]
37 |     access_token = json_obj["access_token"]
38 |
39 |     token = Token(bearer, expires_in, access_token)
40 |     global cached_token
41 |     cached_token = token
42 |
43 |     return token
44 |
45 | def get_or_request_token():
46 |     if cached_token and cached_token.alive():
47 |         return cached_token
48 |     else:
49 |         return _request_token()
50 |
51 | def request_api(route: str, query: str=None) -> object:
52 |     """Requests a json object from the v2 osu!api, authorized via a cached or newly requested bearer token."""
53 |     route = route.strip("/")
54 |     token = get_or_request_token()
55 |     response = request_with_rate_limit(
56 |         request_url = f"https://osu.ppy.sh/api/v2/{route}" + (f"?{query}" if query else ""),
57 |         rate_limit = API_RATE_LIMIT,
58 |         rate_limit_id = "api",
59 |         timeout = 10,
60 |         headers = { "Authorization": f"Bearer {token.access_token}" }
61 |     )
62 |
63 |     return json.loads(response.text)
64 |
65 | def request_discussions(page: int=1, message_types: str=None, limit: int=None) -> object:
66 |     """Requests a json object representing discussions on the given page, by default page 1."""
67 |     # Response is of the form:
68 |     # {"beatmaps":[],"cursor":null,"discussions":[],"included_discussions":[],"reviews_config":{"max_blocks":100},"users":[]}
69 |     return request_api(
70 |         route = "/beatmapsets/discussions",
71 |         query = f"page={page}" + (f"&message_types[]={message_types}" if message_types else "") + (f"&limit={limit}" if limit else "")
72 |     )
73 |
74 | def request_discussion_posts(page: int=1, limit: int=None) -> object:
75 |     """Requests a json object representing discussion replies on the given page, by default page 1."""
76 |     # Response is of the form:
77 |     # {"beatmaps":[],"discussions":[],"cursor":null,"posts":[],"users":[]}
78 |     return request_api(
79 |         route = "/beatmapsets/discussions/posts",
80 |         query = f"page={page}" + (f"&limit={limit}" if limit else "")
81 |     )
82 |
83 | def request_news(cursor_id: int, cursor_published_at: str, limit: int=None) -> object:
84 |     """Requests a json object representing newsposts after the given cursor."""
85 |     return request_api(
86 |         route = "/news",
87 |         query = f"cursor[id]={cursor_id}&cursor[published_at]={cursor_published_at}" + (f"&limit={limit}" if limit else "")
88 |     )
--------------------------------------------------------------------------------
/aiess/web/ratelimiter.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from requests import Response
3 | from typing import Dict
4 | from time import sleep
5 | from datetime import datetime, timedelta
6 | from collections import defaultdict
7 | import asyncio
8 |
9 | from aiess.logger import log_err, log
10 |
11 | next_request_time: Dict[str, datetime] = defaultdict(datetime.now)
12 | failed_attempts: Dict[str, int] = defaultdict(int)
13 |
14 | async def async_call_with_rate_limit(awaited_result_func, is_result_invalid, rate_limit: float, rate_limit_id: str=None, sleep_if_ratelimited: bool=True) -> Response:
15 |     """Calls `awaited_result_func` at most once every `rate_limit` seconds for the same `rate_limit_id` (default None).
16 |     If given an invalid result, backs off exponentially; waiting longer and longer between attempts."""
17 |     global next_request_time
18 |
19 |     result = None
20 |     while is_result_invalid(result):
21 |         request_time = next_request_time[rate_limit_id]
22 |         if request_time and request_time > datetime.now():
23 |             if not sleep_if_ratelimited:
24 |                 return None
25 |             sleep_seconds = (request_time - datetime.now()).total_seconds()
26 |             await asyncio.sleep(sleep_seconds)
27 |
28 |         result = await awaited_result_func()
29 |         next_request_time[rate_limit_id] = datetime.now() + timedelta(seconds=rate_limit)
30 |
31 |         if is_result_invalid(result):
32 |             back_off(rate_limit_id)
33 |
34 |     if rate_limit_id in failed_attempts:
35 |         failed_attempts[rate_limit_id] = 0
36 |
37 |     return result
38 |
39 | def call_with_rate_limit(result_func, is_result_invalid, rate_limit: float, rate_limit_id: str=None, sleep_if_ratelimited: bool=True) -> Response:
40 |     """Calls `result_func` at most once every `rate_limit` seconds for the same `rate_limit_id` (default None).
41 |     If given an invalid result, backs off exponentially; waiting longer and longer between attempts."""
42 |     global next_request_time
43 |
44 |     result = None
45 |     while is_result_invalid(result):
46 |         request_time = next_request_time[rate_limit_id]
47 |         if request_time and request_time > datetime.now():
48 |             if not sleep_if_ratelimited:
49 |                 return None
50 |             sleep((request_time - datetime.now()).total_seconds())
51 |
52 |         result = result_func()
53 |         next_request_time[rate_limit_id] = datetime.now() + timedelta(seconds=rate_limit)
54 |
55 |         if is_result_invalid(result):
56 |             back_off(rate_limit_id)
57 |
58 |     if rate_limit_id in failed_attempts:
59 |         failed_attempts[rate_limit_id] = 0
60 |
61 |     return result
62 |
63 | def request_with_rate_limit(request_url: str, rate_limit: float, rate_limit_id: str=None, method: str="GET", sleep_if_ratelimited: bool=True, **kwargs) -> Response:
64 |     """Requests a response object at most once every rate_limit seconds for the same rate_limit_id (default None).
65 |     Additional keyword arguments are given to the request function (e.g. headers, timeout, etc)."""
66 |     return call_with_rate_limit(
67 |         result_func = lambda: try_request(request_url, method=method, **kwargs),
68 |         is_result_invalid = invalid_response,
69 |         rate_limit = rate_limit,
70 |         rate_limit_id = rate_limit_id,
71 |         sleep_if_ratelimited = sleep_if_ratelimited
72 |     )
73 |
74 | def try_request(request_url: str, method: str="GET", **kwargs) -> Response:
75 |     """Requests a response object and returns it if successful, otherwise None is returned.
76 |     If the website is in cloudflare IUAM mode, we also return None."""
77 |     response = None
78 |
79 |     log(f"{method} {request_url}", postfix="requests")
80 |
81 |     try:
82 |         response = requests.request(method, request_url, **kwargs)
83 |     except requests.exceptions.ConnectionError:
84 |         log_err(f"WARNING | ConnectionError was raised on {method} \"{request_url}\"")
85 |         return None
86 |     except requests.exceptions.ReadTimeout:
87 |         log_err(f"WARNING | ReadTimeout was raised on {method} \"{request_url}\"")
88 |         return None
89 |
90 |     if "Just a moment..." in response.text:
91 |         log_err("WARNING | CloudFlare IUAM is active")
92 |         return None
93 |
94 |     log(f"RECEIVED {response.status_code}: {response.reason}", postfix="requests")
95 |
96 |     return response
97 |
98 | def invalid_response(response: Response) -> bool:
99 |     # `try_request` will return None in case of ConnectionErrors or IUAM.
100 |     # In these cases we back off and wait until it's over.
101 |     return response is None or str(response.status_code).startswith('5')
102 |
103 | def back_off(rate_limit_id: str=None) -> None:
104 |     """Postpones the next request for 30 -> 60 -> 120 -> 240 seconds for 1, 2, 3, and 4+ failed attempts respectively.
105 |     This way we give the website some room to breathe if there are already many incoming connections causing this."""
106 |     global failed_attempts
107 |     if failed_attempts[rate_limit_id] < 4:
108 |         failed_attempts[rate_limit_id] += 1
109 |
110 |     next_request_time[rate_limit_id] += timedelta(seconds = 30 * 2**(failed_attempts[rate_limit_id] - 1))
--------------------------------------------------------------------------------
/batch/all.bat:
--------------------------------------------------------------------------------
1 | start "" "%~dp0\bnplanner.bat"
2 | start "" "%~dp0\bnsite.bat"
3 | start "" "%~dp0\bot.bat"
4 | start "" "%~dp0\scraper.bat"
--------------------------------------------------------------------------------
/batch/bnplanner.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | cd ..
3 | setlocal
4 | FOR /F "tokens=*" %%i in ('type .env') do SET %%i
5 | python -m bnplanner.main
6 | endlocal
7 | pause
--------------------------------------------------------------------------------
/batch/bnsite.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | cd ..
3 | setlocal
4 | FOR /F "tokens=*" %%i in ('type .env') do SET %%i
5 | python -m bnsite.main
6 | endlocal
7 | pause
--------------------------------------------------------------------------------
/batch/bot.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | cd ..
3 | setlocal
4 | FOR /F "tokens=*" %%i in ('type .env') do SET %%i
5 | python -m bot.main
6 | endlocal
7 | pause
--------------------------------------------------------------------------------
/batch/scraper.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | cd ..
3 | setlocal
4 | FOR /F "tokens=*" %%i in ('type .env') do SET %%i
5 | python -m scraper.main
6 | endlocal
7 | pause
--------------------------------------------------------------------------------
/bnplanner/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Naxesss/Aiess/f7485b3ecc75ca1369960c5652036c4a7865be21/bnplanner/__init__.py
--------------------------------------------------------------------------------
/bnplanner/interface.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('..')
3 |
4 | import requests
5 | from datetime import datetime
6 |
7 | from aiess import Event
8 | from aiess import event_types as types
9 | from aiess.settings import BNPLANNER_HEADERS
10 |
11 | class StatusDocument():
12 |     def __init__(self, event):
13 |         self.time = (event.time - datetime(1970, 1, 1)).total_seconds()
14 |         self.beatmapSetId = event.beatmapset.id
15 |         self.artist = event.beatmapset.artist
16 |         self.title = event.beatmapset.title
17 |         self.creatorId = event.beatmapset.creator.id
18 |         self.creatorName = event.beatmapset.creator.name
19 |         self.userId = event.user.id if event.user else None
20 |         self.userName = event.user.name if event.user else None
21 |         self.status = status_by_event_type[event.type]
22 |         self.modes = list(map(to_api_mode, event.beatmapset.modes))
23 |
24 | class GroupChangeDocument():
25 |     def __init__(self, event):
26 |         self.time = (event.time - datetime(1970, 1, 1)).total_seconds()
27 |         self.type = event.type # "add" / "remove"
28 |         self.userId = event.user.id if event.user else None
29 |         self.userName = event.user.name if event.user else None
30 |         self.groupId = event.group.id
31 |         self.groupMode = to_api_mode(event.group.mode)
32 |
33 | status_by_event_type = {
34 |     types.QUALIFY: "Qualified",
35 |     types.NOMINATE: "Nominated",
36 |     types.DISQUALIFY: "Disqualified",
37 |     types.RESET: "Reset",
38 |     types.RANK: "Ranked"
39 | }
40 |
41 | # Greaper wants the mode names the api uses (`catch` -> `fruits`).
42 | def to_api_mode(mode: str):
43 |     return mode if mode != "catch" else "fruits"
44 |
45 | def insert_event(event: Event) -> None:
46 |     """Sends a POST request with this event as json, outlined in `StatusDocument` or `GroupChangeDocument`."""
47 |     is_group_change = event.type in [types.ADD, types.REMOVE]
48 |     if is_group_change:
49 |         url = "https://bnplannerbackend.greaper.net/v2/aiess/event/user"
50 |         document = GroupChangeDocument(event)
51 |     else:
52 |         url = "https://bnplannerbackend.greaper.net/v2/aiess/event/beatmap"
53 |         document = StatusDocument(event)
54 |
55 |     response = requests.post(url, json=vars(document), headers=BNPLANNER_HEADERS)
56 |     # Response codes are the usual ones.
57 |     code = str(response.status_code)
58 |     if not code.startswith("2"):
59 |         # 2XX indicates success. If we don't succeed, we should raise an error.
60 | raise ValueError(f"Received {response.text}") -------------------------------------------------------------------------------- /bnplanner/main.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | import asyncio 5 | 6 | import aiess 7 | from aiess import Event 8 | from aiess.reader import Reader 9 | from aiess.database import SCRAPER_DB_NAME 10 | from aiess import logger 11 | from aiess import event_types as types 12 | 13 | from bnplanner import interface 14 | 15 | EXPECTED_TYPES = [ 16 | types.NOMINATE, 17 | types.QUALIFY, 18 | types.RESET, 19 | types.DISQUALIFY, 20 | types.RANK 21 | ] 22 | 23 | class Reader(aiess.Reader): 24 | async def on_event(self, event: Event): 25 | if event.type in EXPECTED_TYPES: #and "catch" in event.beatmapset.modes: 26 | logger.log(event, postfix=self.reader_id) 27 | interface.insert_event(event) 28 | 29 | logger.init() 30 | reader = Reader("bnplanner", db_name=SCRAPER_DB_NAME) 31 | loop = asyncio.get_event_loop() 32 | loop.run_until_complete(reader.run()) -------------------------------------------------------------------------------- /bnsite/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Naxesss/Aiess/f7485b3ecc75ca1369960c5652036c4a7865be21/bnsite/__init__.py -------------------------------------------------------------------------------- /bnsite/api.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | import json 5 | from typing import Tuple, Generator 6 | from datetime import datetime 7 | 8 | from aiess.web.ratelimiter import request_with_rate_limit 9 | from aiess.settings import BNSITE_RATE_LIMIT, BNSITE_HEADERS 10 | from aiess import timestamp 11 | 12 | cache = {} 13 | def request(route: str, query: str, allow_cache: bool=True) -> object: 14 | """Requests the page from the given route and query. 15 | Caches any response such that requesting the same discussion id yields the same result.""" 16 | request_url = f"https://bn.mappersguild.com/interOp/{route}/{query}" 17 | if request_url in cache and cache[request_url] and allow_cache: 18 | return cache[request_url] 19 | 20 | response = request_with_rate_limit( 21 | request_url = request_url, 22 | rate_limit = BNSITE_RATE_LIMIT, 23 | rate_limit_id = "bnsite", 24 | headers = BNSITE_HEADERS, 25 | timeout = 10 26 | ) 27 | try: 28 | result = json.loads(response.text) 29 | except json.decoder.JSONDecodeError: 30 | # This happens whenever the response text is empty (e.g. "[]"). 31 | result = None 32 | 33 | cache[request_url] = result 34 | return result 35 | 36 | def request_last_eval(user_id: int) -> object: 37 | """Returns the last updated evaluation associated with a user given their user id. Caches results.""" 38 | return request("latestEvaluation", query=user_id) 39 | 40 | def request_user_info(user_id: int) -> str: 41 | """Returns the data associated with a user given their user id. Caches results.""" 42 | return request("users", query=user_id) 43 | 44 | def request_dq_info(discussion_id: int) -> object: 45 | """Returns the disqualification info (SEV, QAH checkers, etc) associated with the given 46 | discussion id. 
Caches results."""
47 |     return request("dqInfoByDiscussionId", query=discussion_id)
48 | 
49 | def request_obv_sev(discussion_id: int) -> Tuple[int, int]:
50 |     """Returns a tuple of the obviousness (0-2) and severity (0-3) ratings from the disqualification
51 |     associated with the given discussion id, if any, otherwise a (None, None) tuple. Caches results."""
52 |     dq_info_json = request_dq_info(discussion_id)
53 |     if not dq_info_json:
54 |         return (None, None)
55 | 
56 |     return (dq_info_json["obviousness"], dq_info_json["severity"])
57 | 
58 | def request_discussion_sev(since: datetime) -> Generator[Tuple[int, int, int, datetime], None, None]:
59 |     """Yields tuples representing the SEV for a reset, `(discussion_id, obv, sev, time)`, since
60 |     the given time. If the severity or obviousness was unchanged, it is yielded as None.
61 |     If it was explicitly unset, it is yielded as -1."""
62 |     sev_logs = request("eventsByDate", query=timestamp.to_string(since), allow_cache=False)
63 |     discussion_ids = []
64 |     discussion_obv = {}
65 |     discussion_sev = {}
66 |     discussion_time = {}
67 |     for sev_log in sev_logs:
68 |         if "Updated DQ reason" in sev_log["action"] or "Toggled review status" in sev_log["action"]:
69 |             # Skips, e.g. "Updated DQ reason to \"xyz\"" or "Toggled review status of s/1457453 to false", which aren't what we're looking for.
70 |             continue
71 | 
72 |         discussion_id = sev_log["relatedId"]["discussionId"]
73 |         if discussion_id not in discussion_ids:
74 |             discussion_ids.append(discussion_id)
75 | 
76 |         # E.g. "Updated severity of s/1207984 to "0""
77 |         amount_str = sev_log["action"].split(" ")[-1].strip("\"")
78 |         if amount_str == "null":
79 |             amount = -1
80 |         else:
81 |             try:
82 |                 amount = int(amount_str)
83 |             except ValueError:
84 |                 raise ValueError(f"Could not parse \"{amount_str}\" as int. The entire action is \"{sev_log['action']}\".")
85 | 
86 |         if "obviousness" in sev_log["action"]: discussion_obv[discussion_id] = amount
87 |         else:                                  discussion_sev[discussion_id] = amount
88 | 
89 |         time = timestamp.from_string(sev_log["updatedAt"])
90 |         if discussion_id not in discussion_time or time < discussion_time[discussion_id]:
91 |             discussion_time[discussion_id] = time
92 | 
93 |     # Much more intuitive to receive these in bulk rather than obv/sev separately.
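    # E.g. a reset whose log only set severity so far might yield
    # (1207984, None, 0, <time of the log entry>), reusing the id from the example above.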
94 | for discussion_id in discussion_ids: 95 | obv = discussion_obv[discussion_id] if discussion_id in discussion_obv else None 96 | sev = discussion_sev[discussion_id] if discussion_id in discussion_sev else None 97 | time = discussion_time[discussion_id] 98 | yield (discussion_id, obv, sev, time) -------------------------------------------------------------------------------- /bnsite/converter.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | from aiess import Event 5 | from aiess import timestamp 6 | from aiess.database import Database, SCRAPER_DB_NAME 7 | 8 | database = Database(SCRAPER_DB_NAME) 9 | 10 | def convert_to_event(json: object) -> Event: 11 | beatmapset_id = float(json["beatmapsetId"]) if "beatmapsetId" in json and json["beatmapsetId"] else None 12 | discussion_id = float(json["discussionId"]) if "discussionId" in json and json["discussionId"] else None 13 | user_id = float(json["userId"]) if "userId" in json and json["userId"] else None 14 | 15 | _type = json["type"] 16 | time = timestamp.from_string(json["timestamp"]) 17 | beatmapset = database.retrieve_beatmapset("id=%s", (beatmapset_id,)) if beatmapset_id else None 18 | discussion = database.retrieve_discussion("id=%s", (discussion_id,)) if discussion_id else None 19 | user = database.retrieve_user("id=%s", (user_id,)) if user_id else None 20 | content = json["content"] 21 | 22 | return Event(_type=_type, time=time, beatmapset=beatmapset, discussion=discussion, user=user, content=content) -------------------------------------------------------------------------------- /bnsite/interface.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from pymongo.common import SERVER_SELECTION_TIMEOUT 4 | sys.path.append('..') 5 | 6 | from aiess import Event, Beatmap 7 | from aiess.settings import BNSITE_MONGODB_URI 8 | from aiess import event_types as types 9 | 10 | from pymongo import MongoClient 11 | 12 | class DocumentBeatmap(): 13 | def __init__(self, beatmap: Beatmap): 14 | self.drain = beatmap.draintime 15 | self.starRating = beatmap.sr_total 16 | self.userRating = beatmap.userrating 17 | 18 | class Document(): 19 | def __init__(self, event: Event): 20 | self.type = event.type 21 | self.timestamp = event.time 22 | self.beatmapsetId = event.beatmapset.id 23 | self.creatorId = event.beatmapset.creator.id 24 | self.creatorName = event.beatmapset.creator.name 25 | self.modes = event.beatmapset.modes 26 | self.discussionId = event.discussion.id if event.discussion else None 27 | self.userId = event.user.id if event.user else None 28 | self.artistTitle = f"{event.beatmapset.artist} - {event.beatmapset.title}" 29 | self.content = event.content 30 | self.genre = event.beatmapset.genre 31 | self.language = event.beatmapset.language 32 | self.beatmaps = list(map(lambda beatmap: vars(DocumentBeatmap(beatmap)), event.beatmapset.beatmaps)) 33 | 34 | def insert_event(event: Event) -> None: 35 | """Creates a connection to the MongoDB server, inserts the event as a 36 | custom document, then immediately closes the connection.""" 37 | client = MongoClient(BNSITE_MONGODB_URI, retryWrites=False, serverSelectionTimeoutMS=120000) 38 | client.qat_db.aiess.insert_one(vars(Document(event))) 39 | client.close() -------------------------------------------------------------------------------- /bnsite/main.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 
sys.path.append('..')
3 | 
4 | import asyncio
5 | 
6 | import aiess
7 | from aiess import Event
8 | from aiess.reader import Reader
9 | from aiess.database import SCRAPER_DB_NAME
10 | from aiess import logger
11 | from aiess import event_types as types
12 | 
13 | from bnsite import interface
14 | 
15 | EXPECTED_TYPES = [
16 |     types.NOMINATE,
17 |     types.QUALIFY,
18 |     types.RESET,
19 |     types.DISQUALIFY,
20 |     types.RANK,
21 |     types.LOVE
22 | ]
23 | 
24 | class Reader(aiess.Reader):
25 |     async def on_event(self, event: Event):
26 |         if event.type in EXPECTED_TYPES and (not event.user or event.user.id != 3):  # User id 3 is BanchoBot; skip its automated events.
27 |             logger.log(event, postfix=self.reader_id)
28 |             interface.insert_event(event)
29 | 
30 | logger.init()
31 | reader = Reader("bnsite", db_name=SCRAPER_DB_NAME)
32 | loop = asyncio.get_event_loop()
33 | loop.run_until_complete(reader.run())
--------------------------------------------------------------------------------
/bnsite/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Naxesss/Aiess/f7485b3ecc75ca1369960c5652036c4a7865be21/bnsite/tests/__init__.py
--------------------------------------------------------------------------------
/bnsite/tests/test_api.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('..')
3 | 
4 | from datetime import datetime, timedelta
5 | 
6 | from aiess.timestamp import from_string
7 | from bnsite import api
8 | 
9 | def setup_function():
10 |     api.cache.clear()
11 | 
12 | def test_request():
13 |     # https://osu.ppy.sh/beatmapsets/1179039/discussion#/1755074
14 |     json = api.request(route="dqInfoByDiscussionId", query=1755074)
15 |     assert json
16 |     assert json["obviousness"] == 0
17 |     assert json["severity"] == 0
18 | 
19 | def test_request_cached():
20 |     # Not cached, needs to be retrieved with bnsite API rate limit.
21 |     time = datetime.utcnow()
22 |     json1 = api.request(route="dqInfoByDiscussionId", query=1755074)
23 |     json2 = api.request(route="dqInfoByDiscussionId", query=1824796)
24 |     delta_time = datetime.utcnow() - time
25 | 
26 |     assert delta_time.total_seconds() > 1
27 | 
28 |     # Cached, should be pretty much instant.
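    # (A cache hit returns the stored JSON without a network round-trip,
    # hence the much stricter 0.1 s bound below.)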
29 |     time = datetime.utcnow()
30 |     json1_cached = api.request(route="dqInfoByDiscussionId", query=1755074)
31 |     json2_cached = api.request(route="dqInfoByDiscussionId", query=1824796)
32 |     delta_time = datetime.utcnow() - time
33 | 
34 |     assert delta_time.total_seconds() < 0.1
35 |     assert json1 == json1_cached
36 |     assert json2 == json2_cached
37 | 
38 | def test_request_last_eval_kick():
39 |     # https://osu.ppy.sh/users/5875419
40 |     json = api.request_last_eval(user_id=5875419)
41 |     assert json
42 |     assert json["consensus"] == "removeFromBn"
43 |     assert from_string(json["updatedAt"])
44 | 
45 | def test_request_last_eval_resign():
46 |     # https://osu.ppy.sh/users/8140944
47 |     json = api.request_last_eval(user_id=8140944)
48 |     assert json
49 |     assert json["kind"] == "resignation"
50 |     assert from_string(json["updatedAt"])
51 | 
52 | def test_request_last_eval_missing():
53 |     json = api.request_last_eval(user_id=4)
54 |     assert not json
55 | 
56 | def test_request_user_info():
57 |     # https://osu.ppy.sh/users/8129817
58 |     json = api.request_user_info(user_id=8129817)
59 |     assert json
60 |     assert json["username"] == "Naxess"
61 |     assert json["modes"] == ["osu"]
62 | 
63 | def test_request_user_info_missing():
64 |     json = api.request_user_info(user_id=4)
65 |     assert not json
66 | 
67 | def test_request_dq_info():
68 |     # https://osu.ppy.sh/beatmapsets/1179039/discussion#/1755074
69 |     json = api.request_dq_info(discussion_id=1755074)
70 |     assert json
71 |     assert json["obviousness"] == 0
72 |     assert json["severity"] == 0
73 | 
74 | def test_request_dq_info_missing():
75 |     json = api.request_dq_info(discussion_id=4)
76 |     assert not json
77 | 
78 | def test_request_sev_events():
79 |     tuples = api.request_discussion_sev(datetime.utcnow() - timedelta(days=7))
80 |     assert tuples  # (a generator is always truthy; the loop below does the real check)
81 |     amount = 0
82 |     for discussion_id, obv, sev, time in tuples:
83 |         assert discussion_id is not None
84 |         assert obv is not None or sev is not None
85 |         assert time is not None
86 |         amount += 1
87 | 
88 |     assert amount > 10
--------------------------------------------------------------------------------
/bnsite/tests/test_interface.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('..')
3 | 
4 | import pytest
5 | 
6 | from aiess import Event, Beatmapset, Discussion, User
7 | from aiess.timestamp import from_string
8 | from aiess import event_types as types
9 | 
10 | from bnsite.interface import Document
11 | 
12 | @pytest.fixture
13 | def dq_event():
14 |     disqualifier = User(1, "someone")
15 |     creator = User(2, "sometwo")
16 |     beatmapset = Beatmapset(_id=4, artist="artist", title="title", creator=creator, modes=["osu", "catch"], allow_api=False)
17 |     discussion = Discussion(_id=3, beatmapset=beatmapset, user=disqualifier, content="dqed")
18 |     return Event(
19 |         _type = types.DISQUALIFY,
20 |         time = from_string("2020-01-01 03:00:00"),
21 |         beatmapset = beatmapset,
22 |         discussion = discussion,
23 |         user = disqualifier,
24 |         content = "dqed"
25 |     )
26 | 
27 | def test_document(dq_event):
28 |     document = Document(dq_event)
29 | 
30 |     assert document.type == "disqualify"
31 |     assert document.timestamp == from_string("2020-01-01 03:00:00")
32 |     assert document.beatmapsetId == 4
33 |     assert document.creatorId == 2
34 |     assert document.creatorName == "sometwo"
35 |     assert document.modes == ["osu", "catch"]
36 |     assert document.discussionId == 3
37 |     assert document.userId == 1
38 |     assert document.artistTitle == "artist - title"
39 |     assert document.content == "dqed"
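
# A sketch of one further check (an addition, not part of the original suite):
# `interface.insert_event` serializes via `vars(Document(event))`, so the
# document should flatten to a plain dict carrying the same fields as above.
def test_document_vars(dq_event):
    document_dict = vars(Document(dq_event))

    assert isinstance(document_dict, dict)
    assert document_dict["type"] == "disqualify"
    assert document_dict["beatmapsetId"] == 4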
--------------------------------------------------------------------------------
/bot/.env:
--------------------------------------------------------------------------------
1 | PYTHONPATH=./
--------------------------------------------------------------------------------
/bot/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Naxesss/Aiess/f7485b3ecc75ca1369960c5652036c4a7865be21/bot/__init__.py
--------------------------------------------------------------------------------
/bot/activity.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('..')
3 | 
4 | import asyncio
5 | from datetime import datetime, timedelta
6 | 
7 | from discord import Client, Activity, Game, Status
8 | 
9 | from aiess.logger import log_err
10 | from aiess import Reader
11 | 
12 | from bot.formatter import format_time
13 | 
14 | async def loop(client: Client, reader: Reader) -> None:
15 |     """Updates the client activity ("Playing" indicator) and status (online indicator) every minute."""
16 |     try:
17 |         while True:
18 |             await client.change_presence(
19 |                 activity = get_activity(client, reader),
20 |                 status = get_status(client, reader)
21 |             )
22 |             # Presence updates are ratelimited at 1 update / 15s.
23 |             await asyncio.sleep(60)
24 |     except Exception as ex:
25 |         log_err(f"WARNING | Discord presence raised \"{ex}\"")
26 | 
27 | def get_activity(client: Client, reader: Reader) -> Activity:
28 |     """Returns the "Playing /subscribe | x servers" indicator for the bot."""
29 |     if not client.is_ready():
30 |         return Game("/subscribe | Starting...")
31 | 
32 |     time_since_event = (datetime.utcnow() - reader.latest_event_time) if reader.latest_event_time else None
33 |     guild_n = len(client.guilds)
34 |     return Game(
35 |         "/subscribe" +
36 |         f" | {guild_n} server" + ("s" if guild_n != 1 else "") +
37 |         (f" | {format_time(time_since_event, max_units=1, long=True)} delay"
38 |             if time_since_event and time_since_event > timedelta(minutes=30)
39 |             else "")
40 |     )
41 | 
42 | def get_status(client: Client, reader: Reader) -> Status:
43 |     """Returns online / idle / do not disturb, depending on how long ago the last event batch was."""
44 |     if not client.is_ready():
45 |         return Status.do_not_disturb
46 | 
47 |     time_since_event = (datetime.utcnow() - reader.latest_event_time) if reader.latest_event_time else None
48 |     if (time_since_event is None or
49 |         time_since_event > timedelta(hours=2)):   return Status.do_not_disturb
50 |     elif time_since_event > timedelta(minutes=30): return Status.idle
51 |     else:                                          return Status.online
--------------------------------------------------------------------------------
/bot/cmd_modules_deprecated/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Naxesss/Aiess/f7485b3ecc75ca1369960c5652036c4a7865be21/bot/cmd_modules_deprecated/__init__.py
--------------------------------------------------------------------------------
/bot/cmdcommon.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('..')
3 | 
4 | from discord import Embed
5 | 
6 | from bot.filterer import expand, get_invalid_gates, get_invalid_keys, get_invalid_filters, get_invalid_words, get_missing_gate
7 | from bot.filterer import FilterContext
8 | from bot.formatter import format_dotted_list
9 | 
10 | async def validate_filter(ctx, _filter: str, filter_context: FilterContext) -> 
bool: 11 | """Returns whether the filter was considered valid. If invalid, an appropriate response is sent 12 | where the command was called. Requires a filter context to determine filter validity.""" 13 | try: 14 | expansion = expand(_filter) 15 | except ValueError as err: 16 | # E.g. parenthesis inequality. 17 | await ctx.respond(f"{str(err)}", embed=filters_embed(filter_context), ephemeral=True) 18 | return False 19 | 20 | invalid_gates = set(get_invalid_gates(_filter)) 21 | if invalid_gates: 22 | invalids_formatted = "`" + "`, `".join(invalid_gates) + "`" 23 | await ctx.respond(f"✗ Invalid positioning of gate(s) {invalids_formatted} in expansion `{expansion}`.", embed=filters_embed(filter_context), ephemeral=True) 24 | return False 25 | 26 | invalid_keys = set(get_invalid_keys(_filter, filter_context)) 27 | if invalid_keys: 28 | invalids_formatted = "`" + "`, `".join(invalid_keys) + "`" 29 | await ctx.respond(f"✗ Invalid key(s) {invalids_formatted} in expansion `{expansion}`.", embed=filters_embed(filter_context), ephemeral=True) 30 | return False 31 | 32 | invalid_filters = set(get_invalid_filters(_filter, filter_context)) 33 | if invalid_filters: 34 | keys = [] 35 | invalids_strs = [] 36 | for key, value in invalid_filters: 37 | invalids_strs.append(f"{key}:{value}") 38 | keys.append(key) 39 | invalids_formatted = "`" + "`, `".join(invalids_strs) + "`" 40 | # `keys` will have at least one element, else `invalid_filters` would be falsy. 41 | await ctx.respond(f"✗ Invalid value(s) for key(s) {invalids_formatted} in expansion `{expansion}`.", embed=filter_embed(keys[0], filter_context), ephemeral=True) 42 | return False 43 | 44 | invalid_words = set(get_invalid_words(_filter)) 45 | if invalid_words: 46 | invalids_formatted = "`" + "`, `".join(invalid_words) + "`" 47 | await ctx.respond(f"✗ Invalid word(s) {invalids_formatted} in expansion `{expansion}`.", embed=filters_embed(filter_context), ephemeral=True) 48 | return False 49 | 50 | parts = get_missing_gate(_filter) 51 | if parts: 52 | left_part, right_part = parts 53 | await ctx.respond(f"✗ Missing gate between `{left_part}` and `{right_part}` in expansion `{expansion}`.", embed=filters_embed(filter_context), ephemeral=True) 54 | return False 55 | 56 | if not hasattr(ctx.channel, "guild"): 57 | # Prevents excessive discord rate limiting (5 DMs per second globally). 58 | await ctx.respond("✗ Cannot subscribe in DM channels.", ephemeral=True) 59 | return False 60 | 61 | return True 62 | 63 | def filters_embed(filter_context: FilterContext) -> Embed: 64 | """Returns an embed representing the given filter context; showing all tag names and examples.""" 65 | embed = Embed() 66 | embed.title = f"Filter ({filter_context.name})" 67 | embed.description = """ 68 | A string of key:value pairs (e.g. `key1:(value1 or value2) and key2:value3`). 69 | Keys and values are always case insensitive. Gates are `and`, `or`, and `not`. 70 | """ 71 | 72 | keys = "\u2000".join("**`" + ("/".join(f"{name}" for name in tag.names) + "`**") for tag in filter_context.tags) 73 | embed.add_field( 74 | name = "Keys" + (" (`/` denotes aliases)" if "/" in keys else ""), 75 | value = keys, 76 | inline = True 77 | ) 78 | embed.add_field( 79 | name = "Example(s)", 80 | value = format_dotted_list(filter_context.examples), 81 | inline = True 82 | ) 83 | return embed 84 | 85 | def filter_embed(key: str, filter_context: FilterContext) -> Embed: 86 | """Returns an embed representing the tag with the given name `key`, for this context. 
87 |     Goes more in detail regarding how to use this specific tag."""
88 |     key = key.lower().strip()
89 |     tag = filter_context.get_tag(key)
90 |     keys = tag.names
91 | 
92 |     embed = Embed()
93 |     embed.add_field(
94 |         name = "/".join(f"**`{key}`**" for key in keys),
95 |         value = tag.description,
96 |         inline = True
97 |     )
98 |     embed.add_field(
99 |         name = "Value(s)",
100 |         value = tag.value_hint,
101 |         inline = True
102 |     )
103 |     embed.add_field(
104 |         name = "Example(s)",
105 |         value = format_dotted_list(f"`{key}:{value}`" for value in tag.example_values),
106 |         inline = True
107 |     )
108 |     return embed
--------------------------------------------------------------------------------
/bot/cogs/general_commands.py:
--------------------------------------------------------------------------------
1 | from discord.ext import commands
2 | from discord import Embed
3 | 
4 | from aiess.database import SCRAPER_DB_NAME
5 | 
6 | from bot.formatter import format_timeago
7 | from bot.database import Database, BOT_DB_NAME
8 | 
9 | class General(commands.Cog):
10 |     def __init__(self, bot):
11 |         self.bot = bot
12 | 
13 |     @commands.slash_command()
14 |     async def ping(self, ctx):
15 |         """Returns the bot latency (e.g. "134 ms")."""
16 |         await ctx.respond(f"Pong! ({ctx.bot.latency * 1000:.0f} ms)", ephemeral=True)
17 | 
18 |     @commands.slash_command()
19 |     async def info(self, ctx):
20 |         """Returns general information about the bot (e.g. creator and source code)."""
21 |         await ctx.defer(ephemeral=True)
22 | 
23 |         app_info = await self.bot.application_info()
24 | 
25 |         info_embed = Embed()
26 |         info_embed.set_author(name=self.bot.user.name, icon_url=self.bot.user.avatar.url)
27 |         info_embed.description = app_info.description
28 | 
29 |         created_at = self.bot.user.created_at
30 |         guilds_n = len(self.bot.guilds)
31 |         subscriptions_n = retrieve_subscription_count()
32 |         events_n = retrieve_event_count()
33 |         events_today_n = retrieve_event_count_today()
34 |         first_event_at = retrieve_first_event_at()
35 | 
36 |         info_embed.add_field(name="Created", value=f"**{created_at.date()}**\n({format_timeago(created_at)})")
37 |         info_embed.add_field(name="Author", value=f"{app_info.owner}")
38 |         info_embed.add_field(name="Source", value="https://github.com/Naxesss/Aiess")
39 |         info_embed.add_field(name="Events", value=f"**{events_n}** in total, **{events_today_n}** in past 24h")
40 |         info_embed.add_field(name="Subscriptions", value=f"**{subscriptions_n}** in total across **{guilds_n}** server" + ("s" if guilds_n != 1 else ""))
41 |         info_embed.add_field(name="First event", value=f"**{first_event_at.date()}**\n({format_timeago(first_event_at)})")
42 | 
43 |         info_embed.set_image(url="https://i.imgur.com/RR3937R.jpg")
44 | 
45 |         await ctx.followup.send(f"https://discord.com/api/oauth2/authorize?client_id={app_info.id}&permissions=0&scope=bot%20applications.commands", embed=info_embed)
46 | 
47 | def retrieve_event_count():
48 |     return retrieve_with_timeout(
49 |         db_name = SCRAPER_DB_NAME,
50 |         table = "events",
51 |         where = "TRUE",
52 |         selection = "id",  # the newest event's id doubles as an approximate total count
53 |         order_by = "id DESC",
54 |         limit = 1
55 |     )
56 | 
57 | def retrieve_event_count_today():
58 |     return retrieve_with_timeout(
59 |         db_name = SCRAPER_DB_NAME,
60 |         table = "events",
61 |         where = "time >= NOW() - INTERVAL 24 HOUR",
62 |         selection = "COUNT(*)"
63 |     )
64 | 
65 | def retrieve_subscription_count():
66 |     return retrieve_with_timeout(
67 |         db_name = BOT_DB_NAME,
68 |         table = "subscriptions",
69 |         selection = "COUNT(*)"
70 |     )
71 | 
72 | def 
retrieve_first_event_at(): 73 | return retrieve_with_timeout( 74 | db_name = SCRAPER_DB_NAME, 75 | table = "events", 76 | where = "TRUE", 77 | selection = "time", 78 | order_by = "time ASC", 79 | limit = 1 80 | ) 81 | 82 | def retrieve_with_timeout( 83 | db_name, table, where="TRUE", selection="*", 84 | group_by: str=None, order_by: str=None, limit: int=None 85 | ): 86 | try: 87 | db = Database(db_name) 88 | return db.retrieve_table_data( 89 | table = table, 90 | where = where, 91 | selection = selection, 92 | group_by = group_by, 93 | order_by = order_by, 94 | limit = limit 95 | )[0][0] 96 | except TimeoutError: 97 | return "(timed out)" 98 | 99 | def setup(bot): 100 | bot.add_cog(General(bot)) -------------------------------------------------------------------------------- /bot/cogs/ready_events.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | import discord 4 | from discord.ext import commands 5 | 6 | import aiess 7 | from aiess import Event 8 | from aiess import logger 9 | from aiess.database import SCRAPER_DB_NAME 10 | 11 | from bot import activity 12 | from bot import database 13 | from bot import subscriber 14 | 15 | class Reader(aiess.Reader): 16 | def __init__(self, reader_id: str, db_name: str, bot: discord.Bot): 17 | super().__init__(reader_id, db_name) 18 | self.bot = bot 19 | 20 | async def on_events(self, _): 21 | # This is called before any on_event for each batch. 22 | database.clear_cache(SCRAPER_DB_NAME) 23 | 24 | async def on_event(self, event: Event): 25 | logger.log(event, postfix=self.reader_id) 26 | await subscriber.forward(event, self.bot) 27 | 28 | class Ready(commands.Cog): 29 | def __init__(self, bot): 30 | self.bot = bot 31 | self.reader = Reader("bot", db_name=SCRAPER_DB_NAME, bot=bot) 32 | 33 | @commands.Cog.listener() 34 | async def on_connect(self) -> None: 35 | logger.log(f"Connected as {self.bot.user}!", postfix="bot") 36 | asyncio.create_task(activity.loop(self.bot, self.reader)) 37 | 38 | @commands.Cog.listener() 39 | async def on_ready(self) -> None: 40 | logger.log("Ready!", postfix="bot") 41 | 42 | if not self.reader.running: 43 | await self.reader.run() 44 | 45 | def setup(bot): 46 | bot.add_cog(Ready(bot)) -------------------------------------------------------------------------------- /bot/cogs/sub_commands.py: -------------------------------------------------------------------------------- 1 | from discord.ext import commands 2 | from discord import Embed, Colour 3 | from discord.commands import Option 4 | 5 | from aiess.database import SCRAPER_DB_NAME 6 | 7 | from bot.cmdcommon import validate_filter 8 | from bot.subscriber import get_subscription 9 | from bot.subscriber import subscribe 10 | from bot.subscriber import unsubscribe 11 | from bot.filterer import expand 12 | from bot.formatter import escape_markdown 13 | from bot.formatter import format_link, format_embed 14 | from bot.cmdcommon import filters_embed, filter_embed 15 | from bot.filterers.event_filterer import filter_to_sql 16 | from bot.filterers.event_filterer import filter_context 17 | from bot.database import Database 18 | 19 | class Subscription(commands.Cog): 20 | def __init__(self, bot): 21 | self.bot = bot 22 | 23 | @commands.slash_command() 24 | async def show_subscription(self, ctx): 25 | """Shows the current channel subscription.""" 26 | subscription = get_subscription(ctx.channel) 27 | 28 | embed = Embed() 29 | embed.colour = Colour.from_rgb(255, 170, 50) 30 | embed.add_field( 31 | name="🔔\u2000Current 
Subscription", 32 | value=f""" 33 | {escape_markdown(subscription.filter)} 34 | `{expand(subscription.filter)}` 35 | """ if subscription else "None" 36 | ) 37 | 38 | await ctx.respond(embed=embed, ephemeral=True) 39 | 40 | @commands.slash_command() 41 | async def subscribe(self, ctx, filter: Option(str, "Any event matching this will be sent in the channel.", required=True)): 42 | """Subscribes this channel to events matching `filter`.""" 43 | if ctx.author.bot: 44 | return 45 | 46 | if not ctx.channel.guild: 47 | await ctx.respond("✗ This command can only be used in a server.", ephemeral=True) 48 | return 49 | 50 | if not ctx.channel.permissions_for(ctx.author).manage_channels: 51 | await ctx.respond("✗ You need the `Manage Channels` permission here to use this command.", ephemeral=True) 52 | return 53 | 54 | if not await validate_filter(ctx, filter, filter_context): 55 | return # `validate_filter` will respond for us. 56 | 57 | subscribe(ctx.channel, filter) 58 | 59 | embed = Embed() 60 | embed.colour = Colour.from_rgb(255, 170, 50) 61 | embed.add_field( 62 | name="🔔\u2000Subscribed to", 63 | value=f""" 64 | {escape_markdown(filter)} 65 | `{expand(filter)}` 66 | """ 67 | ) 68 | 69 | await ctx.respond("✓", embed=embed) 70 | 71 | @commands.slash_command() 72 | async def unsubscribe(self, ctx): 73 | """Unsubscribes this channel from any event subscriptions.""" 74 | if ctx.author.bot: 75 | return 76 | 77 | if not ctx.channel.guild: 78 | await ctx.respond("✗ This command can only be used in a server.", ephemeral=True) 79 | return 80 | 81 | if not ctx.channel.permissions_for(ctx.author).manage_channels: 82 | await ctx.respond("✗ You need the `Manage Channels` permission here to use this command.", ephemeral=True) 83 | return 84 | 85 | subscription = get_subscription(ctx.channel) 86 | if not subscription: 87 | await ctx.respond("✗ This channel has no subscriptions.", ephemeral=True) 88 | return 89 | 90 | unsubscribe(subscription) 91 | 92 | embed = Embed() 93 | embed.colour = Colour.from_rgb(255, 170, 50) 94 | embed.add_field( 95 | name="🔕\u2000Unsubscribed from", 96 | value=f""" 97 | {escape_markdown(subscription.filter)} 98 | `{expand(subscription.filter)}` 99 | """ 100 | ) 101 | 102 | await ctx.respond("✓", embed=embed) 103 | 104 | @commands.slash_command() 105 | async def recent(self, ctx, filter: Option(str, "The first event matching this will be sent in the channel.", required=False, default=None)): 106 | """Returns the most recent event gathered, optionally matching `filter`.""" 107 | 108 | if filter and not await validate_filter(ctx, filter, filter_context): 109 | return # `validate_filter` will respond for us. 
110 | 111 | await ctx.defer(ephemeral=False) 112 | 113 | matching_filter_str = f" matching `{filter}`" if filter else "" 114 | 115 | filter_query, filter_values = filter_to_sql(filter) 116 | database = Database(SCRAPER_DB_NAME) 117 | try: 118 | event = await database.retrieve_event( 119 | where = filter_query, 120 | where_values = filter_values, 121 | order_by = "time DESC", 122 | extensive = True if filter else False 123 | ) 124 | except TimeoutError: 125 | await ctx.followup.send(f"✗ Took too long to find an event{matching_filter_str}.") 126 | return 127 | 128 | if not event: 129 | await ctx.followup.send(f"✗ No event{matching_filter_str} could be found.") 130 | return 131 | 132 | await ctx.followup.send(f"✓ Most recent event{matching_filter_str}:\r\n{format_link(event)}", embed=await format_embed(event)) 133 | 134 | @commands.slash_command() 135 | async def filters(self, ctx, key: Option(str, "Explains how this specific filter key works (e.g. `creator`).", required=False, default=None)): 136 | """Explains how filters work with examples.""" 137 | if key: 138 | key = key.lower().strip() 139 | tag = filter_context.get_tag(key) 140 | keys = tag.names if tag else None 141 | 142 | if not tag or not keys: 143 | await ctx.respond(f"✗ No filter key `{key}` exists.", ephemeral=True) 144 | return 145 | 146 | await ctx.respond("Type `/filters` for a list of keys and gates.", embed=filter_embed(key, filter_context), ephemeral=True) 147 | return 148 | 149 | await ctx.respond("Type `/filters ` for usage.", embed=filters_embed(filter_context), ephemeral=True) 150 | 151 | def setup(bot): 152 | bot.add_cog(Subscription(bot)) -------------------------------------------------------------------------------- /bot/filterers/perms_filterer.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | import re as regex 5 | 6 | from bot.prefixes import DEFAULT_PREFIX 7 | from bot.filterer import FilterContext, Tag 8 | 9 | filter_context = FilterContext( 10 | name = "Permissions", 11 | examples = [ 12 | f"**everyone** leave empty OR `role:@@everyone` (two @)", 13 | f"**admin-only** see `{DEFAULT_PREFIX}disable `", 14 | "**in #offtopic** `channel:#offtopic`", 15 | "**from @verified role** `role:@verified`", 16 | "**from @someone** `user:@someone`", 17 | "**from @verified role in #offtopic** `role:@verified and channel:#offtopic`", 18 | "**from @verified role in #offtopic or #bot** `role:@verified and channel:(#offtopic or #bot)`", 19 | "**from @verified role in #bot, or from @moderators anywhere** `role:@verified and channel:#bot or role:@moderators`" 20 | ], 21 | tags = [ 22 | Tag( 23 | names = ["user"], 24 | description = "Ensure caller is this user.", 25 | example_values = ["@user", "<@user_id>"], 26 | value_hint = "Any user mention.", 27 | value_predicate = lambda value: regex.match(r"<@!?(\d+)>", value) is not None, 28 | value_func = lambda message: [f"<@{message.author.id}>", f"<@!{message.author.id}>"] if message.author else None 29 | ), 30 | Tag( 31 | names = ["channel"], 32 | description = "Ensure called in this channel.", 33 | example_values = ["#channel", "<#channel_id>"], 34 | value_hint = "Any channel mention.", 35 | value_predicate = lambda value: regex.match(r"<#(\d+)>", value) is not None, 36 | value_func = lambda message: [f"<#{message.channel.id}>"] if message.channel else None 37 | ), 38 | Tag( 39 | names = ["role"], 40 | description = "Ensure caller has this role.", 41 | example_values = ["@role", "<@&role_id>"], 42 | 
value_hint = "Any role mention.", 43 | value_predicate = lambda value: regex.match(r"<@&(\d+)>", value) is not None, 44 | value_func = lambda message: ( 45 | [f"<@&{role.id}>" for role in message.author.roles] 46 | if message.author and hasattr(message.author, "roles") 47 | else None 48 | ) 49 | ) 50 | ] 51 | ) -------------------------------------------------------------------------------- /bot/main.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | import os 5 | 6 | import discord 7 | from discord import Status 8 | 9 | from aiess import logger 10 | 11 | from bot import subscriber 12 | from bot.settings import API_KEY 13 | 14 | logger.init() 15 | subscriber.load() 16 | 17 | bot = discord.Bot(status=Status.do_not_disturb) 18 | 19 | for filename in os.listdir("./bot/cogs"): 20 | if filename.endswith('.py'): 21 | bot.load_extension(f'bot.cogs.{filename[:-3]}') 22 | 23 | bot.run(API_KEY) -------------------------------------------------------------------------------- /bot/objects.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | class Subscription(): 5 | """Represents a channel subscription, containing channel identifying information as well as a filter. 6 | This allows for figuring out where to send data, as well as which data to send.""" 7 | 8 | def __init__(self, guild_id: int, channel_id: int, _filter: str): 9 | self.guild_id = int(guild_id) if guild_id is not None else None 10 | self.channel_id = int(channel_id) 11 | self.filter = _filter 12 | 13 | def __str__(self) -> str: 14 | return f"Guild {self.guild_id}, Channel {self.channel_id}, Filter \"{self.filter}\"" 15 | 16 | def __key(self) -> tuple: 17 | return ( 18 | self.guild_id, 19 | self.channel_id, 20 | self.filter 21 | ) 22 | 23 | def __eq__(self, other) -> bool: 24 | if not isinstance(other, Subscription): 25 | return False 26 | return self.__key() == other.__key() 27 | 28 | def __hash__(self) -> str: 29 | return hash(self.__key()) 30 | 31 | class Prefix(): 32 | """Represents a command prefix object, including which guild it is associated with and the prefix string itself.""" 33 | def __init__(self, guild_id: int, prefix: str): 34 | self.guild_id = int(guild_id) 35 | self.prefix = prefix 36 | 37 | def __key(self) -> tuple: 38 | return ( 39 | self.guild_id, 40 | self.prefix 41 | ) 42 | 43 | def __eq__(self, other) -> bool: 44 | if not isinstance(other, Prefix): 45 | return False 46 | return self.__key() == other.__key() 47 | 48 | def __hash__(self) -> str: 49 | return hash(self.__key()) 50 | 51 | class CommandPermission(): 52 | """Represents the permission needed to call a command, including which guild 53 | it is associated with, the first command name, and the permission filter.""" 54 | def __init__(self, guild_id: int, command_name: str, permission_filter: str): 55 | self.guild_id = int(guild_id) 56 | self.command_name = command_name 57 | self.permission_filter = permission_filter 58 | 59 | def __key(self) -> tuple: 60 | return ( 61 | self.guild_id, 62 | self.command_name, 63 | self.permission_filter 64 | ) 65 | 66 | def __eq__(self, other) -> bool: 67 | if not isinstance(other, CommandPermission): 68 | return False 69 | return self.__key() == other.__key() 70 | 71 | def __hash__(self) -> str: 72 | return hash(self.__key()) -------------------------------------------------------------------------------- /bot/settings.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | with open("settings.json", mode="r") as settings_file: 5 | json_str = settings_file.read() 6 | json_str = os.path.expandvars(json_str) 7 | settings = json.loads(json_str) 8 | 9 | # DISCORD 10 | API_KEY = settings["discord-api-key"] -------------------------------------------------------------------------------- /bot/subscriber.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | import asyncio 5 | from typing import List, Union 6 | from aiohttp.client_exceptions import ClientOSError 7 | from aiohttp.client_exceptions import ServerDisconnectedError 8 | 9 | import discord 10 | from discord import TextChannel 11 | from discord.errors import Forbidden, HTTPException 12 | 13 | from aiess import Event 14 | from aiess import logger 15 | 16 | from bot.objects import Subscription 17 | from bot.database import Database, BOT_DB_NAME 18 | from bot.formatter import format_embed, format_link 19 | from bot.filterers.event_filterer import filter_context 20 | 21 | from aiess.web import ratelimiter 22 | 23 | DEFAULT_DB_NAME = BOT_DB_NAME 24 | 25 | cache: List[Subscription] = [] 26 | 27 | def load() -> None: 28 | """Retrieves all subscriptions from the database and appends them to the internal list.""" 29 | global cache 30 | cache = [] 31 | 32 | for sub in Database(DEFAULT_DB_NAME).retrieve_subscriptions(): 33 | cache.append(sub) 34 | 35 | def guild_id_or_none(channel: TextChannel): 36 | """Returns the id of the guild this channel belongs in, or None if the channel is a DM channel.""" 37 | return channel.guild.id if hasattr(channel, "guild") else None 38 | 39 | def subscribe(channel: TextChannel, _filter: str) -> None: 40 | """Inserts a channel and filter into the subscription table of the database and updates the cache. 41 | Causes any new events passing the filter to be sent to the channel.""" 42 | sub = Subscription(guild_id_or_none(channel), channel.id, _filter) 43 | add_subscription(sub) 44 | 45 | def add_subscription(sub: Subscription) -> None: 46 | """Inserts a subscription into the subscription table of the database and reloads the cache. 47 | Causes any new events passing the filter to be sent to the channel.""" 48 | if sub.guild_id is None: 49 | # Prevents excessive discord rate limiting (5 DMs per second globally). 
50 |         raise ValueError("Cannot subscribe in DM channels.")
51 | 
52 |     Database(DEFAULT_DB_NAME).insert_subscription(sub)
53 |     load()
54 | 
55 | def unsubscribe(channel_or_subscription: Union[TextChannel, Subscription]) -> None:
56 |     """Deletes a channel and its filter from the subscription table of the database and reloads the cache."""
57 |     sub = channel_or_subscription
58 |     if isinstance(channel_or_subscription, TextChannel):
59 |         channel = channel_or_subscription
60 |         sub = Subscription(guild_id_or_none(channel), channel.id, None)
61 |     remove_subscription(sub)
62 | 
63 | def remove_subscription(sub: Subscription) -> None:
64 |     """Deletes a subscription from the subscription table of the database and reloads the cache."""
65 |     Database(DEFAULT_DB_NAME).delete_subscription(sub)
66 |     load()
67 | 
68 | def get_subscription(channel: TextChannel) -> Subscription:
69 |     """Returns the subscription associated with the given channel, if any, otherwise None."""
70 |     return Database(DEFAULT_DB_NAME).retrieve_subscription("guild_id=%s AND channel_id=%s", (guild_id_or_none(channel), channel.id))
71 | 
72 | async def forward(event: Event, bot: discord.Bot) -> None:
73 |     """Attempts to forward an event through all subscription filters."""
74 |     pre_generated_embed = await format_embed(event, skip_timeago_if_recent=True)
75 | 
76 |     for sub in cache:
77 |         await forward_sub(event, sub, bot, pre_generated_embed)
78 | 
79 | async def forward_sub(event: Event, sub: Subscription, bot: discord.Bot, pre_generated_embed: discord.Embed) -> None:
80 |     """Attempts to forward an event through the filter of the given subscription."""
81 |     if filter_context.test(sub.filter, event):
82 |         await send_event(event, sub, bot, pre_generated_embed)
83 | 
84 | async def send_event(event: Event, subscription: Subscription, bot: discord.Bot, pre_generated_embed: discord.Embed):
85 |     channel = bot.get_channel(subscription.channel_id)
86 |     if not channel:
87 |         # Ignore channels we no longer have access to (e.g. deleted / power outage).
88 |         return
89 | 
90 |     while True:
91 |         try:
92 |             await ratelimiter.async_call_with_rate_limit(
93 |                 awaited_result_func = lambda: channel.send(
94 |                     content = format_link(event),
95 |                     embed = pre_generated_embed
96 |                 ),
97 |                 is_result_invalid = lambda result: result is None,
98 |                 rate_limit = 0,
99 |                 rate_limit_id = "bot_channel_send"
100 |             )
101 |             break
102 |         except Forbidden:
103 |             break  # In case we're subscribed to a channel we don't have access to.
104 |         except ClientOSError as ex:
105 |             logger.log_err(f"WARNING | Encountered ClientOSError \"{ex}\" when sending to channel \"{channel}\", retrying...")
106 |         except ServerDisconnectedError as ex:
107 |             logger.log_err(f"WARNING | Encountered ServerDisconnectedError \"{ex}\" when sending to channel \"{channel}\", retrying...")
108 |         except HTTPException as ex:
109 |             # Log every HTTP error so that repeated failures stay visible. 500-type codes
110 |             # are server-related (i.e. on Discord's end) and safe to retry; commonly
111 |             # "503: Service Unavailable" and "504: Gateway Time-out".
112 |             logger.log_err(f"WARNING | Encountered HTTPException \"{ex}\" when sending to channel \"{channel}\", retrying...")
113 |             if ex.status == 429:
114 |                 # We are being ratelimited by Discord and should back off our next attempt.
115 | ratelimiter.back_off("bot_channel_send") 116 | except asyncio.TimeoutError as ex: 117 | logger.log_err(f"WARNING | Encountered asyncio.TimeoutError \"{ex}\" when sending to channel \"{channel}\", retrying...") -------------------------------------------------------------------------------- /bot/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Naxesss/Aiess/f7485b3ecc75ca1369960c5652036c4a7865be21/bot/tests/__init__.py -------------------------------------------------------------------------------- /bot/tests/test_activity.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | import mock 5 | import pytest 6 | from datetime import datetime 7 | 8 | from discord import Game, Status 9 | 10 | from aiess import timestamp 11 | 12 | from bot.activity import get_activity 13 | from bot.activity import get_status 14 | 15 | @pytest.fixture 16 | def mock_client(): 17 | client = mock.MagicMock() 18 | client.guilds = [object()] * 3 19 | client.is_ready = lambda: True 20 | return client 21 | 22 | @pytest.fixture 23 | def mock_reader(): 24 | reader = mock.MagicMock() 25 | reader.latest_event_time = None 26 | return reader 27 | 28 | def test_activity(mock_client, mock_reader): 29 | assert get_activity(mock_client, mock_reader) == Game("/subscribe | 3 servers") 30 | 31 | def test_activity_singular(mock_client, mock_reader): 32 | mock_client.guilds = [object()] * 1 33 | assert get_activity(mock_client, mock_reader) == Game("/subscribe | 1 server") 34 | 35 | def test_activity_not_ready(mock_client, mock_reader): 36 | mock_client.is_ready = lambda: False 37 | assert get_activity(mock_client, mock_reader) == Game("/subscribe | Starting...") 38 | 39 | def test_activity_delay_small(mock_client, mock_reader): 40 | mock_reader.latest_event_time = timestamp.from_string("2020-01-01 00:00:00") 41 | 42 | with mock.patch("bot.activity.datetime") as mock_datetime: 43 | mock_datetime.utcnow.return_value = timestamp.from_string("2020-01-01 00:03:30") 44 | mock_datetime.side_effect = datetime 45 | 46 | # Delay is too small to be worth displaying here. 47 | assert get_activity(mock_client, mock_reader) == Game("/subscribe | 3 servers") 48 | 49 | def test_activity_delay_large(mock_client, mock_reader): 50 | mock_reader.latest_event_time = timestamp.from_string("2020-01-01 00:00:00") 51 | 52 | with mock.patch("bot.activity.datetime") as mock_datetime: 53 | mock_datetime.utcnow.return_value = timestamp.from_string("2020-01-01 04:01:30") 54 | mock_datetime.side_effect = datetime 55 | 56 | assert get_activity(mock_client, mock_reader) == Game("/subscribe | 3 servers | 4 hours delay") 57 | 58 | 59 | 60 | def test_status_no_event_time(mock_client, mock_reader): 61 | with mock.patch("bot.activity.datetime") as mock_datetime: 62 | # Mock `datetime.utcnow`, but retain the original datetime class functionality through the `side_effect` attribute. 
63 | mock_datetime.utcnow.return_value = timestamp.from_string("2020-01-01 00:00:31") 64 | mock_datetime.side_effect = datetime 65 | 66 | assert get_status(mock_client, mock_reader) == Status.do_not_disturb 67 | 68 | def test_status_online(mock_client, mock_reader): 69 | mock_reader.latest_event_time = timestamp.from_string("2020-01-01 00:00:00") 70 | 71 | with mock.patch("bot.activity.datetime") as mock_datetime: 72 | mock_datetime.utcnow.return_value = timestamp.from_string("2020-01-01 00:00:31") 73 | mock_datetime.side_effect = datetime 74 | 75 | assert get_status(mock_client, mock_reader) == Status.online 76 | 77 | def test_status_idle(mock_client, mock_reader): 78 | mock_reader.latest_event_time = timestamp.from_string("2020-01-01 00:00:00") 79 | 80 | with mock.patch("bot.activity.datetime") as mock_datetime: 81 | mock_datetime.utcnow.return_value = timestamp.from_string("2020-01-01 00:30:01") 82 | mock_datetime.side_effect = datetime 83 | 84 | assert get_status(mock_client, mock_reader) == Status.idle 85 | 86 | def test_status_do_not_disturb(mock_client, mock_reader): 87 | mock_reader.latest_event_time = timestamp.from_string("2020-01-01 00:00:00") 88 | 89 | with mock.patch("bot.activity.datetime") as mock_datetime: 90 | mock_datetime.utcnow.return_value = timestamp.from_string("2020-01-01 02:00:01") 91 | mock_datetime.side_effect = datetime 92 | 93 | assert get_status(mock_client, mock_reader) == Status.do_not_disturb 94 | 95 | def test_status_not_ready(mock_client, mock_reader): 96 | mock_client.is_ready = lambda: False 97 | 98 | with mock.patch("bot.activity.datetime") as mock_datetime: 99 | mock_datetime.utcnow.return_value = timestamp.from_string("2020-01-01 00:00:00") 100 | mock_datetime.side_effect = datetime 101 | 102 | assert get_status(mock_client, mock_reader) == Status.do_not_disturb -------------------------------------------------------------------------------- /bot/tests/test_cmdcommon.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | import pytest 5 | 6 | from bot.filterers.event_filterer import filter_context 7 | 8 | from bot.cmdcommon import validate_filter 9 | from bot.cmdcommon import filters_embed 10 | from bot.cmdcommon import filter_embed 11 | 12 | class MockChannel: 13 | def __init__(self, has_guild=True): 14 | if has_guild: 15 | self.guild = True 16 | 17 | class MockContext: 18 | def __init__(self, has_guild=True): 19 | self.text = None 20 | self.embed = None 21 | self.ephemeral = False 22 | 23 | self.channel = MockChannel(has_guild) 24 | 25 | async def respond(self, text, embed=None, ephemeral=False): 26 | self.text = text 27 | self.embed = embed 28 | self.ephemeral = ephemeral 29 | 30 | @pytest.mark.asyncio 31 | async def test_validate_filter(): 32 | ctx = MockContext() 33 | assert await validate_filter(ctx, _filter="type:nominate", filter_context=filter_context) 34 | 35 | @pytest.mark.asyncio 36 | async def test_validate_filter_complex(): 37 | ctx = MockContext() 38 | assert await validate_filter( 39 | ctx = ctx, 40 | _filter = "type:(nom or qual or reset or dq) and not user:(banchobot or peppy)", 41 | filter_context = filter_context 42 | ) 43 | 44 | @pytest.mark.asyncio 45 | async def test_validate_filter_invalid_key(): 46 | ctx = MockContext() 47 | assert not await validate_filter(ctx, _filter="undefined:undefined", filter_context=filter_context) 48 | assert "✗" in ctx.text 49 | assert "invalid key" in ctx.text.lower() 50 | 51 | embed = 
filters_embed(filter_context=filter_context) 52 | assert ctx.embed.title == embed.title 53 | assert ctx.embed.description == embed.description 54 | assert ctx.embed.fields[0].name == embed.fields[0].name 55 | assert ctx.embed.fields[0].value == embed.fields[0].value 56 | assert ctx.embed.fields[1].name == embed.fields[1].name 57 | assert ctx.embed.fields[1].value == embed.fields[1].value 58 | 59 | @pytest.mark.asyncio 60 | async def test_validate_filter_invalid_value(): 61 | ctx = MockContext() 62 | assert not await validate_filter(ctx, _filter="type:undefined", filter_context=filter_context) 63 | assert "✗" in ctx.text 64 | assert "invalid value" in ctx.text.lower() 65 | 66 | embed = filter_embed(key="type", filter_context=filter_context) 67 | assert ctx.embed.fields[0].name == embed.fields[0].name 68 | assert ctx.embed.fields[0].value == embed.fields[0].value 69 | assert ctx.embed.fields[1].name == embed.fields[1].name 70 | assert ctx.embed.fields[1].value == embed.fields[1].value 71 | assert ctx.embed.fields[2].name == embed.fields[2].name 72 | assert ctx.embed.fields[2].value == embed.fields[2].value 73 | 74 | @pytest.mark.asyncio 75 | async def test_validate_filter_invalid_word(): 76 | ctx = MockContext() 77 | assert not await validate_filter(ctx, _filter="user:sometwo annd type:qualify", filter_context=filter_context) 78 | assert "✗" in ctx.text 79 | assert "invalid word" in ctx.text.lower() 80 | 81 | embed = filters_embed(filter_context=filter_context) 82 | assert ctx.embed.title == embed.title 83 | assert ctx.embed.description == embed.description 84 | assert ctx.embed.fields[0].name == embed.fields[0].name 85 | assert ctx.embed.fields[0].value == embed.fields[0].value 86 | assert ctx.embed.fields[1].name == embed.fields[1].name 87 | assert ctx.embed.fields[1].value == embed.fields[1].value 88 | 89 | @pytest.mark.asyncio 90 | async def test_validate_filter_missing_gate(): 91 | ctx = MockContext() 92 | assert not await validate_filter(ctx, _filter="user:sometwo type:qualify", filter_context=filter_context) 93 | assert "✗" in ctx.text 94 | assert "missing gate" in ctx.text.lower() 95 | assert "between `user:sometwo` and `type:qualify`" in ctx.text.lower() 96 | 97 | embed = filters_embed(filter_context=filter_context) 98 | assert ctx.embed.title == embed.title 99 | assert ctx.embed.description == embed.description 100 | assert ctx.embed.fields[0].name == embed.fields[0].name 101 | assert ctx.embed.fields[0].value == embed.fields[0].value 102 | assert ctx.embed.fields[1].name == embed.fields[1].name 103 | assert ctx.embed.fields[1].value == embed.fields[1].value 104 | 105 | @pytest.mark.asyncio 106 | async def test_validate_filter_no_guild(): 107 | ctx = MockContext(has_guild=False) 108 | assert not await validate_filter(ctx, _filter="type:nominate", filter_context=filter_context) -------------------------------------------------------------------------------- /bot/tests/test_database.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | import pytest 5 | 6 | from aiess import Event, User, Beatmapset 7 | from aiess.timestamp import from_string 8 | from aiess.database import SCRAPER_TEST_DB_NAME 9 | 10 | from bot import database as db_module 11 | from bot.database import Database, BOT_TEST_DB_NAME 12 | from bot.objects import Subscription, Prefix, CommandPermission 13 | 14 | @pytest.fixture 15 | def scraper_test_database(): 16 | database = Database(SCRAPER_TEST_DB_NAME) 17 | # Reset database to state before 
any tests ran. 18 | database.clear_table_data("events") 19 | db_module.clear_cache(SCRAPER_TEST_DB_NAME) 20 | return database 21 | 22 | @pytest.fixture 23 | def bot_test_database(): 24 | database = Database(BOT_TEST_DB_NAME) 25 | database.clear_table_data("subscriptions") 26 | database.clear_table_data("prefixes") 27 | database.clear_table_data("permissions") 28 | db_module.clear_cache(BOT_TEST_DB_NAME) 29 | return database 30 | 31 | def test_correct_bot_db_setup(bot_test_database): 32 | assert not bot_test_database.retrieve_table_data("subscriptions") 33 | assert not bot_test_database.retrieve_table_data("prefixes") 34 | assert not db_module.beatmapset_event_cache[SCRAPER_TEST_DB_NAME] 35 | 36 | def test_correct_scraper_db_setup(scraper_test_database): 37 | assert not scraper_test_database.retrieve_table_data("events") 38 | assert not db_module.beatmapset_event_cache[BOT_TEST_DB_NAME] 39 | 40 | def test_insert_retrieve_channel_sub(bot_test_database): 41 | sub1 = Subscription(guild_id=3, channel_id=1, _filter="type:problem and state:qualified") 42 | sub2 = Subscription(guild_id=3, channel_id=2, _filter="type:ranked") 43 | 44 | bot_test_database.insert_subscription(sub1) 45 | bot_test_database.insert_subscription(sub2) 46 | 47 | retrieved_sub = bot_test_database.retrieve_subscription("guild_id=%s AND channel_id=%s", (3, 1)) 48 | assert retrieved_sub == sub1 49 | 50 | def test_insert_retrieve_channel_subs(bot_test_database): 51 | sub1 = Subscription(guild_id=3, channel_id=1, _filter="type:problem and state:qualified") 52 | sub2 = Subscription(guild_id=3, channel_id=2, _filter="type:ranked") 53 | 54 | bot_test_database.insert_subscription(sub1) 55 | bot_test_database.insert_subscription(sub2) 56 | 57 | retrieved_subs = bot_test_database.retrieve_subscriptions() 58 | assert next(retrieved_subs, None) == sub1 59 | assert next(retrieved_subs, None) == sub2 60 | assert next(retrieved_subs, None) is None 61 | 62 | def test_insert_retrieve_channel_sub_no_filter(bot_test_database): 63 | sub = Subscription(guild_id=1, channel_id=1, _filter=None) 64 | 65 | # A subscription should always have an explicit filter to prevent 66 | # the creation of an unfiltered subscription unintentionally. 
67 | with pytest.raises(ValueError) as err: 68 | bot_test_database.insert_subscription(sub) 69 | 70 | assert "filter cannot be falsy" in str(err).lower() 71 | 72 | def test_insert_retrieve_prefix(bot_test_database): 73 | prefix1 = Prefix(guild_id=3, prefix="&") 74 | prefix2 = Prefix(guild_id=4, prefix="%") 75 | 76 | bot_test_database.insert_prefix(prefix1) 77 | bot_test_database.insert_prefix(prefix2) 78 | 79 | retrieved_prefix = bot_test_database.retrieve_prefix("guild_id=%s", (3,)) 80 | assert retrieved_prefix == prefix1 81 | 82 | def test_insert_retrieve_prefixes(bot_test_database): 83 | prefix1 = Prefix(guild_id=3, prefix="&") 84 | prefix2 = Prefix(guild_id=4, prefix="%") 85 | 86 | bot_test_database.insert_prefix(prefix1) 87 | bot_test_database.insert_prefix(prefix2) 88 | 89 | retrieved_prefixes = bot_test_database.retrieve_prefixes() 90 | assert next(retrieved_prefixes, None) == prefix1 91 | assert next(retrieved_prefixes, None) == prefix2 92 | assert next(retrieved_prefixes, None) is None 93 | 94 | def test_insert_retrieve_permission(bot_test_database): 95 | perm1 = CommandPermission(guild_id=3, command_name="test", permission_filter="filter1") 96 | perm2 = CommandPermission(guild_id=4, command_name="test", permission_filter="filter2") 97 | 98 | bot_test_database.insert_permission(perm1) 99 | bot_test_database.insert_permission(perm2) 100 | 101 | retrieved_perm = bot_test_database.retrieve_permission("guild_id=%s AND command_name=%s", (3, "test")) 102 | assert retrieved_perm == perm1 103 | 104 | def test_insert_retrieve_permissions(bot_test_database): 105 | perm1 = CommandPermission(guild_id=3, command_name="test", permission_filter="filter1") 106 | perm2 = CommandPermission(guild_id=4, command_name="test", permission_filter="filter2") 107 | 108 | bot_test_database.insert_permission(perm1) 109 | bot_test_database.insert_permission(perm2) 110 | 111 | retrieved_perms = bot_test_database.retrieve_permissions() 112 | assert next(retrieved_perms, None) == perm1 113 | assert next(retrieved_perms, None) == perm2 114 | assert next(retrieved_perms, None) is None 115 | 116 | @pytest.mark.asyncio 117 | async def test_retrieve_beatmapset_events(scraper_test_database): 118 | beatmapset = Beatmapset(1, creator=User(4, "creator"), allow_api=False) 119 | nom_event = Event("nominate", from_string("2020-01-01 00:00:00"), beatmapset, user=User(1, "someone")) 120 | qual_event = Event("qualify", from_string("2020-01-01 05:00:00"), beatmapset, user=User(2, "sometwo")) 121 | suggestion_event = Event("suggestion", from_string("2020-01-01 01:00:00"), beatmapset, user=User(3, "somethree")) 122 | 123 | scraper_test_database.insert_event(suggestion_event) 124 | scraper_test_database.insert_event(nom_event) 125 | scraper_test_database.insert_event(qual_event) 126 | 127 | events = await scraper_test_database.retrieve_beatmapset_events(beatmapset) 128 | assert nom_event in events 129 | assert qual_event in events 130 | assert suggestion_event in events 131 | 132 | @pytest.mark.asyncio 133 | async def test_retrieve_beatmapset_events_cache(scraper_test_database): 134 | beatmapset = Beatmapset(1, creator=User(4, "creator"), allow_api=False) 135 | nom_event = Event("nominate", from_string("2020-01-01 00:00:00"), beatmapset, user=User(1, "someone")) 136 | qual_event = Event("qualify", from_string("2020-01-01 05:00:00"), beatmapset, user=User(2, "sometwo")) 137 | 138 | scraper_test_database.insert_event(nom_event) 139 | scraper_test_database.insert_event(qual_event) 140 | 141 | await 
scraper_test_database.retrieve_beatmapset_events(beatmapset) 142 | assert db_module.beatmapset_event_cache[SCRAPER_TEST_DB_NAME][1] == [qual_event, nom_event] 143 | 144 | db_module.clear_cache(SCRAPER_TEST_DB_NAME) 145 | assert not db_module.beatmapset_event_cache[SCRAPER_TEST_DB_NAME] -------------------------------------------------------------------------------- /bot/tests/test_objects.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | from bot.objects import Subscription 5 | from bot.objects import Prefix 6 | from bot.objects import CommandPermission 7 | 8 | def test_sub_init_str_ids(): 9 | sub = Subscription(guild_id="1", channel_id="3", _filter="type:nominate") 10 | assert sub.guild_id == 1 11 | assert sub.channel_id == 3 12 | 13 | def test_sub_str(): 14 | sub = Subscription(guild_id=1, channel_id=3, _filter="type:nominate") 15 | assert "\"type:nominate\"" in str(sub) 16 | assert "1" in str(sub) 17 | assert "3" in str(sub) 18 | 19 | def test_sub_eq(): 20 | sub1 = Subscription(guild_id=1, channel_id=3, _filter="type:nominate") 21 | sub2 = Subscription(guild_id=1, channel_id=3, _filter="type:nominate") 22 | sub3 = Subscription(guild_id=1, channel_id=4, _filter="type:nominate") 23 | assert sub1 == sub2 24 | assert sub1 != sub3 25 | 26 | def test_sub_eq_type_mismatch(): 27 | sub = Subscription(guild_id=1, channel_id=3, _filter="type:nominate") 28 | assert sub != "not a sub" 29 | 30 | def test_sub_hash(): 31 | sub = Subscription(guild_id=1, channel_id=3, _filter="type:nominate") 32 | assert hash(sub) 33 | 34 | 35 | 36 | def test_prefix_init_str_id(): 37 | prefix = Prefix(guild_id="1", prefix="&") 38 | assert prefix.guild_id == 1 39 | 40 | def test_prefix_eq(): 41 | prefix1 = Prefix(guild_id=1, prefix="&") 42 | prefix2 = Prefix(guild_id=1, prefix="&") 43 | prefix3 = Prefix(guild_id=3, prefix="&") 44 | assert prefix1 == prefix2 45 | assert prefix1 != prefix3 46 | 47 | def test_prefix_eq_type_mismatch(): 48 | prefix = Prefix(guild_id=1, prefix="&") 49 | assert prefix != "not a prefix" 50 | 51 | def test_prefix_hash(): 52 | prefix = Prefix(guild_id=1, prefix="&") 53 | assert hash(prefix) 54 | 55 | 56 | 57 | def test_permission_init_str_id(): 58 | permission = CommandPermission(guild_id="1", command_name="test", permission_filter="filter") 59 | assert permission.guild_id == 1 60 | 61 | def test_permission_eq(): 62 | permission1 = CommandPermission(guild_id=1, command_name="test", permission_filter="filter") 63 | permission2 = CommandPermission(guild_id=1, command_name="test", permission_filter="filter") 64 | permission3 = CommandPermission(guild_id=3, command_name="test", permission_filter="filter") 65 | assert permission1 == permission2 66 | assert permission1 != permission3 67 | 68 | def test_permission_eq_type_mismatch(): 69 | permission = CommandPermission(guild_id=1, command_name="test", permission_filter="filter") 70 | assert permission != "not a permission" 71 | 72 | def test_permission_hash(): 73 | permission = CommandPermission(guild_id=1, command_name="test", permission_filter="filter") 74 | assert hash(permission) -------------------------------------------------------------------------------- /bot/tests/test_subscriber.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | import pytest 5 | import asyncio 6 | from mock import patch 7 | from datetime import datetime 8 | 9 | from aiess import Event 10 | 11 | from bot
import subscriber 12 | from bot.objects import Subscription 13 | from bot.database import Database, BOT_TEST_DB_NAME 14 | 15 | def setup_function(): 16 | subscriber.DEFAULT_DB_NAME = BOT_TEST_DB_NAME 17 | # Reset database to state before any tests ran. 18 | Database(BOT_TEST_DB_NAME).clear_table_data("subscriptions") 19 | 20 | def test_correct_setup(): 21 | assert not Database(BOT_TEST_DB_NAME).retrieve_table_data("subscriptions") 22 | 23 | def test_load(): 24 | sub1 = Subscription(guild_id=1, channel_id=1, _filter="type:nominate") 25 | sub2 = Subscription(guild_id=1, channel_id=2, _filter="type:ranked") 26 | 27 | database = Database(BOT_TEST_DB_NAME) 28 | database.insert_subscription(sub1) 29 | database.insert_subscription(sub2) 30 | 31 | subscriber.load() 32 | 33 | assert sub1 in subscriber.cache 34 | assert sub2 in subscriber.cache 35 | 36 | def test_add_subscription(): 37 | sub1 = Subscription(guild_id=1, channel_id=1, _filter="type:nominate") 38 | sub2 = Subscription(guild_id=1, channel_id=2, _filter="type:ranked") 39 | sub3 = Subscription(guild_id=1, channel_id=2, _filter="type:qualify") 40 | 41 | subscriber.add_subscription(sub1) 42 | subscriber.add_subscription(sub2) 43 | subscriber.add_subscription(sub3) 44 | 45 | assert sub1 in subscriber.cache 46 | assert sub2 not in subscriber.cache 47 | assert sub3 in subscriber.cache 48 | 49 | def test_add_subscription_dm_channel(): 50 | sub = Subscription(guild_id=None, channel_id=1, _filter="type:nominate") 51 | 52 | with pytest.raises(ValueError) as err: 53 | subscriber.add_subscription(sub) 54 | 55 | assert "DM channel" in str(err) 56 | 57 | def test_remove_subscription(): 58 | sub1 = Subscription(guild_id=1, channel_id=1, _filter="type:nominate") 59 | 60 | subscriber.add_subscription(sub1) 61 | assert sub1 in subscriber.cache 62 | 63 | subscriber.remove_subscription(sub1) 64 | assert sub1 not in subscriber.cache 65 | 66 | class MockBot(): 67 | def __init__(self): 68 | self.event_sub_pairs = [] 69 | 70 | async def get_channel(self, _): 71 | return True 72 | 73 | async def send_event(self, event: Event, sub: Subscription, _, __) -> None: 74 | self.event_sub_pairs.append((sub, event)) 75 | 76 | @pytest.mark.asyncio 77 | async def test_forward(): 78 | sub_both = Subscription(guild_id=1, channel_id=2, _filter="type:test1 or type:test2") 79 | sub_one = Subscription(guild_id=1, channel_id=1, _filter="type:test1") 80 | 81 | subscriber.add_subscription(sub_both) 82 | subscriber.add_subscription(sub_one) 83 | 84 | event1 = Event(_type="test1", time=datetime.utcnow()) 85 | event2 = Event(_type="test2", time=datetime.utcnow()) 86 | bot = MockBot() 87 | 88 | with patch("bot.subscriber.format_embed", return_value=None): 89 | with patch("bot.subscriber.send_event", new=bot.send_event): 90 | await subscriber.forward(event1, bot) 91 | await subscriber.forward(event2, bot) 92 | 93 | await asyncio.sleep(2) 94 | 95 | assert (sub_both, event1) in bot.event_sub_pairs 96 | assert (sub_both, event2) in bot.event_sub_pairs 97 | 98 | assert (sub_one, event1) in bot.event_sub_pairs 99 | assert (sub_one, event2) not in bot.event_sub_pairs, "A subscription was forwarded an event it was supposed to filter." 
-------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | require_ci_to_pass: yes 3 | 4 | coverage: 5 | precision: 2 6 | round: down 7 | range: "70...100" 8 | status: 9 | project: 10 | default: 11 | target: 95% 12 | threshold: 1% 13 | patch: off 14 | 15 | parsers: 16 | gcov: 17 | branch_detection: 18 | conditional: yes 19 | loop: yes 20 | method: no 21 | macro: no 22 | 23 | comment: 24 | layout: "reach,diff,flags,tree" 25 | behavior: default 26 | require_changes: no -------------------------------------------------------------------------------- /schema.sql: -------------------------------------------------------------------------------- 1 | -- Bot database 2 | CREATE DATABASE IF NOT EXISTS `aiess_bot_test`; 3 | USE `aiess_bot_test`; 4 | 5 | DROP TABLE IF EXISTS `prefixes`; 6 | CREATE TABLE `prefixes` ( 7 | `guild_id` bigint(20) unsigned NOT NULL, 8 | `prefix` mediumtext, 9 | PRIMARY KEY (`guild_id`) 10 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; 11 | 12 | DROP TABLE IF EXISTS `subscriptions`; 13 | CREATE TABLE `subscriptions` ( 14 | `guild_id` bigint(20) unsigned NOT NULL, 15 | `channel_id` bigint(20) unsigned NOT NULL, 16 | `filter` mediumtext NOT NULL, 17 | PRIMARY KEY (`guild_id`,`channel_id`), 18 | UNIQUE KEY `UNIQUE` (`channel_id`,`guild_id`) 19 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; 20 | 21 | DROP TABLE IF EXISTS `permissions`; 22 | CREATE TABLE `permissions` ( 23 | `guild_id` bigint(20) unsigned NOT NULL, 24 | `command_name` varchar(60) NOT NULL, 25 | `permission_filter` mediumtext, 26 | PRIMARY KEY (`guild_id`,`command_name`) 27 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; 28 | 29 | -- Scraper database 30 | CREATE DATABASE IF NOT EXISTS `aiess_test`; 31 | USE `aiess_test`; 32 | 33 | DROP TABLE IF EXISTS `beatmapset_modes`; 34 | CREATE TABLE `beatmapset_modes` ( 35 | `beatmapset_id` bigint(20) unsigned NOT NULL, 36 | `mode` varchar(20) NOT NULL, 37 | PRIMARY KEY (`beatmapset_id`,`mode`) 38 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; 39 | 40 | DROP TABLE IF EXISTS `users`; 41 | CREATE TABLE `users` ( 42 | `id` bigint(20) unsigned NOT NULL, 43 | `name` mediumtext, 44 | PRIMARY KEY (`id`), 45 | UNIQUE KEY `id_UNIQUE` (`id`) 46 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; 47 | 48 | DROP TABLE IF EXISTS `group_users`; 49 | CREATE TABLE `group_users` ( 50 | `group_id` bigint(20) unsigned NOT NULL, 51 | `user_id` bigint(20) unsigned NOT NULL, 52 | PRIMARY KEY (`group_id`,`user_id`) 53 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; 54 | 55 | DROP TABLE IF EXISTS `beatmapsets`; 56 | CREATE TABLE `beatmapsets` ( 57 | `id` bigint(20) unsigned NOT NULL, 58 | `title` mediumtext, 59 | `artist` mediumtext, 60 | `creator_id` bigint(20) unsigned NOT NULL, 61 | `genre` mediumtext, 62 | `language` mediumtext, 63 | `tags` mediumtext, 64 | PRIMARY KEY (`id`), 65 | KEY `beatmapsetsfk_creator_id_idx` (`creator_id`), 66 | CONSTRAINT `beatmapsetsfk_creator_id` FOREIGN KEY (`creator_id`) REFERENCES `users` (`id`) 67 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; 68 | 69 | DROP TABLE IF EXISTS `beatmaps`; 70 | CREATE TABLE `beatmaps` ( 71 | `id` bigint(20) unsigned NOT NULL, 72 | `beatmapset_id` bigint(20) unsigned NOT NULL, 73 | `version` TINYTEXT, 74 | `draintime` DOUBLE unsigned NOT NULL, 75 | 
`sr_total` DOUBLE unsigned NOT NULL, 76 | `favourites` bigint(20) unsigned, 77 | `userrating` FLOAT, 78 | `playcount` bigint(20) unsigned, 79 | `passcount` bigint(20) unsigned, 80 | `updated_at` DATETIME, 81 | PRIMARY KEY (`id`), 82 | CONSTRAINT `beatmapsfk_beatmapset_id` FOREIGN KEY (`beatmapset_id`) REFERENCES `beatmapsets` (`id`) 83 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; 84 | 85 | DROP TABLE IF EXISTS `beatmapset_status`; 86 | CREATE TABLE `beatmapset_status` ( 87 | `id` bigint(20) unsigned NOT NULL, 88 | `beatmapset_id` bigint(20) unsigned NOT NULL, 89 | `status` VARCHAR(32) NOT NULL, 90 | `time` DATETIME NOT NULL, 91 | PRIMARY KEY (`id`), 92 | CONSTRAINT `beatmapset_statusfk_beatmapset_id` FOREIGN KEY (`beatmapset_id`) REFERENCES `beatmapsets` (`id`) 93 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; 94 | 95 | DROP TABLE IF EXISTS `status_nominators`; 96 | CREATE TABLE `status_nominators` ( 97 | `status_id` bigint(20) unsigned NOT NULL, 98 | `nominator_id` bigint(20) unsigned NOT NULL, 99 | PRIMARY KEY (`status_id`, `nominator_id`), 100 | CONSTRAINT `status_nominatorsfk_status_id` FOREIGN KEY (`status_id`) REFERENCES `beatmapset_status` (`id`), 101 | CONSTRAINT `status_nominatorsfk_nominator_id` FOREIGN KEY (`nominator_id`) REFERENCES `users` (`id`) 102 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; 103 | 104 | DROP TABLE IF EXISTS `discussions`; 105 | CREATE TABLE `discussions` ( 106 | `id` bigint(20) unsigned NOT NULL, 107 | `beatmapset_id` bigint(20) unsigned NOT NULL, 108 | `user_id` bigint(20) unsigned NOT NULL, 109 | `content` mediumtext, 110 | `tab` mediumtext, 111 | `difficulty` mediumtext, 112 | PRIMARY KEY (`id`), 113 | UNIQUE KEY `id_UNIQUE` (`id`), 114 | KEY `discussionsfk_beatmapset_id_idx` (`beatmapset_id`), 115 | KEY `discussionsfk_user_id_idx` (`user_id`), 116 | CONSTRAINT `discussionsfk_beatmapset_id` FOREIGN KEY (`beatmapset_id`) REFERENCES `beatmapsets` (`id`), 117 | CONSTRAINT `discussionsfk_user_id` FOREIGN KEY (`user_id`) REFERENCES `users` (`id`) 118 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; 119 | 120 | DROP TABLE IF EXISTS `events`; 121 | CREATE TABLE `events` ( 122 | `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT, 123 | `insert_time` datetime NOT NULL, 124 | `time` datetime NOT NULL, 125 | `type` varchar(64) NOT NULL, 126 | `beatmapset_id` bigint(20) unsigned DEFAULT NULL, 127 | `discussion_id` bigint(20) unsigned DEFAULT NULL, 128 | `user_id` bigint(20) unsigned DEFAULT NULL, 129 | `group_id` bigint(20) unsigned DEFAULT NULL, 130 | `group_mode` varchar(20) DEFAULT NULL, 131 | `news_id` bigint(20) unsigned DEFAULT NULL, 132 | `content` mediumtext, 133 | PRIMARY KEY (`id`), 134 | UNIQUE KEY `id_UNIQUE` (`id`) 135 | ) ENGINE=InnoDB AUTO_INCREMENT=22 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; 136 | 137 | DROP TABLE IF EXISTS `newsposts`; 138 | CREATE TABLE `newsposts` ( 139 | `id` bigint(20) unsigned NOT NULL, 140 | `title` mediumtext NOT NULL, 141 | `preview` mediumtext NOT NULL, 142 | `author_id` bigint(20) unsigned DEFAULT NULL, 143 | `author_name` mediumtext NOT NULL, 144 | `slug` mediumtext NOT NULL, 145 | `image_url` mediumtext NOT NULL, 146 | PRIMARY KEY (`id`), 147 | UNIQUE KEY `id_UNIQUE` (`id`), 148 | KEY `newsposts_author_id_fk_idx` (`author_id`), 149 | CONSTRAINT `newsposts_author_id_fk` FOREIGN KEY (`author_id`) REFERENCES `users` (`id`) 150 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; 151 | 152 | DROP TABLE IF 
EXISTS `discussion_obv_sev`; 153 | CREATE TABLE `discussion_obv_sev` ( 154 | `id` bigint(20) unsigned NOT NULL, 155 | `obv` INT unsigned NOT NULL, 156 | `sev` INT unsigned NOT NULL, 157 | PRIMARY KEY (`id`), 158 | UNIQUE KEY `id_UNIQUE` (`id`), 159 | CONSTRAINT `discussion_obv_sev_id` FOREIGN KEY (`id`) REFERENCES `discussions` (`id`) 160 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; -------------------------------------------------------------------------------- /scraper/.env: -------------------------------------------------------------------------------- 1 | PYTHONPATH=./ -------------------------------------------------------------------------------- /scraper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Naxesss/Aiess/f7485b3ecc75ca1369960c5652036c4a7865be21/scraper/__init__.py -------------------------------------------------------------------------------- /scraper/crawler.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | from datetime import datetime 5 | from typing import Generator, Callable 6 | 7 | from aiess.web import api 8 | from aiess.objects import Event 9 | 10 | from scraper.requester import get_discussion_events 11 | from scraper.requester import get_reply_events 12 | from scraper.requester import get_beatmapset_events 13 | from scraper.requester import get_news_events 14 | from scraper.requester import get_group_events 15 | from scraper import populator 16 | 17 | async def get_all_events_between(start_time: datetime, end_time: datetime, last_checked_time: datetime=None) -> Generator[Event, None, None]: 18 | """Returns a generator of all events within the given time frame.""" 19 | # Ensures name changes, beatmap updates, etc. are considered. 20 | # Updates once for each pass (more than that isn't necessary, as the time frame is locked during the pass). 21 | api.cache.clear() 22 | populator.cached_discussions_json = {} 23 | 24 | # These are closely intertwined; beatmapset events rely on replies, which in turn rely on discussions. 25 | async for event in __get_discussion_events_between(start_time, end_time): yield event 26 | async for event in __get_reply_events_between(start_time, end_time): yield event 27 | async for event in __get_beatmapset_events_between(start_time, end_time): yield event 28 | 29 | async def get_news_between(start_time: datetime, end_time: datetime, last_checked_time: datetime=None) -> Generator[Event, None, None]: 30 | """Returns a generator of news events (from /home/news) within the given time frame.""" 31 | # `get_news_events` generates events before a given time, rather than by page, hence `generate_by_page=False`. 32 | for event in __get_event_generations_between(get_news_events, start_time, end_time, generate_by_page=False): 33 | yield event 34 | 35 | async def get_group_events_between(start_time: datetime, end_time: datetime, last_checked_time: datetime) -> Generator[Event, None, None]: 36 | """Returns a generator of group addition and removal events (from /group) since the given last checked time. 37 | Note that `start_time` has no effect here, as group changes are not timestamped and therefore 38 | carry no upper bound on how recent an event can be.""" 39 | # `_from` in `get_group_events` denotes the timestamp to set on any group events found.
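# Group events are therefore stamped with the last checked time, an approximation that may lag the real change by up to the polling interval (roughly 5 minutes, judging by `gather_loop` in scraper/main.py).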
40 | for event in get_group_events(_from=last_checked_time): 41 | await populator.populate_from_bnsite(event) 42 | yield event 43 | 44 | async def __get_discussion_events_between(start_time: datetime, end_time: datetime) -> Generator[Event, None, None]: 45 | """Returns a generator of discussion events (from /beatmap-discussions) within the given time frame.""" 46 | for event in __get_event_generations_between(get_discussion_events, start_time, end_time): 47 | yield event 48 | 49 | async def __get_reply_events_between(start_time: datetime, end_time: datetime) -> Generator[Event, None, None]: 50 | """Returns a generator of reply events (from /beatmap-discussions) within the given time frame.""" 51 | for event in __get_event_generations_between(get_reply_events, start_time, end_time): 52 | await populator.populate_from_discussion(event) 53 | if not event.marked_for_deletion: 54 | yield event 55 | 56 | async def __get_beatmapset_events_between(start_time: datetime, end_time: datetime) -> Generator[Event, None, None]: 57 | """Returns a generator of beatmapset events (from /events) within the given time frame. 58 | Should be run after discussion events so that discussion contexts are available.""" 59 | for event in __get_event_generations_between(get_beatmapset_events, start_time, end_time): 60 | await populator.populate_from_discussion(event) 61 | if not event.marked_for_deletion: 62 | yield event 63 | 64 | def __get_event_generations_between( 65 | generator_function: Callable[[int], Generator[Event, None, None]], 66 | start_time: datetime, end_time: datetime, generate_by_page: bool=True) -> Generator[Event, None, None]: 67 | """Returns the same generator as the generation function argument, but within the given time frame and across multiple 68 | generations rather than just one. This essentially bypasses the `limit` of pages by performing as many requests as necessary.""" 69 | current_time = start_time 70 | page = 1 71 | 72 | while current_time > end_time: 73 | event_generator = generator_function(page if generate_by_page else current_time) 74 | found_events = False 75 | found_too_new_events = False 76 | 77 | for event in event_generator: 78 | if event.time > current_time: 79 | found_too_new_events = True 80 | continue 81 | 82 | found_events = True 83 | current_time = event.time 84 | if current_time <= end_time: 85 | # Since events are sorted, any remaining element time is also <= `end_time`. 86 | break 87 | 88 | yield event 89 | 90 | if not found_events and not found_too_new_events: 91 | # There are no more events. 92 | break 93 | 94 | page += 1 95 | if page > 500: 96 | # Assuming 60 s / page, this would take 8 hours to hit, but that's within acceptable bounds for this purpose.
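# If this cap is ever hit, either the timeframe genuinely spans over 500 pages or `current_time` has stopped advancing (e.g. every event on a page was "too new"), and failing loudly beats looping forever.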
97 | raise ValueError(""" 98 | The page index has exceeded 500; we're probably stuck in a loop.""") -------------------------------------------------------------------------------- /scraper/main.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | import asyncio 5 | from datetime import datetime 6 | 7 | from aiess import timestamp, logger 8 | from aiess import Event 9 | from aiess.logger import log, colors, fmt 10 | from aiess.database import Database, SCRAPER_DB_NAME 11 | from aiess.reader import merge_concurrent 12 | 13 | from scraper.crawler import get_all_events_between, get_news_between, get_group_events_between 14 | 15 | LAST_CHECKED_POSTFIX = "-last-checked" 16 | 17 | async def gather_loop() -> None: 18 | """Gathers new events in an infinite loop.""" 19 | while True: 20 | await gather_new_events() 21 | # Newsposts only need to be checked when the hour rolls over, as that is when they're posted. 22 | if timestamp.get_last("news" + LAST_CHECKED_POSTFIX).hour != datetime.utcnow().hour: 23 | await gather_news() 24 | # Group changes happen very rarely compared to other events, but people tend to want these updates quickly. 25 | if (datetime.utcnow() - timestamp.get_last("groups" + LAST_CHECKED_POSTFIX)).total_seconds() > 300: 26 | await gather_group_changes() 27 | 28 | async def gather_new_events() -> None: 29 | """Gathers any new beatmapset/discussion/reply events.""" 30 | await gather(get_all_events_between, "events") 31 | 32 | async def gather_news() -> None: 33 | """Gathers any new newsposts.""" 34 | await gather(get_news_between, "news") 35 | 36 | async def gather_group_changes() -> None: 37 | """Gathers any new group changes.""" 38 | await gather(get_group_events_between, "groups") 39 | 40 | async def gather(async_event_generator, _id: str) -> None: 41 | """Iterates over new events since the last time, inserts them into the database, 42 | and then updates the last time if any were found.""" 43 | current_time = datetime.utcnow().replace(microsecond=0) 44 | last_time = timestamp.get_last(_id).replace(microsecond=0) 45 | last_checked_time = timestamp.get_last(_id + LAST_CHECKED_POSTFIX).replace(microsecond=0) 46 | 47 | if await push_events(async_event_generator, current_time, last_time, last_checked_time): 48 | last_updated(current_time, _id) 49 | 50 | last_updated(current_time, _id + LAST_CHECKED_POSTFIX) 51 | 52 | async def push_events(async_event_generator, current_time, last_time, last_checked_time) -> bool: 53 | """Parses and inserts events generated by the given function over the timeframe.
54 | Returns whether any events were generated.""" 55 | events = [] 56 | await parse_events(events, async_event_generator, current_time, last_time, last_checked_time) 57 | insert_db(merge_concurrent(events)) 58 | 59 | return bool(events) 60 | 61 | async def parse_events(event_list, async_event_generator, current_time, last_time, last_checked_time) -> None: 62 | """Parses events generated by the given function over the timeframe and appends them to the event list.""" 63 | log(f"--- Parsing Events from {last_time} to {current_time} ---") 64 | async for event in async_event_generator(current_time, last_time, last_checked_time): 65 | progress_ratio = (current_time - event.time).total_seconds() / (current_time - last_time).total_seconds() 66 | progress_str = ( 67 | fmt(" " * int(progress_ratio * 20), colors.LOADING_FILLED) + 68 | fmt(" " * int((1 - progress_ratio) * 20), colors.LOADING_EMPTY ) 69 | ) 70 | 71 | log(f"{progress_str} | {format_event_log(event)}") 72 | event_list.append(event) 73 | 74 | def format_event_log(event: Event) -> str: 75 | return "".join([ 76 | f"{fmt(event.type, colors.EVENT)}", 77 | f" ({fmt(event.user, colors.AUTHOR)})" if event.user else "", 78 | f" on {fmt(event.beatmapset, colors.CONTEXT)}" if event.beatmapset else "", 79 | f" to/from {fmt(event.group, colors.CONTEXT)}" if event.group else "", 80 | f" for [{fmt(event.group.mode, colors.CONTEXT)}]" if event.group and event.group.mode else "", 81 | f" \"{event.content}\"" if event.content else "" 82 | ]) 83 | 84 | def insert_db(events) -> None: 85 | """Inserts the given event list into the database in chronological (oldest-first) order.""" 86 | if not events: 87 | return 88 | 89 | events.sort(key=lambda event: event.time) 90 | 91 | log(f"--- Inserting {len(events)} Events into the Database ---") 92 | for event in events: 93 | log(".", newline=False) 94 | database.insert_event(event) 95 | log() 96 | 97 | def last_updated(current_time: datetime, _id: str) -> None: 98 | """Updates the last updated file to reflect the given time.""" 99 | log(f"--- Last Updated [{_id}] {current_time} ---") 100 | timestamp.set_last(current_time, _id) 101 | 102 | logger.init() 103 | database = Database(SCRAPER_DB_NAME) 104 | 105 | loop = asyncio.get_event_loop() 106 | loop.run_until_complete(gather_loop()) -------------------------------------------------------------------------------- /scraper/parsers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Naxesss/Aiess/f7485b3ecc75ca1369960c5652036c4a7865be21/scraper/parsers/__init__.py -------------------------------------------------------------------------------- /scraper/parsers/beatmapset_event_parser.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | from typing import Generator 5 | from bs4 import BeautifulSoup 6 | from bs4.element import Tag 7 | import json 8 | 9 | from aiess.objects import Event, Beatmapset, Discussion, User 10 | from aiess.errors import DeletedContextError 11 | from aiess.logger import log_err 12 | from aiess import timestamp 13 | from aiess import event_types as types 14 | 15 | from scraper.parsers.event_parser import EventParser 16 | 17 | class BeatmapsetEventParser(EventParser): 18 | 19 | def parse(self, events: BeautifulSoup) -> Generator[Event, None, None]: 20 | """Returns a generator of BeatmapsetEvents from the given /events BeautifulSoup response, parsed top-down.""" 21 | json_events = events.find("script", {"id":
"json-events"}) 22 | json_users = events.find("script", {"id": "json-users"}) 23 | 24 | if not json_events or not json_users: 25 | raise ValueError("Missing either json-events or json-users when parsing beatmapset events.") 26 | 27 | event_jsons = json.loads(json_events.string) 28 | user_jsons = json.loads(json_users.string) 29 | 30 | for event_json in event_jsons: 31 | event = self.parse_event_json(event_json, user_jsons) 32 | if event: 33 | yield event 34 | 35 | def parse_event_json(self, event_json: object, user_jsons: object=None) -> Event: 36 | """Returns a BeatmapsetEvent reflecting the given event json object. 37 | Ignores any event with an incomplete context (e.g. deleted beatmaps). 38 | 39 | Requests user names from the api unless supplied with the json-users.""" 40 | if not event_json: 41 | # Seems to occur when the respective beatmapset has been deleted. 42 | log_err("WARNING | An event is missing; the beatmapset was probably deleted.") 43 | return None 44 | 45 | try: 46 | # Scrape object data 47 | _type = event_json["type"] 48 | time = timestamp.from_string(event_json["created_at"]) 49 | 50 | if "beatmapset" not in event_json or not event_json["beatmapset"]: 51 | raise DeletedContextError("No beatmapset was found in this event. It was likely deleted.") 52 | 53 | beatmapset_id = event_json["beatmapset"]["id"] 54 | discussion_id = event_json["discussion"]["id"] if "discussion" in event_json and event_json["discussion"] else None 55 | 56 | user_id = event_json["user_id"] if "user_id" in event_json else None 57 | user_json = self.__lookup_user_json(user_id, user_jsons) 58 | user_name = user_json["username"] if user_json else None 59 | 60 | content = None 61 | if _type in [types.LANGUAGE_EDIT, types.GENRE_EDIT]: 62 | # Language/genre edits always have "old" and "new" fields, which no other type has. 63 | old = event_json["comment"]["old"] 64 | new = event_json["comment"]["new"] 65 | content = f"{old} -> {new}" 66 | 67 | if _type in [types.UNLOVE]: 68 | # E.g. "Mapper has asked for it to be removed from Loved". 69 | content = event_json["comment"]["reason"] 70 | 71 | # Reconstruct objects 72 | beatmapset = Beatmapset(beatmapset_id) 73 | user = User(user_id, user_name) if user_id is not None else None 74 | discussion = Discussion(discussion_id, beatmapset) if discussion_id is not None else None 75 | except DeletedContextError as err: 76 | log_err(err) 77 | else: 78 | return Event( 79 | _type = _type, 80 | time = time, 81 | beatmapset = beatmapset, 82 | discussion = discussion, 83 | user = user, 84 | content = content 85 | ) 86 | 87 | return None 88 | 89 | def __lookup_user_json(self, user_id: str, user_jsons: object): 90 | if not user_jsons: 91 | return None 92 | 93 | for user_json in user_jsons: 94 | if user_json["id"] == user_id: 95 | return user_json 96 | 97 | return None 98 | 99 | # Only need one instance since it's always the same. 
100 | beatmapset_event_parser = BeatmapsetEventParser() -------------------------------------------------------------------------------- /scraper/parsers/discussion_parser.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | from typing import Generator 5 | 6 | from aiess import Discussion, Beatmapset, User 7 | 8 | class DiscussionParser(): 9 | 10 | def parse(self, discussions_json: object, beatmapset: Beatmapset) -> Generator[Discussion, None, None]: 11 | """Returns a generator of discussions from the given beatmapset discussion page json (empty if no discussions exist).""" 12 | discussion_jsons = discussions_json["discussions"] 13 | for discussion_json in discussion_jsons: 14 | if not discussion_json: continue 15 | yield self.parse_discussion(discussion_json, discussions_json, beatmapset) 16 | 17 | def parse_discussion(self, discussion_json: object, beatmapset_json: object, beatmapset: Beatmapset) -> Discussion: 18 | """Returns a discussion from the given discussion json. The beatmapset json is also included for efficient username querying.""" 19 | _id = discussion_json["id"] 20 | user = self.parse_user(discussion_json["user_id"], beatmapset_json) 21 | content = discussion_json["posts"][0]["message"] if discussion_json["posts"] else None 22 | tab = self.parse_tab(discussion_json, beatmapset_json) 23 | difficulty = self.parse_diff(discussion_json, beatmapset_json) 24 | return Discussion(_id, beatmapset, user, content, tab, difficulty) 25 | 26 | def parse_user(self, user_id: str, beatmapset_json: object) -> User: 27 | """Returns a user with the given id and name supplied by the beatmapset json.""" 28 | for related_user in beatmapset_json["related_users"]: 29 | if related_user["id"] == user_id: 30 | return User(user_id, related_user["username"]) 31 | 32 | def parse_discussion_post_author(self, post_id: str, beatmapset_json: object) -> User: 33 | """Returns the author of the given discussion post id if one exists, otherwise None.""" 34 | for page_discussion in beatmapset_json["discussions"]: 35 | if not page_discussion: continue 36 | for page_discussion_post in page_discussion["posts"]: 37 | if not page_discussion_post: continue 38 | if page_discussion_post["id"] == post_id: 39 | return self.parse_user(page_discussion_post["user_id"], beatmapset_json) 40 | return None 41 | 42 | def parse_tab(self, discussion_json: object, beatmapset_json: object) -> str: 43 | """Returns the tab which the given discussion is posted on.""" 44 | has_timestamp = "timestamp" in discussion_json and discussion_json["timestamp"] is not None 45 | has_difficulty = "beatmap_id" in discussion_json and discussion_json["beatmap_id"] 46 | 47 | if has_timestamp: return "timeline" 48 | elif has_difficulty: return "general" 49 | else: return "generalAll" 50 | 51 | def parse_diff(self, discussion_json: object, beatmapset_json: object) -> str: 52 | """Returns the name of the difficulty which the given discussion is posted on, if any, otherwise None.""" 53 | # Key may be missing in case the value would be N/A.
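# (e.g. a "generalAll" discussion targets no particular difficulty, so it carries no "beatmap_id".)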
54 | if "beatmap_id" not in discussion_json or not discussion_json["beatmap_id"]: 55 | return None 56 | 57 | beatmap_id = discussion_json["beatmap_id"] 58 | for beatmap_json in beatmapset_json["beatmaps"]: 59 | if beatmap_json["id"] == beatmap_id: 60 | return beatmap_json["version"] 61 | 62 | discussion_parser = DiscussionParser() -------------------------------------------------------------------------------- /scraper/parsers/event_parser.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | from bs4.element import Tag 5 | from datetime import datetime 6 | import re as regex 7 | 8 | from aiess.errors import ParsingError, DeletedContextError 9 | from aiess import timestamp 10 | 11 | class EventParser(): 12 | 13 | def parse_event_type(self, event: Tag, event_class: str, class_prefix: str) -> str: 14 | """Returns the type of the given event (e.g. "suggestion", "problem", "hype", "nominate", "issue-resolve", "disqualify"). 15 | Only use this function if you know what you're doing, use child classes for maintainability (e.g. BeatmapsetEventParser).""" 16 | event_type = None 17 | event_tag = event and event.find(attrs={"class": event_class}) 18 | if event_tag: 19 | for class_name in event_tag.attrs["class"]: 20 | if class_name.startswith(class_prefix): 21 | # CSS class types use -, whereas json types use _, we should standardize this. 22 | event_type = class_name[len(class_prefix):].replace("-", "_") 23 | break 24 | 25 | # Failure to parse event type should be met with an exception, as this is abnormal and will likely cause further issues if silent. 26 | if not event_type: 27 | raise ParsingError("" 28 | f"The type of an event could not be parsed. Expected some class starting with \"{class_prefix}\":\r\n{event}") 29 | 30 | return event_type 31 | 32 | def parse_event_time(self, event: Tag) -> datetime: 33 | """Returns the datetime of the given event (e.g. "2019-10-12T02:19:27+00:00" into its datetime equivalent).""" 34 | event_time = None 35 | time = event and event.find(attrs={"class": "js-timeago"}) 36 | if time: 37 | event_time = time.attrs["datetime"] 38 | 39 | # Failure to parse event time should be met with an exception for the same reason as failing to parse event type. 40 | if event_time is None: 41 | raise ParsingError(f""" 42 | The time of an event could not be parsed. Expected some time object with class \"timeago\":\r\n{event}""") 43 | 44 | return timestamp.from_string(event_time) 45 | 46 | def parse_event_link(self, event: Tag) -> str: 47 | """Returns the beatmapset/discussion link from the thumbnail of a given event 48 | (e.g. "https://osu.ppy.sh/beatmapsets/818013/discussion#/1211219").""" 49 | thumb = event and event.find("a", {"href": regex.compile(r"^https:\/\/osu\.ppy\.sh\/beatmapsets\/")}) 50 | href = thumb and thumb.attrs["href"] 51 | 52 | if not href: 53 | self.raise_if_deleted(event) 54 | raise ParsingError(""" 55 | The thumbnail link of an event could not be found. Expected some tag with attribute href.""") 56 | 57 | return href 58 | 59 | def parse_event_author_id(self, event: Tag, href_class: str, must_find: bool=False) -> str: 60 | """Returns the user id associated with the given event, if applicable (e.g. the user nominating or starting a discussion, "1314547"). 61 | Only use this function if you know what you're doing, see child classes for abstracted versions (e.g. 
BeatmapsetEventParser).""" 62 | user_id = None 63 | user_a = event and event.find(attrs={"class": href_class}) 64 | if user_a: 65 | if user_a.has_attr("data-user-id"): 66 | user_id = str(user_a["data-user-id"]) 67 | 68 | if not user_id and user_a.has_attr("href"): 69 | user_id = self.parse_id_from_user_link(user_a["href"]) 70 | 71 | if must_find and not user_id: 72 | self.raise_if_deleted(event) 73 | raise ParsingError(f""" 74 | The user id of the author of an event could not be parsed. Expected some tag with class \"user-name\" 75 | and attr \"data-user-id\":\r\n{event}""") 76 | 77 | return user_id 78 | 79 | def parse_event_author_name(self, event: Tag, name_class: str, must_find: bool=False) -> str: 80 | """Returns the user name associated with the given event, if applicable (e.g. the user nominating or starting a discussion, 5129592). 81 | Only use this function if you know what you're doing; see child classes for abstracted versions (e.g. BeatmapsetEventParser).""" 82 | user_name = None 83 | user_href = event and event.find(attrs={"class": name_class}) 84 | if user_href: 85 | user_name = user_href.getText() 86 | 87 | if must_find and not user_name: 88 | self.raise_if_deleted(event) 89 | raise ParsingError(f""" 90 | The user name of the author of an event could not be parsed. Expected some href object with class \"user-name\" 91 | and attr \"data-user-id\":\r\n{event}""") 92 | 93 | return user_name 94 | 95 | def parse_id_from_user_link(self, link: str) -> str: 96 | """Returns the user id from the given user link (assuming the link contains the id, otherwise None) 97 | (i.e. link following this format https://osu.ppy.sh/users/5129592).""" 98 | if not link: 99 | return None 100 | match = regex.search(r"https:\/\/osu\.ppy\.sh\/users\/(\d+)", link) 101 | if not match: 102 | return None 103 | return match.group(1) 104 | 105 | def parse_id_from_discussion_link(self, link: str) -> str: 106 | """Returns the discussion id from the given discussion link (assuming the link contains the id, otherwise None) 107 | (i.e. link following this format https://osu.ppy.sh/beatmapsets/1016042/discussion#/1294751).""" 108 | if not link: 109 | return None 110 | match = regex.search(r"https:\/\/osu\.ppy\.sh\/beatmapsets\/\d+\/discussion#\/(\d+)", link) 111 | if not match: 112 | return None 113 | return match.group(1) 114 | 115 | def parse_id_from_beatmapset_link(self, link: str) -> str: 116 | """Returns the beatmapset id from the given beatmapset link (assuming the link contains the id, otherwise None) 117 | (i.e. link following this format https://osu.ppy.sh/beatmapsets/1016042).""" 118 | if not link: 119 | return None 120 | match = regex.search(r"https:\/\/osu\.ppy\.sh\/beatmapsets\/(\d+)", link) 121 | if not match: 122 | return None 123 | return match.group(1) 124 | 125 | def is_beatmap_deleted(self, event: Tag) -> bool: 126 | """Returns whether the event includes some span containing "deleted", 127 | as any deleted beatmap will include "deleted
beatmap
".""" 128 | span = event and event.find("span") 129 | return span and "deleted" in span.text 130 | 131 | def raise_if_deleted(self, event: Tag) -> bool: 132 | if self.is_beatmap_deleted(event): 133 | raise DeletedContextError() -------------------------------------------------------------------------------- /scraper/parsers/group_parser.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | from typing import Generator, List 5 | from bs4 import BeautifulSoup 6 | from datetime import datetime 7 | import json 8 | from dataclasses import dataclass 9 | 10 | from aiess import Event, User, Usergroup 11 | from aiess import event_types as types 12 | from aiess.database import Database, SCRAPER_DB_NAME 13 | 14 | @dataclass 15 | class GroupUser: 16 | group_id: int 17 | user: User 18 | mode: str 19 | 20 | def parse(group_id: int, group_page: BeautifulSoup, last_checked_at: datetime) -> Generator[Event, None, None]: 21 | """Returns a generator of group addition and removal events from the given BeautifulSoup group page and its id.""" 22 | json_users = group_page.find("script", {"id": "json-users"}) 23 | if not json_users: 24 | raise ValueError("No group users json could be found.") 25 | 26 | users_json = json.loads(json_users.string) 27 | return parse_users_json(group_id, users_json, last_checked_at) 28 | 29 | def parse_users_json(group_id: int, users_json: object, last_checked_at: datetime) -> Generator[Event, None, None]: 30 | """Returns a generator of group addition and removal events from the given users json and group id.""" 31 | missing_group_users = retrieve_group_users(group_id) 32 | new_group_users: List[GroupUser] = [] 33 | for user_json in users_json: 34 | user_id = user_json["id"] 35 | modes = [None] 36 | 37 | for user_group_json in user_json["groups"]: 38 | has_modes = "playmodes" in user_group_json and user_group_json["playmodes"] is not None 39 | if user_group_json["id"] == group_id and has_modes: 40 | if len(user_group_json["playmodes"]): 41 | modes = user_group_json["playmodes"] 42 | else: 43 | # A user part of the group, but with no modes (e.g. non-captins in Project Loved / managers in the BSC). 
44 | modes = [None] 45 | 46 | for mode in modes: 47 | user = User(_id=user_id, name=user_json["username"]) 48 | was_in_group_with_mode = False 49 | for group_user in missing_group_users: 50 | if group_user.user.id == user.id and group_user.mode == mode: 51 | was_in_group_with_mode = True 52 | missing_group_users.remove(group_user) 53 | 54 | if not was_in_group_with_mode: 55 | new_group_users.append(GroupUser(group_id, user, mode)) 56 | 57 | content = None 58 | time = last_checked_at 59 | 60 | for group_user in missing_group_users: 61 | yield Event( 62 | _type = types.REMOVE, 63 | time = time, 64 | user = retrieve_user_from_group(group_id=group_id, user_id=group_user.user.id, mode=group_user.mode), 65 | group = Usergroup(_id=group_id, mode=group_user.mode), 66 | content = content 67 | ) 68 | for group_user in new_group_users: 69 | yield Event( 70 | _type = types.ADD, 71 | time = time, 72 | user = group_user.user, 73 | group = Usergroup(_id=group_id, mode=group_user.mode), 74 | content = content 75 | ) 76 | 77 | def retrieve_group_users(group_id: int) -> List[GroupUser]: 78 | """Returns the last remembered group users belonging to the given group id.""" 79 | group_user_relations = Database(SCRAPER_DB_NAME).retrieve_group_users("group_id=%s", (group_id,)) 80 | return [GroupUser(group.id, user, group.mode) for group, user in group_user_relations] 81 | 82 | def retrieve_user_from_group(group_id: int, user_id: int, mode: str=None) -> User: 83 | """Returns the last remembered user belonging to the given group id with the given user id.""" 84 | if mode is not None: 85 | return Database(SCRAPER_DB_NAME).retrieve_group_user("group_id=%s AND user_id=%s AND mode=%s", (group_id, user_id, mode))[1] 86 | else: 87 | return Database(SCRAPER_DB_NAME).retrieve_group_user("group_id=%s AND user_id=%s", (group_id, user_id))[1] -------------------------------------------------------------------------------- /scraper/parsers/news_parser.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | from typing import Generator, Iterator 5 | from bs4 import BeautifulSoup 6 | import json 7 | 8 | from aiess.objects import Event, User, NewsPost 9 | from aiess.timestamp import from_string 10 | 11 | # deprecated since https://github.com/ppy/osu-web/pull/8070 12 | def parse(events: BeautifulSoup) -> Generator[Event, None, None]: 13 | """Returns a generator of news events from the given /news BeautifulSoup response, parsed top-down.""" 14 | json_index = events.find("script", {"id": "json-index"}) 15 | if not json_index: 16 | raise ValueError("No news json could be found.") 17 | 18 | post_jsons = json.loads(json_index.string)["news_posts"] 19 | return parse_post_jsons(post_jsons) 20 | 21 | def parse_json(news_posts_json: object) -> Generator[Event, None, None]: 22 | """Returns a generator of news events from the given news posts json, parsed top-down.""" 23 | return parse_post_jsons(news_posts_json) 24 | 25 | def parse_post_jsons(post_jsons: Iterator[object]) -> Generator[Event, None, None]: 26 | """Returns a generator of news events representing the given news post json objects.""" 27 | for post_json in post_jsons: 28 | yield parse_post_json(post_json) 29 | 30 | def parse_post_json(post_json: object) -> Event: 31 | """Returns an event representing the given news post json object 32 | (a single news post instance, for multiple see `parse_post_jsons`).""" 33 | author = User(name=post_json["author"].strip()) 34 | return Event( 35 | _type =
"news", 36 | time = from_string(post_json["published_at"]), 37 | newspost = NewsPost( 38 | _id = post_json["id"], 39 | title = post_json["title"], 40 | preview = post_json["preview"], 41 | author = author, 42 | slug = post_json["slug"], 43 | image_url = complete_image_url(post_json["first_image"]) 44 | ), 45 | user = author if author.id else None, 46 | content = post_json["preview"] 47 | ) 48 | 49 | def complete_image_url(url: str) -> str: 50 | """Returns the given url, with https://osu.ppy.sh/ prepended, if it does not contain 51 | that part already. Some image urls in newsposts include the domain, while others do not 52 | (e.g "/help/wiki/shared/news/banners/community-mentorship-program.jpg").""" 53 | if url.startswith("https://"): 54 | return url 55 | return f"https://osu.ppy.sh{url}" -------------------------------------------------------------------------------- /scraper/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Naxesss/Aiess/f7485b3ecc75ca1369960c5652036c4a7865be21/scraper/tests/__init__.py -------------------------------------------------------------------------------- /scraper/tests/last_datetime-test.txt: -------------------------------------------------------------------------------- 1 | 2019-12-11 14:46:48 -------------------------------------------------------------------------------- /scraper/tests/mocks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Naxesss/Aiess/f7485b3ecc75ca1369960c5652036c4a7865be21/scraper/tests/mocks/__init__.py -------------------------------------------------------------------------------- /scraper/tests/mocks/discussion_diff_and_tabs.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | from bs4 import BeautifulSoup, Tag 5 | 6 | from scraper.requester import soupify 7 | 8 | DIFFICULTY_HTML = """ 9 |
92 | """ 93 | 94 | TAB_HTML = """ 95 | 139 | """ 140 | 141 | HTML = f""" 142 | {DIFFICULTY_HTML} 143 | {TAB_HTML} 144 |
145 | """ 146 | 147 | soup: BeautifulSoup = soupify(HTML) 148 | tag: Tag = soup.find("div", {"class": "mock-event"}) -------------------------------------------------------------------------------- /scraper/tests/mocks/discussion_jsons/crawler_json.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | JSON = r""" 5 | { 6 | "beatmapset": { 7 | "artist": "artist", 8 | "creator": "someone", 9 | "id": 4, 10 | "title": "title", 11 | "user_id": 1, 12 | "discussions": [ 13 | { 14 | "id": 20, 15 | "user_id": 2, 16 | "message_type": "praise", 17 | "created_at": "2020-01-01T00:00:00+00:00", 18 | "posts": [ 19 | { 20 | "id": 400, 21 | "user_id": 2, 22 | "created_at": "2020-01-01T00:00:00+00:00", 23 | "message": "wow" 24 | }, 25 | { 26 | "id": 401, 27 | "user_id": 2, 28 | "created_at": "2020-01-01T00:00:00+00:00", 29 | "message": "please reply" 30 | }, 31 | { 32 | "id": 402, 33 | "user_id": 1, 34 | "created_at": "2020-01-01T00:30:00+00:00", 35 | "message": "yes?" 36 | }, 37 | { 38 | "id": 403, 39 | "user_id": 2, 40 | "created_at": "2020-01-01T00:31:00+00:00", 41 | "message": "say hi back" 42 | }, 43 | { 44 | "id": 404, 45 | "user_id": 1, 46 | "created_at": "2020-01-01T01:00:00+00:00", 47 | "message": "hi" 48 | }, 49 | { 50 | "id": 405, 51 | "user_id": 2, 52 | "created_at": "2020-01-01T01:04:00+00:00", 53 | "message": "thanks" 54 | } 55 | ] 56 | }, 57 | { 58 | "id": 21, 59 | "user_id": 2, 60 | "message_type": "hype", 61 | "created_at": "2020-01-01T02:30:00+00:00", 62 | "posts": [ 63 | { 64 | "id": 500, 65 | "user_id": 2, 66 | "created_at": "2020-01-01T02:30:00+00:00", 67 | "message": "hype" 68 | } 69 | ] 70 | }, 71 | { 72 | "id": 22, 73 | "user_id": 2, 74 | "message_type": "problem", 75 | "created_at": "2020-01-01T03:00:00+00:00", 76 | "posts": [ 77 | { 78 | "id": 600, 79 | "user_id": 2, 80 | "created_at": "2020-01-01T03:00:00+00:00", 81 | "message": "no wait" 82 | } 83 | ] 84 | } 85 | ], 86 | "events": [ 87 | { 88 | "type": "issue_resolve", 89 | "comment": { 90 | "beatmap_discussion_id": 20, 91 | "beatmap_discussion_post_id": 404 92 | }, 93 | "created_at": "2020-01-01T01:00:00+00:00", 94 | "user_id": 1 95 | }, 96 | { 97 | "type": "nominate", 98 | "created_at": "2020-01-01T02:30:30+00:00", 99 | "user_id": 2 100 | }, 101 | { 102 | "type": "nominate", 103 | "created_at": "2020-01-01T02:31:00+00:00", 104 | "user_id": 1 105 | }, 106 | { 107 | "type": "qualify", 108 | "comment": null, 109 | "created_at": "2020-01-01T02:31:00+00:00", 110 | "user_id": null 111 | }, 112 | { 113 | "type": "disqualify", 114 | "comment": { 115 | "beatmap_discussion_id": 22, 116 | "beatmap_discussion_post_id": 600 117 | }, 118 | "created_at": "2020-01-01T03:00:00+00:00", 119 | "user_id": 2 120 | } 121 | ], 122 | "related_users": [ 123 | { 124 | "id": 1, 125 | "username": "someone" 126 | }, 127 | { 128 | "id": 2, 129 | "username": "sometwo" 130 | } 131 | ] 132 | } 133 | } 134 | """ -------------------------------------------------------------------------------- /scraper/tests/mocks/events/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Naxesss/Aiess/f7485b3ecc75ca1369960c5652036c4a7865be21/scraper/tests/mocks/events/__init__.py -------------------------------------------------------------------------------- /scraper/tests/mocks/events/faulty/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Naxesss/Aiess/f7485b3ecc75ca1369960c5652036c4a7865be21/scraper/tests/mocks/events/faulty/__init__.py -------------------------------------------------------------------------------- /scraper/tests/mocks/events/faulty/beatmapset_events.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | from bs4 import BeautifulSoup 5 | 6 | from scraper.requester import soupify 7 | 8 | HTML = """ 9 |
10 |
11 |
12 |
13 | 17 | 18 | 19 |
20 |
21 |
22 | Issue #1261263 marked as resolved. 23 |
24 |
25 | 28 |
29 |
30 |
31 |
32 |
33 |
34 | 38 | 39 | 40 |
41 |
42 |
43 | Nominated by _Stan. 44 |
45 |
46 |
47 |
48 |
49 |
50 | """ 51 | soup: BeautifulSoup = soupify(HTML) -------------------------------------------------------------------------------- /scraper/tests/mocks/events/faulty/kudosu_deleted_beatmap.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | from bs4 import BeautifulSoup, Tag 5 | 6 | from scraper.requester import soupify 7 | 8 | HTML = """ 9 |
10 |
11 | deleted
beatmap
12 |
13 |
14 |
15 | Discussion #1370674 by 16 | 17 | ekisu 18 | 19 | obtained enough votes for kudosu. 20 |
21 |
22 | 25 |
26 |
27 |
28 |
29 | """ 30 | soup: BeautifulSoup = soupify(HTML) 31 | tag: Tag = soup.find("div", {"class": "beatmapset-event"}) -------------------------------------------------------------------------------- /scraper/tests/mocks/events/faulty/no_events.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | from bs4 import BeautifulSoup, Tag 5 | 6 | from scraper.requester import soupify 7 | 8 | HTML = """ 9 |
10 |
11 | Something isn't right here... 12 |
13 |
14 | """ 15 | soup: BeautifulSoup = soupify(HTML) 16 | tag: Tag = soup.find("div", {"class": "beatmapset-event"}) -------------------------------------------------------------------------------- /scraper/tests/mocks/events/faulty/resolve_deleted_beatmap.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | from bs4 import BeautifulSoup, Tag 5 | 6 | from scraper.requester import soupify 7 | 8 | HTML = """ 9 |
10 |
11 | deleted
beatmap
12 |
13 |
14 |
15 | Issue #1336967 marked as resolved. 16 |
17 |
18 |
19 |
20 |
21 | """ 22 | soup: BeautifulSoup = soupify(HTML) 23 | tag: Tag = soup.find("div", {"class": "beatmapset-event"}) -------------------------------------------------------------------------------- /scraper/tests/mocks/events/issue_resolve.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | from bs4 import BeautifulSoup, Tag 5 | 6 | from scraper.requester import soupify 7 | 8 | HTML = """ 9 |
10 |
11 | 15 | 16 | 17 |
18 |
19 |
20 | Issue #1294675 marked as resolved. 21 |
22 |
23 |
24 |
25 |
26 | """ 27 | soup: BeautifulSoup = soupify(HTML) 28 | tag: Tag = soup.find("div", {"class": "beatmapset-event"}) -------------------------------------------------------------------------------- /scraper/tests/mocks/events/news.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | from bs4 import BeautifulSoup, Tag 5 | 6 | from scraper.requester import soupify 7 | 8 | HTML = """ 9 | 89 | """ 90 | 91 | soup: BeautifulSoup = soupify(HTML) 92 | tag: Tag = soup.find("div", {"class": "news-card"}) -------------------------------------------------------------------------------- /scraper/tests/mocks/events_json_deleted_mapset.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | from scraper.requester import soupify 5 | 6 | EVENTS_JSON = r""" 7 | [ 8 | { 9 | "id": 2004213, 10 | "type": "kudosu_gain", 11 | "comment": { 12 | "beatmap_discussion_id": 1605631, 13 | "beatmap_discussion_post_id": null, 14 | "new_vote": { 15 | "user_id": 3552948, 16 | "score": 1 17 | }, 18 | "votes": [ 19 | { 20 | "user_id": 3552948, 21 | "score": 1 22 | } 23 | ] 24 | }, 25 | "created_at": "2020-05-24T00:47:48+00:00", 26 | "user_id": 2688103, 27 | "discussion": { 28 | "id": 1605631, 29 | "beatmapset_id": 534054, 30 | "beatmap_id": 1708984, 31 | "user_id": 2688103, 32 | "deleted_by_id": null, 33 | "message_type": "suggestion", 34 | "parent_id": null, 35 | "timestamp": 18592, 36 | "resolved": true, 37 | "can_be_resolved": true, 38 | "can_grant_kudosu": true, 39 | "created_at": "2020-05-23T22:39:36+00:00", 40 | "updated_at": "2020-05-24T00:47:52+00:00", 41 | "deleted_at": null, 42 | "last_post_at": "2020-05-24T00:47:52+00:00", 43 | "kudosu_denied": false, 44 | "starting_post": { 45 | "id": 4521936, 46 | "beatmap_discussion_id": 1605631, 47 | "user_id": 2688103, 48 | "last_editor_id": null, 49 | "deleted_by_id": null, 50 | "system": false, 51 | "message": "00:18:592 (4) - this rhythm is kind of questionable.. 
doesn't fit the song very obviously so i don't think it's too suitable for low diff", 52 | "created_at": "2020-05-23T22:39:36+00:00", 53 | "updated_at": "2020-05-23T22:39:36+00:00", 54 | "deleted_at": null 55 | } 56 | } 57 | } 58 | ] 59 | """ 60 | 61 | USER_JSON = r""" 62 | [ 63 | { 64 | "avatar_url": "https://a.ppy.sh/2688103?1588879943.png", 65 | "country_code": "US", 66 | "default_group": "default", 67 | "id": 2688103, 68 | "is_active": true, 69 | "is_bot": false, 70 | "is_online": false, 71 | "is_supporter": true, 72 | "last_visit": null, 73 | "pm_friends_only": false, 74 | "profile_colour": null, 75 | "username": "IOException" 76 | } 77 | ] 78 | """ 79 | 80 | HTML = f""" 81 | 84 | 87 | """ 88 | 89 | soup = soupify(HTML) -------------------------------------------------------------------------------- /scraper/tests/mocks/events_json_lang_genre.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | from scraper.requester import soupify 5 | 6 | EVENTS_JSON = """ 7 | [ 8 | { 9 | "id": 2434750, 10 | "type": "language_edit", 11 | "comment": { 12 | "beatmap_discussion_id": null, 13 | "beatmap_discussion_post_id": null, 14 | "old": "Unspecified", 15 | "new": "Instrumental" 16 | }, 17 | "created_at": "2020-10-17T16:08:11+00:00", 18 | "user_id": 10660777, 19 | "beatmapset": { 20 | "artist": "ALEPH", 21 | "artist_unicode": "ALEPH", 22 | "covers": { 23 | "cover": "https://assets.ppy.sh/beatmaps/1280248/covers/cover.jpg?1602950856", 24 | "cover@2x": "https://assets.ppy.sh/beatmaps/1280248/covers/cover@2x.jpg?1602950856", 25 | "card": "https://assets.ppy.sh/beatmaps/1280248/covers/card.jpg?1602950856", 26 | "card@2x": "https://assets.ppy.sh/beatmaps/1280248/covers/card@2x.jpg?1602950856", 27 | "list": "https://assets.ppy.sh/beatmaps/1280248/covers/list.jpg?1602950856", 28 | "list@2x": "https://assets.ppy.sh/beatmaps/1280248/covers/list@2x.jpg?1602950856", 29 | "slimcover": "https://assets.ppy.sh/beatmaps/1280248/covers/slimcover.jpg?1602950856", 30 | "slimcover@2x": "https://assets.ppy.sh/beatmaps/1280248/covers/slimcover@2x.jpg?1602950856" 31 | }, 32 | "creator": "quantumvortex", 33 | "favourite_count": 0, 34 | "id": 1280248, 35 | "play_count": 0, 36 | "preview_url": "//b.ppy.sh/preview/1280248.mp3", 37 | "source": "osu!", 38 | "status": "pending", 39 | "title": "BREAKING AND ENTERING", 40 | "title_unicode": "BREAKING AND ENTERING", 41 | "user_id": 10660777, 42 | "video": false, 43 | "user": { 44 | "avatar_url": "https://a.ppy.sh/10660777?1589014084.jpeg", 45 | "country_code": "TH", 46 | "default_group": "default", 47 | "id": 10660777, 48 | "is_active": true, 49 | "is_bot": false, 50 | "is_online": true, 51 | "is_supporter": true, 52 | "last_visit": "2020-10-17T16:08:00+00:00", 53 | "pm_friends_only": false, 54 | "profile_colour": null, 55 | "username": "quantumvortex" 56 | } 57 | } 58 | }, 59 | { 60 | "id": 2434749, 61 | "type": "genre_edit", 62 | "comment": { 63 | "beatmap_discussion_id": null, 64 | "beatmap_discussion_post_id": null, 65 | "old": "Unspecified", 66 | "new": "Electronic" 67 | }, 68 | "created_at": "2020-10-17T16:08:11+00:00", 69 | "user_id": 10660777, 70 | "beatmapset": { 71 | "artist": "ALEPH", 72 | "artist_unicode": "ALEPH", 73 | "covers": { 74 | "cover": "https://assets.ppy.sh/beatmaps/1280248/covers/cover.jpg?1602950856", 75 | "cover@2x": "https://assets.ppy.sh/beatmaps/1280248/covers/cover@2x.jpg?1602950856", 76 | "card": "https://assets.ppy.sh/beatmaps/1280248/covers/card.jpg?1602950856", 77 | 
"card@2x": "https://assets.ppy.sh/beatmaps/1280248/covers/card@2x.jpg?1602950856", 78 | "list": "https://assets.ppy.sh/beatmaps/1280248/covers/list.jpg?1602950856", 79 | "list@2x": "https://assets.ppy.sh/beatmaps/1280248/covers/list@2x.jpg?1602950856", 80 | "slimcover": "https://assets.ppy.sh/beatmaps/1280248/covers/slimcover.jpg?1602950856", 81 | "slimcover@2x": "https://assets.ppy.sh/beatmaps/1280248/covers/slimcover@2x.jpg?1602950856" 82 | }, 83 | "creator": "quantumvortex", 84 | "favourite_count": 0, 85 | "id": 1280248, 86 | "play_count": 0, 87 | "preview_url": "//b.ppy.sh/preview/1280248.mp3", 88 | "source": "osu!", 89 | "status": "pending", 90 | "title": "BREAKING AND ENTERING", 91 | "title_unicode": "BREAKING AND ENTERING", 92 | "user_id": 10660777, 93 | "video": false, 94 | "user": { 95 | "avatar_url": "https://a.ppy.sh/10660777?1589014084.jpeg", 96 | "country_code": "TH", 97 | "default_group": "default", 98 | "id": 10660777, 99 | "is_active": true, 100 | "is_bot": false, 101 | "is_online": true, 102 | "is_supporter": true, 103 | "last_visit": "2020-10-17T16:08:00+00:00", 104 | "pm_friends_only": false, 105 | "profile_colour": null, 106 | "username": "quantumvortex" 107 | } 108 | } 109 | } 110 | ] 111 | """ 112 | 113 | USER_JSON = """ 114 | [ 115 | { 116 | "avatar_url": "https://a.ppy.sh/10660777?1589014084.jpeg", 117 | "country_code": "TH", 118 | "default_group": "default", 119 | "id": 10660777, 120 | "is_active": true, 121 | "is_bot": false, 122 | "is_online": true, 123 | "is_supporter": true, 124 | "last_visit": "2020-10-17T16:08:00+00:00", 125 | "pm_friends_only": false, 126 | "profile_colour": null, 127 | "username": "quantumvortex", 128 | "groups": [] 129 | } 130 | ] 131 | """ 132 | 133 | HTML = f""" 134 | 137 | 140 | """ 141 | 142 | soup = soupify(HTML) -------------------------------------------------------------------------------- /scraper/tests/mocks/events_json_nominate.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | from scraper.requester import soupify 5 | 6 | EVENTS_JSON = """ 7 | [ 8 | { 9 | "id": 2357896, 10 | "type": "nominate", 11 | "comment": null, 12 | "created_at": "2020-09-17T21:06:21+00:00", 13 | "user_id": 33599, 14 | "beatmapset": { 15 | "artist": "Mia REGINA", 16 | "artist_unicode": "Mia REGINA", 17 | "covers": { 18 | "cover": "https://assets.ppy.sh/beatmaps/1164305/covers/cover.jpg?1600315549", 19 | "cover@2x": "https://assets.ppy.sh/beatmaps/1164305/covers/cover@2x.jpg?1600315549", 20 | "card": "https://assets.ppy.sh/beatmaps/1164305/covers/card.jpg?1600315549", 21 | "card@2x": "https://assets.ppy.sh/beatmaps/1164305/covers/card@2x.jpg?1600315549", 22 | "list": "https://assets.ppy.sh/beatmaps/1164305/covers/list.jpg?1600315549", 23 | "list@2x": "https://assets.ppy.sh/beatmaps/1164305/covers/list@2x.jpg?1600315549", 24 | "slimcover": "https://assets.ppy.sh/beatmaps/1164305/covers/slimcover.jpg?1600315549", 25 | "slimcover@2x": "https://assets.ppy.sh/beatmaps/1164305/covers/slimcover@2x.jpg?1600315549" 26 | }, 27 | "creator": "kunka", 28 | "favourite_count": 7, 29 | "id": 1164305, 30 | "play_count": 884, 31 | "preview_url": "//b.ppy.sh/preview/1164305.mp3", 32 | "source": "天晴爛漫!", 33 | "status": "qualified", 34 | "title": "I got it! (TV Size)", 35 | "title_unicode": "I got it! 
(TV Size)", 36 | "user_id": 1741295, 37 | "video": true, 38 | "user": { 39 | "avatar_url": "https://a.ppy.sh/1741295?1557856409.jpg", 40 | "country_code": "JP", 41 | "default_group": "default", 42 | "id": 1741295, 43 | "is_active": true, 44 | "is_bot": false, 45 | "is_online": false, 46 | "is_supporter": true, 47 | "last_visit": null, 48 | "pm_friends_only": false, 49 | "profile_colour": null, 50 | "username": "kunka" 51 | } 52 | } 53 | } 54 | ] 55 | """ 56 | 57 | USER_JSON = """ 58 | [ 59 | { 60 | "avatar_url": "https://a.ppy.sh/33599?1599317457.jpeg", 61 | "country_code": "IT", 62 | "default_group": "bng", 63 | "id": 33599, 64 | "is_active": true, 65 | "is_bot": false, 66 | "is_online": false, 67 | "is_supporter": true, 68 | "last_visit": null, 69 | "pm_friends_only": false, 70 | "profile_colour": "#6B3FA0", 71 | "username": "Andrea", 72 | "groups": [ 73 | { 74 | "id": 28, 75 | "identifier": "bng", 76 | "name": "Beatmap Nominators", 77 | "short_name": "BN", 78 | "description": "", 79 | "colour": "#A347EB" 80 | }, 81 | { 82 | "id": 16, 83 | "identifier": "alumni", 84 | "name": "osu! Alumni", 85 | "short_name": "ALM", 86 | "description": "", 87 | "colour": "#999999" 88 | } 89 | ] 90 | } 91 | ] 92 | """ 93 | 94 | HTML = f""" 95 | 98 | 101 | """ 102 | 103 | soup = soupify(HTML) -------------------------------------------------------------------------------- /scraper/tests/mocks/requester.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | from datetime import datetime 5 | 6 | from aiess import Event, Beatmapset, User, Discussion 7 | from aiess.timestamp import from_string 8 | 9 | beatmapset = Beatmapset(4, "artist", "title", creator=User(1, "someone"), allow_api=False) 10 | discussion = Discussion(20, beatmapset=beatmapset, user=User(2, "sometwo"), content="hi") 11 | discussion_dq = Discussion(22, beatmapset=beatmapset, user=User(2, "sometwo"), content="no wait") 12 | 13 | # Note that all events are yielded from newest to oldest. 14 | 15 | def get_news_events(_from: datetime, limit: int=20): 16 | # The actual newspost doesn't matter, we're just making sure crawling the events works properly. 
17 | if _from == from_string("2020-01-01 03:00:00"): 18 | yield Event("news", from_string("2020-01-01 03:00:00"), newspost=None, user=User(2, "sometwo")) 19 | yield Event("news", from_string("2020-01-01 02:30:00"), newspost=None, user=User(1, "someone")) 20 | yield Event("news", from_string("2020-01-01 02:00:00"), newspost=None, user=User(4, "somefour")) 21 | if _from == from_string("2020-01-01 02:00:00"): 22 | yield Event("news", from_string("2020-01-01 01:00:00"), newspost=None, user=User(3, "somethree")) 23 | yield Event("news", from_string("2020-01-01 00:00:00"), newspost=None, user=User(5, "somefive")) 24 | 25 | def get_discussion_events(page: int=1, limit: int=50): 26 | if page == 1: 27 | yield Event("problem", from_string("2020-01-01 03:00:00"), beatmapset, discussion_dq, user=User(2, "sometwo"), content="no wait") 28 | yield Event("hype", from_string("2020-01-01 02:30:00"), beatmapset, user=User(2, "sometwo"), content="hype") 29 | yield Event("praise", from_string("2020-01-01 02:00:00"), beatmapset, user=User(2, "sometwo"), content="amazing") 30 | if page == 2: 31 | yield Event("issue_resolve", from_string("2020-01-01 01:00:00"), beatmapset, discussion, user=User(1, "someone")) 32 | yield Event("praise", from_string("2020-01-01 00:00:00"), beatmapset, user=User(2, "sometwo"), content="wow") 33 | 34 | def get_reply_events(page: int=1, limit: int=50): 35 | if page == 1: 36 | yield Event("reply", from_string("2020-01-01 01:04:00"), beatmapset, user=User(2, "sometwo"), content="thanks") 37 | yield Event("reply", from_string("2020-01-01 01:00:00"), beatmapset, user=User(1, "someone"), content="hi") 38 | yield Event("reply", from_string("2020-01-01 00:31:00"), beatmapset, discussion, user=User(2, "sometwo"), content="say hi back") 39 | if page == 2: 40 | yield Event("reply", from_string("2020-01-01 00:30:00"), beatmapset, discussion, user=User(1, "someone"), content="yes?") 41 | yield Event("reply", from_string("2020-01-01 00:00:00"), beatmapset, discussion, user=User(2, "sometwo"), content="please reply") 42 | 43 | def get_beatmapset_events(page: int=1, limit: int=50): 44 | if page == 1: 45 | yield Event("disqualify", from_string("2020-01-01 03:00:00"), beatmapset, discussion_dq, user=User(2, "sometwo")) 46 | yield Event("qualify", from_string("2020-01-01 02:31:00"), beatmapset) 47 | yield Event("nominate", from_string("2020-01-01 02:31:00"), beatmapset, user=User(2, "sometwo")) 48 | if page == 2: 49 | yield Event("nominate", from_string("2020-01-01 02:30:30"), beatmapset, user=User(1, "someone")) 50 | 51 | def get_beatmapset_events_too_new(page: int=1, limit: int=50): 52 | if page == 1: 53 | # February for this one 54 | yield Event("disqualify", from_string("2020-02-01 03:00:00"), beatmapset, discussion_dq, user=User(2, "sometwo")) 55 | if page == 2: 56 | # January for the rest 57 | yield Event("qualify", from_string("2020-01-01 02:31:00"), beatmapset) 58 | yield Event("nominate", from_string("2020-01-01 02:31:00"), beatmapset, user=User(2, "sometwo")) 59 | if page == 3: 60 | yield Event("nominate", from_string("2020-01-01 02:30:30"), beatmapset, user=User(1, "someone")) -------------------------------------------------------------------------------- /scraper/tests/parsers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Naxesss/Aiess/f7485b3ecc75ca1369960c5652036c4a7865be21/scraper/tests/parsers/__init__.py --------------------------------------------------------------------------------
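The mock requester above stands in for the paginated live feeds: each `get_*_events` generator yields events strictly newest-to-oldest, split across `page` values, so crawler tests can exercise paging without network access. A minimal sketch of the consumption pattern these mocks are built for (the `collect_until` helper is hypothetical, written only to illustrate paging until an already-processed timestamp is reached):

from datetime import datetime

from aiess.timestamp import from_string
from scraper.tests.mocks import requester as mock_requester

def collect_until(last_seen: datetime, max_pages: int=10) -> list:
    # Hypothetical consumer: walk pages newest-to-oldest, stopping once an
    # event at or before the already-processed timestamp is reached.
    collected = []
    for page in range(1, max_pages + 1):
        page_had_events = False
        for event in mock_requester.get_beatmapset_events(page=page):
            page_had_events = True
            if event.time <= last_seen:
                return collected
            collected.append(event)
        if not page_had_events:  # No more pages to crawl.
            break
    return collected

# Only the 03:00:00 disqualify is newer than the 02:31:00 cutoff.
assert [event.type for event in collect_until(from_string("2020-01-01 02:31:00"))] == ["disqualify"]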
/scraper/tests/parsers/test_beatmapset_event_parser.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | import pytest 5 | 6 | from aiess import timestamp 7 | 8 | from scraper.requester import soupify 9 | 10 | from scraper.tests.mocks.events.faulty import beatmapset_events as mock_beatmapset_events 11 | from scraper.tests.mocks import events_json as mock_events_json 12 | from scraper.tests.mocks import events_json_nominate as mock_events_nominate_json 13 | from scraper.tests.mocks import events_json_deleted_mapset as mock_events_json_deleted_mapset 14 | from scraper.tests.mocks import events_json_lang_genre as mock_events_lang_genre_json 15 | 16 | from scraper.parsers.beatmapset_event_parser import beatmapset_event_parser 17 | 18 | def test_parse_no_json(): 19 | generated_events = [] 20 | with pytest.raises(ValueError) as err: 21 | for event in beatmapset_event_parser.parse(soupify("")): 22 | generated_events.append(event) 23 | 24 | assert len(generated_events) == 0 25 | assert "Missing either json-events or json-users" in str(err) 26 | 27 | def test_parse_json(): 28 | generated_events = [] 29 | for event in beatmapset_event_parser.parse(mock_events_json.soup): 30 | generated_events.append(event) 31 | 32 | assert len(generated_events) == 5 33 | assert generated_events[0].type == "kudosu_gain" 34 | assert generated_events[0].beatmapset.id == 534054 35 | assert generated_events[0].beatmapset.creator.name == "SkyFlame" 36 | assert generated_events[4].type == "issue_resolve" 37 | 38 | def test_parse_json_deleted_beatmapset(): 39 | generated_events = [] 40 | for event in beatmapset_event_parser.parse(mock_events_json_deleted_mapset.soup): 41 | generated_events.append(event) 42 | 43 | assert not generated_events 44 | 45 | def test_parse_nominate_json(): 46 | generated_events = [] 47 | for event in beatmapset_event_parser.parse(mock_events_nominate_json.soup): 48 | generated_events.append(event) 49 | 50 | assert len(generated_events) == 1 51 | assert generated_events[0].type == "nominate" 52 | assert generated_events[0].user.id == 33599 53 | assert generated_events[0].beatmapset.id == 1164305 54 | assert generated_events[0].beatmapset.creator.name == "kunka" 55 | 56 | def test_parse_lang_genre_json(): 57 | generated_events = [] 58 | for event in beatmapset_event_parser.parse(mock_events_lang_genre_json.soup): 59 | generated_events.append(event) 60 | 61 | assert len(generated_events) == 2 62 | assert generated_events[0].type == "language_edit" 63 | assert generated_events[0].user.id == 10660777 64 | assert generated_events[0].content == "Unspecified -> Instrumental" 65 | assert generated_events[1].type == "genre_edit" 66 | assert generated_events[1].content == "Unspecified -> Electronic" -------------------------------------------------------------------------------- /scraper/tests/parsers/test_discussion_event_parser.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | import pytest 5 | import json 6 | 7 | from aiess import timestamp 8 | 9 | from scraper.tests.mocks.events import problem, reply 10 | from scraper.tests.mocks.events.faulty import discussion_events 11 | from scraper.tests.mocks import discussion_diff_and_tabs 12 | from scraper.tests.mocks import discussion_events_json 13 | from scraper.parsers.discussion_event_parser import discussion_event_parser 14 | from scraper import populator 15 | 16 | def 
test_parse_discussion_message(): 17 | actual_content = discussion_event_parser.parse_discussion_message(problem.tag) 18 | expected_content = problem.CONTENT 19 | 20 | assert actual_content == expected_content 21 | 22 | def test_parse_reply_message(): 23 | actual_content = discussion_event_parser.parse_discussion_message(reply.tag) 24 | expected_content = reply.CONTENT 25 | 26 | assert actual_content == expected_content 27 | 28 | def test_parse_discussion_tab(): 29 | actual_content = discussion_event_parser.parse_discussion_tab(discussion_diff_and_tabs.tag) 30 | expected_content = "general" 31 | 32 | assert actual_content == expected_content 33 | 34 | def test_parse_discussion_diff(): 35 | actual_content = discussion_event_parser.parse_discussion_diff(discussion_diff_and_tabs.tag) 36 | expected_content = "Expert" 37 | 38 | assert actual_content == expected_content 39 | 40 | def test_parse_json(): 41 | generator = discussion_event_parser.parse(json.loads(discussion_events_json.DISCUSSIONS_JSON)) 42 | 43 | generated_events = [] 44 | for event in generator: 45 | generated_events.append(event) 46 | 47 | # There are 7 events, but one is of a beatmapset that has been deleted, and another is empty. 48 | assert len(generated_events) == 5 49 | assert generated_events[0].type == "suggestion" 50 | assert generated_events[0].time == timestamp.from_string("2020-03-07T20:42:58+00:00") 51 | assert generated_events[2].user.id == 2597417 52 | assert generated_events[2].user.name == "Jaltzu" 53 | assert generated_events[2].discussion.tab == "timeline" 54 | assert generated_events[2].discussion.difficulty == "Muzukashii" -------------------------------------------------------------------------------- /scraper/tests/parsers/test_discussion_parser.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | import pytest 5 | 6 | from aiess import Beatmapset, User 7 | from scraper.requester import request_discussions_json 8 | 9 | from scraper.parsers.discussion_parser import discussion_parser 10 | 11 | @pytest.fixture(scope="session") 12 | def discussions_json(): 13 | return request_discussions_json(beatmapset_id=1001546) 14 | 15 | @pytest.fixture 16 | def beatmapset(): 17 | return Beatmapset( 18 | _id = 1001546, 19 | artist = "Carpool Tunnel", 20 | title = "Afterlight", 21 | creator = User(_id=7342798, 22 | name = "_Epreus"), 23 | modes = ["osu"], 24 | genre = "g", 25 | language = "l" 26 | ) 27 | 28 | def test_parse(discussions_json, beatmapset): 29 | discussions = discussion_parser.parse(discussions_json=discussions_json, beatmapset=beatmapset) 30 | for discussion in discussions: 31 | assert discussion 32 | assert discussion.id 33 | assert discussion.beatmapset == beatmapset 34 | assert discussion.user 35 | assert discussion.content is not None 36 | assert discussion.tab is not None 37 | 38 | def test_parse_discussion(discussions_json, beatmapset): 39 | discussion_jsons = discussions_json["discussions"] 40 | for discussion_json in discussion_jsons: 41 | if not discussion_json: continue 42 | 43 | discussion = discussion_parser.parse_discussion( 44 | discussion_json = discussion_json, 45 | beatmapset_json = discussions_json, 46 | beatmapset = beatmapset 47 | ) 48 | assert discussion 49 | assert discussion.id 50 | assert discussion.beatmapset == beatmapset 51 | assert discussion.user 52 | assert discussion.content is not None 53 | assert discussion.tab is not None 54 | 55 | def test_parse_user(discussions_json): 56 | discussion_jsons = 
discussions_json["discussions"] 57 | for discussion_json in discussion_jsons: 58 | if not discussion_json: continue 59 | 60 | user = discussion_parser.parse_user(user_id=discussion_json["user_id"], beatmapset_json=discussions_json) 61 | assert user 62 | assert user.id 63 | assert user.name 64 | 65 | def test_parse_discussion_post_author(discussions_json): 66 | discussion_jsons = discussions_json["discussions"] 67 | for discussion_json in discussion_jsons: 68 | if not discussion_json: continue 69 | 70 | user = discussion_parser.parse_discussion_post_author(post_id=3016640, beatmapset_json=discussions_json) 71 | assert user 72 | assert user.id 73 | assert user.name 74 | 75 | def test_parse_tab(discussions_json): 76 | discussion_jsons = discussions_json["discussions"] 77 | tabs = set() 78 | for discussion_json in discussion_jsons: 79 | if not discussion_json: continue 80 | 81 | tab = discussion_parser.parse_tab(discussion_json=discussion_json, beatmapset_json=discussions_json) 82 | tabs.add(tab) 83 | assert tab 84 | 85 | assert "timeline" in tabs 86 | assert "general" in tabs 87 | assert "generalAll" in tabs 88 | 89 | def test_parse_diff(discussions_json): 90 | discussion_jsons = discussions_json["discussions"] 91 | difficulties = set() 92 | for discussion_json in discussion_jsons: 93 | if not discussion_json: continue 94 | 95 | difficulty = discussion_parser.parse_diff(discussion_json=discussion_json, beatmapset_json=discussions_json) 96 | difficulties.add(difficulty) 97 | 98 | for beatmap_json in discussions_json["beatmaps"]: 99 | assert beatmap_json["version"] in difficulties -------------------------------------------------------------------------------- /scraper/tests/parsers/test_group_parser.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | import mock 5 | from datetime import datetime 6 | 7 | from aiess import Event, User, Usergroup 8 | from aiess.timestamp import from_string 9 | from aiess.database import Database, SCRAPER_TEST_DB_NAME 10 | 11 | from scraper.tests.mocks import groups as mock_groups 12 | from scraper.parsers import group_parser 13 | 14 | def setup_function(): 15 | Database(SCRAPER_TEST_DB_NAME).clear_table_data("group_users") 16 | 17 | def test_parse_timing(): 18 | # Test both additions and removals. 19 | Database(SCRAPER_TEST_DB_NAME).insert_group_user(group=Usergroup(7), user=User(1, "one")) 20 | Database(SCRAPER_TEST_DB_NAME).insert_group_user(group=Usergroup(7), user=User(2, "two")) 21 | Database(SCRAPER_TEST_DB_NAME).insert_group_user(group=Usergroup(7), user=User(3, "three")) 22 | Database(SCRAPER_TEST_DB_NAME).insert_group_user(group=Usergroup(7), user=User(4, "four")) 23 | Database(SCRAPER_TEST_DB_NAME).insert_group_user(group=Usergroup(7), user=User(5, "five")) 24 | 25 | start_time = datetime.utcnow() 26 | 27 | events = [] 28 | with mock.patch("scraper.parsers.group_parser.SCRAPER_DB_NAME", SCRAPER_TEST_DB_NAME): 29 | for event in group_parser.parse(group_id=7, group_page=mock_groups.soup, last_checked_at=from_string("2020-07-22T21:00:00+00:00")): 30 | events.append(event) 31 | 32 | end_time = datetime.utcnow() 33 | # We should not be using the api to fill in user names and such, as this data is available within the users json. 
34 | assert (end_time - start_time).total_seconds() < 3 35 | 36 | def test_parse_additions(): 37 | events = [] 38 | with mock.patch("scraper.parsers.group_parser.SCRAPER_DB_NAME", SCRAPER_TEST_DB_NAME): 39 | for event in group_parser.parse(group_id=7, group_page=mock_groups.soup, last_checked_at=from_string("2020-07-22T21:00:00+00:00")): 40 | events.append(event) 41 | 42 | assert len(events) == 27 43 | assert events[0] == Event( 44 | _type = "add", 45 | time = from_string("2020-07-22T21:00:00+00:00"), 46 | group = Usergroup(7, mode="mania"), 47 | user = User(_id=1653229) 48 | ) 49 | assert events[1] == Event( 50 | _type = "add", 51 | time = from_string("2020-07-22T21:00:00+00:00"), 52 | group = Usergroup(7, mode="osu"), 53 | user = User(_id=2202163) 54 | ) 55 | 56 | def test_parse_removals(): 57 | Database(SCRAPER_TEST_DB_NAME).insert_group_user(group=Usergroup(7, mode="taiko"), user=User(1, "someone")) 58 | 59 | events = [] 60 | with mock.patch("scraper.parsers.group_parser.SCRAPER_DB_NAME", SCRAPER_TEST_DB_NAME): 61 | for event in group_parser.parse(group_id=7, group_page=mock_groups.soup, last_checked_at=from_string("2020-07-22T21:00:00+00:00")): 62 | events.append(event) 63 | 64 | assert len(events) == 28 65 | assert events[0] == Event( 66 | _type = "remove", 67 | time = from_string("2020-07-22T21:00:00+00:00"), 68 | group = Usergroup(7, mode="taiko"), 69 | user = User(_id=1, name="someone") 70 | ) -------------------------------------------------------------------------------- /scraper/tests/parsers/test_news_parser.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | from aiess import Event, User, NewsPost 5 | from aiess.timestamp import from_string 6 | 7 | from scraper.tests.mocks.events import news as mock_news_events 8 | from scraper.parsers import news_parser 9 | 10 | def test_parse(): 11 | events = [] 12 | for event in news_parser.parse(mock_news_events.soup): 13 | events.append(event) 14 | 15 | assert len(events) == 6 16 | assert events[0] == Event( 17 | _type = "news", 18 | time = from_string("2020-07-22T21:00:00+00:00"), 19 | newspost = NewsPost( 20 | _id = 812, 21 | title = "Aspire V - Finals Stage Voting", 22 | preview = "You've chosen your favourite beatmaps from the Aspire V categories, now it's time to pick the best of the best!", 23 | author = User(_id=None, name="-Mo- & Ephemeral"), 24 | slug = "2020-07-21-aspire-v-finals-stage-voting", 25 | image_url = "https://assets.ppy.sh/contests/94/header.jpg" 26 | ), 27 | user = None, 28 | content = "You've chosen your favourite beatmaps from the Aspire V categories, now it's time to pick the best of the best!" 29 | ) 30 | assert events[1] == Event( 31 | _type = "news", 32 | time = from_string("2020-07-22T08:00:00+00:00"), 33 | newspost = NewsPost( 34 | _id = 811, 35 | title = "New Featured Artist: Receptor", 36 | preview = "We're excited to welcome Receptor aboard as our latest Featured Artist!", 37 | author = User(_id=102335, name="Ephemeral"), 38 | slug = "2020-07-22-new-featured-artist-receptor", 39 | image_url = "https://assets.ppy.sh/artists/91/header.jpg" 40 | ), 41 | user = User(_id=102335, name="Ephemeral"), 42 | content = "We're excited to welcome Receptor aboard as our latest Featured Artist!" 
43 | ) 44 | 45 | def test_complete_image_url(): 46 | assert news_parser.complete_image_url("/hello") == "https://osu.ppy.sh/hello" 47 | 48 | def test_complete_image_url_complete(): 49 | assert news_parser.complete_image_url("https://assets.ppy.sh/hello") == "https://assets.ppy.sh/hello" -------------------------------------------------------------------------------- /scraper/tests/test_requester.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | import mock 5 | from datetime import datetime, timedelta 6 | 7 | from aiess.database import Database, SCRAPER_TEST_DB_NAME 8 | from aiess import event_types as types 9 | 10 | from scraper.requester import get_news_events 11 | from scraper.requester import get_group_events 12 | from scraper.requester import get_beatmapset_events 13 | from scraper.requester import get_discussion_events 14 | from scraper.requester import get_reply_events 15 | 16 | def test_get_group_events(): 17 | Database(SCRAPER_TEST_DB_NAME).clear_table_data("group_users") 18 | 19 | with mock.patch("scraper.parsers.group_parser.SCRAPER_DB_NAME", SCRAPER_TEST_DB_NAME): 20 | events = get_group_events(_from=datetime.utcnow()) 21 | 22 | event_n = 0 23 | for event in events: 24 | assert event.type == types.ADD 25 | assert event.user 26 | assert event.group 27 | event_n += 1 28 | 29 | assert event_n > 100 30 | 31 | def test_get_news_events(): 32 | events = get_news_events(_from=datetime.utcnow(), limit=20) 33 | 34 | event_n = 0 35 | for event in events: 36 | assert event.type == types.NEWS 37 | assert event.newspost 38 | assert event.newspost.title 39 | assert event.newspost.preview 40 | assert event.newspost.image_url 41 | assert event.newspost.author 42 | assert event.newspost.slug 43 | event_n += 1 44 | 45 | assert event_n == 20 46 | 47 | def test_get_beatmapset_events(): 48 | events = get_beatmapset_events(page=1, limit=50) 49 | 50 | event_n = 0 51 | for event in events: 52 | assert event.beatmapset 53 | event_n += 1 54 | 55 | assert event_n >= 45 # Leniency in case a beatmapset was deleted. 56 | 57 | def test_get_discussion_events(): 58 | events = get_discussion_events(page=1, limit=50) 59 | 60 | event_n = 0 61 | for event in events: 62 | assert event.beatmapset 63 | assert event.discussion 64 | event_n += 1 65 | 66 | assert event_n >= 45 # Leniency in case a discussion was deleted. 67 | 68 | def test_get_reply_events(): 69 | events = get_reply_events(page=1, limit=50) 70 | 71 | event_n = 0 72 | for event in events: 73 | assert event.type == types.REPLY 74 | assert event.user 75 | assert event.beatmapset 76 | assert event.discussion 77 | event_n += 1 78 | 79 | assert event_n >= 45 # Leniency in case a reply was deleted. 
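The `test_get_*_events` tests above hit the live site, so they assert counts with leniency (at least 45 of the 50 requested), since items can be deleted between the request and the assertion. Where determinism matters, the same surface could instead be exercised against the mocks from `scraper/tests/mocks/requester.py`; a sketch of that pattern, assuming `mock.patch.object` on the requester module (the test itself is hypothetical, not part of the suite):

import mock

from aiess.timestamp import from_string
from scraper import requester
from scraper.tests.mocks import requester as mock_requester

def test_get_beatmapset_events_mocked():
    # Swap the network-backed generator for the deterministic mock, in the
    # same spirit as the SCRAPER_DB_NAME patches used elsewhere in the suite.
    with mock.patch.object(requester, "get_beatmapset_events", mock_requester.get_beatmapset_events):
        events = list(requester.get_beatmapset_events(page=1, limit=50))

    assert len(events) == 3
    assert events[0].type == "disqualify"
    assert events[0].time == from_string("2020-01-01 03:00:00")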
-------------------------------------------------------------------------------- /settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "api-key": "${OSU_API_KEY}", 3 | "api-rate-limit": 1, 4 | "page-rate-limit": 60, 5 | "bnsite-rate-limit": 1, 6 | "discord-api-key": "${DISCORD_API_KEY}", 7 | "root-path": "${ROOT_PATH}", 8 | "db-config": 9 | { 10 | "host": "127.0.0.1", 11 | "port": "${DB_PORT}", 12 | "database": "aiess", 13 | "user": "root", 14 | "password": "${DB_PASSWORD}" 15 | }, 16 | "bnsite-mongodb-uri": "${BNSITE_MONGODB_URI}", 17 | "bnsite-headers": 18 | { 19 | "username": "${BNSITE_HEADER_USERNAME}", 20 | "secret": "${BNSITE_HEADER_SECRET}" 21 | }, 22 | "bnstats-headers": 23 | { 24 | "Authorization": "${BNSTATS_HEADER_VALUE}" 25 | }, 26 | "bnplanner-headers": 27 | { 28 | "Authorization": "${BNPLANNER_HEADER_VALUE}" 29 | }, 30 | "apiv2-client-id": "${APIV2_CLIENT_ID}", 31 | "apiv2-client-secret": "${APIV2_CLIENT_SECRET}" 32 | } --------------------------------------------------------------------------------
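Secrets and instance-specific values never appear in `settings.json` directly; each field holds a `${VAR}` placeholder resolved from the environment, supplied locally by the per-package `.env` files and in CI by the workflow's `secrets`. The resolution is presumably handled by `aiess/settings.py`, which is not shown in this excerpt; a minimal sketch of the substitution idea, assuming placeholders always take the `${NAME}` form and unset variables are left untouched:

import json
import os
import re

PLACEHOLDER = re.compile(r"\$\{(\w+)\}")

def load_settings(path: str="settings.json") -> dict:
    # Substitute each ${NAME} with the matching environment variable,
    # leaving the placeholder as-is when the variable is unset.
    with open(path) as file:
        raw = file.read()
    resolved = PLACEHOLDER.sub(lambda match: os.environ.get(match.group(1), match.group(0)), raw)
    return json.loads(resolved)

# e.g. with DB_PASSWORD exported, settings["db-config"]["password"] is filled in.
settings = load_settings()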