├── plugin-import-name-rpggeek_source.txt ├── .gitignore ├── CHANGELOG.md ├── requirements.txt ├── __init__.py ├── calibre-rpggeek.code-workspace ├── README.md ├── .pydocstyle ├── test.py └── rpggeek_source.py /plugin-import-name-rpggeek_source.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | __pycache__ 3 | .vscode -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # RPGGeek Metadata Source Change Log 2 | 3 | ## [0.0.1] - 2023-x-x 4 | _Early development phase_ 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Project specific 2 | beautifulsoup4 3 | 4 | # Style and linting 5 | black 6 | mypy 7 | pydocstyle 8 | pylint -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | """Metadata source plugin that gets data from RPGGeek. 2 | 3 | Using this API: https://rpggeek.com/wiki/page/BGG_XML_API2 4 | """ 5 | 6 | from .rpggeek_source import * 7 | 8 | __all__ = ["rpggeek_source"] 9 | -------------------------------------------------------------------------------- /calibre-rpggeek.code-workspace: -------------------------------------------------------------------------------- 1 | { 2 | "folders": [ 3 | { 4 | "path": "." 
5 | } 6 | ], 7 | "settings": { 8 | "editor.formatOnSave": true, 9 | "python.formatting.provider": "black", 10 | "python.linting.pydocstyleEnabled": true, 11 | "python.linting.mypyEnabled": true, 12 | "python.linting.pylintEnabled": true, 13 | "python.defaultInterpreterPath": ".venv/Scripts/python" 14 | }, 15 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RPGGeek Metadata Source Plugin for Calibre 2 | 3 | ## Overview 4 | 5 | This is a plugin for [Calibre](https://calibre-ebook.com/) that searches for book metadata from [RPGGeek](https://www.rpggeek.com/). 6 | 7 | ## Develop 8 | 9 | Requirements: 10 | - Python 11 | - Calibre 12 | 13 | 14 | ``` 15 | git clone https://github.com/kovidgoyal/calibre.git 16 | 17 | git clone https://github.com/ErikLevin/calibre_rpggeek_plugin.git 18 | cd calibre_rpggeek_plugin 19 | python -m venv .venv 20 | .venv/Scripts/activate 21 | pip install -r requirements.txt 22 | ``` 23 | 24 | In .venv/Lib/site-packages, create a file Calibre.pth. 25 | 26 | In that file, enter {path to where you cloned Calibre}/src. 27 | 28 | ### Test 29 | 30 | ``` 31 | calibre-customize -b . 32 | calibre-debug -e test.py 33 | ``` -------------------------------------------------------------------------------- /.pydocstyle: -------------------------------------------------------------------------------- 1 | [pydocstyle] 2 | ignore = D100, D101, D102, D103, D203, D213, D406, D407, D413 3 | 4 | # D100 Missing docstring in public module. Covered by pylint. 5 | 6 | # D101 Missing docstring in public class. Covered by pylint. 7 | 8 | # D102 Missing docstring in public method. Covered by pylint. 9 | 10 | # D103 Missing docstring in public function. Covered by pylint. 11 | 12 | # D203 1 blank line required before class docstring. You need to choose between D203 and D211. 
13 | 14 | # D213 Multi-line docstring summary should start at the second line. You need to choose one between D212 and D213. 15 | 16 | # D406 Section name should end with a newline. numpy style. You need to choose between numpy or Google style. 17 | 18 | # D407 Missing dashed underline after section. numpy style. You need to choose between numpy or Google style. 19 | 20 | # D413 Missing blank line after last section. Adds unnecessary whitespace, none of the conventions use this. 21 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | """Tests the RPGGeek metadata source plugin.""" 2 | 3 | from queue import Queue 4 | from typing import Callable, Any 5 | from calibre.ebooks.metadata.sources.test import ( 6 | test_identify_plugin, 7 | title_test, 8 | authors_test, 9 | pubdate_test, 10 | series_test, 11 | # isbn_test, 12 | comments_test, 13 | ) 14 | from calibre.ebooks.metadata.book.base import Metadata 15 | from calibre.ebooks.metadata.sources.base import create_log 16 | 17 | # No variant of import (absolute/relative) makes this work both for pylint and when 18 | # running through calibre-debug. So just suppress import warning. 19 | from rpggeek_source import RPGGeekSource # pylint: disable=import-error 20 | 21 | # TODO It would make a lot more sense if the tests would rely on a test double that 22 | # returns saved data, instead of relying on RPGGeek's actual, live API and a working 23 | # internet connection. Searches, especially, can change results at any time... 24 | # But since Calibre's test framework seems to rely on actually installing the plugin 25 | # and running it, this seems nontrivial. 26 | 27 | 28 | # region "unit tests" 29 | 30 | 31 | # For some reason, I can't get it to work to run unittest via calibre-debug. 32 | # It refuses to discover any tests. So, making a few assertion functions of my own. 
def assert_eq(first: Any, second: Any) -> None:
    """Fail if the two parameters are not equal."""
    if first != second:
        raise AssertionError(f"{first} was not equal to {second}!")


def assert_true(expr: bool) -> None:
    """Fail if parameter is not True."""
    if not expr:
        raise AssertionError("Expression was False!")


# NOTE: the _test_* helpers below read the module-level name ``rpggeek_source``,
# which is only bound inside the ``if __name__ == "__main__"`` block at the
# bottom of this file. They are meant to be run through that entry point.


def _test_get_book_url():
    """Run all get_book_url checks."""
    _test_get_book_url__normal_case()
    _test_get_book_url__no_id()


def _test_get_book_url__normal_case():
    """An RPGGeek identifier yields (type, id, url)."""
    assert_eq(
        (
            "rpggeek",
            "648979",
            "https://rpggeek.com/rpgitemversion/648979",
        ),
        rpggeek_source.get_book_url({"rpggeek": "648979"}),
    )


def _test_get_book_url__no_id():
    """Identifiers without an RPGGeek entry yield None."""
    assert_eq(None, rpggeek_source.get_book_url({"isbn": "123456789"}))


def _test_id_from_url():
    """Run all id_from_url checks."""
    _test_id_from_url__normal_url()
    _test_id_from_url__alt_domain()
    _test_id_from_url__invalid_url()


def _test_id_from_url__normal_url():
    """An rpggeek.com item version URL is parsed into (type, id)."""
    assert_eq(
        ("rpggeek", "648979"),
        rpggeek_source.id_from_url(
            "https://rpggeek.com/rpgitemversion/648979/pdf-version"
        ),
    )


def _test_id_from_url__alt_domain():
    """The sibling boardgamegeek.com domain is accepted as well."""
    assert_eq(
        ("rpggeek", "648979"),
        rpggeek_source.id_from_url(
            "https://boardgamegeek.com/rpgitemversion/648979/pdf-version"
        ),
    )


def _test_id_from_url__invalid_url():
    """Foreign domains and non-item-version paths yield None."""
    assert_eq(
        None,
        rpggeek_source.id_from_url(
            "https://example.com/rpgitemversion/648979/pdf-version"
        ),
    )
    assert_eq(
        None,
        rpggeek_source.id_from_url(
            "https://rpggeek.com/rpg/56388/pathfinder-roleplaying-game-2nd-edition"
        ),
    )


def _test_id_with_no_match():
    """An ID that matches nothing must not produce any result."""
    result_queue = Queue()
    rpggeek_source.identify(
        result_queue=result_queue,
        identifiers={"rpggeek": "0"},
        log=create_log(),
        abort=False,
    )
    assert_true(result_queue.empty())


def _test_id_is_not_rpgitem():
    """An ID that belongs to a non-RPG 'thing' must not produce any result."""
    result_queue = Queue()
    rpggeek_source.identify(
        result_queue=result_queue,
        identifiers={"rpggeek": "13"},  # this 'thing' in the API is a board game
        log=create_log(),
        abort=False,
    )
    assert_true(result_queue.empty())


def _test_no_search_hits():
    """A title search without hits must not produce any result."""
    result_queue = Queue()
    rpggeek_source.identify(
        # I asked ChatGPT for "the name of an RPG book that absolutely does
        # not exist". It delivered.
        title="The Tome of Unending Misfortunes: A Guide to Living a Life of Constant "
        "Suffering in Your RPG Adventures",
        result_queue=result_queue,
        log=create_log(),
        abort=False,
    )
    assert_true(result_queue.empty())


# endregion


def identifier_test(id_type: str, id_val: str) -> Callable[[Metadata], bool]:
    """Return function that tests if metadata contains specific identifier."""

    def test(metadata: Metadata) -> bool:
        # .get() so that a missing identifier is reported as a failed test
        # instead of crashing the whole run with a KeyError.
        metadata_id = metadata.get_identifiers().get(id_type)
        if metadata_id and metadata_id == id_val:
            return True
        print(f"Identifier test failed. Expected: '{id_val}' found '{metadata_id}'")
        return False

    return test


def publisher_test(publisher: str) -> Callable[[Metadata], bool]:
    """Return function that tests if metadata has correct publisher set."""

    def test(metadata: Metadata) -> bool:
        if metadata.publisher and metadata.publisher == publisher:
            return True
        print(
            f"Publisher test failed. Expected: '{publisher}' "
            f"found '{metadata.publisher}'"
        )
        return False

    return test


def pubdate_none_test() -> Callable[[Metadata], bool]:
    """Return function that tests if metadata has None pubdate."""

    def test(metadata: Metadata) -> bool:
        return metadata.pubdate is None

    return test


if __name__ == "__main__":
    # To run these tests use:
    # calibre-debug -e test.py

    rpggeek_source = RPGGeekSource(None)
    _test_get_book_url()
    _test_id_from_url()
    _test_id_with_no_match()
    _test_id_is_not_rpgitem()
    _test_no_search_hits()

    test_identify_plugin(
        RPGGeekSource.name,
        [
            (  # RPGGeek ID -> item with title, authors, pubdate, publisher, identifier
                {
                    "identifiers": {"rpggeek": "363105"},
                },
                [
                    title_test("A Fistful of Flowers", exact=True),
                    authors_test(["Eleanor Ferron", "Linda Zayas-Palmer"]),
                    pubdate_test(2022, 1, 1),
                    publisher_test("Devir"),
                    identifier_test("rpggeek", "363105"),
                    comments_test("furious flowers and pugilistic plants"),
                ],
            ),
            (  # RPGGeek ID -> item with non-ASCII title
                {
                    "identifiers": {"rpggeek": "293597"},
                },
                [
                    title_test("永い後日談のネクロニカ", exact=True),
                ],
            ),
            (  # RPGGeek ID -> item with uncredited designer
                {
                    "identifiers": {"rpggeek": "329480"},
                },
                [
                    title_test("Abomination Vaults Pawn Collection"),
                    authors_test(["(Uncredited)"]),
                ],
            ),
            (  # RPGGeek ID -> item without published year
                {
                    "identifiers": {"rpggeek": "61154"},
                },
                [
                    title_test("Creatures of the Nightcycle"),
                    pubdate_none_test(),
                ],
            ),
            (  # RPGGeek ID -> item with series and index
                {
                    "identifiers": {"rpggeek": "346266"},
                },
                [
                    title_test("Spoken on the Song Wind", exact=True),
                    series_test("Pathfinder Adventure Path", 170),
                ],
            ),
            (  # Title -> one matching search result
                {"title": "A Fistful of Flowers"},
                [
                    title_test("A Fistful of Flowers", exact=True),
                    authors_test(["Eleanor Ferron", "Linda Zayas-Palmer"]),
                    pubdate_test(2022, 1, 1),
                    publisher_test("Devir"),
                    identifier_test("rpggeek", "363105"),
                    comments_test("furious flowers and pugilistic plants"),
                ],
            ),
            (  # Title -> many search results
                {"title": "GameMastery Guide"},
                [
                    # Check that a non-first result is returned
                    title_test("GameMastery Guide", exact=True),
                    identifier_test("rpggeek", "303391"),
                ],
            ),
        ],
        fail_missing_meta=False,
    )

    print("SUCCESS!")

# --------------------------------------------------------------------------------
# /rpggeek_source.py:
# --------------------------------------------------------------------------------
"""The module containing the plugin class."""

import re
from datetime import datetime
from functools import total_ordering
from queue import Queue
from urllib.parse import ParseResult, urlparse

from bs4 import BeautifulSoup
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.sources.base import Source

_ID_TYPE = "rpggeek"
_API_THING_URL = "https://rpggeek.com/xmlapi2/thing?id="
_API_SEARCH_URL = "https://rpggeek.com/xmlapi2/search?type=rpgitem&query="

# TODO Was this URL correct? Check again...
# _WEB_SEARCH_URL = (
#     "https://rpggeek.com/geeksearch.php?action=search&objecttype=rpgitem&"
#     'searchinfilters=[{"filtertype":"geekitemtype","filtertext":"RPG Item"}'
#     ',{"filtertype":"itemtype","filtertext":"RPG Item"}]&B1=Search&q='
# )

# xGeek's three domains are interchangeable
_GEEK_HOSTS = frozenset(("rpggeek.com", "boardgamegeek.com", "videogamegeek.com"))

# Guess that the last number in seriescode is the series index
_SERIES_INDEX_RE = re.compile(r"(\d+)\D*$")


def _is_aborted(abort) -> bool:
    """Return True if the caller has asked to stop processing.

    Accepts either an event-like object exposing ``is_set()`` or a plain
    boolean (test.py passes ``abort=False``).
    """
    is_set = getattr(abort, "is_set", None)
    if callable(is_set):
        return bool(is_set())
    return bool(abort)


@total_ordering
class _RelevanceKey:
    """Sort key that orders metadata by its ``source_relevance`` only."""

    def __init__(self, metadata):
        self.relevance = metadata.source_relevance

    def __eq__(self, other):
        if not isinstance(other, _RelevanceKey):
            return NotImplemented
        return self.relevance == other.relevance

    def __lt__(self, other):
        if not isinstance(other, _RelevanceKey):
            return NotImplemented
        return self.relevance < other.relevance

    def __hash__(self):
        # Defined together with __eq__ so instances stay hashable.
        return hash(self.relevance)


def _get_pub_date(soup: BeautifulSoup) -> datetime | None:
    """Return January 1st of the published year, or None if there is no usable year.

    A missing tag, a missing or non-numeric ``value`` attribute, and a year
    outside the range ``datetime`` supports (this includes the original
    ``year <= 0`` case) all yield None instead of raising.
    """
    tag = soup.find("yearpublished")
    if not tag:
        return None
    try:
        year = int(tag["value"])
    except (KeyError, TypeError, ValueError):
        return None
    if not datetime.min.year <= year <= datetime.max.year:
        return None
    return datetime(year, 1, 1)


def _get_publisher(soup: BeautifulSoup) -> str | None:
    """Return the first listed publisher, or None if there is none."""
    # There can be many publishers. Just using first one for now. Need to scrape
    # for versions to narrow down actual publisher.
    tag = soup.find("link", attrs={"type": "rpgpublisher"})
    if tag:
        return tag["value"]
    return None


def _get_series(soup: BeautifulSoup) -> tuple[str, int]:
    """Return (series name, series index); ("", 0) when the item has no series.

    The index is a guess: the last number found in the ``seriescode`` value.
    It is 0 when there is no series code or the code contains no digits.
    """
    series = ""
    index = 0
    # Same here, take first series for now...
    series_tag = soup.find("link", attrs={"type": "rpgseries"})
    if series_tag:
        series = series_tag["value"]
        # Any way to do better? Probably not?
        series_code_tag = soup.find("seriescode")
        if series_code_tag:
            match = _SERIES_INDEX_RE.search(series_code_tag["value"])
            index = int(match.group(1)) if match else 0
    return series, index


def _get_comments(soup: BeautifulSoup) -> str | None:
    """Return the item description as plain text, or None if it is missing/empty."""
    tag = soup.find("description")
    if not tag:
        return None
    # get_text() joins every text node (not just the first child) and returns a
    # plain str rather than a NavigableString tied to the parse tree.
    text = tag.get_text().strip()
    return text or None


class RPGGeekSource(Source):
    """The plugin class."""

    name = "RPGGeek"
    description = "Retrieves metadata from RPGGeek"
    version = (0, 0, 1)
    author = "Erik Levin"
    supported_platforms = ["windows", "osx", "linux"]
    capabilities = frozenset(["identify"])
    touched_fields = frozenset(
        [
            "identifier:rpggeek",
            "title",
            "authors",
            "comments",
            "pubdate",
            "publisher",
            "series",
        ]
    )

    # TODO Future work - settings
    # - What to use as authors. First designer, all designers, fallback to artists,
    #   fallback to producers, fallback to publishers.
    # - Setting various things as designated tags. rpg, rpgsetting, rpggenre
    # - Use original published date or specific version published date
    def is_customizable(self):
        """Return whether this plugin has config."""
        return False

    # TODO Future work - cover art
    # - get_cached_cover_url
    # - download_cover

    # TODO Future work - ISBN, product code, and language. Need to get them from
    # rpgitemversion, which aren't in the API, so, need to search and scrape.

    # TODO When I have rpgitemversion, get actual publisher from there instead.

    def get_book_url(self, identifiers):
        """Return ("rpggeek", RPGGeek item ID, RPG item URL), based on RPGGeek item ID.

        Returns None if there is no RPGGeek ID.
        See parent class for more information.
        """
        rpggeek_id = identifiers.get(_ID_TYPE, None)
        if rpggeek_id:
            # NOTE(review): identify() looks this ID up as an "rpgitem" thing, while
            # the URL below points at an "rpgitemversion" page - confirm that both
            # share the same ID space.
            return (
                _ID_TYPE,
                rpggeek_id,
                "https://rpggeek.com/rpgitemversion/" + rpggeek_id,
            )
        return None

    def id_from_url(self, url) -> tuple[str, str] | None:
        """Return ("rpggeek", RPG item ID) from RPG item URL, or None if that fails.

        The host is compared case-insensitively and an optional "www." prefix is
        ignored. The ID path segment must be numeric.
        See parent class for more information.
        """
        parsed_url: ParseResult = urlparse(url)
        # .hostname is lower-cased and has no port; it is None for unusable URLs.
        host = parsed_url.hostname
        if not isinstance(host, str):
            return None
        if host.removeprefix("www.") not in _GEEK_HOSTS:
            return None

        path_parts: list[str] = parsed_url.path.strip("/").split("/")
        if len(path_parts) < 2 or path_parts[0] != "rpgitemversion":
            return None
        if not path_parts[1].isdigit():
            return None
        return (_ID_TYPE, path_parts[1])

    def identify_results_keygen(self, title=None, authors=None, identifiers=None):
        """Sort results based on RPGGeek's search result sorting instead.

        The query parameters are accepted for interface compatibility but are not
        used: the returned key function orders results by ``source_relevance``.
        """
        del title, authors, identifiers  # unused by design
        return _RelevanceKey

    def _get_metadata_from_thing_api(
        self,
        rpggeek_id: str,
        result_queue: Queue,
        relevance: int,
        log,
        timeout: float = 30,
    ) -> None:
        """Fetch one item from the 'thing' API and put its metadata on the queue.

        Nothing is queued when the ID is not numeric, when the response contains
        no item of type "rpgitem", or when the item has no primary name.

        Args:
            rpggeek_id: RPGGeek item ID to look up.
            result_queue: Queue that receives the resulting Metadata object.
            relevance: Value stored as ``source_relevance`` (lower sorts first).
            log: Logger used for debug output.
            timeout: Timeout in seconds for the network request.
        """
        rpggeek_id = str(rpggeek_id).strip()
        if not rpggeek_id.isdigit():
            # The ID is concatenated into the query string; refuse anything that
            # could alter the request.
            log.debug(f"Ignoring non-numeric RPGGeek ID: {rpggeek_id!r}")
            return

        response = self.browser.open_novisit(
            _API_THING_URL + rpggeek_id, timeout=timeout
        )
        soup = BeautifulSoup(response, features="lxml")
        log.debug(soup.prettify())
        if not soup.find("item", attrs={"type": "rpgitem"}):
            return
        name_tag = soup.find("name", attrs={"type": "primary"})
        if not name_tag:
            log.error(f"RPGGeek item {rpggeek_id} has no primary name, skipping")
            return
        title = name_tag["value"]
        authors = [
            x["value"] for x in soup.find_all("link", attrs={"type": "rpgdesigner"})
        ]
        series, index = _get_series(soup)

        metadata = Metadata(title, authors)
        metadata.set_identifier(_ID_TYPE, rpggeek_id)
        metadata.pubdate = _get_pub_date(soup)
        metadata.publisher = _get_publisher(soup)
        metadata.comments = _get_comments(soup)
        if series:
            # Only touch the series fields when the item really belongs to one.
            metadata.series = series
            metadata.series_index = index
        metadata.source = self.name
        metadata.source_relevance = relevance
        self.clean_downloaded_metadata(metadata)
        result_queue.put(metadata)

    def _search_title(
        self, title: str, result_queue: Queue, log, abort=None, timeout: float = 30
    ) -> None:
        """Search for item based on title, using API.

        Each hit is fetched through the 'thing' API and queued with its position
        in the search result as relevance. Does nothing when the title yields no
        search tokens.

        Args:
            title: Title to search for; may be None or empty.
            result_queue: Queue that receives the resulting Metadata objects.
            log: Logger used for debug output.
            abort: Optional event (or bool); checked before every item fetch.
            timeout: Timeout in seconds for each network request.
        """
        # Local import so this block does not depend on the module's import list.
        from urllib.parse import quote_plus  # pylint: disable=import-outside-toplevel

        title_tokens = list(self.get_title_tokens(title))
        if not title_tokens:
            log.debug("No title tokens to search for")
            return
        # Tokens may contain non-ASCII or reserved characters - encode each one.
        query = _API_SEARCH_URL + "+".join(quote_plus(token) for token in title_tokens)
        log.debug(query)
        response = self.browser.open_novisit(query, timeout=timeout)

        soup = BeautifulSoup(response, features="lxml")
        log.debug(soup.prettify())

        items = soup.find_all("item", attrs={"type": "rpgitem"})
        # TODO You can get multiple things from one query
        # Query for all search results with one API call instead of the below.
        for i, item in enumerate(items):
            if _is_aborted(abort):
                break
            item_id = item.get("id")
            if not item_id:
                continue
            self._get_metadata_from_thing_api(
                item_id, result_queue, relevance=i, log=log, timeout=timeout
            )

    def identify(
        self,
        log,
        result_queue,
        abort,
        title=None,
        authors=None,
        identifiers=None,
        timeout=30,
    ):  # pylint: disable=too-many-arguments
        """Look up an item by RPGGeek ID if given, otherwise search by title.

        ``authors`` is currently not used for matching. ``abort`` is checked
        before any request is made and between search hits; ``timeout`` is
        applied to each network request. See parent class.
        """
        del authors  # not used for matching yet
        if not identifiers:
            identifiers = {}

        if _is_aborted(abort):
            return None

        rpggeek_id = identifiers.get(_ID_TYPE, None)
        if rpggeek_id:
            self._get_metadata_from_thing_api(
                rpggeek_id, result_queue, relevance=0, log=log, timeout=timeout
            )
        else:
            self._search_title(
                title, result_queue, log, abort=abort, timeout=timeout
            )
        return None