├── plugin-import-name-rpggeek_source.txt
├── .gitignore
├── CHANGELOG.md
├── requirements.txt
├── __init__.py
├── calibre-rpggeek.code-workspace
├── README.md
├── .pydocstyle
├── test.py
└── rpggeek_source.py


/plugin-import-name-rpggeek_source.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .venv
2 | __pycache__
3 | .vscode


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # RPGGeek Metadata Source Change Log
2 | 
3 | ## [0.0.1] - 2023-x-x
4 | _Early development phase_
5 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # Project specific
2 | beautifulsoup4
3 | 
4 | # Style and linting
5 | black
6 | mypy
7 | pydocstyle
8 | pylint


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | """Metadata source plugin that gets data from RPGGeek.
2 | 
3 | Using this API: https://rpggeek.com/wiki/page/BGG_XML_API2
4 | """
5 | 
6 | from .rpggeek_source import *
7 | 
8 | __all__ = ["rpggeek_source"]
9 | 


--------------------------------------------------------------------------------
/calibre-rpggeek.code-workspace:
--------------------------------------------------------------------------------
 1 | {
 2 |   "folders": [
 3 |     {
 4 |       "path": "."
 5 |     }
 6 |   ],
 7 |   "settings": {
 8 |     "editor.formatOnSave": true,
 9 |     "python.formatting.provider": "black",
10 |     "python.linting.pydocstyleEnabled": true,
11 |     "python.linting.mypyEnabled": true,
12 |     "python.linting.pylintEnabled": true,
13 |     "python.defaultInterpreterPath": ".venv/Scripts/python"
14 |   },
15 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # RPGGeek Metadata Source Plugin for Calibre
 2 | 
 3 | ## Overview
 4 | 
 5 | This is a metadata source plugin for [Calibre](https://calibre-ebook.com/) that fetches book metadata from [RPGGeek](https://www.rpggeek.com/).
 6 | 
 7 | ## Develop
 8 | 
 9 | Requirements:
10 | - Python
11 | - Calibre
12 | 
13 | 
14 | ```
15 | git clone https://github.com/kovidgoyal/calibre.git
16 | 
17 | git clone https://github.com/ErikLevin/calibre_rpggeek_plugin.git
18 | cd calibre_rpggeek_plugin
19 | python -m venv .venv
20 | .venv/Scripts/activate
21 | pip install -r requirements.txt
22 | ```
23 | 
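24 | In `.venv/Lib/site-packages`, create a file named `Calibre.pth`.
25 | 
26 | In that file, enter the path `{path to where you cloned Calibre}/src`, so that the `calibre` package can be imported from inside the virtual environment.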
27 | 
28 | ### Test
29 | 
30 | ```
31 | calibre-customize -b .
32 | calibre-debug -e test.py
33 | ```


--------------------------------------------------------------------------------
/.pydocstyle:
--------------------------------------------------------------------------------
 1 | [pydocstyle]
 2 | ignore = D100, D101, D102, D103, D203, D213, D406, D407, D413
 3 | 
 4 | # D100 Missing docstring in public module. Covered by pylint.
 5 | 
 6 | # D101 Missing docstring in public class. Covered by pylint.
 7 | 
 8 | # D102 Missing docstring in public method. Covered by pylint.
 9 | 
10 | # D103 Missing docstring in public function. Covered by pylint.
11 | 
12 | # D203 1 blank line required before class docstring. You need to choose between D203 and D211.
13 | 
14 | # D213 Multi-line docstring summary should start at the second line. You need to choose one between D212 and D213.
15 | 
16 | # D406 Section name should end with a newline. numpy style. You need to choose between numpy or Google style.
17 | 
18 | # D407 Missing dashed underline after section. numpy style. You need to choose between numpy or Google style.
19 | 
20 | # D413 Missing blank line after last section. Adds unnecessary whitespace, none of the conventions use this.
21 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
  1 | """Tests the RPGGeek metadata source plugin."""
  2 | 
  3 | from queue import Queue
  4 | from typing import Callable, Any
  5 | from calibre.ebooks.metadata.sources.test import (
  6 |     test_identify_plugin,
  7 |     title_test,
  8 |     authors_test,
  9 |     pubdate_test,
 10 |     series_test,
 11 |     # isbn_test,
 12 |     comments_test,
 13 | )
 14 | from calibre.ebooks.metadata.book.base import Metadata
 15 | from calibre.ebooks.metadata.sources.base import create_log
 16 | 
 17 | # No variant of import (absolute/relative) makes this work both for pylint and when
 18 | # running through calibre-debug. So just suppress import warning.
 19 | from rpggeek_source import RPGGeekSource  # pylint: disable=import-error
 20 | 
 21 | # TODO It would make a lot more sense if the tests would rely on a test double that
 22 | # returns saved data, instead of relying on RPGGeek's actual, live API and a working
 23 | # internet connection. Searches, especially, can change results at any time...
 24 | # But since Calibre's test framework seems to rely on actually installing the plugin
 25 | # and running it, this seems nontrivial.
 26 | 
 27 | 
 28 | # region "unit tests"
 29 | 
 30 | 
 31 | # For some reason, unittest refuses to discover any tests when it is run via
 32 | # calibre-debug. So, this file defines a few assertion functions of its own instead.
 33 | 
 34 | 
 35 | def assert_eq(first: Any, second: Any) -> None:
 36 |     """Fail if the two parameters are not equal."""
 37 |     if first != second:
 38 |         raise AssertionError(f"{first} was not equal to {second}!")
 39 | 
 40 | 
 41 | def assert_true(expr: bool):
 42 |     """Fail if parameter is not True."""
 43 |     if not expr:
 44 |         raise AssertionError("Expression was False!")
 45 | 
 46 | 
 47 | def _test_get_book_url():
 48 |     _test_get_book_url__normal_case()
 49 |     _test_get_book_url__no_id()
 50 | 
 51 | 
 52 | def _test_get_book_url__normal_case():
 53 |     assert_eq(
 54 |         (
 55 |             "rpggeek",
 56 |             "648979",
 57 |             "https://rpggeek.com/rpgitemversion/648979",
 58 |         ),
 59 |         rpggeek_source.get_book_url({"rpggeek": "648979"}),
 60 |     )
 61 | 
 62 | 
 63 | def _test_get_book_url__no_id():
 64 |     assert_eq(None, rpggeek_source.get_book_url({"isbn": "123456789"}))
 65 | 
 66 | 
 67 | def _test_id_from_url():
 68 |     _test_id_from_url__normal_url()
 69 |     _test_id_from_url__alt_domain()
 70 |     _test_id_from_url__invalid_url()
 71 | 
 72 | 
 73 | def _test_id_from_url__normal_url():
 74 |     assert_eq(
 75 |         ("rpggeek", "648979"),
 76 |         rpggeek_source.id_from_url(
 77 |             "https://rpggeek.com/rpgitemversion/648979/pdf-version"
 78 |         ),
 79 |     )
 80 | 
 81 | 
 82 | def _test_id_from_url__alt_domain():
 83 |     assert_eq(
 84 |         ("rpggeek", "648979"),
 85 |         rpggeek_source.id_from_url(
 86 |             "https://boardgamegeek.com/rpgitemversion/648979/pdf-version"
 87 |         ),
 88 |     )
 89 | 
 90 | 
 91 | def _test_id_from_url__invalid_url():
 92 |     assert_eq(
 93 |         None,
 94 |         rpggeek_source.id_from_url(
 95 |             "https://example.com/rpgitemversion/648979/pdf-version"
 96 |         ),
 97 |     )
 98 |     assert_eq(
 99 |         None,
100 |         rpggeek_source.id_from_url(
101 |             "https://rpggeek.com/rpg/56388/pathfinder-roleplaying-game-2nd-edition"
102 |         ),
103 |     )
104 | 
105 | 
def _test_id_with_no_match():
    """Check that identifying by the RPGGeek ID "0" queues no results."""
    results = Queue()
    lookup_args = {
        "result_queue": results,
        "identifiers": {"rpggeek": "0"},
        "log": create_log(),
        "abort": False,
    }
    rpggeek_source.identify(**lookup_args)
    assert_true(results.empty())
115 | 
116 | 
def _test_id_is_not_rpgitem():
    """Check that an ID belonging to a non-RPG item queues no results."""
    results = Queue()
    lookup_args = {
        "result_queue": results,
        # This 'thing' in the API is a board game.
        "identifiers": {"rpggeek": "13"},
        "log": create_log(),
        "abort": False,
    }
    rpggeek_source.identify(**lookup_args)
    assert_true(results.empty())
126 | 
127 | 
def _test_no_search_hits():
    """Check that searching for a title with no hits queues no results."""
    # ChatGPT was asked for "the name of an RPG book that absolutely does
    # not exist". It delivered.
    nonexistent_title = (
        "The Tome of Unending Misfortunes: A Guide to Living a Life of Constant "
        "Suffering in Your RPG Adventures"
    )
    results = Queue()
    rpggeek_source.identify(
        title=nonexistent_title,
        result_queue=results,
        log=create_log(),
        abort=False,
    )
    assert_true(results.empty())
140 | 
141 | 
142 | # endregion
143 | 
144 | 
def identifier_test(id_type: str, id_val: str) -> Callable[[Metadata], bool]:
    """Return a function that tests if metadata contains a specific identifier.

    Args:
        id_type: Identifier type to look up, e.g. "rpggeek".
        id_val: Value the metadata is expected to hold for that type.

    Returns:
        A predicate taking the metadata. It returns True on a match. On a
        mismatch, or when the metadata has no identifier of ``id_type``, it
        prints a diagnostic and returns False.
    """

    def test(metadata: Metadata) -> bool:
        # .get() turns a missing identifier into a failed test instead of a
        # KeyError that would abort the whole test run.
        metadata_id = metadata.get_identifiers().get(id_type)
        if metadata_id and metadata_id == id_val:
            return True
        print(f"Identifier test failed. Expected: '{id_val}' found '{metadata_id}'")
        return False

    return test
156 | 
157 | 
def publisher_test(publisher: str) -> Callable[[Metadata], bool]:
    """Return a function that tests if metadata has the correct publisher set.

    Args:
        publisher: The publisher name the metadata is expected to have.

    Returns:
        A predicate taking the metadata. It returns True when the metadata's
        publisher is set and equal to ``publisher``; otherwise it prints a
        diagnostic and returns False.
    """

    def test(metadata: Metadata) -> bool:
        if metadata.publisher and metadata.publisher == publisher:
            return True
        # Both values are quoted so that empty or whitespace-only names are
        # visible in the output.
        print(
            f"Publisher test failed. Expected: '{publisher}' "
            f"found '{metadata.publisher}'"
        )
        return False

    return test
170 | 
171 | 
def pubdate_none_test() -> Callable[[Metadata], bool]:
    """Build a predicate that passes only for metadata whose pubdate is None."""

    def test(metadata: Metadata) -> bool:
        pubdate = metadata.pubdate
        return pubdate is None

    return test
179 | 
180 | 
if __name__ == "__main__":
    # To run these tests, use:
    # calibre-debug -e test.py

    # Module-level instance that the _test_* helper functions above read as a
    # global; they can therefore only be called after this assignment.
    rpggeek_source = RPGGeekSource(None)
    _test_get_book_url()
    _test_id_from_url()
    _test_id_with_no_match()
    _test_id_is_not_rpgitem()
    _test_no_search_hits()

    # Each entry below is a pair: the keyword arguments for an identify call,
    # and the list of predicates that the returned metadata is checked with.
    test_identify_plugin(
        RPGGeekSource.name,
        [
            (  # RPGGeek ID -> item with title, authors, pubdate, publisher, identifier
                {
                    "identifiers": {"rpggeek": "363105"},
                },
                [
                    title_test("A Fistful of Flowers", exact=True),
                    authors_test(["Eleanor Ferron", "Linda Zayas-Palmer"]),
                    pubdate_test(2022, 1, 1),
                    publisher_test("Devir"),
                    identifier_test("rpggeek", "363105"),
                    comments_test("furious flowers and pugilistic plants"),
                ],
            ),
            (  # RPGGeek ID -> item with non-ASCII title
                {
                    "identifiers": {"rpggeek": "293597"},
                },
                [
                    title_test("永い後日談のネクロニカ", exact=True),
                ],
            ),
            (  # RPGGeek ID -> item with uncredited designer
                {
                    "identifiers": {"rpggeek": "329480"},
                },
                [
                    title_test("Abomination Vaults Pawn Collection"),
                    authors_test(["(Uncredited)"]),
                ],
            ),
            (  # RPGGeek ID -> item without published year
                {
                    "identifiers": {"rpggeek": "61154"},
                },
                [
                    title_test("Creatures of the Nightcycle"),
                    pubdate_none_test(),
                ],
            ),
            (  # RPGGeek ID -> item with series and index
                {
                    "identifiers": {"rpggeek": "346266"},
                },
                [
                    title_test("Spoken on the Song Wind", exact=True),
                    series_test("Pathfinder Adventure Path", 170),
                ],
            ),
            (  # Title -> one matching search result
                {"title": "A Fistful of Flowers"},
                [
                    title_test("A Fistful of Flowers", exact=True),
                    authors_test(["Eleanor Ferron", "Linda Zayas-Palmer"]),
                    pubdate_test(2022, 1, 1),
                    publisher_test("Devir"),
                    identifier_test("rpggeek", "363105"),
                    comments_test("furious flowers and pugilistic plants"),
                ],
            ),
            (  # Title -> many search results
                {"title": "GameMastery Guide"},
                [
                    # Check that a non-first result is returned
                    title_test("GameMastery Guide", exact=True),
                    identifier_test("rpggeek", "303391"),
                ],
            ),
        ],
        fail_missing_meta=False,
    )

    print("SUCCESS!")
267 | 


--------------------------------------------------------------------------------
/rpggeek_source.py:
--------------------------------------------------------------------------------
  1 | """The module containing the plugin class."""
  2 | 
  3 | import re
  4 | from datetime import datetime
  5 | from functools import total_ordering
  6 | from queue import Queue
  7 | from urllib.parse import ParseResult, urlparse
  8 | 
  9 | from bs4 import BeautifulSoup
 10 | from calibre.ebooks.metadata.book.base import Metadata
 11 | from calibre.ebooks.metadata.sources.base import Source
 12 | 
# Identifier type under which the RPGGeek item ID is stored in the metadata.
_ID_TYPE = "rpggeek"
# "Thing" endpoint of the XML API; the item ID is appended to this URL.
_API_THING_URL = "https://rpggeek.com/xmlapi2/thing?id="
# Search endpoint of the XML API, restricted to RPG items; the query string is
# appended to this URL.
_API_SEARCH_URL = "https://rpggeek.com/xmlapi2/search?type=rpgitem&query="
 16 | 
 17 | # TODO Was this URL correct? Check again...
 18 | # _WEB_SEARCH_URL = (
 19 | #     "https://rpggeek.com/geeksearch.php?action=search&objecttype=rpgitem&"
 20 | #     'searchinfilters=[{"filtertype":"geekitemtype","filtertext":"RPG Item"}'
 21 | #     ',{"filtertype":"itemtype","filtertext":"RPG Item"}]&B1=Search&q='
 22 | # )
 23 | 
 24 | 
 25 | def _get_pub_date(soup: BeautifulSoup) -> datetime | None:
 26 |     tag = soup.find("yearpublished")
 27 |     if not tag:
 28 |         return None
 29 |     year = int(tag["value"])
 30 |     if year <= 0:
 31 |         return None
 32 |     return datetime(year, 1, 1)
 33 | 
 34 | 
 35 | def _get_publisher(soup: BeautifulSoup) -> str | None:
 36 |     # There can be many publishers. Just using first one for now. Need to scrape
 37 |     # for versions to narrow down actual publisher.
 38 |     tag = soup.find("link", attrs={"type": "rpgpublisher"})
 39 |     if tag:
 40 |         return tag["value"]
 41 |     return None
 42 | 
 43 | 
 44 | def _get_series(soup: BeautifulSoup) -> tuple[str, int]:
 45 |     series = ""
 46 |     index = 0
 47 |     # Same here, take first series for now...
 48 |     series_tag = soup.find("link", attrs={"type": "rpgseries"})
 49 |     if series_tag:
 50 |         series = series_tag["value"]
 51 |     # Guess that the last number in seriescode is the series index
 52 |     # Any way to do better? Probably not?
 53 |     series_code_tag = soup.find("seriescode")
 54 |     if series_code_tag:
 55 |         series_code = series_code_tag["value"]
 56 |         match = re.search(r"(\d+)\D*$", series_code)
 57 |         index = int(match.group(1)) if match else 0
 58 |     return series, index
 59 | 
 60 | 
 61 | def _get_comments(soup: BeautifulSoup) -> str | None:
 62 |     tag = soup.find("description")
 63 |     if not tag or not tag.contents:
 64 |         return None
 65 |     return tag.contents[0]
 66 | 
 67 | 
 68 | class RPGGeekSource(Source):
 69 |     """The plugin class."""
 70 | 
 71 |     name = "RPGGeek"
 72 |     description = "Retrieves metadata from RPGGeek"
 73 |     version = (0, 0, 1)
 74 |     author = "Erik Levin"
 75 |     supported_platforms = ["windows", "osx", "linux"]
 76 |     capabilities = frozenset(["identify"])
 77 |     touched_fields = frozenset(
 78 |         [
 79 |             "identifier:rpggeek",
 80 |             "title",
 81 |             "authors",
 82 |             "comments",
 83 |             "pubdate",
 84 |             "publisher",
 85 |             "series",
 86 |         ]
 87 |     )
 88 | 
 89 |     # TODO Future work - settings
 90 |     # - What to use as authors. First designer, all designers, fallback to artists,
 91 |     #   fallback to producers, fallback to publishers.
 92 |     # - Setting various things as designated tags. rpg, rpgsetting, rpggenre
 93 |     # - Use original published date or specific version published date
 94 |     def is_customizable(self):
 95 |         """Return whether this plugin has config."""
 96 |         return False
 97 | 
 98 |     # TODO Future work - cover art
 99 |     # - get_cached_cover_url
100 |     # - download_cover
101 | 
102 |     # TODO Future work - ISBN, product code, and language. Need to get them from
103 |     # rpgitemversion, which aren't in the API, so, need to search and scrape.
104 | 
105 |     # TODO When I have rpgitemversion, get actual publisher from there instead.
106 | 
107 |     def get_book_url(self, identifiers):
108 |         """Return ("rpggeek", RPGGeek item ID, RPG item URL), based on RPGGeek item ID.
109 | 
110 |         Returns None if there is no RPGGeek ID.
111 |         See parent class for more information.
112 |         """
113 |         rpggeek_id = identifiers.get(_ID_TYPE, None)
114 |         if rpggeek_id:
115 |             return (
116 |                 _ID_TYPE,
117 |                 rpggeek_id,
118 |                 "https://rpggeek.com/rpgitemversion/" + rpggeek_id,
119 |             )
120 |         return None
121 | 
122 |     def id_from_url(self, url) -> tuple[str, str] | None:
123 |         """Return ("rpggeek", RPG item ID) from RPG item URL, or None if that fails.
124 | 
125 |         See parent class for more information.
126 |         """
127 |         parsed_url: ParseResult = urlparse(url)
128 |         # xGeek's three domains are interchangeable
129 |         if parsed_url.netloc not in (
130 |             "rpggeek.com",
131 |             "boardgamegeek.com",
132 |             "videogamegeek.com",
133 |         ):
134 |             return None
135 | 
136 |         path_parts: list[str] = parsed_url.path.strip("/").split("/")
137 |         if len(path_parts) < 2 or path_parts[0] != "rpgitemversion":
138 |             return None
139 |         return (_ID_TYPE, path_parts[1])
140 | 
141 |     def identify_results_keygen(self, title=None, authors=None, identifiers=None):
142 |         """Sort results based on RPGGeek's search result sorting instead."""
143 |         if not identifiers:
144 |             identifiers = {}
145 |         if not authors:
146 |             authors = []
147 | 
148 |         @total_ordering
149 |         class _KeyGen:
150 |             def __init__(self, metadata):
151 |                 self.relevance = metadata.source_relevance
152 | 
153 |             def __eq__(self, other):
154 |                 return self.relevance == other.relevance
155 | 
156 |             def __ne__(self, other):
157 |                 return self.relevance != other.relevance
158 | 
159 |             def __lt__(self, other):
160 |                 return self.relevance < other.relevance
161 | 
162 |             def __le__(self, other):
163 |                 return self.relevance <= other.relevance
164 | 
165 |             def __gt__(self, other):
166 |                 return self.relevance > other.relevance
167 | 
168 |             def __ge__(self, other):
169 |                 return self.relevance >= other.relevance
170 | 
171 |         def keygen(metadata):
172 |             return _KeyGen(metadata)
173 | 
174 |         return keygen
175 | 
176 |     def _get_metadata_from_thing_api(
177 |         self, rpggeek_id: str, result_queue: Queue, relevance: int, log
178 |     ) -> None:
179 |         response = self.browser.open_novisit(_API_THING_URL + rpggeek_id)
180 |         soup = BeautifulSoup(response, features="lxml")
181 |         log.debug(soup.prettify())
182 |         if not soup.find("item", attrs={"type": "rpgitem"}):
183 |             return
184 |         title = soup.find("name", attrs={"type": "primary"})["value"]
185 |         authors = [
186 |             x["value"] for x in soup.find_all("link", attrs={"type": "rpgdesigner"})
187 |         ]
188 |         pub_date = _get_pub_date(soup)
189 |         publisher = _get_publisher(soup)
190 |         series, index = _get_series(soup)
191 |         comments = _get_comments(soup)
192 | 
193 |         metadata = Metadata(title, authors)
194 |         metadata.set_identifier(_ID_TYPE, rpggeek_id)
195 |         metadata.pubdate = pub_date
196 |         metadata.publisher = publisher
197 |         metadata.series = series
198 |         metadata.comments = comments
199 |         metadata.series_index = index
200 |         metadata.source = self.name
201 |         metadata.source_relevance = relevance
202 |         self.clean_downloaded_metadata(metadata)
203 |         result_queue.put(metadata)
204 | 
205 |     def _search_title(self, title: str, result_queue: Queue, log) -> None:
206 |         """Search for item based on title, using API."""
207 |         title_tokens = self.get_title_tokens(title)
208 |         query = "+".join(title_tokens)
209 |         query = _API_SEARCH_URL + query
210 |         log.debug(query)
211 |         response = self.browser.open_novisit(query)
212 | 
213 |         soup = BeautifulSoup(response, features="lxml")
214 |         log.debug(soup.prettify())
215 | 
216 |         items = soup.find_all("item", attrs={"type": "rpgitem"})
217 |         # TODO You can get multiple things from one query
218 |         # Query for all search results with one API call instead of the below.
219 |         for i, item in enumerate(items):
220 |             self._get_metadata_from_thing_api(
221 |                 item["id"], result_queue, relevance=i, log=log
222 |             )
223 | 
224 |     def identify(
225 |         self,
226 |         log,
227 |         result_queue,
228 |         abort,
229 |         title=None,
230 |         authors=None,
231 |         identifiers=None,
232 |         timeout=30,
233 |     ):  # pylint: disable=too-many-arguments
234 |         """See parent class."""
235 |         if not identifiers:
236 |             identifiers = {}
237 |         if not authors:
238 |             authors = []
239 | 
240 |         # TODO Respect abort and timeout...
241 | 
242 |         rpggeek_id = identifiers.get(_ID_TYPE, None)
243 |         if rpggeek_id:
244 |             self._get_metadata_from_thing_api(
245 |                 rpggeek_id, result_queue, relevance=0, log=log
246 |             )
247 |         else:
248 |             self._search_title(title, result_queue, log)
249 | 


--------------------------------------------------------------------------------