├── .gitattributes ├── .github └── workflows │ ├── python-package.yml │ └── static-analysis.yaml ├── .gitignore ├── .travis.yml ├── LICENSE.md ├── MANIFEST.in ├── README.md ├── requirements.txt ├── setup.py ├── sfpl ├── __init__.py ├── exceptions.py └── sfpl.py └── tests ├── __init__.py ├── assets ├── checkouts.html ├── holds.html └── shelf.html └── test_api.py /.gitattributes: -------------------------------------------------------------------------------- 1 | tests/assets/* linguist-vendored -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # Run `make lint test`: flake8 and pytest. 2 | name: python-test 3 | 4 | on: 5 | push: 6 | branches: [ master ] 7 | pull_request: 8 | workflow_dispatch: 9 | 10 | jobs: 11 | test: 12 | runs-on: ubuntu-latest 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | python-version: ["3.13"] 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v5 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | cache: 'pip' 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install -r requirements.txt 28 | - name: Test with pytest 29 | run: | 30 | pip install pytest 31 | pytest tests 32 | -------------------------------------------------------------------------------- /.github/workflows/static-analysis.yaml: -------------------------------------------------------------------------------- 1 | name: static-analysis 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | workflow_dispatch: 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | - name: Set up flake8 annotations 15 | uses: rbialon/flake8-annotations@v1 16 | - name: Set up ruff 17 | uses: astral-sh/ruff-action@v3 18 | - run: ruff check . 
19 | - run: ruff format . 20 | audit: 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v3 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install -r requirements.txt 28 | # - name: Audit dependencies 29 | # run: | 30 | # python -m pip_audit --strict --requirement requirements.txt 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | 47 | # Translations 48 | *.mo 49 | *.pot 50 | 51 | # Django stuff: 52 | *.log 53 | 54 | # Sphinx documentation 55 | docs/_build/ 56 | 57 | # PyBuilder 58 | target/ 59 | 60 | \.DS_Store 61 | 62 | \.pytest_cache/v/cache/ 63 | 64 | .idea/ 65 | test.py -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "3.4" 5 | - "3.5" 6 | - "3.6" 7 | 8 | install: 9 | - python setup.py install 10 | 11 | script: 12 | - pytest 13 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2018 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SFPL 2 | 3 | Python Package for accessing account, book, and author data from the SFPL Website. 4 | 5 | ## Status 6 | [![python-test](https://github.com/kaijchang/SFPL/actions/workflows/python-package.yml/badge.svg)](https://github.com/kaijchang/SFPL/actions/workflows/python-package.yml) 7 | [![static-analysis](https://github.com/kaijchang/SFPL/actions/workflows/static-analysis.yaml/badge.svg)](https://github.com/kaijchang/SFPL/actions/workflows/static-analysis.yaml) 8 | [![pypi](https://badge.fury.io/py/sfpl.svg)](https://pypi.org/project/sfpl/) 9 | 10 | ## Installation 11 | 12 | From `pip`: 13 | 14 | `$ pip install sfpl` 15 | 16 | From source: 17 | 18 | `$ pip install git+git://github.com/kajchang/SFPL.git` 19 | 20 | Or clone / download this repository and `$ python setup.py install` or `$ pip install .` 21 | 22 | ## Frameworks Used 23 | 24 | `requests` - Used for getting data from the SFPL website and managing login cookies. 25 | 26 | `bs4 + lxml` - Used for parsing information from HTML. 27 | 28 | ## Features 29 | 30 | * Managing current checkouts and holds for your SFPL library account. 31 | 32 | * Searching for books by keyword, title, author, subject, and tag and searching for user-created book lists. 33 | 34 | * Following other library users and viewing their book lists. 
35 | 36 | * Getting library branch hours. 37 | 38 | ### TODO 39 | 40 | * Better Book Status Messages 41 | 42 | ## How to Use 43 | 44 | Searching for books on Python: 45 | 46 | ```python 47 | >>> from sfpl import Search 48 | >>> python_search = Search('Python') 49 | >>> results = python_search.getResults(pages=2) # .getResults is a generator that yields / streams pages of results 50 | >>> for page in results: 51 | print(page) 52 | [Python by Donaldson, Toby, Python by Johansen, Andrew, Python! by Moses, Brian, Python by McGrath, Mike, Python by Vo. T. H, Phuong] 53 | [Python by Romano, Fabrizio, Python by Phillips, Dusty, Python by Joshi, Prateek, Python by Lassoff, Mark, Python by Wayani, Rafiq] 54 | ``` 55 | 56 | Searching for books by J.K. Rowling: 57 | 58 | ```python 59 | >>> from sfpl import Search 60 | >>> jk_search = Search('J.K. Rowling', _type='author') 61 | >>> results = jk_search.getResults() 62 | >>> first_page = next(results).getBooks() 63 | >>> first_page[0].title 64 | "Harry Potter and the Deathly Hallows" 65 | >>> first_page[0].getDetails()["brief"]["description"] 66 | "Harry discovers what fate truly has in store for him as he inevitably makes his way to the final meeting with Voldemort. Book #7" 67 | ``` 68 | 69 | Searching for book lists related to San Francisco: 70 | 71 | ```python 72 | >>> from sfpl import Search 73 | >>> list_search = Search('San Francisco', _type='list') 74 | >>> results = list_search.getResults() 75 | >>> first_page = next(results) 76 | >>> first_page[0].title 77 | 'Made in SF - San Francisco love for young readers' 78 | >>> for book in first_page[0].getBooks(): 79 | print(book) 80 | Al Capone Does My Shirts by Choldenko, Gennifer 81 | Book Scavenger by Bertman, Jennifer Chambliss 82 | ... 83 | ``` 84 | 85 | Getting all your books on hold: 86 | 87 | ```python 88 | >>> from sfpl import Account 89 | >>> my_account = Account('barcode', 'pin') # Replace with your barcode and pin. 
90 | >>> my_holds = my_account.getHolds() 91 | >>> for book in my_holds: 92 | print(book.title) 93 | 'Python for Data Analysis' 94 | 'Automate the Boring Stuff With Python' 95 | >>> for book in my_holds: 96 | print(book.status) 97 | '#4 on 6 copies' 98 | '#7 on 3 copies' 99 | >>> for book in my_holds: 100 | print(book.author) 101 | 'McKinney, Wes' 102 | 'Sweigart, Al' 103 | ``` 104 | 105 | Searching for books by J.K. Rowling but not about Harry Potter: 106 | 107 | ```python 108 | >>> from sfpl import AdvancedSearch 109 | >>> search = AdvancedSearch(includeauthor='J.K. Rowling', excludekeyword='Harry Potter') # Search for books by J.K. Rowling but don't include 'Harry Potter' 110 | >>> results = search.getResults() 111 | >>> for book in results: 112 | print(book.title) 113 | 'Fantastic Beasts and Where to Find Them' 114 | 'Fantastic Beasts and Where to Find Them : The Original Screenplay' 115 | 'The Casual Vacancy' 116 | 'Very Good Lives' 117 | 'Una vacante imprevista' 118 | ``` 119 | 120 | Getting hours for a library branch: 121 | 122 | ```python 123 | >>> from sfpl import Branch 124 | >>> branch = Branch('anza') 125 | >>> branch.getHours() 126 | {'Sun': '1 - 5', 'Mon': '12 - 6', 'Tue': '10 - 9', 'Wed': '1 - 9', 'Thu': '10 - 6', 'Fri': '1 - 6', 'Sat': '10 - 6'} 127 | ``` 128 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4~=4.13.4 2 | requests~=2.31.0 3 | lxml~=5.4.0 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from setuptools import setup 4 | import os 5 | 6 | setup( 7 | name="sfpl", 8 | packages=["sfpl"], 9 | version="1.5.7", 10 | description="Unofficial Python API for SFPL", 11 | author="Kai Chang", 12 | 
author_email="kaijchang@gmail.com", 13 | url="https://github.com/kajchang/sfpl-scraper", 14 | license="MIT", 15 | long_description_content_type="text/markdown", 16 | long_description=open( 17 | os.path.join(os.path.abspath(os.path.dirname(__file__)), "README.md") 18 | ).read(), 19 | install_requires=[ 20 | open( 21 | os.path.join(os.path.abspath(os.path.dirname(__file__)), "requirements.txt") 22 | ) 23 | .read() 24 | .split("\n")[:-1] 25 | ], 26 | ) 27 | -------------------------------------------------------------------------------- /sfpl/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | This module is an unofficial Python API for the San Francisco Public Library's Website that uses requests and BeautifulSoup, with lxml as the HTML parser. 5 | 6 | The module uses a combination of using requests to simulate AJAX requests sent by the webpage to the SFPL's internal "API" and using requests in conjuction with BeautifulSoup and lxml to scrape data from the HTML of the website. 7 | With this module, you can check your holds and checked out books, as well as request and cancel holds and renew books. You can also search for books and user-created book lists using a variety of different filters. 8 | Additionally, you can get the operating times of different SFPL library branches. 
9 | """ 10 | 11 | from .sfpl import Account, Search, User, Branch, AdvancedSearch 12 | 13 | __all__ = ["Account", "Search", "User", "Branch", "AdvancedSearch"] 14 | -------------------------------------------------------------------------------- /sfpl/exceptions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Custom exception classes raised by the sfpl module.""" 4 | 5 | 6 | class NotOnHold(Exception): 7 | """Raised when a user tries to cancel a hold on a book they aren't holding.""" 8 | 9 | def __init__(self, book): 10 | Exception.__init__(self, "{} is not on hold.".format(book)) 11 | 12 | 13 | class NotCheckedOut(Exception): 14 | """Raised when a user tries to renew a book they haven't checked out.""" 15 | 16 | def __init__(self, book): 17 | Exception.__init__(self, "{} is not checked out.".format(book)) 18 | 19 | 20 | class InvalidSearchType(Exception): 21 | """Raised when a user passes an invalid search type for the Search class.""" 22 | 23 | def __init__(self, _type): 24 | Exception.__init__( 25 | self, 26 | "{} is not a valid search type. 
Valid search types are 'keyword', 'title', 'author', 'subject', 'tag' and 'list'.".format( 27 | _type 28 | ), 29 | ) 30 | 31 | 32 | class NoBranchFound(Exception): 33 | """Raised when no matches are found for a user's branch search.""" 34 | 35 | def __init__(self, branch): 36 | Exception.__init__(self, "No matches found for {}.".format(branch)) 37 | 38 | 39 | class NoUserFound(Exception): 40 | """Raised when no matches are found for a user's user search.""" 41 | 42 | def __init__(self, user): 43 | Exception.__init__(self, "No match found for {}".format(user)) 44 | 45 | 46 | class LoginError(Exception): 47 | """Raised when a user's barcode and pin are rejected.""" 48 | 49 | def __init__(self, msg): 50 | Exception.__init__(self, msg) 51 | 52 | 53 | class HoldError(Exception): 54 | """Raised when a user's hold request is denied.""" 55 | 56 | def __init__(self, msg): 57 | Exception.__init__(self, msg) 58 | 59 | 60 | class RenewError(Exception): 61 | """Raised when a user's renew request is denied.""" 62 | 63 | def __init__(self, msg): 64 | Exception.__init__(self, msg) 65 | 66 | 67 | class MissingFilterTerm(Exception): 68 | """Raised when a search term doesn't have include or exclude in it.""" 69 | 70 | def __init__(self): 71 | Exception.__init__( 72 | self, 73 | "Each search term needs to include 'exclude' or 'include' and a valid term type such as 'keyword', 'author', 'title', 'subject', 'series', 'award', 'identifier', 'region', 'genre', 'publisher' or 'callnumber'", 74 | ) 75 | 76 | 77 | class MissingScriptError(Exception): 78 | """Raised when the data script is missing from the page.""" 79 | 80 | pass 81 | 82 | 83 | class NotLoggedIn(Exception): 84 | """Raised when an authentication token is rejected.""" 85 | 86 | pass 87 | -------------------------------------------------------------------------------- /sfpl/sfpl.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | import requests 5 | 6 | 
import re 7 | import math 8 | import json 9 | from typing import Generator 10 | 11 | from bs4 import BeautifulSoup 12 | from . import exceptions 13 | 14 | # Regex Patterns 15 | 16 | id_regex = r"https://sfpl.bibliocommons.com/.+/(\d+)" 17 | book_page_regex = r"[\d,]+ to [\d,]+ of ([\d,]+) results?" 18 | list_page_regex = r"[\d,]+ - [\d,]+ of ([\d,]+) items?" 19 | 20 | 21 | def _extract_data(response_text: str) -> dict: 22 | soup = BeautifulSoup(response_text, "lxml") 23 | script_tag = soup.find("script", {"type": "application/json", "data-iso-key": "_0"}) 24 | if not script_tag: 25 | raise exceptions.MissingScriptError 26 | 27 | return json.loads(script_tag.text) 28 | 29 | 30 | class User: 31 | """A library user account. 32 | 33 | Attributes: 34 | name (str): the account's username. 35 | _id (str): the account's id. 36 | """ 37 | 38 | def __init__(self, name, _id=None): 39 | """ 40 | Args: 41 | name (str): The account's username. 42 | 43 | Raises: 44 | NoUserFound: If the search doesn't return any users. 45 | """ 46 | if not _id: 47 | self.name = name 48 | 49 | resp = requests.get( 50 | "https://sfpl.bibliocommons.com/search?t=user&search_category=user&q={}".format( 51 | self.name 52 | ) 53 | ) 54 | 55 | match = re.match(id_regex, resp.url) 56 | 57 | if not match: 58 | raise exceptions.NoUserFound(name) 59 | 60 | self._id = match.group(1) 61 | 62 | else: 63 | self.name = name 64 | self._id = _id 65 | 66 | def getFollowing(self): 67 | """Gets all the users the account follows. 68 | 69 | Returns: 70 | list: A list of User objects. 71 | """ 72 | return [ 73 | User( 74 | user.find("a").text, re.match(id_regex, user.find("a")["href"]).group(1) 75 | ) 76 | for user in BeautifulSoup( 77 | requests.get( 78 | "https://sfpl.bibliocommons.com/user_profile/{}/following".format( 79 | self._id 80 | ) 81 | ).text, 82 | "lxml", 83 | )(class_="col-xs-12 col-md-4") 84 | ] 85 | 86 | def getFollowers(self): 87 | """Gets all the account's followers. 
88 | 89 | Returns: 90 | list: A list of User objects. 91 | """ 92 | return [ 93 | User( 94 | user.find("a").text, re.match(id_regex, user.find("a")["href"]).group(1) 95 | ) 96 | for user in BeautifulSoup( 97 | requests.get( 98 | "https://sfpl.bibliocommons.com/user_profile/{}/followers".format( 99 | self._id 100 | ) 101 | ).text, 102 | "lxml", 103 | )(class_="col-xs-12 col-md-4") 104 | ] 105 | 106 | def getLists(self): 107 | """Gets all the lists the user has created. 108 | 109 | Returns: 110 | list: A list of List objects. 111 | """ 112 | return [ 113 | List( 114 | { 115 | "type": _list("td")[1].text.strip(), 116 | "title": _list.find("a").text, 117 | "user": self, 118 | "createdon": _list("td")[2].text.strip(), 119 | "itemcount": int(_list("td")[3].text), 120 | "description": None, 121 | "id": _list.find("a")["href"].split("/")[4], 122 | } 123 | ) 124 | for _list in BeautifulSoup( 125 | requests.get( 126 | "https://sfpl.bibliocommons.com/lists/show/{}".format(self._id) 127 | ).text, 128 | "lxml", 129 | ).find("tbody")("tr") 130 | ] 131 | 132 | # def getForLater(self): 133 | # """Get's user's for later shelf. 134 | 135 | # Returns: 136 | # list: A list of Book objects. 137 | # """ 138 | # return self.parseShelf(BeautifulSoup(requests.get( 139 | # 'https://sfpl.bibliocommons.com/collection/show/{}/library/for_later'.format(self._id)).text, 'lxml')) 140 | 141 | # def getInProgress(self): 142 | # """Get's user's in progress shelf. 143 | 144 | # Returns: 145 | # list: A list of Book objects. 146 | # """ 147 | # return self.parseShelf(BeautifulSoup(requests.get( 148 | # 'https://sfpl.bibliocommons.com/collection/{}/my/library/in_progress'.format(self._id)).text, 'lxml')) 149 | 150 | # def getCompleted(self): 151 | # """Get's user's completed shelf. 152 | 153 | # Returns: 154 | # list: A list of Book objects. 
155 | # """ 156 | # return self.parseShelf(BeautifulSoup(requests.get( 157 | # 'https://sfpl.bibliocommons.com/collection/show/{}/library/completed'.format(self._id)).text, 'lxml')) 158 | 159 | # @staticmethod 160 | # def parseShelf(response): 161 | # return [Book({'title': book.find(testid='bib_link').text, 162 | # 'author': book.find(testid='author_search').text if book.find(testid='author_search') else None, 163 | # 'subtitle': book.find(class_='subTitle').text if book.find(class_='subTitle') else None, 164 | # '_id': int(''.join(s for s in book.find(testid='bib_link')['href'] if s.isdigit()))}) 165 | # for book in response('div', lambda value: value and value.startswith('listItem clearfix'))] 166 | 167 | def __str__(self): 168 | return self.name 169 | 170 | def __repr__(self): 171 | return self.name 172 | 173 | def __eq__(self, other): 174 | return self._id == other._id 175 | 176 | def __ne__(self, other): 177 | return self._id != other._id 178 | 179 | 180 | class Account(User): 181 | """The SFPL account class. 182 | 183 | Attributes: 184 | session (requests.Session): The requests session with cookies. 185 | name (str): the account's username. 186 | _id (str): the account's id. 187 | """ 188 | 189 | def __init__(self, barcode, pin): 190 | """ 191 | Args: 192 | barcode (str): The library card barcode. 193 | pin (str): PIN/ password for library account. 194 | 195 | Raises: 196 | LoginError: If we aren't redirected to the main page after login. 
197 | """ 198 | self.session = requests.Session() 199 | 200 | resp = self.session.post( 201 | "https://sfpl.bibliocommons.com/user/login", 202 | data={"name": barcode, "user_pin": pin}, 203 | headers={ 204 | "X-Requested-With": "XMLHttpRequest", 205 | "Accept": "application/json", 206 | }, 207 | ) 208 | 209 | if not resp.json()["logged_in"]: 210 | raise exceptions.LoginError(resp.json()["messages"][0]["key"]) 211 | 212 | main = BeautifulSoup( 213 | self.session.get("https://sfpl.bibliocommons.com/user_dashboard").text, 214 | "lxml", 215 | ) 216 | 217 | super().__init__( 218 | main.find(class_="cp_user_card")["data-name"], 219 | main.find(class_="cp_user_card")["data-id"], 220 | ) 221 | 222 | def hold(self, book, branch): 223 | """Holds the book. 224 | 225 | Args: 226 | book (Book): Book object to hold. 227 | branch (Branch): Branch to have book delivered to. 228 | 229 | Raises: 230 | HoldError: If the hold request is denied. 231 | NotLoggedIn: If the server doesn't accept the token. 232 | """ 233 | resp = self.session.post( 234 | "https://sfpl.bibliocommons.com/holds/place_single_click_hold/{}".format( 235 | book._id 236 | ), 237 | data={ 238 | "authenticity_token": BeautifulSoup( 239 | self.session.get( 240 | "https://sfpl.bibliocommons.com/item/show/{}".format(book._id) 241 | ).text, 242 | "lxml", 243 | ).find("input", {"name": "authenticity_token"})["value"], 244 | "bib": book._id, 245 | "branch": branch._id, 246 | }, 247 | headers={ 248 | "X-Requested-With": "XMLHttpRequest", 249 | "Accept": "application/json", 250 | }, 251 | ) 252 | 253 | if not resp.json()["logged_in"]: 254 | raise exceptions.NotLoggedIn 255 | 256 | if not resp.json()["success"]: 257 | raise exceptions.HoldError(resp.json()["messages"][0]["key"]) 258 | 259 | def cancelHold(self, book): 260 | """Cancels the hold on the book. 261 | 262 | Args: 263 | book (Book): Book to cancel the hold for. 264 | 265 | Raises: 266 | NotOnHold: If the book isn't being held. 
267 | NotLoggedIn: If the server doesn't accept the token. 268 | """ 269 | resp = self.session.get( 270 | "https://sfpl.bibliocommons.com/holds/index/not_yet_available" 271 | ) 272 | 273 | if resp.history: 274 | raise exceptions.NotLoggedIn 275 | 276 | holds = BeautifulSoup(resp.text, "lxml") 277 | 278 | for hold in holds( 279 | "div", 280 | lambda class_: class_ 281 | and class_.startswith("listItem col-sm-offset-1 col-sm-10 col-xs-12"), 282 | ): 283 | if hold.find(testid="bib_link").text == book.title: 284 | resp = self.session.post( 285 | "https://sfpl.bibliocommons.com/holds/delete.json", 286 | data={ 287 | "authenticity_token": holds.find( 288 | "input", {"name": "authenticity_token"} 289 | )["value"], 290 | "confirm_hold_delete": True, 291 | "items[]": hold.find(class_="btn btn-link single_circ_action")[ 292 | "href" 293 | ].split("/")[3], 294 | "bib_status": "future", 295 | "is_private": True, 296 | }, 297 | headers={"X-Requested-With": "XMLHttpRequest"}, 298 | ) 299 | 300 | if not resp.json()["logged_in"]: 301 | raise exceptions.NotLoggedIn 302 | 303 | break 304 | 305 | else: 306 | raise exceptions.NotOnHold(book.title) 307 | 308 | def renew(self, book): 309 | """Renews the hold on the book. 310 | 311 | Args: 312 | book (Book): Book to renew. 313 | 314 | Raises: 315 | NotCheckedOut: If the user is trying to renew a book that they haven't checked out. 316 | RenewError: If the renew request is denied. 317 | NotLoggedIn: If the server doesn't accept the token. 
318 | """ 319 | resp = self.session.get("https://sfpl.bibliocommons.com/checkedout") 320 | 321 | if resp.history: 322 | raise exceptions.NotLoggedIn 323 | 324 | checkouts = BeautifulSoup(resp.text, "lxml") 325 | 326 | for checkout in checkouts( 327 | "div", lambda class_: class_ and class_.startswith("listItem") 328 | ): 329 | if checkout.find(class_="title title_extended").text == book.title: 330 | confirmation = self.session.get( 331 | "https://sfpl.bibliocommons.com/{}".format( 332 | checkout.find(class_="btn btn-link single_circ_action")["href"] 333 | ), 334 | headers={ 335 | "X-CSRF-Token": checkouts.find( 336 | "input", {"name": "authenticity_token"} 337 | )["value"] 338 | }, 339 | ).json() 340 | 341 | if not confirmation["logged_in"]: 342 | raise exceptions.NotLoggedIn 343 | 344 | resp = self.session.post( 345 | "https://sfpl.bibliocommons.com/checkedout/renew", 346 | data={ 347 | "authenticity_token": BeautifulSoup( 348 | confirmation["html"], "lxml" 349 | ).find("input", {"name": "authenticity_token"})["value"], 350 | "items[]": BeautifulSoup(confirmation["html"], "lxml").find( 351 | "input", id="items_" 352 | )["value"], 353 | }, 354 | headers={ 355 | "X-Requested-With": "XMLHttpRequest", 356 | "Accept": "application/json", 357 | "Referer": "https://sfpl.bibliocommons.com/checkedout", 358 | }, 359 | ) 360 | 361 | if not resp.json()["logged_in"]: 362 | raise exceptions.NotLoggedIn 363 | 364 | if not resp.json()["success"]: 365 | raise exceptions.RenewError(resp.json()["messages"][0]["key"]) 366 | 367 | break 368 | 369 | else: 370 | raise exceptions.NotCheckedOut(book.title) 371 | 372 | def follow(self, user): 373 | """Follows the user. 374 | 375 | Args: 376 | user (User): User to follow. 377 | 378 | Raises: 379 | NotLoggedIn: If the server doesn't accept the token. 
380 | """ 381 | resp = self.session.put( 382 | "https://sfpl.bibliocommons.com/user_profile/{}?type=follow&value={}".format( 383 | self._id, user._id 384 | ), 385 | headers={ 386 | "X-Requested-With": "XMLHttpRequest", 387 | "X-CSRF-Token": BeautifulSoup( 388 | self.session.get( 389 | "https://sfpl.bibliocommons.com/user_profile/{}".format( 390 | user._id 391 | ) 392 | ).text, 393 | "lxml", 394 | ).find("meta", {"name": "csrf-token"})["content"], 395 | }, 396 | ) 397 | 398 | if not resp.json()["logged_in"]: 399 | raise exceptions.NotLoggedIn 400 | 401 | def unfollow(self, user): 402 | """Unfollows the user. 403 | 404 | Args: 405 | user (User): User to unfollow. 406 | 407 | Raises: 408 | NotLoggedIn: If the server doesn't accept the token. 409 | """ 410 | resp = self.session.put( 411 | "https://sfpl.bibliocommons.com/user_profile/{}?type=unfollow&value={}".format( 412 | self._id, user._id 413 | ), 414 | headers={ 415 | "X-Requested-With": "XMLHttpRequest", 416 | "X-CSRF-Token": BeautifulSoup( 417 | self.session.get( 418 | "https://sfpl.bibliocommons.com/user_profile/{}".format( 419 | user._id 420 | ) 421 | ).text, 422 | "lxml", 423 | ).find("meta", {"name": "csrf-token"})["content"], 424 | }, 425 | ) 426 | 427 | if not resp.json()["logged_in"]: 428 | raise exceptions.NotLoggedIn 429 | 430 | def getCheckouts(self) -> list["Book"]: 431 | """Gets the user's checked out items. 432 | Returns: 433 | list: A list of Book objects. 434 | """ 435 | resp = self.session.get("https://sfpl.bibliocommons.com/checkedout") 436 | 437 | return self.parseCheckouts(resp.text) 438 | 439 | def getHolds(self) -> list["Book"]: 440 | """Gets the user's held items. 441 | Returns: 442 | list: A list of Book objects. 
443 | """ 444 | resp = self.session.get( 445 | "https://sfpl.bibliocommons.com/holds/index/not_yet_available" 446 | ) 447 | 448 | return self.parseHolds(resp.text) 449 | 450 | @staticmethod 451 | def parseCheckouts(response_text: str) -> list["Book"]: 452 | data = Account.__extract_data(response_text) 453 | 454 | bibs = data["entities"]["bibs"].values() 455 | checkouts = {b["metadataId"]: b for b in data["entities"]["checkouts"].values()} 456 | 457 | # TODO: determine a reasonable status string spec. 458 | def parseStatus(id: str) -> str: 459 | return "Due {}".format(checkouts[id]["dueDate"]) 460 | 461 | return [ 462 | Book(Account._parseDataDict(b), status=parseStatus(b["id"])) for b in bibs 463 | ] 464 | 465 | @staticmethod 466 | def parseHolds(response_text: str) -> list["Book"]: 467 | data = Account.__extract_data(response_text) 468 | 469 | bibs = data["entities"]["bibs"].values() 470 | holds = {b["metadataId"]: b for b in data["entities"]["holds"].values()} 471 | 472 | # TODO: determine a reasonable status string spec. 
473 | def parseStatus(id: str) -> str: 474 | status = holds[id]["status"] 475 | if holds[id].get("holdText"): 476 | status = "{}: {}".format(status, holds[id]["holdText"]) 477 | return status 478 | 479 | return [ 480 | Book(Account._parseDataDict(b), status=parseStatus(b["id"])) for b in bibs 481 | ] 482 | 483 | @staticmethod 484 | def _parseDataDict(bib: dict) -> dict[str, str]: 485 | return { 486 | "title": bib["briefInfo"]["title"], 487 | "subtitle": bib["briefInfo"]["subtitle"], 488 | "author": " & ".join(bib["briefInfo"]["authors"]), 489 | "_id": Book.metaDataIdToId(bib["id"]), 490 | } 491 | 492 | @staticmethod 493 | def __extract_data(response_text: str) -> dict: 494 | try: 495 | return _extract_data(response_text) 496 | except exceptions.MissingScriptError: 497 | raise exceptions.NotLoggedIn 498 | 499 | def loggedIn(self): 500 | return not bool( 501 | self.session.get("https://sfpl.bibliocommons.com/user_dashboard").history 502 | ) 503 | 504 | def logout(self): 505 | """Logs out of the account.""" 506 | self.session.get("https://sfpl.bibliocommons.com/user/logout") 507 | 508 | 509 | class Book: 510 | """A book from the San Francisco Public Library 511 | 512 | Attributes: 513 | title (str): The title of the book. 514 | author (str): The book's author's name. 515 | subtitle (str): The subtitle of the book. 516 | _id (str): SFPL's id for the book. 517 | status (str): The book's status, if applicable. (e.g. duedate, hold position) 518 | """ 519 | 520 | def __init__(self, data_dict, status=None): 521 | self.title = data_dict["title"] 522 | self.author = data_dict["author"] 523 | self.subtitle = data_dict["subtitle"] 524 | self._id = data_dict["_id"] 525 | 526 | self.status = status 527 | 528 | def getDetails(self): 529 | """Get the book's details. 530 | 531 | Returns: 532 | dict: Book details. 
533 | """ 534 | return list( 535 | _extract_data( 536 | requests.get( 537 | "https://sfpl.bibliocommons.com/item/show/{}".format(self._id) 538 | ).text, 539 | )["entities"]["catalogBibs"].values() 540 | )[0] 541 | 542 | @staticmethod 543 | def metaDataIdToId(metaDataId): 544 | """Converts a metadata ID to an ID contained in urls 545 | 546 | Args: 547 | metaDataId (str): The metadataId to convert. 548 | 549 | Returns: 550 | str: The ID contained in urls. 551 | """ 552 | 553 | # var _id = id.split(/[SC]/g), 554 | metaDataID = re.split("[SC]", metaDataId) 555 | 556 | # sourceLibId = _id[1] 557 | sourceLibId = metaDataID[1] 558 | 559 | # var paddedSourceLibId = sourceLibId.padStart(3, '0'); 560 | paddedSourceLibId = ( 561 | (3 - len(sourceLibId)) * "0" + sourceLibId 562 | if len(sourceLibId) < 3 563 | else sourceLibId 564 | ) 565 | 566 | # bibId = _id[2]; 567 | bibId = metaDataID[2] 568 | 569 | return bibId + paddedSourceLibId 570 | 571 | def __str__(self): 572 | return "{} by {}".format(self.title, self.author) if self.author else self.title 573 | 574 | def __repr__(self): 575 | return "{} by {}".format(self.title, self.author) if self.author else self.title 576 | 577 | def __eq__(self, other): 578 | return self._id == other._id 579 | 580 | def __ne__(self, other): 581 | return self._id != other._id 582 | 583 | 584 | class Search: 585 | """A search for books or user-created lists. 586 | 587 | Attributes: 588 | term (str): Search term. 589 | _type(str): The type of search. 590 | """ 591 | 592 | def __init__(self, term, _type="keyword"): 593 | """ 594 | Args: 595 | term (str): Search term. 596 | _type(str, optional): The type of search. 597 | 598 | Raises: 599 | InvalidSearchType: If the search type is not valid. 
600 | """ 601 | if _type.lower() in ["keyword", "title", "author", "subject", "tag", "list"]: 602 | self.term = term 603 | self._type = _type.lower() 604 | 605 | else: 606 | raise exceptions.InvalidSearchType(_type.lower()) 607 | 608 | def getResults(self, pages=1) -> Generator[list["Book"], None, None]: 609 | """Gets the results of the search. 610 | 611 | Args: 612 | pages(int): Number of pages to get. 613 | 614 | Yields: 615 | list: A list of books or lists on the page. 616 | """ 617 | if self._type in ["keyword", "title", "author", "subject", "tag"]: 618 | for x in range(1, pages + 1): 619 | query = "+".join(self.term.split()) 620 | url = f"https://sfpl.bibliocommons.com/v2/search?page={x}&query={query}&searchType={self._type}" 621 | resp = requests.get(url) 622 | soup = BeautifulSoup(resp.text, "lxml") 623 | pages_element = soup.find(string=re.compile(book_page_regex)) 624 | 625 | if not pages_element: 626 | raise StopIteration 627 | 628 | pages = ( 629 | re.match(book_page_regex, pages_element).group(1).replace(",", "") 630 | ) 631 | 632 | if math.ceil(int(pages) / 10) < x: 633 | raise StopIteration 634 | 635 | bib_data = json.loads(soup.find(type="application/json").text)[ 636 | "entities" 637 | ]["bibs"] 638 | 639 | books = [] 640 | 641 | for book in bib_data: 642 | authors = bib_data[book]["briefInfo"]["authors"] 643 | b = Book( 644 | { 645 | "title": bib_data[book]["briefInfo"]["title"], 646 | "author": authors[0] if authors else None, 647 | "subtitle": bib_data[book]["briefInfo"]["subtitle"], 648 | "_id": Book.metaDataIdToId(book), 649 | } 650 | ) 651 | books.append(b) 652 | 653 | yield books 654 | 655 | elif self._type == "list": 656 | for x in range(1, pages + 1): 657 | resp = requests.get( 658 | "https://sfpl.bibliocommons.com/search?page={}&q={}&search_category=userlist&t=userlist".format( 659 | x, self.term 660 | ) 661 | ) 662 | 663 | soup = BeautifulSoup(resp.text, "lxml") 664 | 665 | if ( 666 | math.ceil( 667 | int( 668 | re.match( 669 | 
list_page_regex, 670 | str( 671 | soup.find(string=re.compile(list_page_regex)) 672 | ).strip(), 673 | ) 674 | .group(1) 675 | .replace(",", "") 676 | ) 677 | / 25 678 | ) 679 | < x 680 | ): 681 | raise StopIteration 682 | 683 | yield [ 684 | List( 685 | { 686 | "type": _list.find(class_="list_type small").text.strip(), 687 | "title": _list.find(class_="title").text, 688 | "user": User( 689 | _list.find(class_="username").text, 690 | _list.find(class_="username")["href"].split("/")[4], 691 | ) 692 | if not _list.find(class_="username muted") 693 | else _list.find(class_="username muted").text.strip(), 694 | "createdon": _list.find( 695 | class_="dataPair clearfix small list_created_date" 696 | ) 697 | .find(class_="value") 698 | .text, 699 | "itemcount": int( 700 | _list.find(class_="list_item_count").text.replace( 701 | "items", "" 702 | ) 703 | ), 704 | "description": _list.find( 705 | class_="description" 706 | ).text.replace("\n", ""), 707 | "id": _list.find(class_="title") 708 | .find("a")["href"] 709 | .split("/")[4], 710 | } 711 | ) 712 | for _list in soup(class_="col-xs-12 col-sm-4 cp_user_list_item") 713 | ] 714 | 715 | def __str__(self): 716 | return "Search Type: {} Search Term {}".format(self._type, self.term) 717 | 718 | def __repr__(self): 719 | return "Search Type: {} Search Term {}".format(self._type, self.term) 720 | 721 | def __eq__(self, other): 722 | return self._type == other._type and self.term == other.term 723 | 724 | def __ne__(self, other): 725 | return self._type != other._type or self.term != other.term 726 | 727 | 728 | class AdvancedSearch: 729 | """An advanced, multi-term search. 730 | 731 | Attributes: 732 | query(str): The formatted query. 733 | """ 734 | 735 | def __init__(self, exclusive=True, **kwargs): 736 | """ 737 | Args: 738 | exclusive (bool): If True, join the included terms with AND (results must match all of them); if False, join them with OR (results may match any one of them). 739 | **kwargs: Search terms including one of 'include' or 'exclude' and one type such as 'keyword' or 'author'. 
740 | An example kwarg would be: includeauthor='J.K Rowling' or excludekeyword='Chamber'. 741 | You can include multiple of the same type with includekeyword1='Chamber' and includekeyword2='Secrets'. 742 | 743 | Raises: 744 | MissingFilterTerm: If the term is missing a required part. 745 | """ 746 | term_map = { 747 | "keyword": "anywhere", 748 | "author": "contributor", 749 | "title": "title", 750 | "subject": "subject", 751 | "series": "series", 752 | "award": "award", 753 | "identifier": "identifier", 754 | "region": "region", 755 | "genre": "genre", 756 | "publisher": "publisher", 757 | "callnumber": "callnumber", 758 | } 759 | 760 | for term in kwargs: 761 | if not any( 762 | term.lower() in "{}{}".format(t, s) 763 | for t in ["include", "exclude"] 764 | for s in term_map 765 | ): 766 | raise exceptions.MissingFilterTerm 767 | 768 | include = [ 769 | "{}:({})".format( 770 | "".join(term_map[t] for t in term_map if t in term.lower()), 771 | kwargs[term], 772 | ) 773 | for term in kwargs 774 | if "include" in term.lower() 775 | ] 776 | 777 | exclude = [ 778 | "{}:({})".format( 779 | "".join(term_map[t] for t in term_map if t in term.lower()), 780 | kwargs[term], 781 | ) 782 | for term in kwargs 783 | if "exclude" in term.lower() 784 | ] 785 | 786 | self.query = "({}){}".format( 787 | (" AND " if exclusive else " OR ").join(include), 788 | " -" + "-".join(exclude) if exclude else "", 789 | ) 790 | 791 | def getResults(self, pages=1): 792 | """Generator that yields a stream of results. 793 | 794 | Args: 795 | pages(int): Number of pages to get. 796 | 797 | Yields: 798 | list: A list of books on the page. 799 | 800 | Examples: 801 | >>> search = sfpl.AdvancedSearch(includeauthor='J. K. Rowling', excludekeyword='Harry Potter') 802 | >>> stream = search.getResults(pages=2) 803 | >>> next(stream) 804 | [Fantastic Beasts and Where to Find Them by Rowling, J. K., Fantastic Beasts and Where to Find Them : The Original Screenplay by Rowling, J. 
K., The Casual Vacancy by Rowling, J. K., Very Good Lives by Rowling, J. K., Animales fantásticos y dónde encontrarlos by Rowling, J. K.] 805 | """ 806 | for x in range(1, pages + 1): 807 | resp = requests.get( 808 | "https://sfpl.bibliocommons.com/v2/search?page={}&query={}&searchType=bl".format( 809 | x, self.query 810 | ) 811 | ) 812 | 813 | soup = BeautifulSoup(resp.text, "lxml") 814 | pages_element = soup.find(string=re.compile(book_page_regex)) 815 | 816 | if not pages_element: 817 | raise StopIteration 818 | 819 | pages = re.match(book_page_regex, pages_element).group(1).replace(",", "") 820 | 821 | if math.ceil(int(pages) / 10) < x: 822 | raise StopIteration 823 | 824 | bib_data = json.loads(soup.find(type="application/json").text)["entities"][ 825 | "bibs" 826 | ] 827 | 828 | books = [] 829 | 830 | for book in bib_data: 831 | authors = bib_data[book]["briefInfo"]["authors"] 832 | b = Book( 833 | { 834 | "title": bib_data[book]["briefInfo"]["title"], 835 | "author": authors[0] if authors else None, 836 | "subtitle": bib_data[book]["briefInfo"]["subtitle"], 837 | "_id": Book.metaDataIdToId(book), 838 | } 839 | ) 840 | books.append(b) 841 | 842 | yield books 843 | 844 | def __str__(self): 845 | return self.query 846 | 847 | def __repr__(self): 848 | return self.query 849 | 850 | def __eq__(self, other): 851 | return self.query == other.query 852 | 853 | def __ne__(self, other): 854 | return self.query != other.query 855 | 856 | 857 | class List: 858 | """A user-created list of books. 859 | 860 | Attributes: 861 | _type (str): type of list. 862 | title (str): title of the list. 863 | user (User): creator of the list. 864 | createdOn (str): the day the list was created. 865 | itemcount (int): the number of books in the list. 866 | description (str): a description of the list. 867 | _id (str): SFPL's id for the list. 
868 | """ 869 | 870 | def __init__(self, data_dict): 871 | self._type = data_dict["type"] 872 | self.title = data_dict["title"] 873 | self.user = data_dict["user"] 874 | self.createdOn = data_dict["createdon"] 875 | self.itemcount = data_dict["itemcount"] 876 | self.description = data_dict["description"] 877 | self._id = data_dict["id"] 878 | 879 | def getBooks(self): 880 | return [ 881 | Book( 882 | { 883 | "title": book.find(class_="list_item_title").text.strip(), 884 | "author": book.find(testid="author_search").text, 885 | "subtitle": book.find(class_="list_item_subtitle").text.strip() 886 | if book.find(class_="list_item_subtitle") 887 | else None, 888 | "_id": int( 889 | "".join(s for s in book.find("a")["href"] if s.isdigit()) 890 | ), 891 | } 892 | ) 893 | for book in BeautifulSoup( 894 | requests.get( 895 | "https://sfpl.bibliocommons.com/list/share/{}_{}/{}".format( 896 | self.user._id, self.user.name, self._id 897 | ) 898 | ).text, 899 | "lxml", 900 | )(class_="listItem bg_white col-xs-12") 901 | ] 902 | 903 | def __str__(self): 904 | return self.title 905 | 906 | def __repr__(self): 907 | return self.title 908 | 909 | def __eq__(self, other): 910 | return self._id == other._id 911 | 912 | def __ne__(self, other): 913 | return self._id != other._id 914 | 915 | 916 | class Branch: 917 | """A library branch. 918 | 919 | Attributes: 920 | name (str): The name of the library branch. 921 | _id (str): SFPL's ID for the library branch. 
922 | """ 923 | 924 | BRANCHES = { 925 | "anza": "44563120", 926 | "bayview": "44563121", 927 | "bernal heights": "44563122", 928 | "chinatown": "44563123", 929 | "chinatown children's": "44563124", 930 | "eureka valley": "44563125", 931 | "excelsior": "44563126", 932 | "glen park": "44563127", 933 | "golden gate valley": "44563128", 934 | "ingleside": "44563130", 935 | "main library": "44563151", 936 | "marina": "44563131", 937 | "merced": "44563132", 938 | "mission": "44563133", 939 | "mission bay": "44563134", 940 | "noe valley": "44563135", 941 | "north beach": "44563136", 942 | "ocean view": "44563137", 943 | "ortega": "44563138", 944 | "park": "44563139", 945 | "parkside": "44563140", 946 | "portola": "44563141", 947 | "potrero": "44563142", 948 | "presidio": "44563143", 949 | "richmond": "44563144", 950 | "richmond children's": "44563145", 951 | "sunset": "44563146", 952 | "sunset children's": "44563147", 953 | "visitacion valley": "44563148", 954 | "west portal": "44563149", 955 | "western addition": "44563150", 956 | } 957 | 958 | def __init__(self, name): 959 | """ 960 | Args: 961 | name (str): Name of library branch to match. 962 | 963 | Raises: 964 | NoBranchFound: No matches for the given name were found. 965 | """ 966 | for branch in Branch.BRANCHES: 967 | if name.lower() in branch.lower(): 968 | self.name = branch 969 | self._id = Branch.BRANCHES[self.name] 970 | break 971 | 972 | else: 973 | raise exceptions.NoBranchFound(name) 974 | 975 | def getHours(self): 976 | """Get the operating hours of the library. 977 | 978 | Returns: 979 | dict: A dictionary mapping days of the week to operating hours. 
980 | """ 981 | branch = self.name.replace(" children's", "").replace(" ", "-").lower() 982 | response = requests.get(f"https://sfpl.org/locations/{branch}") 983 | response.raise_for_status() 984 | soup = BeautifulSoup(response.text, "lxml") 985 | result = {} 986 | 987 | for day in soup.select(".office-hours__item"): 988 | day_of_week = day.select_one(".office-hours__item-label").text.strip() 989 | hours = day.select_one(".office-hours__item-slots").text.strip() 990 | result[day_of_week] = hours 991 | 992 | return result 993 | 994 | def __str__(self): 995 | return self.name 996 | 997 | def __repr__(self): 998 | return self.name 999 | 1000 | def __eq__(self, other): 1001 | return self.name == other.name 1002 | 1003 | def __ne__(self, other): 1004 | return self.name != other.name 1005 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaijchang/SFPL/57ce8256674e1132c028b5fc6d614d811f33c960/tests/__init__.py -------------------------------------------------------------------------------- /tests/assets/shelf.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 |
5 | 6 | Bitcoin 7 |
8 | Bitcoin 9 | Examining the Benefits and Risks for Small Business : Hearing Before the Committee on Small Business, United States House of Representatives, One Hundred Thirteenth Congress, Second Session, Hearing Held April 2, 2014 10 | 11 | By United States 12 | 13 | 14 | Website or Online Data 15 | - 2014 16 | 17 | 18 | 19 | 20 | 21 |
22 | Added on: 23 | Jun 07, 2018 24 |
25 | 26 | -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | import unittest 5 | import os 6 | import codecs 7 | 8 | import sfpl 9 | 10 | 11 | class TestScraper(unittest.TestCase): 12 | def test_holds(self): 13 | with codecs.open( 14 | os.path.join( 15 | os.path.abspath(os.path.dirname(__file__)), "assets/holds.html" 16 | ), 17 | encoding="utf-8", 18 | ) as mockup: 19 | result = sfpl.Account.parseHolds(mockup.read()) 20 | 21 | self.assertEqual(len(result), 8) 22 | self.assertEqual(result[0].title, "War on Gaza") 23 | self.assertEqual(result[0].author, "Sacco, Joe") 24 | self.assertEqual(result[0].status, "IN_TRANSIT: IN TRANSIT") 25 | self.assertEqual(result[0].subtitle, "") 26 | self.assertEqual(result[0]._id, "7165420093") 27 | 28 | def test_checkouts(self): 29 | with codecs.open( 30 | os.path.join( 31 | os.path.abspath(os.path.dirname(__file__)), "assets/checkouts.html" 32 | ), 33 | encoding="utf-8", 34 | ) as mockup: 35 | result = sfpl.Account.parseCheckouts(mockup.read()) 36 | 37 | self.assertEqual(len(result), 9) 38 | self.assertEqual(result[0].title, "The Children of the Dead") 39 | self.assertEqual(result[0].author, "Jelinek, Elfriede") 40 | self.assertEqual(result[0].status, "Due 2025-05-16") 41 | self.assertEqual(result[0].subtitle, "") 42 | self.assertEqual(result[0]._id, "6223776093") 43 | 44 | def test_author_search(self): 45 | author = sfpl.Search("J.K. Rowling", _type="author") 46 | results = author.getResults() 47 | 48 | for result in next(results): 49 | self.assertTrue("Rowling, J. K" in result.author) 50 | 51 | def test_pagination(self): 52 | "test pagination" 53 | author = sfpl.Search("J.K. 
Rowling", _type="author") 54 | results = author.getResults(pages=2) 55 | 56 | page_one = next(results) 57 | page_two = next(results) 58 | self.assertNotEqual(page_one, page_two) 59 | 60 | def test_book_search(self): 61 | search = sfpl.Search("Python") 62 | 63 | results = search.getResults() 64 | 65 | for result in next(results): 66 | self.assertTrue("python" in result.title.lower()) 67 | 68 | def test_book_search_with_zero_results(self): 69 | "test book search with zero results" 70 | search = sfpl.Search("qwteyut_does_not_exist") 71 | self.assertRaises(RuntimeError, lambda: next(search.getResults())) 72 | 73 | def test_book_search_with_one_result(self): 74 | "test book search with one result" 75 | search = sfpl.Search("Everything Keeps Dissolving") 76 | self.assertEqual(len(list(search.getResults())), 1) 77 | 78 | def test_list_search(self): 79 | search = sfpl.Search("red", _type="list") 80 | 81 | lists = search.getResults() 82 | 83 | for list_ in next(lists): 84 | self.assertTrue("red" in list_.title.lower()) 85 | 86 | def test_user_search(self): 87 | user = sfpl.User("Sublurbanite") 88 | 89 | user.getFollowers() 90 | user.getFollowing() 91 | user.getLists() 92 | 93 | def test_user_error(self): 94 | with self.assertRaises(sfpl.exceptions.NoUserFound): 95 | sfpl.User("eopghpeghip") 96 | 97 | def test_branch_hours(self): 98 | "test branch hours" 99 | branch = sfpl.Branch("west portal") 100 | actual_hours = branch.getHours() 101 | for day in ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"]: 102 | self.assertIn(day, actual_hours) 103 | self.assertRegex(actual_hours[day], r"\d+ - \d+") 104 | 105 | def test_branch_hours_all(self): 106 | "test branch hours on all branches approximately" 107 | for branch_name in sfpl.Branch.BRANCHES: 108 | branch = sfpl.Branch(branch_name) 109 | actual_hours = branch.getHours() 110 | expected_hours = "12 - 6" if branch_name == "main library" else "1 - 5" 111 | err_msg = f"Sun hours were incorrect for {branch_name}" 112 | 
self.assertEqual(actual_hours["Sun"], expected_hours, err_msg) 113 | 114 | def test_branch_error(self): 115 | with self.assertRaises(sfpl.exceptions.NoBranchFound): 116 | sfpl.Branch("eighhegiohi;eg") 117 | 118 | def test_account_error(self): 119 | with self.assertRaises(sfpl.exceptions.LoginError): 120 | sfpl.Account("flbknnklvd", "uhoegwohi") 121 | 122 | def test_advanced_search(self): 123 | search = sfpl.AdvancedSearch( 124 | includeauthor="J. K. Rowling", excludekeyword="Harry Potter" 125 | ) 126 | 127 | results = search.getResults() 128 | 129 | for result in next(results): 130 | self.assertTrue("harry potter" not in result.title.lower()) 131 | 132 | def test_advanced_search_with_zero_results(self): 133 | "test advanced search with zero results" 134 | search = sfpl.AdvancedSearch(includeauthor="asdafa_does_not_exist") 135 | self.assertRaises(RuntimeError, lambda: next(search.getResults())) 136 | 137 | def test_advanced_search_error(self): 138 | with self.assertRaises(sfpl.exceptions.MissingFilterTerm): 139 | sfpl.AdvancedSearch(soemthingkeyword="Harry Potter") 140 | 141 | with self.assertRaises(sfpl.exceptions.MissingFilterTerm): 142 | sfpl.AdvancedSearch(excludesomething="Harry Potter") 143 | 144 | 145 | if __name__ == "__main__": 146 | unittest.main(verbosity=2) 147 | --------------------------------------------------------------------------------