├── fangraphs ├── __init__.py ├── tests │ ├── __init__.py │ └── test_leaders.py ├── exceptions │ └── __init__.py ├── leaders │ ├── __init__.py │ └── leaders.py └── selectors │ ├── __init__.py │ └── leaders_sel.py ├── setup.py ├── AUTHORS ├── pyproject.toml ├── .gitignore ├── docs ├── api_reference.rst ├── fangraphs.exceptions.rst ├── index.rst ├── fangraphs.selectors.rst ├── fangraphs.leaders.rst ├── fangraphs.rst ├── conf.py └── quickstart.rst ├── requirements.txt ├── setup.cfg ├── LICENSE ├── README.md └── ChangeLog /fangraphs/__init__.py: -------------------------------------------------------------------------------- 1 | #! python3 2 | # FanGraphs/__init__.py 3 | -------------------------------------------------------------------------------- /fangraphs/tests/__init__.py: -------------------------------------------------------------------------------- 1 | #! python3 2 | # tests/__init__.py 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | setuptools.setup(setup_requires=["pbr"], pbr=True) 4 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Jacob Lee <72679601+JLpython-py@users.noreply.github.com> 2 | Jacob Lee 3 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | virtualenv/ 3 | .idea/ 4 | out/ 5 | .vs/ 6 | Makefile 7 | make.bat 8 | docs/_*/ 9 | .eggs/ 10 | build/ 11 | dist/ 12 | fangraphs.egg-info/ 13 | -------------------------------------------------------------------------------- /docs/api_reference.rst: -------------------------------------------------------------------------------- 1 | FanGraphs API Reference 2 | ======================= 3 | 4 | .. toctree:: 5 | 6 | fangraphs 7 | 8 | 9 | .. autosummary:: 10 | 11 | fangraphs 12 | -------------------------------------------------------------------------------- /docs/fangraphs.exceptions.rst: -------------------------------------------------------------------------------- 1 | Fangraphs.exceptions Package 2 | ============================ 3 | 4 | .. automodule:: fangraphs.exceptions 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | FanGraphs documentation 2 | ======================= 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | quickstart 8 | api_reference 9 | 10 | 11 | Indices and tables 12 | ================== 13 | 14 | * :ref:`genindex` 15 | * :ref:`modindex` 16 | * :ref:`search` 17 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | atomicwrites==1.4.0 2 | attrs==20.3.0 3 | beautifulsoup4==4.9.3 4 | certifi==2020.12.5 5 | chardet==4.0.0 6 | colorama==0.4.4 7 | greenlet==1.0.0 8 | idna==2.10 9 | iniconfig==1.1.1 10 | lxml==4.6.3 11 | packaging==20.9 12 | playwright==1.10.0 13 | pluggy==0.13.1 14 | py==1.10.0 15 | pyee==8.1.0 16 | pyparsing==2.4.7 17 | pytest==6.2.2 18 | requests==2.25.1 19 | soupsieve==2.2.1 20 | toml==0.10.2 21 | urllib3==1.26.4 22 | -------------------------------------------------------------------------------- /docs/fangraphs.selectors.rst: -------------------------------------------------------------------------------- 1 | Fangraphs.selectors Package 2 | =========================== 3 | 4 | .. automodule:: fangraphs.selectors 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | Modules 11 | ------- 12 | 13 | .. autosummary:: 14 | 15 | fangraphs.selectors.leaders_sel 16 | 17 | 18 | Fangraphs.selectors.leaders\_sel 19 | -------------------------------- 20 | 21 | .. automodule:: fangraphs.selectors.leaders_sel 22 | :members: 23 | :show-inheritance: 24 | -------------------------------------------------------------------------------- /docs/fangraphs.leaders.rst: -------------------------------------------------------------------------------- 1 | Fangraphs.leaders Package 2 | ========================= 3 | 4 | .. automodule:: fangraphs.leaders 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | Package Modules 11 | --------------- 12 | 13 | .. autosummary:: 14 | 15 | fangraphs.leaders.leaders 16 | 17 | 18 | FanGraphs.leaders.leaders Module 19 | -------------------------------- 20 | 21 | .. automodule:: fangraphs.leaders.leaders 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = fangraphs 3 | author = Jacob Lee 4 | author_email = JLpython@outlook.com 5 | license = MIT 6 | long_description = file: README.md 7 | long_description_content_type = text/markdown 8 | url = http://github.com/JLpython-py/FanGraphs-Export 9 | requires_python = >= 3.6 10 | classifiers = 11 | Development Status :: 5 - Production/Stable 12 | Intended Audience :: Developers 13 | License :: OSI Approved :: MIT License 14 | Operating System :: OS Independent 15 | Programming Language :: Python 16 | Programming Language :: Python :: 3.6 17 | Programming Language :: Python :: 3.7 18 | Programming Language :: Python :: 3.8 19 | Programming Language :: Python :: 3.9 20 | -------------------------------------------------------------------------------- /docs/fangraphs.rst: -------------------------------------------------------------------------------- 1 | Fangraphs 2 | ========= 3 | 4 | .. automodule:: fangraphs 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | .. toctree:: 11 | :numbered: 12 | :maxdepth: 4 13 | 14 | fangraphs.exceptions 15 | fangraphs.leaders 16 | fangraphs.selectors 17 | 18 | Leaders 19 | ------------------------------------------------------------------------------ 20 | 21 | .. autosummary:: 22 | 23 | fangraphs.leaders 24 | fangraphs.leaders.leaders 25 | 26 | 27 | Exceptions 28 | ------------------------------------------------------------------------------ 29 | 30 | .. autosummary:: 31 | 32 | fangraphs.exceptions 33 | 34 | 35 | Selectors 36 | ------------------------------------------------------------------------------ 37 | 38 | .. autosummary:: 39 | 40 | fangraphs.selectors 41 | fangraphs.selectors.leaders_sel 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Jacob Lee 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /fangraphs/exceptions/__init__.py: -------------------------------------------------------------------------------- 1 | #! python3 2 | # FanGraphs/exceptions.py 3 | 4 | """ 5 | Warning and exceptions 6 | """ 7 | 8 | 9 | class FilterUpdateIncapability(Warning): 10 | """ 11 | Raised when the filter queries cannot be updated. 12 | This usually occurs when no filter queries have been configured since the last update. 13 | """ 14 | def __init__(self): 15 | self.message = "No filter query configurations to update" 16 | super().__init__(self.message) 17 | 18 | 19 | class InvalidFilterGroup(Exception): 20 | """ 21 | Raised when an invalid filter group is used. 22 | """ 23 | def __init__(self, group): 24 | """ 25 | :param group: The filter group used 26 | """ 27 | self.group = group 28 | self.message = f"No filter group names '{self.group}' could be found" 29 | super().__init__(self.message) 30 | 31 | 32 | class InvalidFilterQuery(Exception): 33 | """ 34 | Raised when an invalid filter query is used. 35 | """ 36 | def __init__(self, query): 37 | """ 38 | :param query: The filter query used 39 | """ 40 | self.query = query 41 | self.message = f"No filter named '{self.query}' could be found" 42 | super().__init__(self.message) 43 | 44 | 45 | class InvalidFilterOption(Exception): 46 | """ 47 | Raised when a filter query is configured to a nonexistend option. 48 | """ 49 | def __init__(self, option): 50 | """ 51 | :param option: The option which the filter query was configured to 52 | """ 53 | self.option = option 54 | self.message = f"Could not configure to '{self.option}'" 55 | super().__init__(self.message) 56 | 57 | 58 | class InvalidQuickSplit(Exception): 59 | """ 60 | Raised when an invalid quick split is used. 61 | """ 62 | def __init__(self, quick_split): 63 | """ 64 | :param quick_split: The quick split used 65 | """ 66 | self.quick_split = quick_split 67 | self.message = f"No quick split '{self.quick_split}` could be found" 68 | super().__init__(self.message) 69 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath("..")) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'FanGraphs-Export' 21 | copyright = '2021, Jacob Lee' 22 | author = 'Jacob Lee' 23 | 24 | # The full version, including alpha/beta/rc tags 25 | release = 'v1.1.0' 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | "sphinx.ext.autosummary" 35 | ] 36 | 37 | # Add any paths that contain templates here, relative to this directory. 38 | templates_path = ['_templates'] 39 | 40 | # List of patterns, relative to source directory, that match files and 41 | # directories to ignore when looking for source files. 42 | # This pattern also affects html_static_path and html_extra_path. 43 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 44 | 45 | 46 | # -- Options for HTML output ------------------------------------------------- 47 | 48 | # The theme to use for HTML and HTML Help pages. See the documentation for 49 | # a list of builtin themes. 50 | # 51 | html_theme = 'classic' 52 | 53 | # Add any paths that contain custom static files (such as style sheets) here, 54 | # relative to this directory. They are copied after the builtin static files, 55 | # so a file named "default.css" will overwrite the builtin "default.css". 56 | html_static_path = ['_static'] 57 | -------------------------------------------------------------------------------- /fangraphs/leaders/__init__.py: -------------------------------------------------------------------------------- 1 | #! python3 2 | # FanGraphs/leaders/__init__.py 3 | 4 | """ 5 | Subpackage for scraping the FanGraphs **Leaders** pages. 6 | """ 7 | 8 | import datetime 9 | import os 10 | 11 | import bs4 12 | from playwright.sync_api import sync_playwright 13 | 14 | 15 | class ScrapingUtilities: 16 | """ 17 | Manages the various objects used for scraping the FanGraphs webpages. 18 | Intializes and manages ``Playwright`` browsers and pages. 19 | Intializes and manages ``bs4.BeautifulSoup`` objects. 20 | """ 21 | def __init__(self, address, *, waitfor=""): 22 | """ 23 | :param address: The base URL address of the FanGraphs page 24 | .. py:attribute:: address 25 | The base URL address of the FanGraphs page 26 | :type: str 27 | .. py:attribute:: page 28 | The generated synchronous ``Playwright`` page for browser automation. 29 | :type: playwright.sync_api._generated.Page 30 | .. py:attribute:: soup 31 | The ``BeautifulSoup4`` HTML parser for scraping the webpage. 32 | :type: bs4.BeautifulSoup 33 | """ 34 | self.address = address 35 | self.waitfor = waitfor 36 | os.makedirs("out", exist_ok=True) 37 | 38 | self.__play = None 39 | self.__browser = None 40 | self.page = None 41 | 42 | self.soup = None 43 | 44 | def _browser_init(self): 45 | self.__play = sync_playwright().start() 46 | self.__browser = self.__play.chromium.launch( 47 | downloads_path=os.path.abspath("out") 48 | ) 49 | self.page = self.__browser.new_page( 50 | accept_downloads=True 51 | ) 52 | self._refresh_parser() 53 | 54 | def _refresh_parser(self): 55 | """ 56 | Re-initializes the ``bs4.BeautifulSoup`` object stored in :py:attr:`soup`. 57 | """ 58 | if self.waitfor: 59 | self.page.wait_for_selector(self.waitfor) 60 | self.soup = bs4.BeautifulSoup( 61 | self.page.content(), features="lxml" 62 | ) 63 | 64 | def _close_ad(self): 65 | """ 66 | Closes the ad which may interfere with clicking other page elements. 67 | """ 68 | elem = self.page.query_selector(".ezmob-footer-close") 69 | if self.soup.select("#ezmob-wrapper > div[style='display: none;']"): 70 | return 71 | if elem: 72 | elem.click() 73 | 74 | def export_data(self, selector: str, path=""): 75 | """ 76 | Uses the **Export Data** button on the webpage to export the current leaderboard. 77 | The data will be exported as a CSV file and the file will be saved to *out/*. 78 | The file will be saved to the filepath ``path``, if specified. 79 | Otherwise, the file will be saved to the filepath *./out/%d.%m.%y %H.%M.%S.csv* 80 | :param selector: The CSS selector of the **Export Data** button 81 | :param path: The path to save the exported data to 82 | """ 83 | self._close_ad() 84 | if not path or os.path.splitext(path)[1] != ".csv": 85 | path = "out/{}.csv".format( 86 | datetime.datetime.now().strftime("%d.%m.%y %H.%M.%S") 87 | ) 88 | with self.page.expect_download() as down_info: 89 | self.page.click(selector) 90 | download = down_info.value 91 | download_path = download.path() 92 | os.rename(download_path, path) 93 | 94 | def reset(self): 95 | """ 96 | Navigates :py:attr:`page` to :py:attr:`address`. 97 | """ 98 | self.page.goto(self.address, timeout=0) 99 | self._refresh_parser() 100 | 101 | def quit(self): 102 | """ 103 | Terminates the ``Playwright`` browser and context manager. 104 | """ 105 | self.__browser.close() 106 | self.__play.stop() 107 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FanGraphs-Export 2 | 3 | _This package is planned to be integrated into the [SABRmetrics](https://github.com/JacobLee23/SABRmetrics) package._ 4 | 5 | ![FanGraphs logo](https://user-images.githubusercontent.com/72679601/112188979-c335d980-8bc0-11eb-8ab9-992663e9e0e6.png) 6 | 7 | ![Last Commit: master](https://img.shields.io/github/last-commit/JLpython-py/FanGraphs-export/master) 8 | ![Last Commit: development](https://img.shields.io/github/last-commit/JLpython-py/FanGraphs-Export/development) 9 | 10 | ![Milestone 1](https://img.shields.io/github/milestones/progress/JLpython-py/FanGraphs-Export/1) 11 | ![Latest Release](https://img.shields.io/github/v/tag/JLpython-py/FanGraphs-Export) 12 | ![License: MIT](https://img.shields.io/github/license/JLpython-py/FanGraphs-Export) 13 | ![Read the Docs](https://img.shields.io/readthedocs/fangraphs-export) 14 | 15 | The [FanGraphs](https://fangraphs.com/) website, well-known among baseball fans, provides a variety of baseball statistics. 16 | The statistics available are extremely expansive, as the website brags stats for every player in MLB history. 17 | 18 | The `fangraphs` package allows for simple, intuitive parsing of the many webpages available. 19 | While not every page is "scrape-able" (i.e. the pages are most composed of graphics), 20 | there are plans for covering as many pages as possible, including the most popular ones. 21 | This package contains modules for scraping and exporting data from each of the covered webpages. 22 | 23 | ## Dependencies 24 | 25 | The `fangraphs` library requires Python version 3.6 or higher. 26 | 27 | The following libraries along are required for the `fangraphs` library. 28 | 29 | - `BeautifulSoup4` 30 | - `lxml` 31 | - `playwright` 32 | - `pytest` 33 | - `requests` 34 | 35 | *Note: The dependencies of each package listed above are also required.* 36 | 37 | To install all the necessary packages, run: 38 | 39 | ```commandline 40 | pip install -r requirements.txt 41 | ``` 42 | 43 | *Note: The browser binaries of `playwright` are needed for proper usage. 44 | To install the browser binaries, run `playwright install`. 45 | See the [Playwright documentation](https://playwright.dev/python/docs/intro/) for more information.* 46 | 47 | ## Documentation 48 | 49 | The **Read the Docs** documentation can be found [here](https://fangraphs-export.readthedocs.io/en/latest/?). 50 | ## Basic Usage 51 | 52 | Each group of FanGraphs pages (e.g. Leaders, Projections, etc.) which is covered has an individual module. 53 | Each webpage in each group of webpages has an individual class covering the page. 54 | 55 | Covered FanGraphs webpage groups: 56 | 57 | - [Leaders](#Leaders) 58 | 59 | ### Leaders 60 | 61 | FanGraphs Leaders pages: 62 | 63 | - [Major League Leaderboards](https://fangraphs.com/leaders.aspx) 64 | - [Splits Leaderboards](https://fangraphs.com/leaders/splits-leaderboards) 65 | - [Season Stat Grid](https://fangraphs.com/leaders/season-stat-grid) 66 | - [60-Game Span Leaderboards](https://fangraphs.com/leaders/special/game-span) 67 | - [KBO Leaderboards](https://fangraphs.com/leaders/international) 68 | - [WAR Leaderboards](https://fangraphs.com/warleaders.aspx) 69 | 70 | ```python 71 | from fangraphs.leaders import leaders 72 | 73 | mll = leaders.MajorLeague() 74 | splits = leaders.Splits() 75 | ssg = leaders.SeasonStat() 76 | gsl = leaders.GameSpan() 77 | intl = leaders.International() 78 | war = leaders.WAR() 79 | ``` 80 | 81 | ## Tests 82 | 83 | To run all tests, run `pytest FanGraphs` 84 | 85 | To run the tests for a specific module, run `pytest fangraphs/tests/test_module_name.py`. 86 | For example, 87 | 88 | ```commandline 89 | pytest fangraphs/tests/test_leaders.py 90 | ``` 91 | 92 | To run the tests for a specific class, run `pytest -k "TestClassName"`. 93 | For example, 94 | 95 | ```commandline 96 | pytest -k "TestMajorLeagueLeaderboards" 97 | ``` 98 | 99 | ## License 100 | 101 | The code in this repository is licensed under an MIT License. 102 | 103 | **Copyright (c) 2021 Jacob Lee** 104 | -------------------------------------------------------------------------------- /docs/quickstart.rst: -------------------------------------------------------------------------------- 1 | Fangraphs Quickstart 2 | ==================== 3 | 4 | Leaders 5 | ------- 6 | 7 | Pages 8 | ^^^^^ 9 | 10 | The webpages under the FanGraphs **Leaders** tab are covered by the ``fangraphs.leaders`` package. 11 | Each page has its own web scraper class in the ``fangraphs.leaders.leaders`` module. 12 | Each covered FanGraphs **Leaders** page with the corresponding class is listed below: 13 | 14 | +-------------------------------+-----------------------------------------------+ 15 | | FanGraphs Leaders page | ``fangraphs`` class | 16 | +===============================+===============================================+ 17 | | `60-Game Span Leaderboards`_ | ``fangraphs.leaders.leaders.GameSpan`` | 18 | +-------------------------------+-----------------------------------------------+ 19 | | `KBO Leaders`_ | ``fangraphs.leaders.leaders.International`` | 20 | +-------------------------------+-----------------------------------------------+ 21 | | `Major League Leaders`_ | ``fangraphs.leaders.leaders.MajorLeague`` | 22 | +-------------------------------+-----------------------------------------------+ 23 | | `Season Stat Grid`_ | ``fangraphs.leaders.leaders.SeasonStat`` | 24 | +-------------------------------+-----------------------------------------------+ 25 | | `Splits Leaderboards`_ | ``fangraphs.leaders.leaders.Splits`` | 26 | +-------------------------------+-----------------------------------------------+ 27 | | `Combined WAR Leaderboards`_ | ``fangraphs.leaders.leaders.WAR`` | 28 | +-------------------------------+-----------------------------------------------+ 29 | 30 | .. _60-Game Span Leaderboards: https://fangraphs.com/leaders/special/game-span 31 | .. _KBO Leaders: https://fangraphs.com/leaders/international 32 | .. _Major League Leaders: https://fangraphs.com/leaders.aspx 33 | .. _Season Stat Grid: https://fangraphs.com/leaders/season-stat-grid 34 | .. _Splits Leaderboards: https://fangraphs.com/leaders/splits-leaderboards 35 | .. _Combined WAR Leaderboards: https://fangraphs.com/warleaders.aspx 36 | 37 | Usage Introduction 38 | ^^^^^^^^^^^^^^^^^^ 39 | 40 | An object can be created to scrape the corresponding webpage by calling the class, with not arguments:: 41 | 42 | from fangraphs.leaders import leaders 43 | 44 | gsl = leaders.GameSpan() 45 | inter = leaders.International() 46 | mll = leaders.MajorLeague() 47 | ssg = leaders.SeasonStat() 48 | splitsl = leaders.Splits() 49 | warl = leaders.WAR() 50 | 51 | Alternatively, the classes can be used as context managers:: 52 | 53 | from fangraphs.leaders import leaders 54 | 55 | with leaders.MajorLeague() as scraper: 56 | # Do stuff here 57 | 58 | Basic Usage 59 | ^^^^^^^^^^^ 60 | 61 | All the classes share a few methods which perform the same tasks: 62 | 63 | - `list_queries(self)`: Lists the usable filter queries of the page 64 | - `list_options(self, query)`: Lists the options which ``query`` can be configured to 65 | - `current_option(self, query)`: Returns the current option which ``query`` is configured to 66 | - `configure(self, query, option)`: Configures ``query`` to ``option``. 67 | - `export(self, path="")`: Exports the current data table as a CSV file, to ``path``. 68 | 69 | Since each class inherits the same parent class, the following methods are also available: 70 | 71 | - `reset(self)`: Navigates the remote browser to the original webpage. 72 | - `quit(self)`: Terminates the remote browser. 73 | 74 | Below is a basic example with a ``MajorLeague`` object:: 75 | 76 | from fangraphs.leaders import leaders 77 | scraper = leaders.MajorLeague() 78 | scraper.configure("stat", "Pitching") 79 | scraper.configure("team", "LAD") 80 | scraper.export("LADPitching.csv") 81 | scraper.quit() 82 | 83 | Or, using the context manager syntax:: 84 | 85 | from fangraphs.leaders import leaders 86 | with leaders.MajorLeague() as scraper: 87 | scraper.configure("stat", "Pitching") 88 | scraper.configure("team", "LAD") 89 | scraper.export("LADPitching.csv") 90 | 91 | -------------------------------------------------------------------------------- /fangraphs/selectors/__init__.py: -------------------------------------------------------------------------------- 1 | #! python3 2 | # FanGraphs/selectors/__init__.py 3 | 4 | import fangraphs.exceptions 5 | 6 | 7 | class Selections: 8 | """ 9 | Manages selection-class filter queries. 10 | """ 11 | def __init__(self, soup, selector, descendant=""): 12 | self.soup = soup 13 | self.selector = selector 14 | self.descendant = descendant 15 | 16 | def list_options(self): 17 | if isinstance(self.selector, str): 18 | elems = self.soup.select(f"{self.selector} {self.descendant}") 19 | elif isinstance(self.selector, list): 20 | elems = [ 21 | self.soup.select(s)[0] 22 | for s in self.selector 23 | ] 24 | else: 25 | raise Exception 26 | options = [e.getText() for e in elems] 27 | return options 28 | 29 | def current_option(self): 30 | if isinstance(self.selector, str): 31 | elem = self.soup.select(f"{self.selector} .rtsLink.rtsSelected") 32 | option = elem.getText() 33 | elif isinstance(self.selector, list): 34 | option = "" 35 | for sel in self.selector: 36 | elem = self.soup.select(sel)[0] 37 | if "active" in elem.get("class"): 38 | option = elem.getText() 39 | else: 40 | raise Exception 41 | return option 42 | 43 | async def configure(self, page, option: str): 44 | option = option.lower() 45 | options = [o.lower() for o in self.list_options()] 46 | try: 47 | index = options.index(option) 48 | except ValueError as err: 49 | raise fangraphs.exceptions.InvalidFilterOption(option) from err 50 | if isinstance(self.selector, str): 51 | elem = await page.query_selector_all( 52 | f"{self.selector} {self.descendant}" 53 | )[index] 54 | await elem.click() 55 | elif isinstance(self.selector, list): 56 | await page.click(self.selector[index]) 57 | else: 58 | raise Exception 59 | 60 | 61 | class Dropdowns: 62 | """ 63 | Manage dropdown-class filter queries. 64 | """ 65 | def __init__(self, soup, selector, descendants="", dd_options=None): 66 | self.soup = soup 67 | self.selector = selector 68 | self.descendants = descendants 69 | self.dd_options = dd_options 70 | 71 | def list_options(self): 72 | if self.dd_options: 73 | elems = self.soup.select(f"{self.dd_options} {self.descendants}") 74 | options = [e.getText() for e in elems] 75 | else: 76 | elems = self.soup.select(f"{self.selector} {self.descendants}") 77 | options = [e.getText() for e in elems] 78 | return options 79 | 80 | def current_option(self, opt_type, *, multiple=False): 81 | if opt_type == 1: 82 | elem = self.soup.select(self.selector)[0] 83 | option = elem.get("value") 84 | elif opt_type == 2: 85 | elems = self.soup.select(f"{self.selector} {self.descendants}") 86 | option = [ 87 | e.getText() for e in elems 88 | if "highlight-selection" in e.get("class") 89 | ] 90 | if not multiple: 91 | option = option[0] if option else "" 92 | elif opt_type == 3: 93 | elem = self.soup.select(f"{self.selector} > div > span")[0] 94 | option = elem.getText() 95 | else: 96 | raise Exception 97 | return option 98 | 99 | async def configure(self, page, option: str): 100 | options = [o.lower() for o in self.list_options()] 101 | try: 102 | index = options.index(option.lower()) 103 | except ValueError as err: 104 | raise fangraphs.exceptions.InvalidFilterOption(option) from err 105 | await page.click(self.selector) 106 | elem = await page.query_selector_all( 107 | f"{self.selector} {self.descendants}" 108 | )[index] 109 | await elem.click() 110 | 111 | 112 | class Switches: 113 | """ 114 | Manages checkbox-class filter queries. 115 | """ 116 | def __init__(self, soup, selector): 117 | self.soup = soup 118 | self.selector = selector 119 | 120 | def current_option(self, opt_type): 121 | if opt_type == 1: 122 | elem = self.soup.select(self.selector)[0] 123 | option = "True" if elem.get("checked") == "checked" else "False" 124 | elif opt_type == 2: 125 | elem = self.soup.select(self.selector)[0] 126 | option = "True" if "isActive" in elem.get("class") else "False" 127 | else: 128 | raise Exception 129 | return option 130 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | CHANGES 2 | ======= 3 | 4 | v1.1.0 5 | ------ 6 | 7 | * Update documentation, add quickstart 8 | * Update documentation, add quickstart 9 | * Restructure classes to allow context manager syntax 10 | * Update requirements 11 | * Release v1.0.2 12 | 13 | v1.0.2 14 | ------ 15 | 16 | * Update REAMDE, documentation 17 | * Relocate all fangraphs.leaders.\* modules to fangraphs.leaders.leaders 18 | * Refactor list\_options, current\_option, and configure (and private helper) methods 19 | * Update documentation 20 | * Move classes from fangraphs.leaders for fangraphs.leaders.\* 21 | * Release v1.0.1 22 | 23 | v1.0.1 24 | ------ 25 | 26 | * Release v1.0.1 27 | * Correct README test instructions 28 | * Fix naming inconsistincies within modules; Fix bug in tests 29 | * Update files for v1.0.0 30 | 31 | v1.0.0 32 | ------ 33 | 34 | * Add remaining files 35 | * Generate AUTHORS and ChangeLog 36 | * Create setup.cfg and setup.py files 37 | * Revise docstrings 38 | * Update .gitignore 39 | * Correct typos in docstrings 40 | * Correct docstrings in FanGraphs.selectors.leaders\_sel 41 | * Move ScrapingUtilities to FanGraphs.utilities 42 | * Rename class names in FanGraphs.leaders 43 | * Apply CSS selector changes to docs 44 | * Relocate CSS selectors 45 | * Add doc files for all modules 46 | * Add FanGraphs.leaders autodocumentation 47 | * Run Sphinx Quickstart 48 | 49 | v0.8.2 50 | ------ 51 | 52 | * Add/Edit module docstrings 53 | * Update README 54 | * Update requirements 55 | 56 | v0.8.1 57 | ------ 58 | 59 | * Run pylint 60 | * Update README.md 61 | 62 | v0.8.0 63 | ------ 64 | 65 | * Add method docstrings 66 | * Create export method 67 | * Create configure method and private helper methods 68 | * Create current\_option method 69 | * Create list\_queries and list\_options methods 70 | * Compile CSS Selectors 71 | * Refactor setup\_class methods in test classes 72 | * Initialize InternationalLeaderboards class 73 | * Update README.md 74 | 75 | v0.7.0 76 | ------ 77 | 78 | * Add method docstrings 79 | * Move waitfor selector to FanGraphs.selectors.leader\_sel package modules 80 | * Create export method 81 | * Create configure method and private helper methods 82 | * Create current\_option method 83 | * Create list\_options method 84 | * Create list\_queries method 85 | * Compile CSS selectors for WARLeaderboards 86 | * Initialize WARLeaderboards class 87 | * Move CSS Selectors to FanGraphs.selectors.leaders\_sel subpackage 88 | * Update README.md 89 | * Create LICENSE 90 | * Update README.md 91 | 92 | v0.6.0 93 | ------ 94 | 95 | * Pylint 96 | * Add class and method docstrings 97 | * Create export method; Rework export methods of other classes 98 | * Create configure method and helper private methods 99 | * Create current\_option method 100 | * Create list\_queries and list\_options methods 101 | * Update unit tests 102 | * Refactor classes (\_\_Utils integration) 103 | * Refactor reset instance methods of classes 104 | * Intialize GameSpanLeaderboards 105 | 106 | v0.5.1 107 | ------ 108 | 109 | * Change FanGraphs.leaders.SplitsLeaderboards.list\_quick\_splits from instance method to class method 110 | 111 | v0.5.0 112 | ------ 113 | 114 | * Update README 115 | * Update README 116 | 117 | v0.4.0 118 | ------ 119 | 120 | * Delete functional tests 121 | * Add docstrings to TestSeasonStatGrid 122 | * Add docstrings to TestSeasonSTatGrid 123 | * Add docstrings to TestMajorLeagueLeaderboards 124 | * Refactor TestSplitsLeaderboards; Add docstrings 125 | * Refactor functional tests into unit tests: TestMajorLeagueLeaderboards 126 | * Refactor TestSeasonStatGrid 127 | * Refactor TestSplitsLeaderboards 128 | * Refactor TestMajorLeagueLeaderboards 129 | * Refactor TestSeasonStatGrid 130 | * Refactor TestSplitsLeaderboards 131 | * Refactor TestMajorLeagueLeaderboards 132 | * Refactor Playwright browser initiation 133 | * Restructure file hierarchy 134 | * Remove FanGraphs.projections module 135 | * Install pytest 136 | 137 | v0.3.0 138 | ------ 139 | 140 | * Add module-level docstrings 141 | * Update docstrings 142 | * Refactor functional tests 143 | * Bugfix unit tests 144 | * Bugfix unit tests 145 | * Refactor SeasonStatGrid and SplitsLeaderboards unit tests 146 | * Refactor MajorLeagueLeaderboards unit tests 147 | * Refactor MajorLeagueLeaderboards to use Playwright 148 | * Refactor SeasonStatGrid to use Playwright 149 | * Refactor SplitsLeaderboards to use Playwright 150 | * Install Playwright 151 | * Refactor quick splits 152 | * Modify execute method (still buggy) 153 | * Add quick splits, quick\_split and configure\_quick\_split methods 154 | * Add export method (buggy) 155 | * Add configure method 156 | * Remove quick split compatibility; Add current\_option method 157 | * Add list\_filter\_groups and list\_options methods 158 | * Add list\_queries method 159 | * Compile quick splits and splits CSS Selectors 160 | * Add update method 161 | * Compile dropdown CSS selectors 162 | * Compile selections CSS selectors 163 | * Add configure\_group method 164 | * Add reset\_filters method 165 | 166 | v0.2.0 167 | ------ 168 | 169 | * Separate functional tests 170 | * Add docstrings to class and class methods 171 | * Create export method 172 | * Add explicit wait before creating bs4.BeautifulSoup object 173 | * Use browser page source instead of '\*' CSS Selector 174 | * Fix bug in configure method 175 | * Refactor tests 176 | * Finalize configure method 177 | * Switch main parser from Selenium to BeautifulSoup4 178 | * Install requests, BeautifulSoup4, cssselect 179 | * Add configure method (buggy) 180 | * Fix bugs in current\_option method 181 | * Add current\_option method 182 | * Add list\_options method 183 | * Add list\_queries method 184 | * Migrate exceptions to separate file 185 | * Compile remaining CSS selectors 186 | * Compile CSS selectors 187 | * Initialize SeasonStatGrid class 188 | * Initialize SplitsLeaderboards class 189 | * Update README.md 190 | 191 | v0.1.0 192 | ------ 193 | 194 | * Add query configuration for leaderboard type 195 | * Create export method, update functional and unit tests 196 | * Create current\_options and configure classmethods 197 | * Create list\_queries classmethod 198 | * Write listoptions classmethod 199 | * Add page filter element XPaths 200 | * Add \_\_init\_\_ to MajorLeagueLeaderboards 201 | * Initialize tests for leaders.MajorLeagueLeaderboards 202 | * Initialize directory for unit tests 203 | * Initialize classes 204 | * Initial Commit 205 | -------------------------------------------------------------------------------- /fangraphs/selectors/leaders_sel.py: -------------------------------------------------------------------------------- 1 | #! python3 2 | # FanGraphs/selectors/leaders_sel.py 3 | 4 | """ 5 | CSS selectors for the classes in :py:mod:`FanGraphs.leaders`. 6 | """ 7 | 8 | 9 | class GameSpan: 10 | """ 11 | CSS selectors for :py:mod:`fangraphs.leaders.gamespan`. 12 | """ 13 | selections = { 14 | "stat": [ 15 | ".controls-stats > .fgButton:nth-child(1)", 16 | ".controls-stats > .fgButton:nth-child(2)" 17 | ], 18 | "type": [ 19 | ".controls-board-view > .fgButton:nth-child(1)", 20 | ".controls-board-view > .fgButton:nth-child(2)", 21 | ".controls-board-view > .fgButton:nth-child(3)" 22 | ] 23 | } 24 | dropdowns = { 25 | "min_pa": ".controls-stats:nth-child(1) > div:nth-child(3) > .fg-selection-box__selection", 26 | "single_season": ".controls-stats:nth-child(2) > div:nth-child(1) > .fg-selection-box__selection", 27 | "season1": ".controls-stats:nth-child(2) > div:nth-child(2) > .fg-selection-box__selection", 28 | "season2": ".controls-stats:nth-child(2) > div:nth-child(3) > .fg-selection-box__selection", 29 | "determine": ".controls-stats.stat-determined > div:nth-child(1) > .fg-selection-box__selection" 30 | } 31 | waitfor = ".fg-data-grid.table-type" 32 | 33 | 34 | class International: 35 | """ 36 | CSS selectors for :py:mod:`fangraphs.leaders.international`. 37 | """ 38 | selections = { 39 | "stat": [ 40 | ".controls-stats > .fgButton:nth-child(1)", 41 | ".controls-stats > .fgButton:nth-child(2)" 42 | ], 43 | "type": [ 44 | ".controls-board-view > .fgButton:nth-child(1)", 45 | ".controls-board-view > .fgButton:nth-child(2)" 46 | ] 47 | } 48 | dropdowns = { 49 | "position": ".controls-stats:nth-child(1) > div:nth-child(3) > .fg-selection-box__selection", 50 | "min": ".controls-stats:nth-child(1) > div:nth-child(4) > .fg-selection-box__selection", 51 | "single_season": ".controls-stats:nth-child(2) > div:nth-child(1) > .fg-selection-box__selection", 52 | "season1": ".controls-stats:nth-child(2) > div:nth-child(2) > .fg-selection-box__selection", 53 | "season2": ".controls-stats:nth-child(2) > div:nth-child(3) > .fg-selection-box__selection", 54 | "league": ".controls-stats:nth-child(3) > div:nth-child(1) > .fg-selection-box__selection", 55 | "team": ".controls-stats:nth-child(3) > div:nth-child(2) > .fg-selection-box__selection", 56 | } 57 | switches = { 58 | "split_seasons": ".controls-stats > .fg-checkbox" 59 | } 60 | waitfor = ".fg-data-grid.table-type" 61 | 62 | 63 | class MajorLeague: 64 | """ 65 | CSS selectors for :py:mod:`fangraphs.leaders.majorleague`. 66 | """ 67 | selections = { 68 | "group": "#LeaderBoard1_tsGroup", 69 | "stat": "#LeaderBoard1_tsStats", 70 | "position": "#LeaderBoard1_tsPosition", 71 | "type": "#LeaderBoard1_tsType" 72 | } 73 | dropdowns = { 74 | "league": "#LeaderBoard1_rcbLeague_Input", 75 | "team": "#LeaderBoard1_rcbTeam_Input", 76 | "single_season": "#LeaderBoard1_rcbSeason_Input", 77 | "split": "#LeaderBoard1_rcbMonth_Input", 78 | "min_pa": "#LeaderBoard1_rcbMin_Input", 79 | "season1": "#LeaderBoard1_rcbSeason1_Input", 80 | "season2": "#LeaderBoard1_rcbSeason2_Input", 81 | "age1": "#LeaderBoard1_rcbAge1_Input", 82 | "age2": "#LeaderBoard1_rcbAge2_Input" 83 | } 84 | dropdown_options = { 85 | "league": "#LeaderBoard1_rcbLeague_DropDown", 86 | "team": "#LeaderBoard1_rcbTeam_DropDown", 87 | "single_season": "#LeaderBoard1_rcbSeason_DropDown", 88 | "split": "#LeaderBoard1_rcbMonth_DropDown", 89 | "min_pa": "#LeaderBoard1_rcbMin_DropDown", 90 | "season1": "#LeaderBoard1_rcbSeason1_DropDown", 91 | "season2": "#LeaderBoard1_rcbSeason2_DropDown", 92 | "age1": "#LeaderBoard1_rcbAge1_DropDown", 93 | "age2": "#LeaderBoard1_rcbAge2_DropDown" 94 | } 95 | switches = { 96 | "split_teams": "#LeaderBoard1_cbTeams", 97 | "active_roster": "#LeaderBoard1_cbActive", 98 | "hof": "#LeaderBoard1_cbHOF", 99 | "split_seasons": "#LeaderBoard1_cbSeason", 100 | "rookies": "#LeaderBoard1_cbRookie" 101 | } 102 | buttons = { 103 | "season1": "#LeaderBoard1_btnMSeason", 104 | "season2": "#LeaderBoard1_btnMSeason", 105 | "age1": "#LeaderBoard1_cmdAge", 106 | "age2": "#LeaderBoard1_cmdAge" 107 | } 108 | 109 | 110 | class SeasonStat: 111 | """ 112 | CSS selectors for :py:mod:`fangraphs.leaders.seasonstat`. 113 | """ 114 | selections = { 115 | "stat": [ 116 | "div[class*='fgButton button-green']:nth-child(1)", 117 | "div[class*='fgButton button-green']:nth-child(2)" 118 | ], 119 | "type": [ 120 | "div[class*='fgButton button-green']:nth-child(4)", 121 | "div[class*='fgButton button-green']:nth-child(5)", 122 | "div[class*='fgButton button-green']:nth-child(6)" 123 | ] 124 | } 125 | dropdowns = { 126 | "start_season": ".row-season > div:nth-child(2)", 127 | "end_season": ".row-season > div:nth-child(4)", 128 | "popular": ".season-grid-controls-dropdown-row-stats > div:nth-child(1)", 129 | "standard": ".season-grid-controls-dropdown-row-stats > div:nth-child(2)", 130 | "advanced": ".season-grid-controls-dropdown-row-stats > div:nth-child(3)", 131 | "statcast": ".season-grid-controls-dropdown-row-stats > div:nth-child(4)", 132 | "batted_ball": ".season-grid-controls-dropdown-row-stats > div:nth-child(5)", 133 | "win_probability": ".season-grid-controls-dropdown-row-stats > div:nth-child(6)", 134 | "pitch_type": ".season-grid-controls-dropdown-row-stats > div:nth-child(7)", 135 | "plate_discipline": ".season-grid-controls-dropdown-row-stats > div:nth-child(8)", 136 | "value": ".season-grid-controls-dropdown-row-stats > div:nth-child(9)" 137 | } 138 | waitfor = ".fg-data-grid.undefined" 139 | 140 | 141 | class Splits: 142 | """ 143 | CSS selectors for :py:mod:`fangraphs.leaders.splits`. 144 | """ 145 | selections = { 146 | "group": [ 147 | ".fgBin.row-button > div[class*='button-green fgButton']:nth-child(1)", 148 | ".fgBin.row-button > div[class*='button-green fgButton']:nth-child(2)", 149 | ".fgBin.row-button > div[class*='button-green fgButton']:nth-child(3)", 150 | ".fgBin.row-button > div[class*='button-green fgButton']:nth-child(4)" 151 | ], 152 | "stat": [ 153 | ".fgBin.row-button > div[class*='button-green fgButton']:nth-child(6)", 154 | ".fgBin.row-button > div[class*='button-green fgButton']:nth-child(7)" 155 | ], 156 | "type": [ 157 | "#root-buttons-stats > div:nth-child(1)", 158 | "#root-buttons-stats > div:nth-child(2)", 159 | "#root-buttons-stats > div:nth-child(3)" 160 | ] 161 | } 162 | dropdowns = { 163 | "time_filter": "#root-menu-time-filter > .fg-dropdown.splits.multi-choice", 164 | "preset_range": "#root-menu-time-filter > .fg-dropdown.splits.single-choice", 165 | "groupby": ".fg-dropdown.group-by" 166 | } 167 | splits = { 168 | "handedness": ".fgBin:nth-child(1) > .fg-dropdown.splits.multi-choice:nth-child(1)", 169 | "home_away": ".fgBin:nth-child(1) > .fg-dropdown.splits.multi-choice:nth-child(2)", 170 | "batted_ball": ".fgBin:nth-child(1) > .fg-dropdown.splits.multi-choice:nth-child(3)", 171 | "situation": ".fgBin:nth-child(1) > .fg-dropdown.splits.multi-choice:nth-child(4)", 172 | "count": ".fgBin:nth-child(1) > .fg-dropdown.splits.multi-choice:nth-child(5)", 173 | "batting_order": ".fgBin:nth-child(2) > .fg-dropdown.splits.multi-choice:nth-child(1)", 174 | "position": ".fgBin:nth-child(2) > .fg-dropdown.splits.multi-choice:nth-child(2)", 175 | "inning": ".fgBin:nth-child(2) > .fg-dropdown.splits.multi-choice:nth-child(3)", 176 | "leverage": ".fgBin:nth-child(2) > .fg-dropdown.splits.multi-choice:nth-child(4)", 177 | "shifts": ".fgBin:nth-child(2) > .fg-dropdown.splits.multi-choice:nth-child(5)", 178 | "team": ".fgBin:nth-child(3) > .fg-dropdown.splits.multi-choice:nth-child(1)", 179 | "opponent": ".fgBin:nth-child(3) > .fg-dropdown.splits.multi-choice:nth-child(2)", 180 | } 181 | quick_splits = { 182 | "batting_home": ".quick-splits > div:nth-child(1) > div:nth-child(2) > .fgButton:nth-child(1)", 183 | "batting_away": ".quick-splits > div:nth-child(1) > div:nth-child(2) > .fgButton:nth-child(2)", 184 | "vs_lhp": ".quick-splits > div:nth-child(1) > div:nth-child(3) > .fgButton:nth-child(1)", 185 | "vs_lhp_home": ".quick-splits > div:nth-child(1) > div:nth-child(3) > .fgButton:nth-child(2)", 186 | "vs_lhp_away": ".quick-splits > div:nth-child(1) > div:nth-child(3) > .fgButton:nth-child(3)", 187 | "vs_lhp_as_lhh": ".quick-splits > div:nth-child(1) > div:nth-child(3) > .fgButton:nth-child(4)", 188 | "vs_lhp_as_rhh": ".quick-splits > div:nth-child(1) > div:nth-child(3) > .fgButton:nth-child(5)", 189 | "vs_rhp": ".quick-splits > div:nth-child(1) > div:nth-child(4) > .fgButton:nth-child(1)", 190 | "vs_rhp_home": ".quick-splits > div:nth-child(1) > div:nth-child(4) > .fgButton:nth-child(2)", 191 | "vs_rhp_away": ".quick-splits > div:nth-child(1) > div:nth-child(4) > .fgButton:nth-child(3)", 192 | "vs_rhp_as_lhh": ".quick-splits > div:nth-child(1) > div:nth-child(4) > .fgButton:nth-child(4)", 193 | "vs_rhp_as_rhh": ".quick-splits > div:nth-child(1) > div:nth-child(4) > .fgButton:nth-child(5)", 194 | "pitching_as_sp": ".quick-splits > div:nth-child(2) > div:nth-child(1) .fgButton:nth-child(1)", 195 | "pitching_as_rp": ".quick-splits > div:nth-child(2) > div:nth-child(1) .fgButton:nth-child(2)", 196 | "pitching_home": ".quick-splits > div:nth-child(2) > div:nth-child(2) > .fgButton:nth-child(1)", 197 | "pitching_away": ".quick-splits > div:nth-child(2) > div:nth-child(2) > .fgButton:nth-child(2)", 198 | "vs_lhh": ".quick-splits > div:nth-child(2) > div:nth-child(3) > .fgButton:nth-child(1)", 199 | "vs_lhh_home": ".quick-splits > div:nth-child(2) > div:nth-child(3) > .fgButton:nth-child(2)", 200 | "vs_lhh_away": ".quick-splits > div:nth-child(2) > div:nth-child(3) > .fgButton:nth-child(3)", 201 | "vs_lhh_as_rhp": ".quick-splits > div:nth-child(2) > div:nth-child(3) > .fgButton:nth-child(4)", 202 | "vs_lhh_as_lhp": ".quick-splits > div:nth-child(2) > div:nth-child(3) > .fgButton:nth-child(5)", 203 | "vs_rhh": ".quick-splits > div:nth-child(2) > div:nth-child(4) > .fgButton:nth-child(1)", 204 | "vs_rhh_home": ".quick-splits > div:nth-child(2) > div:nth-child(4) > .fgButton:nth-child(1)", 205 | "vs_rhh_away": ".quick-splits > div:nth-child(2) > div:nth-child(4) > .fgButton:nth-child(1)", 206 | "vs_rhh_as_rhp": ".quick-splits > div:nth-child(2) > div:nth-child(4) > .fgButton:nth-child(1)", 207 | "vs_rhh_as_lhp": ".quick-splits > div:nth-child(2) > div:nth-child(4) > .fgButton:nth-child(1)" 208 | } 209 | switches = { 210 | "split_teams": "#stack-buttons > div:nth-child(2)", 211 | "auto_pt": "#stack-buttons > div:nth-child(3)" 212 | } 213 | waitfor = ".fg-data-grid.undefined" 214 | 215 | 216 | class WAR: 217 | """ 218 | CSS selectors for :py:mod:`fangraphs.leaders.war`. 219 | """ 220 | dropdowns = { 221 | "season": "#WARBoard1_rcbSeason_Input", 222 | "team": "#WARBoard1_rcbTeam_Input", 223 | "type": "#WARBoard1_rcbType_Input" 224 | } 225 | dropdown_options = { 226 | "season": "#WARBoard1_rcbSeason_DropDown", 227 | "team": "#WARBoard1_rcbTeam_DropDown", 228 | "type": "#WARBoard1_rcbType_DropDown" 229 | } 230 | waitfor = ".rgMasterTable" 231 | -------------------------------------------------------------------------------- /fangraphs/tests/test_leaders.py: -------------------------------------------------------------------------------- 1 | #! python3 2 | # tests/test_leaders.py 3 | 4 | """ 5 | The docstring in each class identifies the class in :py:mod:`FanGraphs.leaders` being tested. 6 | The docstring in each test identifies the class attribute(s)/method(s) being tested. 7 | """ 8 | 9 | import bs4 10 | from playwright.sync_api import sync_playwright 11 | import pytest 12 | import requests 13 | 14 | from fangraphs.selectors import leaders_sel 15 | 16 | 17 | def fetch_soup(address, waitfor=""): 18 | """ 19 | Initializes the ``bs4.BeautifulSoup`` object for parsing the FanGraphs page 20 | 21 | :param address: The base URL address of the FanGraphs page 22 | :param waitfor: The CSS selector to wait for 23 | :return: A ``BeautifulSoup`` object for parsing the page 24 | :rtype: bs4.BeautifulSoup 25 | """ 26 | with sync_playwright() as play: 27 | browser = play.chromium.launch() 28 | page = browser.new_page() 29 | page.goto(address, timeout=0) 30 | if waitfor: 31 | page.wait_for_selector(waitfor) 32 | soup = bs4.BeautifulSoup( 33 | page.content(), features="lxml" 34 | ) 35 | browser.close() 36 | return soup 37 | 38 | 39 | class TestMajorLeague: 40 | """ 41 | :py:class:`FanGraphs.leaders.MajorLeague` 42 | """ 43 | __selections = leaders_sel.MajorLeague.selections 44 | __dropdowns = leaders_sel.MajorLeague.dropdowns 45 | __dropdown_options = leaders_sel.MajorLeague.dropdown_options 46 | __checkboxes = leaders_sel.MajorLeague.checkboxes 47 | __buttons = leaders_sel.MajorLeague.buttons 48 | 49 | address = "https://fangraphs.com/leaders.aspx" 50 | 51 | @classmethod 52 | def setup_class(cls): 53 | """ 54 | Initialize class 55 | :return: 56 | """ 57 | cls.soup = fetch_soup(cls.address) 58 | 59 | def test_address(self): 60 | """ 61 | Class attribute ``MajorLeagueLeaderboards.address``. 62 | """ 63 | res = requests.get(self.address) 64 | assert res.status_code == 200 65 | 66 | @pytest.mark.parametrize( 67 | "selectors", 68 | [__selections, __dropdown_options] 69 | ) 70 | def test_list_options(self, selectors: dict): 71 | """ 72 | Instance method ``MajorLeagueLeaderboards.lsit_options``. 73 | 74 | :param selectors: CSS Selectors 75 | """ 76 | elem_count = { 77 | "group": 3, "stat": 3, "position": 13, "type": 19, 78 | "league": 3, "team": 31, "single_season": 151, "split": 67, 79 | "min_pa": 60, "season1": 151, "season2": 151, "age1": 45, "age2": 45, 80 | "split_teams": 2, "active_roster": 2, "hof": 2, "split_seasons": 2, 81 | "rookies": 2 82 | } 83 | for query, sel in selectors.items(): 84 | elems = self.soup.select(f"{sel} li") 85 | assert len(elems) == elem_count[query], query 86 | assert all(isinstance(e.getText(), str) for e in elems), query 87 | 88 | def test_current_option_selections(self): 89 | """ 90 | Instance method ``MajorLeagueLeaderboards.current_option``. 91 | 92 | Uses the selectors in: 93 | 94 | - ``MajorLeagueLeaderboards.__selections`` 95 | """ 96 | elem_text = { 97 | "group": "Player Stats", "stat": "Batting", "position": "All", 98 | "type": "Dashboard" 99 | } 100 | for query, sel in self.__selections.items(): 101 | elem = self.soup.select(f"{sel} .rtsLink.rtsSelected") 102 | assert len(elem) == 1, query 103 | assert isinstance(elem[0].getText(), str), query 104 | assert elem[0].getText() == elem_text[query] 105 | 106 | def test_current_option_dropdowns(self): 107 | """ 108 | Instance method ``MajorLeagueLeaderboards.current_option``. 109 | 110 | Uses the selectors in: 111 | 112 | - ``MajorLeagueLeaderboards.__dropdowns`` 113 | """ 114 | elem_value = { 115 | "league": "All Leagues", "team": "All Teams", "single_season": "2020", 116 | "split": "Full Season", "min_pa": "Qualified", "season1": "2020", 117 | "season2": "2020", "age1": "14", "age2": "58" 118 | } 119 | for query, sel in self.__dropdowns.items(): 120 | elem = self.soup.select(sel)[0] 121 | assert elem.get("value") is not None, query 122 | assert elem_value[query] == elem.get("value") 123 | 124 | @pytest.mark.parametrize( 125 | "selectors", 126 | [__selections, __dropdowns, __dropdown_options, 127 | __checkboxes, __buttons] 128 | ) 129 | def test_configure(self, selectors: dict): 130 | """ 131 | Private instance method ``MajorLeagueLeaderboards.__configure_selection``. 132 | Private instance method ``MajorLeagueLeaderboards.__configure_dropdown``. 133 | Private instance method ``MajorLeagueLeaderboards.__configure_checkbox``. 134 | Private instance method ``MajorLeagueLeaderboards.__click_button``. 135 | 136 | :param selectors: CSS Selectors 137 | """ 138 | for query, sel in selectors.items(): 139 | elems = self.soup.select(sel) 140 | assert len(elems) == 1, query 141 | 142 | def test_expand_sublevel(self): 143 | """ 144 | Statement in private instance method ``MajorLeagueLeaderboards.__configure_selection``. 145 | """ 146 | elems = self.soup.select("#LeaderBoard1_tsType a[href='#']") 147 | assert len(elems) == 1 148 | 149 | def test_export(self): 150 | """ 151 | Instance method ``MajorLeagueLeaderboards.export``. 152 | """ 153 | elems = self.soup.select("#LeaderBoard1_cmdCSV") 154 | assert len(elems) == 1 155 | 156 | 157 | class TestSplits: 158 | """ 159 | :py:class:`FanGraphs.leaders.Splits`. 160 | """ 161 | 162 | __selections = leaders_sel.Splits.selections 163 | __dropdowns = leaders_sel.Splits.dropdowns 164 | __splits = leaders_sel.Splits.splits 165 | __quick_splits = leaders_sel.Splits.quick_splits 166 | __switches = leaders_sel.Splits.switches 167 | 168 | address = "https://fangraphs.com/leaders/splits-leaderboards" 169 | 170 | @classmethod 171 | def setup_class(cls): 172 | """ 173 | Initialize class 174 | """ 175 | cls.soup = fetch_soup( 176 | cls.address, leaders_sel.Splits.waitfor 177 | ) 178 | 179 | def test_address(self): 180 | """ 181 | Class attribute ``SplitsLeaderboards.address``. 182 | """ 183 | res = requests.get(self.address) 184 | assert res.status_code == 200 185 | 186 | def test_list_options_selections(self): 187 | """ 188 | Instance method ``SplitsLeaderboards.list_options``. 189 | 190 | Uses the selectors in: 191 | 192 | - ``SplitsLeaderboards.__selections`` 193 | """ 194 | elem_count = { 195 | "group": 4, "stat": 2, "type": 3 196 | } 197 | for query, sel_list in self.__selections.items(): 198 | elems = [self.soup.select(s)[0] for s in sel_list] 199 | assert len(elems) == elem_count[query] 200 | assert all(e.getText() for e in elems) 201 | 202 | @pytest.mark.parametrize( 203 | "selectors", 204 | [__dropdowns, __splits] 205 | ) 206 | def test_list_options(self, selectors: dict): 207 | """ 208 | Instance method ``SplitsLeaderboards.list_options``. 209 | 210 | Uses the selectors in: 211 | 212 | - ``SplitsLeaderboards.__dropdowns`` 213 | - ``SplitsLeaderboards.__splits`` 214 | 215 | :param selectors: CSS selectors 216 | """ 217 | elem_count = { 218 | "time_filter": 10, "preset_range": 12, "groupby": 5, 219 | "handedness": 4, "home_away": 2, "batted_ball": 15, 220 | "situation": 7, "count": 11, "batting_order": 9, "position": 12, 221 | "inning": 10, "leverage": 3, "shifts": 3, "team": 32, 222 | "opponent": 32, 223 | } 224 | for query, sel in selectors.items(): 225 | elems = self.soup.select(f"{sel} li") 226 | assert len(elems) == elem_count[query] 227 | 228 | def test_current_option_selections(self): 229 | """ 230 | Instance method ``SplitsLeaderboards.current_option``. 231 | 232 | Uses the selectors in: 233 | 234 | - ``SplitsLeaderboards.__selections`` 235 | """ 236 | elem_text = { 237 | "group": "Player", "stat": "Batting", "type": "Standard" 238 | } 239 | for query, sel_list in self.__selections.items(): 240 | elems = [] 241 | for sel in sel_list: 242 | elem = self.soup.select(sel)[0] 243 | assert elem.get("class") is not None 244 | elems.append(elem) 245 | active = ["isActive" in e.get("class") for e in elems] 246 | assert active.count(True) == 1, query 247 | text = [e.getText() for e in elems] 248 | assert elem_text[query] in text 249 | 250 | @pytest.mark.parametrize( 251 | "selectors", 252 | [__dropdowns, __splits, __switches] 253 | ) 254 | def test_current_option(self, selectors: dict): 255 | """ 256 | Instance method ``SplitsLeaderboards.current_option``. 257 | 258 | Uses the selectors in: 259 | 260 | - ``SplitsLeaderboards.__dropdowns`` 261 | - ``SplitsLeaderboards.__splits`` 262 | - ``SplitsLeaderboards.__switches`` 263 | 264 | :param selectors: CSS selectors 265 | """ 266 | for query, sel in selectors.items(): 267 | elems = self.soup.select(f"{sel} li") 268 | for elem in elems: 269 | assert elem.get("class") is not None, query 270 | 271 | def test_configure_selection(self): 272 | """ 273 | Private instance method ``SplitsLeaderboards.__configure_selection``. 274 | """ 275 | for query, sel_list in self.__selections.items(): 276 | for sel in sel_list: 277 | elems = self.soup.select(sel) 278 | assert len(elems) == 1, query 279 | 280 | @pytest.mark.parametrize( 281 | "selectors", 282 | [__dropdowns, __splits, __switches] 283 | ) 284 | def test_configure(self, selectors: dict): 285 | """ 286 | Private instance method ``SplitsLeaderboards.__configure_dropdown``. 287 | Private instance method ``SplitsLeaderboards.__configure_split``. 288 | Private instance method ``SplitsLeaderboards.__configure_switch``. 289 | 290 | :param selectors: CSS Selectors 291 | """ 292 | for query, sel in selectors.items(): 293 | elems = self.soup.select(sel) 294 | assert len(elems) == 1, query 295 | 296 | def test_update(self): 297 | """ 298 | Instance method ``SplitsLeaderboards.update``. 299 | """ 300 | elems = self.soup.select("#button-update") 301 | assert len(elems) == 0 302 | 303 | def test_list_filter_groups(self): 304 | """ 305 | Instance method ``SplitsLeaderboards.list_filter_groups``. 306 | """ 307 | elems = self.soup.select(".fgBin.splits-bin-controller div") 308 | assert len(elems) == 4 309 | options = ["Quick Splits", "Splits", "Filters", "Show All"] 310 | assert [e.getText() for e in elems] == options 311 | 312 | def test_configure_filter_group(self): 313 | """ 314 | Instance method ``SplitsLeaderboards.configure_filter_group``. 315 | """ 316 | groups = ["Quick Splits", "Splits", "Filters", "Show All"] 317 | elems = self.soup.select(".fgBin.splits-bin-controller div") 318 | assert len(elems) == 4 319 | assert [e.getText() for e in elems] == groups 320 | 321 | def test_reset_filters(self): 322 | """ 323 | Instance method ``SplitsLeaderboards.reset_filters``. 324 | """ 325 | elems = self.soup.select("#stack-buttons .fgButton.small:nth-last-child(1)") 326 | assert len(elems) == 1 327 | 328 | def test_configure_quick_split(self): 329 | """ 330 | Instance method ``SplitsLeaderboards.configure_quick_split``. 331 | """ 332 | for qsplit, sel in self.__quick_splits.items(): 333 | elems = self.soup.select(sel) 334 | assert len(elems) == 1, qsplit 335 | 336 | def test_export(self): 337 | """ 338 | Instance method ``SplitsLeaderboards.export``. 339 | """ 340 | elems = self.soup.select(".data-export") 341 | assert len(elems) == 1 342 | 343 | 344 | class TestSeasonStatGrid: 345 | """ 346 | :py:class:`FanGraphs.leaders.SeasonStatGrid`. 347 | """ 348 | __selections = leaders_sel.SeasonStatGrid.selections 349 | __dropdowns = leaders_sel.SeasonStatGrid.dropdowns 350 | 351 | address = "https://fangraphs.com/leaders/season-stat-grid" 352 | 353 | @classmethod 354 | def setup_class(cls): 355 | """ 356 | Initialize 357 | """ 358 | cls.soup = fetch_soup( 359 | cls.address, leaders_sel.SeasonStatGrid.waitfor 360 | ) 361 | 362 | def test_address(self): 363 | """ 364 | Class attribute ``SeasonStatGrid.address`` 365 | """ 366 | res = requests.get(self.address) 367 | assert res.status_code == 200 368 | 369 | def test_list_options_selections(self): 370 | """ 371 | Instance method ``SeasonStatGrid.list_options``. 372 | 373 | Uses the following class attributes: 374 | 375 | - ``SeasonStatGrid.__selections`` 376 | """ 377 | elem_count = { 378 | "stat": 2, "group": 3, "type": 3 379 | } 380 | for query, sel_list in self.__selections.items(): 381 | elems = [self.soup.select(s)[0] for s in sel_list] 382 | assert len(elems) == elem_count[query] 383 | assert all(e.getText() for e in elems) 384 | 385 | def test_list_options_dropdowns(self): 386 | """ 387 | Instance method ``SeasonStatGrid.list_options``. 388 | 389 | Uses the following class attributes: 390 | 391 | - ``SeasonStatGrid.__dropdowns`` 392 | """ 393 | elem_count = { 394 | "start_season": 71, "end_season": 71, "popular": 6, 395 | "standard": 20, "advanced": 17, "statcast": 8, "batted_ball": 24, 396 | "win_probability": 10, "pitch_type": 25, "plate_discipline": 25, 397 | "value": 11 398 | } 399 | for query, sel in self.__dropdowns.items(): 400 | elems = self.soup.select(f"{sel} li") 401 | assert len(elems) == elem_count[query], query 402 | assert all(e.getText() for e in elems) 403 | 404 | def test_current_option_selections(self): 405 | """ 406 | Instance method ``SeasonStatGrid.current_option``. 407 | 408 | Tests the following class attributes: 409 | 410 | - ``SeasonStatGrid.__selections`` 411 | """ 412 | selector = "div[class='fgButton button-green active isActive']" 413 | elems = self.soup.select(selector) 414 | assert len(elems) == 2 415 | 416 | def test_current_options_dropdowns(self): 417 | """ 418 | Instance method ``SeasonStatGrid.current_option``. 419 | 420 | Uses the following class attributes: 421 | 422 | - ``SeasonStatGrid.__dropdowns`` 423 | """ 424 | for query, sel in self.__dropdowns.items(): 425 | elems = self.soup.select( 426 | f"{sel} li[class$='highlight-selection']" 427 | ) 428 | if query in ["start_season", "end_season", "popular", "value"]: 429 | assert len(elems) == 1, query 430 | assert elems[0].getText() is not None 431 | else: 432 | assert len(elems) == 0, query 433 | 434 | def test_configure_selection(self): 435 | """ 436 | Private instance method ``SeasonStatGrid.__configure_selection``. 437 | """ 438 | for query, sel_list in self.__selections.items(): 439 | for sel in sel_list: 440 | elems = self.soup.select(sel) 441 | assert len(elems) == 1, query 442 | 443 | def test_configure_dropdown(self): 444 | """ 445 | Private instance method ``SeasonStatGrid.__configure_dropdown``. 446 | """ 447 | for query, sel in self.__dropdowns.items(): 448 | elems = self.soup.select(sel) 449 | assert len(elems) == 1, query 450 | 451 | def test_export(self): 452 | """ 453 | Instance method ``SeasonStatGrid.export``. 454 | """ 455 | total_pages = self.soup.select( 456 | ".table-page-control:nth-last-child(1) > .table-control-total" 457 | ) 458 | assert len(total_pages) == 1 459 | assert total_pages[0].getText().isdecimal() 460 | arrow = self.soup.select( 461 | ".table-page-control:nth-last-child(1) > .next" 462 | ) 463 | assert len(arrow) == 1 464 | assert arrow[0].getText() == "chevron_right" 465 | 466 | 467 | class TestGameSpan: 468 | """ 469 | :py:class:`GameSpan`. 470 | """ 471 | __selections = leaders_sel.GameSpan.selections 472 | __dropdowns = leaders_sel.GameSpan.dropdowns 473 | 474 | address = "https://www.fangraphs.com/leaders/special/60-game-span" 475 | 476 | @classmethod 477 | def setup_class(cls): 478 | """ 479 | Initialize class 480 | """ 481 | cls.soup = fetch_soup( 482 | cls.address, leaders_sel.GameSpan.waitfor 483 | ) 484 | 485 | def test_address(self): 486 | """ 487 | Class attribute ``GameSpanLeaderboards.address``. 488 | """ 489 | res = requests.get(self.address) 490 | assert res.status_code == 200 491 | 492 | def test_list_options_selections(self): 493 | """ 494 | Instance method ``GameSpanLeaderboards.list_options``. 495 | 496 | Uses the following class attributes: 497 | 498 | - ``GameSpanLeaderboards.__selections`` 499 | """ 500 | elem_count = { 501 | "stat": 2, "type": 3 502 | } 503 | for query, sel_list in self.__selections.items(): 504 | elems = [self.soup.select(s)[0] for s in sel_list] 505 | assert len(elems) == elem_count[query], query 506 | assert all(e.getText() for e in elems), query 507 | 508 | def test_list_options_dropdowns(self): 509 | """ 510 | Instance method ``GameSpanLeaderboards.list_options``. 511 | 512 | Uses the following class attributes: 513 | 514 | - ``GameSpanLeaderboards.__dropdowns`` 515 | """ 516 | elem_count = { 517 | "min_pa": 9, "single_season": 46, "season1": 46, "season2": 46, 518 | "determine": 11 519 | } 520 | for query, sel in self.__dropdowns.items(): 521 | elems = self.soup.select(f"{sel} > div > a") 522 | assert len(elems) == elem_count[query], query 523 | assert all(e.getText() for e in elems), query 524 | 525 | def test_current_option_selections(self): 526 | """ 527 | Instance method ``GameSpanLeaderboards.current_option``. 528 | 529 | Uses the following class attributes: 530 | 531 | - ``GameSpanLeaderboards.__selections`` 532 | """ 533 | elem_text = { 534 | "stat": "Batters", "type": "Best 60-Game Span" 535 | } 536 | for query, sel_list in self.__selections.items(): 537 | elems = [] 538 | for sel in sel_list: 539 | elem = self.soup.select(sel)[0] 540 | assert elem.get("class") is not None, query 541 | elems.append(elem) 542 | active = ["active" in e.get("class") for e in elems] 543 | assert active.count(True) == 1, query 544 | text = [e.getText() for e in elems] 545 | assert elem_text[query] in text, query 546 | 547 | def test_current_option_dropdown(self): 548 | """ 549 | Instance method ``GameSpanLeaderboards.current_option``. 550 | 551 | Uses the following class attributes: 552 | 553 | - ``GameSpanLeaderboards.__dropdowns`` 554 | """ 555 | elem_text = { 556 | "min_pa": "Qualified", "single_season": "Select", 557 | "season1": "Select", "season2": "Select", 558 | "determine": "WAR" 559 | } 560 | for query, sel in self.__dropdowns.items(): 561 | elems = self.soup.select(f"{sel} > div > span") 562 | assert len(elems) == 1, query 563 | text = elems[0].getText() 564 | assert text == elem_text[query], query 565 | 566 | def test_configure_selections(self): 567 | """ 568 | Private instance method ``GameSpanLeaderboards.__configure_selection``. 569 | """ 570 | for query, sel_list in self.__selections.items(): 571 | for sel in sel_list: 572 | elems = self.soup.select(sel) 573 | assert len(elems) == 1, query 574 | 575 | def test_configure_dropdown(self): 576 | """ 577 | Private instance method ``GameSpanLeaderboards.__configure_dropdown``. 578 | """ 579 | for query, sel in self.__dropdowns.items(): 580 | elems = self.soup.select(sel) 581 | assert len(elems) == 1, query 582 | 583 | def test_export(self): 584 | """ 585 | Instance method ``GameSpanLeaderboards.export``. 586 | """ 587 | elems = self.soup.select(".data-export") 588 | assert len(elems) == 1 589 | 590 | 591 | class TestInternational: 592 | """ 593 | :py:class:`FanGraphs.leaders.InternationalLeaderboards` 594 | """ 595 | __selections = leaders_sel.International.selections 596 | __dropdowns = leaders_sel.International.dropdowns 597 | __checkboxes = leaders_sel.International.checkboxes 598 | address = "https://www.fangraphs.com/leaders/international" 599 | 600 | @classmethod 601 | def setup_class(cls): 602 | """ 603 | Initialize class 604 | """ 605 | cls.soup = fetch_soup( 606 | cls.address, waitfor=leaders_sel.International.waitfor 607 | ) 608 | 609 | def test_address(self): 610 | """ 611 | Class attribute ``InternationalLeaderboards.address``. 612 | """ 613 | res = requests.get(self.address) 614 | assert res.status_code == 200 615 | 616 | def test_list_options_selections(self): 617 | """ 618 | Instance method ``InternationalLeaderboards.list_options``. 619 | 620 | Uses the following class attributes: 621 | 622 | - ``InternationalLeaderboards.__selections`` 623 | """ 624 | elem_count = { 625 | "stat": 2, "type": 2 626 | } 627 | for query, sel_list in self.__selections.items(): 628 | elems = [self.soup.select(s)[0] for s in sel_list] 629 | assert len(elems) == elem_count[query], query 630 | assert all(e.getText() for e in elems), query 631 | 632 | def test_list_options_dropdowns(self): 633 | """ 634 | Instance method ``InternationalLeaderboards.list_options``. 635 | 636 | Uses the following class attributes: 637 | 638 | - ``InternationalLeaderboards.__dropdowns`` 639 | """ 640 | elem_count = { 641 | "position": 11, "min": 42, "single_season": 19, "season1": 19, "season2": 19, 642 | "league": 1, "team": 11 643 | } 644 | for query, sel in self.__dropdowns.items(): 645 | elems = self.soup.select(f"{sel} > div > a") 646 | assert len(elems) == elem_count[query], query 647 | assert all(e.getText() for e in elems), query 648 | 649 | def test_current_option_selections(self): 650 | """ 651 | Instance method ``InternationalLeaderboards.current_option``. 652 | 653 | Uses the following class attributes: 654 | 655 | - ``InternationalLeaderboards.__selections`` 656 | """ 657 | elem_text = { 658 | "stat": "Batters", "type": "Standard" 659 | } 660 | for query, sel_list in self.__selections.items(): 661 | elems = [] 662 | for sel in sel_list: 663 | elem = self.soup.select(sel)[0] 664 | assert elem.get("class") is not None, query 665 | elems.append(elem) 666 | active = ["active" in e.get("class") for e in elems] 667 | assert active.count(True) == 1, query 668 | text = [e.getText() for e in elems] 669 | assert elem_text[query] in text, query 670 | 671 | def test_current_option_dropdown(self): 672 | """ 673 | Instance method ``InternationalLeaderboards.current_option``. 674 | 675 | Uses the following class attributes: 676 | 677 | - ``InternationalLeaderboards.__dropdowns`` 678 | """ 679 | elem_text = { 680 | "position": "All", "min": "Qualified", "single_season": "2020", 681 | "season1": "2020", "season2": "2020", "league": "KBO", 682 | "team": "Select" 683 | } 684 | for query, sel in self.__dropdowns.items(): 685 | elems = self.soup.select(f"{sel} > div > span") 686 | assert len(elems) == 1, query 687 | text = elems[0].getText() 688 | assert text == elem_text[query], query 689 | 690 | def test_configure_selections(self): 691 | """ 692 | Private instance method ``InternationalLeaderboards.__configure_selection``. 693 | """ 694 | for query, sel_list in self.__selections.items(): 695 | for sel in sel_list: 696 | elems = self.soup.select(sel) 697 | assert len(elems) == 1, query 698 | 699 | def test_configure_dropdown(self): 700 | """ 701 | Private instance method ``InternationalLeaderboards.__configure_dropdown``. 702 | """ 703 | for query, sel in self.__dropdowns.items(): 704 | elems = self.soup.select(sel) 705 | assert len(elems) == 1, query 706 | 707 | def test_export(self): 708 | """ 709 | Instance method ``InternationalLeaderboards.export``. 710 | """ 711 | elems = self.soup.select(".data-export") 712 | assert len(elems) == 1 713 | 714 | 715 | class TestWAR: 716 | """ 717 | :py:class:`FanGraphs.leaders.WARLeaderboards` 718 | """ 719 | __dropdowns = leaders_sel.WAR.dropdowns 720 | __dropdown_options = leaders_sel.WAR.dropdown_options 721 | 722 | address = "https://fangraphs.com/warleaders.aspx" 723 | 724 | @classmethod 725 | def setup_class(cls): 726 | """ 727 | Initialize class 728 | """ 729 | cls.soup = fetch_soup( 730 | cls.address, waitfor=leaders_sel.WAR.waitfor 731 | ) 732 | 733 | @pytest.mark.parametrize( 734 | "selectors", 735 | [__dropdown_options] 736 | ) 737 | def test_list_options(self, selectors: dict): 738 | """ 739 | Instance method ``WARLeaderboards.list_options``. 740 | 741 | :param selectors: CSS selectors 742 | """ 743 | elem_count = { 744 | "season": 151, "team": 33, "type": 3 745 | } 746 | for query, sel in selectors.items(): 747 | elems = self.soup.select(f"{sel} > ul > li") 748 | assert len(elems) == elem_count[query], query 749 | 750 | @pytest.mark.parametrize( 751 | "selectors", 752 | [__dropdowns] 753 | ) 754 | def test_current_option(self, selectors: dict): 755 | """ 756 | Instance method ``WARLeaderboards.current_option``. 757 | 758 | :param selectors: CSS selectors 759 | """ 760 | elem_text = { 761 | "season": "2020", "team": "All Teams", "type": "WAR (FIP Based)" 762 | } 763 | for query, sel in selectors.items(): 764 | elems = self.soup.select(sel) 765 | assert len(elems) == 1, query 766 | assert elems[0].get("value") is not None, query 767 | assert elems[0].get("value") == elem_text[query], query 768 | 769 | def test_configure_dropdown(self): 770 | """ 771 | Private instance method ``WARLeaderboards.__configure_dropdown``. 772 | """ 773 | for query, sel in self.__dropdowns.items(): 774 | elems = self.soup.select(sel) 775 | assert len(elems) == 1, query 776 | 777 | def test_export(self): 778 | """ 779 | Instance method ``WARLeaderboards.export``. 780 | """ 781 | elems = self.soup.select("#WARBoard1_cmdCSV") 782 | assert len(elems) == 1 783 | -------------------------------------------------------------------------------- /fangraphs/leaders/leaders.py: -------------------------------------------------------------------------------- 1 | #! python3 2 | # FanGraphs/leaders/leaders.py 3 | 4 | """ 5 | Scrpaer for the webpages under the FanGaphs **Leaders** tab. 6 | """ 7 | 8 | import csv 9 | import datetime 10 | import os 11 | 12 | import fangraphs.exceptions 13 | from fangraphs.leaders import ScrapingUtilities 14 | from fangraphs import selectors 15 | from fangraphs.selectors import leaders_sel 16 | 17 | 18 | class GameSpan(ScrapingUtilities): 19 | """ 20 | Scraper for the FanGraphs `60-Game Span Leaderboards`_ page. 21 | 22 | .. _60-Game Span Leaderboards: https://www.fangraphs.com/leaders/special/60-game-span 23 | """ 24 | __selections = {} 25 | __dropdowns = {} 26 | __waitfor = leaders_sel.GameSpan.waitfor 27 | 28 | address = "https://fangraphs.com/leaders/special/60-game-span" 29 | 30 | def __init__(self): 31 | super().__init__(self.address, waitfor=self.__waitfor) 32 | 33 | def __enter__(self): 34 | self._browser_init() 35 | self.reset() 36 | self.__compile_selectors() 37 | return self 38 | 39 | def __exit__(self, exc_type, value, traceback): 40 | self.quit() 41 | 42 | def __compile_selectors(self): 43 | for cat, sel in leaders_sel.GameSpan.selections.items(): 44 | self.__selections.setdefault( 45 | cat, selectors.Selections(self.soup, sel) 46 | ) 47 | for cat, sel in leaders_sel.GameSpan.dropdowns.items(): 48 | self.__dropdowns.setdefault( 49 | cat, selectors.Dropdowns(self.soup, sel, "> div > a") 50 | ) 51 | 52 | @classmethod 53 | def list_queries(cls): 54 | """ 55 | Lists the possible filter queries which can be used to modify search results. 56 | 57 | :return: Filter queries which can be used to modify search results 58 | :rtype: list 59 | """ 60 | queries = [] 61 | queries.extend(list(cls.__selections)) 62 | queries.extend(list(cls.__dropdowns)) 63 | return queries 64 | 65 | def list_options(self, query: str): 66 | """ 67 | Lists the possible options which a filter query can be configured to. 68 | 69 | :param query: The filter query 70 | :return: Options which the filter query can be configured to 71 | :rtype: list 72 | :raises FanGraphs.exceptions.InvalidFilterQuery: Invalid argument ``query`` 73 | """ 74 | query = query.lower() 75 | if query in self.__selections: 76 | options = self.__selections[query].list_options() 77 | elif query in self.__dropdowns: 78 | options = self.__dropdowns[query].list_options() 79 | else: 80 | raise fangraphs.exceptions.InvalidFilterQuery(query) 81 | return options 82 | 83 | def current_option(self, query: str): 84 | """ 85 | Retrieves the option which a filter query is currently set to. 86 | 87 | :param query: The filter query being retrieved of its current option 88 | :return: The option which the filter query is currently set to 89 | :rtype: str 90 | :raises FanGraphs.exceptions.InvalidFilterQuery: Invalid argument ``query`` 91 | """ 92 | query = query.lower() 93 | if query in self.__selections: 94 | option = self.__selections[query].current_option() 95 | elif query in self.__dropdowns: 96 | option = self.__dropdowns[query].current_option(opt_type=3) 97 | else: 98 | raise fangraphs.exceptions.InvalidFilterQuery(query) 99 | return option 100 | 101 | def configure(self, query: str, option: str): 102 | """ 103 | Configures a filter query to a specified option. 104 | 105 | :param query: The filter query to be configured 106 | :param option: The option to set the filter query to 107 | :raises FanGraphs.exceptions.InvalidFilterQuery: Invalid argument ``query`` 108 | """ 109 | query = query.lower() 110 | self._close_ad() 111 | if query in self.__selections: 112 | self.__selections[query].configure(self.page, option) 113 | elif query in self.__dropdowns: 114 | self.__dropdowns[query].configure(self.page, option) 115 | else: 116 | raise fangraphs.exceptions.InvalidFilterQuery(query) 117 | self._refresh_parser() 118 | 119 | def export(self, path=""): 120 | """ 121 | Uses the **Export Data** button on the webpage to export the current leaderboard. 122 | The data will be exported as a CSV file and the file will be saved to *out/*. 123 | The file will be saved to the filepath ``path``, if specified. 124 | Otherwise, the file will be saved to the filepath *./out/%d.%m.%y %H.%M.%S.csv* 125 | 126 | :param path: The path to save the exported data to 127 | """ 128 | self.export_data(".data-export", path) 129 | 130 | 131 | class International(ScrapingUtilities): 132 | """ 133 | Scraper for the FanGraphs `KBO Leaderboards`_ page. 134 | 135 | .. _KBO Leaderboards: https://www.fangraphs.com/leaders/international 136 | """ 137 | __selections = {} 138 | __dropdowns = {} 139 | __switches = {} 140 | __waitfor = leaders_sel.International.waitfor 141 | 142 | address = "https://www.fangraphs.com/leaders/international" 143 | 144 | def __init__(self): 145 | super().__init__(self.address, waitfor=self.__waitfor) 146 | 147 | def __enter__(self): 148 | self._browser_init() 149 | self.reset() 150 | self.__compile_selectors() 151 | return self 152 | 153 | def __exit__(self, exc_type, value, traceback): 154 | self.quit() 155 | 156 | def __compile_selectors(self): 157 | for cat, sel in leaders_sel.International.selections.items(): 158 | self.__selections.setdefault( 159 | cat, selectors.Selections(self.soup, sel) 160 | ) 161 | for cat, sel in leaders_sel.International.dropdowns.items(): 162 | self.__dropdowns.setdefault( 163 | cat, selectors.Dropdowns(self.soup, sel, "> div > a") 164 | ) 165 | for cat, sel in leaders_sel.International.switches.items(): 166 | self.__switches.setdefault( 167 | cat, selectors.Switches(self.soup, sel) 168 | ) 169 | 170 | @classmethod 171 | def list_queries(cls): 172 | """ 173 | Lists the possible filter queries which can be used to modify search results. 174 | 175 | :return: Filter queries which can be used to modify search results 176 | :rtype: list 177 | """ 178 | queries = [] 179 | queries.extend(cls.__selections) 180 | queries.extend(cls.__dropdowns) 181 | queries.extend(cls.__switches) 182 | return queries 183 | 184 | def list_options(self, query: str): 185 | """ 186 | Retrieves the option which a filter query is currently set to. 187 | 188 | :param query: The filter query being retrieved of its current option 189 | :return: The option which the filter query is currently set to 190 | :rtype: str 191 | :raises FanGraphs.exceptions.InvalidFilterQuery: Invalid argument ``query`` 192 | """ 193 | query = query.lower() 194 | if query in self.__selections: 195 | options = self.__selections[query].list_options() 196 | elif query in self.__dropdowns: 197 | options = self.__dropdowns[query].list_options() 198 | elif query in self.__switches: 199 | options = ["True", "False"] 200 | else: 201 | raise fangraphs.exceptions.InvalidFilterQuery(query) 202 | return options 203 | 204 | def current_option(self, query: str): 205 | """ 206 | 207 | :param query: 208 | :return: 209 | """ 210 | query = query.lower() 211 | if query in self.__selections: 212 | option = self.__selections[query].current_option() 213 | elif query in self.__dropdowns: 214 | option = self.__dropdowns[query].current_option(opt_type=3) 215 | elif query in self.__switches: 216 | option = "True" if ",to" in self.page.url else "False" 217 | else: 218 | raise fangraphs.exceptions.InvalidFilterQuery(query) 219 | return option 220 | 221 | def configure(self, query: str, option: str): 222 | """ 223 | Configures a filter query to a specified option. 224 | 225 | :param query: The filter query to be configured 226 | :param option: The option to set the filter query to 227 | :raises FanGraphs.exceptions.InvalidFilterQuery: Invalid argument ``query`` 228 | """ 229 | query = query.lower() 230 | self._close_ad() 231 | if query in self.__selections: 232 | self.__selections[query].configure(self.page, option) 233 | elif query in self.__dropdowns: 234 | self.__dropdowns[query].configure(self.page, option) 235 | elif query in self.__switches: 236 | options = [o.lower() for o in self.list_options(query)] 237 | if option not in options: 238 | raise fangraphs.exceptions.InvalidFilterOption(option) 239 | if option == self.current_option(query): 240 | return 241 | self.page.click(self.__switches[query]) 242 | else: 243 | raise fangraphs.exceptions.InvalidFilterQuery(query) 244 | self._refresh_parser() 245 | 246 | def export(self, path=""): 247 | """ 248 | Uses the **Export Data** button on the webpage to export the current leaderboard. 249 | The data will be exported as a CSV file and the file will be saved to *out/*. 250 | The file will be saved to the filepath ``path``, if specified. 251 | Otherwise, the file will be saved to the filepath *./out/%d.%m.%y %H.%M.%S.csv* 252 | 253 | :param path: The path to save the exported data to 254 | """ 255 | self.export_data(".data-export", path) 256 | 257 | 258 | class MajorLeague(ScrapingUtilities): 259 | """ 260 | Scraper for the FanGraphs `Major League Leaderboards`_ page. 261 | 262 | Note that the Splits Leaderboard is not covered. 263 | Instead, it is covered by :py:class:`SplitsLeaderboards`. 264 | 265 | .. _Major League Leaderboards: https://fangraphs.com/leaders.aspx 266 | """ 267 | __selections = {} 268 | __dropdowns = {} 269 | __switches = {} 270 | __buttons = leaders_sel.MajorLeague.buttons 271 | 272 | address = "https://fangraphs.com/leaders.aspx" 273 | 274 | def __init__(self): 275 | super().__init__(self.address, waitfor="") 276 | 277 | def __enter__(self): 278 | self._browser_init() 279 | self.reset() 280 | self.__compile_selectors() 281 | return self 282 | 283 | def __exit__(self, exc_type, value, traceback): 284 | self.quit() 285 | 286 | def __compile_selectors(self): 287 | for cat, sel in leaders_sel.MajorLeague.selections.items(): 288 | self.__selections.setdefault( 289 | cat, selectors.Selections(self.soup, sel, "> div > ul > li") 290 | ) 291 | for cat, sel in leaders_sel.MajorLeague.dropdowns.items(): 292 | options = leaders_sel.MajorLeague.dropdown_options[cat] 293 | self.__dropdowns.setdefault( 294 | cat, selectors.Dropdowns(self.soup, sel, "> div > ul > li", options) 295 | ) 296 | for cat, sel in leaders_sel.MajorLeague.switches.items(): 297 | self.__switches.setdefault( 298 | cat, selectors.Switches(self.soup, sel) 299 | ) 300 | 301 | @classmethod 302 | def list_queries(cls): 303 | """ 304 | Lists the possible filter queries which can be used to modify search results. 305 | 306 | :return: Filter queries which can be used to modify search results 307 | :rtype: list 308 | """ 309 | queries = [] 310 | queries.extend(list(cls.__selections)) 311 | queries.extend(list(cls.__dropdowns)) 312 | queries.extend(list(cls.__switches)) 313 | return queries 314 | 315 | def list_options(self, query: str): 316 | """ 317 | Lists the possible options which a filter query can be configured to. 318 | 319 | :param query: The filter query 320 | :return: Options which the filter query can be configured to 321 | :rtype: list 322 | :raises FanGraphs.exceptions.InvalidFilterQuery: Invalid argument ``query`` 323 | """ 324 | query = query.lower() 325 | if query in self.__switches: 326 | options = ["True", "False"] 327 | elif query in self.__dropdowns: 328 | options = self.__dropdowns[query].list_options() 329 | elif query in self.__selections: 330 | options = self.__selections[query].list_options() 331 | else: 332 | raise fangraphs.exceptions.InvalidFilterQuery(query) 333 | return options 334 | 335 | def current_option(self, query: str): 336 | """ 337 | Retrieves the option which a filter query is currently set to. 338 | 339 | :param query: The filter query being retrieved of its current option 340 | :return: The option which the filter query is currently set to 341 | :rtype: str 342 | :raises FanGraphs.exceptions.InvalidFilterQuery: Invalid argument ``query`` 343 | """ 344 | query = query.lower() 345 | if query in self.__switches: 346 | option = self.__switches[query].current_option(opt_type=1) 347 | elif query in self.__dropdowns: 348 | option = self.__dropdowns[query].current_option(opt_type=1) 349 | elif query in self.__selections: 350 | option = self.__selections[query].current_option() 351 | else: 352 | raise fangraphs.exceptions.InvalidFilterQuery(query) 353 | return option 354 | 355 | def configure(self, query: str, option: str, *, autoupdate=True): 356 | """ 357 | Configures a filter query to a specified option. 358 | 359 | :param query: The filter query to be configured 360 | :param option: The option to set the filter query to 361 | :param autoupdate: If ``True``, any buttons attached to the filter query will be clicked 362 | :raises FanGraphs.exceptions.InvalidFilterQuery: Invalid argument ``query`` 363 | """ 364 | query, option = query.lower(), str(option).lower() 365 | self._close_ad() 366 | if query in self.__selections: 367 | self.__selections[query].configure(self.page, option) 368 | elif query in self.__dropdowns: 369 | self.__dropdowns[query].configure(self.page, option) 370 | elif query in self.__switches: 371 | options = [o.lower() for o in self.list_options(query)] 372 | if option.lower() not in options: 373 | raise fangraphs.exceptions.InvalidFilterOption(option) 374 | if option != self.current_option(query).title(): 375 | self.page.click(self.__switches[query]) 376 | else: 377 | raise fangraphs.exceptions.InvalidFilterQuery(query) 378 | if query in self.__buttons and autoupdate: 379 | self.page.click(self.__buttons[query]) 380 | self._refresh_parser() 381 | 382 | def export(self, path=""): 383 | """ 384 | Uses the **Export Data** button on the webpage to export the current leaderboard. 385 | The data will be exported as a CSV file and the file will be saved to *out/*. 386 | The file will be saved to the filepath ``path``, if specified. 387 | Otherwise, the file will be saved to the filepath *./out/%d.%m.%y %H.%M.%S.csv* 388 | 389 | :param path: The path to save the exported data to 390 | """ 391 | self.export_data("#LeaderBoard1_cmdCSV", path) 392 | 393 | 394 | class SeasonStat(ScrapingUtilities): 395 | """ 396 | Scraper for the FanGraphs `Season Stat Grid`_ page. 397 | 398 | .. _Season Stat Grid: https://fangraphs.com/leaders/season-stat-grid 399 | """ 400 | __selections = {} 401 | __dropdowns = {} 402 | __waitfor = leaders_sel.SeasonStat.waitfor 403 | 404 | address = "https://fangraphs.com/leaders/season-stat-grid" 405 | 406 | def __init__(self): 407 | super().__init__(self.address, waitfor=self.__waitfor) 408 | 409 | def __enter__(self): 410 | self._browser_init() 411 | self.reset() 412 | self.__compile_selectors() 413 | return self 414 | 415 | def __exit__(self, exc_type, value, traceback): 416 | self.quit() 417 | 418 | def __compile_selectors(self): 419 | for cat, sel in leaders_sel.SeasonStat.selections.items(): 420 | self.__selections.setdefault( 421 | cat, selectors.Selections(self.soup, sel) 422 | ) 423 | for cat, sel in leaders_sel.SeasonStat.dropdowns.items(): 424 | self.__dropdowns.setdefault( 425 | cat, selectors.Dropdowns(self.soup, sel, "> ul > li") 426 | ) 427 | 428 | @classmethod 429 | def list_queries(cls): 430 | """ 431 | Lists the possible filter queries which can be sued to modify search results. 432 | 433 | :return: Filter queries which can be used to modify search results 434 | :type: list 435 | """ 436 | queries = [] 437 | queries.extend(list(cls.__selections)) 438 | queries.extend(list(cls.__dropdowns)) 439 | return queries 440 | 441 | def list_options(self, query: str): 442 | """ 443 | Lists the possible options which a filter query can be configured to. 444 | 445 | :param query: The filter query 446 | :return: Options which the filter query can be configured to 447 | :rtyp: list 448 | :raises FanGraphs.exceptions.InvalidFilterQuery: Argument ``query`` is invalid 449 | """ 450 | query = query.lower() 451 | if query in self.__selections: 452 | options = self.__selections[query].list_options() 453 | elif query in self.__dropdowns: 454 | options = self.__dropdowns[query].list_options() 455 | else: 456 | raise fangraphs.exceptions.InvalidFilterQuery(query) 457 | return options 458 | 459 | def current_option(self, query: str): 460 | """ 461 | Retrieves the option which a filter query is currently configured to. 462 | 463 | :param query: The filter query 464 | :return: The option which the filter query is currently configured to 465 | :rtype: str 466 | :raises FanGraphs.exceptions.InvalidFilterQuery: Argument ``query`` is invalid 467 | """ 468 | query = query.lower() 469 | if query in self.__selections: 470 | option = self.__selections[query].current_option() 471 | elif query in self.__dropdowns: 472 | option = self.__dropdowns[query].current_option(opt_type=2) 473 | else: 474 | raise fangraphs.exceptions.InvalidFilterQuery(query) 475 | return option 476 | 477 | def configure(self, query: str, option: str): 478 | """ 479 | Configures a filter query to a specified option. 480 | 481 | :param query: The filter query 482 | :param option: The option to configure ``query`` to 483 | :raises FanGraphs.exceptions.InvalidFilterQuery: Invalid argument ``query`` 484 | """ 485 | query = query.lower() 486 | self._close_ad() 487 | if query in self.__selections: 488 | self.__selections[query].configure(self.page, option) 489 | elif query in self.__dropdowns: 490 | self.__dropdowns[query].configure(self.page, option) 491 | else: 492 | raise fangraphs.exceptions.InvalidFilterQuery(query) 493 | self._refresh_parser() 494 | 495 | def _write_table_headers(self, writer: csv.writer): 496 | """ 497 | Writes the headers of the data table to the CSV file. 498 | 499 | :param writer: The ``csv.writer`` object 500 | """ 501 | elems = self.soup.select(".table-scroll thead tr th") 502 | headers = [e.getText() for e in elems] 503 | writer.writerow(headers) 504 | 505 | def _write_table_rows(self, writer: csv.writer): 506 | """ 507 | Iterates through the rows of the current data table. 508 | The data in each row is written to the CSV file. 509 | 510 | :param writer: The ``csv.writer`` object 511 | """ 512 | row_elems = self.soup.select(".table-scroll tbody tr") 513 | for row in row_elems: 514 | elems = row.select("td") 515 | items = [e.getText() for e in elems] 516 | writer.writerow(items) 517 | 518 | def export(self, path=""): 519 | """ 520 | Scrapes and saves the data from the table of the current leaderboards. 521 | The data will be exported as a CSV file and the file will be saved to *out/*. 522 | The file will be saved to the filepath ``path``, if specified. 523 | Otherwise, the file will be saved to the filepath *out/%d.%m.%y %H.%M.%S.csv*. 524 | 525 | *Note: This is a 'manual' export of the data. 526 | In other words, the data is scraped from the table. 527 | This is unlike other forms of export where a button is clicked. 528 | Thus, there will be no record of a download when the data is exported.* 529 | 530 | :param path: The path to save the exported file to 531 | """ 532 | self._close_ad() 533 | if not path or os.path.splitext(path)[1] != ".csv": 534 | path = "out/{}.csv".format( 535 | datetime.datetime.now().strftime("%d.%m.%y %H.%M.%S") 536 | ) 537 | total_pages = int( 538 | self.soup.select( 539 | ".table-page-control:nth-last-child(1) > .table-control-total" 540 | )[0].getText() 541 | ) 542 | with open(path, "w", newline="") as file: 543 | writer = csv.writer(file) 544 | self._write_table_headers(writer) 545 | for _ in range(0, total_pages): 546 | self._write_table_rows(writer) 547 | self.page.click( 548 | ".table-page-control:nth-last-child(1) > .next" 549 | ) 550 | self._refresh_parser() 551 | 552 | 553 | class Splits(ScrapingUtilities): 554 | """ 555 | Scraper for the FanGraphs `Splits Leaderboards`_ page. 556 | 557 | .. _Splits Leaderboards: https://fangraphs.com/leaders/splits-leaderboards 558 | """ 559 | __selections = {} 560 | __dropdowns = {} 561 | __splits = {} 562 | __quick_splits = leaders_sel.Splits.quick_splits 563 | __switches = {} 564 | __waitfor = leaders_sel.Splits.waitfor 565 | 566 | address = "https://fangraphs.com/leaders/splits-leaderboards" 567 | 568 | def __init__(self): 569 | super().__init__(self.address, waitfor=self.__waitfor) 570 | 571 | def __enter__(self): 572 | self._browser_init() 573 | self.reset() 574 | self.__compile_selectors() 575 | 576 | self.set_filter_group("Show All") 577 | self.configure("auto_pt", "False", autoupdate=True) 578 | return self 579 | 580 | def __exit__(self, exc_type, value, traceback): 581 | self.quit() 582 | 583 | def __compile_selectors(self): 584 | for cat, sel in leaders_sel.Splits.selections.items(): 585 | self.__selections.setdefault( 586 | cat, selectors.Selections(self.soup, sel) 587 | ) 588 | for cat, sel in leaders_sel.Splits.dropdowns.items(): 589 | self.__dropdowns.setdefault( 590 | cat, selectors.Dropdowns(self.soup, sel, "> ul > li") 591 | ) 592 | for cat, sel in leaders_sel.Splits.splits.items(): 593 | self.__splits.setdefault( 594 | cat, selectors.Dropdowns(self.soup, sel, "> ul > li") 595 | ) 596 | for cat, sel in leaders_sel.Splits.switches.items(): 597 | self.__switches.setdefault( 598 | cat, selectors.Switches(self.soup, sel) 599 | ) 600 | 601 | @classmethod 602 | def list_queries(cls): 603 | """ 604 | Lists the possible filter queries which can be used to modify search results. 605 | 606 | :return: Filter queries which can be used to modify search results 607 | :rtype: list 608 | """ 609 | queries = [] 610 | queries.extend(list(cls.__selections)) 611 | queries.extend(list(cls.__dropdowns)) 612 | queries.extend(list(cls.__splits)) 613 | queries.extend(list(cls.__switches)) 614 | return queries 615 | 616 | def list_options(self, query: str): 617 | """ 618 | Lists the possible options which a filter query can be configured to. 619 | 620 | :param query: The filter query 621 | :return: Options which the filter query can be configured to 622 | :rtype: list 623 | :raises FanGraphs.exceptions.InvalidFilterQuery: Invalid argument ``query`` 624 | """ 625 | query = query.lower() 626 | if query in self.__selections: 627 | options = self.__selections[query].list_options() 628 | elif query in self.__dropdowns: 629 | options = self.__dropdowns[query].list_options() 630 | elif query in self.__splits: 631 | options = self.__splits[query].list_options() 632 | elif query in self.__switches: 633 | options = ["True", "False"] 634 | else: 635 | raise fangraphs.exceptions.InvalidFilterQuery(query) 636 | return options 637 | 638 | def current_option(self, query: str): 639 | """ 640 | Retrieves the option(s) which a filter query is currently set to. 641 | 642 | Most dropdown- and split-class filter queries can be configured to multiple options. 643 | For those filter classes, a list is returned, while other filter classes return a string. 644 | 645 | - Selection-class: ``str`` 646 | - Dropdown-class: ``list`` 647 | - Split-class: ``list`` 648 | - Switch-class: ``str`` 649 | 650 | :param query: The filter query being retrieved of its current option 651 | :return: The option(s) which the filter query is currently set to 652 | :rtype: str or list 653 | :raises FanGraphs.exceptions.InvalidFilterQuery: Invalid argument ``query`` 654 | """ 655 | query = query.lower() 656 | if query in self.__selections: 657 | option = self.__selections[query].current_option() 658 | elif query in self.__dropdowns: 659 | option = self.__dropdowns[query].current_option(opt_type=2, multiple=True) 660 | elif query in self.__splits: 661 | option = self.__splits[query].current_option(opt_type=2, multiple=True) 662 | elif query in self.__switches: 663 | option = self.__switches[query].current_option(opt_type=2) 664 | else: 665 | raise fangraphs.exceptions.InvalidFilterQuery(query) 666 | return option 667 | 668 | def configure(self, query: str, option: str, *, autoupdate=False): 669 | """ 670 | Configures a filter query to a specified option. 671 | 672 | :param query: The filter query to be configured 673 | :param option: The option to set the filter query to 674 | :param autoupdate: If ``True``, :py:meth:`update` will be called following configuration 675 | :raises FanGraphs.exceptions.InvalidFilterQuery: Invalid argument ``query`` 676 | """ 677 | self._close_ad() 678 | query = query.lower() 679 | if query in self.__selections: 680 | self.__selections[query].configure(self.page, option) 681 | elif query in self.__dropdowns: 682 | self.__dropdowns[query].configure(self.page, option) 683 | elif query in self.__splits: 684 | self.__splits[query].configure(self.page, option) 685 | elif query in self.__switches: 686 | options = [o.lower() for o in self.list_options(query)] 687 | if option.lower() not in options: 688 | raise fangraphs.exceptions.InvalidFilterOption(option) 689 | if option != self.current_option(query)[0].title(): 690 | self.page.click(self.__switches[query]) 691 | else: 692 | raise fangraphs.exceptions.InvalidFilterQuery(query) 693 | if autoupdate: 694 | self.update() 695 | self._refresh_parser() 696 | 697 | def update(self): 698 | """ 699 | Clicks the **Update** button of the page. 700 | All configured filters are submitted and the page is refreshed. 701 | 702 | :raises FanGraphs.exceptions.FilterUpdateIncapability: No filter queries to update 703 | """ 704 | elem = self.page.query_selector("#button-update") 705 | if elem is None: 706 | raise fangraphs.exceptions.FilterUpdateIncapability() 707 | self._close_ad() 708 | elem.click() 709 | self._refresh_parser() 710 | 711 | def list_filter_groups(self): 712 | """ 713 | Lists the possible groups of filter queries which can be used 714 | 715 | :return: Names of the groups of filter queries 716 | :rtype: list 717 | """ 718 | elems = self.soup.select(".fgBin.splits-bin-controller div") 719 | groups = [e.getText() for e in elems] 720 | return groups 721 | 722 | def set_filter_group(self, group="Show All"): 723 | """ 724 | Configures the available filters to a specified group of filters 725 | 726 | :param group: The name of the group of filters 727 | """ 728 | selector = ".fgBin.splits-bin-controller div" 729 | elems = self.soup.select(selector) 730 | options = [e.getText() for e in elems] 731 | try: 732 | index = options.index(group) 733 | except ValueError as err: 734 | raise fangraphs.exceptions.InvalidFilterGroup(group) from err 735 | self._close_ad() 736 | elem = self.page.query_selector_all(selector)[index] 737 | elem.click() 738 | 739 | def reset_filters(self): 740 | """ 741 | Resets filters to the original option(s). 742 | This does not affect the following filter queries: 743 | 744 | - ``group`` 745 | - ``stat`` 746 | - ``type`` 747 | - ``groupby`` 748 | - ``preset_range`` 749 | - ``auto_pt`` 750 | - ``split_teams`` 751 | """ 752 | elem = self.page.query_selector( 753 | "#stack-buttons .fgButton.small:nth-last-child(1)" 754 | ) 755 | if elem is None: 756 | return 757 | self._close_ad() 758 | elem.click() 759 | 760 | @classmethod 761 | def list_quick_splits(cls): 762 | """ 763 | Lists all the quick splits which can be used. 764 | Quick splits allow for the configuration of multiple filter queries at once. 765 | 766 | :return: All available quick splits 767 | :rtype: list 768 | """ 769 | return list(cls.__quick_splits) 770 | 771 | def set_to_quick_split(self, quick_split: str, autoupdate=True): 772 | """ 773 | Invokes the configuration of a quick split. 774 | All filter queries affected by :py:meth:`reset_filters` are reset prior to configuration. 775 | This action is performed by the FanGraphs API and cannot be prevented. 776 | 777 | :param quick_split: The quick split to invoke 778 | :param autoupdate: If ``True``, :py:meth:`reset_filters` will be called 779 | :raises FanGraphs.exceptions.InvalidQuickSplits: Invalid argument ``quick_split`` 780 | """ 781 | quick_split = quick_split.lower() 782 | try: 783 | selector = self.__quick_splits[quick_split] 784 | except ValueError as err: 785 | raise fangraphs.exceptions.InvalidQuickSplit(quick_split) from err 786 | self._close_ad() 787 | self.page.click(selector) 788 | if autoupdate: 789 | self.update() 790 | 791 | def export(self, path=""): 792 | """ 793 | Uses the **Export Data** button on the webpage to export the current leaderboard. 794 | The data will be exported as a CSV file and the file will be saved to *out/*. 795 | The file will be saved to the filepath ``path``, if specified. 796 | Otherwise, the file will be saved to the filepath *./out/%d.%m.%y %H.%M.%S.csv* 797 | 798 | :param path: The path to save the exported data to 799 | """ 800 | self.export_data(".data-export", path) 801 | 802 | 803 | class WAR(ScrapingUtilities): 804 | """ 805 | Scraper for the FanGraphs `Combined WAR Leaderboards`_ page. 806 | 807 | .. _Combined WAR Leaderboards: https://www.fangraphs.com/warleaders.aspx 808 | """ 809 | __dropdowns = {} 810 | __waitfor = leaders_sel.WAR.waitfor 811 | 812 | address = "https://fangraphs.com/warleaders.aspx" 813 | 814 | def __init__(self): 815 | super().__init__(self.address, waitfor=self.__waitfor) 816 | 817 | def __enter__(self): 818 | self._browser_init() 819 | self.reset() 820 | self.__compile_selectors() 821 | return self 822 | 823 | def __exit__(self, exc_type, value, traceback): 824 | self.quit() 825 | 826 | def __compile_selectors(self): 827 | for cat, sel in leaders_sel.WAR.dropdowns.items(): 828 | options = leaders_sel.WAR.dropdown_options[cat] 829 | self.__dropdowns.setdefault( 830 | cat, selectors.Dropdowns(self.soup, sel, "> div > ul > li", options) 831 | ) 832 | 833 | @classmethod 834 | def list_queries(cls): 835 | """ 836 | Lists the possible filter queries which can be used to modify search results. 837 | 838 | :return: Filter queries which can be used to modify search results 839 | :rtype: list 840 | """ 841 | queries = [] 842 | queries.extend(list(cls.__dropdowns)) 843 | return queries 844 | 845 | def list_options(self, query: str): 846 | """ 847 | Lists the possible options which a filter query can be configured to. 848 | 849 | :param query: The filter query 850 | :return: Options which the filter query can be configured to 851 | :rtype: list 852 | :raises FanGraphs.exceptions.InvalidFilterQuery: Invalid argument ``query`` 853 | """ 854 | query = query.lower() 855 | if query in self.__dropdowns: 856 | options = self.__dropdowns[query].list_options() 857 | else: 858 | raise fangraphs.exceptions.InvalidFilterQuery(query) 859 | return options 860 | 861 | def current_option(self, query: str): 862 | """ 863 | Retrieves the option which a filter query is currently set to. 864 | 865 | :param query: The filter query being retrieved of its current option 866 | :return: The option which the filter query is currently set to 867 | :rtype: str 868 | :raises FanGraphs.exceptions.InvalidFilterQuery: Invalid argument ``query`` 869 | """ 870 | query = query.lower() 871 | if query in self.__dropdowns: 872 | option = self.__dropdowns[query].current_option(opt_type=1) 873 | else: 874 | raise fangraphs.exceptions.InvalidFilterQuery(query) 875 | return option 876 | 877 | def configure(self, query: str, option: str): 878 | """ 879 | Configures a filter query to a specified option. 880 | 881 | :param query: The filter query to be configured 882 | :param option: The option to set the filter query to 883 | :raises FanGraphs.exceptions.InvalidFilterQuery: Invalid argument ``query`` 884 | """ 885 | query = query.lower() 886 | self._close_ad() 887 | if query in self.__dropdowns: 888 | self.__dropdowns[query].configure(self.page, option) 889 | else: 890 | raise fangraphs.exceptions.InvalidFilterQuery(query) 891 | self._refresh_parser() 892 | 893 | def export(self, path=""): 894 | """ 895 | Uses the **Export Data** button on the webpage to export the current leaderboard. 896 | The data will be exported as a CSV file and the file will be saved to *out/*. 897 | The file will be saved to the filepath ``path``, if specified. 898 | Otherwise, the file will be saved to the filepath *./out/%d.%m.%y %H.%M.%S.csv* 899 | 900 | :param path: The path to save the exported data to 901 | """ 902 | self.export_data("#WARBoard1_cmdCSV", path) 903 | --------------------------------------------------------------------------------