├── docs ├── CNAME ├── CHANGELOG.md ├── LICENSE.md ├── CONTRIBUTING.md ├── api │ ├── cache.md │ ├── utils.md │ ├── configuration.md │ └── load_functions.md ├── assets │ └── nflverse.png └── index.md ├── src └── nflreadpy │ ├── data │ ├── __init__.py │ ├── team_abbr_mapping.parquet │ ├── player_name_mapping.parquet │ └── team_abbr_mapping_norelocate.parquet │ ├── load_teams.py │ ├── load_trades.py │ ├── load_contracts.py │ ├── load_players.py │ ├── load_combine.py │ ├── load_draft_picks.py │ ├── load_schedules.py │ ├── load_officials.py │ ├── load_pbp.py │ ├── load_rosters.py │ ├── load_injuries.py │ ├── load_ftn_charting.py │ ├── load_snap_counts.py │ ├── load_rosters_weekly.py │ ├── load_depth_charts.py │ ├── load_participation.py │ ├── datasets.py │ ├── load_nextgen_stats.py │ ├── __init__.py │ ├── load_stats.py │ ├── utils_date.py │ ├── load_pfr_advstats.py │ ├── downloader.py │ ├── load_ffverse.py │ ├── config.py │ └── cache.py ├── tests ├── __init__.py └── test_integration.py ├── .claude └── settings.local.json ├── .pre-commit-config.yaml ├── .github ├── workflows │ ├── ci-docs.yaml │ ├── ci-publish.yaml │ └── ci-test.yaml ├── CONTRIBUTING.md └── ISSUE_TEMPLATE │ └── bug_form.yml ├── .gitignore ├── LICENSE.md ├── mkdocs.yml ├── CHANGELOG.md ├── pyproject.toml ├── CLAUDE.md └── README.md /docs/CNAME: -------------------------------------------------------------------------------- 1 | nflreadpy.nflverse.com 2 | -------------------------------------------------------------------------------- /docs/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | --8<-- "CHANGELOG.md" 2 | -------------------------------------------------------------------------------- /docs/LICENSE.md: -------------------------------------------------------------------------------- 1 | --8<-- "LICENSE.md" 2 | -------------------------------------------------------------------------------- /src/nflreadpy/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Placeholder 2 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Test suite for nflreadpy.""" 2 | -------------------------------------------------------------------------------- /docs/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | --8<-- ".github/CONTRIBUTING.md" 2 | -------------------------------------------------------------------------------- /docs/api/cache.md: -------------------------------------------------------------------------------- 1 | # Cache Management 2 | 3 | ::: nflreadpy.clear_cache 4 | -------------------------------------------------------------------------------- /docs/assets/nflverse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nflverse/nflreadpy/HEAD/docs/assets/nflverse.png -------------------------------------------------------------------------------- /docs/api/utils.md: -------------------------------------------------------------------------------- 1 | # Utilities 2 | 3 | ::: nflreadpy.get_current_season 4 | ::: nflreadpy.get_current_week -------------------------------------------------------------------------------- /src/nflreadpy/data/team_abbr_mapping.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nflverse/nflreadpy/HEAD/src/nflreadpy/data/team_abbr_mapping.parquet -------------------------------------------------------------------------------- /src/nflreadpy/data/player_name_mapping.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nflverse/nflreadpy/HEAD/src/nflreadpy/data/player_name_mapping.parquet -------------------------------------------------------------------------------- /src/nflreadpy/data/team_abbr_mapping_norelocate.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nflverse/nflreadpy/HEAD/src/nflreadpy/data/team_abbr_mapping_norelocate.parquet -------------------------------------------------------------------------------- /docs/api/configuration.md: -------------------------------------------------------------------------------- 1 | # Configuration 2 | 3 | ::: nflreadpy.config.NflreadpyConfig 4 | ::: nflreadpy.config.update_config 5 | ::: nflreadpy.config.get_config 6 | ::: nflreadpy.config.reset_config 7 | -------------------------------------------------------------------------------- /.claude/settings.local.json: -------------------------------------------------------------------------------- 1 | { 2 | "permissions": { 3 | "allow": [ 4 | "WebFetch(domain:github.com)", 5 | "WebFetch(domain:raw.githubusercontent.com)", 6 | "WebFetch(domain:nflreadr.nflverse.com)", 7 | "Bash(uv run pytest:*)", 8 | "Bash(uv run ruff:*)" 9 | ], 10 | "deny": [], 11 | "ask": [] 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | rev: v0.1.6 4 | hooks: 5 | - id: ruff 6 | args: [--fix] 7 | - id: ruff-format 8 | 9 | - repo: https://github.com/pre-commit/pre-commit-hooks 10 | rev: v4.4.0 11 | hooks: 12 | - id: trailing-whitespace 13 | - id: end-of-file-fixer 14 | - id: check-yaml 15 | - id: check-toml 16 | - id: check-merge-conflict 17 | - id: debug-statements 18 | -------------------------------------------------------------------------------- /src/nflreadpy/load_teams.py: -------------------------------------------------------------------------------- 1 | """Load NFL team data.""" 2 | 3 | import polars as pl 4 | 5 | from .downloader import get_downloader 6 | 7 | 8 | def load_teams() -> pl.DataFrame: 9 | """ 10 | Load NFL team information. 11 | 12 | Returns: 13 | Polars DataFrame with team data including abbreviations, names,\ 14 | colors, logos, and other team metadata. 15 | 16 | See Also: 17 | 18 | """ 19 | downloader = get_downloader() 20 | 21 | # Load teams data from nflverse-data repository 22 | df = downloader.download("nflverse-data", "teams/teams_colors_logos") 23 | 24 | return df 25 | -------------------------------------------------------------------------------- /src/nflreadpy/load_trades.py: -------------------------------------------------------------------------------- 1 | """Load NFL trades data.""" 2 | 3 | import polars as pl 4 | 5 | from .downloader import get_downloader 6 | 7 | 8 | def load_trades() -> pl.DataFrame: 9 | """ 10 | Load NFL trades data. 11 | 12 | Returns: 13 | Polars DataFrame with NFL trade information including players,\ 14 | teams, draft picks, and trade details. 
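Example:
    A minimal usage sketch (illustrative; assumes network access to the nflverse data releases):

        >>> import nflreadpy as nfl
        >>> trades = nfl.load_trades()   # full trades table, cached per nflreadpy config
        >>> trades.head()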
15 | 16 | See Also: 17 | 18 | 19 | Data Dictionary: 20 | 21 | """ 22 | downloader = get_downloader() 23 | 24 | df = downloader.download("nflverse-data", "trades/trades") 25 | 26 | return df 27 | -------------------------------------------------------------------------------- /.github/workflows/ci-docs.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://docs.astral.sh/uv/guides/integration/github/ 2 | 3 | on: 4 | push: 5 | branches: [main, master] 6 | workflow_dispatch: 7 | 8 | name: Build docs site 9 | 10 | jobs: 11 | ci-build: 12 | runs-on: ubuntu-latest 13 | 14 | name: Build mkdocs site 15 | 16 | env: 17 | UV_PYTHON: "3.10" 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | 22 | - name: Install uv 23 | uses: astral-sh/setup-uv@v6 24 | with: 25 | enable-cache: true 26 | version: "0.8.17" 27 | 28 | - name: Install the project 29 | run: uv sync --locked --all-extras --dev 30 | 31 | - name: Build site 32 | run: uv run mkdocs gh-deploy --force 33 | -------------------------------------------------------------------------------- /docs/api/load_functions.md: -------------------------------------------------------------------------------- 1 | # Load Functions 2 | 3 | ::: nflreadpy.load_pbp 4 | ::: nflreadpy.load_player_stats 5 | ::: nflreadpy.load_team_stats 6 | ::: nflreadpy.load_schedules 7 | ::: nflreadpy.load_teams 8 | ::: nflreadpy.load_players 9 | ::: nflreadpy.load_rosters 10 | ::: nflreadpy.load_rosters_weekly 11 | ::: nflreadpy.load_snap_counts 12 | ::: nflreadpy.load_nextgen_stats 13 | ::: nflreadpy.load_ftn_charting 14 | ::: nflreadpy.load_participation 15 | ::: nflreadpy.load_draft_picks 16 | ::: nflreadpy.load_injuries 17 | ::: nflreadpy.load_contracts 18 | ::: nflreadpy.load_officials 19 | ::: nflreadpy.load_combine 20 | ::: nflreadpy.load_depth_charts 21 | ::: nflreadpy.load_trades 22 | ::: nflreadpy.load_pfr_advstats 23 | ::: nflreadpy.load_ff_playerids 24 | ::: nflreadpy.load_ff_rankings 25 | ::: nflreadpy.load_ff_opportunity 26 | -------------------------------------------------------------------------------- /src/nflreadpy/load_contracts.py: -------------------------------------------------------------------------------- 1 | """Load NFL contract data.""" 2 | 3 | import polars as pl 4 | 5 | from .downloader import get_downloader 6 | 7 | 8 | def load_contracts() -> pl.DataFrame: 9 | """ 10 | Load NFL historical contract data. 11 | 12 | Returns: 13 | Polars DataFrame with historical contract information including\ 14 | player details, contract terms, values, and team information. 
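Example:
    An illustrative sketch (assumes network access; output not shown):

        >>> import nflreadpy as nfl
        >>> contracts = nfl.load_contracts()
        >>> contracts.columns   # inspect available fields before filtering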
15 | 16 | See Also: 17 | 18 | 19 | Data Dictionary: 20 | 21 | """ 22 | downloader = get_downloader() 23 | 24 | # Load historical contracts data from nflverse-data repository 25 | df = downloader.download("nflverse-data", "contracts/historical_contracts") 26 | 27 | return df 28 | -------------------------------------------------------------------------------- /.github/workflows/ci-publish.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://docs.astral.sh/uv/guides/integration/github/ 2 | 3 | on: 4 | release: 5 | types: [published] 6 | workflow_dispatch: 7 | 8 | name: Publish package to PyPI 9 | 10 | jobs: 11 | ci-publish: 12 | runs-on: ubuntu-latest 13 | 14 | name: Upload to PyPI 15 | 16 | env: 17 | UV_PYTHON: "3.10" 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | 22 | - name: Install uv 23 | uses: astral-sh/setup-uv@v6 24 | with: 25 | enable-cache: true 26 | version: "0.8.17" 27 | 28 | - name: Install the project 29 | run: uv sync --locked --all-extras --dev 30 | 31 | - name: Run build 32 | run: uv build 33 | 34 | - name: Publish package 35 | uses: pypa/gh-action-pypi-publish@release/v1 36 | with: 37 | user: __token__ 38 | password: ${{ secrets.PYPI_API_TOKEN }} 39 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | .Python 7 | build/ 8 | develop-eggs/ 9 | dist/ 10 | downloads/ 11 | eggs/ 12 | .eggs/ 13 | lib/ 14 | lib64/ 15 | parts/ 16 | sdist/ 17 | var/ 18 | wheels/ 19 | pip-wheel-metadata/ 20 | share/python-wheels/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | MANIFEST 25 | site/* 26 | 27 | # Virtual environments 28 | .env 29 | .venv 30 | env/ 31 | venv/ 32 | ENV/ 33 | env.bak/ 34 | venv.bak/ 35 | 36 | # IDEs 37 | .vscode/ 38 | .idea/ 39 | *.swp 40 | *.swo 41 | *~ 42 | 43 | # Testing 44 | .coverage 45 | .pytest_cache/ 46 | .tox/ 47 | .nox/ 48 | htmlcov/ 49 | .coverage.* 50 | 51 | # mypy 52 | .mypy_cache/ 53 | .dmypy.json 54 | dmypy.json 55 | 56 | # Jupyter 57 | .ipynb_checkpoints 58 | 59 | # OS 60 | .DS_Store 61 | .DS_Store? 
62 | ._* 63 | .Spotlight-V100 64 | .Trashes 65 | ehthumbs.db 66 | Thumbs.db 67 | 68 | # Project specific 69 | *.log 70 | .cache/ 71 | .ruff_cache/ 72 | 73 | # uv 74 | .uv_cache/ 75 | -------------------------------------------------------------------------------- /.github/workflows/ci-test.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://docs.astral.sh/uv/guides/integration/github/ 2 | 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | workflow_dispatch: 9 | 10 | name: Run CI tests 11 | 12 | jobs: 13 | ci-check: 14 | runs-on: ${{ matrix.os }} 15 | 16 | name: ${{ matrix.os }} (${{ matrix.python-version }}) 17 | 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | os: [macos-latest, ubuntu-latest, windows-latest] 22 | python-version: ["3.10", "3.11", "3.12", "3.13"] 23 | 24 | env: 25 | UV_PYTHON: ${{ matrix.python-version }} 26 | 27 | steps: 28 | - uses: actions/checkout@v4 29 | 30 | - name: Install uv 31 | uses: astral-sh/setup-uv@v6 32 | with: 33 | enable-cache: true 34 | version: "0.8.17" 35 | 36 | - name: Install the project 37 | run: uv sync --locked --all-extras --dev 38 | 39 | - name: Run tests 40 | run: uv run pytest tests -vv 41 | -------------------------------------------------------------------------------- /src/nflreadpy/load_players.py: -------------------------------------------------------------------------------- 1 | """Load NFL player data.""" 2 | 3 | import polars as pl 4 | 5 | from .downloader import get_downloader 6 | 7 | 8 | def load_players() -> pl.DataFrame: 9 | """ 10 | Load NFL player information. 11 | 12 | This is a comprehensive source of player information including basic details, 13 | draft information, positions, and ID mappings across multiple data sources 14 | (GSIS, PFR, PFF, OTC, ESB, ESPN). 15 | 16 | Returns: 17 | Polars DataFrame with player data - one row per player with comprehensive \ 18 | player information including names, physical stats, draft info, and \ 19 | cross-platform ID mappings. 20 | 21 | See Also: 22 | 23 | 24 | Data Dictionary: 25 | 26 | """ 27 | downloader = get_downloader() 28 | 29 | # Load players data from nflverse-data repository 30 | df = downloader.download("nflverse-data", "players/players") 31 | 32 | return df 33 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 nflreadpy contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/nflreadpy/load_combine.py: -------------------------------------------------------------------------------- 1 | """Load NFL Combine data.""" 2 | 3 | import polars as pl 4 | 5 | from .downloader import get_downloader 6 | from .utils_date import get_current_season 7 | 8 | 9 | def load_combine(seasons: int | list[int] | bool | None = True) -> pl.DataFrame: 10 | """ 11 | Load NFL Combine data. 12 | 13 | Args: 14 | seasons: Season(s) to load. If True (default), loads all available data. 15 | If int or list of ints, loads specified season(s). 16 | If None, loads current season. 17 | 18 | Returns: 19 | Polars DataFrame with NFL Combine data including player measurements,\ 20 | test results (40-yard dash, bench press, etc.), and draft information. 21 | 22 | See Also: 23 | 24 | 25 | Data Dictionary: 26 | 27 | """ 28 | downloader = get_downloader() 29 | 30 | # Load the full combine dataset 31 | df = downloader.download("nflverse-data", "combine/combine") 32 | 33 | # Filter by seasons if specified 34 | if seasons is not True: 35 | if seasons is None: 36 | seasons = [get_current_season()] 37 | elif isinstance(seasons, int): 38 | seasons = [seasons] 39 | 40 | # Filter the dataframe by season 41 | if "season" in df.columns: 42 | df = df.filter(pl.col("season").is_in(seasons)) 43 | 44 | return df 45 | -------------------------------------------------------------------------------- /src/nflreadpy/load_draft_picks.py: -------------------------------------------------------------------------------- 1 | """Load NFL draft pick data.""" 2 | 3 | import polars as pl 4 | 5 | from .downloader import get_downloader 6 | from .utils_date import get_current_season 7 | 8 | 9 | def load_draft_picks(seasons: int | list[int] | bool | None = True) -> pl.DataFrame: 10 | """ 11 | Load NFL draft pick data. 12 | 13 | Data covers draft picks since 1980, sourced from Pro Football Reference. 14 | 15 | Args: 16 | seasons: Season(s) to load. If True (default), loads all available data. 17 | If int or list of ints, loads specified season(s). 18 | If None, loads current season. 19 | 20 | Returns: 21 | Polars DataFrame with draft pick data including draft year, round,\ 22 | pick number, player information, and team data. 
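Example:
    A short usage sketch (assumes network access; the season values are arbitrary examples):

        >>> import nflreadpy as nfl
        >>> picks_2024 = nfl.load_draft_picks(seasons=2024)            # a single draft class
        >>> picks_recent = nfl.load_draft_picks(seasons=[2022, 2023])  # multiple classes
        >>> picks_all = nfl.load_draft_picks()                         # default True: everything since 1980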
23 | 24 | See Also: 25 | 26 | 27 | Data Dictionary: 28 | 29 | """ 30 | downloader = get_downloader() 31 | 32 | # Load the full draft picks dataset 33 | df = downloader.download("nflverse-data", "draft_picks/draft_picks") 34 | 35 | # Filter by seasons if specified 36 | if seasons is not True: 37 | if seasons is None: 38 | seasons = [get_current_season()] 39 | elif isinstance(seasons, int): 40 | seasons = [seasons] 41 | 42 | # Filter the dataframe by season 43 | df = df.filter(pl.col("season").is_in(seasons)) 44 | 45 | return df 46 | -------------------------------------------------------------------------------- /src/nflreadpy/load_schedules.py: -------------------------------------------------------------------------------- 1 | """Load NFL schedule data.""" 2 | 3 | import polars as pl 4 | 5 | from .downloader import get_downloader 6 | from .utils_date import get_current_season 7 | 8 | 9 | def load_schedules(seasons: int | list[int] | bool | None = True) -> pl.DataFrame: 10 | """ 11 | Load NFL schedules. 12 | 13 | Args: 14 | seasons: Season(s) to load. If True (default), loads all available data. 15 | If int or list of ints, loads specified season(s). 16 | If None, loads current season. 17 | 18 | Returns: 19 | Polars DataFrame with schedule data. 20 | 21 | See Also: 22 | 23 | 24 | Data Dictionary: 25 | 26 | """ 27 | downloader = get_downloader() 28 | 29 | # Load the full games dataset 30 | df = downloader.download("nflverse-data", "schedules/games") 31 | 32 | # Filter by seasons if specified 33 | if seasons is not True: 34 | if seasons is None: 35 | seasons = [get_current_season()] 36 | elif isinstance(seasons, int): 37 | seasons = [seasons] 38 | 39 | # Filter the dataframe by season 40 | df = df.filter(pl.col("season").is_in(seasons)) 41 | 42 | # Validate and clean roof values (matching nflreadr logic) 43 | if "roof" in df.columns: 44 | valid_roof_values = ["dome", "outdoors", "closed", "open"] 45 | df = df.with_columns( 46 | pl.when(pl.col("roof").is_in(valid_roof_values)) 47 | .then(pl.col("roof")) 48 | .otherwise(None) 49 | .alias("roof") 50 | ) 51 | 52 | return df 53 | -------------------------------------------------------------------------------- /src/nflreadpy/load_officials.py: -------------------------------------------------------------------------------- 1 | """Load NFL officials data.""" 2 | 3 | import polars as pl 4 | 5 | from .downloader import get_downloader 6 | from .utils_date import get_current_season 7 | 8 | 9 | def load_officials(seasons: int | list[int] | bool | None = True) -> pl.DataFrame: 10 | """ 11 | Load NFL officials data. 12 | 13 | Data covers NFL officials assigned to games from 2015 onwards. 14 | 15 | Args: 16 | seasons: Season(s) to load. If True (default), loads all available data. 17 | If int or list of ints, loads specified season(s). 18 | If None, loads current season. 19 | 20 | Returns: 21 | Polars DataFrame with officials data including referee assignments,\ 22 | crew information, and game details. 
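Example:
    A small sketch (assumes network access; seasons earlier than 2015 are rejected by the
    validation in the function body):

        >>> import nflreadpy as nfl
        >>> officials_2023 = nfl.load_officials(seasons=2023)
        >>> officials_all = nfl.load_officials()   # default True: all seasons since 2015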
23 | 24 | See Also: 25 | 26 | """ 27 | downloader = get_downloader() 28 | 29 | # Load the full officials dataset 30 | df = downloader.download("nflverse-data", "officials/officials") 31 | 32 | # Filter by seasons if specified 33 | if seasons is not True: 34 | if seasons is None: 35 | seasons = [get_current_season()] 36 | elif isinstance(seasons, int): 37 | seasons = [seasons] 38 | 39 | # Validate seasons (2015 minimum) 40 | current_season = get_current_season() 41 | for season in seasons: 42 | if not isinstance(season, int) or season < 2015 or season > current_season: 43 | raise ValueError(f"Season must be between 2015 and {current_season}") 44 | 45 | # Filter the dataframe by season 46 | if "season" in df.columns: 47 | df = df.filter(pl.col("season").is_in(seasons)) 48 | 49 | return df 50 | -------------------------------------------------------------------------------- /src/nflreadpy/load_pbp.py: -------------------------------------------------------------------------------- 1 | """Load NFL play-by-play data.""" 2 | 3 | import polars as pl 4 | 5 | from .downloader import get_downloader 6 | from .utils_date import get_current_season 7 | 8 | 9 | def load_pbp(seasons: int | list[int] | bool | None = None) -> pl.DataFrame: 10 | """ 11 | Load NFL play-by-play data. 12 | 13 | Args: 14 | seasons: Season(s) to load. If None, loads current season. 15 | If True, loads all available data since 1999. 16 | If int or list of ints, loads specified season(s). 17 | 18 | Returns: 19 | Polars DataFrame with play-by-play data. 20 | 21 | See Also: 22 | 23 | 24 | Data Dictionary: 25 | 26 | """ 27 | if seasons is None: 28 | seasons = [get_current_season()] 29 | elif seasons is True: 30 | # Load all available seasons (1999 to current) 31 | current_season = get_current_season() 32 | seasons = list(range(1999, current_season + 1)) 33 | elif isinstance(seasons, int): 34 | seasons = [seasons] 35 | 36 | # Validate seasons 37 | current_season = get_current_season() 38 | for season in seasons: 39 | if not isinstance(season, int) or season < 1999 or season > current_season: 40 | raise ValueError(f"Season must be between 1999 and {current_season}") 41 | 42 | downloader = get_downloader() 43 | dataframes = [] 44 | 45 | for season in seasons: 46 | path = f"pbp/play_by_play_{season}" 47 | df = downloader.download("nflverse-data", path, season=season) 48 | dataframes.append(df) 49 | 50 | if len(dataframes) == 1: 51 | return dataframes[0] 52 | else: 53 | return pl.concat(dataframes, how="diagonal_relaxed") 54 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Many hands make light work! Here are some ways you can contribute to this project: 4 | 5 | ### Open an issue 6 | 7 | - You can [open an issue](https://github.com/nflverse/nflreadpy/issues/new/choose) if you'd like to request a specific function or report a bug/error. 8 | 9 | ### Fixing typos 10 | 11 | * You can fix typos, spelling mistakes, or grammatical errors in the documentation directly using the GitHub web interface, as long as the changes are made in the _source_ file. 12 | 13 | ### Bigger changes 14 | 15 | * If you want to make a bigger change, it's a good idea to first file an issue and make sure someone from the team agrees that it’s needed. 
If you’ve found a bug, please file an issue that illustrates the bug with a minimal 16 | [reprex](https://www.tidyverse.org/help/#reprex) (this will also help you write a unit test, if needed). 17 | 18 | * You can also bring up changes in the [nflverse discord](https://discord.com/invite/5Er2FBnnQa) and our team will be happy to discuss! 19 | 20 | ### Tooling and Code Practices 21 | 22 | This project uses the following tooling: 23 | 24 | - uv for dependency management 25 | - ruff for linting and formatting 26 | - mypy for type checking 27 | - pytest for testing 28 | - mkdocs for documentation site 29 | 30 | ```bash 31 | # Install development dependencies 32 | uv sync --dev 33 | 34 | # Run tests 35 | uv run pytest 36 | 37 | # Format code 38 | uv run ruff format 39 | 40 | # Type check 41 | uv run mypy src 42 | 43 | # Serve docs site locally 44 | uv run mkdocs serve 45 | 46 | # Build docs site 47 | uv run mkdocs build 48 | ``` 49 | 50 | ## Code of Conduct 51 | 52 | Please note that the nflverse project is released with a 53 | [Contributor Code of Conduct](https://github.com/nflverse/.github/blob/main/.github/CODE_OF_CONDUCT.md). By contributing to this 54 | project you agree to abide by its terms. 55 | -------------------------------------------------------------------------------- /src/nflreadpy/load_rosters.py: -------------------------------------------------------------------------------- 1 | """Load NFL roster data.""" 2 | 3 | import polars as pl 4 | 5 | from .downloader import get_downloader 6 | from .utils_date import get_current_season 7 | 8 | 9 | def load_rosters(seasons: int | list[int] | bool | None = None) -> pl.DataFrame: 10 | """ 11 | Load NFL team rosters. 12 | 13 | Args: 14 | seasons: Season(s) to load. If None, loads current roster year. 15 | If True, loads all available data since 1920. 16 | If int or list of ints, loads specified season(s). 17 | 18 | Returns: 19 | Polars DataFrame with roster data. 20 | 21 | See Also: 22 | 23 | 24 | Data Dictionary: 25 | 26 | """ 27 | if seasons is None: 28 | seasons = [get_current_season(roster=True)] 29 | elif seasons is True: 30 | # Load all available seasons (1920 to current roster year) 31 | current_roster_year = get_current_season(roster=True) 32 | seasons = list(range(1920, current_roster_year + 1)) 33 | elif isinstance(seasons, int): 34 | seasons = [seasons] 35 | 36 | # Validate seasons 37 | current_roster_year = get_current_season(roster=True) 38 | for season in seasons: 39 | if not isinstance(season, int) or season < 1920 or season > current_roster_year: 40 | raise ValueError(f"Season must be between 1920 and {current_roster_year}") 41 | 42 | downloader = get_downloader() 43 | dataframes = [] 44 | 45 | for season in seasons: 46 | path = f"rosters/roster_{season}" 47 | df = downloader.download("nflverse-data", path, season=season) 48 | dataframes.append(df) 49 | 50 | if len(dataframes) == 1: 51 | return dataframes[0] 52 | else: 53 | return pl.concat(dataframes, how="diagonal_relaxed") 54 | -------------------------------------------------------------------------------- /src/nflreadpy/load_injuries.py: -------------------------------------------------------------------------------- 1 | """Load NFL injury data.""" 2 | 3 | import polars as pl 4 | 5 | from .downloader import get_downloader 6 | from .utils_date import get_current_season 7 | 8 | 9 | def load_injuries(seasons: int | list[int] | bool | None = None) -> pl.DataFrame: 10 | """ 11 | Load NFL injury data. 12 | 13 | Data available since 2009. 
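Example:
    A minimal sketch (assumes network access; the season value is an arbitrary example):

        >>> import nflreadpy as nfl
        >>> injuries_2023 = nfl.load_injuries(seasons=2023)
        >>> injuries_2023.head()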
14 | 15 | Args: 16 | seasons: Season(s) to load. If None, loads current season. 17 | If True, loads all available data since 2009. 18 | If int or list of ints, loads specified season(s). 19 | 20 | Returns: 21 | Polars DataFrame with injury data including player information,\ 22 | injury details, and status reports. 23 | 24 | See Also: 25 | 26 | 27 | Data Dictionary: 28 | 29 | """ 30 | if seasons is None: 31 | seasons = [get_current_season()] 32 | elif seasons is True: 33 | # Load all available seasons (2009 to current) 34 | current_season = get_current_season() 35 | seasons = list(range(2009, current_season + 1)) 36 | elif isinstance(seasons, int): 37 | seasons = [seasons] 38 | 39 | # Validate seasons 40 | current_season = get_current_season() 41 | for season in seasons: 42 | if not isinstance(season, int) or season < 2009 or season > current_season: 43 | raise ValueError(f"Season must be between 2009 and {current_season}") 44 | 45 | downloader = get_downloader() 46 | dataframes = [] 47 | 48 | for season in seasons: 49 | path = f"injuries/injuries_{season}" 50 | df = downloader.download("nflverse-data", path, season=season) 51 | dataframes.append(df) 52 | 53 | if len(dataframes) == 1: 54 | return dataframes[0] 55 | else: 56 | return pl.concat(dataframes, how="diagonal_relaxed") 57 | -------------------------------------------------------------------------------- /src/nflreadpy/load_ftn_charting.py: -------------------------------------------------------------------------------- 1 | """Load FTN charting data.""" 2 | 3 | import polars as pl 4 | 5 | from .downloader import get_downloader 6 | from .utils_date import get_current_season 7 | 8 | 9 | def load_ftn_charting(seasons: int | list[int] | bool | None = None) -> pl.DataFrame: 10 | """ 11 | Load FTN charting data. 12 | 13 | Data available since 2022. 14 | 15 | Args: 16 | seasons: Season(s) to load. If None, loads current season. 17 | If True, loads all available data since 2022. 18 | If int or list of ints, loads specified season(s). 19 | 20 | Returns: 21 | Polars DataFrame with FTN charting data including detailed\ 22 | play-by-play charting information and advanced metrics. 
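Example:
    A brief usage sketch (assumes network access; FTN charting begins in 2022):

        >>> import nflreadpy as nfl
        >>> ftn = nfl.load_ftn_charting(seasons=2023)
        >>> ftn.shape   # (n_rows, n_columns)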
23 | 24 | See Also: 25 | 26 | 27 | Data Dictionary: 28 | 29 | """ 30 | if seasons is None: 31 | seasons = [get_current_season()] 32 | elif seasons is True: 33 | # Load all available seasons (2022 to current) 34 | current_season = get_current_season() 35 | seasons = list(range(2022, current_season + 1)) 36 | elif isinstance(seasons, int): 37 | seasons = [seasons] 38 | 39 | # Validate seasons 40 | current_season = get_current_season() 41 | for season in seasons: 42 | if not isinstance(season, int) or season < 2022 or season > current_season: 43 | raise ValueError(f"Season must be between 2022 and {current_season}") 44 | 45 | downloader = get_downloader() 46 | dataframes = [] 47 | 48 | for season in seasons: 49 | path = f"ftn_charting/ftn_charting_{season}" 50 | df = downloader.download("nflverse-data", path, season=season) 51 | dataframes.append(df) 52 | 53 | if len(dataframes) == 1: 54 | return dataframes[0] 55 | else: 56 | return pl.concat(dataframes, how="diagonal_relaxed") 57 | -------------------------------------------------------------------------------- /src/nflreadpy/load_snap_counts.py: -------------------------------------------------------------------------------- 1 | """Load NFL snap count data.""" 2 | 3 | import polars as pl 4 | 5 | from .downloader import get_downloader 6 | from .utils_date import get_current_season 7 | 8 | 9 | def load_snap_counts(seasons: int | list[int] | bool | None = None) -> pl.DataFrame: 10 | """ 11 | Load NFL snap count data. 12 | 13 | Data sourced from Pro Football Reference, available since 2012. 14 | 15 | Args: 16 | seasons: Season(s) to load. If None, loads current season. 17 | If True, loads all available data since 2012. 18 | If int or list of ints, loads specified season(s). 19 | 20 | Returns: 21 | Polars DataFrame with snap count data including player information,\ 22 | offensive/defensive snaps, and snap percentages. 23 | 24 | See Also: 25 | 26 | 27 | Data Dictionary: 28 | 29 | """ 30 | if seasons is None: 31 | seasons = [get_current_season()] 32 | elif seasons is True: 33 | # Load all available seasons (2012 to current) 34 | current_season = get_current_season() 35 | seasons = list(range(2012, current_season + 1)) 36 | elif isinstance(seasons, int): 37 | seasons = [seasons] 38 | 39 | # Validate seasons 40 | current_season = get_current_season() 41 | for season in seasons: 42 | if not isinstance(season, int) or season < 2012 or season > current_season: 43 | raise ValueError(f"Season must be between 2012 and {current_season}") 44 | 45 | downloader = get_downloader() 46 | dataframes = [] 47 | 48 | for season in seasons: 49 | path = f"snap_counts/snap_counts_{season}" 50 | df = downloader.download("nflverse-data", path, season=season) 51 | dataframes.append(df) 52 | 53 | if len(dataframes) == 1: 54 | return dataframes[0] 55 | else: 56 | return pl.concat(dataframes, how="diagonal_relaxed") 57 | -------------------------------------------------------------------------------- /src/nflreadpy/load_rosters_weekly.py: -------------------------------------------------------------------------------- 1 | """Load NFL weekly rosters data.""" 2 | 3 | import polars as pl 4 | 5 | from .downloader import get_downloader 6 | from .utils_date import get_current_season 7 | 8 | 9 | def load_rosters_weekly(seasons: int | list[int] | bool | None = None) -> pl.DataFrame: 10 | """ 11 | Load NFL weekly rosters data. 12 | 13 | Data available from 2002 onwards. 14 | 15 | Args: 16 | seasons: Season(s) to load. If None, loads current season. 
17 | If True, loads all available data since 2002. 18 | If int or list of ints, loads specified season(s). 19 | 20 | Returns: 21 | Polars DataFrame with weekly roster data including player status\ 22 | changes, injury designations, and week-by-week roster moves. 23 | 24 | See Also: 25 | 26 | 27 | Data Dictionary: 28 | 29 | """ 30 | if seasons is None: 31 | seasons = [get_current_season()] 32 | elif seasons is True: 33 | # Load all available seasons (2002 to current) 34 | current_season = get_current_season() 35 | seasons = list(range(2002, current_season + 1)) 36 | elif isinstance(seasons, int): 37 | seasons = [seasons] 38 | 39 | # Validate seasons 40 | current_season = get_current_season() 41 | for season in seasons: 42 | if not isinstance(season, int) or season < 2002 or season > current_season: 43 | raise ValueError(f"Season must be between 2002 and {current_season}") 44 | 45 | downloader = get_downloader() 46 | dataframes = [] 47 | 48 | for season in seasons: 49 | path = f"weekly_rosters/roster_weekly_{season}" 50 | df = downloader.download("nflverse-data", path, season=season) 51 | dataframes.append(df) 52 | 53 | if len(dataframes) == 1: 54 | return dataframes[0] 55 | else: 56 | return pl.concat(dataframes, how="diagonal_relaxed") 57 | -------------------------------------------------------------------------------- /src/nflreadpy/load_depth_charts.py: -------------------------------------------------------------------------------- 1 | """Load NFL depth charts data.""" 2 | 3 | import polars as pl 4 | 5 | from .downloader import get_downloader 6 | from .utils_date import get_current_season 7 | 8 | 9 | def load_depth_charts(seasons: int | list[int] | bool | None = None) -> pl.DataFrame: 10 | """ 11 | Load NFL depth charts data. 12 | 13 | Data available from 2001 onwards. 14 | 15 | Args: 16 | seasons: Season(s) to load. If None, loads current season. 17 | If True, loads all available data since 2001. 18 | If int or list of ints, loads specified season(s). 19 | 20 | Returns: 21 | Polars DataFrame with depth charts data including player positions,\ 22 | depth chart rankings, and team information. 
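Example:
    An illustrative sketch (assumes network access; season values are arbitrary examples):

        >>> import nflreadpy as nfl
        >>> depth = nfl.load_depth_charts(seasons=[2022, 2023])
        >>> depth.head()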
23 | 24 | See Also: 25 | 26 | 27 | Data Dictionary: 28 | 29 | """ 30 | if seasons is None: 31 | seasons = [get_current_season(roster=True)] 32 | elif seasons is True: 33 | # Load all available seasons (2001 to current) 34 | current_season = get_current_season(roster=True) 35 | seasons = list(range(2001, current_season + 1)) 36 | elif isinstance(seasons, int): 37 | seasons = [seasons] 38 | 39 | # Validate seasons 40 | current_season = get_current_season(roster=True) 41 | for season in seasons: 42 | if not isinstance(season, int) or season < 2001 or season > current_season: 43 | raise ValueError(f"Season must be between 2001 and {current_season}") 44 | 45 | downloader = get_downloader() 46 | dataframes = [] 47 | 48 | for season in seasons: 49 | path = f"depth_charts/depth_charts_{season}" 50 | df = downloader.download("nflverse-data", path, season=season) 51 | dataframes.append(df) 52 | 53 | if len(dataframes) == 1: 54 | return dataframes[0] 55 | else: 56 | return pl.concat(dataframes, how="diagonal_relaxed") 57 | -------------------------------------------------------------------------------- /src/nflreadpy/load_participation.py: -------------------------------------------------------------------------------- 1 | """Load NFL participation data.""" 2 | 3 | import polars as pl 4 | 5 | from .downloader import get_downloader 6 | from .utils_date import get_current_season, get_current_week 7 | 8 | 9 | def load_participation(seasons: int | list[int] | bool | None = None) -> pl.DataFrame: 10 | """ 11 | Load NFL participation data. 12 | 13 | Data available since 2016. 14 | 15 | Args: 16 | seasons: Season(s) to load. If None, loads current season. 17 | If True, loads all available data since 2016. 18 | If int or list of ints, loads specified season(s). 19 | 20 | Returns: 21 | Polars DataFrame with participation data including player involvement\ 22 | on specific plays and snap participation details. 
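Example:
    A minimal sketch (assumes network access; the most recent season may be unavailable until
    FTN publishes it after the final week):

        >>> import nflreadpy as nfl
        >>> participation = nfl.load_participation(seasons=2023)
        >>> participation.head()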
23 | 24 | See Also: 25 | 26 | 27 | Data Dictionary: 28 | 29 | """ 30 | # we expect to have participation data available after the final week of the 31 | # season from FTN 32 | current_week = get_current_week(use_date=False) 33 | if current_week == 22: 34 | max_season = get_current_season() 35 | else: 36 | max_season = get_current_season() - 1 37 | 38 | if seasons is None: 39 | seasons = [max_season] 40 | elif seasons is True: 41 | # Load all available seasons (2016 to max_season) 42 | seasons = list(range(2016, max_season + 1)) 43 | elif isinstance(seasons, int): 44 | seasons = [seasons] 45 | 46 | # Validate seasons 47 | for season in seasons: 48 | if not isinstance(season, int) or season < 2016 or season > max_season: 49 | raise ValueError(f"Season must be between 2016 and {max_season}") 50 | 51 | downloader = get_downloader() 52 | dataframes = [] 53 | 54 | for season in seasons: 55 | path = f"pbp_participation/pbp_participation_{season}" 56 | df = downloader.download("nflverse-data", path, season=season) 57 | dataframes.append(df) 58 | 59 | if len(dataframes) == 1: 60 | return dataframes[0] 61 | else: 62 | return pl.concat(dataframes, how="diagonal_relaxed") 63 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: nflreadpy 2 | site_description: A Python package for downloading NFL data from nflverse repositories 3 | site_url: https://nflreadpy.nflverse.com 4 | repo_url: https://github.com/nflverse/nflreadpy 5 | repo_name: nflverse/nflreadpy 6 | 7 | theme: 8 | name: material 9 | font: 10 | text: IBM Plex Sans 11 | code: IBM Plex Mono 12 | logo: assets/nflverse.png 13 | favicon: assets/nflverse.png 14 | 15 | palette: 16 | - scheme: default 17 | primary: black 18 | accent: indigo 19 | toggle: 20 | icon: material/brightness-7 21 | name: Switch to dark mode 22 | - scheme: slate 23 | primary: black 24 | accent: indigo 25 | toggle: 26 | icon: material/brightness-4 27 | name: Switch to light mode 28 | features: 29 | - navigation.tabs 30 | - navigation.tabs.sticky 31 | - navigation.sections 32 | - navigation.expand 33 | - navigation.path 34 | - navigation.top 35 | - search.highlight 36 | - content.code.copy 37 | 38 | nav: 39 | - Home: index.md 40 | - API Reference: 41 | - Load Functions: api/load_functions.md 42 | - Configuration: api/configuration.md 43 | - Cache Management: api/cache.md 44 | - Utilities: api/utils.md 45 | - Changelog: CHANGELOG.md 46 | - Automation Status: https://nflreadr.nflverse.com/articles/nflverse_data_schedule.html 47 | - Data Dictionaries: https://nflreadr.nflverse.com/articles/index.html 48 | - nflverse GitHub: https://github.com/nflverse 49 | 50 | not_in_nav: | 51 | CONTRIBUTING.md 52 | LICENSE.md 53 | 54 | plugins: 55 | - search 56 | - mkdocstrings: 57 | handlers: 58 | python: 59 | options: 60 | docstring_style: google 61 | show_source: true 62 | show_root_heading: true 63 | show_root_toc_entry: false 64 | merge_init_into_class: true 65 | show_signature_annotations: true 66 | separate_signature: true 67 | 68 | markdown_extensions: 69 | - admonition 70 | - pymdownx.details 71 | - pymdownx.superfences 72 | - pymdownx.highlight: 73 | anchor_linenums: true 74 | - pymdownx.inlinehilite 75 | - pymdownx.snippets 76 | - pymdownx.tabbed: 77 | alternate_style: true 78 | - toc: 79 | permalink: true 80 | toc_depth: 2 81 | -------------------------------------------------------------------------------- /src/nflreadpy/datasets.py: 
-------------------------------------------------------------------------------- 1 | """Load nflreadpy datasets.""" 2 | 3 | import os.path 4 | from importlib import resources 5 | 6 | import polars as pl 7 | 8 | 9 | def data_path(dataset=None) -> str: 10 | """Get Path to nflreadpy Data Files. 11 | 12 | Returns: 13 | Path to file. Empty string if `dataset = None` and error if the file doesn't exist. 14 | """ 15 | if dataset is None: 16 | return "" 17 | with resources.path("nflreadpy.data", dataset + ".parquet") as f: 18 | data_file_path = f 19 | if os.path.isfile(data_file_path): 20 | return data_file_path 21 | else: 22 | raise FileNotFoundError(f"The file {data_file_path} doesn't exist!") 23 | 24 | 25 | def team_abbr_mapping() -> pl.DataFrame: 26 | """Alternate team abbreviation mappings 27 | 28 | A lookup table mapping common alternate team abbreviations. 29 | 30 | Returns: 31 | Polars DataFrame with two columns `name` and `value` where `value` reflects 32 | the standardized nflverse team abbreviation. 33 | 34 | See Also: 35 | 36 | 37 | """ 38 | return pl.read_parquet(data_path("team_abbr_mapping")) 39 | 40 | 41 | def team_abbr_mapping_norelocate() -> pl.DataFrame: 42 | """Alternate team abbreviation mappings, no relocation 43 | 44 | A lookup table mapping common alternate team abbreviations, 45 | but does not follow relocations to their current city. 46 | 47 | Returns: 48 | Polars DataFrame with two columns `name` and `value` where `value` reflects 49 | the standardized nflverse team abbreviation. 50 | 51 | See Also: 52 | 53 | 54 | """ 55 | return pl.read_parquet(data_path("team_abbr_mapping_norelocate")) 56 | 57 | 58 | def player_name_mapping() -> pl.DataFrame: 59 | """Alternate player name mappings 60 | 61 | A lookup table mapping common alternate player names. 62 | 63 | Returns: 64 | Polars DataFrame with two columns `name` and `value` where `value` reflects 65 | the standardized player name. 66 | 67 | See Also: 68 | 69 | 70 | """ 71 | return pl.read_parquet(data_path("player_name_mapping")) 72 | -------------------------------------------------------------------------------- /src/nflreadpy/load_nextgen_stats.py: -------------------------------------------------------------------------------- 1 | """Load NFL Next Gen Stats data.""" 2 | 3 | from typing import Literal 4 | 5 | import polars as pl 6 | 7 | from .downloader import get_downloader 8 | from .utils_date import get_current_season 9 | 10 | 11 | def load_nextgen_stats( 12 | seasons: int | list[int] | bool | None = None, 13 | stat_type: Literal["passing", "receiving", "rushing"] = "passing", 14 | ) -> pl.DataFrame: 15 | """ 16 | Load NFL Next Gen Stats data. 17 | 18 | Data available since 2016. 19 | 20 | Args: 21 | seasons: Season(s) to load. If None, loads current season. 22 | If True, loads all available data since 2016. 23 | If int or list of ints, loads specified season(s). 24 | stat_type: Type of stats to load. Options: "passing", "receiving", "rushing". 25 | 26 | Returns: 27 | Polars DataFrame with Next Gen Stats data including advanced metrics\ 28 | for passing, receiving, or rushing performance. 
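Example:
    A short usage sketch (assumes network access; the season value is an arbitrary example):

        >>> import nflreadpy as nfl
        >>> ngs_passing = nfl.load_nextgen_stats(seasons=2023, stat_type="passing")
        >>> ngs_receiving = nfl.load_nextgen_stats(seasons=2023, stat_type="receiving")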
29 | 30 | See Also: 31 | 32 | 33 | Data Dictionary: 34 | 35 | """ 36 | if stat_type not in ["passing", "receiving", "rushing"]: 37 | raise ValueError("stat_type must be 'passing', 'receiving', or 'rushing'") 38 | 39 | if seasons is None: 40 | seasons = [get_current_season()] 41 | elif seasons is True: 42 | # Load all available seasons (2016 to current) 43 | current_season = get_current_season() 44 | seasons = list(range(2016, current_season + 1)) 45 | elif isinstance(seasons, int): 46 | seasons = [seasons] 47 | 48 | # Validate seasons 49 | current_season = get_current_season() 50 | for season in seasons: 51 | if not isinstance(season, int) or season < 2016 or season > current_season: 52 | raise ValueError(f"Season must be between 2016 and {current_season}") 53 | 54 | downloader = get_downloader() 55 | 56 | # Load the full dataset for the stat type 57 | path = f"nextgen_stats/ngs_{stat_type}" 58 | df = downloader.download("nflverse-data", path, stat_type=stat_type) 59 | 60 | # Filter by seasons 61 | if "season" in df.columns: 62 | df = df.filter(pl.col("season").is_in(seasons)) 63 | 64 | return df 65 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_form.yml: -------------------------------------------------------------------------------- 1 | name: 🐞 Report a Bug 2 | description: Report a bug/issue 3 | title: "[BUG] " 4 | body: 5 | - type: checkboxes 6 | attributes: 7 | label: Is there an existing issue for this? 8 | description: Please search to see if an issue already exists for the bug you encountered. 9 | options: 10 | - label: I have searched the existing issues 11 | required: true 12 | 13 | - type: checkboxes 14 | attributes: 15 | label: Have you installed the latest development version of the package(s) in question? 16 | description: Use `uv pip install "git+https://github.com/nflverse/nflreadpy"` to do this efficiently. 17 | options: 18 | - label: I have installed the latest development version of the package. 19 | required: true 20 | 21 | - type: dropdown 22 | attributes: 23 | label: If this is a data issue, have you tried clearing your nflverse cache? 24 | description: Use `nflreadpy.clear_cache()` to do this efficiently. 25 | options: 26 | - I have cleared my nflverse cache and the issue persists. 27 | - This is not a data-related issue. 28 | validations: 29 | required: true 30 | 31 | - type: input 32 | attributes: 33 | label: What version of the package do you have? 34 | description: Use `nflreadpy.__version__` to print out the current version. 35 | validations: 36 | required: true 37 | 38 | - type: textarea 39 | attributes: 40 | label: Describe the bug 41 | description: A clear and concise description of what the bug is. 42 | validations: 43 | required: true 44 | 45 | - type: textarea 46 | attributes: 47 | label: Reprex 48 | render: python 49 | description: | 50 | Please provide a short, reproducible example of the code you're trying to run. 51 | For more on reprexes, check out https://reprex.tidyverse.org 52 | validations: 53 | required: true 54 | 55 | - type: textarea 56 | attributes: 57 | label: Expected Behavior 58 | description: A concise description of what you expected to happen. 
59 | validations: 60 | required: true 61 | 62 | - type: textarea 63 | attributes: 64 | label: Screenshots 65 | description: If applicable/helpful for your problem 66 | render: markdown 67 | validations: 68 | required: false 69 | 70 | - type: textarea 71 | attributes: 72 | label: Additional context 73 | description: If applicable/helpful for your problem 74 | validations: 75 | required: false 76 | -------------------------------------------------------------------------------- /src/nflreadpy/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | nflreadpy: A Python package for downloading NFL data from nflverse repositories. 3 | 4 | This package provides a Python interface to access NFL data from various 5 | nflverse repositories, with caching, progress tracking, and data validation. 6 | """ 7 | 8 | from importlib.metadata import version 9 | 10 | __version__ = version("nflreadpy") 11 | 12 | from .cache import clear_cache 13 | from .datasets import ( 14 | team_abbr_mapping, 15 | team_abbr_mapping_norelocate, 16 | player_name_mapping, 17 | ) 18 | from .load_combine import load_combine 19 | from .load_contracts import load_contracts 20 | from .load_depth_charts import load_depth_charts 21 | from .load_draft_picks import load_draft_picks 22 | from .load_ffverse import load_ff_opportunity, load_ff_playerids, load_ff_rankings 23 | from .load_ftn_charting import load_ftn_charting 24 | from .load_injuries import load_injuries 25 | from .load_nextgen_stats import load_nextgen_stats 26 | from .load_officials import load_officials 27 | from .load_participation import load_participation 28 | from .load_pbp import load_pbp 29 | from .load_pfr_advstats import load_pfr_advstats 30 | from .load_players import load_players 31 | from .load_rosters import load_rosters 32 | from .load_rosters_weekly import load_rosters_weekly 33 | from .load_schedules import load_schedules 34 | from .load_snap_counts import load_snap_counts 35 | from .load_stats import load_player_stats, load_team_stats 36 | from .load_teams import load_teams 37 | from .load_trades import load_trades 38 | from .utils_date import get_current_season, get_current_week 39 | 40 | __all__ = [ 41 | # Core loading functions 42 | "load_pbp", 43 | "load_player_stats", 44 | "load_team_stats", 45 | "load_rosters", 46 | "load_schedules", 47 | "load_teams", 48 | "load_players", 49 | "load_draft_picks", 50 | "load_injuries", 51 | "load_contracts", 52 | "load_snap_counts", 53 | "load_nextgen_stats", 54 | "load_officials", 55 | "load_participation", 56 | "load_pfr_advstats", 57 | "load_combine", 58 | "load_depth_charts", 59 | "load_trades", 60 | "load_ftn_charting", 61 | "load_rosters_weekly", 62 | # ffverse functions 63 | "load_ff_playerids", 64 | "load_ff_rankings", 65 | "load_ff_opportunity", 66 | # Utility functions 67 | "get_current_season", 68 | "get_current_week", 69 | "clear_cache", 70 | # datasets 71 | "team_abbr_mapping", 72 | "team_abbr_mapping_norelocate", 73 | "player_name_mapping", 74 | ] 75 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # nflreadpy devel 2 | 3 | * update `load_participation()` season assertions to use improved week-level logic. 
4 | * Added `team_abbr_mapping()`, `team_abbr_mapping_norelocate()`, and `player_name_mapping()` 5 | 6 | --- 7 | # nflreadpy v0.1.5 8 | Release date: 2025-11-19 9 | 10 | * Adds `load_pfr_advstats()` 11 | * Added argument `use_date` to get_current_week() to implement both a date-based approach and a schedules-based approach to current week calculations. (#39) 12 | 13 | --- 14 | 15 | # nflreadpy v0.1.4 16 | Release date: 2025-10-19 17 | 18 | Changes: 19 | 20 | * load_teams() now points to nflverse-data@teams for compat with nflreadr (breaking change) 21 | * load_schedules() now points to nflverse-data@schedules to access parquet version of file 22 | * load_trades() now points to nflverse-data@trades to access parquet version of file 23 | 24 | --- 25 | 26 | # nflreadpy v0.1.3 27 | Release date: 2025-09-20 28 | 29 | Minor tweaks: 30 | 31 | * config defaults to verbose = False 32 | * load_nextgen_stats() argument order updated for consistency 33 | * summary_level arg in stats functions now a Literal type instead of a plain string 34 | 35 | --- 36 | 37 | # nflreadpy v0.1.2 38 | Release date: 2025-09-15 39 | 40 | First version of nflreadpy, a port of nflreadr and a successor to nfl_data_py, 41 | with the goal of starting fresh and maintaining API compatibility with nflreadr 42 | so that it's easier for nflverse maintainers to keep it in parallel with nflreadr 43 | updates. This first version was mostly written with the help of Claude Code. 44 | 45 | ## New functions 46 | The following functions are included in this release: 47 | 48 | - load_pbp() - play-by-play data 49 | - load_player_stats() - player game or season statistics 50 | - load_team_stats() - team game or season statistics 51 | - load_schedules() - game schedules and results 52 | - load_players() - player information 53 | - load_rosters() - team rosters 54 | - load_rosters_weekly() - team rosters by season-week 55 | - load_snap_counts() - snap counts 56 | - load_nextgen_stats() - advanced stats from nextgenstats.nfl.com 57 | - load_ftn_charting() - charted stats from ftnfantasy.com/data 58 | - load_participation() - participation data (historical) 59 | - load_draft_picks() - nfl draft picks 60 | - load_injuries() - injury statuses and practice participation 61 | - load_contracts() - historical contract data from OTC 62 | - load_officials() - officials for each game 63 | - load_combine() - nfl combine results 64 | - load_depth_charts() - depth charts 65 | - load_trades() - trades 66 | - load_ff_playerids() - ffverse/dynastyprocess player ids 67 | - load_ff_rankings() - fantasypros rankings 68 | - load_ff_opportunity() - expected yards, touchdowns, and fantasy points 69 | - clear_cache() - Clear cached data 70 | - get_current_season() - Get current NFL season 71 | - get_current_week() - Get current NFL week 72 | 73 | ## Feature comparisons 74 | 75 | - [feature comparison with nflreadr](https://github.com/nflverse/nflreadpy/issues/2) 76 | - [feature comparison with nfl_data_py](https://github.com/nflverse/nflreadpy/issues/6) 77 | 78 | ## Acknowledgements 79 | 80 | Thanks to @mrcaseb, @guga31bb, @guidopetri, and @akeaswaran for reviewing the 81 | code in this release, and to @alecglen and @cooperdff for their stewardship of 82 | the nfl_data_py package.
83 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["uv_build>=0.8.15,<0.9.0"] 3 | build-backend = "uv_build" 4 | 5 | [project] 6 | name = "nflreadpy" 7 | version = "0.1.6" 8 | description = "A Python package for downloading NFL data from nflverse repositories" 9 | readme = "README.md" 10 | license = "MIT" 11 | requires-python = ">=3.10" 12 | authors = [ 13 | {name = "Tan Ho", email = "nflverse@tanho.ca"}, 14 | ] 15 | keywords = ["nfl", "football", "sports", "data", "analytics"] 16 | classifiers = [ 17 | "Development Status :: 4 - Beta", 18 | "Intended Audience :: Science/Research", 19 | "License :: OSI Approved :: MIT License", 20 | "Programming Language :: Python :: 3", 21 | "Programming Language :: Python :: 3.10", 22 | "Programming Language :: Python :: 3.11", 23 | "Programming Language :: Python :: 3.12", 24 | "Programming Language :: Python :: 3.13", 25 | "Topic :: Scientific/Engineering :: Information Analysis", 26 | "Topic :: Software Development :: Libraries :: Python Modules", 27 | ] 28 | dependencies = [ 29 | "requests>=2.28.0", 30 | "polars>=0.20.0", 31 | "platformdirs>=3.0.0", 32 | "tqdm>=4.64.0", 33 | "pydantic>=2.0.0", 34 | "pydantic-settings>=2.0.0", 35 | ] 36 | 37 | [project.optional-dependencies] 38 | pandas = [ 39 | "pandas>=1.5.0", 40 | ] 41 | [dependency-groups] 42 | dev = [ 43 | "pytest>=7.0.0", 44 | "pytest-cov>=4.0.0", 45 | "ruff>=0.1.0", 46 | "mypy>=1.5.0", 47 | "pre-commit>=3.0.0", 48 | "types-requests>=2.32.4.20250809", 49 | "types-tqdm>=4.67.0.20250809", 50 | "mkdocs>=1.6.1", 51 | "mkdocs-material>=9.0.0", 52 | "mkdocstrings[python]>=0.24.0" 53 | ] 54 | 55 | [project.urls] 56 | Homepage = "https://github.com/nflverse/nflreadpy" 57 | Repository = "https://github.com/nflverse/nflreadpy" 58 | Documentation = "https://nflreadpy.nflverse.com" 59 | Issues = "https://github.com/nflverse/nflreadpy/issues" 60 | 61 | [tool.ruff] 62 | target-version = "py310" 63 | line-length = 88 64 | 65 | [tool.ruff.lint] 66 | select = [ 67 | "E", # pycodestyle errors 68 | "W", # pycodestyle warnings 69 | "F", # pyflakes 70 | "I", # isort 71 | "B", # flake8-bugbear 72 | "C4", # flake8-comprehensions 73 | "UP", # pyupgrade 74 | ] 75 | ignore = [ 76 | "E501", # line too long, handled by formatter 77 | "B008", # do not perform function calls in argument defaults 78 | ] 79 | 80 | [tool.ruff.lint.per-file-ignores] 81 | "__init__.py" = ["F401"] 82 | 83 | [tool.ruff.format] 84 | quote-style = "double" 85 | indent-style = "space" 86 | skip-magic-trailing-comma = false 87 | line-ending = "auto" 88 | 89 | [tool.mypy] 90 | python_version = "3.10" 91 | warn_return_any = true 92 | warn_unused_configs = true 93 | disallow_untyped_defs = true 94 | disallow_incomplete_defs = true 95 | check_untyped_defs = true 96 | disallow_untyped_decorators = true 97 | no_implicit_optional = true 98 | warn_redundant_casts = true 99 | warn_unused_ignores = true 100 | warn_no_return = true 101 | warn_unreachable = true 102 | strict_equality = true 103 | 104 | [tool.pytest.ini_options] 105 | minversion = "7.0" 106 | addopts = "-ra -q --strict-markers --strict-config" 107 | testpaths = ["tests"] 108 | python_files = ["test_*.py"] 109 | python_classes = ["Test*"] 110 | python_functions = ["test_*"] 111 | 112 | [tool.coverage.run] 113 | source = ["src"] 114 | branch = true 115 | 116 | [tool.coverage.report] 117 | precision = 2 118 | show_missing = true 119 | 
skip_covered = false 120 | exclude_lines = [ 121 | "pragma: no cover", 122 | "def __repr__", 123 | "if self.debug:", 124 | "if settings.DEBUG", 125 | "raise AssertionError", 126 | "raise NotImplementedError", 127 | "if 0:", 128 | "if __name__ == .__main__.:", 129 | ] 130 | -------------------------------------------------------------------------------- /src/nflreadpy/load_stats.py: -------------------------------------------------------------------------------- 1 | """Load NFL player and team statistics.""" 2 | 3 | from typing import Literal 4 | 5 | import polars as pl 6 | 7 | from .downloader import get_downloader 8 | from .utils_date import get_current_season 9 | 10 | 11 | def _load_stats( 12 | stat_type: str, 13 | seasons: int | list[int] | bool | None = None, 14 | summary_level: Literal["week", "reg", "post", "reg+post"] = "week", 15 | ) -> pl.DataFrame: 16 | """ 17 | Internal function to load NFL statistics. 18 | 19 | Args: 20 | stat_type: Type of stats ("player" or "team"). 21 | seasons: Season(s) to load. If None, loads current season. 22 | If True, loads all available data. 23 | If int or list of ints, loads specified season(s). 24 | summary_level: Summary level ("week", "reg", "post", "reg+post"). 25 | 26 | Returns: 27 | Polars DataFrame with statistics. 28 | """ 29 | if seasons is None: 30 | seasons = [get_current_season()] 31 | elif seasons is True: 32 | # Load all available seasons 33 | current_season = get_current_season() 34 | seasons = list(range(1999, current_season + 1)) 35 | elif isinstance(seasons, int): 36 | seasons = [seasons] 37 | 38 | if summary_level not in ["week", "reg", "post", "reg+post"]: 39 | raise ValueError("summary_level must be 'week', 'reg', 'post', or 'reg+post'") 40 | 41 | if stat_type not in ["player", "team"]: 42 | raise ValueError("stat_type must be 'player' or 'team'") 43 | 44 | # Convert summary level for URL path 45 | level_str = summary_level.replace("+", "") # "reg+post" becomes "regpost" 46 | 47 | downloader = get_downloader() 48 | dataframes = [] 49 | 50 | for season in seasons: 51 | path = f"stats_{stat_type}/stats_{stat_type}_{level_str}_{season}" 52 | df = downloader.download( 53 | "nflverse-data", 54 | path, 55 | season=season, 56 | summary_level=summary_level, 57 | stat_type=stat_type, 58 | ) 59 | dataframes.append(df) 60 | 61 | if len(dataframes) == 1: 62 | return dataframes[0] 63 | else: 64 | return pl.concat(dataframes, how="diagonal_relaxed") 65 | 66 | 67 | def load_player_stats( 68 | seasons: int | list[int] | bool | None = None, 69 | summary_level: Literal["week", "reg", "post", "reg+post"] = "week", 70 | ) -> pl.DataFrame: 71 | """ 72 | Load NFL player statistics. 73 | 74 | Args: 75 | seasons: Season(s) to load. If None, loads current season. 76 | If True, loads all available data. 77 | If int or list of ints, loads specified season(s). 78 | summary_level: Summary level ("week", "reg", "post", "reg+post"). 79 | 80 | Returns: 81 | Polars DataFrame with player statistics. 82 | 83 | See Also: 84 | <https://nflreadr.nflverse.com/reference/load_player_stats.html> 85 | 86 | Data Dictionary: 87 | <https://nflreadr.nflverse.com/articles/dictionary_player_stats.html> 88 | """ 89 | return _load_stats("player", seasons, summary_level) 90 | 91 | 92 | def load_team_stats( 93 | seasons: int | list[int] | bool | None = None, 94 | summary_level: Literal["week", "reg", "post", "reg+post"] = "week", 95 | ) -> pl.DataFrame: 96 | """ 97 | Load NFL team statistics. 98 | 99 | Args: 100 | seasons: Season(s) to load. If None, loads current season. 
101 | If True, loads all available data. 102 | If int or list of ints, loads specified season(s). 103 | summary_level: Summary level ("week", "reg", "post", "reg+post"). 104 | 105 | Returns: 106 | Polars DataFrame with team statistics. 107 | 108 | See Also: 109 | <https://nflreadr.nflverse.com/reference/load_team_stats.html> 110 | 111 | Data Dictionary: 112 | <https://nflreadr.nflverse.com/articles/dictionary_team_stats.html> 113 | """ 114 | return _load_stats("team", seasons, summary_level) 115 | -------------------------------------------------------------------------------- /src/nflreadpy/utils_date.py: -------------------------------------------------------------------------------- 1 | """Date utility functions for nflreadpy.""" 2 | 3 | from datetime import date 4 | 5 | import polars as pl 6 | 7 | 8 | def get_current_season(roster: bool = False) -> int: 9 | """ 10 | Get the current NFL season year. 11 | 12 | Args: 13 | roster: 14 | - If True, uses roster year logic (current year after March 15). 15 | - If False, uses season logic (current year after Thursday following Labor Day). 16 | 17 | Returns: 18 | The current season/roster year. 19 | 20 | See Also: 21 | <https://nflreadr.nflverse.com/reference/get_current_season.html> 22 | """ 23 | if not isinstance(roster, bool): 24 | raise TypeError("argument `roster` must be boolean") 25 | 26 | today = date.today() 27 | current_year = today.year 28 | 29 | if roster: 30 | # Roster logic: current year after March 15, otherwise previous year 31 | march_15 = date(current_year, 3, 15) 32 | return current_year if today >= march_15 else current_year - 1 33 | else: 34 | # Season logic: current year after Thursday following Labor Day 35 | # Labor Day is first Monday in September 36 | # Find first Monday in September 37 | for day in range(1, 8): 38 | if date(current_year, 9, day).weekday() == 0: # Monday 39 | labor_day = date(current_year, 9, day) 40 | break 41 | 42 | # Thursday following Labor Day 43 | season_start = date(labor_day.year, labor_day.month, labor_day.day + 3) 44 | return current_year if today >= season_start else current_year - 1 45 | 46 | 47 | def get_current_week(use_date: bool = False, **kwargs) -> int: 48 | """ 49 | Get the current NFL week (rough approximation). 50 | 51 | Args: 52 | use_date: 53 | - If `True`, calculates week as the number of weeks since Thursday following Labor Day. 54 | - If `False`, loads schedules via `load_schedules(seasons = get_current_season(**kwargs))` and returns week of the next game. 55 | **kwargs: 56 | Arguments passed on to `get_current_season()` 57 | 58 | Returns: 59 | The current NFL week (1-22). 
60 | 61 | See Also: 62 | <https://nflreadr.nflverse.com/reference/get_current_week.html> 63 | """ 64 | if not isinstance(use_date, bool): 65 | raise TypeError("argument `use_date` must be boolean") 66 | 67 | from .load_schedules import load_schedules 68 | 69 | if use_date: 70 | today = date.today() 71 | season_year = get_current_season(**kwargs) 72 | 73 | # NFL season typically starts around first Thursday of September 74 | # Find first Thursday in September 75 | for day in range(1, 8): 76 | if date(season_year, 9, day).weekday() == 3: # Thursday 77 | season_start = date(season_year, 9, day) 78 | break 79 | 80 | if today < season_start: 81 | return 1 82 | 83 | # Calculate weeks since season start 84 | days_since_start = (today - season_start).days 85 | week = min(days_since_start // 7 + 1, 22) # Cap at week 22 86 | 87 | return int(week) 88 | else: 89 | sched = load_schedules(seasons=get_current_season(**kwargs)) 90 | count_na_weeks = sched.select("result").null_count().item() 91 | if count_na_weeks == 0: 92 | # no NA values in result, return max(week) 93 | return sched.select("week").drop_nulls().max().item() 94 | else: 95 | # there are NA values in result. Filter table to NA results only, 96 | # and return min(week) 97 | return ( 98 | sched.filter(pl.col("result").is_null()) 99 | .select("week") 100 | .drop_nulls() 101 | .min() 102 | .item() 103 | ) 104 | 105 | 106 | def most_recent_season(roster: bool = False) -> int: 107 | """ 108 | Alias for get_current_season for compatibility with nflreadr. 109 | 110 | Args: 111 | roster: If True, uses roster year logic. 112 | 113 | Returns: 114 | The most recent season/roster year. 115 | """ 116 | return get_current_season(roster=roster) 117 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # nflreadpy <a href='https://nflreadpy.nflverse.com'><img src='assets/nflverse.png' align="right" width="25%" min-width="120px" /></a> 2 | <!-- badges: start --> 3 | [![PyPI status](https://img.shields.io/pypi/v/nflreadpy?style=flat-square&logo=python&label=pypi)](https://pypi.org/project/nflreadpy/) 4 | [![Dev status](https://img.shields.io/badge/dynamic/toml?url=https%3A%2F%2Fgithub.com%2Fnflverse%2Fnflreadpy%2Fraw%2Fmain%2Fpyproject.toml&query=%24.project.version&prefix=v&style=flat-square&label=dev%20version 5 | )](https://nflreadpy.nflverse.com/) 6 | [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg?style=flat-square)](https://lifecycle.r-lib.org/articles/stages.html) 7 | [![CI test status](https://img.shields.io/github/actions/workflow/status/nflverse/nflreadpy/ci-test.yaml?label=CI%20tests&style=flat-square&logo=github)](https://github.com/nflverse/nflreadpy/actions) 8 | [![nflverse discord](https://img.shields.io/discord/789805604076126219?color=7289da&label=nflverse%20discord&logo=discord&logoColor=fff&style=flat-square)](https://discord.com/invite/5Er2FBnnQa) 9 | <!-- badges: end --> 10 | 11 | A Python package for downloading NFL data from nflverse repositories. This is a 12 | Python port of the popular R package [nflreadr](https://github.com/nflverse/nflreadr), 13 | designed to provide easy access to NFL data with caching, progress tracking, and 14 | modern Python conventions. 
15 | 16 | ## Features 17 | 18 | - Compatible API with nflreadr R package 19 | - Fast data loading with Polars DataFrames 20 | - Intelligent caching (memory or filesystem) 21 | - Progress tracking for large downloads 22 | 23 | ## Install 24 | 25 | ```bash 26 | # Using uv (recommended) 27 | uv add nflreadpy 28 | 29 | # Using pip 30 | pip install nflreadpy 31 | ``` 32 | 33 | ## Usage 34 | 35 | ```python 36 | import nflreadpy as nfl 37 | 38 | # Load current season play-by-play data 39 | pbp = nfl.load_pbp() 40 | 41 | # Load player game-level stats for multiple seasons 42 | player_stats = nfl.load_player_stats([2022, 2023]) 43 | 44 | # Load all available team level stats 45 | team_stats = nfl.load_team_stats(seasons=True) 46 | 47 | # nflreadpy uses Polars instead of pandas. Convert to pandas if needed: 48 | pbp_pandas = pbp.to_pandas() 49 | ``` 50 | 51 | ## Available Functions 52 | 53 | [Reference page for all load functions](api/load_functions.md) 54 | 55 | ## Configuration 56 | 57 | [Reference page for config options](api/configuration.md) (a short example is included at the end of this page) 58 | 59 | ## Getting help 60 | 61 | The best places to get help on this package are: 62 | 63 | - the [nflverse discord](https://discord.com/invite/5Er2FBnnQa) (for 64 | both this package as well as anything NFL analytics related) 65 | - opening [an issue](https://github.com/nflverse/nflreadpy/issues/new/choose) 66 | 67 | ## Data Sources 68 | 69 | nflreadpy downloads data from the following nflverse repositories: 70 | 71 | - [nflverse-data](https://github.com/nflverse/nflverse-data) - Play-by-play, rosters, stats 72 | - [dynastyprocess](https://github.com/dynastyprocess/data) - fantasy football data 73 | - [ffopportunity](https://github.com/ffverse/ffopportunity) - expected yards and fantasy points 74 | 75 | See the automation status page [here](https://nflreadr.nflverse.com/articles/nflverse_data_schedule.html) 76 | for last update date/times for each release. 77 | 78 | ## License 79 | 80 | MIT License - see [LICENSE](LICENSE.md) file for details. 81 | 82 | The majority of all nflverse data available (i.e. all but the FTN data as of July 2025) 83 | is broadly licensed as CC-BY 4.0, and the FTN data is CC-BY-SA 4.0 (see nflreadr 84 | docs for each main data file). 85 | 86 | ## Development 87 | 88 | This project uses the following tooling: 89 | 90 | - uv for dependency management 91 | - ruff for linting and formatting 92 | - mypy for type checking 93 | - pytest for testing 94 | - mkdocs for website docs 95 | 96 | ```bash 97 | # Install development dependencies 98 | uv sync --dev 99 | 100 | # Run tests 101 | uv run pytest 102 | 103 | # Format code 104 | uv run ruff format 105 | 106 | # Type check 107 | uv run mypy src 108 | 109 | # Serve docs site locally 110 | uv run mkdocs serve 111 | 112 | ``` 113 | 114 | ## Disclaimer 115 | Most of the first version was written by Claude based on nflreadr; use at your 116 | own risk. 117 | 118 | ## Contributing 119 | 120 | Many hands make light work! Here are some ways you can contribute to 121 | this project: 122 | 123 | - You can [open an issue](https://github.com/nflverse/nflreadpy/issues/new/choose) if 124 | you'd like to request a feature or report a bug/error. 125 | 126 | - If you'd like to contribute code, please check out [the contribution guidelines](CONTRIBUTING.md).
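The configuration example referenced above is sketched here. It uses only options documented on the configuration reference page; the cache directory path is an arbitrary placeholder, not a package default:

```python
from pathlib import Path

import nflreadpy as nfl
from nflreadpy.config import update_config

# Cache downloads on disk for one hour so repeated calls skip the network.
update_config(
    cache_mode="filesystem",
    cache_dir=Path("~/nflreadpy-cache").expanduser(),  # example path only
    cache_duration=3600,
)

schedules = nfl.load_schedules(2024)  # downloaded once, then served from cache

# Remove everything cached so far.
nfl.clear_cache()
```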
127 | -------------------------------------------------------------------------------- /src/nflreadpy/load_pfr_advstats.py: -------------------------------------------------------------------------------- 1 | """Load Pro Football Reference advanced statistics.""" 2 | 3 | from typing import Literal 4 | 5 | import polars as pl 6 | 7 | from .downloader import get_downloader 8 | from .utils_date import get_current_season 9 | 10 | 11 | def load_pfr_advstats( 12 | seasons: int | list[int] | bool | None = None, 13 | stat_type: Literal["pass", "rush", "rec", "def"] = "pass", 14 | summary_level: Literal["week", "season"] = "week", 15 | ) -> pl.DataFrame: 16 | """ 17 | Load Pro Football Reference advanced statistics. 18 | 19 | Args: 20 | seasons: Season(s) to load. If None, loads current season. 21 | If True, loads all available data (2018-current). 22 | If int or list of ints, loads specified season(s). 23 | Only used when summary_level="week". 24 | stat_type: Type of statistics to load: 25 | - "pass": Passing statistics 26 | - "rush": Rushing statistics 27 | - "rec": Receiving statistics 28 | - "def": Defensive statistics 29 | summary_level: Summary level: 30 | - "week": Weekly statistics by season 31 | - "season": Season-level statistics (all seasons combined) 32 | 33 | Returns: 34 | Polars DataFrame with Pro Football Reference advanced statistics. 35 | 36 | Note: 37 | Data is available from 2018 onwards. 38 | 39 | See Also: 40 | - [nflreadr docs](https://nflreadr.nflverse.com/reference/load_pfr_advstats.html) 41 | - [example of advanced passing season-level stats](https://www.pro-football-reference.com/years/2025/passing_advanced.htm) 42 | - [example of advanced passing week-level stats](https://www.pro-football-reference.com/boxscores/202509040phi.htm#all_passing_advanced) 43 | 44 | """ 45 | # Validate stat_type 46 | if stat_type not in ["pass", "rush", "rec", "def"]: 47 | raise ValueError("stat_type must be 'pass', 'rush', 'rec', or 'def'") 48 | 49 | # Validate summary_level 50 | if summary_level not in ["week", "season"]: 51 | raise ValueError("summary_level must be 'week' or 'season'") 52 | 53 | # Handle seasons parameter 54 | if seasons is None: 55 | seasons = [get_current_season()] 56 | elif seasons is True: 57 | # Load all available seasons (2018-current) 58 | current_season = get_current_season() 59 | seasons = list(range(2018, current_season + 1)) 60 | elif isinstance(seasons, int): 61 | seasons = [seasons] 62 | 63 | # Validate seasons 64 | current_season = get_current_season() 65 | for season in seasons: 66 | if not isinstance(season, int) or season < 2018 or season > current_season: 67 | raise ValueError(f"Season must be between 2018 and {current_season}") 68 | 69 | if summary_level == "season": 70 | return _load_pfr_advstats_season(seasons, stat_type) 71 | else: 72 | return _load_pfr_advstats_week(seasons, stat_type) 73 | 74 | 75 | def _load_pfr_advstats_week( 76 | seasons: list[int], 77 | stat_type: Literal["pass", "rush", "rec", "def"], 78 | ) -> pl.DataFrame: 79 | """ 80 | Load weekly Pro Football Reference advanced statistics. 81 | 82 | Args: 83 | seasons: List of seasons to load. 84 | stat_type: Type of statistics to load. 85 | 86 | Returns: 87 | Polars DataFrame with weekly advanced statistics. 
88 | """ 89 | downloader = get_downloader() 90 | dataframes = [] 91 | 92 | for season in seasons: 93 | path = f"pfr_advstats/advstats_week_{stat_type}_{season}" 94 | df = downloader.download( 95 | "nflverse-data", 96 | path, 97 | season=season, 98 | stat_type=stat_type, 99 | summary_level="week", 100 | ) 101 | dataframes.append(df) 102 | 103 | if len(dataframes) == 1: 104 | return dataframes[0] 105 | else: 106 | return pl.concat(dataframes, how="diagonal_relaxed") 107 | 108 | 109 | def _load_pfr_advstats_season( 110 | seasons: list[int], 111 | stat_type: Literal["pass", "rush", "rec", "def"], 112 | ) -> pl.DataFrame: 113 | """ 114 | Load season-level Pro Football Reference advanced statistics. 115 | 116 | Args: 117 | stat_type: Type of statistics to load. 118 | 119 | Returns: 120 | Polars DataFrame with season-level advanced statistics. 121 | """ 122 | downloader = get_downloader() 123 | path = f"pfr_advstats/advstats_season_{stat_type}" 124 | df = downloader.download( 125 | repository="nflverse-data", 126 | path=path, 127 | stat_type=stat_type, 128 | summary_level="season", 129 | ) 130 | # Filter the dataframe by season 131 | df = df.filter(pl.col("season").is_in(seasons)) 132 | return df 133 | -------------------------------------------------------------------------------- /src/nflreadpy/downloader.py: -------------------------------------------------------------------------------- 1 | """Data downloading functionality for nflreadpy.""" 2 | 3 | from typing import Any 4 | from urllib.parse import urljoin 5 | 6 | import polars as pl 7 | import requests 8 | from tqdm import tqdm 9 | 10 | from .cache import get_cache_manager 11 | from .config import DataFormat, get_config 12 | 13 | 14 | class NflverseDownloader: 15 | """Downloads data from nflverse repositories.""" 16 | 17 | BASE_URLS = { 18 | "nflverse-data": "https://github.com/nflverse/nflverse-data/releases/download/", 19 | "espnscraper": "https://github.com/nflverse/espnscrapeR-data/raw/master/data/", 20 | "dynastyprocess": "https://github.com/dynastyprocess/data/raw/master/files/", 21 | "ffopportunity": "https://github.com/ffverse/ffopportunity/releases/download/", 22 | } 23 | 24 | def __init__(self) -> None: 25 | self.session = requests.Session() 26 | self.cache = get_cache_manager() 27 | 28 | def _get_headers(self) -> dict[str, str]: 29 | """Get HTTP headers for requests.""" 30 | config = get_config() 31 | return { 32 | "User-Agent": config.user_agent, 33 | "Accept": "application/octet-stream, text/csv, */*", 34 | } 35 | 36 | def _build_url(self, repository: str, path: str, format_type: DataFormat) -> str: 37 | """Build the full URL for a data file.""" 38 | if repository not in self.BASE_URLS: 39 | raise ValueError(f"Unknown repository: {repository}") 40 | 41 | base_url = self.BASE_URLS[repository] 42 | 43 | # Add format extension if not present 44 | if not path.endswith((".parquet", ".csv")): 45 | ext = ".parquet" if format_type == DataFormat.PARQUET else ".csv" 46 | path = f"{path}{ext}" 47 | 48 | return urljoin(base_url, path) 49 | 50 | def _download_file(self, url: str, **kwargs: Any) -> pl.DataFrame: 51 | """Download and parse a data file.""" 52 | config = get_config() 53 | 54 | # Check cache first 55 | cached_data = self.cache.get(url, **kwargs) 56 | if cached_data is not None: 57 | return cached_data 58 | 59 | # Configure session 60 | self.session.headers.update(self._get_headers()) 61 | 62 | try: 63 | if config.verbose: 64 | print(f"Downloading {url}") 65 | 66 | response = self.session.get(url, timeout=config.timeout, 
stream=True) 67 | response.raise_for_status() 68 | 69 | # Get content length for progress bar 70 | total_size = int(response.headers.get("content-length", 0)) 71 | 72 | # Download with progress bar if verbose 73 | content = b"" 74 | if config.verbose and total_size > 0: 75 | with tqdm( 76 | total=total_size, unit="B", unit_scale=True, desc="Downloading" 77 | ) as pbar: 78 | for chunk in response.iter_content(chunk_size=8192): 79 | if chunk: 80 | content += chunk 81 | pbar.update(len(chunk)) 82 | else: 83 | content = response.content 84 | 85 | # Parse data based on URL extension 86 | if url.endswith(".parquet"): 87 | data = pl.read_parquet(content) 88 | elif url.endswith(".csv"): 89 | data = pl.read_csv(content, null_values=["NA", "NULL", ""]) 90 | 91 | # Cache the result 92 | self.cache.set(url, data, **kwargs) 93 | 94 | return data 95 | 96 | except requests.exceptions.RequestException as e: 97 | raise ConnectionError(f"Failed to download {url}: {e}") from e 98 | except Exception as e: 99 | raise ValueError(f"Failed to parse data from {url}: {e}") from e 100 | 101 | def download( 102 | self, 103 | repository: str, 104 | path: str, 105 | format: DataFormat = DataFormat.PARQUET, 106 | **kwargs: Any, 107 | ) -> pl.DataFrame: 108 | """ 109 | Download data from an nflverse repository. 110 | 111 | Args: 112 | repository: The repository name (e.g., 'nflverse-data') 113 | path: The path to the data file within the repository 114 | format: Data format (parquet or csv) 115 | **kwargs: Additional parameters for caching 116 | 117 | Returns: 118 | Polars DataFrame with the requested data 119 | """ 120 | url = self._build_url(repository, path, format) 121 | return self._download_file(url, **kwargs) 122 | 123 | 124 | # Global downloader instance 125 | _downloader = NflverseDownloader() 126 | 127 | 128 | def get_downloader() -> NflverseDownloader: 129 | """Get the global downloader instance.""" 130 | return _downloader 131 | -------------------------------------------------------------------------------- /CLAUDE.md: -------------------------------------------------------------------------------- 1 | # CLAUDE.md - Development Guidelines 2 | 3 | This file contains guidelines and patterns for maintaining and extending the nflreadpy package. 4 | 5 | ## Project Overview 6 | 7 | nflreadpy is a Python port of the R package nflreadr, providing access to NFL data from nflverse repositories. The package uses modern Python conventions with Polars DataFrames, intelligent caching, and comprehensive type hints. 8 | 9 | ## Key Architecture Decisions 10 | 11 | ### Core Technologies 12 | - **Python 3.10+** (not 3.9 - it's nearly end of life) 13 | - **Polars** as the default DataFrame library (not pandas - much faster for large datasets) 14 | - **uv** for package management and build system (not pip/setuptools) 15 | - **Ruff** for both linting and formatting (not Black + separate linter) 16 | 17 | ### Package Structure 18 | - Modular design with separate `load_*.py` files matching nflreadr's structure 19 | - Separated utility modules (`utils_date.py`, not monolithic `utils.py`) 20 | - Source layout: `src/nflreadpy/` (not flat package structure) 21 | 22 | ## Development Patterns 23 | 24 | ### Load Functions 25 | When adding new load functions, follow this pattern: 26 | 27 | 1. **File Structure**: Create `src/nflreadpy/load_[function_name].py` 28 | 2. **Import Pattern**: 29 | ```python 30 | from .utils_date import get_current_season # NOT from .utils 31 | ``` 32 | 3. 
**Season Logic**: 33 | - Use `get_current_season()` for game data 34 | - Use `get_current_season(roster=True)` for roster/depth chart data 35 | 4. **URL Structure**: Check the actual nflreadr R files for correct paths 36 | 5. **Format Preference**: Default to Parquet, explicitly use CSV only where needed 37 | 38 | ### Data Format Preferences 39 | - **Default**: Parquet (fastest, most efficient) 40 | - **Fallback**: CSV (when Parquet unavailable) 41 | - **Never**: RDS (R-specific format, not readable by Python) 42 | 43 | ### Season Validation 44 | Always validate season ranges with appropriate minimum years: 45 | ```python 46 | # Validate seasons 47 | current_season = get_current_season() 48 | for season in seasons: 49 | if not isinstance(season, int) or season < MIN_YEAR or season > current_season: 50 | raise ValueError(f"Season must be between {MIN_YEAR} and {current_season}") 51 | ``` 52 | 53 | ### Testing 54 | - Update `tests/test_integration.py` when adding new functions 55 | - Add import tests for all new load functions 56 | - Update the `expected_exports` list in `test_all_exports()` 57 | 58 | ## Common Mistakes to Avoid 59 | 60 | 1. **Import Paths**: Use `from .utils_date import get_current_season`, not `from .utils import` 61 | 2. **Data Formats**: Don't default to CSV globally - only use it where Parquet isn't available 62 | 3. **R Dependencies**: Don't try to read RDS files - they're R-specific format 63 | 64 | ## URL Pattern Examples 65 | 66 | Common nflverse data URL patterns: 67 | - **Seasonal data**: `{repo}/releases/download/{category}/{name}_{season}.{format}` 68 | - **Static data**: `{repo}/releases/download/{category}/{name}.{format}` 69 | - **CSV files**: Use `format_preference=DataFormat.CSV` for known CSV-only sources 70 | 71 | ## Repository Structure 72 | 73 | ``` 74 | nflreadpy/ 75 | ├── src/nflreadpy/ 76 | │ ├── __init__.py # Main exports 77 | │ ├── config.py # Configuration management 78 | │ ├── cache.py # Caching system 79 | │ ├── downloader.py # HTTP client and data fetching 80 | │ ├── utils_date.py # Date utilities (separated) 81 | │ └── load_*.py # Individual load functions 82 | ├── tests/ # Test suite 83 | ├── pyproject.toml # uv + modern Python packaging 84 | └── README.md # User documentation 85 | ``` 86 | 87 | ## Development Commands 88 | 89 | ```bash 90 | # Install dependencies 91 | uv sync --dev 92 | 93 | # Format code 94 | uv run ruff format 95 | 96 | # Lint code 97 | uv run ruff check --fix 98 | 99 | # Type check 100 | uv run mypy src 101 | 102 | # Run tests 103 | uv run pytest 104 | 105 | # Serve docs site for local devel 106 | uv run mkdocs serve 107 | 108 | # Build docs site 109 | uv run mkdocs build 110 | 111 | # Build package 112 | uv build 113 | ``` 114 | 115 | ## When Adding New Load Functions 116 | 117 | 1. **Research**: Check the corresponding R file in nflreadr for URL patterns 118 | 2. **Validate**: Ensure minimum season years are correct 119 | 3. **Test**: Add to integration tests 120 | 4. **Export**: Add to `__init__.py` imports and `__all__` 121 | 5. 
**Document**: Update README.md if it's a major function 122 | 123 | ## Performance Considerations 124 | 125 | - Polars is much faster than pandas for large NFL datasets 126 | - Use filesystem caching by default (configurable via environment) 127 | - Implement progress bars for large downloads 128 | - Batch multiple seasons efficiently with `pl.concat()` 129 | 130 | This package aims to provide a modern, fast, and maintainable Python interface to NFL data while preserving API compatibility with the original nflreadr R package. 131 | -------------------------------------------------------------------------------- /src/nflreadpy/load_ffverse.py: -------------------------------------------------------------------------------- 1 | """Load fantasy football data from ffverse.""" 2 | 3 | from typing import Literal 4 | 5 | import polars as pl 6 | 7 | from .config import DataFormat 8 | from .downloader import get_downloader 9 | from .utils_date import get_current_season 10 | 11 | 12 | def load_ff_playerids() -> pl.DataFrame: 13 | """ 14 | Load fantasy football player IDs from DynastyProcess.com database. 15 | 16 | Returns: 17 | Polars DataFrame with comprehensive player ID mappings across platforms. 18 | 19 | Note: 20 | This function loads data from an R data file (.rds). While Python cannot 21 | directly read RDS files, we attempt to use CSV format if available. 22 | 23 | See Also: 24 | <https://nflreadr.nflverse.com/reference/load_ff_playerids.html> 25 | """ 26 | downloader = get_downloader() 27 | 28 | df = downloader.download("dynastyprocess", "db_playerids", format=DataFormat.CSV) 29 | 30 | return df 31 | 32 | 33 | def load_ff_rankings(type: Literal["draft", "week", "all"] = "draft") -> pl.DataFrame: 34 | """ 35 | Load fantasy football rankings and projections. 36 | 37 | Args: 38 | type: Type of rankings to load: 39 | - "draft": Draft rankings/projections 40 | - "week": Weekly rankings/projections 41 | - "all": All historical rankings/projections 42 | 43 | Returns: 44 | Polars DataFrame with fantasy football rankings data. 45 | 46 | See Also: 47 | <https://nflreadr.nflverse.com/reference/load_ff_rankings.html> 48 | """ 49 | downloader = get_downloader() 50 | 51 | # Map ranking types to file names 52 | file_mapping = { 53 | "draft": "db_fpecr_latest", 54 | "week": "fp_latest_weekly", 55 | "all": "db_fpecr", 56 | } 57 | 58 | if type not in file_mapping: 59 | raise ValueError(f"Invalid type '{type}'. Must be one of: draft, week, all") 60 | 61 | filename = file_mapping[type] 62 | 63 | if type == "all": 64 | df = downloader.download("dynastyprocess", filename) 65 | else: 66 | df = downloader.download("dynastyprocess", filename, format=DataFormat.CSV) 67 | 68 | return df 69 | 70 | 71 | def load_ff_opportunity( 72 | seasons: int | list[int] | None = None, 73 | stat_type: Literal["weekly", "pbp_pass", "pbp_rush"] = "weekly", 74 | model_version: Literal["latest", "v1.0.0"] = "latest", 75 | ) -> pl.DataFrame: 76 | """ 77 | Load fantasy football opportunity data. 78 | 79 | This function loads opportunity and target share data for fantasy football 80 | analysis from the ffverse/ffopportunity repository. 81 | 82 | Args: 83 | seasons: Season(s) to load. If None (default), loads current season. 84 | If int or list of ints, loads specified season(s). True loads all seasons. 
85 | stat_type: Type of stats to load: 86 | - "weekly": Weekly opportunity data 87 | - "pbp_pass": Play-by-play passing data 88 | - "pbp_rush": Play-by-play rushing data 89 | model_version: Model version to load: 90 | - "latest": Most recent model version 91 | - "v1.0.0": Specific model version 92 | 93 | Returns: 94 | Polars DataFrame with fantasy football opportunity data. 95 | 96 | Raises: 97 | ValueError: If season is outside valid range or invalid parameters provided. 98 | 99 | See Also: 100 | <https://nflreadr.nflverse.com/reference/load_ff_opportunity.html> 101 | """ 102 | downloader = get_downloader() 103 | 104 | # Validate parameters 105 | valid_stat_types = ["weekly", "pbp_pass", "pbp_rush"] 106 | if stat_type not in valid_stat_types: 107 | raise ValueError( 108 | f"Invalid stat_type '{stat_type}'. Must be one of: {valid_stat_types}" 109 | ) 110 | 111 | valid_versions = ["latest", "v1.0.0"] 112 | if model_version not in valid_versions: 113 | raise ValueError( 114 | f"Invalid model_version '{model_version}'. Must be one of: {valid_versions}" 115 | ) 116 | 117 | min_year = 2006 118 | current_season = get_current_season() 119 | # Handle seasons parameter 120 | if seasons is None: 121 | seasons = [current_season] 122 | elif seasons is True: 123 | # Load all available seasons (min_year to current) 124 | current_season = get_current_season() 125 | seasons = list(range(min_year, current_season + 1)) 126 | elif isinstance(seasons, int): 127 | seasons = [seasons] 128 | 129 | # Validate season range 130 | for season in seasons: 131 | if not isinstance(season, int) or season < min_year or season > current_season: 132 | raise ValueError(f"Season must be between {min_year} and {current_season}") 133 | 134 | # Load data for each season 135 | dataframes = [] 136 | for season in seasons: 137 | # Build the release tag and filename based on the R implementation 138 | release_tag = f"{model_version}-data" 139 | filename = f"ep_{stat_type}_{season}" 140 | 141 | # Build the path for the ffopportunity repository 142 | path = f"{release_tag}/{filename}" 143 | 144 | df = downloader.download("ffopportunity", path) 145 | 146 | dataframes.append(df) 147 | 148 | # Combine all seasons 149 | if len(dataframes) == 1: 150 | return dataframes[0] 151 | else: 152 | return pl.concat(dataframes, how="diagonal_relaxed") 153 | -------------------------------------------------------------------------------- /src/nflreadpy/config.py: -------------------------------------------------------------------------------- 1 | """Configuration management for nflreadpy.""" 2 | 3 | from enum import Enum 4 | from importlib.metadata import version 5 | from pathlib import Path 6 | from typing import Any 7 | 8 | from platformdirs import user_cache_dir 9 | from pydantic import Field 10 | from pydantic_settings import BaseSettings, SettingsConfigDict 11 | 12 | 13 | class CacheMode(str, Enum): 14 | """Cache modes for data storage. 15 | 16 | Attributes: 17 | MEMORY: Cache data in memory (faster, but cleared on restart) 18 | FILESYSTEM: Cache data to disk (persistent across restarts) 19 | OFF: Disable caching entirely 20 | """ 21 | 22 | MEMORY = "memory" 23 | FILESYSTEM = "filesystem" 24 | OFF = "off" 25 | 26 | 27 | class DataFormat(str, Enum): 28 | """Preferred data format for downloads. 
29 | 30 | Attributes: 31 | PARQUET: Apache Parquet format (recommended - faster and more efficient) 32 | CSV: Comma-separated values format (universal compatibility) 33 | """ 34 | 35 | PARQUET = "parquet" 36 | CSV = "csv" 37 | 38 | 39 | class NflreadpyConfig(BaseSettings): 40 | """Configuration settings for nflreadpy. 41 | 42 | This class manages all configuration options for the nflreadpy package. 43 | Settings can be configured via environment variables or programmatically. 44 | 45 | Environment Variables: 46 | - NFLREADPY_CACHE: Cache mode ("memory", "filesystem", or "off") 47 | - NFLREADPY_CACHE_DIR: Directory path for filesystem cache 48 | - NFLREADPY_CACHE_DURATION: Cache duration in seconds 49 | - NFLREADPY_VERBOSE: Enable verbose output (true/false) 50 | - NFLREADPY_TIMEOUT: HTTP request timeout in seconds 51 | - NFLREADPY_USER_AGENT: Custom user agent string 52 | 53 | Example: 54 | ```python 55 | from nflreadpy.config import update_config, get_config 56 | 57 | # Update settings programmatically 58 | update_config(cache_mode="filesystem", verbose=False) 59 | 60 | # Get current settings 61 | config = get_config() 62 | print(f"Cache mode: {config.cache_mode}") 63 | ``` 64 | """ 65 | 66 | # Cache settings 67 | cache_mode: CacheMode = Field( 68 | default=CacheMode.MEMORY, 69 | description="Cache mode for storing downloaded data. 'memory' caches in RAM (fast but temporary), 'filesystem' saves to disk (persistent), 'off' disables caching.", 70 | alias="NFLREADPY_CACHE", 71 | ) 72 | 73 | cache_dir: Path = Field( 74 | default_factory=lambda: Path(user_cache_dir("nflreadpy")), 75 | description="Directory path for filesystem cache storage. Only used when cache_mode is 'filesystem'. Defaults to system cache directory.", 76 | alias="NFLREADPY_CACHE_DIR", 77 | ) 78 | 79 | cache_duration: int = Field( 80 | default=86400, 81 | description="How long to keep cached data before re-downloading, in seconds. Default is 86400 (24 hours). Set to 0 to always refresh.", 82 | alias="NFLREADPY_CACHE_DURATION", 83 | ) 84 | 85 | # Progress and logging 86 | verbose: bool = Field( 87 | default=False, 88 | description="Enable verbose output including progress bars and download status messages. Set to False for silent operation.", 89 | alias="NFLREADPY_VERBOSE", 90 | ) 91 | 92 | # Request settings 93 | timeout: int = Field( 94 | default=30, 95 | description="HTTP request timeout in seconds. How long to wait for server responses before giving up. Increase for slow connections.", 96 | alias="NFLREADPY_TIMEOUT", 97 | ) 98 | 99 | user_agent: str = Field( 100 | default=f"nflverse/nflreadpy {version('nflreadpy')}", 101 | description="User agent string sent with HTTP requests. Identifies the client to servers. Default includes package name and version.", 102 | alias="NFLREADPY_USER_AGENT", 103 | ) 104 | 105 | model_config = SettingsConfigDict( 106 | env_file=".env", 107 | case_sensitive=False, 108 | extra="ignore", 109 | ) 110 | 111 | 112 | # Global configuration instance 113 | config = NflreadpyConfig() 114 | 115 | 116 | def get_config() -> NflreadpyConfig: 117 | """Get the current configuration instance. 118 | 119 | Returns: 120 | The global configuration object containing all current settings. 121 | 122 | Example: 123 | ```python 124 | config = get_config() 125 | print(f"Cache directory: {config.cache_dir}") 126 | print(f"Verbose mode: {config.verbose}") 127 | ``` 128 | """ 129 | return config 130 | 131 | 132 | def update_config(**kwargs: Any) -> None: 133 | """Update configuration settings programmatically. 
134 | 135 | Args: 136 | **kwargs: Configuration options to update. Valid options include: 137 | 138 | - cache_mode: "memory", "filesystem", or "off" 139 | - cache_dir: Path to cache directory (str or Path) 140 | - cache_duration: Cache duration in seconds (int) 141 | - verbose: Enable verbose output (bool) 142 | - timeout: HTTP timeout in seconds (int) 143 | - user_agent: Custom user agent string (str) 144 | 145 | Raises: 146 | ValueError: If an unknown configuration option is provided. 147 | 148 | Example: 149 | ```python 150 | # Enable filesystem caching with custom directory 151 | update_config( 152 | cache_mode="filesystem", 153 | cache_dir="/path/to/my/cache", 154 | verbose=True 155 | ) 156 | 157 | # Disable caching and increase timeout 158 | update_config( 159 | cache_mode="off", 160 | timeout=60 161 | ) 162 | ``` 163 | """ 164 | global config 165 | for key, value in kwargs.items(): 166 | if hasattr(config, key): 167 | setattr(config, key, value) 168 | else: 169 | raise ValueError(f"Unknown configuration option: {key}") 170 | 171 | 172 | def reset_config() -> None: 173 | """Reset all configuration settings to their default values. 174 | 175 | This will restore all settings to their initial state, clearing any 176 | programmatic overrides (values supplied via environment variables are re-read). 177 | 178 | Example: 179 | ```python 180 | # Make some changes 181 | update_config(cache_mode="off", verbose=True) 182 | 183 | # Reset everything back to defaults 184 | reset_config() 185 | 186 | # Now cache_mode is "memory" and verbose is False again 187 | ``` 188 | """ 189 | global config 190 | config = NflreadpyConfig() 191 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nflreadpy <a href='https://nflreadpy.nflverse.com'><img src='docs/assets/nflverse.png' align="right" width="25%" min-width="120px" /></a> 2 | <!-- badges: start --> 3 | [![PyPI status](https://img.shields.io/pypi/v/nflreadpy?style=flat-square&logo=python&label=pypi)](https://pypi.org/project/nflreadpy/) 4 | [![Dev status](https://img.shields.io/badge/dynamic/toml?url=https%3A%2F%2Fgithub.com%2Fnflverse%2Fnflreadpy%2Fraw%2Fmain%2Fpyproject.toml&query=%24.project.version&prefix=v&style=flat-square&label=dev%20version 5 | )](https://nflreadpy.nflverse.com/) 6 | [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg?style=flat-square)](https://lifecycle.r-lib.org/articles/stages.html) 7 | [![CI test status](https://img.shields.io/github/actions/workflow/status/nflverse/nflreadpy/ci-test.yaml?label=CI%20tests&style=flat-square&logo=github)](https://github.com/nflverse/nflreadpy/actions) 8 | [![nflverse discord](https://img.shields.io/discord/789805604076126219?color=7289da&label=nflverse%20discord&logo=discord&logoColor=fff&style=flat-square)](https://discord.com/invite/5Er2FBnnQa) 9 | 10 | <!-- badges: end --> 11 | 12 | A Python package for downloading NFL data from nflverse repositories. This is a 13 | Python port of the popular R package [nflreadr](https://github.com/nflverse/nflreadr), 14 | designed to provide easy access to NFL data with caching, progress tracking, and 15 | modern Python conventions.
16 | 17 | ## Features 18 | 19 | - Compatible API with nflreadr R package 20 | - Fast data loading with Polars DataFrames 21 | - Intelligent caching (memory or filesystem) 22 | - Progress tracking for large downloads 23 | 24 | ## Install 25 | 26 | Install the latest release from PyPI with: 27 | 28 | ```bash 29 | # Using uv (recommended) 30 | uv add nflreadpy 31 | 32 | # Using pip 33 | pip install nflreadpy 34 | ``` 35 | 36 | You can also install the latest development version from GitHub with: 37 | 38 | ```bash 39 | # Using uv (recommended) 40 | uv add nflreadpy@git+https://github.com/nflverse/nflreadpy 41 | 42 | # Using pip 43 | pip install nflreadpy@git+https://github.com/nflverse/nflreadpy 44 | ``` 45 | 46 | ## Usage 47 | 48 | ```python 49 | import nflreadpy as nfl 50 | 51 | # Load current season play-by-play data 52 | pbp = nfl.load_pbp() 53 | 54 | # Load player game-level stats for multiple seasons 55 | player_stats = nfl.load_player_stats([2022, 2023]) 56 | 57 | # Load all available team level stats 58 | team_stats = nfl.load_team_stats(seasons=True) 59 | 60 | # nflreadpy uses Polars instead of pandas. Convert to pandas if needed: 61 | pbp_pandas = pbp.to_pandas() 62 | ``` 63 | 64 | ## Available Functions 65 | 66 | ### Core Loading Functions 67 | 68 | - `load_pbp()` - play-by-play data 69 | - `load_player_stats()` - player game or season statistics 70 | - `load_team_stats()` - team game or season statistics 71 | - `load_schedules()` - game schedules and results 72 | - `load_players()` - player information 73 | - `load_rosters()` - team rosters 74 | - `load_rosters_weekly()` - team rosters by season-week 75 | - `load_snap_counts()` - snap counts 76 | - `load_nextgen_stats()` - advanced stats from nextgenstats.nfl.com 77 | - `load_ftn_charting()` - charted stats from ftnfantasy.com/data 78 | - `load_participation()` - participation data (historical) 79 | - `load_draft_picks()` - nfl draft picks 80 | - `load_injuries()` - injury statuses and practice participation 81 | - `load_contracts()` - historical contract data from OTC 82 | - `load_officials()` - officials for each game 83 | - `load_combine()` - nfl combine results 84 | - `load_depth_charts()` - depth charts 85 | - `load_trades()` - trades 86 | - `load_ff_playerids()` - ffverse/dynastyprocess player ids 87 | - `load_ff_rankings()` - fantasypros rankings 88 | - `load_ff_opportunity()` - expected yards, touchdowns, and fantasy points 89 | 90 | ### Utility Functions 91 | 92 | - `clear_cache()` - Clear cached data 93 | - `get_current_season()` - Get current NFL season 94 | - `get_current_week()` - Get current NFL week 95 | 96 | ## Configuration 97 | 98 | Configure nflreadpy using environment variables: 99 | 100 | ```bash 101 | export NFLREADPY_CACHE='memory' # Cache mode ("memory", "filesystem", or "off") 102 | export NFLREADPY_CACHE_DIR='~/my_cache_dir' # Directory path for filesystem cache 103 | export NFLREADPY_CACHE_DURATION=86400 # Cache duration in seconds 104 | 105 | export NFLREADPY_VERBOSE='False' # Enable verbose output (true/false) 106 | export NFLREADPY_TIMEOUT=30 # HTTP request timeout in seconds 107 | export NFLREADPY_USER_AGENT='nflreadpy/v0.1.1' # Custom user agent string 108 | ``` 109 | 110 | or configure programmatically: 111 | 112 | ```python 113 | from nflreadpy.config import update_config 114 | 115 | update_config( 116 | cache_mode="memory", 117 | cache_dir='~/my_cache_dir', 118 | cache_duration=86400, 119 | verbose=False, 120 | timeout=30, 121 | user_agent='nflreadpy/v0.1.1' 122 | ) 123 | ``` 124 | 125 | ## 
Getting help 126 | 127 | The best places to get help on this package are: 128 | 129 | - the [nflverse discord](https://discord.com/invite/5Er2FBnnQa) (for 130 | both this package as well as anything NFL analytics related) 131 | - opening [an issue](https://github.com/nflverse/nflreadpy/issues/new/choose) 132 | 133 | ## Data Sources 134 | 135 | nflreadpy downloads data from the following nflverse repositories: 136 | 137 | - [nflverse-data](https://github.com/nflverse/nflverse-data) - Play-by-play, rosters, stats 138 | - [dynastyprocess](https://github.com/dynastyprocess/data) - fantasy football data 139 | - [ffopportunity](https://github.com/ffverse/ffopportunity) - expected yards and fantasy points 140 | 141 | See the automation status page [here](https://nflreadr.nflverse.com/articles/nflverse_data_schedule.html) 142 | for last update date/times for each release. 143 | 144 | ## License 145 | 146 | MIT License - see [LICENSE](LICENSE) file for details. 147 | 148 | The majority of all nflverse data available (ie all but the FTN data as of July 2025) 149 | is broadly licensed as CC-BY 4.0, and the FTN data is CC-BY-SA 4.0 (see nflreadr 150 | docs for each main data file). 151 | 152 | ## Development 153 | 154 | This project uses the following tooling: 155 | 156 | - uv for dependency management 157 | - ruff for linting and formatting 158 | - mypy for type checking 159 | - pytest for testing 160 | - mkdocs for documentation site 161 | 162 | ```bash 163 | # Install development dependencies 164 | uv sync --dev 165 | 166 | # Run tests 167 | uv run pytest 168 | 169 | # Format code 170 | uv run ruff format 171 | 172 | # Type check 173 | uv run mypy src 174 | 175 | # Serve docs site locally 176 | uv run mkdocs serve 177 | 178 | # Build docs site 179 | uv run mkdocs build 180 | ``` 181 | 182 | ## Disclaimer 183 | Most of the first version was written by Claude based on nflreadr, use at your 184 | own risk. 185 | 186 | ## Contributing 187 | 188 | Many hands make light work! Here are some ways you can contribute to 189 | this project: 190 | 191 | - You can [open an issue](https://github.com/nflverse/nflreadpy/issues/new/choose) if 192 | you’d like to request a feature or report a bug/error. 193 | 194 | - If you’d like to contribute code, please check out [the contribution guidelines](CONTRIBUTING.md). 195 | -------------------------------------------------------------------------------- /src/nflreadpy/cache.py: -------------------------------------------------------------------------------- 1 | """Caching functionality for nflreadpy. 2 | 3 | This module provides intelligent caching capabilities for NFL data to improve performance 4 | and reduce network requests. It supports both memory and filesystem caching with 5 | configurable expiration and cache modes. 6 | 7 | The caching system is designed to be transparent to users - data functions automatically 8 | check the cache before downloading and store results after successful downloads. 9 | 10 | Key Features: 11 | - Memory caching for fast repeated access 12 | - Filesystem caching for persistence across sessions 13 | - Configurable cache duration and storage location 14 | - Automatic cache expiration and cleanup 15 | - Pattern-based cache clearing 16 | 17 | Examples: 18 | >>> import nflreadpy as nfl 19 | >>> 20 | >>> # Data is automatically cached 21 | >>> pbp = nfl.load_pbp([2023]) 22 | >>> 23 | >>> # Subsequent calls use cached data 24 | >>> pbp_again = nfl.load_pbp([2023]) # Much faster! 
25 | >>> 26 | >>> # Clear specific cached data 27 | >>> nfl.clear_cache("pbp_2023") 28 | >>> 29 | >>> # Clear all cached data 30 | >>> nfl.clear_cache() 31 | """ 32 | 33 | import hashlib 34 | import time 35 | from pathlib import Path 36 | 37 | import polars as pl 38 | 39 | from .config import CacheMode, get_config 40 | 41 | 42 | class CacheManager: 43 | """Manages caching for nflreadpy data. 44 | 45 | The CacheManager handles both memory and filesystem caching of NFL data to improve 46 | performance and reduce network requests. It supports configurable cache modes and 47 | automatic expiration of cached data. 48 | 49 | Attributes: 50 | _memory_cache: Internal dictionary storing cached DataFrames with timestamps. 51 | """ 52 | 53 | def __init__(self) -> None: 54 | """Initialize a new CacheManager instance.""" 55 | self._memory_cache: dict[str, tuple[pl.DataFrame, float]] = {} 56 | 57 | def _get_cache_key(self, url: str, **kwargs: str | int | float | bool) -> str: 58 | """Generate a unique cache key from URL and parameters. 59 | 60 | Args: 61 | url: The data source URL. 62 | **kwargs: Additional parameters that affect the data. 63 | 64 | Returns: 65 | MD5 hash string to use as cache key. 66 | """ 67 | key_string = f"{url}_{str(sorted(kwargs.items()))}" 68 | return hashlib.md5(key_string.encode()).hexdigest() 69 | 70 | def _get_file_path(self, cache_key: str) -> Path: 71 | """Get the filesystem path for storing cached data. 72 | 73 | Args: 74 | cache_key: The unique cache identifier. 75 | 76 | Returns: 77 | Path to the cache file (creates directory if needed). 78 | """ 79 | config = get_config() 80 | cache_dir = config.cache_dir 81 | cache_dir.mkdir(parents=True, exist_ok=True) 82 | return cache_dir / f"{cache_key}.parquet" 83 | 84 | def get(self, url: str, **kwargs: str | int | float | bool) -> pl.DataFrame | None: 85 | """Retrieve cached data if available and not expired. 86 | 87 | Args: 88 | url: The data source URL. 89 | **kwargs: Additional parameters that were used when caching. 90 | 91 | Returns: 92 | Cached DataFrame if available and valid, None otherwise. 93 | 94 | Note: 95 | Checks memory cache first (if using MEMORY mode), then filesystem cache. 96 | Automatically removes expired cache entries. 
97 | """ 98 | config = get_config() 99 | 100 | if config.cache_mode == CacheMode.OFF: 101 | return None 102 | 103 | cache_key = self._get_cache_key(url, **kwargs) 104 | current_time = time.time() 105 | 106 | # Try memory cache first 107 | if config.cache_mode == CacheMode.MEMORY: 108 | cached_item = self._memory_cache.get(cache_key) 109 | if cached_item: 110 | data, timestamp = cached_item 111 | if current_time - timestamp < config.cache_duration: 112 | return data 113 | else: 114 | # Remove expired item 115 | del self._memory_cache[cache_key] 116 | 117 | # Try filesystem cache 118 | elif config.cache_mode == CacheMode.FILESYSTEM: 119 | file_path = self._get_file_path(cache_key) 120 | if file_path.exists(): 121 | try: 122 | # Check if file is expired based on modification time 123 | file_mtime = file_path.stat().st_mtime 124 | if current_time - file_mtime < config.cache_duration: 125 | return pl.read_parquet(file_path) 126 | else: 127 | # Remove expired cache file 128 | file_path.unlink(missing_ok=True) 129 | except Exception as e: 130 | if config.verbose: 131 | print(f"Failed to read cache file {file_path}: {e}") 132 | # Remove corrupted cache file 133 | file_path.unlink(missing_ok=True) 134 | 135 | return None 136 | 137 | def set( 138 | self, url: str, data: pl.DataFrame, **kwargs: str | int | float | bool 139 | ) -> None: 140 | """Store data in the cache. 141 | 142 | Args: 143 | url: The data source URL. 144 | data: The DataFrame to cache. 145 | **kwargs: Additional parameters that affect the data. 146 | 147 | Note: 148 | Storage location depends on cache mode configuration: 149 | - MEMORY: Stores in memory with timestamp 150 | - FILESYSTEM: Saves as Parquet file with current timestamp 151 | - OFF: No caching performed 152 | """ 153 | config = get_config() 154 | 155 | if config.cache_mode == CacheMode.OFF: 156 | return 157 | 158 | cache_key = self._get_cache_key(url, **kwargs) 159 | 160 | # Store in memory cache 161 | if config.cache_mode == CacheMode.MEMORY: 162 | self._memory_cache[cache_key] = (data, time.time()) 163 | 164 | # Store in filesystem cache 165 | elif config.cache_mode == CacheMode.FILESYSTEM: 166 | file_path = self._get_file_path(cache_key) 167 | try: 168 | data.write_parquet(file_path) 169 | except Exception as e: 170 | if config.verbose: 171 | print(f"Failed to write cache file {file_path}: {e}") 172 | 173 | def clear(self, pattern: str | None = None) -> None: 174 | """Clear cache entries matching a pattern. 175 | 176 | Args: 177 | pattern: Optional string pattern to match against cache keys. 178 | If None, clears all cache entries. 179 | 180 | Examples: 181 | >>> cache_manager = get_cache_manager() 182 | >>> cache_manager.clear() # Clear all cache 183 | >>> cache_manager.clear("pbp_2023") # Clear entries containing "pbp_2023" 184 | 185 | Note: 186 | Clears both memory and filesystem cache entries that match the pattern. 
187 | """ 188 | config = get_config() 189 | 190 | # Clear memory cache 191 | if pattern is None: 192 | self._memory_cache.clear() 193 | else: 194 | keys_to_remove = [k for k in self._memory_cache.keys() if pattern in k] 195 | for key in keys_to_remove: 196 | del self._memory_cache[key] 197 | 198 | # Clear filesystem cache 199 | if config.cache_mode == CacheMode.FILESYSTEM: 200 | cache_dir = config.cache_dir 201 | if cache_dir.exists(): 202 | if pattern is None: 203 | # Remove all cache files 204 | for cache_file in cache_dir.glob("*.parquet"): 205 | cache_file.unlink() 206 | else: 207 | # Remove matching cache files 208 | for cache_file in cache_dir.glob("*.parquet"): 209 | if pattern in cache_file.stem: 210 | cache_file.unlink() 211 | 212 | def size(self) -> dict[str, int | float]: 213 | """Get cache size and entry count information. 214 | 215 | Returns: 216 | Dictionary containing cache statistics: 217 | - memory_entries: Number of entries in memory cache 218 | - filesystem_entries: Number of files in filesystem cache (if enabled) 219 | - filesystem_size_mb: Total size of filesystem cache in MB (if enabled) 220 | 221 | Examples: 222 | >>> cache_manager = get_cache_manager() 223 | >>> stats = cache_manager.size() 224 | >>> print(f"Memory entries: {stats['memory_entries']}") 225 | >>> print(f"Disk entries: {stats.get('filesystem_entries', 0)}") 226 | """ 227 | config = get_config() 228 | result: dict[str, int | float] = {"memory_entries": len(self._memory_cache)} 229 | 230 | if config.cache_mode == CacheMode.FILESYSTEM: 231 | cache_dir = config.cache_dir 232 | if cache_dir.exists(): 233 | cache_files = list(cache_dir.glob("*.parquet")) 234 | result["filesystem_entries"] = len(cache_files) 235 | result["filesystem_size_mb"] = sum( 236 | f.stat().st_size for f in cache_files 237 | ) / (1024 * 1024) 238 | else: 239 | result["filesystem_entries"] = 0 240 | result["filesystem_size_mb"] = 0.0 241 | 242 | return result 243 | 244 | 245 | # Global cache manager instance 246 | _cache_manager = CacheManager() 247 | 248 | 249 | def get_cache_manager() -> CacheManager: 250 | """Get the global cache manager instance. 251 | 252 | Returns: 253 | The singleton CacheManager instance used by all nflreadpy functions. 254 | 255 | Examples: 256 | >>> cache_manager = get_cache_manager() 257 | >>> cache_stats = cache_manager.size() 258 | >>> cache_manager.clear("pbp_2023") 259 | """ 260 | return _cache_manager 261 | 262 | 263 | def clear_cache(pattern: str | None = None) -> None: 264 | """Clear cached data entries matching a pattern. 265 | 266 | This is the main function for clearing nflreadpy's cache. It provides a simple 267 | interface to the underlying CacheManager functionality. 268 | 269 | Args: 270 | pattern: Optional string pattern to match against cached data. 271 | If None, clears all cached data. Pattern matching is performed 272 | on cache keys, which typically contain URLs and parameters. 273 | 274 | Examples: 275 | >>> import nflreadpy as nfl 276 | >>> nfl.clear_cache() # Clear all cached data 277 | >>> nfl.clear_cache("pbp_2023") # Clear 2023 play-by-play data 278 | >>> nfl.clear_cache("roster") # Clear all roster data 279 | 280 | Note: 281 | This affects both memory and filesystem cache depending on your 282 | cache configuration. See nflreadpy.config for cache settings. 
283 | 284 | See Also: 285 | [nflreadr clear_cache reference](https://nflreadr.nflverse.com/reference/clear_cache.html) 286 | """ 287 | _cache_manager.clear(pattern) 288 | -------------------------------------------------------------------------------- /tests/test_integration.py: -------------------------------------------------------------------------------- 1 | """Integration tests for all nflreadpy functions.""" 2 | 3 | import nflreadpy as nfl 4 | import polars as pl 5 | import pytest 6 | 7 | 8 | class TestImports: 9 | """Test that all functions can be imported successfully.""" 10 | 11 | def test_all_exports(self): 12 | """Test that all expected exports are available.""" 13 | expected_exports = [ 14 | # Core loading functions 15 | "load_pbp", 16 | "load_player_stats", 17 | "load_team_stats", 18 | "load_rosters", 19 | "load_schedules", 20 | "load_teams", 21 | "load_players", 22 | "load_draft_picks", 23 | "load_injuries", 24 | "load_contracts", 25 | "load_snap_counts", 26 | "load_nextgen_stats", 27 | "load_officials", 28 | "load_participation", 29 | "load_pfr_advstats", 30 | "load_combine", 31 | "load_depth_charts", 32 | "load_trades", 33 | "load_ftn_charting", 34 | "load_rosters_weekly", 35 | # ffverse functions 36 | "load_ff_playerids", 37 | "load_ff_rankings", 38 | "load_ff_opportunity", 39 | # Utility functions 40 | "get_current_season", 41 | "get_current_week", 42 | "clear_cache", 43 | # datasets 44 | "team_abbr_mapping", 45 | "team_abbr_mapping_norelocate", 46 | ] 47 | 48 | for export in expected_exports: 49 | assert hasattr(nfl, export), f"Missing export: {export}" 50 | assert callable(getattr(nfl, export)), f"Export is not callable: {export}" 51 | 52 | 53 | class TestUtilityFunctions: 54 | """Test utility functions.""" 55 | 56 | def test_get_current_season(self): 57 | """Test get_current_season function.""" 58 | season = nfl.get_current_season() 59 | assert isinstance(season, int) 60 | assert 2025 <= season <= 2100 # Reasonable bounds 61 | 62 | def test_get_current_week(self): 63 | """Test get_current_week function.""" 64 | week = nfl.get_current_week(use_date=True) 65 | assert isinstance(week, int) 66 | assert 1 <= week <= 22 # Reasonable bounds for NFL weeks 67 | week = nfl.get_current_week(use_date=False, roster=True) 68 | assert isinstance(week, int) 69 | assert 1 <= week <= 22 # Reasonable bounds for NFL weeks 70 | 71 | def test_clear_cache(self): 72 | """Test clear_cache function.""" 73 | # Should not raise an exception 74 | nfl.clear_cache() 75 | 76 | 77 | class TestStaticDataLoaders: 78 | """Test loaders that don't require season parameters.""" 79 | 80 | def test_load_teams(self): 81 | """Test load_teams function.""" 82 | df = nfl.load_teams() 83 | assert isinstance(df, pl.DataFrame) 84 | assert len(df) > 0 85 | # Should have 32+ teams (accounting for relocations) 86 | assert len(df) >= 32 87 | 88 | def test_load_players(self): 89 | """Test load_players function.""" 90 | df = nfl.load_players() 91 | assert isinstance(df, pl.DataFrame) 92 | assert len(df) > 0 93 | 94 | def test_load_trades(self): 95 | """Test load_trades function.""" 96 | df = nfl.load_trades() 97 | assert isinstance(df, pl.DataFrame) 98 | assert len(df) > 0 99 | 100 | def test_load_contracts(self): 101 | """Test load_contracts function.""" 102 | df = nfl.load_contracts() 103 | assert isinstance(df, pl.DataFrame) 104 | assert len(df) >= 0 105 | 106 | def test_load_ff_playerids(self): 107 | """Test load_ff_playerids function.""" 108 | df = nfl.load_ff_playerids() 109 | assert isinstance(df, pl.DataFrame) 110 
/tests/test_integration.py:
--------------------------------------------------------------------------------
"""Integration tests for all nflreadpy functions."""

import nflreadpy as nfl
import polars as pl
import pytest


class TestImports:
    """Test that all functions can be imported successfully."""

    def test_all_exports(self):
        """Test that all expected exports are available."""
        expected_exports = [
            # Core loading functions
            "load_pbp",
            "load_player_stats",
            "load_team_stats",
            "load_rosters",
            "load_schedules",
            "load_teams",
            "load_players",
            "load_draft_picks",
            "load_injuries",
            "load_contracts",
            "load_snap_counts",
            "load_nextgen_stats",
            "load_officials",
            "load_participation",
            "load_pfr_advstats",
            "load_combine",
            "load_depth_charts",
            "load_trades",
            "load_ftn_charting",
            "load_rosters_weekly",
            # ffverse functions
            "load_ff_playerids",
            "load_ff_rankings",
            "load_ff_opportunity",
            # Utility functions
            "get_current_season",
            "get_current_week",
            "clear_cache",
            # datasets
            "team_abbr_mapping",
            "team_abbr_mapping_norelocate",
        ]

        for export in expected_exports:
            assert hasattr(nfl, export), f"Missing export: {export}"
            assert callable(getattr(nfl, export)), f"Export is not callable: {export}"


class TestUtilityFunctions:
    """Test utility functions."""

    def test_get_current_season(self):
        """Test get_current_season function."""
        season = nfl.get_current_season()
        assert isinstance(season, int)
        assert 2025 <= season <= 2100  # Reasonable bounds

    def test_get_current_week(self):
        """Test get_current_week function."""
        week = nfl.get_current_week(use_date=True)
        assert isinstance(week, int)
        assert 1 <= week <= 22  # Reasonable bounds for NFL weeks
        week = nfl.get_current_week(use_date=False, roster=True)
        assert isinstance(week, int)
        assert 1 <= week <= 22  # Reasonable bounds for NFL weeks

    def test_clear_cache(self):
        """Test clear_cache function."""
        # Should not raise an exception
        nfl.clear_cache()


class TestStaticDataLoaders:
    """Test loaders that don't require season parameters."""

    def test_load_teams(self):
        """Test load_teams function."""
        df = nfl.load_teams()
        assert isinstance(df, pl.DataFrame)
        assert len(df) > 0
        # Should have 32+ teams (accounting for relocations)
        assert len(df) >= 32

    def test_load_players(self):
        """Test load_players function."""
        df = nfl.load_players()
        assert isinstance(df, pl.DataFrame)
        assert len(df) > 0

    def test_load_trades(self):
        """Test load_trades function."""
        df = nfl.load_trades()
        assert isinstance(df, pl.DataFrame)
        assert len(df) > 0

    def test_load_contracts(self):
        """Test load_contracts function."""
        df = nfl.load_contracts()
        assert isinstance(df, pl.DataFrame)
        assert len(df) >= 0

    def test_load_ff_playerids(self):
        """Test load_ff_playerids function."""
        df = nfl.load_ff_playerids()
        assert isinstance(df, pl.DataFrame)
        assert len(df) >= 0

    def test_load_ff_rankings_draft(self):
        """Test load_ff_rankings with draft type."""
        df = nfl.load_ff_rankings("draft")
        assert isinstance(df, pl.DataFrame)
        assert len(df) >= 0

    def test_load_ff_rankings_week(self):
        """Test load_ff_rankings with week type."""
        df = nfl.load_ff_rankings("week")
        assert isinstance(df, pl.DataFrame)
        assert len(df) >= 0

    def test_load_ff_rankings_all(self):
        """Test load_ff_rankings with all type."""
        df = nfl.load_ff_rankings("all")
        assert isinstance(df, pl.DataFrame)
        assert len(df) >= 0

    def test_team_abbr_mappings(self):
        """Test team_abbr_mappings."""
        df1 = nfl.team_abbr_mapping()
        df2 = nfl.team_abbr_mapping_norelocate()
        assert isinstance(df1, pl.DataFrame)
        assert isinstance(df2, pl.DataFrame)
        assert len(df1) > 0
        assert len(df2) > 0
        assert len(df1) >= 143
        assert len(df2) >= 149
        # Map to 32 teams + AFC, NFC, NFL
        assert df1.select("value").n_unique() == 35
        # Map to 32 teams + AFC, NFC, NFL + SD, STL, OAK
        assert df2.select("value").n_unique() == 38

    def test_player_name_mapping(self):
        """Test player_name_mapping."""
        df = nfl.player_name_mapping()
        assert isinstance(df, pl.DataFrame)
        assert len(df) > 0
        assert len(df) >= 136


class TestSeasonalDataLoaders:
    """Test loaders that require season parameters."""

    def test_load_pbp_2024_season(self):
        """Test load_pbp with 2024 season."""
        df = nfl.load_pbp(2024)
        assert isinstance(df, pl.DataFrame)
        assert len(df) > 0

    def test_load_rosters_2024_season(self):
        """Test load_rosters with 2024 season."""
        df = nfl.load_rosters(2024)
        assert isinstance(df, pl.DataFrame)
        assert len(df) > 0

    def test_load_schedules_2024_season(self):
        """Test load_schedules with 2024 season."""
        df = nfl.load_schedules(2024)
        assert isinstance(df, pl.DataFrame)
        assert len(df) > 0

    def test_load_player_stats_2024_season(self):
        """Test load_player_stats with 2024 season."""
        df = nfl.load_player_stats(2024)
        assert isinstance(df, pl.DataFrame)
        assert len(df) > 0

    def test_load_player_stats_multiple_seasons(self):
        """Test load_player_stats with multiple seasons."""
        df = nfl.load_player_stats([2022, 2023])
        assert isinstance(df, pl.DataFrame)
        assert len(df) > 0

    def test_load_team_stats_2024_season(self):
        """Test load_team_stats with 2024 season."""
        df = nfl.load_team_stats(2024)
        assert isinstance(df, pl.DataFrame)
        assert len(df) > 0

    def test_load_injuries_specific_season(self):
        """Test load_injuries with a specific season."""
        df = nfl.load_injuries(2023)
        assert isinstance(df, pl.DataFrame)
        assert len(df) > 0

    def test_load_depth_charts_2024_season(self):
        """Test load_depth_charts with 2024 season."""
        df = nfl.load_depth_charts(2024)
        assert isinstance(df, pl.DataFrame)
        assert len(df) >= 0

    def test_load_snap_counts_2024_season(self):
        """Test load_snap_counts with 2024 season."""
        df = nfl.load_snap_counts(2024)
        assert isinstance(df, pl.DataFrame)
        assert len(df) >= 0

    def test_load_nextgen_stats_2024_season(self):
        """Test load_nextgen_stats with 2024 season."""
        df = nfl.load_nextgen_stats(2024, "passing")
        assert isinstance(df, pl.DataFrame)
        assert len(df) >= 0

    def test_load_officials_2024_season(self):
        """Test load_officials with 2024 season."""
        df = nfl.load_officials(2024)
        assert isinstance(df, pl.DataFrame)
        assert len(df) >= 0

    def test_load_participation_2024_season(self):
        """Test load_participation with 2024 season."""
        df = nfl.load_participation(2024)
        assert isinstance(df, pl.DataFrame)
        assert len(df) >= 0

    def test_load_draft_picks_2024_season(self):
        """Test load_draft_picks with 2024 season."""
        df = nfl.load_draft_picks(2024)
        assert isinstance(df, pl.DataFrame)
        assert len(df) >= 0

    def test_load_ftn_charting_2024_season(self):
        """Test load_ftn_charting with 2024 season."""
        df = nfl.load_ftn_charting(2024)
        assert isinstance(df, pl.DataFrame)
        assert len(df) >= 0

    def test_load_rosters_weekly_2024_season(self):
        """Test load_rosters_weekly with 2024 season."""
        df = nfl.load_rosters_weekly(2024)
        assert isinstance(df, pl.DataFrame)
        assert len(df) >= 0

    def test_load_combine_2024_season(self):
        """Test load_combine with 2024 season."""
        df = nfl.load_combine(2024)
        assert isinstance(df, pl.DataFrame)
        assert len(df) >= 0

    def test_load_pfr_advstats_week_pass(self):
        """Test load_pfr_advstats with weekly passing stats."""
        df = nfl.load_pfr_advstats(2023, stat_type="pass", summary_level="week")
        assert isinstance(df, pl.DataFrame)
        assert len(df) >= 0

    def test_load_pfr_advstats_season_pass(self):
        """Test load_pfr_advstats with season-level passing stats."""
        df = nfl.load_pfr_advstats(2023, stat_type="pass", summary_level="season")
        assert isinstance(df, pl.DataFrame)
        assert len(df) >= 0

    def test_load_pfr_advstats_week_def(self):
        """Test load_pfr_advstats with weekly defensive stats."""
        df = nfl.load_pfr_advstats(2023, stat_type="def", summary_level="week")
        assert isinstance(df, pl.DataFrame)
        assert len(df) >= 0

    def test_load_ff_opportunity_2024_season_week(self):
        """Test load_ff_opportunity weekly stats with a specific season."""
        df = nfl.load_ff_opportunity(2024, stat_type="weekly")
        assert isinstance(df, pl.DataFrame)
        assert len(df) >= 0

    def test_load_ff_opportunity_2024_season_pbp_rush(self):
        """Test load_ff_opportunity pbp_rush with a specific season."""
        df = nfl.load_ff_opportunity(2024, stat_type="pbp_rush")
        assert isinstance(df, pl.DataFrame)
        assert len(df) >= 0

    def test_load_ff_opportunity_2024_season_pbp_pass(self):
        """Test load_ff_opportunity pbp_pass with a specific season."""
        df = nfl.load_ff_opportunity(2024, stat_type="pbp_pass")
        assert isinstance(df, pl.DataFrame)
        assert len(df) >= 0


class TestErrorHandling:
    """Test error handling for invalid inputs."""

    def test_load_pbp_invalid_season(self):
        """Test load_pbp with invalid season."""
        with pytest.raises(ValueError):
            nfl.load_pbp(1990)  # Too early

        with pytest.raises(ValueError):
            nfl.load_pbp(2100)  # Too far in future

    def test_load_pbp_invalid_type(self):
        """Test load_pbp with invalid type."""
        with pytest.raises((ValueError, TypeError)):
            nfl.load_pbp("invalid")

    def test_load_ff_rankings_invalid_type(self):
        """Test load_ff_rankings with invalid type."""
        with pytest.raises(ValueError):
            nfl.load_ff_rankings("invalid")

    def test_load_ff_opportunity_invalid_season(self):
        """Test load_ff_opportunity with invalid season."""
        with pytest.raises(ValueError):
            nfl.load_ff_opportunity(2005)  # Too early

    def test_load_ff_opportunity_invalid_stat_type(self):
        """Test load_ff_opportunity with invalid stat_type."""
        with pytest.raises(ValueError):
            nfl.load_ff_opportunity(2023, stat_type="invalid")

    def test_load_ff_opportunity_invalid_model_version(self):
        """Test load_ff_opportunity with invalid model_version."""
        with pytest.raises(ValueError):
            nfl.load_ff_opportunity(2023, model_version="invalid")

    def test_load_pfr_advstats_invalid_season(self):
        """Test load_pfr_advstats with invalid season."""
        with pytest.raises(ValueError):
            nfl.load_pfr_advstats(2017, summary_level="season")  # Too early

    def test_load_pfr_advstats_invalid_stat_type(self):
        """Test load_pfr_advstats with invalid stat_type."""
        with pytest.raises(ValueError):
            nfl.load_pfr_advstats(2023, stat_type="invalid")

    def test_load_pfr_advstats_invalid_summary_level(self):
        """Test load_pfr_advstats with invalid summary_level."""
        with pytest.raises(ValueError):
            nfl.load_pfr_advstats(2023, summary_level="invalid")


class TestDataQuality:
    """Test basic data quality for a subset of functions."""

    def test_teams_data_structure(self):
        """Test that teams data has expected structure."""
        df = nfl.load_teams()
        assert isinstance(df, pl.DataFrame)
        assert len(df) > 0
        # Should have some basic columns (exact names may vary)
        assert len(df.columns) > 5

    def test_pbp_data_structure(self):
        """Test that PBP data has expected structure for a recent season."""
        df = nfl.load_pbp(2023)
        assert isinstance(df, pl.DataFrame)
        assert len(df) > 0
        # Should have many columns for PBP data
        assert len(df.columns) > 50

    def test_schedules_data_structure(self):
        """Test that schedules data has expected structure."""
        df = nfl.load_schedules(2023)
        assert isinstance(df, pl.DataFrame)
        assert len(df) > 0
        # Should have ~285 games per season (32 teams * 17 games / 2 = 272 regular-season games, plus playoffs)
        assert len(df) > 250

--------------------------------------------------------------------------------
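The integration tests above double as a compact usage reference for the public API. As a quick end-to-end illustration outside pytest, a smoke-check script might look like the sketch below; it assumes network access to the nflverse data releases and uses only call signatures already exercised in the tests.

```python
# Smoke-check sketch mirroring the integration tests; not part of the suite.
# Assumes network access to nflverse data releases.
import nflreadpy as nfl
import polars as pl

season = nfl.get_current_season()            # current NFL season as an int
pbp = nfl.load_pbp(2023)                     # one season of play-by-play
stats = nfl.load_player_stats([2022, 2023])  # multiple seasons in one frame
teams = nfl.load_teams()                     # team abbreviations, colors, logos

assert isinstance(pbp, pl.DataFrame)
print(season, pbp.shape, stats.shape, teams.shape)
```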