├── .gitattributes ├── tests ├── __init__.py ├── test_scrape_service.py ├── fixtures │ ├── __init__.py │ ├── json_samples.py │ └── html_samples.py ├── test_http.py ├── test_coordinator.py ├── conftest.py ├── test_selector.py ├── test_button.py ├── test_file.py ├── test_util.py ├── test_scraper.py ├── test_entity.py └── test_init.py ├── hacs.json ├── scripts ├── lint ├── setup └── develop ├── requirements.txt ├── .vscode ├── settings.json ├── tasks.json └── launch.json ├── custom_components └── multiscrape │ ├── icons.json │ ├── services.yaml │ ├── manifest.json │ ├── file.py │ ├── const.py │ ├── util.py │ ├── button.py │ ├── selector.py │ ├── __init__.py │ ├── scraper.py │ ├── sensor.py │ ├── entity.py │ ├── service.py │ ├── binary_sensor.py │ ├── coordinator.py │ ├── schema.py │ ├── form.py │ └── http.py ├── .gitignore ├── renovate.json ├── pyproject.toml ├── .github ├── ISSUE_TEMPLATE │ ├── feature_request.md │ └── issue.md ├── workflows │ ├── lint.yml │ ├── tests.yml │ ├── validate.yml │ ├── release-drafter.yml │ └── codeql-analysis.yml └── release-drafter.yml ├── LICENSE ├── config └── configuration.yaml ├── .yamllint ├── .devcontainer.json ├── .ruff.toml ├── .pre-commit-config.yaml └── CONTRIBUTING.md /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto eol=lf 2 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Init tests for Multiscrape integration.""" 2 | -------------------------------------------------------------------------------- /tests/test_scrape_service.py: -------------------------------------------------------------------------------- 1 | """Test for simple scraping.""" 2 | 3 | 4 | -------------------------------------------------------------------------------- /tests/fixtures/__init__.py: 
-------------------------------------------------------------------------------- 1 | """Test fixtures and sample data for multiscrape tests.""" 2 | -------------------------------------------------------------------------------- /hacs.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Multiscrape", 3 | "homeassistant": "2023.8.0", 4 | "render_readme": true 5 | } 6 | -------------------------------------------------------------------------------- /scripts/lint: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | cd "$(dirname "$0")/.." 6 | 7 | ruff check . --fix 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | colorlog==6.10.1 2 | pytest-homeassistant-custom-component==0.13.296 3 | pip>=25,<26 4 | lxml>=4.9.1 5 | beautifulsoup4>=4.12.2 6 | ruff==0.14.5 -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.testing.pytestArgs": ["tests"], 3 | "python.testing.unittestEnabled": false, 4 | "python.testing.pytestEnabled": true 5 | } 6 | -------------------------------------------------------------------------------- /scripts/setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | cd "$(dirname "$0")/.." 
6 | 7 | python3 -m pip install --requirement requirements.txt 8 | python3 -m pip install pre-commit 9 | pre-commit install 10 | -------------------------------------------------------------------------------- /custom_components/multiscrape/icons.json: -------------------------------------------------------------------------------- 1 | { 2 | "services": { 3 | "reload": "mdi:reload", 4 | "get_content": "mdi:content-copy", 5 | "scrape": "mdi:web-sync", 6 | "trigger": "mdi:web-refresh" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /custom_components/multiscrape/services.yaml: -------------------------------------------------------------------------------- 1 | reload: 2 | name: Reload 3 | description: Reload all rest entities and notify services 4 | 5 | trigger: 6 | name: Trigger 7 | description: Trigger a scrape run independent of the update interval 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # artifacts 2 | __pycache__ 3 | .pytest* 4 | *.egg-info 5 | */build/* 6 | */dist/* 7 | 8 | 9 | # misc 10 | .coverage 11 | coverage.xml 12 | .todo 13 | 14 | 15 | # Home Assistant configuration 16 | config/* 17 | !config/configuration.yaml 18 | .claude/settings.local.json 19 | -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": ["config:recommended"], 4 | "packageRules": [ 5 | { 6 | "groupName": "ruff", 7 | "matchPackageNames": ["ruff"], 8 | "matchManagers": ["pip_requirements", "pre-commit"] 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /.vscode/tasks.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0.0", 3 | "tasks": [ 4 | { 5 | "label": "Run Home Assistant on port 8123", 6 | "type": "shell", 7 | "command": "scripts/develop", 8 | "problemMatcher": [] 9 | }, 10 | { 11 | "label": "Upgrade Home Assistant or dependencies", 12 | "type": "shell", 13 | "command": "scripts/setup", 14 | "problemMatcher": [] 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /custom_components/multiscrape/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "domain": "multiscrape", 3 | "name": "Multiscrape", 4 | "codeowners": ["@danieldotnl"], 5 | "config_flow": false, 6 | "dependencies": [], 7 | "documentation": "https://github.com/danieldotnl/ha-multiscrape", 8 | "iot_class": "local_polling", 9 | "issue_tracker": "https://github.com/danieldotnl/ha-multiscrape/issues", 10 | "requirements": ["lxml>=4.9.1", "beautifulsoup4>=4.12.2"], 11 | "version": "8.0.2" 12 | } 13 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "ha-multiscrape" 3 | requires-python = ">=3.13" 4 | 5 | [tool.pytest.ini_options] 6 | asyncio_mode = "auto" 7 | asyncio_default_fixture_loop_scope = "function" 8 | timeout = 10 9 | markers = [ 10 | "unit: Unit tests that test individual functions/classes in isolation", 11 | "integration: Integration tests that test multiple components together", 12 | "slow: Tests that take a long time to run (>1s)", 13 | "async_test: Tests that involve async operations", 14 | "http: Tests that make HTTP requests or mock them", 15 | ] -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: 
-------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | --- 5 | 6 | **Is your feature request related to a problem? Please describe.** 7 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 8 | 9 | **Describe the solution you'd like** 10 | A clear and concise description of what you want to happen. 11 | 12 | **Describe alternatives you've considered** 13 | A clear and concise description of any alternative solutions or features you've considered. 14 | 15 | **Additional context** 16 | Add any other context or screenshots about the feature request here. 17 | -------------------------------------------------------------------------------- /scripts/develop: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | cd "$(dirname "$0")/.." 6 | 7 | # Create config dir if not present 8 | if [[ ! -d "${PWD}/config" ]]; then 9 | mkdir -p "${PWD}/config" 10 | hass --config "${PWD}/config" --script ensure_config 11 | fi 12 | 13 | # Set the path to custom_components 14 | ## This lets us have the structure we want /custom_components/integration_blueprint 15 | ## while at the same time have Home Assistant configuration inside /config 16 | ## without resorting to symlinks. 
17 | export PYTHONPATH="${PYTHONPATH}:${PWD}/custom_components" 18 | 19 | # Start Home Assistant 20 | hass --config "${PWD}/config" --debug 21 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: "Lint" 2 | 3 | "on": 4 | push: 5 | branches: 6 | - "master" 7 | pull_request: 8 | branches: 9 | - "master" 10 | 11 | jobs: 12 | ruff: 13 | name: "Ruff" 14 | runs-on: "ubuntu-latest" 15 | steps: 16 | - name: "Checkout the repository" 17 | uses: "actions/checkout@v5.0.1" 18 | 19 | - name: "Set up Python" 20 | uses: actions/setup-python@v6.1.0 21 | with: 22 | python-version: "3.13" 23 | cache: "pip" 24 | 25 | - name: "Install requirements" 26 | run: python3 -m pip install -r requirements.txt 27 | 28 | - name: "Run" 29 | run: python3 -m ruff check . 30 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name-template: 'v$RESOLVED_VERSION 🌈' 2 | tag-template: 'v$RESOLVED_VERSION' 3 | categories: 4 | - title: '🚀 Features' 5 | labels: 6 | - 'feature' 7 | - 'enhancement' 8 | - title: '🐛 Bug Fixes' 9 | labels: 10 | - 'fix' 11 | - 'bugfix' 12 | - 'bug' 13 | - title: '🧰 Maintenance' 14 | label: 'chore' 15 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)' 16 | change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks. 
17 | version-resolver: 18 | major: 19 | labels: 20 | - 'major' 21 | minor: 22 | labels: 23 | - 'minor' 24 | patch: 25 | labels: 26 | - 'patch' 27 | default: patch 28 | template: | 29 | ## Changes 30 | 31 | $CHANGES 32 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: "Tests" 2 | 3 | # yamllint disable-line rule:truthy 4 | on: 5 | push: 6 | branches: 7 | - "dev**" 8 | - devel 9 | - master 10 | pull_request: ~ 11 | workflow_dispatch: 12 | 13 | jobs: 14 | test: 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | python-version: ["3.13"] 19 | steps: 20 | - uses: actions/checkout@v5 21 | - name: Set up Python 22 | uses: actions/setup-python@v6 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | - name: Install dependencies 26 | run: | 27 | python -m pip install --upgrade pip 28 | pip install -r requirements.txt 29 | - name: Run tests and collect coverage 30 | run: pytest tests 31 | -------------------------------------------------------------------------------- /tests/fixtures/json_samples.py: -------------------------------------------------------------------------------- 1 | """Sample JSON data for testing scrapers.""" 2 | 3 | # Simple JSON object 4 | SAMPLE_JSON_SIMPLE = '{"name":"John", "age":30, "car":null}' 5 | 6 | # Complex nested JSON 7 | SAMPLE_JSON_NESTED = """{ 8 | "user": { 9 | "id": 123, 10 | "name": "John Doe", 11 | "email": "john@example.com", 12 | "preferences": { 13 | "theme": "dark", 14 | "notifications": true 15 | } 16 | }, 17 | "data": [1, 2, 3, 4, 5] 18 | }""" 19 | 20 | # JSON array 21 | SAMPLE_JSON_ARRAY = '[{"id": 1, "name": "Item 1"}, {"id": 2, "name": "Item 2"}]' 22 | 23 | # Invalid JSON for error testing 24 | SAMPLE_JSON_INVALID = '{"name": "John", "age": 30' # Missing closing brace 25 | 26 | # JSON with special characters 27 | SAMPLE_JSON_SPECIAL_CHARS = '{"text": "Text 
with \\"quotes\\" and \\n newlines"}' 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/issue.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Issue 3 | about: Create a report to help us improve 4 | --- 5 | 6 | 15 | 16 | ## Version of the custom_component 17 | 18 | 19 | 20 | ## Configuration 21 | 22 | ```yaml 23 | Add your logs here. 24 | ``` 25 | 26 | ## Describe the bug 27 | 28 | A clear and concise description of what the bug is. 29 | 30 | ## Debug log 31 | 32 | 33 | 34 | ```text 35 | 36 | Add your logs here. 37 | 38 | ``` 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 danieldotnl 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.github/workflows/validate.yml: -------------------------------------------------------------------------------- 1 | name: "Validate" 2 | 3 | "on": 4 | workflow_dispatch: 5 | schedule: 6 | - cron: "0 0 * * *" 7 | push: 8 | branches: 9 | - "master" 10 | pull_request: 11 | branches: 12 | - "master" 13 | 14 | jobs: 15 | hassfest: # https://developers.home-assistant.io/blog/2020/04/16/hassfest 16 | name: "Hassfest Validation" 17 | runs-on: "ubuntu-latest" 18 | steps: 19 | - name: "Checkout the repository" 20 | uses: "actions/checkout@v5.0.1" 21 | 22 | - name: "Run hassfest validation" 23 | uses: "home-assistant/actions/hassfest@master" 24 | 25 | hacs: # https://github.com/hacs/action 26 | name: "HACS Validation" 27 | runs-on: "ubuntu-latest" 28 | steps: 29 | - name: "Checkout the repository" 30 | uses: "actions/checkout@v5.0.1" 31 | 32 | - name: "Run HACS validation" 33 | uses: "hacs/action@main" 34 | with: 35 | category: "integration" 36 | # Remove this 'ignore' key when you have added brand images for your integration to https://github.com/home-assistant/brands 37 | # ignore: "brands" 38 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: Draft a release note 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | update_release_draft: 14 | permissions: 15 | # write permission is required to create a github release 16 | contents: write 17 | # write permission is required for autolabeler 18 | # otherwise, read permission is required at least 19 | pull-requests: write 20 | runs-on: ubuntu-latest 21 | steps: 22 | # (Optional) GitHub Enterprise requires GHE_HOST variable set 23 | #- name: Set GHE_HOST 24 | # run: | 25 | # echo 
"GHE_HOST=${GITHUB_SERVER_URL##https:\/\/}" >> $GITHUB_ENV 26 | 27 | # Drafts your next Release notes as Pull Requests are merged into "master" 28 | - uses: release-drafter/release-drafter@v6 29 | # (Optional) specify config name to use, relative to .github/. Default: release-drafter.yml 30 | # with: 31 | # config-name: my-config.yml 32 | # disable-autolabeler: true 33 | env: 34 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 35 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 3 | "version": "0.2.0", 4 | "configurations": [ 5 | { 6 | "name": "Home Assistant debugging", 7 | "type": "python", 8 | "request": "launch", 9 | "module": "homeassistant", 10 | "justMyCode": false, 11 | "args": ["--debug", "-c", "config"] 12 | }, 13 | { 14 | // Example of attaching to local debug server 15 | "name": "Python: Attach Local", 16 | "type": "python", 17 | "request": "attach", 18 | "port": 5678, 19 | "host": "localhost", 20 | "pathMappings": [ 21 | { 22 | "localRoot": "${workspaceFolder}", 23 | "remoteRoot": "." 
24 | } 25 | ] 26 | }, 27 | { 28 | // Example of attaching to my production server 29 | "name": "Python: Attach Remote", 30 | "type": "python", 31 | "request": "attach", 32 | "port": 5678, 33 | "host": "homeassistant.local", 34 | "pathMappings": [ 35 | { 36 | "localRoot": "${workspaceFolder}", 37 | "remoteRoot": "/usr/src/homeassistant" 38 | } 39 | ] 40 | } 41 | ] 42 | } 43 | -------------------------------------------------------------------------------- /config/configuration.yaml: -------------------------------------------------------------------------------- 1 | default_config: 2 | 3 | logger: 4 | default: info 5 | logs: 6 | custom_components.multiscrape: debug 7 | # If you need to debug uncommment the line below (doc: https://www.home-assistant.io/integrations/debugpy/) 8 | # debugpy: 9 | 10 | multiscrape: 11 | - name: HA scraper 12 | resource: https://www.home-assistant.io 13 | scan_interval: 3600 14 | sensor: 15 | - unique_id: ha_latest_version 16 | name: Latest version 17 | select: ".current-version > h1:nth-child(1)" 18 | value_template: '{{ (value.split(":")[1]) }}' 19 | - unique_id: ha_release_date 20 | icon: >- 21 | {% if is_state('binary_sensor.ha_version_check', 'on') %} 22 | mdi:alarm-light 23 | {% else %} 24 | mdi:bat 25 | {% endif %} 26 | name: Release date 27 | select: ".release-date" 28 | binary_sensor: 29 | - unique_id: ha_version_check 30 | name: Latest version == 2021.7.0 31 | select: ".current-version > h1:nth-child(1)" 32 | value_template: '{{ (value.split(":")[1]) | trim == "2021.7.0" }}' 33 | attributes: 34 | - name: Release notes link 35 | select: "div.links:nth-child(3) > a:nth-child(1)" 36 | attribute: href -------------------------------------------------------------------------------- /.yamllint: -------------------------------------------------------------------------------- 1 | ignore: | 2 | azure-*.yml 3 | rules: 4 | braces: 5 | level: error 6 | min-spaces-inside: 0 7 | max-spaces-inside: 1 8 | min-spaces-inside-empty: -1 9 | 
max-spaces-inside-empty: -1 10 | brackets: 11 | level: error 12 | min-spaces-inside: 0 13 | max-spaces-inside: 0 14 | min-spaces-inside-empty: -1 15 | max-spaces-inside-empty: -1 16 | colons: 17 | level: error 18 | max-spaces-before: 0 19 | max-spaces-after: 1 20 | commas: 21 | level: error 22 | max-spaces-before: 0 23 | min-spaces-after: 1 24 | max-spaces-after: 1 25 | comments: 26 | level: error 27 | require-starting-space: true 28 | min-spaces-from-content: 1 29 | comments-indentation: 30 | level: error 31 | document-end: 32 | level: error 33 | present: false 34 | document-start: 35 | level: error 36 | present: false 37 | empty-lines: 38 | level: error 39 | max: 1 40 | max-start: 0 41 | max-end: 1 42 | hyphens: 43 | level: error 44 | max-spaces-after: 1 45 | indentation: 46 | level: error 47 | spaces: 2 48 | indent-sequences: true 49 | check-multi-line-strings: false 50 | key-duplicates: 51 | level: error 52 | line-length: disable 53 | new-line-at-end-of-file: 54 | level: error 55 | new-lines: 56 | level: error 57 | type: unix 58 | trailing-spaces: 59 | level: error 60 | truthy: 61 | level: error 62 | -------------------------------------------------------------------------------- /.devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "danieldotnl/multiscrape", 3 | "image": "mcr.microsoft.com/vscode/devcontainers/python:3.13-bullseye", 4 | "postCreateCommand": "scripts/setup", 5 | "forwardPorts": [8123], 6 | "portsAttributes": { 7 | "8123": { 8 | "label": "Home Assistant", 9 | "onAutoForward": "notify" 10 | } 11 | }, 12 | "features": { 13 | "ghcr.io/devcontainers/features/github-cli:1.0.15": {} 14 | }, 15 | "customizations": { 16 | "vscode": { 17 | "extensions": [ 18 | "ms-python.python", 19 | "github.vscode-pull-request-github", 20 | "ryanluker.vscode-coverage-gutters", 21 | "ms-python.vscode-pylance", 22 | "eamodio.gitlens" 23 | ], 24 | "settings": { 25 | "files.eol": "\n", 26 | "editor.tabSize": 
"""LoggingFileManager for file utilities."""
import logging
import os

from homeassistant.core import HomeAssistant
from homeassistant.util import slugify

_LOGGER = logging.getLogger(__name__)


async def create_file_manager(hass: HomeAssistant, config_name: str, log_response: bool):
    """Create a file manager instance.

    Returns a LoggingFileManager rooted at
    `<config>/multiscrape/<slugified config_name>/` when `log_response` is
    True, otherwise None.
    """
    file_manager = None
    if log_response:
        folder = os.path.join(
            hass.config.config_dir, f"multiscrape/{slugify(config_name)}/"
        )
        _LOGGER.debug(
            "%s # Log responses enabled, creating logging folder: %s",
            config_name,
            folder,
        )
        file_manager = LoggingFileManager(folder)
        # Folder creation is blocking I/O, so it runs in the executor.
        # BUGFIX: the original call was not awaited, discarding the future —
        # creation could still be pending (or silently fail) when the first
        # write() happened. Awaiting guarantees the folder exists on return.
        await hass.async_add_executor_job(file_manager.create_folders)
    return file_manager


class LoggingFileManager:
    """LoggingFileManager for handling logging files."""

    def __init__(self, folder):
        """Initialize the LoggingFileManager.

        Args:
            folder: Absolute path of the folder that will hold the log files.
        """
        self.folder = folder

    def create_folders(self):
        """Create folders for the logging files (no-op if already present)."""
        os.makedirs(self.folder, exist_ok=True)

    def empty_folder(self):
        """Empty the logging folders (typically called before a new run).

        Removes regular files and symlinks only; subdirectories are kept.
        """
        for filename in os.listdir(self.folder):
            file_path = os.path.join(self.folder, filename)
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)

    def write(self, filename, content):
        """Write the logging content to a file.

        Content is coerced to str and written as UTF-8, overwriting any
        existing file of the same name.
        """
        path = os.path.join(self.folder, filename)
        with open(path, "w", encoding="utf8") as file:
            file.write(str(content))

Current Version: 2024.8.3

Released: January 17, 2022

Current Time:

2022-12-22T13:15:30Z
""" 6 | 7 | # Simple HTML for basic testing 8 | SAMPLE_HTML_SIMPLE = """ 9 |
10 |

Test Header

11 |

Test content here

12 | Test Link 13 |
14 | """ 15 | 16 | # HTML with special characters and encoding 17 | SAMPLE_HTML_SPECIAL_CHARS = """ 18 |
19 |

Text with & ampersand < less than > greater than

20 |

Unicode: café, naïve, 日本語

21 |
22 | """ 23 | 24 | # Malformed HTML for error testing 25 | SAMPLE_HTML_MALFORMED = """ 26 |
27 |

This paragraph is not closed 28 |

Nested div 29 |
30 | """ 31 | 32 | # HTML with no content 33 | SAMPLE_HTML_EMPTY = "" 34 | 35 | # HTML for list selector testing 36 | SAMPLE_HTML_LIST = """ 37 |
    38 |
  • Item 1
  • 39 |
  • Item 2
  • 40 |
  • Item 3
  • 41 |
42 | """ 43 | 44 | # HTML with script and style tags (special extraction) 45 | SAMPLE_HTML_SPECIAL_TAGS = """ 46 | 47 | 50 | 53 | 56 | 57 | """ 58 | -------------------------------------------------------------------------------- /custom_components/multiscrape/const.py: -------------------------------------------------------------------------------- 1 | """The scraper component constants.""" 2 | 3 | DOMAIN = "multiscrape" 4 | 5 | DEFAULT_METHOD = "GET" 6 | DEFAULT_VERIFY_SSL = True 7 | DEFAULT_FORCE_UPDATE = False 8 | 9 | DEFAULT_BINARY_SENSOR_NAME = "Multiscrape Binary Sensor" 10 | DEFAULT_SENSOR_NAME = "Multiscrape Sensor" 11 | DEFAULT_BUTTON_NAME = "Multiscrape Refresh Button" 12 | 13 | CONF_STATE_CLASS = "state_class" 14 | CONF_ON_ERROR = "on_error" 15 | CONF_ON_ERROR_LOG = "log" 16 | CONF_ON_ERROR_VALUE = "value" 17 | CONF_ON_ERROR_VALUE_LAST = "last" 18 | CONF_ON_ERROR_VALUE_NONE = "none" 19 | CONF_ON_ERROR_VALUE_DEFAULT = "default" 20 | CONF_ON_ERROR_DEFAULT = "default" 21 | CONF_PICTURE = "picture" 22 | CONF_PARSER = "parser" 23 | CONF_SELECT = "select" 24 | CONF_SELECT_LIST = "select_list" 25 | CONF_SEPARATOR = "list_separator" 26 | CONF_ATTR = "attribute" 27 | CONF_SENSOR_ATTRS = "attributes" 28 | CONF_FORM_SUBMIT = "form_submit" 29 | CONF_FORM_SELECT = "select" 30 | CONF_FORM_INPUT = "input" 31 | CONF_FORM_INPUT_FILTER = "input_filter" 32 | CONF_FORM_SUBMIT_ONCE = "submit_once" 33 | CONF_FORM_RESUBMIT_ERROR = "resubmit_on_error" 34 | CONF_FORM_VARIABLES = "variables" 35 | CONF_LOG_RESPONSE = "log_response" 36 | CONF_EXTRACT = "extract" 37 | EXTRACT_OPTIONS = ["text", "content", "tag"] 38 | DEFAULT_PARSER = "lxml" 39 | DEFAULT_EXTRACT = "text" 40 | 41 | CONF_FIELDS = "fields" 42 | 43 | SCRAPER_IDX = "scraper_idx" 44 | PLATFORM_IDX = "platform_idx" 45 | 46 | COORDINATOR = "coordinator" 47 | SCRAPER = "scraper" 48 | 49 | SCRAPER_DATA = "scraper" 50 | 51 | METHODS = ["POST", "GET", "PUT"] 52 | DEFAULT_SEPARATOR = "," 53 | 54 | LOG_ERROR = "error" 55 | 
"""Some utility functions."""
import logging

from homeassistant.exceptions import TemplateError
from homeassistant.helpers.template import Template

_LOGGER: logging.Logger = logging.getLogger(__name__)


def create_renderer(hass, value_template, context=""):
    """Create a template renderer based on value_template.

    Args:
        hass: Home Assistant instance
        value_template: Template string or Template object
        context: Optional context description for better error messages (e.g., "resource URL", "header value")

    Returns:
        A callable `render(variables=None, parse_result=False)`; it returns
        None for all inputs when no template is configured.

    """
    if value_template is None:
        # No template configured: always render to None.
        return lambda variables=None, parse_result=False: None

    if not isinstance(value_template, Template):
        value_template = Template(value_template, hass)
    else:
        value_template.hass = hass

    # NOTE: the original closures used a mutable default (`variables={}`) and
    # an inconsistent `parse_result=None` default on the null renderer; both
    # are fixed here with a None sentinel, keeping call sites compatible.
    def _render(variables: dict | None = None, parse_result=False):
        try:
            return value_template.async_render(variables or {}, parse_result)
        except TemplateError:
            _LOGGER.exception(
                "Error rendering template%s: %s with variables %s",
                f" in {context}" if context else "",
                value_template,
                variables,
            )
            raise

    return _render


def create_dict_renderer(hass, templates_dict):
    """Create template renderers for a dictionary with value_templates.

    Returns a callable that renders every value of `templates_dict` and
    returns the resulting dict; renders to {} when templates_dict is None.
    """
    if templates_dict is None:
        return lambda variables=None, parse_result=False: {}

    # Build a fresh mapping of renderers so the caller's dict is not modified.
    renderers = {
        key: create_renderer(hass, template)
        for key, template in templates_dict.items()
    }

    def _render(variables: dict | None = None, parse_result=False):
        return {
            key: renderer(variables, parse_result)
            for key, renderer in renderers.items()
        }

    return _render
21 | "SIM401", # Use get from dict with default instead of an if block 22 | "T20", # flake8-print 23 | "TRY004", # Prefer TypeError exception for invalid type 24 | "RUF006", # Store a reference to the return value of asyncio.create_task 25 | "UP", # pyupgrade 26 | "W", # pycodestyle 27 | ] 28 | 29 | ignore = [ 30 | "D202", # No blank lines allowed after function docstring 31 | "D203", # 1 blank line required before class docstring 32 | "D213", # Multi-line docstring summary should start at the second line 33 | "D404", # First word of the docstring should not be This 34 | "D406", # Section name should end with a newline 35 | "D407", # Section name underlining 36 | "D411", # Missing blank line before section 37 | "E501", # line too long 38 | "E731", # do not assign a lambda expression, use a def 39 | ] 40 | 41 | [flake8-pytest-style] 42 | fixture-parentheses = false 43 | 44 | [pyupgrade] 45 | keep-runtime-typing = true 46 | 47 | [mccabe] 48 | max-complexity = 25 49 | -------------------------------------------------------------------------------- /custom_components/multiscrape/button.py: -------------------------------------------------------------------------------- 1 | """Support for Multiscrape refresh button.""" 2 | from __future__ import annotations 3 | 4 | import logging 5 | 6 | from homeassistant.components.button import ButtonEntity 7 | from homeassistant.const import CONF_NAME 8 | from homeassistant.const import CONF_UNIQUE_ID 9 | from homeassistant.const import Platform 10 | from homeassistant.core import HomeAssistant 11 | from homeassistant.helpers.entity import async_generate_entity_id 12 | from homeassistant.helpers.entity import EntityCategory 13 | from homeassistant.helpers.entity_platform import AddEntitiesCallback 14 | from homeassistant.helpers.typing import ConfigType 15 | from homeassistant.helpers.typing import DiscoveryInfoType 16 | 17 | from . 
import async_get_config_and_coordinator 18 | 19 | ENTITY_ID_FORMAT = Platform.BUTTON + ".{}" 20 | _LOGGER = logging.getLogger(__name__) 21 | 22 | 23 | async def async_setup_platform( 24 | hass: HomeAssistant, 25 | config: ConfigType, 26 | async_add_entities: AddEntitiesCallback, 27 | discovery_info: DiscoveryInfoType | None = None, 28 | ) -> None: 29 | """Set up the multiscrape refresh button.""" 30 | 31 | conf, coordinator, scraper = await async_get_config_and_coordinator( 32 | hass, Platform.BUTTON, discovery_info 33 | ) 34 | name = conf.get(CONF_NAME) 35 | unique_id = conf.get(CONF_UNIQUE_ID) 36 | 37 | async_add_entities( 38 | [ 39 | MultiscrapeRefreshButton( 40 | hass, 41 | coordinator, 42 | unique_id, 43 | name, 44 | ) 45 | ] 46 | ) 47 | 48 | 49 | class MultiscrapeRefreshButton(ButtonEntity): 50 | """Multiscrape refresh button.""" 51 | 52 | def __init__(self, hass, coordinator, unique_id, name): 53 | """Initialize MultiscrapeRefreshButton.""" 54 | self._attr_icon = "mdi:refresh" 55 | self._attr_entity_category = EntityCategory.CONFIG 56 | self._attr_name = name 57 | self._coordinator = coordinator 58 | 59 | self.entity_id = async_generate_entity_id( 60 | ENTITY_ID_FORMAT, unique_id or name, hass=hass 61 | ) 62 | 63 | self._attr_unique_id = unique_id 64 | 65 | async def async_press(self) -> None: 66 | """Press the button.""" 67 | _LOGGER.info("Multiscrape triggered by button") 68 | await self._coordinator.async_request_refresh() 69 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.5.0 4 | hooks: 5 | - id: no-commit-to-branch 6 | args: ["--branch", "master"] 7 | - repo: https://github.com/astral-sh/ruff-pre-commit 8 | rev: v0.14.0 9 | hooks: 10 | - id: ruff 11 | args: 12 | - --fix 13 | # - repo: https://github.com/psf/black-pre-commit-mirror 14 
| # rev: 23.7.0 15 | # hooks: 16 | # - id: black 17 | # args: 18 | # - --quiet 19 | # files: ^((homeassistant|pylint|script|tests)/.+)?[^/]+\.py$ 20 | - repo: https://github.com/codespell-project/codespell 21 | rev: v2.2.2 22 | hooks: 23 | - id: codespell 24 | args: 25 | - --ignore-words-list=additionals,alle,alot,bund,currenty,datas,farenheit,falsy,fo,haa,hass,iif,incomfort,ines,ist,nam,nd,pres,pullrequests,resset,rime,ser,serie,te,technik,ue,unsecure,withing,zar 26 | - --skip="./.*,*.csv,*.json,*.ambr" 27 | - --quiet-level=2 28 | exclude_types: [csv, json] 29 | # exclude: ^tests/fixtures/|homeassistant/generated/ 30 | # - repo: https://github.com/adrienverge/yamllint.git 31 | # rev: v1.32.0 32 | # hooks: 33 | # - id: yamllint 34 | - repo: https://github.com/pre-commit/mirrors-prettier 35 | rev: v2.7.1 36 | hooks: 37 | - id: prettier 38 | - repo: https://github.com/cdce8p/python-typing-update 39 | rev: v0.6.0 40 | hooks: 41 | # Run `python-typing-update` hook manually from time to time 42 | # to update python typing syntax. 43 | # Will require manual work, before submitting changes! 
44 | # pre-commit run --hook-stage manual python-typing-update --all-files 45 | - id: python-typing-update 46 | stages: [manual] 47 | args: 48 | - --py311-plus 49 | - --force 50 | - --keep-updates 51 | files: ^(homeassistant|tests|script)/.+\.py$ 52 | - repo: local 53 | hooks: 54 | - id: pylint 55 | name: pylint 56 | entry: script/run-in-env.sh pylint -j 0 --ignore-missing-annotations=y 57 | language: script 58 | types: [python] 59 | files: ^homeassistant/.+\.py$ 60 | - repo: local 61 | hooks: 62 | - id: pytest-check 63 | name: pytest-check 64 | entry: pytest 65 | language: system 66 | pass_filenames: false 67 | always_run: true 68 | - repo: https://github.com/pycqa/isort 69 | rev: 5.13.2 70 | hooks: 71 | - id: isort 72 | name: isort (python) 73 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 
11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [master] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [master] 20 | schedule: 21 | - cron: "21 18 * * 3" 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: ["python"] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 37 | # Learn more: 38 | # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 39 | 40 | steps: 41 | - name: Checkout repository 42 | uses: actions/checkout@v5.0.1 43 | 44 | # Initializes the CodeQL tools for scanning. 45 | - name: Initialize CodeQL 46 | uses: github/codeql-action/init@v4 47 | with: 48 | languages: ${{ matrix.language }} 49 | # If you wish to specify custom queries, you can do so here or in a config file. 50 | # By default, queries listed here will override any specified in a config file. 51 | # Prefix the list here with "+" to use these queries and those in the config file. 52 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 53 | 54 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 55 | # If this step fails, then you should remove it and run the build manually (see below) 56 | - name: Autobuild 57 | uses: github/codeql-action/autobuild@v4 58 | 59 | # ℹ️ Command-line programs to run using the OS shell. 
60 | # 📚 https://git.io/JvXDl 61 | 62 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 63 | # and modify them (or add more) to build your code if your project 64 | # uses a compiled language 65 | 66 | #- run: | 67 | # make bootstrap 68 | # make release 69 | 70 | - name: Perform CodeQL Analysis 71 | uses: github/codeql-action/analyze@v4 72 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribution guidelines 2 | 3 | Contributing to this project should be as easy and transparent as possible, whether it's: 4 | 5 | - Reporting a bug 6 | - Discussing the current state of the code 7 | - Submitting a fix 8 | - Proposing new features 9 | 10 | ## Github is used for everything 11 | 12 | Github is used to host code, to track issues and feature requests, as well as accept pull requests. 13 | 14 | Pull requests are the best way to propose changes to the codebase. 15 | 16 | 1. Fork the repo and create your branch from `master`. 17 | 2. If you've changed something, update the documentation. 18 | 3. Make sure your code lints (using black). 19 | 4. Test your contribution. 20 | 5. Issue that pull request! 21 | 22 | ## Any contributions you make will be under the MIT Software License 23 | 24 | In short, when you submit code changes, your submissions are understood to be under the same [MIT License](http://choosealicense.com/licenses/mit/) that covers the project. Feel free to contact the maintainers if that's a concern. 25 | 26 | ## Report bugs using Github's [issues](../../issues) 27 | 28 | GitHub issues are used to track public bugs. 29 | Report a bug by [opening a new issue](../../issues/new/choose); it's that easy!
30 | 31 | ## Write bug reports with detail, background, and sample code 32 | 33 | **Great Bug Reports** tend to have: 34 | 35 | - A quick summary and/or background 36 | - Steps to reproduce 37 | - Be specific! 38 | - Give sample code if you can. 39 | - What you expected would happen 40 | - What actually happens 41 | - Notes (possibly including why you think this might be happening, or stuff you tried that didn't work) 42 | 43 | People _love_ thorough bug reports. I'm not even kidding. 44 | 45 | ## Use a Consistent Coding Style 46 | 47 | Use [black](https://github.com/ambv/black) and [prettier](https://prettier.io/) 48 | to make sure the code follows the style. 49 | 50 | Or use the `pre-commit` settings implemented in this repository 51 | (see dedicated section below). 52 | 53 | ## Test your code modification 54 | 55 | This custom component is based on [integration_blueprint template](https://github.com/custom-components/integration_blueprint). 56 | 57 | It comes with a development environment in a container, easy to launch 58 | if you use Visual Studio Code. With this container you will have a stand alone 59 | Home Assistant instance running and already configured with the included 60 | [`.devcontainer/configuration.yaml`](./.devcontainer/configuration.yaml) 61 | file. 62 | 63 | You can use the `pre-commit` settings implemented in this repository to have 64 | linting tool checking your contributions (see dedicated section below). 65 | 66 | ## Pre-commit 67 | 68 | You can use the [pre-commit](https://pre-commit.com/) settings included in the 69 | repository to have code style and linting checks. 70 | 71 | With `pre-commit` tool already installed, 72 | activate the settings of the repository: 73 | 74 | ```console 75 | $ pre-commit install 76 | ``` 77 | 78 | Now the pre-commit tests will be done every time you commit.
79 | 80 | You can run the tests on all repository file with the command: 81 | 82 | ```console 83 | $ pre-commit run --all-files 84 | ``` 85 | 86 | ## License 87 | 88 | By contributing, you agree that your contributions will be licensed under its MIT License. 89 | -------------------------------------------------------------------------------- /custom_components/multiscrape/selector.py: -------------------------------------------------------------------------------- 1 | """Abstraction of the CSS selectors defined in the config.""" 2 | from collections import namedtuple 3 | 4 | from homeassistant.const import CONF_NAME, CONF_VALUE_TEMPLATE 5 | 6 | from .const import (CONF_ATTR, CONF_EXTRACT, CONF_ON_ERROR, 7 | CONF_ON_ERROR_DEFAULT, CONF_ON_ERROR_LOG, 8 | CONF_ON_ERROR_VALUE, CONF_SELECT, CONF_SELECT_LIST, 9 | DEFAULT_ON_ERROR_LOG, DEFAULT_ON_ERROR_VALUE) 10 | 11 | 12 | class Selector: 13 | """Implementation of a Selector handling the css selectors from the config.""" 14 | 15 | def __init__(self, hass, conf): 16 | """Initialize a Selector.""" 17 | self.name = conf.get(CONF_NAME) 18 | 19 | self.select_template = conf.get(CONF_SELECT) 20 | if self.select_template and self.select_template.hass is None: 21 | self.select_template.hass = hass 22 | 23 | self.select_list_template = conf.get(CONF_SELECT_LIST) 24 | if self.select_list_template and self.select_list_template.hass is None: 25 | self.select_list_template.hass = hass 26 | 27 | self.attribute = conf.get(CONF_ATTR) 28 | self.value_template = conf.get(CONF_VALUE_TEMPLATE) 29 | if self.value_template and self.value_template.hass is None: 30 | self.value_template.hass = hass 31 | 32 | self.extract = conf.get(CONF_EXTRACT) 33 | self.on_error = self.create_on_error(conf.get(CONF_ON_ERROR), hass) 34 | 35 | if ( 36 | not self.select_template 37 | and not self.select_list_template 38 | and not self.value_template 39 | ): 40 | raise ValueError( 41 | "Selector error: either select, select_list or a value_template should be 
provided." 42 | ) 43 | 44 | def create_on_error(self, conf, hass): 45 | """Determine from config what to do in case of scrape errors.""" 46 | On_Error = namedtuple( 47 | "On_Error", 48 | f"{CONF_ON_ERROR_LOG} {CONF_ON_ERROR_VALUE} {CONF_ON_ERROR_DEFAULT}", 49 | ) 50 | 51 | if not conf: 52 | return On_Error(DEFAULT_ON_ERROR_LOG, DEFAULT_ON_ERROR_VALUE, None) 53 | 54 | log = conf.get(CONF_ON_ERROR_LOG, DEFAULT_ON_ERROR_LOG) 55 | value = conf.get(CONF_ON_ERROR_VALUE, DEFAULT_ON_ERROR_VALUE) 56 | default_template = conf.get(CONF_ON_ERROR_DEFAULT) 57 | if default_template is not None: 58 | default_template.hass = hass 59 | 60 | return On_Error(log, value, default_template) 61 | 62 | @property 63 | def is_list(self): 64 | """Determine whether this selector is a list selector.""" 65 | return self.select_list_template is not None 66 | 67 | @property 68 | def element(self): 69 | """Render the select template and return the CSS selector for a single element.""" 70 | return self.select_template.async_render(parse_result=True) 71 | 72 | @property 73 | def list(self): 74 | """Render the select template and return the CSS selector for a list of elements.""" 75 | return self.select_list_template.async_render(parse_result=True) 76 | 77 | @property 78 | def just_value(self): 79 | """Determine if this selector define a static value and no select is required.""" 80 | return not self.select_list_template and not self.select_template 81 | 82 | @property 83 | def on_error_default(self): 84 | """Return the default on_error value as defined in the config.""" 85 | if self.on_error.default is None: 86 | return None 87 | return self.on_error.default.async_render(parse_result=True) 88 | -------------------------------------------------------------------------------- /tests/test_http.py: -------------------------------------------------------------------------------- 1 | """Tests for the http module.""" 2 | import pytest 3 | 4 | from custom_components.multiscrape.http import 
merge_url_with_params 5 | 6 | 7 | @pytest.mark.unit 8 | @pytest.mark.timeout(2) 9 | @pytest.mark.parametrize( 10 | "url,params,expected", 11 | [ 12 | # Basic parameter merging 13 | ( 14 | "https://example.com", 15 | {"param1": "value1", "param2": "value2"}, 16 | "https://example.com?param1=value1&param2=value2", 17 | ), 18 | # Integer parameter value 19 | ( 20 | "https://example.com", 21 | {"param1": "value1", "param2": 2}, 22 | "https://example.com?param1=value1&param2=2", 23 | ), 24 | # Merging with existing params 25 | ( 26 | "https://example.com?param1=value1", 27 | {"param2": "value2"}, 28 | "https://example.com?param1=value1&param2=value2", 29 | ), 30 | # Empty params dict 31 | ( 32 | "https://example.com?param1=value1", 33 | {}, 34 | "https://example.com?param1=value1", 35 | ), 36 | # None params 37 | ( 38 | "https://example.com?param1=33", 39 | None, 40 | "https://example.com?param1=33", 41 | ), 42 | # Multiple existing params 43 | ( 44 | "https://example.com?param1=value1&param2=value2", 45 | {"param3": "value3"}, 46 | "https://example.com?param1=value1&param2=value2&param3=value3", 47 | ), 48 | ], 49 | ) 50 | def test_merge_url_with_params(url, params, expected): 51 | """Test merge_url_with_params function with various inputs.""" 52 | result = merge_url_with_params(url, params) 53 | assert result == expected 54 | 55 | 56 | @pytest.mark.unit 57 | @pytest.mark.timeout(2) 58 | def test_merge_url_with_params_override_existing(): 59 | """Test merge_url_with_params overriding existing URL parameters.""" 60 | url = "https://example.com?param1=value1" 61 | params = {"param1": "new_value1", "param2": "value2"} 62 | result = merge_url_with_params(url, params) 63 | assert result == "https://example.com?param1=new_value1&param2=value2" 64 | 65 | 66 | @pytest.mark.unit 67 | @pytest.mark.timeout(2) 68 | @pytest.mark.parametrize( 69 | "url,params,expected", 70 | [ 71 | # Single array parameter 72 | ( 73 | "https://example.com", 74 | {"param1": ["value1", "value2"]}, 75 |
"https://example.com?param1=value1&param1=value2", 76 | ), 77 | # Array parameter overriding existing 78 | ( 79 | "https://example.com?param1=value1", 80 | {"param1": ["value2", "value3"]}, 81 | "https://example.com?param1=value2&param1=value3", 82 | ), 83 | ], 84 | ) 85 | def test_merge_url_with_params_array_values(url, params, expected): 86 | """Test merge_url_with_params with array values.""" 87 | result = merge_url_with_params(url, params) 88 | assert result == expected 89 | 90 | 91 | @pytest.mark.unit 92 | @pytest.mark.timeout(2) 93 | def test_merge_url_with_params_special_characters(): 94 | """Test merge_url_with_params with special characters in parameters.""" 95 | url = "https://example.com" 96 | params = {"param1": "value with spaces", "param2": "value&with&special&chars"} 97 | result = merge_url_with_params(url, params) 98 | assert result == "https://example.com?param1=value+with+spaces&param2=value%26with%26special%26chars" 99 | 100 | 101 | @pytest.mark.unit 102 | @pytest.mark.timeout(2) 103 | @pytest.mark.parametrize( 104 | "url,params,expected", 105 | [ 106 | # URL with port 107 | ( 108 | "https://example.com:8080", 109 | {"param1": "value1"}, 110 | "https://example.com:8080?param1=value1", 111 | ), 112 | # URL with fragment 113 | ( 114 | "https://example.com#section1", 115 | {"param1": "value1"}, 116 | "https://example.com?param1=value1#section1", 117 | ), 118 | ], 119 | ) 120 | def test_merge_url_with_params_url_components(url, params, expected): 121 | """Test merge_url_with_params with various URL components.""" 122 | result = merge_url_with_params(url, params) 123 | assert result == expected 124 | -------------------------------------------------------------------------------- /tests/test_coordinator.py: -------------------------------------------------------------------------------- 1 | """Tests for the coordinator module.""" 2 | from datetime import timedelta 3 | from unittest.mock import AsyncMock, MagicMock 4 | 5 | import pytest 6 | from homeassistant.core
import HomeAssistant 7 | 8 | from custom_components.multiscrape.coordinator import ( 9 | ContentRequestManager, MultiscrapeDataUpdateCoordinator) 10 | 11 | 12 | @pytest.mark.unit 13 | @pytest.mark.async_test 14 | @pytest.mark.timeout(5) 15 | async def test_content_request_manager_get_content_basic( 16 | content_request_manager, mock_http_wrapper 17 | ): 18 | """Test basic content retrieval without form submission.""" 19 | # Arrange 20 | mock_http_wrapper.async_request.return_value.text = "Test Content" 21 | 22 | # Act 23 | result = await content_request_manager.get_content() 24 | 25 | # Assert 26 | assert result == "Test Content" 27 | mock_http_wrapper.async_request.assert_called_once() 28 | 29 | 30 | @pytest.mark.unit 31 | @pytest.mark.async_test 32 | @pytest.mark.timeout(5) 33 | async def test_content_request_manager_with_form_submission( 34 | mock_http_wrapper, mock_resource_renderer, mock_http_response 35 | ): 36 | """Test content retrieval with form submission.""" 37 | # Arrange 38 | mock_form = AsyncMock() 39 | mock_form.should_submit = True 40 | mock_form.async_submit = AsyncMock( 41 | return_value=("Form Response", {"cookie": "value"}) 42 | ) 43 | mock_form.scrape_variables = MagicMock(return_value={"var": "value"}) 44 | 45 | manager = ContentRequestManager( 46 | config_name="test", 47 | http=mock_http_wrapper, 48 | resource_renderer=mock_resource_renderer, 49 | form=mock_form, 50 | ) 51 | 52 | # Act 53 | result = await manager.get_content() 54 | 55 | # Assert 56 | assert result == "Form Response" 57 | mock_form.async_submit.assert_called_once() 58 | # HTTP request should NOT be called since form submission returned content 59 | mock_http_wrapper.async_request.assert_not_called() 60 | 61 | 62 | @pytest.mark.unit 63 | @pytest.mark.async_test 64 | @pytest.mark.timeout(5) 65 | async def test_content_request_manager_form_submission_no_result( 66 | mock_http_wrapper, mock_resource_renderer, mock_http_response 67 | ): 68 | """Test content retrieval when form 
submission returns None.""" 69 | # Arrange 70 | mock_form = AsyncMock() 71 | mock_form.should_submit = True 72 | mock_form.async_submit = AsyncMock(return_value=(None, {"cookie": "value"})) 73 | mock_form.scrape_variables = MagicMock(return_value={"var": "value"}) 74 | 75 | manager = ContentRequestManager( 76 | config_name="test", 77 | http=mock_http_wrapper, 78 | resource_renderer=mock_resource_renderer, 79 | form=mock_form, 80 | ) 81 | 82 | mock_http_wrapper.async_request.return_value = mock_http_response( 83 | text="Page Content" 84 | ) 85 | 86 | # Act 87 | result = await manager.get_content() 88 | 89 | # Assert 90 | assert result == "Page Content" 91 | mock_http_wrapper.async_request.assert_called_once() 92 | 93 | 94 | @pytest.mark.integration 95 | @pytest.mark.async_test 96 | @pytest.mark.timeout(10) 97 | async def test_coordinator_successful_update(coordinator, mock_http_wrapper, scraper): 98 | """Test successful data update through coordinator.""" 99 | # Arrange 100 | mock_http_wrapper.async_request.return_value.text = "Updated Content" 101 | 102 | # Act 103 | await coordinator.async_refresh() 104 | 105 | # Assert 106 | assert coordinator.last_update_success 107 | assert not coordinator.update_error 108 | # Verify the scraper received the content 109 | assert scraper._data == "Updated Content" 110 | 111 | 112 | @pytest.mark.integration 113 | @pytest.mark.async_test 114 | @pytest.mark.timeout(10) 115 | async def test_coordinator_update_failure(coordinator, mock_http_wrapper): 116 | """Test coordinator behavior on update failure.""" 117 | # Arrange 118 | mock_http_wrapper.async_request.side_effect = Exception("Network error") 119 | 120 | # Act 121 | await coordinator.async_refresh() 122 | 123 | # Assert 124 | assert coordinator.update_error 125 | # The coordinator should handle the exception gracefully 126 | 127 | 128 | @pytest.mark.unit 129 | @pytest.mark.async_test 130 | @pytest.mark.timeout(5) 131 | async def test_coordinator_notify_scrape_exception( 132 | 
coordinator, content_request_manager, mock_form_submitter 133 | ): 134 | """Test that scrape exceptions are properly notified.""" 135 | # Act 136 | coordinator.notify_scrape_exception() 137 | 138 | # Assert 139 | mock_form_submitter.notify_scrape_exception.assert_called_once() 140 | 141 | 142 | @pytest.mark.integration 143 | @pytest.mark.async_test 144 | @pytest.mark.timeout(10) 145 | async def test_coordinator_with_zero_scan_interval( 146 | hass: HomeAssistant, 147 | content_request_manager, 148 | mock_file_manager, 149 | scraper, 150 | mock_http_wrapper, 151 | ): 152 | """Test coordinator with scan_interval set to zero (manual updates only). 153 | 154 | When scan_interval is 0, the coordinator should: 155 | 1. Set _update_interval to None (disables automatic updates) 156 | 2. Only update when manually triggered via async_request_refresh() 157 | """ 158 | # Arrange 159 | coordinator = MultiscrapeDataUpdateCoordinator( 160 | config_name="test_coordinator", 161 | hass=hass, 162 | request_manager=content_request_manager, 163 | file_manager=mock_file_manager, 164 | scraper=scraper, 165 | update_interval=timedelta(seconds=0), 166 | ) 167 | 168 | # Assert - interval is disabled 169 | assert coordinator._update_interval is None 170 | 171 | # Verify manual update still works 172 | mock_http_wrapper.async_request.return_value.text = "Manual Update" 173 | await coordinator.async_request_refresh() 174 | await hass.async_block_till_done() 175 | 176 | assert scraper._data == "Manual Update" 177 | assert coordinator.last_update_success 178 | -------------------------------------------------------------------------------- /custom_components/multiscrape/__init__.py: -------------------------------------------------------------------------------- 1 | """The multiscrape component.""" 2 | import asyncio 3 | import contextlib 4 | import logging 5 | 6 | import voluptuous as vol 7 | from homeassistant.config_entries import ConfigEntry 8 | from homeassistant.const import (CONF_NAME, 
CONF_RESOURCE, 9 | CONF_RESOURCE_TEMPLATE, SERVICE_RELOAD, 10 | Platform) 11 | from homeassistant.core import HomeAssistant 12 | from homeassistant.exceptions import HomeAssistantError 13 | from homeassistant.helpers import discovery 14 | from homeassistant.helpers.reload import (async_integration_yaml_config, 15 | async_reload_integration_platforms) 16 | 17 | from .const import (CONF_FORM_SUBMIT, CONF_LOG_RESPONSE, CONF_PARSER, 18 | COORDINATOR, DOMAIN, PLATFORM_IDX, SCRAPER, SCRAPER_DATA, 19 | SCRAPER_IDX) 20 | from .coordinator import (create_content_request_manager, 21 | create_multiscrape_coordinator) 22 | from .file import create_file_manager 23 | from .form import create_form_submitter 24 | from .http import create_http_wrapper 25 | from .schema import COMBINED_SCHEMA, CONFIG_SCHEMA # noqa: F401 26 | from .scraper import create_scraper 27 | from .service import setup_config_services, setup_integration_services 28 | 29 | _LOGGER = logging.getLogger(__name__) 30 | PLATFORMS = [Platform.SENSOR, Platform.BINARY_SENSOR, Platform.BUTTON] 31 | 32 | 33 | async def async_setup(hass: HomeAssistant, entry: ConfigEntry): 34 | """Set up the multiscrape platforms.""" 35 | _LOGGER.debug("# Start loading multiscrape") 36 | _async_setup_shared_data(hass) 37 | 38 | async def reload_service_handler(service): 39 | """Remove all user-defined groups and load new ones from config.""" 40 | conf = None 41 | with contextlib.suppress(HomeAssistantError): 42 | conf = await async_integration_yaml_config(hass, DOMAIN) 43 | if conf is None: 44 | return 45 | await async_reload_integration_platforms(hass, DOMAIN, PLATFORMS) 46 | _async_setup_shared_data(hass) 47 | await _async_process_config(hass, conf) 48 | 49 | hass.services.async_register( 50 | DOMAIN, SERVICE_RELOAD, reload_service_handler, schema=vol.Schema({}) 51 | ) 52 | _LOGGER.debug("# Reload service registered") 53 | 54 | await setup_integration_services(hass) 55 | 56 | if len(entry[DOMAIN]) == 1: 57 | if not 
entry[DOMAIN][0].get(CONF_RESOURCE) and not entry[DOMAIN][0].get( 58 | CONF_RESOURCE_TEMPLATE 59 | ): 60 | _LOGGER.info( 61 | "Did not find any configuration. Assuming we want just the integration level services." 62 | ) 63 | return True 64 | 65 | return await _async_process_config(hass, entry) 66 | 67 | 68 | def _async_setup_shared_data(hass: HomeAssistant): 69 | """Create shared data for platform config and scraper coordinators.""" 70 | hass.data[DOMAIN] = {key: [] for key in [SCRAPER_DATA, *PLATFORMS]} 71 | 72 | 73 | async def _async_process_config(hass: HomeAssistant, config) -> bool: 74 | """Process scraper configuration.""" 75 | 76 | _LOGGER.debug("# Start processing config from configuration.yaml") 77 | 78 | refresh_tasks = [] 79 | load_tasks = [] 80 | 81 | for scraper_idx, conf in enumerate(config[DOMAIN]): 82 | config_name = conf.get(CONF_NAME) 83 | if config_name is None: 84 | config_name = f"Scraper_noname_{scraper_idx}" 85 | _LOGGER.debug( 86 | "# Found no name for scraper, generated a unique name: %s", config_name 87 | ) 88 | 89 | _LOGGER.debug( 90 | "%s # Setting up multiscrape with config:\n %s", config_name, conf 91 | ) 92 | 93 | file_manager = await create_file_manager(hass, config_name, conf.get(CONF_LOG_RESPONSE)) 94 | form_submit_config = conf.get(CONF_FORM_SUBMIT) 95 | form_submitter = None 96 | if form_submit_config: 97 | parser = conf.get(CONF_PARSER) 98 | form_http = create_http_wrapper(config_name, form_submit_config, hass, file_manager) 99 | form_submitter = create_form_submitter( 100 | config_name, 101 | form_submit_config, 102 | hass, 103 | form_http, 104 | file_manager, 105 | parser, 106 | ) 107 | 108 | http = create_http_wrapper(config_name, conf, hass, file_manager) 109 | scraper = create_scraper(config_name, conf, hass, file_manager) 110 | request_manager = create_content_request_manager(config_name, conf, hass, http, form_submitter) 111 | coordinator = create_multiscrape_coordinator( 112 | config_name, 113 | conf, 114 | hass, 115 | 
request_manager, 116 | file_manager, 117 | scraper, 118 | ) 119 | await coordinator.async_register_shutdown() 120 | 121 | hass.data[DOMAIN][SCRAPER_DATA].append( 122 | {SCRAPER: scraper, COORDINATOR: coordinator} 123 | ) 124 | 125 | await setup_config_services(hass, coordinator, config_name) 126 | 127 | for platform_domain in PLATFORMS: 128 | if platform_domain not in conf: 129 | continue 130 | 131 | for platform_conf in conf[platform_domain]: 132 | hass.data[DOMAIN][platform_domain].append(platform_conf) 133 | platform_idx = len(hass.data[DOMAIN][platform_domain]) - 1 134 | 135 | load = discovery.async_load_platform( 136 | hass, 137 | platform_domain, 138 | DOMAIN, 139 | {SCRAPER_IDX: scraper_idx, PLATFORM_IDX: platform_idx}, 140 | config, 141 | ) 142 | load_tasks.append(load) 143 | 144 | if refresh_tasks: 145 | await asyncio.gather(*refresh_tasks) 146 | 147 | if load_tasks: 148 | await asyncio.gather(*load_tasks) 149 | 150 | 151 | return True 152 | 153 | 154 | async def async_get_config_and_coordinator(hass, platform_domain, discovery_info): 155 | """Get the config and coordinator for the platform from discovery.""" 156 | shared_data = hass.data[DOMAIN][SCRAPER_DATA][discovery_info[SCRAPER_IDX]] 157 | conf = hass.data[DOMAIN][platform_domain][discovery_info[PLATFORM_IDX]] 158 | coordinator = shared_data[COORDINATOR] 159 | scraper = shared_data[SCRAPER] 160 | return conf, coordinator, scraper 161 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Global fixtures for Multiscrape integration.""" 2 | 3 | # Fixtures allow you to replace functions with a Mock object. You can perform 4 | # many options via the Mock to reflect a particular behavior from the original 5 | # function that you want to see without going through the function's actual logic. 
6 | # Fixtures can either be passed into tests as parameters, or if autouse=True, they 7 | # will automatically be used across all tests. 8 | # 9 | # Fixtures that are defined in conftest.py are available across all tests. You can also 10 | # define fixtures within a particular test file to scope them locally. 11 | # 12 | # pytest_homeassistant_custom_component provides some fixtures that are provided by 13 | # Home Assistant core. You can find those fixture definitions here: 14 | # https://github.com/MatthewFlamm/pytest-homeassistant-custom-component/blob/master/pytest_homeassistant_custom_component/common.py 15 | # 16 | # See here for more info: https://docs.pytest.org/en/latest/fixture.html (note that 17 | # pytest includes fixtures OOB which you can use as defined on this page) 18 | from unittest.mock import AsyncMock, MagicMock, patch 19 | 20 | import pytest 21 | import respx 22 | from homeassistant.core import HomeAssistant 23 | 24 | from custom_components.multiscrape.const import DEFAULT_SEPARATOR 25 | from custom_components.multiscrape.coordinator import ( 26 | ContentRequestManager, MultiscrapeDataUpdateCoordinator) 27 | from custom_components.multiscrape.scraper import Scraper 28 | 29 | # from custom_components.multiscrape.const import (CONF_CONFIG_NAME, 30 | # CONF_METER_TYPE, DOMAIN) 31 | 32 | pytest_plugins = "pytest_homeassistant_custom_component" 33 | 34 | 35 | # This fixture enables loading custom integrations in all tests. 36 | # Remove to enable selective use of this fixture 37 | @pytest.fixture(autouse=True) 38 | def auto_enable_custom_integrations(enable_custom_integrations): 39 | """Enable custom integrations in all tests.""" 40 | yield 41 | 42 | 43 | # This fixture is used to prevent HomeAssistant from attempting to create and dismiss persistent 44 | # notifications. These calls would fail without this fixture since the persistent_notification 45 | # integration is never loaded during a test. 
46 | @pytest.fixture(name="skip_notifications", autouse=True) 47 | def skip_notifications_fixture(): 48 | """Skip notification calls.""" 49 | with patch("homeassistant.components.persistent_notification.async_create"), patch( 50 | "homeassistant.components.persistent_notification.async_dismiss" 51 | ): 52 | yield 53 | 54 | 55 | # ============================================================================ 56 | # Phase 1: Proper Async Fixtures 57 | # ============================================================================ 58 | 59 | 60 | @pytest.fixture 61 | def mock_http_response(): 62 | """Create a mock HTTP response object.""" 63 | class MockResponse: 64 | def __init__(self, text="", status_code=200, headers=None, cookies=None): 65 | self.text = text 66 | self.status_code = status_code 67 | self.headers = headers or {} 68 | self.cookies = cookies or {} 69 | 70 | def raise_for_status(self): 71 | if 400 <= self.status_code <= 599: 72 | raise Exception(f"HTTP {self.status_code}") 73 | 74 | return MockResponse 75 | 76 | 77 | @pytest.fixture 78 | async def http_wrapper(hass): 79 | """Create a real HttpWrapper instance for testing (requires respx mocking).""" 80 | from homeassistant.helpers.httpx_client import get_async_client 81 | 82 | from custom_components.multiscrape.http import HttpWrapper 83 | from custom_components.multiscrape.util import (create_dict_renderer, 84 | create_renderer) 85 | 86 | client = get_async_client(hass, verify_ssl=True) 87 | wrapper = HttpWrapper( 88 | config_name="test_wrapper", 89 | hass=hass, 90 | client=client, 91 | file_manager=None, 92 | timeout=10, 93 | params_renderer=create_dict_renderer(hass, None), 94 | headers_renderer=create_dict_renderer(hass, None), 95 | data_renderer=create_renderer(hass, None), 96 | ) 97 | return wrapper 98 | 99 | 100 | @pytest.fixture 101 | def mock_http_wrapper(mock_http_response): 102 | """Create a mock HttpWrapper with proper async behavior.""" 103 | mock = AsyncMock() 104 | mock.async_request = 
AsyncMock(return_value=mock_http_response( 105 | text='
Test Content
' 106 | )) 107 | return mock 108 | 109 | 110 | @pytest.fixture 111 | async def scraper(hass: HomeAssistant): 112 | """Create a Scraper instance for testing.""" 113 | return Scraper( 114 | config_name="test_scraper", 115 | hass=hass, 116 | file_manager=None, 117 | parser="lxml", 118 | separator=DEFAULT_SEPARATOR, 119 | ) 120 | 121 | 122 | @pytest.fixture 123 | def mock_file_manager(): 124 | """Create a mock file manager.""" 125 | mock = MagicMock() 126 | mock.write = MagicMock() 127 | mock.empty_folder = MagicMock() 128 | return mock 129 | 130 | 131 | @pytest.fixture 132 | def mock_form_submitter(): 133 | """Create a mock form submitter.""" 134 | mock = AsyncMock() 135 | mock.should_submit = False 136 | mock.async_submit = AsyncMock(return_value=(None, None)) 137 | mock.scrape_variables = MagicMock(return_value={}) 138 | mock.notify_scrape_exception = MagicMock() 139 | return mock 140 | 141 | 142 | @pytest.fixture 143 | def mock_resource_renderer(): 144 | """Create a mock resource renderer.""" 145 | return lambda: "https://example.com" 146 | 147 | 148 | @pytest.fixture 149 | def content_request_manager( 150 | mock_http_wrapper, mock_resource_renderer, mock_form_submitter 151 | ): 152 | """Create a ContentRequestManager for testing.""" 153 | return ContentRequestManager( 154 | config_name="test_request_manager", 155 | http=mock_http_wrapper, 156 | resource_renderer=mock_resource_renderer, 157 | form=mock_form_submitter, 158 | ) 159 | 160 | 161 | @pytest.fixture 162 | async def coordinator( 163 | hass: HomeAssistant, 164 | content_request_manager, 165 | mock_file_manager, 166 | scraper, 167 | ): 168 | """Create a MultiscrapeDataUpdateCoordinator for testing.""" 169 | from datetime import timedelta 170 | 171 | coordinator = MultiscrapeDataUpdateCoordinator( 172 | config_name="test_coordinator", 173 | hass=hass, 174 | request_manager=content_request_manager, 175 | file_manager=mock_file_manager, 176 | scraper=scraper, 177 | update_interval=timedelta(seconds=60), 178 | ) 
179 | return coordinator 180 | 181 | 182 | @pytest.fixture 183 | def respx_mock(): 184 | """Provide a respx mock for HTTP testing.""" 185 | with respx.mock: 186 | yield respx 187 | 188 | -------------------------------------------------------------------------------- /custom_components/multiscrape/scraper.py: -------------------------------------------------------------------------------- 1 | """Support for multiscrape requests.""" 2 | import logging 3 | 4 | from bs4 import BeautifulSoup 5 | 6 | from .const import CONF_PARSER, CONF_SEPARATOR 7 | 8 | DEFAULT_TIMEOUT = 10 9 | _LOGGER = logging.getLogger(__name__) 10 | 11 | 12 | def create_scraper(config_name, config, hass, file_manager): 13 | """Create a scraper instance.""" 14 | _LOGGER.debug("%s # Creating scraper", config_name) 15 | parser = config.get(CONF_PARSER) 16 | separator = config.get(CONF_SEPARATOR) 17 | 18 | return Scraper( 19 | config_name, 20 | hass, 21 | file_manager, 22 | parser, 23 | separator, 24 | ) 25 | 26 | 27 | class Scraper: 28 | """Class for handling the retrieval and scraping of data.""" 29 | 30 | def __init__( 31 | self, 32 | config_name, 33 | hass, 34 | file_manager, 35 | parser, 36 | separator, 37 | ): 38 | """Initialize the data object.""" 39 | _LOGGER.debug("%s # Initializing scraper", config_name) 40 | 41 | self._hass = hass 42 | self._file_manager = file_manager 43 | self._config_name = config_name 44 | self._parser = parser 45 | self._soup: BeautifulSoup = None 46 | self._data = None 47 | self._separator = separator 48 | self.reset() 49 | 50 | @property 51 | def name(self): 52 | """Property for config name.""" 53 | return self._config_name 54 | 55 | def reset(self): 56 | """Reset the scraper object.""" 57 | self._data = None 58 | self._soup = None 59 | 60 | @property 61 | def formatted_content(self): 62 | """Property for getting the content. 
HTML will be prettified.""" 63 | if self._soup: 64 | return self._soup.prettify() 65 | return self._data 66 | 67 | async def set_content(self, content): 68 | """Set the content to be scraped.""" 69 | self._data = content 70 | 71 | # Try to detect JSON more robustly 72 | content_stripped = content.lstrip() if content else "" 73 | if content_stripped and content_stripped[0] in ["{", "["]: 74 | _LOGGER.debug( 75 | "%s # Response seems to be json. Skip parsing with BeautifulSoup.", 76 | self._config_name, 77 | ) 78 | else: 79 | try: 80 | _LOGGER.debug( 81 | "%s # Loading the content in BeautifulSoup.", 82 | self._config_name, 83 | ) 84 | self._soup = await self._hass.async_add_executor_job( 85 | BeautifulSoup, self._data, self._parser 86 | ) 87 | 88 | if self._file_manager: 89 | await self._async_file_log("page_soup", self._soup.prettify()) 90 | 91 | except Exception as ex: 92 | self.reset() 93 | _LOGGER.error( 94 | "%s # Unable to parse response with BeautifulSoup: %s", 95 | self._config_name, 96 | ex, 97 | ) 98 | raise 99 | 100 | def scrape(self, selector, sensor, attribute=None, variables: dict = {}): 101 | """Scrape based on given selector the data.""" 102 | # This is required as this function is called separately for sensors and attributes 103 | log_prefix = f"{self._config_name} # {sensor}" 104 | if attribute: 105 | log_prefix = log_prefix + f"# {attribute}" 106 | 107 | if selector.just_value: 108 | _LOGGER.debug("%s # Applying value_template only.", log_prefix) 109 | result = selector.value_template.async_render_with_possible_json_value( 110 | self._data, None, variables=variables 111 | ) 112 | return selector.value_template._parse_result(result) 113 | 114 | # Check if content is JSON 115 | content_stripped = self._data.lstrip() if self._data else "" 116 | if content_stripped and content_stripped[0] in ["{", "["]: 117 | raise ValueError( 118 | "JSON cannot be scraped. Please provide a value template to parse JSON response." 
119 | ) 120 | 121 | if selector.is_list: 122 | tags = self._soup.select(selector.list) 123 | _LOGGER.debug("%s # List selector selected tags: %s", 124 | log_prefix, tags) 125 | if selector.attribute is not None: 126 | _LOGGER.debug( 127 | "%s # Try to find attributes: %s", 128 | log_prefix, 129 | selector.attribute, 130 | ) 131 | values = [tag[selector.attribute] for tag in tags] 132 | else: 133 | values = [self.extract_tag_value(tag, selector) for tag in tags] 134 | value = self._separator.join(values) 135 | _LOGGER.debug("%s # List selector csv: %s", log_prefix, value) 136 | 137 | else: 138 | tag = self._soup.select_one(selector.element) 139 | _LOGGER.debug("%s # Tag selected: %s", log_prefix, tag) 140 | if tag is None: 141 | raise ValueError("Could not find a tag for given selector") 142 | 143 | if selector.attribute is not None: 144 | _LOGGER.debug( 145 | "%s # Try to find attribute: %s", log_prefix, selector.attribute 146 | ) 147 | value = tag[selector.attribute] 148 | else: 149 | value = self.extract_tag_value(tag, selector) 150 | _LOGGER.debug("%s # Selector result: %s", log_prefix, value) 151 | 152 | if value is not None and selector.value_template is not None: 153 | _LOGGER.debug( 154 | "%s # Applying value_template on selector result", log_prefix) 155 | variables["value"] = value 156 | value = selector.value_template.async_render(variables=variables, parse_result=True 157 | ) 158 | 159 | _LOGGER.debug( 160 | "%s # Final selector value: %s of type %s", log_prefix, value, type( 161 | value) 162 | ) 163 | return value 164 | 165 | def extract_tag_value(self, tag, selector): 166 | """Extract value from a tag.""" 167 | if tag.name in ("style", "script", "template"): 168 | return tag.string 169 | else: 170 | if selector.extract == "text": 171 | return tag.text 172 | elif selector.extract == "content": 173 | return ''.join(map(str, tag.contents)) 174 | elif selector.extract == "tag": 175 | return str(tag) 176 | 177 | async def _async_file_log(self, content_name, 
content): 178 | try: 179 | filename = f"{content_name}.txt" 180 | await self._hass.async_add_executor_job( 181 | self._file_manager.write, filename, content 182 | ) 183 | except Exception as ex: 184 | _LOGGER.error( 185 | "%s # Unable to write %s to file: %s. \nException: %s", 186 | self._config_name, 187 | content_name, 188 | filename, 189 | ex, 190 | ) 191 | _LOGGER.debug( 192 | "%s # %s written to file: %s", 193 | self._config_name, 194 | content_name, 195 | filename, 196 | ) 197 | -------------------------------------------------------------------------------- /custom_components/multiscrape/sensor.py: -------------------------------------------------------------------------------- 1 | """Support for Multiscrape sensors.""" 2 | from __future__ import annotations 3 | 4 | import logging 5 | 6 | from homeassistant.components.sensor import SensorDeviceClass, SensorEntity 7 | from homeassistant.components.sensor.helpers import async_parse_date_datetime 8 | from homeassistant.const import (CONF_DEVICE_CLASS, CONF_FORCE_UPDATE, 9 | CONF_ICON, CONF_NAME, CONF_UNIQUE_ID, 10 | CONF_UNIT_OF_MEASUREMENT, Platform) 11 | from homeassistant.core import HomeAssistant 12 | from homeassistant.exceptions import PlatformNotReady 13 | from homeassistant.helpers.entity import async_generate_entity_id 14 | from homeassistant.helpers.entity_platform import AddEntitiesCallback 15 | from homeassistant.helpers.typing import ConfigType, DiscoveryInfoType 16 | from homeassistant.util import slugify 17 | 18 | from . 
import async_get_config_and_coordinator 19 | from .const import (CONF_ON_ERROR_VALUE_DEFAULT, CONF_ON_ERROR_VALUE_LAST, 20 | CONF_ON_ERROR_VALUE_NONE, CONF_PICTURE, CONF_SENSOR_ATTRS, 21 | CONF_STATE_CLASS, LOG_LEVELS) 22 | from .entity import MultiscrapeEntity 23 | from .selector import Selector 24 | 25 | _LOGGER = logging.getLogger(__name__) 26 | ENTITY_ID_FORMAT = Platform.SENSOR + ".{}" 27 | 28 | 29 | async def async_setup_platform( 30 | hass: HomeAssistant, 31 | config: ConfigType, 32 | async_add_entities: AddEntitiesCallback, 33 | discovery_info: DiscoveryInfoType | None = None, 34 | ) -> None: 35 | """Set up the multiscrape sensor.""" 36 | # Must update the sensor now (including fetching the scraper resource) to 37 | # ensure it's updating its state. 38 | if discovery_info is not None: 39 | conf, coordinator, scraper = await async_get_config_and_coordinator( 40 | hass, Platform.SENSOR, discovery_info 41 | ) 42 | else: 43 | _LOGGER.info("?? # Could not find sensor configuration") 44 | 45 | if not coordinator.last_update_success: 46 | raise PlatformNotReady 47 | 48 | sensor_name = conf.get(CONF_NAME) 49 | _LOGGER.debug("%s # %s # Setting up sensor", scraper.name, sensor_name) 50 | unique_id = conf.get(CONF_UNIQUE_ID) 51 | unit = conf.get(CONF_UNIT_OF_MEASUREMENT) 52 | device_class = conf.get(CONF_DEVICE_CLASS) 53 | state_class = conf.get(CONF_STATE_CLASS) 54 | force_update = conf.get(CONF_FORCE_UPDATE) 55 | icon_template = conf.get(CONF_ICON) 56 | picture = conf.get(CONF_PICTURE) 57 | 58 | sensor_selector = Selector(hass, conf) 59 | attribute_selectors = {} 60 | for attr_conf in conf.get(CONF_SENSOR_ATTRS) or []: 61 | attr_name = slugify(attr_conf[CONF_NAME]) 62 | attribute_selectors[attr_name] = Selector(hass, attr_conf) 63 | 64 | async_add_entities( 65 | [ 66 | MultiscrapeSensor( 67 | hass, 68 | coordinator, 69 | scraper, 70 | unique_id, 71 | sensor_name, 72 | unit, 73 | device_class, 74 | state_class, 75 | force_update, 76 | icon_template, 77 | picture, 78 
| sensor_selector, 79 | attribute_selectors, 80 | ) 81 | ], 82 | ) 83 | 84 | 85 | class MultiscrapeSensor(MultiscrapeEntity, SensorEntity): 86 | """Implementation of a multiscrape sensor.""" 87 | 88 | def __init__( 89 | self, 90 | hass, 91 | coordinator, 92 | scraper, 93 | unique_id, 94 | name, 95 | unit_of_measurement, 96 | device_class, 97 | state_class, 98 | force_update, 99 | icon_template, 100 | picture, 101 | sensor_selector, 102 | attribute_selectors, 103 | ): 104 | """Initialize the multiscrape sensor.""" 105 | super().__init__( 106 | hass, 107 | coordinator, 108 | scraper, 109 | name, 110 | device_class, 111 | force_update, 112 | icon_template, 113 | picture, 114 | attribute_selectors, 115 | ) 116 | 117 | self.entity_id = async_generate_entity_id( 118 | ENTITY_ID_FORMAT, unique_id or name, hass=hass 119 | ) 120 | self._attr_unique_id = unique_id 121 | self._attr_state_class = state_class 122 | self._attr_native_unit_of_measurement = unit_of_measurement 123 | 124 | self._sensor_selector = sensor_selector 125 | 126 | def _update_sensor(self): 127 | """Update state from the scraper data.""" 128 | _LOGGER.debug( 129 | "%s # %s # Start scraping to update sensor", self.scraper.name, self._name 130 | ) 131 | self._attr_available = True 132 | 133 | try: 134 | if self.coordinator.update_error is True: 135 | raise ValueError( 136 | "Skipped scraping because data couldn't be updated") 137 | 138 | value = self.scraper.scrape( 139 | self._sensor_selector, self._name, variables=self.coordinator.form_variables) 140 | _LOGGER.debug( 141 | "%s # %s # Selected: %s", self.scraper.name, self._name, value 142 | ) 143 | 144 | if self.device_class not in { 145 | SensorDeviceClass.DATE, 146 | SensorDeviceClass.TIMESTAMP, 147 | }: 148 | self._attr_native_value = value 149 | 150 | else: 151 | self._attr_native_value = async_parse_date_datetime( 152 | value, self.entity_id, self.device_class 153 | ) 154 | except Exception as exception: 155 | 
self.coordinator.notify_scrape_exception() 156 | 157 | if self._sensor_selector.on_error.log not in [False, "false", "False"]: 158 | level = LOG_LEVELS[self._sensor_selector.on_error.log] 159 | _LOGGER.log( 160 | level, 161 | "%s # %s # Unable to scrape data: %s \nConsider using debug logging and log_response for further investigation.", 162 | self.scraper.name, 163 | self._name, 164 | exception, 165 | ) 166 | 167 | if self._sensor_selector.on_error.value == CONF_ON_ERROR_VALUE_NONE: 168 | self._attr_available = False 169 | _LOGGER.debug( 170 | "%s # %s # On-error, set value to None", 171 | self.scraper.name, 172 | self._name, 173 | ) 174 | elif self._sensor_selector.on_error.value == CONF_ON_ERROR_VALUE_LAST: 175 | _LOGGER.debug( 176 | "%s # %s # On-error, keep old value: %s", 177 | self.scraper.name, 178 | self._name, 179 | self._attr_native_value, 180 | ) 181 | if self._attr_native_value is None: 182 | self._attr_available = False 183 | return 184 | elif self._sensor_selector.on_error.value == CONF_ON_ERROR_VALUE_DEFAULT: 185 | self._attr_native_value = self._sensor_selector.on_error_default 186 | _LOGGER.debug( 187 | "%s # %s # On-error, set default value: %s", 188 | self.scraper.name, 189 | self._name, 190 | self._sensor_selector.on_error_default, 191 | ) 192 | # determine icon after exception so it's also set for on_error cases 193 | if self._icon_template: 194 | self._set_icon(self._attr_native_value) 195 | -------------------------------------------------------------------------------- /tests/test_selector.py: -------------------------------------------------------------------------------- 1 | """Unit tests for the Selector class.""" 2 | 3 | import pytest 4 | from homeassistant.const import CONF_NAME, CONF_VALUE_TEMPLATE 5 | from homeassistant.core import HomeAssistant 6 | from homeassistant.helpers.template import Template 7 | 8 | from custom_components.multiscrape.const import (CONF_ATTR, CONF_EXTRACT, 9 | CONF_ON_ERROR, 10 | CONF_ON_ERROR_DEFAULT, 11 | 
CONF_ON_ERROR_LOG, 12 | CONF_ON_ERROR_VALUE, 13 | CONF_SELECT, CONF_SELECT_LIST, 14 | DEFAULT_ON_ERROR_LOG, 15 | DEFAULT_ON_ERROR_VALUE) 16 | from custom_components.multiscrape.selector import Selector 17 | 18 | 19 | @pytest.mark.unit 20 | @pytest.mark.timeout(2) 21 | def test_selector_with_select_only(hass: HomeAssistant): 22 | """Test selector with only 'select' configuration.""" 23 | # Arrange 24 | conf = {CONF_SELECT: Template(".test", hass)} 25 | 26 | # Act 27 | selector = Selector(hass, conf) 28 | 29 | # Assert 30 | assert selector.element == ".test" 31 | assert not selector.is_list 32 | assert not selector.just_value 33 | 34 | 35 | @pytest.mark.unit 36 | @pytest.mark.timeout(2) 37 | def test_selector_with_select_list_only(hass: HomeAssistant): 38 | """Test selector with only 'select_list' configuration.""" 39 | # Arrange 40 | conf = {CONF_SELECT_LIST: Template(".item", hass)} 41 | 42 | # Act 43 | selector = Selector(hass, conf) 44 | 45 | # Assert 46 | assert selector.list == ".item" 47 | assert selector.is_list 48 | assert not selector.just_value 49 | 50 | 51 | @pytest.mark.unit 52 | @pytest.mark.timeout(2) 53 | def test_selector_with_value_template_only(hass: HomeAssistant): 54 | """Test selector with only 'value_template' (no select).""" 55 | # Arrange 56 | conf = {CONF_VALUE_TEMPLATE: Template("{{ 'test' }}", hass)} 57 | 58 | # Act 59 | selector = Selector(hass, conf) 60 | 61 | # Assert 62 | assert selector.just_value 63 | assert selector.value_template is not None 64 | 65 | 66 | @pytest.mark.unit 67 | @pytest.mark.timeout(2) 68 | def test_selector_raises_error_without_any_selector(hass: HomeAssistant): 69 | """Test that Selector raises ValueError when no selector is provided.""" 70 | # Arrange 71 | conf = {CONF_NAME: "test"} # No select, select_list, or value_template 72 | 73 | # Act & Assert 74 | with pytest.raises( 75 | ValueError, 76 | match="either select, select_list or a value_template should be provided", 77 | ): 78 | Selector(hass, conf) 79 | 80 
| 81 | @pytest.mark.unit 82 | @pytest.mark.timeout(2) 83 | def test_selector_with_attribute(hass: HomeAssistant): 84 | """Test selector with attribute extraction.""" 85 | # Arrange 86 | conf = {CONF_SELECT: Template(".link", hass), CONF_ATTR: "href"} 87 | 88 | # Act 89 | selector = Selector(hass, conf) 90 | 91 | # Assert 92 | assert selector.attribute == "href" 93 | 94 | 95 | @pytest.mark.unit 96 | @pytest.mark.timeout(2) 97 | def test_selector_with_extract_type(hass: HomeAssistant): 98 | """Test selector with extract type specified.""" 99 | # Arrange 100 | conf = {CONF_SELECT: Template(".content", hass), CONF_EXTRACT: "text"} 101 | 102 | # Act 103 | selector = Selector(hass, conf) 104 | 105 | # Assert 106 | assert selector.extract == "text" 107 | 108 | 109 | @pytest.mark.unit 110 | @pytest.mark.timeout(2) 111 | def test_selector_default_on_error_configuration(hass: HomeAssistant): 112 | """Test selector with default on_error configuration (no config provided).""" 113 | # Arrange 114 | conf = {CONF_SELECT: Template(".test", hass)} 115 | 116 | # Act 117 | selector = Selector(hass, conf) 118 | 119 | # Assert 120 | assert selector.on_error.log == DEFAULT_ON_ERROR_LOG 121 | assert selector.on_error.value == DEFAULT_ON_ERROR_VALUE 122 | assert selector.on_error.default is None 123 | 124 | 125 | @pytest.mark.unit 126 | @pytest.mark.timeout(2) 127 | def test_selector_custom_on_error_log(hass: HomeAssistant): 128 | """Test selector with custom on_error log level.""" 129 | # Arrange 130 | conf = { 131 | CONF_SELECT: Template(".test", hass), 132 | CONF_ON_ERROR: {CONF_ON_ERROR_LOG: "error"}, 133 | } 134 | 135 | # Act 136 | selector = Selector(hass, conf) 137 | 138 | # Assert 139 | assert selector.on_error.log == "error" 140 | assert selector.on_error.value == DEFAULT_ON_ERROR_VALUE 141 | 142 | 143 | @pytest.mark.unit 144 | @pytest.mark.timeout(2) 145 | def test_selector_custom_on_error_value(hass: HomeAssistant): 146 | """Test selector with custom on_error value strategy.""" 
147 | # Arrange 148 | conf = { 149 | CONF_SELECT: Template(".test", hass), 150 | CONF_ON_ERROR: {CONF_ON_ERROR_VALUE: "none"}, 151 | } 152 | 153 | # Act 154 | selector = Selector(hass, conf) 155 | 156 | # Assert 157 | assert selector.on_error.value == "none" 158 | assert selector.on_error.log == DEFAULT_ON_ERROR_LOG 159 | 160 | 161 | @pytest.mark.unit 162 | @pytest.mark.timeout(2) 163 | def test_selector_on_error_default_template(hass: HomeAssistant): 164 | """Test selector with on_error default value template.""" 165 | # Arrange 166 | default_template = Template("{{ 'fallback_value' }}", hass) 167 | conf = { 168 | CONF_SELECT: Template(".test", hass), 169 | CONF_ON_ERROR: {CONF_ON_ERROR_DEFAULT: default_template}, 170 | } 171 | 172 | # Act 173 | selector = Selector(hass, conf) 174 | 175 | # Assert 176 | assert selector.on_error.default is not None 177 | assert selector.on_error_default == "fallback_value" 178 | 179 | 180 | @pytest.mark.unit 181 | @pytest.mark.timeout(2) 182 | def test_selector_hass_assignment_to_templates(hass: HomeAssistant): 183 | """Test that hass is properly assigned to all templates.""" 184 | # Arrange 185 | select_template = Template(".test", None) # hass intentionally None 186 | value_template = Template("{{ 'value' }}", None) 187 | 188 | conf = { 189 | CONF_SELECT: select_template, 190 | CONF_VALUE_TEMPLATE: value_template, 191 | } 192 | 193 | # Act 194 | selector = Selector(hass, conf) 195 | 196 | # Assert - hass should be assigned to templates 197 | assert selector.select_template.hass is not None 198 | assert selector.value_template.hass is not None 199 | 200 | 201 | @pytest.mark.unit 202 | @pytest.mark.timeout(2) 203 | def test_selector_with_all_configurations(hass: HomeAssistant): 204 | """Test selector with all possible configurations.""" 205 | # Arrange 206 | conf = { 207 | CONF_NAME: "comprehensive_test", 208 | CONF_SELECT: Template(".content", hass), 209 | CONF_VALUE_TEMPLATE: Template("{{ value }}", hass), 210 | CONF_ATTR: 
"data-value", 211 | CONF_EXTRACT: "content", 212 | CONF_ON_ERROR: { 213 | CONF_ON_ERROR_LOG: "warning", 214 | CONF_ON_ERROR_VALUE: "last", 215 | CONF_ON_ERROR_DEFAULT: Template("{{ 'default' }}", hass), 216 | }, 217 | } 218 | 219 | # Act 220 | selector = Selector(hass, conf) 221 | 222 | # Assert 223 | assert selector.name == "comprehensive_test" 224 | assert selector.element == ".content" 225 | assert selector.attribute == "data-value" 226 | assert selector.extract == "content" 227 | assert selector.value_template is not None 228 | assert selector.on_error.log == "warning" 229 | assert selector.on_error.value == "last" 230 | assert selector.on_error_default == "default" 231 | -------------------------------------------------------------------------------- /custom_components/multiscrape/entity.py: -------------------------------------------------------------------------------- 1 | """The base entity for the scraper component.""" 2 | import logging 3 | from abc import abstractmethod 4 | from typing import Any 5 | 6 | from homeassistant.core import callback 7 | from homeassistant.exceptions import TemplateError 8 | from homeassistant.helpers.restore_state import RestoreEntity 9 | from homeassistant.helpers.update_coordinator import DataUpdateCoordinator 10 | 11 | from .const import (CONF_ON_ERROR_VALUE_DEFAULT, CONF_ON_ERROR_VALUE_LAST, 12 | CONF_ON_ERROR_VALUE_NONE, LOG_LEVELS) 13 | from .scraper import Scraper 14 | 15 | _LOGGER = logging.getLogger(__name__) 16 | 17 | 18 | class MultiscrapeEntity(RestoreEntity): 19 | """A class for entities using DataUpdateCoordinator.""" 20 | 21 | def __init__( 22 | self, 23 | hass, 24 | coordinator: DataUpdateCoordinator[Any], 25 | scraper: Scraper, 26 | name, 27 | device_class, 28 | force_update, 29 | icon_template, 30 | picture, 31 | attribute_selectors, 32 | ) -> None: 33 | """Create the entity that may have a coordinator.""" 34 | 35 | self.coordinator = coordinator 36 | self.scraper = scraper 37 | self._name = name 38 | 39 | 
self._attr_name = name 40 | self._attr_device_class = device_class 41 | self._attr_force_update = force_update 42 | self._attr_should_poll = False 43 | self._attr_extra_state_attributes = {} 44 | if picture: 45 | self._attr_entity_picture = picture 46 | _LOGGER.debug( 47 | "%s # %s # Set picture to: %s", 48 | self.scraper.name, 49 | self._name, 50 | self._attr_entity_picture, 51 | ) 52 | 53 | self.hass = hass 54 | self._attribute_selectors = attribute_selectors 55 | 56 | self._icon_template = icon_template 57 | if self._icon_template: 58 | self._icon_template.hass = hass 59 | 60 | super().__init__() 61 | 62 | def _set_icon(self, value): 63 | try: 64 | self._attr_icon = self._icon_template.async_render( 65 | variables={"value": value}, parse_result=False 66 | ) 67 | _LOGGER.debug( 68 | "%s # %s # Icon template rendered and set to: %s", 69 | self.scraper.name, 70 | self._name, 71 | self._attr_icon, 72 | ) 73 | except TemplateError as exception: 74 | _LOGGER.error( 75 | "%s # %s # Exception occurred when rendering icon template. 
Exception: %s", 76 | self.scraper.name, 77 | self._name, 78 | exception, 79 | ) 80 | 81 | async def async_added_to_hass(self) -> None: 82 | """When entity is added to hass.""" 83 | await super().async_added_to_hass() 84 | _LOGGER.debug( 85 | "%s # %s # Added sensor to HA", 86 | self.scraper.name, 87 | self._name, 88 | ) 89 | if self.coordinator: 90 | self.async_on_remove( 91 | self.coordinator.async_add_listener( 92 | self._handle_coordinator_update) 93 | ) 94 | 95 | if not (state := await self.async_get_last_state()): 96 | return 97 | _LOGGER.debug("%s # %s # Restoring previous state: %s", self.scraper.name, self._name, state.state) 98 | self._attr_native_value = state.state 99 | 100 | for name in self._attribute_selectors: 101 | if state.attributes.get(name) is not None: 102 | _LOGGER.debug("%s # %s # Restoring attribute `%s` with value: %s", self.scraper.name, self._name, name, state.attributes[name]) 103 | self._attr_extra_state_attributes[name] = state.attributes[name] 104 | 105 | 106 | @callback 107 | def _handle_coordinator_update(self) -> None: 108 | """Handle updated data from the coordinator.""" 109 | if not self.coordinator.last_update_success: 110 | _LOGGER.debug( 111 | "%s # %s # Last update of the resource was not successful. 
Setting sensor availability to False", 112 | self.scraper.name, 113 | self._name, 114 | ) 115 | self._attr_available = False 116 | else: 117 | self._attr_available = True 118 | self._update_sensor() 119 | self._update_attributes() 120 | self.async_write_ha_state() 121 | _LOGGER.debug( 122 | "%s # %s # Sensor updated and state written to HA", 123 | self.scraper.name, 124 | self._name, 125 | ) 126 | 127 | @abstractmethod 128 | def _update_sensor(self): 129 | """Update state from the scraper data.""" 130 | 131 | def _update_attributes(self): 132 | if self._attribute_selectors: 133 | _LOGGER.debug( 134 | "%s # %s # Start scraping attributes", 135 | self.scraper.name, 136 | self._name, 137 | ) 138 | self.old_attributes, self._attr_extra_state_attributes = ( 139 | self._attr_extra_state_attributes, 140 | {}, 141 | ) 142 | for name, attr_selector in self._attribute_selectors.items(): 143 | try: 144 | attr_value = self.scraper.scrape( 145 | attr_selector, self._name, name, variables=self.coordinator.form_variables) 146 | self._attr_extra_state_attributes[name] = attr_value 147 | except Exception as exception: 148 | _LOGGER.debug( 149 | "%s # %s # %s # Exception selecting attribute data: %s", 150 | self.scraper.name, 151 | self._name, 152 | name, 153 | exception, 154 | ) 155 | 156 | if attr_selector.on_error.log in LOG_LEVELS: 157 | level = LOG_LEVELS[attr_selector.on_error.log] 158 | _LOGGER.log( 159 | level, 160 | "%s # %s # %s # Unable to extract data from HTML", 161 | self.scraper.name, 162 | self._name, 163 | name, 164 | ) 165 | 166 | if attr_selector.on_error.value == CONF_ON_ERROR_VALUE_NONE: 167 | _LOGGER.debug( 168 | "%s # %s # %s # On-error, set value to None", 169 | self.scraper.name, 170 | self._name, 171 | name, 172 | ) 173 | self._attr_extra_state_attributes[name] = None 174 | elif attr_selector.on_error.value == CONF_ON_ERROR_VALUE_LAST: 175 | self._attr_extra_state_attributes[ 176 | name 177 | ] = self.old_attributes.get(name) 178 | _LOGGER.debug( 179 | "%s 
# %s # %s # On-error, keep old value: %s", 180 | self.scraper.name, 181 | self._name, 182 | name, 183 | self.old_attributes.get(name), 184 | ) 185 | elif attr_selector.on_error.value == CONF_ON_ERROR_VALUE_DEFAULT: 186 | self._attr_extra_state_attributes[ 187 | name 188 | ] = attr_selector.on_error_default 189 | _LOGGER.debug( 190 | "%s # %s # %s # On-error, set default value: %s", 191 | self.scraper.name, 192 | self._name, 193 | name, 194 | attr_selector.on_error_default, 195 | ) 196 | -------------------------------------------------------------------------------- /custom_components/multiscrape/service.py: -------------------------------------------------------------------------------- 1 | """Class for implementing the multiscrape services.""" 2 | 3 | import logging 4 | 5 | import homeassistant.helpers.config_validation as cv 6 | import voluptuous as vol 7 | from homeassistant.const import (CONF_DESCRIPTION, CONF_HEADERS, CONF_ICON, 8 | CONF_NAME, CONF_UNIQUE_ID, 9 | CONF_VALUE_TEMPLATE, Platform) 10 | from homeassistant.core import HomeAssistant, ServiceCall, SupportsResponse 11 | from homeassistant.helpers.service import async_set_service_schema 12 | from homeassistant.helpers.template import Template 13 | from homeassistant.util import slugify 14 | 15 | from .const import (CONF_FIELDS, CONF_FORM_SUBMIT, CONF_FORM_VARIABLES, 16 | CONF_LOG_RESPONSE, CONF_PARSER, CONF_SENSOR_ATTRS, DOMAIN) 17 | from .coordinator import (MultiscrapeDataUpdateCoordinator, 18 | create_content_request_manager) 19 | from .file import create_file_manager 20 | from .form import create_form_submitter 21 | from .http import create_http_wrapper 22 | from .schema import SERVICE_COMBINED_SCHEMA 23 | from .scraper import create_scraper 24 | from .selector import Selector 25 | 26 | _LOGGER = logging.getLogger(__name__) 27 | 28 | 29 | async def setup_integration_services(hass: HomeAssistant): 30 | """Set up the multiscrape integration level services.""" 31 | _LOGGER.debug("Setting up 
multiscrape integration level services") 32 | await setup_get_content_service(hass) 33 | await setup_scrape_service(hass) 34 | 35 | 36 | async def setup_config_services( 37 | hass: HomeAssistant, coordinator: MultiscrapeDataUpdateCoordinator, config_name: str 38 | ): 39 | """Set up the multiscrape configuration level services.""" 40 | _LOGGER.debug( 41 | "%s # Setting up multiscrape configuration level services", config_name 42 | ) 43 | target_name = slugify(config_name) 44 | await _setup_trigger_service(hass, target_name, coordinator) 45 | 46 | 47 | async def _setup_trigger_service(hass: HomeAssistant, target_name, coordinator): 48 | async def _async_trigger_service(service: ServiceCall): 49 | _LOGGER.info("Multiscrape triggered by service: %s", service.__repr__()) 50 | await coordinator.async_request_refresh() 51 | 52 | hass.services.async_register( 53 | DOMAIN, 54 | f"trigger_{target_name}", 55 | _async_trigger_service, 56 | schema=vol.Schema({}), 57 | ) 58 | 59 | # Register the service description 60 | service_desc = { 61 | CONF_NAME: f"Trigger an update of {target_name}", 62 | CONF_DESCRIPTION: f"Triggers an update for the multiscrape {target_name} integration, independent of the update interval.", 63 | CONF_FIELDS: {}, 64 | } 65 | async_set_service_schema(hass, DOMAIN, f"trigger_{target_name}", service_desc) 66 | 67 | 68 | async def setup_get_content_service(hass: HomeAssistant): 69 | """Set up the multiscrape get_content service.""" 70 | 71 | async def _async_get_content_service(service: ServiceCall) -> None: 72 | _LOGGER.info("Get_content service triggered: %s", service.__repr__()) 73 | config_name = "get_content_service" 74 | conf = _restore_templates(service.data) 75 | request_manager, scraper = await _prepare_service_request( 76 | hass, conf, config_name 77 | ) 78 | result = await request_manager.get_content() 79 | await scraper.set_content(result) 80 | return {"content": str(scraper.formatted_content)} 81 | 82 | hass.services.async_register( 83 | 
DOMAIN, 84 | "get_content", 85 | _async_get_content_service, 86 | schema=SERVICE_COMBINED_SCHEMA, 87 | supports_response=SupportsResponse.ONLY, 88 | ) 89 | 90 | 91 | async def setup_scrape_service(hass: HomeAssistant): 92 | """Set up the multiscrape scrape service.""" 93 | 94 | async def _async_scrape_service(service: ServiceCall) -> None: 95 | _LOGGER.info("Scrape service triggered: %s", service.__repr__()) 96 | conf = _restore_templates(service.data) 97 | config_name = "scrape_service" 98 | request_manager, scraper = await _prepare_service_request( 99 | hass, conf, config_name 100 | ) 101 | result = await request_manager.get_content() 102 | await scraper.set_content(result) 103 | 104 | response = {} 105 | 106 | for platform in [Platform.SENSOR, Platform.BINARY_SENSOR]: 107 | for sensor in conf.get(platform) or []: 108 | name = sensor.get(CONF_UNIQUE_ID) or slugify(sensor.get(CONF_NAME)) 109 | sensor_selector = Selector(hass, sensor) 110 | response[name] = {"value": scraper.scrape(sensor_selector, config_name)} 111 | 112 | if sensor.get(CONF_ICON): 113 | response[CONF_ICON] = sensor.get(CONF_ICON).async_render( 114 | variables={"value": response[name]}, parse_result=False 115 | ) 116 | 117 | for attr_conf in sensor.get(CONF_SENSOR_ATTRS) or []: 118 | attr_name = slugify(attr_conf[CONF_NAME]) 119 | attr_selector = Selector(hass, attr_conf) 120 | response[name].setdefault(CONF_SENSOR_ATTRS, {}).update( 121 | {attr_name: scraper.scrape(attr_selector, config_name)} 122 | ) 123 | 124 | return response 125 | 126 | hass.services.async_register( 127 | DOMAIN, 128 | "scrape", 129 | _async_scrape_service, 130 | schema=SERVICE_COMBINED_SCHEMA, 131 | supports_response=SupportsResponse.ONLY, 132 | ) 133 | 134 | 135 | async def _prepare_service_request(hass: HomeAssistant, conf, config_name): 136 | file_manager = await create_file_manager(hass, config_name, conf.get(CONF_LOG_RESPONSE)) 137 | http = create_http_wrapper(config_name, conf, hass, file_manager) 138 | form_submitter 
= None 139 | form_submit_config = conf.get(CONF_FORM_SUBMIT) 140 | parser = conf.get(CONF_PARSER) 141 | if form_submit_config: 142 | form_http = create_http_wrapper( 143 | config_name, form_submit_config, hass, file_manager) 144 | form_submitter = create_form_submitter( 145 | config_name, form_submit_config, hass, form_http, file_manager, parser 146 | ) 147 | request_manager = create_content_request_manager( 148 | config_name, conf, hass, http, form_submitter 149 | ) 150 | scraper = create_scraper(config_name, conf, hass, file_manager) 151 | return request_manager, scraper 152 | 153 | 154 | def _restore_templates(config): 155 | config = dict(config) 156 | selectors = [] 157 | for platform in [Platform.SENSOR, Platform.BINARY_SENSOR]: 158 | selectors.extend(config.get(platform) or []) 159 | if config.get(CONF_FORM_SUBMIT): 160 | selectors.extend(config[CONF_FORM_SUBMIT].get(CONF_FORM_VARIABLES) or []) 161 | 162 | for selector in selectors: 163 | for attr_conf in selector.get(CONF_SENSOR_ATTRS) or []: 164 | attr_conf[CONF_VALUE_TEMPLATE] = ( 165 | _restore_template(attr_conf.get(CONF_VALUE_TEMPLATE)) 166 | if attr_conf.get(CONF_VALUE_TEMPLATE) 167 | else None 168 | ) 169 | if selector.get(CONF_ICON): 170 | selector[CONF_ICON] = _restore_template(selector.get(CONF_ICON)) 171 | if selector.get(CONF_VALUE_TEMPLATE): 172 | selector[CONF_VALUE_TEMPLATE] = _restore_template(selector[CONF_VALUE_TEMPLATE]) 173 | 174 | headers = config.get(CONF_HEADERS) or {} 175 | for key, value in headers.items(): 176 | headers[key] = _restore_template(value) 177 | 178 | return config 179 | 180 | def _restore_template(value: str | Template): 181 | value = value.template if isinstance(value, Template) else value 182 | return cv.template(_replace_template_characters(value)) 183 | 184 | 185 | def _replace_template_characters(template: str): 186 | template = template.replace("{!{", "{{").replace("}!}", "}}") 187 | template = template.replace("{!%", "{%").replace("%!}", "%}") 188 | return 
template 189 | -------------------------------------------------------------------------------- /tests/test_button.py: -------------------------------------------------------------------------------- 1 | """Integration tests for button platform.""" 2 | 3 | import pytest 4 | from homeassistant.const import CONF_NAME, CONF_UNIQUE_ID 5 | from homeassistant.core import HomeAssistant 6 | from homeassistant.helpers.entity import EntityCategory 7 | 8 | from custom_components.multiscrape.button import (MultiscrapeRefreshButton, 9 | async_setup_platform) 10 | 11 | 12 | @pytest.fixture 13 | def button_config(): 14 | """Create a basic button configuration.""" 15 | return { 16 | CONF_NAME: "test_refresh_button", 17 | CONF_UNIQUE_ID: "test_button_unique_id", 18 | } 19 | 20 | 21 | @pytest.fixture 22 | def discovery_info(): 23 | """Create discovery info for platform setup.""" 24 | return {"name": "test_scraper"} 25 | 26 | 27 | @pytest.mark.integration 28 | @pytest.mark.async_test 29 | @pytest.mark.timeout(10) 30 | async def test_button_initialization(hass: HomeAssistant, coordinator): 31 | """Test button initializes with correct attributes.""" 32 | # Arrange & Act 33 | button = MultiscrapeRefreshButton( 34 | hass=hass, 35 | coordinator=coordinator, 36 | unique_id="test_button", 37 | name="Test Refresh Button", 38 | ) 39 | 40 | # Assert 41 | assert button._attr_name == "Test Refresh Button" 42 | assert button._attr_unique_id == "test_button" 43 | assert button._attr_icon == "mdi:refresh" 44 | assert button._attr_entity_category == EntityCategory.CONFIG 45 | assert button._coordinator == coordinator 46 | assert button.entity_id == "button.test_button" 47 | 48 | 49 | @pytest.mark.integration 50 | @pytest.mark.async_test 51 | @pytest.mark.timeout(10) 52 | async def test_button_initialization_without_unique_id(hass: HomeAssistant, coordinator): 53 | """Test button initializes using name when unique_id is None.""" 54 | # Arrange & Act 55 | button = MultiscrapeRefreshButton( 56 | 
hass=hass, 57 | coordinator=coordinator, 58 | unique_id=None, 59 | name="Test Button Name", 60 | ) 61 | 62 | # Assert 63 | assert button._attr_unique_id is None 64 | assert button.entity_id == "button.test_button_name" 65 | 66 | 67 | @pytest.mark.integration 68 | @pytest.mark.async_test 69 | @pytest.mark.timeout(10) 70 | async def test_button_initialization_with_special_characters_in_name( 71 | hass: HomeAssistant, coordinator 72 | ): 73 | """Test button entity_id is slugified from name with special characters.""" 74 | # Arrange & Act 75 | button = MultiscrapeRefreshButton( 76 | hass=hass, 77 | coordinator=coordinator, 78 | unique_id=None, 79 | name="Test Button: Special! Name?", 80 | ) 81 | 82 | # Assert 83 | assert button._attr_name == "Test Button: Special! Name?" 84 | # Entity ID should be slugified 85 | assert "button.test_button_special_name" in button.entity_id 86 | 87 | 88 | @pytest.mark.integration 89 | @pytest.mark.async_test 90 | @pytest.mark.timeout(10) 91 | async def test_button_press_triggers_coordinator_refresh( 92 | hass: HomeAssistant, coordinator, caplog 93 | ): 94 | """Test pressing button triggers coordinator refresh.""" 95 | # Arrange 96 | button = MultiscrapeRefreshButton( 97 | hass=hass, 98 | coordinator=coordinator, 99 | unique_id="test_button", 100 | name="Test Button", 101 | ) 102 | 103 | # Track if refresh was called 104 | refresh_called = False 105 | 106 | async def mock_refresh(): 107 | nonlocal refresh_called 108 | refresh_called = True 109 | 110 | coordinator.async_request_refresh = mock_refresh 111 | 112 | # Act 113 | await button.async_press() 114 | 115 | # Assert 116 | assert refresh_called is True 117 | assert "Multiscrape triggered by button" in caplog.text 118 | 119 | 120 | @pytest.mark.integration 121 | @pytest.mark.async_test 122 | @pytest.mark.timeout(10) 123 | async def test_async_setup_platform_creates_button( 124 | hass: HomeAssistant, coordinator, mock_http_wrapper, button_config, discovery_info 125 | ): 126 | """Test 
async_setup_platform creates button entity.""" 127 | # Arrange 128 | entities_added = [] 129 | 130 | def mock_add_entities(entities): 131 | entities_added.extend(entities) 132 | 133 | # Mock async_get_config_and_coordinator 134 | async def mock_get_config_and_coordinator(hass, platform, discovery_info): 135 | from custom_components.multiscrape.const import DEFAULT_SEPARATOR 136 | from custom_components.multiscrape.scraper import Scraper 137 | 138 | scraper = Scraper("test_scraper", hass, None, "lxml", DEFAULT_SEPARATOR) 139 | return button_config, coordinator, scraper 140 | 141 | # Patch the function 142 | import custom_components.multiscrape.button as button_module 143 | 144 | original_func = button_module.async_get_config_and_coordinator 145 | button_module.async_get_config_and_coordinator = mock_get_config_and_coordinator 146 | 147 | try: 148 | # Act 149 | await async_setup_platform( 150 | hass, button_config, mock_add_entities, discovery_info 151 | ) 152 | 153 | # Assert 154 | assert len(entities_added) == 1 155 | button = entities_added[0] 156 | assert isinstance(button, MultiscrapeRefreshButton) 157 | assert button._attr_name == "test_refresh_button" 158 | assert button._attr_unique_id == "test_button_unique_id" 159 | finally: 160 | # Restore original function 161 | button_module.async_get_config_and_coordinator = original_func 162 | 163 | 164 | @pytest.mark.integration 165 | @pytest.mark.async_test 166 | @pytest.mark.timeout(10) 167 | async def test_button_has_config_entity_category(hass: HomeAssistant, coordinator): 168 | """Test button has CONFIG entity category (shows in settings, not main UI).""" 169 | # Arrange & Act 170 | button = MultiscrapeRefreshButton( 171 | hass=hass, 172 | coordinator=coordinator, 173 | unique_id="test_button", 174 | name="Test Button", 175 | ) 176 | 177 | # Assert - EntityCategory.CONFIG means it shows in settings area 178 | assert button._attr_entity_category == EntityCategory.CONFIG 179 | 180 | 181 | @pytest.mark.integration 
182 | @pytest.mark.async_test 183 | @pytest.mark.timeout(10) 184 | async def test_button_has_refresh_icon(hass: HomeAssistant, coordinator): 185 | """Test button uses refresh icon.""" 186 | # Arrange & Act 187 | button = MultiscrapeRefreshButton( 188 | hass=hass, 189 | coordinator=coordinator, 190 | unique_id="test_button", 191 | name="Test Button", 192 | ) 193 | 194 | # Assert 195 | assert button._attr_icon == "mdi:refresh" 196 | 197 | 198 | @pytest.mark.integration 199 | @pytest.mark.async_test 200 | @pytest.mark.timeout(10) 201 | async def test_button_entity_id_format(hass: HomeAssistant, coordinator): 202 | """Test button entity ID follows platform format.""" 203 | # Arrange & Act 204 | button = MultiscrapeRefreshButton( 205 | hass=hass, 206 | coordinator=coordinator, 207 | unique_id="my_unique_button", 208 | name="My Button", 209 | ) 210 | 211 | # Assert 212 | assert button.entity_id.startswith("button.") 213 | assert "my_unique_button" in button.entity_id 214 | 215 | 216 | @pytest.mark.integration 217 | @pytest.mark.async_test 218 | @pytest.mark.timeout(10) 219 | async def test_button_press_multiple_times(hass: HomeAssistant, coordinator): 220 | """Test button can be pressed multiple times.""" 221 | # Arrange 222 | button = MultiscrapeRefreshButton( 223 | hass=hass, 224 | coordinator=coordinator, 225 | unique_id="test_button", 226 | name="Test Button", 227 | ) 228 | 229 | press_count = 0 230 | 231 | async def mock_refresh(): 232 | nonlocal press_count 233 | press_count += 1 234 | 235 | coordinator.async_request_refresh = mock_refresh 236 | 237 | # Act - press button 3 times 238 | await button.async_press() 239 | await button.async_press() 240 | await button.async_press() 241 | 242 | # Assert 243 | assert press_count == 3 244 | -------------------------------------------------------------------------------- /custom_components/multiscrape/binary_sensor.py: -------------------------------------------------------------------------------- 1 | """Support for 
multiscrape binary sensors.""" 2 | from __future__ import annotations 3 | 4 | import logging 5 | 6 | from homeassistant.components.binary_sensor import BinarySensorEntity 7 | from homeassistant.const import (CONF_DEVICE_CLASS, CONF_FORCE_UPDATE, 8 | CONF_ICON, CONF_NAME, CONF_UNIQUE_ID, 9 | Platform) 10 | from homeassistant.core import HomeAssistant 11 | from homeassistant.exceptions import PlatformNotReady 12 | from homeassistant.helpers.entity import async_generate_entity_id 13 | from homeassistant.helpers.entity_platform import AddEntitiesCallback 14 | from homeassistant.helpers.typing import ConfigType, DiscoveryInfoType 15 | from homeassistant.util import slugify 16 | 17 | from . import async_get_config_and_coordinator 18 | from .const import (CONF_ON_ERROR_VALUE_DEFAULT, CONF_ON_ERROR_VALUE_LAST, 19 | CONF_ON_ERROR_VALUE_NONE, CONF_PICTURE, CONF_SENSOR_ATTRS, 20 | LOG_LEVELS) 21 | from .entity import MultiscrapeEntity 22 | from .selector import Selector 23 | 24 | ENTITY_ID_FORMAT = Platform.BINARY_SENSOR + ".{}" 25 | _LOGGER = logging.getLogger(__name__) 26 | 27 | 28 | async def async_setup_platform( 29 | hass: HomeAssistant, 30 | config: ConfigType, 31 | async_add_entities: AddEntitiesCallback, 32 | discovery_info: DiscoveryInfoType | None = None, 33 | ) -> None: 34 | """Set up the multiscrape binary sensor.""" 35 | # Must update the sensor now (including fetching the scraper resource) to 36 | # ensure it's updating its state. 37 | if discovery_info is not None: 38 | conf, coordinator, scraper = await async_get_config_and_coordinator( 39 | hass, Platform.BINARY_SENSOR, discovery_info 40 | ) 41 | else: 42 | _LOGGER.info("?? 
# Could not find binary_sensor configuration") 43 | 44 | if not coordinator.last_update_success: 45 | raise PlatformNotReady 46 | 47 | sensor_name = conf.get(CONF_NAME) 48 | _LOGGER.debug("%s # %s # Setting up binary sensor", 49 | scraper.name, sensor_name) 50 | unique_id = conf.get(CONF_UNIQUE_ID) 51 | device_class = conf.get(CONF_DEVICE_CLASS) 52 | force_update = conf.get(CONF_FORCE_UPDATE) 53 | icon_template = conf.get(CONF_ICON) 54 | picture = conf.get(CONF_PICTURE) 55 | 56 | sensor_selector = Selector(hass, conf) 57 | attribute_selectors = {} 58 | for attr_conf in conf.get(CONF_SENSOR_ATTRS) or []: 59 | attr_name = slugify(attr_conf[CONF_NAME]) 60 | attribute_selectors[attr_name] = Selector(hass, attr_conf) 61 | 62 | async_add_entities( 63 | [ 64 | MultiscrapeBinarySensor( 65 | hass, 66 | coordinator, 67 | scraper, 68 | unique_id, 69 | sensor_name, 70 | device_class, 71 | force_update, 72 | icon_template, 73 | picture, 74 | sensor_selector, 75 | attribute_selectors, 76 | ) 77 | ], 78 | ) 79 | 80 | 81 | class MultiscrapeBinarySensor(MultiscrapeEntity, BinarySensorEntity): 82 | """Representation of a multiscrape binary sensor.""" 83 | 84 | def __init__( 85 | self, 86 | hass, 87 | coordinator, 88 | scraper, 89 | unique_id, 90 | name, 91 | device_class, 92 | force_update, 93 | icon_template, 94 | picture, 95 | sensor_selector, 96 | attribute_selectors, 97 | ): 98 | """Initialize a multiscrape binary sensor.""" 99 | super().__init__( 100 | hass, 101 | coordinator, 102 | scraper, 103 | name, 104 | device_class, 105 | force_update, 106 | icon_template, 107 | picture, 108 | attribute_selectors, 109 | ) 110 | 111 | self.entity_id = async_generate_entity_id( 112 | ENTITY_ID_FORMAT, unique_id or name, hass=hass 113 | ) 114 | 115 | self._attr_unique_id = unique_id 116 | self._sensor_selector = sensor_selector 117 | 118 | def _update_sensor(self): 119 | """Update state from the scraped data.""" 120 | _LOGGER.debug( 121 | "%s # %s # Start scraping to update sensor", 
self.scraper.name, self._name 122 | ) 123 | 124 | try: 125 | if self.coordinator.update_error is True: 126 | raise ValueError( 127 | "Skipped scraping because data couldn't be updated") 128 | 129 | value = self.scraper.scrape( 130 | self._sensor_selector, self._name, variables=self.coordinator.form_variables) 131 | try: 132 | self._attr_is_on = bool(int(value)) 133 | except (ValueError, TypeError): 134 | if isinstance(value, str): 135 | self._attr_is_on = { 136 | "true": True, 137 | "on": True, 138 | "open": True, 139 | "yes": True, 140 | }.get(value.lower(), False) 141 | else: 142 | self._attr_is_on = bool(value) 143 | 144 | _LOGGER.debug( 145 | "%s # %s # Selected: %s, set sensor to: %s", 146 | self.scraper.name, 147 | self._name, 148 | value, 149 | self._attr_is_on, 150 | ) 151 | except Exception as exception: 152 | self.coordinator.notify_scrape_exception() 153 | 154 | if self._sensor_selector.on_error.log not in [False, "false", "False"]: 155 | level = LOG_LEVELS[self._sensor_selector.on_error.log] 156 | _LOGGER.log( 157 | level, 158 | "%s # %s # Unable to scrape data: %s. 
\nConsider using debug logging and log_response for further investigation.", 159 | self.scraper.name, 160 | self._name, 161 | exception, 162 | ) 163 | 164 | if self._sensor_selector.on_error.value == CONF_ON_ERROR_VALUE_NONE: 165 | self._attr_available = False 166 | _LOGGER.debug( 167 | "%s # %s # On-error, set value to None", 168 | self.scraper.name, 169 | self._name, 170 | ) 171 | elif self._sensor_selector.on_error.value == CONF_ON_ERROR_VALUE_LAST: 172 | _LOGGER.debug( 173 | "%s # %s # On-error, keep old value: %s", 174 | self.scraper.name, 175 | self._name, 176 | self._attr_is_on, 177 | ) 178 | return 179 | elif self._sensor_selector.on_error.value == CONF_ON_ERROR_VALUE_DEFAULT: 180 | default_value = self._sensor_selector.on_error_default 181 | # Convert default value to boolean using the same logic as regular values 182 | try: 183 | self._attr_is_on = bool(int(default_value)) 184 | except (ValueError, TypeError): 185 | if isinstance(default_value, str): 186 | self._attr_is_on = { 187 | "true": True, 188 | "on": True, 189 | "open": True, 190 | "yes": True, 191 | }.get(default_value.lower(), False) 192 | else: 193 | self._attr_is_on = bool(default_value) 194 | _LOGGER.debug( 195 | "%s # %s # On-error, set default value: %s (converted to: %s)", 196 | self.scraper.name, 197 | self._name, 198 | default_value, 199 | self._attr_is_on, 200 | ) 201 | # determine icon after exception so it's also set for on_error cases 202 | if self._icon_template: 203 | self._set_icon(self._attr_is_on) 204 | -------------------------------------------------------------------------------- /tests/test_file.py: -------------------------------------------------------------------------------- 1 | """Integration tests for file manager functionality.""" 2 | 3 | import os 4 | import tempfile 5 | 6 | import pytest 7 | from homeassistant.core import HomeAssistant 8 | 9 | from custom_components.multiscrape.file import (LoggingFileManager, 10 | create_file_manager) 11 | 12 | 13 | 
@pytest.mark.integration 14 | @pytest.mark.async_test 15 | @pytest.mark.timeout(10) 16 | async def test_create_file_manager_with_logging_enabled(hass: HomeAssistant): 17 | """Test create_file_manager creates file manager when log_response is True.""" 18 | # Arrange 19 | config_name = "test_config" 20 | 21 | # Act 22 | file_manager = await create_file_manager(hass, config_name, log_response=True) 23 | 24 | # Assert 25 | assert file_manager is not None 26 | assert isinstance(file_manager, LoggingFileManager) 27 | assert "multiscrape/test_config" in file_manager.folder 28 | 29 | 30 | @pytest.mark.integration 31 | @pytest.mark.async_test 32 | @pytest.mark.timeout(10) 33 | async def test_create_file_manager_with_logging_disabled(hass: HomeAssistant): 34 | """Test create_file_manager returns None when log_response is False.""" 35 | # Arrange 36 | config_name = "test_config" 37 | 38 | # Act 39 | file_manager = await create_file_manager(hass, config_name, log_response=False) 40 | 41 | # Assert 42 | assert file_manager is None 43 | 44 | 45 | @pytest.mark.integration 46 | @pytest.mark.async_test 47 | @pytest.mark.timeout(10) 48 | async def test_create_file_manager_slugifies_config_name(hass: HomeAssistant): 49 | """Test create_file_manager slugifies config name with spaces and special chars.""" 50 | # Arrange 51 | config_name = "Test Config With Spaces!" 
52 | 53 | # Act 54 | file_manager = await create_file_manager(hass, config_name, log_response=True) 55 | 56 | # Assert 57 | assert file_manager is not None 58 | assert "test_config_with_spaces" in file_manager.folder 59 | 60 | 61 | def test_logging_file_manager_initialization(): 62 | """Test LoggingFileManager initializes with correct folder.""" 63 | # Arrange 64 | folder = "/tmp/test_folder" 65 | 66 | # Act 67 | file_manager = LoggingFileManager(folder) 68 | 69 | # Assert 70 | assert file_manager.folder == folder 71 | 72 | 73 | def test_logging_file_manager_create_folders(): 74 | """Test LoggingFileManager creates folders.""" 75 | # Arrange 76 | with tempfile.TemporaryDirectory() as temp_dir: 77 | test_folder = os.path.join(temp_dir, "multiscrape", "test") 78 | file_manager = LoggingFileManager(test_folder) 79 | 80 | # Act 81 | file_manager.create_folders() 82 | 83 | # Assert 84 | assert os.path.exists(test_folder) 85 | assert os.path.isdir(test_folder) 86 | 87 | 88 | def test_logging_file_manager_create_folders_exists_ok(): 89 | """Test LoggingFileManager handles existing folders gracefully.""" 90 | # Arrange 91 | with tempfile.TemporaryDirectory() as temp_dir: 92 | test_folder = os.path.join(temp_dir, "multiscrape", "test") 93 | file_manager = LoggingFileManager(test_folder) 94 | 95 | # Create folder first time 96 | file_manager.create_folders() 97 | 98 | # Act - create again (should not raise error) 99 | file_manager.create_folders() 100 | 101 | # Assert 102 | assert os.path.exists(test_folder) 103 | 104 | 105 | def test_logging_file_manager_write(): 106 | """Test LoggingFileManager writes content to file.""" 107 | # Arrange 108 | with tempfile.TemporaryDirectory() as temp_dir: 109 | file_manager = LoggingFileManager(temp_dir) 110 | file_manager.create_folders() 111 | 112 | filename = "test_file.txt" 113 | content = "Test content" 114 | 115 | # Act 116 | file_manager.write(filename, content) 117 | 118 | # Assert 119 | file_path = os.path.join(temp_dir, 
filename) 120 | assert os.path.exists(file_path) 121 | with open(file_path, encoding="utf8") as f: 122 | assert f.read() == content 123 | 124 | 125 | def test_logging_file_manager_write_with_special_content(): 126 | """Test LoggingFileManager writes special characters correctly.""" 127 | # Arrange 128 | with tempfile.TemporaryDirectory() as temp_dir: 129 | file_manager = LoggingFileManager(temp_dir) 130 | file_manager.create_folders() 131 | 132 | filename = "unicode_test.txt" 133 | content = "Special chars: \u00e9\u00e8\u00ea \u4e2d\u6587 \u0440\u0443\u0441\u0441\u043a\u0438\u0439" 134 | 135 | # Act 136 | file_manager.write(filename, content) 137 | 138 | # Assert 139 | file_path = os.path.join(temp_dir, filename) 140 | with open(file_path, encoding="utf8") as f: 141 | assert f.read() == content 142 | 143 | 144 | def test_logging_file_manager_write_converts_to_string(): 145 | """Test LoggingFileManager converts non-string content to string.""" 146 | # Arrange 147 | with tempfile.TemporaryDirectory() as temp_dir: 148 | file_manager = LoggingFileManager(temp_dir) 149 | file_manager.create_folders() 150 | 151 | filename = "dict_content.txt" 152 | content = {"key": "value", "number": 42} 153 | 154 | # Act 155 | file_manager.write(filename, content) 156 | 157 | # Assert 158 | file_path = os.path.join(temp_dir, filename) 159 | with open(file_path, encoding="utf8") as f: 160 | # str(dict) includes quotes and braces 161 | assert "key" in f.read() 162 | 163 | 164 | def test_logging_file_manager_empty_folder(): 165 | """Test LoggingFileManager empties folder contents.""" 166 | # Arrange 167 | with tempfile.TemporaryDirectory() as temp_dir: 168 | file_manager = LoggingFileManager(temp_dir) 169 | file_manager.create_folders() 170 | 171 | # Create some test files 172 | file_manager.write("file1.txt", "content1") 173 | file_manager.write("file2.txt", "content2") 174 | file_manager.write("file3.txt", "content3") 175 | 176 | # Verify files exist 177 | assert 
os.path.exists(os.path.join(temp_dir, "file1.txt")) 178 | assert os.path.exists(os.path.join(temp_dir, "file2.txt")) 179 | assert os.path.exists(os.path.join(temp_dir, "file3.txt")) 180 | 181 | # Act 182 | file_manager.empty_folder() 183 | 184 | # Assert 185 | assert not os.path.exists(os.path.join(temp_dir, "file1.txt")) 186 | assert not os.path.exists(os.path.join(temp_dir, "file2.txt")) 187 | assert not os.path.exists(os.path.join(temp_dir, "file3.txt")) 188 | assert os.path.exists(temp_dir) # Folder itself should still exist 189 | 190 | 191 | def test_logging_file_manager_empty_folder_preserves_subdirectories(): 192 | """Test LoggingFileManager only removes files, not subdirectories.""" 193 | # Arrange 194 | with tempfile.TemporaryDirectory() as temp_dir: 195 | file_manager = LoggingFileManager(temp_dir) 196 | file_manager.create_folders() 197 | 198 | # Create a file and a subdirectory 199 | file_manager.write("file.txt", "content") 200 | subdir = os.path.join(temp_dir, "subdir") 201 | os.makedirs(subdir) 202 | 203 | # Act 204 | file_manager.empty_folder() 205 | 206 | # Assert 207 | assert not os.path.exists(os.path.join(temp_dir, "file.txt")) 208 | assert os.path.exists(subdir) # Subdirectory should be preserved 209 | 210 | 211 | def test_logging_file_manager_empty_folder_handles_symlinks(): 212 | """Test LoggingFileManager removes symlinks during empty_folder.""" 213 | # Arrange 214 | with tempfile.TemporaryDirectory() as temp_dir: 215 | file_manager = LoggingFileManager(temp_dir) 216 | file_manager.create_folders() 217 | 218 | # Create a file and a symlink 219 | target_file = os.path.join(temp_dir, "target.txt") 220 | symlink_file = os.path.join(temp_dir, "link.txt") 221 | with open(target_file, "w") as f: 222 | f.write("target content") 223 | os.symlink(target_file, symlink_file) 224 | 225 | # Act 226 | file_manager.empty_folder() 227 | 228 | # Assert 229 | assert not os.path.exists(target_file) 230 | assert not os.path.exists(symlink_file) 231 | 
-------------------------------------------------------------------------------- /tests/test_util.py: -------------------------------------------------------------------------------- 1 | """Integration tests for utility functions.""" 2 | 3 | import pytest 4 | from homeassistant.core import HomeAssistant 5 | from homeassistant.exceptions import TemplateError 6 | from homeassistant.helpers.template import Template 7 | 8 | from custom_components.multiscrape.util import (create_dict_renderer, 9 | create_renderer) 10 | 11 | 12 | @pytest.mark.integration 13 | @pytest.mark.async_test 14 | @pytest.mark.timeout(10) 15 | def test_create_renderer_with_none_returns_none_renderer(hass: HomeAssistant): 16 | """Test create_renderer with None returns a renderer that returns None.""" 17 | # Act 18 | renderer = create_renderer(hass, None) 19 | 20 | # Assert 21 | assert renderer() is None 22 | assert renderer(variables={}) is None 23 | 24 | 25 | @pytest.mark.integration 26 | @pytest.mark.async_test 27 | @pytest.mark.timeout(10) 28 | def test_create_renderer_with_string_template(hass: HomeAssistant): 29 | """Test create_renderer with string template.""" 30 | # Arrange 31 | template_str = "Hello {{ name }}" 32 | 33 | # Act 34 | renderer = create_renderer(hass, template_str) 35 | result = renderer(variables={"name": "World"}) 36 | 37 | # Assert 38 | assert result == "Hello World" 39 | 40 | 41 | @pytest.mark.integration 42 | @pytest.mark.async_test 43 | @pytest.mark.timeout(10) 44 | def test_create_renderer_with_template_object(hass: HomeAssistant): 45 | """Test create_renderer with Template object.""" 46 | # Arrange 47 | template = Template("Value: {{ value }}", hass) 48 | 49 | # Act 50 | renderer = create_renderer(hass, template) 51 | result = renderer(variables={"value": 42}) 52 | 53 | # Assert 54 | assert result == "Value: 42" 55 | 56 | 57 | @pytest.mark.integration 58 | @pytest.mark.async_test 59 | @pytest.mark.timeout(10) 60 | def test_create_renderer_with_context_parameter(hass: 
HomeAssistant): 61 | """Test create_renderer with context for better error messages.""" 62 | # Arrange - use an invalid template syntax that actually raises TemplateError 63 | template = Template("{{ value | invalid_filter }}", hass) 64 | renderer = create_renderer(hass, template, context="test header") 65 | 66 | # Act & Assert - should include context in error 67 | with pytest.raises(TemplateError): 68 | renderer(variables={"value": "test"}) 69 | 70 | 71 | @pytest.mark.integration 72 | @pytest.mark.async_test 73 | @pytest.mark.timeout(10) 74 | def test_create_renderer_with_parse_result_true(hass: HomeAssistant): 75 | """Test create_renderer with parse_result=True.""" 76 | # Arrange 77 | template = Template("{{ value | int }}", hass) 78 | renderer = create_renderer(hass, template) 79 | 80 | # Act 81 | result = renderer(variables={"value": "42"}, parse_result=True) 82 | 83 | # Assert 84 | assert result == 42 85 | assert isinstance(result, int) 86 | 87 | 88 | @pytest.mark.integration 89 | @pytest.mark.async_test 90 | @pytest.mark.timeout(10) 91 | def test_create_renderer_with_parse_result_false(hass: HomeAssistant): 92 | """Test create_renderer with parse_result=False returns string.""" 93 | # Arrange 94 | template = Template("{{ value | int }}", hass) 95 | renderer = create_renderer(hass, template) 96 | 97 | # Act 98 | result = renderer(variables={"value": "42"}, parse_result=False) 99 | 100 | # Assert 101 | assert result == "42" 102 | assert isinstance(result, str) 103 | 104 | 105 | @pytest.mark.integration 106 | @pytest.mark.async_test 107 | @pytest.mark.timeout(10) 108 | def test_create_renderer_with_complex_template(hass: HomeAssistant): 109 | """Test create_renderer with complex Jinja2 template.""" 110 | # Arrange 111 | template_str = "{% if enabled %}{{ value | upper }}{% else %}disabled{% endif %}" 112 | renderer = create_renderer(hass, template_str) 113 | 114 | # Act 115 | result1 = renderer(variables={"enabled": True, "value": "hello"}) 116 | result2 = 
renderer(variables={"enabled": False, "value": "hello"}) 117 | 118 | # Assert 119 | assert result1 == "HELLO" 120 | assert result2 == "disabled" 121 | 122 | 123 | @pytest.mark.integration 124 | @pytest.mark.async_test 125 | @pytest.mark.timeout(10) 126 | def test_create_renderer_with_empty_variables(hass: HomeAssistant): 127 | """Test create_renderer with no variables provided.""" 128 | # Arrange 129 | template = Template("Static text", hass) 130 | renderer = create_renderer(hass, template) 131 | 132 | # Act 133 | result = renderer() 134 | 135 | # Assert 136 | assert result == "Static text" 137 | 138 | 139 | @pytest.mark.integration 140 | @pytest.mark.async_test 141 | @pytest.mark.timeout(10) 142 | def test_create_dict_renderer_with_none_returns_empty_dict_renderer(hass: HomeAssistant): 143 | """Test create_dict_renderer with None returns a renderer that returns empty dict.""" 144 | # Act 145 | renderer = create_dict_renderer(hass, None) 146 | 147 | # Assert 148 | assert renderer() == {} 149 | assert renderer(variables={}) == {} 150 | 151 | 152 | @pytest.mark.integration 153 | @pytest.mark.async_test 154 | @pytest.mark.timeout(10) 155 | def test_create_dict_renderer_with_string_templates(hass: HomeAssistant): 156 | """Test create_dict_renderer with string templates.""" 157 | # Arrange 158 | templates = { 159 | "header1": "Bearer {{ token }}", 160 | "header2": "application/json", 161 | } 162 | 163 | # Act 164 | renderer = create_dict_renderer(hass, templates) 165 | result = renderer(variables={"token": "abc123"}) 166 | 167 | # Assert 168 | assert result["header1"] == "Bearer abc123" 169 | assert result["header2"] == "application/json" 170 | 171 | 172 | @pytest.mark.integration 173 | @pytest.mark.async_test 174 | @pytest.mark.timeout(10) 175 | def test_create_dict_renderer_with_template_objects(hass: HomeAssistant): 176 | """Test create_dict_renderer with Template objects.""" 177 | # Arrange 178 | templates = { 179 | "Authorization": Template("Bearer {{ token }}", 
hass), 180 | "X-User": Template("{{ user }}", hass), 181 | } 182 | 183 | # Act 184 | renderer = create_dict_renderer(hass, templates) 185 | result = renderer(variables={"token": "secret", "user": "john"}) 186 | 187 | # Assert 188 | assert result["Authorization"] == "Bearer secret" 189 | assert result["X-User"] == "john" 190 | 191 | 192 | @pytest.mark.integration 193 | @pytest.mark.async_test 194 | @pytest.mark.timeout(10) 195 | def test_create_dict_renderer_preserves_original_dict(hass: HomeAssistant): 196 | """Test create_dict_renderer doesn't modify the original dictionary.""" 197 | # Arrange 198 | original_templates = { 199 | "key1": "{{ value1 }}", 200 | "key2": "{{ value2 }}", 201 | } 202 | original_copy = original_templates.copy() 203 | 204 | # Act 205 | create_dict_renderer(hass, original_templates) 206 | 207 | # Assert - original dict should be unchanged 208 | assert original_templates == original_copy 209 | 210 | 211 | @pytest.mark.integration 212 | @pytest.mark.async_test 213 | @pytest.mark.timeout(10) 214 | def test_create_dict_renderer_with_empty_dict(hass: HomeAssistant): 215 | """Test create_dict_renderer with empty dictionary.""" 216 | # Arrange 217 | templates = {} 218 | 219 | # Act 220 | renderer = create_dict_renderer(hass, templates) 221 | result = renderer(variables={"key": "value"}) 222 | 223 | # Assert 224 | assert result == {} 225 | 226 | 227 | @pytest.mark.integration 228 | @pytest.mark.async_test 229 | @pytest.mark.timeout(10) 230 | def test_create_dict_renderer_with_multiple_variables(hass: HomeAssistant): 231 | """Test create_dict_renderer with multiple variables in templates.""" 232 | # Arrange 233 | templates = { 234 | "url": "https://{{ domain }}/{{ path }}", 235 | "user": "{{ first }}_{{ last }}", 236 | } 237 | 238 | # Act 239 | renderer = create_dict_renderer(hass, templates) 240 | result = renderer( 241 | variables={"domain": "example.com", "path": "api", "first": "John", "last": "Doe"} 242 | ) 243 | 244 | # Assert 245 | assert 
result["url"] == "https://example.com/api" 246 | assert result["user"] == "John_Doe" 247 | 248 | 249 | @pytest.mark.integration 250 | @pytest.mark.async_test 251 | @pytest.mark.timeout(10) 252 | def test_create_dict_renderer_with_parse_result(hass: HomeAssistant): 253 | """Test create_dict_renderer with parse_result parameter.""" 254 | # Arrange 255 | templates = { 256 | "count": "{{ value | int }}", 257 | "name": "{{ name }}", 258 | } 259 | 260 | # Act 261 | renderer = create_dict_renderer(hass, templates) 262 | result = renderer(variables={"value": "42", "name": "test"}, parse_result=True) 263 | 264 | # Assert 265 | assert result["count"] == 42 266 | assert isinstance(result["count"], int) 267 | assert result["name"] == "test" 268 | 269 | 270 | @pytest.mark.integration 271 | @pytest.mark.async_test 272 | @pytest.mark.timeout(10) 273 | def test_create_renderer_logs_template_error(hass: HomeAssistant, caplog): 274 | """Test create_renderer logs TemplateError with context.""" 275 | # Arrange 276 | template = Template("{{ undefined_var.attribute }}", hass) 277 | renderer = create_renderer(hass, template, context="resource URL") 278 | 279 | # Act & Assert 280 | with pytest.raises(TemplateError): 281 | renderer(variables={}) 282 | 283 | # Check that error was logged with context 284 | assert "resource URL" in caplog.text or "Error rendering template" in caplog.text 285 | -------------------------------------------------------------------------------- /custom_components/multiscrape/coordinator.py: -------------------------------------------------------------------------------- 1 | """Coordinator class for multiscrape integration.""" 2 | import logging 3 | from collections.abc import Callable 4 | from datetime import timedelta 5 | 6 | from homeassistant.const import (CONF_RESOURCE, CONF_RESOURCE_TEMPLATE, 7 | CONF_SCAN_INTERVAL, 8 | EVENT_HOMEASSISTANT_STARTED) 9 | from homeassistant.core import Event, HomeAssistant 10 | from homeassistant.helpers.update_coordinator 
import (DataUpdateCoordinator, 11 | event) 12 | from homeassistant.util.dt import utcnow 13 | 14 | from .const import DOMAIN, MAX_RETRIES, RETRY_DELAY_SECONDS 15 | from .file import LoggingFileManager 16 | from .form import FormSubmitter 17 | from .http import HttpWrapper 18 | from .scraper import Scraper 19 | from .util import create_renderer 20 | 21 | _LOGGER = logging.getLogger(__name__) 22 | # we don't want to go with the default 15 seconds defined in helpers/entity_component 23 | DEFAULT_SCAN_INTERVAL = timedelta(seconds=60) 24 | 25 | 26 | def create_content_request_manager( 27 | config_name, config, hass: HomeAssistant, http, form_submitter 28 | ): 29 | """Create a content request manager instance.""" 30 | _LOGGER.debug("%s # Creating ContentRequestManager", config_name) 31 | resource = config.get(CONF_RESOURCE) 32 | resource_template = config.get(CONF_RESOURCE_TEMPLATE) 33 | 34 | if resource_template is not None: 35 | resource_renderer = create_renderer(hass, resource_template, "resource URL template") 36 | else: 37 | resource_renderer = create_renderer(hass, resource, "resource URL") 38 | return ContentRequestManager(config_name, http, resource_renderer, form_submitter) 39 | 40 | 41 | class ContentRequestManager: 42 | """Responsible for orchestrating all request required to retrieve the desired content.""" 43 | 44 | def __init__( 45 | self, 46 | config_name: str, 47 | http: HttpWrapper, 48 | resource_renderer: Callable, 49 | form: FormSubmitter = None, 50 | ) -> None: 51 | """Initialize ContentRequestManager.""" 52 | self._config_name = config_name 53 | self._http = http 54 | self._form_submitter = form 55 | self._resource_renderer = resource_renderer 56 | self._cookies = None 57 | self._form_variables = {} 58 | 59 | def notify_scrape_exception(self): 60 | """Notify the form_submitter of an exception so it will re-submit next trigger.""" 61 | if self._form_submitter: 62 | self._form_submitter.notify_scrape_exception() 63 | 64 | async def get_content(self) 
-> str: 65 | """Retrieve the content of a url and first submit a form if required.""" 66 | resource = self._resource_renderer() 67 | 68 | if self._form_submitter: 69 | try: 70 | if self._form_submitter.should_submit is True: 71 | result, self._cookies = await self._form_submitter.async_submit(resource) 72 | self._form_variables = self._form_submitter.scrape_variables() 73 | 74 | if result: 75 | _LOGGER.debug( 76 | "%s # Using response from form-submit as content for scraping.", 77 | self._config_name, 78 | ) 79 | return result 80 | else: 81 | _LOGGER.debug("%s # Skip submitting form", self._config_name) 82 | except Exception as ex: 83 | _LOGGER.error( 84 | "%s # Exception in form-submit feature. Will continue trying to scrape target page.\n%s", 85 | self._config_name, 86 | ex, 87 | ) 88 | 89 | response = await self._http.async_request("page", resource, cookies=self._cookies, variables=self._form_variables) 90 | return response.text 91 | 92 | @property 93 | def form_variables(self): 94 | """Return the form variables.""" 95 | return self._form_variables 96 | 97 | 98 | def create_multiscrape_coordinator( 99 | config_name, conf, hass, request_manager, file_manager, scraper 100 | ): 101 | """Create a multiscrape coordinator instance.""" 102 | _LOGGER.debug("%s # Creating coordinator", config_name) 103 | 104 | scan_interval = conf.get(CONF_SCAN_INTERVAL, DEFAULT_SCAN_INTERVAL) 105 | 106 | return MultiscrapeDataUpdateCoordinator( 107 | config_name, 108 | hass, 109 | request_manager, 110 | file_manager, 111 | scraper, 112 | scan_interval, 113 | ) 114 | 115 | 116 | class MultiscrapeDataUpdateCoordinator(DataUpdateCoordinator): 117 | """Multiscrape coordinator class.""" 118 | 119 | def __init__( 120 | self, 121 | config_name, 122 | hass: HomeAssistant, 123 | request_manager: ContentRequestManager, 124 | file_manager: LoggingFileManager, 125 | scraper: Scraper, 126 | update_interval: timedelta | None, 127 | ): 128 | """Initialize the coordinator.""" 129 | self._config_name = 
config_name 130 | self._request_manager = request_manager 131 | self._file_manager = file_manager 132 | self._scraper = scraper 133 | self._update_interval = update_interval 134 | self.update_error = False 135 | self._resource = None 136 | self._retry_count: int = 0 137 | 138 | if self._update_interval == timedelta(seconds=0): 139 | self._update_interval = None 140 | 141 | _LOGGER.debug( 142 | "%s # Scan interval is %s", self._config_name, self._update_interval 143 | ) 144 | 145 | if self._update_interval and self._update_interval > timedelta(days=1): 146 | _LOGGER.warning( 147 | "%s # Scan interval is very long: %s. This may cause delays in data updates.", 148 | self._config_name, 149 | self._update_interval, 150 | ) 151 | 152 | super().__init__( 153 | hass, _LOGGER, name=DOMAIN, update_interval=self._update_interval 154 | ) 155 | 156 | async def _on_hass_start(_: Event) -> None: 157 | """Trigger scrape on startup.""" 158 | if self.update_interval and self.update_interval > timedelta(0): 159 | _LOGGER.debug("%s # Home assistant started, triggering scrape on startup", self._config_name) 160 | await self.async_refresh() 161 | 162 | hass.bus.async_listen_once(EVENT_HOMEASSISTANT_STARTED, _on_hass_start) 163 | 164 | def notify_scrape_exception(self): 165 | """Notify the ContentRequestManager of a scrape exception so it can notify the FormSubmitter.""" 166 | self._request_manager.notify_scrape_exception() 167 | 168 | async def _async_update_data(self): 169 | await self._prepare_new_run() 170 | 171 | try: 172 | response = await self._request_manager.get_content() 173 | await self._scraper.set_content(response) 174 | _LOGGER.debug( 175 | "%s # Data successfully refreshed. 
Sensors will now start scraping to update.", 176 | self._config_name, 177 | ) 178 | self._retry_count = 0 179 | 180 | except Exception as ex: 181 | _LOGGER.error( 182 | "%s # Updating failed with exception: %s", 183 | self._config_name, 184 | ex, 185 | ) 186 | self._scraper.reset() 187 | self.update_error = True 188 | if self._update_interval is None: 189 | self._async_unsub_refresh() 190 | self._retry_count += 1 191 | if self._retry_count <= MAX_RETRIES: 192 | self._unsub_refresh = event.async_track_point_in_utc_time( 193 | self.hass, 194 | self._job, 195 | utcnow().replace(microsecond=self._microsecond) 196 | + timedelta(seconds=RETRY_DELAY_SECONDS), 197 | ) 198 | _LOGGER.warning( 199 | "%s # Since updating failed and scan_interval = 0, retry %s of %s will be scheduled in %s seconds", 200 | self._config_name, 201 | self._retry_count, 202 | MAX_RETRIES, 203 | RETRY_DELAY_SECONDS, 204 | ) 205 | else: 206 | _LOGGER.error( 207 | "%s # Updating and %s retries failed and scan_interval = 0, please manually retry with trigger service.", 208 | self._config_name, 209 | MAX_RETRIES, 210 | ) 211 | 212 | async def _prepare_new_run(self): 213 | _LOGGER.debug( 214 | "%s # New run: start (re)loading data from resource", self._config_name 215 | ) 216 | self.update_error = False 217 | if self._file_manager: 218 | _LOGGER.debug( 219 | "%s # Deleting logging files from previous run", self._config_name 220 | ) 221 | try: 222 | await self.hass.async_add_executor_job(self._file_manager.empty_folder) 223 | except Exception as ex: 224 | _LOGGER.error( 225 | "%s # Error deleting files from previous run: %s", 226 | self._config_name, 227 | ex, 228 | ) 229 | 230 | self._scraper.reset() 231 | 232 | @property 233 | def form_variables(self): 234 | """Return the form variables.""" 235 | return self._request_manager.form_variables 236 | -------------------------------------------------------------------------------- /tests/test_scraper.py: 
--------------------------------------------------------------------------------
"""Integration tests for scraper class.

These tests verify the Scraper class works correctly with Selector and BeautifulSoup
to extract data from HTML content using CSS selectors.
"""

import re

import pytest
from homeassistant.core import HomeAssistant
from homeassistant.helpers.template import Template

from custom_components.multiscrape.const import DEFAULT_SEPARATOR
from custom_components.multiscrape.scraper import Scraper
from custom_components.multiscrape.selector import Selector

from .fixtures.html_samples import (SAMPLE_HTML_EMPTY, SAMPLE_HTML_FULL,
                                    SAMPLE_HTML_LIST, SAMPLE_HTML_MALFORMED,
                                    SAMPLE_HTML_SPECIAL_TAGS)
from .fixtures.json_samples import SAMPLE_JSON_SIMPLE


@pytest.fixture
def scraper_instance(hass: HomeAssistant):
    """Create a Scraper instance for testing (lxml parser, no file manager)."""
    return Scraper("test_scraper", hass, None, "lxml", DEFAULT_SEPARATOR)


@pytest.mark.integration
@pytest.mark.async_test
@pytest.mark.timeout(5)
@pytest.mark.parametrize(
    "selector_config,expected_value",
    [
        # Test text extraction
        (
            {"select": ".current-version h1", "extract": "text"},
            "Current Version: 2024.8.3",
        ),
        # Test content extraction (inner HTML) - note: may include trailing newline
        # NOTE(review): this expected literal appears stripped of its HTML markup
        # in this copy of the file — verify the full literal against version control.
        (
            {"select": ".links", "extract": "content"},
            'Release notes',
        ),
        # Test tag extraction (outer HTML) - note: may include trailing newline
        # NOTE(review): same as above — the outer-HTML literal looks truncated here.
        (
            {"select": ".links", "extract": "tag"},
            '',
        ),
        # Test attribute extraction
        (
            {"select": ".links a", "attribute": "href"},
            "/latest-release-notes/",
        ),
    ],
)
async def test_scraper_extraction_methods(
    hass: HomeAssistant, scraper_instance, selector_config, expected_value
):
    """Test different extraction methods (text, content, tag, attribute)."""
    # Arrange
    await scraper_instance.set_content(SAMPLE_HTML_FULL)

    # Convert string select to Template if present
    if "select" in selector_config:
        selector_config["select"] = Template(selector_config["select"], hass)

    selector = Selector(hass, selector_config)

    # Act
    value = scraper_instance.scrape(selector, "test_sensor")

    # Assert
    # Normalize whitespace for comparison (BeautifulSoup may add newlines/spaces)
    normalized_value = re.sub(r"\s+", " ", value).strip()
    normalized_expected = re.sub(r"\s+", " ", expected_value).strip()
    assert normalized_value == normalized_expected


@pytest.mark.integration
@pytest.mark.async_test
@pytest.mark.timeout(5)
async def test_scraper_with_list_selector(hass: HomeAssistant, scraper_instance):
    """Test scraping multiple elements using list selector."""
    # Arrange
    await scraper_instance.set_content(SAMPLE_HTML_LIST)

    selector_config = {
        "select_list": Template(".item", hass),
        "extract": "text",
    }
    selector = Selector(hass, selector_config)

    # Act
    value = scraper_instance.scrape(selector, "test_sensor")

    # Assert
    # Matched elements are joined with DEFAULT_SEPARATOR ("," in const.py),
    # so three items come back as one comma-separated string.
    assert value == "Item 1,Item 2,Item 3"


@pytest.mark.integration
@pytest.mark.async_test
@pytest.mark.timeout(5)
async def test_scraper_with_special_tags(hass: HomeAssistant, scraper_instance):
    """Test extraction from special tags (script, style, template)."""
    # Arrange
    await scraper_instance.set_content(SAMPLE_HTML_SPECIAL_TAGS)

    # Test script tag
    selector_config = {"select": Template("script", hass)}
    selector = Selector(hass, selector_config)
    value = scraper_instance.scrape(selector, "test_sensor")
    assert 'console.log("test");' in value

    # Test style tag
    selector_config = {"select": Template("style", hass)}
    selector = Selector(hass, selector_config)
    value = scraper_instance.scrape(selector, "test_sensor")
    assert ".test { color: red; }" in value


@pytest.mark.integration
@pytest.mark.async_test
@pytest.mark.timeout(5)
async def test_scraper_selector_not_found_raises_error(
    hass: HomeAssistant, scraper_instance
):
    """Test that scraper raises ValueError when selector matches nothing."""
    # Arrange
    await scraper_instance.set_content(SAMPLE_HTML_FULL)

    selector_config = {
        "select": Template(".nonexistent-class", hass),
        "extract": "text",
    }
    selector = Selector(hass, selector_config)

    # Act & Assert
    with pytest.raises(ValueError, match="Could not find a tag for given selector"):
        scraper_instance.scrape(selector, "test_sensor")


@pytest.mark.integration
@pytest.mark.async_test
@pytest.mark.timeout(5)
async def test_scraper_handles_json_content(hass: HomeAssistant, scraper_instance):
    """Test that scraper detects and handles JSON content correctly."""
    # Arrange
    await scraper_instance.set_content(SAMPLE_JSON_SIMPLE)

    # JSON content should not be parsed by BeautifulSoup
    assert scraper_instance._soup is None
    assert scraper_instance._data == SAMPLE_JSON_SIMPLE


@pytest.mark.integration
@pytest.mark.async_test
@pytest.mark.timeout(5)
async def test_scraper_json_without_value_template_raises_error(
    hass: HomeAssistant, scraper_instance
):
    """Test that attempting to scrape JSON without value_template raises error."""
    # Arrange
    await scraper_instance.set_content(SAMPLE_JSON_SIMPLE)

    selector_config = {
        "select": Template(".something", hass),
        "extract": "text",
    }
    selector = Selector(hass, selector_config)

    # Act & Assert
    with pytest.raises(
        ValueError,
        match="JSON cannot be scraped. Please provide a value template to parse JSON response.",
    ):
        scraper_instance.scrape(selector, "test_sensor")


@pytest.mark.integration
@pytest.mark.async_test
@pytest.mark.timeout(5)
async def test_scraper_reset_clears_content(hass: HomeAssistant, scraper_instance):
    """Test that reset() clears both data and soup."""
    # Arrange
    await scraper_instance.set_content(SAMPLE_HTML_FULL)
    assert scraper_instance._data is not None
    assert scraper_instance._soup is not None

    # Act
    scraper_instance.reset()

    # Assert
    assert scraper_instance._data is None
    assert scraper_instance._soup is None


@pytest.mark.integration
@pytest.mark.async_test
@pytest.mark.timeout(5)
async def test_scraper_handles_empty_content(hass: HomeAssistant, scraper_instance):
    """Test scraper behavior with empty content.

    Current behavior: Empty content is parsed by BeautifulSoup, creating an empty soup.
    This allows the scraper to continue operating even with empty responses.
    """
    # Arrange & Act
    await scraper_instance.set_content(SAMPLE_HTML_EMPTY)

    # Assert
    assert scraper_instance._data == SAMPLE_HTML_EMPTY
    # BeautifulSoup creates an empty soup object for empty string
    assert scraper_instance._soup is not None


@pytest.mark.integration
@pytest.mark.async_test
@pytest.mark.timeout(5)
async def test_scraper_handles_malformed_html(hass: HomeAssistant, scraper_instance):
    """Test that scraper handles malformed HTML gracefully."""
    # Arrange & Act
    await scraper_instance.set_content(SAMPLE_HTML_MALFORMED)

    # Assert - BeautifulSoup should parse it without raising
    assert scraper_instance._soup is not None
    # Can still extract from the parsed structure
    selector_config = {
        "select": Template(".unclosed", hass),
        "extract": "text",
    }
    selector = Selector(hass, selector_config)
    value = scraper_instance.scrape(selector, "test_sensor")
    assert "This paragraph is not closed" in value


@pytest.mark.integration
@pytest.mark.async_test
@pytest.mark.timeout(5)
async def test_scraper_formatted_content_prettifies_html(
    hass: HomeAssistant, scraper_instance
):
    """Test that formatted_content returns prettified HTML."""
    # Arrange
    # NOTE(review): the HTML literal below lost its markup in this copy of the
    # file (tags were stripped by a sanitizer) — restore the original
    # "<...>Test<...>" markup from version control before running.
    await scraper_instance.set_content("
 Test
")

    # Act
    formatted = scraper_instance.formatted_content

    # Assert
    # Prettified HTML should have newlines and indentation
    # BeautifulSoup wraps content in html/body tags and formats with newlines
    assert "\n" in formatted
    assert "
" in formatted 256 | assert "Test" in formatted 257 | # Verify it's actually formatted (has indentation) 258 | assert " " in formatted # Multiple spaces indicate indentation 259 | -------------------------------------------------------------------------------- /custom_components/multiscrape/schema.py: -------------------------------------------------------------------------------- 1 | """The multiscrape component schemas.""" 2 | import logging 3 | 4 | import homeassistant.helpers.config_validation as cv 5 | import voluptuous as vol 6 | from homeassistant.components.binary_sensor import \ 7 | DEVICE_CLASSES_SCHEMA as BINARY_SENSOR_DEVICE_CLASSES_SCHEMA 8 | from homeassistant.components.binary_sensor import \ 9 | DOMAIN as BINARY_SENSOR_DOMAIN 10 | from homeassistant.components.button import DOMAIN as BUTTON_DOMAIN 11 | from homeassistant.components.sensor import \ 12 | DEVICE_CLASSES_SCHEMA as SENSOR_DEVICE_CLASSES_SCHEMA 13 | from homeassistant.components.sensor import DOMAIN as SENSOR_DOMAIN 14 | from homeassistant.components.sensor import \ 15 | STATE_CLASSES_SCHEMA as SENSOR_STATE_CLASSES_SCHEMA 16 | from homeassistant.const import (CONF_AUTHENTICATION, CONF_DEVICE_CLASS, 17 | CONF_FORCE_UPDATE, CONF_HEADERS, CONF_ICON, 18 | CONF_METHOD, CONF_NAME, CONF_PARAMS, 19 | CONF_PASSWORD, CONF_PAYLOAD, CONF_RESOURCE, 20 | CONF_RESOURCE_TEMPLATE, CONF_SCAN_INTERVAL, 21 | CONF_TIMEOUT, CONF_UNIQUE_ID, 22 | CONF_UNIT_OF_MEASUREMENT, CONF_USERNAME, 23 | CONF_VALUE_TEMPLATE, CONF_VERIFY_SSL, 24 | HTTP_BASIC_AUTHENTICATION, 25 | HTTP_DIGEST_AUTHENTICATION) 26 | 27 | from .const import (CONF_ATTR, CONF_EXTRACT, CONF_FORM_INPUT, 28 | CONF_FORM_INPUT_FILTER, CONF_FORM_RESUBMIT_ERROR, 29 | CONF_FORM_SELECT, CONF_FORM_SUBMIT, CONF_FORM_SUBMIT_ONCE, 30 | CONF_FORM_VARIABLES, CONF_LOG_RESPONSE, CONF_ON_ERROR, 31 | CONF_ON_ERROR_DEFAULT, CONF_ON_ERROR_LOG, 32 | CONF_ON_ERROR_VALUE, CONF_ON_ERROR_VALUE_DEFAULT, 33 | CONF_ON_ERROR_VALUE_LAST, CONF_ON_ERROR_VALUE_NONE, 34 | CONF_PARSER, 
CONF_PICTURE, CONF_SELECT, CONF_SELECT_LIST, 35 | CONF_SENSOR_ATTRS, CONF_SEPARATOR, CONF_STATE_CLASS, 36 | DEFAULT_BINARY_SENSOR_NAME, DEFAULT_BUTTON_NAME, 37 | DEFAULT_EXTRACT, DEFAULT_FORCE_UPDATE, DEFAULT_METHOD, 38 | DEFAULT_PARSER, DEFAULT_SENSOR_NAME, DEFAULT_SEPARATOR, 39 | DEFAULT_VERIFY_SSL, DOMAIN, EXTRACT_OPTIONS, LOG_ERROR, 40 | LOG_LEVELS, METHODS) 41 | from .scraper import DEFAULT_TIMEOUT 42 | 43 | _LOGGER = logging.getLogger(__name__) 44 | 45 | HTTP_SCHEMA = { 46 | vol.Exclusive(CONF_RESOURCE, CONF_RESOURCE): cv.url, 47 | vol.Exclusive(CONF_RESOURCE_TEMPLATE, CONF_RESOURCE): cv.template, 48 | vol.Optional(CONF_AUTHENTICATION): vol.In( 49 | [HTTP_BASIC_AUTHENTICATION, HTTP_DIGEST_AUTHENTICATION] 50 | ), 51 | vol.Optional(CONF_HEADERS): vol.Schema({cv.string: cv.template}), 52 | vol.Optional(CONF_PARAMS): vol.Schema({cv.string: cv.template}), 53 | vol.Optional(CONF_METHOD, default=DEFAULT_METHOD): vol.All( 54 | cv.string, # Ensure it's a string 55 | lambda method: method.lower(), # Convert to lowercase 56 | vol.In([m.lower() for m in METHODS]) # Validate against lowercase methods 57 | ), 58 | vol.Optional(CONF_USERNAME): cv.string, 59 | vol.Optional(CONF_PASSWORD): cv.string, 60 | vol.Optional(CONF_PAYLOAD): cv.template, 61 | vol.Optional(CONF_VERIFY_SSL, default=DEFAULT_VERIFY_SSL): cv.boolean, 62 | vol.Optional(CONF_TIMEOUT, default=DEFAULT_TIMEOUT): cv.positive_int, 63 | } 64 | 65 | INTEGRATION_SCHEMA = { 66 | **HTTP_SCHEMA, 67 | vol.Optional(CONF_PARSER, default=DEFAULT_PARSER): cv.string, 68 | vol.Optional(CONF_NAME): cv.string, 69 | vol.Optional(CONF_SCAN_INTERVAL): cv.time_period, 70 | vol.Optional(CONF_LOG_RESPONSE, default=False): cv.boolean, 71 | vol.Optional(CONF_SEPARATOR, default=DEFAULT_SEPARATOR): cv.string, 72 | } 73 | 74 | ON_ERROR_SCHEMA = { 75 | vol.Optional(CONF_ON_ERROR_LOG, default=LOG_ERROR): vol.In(list(LOG_LEVELS.keys())), 76 | vol.Optional(CONF_ON_ERROR_VALUE, default=CONF_ON_ERROR_VALUE_NONE): vol.In( 77 | [ 78 | 
CONF_ON_ERROR_VALUE_LAST, 79 | CONF_ON_ERROR_VALUE_NONE, 80 | CONF_ON_ERROR_VALUE_DEFAULT, 81 | ] 82 | ), 83 | vol.Optional(CONF_ON_ERROR_DEFAULT): cv.template, 84 | } 85 | 86 | SELECTOR_SCHEMA = { 87 | vol.Optional(CONF_SELECT): cv.template, 88 | vol.Optional(CONF_SELECT_LIST): cv.template, 89 | vol.Optional(CONF_ATTR): cv.string, 90 | vol.Optional(CONF_VALUE_TEMPLATE): cv.template, 91 | vol.Optional(CONF_ON_ERROR): vol.Schema(ON_ERROR_SCHEMA), 92 | vol.Optional(CONF_EXTRACT, default=DEFAULT_EXTRACT): vol.In(EXTRACT_OPTIONS), 93 | } 94 | 95 | FORM_HEADERS_MAPPING_SCHEMA = {vol.Required(CONF_NAME): cv.string, **SELECTOR_SCHEMA} 96 | 97 | FORM_SUBMIT_SCHEMA = { 98 | **HTTP_SCHEMA, 99 | vol.Optional(CONF_FORM_SELECT): cv.string, 100 | vol.Optional(CONF_FORM_INPUT): vol.Schema({cv.string: cv.string}), 101 | vol.Optional(CONF_FORM_INPUT_FILTER, default=[]): cv.ensure_list, 102 | vol.Optional(CONF_FORM_SUBMIT_ONCE, default=False): cv.boolean, 103 | vol.Optional(CONF_FORM_RESUBMIT_ERROR, default=True): cv.boolean, 104 | vol.Optional(CONF_FORM_VARIABLES, default=[]): vol.All( 105 | cv.ensure_list, [vol.Schema(FORM_HEADERS_MAPPING_SCHEMA)] 106 | ), 107 | } 108 | 109 | SENSOR_ATTRIBUTE_SCHEMA = {vol.Required(CONF_NAME): cv.string, **SELECTOR_SCHEMA} 110 | 111 | SENSOR_SCHEMA = { 112 | vol.Optional(CONF_NAME, default=DEFAULT_SENSOR_NAME): cv.string, 113 | vol.Optional(CONF_UNIQUE_ID): cv.string, 114 | vol.Optional(CONF_UNIT_OF_MEASUREMENT): cv.string, 115 | vol.Optional(CONF_DEVICE_CLASS): SENSOR_DEVICE_CLASSES_SCHEMA, 116 | vol.Optional(CONF_STATE_CLASS): SENSOR_STATE_CLASSES_SCHEMA, 117 | vol.Optional(CONF_ICON): cv.template, 118 | vol.Optional(CONF_FORCE_UPDATE, default=DEFAULT_FORCE_UPDATE): cv.boolean, 119 | vol.Optional(CONF_PICTURE): cv.string, 120 | **SELECTOR_SCHEMA, 121 | vol.Optional(CONF_SENSOR_ATTRS): vol.All( 122 | cv.ensure_list, [vol.Schema(SENSOR_ATTRIBUTE_SCHEMA)] 123 | ), 124 | } 125 | 126 | BINARY_SENSOR_SCHEMA = { 127 | vol.Optional(CONF_NAME, 
default=DEFAULT_BINARY_SENSOR_NAME): cv.string, 128 | vol.Optional(CONF_UNIQUE_ID): cv.string, 129 | vol.Optional(CONF_DEVICE_CLASS): BINARY_SENSOR_DEVICE_CLASSES_SCHEMA, 130 | vol.Optional(CONF_ICON): cv.template, 131 | vol.Optional(CONF_FORCE_UPDATE, default=DEFAULT_FORCE_UPDATE): cv.boolean, 132 | vol.Optional(CONF_PICTURE): cv.string, 133 | **SELECTOR_SCHEMA, 134 | vol.Optional(CONF_SENSOR_ATTRS): vol.All( 135 | cv.ensure_list, [vol.Schema(SENSOR_ATTRIBUTE_SCHEMA)] 136 | ), 137 | } 138 | 139 | BUTTON_SCHEMA = { 140 | vol.Optional(CONF_NAME, default=DEFAULT_BUTTON_NAME): cv.string, 141 | vol.Optional(CONF_UNIQUE_ID): cv.string, 142 | } 143 | 144 | COMBINED_SCHEMA = vol.Schema( 145 | { 146 | **INTEGRATION_SCHEMA, 147 | vol.Optional(CONF_FORM_SUBMIT): vol.Schema(FORM_SUBMIT_SCHEMA), 148 | vol.Optional(SENSOR_DOMAIN): vol.All( 149 | cv.ensure_list, [vol.Schema(SENSOR_SCHEMA)] 150 | ), 151 | vol.Optional(BINARY_SENSOR_DOMAIN): vol.All( 152 | cv.ensure_list, [vol.Schema(BINARY_SENSOR_SCHEMA)] 153 | ), 154 | vol.Optional(BUTTON_DOMAIN): vol.All( 155 | cv.ensure_list, [vol.Schema(BUTTON_SCHEMA)] 156 | ), 157 | } 158 | ) 159 | 160 | CONFIG_SCHEMA = vol.Schema( 161 | {DOMAIN: vol.All(cv.ensure_list, [COMBINED_SCHEMA])}, 162 | extra=vol.ALLOW_EXTRA, 163 | ) 164 | 165 | 166 | def create_service_schema(): 167 | """Create a schema without templates that render an output value.""" 168 | # Templates are evaluated by home assistant when the service is triggered, so we make them a string and restore them afterwards. 
    # Shadow copies of the YAML schemas where value-rendering templates are
    # plain strings, so HA's service-call template evaluation leaves them intact.
    SERVICE_SELECTOR_SCHEMA = dict(SELECTOR_SCHEMA)
    SERVICE_SELECTOR_SCHEMA.update({vol.Optional(CONF_VALUE_TEMPLATE): cv.string})

    SERVICE_SENSOR_ATTRIBUTE_SCHEMA = {
        vol.Required(CONF_NAME): cv.string,
        **SERVICE_SELECTOR_SCHEMA,
    }

    SERVICE_SENSOR_SCHEMA = dict(SENSOR_SCHEMA)
    SERVICE_SENSOR_SCHEMA.update({vol.Optional(CONF_VALUE_TEMPLATE): cv.string})
    SERVICE_SENSOR_SCHEMA.update({vol.Optional(CONF_ICON): cv.string})
    SERVICE_SENSOR_SCHEMA.update(
        {
            vol.Optional(CONF_SENSOR_ATTRS): vol.All(
                cv.ensure_list, [vol.Schema(SERVICE_SENSOR_ATTRIBUTE_SCHEMA)]
            )
        }
    )

    SERVICE_BINARY_SENSOR_SCHEMA = dict(BINARY_SENSOR_SCHEMA)
    SERVICE_BINARY_SENSOR_SCHEMA.update({vol.Optional(CONF_VALUE_TEMPLATE): cv.string})
    SERVICE_BINARY_SENSOR_SCHEMA.update({vol.Optional(CONF_ICON): cv.string})
    SERVICE_BINARY_SENSOR_SCHEMA.update(
        {
            vol.Optional(CONF_SENSOR_ATTRS): vol.All(
                cv.ensure_list, [vol.Schema(SERVICE_SENSOR_ATTRIBUTE_SCHEMA)]
            )
        }
    )

    return vol.Schema(
        {
            **INTEGRATION_SCHEMA,
            vol.Optional(CONF_FORM_SUBMIT): vol.Schema(FORM_SUBMIT_SCHEMA),
            vol.Optional(SENSOR_DOMAIN): vol.All(
                cv.ensure_list, [vol.Schema(SERVICE_SENSOR_SCHEMA)]
            ),
            vol.Optional(BINARY_SENSOR_DOMAIN): vol.All(
                cv.ensure_list, [vol.Schema(SERVICE_BINARY_SENSOR_SCHEMA)]
            ),
            vol.Optional(BUTTON_DOMAIN): vol.All(
                cv.ensure_list, [vol.Schema(BUTTON_SCHEMA)]
            ),
        }
    )


SERVICE_COMBINED_SCHEMA = create_service_schema()
--------------------------------------------------------------------------------
/custom_components/multiscrape/form.py:
--------------------------------------------------------------------------------
"""Form submit logic."""
import logging
from urllib.parse import urljoin

from bs4 import BeautifulSoup
from homeassistant.const import CONF_NAME, CONF_RESOURCE
from homeassistant.core import HomeAssistant

from custom_components.multiscrape.scraper import create_scraper

from .const import (CONF_FORM_INPUT, CONF_FORM_INPUT_FILTER,
                    CONF_FORM_RESUBMIT_ERROR, CONF_FORM_SELECT,
                    CONF_FORM_SUBMIT_ONCE, CONF_FORM_VARIABLES)
from .file import LoggingFileManager
from .http import HttpWrapper
from .selector import Selector

_LOGGER = logging.getLogger(__name__)


def create_form_submitter(config_name, config, hass, http, file_manager, parser):
    """Create a form submitter instance."""
    resource = config.get(CONF_RESOURCE)
    select = config.get(CONF_FORM_SELECT)
    input_values = config.get(CONF_FORM_INPUT)
    input_filter = config.get(CONF_FORM_INPUT_FILTER)
    resubmit_error = config.get(CONF_FORM_RESUBMIT_ERROR)
    submit_once = config.get(CONF_FORM_SUBMIT_ONCE)

    # A scraper is only needed when form_variables are configured: it parses
    # the submit response so the variable selectors can be applied to it.
    scraper = None
    variables_selectors = {}
    variables = config.get(CONF_FORM_VARIABLES)
    if (variables != []):
        scraper = create_scraper(config_name, config, hass, file_manager)
        for variables_conf in variables:
            variables_selectors[variables_conf.get(CONF_NAME)] = Selector(hass, variables_conf)

    return FormSubmitter(
        config_name,
        hass,
        http,
        file_manager,
        resource,
        select,
        input_values,
        input_filter,
        submit_once,
        resubmit_error,
        variables_selectors,
        scraper,
        parser,
    )


class FormSubmitter:
    """Class to take care of submitting a form."""

    def __init__(
        self,
        config_name,
        hass: HomeAssistant,
        http: HttpWrapper,
        file_manager: LoggingFileManager,
        form_resource,
        select,
        input_values,
        input_filter,
        submit_once,
        resubmit_error,
        variables_selectors,
        scraper,
        parser,
    ):
        """Initialize FormSubmitter class."""
        _LOGGER.debug("%s # Initializing form submitter", config_name)
        self._config_name = config_name
        self._hass = hass
        self._http = http
        self._file_manager = file_manager
        self._form_resource = form_resource
        self._select = select
        self._input_values = input_values
        self._input_filter = input_filter
        self._submit_once = submit_once
        self._resubmit_error = resubmit_error
        self._variables_selectors = variables_selectors
        self._scraper = scraper
        self._parser = parser
        # Submit on the first run; afterwards controlled by submit_once /
        # notify_scrape_exception.
        self._should_submit = True
        self._cookies = None
        self._payload = None

    def notify_scrape_exception(self):
        """Make sure form is re-submitted after an exception."""
        if self._resubmit_error:
            _LOGGER.debug(
                "%s # Exception occurred while scraping, will try to resubmit the form next interval.",
                self._config_name,
            )
            self._should_submit = True

    @property
    def should_submit(self):
        """Return whether the form should be submitted on the next run."""
        return self._should_submit

    async def async_submit(self, main_resource):
        """Submit the form.

        Returns a tuple (page_text_or_None, cookies). The page text is only
        returned when no separate form_resource is configured, in which case
        the submit response doubles as the content to scrape.
        """
        _LOGGER.debug("%s # Starting with form-submit", self._config_name)
        input_fields = {}
        action, method = None, None

        if self._select:
            if self._form_resource:
                page = await self._fetch_form_page(self._form_resource)
            else:
                page = await self._fetch_form_page(main_resource)
            form = await self._async_extract_form(page)

            input_fields = self._get_input_fields(form)
            for field in self._input_filter:
                input_fields.pop(field, None)

            action = form.get("action")
            method = form.get("method")

            _LOGGER.debug(
                "%s # Found form action %s and method %s",
                self._config_name,
                action,
                method,
            )
        else:
            _LOGGER.debug(
                "%s # Skip scraping form, assuming all input is given in config.",
                self._config_name,
            )

        if self._input_values is not None:

            input_fields.update(self._input_values)

            _LOGGER.debug(
                "%s # Merged input fields with input data in config. Result: %s",
                self._config_name,
                input_fields,
            )
        self._payload = input_fields

        if not method:
            method = "POST"

        submit_resource = self._determine_submit_resource(action, main_resource)

        _LOGGER.debug("%s # Submitting the form", self._config_name)
        response = await self._http.async_request(
            "form_submit",
            submit_resource,
            method=method,
            request_data=self._payload,
            cookies=self._cookies
        )
        _LOGGER.debug(
            "%s # Form seems to be submitted successfully (to be sure, use log_response and check file). Now continuing to retrieve target page.",
            self._config_name,
        )

        if self._submit_once:
            self._should_submit = False

        if self._scraper:
            await self._scraper.set_content(response.text)

        if not self._form_resource:
            return response.text, response.cookies
        else:
            return None, response.cookies

    def scrape_variables(self):
        """Scrape header mappings."""
        result = {}
        for variable_key in self._variables_selectors:
            result[variable_key] = self._scraper.scrape(self._variables_selectors[variable_key], variable_key)
        return result

    def _determine_submit_resource(self, action, main_resource):
        # Priority: form action joined onto form_resource, then onto the main
        # resource, then the bare form_resource, finally the main resource.
        resource = main_resource
        if action and self._form_resource:
            resource = urljoin(self._form_resource, action)
        elif action:
            resource = urljoin(main_resource, action)
        elif self._form_resource:
            resource = self._form_resource

        _LOGGER.debug(
            "%s # Determined the url to submit the form to: %s",
            self._config_name,
            resource,
        )
        return resource

    async def _fetch_form_page(self, resource):
        # Side effect: stores the response cookies for the later submit request.
        _LOGGER.debug(
            "%s # Requesting page with form from: %s",
            self._config_name,
            resource,
        )
        response = await self._http.async_request(
            "form_page",
            resource,
            "GET",
        )
        self._cookies = response.cookies
        return response.text

    def _get_input_fields(self, form):
        _LOGGER.debug("%s # Finding all input fields in form", self._config_name)
        elements = form.find_all("input")
        input_fields = {
            element.get("name"): element.get("value")
            for element in elements
            if element.get("name") is not None
        }
        _LOGGER.debug(
            "%s # Found the following input fields: %s", self._config_name, input_fields
        )
        return input_fields

    async def _async_file_log(self, content_name, content):
        """Write scraped form-page content to the debug log folder (best effort)."""
        try:
            filename = f"{content_name}.txt"
            await self._hass.async_add_executor_job(
                self._file_manager.write, filename, content
            )
        except Exception as ex:
            _LOGGER.error(
                "%s # Unable to write BeautifulSoup form-page result to file: %s. 
\nException: %s", 238 | self._config_name, 239 | filename, 240 | ex, 241 | ) 242 | _LOGGER.debug( 243 | "%s # The page with the form parsed by BeautifulSoup has been written to file: %s", 244 | self._config_name, 245 | filename, 246 | ) 247 | 248 | async def _async_extract_form(self, page): 249 | try: 250 | _LOGGER.debug( 251 | "%s # Parse page with form with BeautifulSoup parser %s", 252 | self._config_name, 253 | self._parser, 254 | ) 255 | soup = BeautifulSoup(page, self._parser) 256 | soup.prettify() 257 | if self._file_manager: 258 | await self._async_file_log("form_page_soup", soup) 259 | 260 | _LOGGER.debug( 261 | "%s # Try to find form with selector %s", 262 | self._config_name, 263 | self._select, 264 | ) 265 | form = soup.select_one(self._select) 266 | 267 | if not form: 268 | raise ValueError("Could not find form") 269 | 270 | _LOGGER.debug("%s # Form looks like this: \n%s", self._config_name, form) 271 | return form 272 | 273 | except IndexError as exception: 274 | _LOGGER.info( 275 | "%s # Unable to get the form from the page: %s", 276 | self._config_name, 277 | exception, 278 | ) 279 | raise 280 | -------------------------------------------------------------------------------- /custom_components/multiscrape/http.py: -------------------------------------------------------------------------------- 1 | """HTTP request related functionality.""" 2 | import asyncio 3 | import logging 4 | from collections.abc import Callable 5 | from urllib.parse import parse_qs, urlencode, urlparse, urlunparse 6 | 7 | import httpx 8 | from homeassistant.const import (CONF_AUTHENTICATION, CONF_HEADERS, 9 | CONF_METHOD, CONF_PARAMS, CONF_PASSWORD, 10 | CONF_PAYLOAD, CONF_TIMEOUT, CONF_USERNAME, 11 | CONF_VERIFY_SSL, HTTP_DIGEST_AUTHENTICATION) 12 | from homeassistant.helpers.httpx_client import get_async_client 13 | 14 | from .util import create_dict_renderer, create_renderer 15 | 16 | _LOGGER = logging.getLogger(__name__) 17 | 18 | 19 | def create_http_wrapper(config_name, 
config, hass, file_manager): 20 | """Create a http wrapper instance.""" 21 | verify_ssl = config.get(CONF_VERIFY_SSL) 22 | username = config.get(CONF_USERNAME) 23 | password = config.get(CONF_PASSWORD) 24 | auth_type = config.get(CONF_AUTHENTICATION) 25 | timeout = config.get(CONF_TIMEOUT) 26 | headers = config.get(CONF_HEADERS) 27 | params = config.get(CONF_PARAMS) 28 | payload = config.get(CONF_PAYLOAD) 29 | method = config.get(CONF_METHOD) 30 | 31 | client = get_async_client(hass, verify_ssl) 32 | http = HttpWrapper( 33 | config_name, 34 | hass, 35 | client, 36 | file_manager, 37 | timeout, 38 | method, 39 | params_renderer=create_dict_renderer(hass, params), 40 | headers_renderer=create_dict_renderer(hass, headers), 41 | data_renderer=create_renderer(hass, payload, "request payload"), 42 | ) 43 | if username and password: 44 | http.set_authentication(username, password, auth_type) 45 | return http 46 | 47 | 48 | class HttpWrapper: 49 | """Class to wrap a httpx request.""" 50 | 51 | def __init__( 52 | self, 53 | config_name, 54 | hass, 55 | client, 56 | file_manager, 57 | timeout, 58 | method: str = None, 59 | params_renderer: Callable = None, 60 | headers_renderer: Callable = None, 61 | data_renderer: Callable = None, 62 | ): 63 | """Initialize HttpWrapper.""" 64 | _LOGGER.debug("%s # Initializing http wrapper", config_name) 65 | self._client = client 66 | self._file_manager = file_manager 67 | self._config_name = config_name 68 | self._timeout = timeout 69 | self._hass = hass 70 | self._auth = None 71 | self._method = method 72 | self._params_renderer = params_renderer 73 | self._headers_renderer = headers_renderer 74 | self._data_renderer = data_renderer 75 | 76 | def set_authentication(self, username, password, auth_type): 77 | """Set http authentication.""" 78 | if auth_type == HTTP_DIGEST_AUTHENTICATION: 79 | self._auth = httpx.DigestAuth(username, password) 80 | else: 81 | self._auth = (username, password) 82 | _LOGGER.debug( 83 | "%s # Authentication 
configuration processed", self._config_name) 84 | 85 | async def async_request(self, context, resource, method=None, request_data=None, cookies=None, variables: dict = {}): 86 | """Execute a HTTP request. 87 | 88 | Note: We use per-request cookies (despite httpx deprecation warning) because: 89 | - Cookies come dynamically from form submissions during scraping 90 | - Each integration instance may have different session cookies 91 | - We can't modify the shared HA httpx client's cookie jar 92 | This is the correct pattern for stateful multi-page web scraping. 93 | """ 94 | data = request_data or self._data_renderer(variables) 95 | method = method or self._method or "GET" 96 | headers = self._headers_renderer(variables) 97 | params = self._params_renderer(variables) 98 | 99 | # Merging params in multiscrape since httpx doesn't do it anymore: https://github.com/encode/httpx/issues/3433 100 | merged_resource = merge_url_with_params(resource, params) 101 | 102 | _LOGGER.debug( 103 | "%s # Executing %s-request with a %s to url: %s with headers: %s and cookies: %s.", 104 | self._config_name, 105 | context, 106 | method, 107 | merged_resource, 108 | headers, 109 | cookies 110 | ) 111 | if self._file_manager: 112 | task1 = self._async_file_log("request_headers", context, headers) 113 | task2 = self._async_file_log("request_body", context, data) 114 | task3 = self._async_file_log("request_cookies", context, cookies) 115 | await asyncio.gather(task1, task2, task3) 116 | 117 | response = None 118 | 119 | try: 120 | # Use the appropriate parameter based on data type: 121 | # - dict: use data= for form-encoded POST 122 | # - str/bytes: use content= for raw content 123 | request_params = { 124 | "method": method, 125 | "url": merged_resource, 126 | "headers": headers, 127 | "auth": self._auth, 128 | "timeout": self._timeout, 129 | "follow_redirects": True, 130 | "cookies": cookies, 131 | } 132 | 133 | if data is not None: 134 | if isinstance(data, dict): 135 | 
request_params["data"] = data 136 | else: 137 | request_params["content"] = data 138 | 139 | response = await self._client.request(**request_params) 140 | 141 | _LOGGER.debug( 142 | "%s # Response status code received: %s", 143 | self._config_name, 144 | response.status_code, 145 | ) 146 | if self._file_manager: 147 | task1 = self._async_file_log( 148 | "response_headers", context, response.headers 149 | ) 150 | task2 = self._async_file_log( 151 | "response_body", context, response.text) 152 | task3 = self._async_file_log( 153 | "response_cookies", context, response.cookies) 154 | await asyncio.gather(task1, task2, task3) 155 | 156 | # bit of a hack since httpx also raises an exception for redirects: https://github.com/encode/httpx/blob/c6c8cb1fe2da9380f8046a19cdd5aade586f69c8/CHANGELOG.md#0200-13th-october-2021 157 | if 400 <= response.status_code <= 599: 158 | response.raise_for_status() 159 | return response 160 | except httpx.TimeoutException as ex: 161 | _LOGGER.debug( 162 | "%s # Timeout error while executing %s request to url: %s.\n Error message:\n %s", 163 | self._config_name, 164 | method, 165 | merged_resource, 166 | repr(ex), 167 | ) 168 | await self._handle_request_exception(context, response) 169 | raise 170 | except httpx.RequestError as ex: 171 | _LOGGER.debug( 172 | "%s # Request error while executing %s request to url: %s.\n Error message:\n %s", 173 | self._config_name, 174 | method, 175 | merged_resource, 176 | repr(ex), 177 | ) 178 | await self._handle_request_exception(context, response) 179 | raise 180 | except Exception as ex: 181 | _LOGGER.debug( 182 | "%s # Error executing %s request to url: %s.\n Error message:\n %s", 183 | self._config_name, 184 | method, 185 | merged_resource, 186 | repr(ex), 187 | ) 188 | await self._handle_request_exception(context, response) 189 | raise 190 | 191 | async def _handle_request_exception(self, context, response): 192 | try: 193 | if self._file_manager: 194 | task1 = self._async_file_log( 195 | 
"response_headers_error", context, response.headers 196 | ) 197 | task2 = self._async_file_log( 198 | "response_body_error", context, response.text 199 | ) 200 | task3 = self._async_file_log( 201 | "response_cookies_error", context, response.cookies 202 | ) 203 | await asyncio.gather(task1, task2, task3) 204 | except Exception as exc: 205 | _LOGGER.debug( 206 | "%s # Unable to write headers, cookies and/or body to file during handling of exception.\n Error message:\n %s", 207 | self._config_name, 208 | repr(exc), 209 | ) 210 | 211 | async def _async_file_log(self, content_name, context, content): 212 | """Write content to a file if content is not None.""" 213 | if content is not None: 214 | try: 215 | filename = f"{context}_{content_name}.txt" 216 | await self._hass.async_add_executor_job( 217 | self._file_manager.write, filename, content 218 | ) 219 | except Exception as ex: 220 | _LOGGER.error( 221 | "%s # Unable to write %s to file: %s. \nException: %s", 222 | self._config_name, 223 | content_name, 224 | filename, 225 | ex, 226 | ) 227 | _LOGGER.debug( 228 | "%s # %s written to file: %s", 229 | self._config_name, 230 | content_name, 231 | filename, 232 | ) 233 | 234 | 235 | def merge_url_with_params(url, params): 236 | """Merge URL with parameters.""" 237 | if not params: 238 | return url 239 | 240 | url_parts = list(urlparse(url)) 241 | query = parse_qs(url_parts[4]) 242 | query.update(params) 243 | url_parts[4] = urlencode(query, doseq=True) 244 | try: 245 | return urlunparse(url_parts) 246 | except Exception as ex: 247 | raise ValueError(f"Failed to merge URL with parameters: {ex}") from ex 248 | -------------------------------------------------------------------------------- /tests/test_entity.py: -------------------------------------------------------------------------------- 1 | """Integration tests for MultiscrapeEntity base class functionality.""" 2 | 3 | import pytest 4 | from homeassistant.const import CONF_NAME 5 | from homeassistant.core import 
HomeAssistant 6 | from homeassistant.helpers.template import Template 7 | 8 | from custom_components.multiscrape.const import (CONF_EXTRACT, CONF_ON_ERROR, 9 | CONF_ON_ERROR_DEFAULT, 10 | CONF_ON_ERROR_LOG, 11 | CONF_ON_ERROR_VALUE, 12 | CONF_ON_ERROR_VALUE_DEFAULT, 13 | CONF_ON_ERROR_VALUE_LAST, 14 | CONF_ON_ERROR_VALUE_NONE, 15 | CONF_SELECT) 16 | from custom_components.multiscrape.sensor import MultiscrapeSensor 17 | 18 | from .fixtures.html_samples import SAMPLE_HTML_FULL 19 | 20 | 21 | @pytest.mark.integration 22 | @pytest.mark.async_test 23 | @pytest.mark.timeout(10) 24 | async def test_entity_icon_template_rendering(hass: HomeAssistant, coordinator, scraper): 25 | """Test entity _set_icon renders icon template correctly.""" 26 | from custom_components.multiscrape.selector import Selector 27 | 28 | # Arrange 29 | icon_template = Template( 30 | "{% if value > 50 %}mdi:alert{% else %}mdi:check{% endif %}", hass 31 | ) 32 | 33 | config = { 34 | CONF_NAME: "test_sensor", 35 | CONF_SELECT: Template(".current-version h1", hass), 36 | CONF_EXTRACT: "text", 37 | } 38 | sensor_selector = Selector(hass, config) 39 | 40 | sensor = MultiscrapeSensor( 41 | hass=hass, 42 | coordinator=coordinator, 43 | scraper=scraper, 44 | unique_id="test", 45 | name="test", 46 | unit_of_measurement=None, 47 | device_class=None, 48 | state_class=None, 49 | force_update=False, 50 | icon_template=icon_template, 51 | picture=None, 52 | sensor_selector=sensor_selector, 53 | attribute_selectors={}, 54 | ) 55 | 56 | # Act - test with value > 50 57 | sensor._set_icon(60) 58 | icon1 = sensor._attr_icon 59 | 60 | # Act - test with value <= 50 61 | sensor._set_icon(30) 62 | icon2 = sensor._attr_icon 63 | 64 | # Assert 65 | assert icon1 == "mdi:alert" 66 | assert icon2 == "mdi:check" 67 | 68 | 69 | @pytest.mark.integration 70 | @pytest.mark.async_test 71 | @pytest.mark.timeout(10) 72 | async def test_entity_icon_template_error_handling(hass: HomeAssistant, coordinator, scraper, caplog): 73 | """Test 
entity _set_icon handles TemplateError gracefully.""" 74 | from custom_components.multiscrape.selector import Selector 75 | 76 | # Arrange - template that will raise an error 77 | icon_template = Template("{{ value | invalid_filter }}", hass) 78 | 79 | config = { 80 | CONF_NAME: "test_sensor", 81 | CONF_SELECT: Template(".current-version h1", hass), 82 | CONF_EXTRACT: "text", 83 | } 84 | sensor_selector = Selector(hass, config) 85 | 86 | sensor = MultiscrapeSensor( 87 | hass=hass, 88 | coordinator=coordinator, 89 | scraper=scraper, 90 | unique_id="test", 91 | name="test", 92 | unit_of_measurement=None, 93 | device_class=None, 94 | state_class=None, 95 | force_update=False, 96 | icon_template=icon_template, 97 | picture=None, 98 | sensor_selector=sensor_selector, 99 | attribute_selectors={}, 100 | ) 101 | 102 | # Act 103 | sensor._set_icon(42) 104 | 105 | # Assert - should log error but not crash 106 | assert "Exception occurred when rendering icon template" in caplog.text 107 | 108 | 109 | @pytest.mark.integration 110 | @pytest.mark.async_test 111 | @pytest.mark.timeout(10) 112 | async def test_entity_attribute_on_error_value_none(hass: HomeAssistant, coordinator, scraper): 113 | """Test entity attribute with on_error value set to 'none'.""" 114 | from custom_components.multiscrape.selector import Selector 115 | 116 | # Arrange 117 | config = { 118 | CONF_NAME: "test_sensor", 119 | CONF_SELECT: Template(".current-version h1", hass), 120 | CONF_EXTRACT: "text", 121 | } 122 | 123 | attr_config = { 124 | CONF_NAME: "broken_attr", 125 | CONF_SELECT: Template(".nonexistent", hass), 126 | CONF_EXTRACT: "text", 127 | CONF_ON_ERROR: { 128 | CONF_ON_ERROR_VALUE: CONF_ON_ERROR_VALUE_NONE, 129 | CONF_ON_ERROR_LOG: "warning", 130 | }, 131 | } 132 | 133 | sensor_selector = Selector(hass, config) 134 | attribute_selectors = {"broken_attr": Selector(hass, attr_config)} 135 | 136 | sensor = MultiscrapeSensor( 137 | hass=hass, 138 | coordinator=coordinator, 139 | scraper=scraper, 
140 | unique_id="test", 141 | name="test", 142 | unit_of_measurement=None, 143 | device_class=None, 144 | state_class=None, 145 | force_update=False, 146 | icon_template=None, 147 | picture=None, 148 | sensor_selector=sensor_selector, 149 | attribute_selectors=attribute_selectors, 150 | ) 151 | 152 | await scraper.set_content(SAMPLE_HTML_FULL) 153 | 154 | # Act 155 | sensor._update_sensor() 156 | sensor._update_attributes() 157 | 158 | # Assert - attribute should be None 159 | assert sensor._attr_extra_state_attributes.get("broken_attr") is None 160 | 161 | 162 | @pytest.mark.integration 163 | @pytest.mark.async_test 164 | @pytest.mark.timeout(10) 165 | async def test_entity_attribute_on_error_value_last(hass: HomeAssistant, coordinator, scraper): 166 | """Test entity attribute with on_error value set to 'last'.""" 167 | from custom_components.multiscrape.selector import Selector 168 | 169 | # Arrange 170 | config = { 171 | CONF_NAME: "test_sensor", 172 | CONF_SELECT: Template(".current-version h1", hass), 173 | CONF_EXTRACT: "text", 174 | } 175 | 176 | # First set to a value that exists, then to one that doesn't 177 | attr_config_working = { 178 | CONF_NAME: "test_attr", 179 | CONF_SELECT: Template(".current-version h1", hass), 180 | CONF_EXTRACT: "text", 181 | } 182 | 183 | attr_config_broken = { 184 | CONF_NAME: "test_attr", 185 | CONF_SELECT: Template(".nonexistent", hass), 186 | CONF_EXTRACT: "text", 187 | CONF_ON_ERROR: { 188 | CONF_ON_ERROR_VALUE: CONF_ON_ERROR_VALUE_LAST, 189 | CONF_ON_ERROR_LOG: "warning", 190 | }, 191 | } 192 | 193 | sensor_selector = Selector(hass, config) 194 | 195 | sensor = MultiscrapeSensor( 196 | hass=hass, 197 | coordinator=coordinator, 198 | scraper=scraper, 199 | unique_id="test", 200 | name="test", 201 | unit_of_measurement=None, 202 | device_class=None, 203 | state_class=None, 204 | force_update=False, 205 | icon_template=None, 206 | picture=None, 207 | sensor_selector=sensor_selector, 208 | attribute_selectors={"test_attr": 
Selector(hass, attr_config_working)}, 209 | ) 210 | 211 | await scraper.set_content(SAMPLE_HTML_FULL) 212 | 213 | # Act - first update with working selector 214 | sensor._update_sensor() 215 | sensor._update_attributes() 216 | first_value = sensor._attr_extra_state_attributes.get("test_attr") 217 | 218 | # Change to broken selector 219 | sensor._attribute_selectors = {"test_attr": Selector(hass, attr_config_broken)} 220 | 221 | # Update again (should keep last value) 222 | sensor._update_attributes() 223 | second_value = sensor._attr_extra_state_attributes.get("test_attr") 224 | 225 | # Assert - should keep the last value 226 | assert first_value == "Current Version: 2024.8.3" 227 | assert second_value == "Current Version: 2024.8.3" 228 | 229 | 230 | @pytest.mark.integration 231 | @pytest.mark.async_test 232 | @pytest.mark.timeout(10) 233 | async def test_entity_attribute_on_error_value_default(hass: HomeAssistant, coordinator, scraper): 234 | """Test entity attribute with on_error value set to 'default'.""" 235 | from custom_components.multiscrape.selector import Selector 236 | 237 | # Arrange 238 | config = { 239 | CONF_NAME: "test_sensor", 240 | CONF_SELECT: Template(".current-version h1", hass), 241 | CONF_EXTRACT: "text", 242 | } 243 | 244 | attr_config = { 245 | CONF_NAME: "broken_attr", 246 | CONF_SELECT: Template(".nonexistent", hass), 247 | CONF_EXTRACT: "text", 248 | CONF_ON_ERROR: { 249 | CONF_ON_ERROR_VALUE: CONF_ON_ERROR_VALUE_DEFAULT, 250 | CONF_ON_ERROR_DEFAULT: Template("fallback_value", hass), 251 | CONF_ON_ERROR_LOG: "warning", 252 | }, 253 | } 254 | 255 | sensor_selector = Selector(hass, config) 256 | attribute_selectors = {"broken_attr": Selector(hass, attr_config)} 257 | 258 | sensor = MultiscrapeSensor( 259 | hass=hass, 260 | coordinator=coordinator, 261 | scraper=scraper, 262 | unique_id="test", 263 | name="test", 264 | unit_of_measurement=None, 265 | device_class=None, 266 | state_class=None, 267 | force_update=False, 268 | 
icon_template=None, 269 | picture=None, 270 | sensor_selector=sensor_selector, 271 | attribute_selectors=attribute_selectors, 272 | ) 273 | 274 | await scraper.set_content(SAMPLE_HTML_FULL) 275 | 276 | # Act 277 | sensor._update_sensor() 278 | sensor._update_attributes() 279 | 280 | # Assert - attribute should have default value 281 | assert sensor._attr_extra_state_attributes.get("broken_attr") == "fallback_value" 282 | 283 | 284 | @pytest.mark.integration 285 | @pytest.mark.async_test 286 | @pytest.mark.timeout(10) 287 | async def test_entity_attribute_error_logging_disabled(hass: HomeAssistant, coordinator, scraper, caplog): 288 | """Test entity attribute error logging can be disabled.""" 289 | from custom_components.multiscrape.selector import Selector 290 | 291 | # Arrange 292 | config = { 293 | CONF_NAME: "test_sensor", 294 | CONF_SELECT: Template(".current-version h1", hass), 295 | CONF_EXTRACT: "text", 296 | } 297 | 298 | # Set log to false to disable error logging 299 | attr_config = { 300 | CONF_NAME: "broken_attr", 301 | CONF_SELECT: Template(".nonexistent", hass), 302 | CONF_EXTRACT: "text", 303 | CONF_ON_ERROR: { 304 | CONF_ON_ERROR_VALUE: CONF_ON_ERROR_VALUE_NONE, 305 | CONF_ON_ERROR_LOG: False, # Disable logging 306 | }, 307 | } 308 | 309 | sensor_selector = Selector(hass, config) 310 | attribute_selectors = {"broken_attr": Selector(hass, attr_config)} 311 | 312 | sensor = MultiscrapeSensor( 313 | hass=hass, 314 | coordinator=coordinator, 315 | scraper=scraper, 316 | unique_id="test", 317 | name="test", 318 | unit_of_measurement=None, 319 | device_class=None, 320 | state_class=None, 321 | force_update=False, 322 | icon_template=None, 323 | picture=None, 324 | sensor_selector=sensor_selector, 325 | attribute_selectors=attribute_selectors, 326 | ) 327 | 328 | await scraper.set_content(SAMPLE_HTML_FULL) 329 | caplog.clear() 330 | 331 | # Act 332 | sensor._update_sensor() 333 | sensor._update_attributes() 334 | 335 | # Assert - "Unable to extract" 
message should not be logged 336 | assert "Unable to extract data from HTML" not in caplog.text 337 | 338 | 339 | @pytest.mark.integration 340 | @pytest.mark.async_test 341 | @pytest.mark.timeout(10) 342 | async def test_entity_attribute_error_logging_custom_level(hass: HomeAssistant, coordinator, scraper, caplog): 343 | """Test entity attribute error logging with custom log level.""" 344 | from custom_components.multiscrape.selector import Selector 345 | 346 | # Arrange 347 | config = { 348 | CONF_NAME: "test_sensor", 349 | CONF_SELECT: Template(".current-version h1", hass), 350 | CONF_EXTRACT: "text", 351 | } 352 | 353 | attr_config = { 354 | CONF_NAME: "broken_attr", 355 | CONF_SELECT: Template(".nonexistent", hass), 356 | CONF_EXTRACT: "text", 357 | CONF_ON_ERROR: { 358 | CONF_ON_ERROR_VALUE: CONF_ON_ERROR_VALUE_NONE, 359 | CONF_ON_ERROR_LOG: "error", # Custom log level 360 | }, 361 | } 362 | 363 | sensor_selector = Selector(hass, config) 364 | attribute_selectors = {"broken_attr": Selector(hass, attr_config)} 365 | 366 | sensor = MultiscrapeSensor( 367 | hass=hass, 368 | coordinator=coordinator, 369 | scraper=scraper, 370 | unique_id="test", 371 | name="test", 372 | unit_of_measurement=None, 373 | device_class=None, 374 | state_class=None, 375 | force_update=False, 376 | icon_template=None, 377 | picture=None, 378 | sensor_selector=sensor_selector, 379 | attribute_selectors=attribute_selectors, 380 | ) 381 | 382 | await scraper.set_content(SAMPLE_HTML_FULL) 383 | 384 | # Act 385 | sensor._update_sensor() 386 | sensor._update_attributes() 387 | 388 | # Assert - error should be logged with ERROR level 389 | assert "Unable to extract data from HTML" in caplog.text 390 | -------------------------------------------------------------------------------- /tests/test_init.py: -------------------------------------------------------------------------------- 1 | """Integration tests for multiscrape __init__.py setup.""" 2 | 3 | from datetime import timedelta 4 | 5 | 
import pytest 6 | from homeassistant.const import (CONF_NAME, CONF_RESOURCE, CONF_SCAN_INTERVAL, 7 | SERVICE_RELOAD, Platform) 8 | from homeassistant.core import HomeAssistant 9 | from homeassistant.helpers.template import Template 10 | 11 | from custom_components.multiscrape import (_async_process_config, 12 | _async_setup_shared_data, 13 | async_get_config_and_coordinator, 14 | async_setup) 15 | from custom_components.multiscrape.const import (COORDINATOR, DOMAIN, 16 | PLATFORM_IDX, SCRAPER, 17 | SCRAPER_DATA, SCRAPER_IDX) 18 | 19 | 20 | @pytest.fixture 21 | def minimal_config(): 22 | """Create minimal valid configuration.""" 23 | return { 24 | DOMAIN: [ 25 | { 26 | CONF_NAME: "test_scraper", 27 | CONF_RESOURCE: "https://example.com", 28 | CONF_SCAN_INTERVAL: timedelta(seconds=60), 29 | Platform.SENSOR: [ 30 | { 31 | CONF_NAME: "test_sensor", 32 | "select": ".value", 33 | } 34 | ], 35 | } 36 | ] 37 | } 38 | 39 | 40 | @pytest.fixture 41 | def empty_config(): 42 | """Create configuration with no resource (service-only mode).""" 43 | return {DOMAIN: [{}]} 44 | 45 | 46 | @pytest.mark.integration 47 | @pytest.mark.async_test 48 | @pytest.mark.timeout(10) 49 | async def test_async_setup_shared_data(hass: HomeAssistant): 50 | """Test _async_setup_shared_data creates required data structures.""" 51 | # Act 52 | _async_setup_shared_data(hass) 53 | 54 | # Assert 55 | assert DOMAIN in hass.data 56 | assert SCRAPER_DATA in hass.data[DOMAIN] 57 | assert Platform.SENSOR in hass.data[DOMAIN] 58 | assert Platform.BINARY_SENSOR in hass.data[DOMAIN] 59 | assert Platform.BUTTON in hass.data[DOMAIN] 60 | assert isinstance(hass.data[DOMAIN][SCRAPER_DATA], list) 61 | assert isinstance(hass.data[DOMAIN][Platform.SENSOR], list) 62 | 63 | 64 | @pytest.mark.integration 65 | @pytest.mark.async_test 66 | @pytest.mark.timeout(10) 67 | async def test_async_setup_registers_reload_service(hass: HomeAssistant, empty_config): 68 | """Test async_setup registers the reload service.""" 69 | # Act 70 
| await async_setup(hass, empty_config) 71 | 72 | # Assert 73 | assert hass.services.has_service(DOMAIN, SERVICE_RELOAD) 74 | 75 | 76 | @pytest.mark.integration 77 | @pytest.mark.async_test 78 | @pytest.mark.timeout(10) 79 | async def test_async_setup_registers_integration_services( 80 | hass: HomeAssistant, empty_config 81 | ): 82 | """Test async_setup registers integration-level services.""" 83 | # Act 84 | await async_setup(hass, empty_config) 85 | 86 | # Assert - get_content and scrape services should be registered 87 | assert hass.services.has_service(DOMAIN, "get_content") 88 | assert hass.services.has_service(DOMAIN, "scrape") 89 | 90 | 91 | @pytest.mark.integration 92 | @pytest.mark.async_test 93 | @pytest.mark.timeout(10) 94 | async def test_async_setup_service_only_mode(hass: HomeAssistant, empty_config): 95 | """Test async_setup works in service-only mode (no resource configured).""" 96 | # Act 97 | result = await async_setup(hass, empty_config) 98 | 99 | # Assert 100 | assert result is True 101 | # Should have set up services but not processed any scrapers 102 | assert hass.services.has_service(DOMAIN, "get_content") 103 | assert hass.services.has_service(DOMAIN, "scrape") 104 | 105 | 106 | @pytest.mark.integration 107 | @pytest.mark.async_test 108 | @pytest.mark.timeout(10) 109 | @pytest.mark.respx 110 | async def test_async_process_config_creates_scraper_data( 111 | hass: HomeAssistant, minimal_config 112 | ): 113 | """Test _async_process_config creates scraper data structures.""" 114 | # Arrange 115 | _async_setup_shared_data(hass) 116 | 117 | # Act 118 | result = await _async_process_config(hass, minimal_config) 119 | 120 | # Assert 121 | assert result is True 122 | assert len(hass.data[DOMAIN][SCRAPER_DATA]) == 1 123 | scraper_data = hass.data[DOMAIN][SCRAPER_DATA][0] 124 | assert SCRAPER in scraper_data 125 | assert COORDINATOR in scraper_data 126 | 127 | 128 | @pytest.mark.integration 129 | @pytest.mark.async_test 130 | @pytest.mark.timeout(10) 
131 | @pytest.mark.respx 132 | async def test_async_process_config_generates_name_for_unnamed_scraper( 133 | hass: HomeAssistant 134 | ): 135 | """Test _async_process_config generates name when CONF_NAME is missing.""" 136 | # Arrange 137 | _async_setup_shared_data(hass) 138 | config_without_name = { 139 | DOMAIN: [ 140 | { 141 | CONF_RESOURCE: "https://example.com", 142 | Platform.SENSOR: [ 143 | { 144 | CONF_NAME: "test_sensor", 145 | "select": ".value", 146 | } 147 | ], 148 | } 149 | ] 150 | } 151 | 152 | # Act 153 | result = await _async_process_config(hass, config_without_name) 154 | 155 | # Assert 156 | assert result is True 157 | # Should have generated a name like "Scraper_noname_0" 158 | assert len(hass.data[DOMAIN][SCRAPER_DATA]) == 1 159 | 160 | 161 | @pytest.mark.integration 162 | @pytest.mark.async_test 163 | @pytest.mark.timeout(10) 164 | @pytest.mark.respx 165 | async def test_async_process_config_creates_config_services( 166 | hass: HomeAssistant, minimal_config 167 | ): 168 | """Test _async_process_config creates per-config services.""" 169 | # Arrange 170 | _async_setup_shared_data(hass) 171 | 172 | # Act 173 | await _async_process_config(hass, minimal_config) 174 | 175 | # Assert - trigger service should be registered 176 | assert hass.services.has_service(DOMAIN, "trigger_test_scraper") 177 | 178 | 179 | @pytest.mark.integration 180 | @pytest.mark.async_test 181 | @pytest.mark.timeout(10) 182 | @pytest.mark.respx 183 | async def test_async_process_config_stores_platform_configs( 184 | hass: HomeAssistant, minimal_config 185 | ): 186 | """Test _async_process_config stores platform configurations.""" 187 | # Arrange 188 | _async_setup_shared_data(hass) 189 | 190 | # Act 191 | await _async_process_config(hass, minimal_config) 192 | 193 | # Assert 194 | assert len(hass.data[DOMAIN][Platform.SENSOR]) == 1 195 | sensor_config = hass.data[DOMAIN][Platform.SENSOR][0] 196 | assert sensor_config[CONF_NAME] == "test_sensor" 197 | 198 | 199 | 
@pytest.mark.integration 200 | @pytest.mark.async_test 201 | @pytest.mark.timeout(10) 202 | @pytest.mark.respx 203 | async def test_async_process_config_with_multiple_platforms(hass: HomeAssistant): 204 | """Test _async_process_config handles multiple platforms.""" 205 | # Arrange 206 | _async_setup_shared_data(hass) 207 | config = { 208 | DOMAIN: [ 209 | { 210 | CONF_NAME: "multi_platform_scraper", 211 | CONF_RESOURCE: "https://example.com", 212 | Platform.SENSOR: [ 213 | { 214 | CONF_NAME: "sensor1", 215 | "select": ".value1", 216 | } 217 | ], 218 | Platform.BINARY_SENSOR: [ 219 | { 220 | CONF_NAME: "binary1", 221 | "select": ".status", 222 | } 223 | ], 224 | Platform.BUTTON: [ 225 | { 226 | CONF_NAME: "button1", 227 | } 228 | ], 229 | } 230 | ] 231 | } 232 | 233 | # Act 234 | await _async_process_config(hass, config) 235 | 236 | # Assert 237 | assert len(hass.data[DOMAIN][Platform.SENSOR]) == 1 238 | assert len(hass.data[DOMAIN][Platform.BINARY_SENSOR]) == 1 239 | assert len(hass.data[DOMAIN][Platform.BUTTON]) == 1 240 | 241 | 242 | @pytest.mark.integration 243 | @pytest.mark.async_test 244 | @pytest.mark.timeout(10) 245 | @pytest.mark.respx 246 | async def test_async_process_config_with_multiple_sensors_per_scraper( 247 | hass: HomeAssistant, 248 | ): 249 | """Test _async_process_config handles multiple sensors in one scraper.""" 250 | # Arrange 251 | _async_setup_shared_data(hass) 252 | config = { 253 | DOMAIN: [ 254 | { 255 | CONF_NAME: "multi_sensor_scraper", 256 | CONF_RESOURCE: "https://example.com", 257 | Platform.SENSOR: [ 258 | {CONF_NAME: "sensor1", "select": ".value1"}, 259 | {CONF_NAME: "sensor2", "select": ".value2"}, 260 | {CONF_NAME: "sensor3", "select": ".value3"}, 261 | ], 262 | } 263 | ] 264 | } 265 | 266 | # Act 267 | await _async_process_config(hass, config) 268 | 269 | # Assert 270 | assert len(hass.data[DOMAIN][Platform.SENSOR]) == 3 271 | assert len(hass.data[DOMAIN][SCRAPER_DATA]) == 1 # Only one scraper 272 | 273 | 274 | 
@pytest.mark.integration
@pytest.mark.async_test
@pytest.mark.timeout(10)
@pytest.mark.respx
async def test_async_process_config_with_multiple_scrapers(hass: HomeAssistant):
    """Test _async_process_config handles multiple scraper configurations."""
    # Arrange: two independent scraper entries, one sensor each.
    _async_setup_shared_data(hass)
    two_scraper_config = {
        DOMAIN: [
            {
                CONF_NAME: "scraper1",
                CONF_RESOURCE: "https://example.com/1",
                Platform.SENSOR: [{CONF_NAME: "sensor1", "select": ".value"}],
            },
            {
                CONF_NAME: "scraper2",
                CONF_RESOURCE: "https://example.com/2",
                Platform.SENSOR: [{CONF_NAME: "sensor2", "select": ".value"}],
            },
        ]
    }

    # Act
    await _async_process_config(hass, two_scraper_config)

    # Assert
    assert len(hass.data[DOMAIN][SCRAPER_DATA]) == 2
    assert len(hass.data[DOMAIN][Platform.SENSOR]) == 2
    # Each scraper should have its own trigger service
    assert hass.services.has_service(DOMAIN, "trigger_scraper1")
    assert hass.services.has_service(DOMAIN, "trigger_scraper2")


@pytest.mark.integration
@pytest.mark.async_test
@pytest.mark.timeout(10)
@pytest.mark.respx
async def test_async_get_config_and_coordinator(hass: HomeAssistant, minimal_config):
    """Test async_get_config_and_coordinator retrieves correct data."""
    # Arrange: process the minimal fixture, then look up index (0, 0).
    _async_setup_shared_data(hass)
    await _async_process_config(hass, minimal_config)
    discovery_info = {SCRAPER_IDX: 0, PLATFORM_IDX: 0}

    # Act
    conf, coordinator, scraper = await async_get_config_and_coordinator(
        hass, Platform.SENSOR, discovery_info
    )

    # Assert: config matches the fixture and both helper objects exist.
    assert conf[CONF_NAME] == "test_sensor"
    assert coordinator is not None
    assert scraper is not None


@pytest.mark.integration
@pytest.mark.async_test
@pytest.mark.timeout(10)
@pytest.mark.respx
async def test_async_process_config_with_form_submit(hass: HomeAssistant):
    """Test _async_process_config handles form submission configuration."""
    # Arrange: a scraper whose form_submit block targets a login page.
    _async_setup_shared_data(hass)
    form_config = {
        DOMAIN: [
            {
                CONF_NAME: "form_scraper",
                CONF_RESOURCE: "https://example.com/data",
                "form_submit": {
                    CONF_RESOURCE: "https://example.com/login",
                    "select": "form",
                    "input": {"username": "user", "password": "pass"},
                    "variables": [],  # Form variables (can be empty list)
                },
                Platform.SENSOR: [{CONF_NAME: "sensor1", "select": ".value"}],
            }
        ]
    }

    # Act
    succeeded = await _async_process_config(hass, form_config)

    # Assert
    assert succeeded is True
    assert len(hass.data[DOMAIN][SCRAPER_DATA]) == 1


@pytest.mark.integration
@pytest.mark.async_test
@pytest.mark.timeout(10)
@pytest.mark.respx
async def test_async_process_config_with_resource_template(hass: HomeAssistant):
    """Test _async_process_config handles resource_template."""
    # Arrange: URL rendered from a template instead of a static resource.
    _async_setup_shared_data(hass)
    template_config = {
        DOMAIN: [
            {
                CONF_NAME: "template_scraper",
                "resource_template": Template(
                    "https://example.com/{{ states('sensor.id') }}", hass
                ),
                Platform.SENSOR: [{CONF_NAME: "sensor1", "select": ".value"}],
            }
        ]
    }

    # Act
    succeeded = await _async_process_config(hass, template_config)

    # Assert
    assert succeeded is True
    assert len(hass.data[DOMAIN][SCRAPER_DATA]) == 1


@pytest.mark.integration
@pytest.mark.async_test
@pytest.mark.timeout(10)
@pytest.mark.respx
async def test_async_process_config_skips_platforms_not_in_config(hass: HomeAssistant):
    """Test _async_process_config only processes platforms that are configured."""
    # Arrange: sensor platform only; binary_sensor and button are omitted.
    _async_setup_shared_data(hass)
    sensor_only_config = {
        DOMAIN: [
            {
                CONF_NAME: "sensor_only_scraper",
                CONF_RESOURCE: "https://example.com",
                Platform.SENSOR: [{CONF_NAME: "sensor1", "select": ".value"}],
                # No binary_sensor or button configured
            }
        ]
    }

    # Act
    await _async_process_config(hass, sensor_only_config)

    # Assert: unconfigured platforms stay empty.
    assert len(hass.data[DOMAIN][Platform.SENSOR]) == 1
    assert len(hass.data[DOMAIN][Platform.BINARY_SENSOR]) == 0
    assert len(hass.data[DOMAIN][Platform.BUTTON]) == 0