├── .env.example ├── .flake8 ├── .gitattributes ├── .github ├── actions │ ├── linux │ │ └── action.yaml │ └── windows │ │ └── action.yaml ├── dependabot.yml └── workflows │ ├── ci_linux.yml │ ├── ci_windows.yml │ └── static.yml ├── .gitignore ├── .pydocstyle.ini ├── .pypi └── README.md ├── CHANGELOG.md ├── LICENSE ├── Makefile ├── PROMO.md ├── README.md ├── bas_release ├── .gitkeep └── PyBasFree.zip ├── cmd_initial.py ├── cmd_worker.py ├── codecov.yml ├── docs └── images │ ├── bas_gui_window_1.png │ ├── bas_gui_window_1_proxy.png │ ├── bas_gui_window_2.png │ └── bas_gui_window_3.png ├── logs └── .gitkeep ├── poetry.lock ├── pybas_automation ├── __init__.py ├── bas_actions │ ├── __init__.py │ ├── browser │ │ ├── __init__.py │ │ ├── browser_settings │ │ │ ├── __init__.py │ │ │ └── models.py │ │ └── proxy │ │ │ ├── __init__.py │ │ │ └── models.py │ └── fingerprint_switcher │ │ ├── __init__.py │ │ └── apply_fingerprint │ │ ├── __init__.py │ │ └── models.py ├── browser_automator │ ├── __init__.py │ ├── browser_automator.py │ ├── cdp_client.py │ └── models.py ├── browser_profile │ ├── __init__.py │ ├── models.py │ ├── proxy.py │ ├── settings.py │ └── storage.py ├── fingerprint │ ├── __init__.py │ ├── fingerprint.py │ └── models.py ├── proxy_providers │ ├── __init__.py │ └── brightdata │ │ ├── __init__.py │ │ └── models.py ├── settings.py ├── task │ ├── __init__.py │ ├── models.py │ ├── settings.py │ └── storage.py └── utils │ ├── __init__.py │ ├── filesystem.py │ ├── logger.py │ └── utils.py ├── pyproject.toml ├── reports └── .gitkeep ├── scripts └── update_readme_links.py └── tests ├── __init__.py ├── conftest.py ├── contrib ├── __init__.py └── socks5_server │ ├── __init__.py │ └── server.py ├── e2e ├── __init__.py ├── basic │ └── test_basic.py └── conftest.py ├── fixtures ├── Actual.PyBasFreeTemplate.xml.default.xml └── fingerprint_raw.zip └── functional ├── __init__.py ├── browser_automator └── test_browser_automator.py ├── browser_profile ├── __init__.py ├── 
cassettes │ ├── TestBrowserProfile.test_save_all.yaml │ ├── TestBrowserProfile.test_save_fingerprint.yaml │ ├── TestBrowserProfileStorage.test_create_no_fingerprint.yaml │ ├── TestBrowserProfileStorage.test_create_with_profile_name.yaml │ └── TestBrowserProfileStorage.test_serialize_deserialize.yaml ├── test_browser_profile.py ├── test_browser_profile_proxy.py └── test_browser_profile_storage.py ├── cmd ├── __init__.py ├── cassettes │ ├── TestCmdInitial.test_main.yaml │ ├── TestCmdInitial.test_main_proxy.yaml │ ├── TestCmdWorker.test_main.yaml │ └── TestCmdWorker.test_main_proxy.yaml ├── test_cmd_initial.py └── test_cmd_worker.py ├── conftest.py ├── fingerprint ├── __init__.py ├── cassettes │ └── TestFingerprint.test_get_fingerprint.yaml └── test_fingerprint.py ├── proxy_providers ├── __init__.py └── brightdata │ ├── __init__.py │ └── test_brightdata.py └── task ├── __init__.py ├── conftest.py └── test_storage.py /.env.example: -------------------------------------------------------------------------------- 1 | # Used in tests 2 | DEBUG_TESTS=true 3 | DEBUG_CDP=true 4 | BAS_APP_NAME=PyBasFreeDev 5 | 6 | FINGERPRINT_KEY= 7 | BRIGHTDATA_USERNAME= 8 | BRIGHTDATA_PASSWORD= 9 | 10 | # for uploading release to pypi 11 | PYPI_PASSWORD= 12 | 13 | ##### Used for e2e tests 14 | TEST_TASK_ID= 15 | TEST_REMOTE_DEBUGGING_PORT= 16 | TEST_UNIQUE_PROCESS_ID= -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.zip filter=lfs diff=lfs merge=lfs -text 2 | tests/cassettes/*.yaml filter=lfs diff=lfs merge=lfs -text 3 | tests/**/cassettes/*.yaml filter=lfs diff=lfs merge=lfs -text 4 | *.jpg filter=lfs diff=lfs merge=lfs -text 5 | *.png filter=lfs 
diff=lfs merge=lfs -text 6 | -------------------------------------------------------------------------------- /.github/actions/linux/action.yaml: -------------------------------------------------------------------------------- 1 | name: 'Setup Python and Poetry.' 2 | description: 'Set up Python, install Poetry, cache dependencies, and install them with Poetry.' 3 | 4 | inputs: 5 | python-version: 6 | description: 'Version of Python to use.' 7 | required: true 8 | default: '3.11' 9 | 10 | runs: 11 | using: 'composite' 12 | steps: 13 | - name: Checkout LFS objects 14 | shell: bash 15 | run: git lfs checkout 16 | 17 | - name: Set up Python ${{ inputs.python-version }} 18 | uses: actions/setup-python@v4 19 | with: 20 | python-version: ${{ inputs.python-version }} 21 | 22 | - name: Install Poetry 23 | uses: snok/install-poetry@v1 24 | with: 25 | virtualenvs-create: true 26 | virtualenvs-in-project: true 27 | installer-parallel: true 28 | 29 | - name: Cache python dependencies 30 | uses: actions/cache@v3 31 | id: poetry-cache 32 | with: 33 | path: .venv 34 | key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }} 35 | 36 | - name: Get installed Playwright version 37 | id: playwright-version 38 | shell: bash 39 | run: | 40 | echo "::set-output name=version::$(poetry show playwright | grep "version" | awk '{print $3}')" 41 | 42 | - name: Cache playwright dependencies 43 | uses: actions/cache@v3 44 | id: playwright-cache 45 | with: 46 | path: ~/.cache/ms-playwright/ 47 | key: playwright-${{ runner.os }}-${{ steps.playwright-version.outputs.version }} 48 | 49 | - name: Ensure python, poetry installed 50 | shell: bash 51 | run: | 52 | python --version 53 | poetry --version 54 | 55 | - name: Install dependencies with Poetry 56 | shell: bash 57 | run: | 58 | mkdir ./dist && touch ./dist/README.md 59 | poetry install --no-interaction --without dev-e2e-windows 60 | 61 | - name: Install playwright dependencies with Poetry 62 | 
shell: bash 63 | run: poetry run playwright install chromium --with-deps 64 | 65 | - name: Ensure playwright installed 66 | shell: bash 67 | run: | 68 | poetry run playwright -V -------------------------------------------------------------------------------- /.github/actions/windows/action.yaml: -------------------------------------------------------------------------------- 1 | name: 'Setup Python and Poetry.' 2 | description: 'Set up Python, install Poetry, cache dependencies, and install them with Poetry.' 3 | 4 | runs: 5 | using: 'composite' 6 | steps: 7 | - name: Checkout LFS objects 8 | shell: bash 9 | run: git lfs checkout 10 | 11 | - name: Ensure python, poetry installed 12 | shell: bash 13 | run: | 14 | python --version 15 | poetry --version 16 | 17 | - name: Install dependencies with Poetry 18 | shell: bash 19 | run: | 20 | mkdir ./dist && touch ./dist/README.md 21 | poetry install --no-interaction 22 | 23 | - name: Install playwright dependencies with Poetry 24 | shell: bash 25 | run: | 26 | poetry run playwright install chromium --with-deps 27 | 28 | - name: Ensure playwright installed 29 | shell: bash 30 | run: | 31 | poetry run playwright -V -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | 7 | version: 2 8 | updates: 9 | # https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#package-ecosystem 10 | - package-ecosystem: pip # poetry package manager will be used 11 | directory: "/" 12 | # https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#scheduleinterval 13 | schedule: 14 | interval: daily 15 | open-pull-requests-limit: 10 16 | versioning-strategy: increase -------------------------------------------------------------------------------- /.github/workflows/ci_linux.yml: -------------------------------------------------------------------------------- 1 | name: Linux Python CI 2 | 3 | on: 4 | push: 5 | branches: [ "develop", "master" ] 6 | pull_request: 7 | branches: [ "develop", "master" ] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 11 | cancel-in-progress: true 12 | 13 | permissions: 14 | contents: read 15 | 16 | jobs: 17 | linux_lint: 18 | runs-on: ubuntu-22.04 # https://github.com/actions/runner-images#available-images 19 | steps: 20 | - name: Checkout code 21 | uses: actions/checkout@v3 22 | with: 23 | lfs: true 24 | 25 | - name: Setup Python and Poetry 26 | uses: ./.github/actions/linux 27 | with: 28 | python-version: '3.11' 29 | 30 | - name: Lint with mypy and flake8 31 | env: 32 | FINGERPRINT_KEY: ${{ secrets.FINGERPRINT_KEY }} 33 | BRIGHTDATA_USERNAME: ${{ secrets.BRIGHTDATA_USERNAME }} 34 | BRIGHTDATA_PASSWORD: ${{ secrets.BRIGHTDATA_PASSWORD }} 35 | run: | 36 | poetry run mypy pybas_automation/ tests/ 37 | poetry run flake8 pybas_automation/ tests/ 38 | 39 | linux_test: 40 | needs: [ linux_lint ] 41 | runs-on: ubuntu-22.04 # 
https://github.com/actions/runner-images#available-images 42 | steps: 43 | - name: Checkout code 44 | uses: actions/checkout@v3 45 | with: 46 | lfs: true 47 | 48 | - name: Setup Python and Poetry 49 | uses: ./.github/actions/linux 50 | with: 51 | python-version: '3.11' 52 | 53 | - name: Run tests with pytest 54 | env: 55 | FINGERPRINT_KEY: ${{ secrets.FINGERPRINT_KEY }} 56 | BRIGHTDATA_USERNAME: ${{ secrets.BRIGHTDATA_USERNAME }} 57 | BRIGHTDATA_PASSWORD: ${{ secrets.BRIGHTDATA_PASSWORD }} 58 | run: | 59 | mkdir ./dist && touch ./dist/README.md 60 | mkdir -p ./coverage/lcov/functional 61 | poetry run pytest -s -vv --cov=pybas_automation --cov-report=lcov:coverage/lcov/functional/coverage.lcov tests/functional 62 | 63 | - name: Upload coverage reports to Codecov 64 | uses: codecov/codecov-action@v3 65 | # https://github.com/codecov/codecov-action#arguments 66 | with: 67 | token: ${{ secrets.CODECOV_TOKEN }} 68 | flags: tests_functional 69 | files: coverage/lcov/functional/coverage.lcov 70 | fail_ci_if_error: false 71 | 72 | linux_poetry_build: 73 | needs: [ linux_lint, linux_test ] 74 | runs-on: ubuntu-22.04 # https://github.com/actions/runner-images#available-images 75 | steps: 76 | - name: Checkout code 77 | uses: actions/checkout@v3 78 | with: 79 | lfs: true 80 | 81 | - name: Setup Python and Poetry 82 | uses: ./.github/actions/linux 83 | with: 84 | python-version: '3.11' 85 | 86 | - name: Build python package with poetry 87 | run: | 88 | poetry check 89 | poetry run python scripts/update_readme_links.py 90 | poetry build 91 | -------------------------------------------------------------------------------- /.github/workflows/ci_windows.yml: -------------------------------------------------------------------------------- 1 | name: Windows Python CI 2 | 3 | on: 4 | push: 5 | branches: [ "develop", "master" ] 6 | pull_request: 7 | branches: [ "develop", "master" ] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || 
github.ref }} 11 | cancel-in-progress: true 12 | 13 | permissions: 14 | contents: read 15 | 16 | jobs: 17 | 18 | windows_test: 19 | runs-on: self-hosted 20 | #runs-on: windows-2022 # https://github.com/actions/runner-images#available-images 21 | steps: 22 | - name: Checkout code 23 | uses: actions/checkout@v3 24 | with: 25 | lfs: true 26 | 27 | - name: Prepare 28 | uses: ./.github/actions/windows 29 | 30 | - name: Run tests with pytest 31 | shell: bash 32 | env: 33 | FINGERPRINT_KEY: ${{ secrets.FINGERPRINT_KEY }} 34 | BRIGHTDATA_USERNAME: ${{ secrets.BRIGHTDATA_USERNAME }} 35 | BRIGHTDATA_PASSWORD: ${{ secrets.BRIGHTDATA_PASSWORD }} 36 | run: | 37 | mkdir -p ./coverage/lcov/ 38 | poetry run pytest -s -vv --cov=pybas_automation --cov-report=lcov:coverage/lcov/coverage.lcov tests/ 39 | 40 | - name: Upload coverage reports to Codecov 41 | uses: codecov/codecov-action@v3 42 | # https://github.com/codecov/codecov-action#arguments 43 | with: 44 | token: ${{ secrets.CODECOV_TOKEN }} 45 | flags: tests_functional_e2e 46 | files: coverage/lcov/coverage.lcov 47 | fail_ci_if_error: false 48 | 49 | 50 | windows_poetry_build: 51 | needs: [ windows_test ] 52 | runs-on: self-hosted 53 | steps: 54 | - name: Checkout code 55 | uses: actions/checkout@v3 56 | with: 57 | lfs: true 58 | 59 | - name: Setup Python and Poetry 60 | uses: ./.github/actions/windows 61 | 62 | - name: Build python package with poetry 63 | run: | 64 | poetry check 65 | poetry run python scripts/update_readme_links.py 66 | poetry build 67 | -------------------------------------------------------------------------------- /.github/workflows/static.yml: -------------------------------------------------------------------------------- 1 | # Simple workflow for deploying static content to GitHub Pages 2 | name: Deploy static content to Pages 3 | 4 | on: 5 | # Runs on pushes targeting the default branch 6 | push: 7 | branches: [ "master" ] 8 | 9 | # Allows you to run this workflow manually from the Actions tab 10 | 
workflow_dispatch: 11 | 12 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 13 | permissions: 14 | contents: read 15 | pages: write 16 | id-token: write 17 | 18 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 19 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 20 | concurrency: 21 | group: "pages" 22 | cancel-in-progress: false 23 | 24 | jobs: 25 | # Single deploy job since we're just deploying 26 | deploy: 27 | environment: 28 | name: github-pages 29 | url: ${{ steps.deployment.outputs.page_url }} 30 | runs-on: ubuntu-latest 31 | steps: 32 | - name: Checkout 33 | uses: actions/checkout@v3 34 | with: 35 | lfs: true 36 | - name: Setup Pages 37 | uses: actions/configure-pages@v3 38 | - name: Upload artifact 39 | uses: actions/upload-pages-artifact@v2 40 | with: 41 | # Upload only the docs directory 42 | path: './docs/' 43 | - name: Deploy to GitHub Pages 44 | id: deployment 45 | uses: actions/deploy-pages@v2 46 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .env 3 | !reports/.gitkeep 4 | reports/* 5 | !logs/.gitkeep 6 | logs/* 7 | .coverage 8 | .coverage.* 9 | dist/* 10 | !htmlcov/.gitkeep 11 | htmlcov/* 12 | !bas_release/.gitkeep 13 | !bas_release/PyBasFree.zip 14 | bas_release/* 15 | /pybas_automation.svg 16 | /.mypy_cache/ 17 | /.pytest_cache/ 18 | /coverage/ 19 | -------------------------------------------------------------------------------- /.pydocstyle.ini: -------------------------------------------------------------------------------- 1 | [pydocstyle] 2 | inherit = false 3 | ignore = D202,D212,D203,D403 4 | match = .*\.py -------------------------------------------------------------------------------- /CHANGELOG.md: 
-------------------------------------------------------------------------------- 1 | ## 0.1.16 (2023-10-21) 2 | 3 | ### Feat 4 | 5 | - improve export/import browser data (#70) 6 | 7 | ### Fix 8 | 9 | - remove export/import browser data 10 | 11 | ## 0.1.15 (2023-10-20) 12 | 13 | ### Feat 14 | 15 | - initial implementation of exporting browser data (#67) 16 | 17 | ## 0.1.14 (2023-10-19) 18 | 19 | ### Fix 20 | 21 | - prepare to publishing to pypi 22 | 23 | ## 0.1.13 (2023-10-19) 24 | 25 | ### Fix 26 | 27 | - ci/cd (#60) 28 | 29 | ## 0.1.12 (2023-10-17) 30 | 31 | ### Feat 32 | 33 | - add unique_process_id to BasTask (#53) 34 | - add remote_debugging_port to BasTask (#52) 35 | 36 | ## 0.1.11 (2023-10-16) 37 | 38 | ### Feat 39 | 40 | - update PROMO.md (#40) 41 | 42 | ## 0.1.10 (2023-10-15) 43 | 44 | ### Feat 45 | 46 | - improve support of BAS_SAFE (#35) 47 | - add initial support of BAS_SAFE (#34) 48 | 49 | ## 0.1.9 (2023-10-14) 50 | 51 | ### Feat 52 | 53 | - add unique_process_id (#31) 54 | 55 | ## 0.1.8 (2023-10-13) 56 | 57 | ### Feat 58 | 59 | - improve proxy support (#26) 60 | - add proxy support (#24) 61 | 62 | ## 0.1.7 (2023-10-13) 63 | 64 | ### Feat 65 | 66 | - add browser-automator (#21) 67 | 68 | ### Fix 69 | 70 | - **makefile**: bump_version 71 | 72 | ## 0.1.6 (2023-10-11) 73 | 74 | ### Feat 75 | 76 | - add commitizen 77 | - **cmd**: enable cdp logging (#18) 78 | - **github-actions**: improve ci_windows.yml 79 | - **release**: 0.1.5 80 | - **github-actions**: add ci_windows.yml (#14) 81 | - **release**: 0.1.4 82 | - **release**: 0.1.3 83 | - **release**: 0.1.1 84 | - **release**: 0.1.0 85 | - initial public release (#1) 86 | - initial public release 87 | 88 | ### Fix 89 | 90 | - cyclic imports 91 | - deserialize task in browser storage (#11) 92 | - **github-actions**: remove pr_agent.yml 93 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 
1 | MIT License 2 | 3 | Copyright (c) 2023 sergerdn 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all tests clean 2 | .DEFAULT_GOAL := tests 3 | 4 | include .env 5 | export 6 | 7 | GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD) 8 | GIT_COMMIT := $(shell git rev-list -1 HEAD) 9 | GIT_VERSION := $(shell git describe --tags --always) 10 | 11 | clean: 12 | @rm -rf .mypy_cache || echo "" 13 | @rm -rf .pytest_cache || echo "" 14 | @rm ./logs/* || echo "" 15 | @touch ./logs/.gitkeep && git add ./logs/.gitkeep 16 | @rm ./reports/* || echo "" 17 | @touch ./reports/.gitkeep && git add ./reports/.gitkeep 18 | @rm -rf ./coverage || echo "" 19 | @rm -rf ./.coverage.* || echo "" 20 | @rm .coverage || echo "" 21 | $(MAKE) clean_pycache 22 | rm -rf ./dist || echo "" 23 | @echo "Cleaned up the project files." 24 | 25 | clean_pycache: 26 | @if directories=$$(find . -type d -name __pycache__); then \ 27 | find . 
-type d -name __pycache__ -exec rm -rf {} +; \ 28 | else \ 29 | echo "No __pycache__ directories found."; \ 30 | fi 31 | 32 | poetry_install: 33 | poetry install --compile 34 | 35 | poetry_install_dev: 36 | poetry install --compile --with dev 37 | 38 | poetry_install_cmd: 39 | poetry install --compile --with cmd 40 | 41 | lint_fix: 42 | poetry run black cmd_initial.py cmd_worker.py pybas_automation/ tests/ 43 | poetry run isort cmd_initial.py cmd_worker.py pybas_automation/ tests/ 44 | #poetry run autopep8 --in-place --aggressive --aggressive pybas_automation/utils/utils.py 45 | 46 | lint: 47 | mkdir ./dist || echo "" 48 | touch ./dist/README.md 49 | poetry check 50 | poetry run mypy cmd_initial.py cmd_worker.py pybas_automation/ tests/ || echo "" 51 | poetry run flake8 cmd_initial.py cmd_worker.py pybas_automation/ tests/ || echo "" 52 | pylint --load-plugins pylint_pydantic cmd_initial.py cmd_worker.py ./pybas_automation/ || echo "" 53 | 54 | lint_docs: 55 | poetry run pydocstyle pybas_automation 56 | 57 | tests: 58 | poetry run pytest -s -vv tests/ 59 | $(MAKE) clean_pycache 60 | 61 | tests_coverage: 62 | poetry run pytest -s -vv --cov=pybas_automation --cov-report=html:coverage/html/ tests/ 63 | start "" "./coverage/html/index.html" 64 | 65 | tests_coverage_e2e: 66 | poetry run pytest -s -vv --cov=pybas_automation --cov-report=html:coverage/html/e2e/ tests/e2e/ 67 | start "" "./coverage/html/e2e/index.html" 68 | 69 | tests_coverage_functional: 70 | poetry run pytest -s -vv --cov=pybas_automation --cov-report=html:coverage/html/functional/ tests/functional/ 71 | start "" "./coverage/html/functional/index.html" 72 | 73 | run_cmd_initial: 74 | @$(MAKE) clean 75 | poetry run python cmd_initial.py --bas_fingerprint_key="${FINGERPRINT_KEY}" --limit_tasks=1 76 | 77 | run_cmd_initial_proxy: 78 | @$(MAKE) clean 79 | poetry run python cmd_initial.py --bas_fingerprint_key="${FINGERPRINT_KEY}" --limit_tasks=1 \ 80 | --proxy_provider=brightdata 
--proxy_username="${BRIGHTDATA_USERNAME}" \ 81 | --proxy_password="${BRIGHTDATA_PASSWORD}" 82 | 83 | run_cmd_worker: 84 | poetry run python cmd_worker.py 85 | 86 | publish: 87 | echo "Current branch is '${GIT_BRANCH}'." 88 | ifeq ($(GIT_BRANCH),master) 89 | @echo "Current branch is 'master'. Proceeding with publishing." 90 | poetry run python scripts/update_readme_links.py 91 | poetry build 92 | poetry publish --username=__token__ --password=${PYPI_PASSWORD} 93 | start "" "https://pypi.org/project/pybas-automation/" 94 | else 95 | @echo "Publishing is only allowed from the 'master' branch." 96 | endif 97 | 98 | bump_version: 99 | @echo "Current branch is '${GIT_BRANCH}'." 100 | ifeq ($(GIT_BRANCH),master) 101 | poetry run python scripts/update_readme_links.py 102 | poetry check 103 | cz bump --check-consistency --changelog --increment=patch 104 | else 105 | @echo "Bump version is only allowed from the 'master' branch." 106 | endif 107 | 108 | run_pydeps: 109 | pydeps pybas_automation 110 | 111 | poetry_upgrade: 112 | poetry up --latest 113 | poetry update 114 | poetry show --outdated -------------------------------------------------------------------------------- /PROMO.md: -------------------------------------------------------------------------------- 1 | 🚀 **py-bas-automation: Supercharge BAS with Python!** 🚀 2 | 3 | Born from a desire to harness the power of *BrowserAutomationStudio's browser capabilities* while leveraging the 4 | familiarity of *Python*, **py-bas-automation** stands out. 5 | 6 | It's more than just a tool — it's the fusion of the best of both worlds, culminating in a robust solution for web 7 | automation aficionados. 8 | 9 | 💡 **Why I Created It**: 10 | 11 | - I needed only the browser component from *BAS*. 12 | - I am more adept in *Python* than other languages. 13 | - I believe the underlying idea can be transposed to any language thanks to the *Chrome DevTools Protocol*. 
14 | 15 | 🚧 **Note**: While I'm passionate about this project, you should understand that this isn't a very serious project. 16 | The main purpose is to deliver a message and share a concept. 17 | 18 | 🔍 **Key Features**: 19 | 20 | - Seamless [BrowserAutomationStudio](https://bablosoft.com/shop/BrowserAutomationStudio) Integration. 21 | - Unique fingerprinting via [FingerprintSwitcher](https://fingerprints.bablosoft.com/) (Paid feature). 22 | - Efficient management with [Playwright](https://playwright.dev/python/). 23 | - Executing Browser Automation Studio Actions from Python: Implement BAS actions from Python using the undocumented 24 | API. This includes actions such as retrieving page source, **emulating mouse movements**, etc. 25 | (Note: Not all functions are currently supported). 26 | 27 | 📸 Screenshots: 28 | 29 | ![](https://sergerdn.github.io/py-bas-automation/images/bas_gui_window_3.png) 30 | 31 | **Pycharm IDE**: 32 | 33 | ::: 34 | 35 | ![](https://i.ibb.co/cNVVBSQ/Capture.png) 36 | ![](https://i.ibb.co/59Tw8jh/Capture.png) 37 | 38 | ::: 39 | 40 | 🛠 **Requirements**: 41 | 42 | - Windows 10/11, Windows Server 2022 (21H2 tested). 43 | - Python 3.11+ 44 | - Git, Poetry & more. 45 | - 📝 **Experience**: 46 | - Familiarity with *Python* programming. 47 | - Knowledge of *Git* version control. 48 | - Understanding of dependency management, preferably with *Poetry*. 49 | 50 | 🔧 **Get Started**: 51 | 52 | 1. Clone the [repo](https://github.com/sergerdn/py-bas-automation). 53 | 2. Install dependencies with Poetry. 54 | 3. Dive into the [initial](https://github.com/sergerdn/py-bas-automation/blob/develop/cmd_initial.py) 55 | and [worker](https://github.com/sergerdn/py-bas-automation/blob/develop/cmd_worker.py) scripts to grasp the flow. 56 | 57 | 🙌 **Contribute**: 58 | 59 | Got ideas or improvements? Open an [issue](https://github.com/sergerdn/py-bas-automation/issues/new) on GitHub. 60 | 61 | Step into the future of web automation with **py-bas-automation**😎! 
62 | 63 | 🚫 **No Private Support**: 64 | 65 | I do not provide free support via private messaging on forums, Telegram, or other platforms. For questions, 66 | clarifications, or any issues you encounter, kindly post your message here or create a new GitHub issue. 67 | 68 | This helps maintain transparency and also benefits others who might have similar queries. 69 | 70 | Please write in *English*, as this is an English-language topic. Thanks. 71 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # py-bas-automation 2 | 3 | [![Linux Python CI](https://github.com/sergerdn/py-bas-automation/actions/workflows/ci_linux.yml/badge.svg)](https://github.com/sergerdn/py-bas-automation/actions/workflows/ci_linux.yml) 4 | [![Windows Python CI](https://github.com/sergerdn/py-bas-automation/actions/workflows/ci_windows.yml/badge.svg)](https://github.com/sergerdn/py-bas-automation/actions/workflows/ci_windows.yml) 5 | [![codecov](https://codecov.io/gh/sergerdn/py-bas-automation/graph/badge.svg?token=YQYHYG9VVM)](https://codecov.io/gh/sergerdn/py-bas-automation) 6 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 7 | 8 | **Note: This library is currently in active development, and its API may undergo changes without notice at any time.** 9 | 10 | **Note:** This project originally started as a `working proof of concept` and does not aim to offer extensive support or 11 | documentation. It serves as a fundamental demonstration of the concept and should be considered a foundation for further 12 | development or experimentation. 13 | 14 | ## Description 15 | 16 | This library enables you to work with BAS (BrowserAutomationStudio) using headless Chromium browsers and a 17 | customizable Windows GUI program, while controlling it with Python ❤️. 
18 | ![bas_gui_window_3.png](docs/images/bas_gui_window_3.png) 19 | 20 | ## Key Features 21 | 22 | - **BrowserAutomationStudio Integration:** Run BAS seamlessly with headless browsers while enjoying the convenience of a 23 | user-friendly and customizable Windows GUI program 24 | through [BrowserAutomationStudio](https://bablosoft.com/shop/BrowserAutomationStudio). 25 | - **Unique Fingerprint Feature:** The application includes a unique feature that assigns a `fingerprint` to each 26 | browser instance using [FingerprintSwitcher](https://fingerprints.bablosoft.com/). Please be aware that this is a 27 | **paid** service. 28 | - **Proxy Support:** The application supports proxy providers such as [Brightdata](https://brightdata.com/). Please 29 | note that this is a **paid** service. 30 | - **Executing Browser Automation Studio (BAS) Actions from Python**: Implement BAS actions from Python using the 31 | undocumented API. This includes actions such as retrieving page source, `emulating mouse movements`, etc. (Note: Not 32 | all functions are currently supported). 33 | - **Playwright Control:** The application leverages [Playwright](https://playwright.dev/python/) to efficiently manage 34 | and control BAS. 35 | 36 | ## Understanding the Workflow 37 | 38 | The functioning of BAS (Browser Automation Studio) involves the following steps: 39 | 40 | 1. **Initial Execution:** Upon initiation, BAS runs [cmd_initial.py](./cmd_initial.py). This script is responsible for 41 | creating tasks and storing them on the disk for later use. 42 | 2. **Data Acquisition and Browser Configuration:** BAS then retrieves the necessary data, configures, and launches 43 | browser instances based on the tasks provided earlier. 44 | 3. **Task Execution:** Following the browser setup, BAS executes [cmd_worker.py](./cmd_worker.py) using the `task_id` 45 | and `remote-debugging-port` number as command-line parameters. 46 | 4. 
**Task Handling:** [cmd_worker.py](./cmd_worker.py) obtains both the `ws_endpoint` and `remote-debugging-port` 47 | from the command line. It then manages complex tasks using [Playwright](https://playwright.dev/python/). These tasks 48 | can range from opening a webpage to filling out forms or even taking screenshots. 49 | 5. **Task Completion:** Once the tasks have been completed, BAS terminates the browser instances and exits. 50 | 51 | The result of running [cmd_initial.py](./cmd_initial.py) is as follows: 52 | 53 | ```json 54 | { 55 | "tasks_file": "C:\\Users\\Administrator\\AppData\\Local\\PyBASProfilesTasks\\tasks.json" 56 | } 57 | ``` 58 | 59 | This is an example of the created `tasks_file`: 60 | 61 | ```json 62 | [ 63 | { 64 | "task_id": "9683607e-2458-4adb-9b14-7e99123bf34d", 65 | "browser_settings": { 66 | "components": { 67 | "widevine": "enable", 68 | "safe_browsing": "enable", 69 | "components": "enable" 70 | }, 71 | "network": { 72 | "enable_qiuc_protocol": true 73 | }, 74 | "rendering": { 75 | "maximum_fps": 30 76 | }, 77 | "browser_version": "default", 78 | "command_line": [ 79 | "--disk-cache-size=104857600", 80 | "--disable-gpu-program-cache", 81 | "--disable-gpu-shader-disk-cache", 82 | "--disable-features=GpuProcessHighPriorityWin,GpuUseDisplayThreadPriority", 83 | "--lang=en" 84 | ], 85 | "profile": { 86 | "profile_folder_path": "C:\\Users\\Administrator\\AppData\\Local\\PyBASProfiles\\tmp3az8nj96", 87 | "always_load_fingerprint_from_profile_folder": false, 88 | "always_load_proxy_from_profile_folder": false 89 | }, 90 | "proxy": { 91 | "server": "brd.superproxy.io", 92 | "port": "22225", 93 | "type": "http", 94 | "login": "brd-customer-hl___redacted__", 95 | "password": "__redacted__" 96 | }, 97 | "fingerprint": { 98 | "safe_canvas": true, 99 | "use_perfect_canvas": true, 100 | "safe_webgl": true, 101 | "safe_audio": true, 102 | "safe_battery": true, 103 | "use_font_pack": true, 104 | "safe_element_size": false, 105 | "emulate_sensor_api": true, 
106 | "emulate_device_scale_factor": true 107 | } 108 | } 109 | } 110 | ] 111 | ``` 112 | 113 | This file contains task details such as browser settings, network configurations, rendering options, and fingerprint 114 | settings, among other things. 115 | 116 | ## System Requirements 117 | 118 | For the optimal running of this application, the following system requirements are necessary: 119 | 120 | - **Operating System:** The application is compatible with Windows 10/11 and Windows Server 2022 (tested on 21H2). 121 | - **Python:** Ensure you have Python 3.11 or higher installed. If not, you can download it from the official 122 | Python [website](https://www.python.org/downloads/). 123 | - **Poetry:** This is a necessary tool for managing Python dependencies. You can find the installation guide on the 124 | official Poetry [documentation](https://python-poetry.org/docs/#installation). 125 | - **Git:** The application requires Git for version control. If it's not already installed on your system, you can 126 | download it from the official Git [website](https://git-scm.com/downloads). 127 | - **Make:** This is an optional tool, it can be downloaded from the 128 | Chocolatey [website](https://community.chocolatey.org/packages/make). 129 | - **FingerprintSwitcher License:** Please note that this is a **paid** feature. You will need a valid license 130 | for [FingerprintSwitcher](https://fingerprints.bablosoft.com/) to access its functionalities. 131 | 132 | ## Installation Guide 133 | 134 | ### Installing the Current Development Version 135 | 136 | To work with the most recent development version of `pybas-automation`, follow the steps outlined below: 137 | 138 | 1. **Clone the Repository:** Clone the `py-bas-automation` repository from GitHub. 139 | 2. **Navigate to the Directory:** Once cloned, navigate to the `py-bas-automation` directory on your local system. 140 | 3. **Install Dependencies:** With Poetry, install all the necessary dependencies. 
141 | 142 | Here are the corresponding commands for each step: 143 | 144 | ```bash 145 | git clone git@github.com:sergerdn/py-bas-automation.git 146 | cd py-bas-automation 147 | git lfs pull 148 | poetry install 149 | ``` 150 | 151 | ### Installing the Latest Release from pypi.org (Currently not recommended) 152 | 153 | If you wish to incorporate `pybas-automation` into your project, execute the following command: 154 | 155 | ```bash 156 | poetry add pybas-automation 157 | ``` 158 | 159 | Please note that this is not currently recommended as the latest release may have unresolved issues. 160 | 161 | ## How to Run the Application 162 | 163 | - **Download the BAS Program:** Begin by downloading the latest version of the compiled BAS program, 164 | called `PyBasFree.zip` . You can find this file in the project directory 165 | under [PyBasFree.zip](bas_release/PyBasFree.zip). After downloading, extract the contents and 166 | execute `PyBasFree.exe`. 167 | 168 | 169 | - **Set Variables in the BAS GUI:** After running the BAS program, proceed to set the necessary variables within the 170 | BAS graphical user interface (GUI). 171 | 172 | ![BAS GUI](docs/images/bas_gui_window_1.png) 173 | 174 | - **Set Up Proxy Provider:** If you are using a proxy provider, you will need to configure it within the BAS GUI. This 175 | can be accomplished by navigating to the `Proxy Settings` option in the vertical menu and selecting the appropriate 176 | provider. 177 | 178 | ![Set up proxy provider](docs/images/bas_gui_window_1_proxy.png) 179 | 180 | - **Start the Program:** Once all variables have been set, click the "OK" button to initiate the program. 181 | 182 | ![Start Program](docs/images/bas_gui_window_2.png) 183 | 184 | ## Advanced Usage 185 | 186 | This guide introduces a Python script that integrates the `Browser Automation Studio` (BAS) with `py-bas-automation`. 
187 | The 188 | purpose is to handle the creation of browser profiles through FingerprintSwitcher and manage tasks related to these 189 | profiles. 190 | 191 | ### [Initial script: cmd_initial.py](./cmd_initial.py) 192 | 193 | ### Description: 194 | 195 | This script facilitates the integration between `BAS (Browser Automation Studio)` and `py-bas-automation`. It manages 196 | the creation of browser profiles using `FingerprintSwitcher` and generates tasks associated with these profiles. 197 | 198 | ### Overview: 199 | 200 | - **Initialization**: Import essential modules and configure logging. 201 | - **Browser Profiles**: Utilize `FingerprintSwitcher`'s fingerprint key to generate or manage browser profiles. 202 | - **Proxy Support**: Configure proxy settings for each browser profile in full-automatic mode by handling proxy 203 | providers. Note: at the moment only [`Brightdata`](https://brightdata.com/) is supported. 204 | - **Tasks Generation**: Generate an associated task for each browser profile and store it. 205 | 206 | ```python 207 | """ 208 | This script facilitates the integration between BAS (Browser Automation Studio) and `py-bas-automation`. 209 | It handles the creation of browser profiles using FingerprintSwitcher and manages tasks associated with these profiles. 210 | """ 211 | 212 | import json 213 | import logging 214 | import os 215 | 216 | import click 217 | from pydantic import FilePath 218 | 219 | from pybas_automation.browser_profile import BrowserProfileStorage 220 | from pybas_automation.task import BasTask, TaskStorage, TaskStorageModeEnum 221 | 222 | logger = logging.getLogger("[cmd_worker]") 223 | 224 | 225 | def run(fingerprint_key: str, count_profiles: int) -> FilePath: 226 | """ 227 | Initialize and run the script. 228 | 229 | :param fingerprint_key: Personal fingerprint key from FingerprintSwitcher. 230 | :param count_profiles: Number of profiles to be created. 231 | 232 | :return: Path to the generated tasks file. 
233 | """ 234 | 235 | # Initialize task storage with read-write access and clear it. 236 | # The default storage location is C:\Users\{username}\AppData\Local\PyBASTasks 237 | task_storage = TaskStorage(mode=TaskStorageModeEnum.READ_WRITE) 238 | task_storage.clear() 239 | 240 | # Initialize browser profiles using the given fingerprint key. 241 | # The default profile storage location is C:\Users\{username}\AppData\Local\PyBASProfiles 242 | browser_profile_storage = BrowserProfileStorage(fingerprint_key=fingerprint_key) 243 | 244 | needs = count_profiles - browser_profile_storage.count() 245 | 246 | # Create any additional profiles if necessary 247 | if needs > 0: 248 | for _ in range(needs): 249 | browser_profile = browser_profile_storage.new() 250 | logger.debug("Created new profile: %s", browser_profile.profile_dir) 251 | 252 | # Generate tasks corresponding to each profile 253 | for browser_profile in browser_profile_storage.load_all()[:count_profiles]: 254 | task = BasTask() 255 | task.browser_settings.profile.profile_folder_path = browser_profile.profile_dir 256 | task_storage.save(task=task) 257 | 258 | logger.info("Total tasks generated: %d", task_storage.count()) 259 | task_storage.save_all() 260 | 261 | return task_storage.task_file_path 262 | 263 | 264 | @click.command() 265 | @click.option( 266 | "--bas_fingerprint_key", 267 | help="Your personal fingerprint key of FingerprintSwitcher.", 268 | required=True, 269 | ) 270 | @click.option( 271 | "--count_profiles", 272 | help="Number of profiles.", 273 | default=10, 274 | ) 275 | def main(bas_fingerprint_key: str, count_profiles: int) -> None: 276 | """ 277 | Entry point of the script. Sets up logging, validates the fingerprint key, 278 | triggers the primary function, and prints the path to the tasks file. 279 | 280 | :param bas_fingerprint_key: Personal fingerprint key from FingerprintSwitcher. 281 | :param count_profiles: Number of profiles to be created. 282 | 283 | :return: None. 
284 | """ 285 | 286 | import multiprocessing 287 | 288 | process = multiprocessing.current_process() 289 | 290 | # Configure logging settings 291 | logging.basicConfig( 292 | level=logging.DEBUG, 293 | format=f"%(asctime)s {process.pid} %(levelname)s %(name)s %(message)s", 294 | filename=os.path.join(os.path.dirname(__file__), "logs", "cmd_initial.log"), 295 | ) 296 | logger.info("Script cmd_initial has started.") 297 | 298 | # Ensure the fingerprint key is present 299 | bas_fingerprint_key = bas_fingerprint_key.strip() 300 | if not bas_fingerprint_key: 301 | raise ValueError("bas_fingerprint_key is not provided") 302 | 303 | # Invoke the main function to get the path to the tasks file 304 | task_file_path = run(fingerprint_key=bas_fingerprint_key, count_profiles=count_profiles) 305 | 306 | # Print the path for potential use in BAS 307 | print(json.dumps({"tasks_file": str(task_file_path)}, indent=4)) 308 | 309 | logger.info("cmd_initial script execution completed.") 310 | 311 | 312 | if __name__ == "__main__": 313 | main() 314 | ``` 315 | 316 | ### [Worker script: cmd_worker.py](./cmd_worker.py) 317 | 318 | ### Description: 319 | 320 | This script demonstrates how to execute tasks using the `Playwright` Python library in conjunction with the 321 | `pybas_automation` package. The primary goal is to fetch task data, connect to an existing browser instance using 322 | `Playwright`, and perform actions on a webpage. 323 | 324 | ### Overview: 325 | 326 | - **Initialization**: Import necessary libraries and set up our task id and debugging port. 327 | - **Task Storage**: Fetch a specific task from our task storage. 328 | - **Remote Browser Connection**: Use the remote debugging port to get a WebSocket endpoint, which allows us to connect 329 | to an existing browser instance. 330 | - **Executing Browser Automation Studio (BAS) Actions from Python**: Implement BAS actions from Python using the 331 | un-documented API. 
This includes actions such as retrieving page source, emulating mouse movements, etc. (Note: Not 332 | all functions are currently supported). 333 | 334 | ```python 335 | import asyncio 336 | from uuid import UUID 337 | from playwright.async_api import async_playwright 338 | from pybas_automation.task import TaskStorage, TaskStorageModeEnum 339 | from pybas_automation.browser_automator import BrowserAutomator 340 | 341 | 342 | async def main(): 343 | # 1. Initialization 344 | # For demonstration purposes, we're using hardcoded values. In a real scenario, these will be fetched dynamically. 345 | task_id = UUID("some_task_id_that_we_getting_from_cmd_line_from_BAS") 346 | remote_debugging_port = 9222 347 | # A unique identifier for the `Worker.exe` process. Retrieved from the command line argument `--unique-process-id`. 348 | unique_process_id = "some_unique_process_id" 349 | 350 | # 2. Task Storage 351 | # Create a new task storage instance in READ mode to fetch tasks. 352 | task_storage = TaskStorage(mode=TaskStorageModeEnum.READ) 353 | found_task = task_storage.get(task_id=task_id) 354 | # Note: You can manipulate or inspect the `found_task` as needed. 355 | 356 | # 3. Remote Browser Connection 357 | async with BrowserAutomator( 358 | remote_debugging_port=remote_debugging_port, unique_process_id=unique_process_id 359 | ) as automator: 360 | # Variant 1: Work with the BrowserAutomator API 361 | await automator.page.goto("https://playwright.dev/python/") 362 | if unique_process_id: 363 | # With Automator, you can call function from the BrowserAutomationStudio API. 
364 | print(f"Unique process ID: {unique_process_id}") 365 | page_content = await automator.bas_get_page_content() 366 | 367 | elem = automator.page.locator("xpath=//a[@class='getStarted_Sjon']") 368 | await automator.bas_move_mouse_to_elem(elem=elem) 369 | await elem.click() 370 | 371 | print(f"Page content from BAS_SAFE api: {page_content[:100]} ...") 372 | 373 | # Variant 2: Work with the Playwright API directly. 374 | ws_endpoint = automator.get_ws_endpoint() 375 | async with async_playwright() as pw: 376 | # Connect to an existing browser instance using the fetched WebSocket endpoint. 377 | browser = await pw.chromium.connect_over_cdp(ws_endpoint) 378 | # Access the main page of the connected browser instance. 379 | page = browser.contexts[0].pages[0] 380 | # Perform actions using Playwright, like navigating to a webpage. 381 | await page.goto("https://playwright.dev/python/") 382 | 383 | 384 | if __name__ == "__main__": 385 | asyncio.run(main()) 386 | ``` 387 | 388 | ## Planned Improvements: 389 | 390 | - [x] Add Proxy support. 391 | - [x] Develop end-to-end tests to thoroughly assess the entire workflow. 392 | - [ ] Include build scripts for converting Python files to executable format. 393 | - [ ] Expand the repository with more illustrative examples. 394 | 395 | ## Contributing 396 | 397 | Your ideas and contributions are highly valued. Please do not hesitate to open 398 | an [issue](https://github.com/sergerdn/py-bas-automation/issues/new) if you have suggestions, questions, or if you would 399 | like to contribute to its enhancement. 
400 | 401 | ## License 402 | 403 | [LICENSE](./LICENSE) -------------------------------------------------------------------------------- /bas_release/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sergerdn/py-bas-automation/de2afa685c4578383b600f6748448c26dd67c8cc/bas_release/.gitkeep -------------------------------------------------------------------------------- /bas_release/PyBasFree.zip: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:cb80e4384ad134a6bdd8f48cc1ecd52828eacc574ab1cbfca5ddc03cf241cf78 3 | size 16934522 4 | -------------------------------------------------------------------------------- /cmd_initial.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script facilitates the integration between BAS (Browser Automation Studio) and `py-bas-automation`. 3 | It handles the creation of browser profiles using FingerprintSwitcher and manages tasks associated with these profiles. 4 | """ 5 | 6 | import json 7 | import logging 8 | import os 9 | 10 | import click 11 | from pydantic import FilePath 12 | 13 | from pybas_automation.browser_profile import BrowserProfileStorage 14 | from pybas_automation.proxy_providers.brightdata import BrightdataCredentialsModel, BrightDataProxyModel 15 | from pybas_automation.task import BasTask, TaskStorage, TaskStorageModeEnum 16 | 17 | logger = logging.getLogger("[cmd_worker]") 18 | 19 | 20 | def run( 21 | fingerprint_key: str, 22 | limit_tasks: int, 23 | proxy_provider: str, 24 | proxy_username: str, 25 | proxy_password: str, 26 | ) -> FilePath: 27 | """ 28 | Initialize and run the script. 29 | 30 | :param fingerprint_key: Personal fingerprint key from FingerprintSwitcher. 31 | :param limit_tasks: Number of tasks/profiles to be created. 32 | :param proxy_provider: Proxy provider to use. 
33 | :param proxy_username: Proxy provider username. 34 | :param proxy_password: Proxy provider password. 35 | 36 | :return: Path to the generated tasks file. 37 | """ 38 | 39 | match proxy_provider: 40 | case "": 41 | pass 42 | case "brightdata": 43 | if not proxy_username or not proxy_password: 44 | raise ValueError(f"proxy_username or proxy_password not set for {proxy_provider}") 45 | case _: 46 | raise ValueError(f"Unknown proxy provider: {proxy_provider}") 47 | 48 | # Initialize task storage with read-write access and clear it. 49 | # The default storage location is C:\Users\{username}\AppData\Local\PyBASTasks 50 | task_storage = TaskStorage(mode=TaskStorageModeEnum.READ_WRITE) 51 | task_storage.clear() 52 | 53 | # Initialize browser profiles using the given fingerprint key. 54 | # The default profile storage location is C:\Users\{username}\AppData\Local\PyBASProfiles 55 | browser_profile_storage = BrowserProfileStorage(fingerprint_key=fingerprint_key) 56 | 57 | needs = limit_tasks - browser_profile_storage.count() 58 | 59 | # Create any additional profiles if necessary 60 | if needs > 0: 61 | for _ in range(needs): 62 | browser_profile = browser_profile_storage.new() 63 | 64 | match proxy_provider: 65 | case "brightdata": 66 | credentials = BrightdataCredentialsModel(username=proxy_username, password=proxy_password) 67 | proxy = BrightDataProxyModel(credentials=credentials) 68 | 69 | proxy_bas = proxy.to_bas_proxy(keep_session=True) 70 | browser_profile.proxy = proxy_bas 71 | browser_profile.save_proxy_to_profile() 72 | 73 | logger.debug("Created new profile: %s", browser_profile.profile_dir) 74 | 75 | # Generate tasks corresponding to each profile 76 | for browser_profile in browser_profile_storage.load_all()[:limit_tasks]: 77 | task = BasTask() 78 | 79 | task.browser_settings.profile.profile_folder_path = browser_profile.profile_dir 80 | task.browser_settings.proxy = browser_profile.proxy 81 | 82 | task_storage.save(task=task) 83 | 84 | logger.info("Total 
tasks generated: %d", task_storage.count()) 85 | task_storage.save_all() 86 | 87 | return task_storage.task_file_path 88 | 89 | 90 | @click.command() 91 | @click.option( 92 | "--bas_fingerprint_key", 93 | help="Personal fingerprint key of FingerprintSwitcher.", 94 | required=True, 95 | ) 96 | @click.option("--proxy_provider", help="Proxy provider to use.", type=str, default="") 97 | @click.option("--proxy_username", help="Proxy provider username.", type=str, default="") 98 | @click.option("--proxy_password", help="Proxy provider password.", type=str, default="") 99 | @click.option( 100 | "--limit_tasks", 101 | help="Number of tasks/profiles.", 102 | default=10, 103 | ) 104 | def main( 105 | bas_fingerprint_key: str, limit_tasks: int, proxy_provider: str, proxy_username: str, proxy_password: str 106 | ) -> None: 107 | """ 108 | Entry point of the script. Sets up logging, validates the fingerprint key, 109 | triggers the primary function, and prints the path to the tasks file. 110 | 111 | :param bas_fingerprint_key: Personal fingerprint key from FingerprintSwitcher. 112 | :param limit_tasks: Number of tasks/profiles to be created. 113 | :param proxy_provider: Proxy provider to use. 114 | 115 | :return: None. 
116 | """ 117 | 118 | import multiprocessing # pylint: disable=import-outside-toplevel 119 | 120 | process = multiprocessing.current_process() 121 | 122 | # Configure logging settings 123 | logging.basicConfig( 124 | level=logging.DEBUG, 125 | format=f"%(asctime)s {process.pid} %(levelname)s %(name)s %(message)s", 126 | filename=os.path.join(os.path.dirname(__file__), "logs", "cmd_initial.log"), 127 | ) 128 | logger.info("Script cmd_initial has started.") 129 | 130 | # Ensure the fingerprint key is present 131 | bas_fingerprint_key = bas_fingerprint_key.strip() 132 | if not bas_fingerprint_key: 133 | raise ValueError("bas_fingerprint_key is not provided") 134 | 135 | proxy_provider = proxy_provider.strip().lower() 136 | proxy_username = proxy_username.strip() 137 | proxy_password = proxy_password.strip() 138 | 139 | logger.info("Proxy provider: %s, limit_tasks: %d", proxy_provider, limit_tasks) 140 | 141 | match proxy_provider: 142 | case "": 143 | pass 144 | case "brightdata": 145 | pass 146 | case _: 147 | raise ValueError(f"Unknown proxy provider: {proxy_provider}") 148 | 149 | # Invoke the main function to get the path to the tasks file 150 | task_file_path = run( 151 | fingerprint_key=bas_fingerprint_key, 152 | limit_tasks=limit_tasks, 153 | proxy_provider=proxy_provider, 154 | proxy_username=proxy_username, 155 | proxy_password=proxy_password, 156 | ) 157 | 158 | # Print the path for potential use in BAS 159 | print(json.dumps({"tasks_file": str(task_file_path)}, indent=4)) 160 | 161 | logger.info("cmd_initial script execution completed.") 162 | 163 | 164 | if __name__ == "__main__": 165 | main() # pylint: disable=no-value-for-parameter 166 | -------------------------------------------------------------------------------- /cmd_worker.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script manages task executions. 
3 | 4 | It retrieves task specifics from BAS, runs the corresponding Playwright script, and then saves 5 | the produced screenshots to the `reports` folder. 6 | """ 7 | 8 | import asyncio 9 | import codecs 10 | import json 11 | import logging 12 | import os 13 | from uuid import UUID 14 | 15 | import click 16 | from dotenv import load_dotenv 17 | from playwright.async_api import async_playwright 18 | 19 | from pybas_automation.browser_automator import BrowserAutomator 20 | from pybas_automation.browser_profile import BrowserProfileStorage 21 | from pybas_automation.task import TaskStorage, TaskStorageModeEnum 22 | 23 | # Load environment variables 24 | load_dotenv() 25 | 26 | logger = logging.getLogger("[cmd_worker]") 27 | _debug = os.environ.get("DEBUG", "False").lower() == "true" 28 | 29 | 30 | async def run(task_id: UUID, remote_debugging_port: int, unique_process_id: str) -> None: 31 | """ 32 | Fetch the specified task and run the associated worker. 33 | 34 | :param task_id: Unique identifier of the desired task. 35 | :param remote_debugging_port: Port used for Chrome DevTools Protocol (CDP) remote debugging. 36 | :param unique_process_id: A unique identifier for the `Worker.exe` process. Retrieved from the command line. 37 | 38 | :return: None. 
39 | """ 40 | 41 | # Validate input parameters 42 | if not task_id: 43 | raise ValueError("task_id is not provided") 44 | if not remote_debugging_port: 45 | raise ValueError("remote_debugging_port is not provided") 46 | 47 | logger.debug("Retrieving task with ID: %s", task_id) 48 | 49 | task_storage = TaskStorage(mode=TaskStorageModeEnum.READ_WRITE) 50 | 51 | # Ensure there are tasks to load 52 | if not task_storage.load_all(): 53 | raise ValueError("No tasks available for processing") 54 | 55 | # Fetch the specified task 56 | found_task = task_storage.get(task_id=task_id) 57 | if not found_task: 58 | raise ValueError(f"Task with ID {task_id} not found") 59 | 60 | # Update the task with the remote debugging port 61 | found_task.remote_debugging_port = remote_debugging_port 62 | if unique_process_id: 63 | # Update the task with the unique process ID 64 | found_task.unique_process_id = unique_process_id 65 | 66 | task_storage.update(found_task) 67 | 68 | # Debug: Print the task details 69 | print(json.dumps(found_task.model_dump(mode="json"), indent=4)) 70 | screenshot_filename = os.path.join(os.path.dirname(__file__), "reports", f"{found_task.task_id}_screenshot.png") 71 | 72 | browser_profile_storage = BrowserProfileStorage() 73 | browser_profile_storage.load_all() 74 | 75 | profile_name = os.path.basename(found_task.browser_settings.profile.profile_folder_path) 76 | browser_profile = browser_profile_storage.load(profile_name=profile_name) 77 | print(browser_profile.profile_dir) 78 | 79 | async with BrowserAutomator( 80 | browser_profile=browser_profile, 81 | remote_debugging_port=remote_debugging_port, 82 | unique_process_id=unique_process_id, 83 | ) as automator: 84 | # Variant 1: Work with the BrowserAutomator API 85 | await automator.page.goto("https://playwright.dev/python/") 86 | 87 | if unique_process_id: 88 | # With Automator, you can call function from the BrowserAutomationStudio API. 
89 | logger.info("Unique process ID: %s", unique_process_id) 90 | page_content = await automator.bas_get_page_content() 91 | 92 | elem = automator.page.locator("xpath=//a[@class='getStarted_Sjon']") 93 | await automator.bas_move_mouse_to_elem(elem=elem) 94 | await elem.click() 95 | 96 | logger.debug("Page content from BAS_SAFE api: %s ...", page_content[:100]) 97 | 98 | # Variant 2: Work with the Playwright API directly. 99 | ws_endpoint = automator.get_ws_endpoint() 100 | async with async_playwright() as pw: 101 | # Connect to an existing browser instance using the fetched WebSocket endpoint. 102 | browser = await pw.chromium.connect_over_cdp(ws_endpoint) 103 | # Access the main page of the connected browser instance. 104 | page = browser.contexts[0].pages[0] 105 | # Perform actions using Playwright, like navigating to a webpage. 106 | await page.goto("https://playwright.dev/python/") 107 | 108 | # Save a screenshot of the current page 109 | await automator.page.screenshot(path=screenshot_filename, full_page=True) 110 | 111 | 112 | @click.command() 113 | @click.option("--task_id", help="Unique identifier of the task.", required=True) 114 | @click.option( 115 | "--remote_debugging_port", 116 | help="Port number used for Chrome DevTools Protocol (CDP) remote debugging.", 117 | type=int, 118 | required=True, 119 | ) 120 | @click.option("--unique_process_id", help="Unique identifier of the Worker.exe process.") 121 | @click.option( 122 | "--debug", 123 | help="Enable debug mode.", 124 | is_flag=True, 125 | ) 126 | def main( 127 | task_id: UUID, 128 | remote_debugging_port: int, 129 | unique_process_id: str, 130 | debug: bool, 131 | ) -> None: 132 | """ 133 | Set up logging and initiate the task execution process. 134 | 135 | :param task_id: Unique identifier of the task. 136 | :param remote_debugging_port: Port used for Chrome DevTools Protocol (CDP) remote debugging. 
137 | :param unique_process_id: A unique identifier for the `Worker.exe` process. Retrieved from the command line 138 | argument `--unique-process-id`. 139 | :param debug: Enable debug mode. 140 | 141 | :return: None. 142 | """ 143 | 144 | if debug: 145 | logger.debug("Debug mode enabled") 146 | 147 | # Playwright debug logging 148 | os.environ["DEBUG"] = "pw:protocol" 149 | 150 | filename = os.path.join(os.path.dirname(__file__), "logs", "cdp_log.txt") 151 | import sys # pylint: disable=import-outside-toplevel 152 | 153 | sys.stderr = codecs.open(filename, "w", "utf-8") 154 | 155 | import multiprocessing # pylint: disable=import-outside-toplevel 156 | 157 | process = multiprocessing.current_process() 158 | 159 | # Logging configuration 160 | logging.basicConfig( 161 | level=logging.DEBUG, 162 | format=f"%(asctime)s {process.pid} %(levelname)s %(name)s %(message)s", 163 | filename=os.path.join(os.path.dirname(__file__), "logs", "cmd_worker.log"), 164 | ) 165 | 166 | logger.info("Initializing cmd_worker with PID: %s", process.pid) 167 | asyncio.run(run(task_id=task_id, remote_debugging_port=remote_debugging_port, unique_process_id=unique_process_id)) 168 | 169 | 170 | if __name__ == "__main__": 171 | main() # pylint: disable=no-value-for-parameter 172 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: off 4 | patch: off -------------------------------------------------------------------------------- /docs/images/bas_gui_window_1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:299443a51baed85773d3f83243360080cf81a8fdd67973be41398a3ca3e3ac7c 3 | size 42098 4 | -------------------------------------------------------------------------------- /docs/images/bas_gui_window_1_proxy.png: 
-------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b948f109c3538d61de046230b70450d7795072598e07f5da24eb25adf595a34d 3 | size 33380 4 | -------------------------------------------------------------------------------- /docs/images/bas_gui_window_2.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:529776f5e93371e4a922bf91aa980d26f3cbad8a2c1c6fd7ecac7fe9e3ad4cba 3 | size 83189 4 | -------------------------------------------------------------------------------- /docs/images/bas_gui_window_3.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e70d188105c28545c14d1a9ce7bf21712c1d9a82c72d6e5b52d7ea6c096d7230 3 | size 115414 4 | -------------------------------------------------------------------------------- /logs/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sergerdn/py-bas-automation/de2afa685c4578383b600f6748448c26dd67c8cc/logs/.gitkeep -------------------------------------------------------------------------------- /pybas_automation/__init__.py: -------------------------------------------------------------------------------- 1 | """Default configuration for models.""" 2 | from pydantic import ConfigDict 3 | 4 | from .settings import STORAGE_SUBDIR 5 | 6 | default_model_config = ConfigDict(populate_by_name=True, extra="forbid", use_enum_values=True) 7 | 8 | __all__ = ["default_model_config", "STORAGE_SUBDIR"] 9 | -------------------------------------------------------------------------------- /pybas_automation/bas_actions/__init__.py: -------------------------------------------------------------------------------- 1 | """Implements settings of BAS actions.""" 2 | 
-------------------------------------------------------------------------------- /pybas_automation/bas_actions/browser/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | All actions to work with browser. 3 | 4 | These actions do not require a specific element for use. 5 | For example, load the url, set proxy, make a screenshot, etc. To click on an element or enter text in a specific field, 6 | click on this item and select an action from the menu. 7 | """ 8 | -------------------------------------------------------------------------------- /pybas_automation/bas_actions/browser/browser_settings/__init__.py: -------------------------------------------------------------------------------- 1 | """Browser / Browser Settings.""" 2 | 3 | from typing import List 4 | 5 | 6 | def browser_command_line_factory() -> List[str]: 7 | """Browser defaults command line arguments.""" 8 | 9 | return [ 10 | "--disk-cache-size=104857600", # 100 Mb 11 | "--disable-gpu-program-cache", 12 | "--disable-gpu-shader-disk-cache", 13 | "--disable-features=GpuProcessHighPriorityWin,GpuUseDisplayThreadPriority", 14 | "--lang=en", 15 | ] 16 | -------------------------------------------------------------------------------- /pybas_automation/bas_actions/browser/browser_settings/models.py: -------------------------------------------------------------------------------- 1 | """Browser / Browser Settings models.""" 2 | 3 | from enum import Enum 4 | from typing import List, Union 5 | 6 | from pydantic import BaseModel, DirectoryPath, Field, field_validator 7 | 8 | from pybas_automation import default_model_config 9 | from pybas_automation.bas_actions.browser.browser_settings import browser_command_line_factory 10 | from pybas_automation.bas_actions.browser.proxy.models import BasActionBrowserProxy 11 | from pybas_automation.bas_actions.fingerprint_switcher.apply_fingerprint.models import BasActionApplyFingerprintModel 12 | 13 | 14 | class 
WidevineEnum(str, Enum): 15 | """Widevine settings model.""" 16 | 17 | ENABLE = "enable" 18 | DISABLE = "disable" 19 | EMPTY = "" 20 | 21 | 22 | class SafeBrowsingEnum(str, Enum): 23 | """Safe Browsing settings model.""" 24 | 25 | ENABLE = "enable" 26 | DISABLE = "disable" 27 | EMPTY = "" 28 | 29 | 30 | class ChromeComponentsEnum(str, Enum): 31 | """Chrome components settings model.""" 32 | 33 | ENABLE = "enable" 34 | DISABLE = "disable" 35 | EMPTY = "" 36 | 37 | 38 | class BasActionBrowserSettingsComponents(BaseModel): 39 | """Bas Browser components settings model.""" 40 | 41 | model_config = default_model_config 42 | 43 | # Enable or disable Widevine plugin. 44 | # enable - Use Widevine 45 | # disable - Don't use Widevine 46 | # Empty string - Don't change this setting 47 | widevine: WidevineEnum = Field(default=WidevineEnum.ENABLE) 48 | 49 | # Enable Safe Browsing. It is technology embedded in Chrome, helps to protect users from malicious websites. 50 | # Its absence can be detected. 51 | # enable - Enable Safe Browsing 52 | # disable - Disable Safe Browsing 53 | # Empty string - Don't change this setting 54 | safe_browsing: SafeBrowsingEnum = Field(default=SafeBrowsingEnum.ENABLE) 55 | 56 | # Enable Chrome components. Increase profile size, but its absence can be detected. 57 | # enable - Enable components 58 | # disable - Disable components 59 | # Empty string - Don't change this setting 60 | components: ChromeComponentsEnum = Field(default=ChromeComponentsEnum.ENABLE) 61 | 62 | 63 | class BasActionBrowserSettingsRendering(BaseModel): 64 | """Bas Browser rendering settings model.""" 65 | 66 | model_config = default_model_config 67 | 68 | # Maximum times browser content can be rendered per one second. 69 | # The lower this value, the lower the CPU usage will be. Setting it too low may affect site operability. 70 | # Reducing it below 30 may lead to unpredictable consequences. Minimum value is 10. 
71 | maximum_fps: int = Field(default=30, gt=10, lt=60)  # NOTE(review): gt/lt are exclusive, so 10 and 60 themselves are rejected - confirm whether ge=10 was intended. 72 | 73 | 74 | class BasActionBrowserSettingsNetwork(BaseModel): 75 | """Bas Browser network settings model.""" 76 | 77 | # Unlike HTTP, the QUIC protocol is built on top of UDP. Not all proxies support UDP. 78 | # It means that enabling QUIC can cause problems when working with certain proxies. 79 | # It is recommended to enable this option only if you are sure that your proxy supports it. NOTE(review): described as disabled by default, but the field default below is True - confirm. 80 | # enable - Enable QUIC 81 | # disable - Disable QUIC 82 | enable_qiuc_protocol: bool = Field(default=True) 83 | 84 | 85 | class BasActionBrowserSettingsProfile(BaseModel): 86 | """Bas Browser profile settings model.""" 87 | 88 | model_config = default_model_config 89 | 90 | # String with profile folder. Slash type is not important. If folder doesn't exist, it will be created. If folder 91 | # already exists, BAS will use it as profile and restore all data from it like cookies, localstorage, 92 | # etc. By default, browser stores all profile data in temporary folder, you can use "temporary" keyword to switch 93 | # to new temporary profile. Empty string - Don't change; "temporary" - Switch to new temporary profile 94 | profile_folder_path: Union[DirectoryPath, str] = Field(default=DirectoryPath(".")) 95 | 96 | # If the profile folder already exists and has fingerprint data, 97 | # tells BAS to apply the fingerprint most recently used for that profile. 98 | always_load_fingerprint_from_profile_folder: bool = Field(default=False) 99 | 100 | # If the profile folder already exists and has proxy data, 101 | # tells BAS to apply the proxy most recently used for that profile. 
102 | always_load_proxy_from_profile_folder: bool = Field(default=False) 103 | 104 | @field_validator("profile_folder_path") 105 | @classmethod 106 | def profile_folder_path_validate(cls, v: Union[DirectoryPath, str]) -> DirectoryPath: 107 | """Validate profile_folder_path field.""" 108 | 109 | return DirectoryPath(v) 110 | 111 | 112 | class BasActionBrowserSettings(BaseModel): 113 | """Browser / Browser Settings model.""" 114 | 115 | model_config = default_model_config 116 | 117 | components: BasActionBrowserSettingsComponents = Field(default_factory=BasActionBrowserSettingsComponents) 118 | network: BasActionBrowserSettingsNetwork = Field(default_factory=BasActionBrowserSettingsNetwork) 119 | rendering: BasActionBrowserSettingsRendering = Field(default_factory=BasActionBrowserSettingsRendering) 120 | 121 | # Changes browser version for current thread. 122 | # This setting will restart browser and therefore reset all settings, so it is better to use it when thread starts. 123 | browser_version: str = Field(default="default") 124 | 125 | # Chromium command line arguments. 126 | command_line: Union[List[str]] = Field(default_factory=browser_command_line_factory) 127 | 128 | # Profile folder path. 129 | profile: BasActionBrowserSettingsProfile = Field(default_factory=BasActionBrowserSettingsProfile) 130 | 131 | # Proxy settings. 132 | proxy: Union[BasActionBrowserProxy, None] = Field(default=None) 133 | 134 | # Browser fingerprint. 
class BasActionBrowserProxy(BaseModel):
    """Proxy definition (server, port, type and optional credentials) attached to a browser profile."""

    model_config = default_model_config

    # Proxy host; defaults to the local machine.
    server: str = Field(default="127.0.0.1")
    # Proxy port; required, range-checked by the validator below.
    port: int
    # Proxy protocol.
    type: BasActionBrowserProxyTypeEnum = Field(default=BasActionBrowserProxyTypeEnum.HTTP)
    # Optional credentials; an empty string is the default for both.
    login: Optional[str] = Field(default="")
    password: Optional[str] = Field(default="")

    @field_validator("port")
    @classmethod
    def port_str_must_be_integer(cls, v: int) -> int:
        """Ensure the port lies in the range 1-65535 and return it unchanged."""

        if 1 <= v <= 65535:
            return v

        raise ValueError(f"must be in range 1..65535, got: {v}")
class BasActionApplyFingerprintModel(BaseModel):
    """
    Fingerprint switcher -> Apply fingerprint.

    Boolean switches passed to the BAS "Apply fingerprint" action. Every flag defaults to ``True`` except
    ``safe_element_size``, which defaults to ``False``.
    """

    model_config = default_model_config

    # If this setting is set to true, canvas will be enabled and noise will be added to all data returned from canvas.
    safe_canvas: bool = Field(default=True)

    # If this setting is set to true, PerfectCanvas replacement will be enabled. Fingerprint must contain
    # PerfectCanvas data in order to make it work. See "Get fingerprint" action for explanation.
    use_perfect_canvas: bool = Field(default=True)

    # If this setting is set to true, WebGL will be enabled, noise will be added to WebGL canvas and
    # your hardware properties, like video card vendor and renderer, will be changed.
    safe_webgl: bool = Field(default=True)

    # NOTE(review): the previous comment on this field was a verbatim copy of the WebGL description above.
    # Presumably this flag is the audio counterpart (noise added to audio data) - confirm against the BAS
    # "Apply fingerprint" documentation.
    safe_audio: bool = Field(default=True)

    # If this setting is set to true, battery API will show different values for each thread, this prevents sites
    # from detecting your real identity. In case the device from which the fingerprint was obtained doesn't have
    # battery API, 100% charge level will always be returned.
    safe_battery: bool = Field(default=True)

    # By default, browser searches for fonts only in system font folder. This may lead to inconsistencies during
    # fingerprint emulation if target fingerprint has more fonts than local system. Therefore, it is recommended to
    # download font pack with most popular fonts. This setting allows to use font pack if it is installed.
    # More info about font pack and download link can be found here: https://wiki.bablosoft.com/doku.php?id=fontpack
    use_font_pack: bool = Field(default=True)

    # If this setting is set to true, results of API which returns element coordinates will be updated to protect
    # against 'client rects' fingerprinting.
    safe_element_size: bool = Field(default=False)

    # Chrome browser has Sensor API, which allows to read data from devices like accelerometer, gyroscope or others.
    # Data from those devices is available only on mobile platforms. After checking this setting, data for those
    # devices will be generated and replaced automatically. Enable this option in order to emulate mobile
    # fingerprints more precisely.
    emulate_sensor_api: bool = Field(default=True)

    # Allows to better emulate devices with higher pixel density. With this setting enabled, emulation will be done
    # in the most natural way. It means that browser will render the page in a bigger resolution, just like on real
    # device. The tradeoff is higher system resources usage, because you need to perform more calculations to render
    # a bigger picture. Javascript settings related to pixel density, for example devicePixelRatio, will be replaced
    # correctly regardless of whether this setting is enabled or not.
    emulate_device_scale_factor: bool = Field(default=True)
def _url_to_ws_endpoint(endpoint_url: str) -> str:
    """
    Convert an HTTP endpoint URL to a WebSocket endpoint URL.

    A URL that already starts with ``ws`` is returned unchanged. Otherwise ``<endpoint_url>/json/version/`` is
    requested and the ``webSocketDebuggerUrl`` value of the JSON answer is returned.

    :param endpoint_url: HTTP endpoint URL.
    :return: WebSocket endpoint URL.

    :raises BrowserWsConnectError: If unable to connect to the HTTP endpoint URL (refused connection, connect/read
        timeout or any other transport-level failure).
    :raises ValueError: If the endpoint answers with a status code other than 200.
    """
    if endpoint_url.startswith("ws"):
        return endpoint_url

    logger.debug("Preparing WebSocket: retrieving WebSocket URL from %s", endpoint_url)

    http_url = endpoint_url if endpoint_url.endswith("/") else f"{endpoint_url}/"
    http_url += "json/version/"
    try:
        response = httpx.get(http_url)
    except httpx.TransportError as exc:
        # TransportError is the common base of ConnectError, the timeout errors and the other network failures.
        # Catching only ConnectError let e.g. a connect timeout escape as a raw httpx exception although the
        # contract of this function promises BrowserWsConnectError when the endpoint cannot be reached.
        raise BrowserWsConnectError(
            f"Cannot connect to {http_url}. This may not be a DevTools server. Consider connecting via ws://."
        ) from exc

    if response.status_code != 200:
        raise ValueError(
            f"Unexpected status {response.status_code} when connecting to {http_url}. "
            "This might not be a DevTools server. Consider connecting via ws://."
        )

    json_data = json.loads(response.text)
    logger.debug("WebSocket preparation response: %s", json_data)

    return str(json_data["webSocketDebuggerUrl"])
def __init__(
    self, browser_profile: "BrowserProfile", remote_debugging_port: int, unique_process_id: Union[str, None] = None
):
    """
    Initialize the BrowserAutomator instance.

    :param browser_profile: The browser profile to use.
    :param remote_debugging_port: The remote debugging port to connect to (coerced with ``int()``).
    :param unique_process_id: A unique identifier for the `Worker.exe` process. Retrieved from the command line.
        A falsy value (``None`` or an empty string) is stored as ``None``.
    """

    self.browser_profile = browser_profile
    self.remote_debugging_port = int(remote_debugging_port)
    self.unique_process_id = unique_process_id or None

    # Always define ``_javascript_code``. The ``bas_*`` helpers interpolate it into a script *before* the
    # ``unique_process_id`` check is performed, so leaving it unset made them fail with AttributeError instead of
    # the ValueError their docstrings promise.
    if self.unique_process_id:
        self._javascript_code = f"location.reload['_bas_hide_{unique_process_id}']"
    else:
        self._javascript_code = ""
async def _fetch_attached_sessions(self) -> List[Dict]:
    """
    Ask the browser for its targets and keep only the attached ones.

    Sends ``Target.getTargets`` through ``self.cdp_client`` and filters the returned ``targetInfos`` entries by
    their ``attached`` flag.

    :return: List of attached session information.
    :raises ValueError: If the answer contains no (or an empty) ``targetInfos`` list.
    """

    response = await self.cdp_client.send_command("Target.getTargets")

    targets = response.get("targetInfos")
    if not targets:
        raise ValueError("Unable to fetch attached sessions")

    attached_sessions = []
    for target in targets:
        if target["attached"]:
            attached_sessions.append(target)

    return attached_sessions
async def __aenter__(self) -> "BrowserAutomator":
    """
    Asynchronous enter method to initialize the connection and retrieve session details.

    Resolves the WebSocket endpoint, reads the browser version, starts Playwright, attaches to the browser over
    CDP, picks the first context/page, opens a CDP session for that page and prepares it.

    :return: BrowserAutomator instance.
    :raises BrowserWsConnectError: If unable to connect to the browser's remote debugging port.
    """

    self.connect()

    await self._get_browser_version()
    logger.info("Retrieved browser version: %s", self.browser_version)

    self.pw = await async_playwright().start()
    try:
        self.browser = await self.pw.chromium.connect_over_cdp(self.ws_endpoint.ws_url.unicode_string())
        self.context = self.browser.contexts[0]
        self.page = self.context.pages[0]

        # Fetch the attached sessions
        sessions = await self._fetch_attached_sessions()
        logger.debug("Attached sessions retrieved: %s", sessions)

        self.cdp_session = await self.context.new_cdp_session(self.page)
        await self._prepare_cdp()

        if self.unique_process_id:
            _bas_hide_debug_result = await self._bas_hide_debug(page=self.page)
            logger.debug("BAS_HIDE_DEBUG result: %s", _bas_hide_debug_result)
    except BaseException:
        # ``async with`` does not call __aexit__ when __aenter__ raises, so the Playwright instance started
        # above has to be stopped here, otherwise it is leaked. BaseException also covers task cancellation.
        await self.pw.stop()
        raise

    logger.debug("Successfully connected to browser: %s", self.browser)

    return self
async def bas_scroll_mouse_to_coordinates(self, x: int, y: int, page: "Union[Page, None]" = None) -> Any:
    """
    Call the BAS ``BrowserAutomationStudio_ScrollToCoordinates`` function for the given coordinates.

    :param x: The x coordinate.
    :param y: The y coordinate.
    :param page: The page to run the call on; ``self.page`` is used when omitted.

    :raises ValueError: If the self.unique_process_id is not set.

    :return: The result of the JavaScript function call.
    """

    target_page = self.page if page is None else page
    script = f"{self._javascript_code}['BrowserAutomationStudio_ScrollToCoordinates']({x},{y},true)"

    return await self._bas_hide_call(page=target_page, javascript_func_code=script)
class CDPClient:
    """CDPClient is a wrapper around the Chrome DevTools Protocol (CDP) that allows sending commands to the browser."""

    ws_endpoint: WsUrlModel
    # Counter used as the ``id`` of outgoing messages; incremented before every command.
    message_id: int

    def __init__(self, ws_endpoint: WsUrlModel):
        """
        Initialize CDPClient.

        :param ws_endpoint: The WebSocket endpoint URL.
        """

        self.ws_endpoint = ws_endpoint
        self.message_id = 0

    async def send_command(self, method: str, params: Optional[Dict[str, Any]] = None) -> Dict:
        """
        Send a command to the browser via CDP.

        A new WebSocket connection is opened for every command and closed once the matching reply was read.

        :param method: The CDP method to call.
        :param params: The parameters to pass to the CDP method.

        :return: The ``result`` object of the reply (may be empty for commands that return nothing).
        :raises ValueError: If no response is received or the reply carries no ``result`` (e.g. an error reply).
        """
        url = self.ws_endpoint.ws_url.unicode_string()

        async with websockets.connect(url) as ws:  # type: ignore
            self.message_id += 1
            message_id = self.message_id
            message = {
                "id": message_id,
                "method": method,
                "params": params or {},
            }

            logger.debug("Sending message: %s", message)

            await ws.send(json.dumps(message))

            # Wait for the response. Frames that do not carry our ``id`` (protocol events) are skipped instead of
            # being mistaken for the reply.
            while True:
                response = await ws.recv()
                if response is None:
                    raise ValueError("Unable to fetch response")

                data = json.loads(response)
                logger.debug("Received message: %s", data)

                if data.get("id") == message_id:
                    break

        # Test for the key, not for truthiness: a successful command may legitimately answer ``"result": {}``.
        if "result" not in data:
            raise ValueError(f"Unable to fetch result: {data}")

        return dict(data["result"])
class BrowserProfile(BaseModel):
    """Represents a browser profile with customizable settings."""

    model_config = default_model_config

    # Directory holding the profile; a fresh one is created by the default factory when omitted.
    profile_dir: DirectoryPath = Field(default_factory=_user_data_dir_default_factory)
    # Raw fingerprint payload, if any.
    fingerprint_raw: Union[str, None] = Field(default=None)
    # Proxy assigned to this profile, if any.
    proxy: Union[BasActionBrowserProxy, None] = Field(default=None)

    def save_proxy_to_profile(self) -> bool:
        """
        Save the proxy to the profile directory.

        The proxy is written as JSON into ``<profile_dir>/<STORAGE_SUBDIR>/<_proxy_filename>``; the sub-directory
        is created when missing.

        :return: True if the proxy was saved successfully, False if the profile has no proxy.
        """

        if self.proxy is None:
            return False

        bas_proxy = BasActionBrowserProxy(
            server=self.proxy.server,
            port=self.proxy.port,
            type=self.proxy.type,
            login=self.proxy.login,
            password=self.proxy.password,
        )

        sub_dir = self.profile_dir.joinpath(STORAGE_SUBDIR)
        sub_dir.mkdir(parents=True, exist_ok=True)

        proxy_filename = sub_dir.joinpath(_proxy_filename)
        # Context manager so the handle is flushed and closed deterministically (it was previously left open).
        with proxy_filename.open("w", encoding="utf-8") as proxy_file:
            proxy_file.write(json.dumps(bas_proxy.model_dump(mode="json")))

        return True
def _user_data_dir_default_factory() -> DirectoryPath:
    """
    Return the default user data directory.

    Creates a new uniquely named directory (``tempfile.mkdtemp``) inside the profiles storage directory and
    returns its path.
    """

    profiles_dir = create_storage_dir_in_app_data(storage_dir=_storage_dir)

    # ``exist_ok=True`` replaces the former exists()/mkdir() pair, which could raise FileExistsError when two
    # processes created the directory at the same moment.
    profiles_dir.mkdir(exist_ok=True)

    profile_name = tempfile.mkdtemp(dir=profiles_dir)
    return DirectoryPath(profile_name)
6 | """ 7 | import json 8 | import os 9 | import tempfile 10 | from typing import List, Union 11 | 12 | import filelock 13 | from fastapi.encoders import jsonable_encoder 14 | from pydantic import DirectoryPath 15 | 16 | from pybas_automation import STORAGE_SUBDIR 17 | from pybas_automation.bas_actions.browser.proxy import BasActionBrowserProxy 18 | from pybas_automation.browser_profile.models import BrowserProfile 19 | from pybas_automation.browser_profile.settings import (_filelock_filename, _fingerprint_raw_filename, _proxy_filename, 20 | _storage_dir) 21 | from pybas_automation.fingerprint import BasFingerprintRequest, get_fingerprint 22 | from pybas_automation.utils import create_storage_dir_in_app_data, get_logger 23 | 24 | logger = get_logger() 25 | 26 | 27 | class BrowserProfileStorageExistsError(Exception): 28 | """Raised when a browser profile already exists in the storage.""" 29 | 30 | 31 | class FingerprintError(Exception): 32 | """Raised when a fingerprint key is empty.""" 33 | 34 | 35 | class BrowserProfileStorage: 36 | """Handles the storage and retrieval of browser profiles.""" 37 | 38 | storage_dir: DirectoryPath 39 | fingerprint_key: Union[str, None] 40 | 41 | _profiles: Union[list[BrowserProfile], None] = None 42 | _lock: filelock.FileLock 43 | 44 | def __init__( 45 | self, storage_dir: Union[DirectoryPath, None] = None, fingerprint_key: Union[str, None] = None 46 | ) -> None: 47 | """ 48 | Initialize BrowserStorage. 49 | 50 | :param storage_dir: The directory to store the browser profiles. 51 | :param fingerprint_key: Your personal fingerprint key of FingerprintSwitcher. 52 | 53 | :raises ValueError: If the storage_dir is not a directory. 
def count(self) -> int:
    """
    Count the number of browser profiles in the storage.

    Every profile is a sub-directory of ``storage_dir``, so only directories are counted. Plain files that live
    next to the profiles - such as the lock file this storage places in the same directory - are ignored.

    :return: The number of browser profiles in the storage.
    """

    with os.scandir(self.storage_dir) as entries:
        return sum(1 for entry in entries if entry.is_dir())
def save(self, browser_profile: BrowserProfile) -> None:
    """
    Save the browser profile to disk.

    Writes the raw fingerprint and the proxy (as JSON) into the ``STORAGE_SUBDIR`` sub-directory of the profile
    directory; a part that is ``None`` is not written.

    :param browser_profile: BrowserProfile instance.
    :return: None.
    """

    sub_dir = browser_profile.profile_dir.joinpath(STORAGE_SUBDIR)
    sub_dir.mkdir(parents=True, exist_ok=True)

    if browser_profile.fingerprint_raw is not None:
        fingerprint_filename = sub_dir.joinpath(_fingerprint_raw_filename)
        # Context managers close the handles deterministically; they were previously left open.
        with fingerprint_filename.open("w", encoding="utf-8") as fingerprint_file:
            fingerprint_file.write(browser_profile.fingerprint_raw)

    if browser_profile.proxy is not None:
        # Join the bare file name exactly once. Joining the already joined path a second time produced
        # ``<sub_dir>/<sub_dir>/proxy.json`` for relative profile directories.
        proxy_filename = sub_dir.joinpath(_proxy_filename)
        with proxy_filename.open("w", encoding="utf-8") as proxy_file:
            proxy_file.write(json.dumps(jsonable_encoder(browser_profile.proxy)))
143 | """ 144 | profile_dir = self.storage_dir.joinpath(profile_name) 145 | 146 | if not profile_dir.exists(): 147 | raise FileNotFoundError(f"Browser profile not found: {profile_dir}") 148 | if not profile_dir.is_dir(): 149 | raise ValueError(f"Browser profile is not a directory: {profile_dir}") 150 | 151 | browser_profile = BrowserProfile(profile_dir=profile_dir) 152 | 153 | sub_dir = profile_dir.joinpath(STORAGE_SUBDIR) 154 | 155 | fingerprint_filename = sub_dir.joinpath(_fingerprint_raw_filename) 156 | if fingerprint_filename.exists(): 157 | fingerprint_raw = fingerprint_filename.open("r", encoding="utf-8").read() 158 | browser_profile.fingerprint_raw = fingerprint_raw 159 | 160 | proxy_filename = sub_dir.joinpath(_proxy_filename) 161 | if proxy_filename.exists(): 162 | _proxy = json.loads(proxy_filename.open("r", encoding="utf-8").read()) 163 | browser_profile.proxy = BasActionBrowserProxy(**_proxy) 164 | 165 | return browser_profile 166 | 167 | def load_all(self) -> List[BrowserProfile]: 168 | """ 169 | Load all browser profiles from disk. 170 | 171 | :return: List[BrowserProfile]. 
172 | """ 173 | if self._profiles is None: 174 | self._profiles = [] 175 | 176 | for profile_name in os.listdir(self.storage_dir): 177 | browser_profile = self.load(profile_name=profile_name) 178 | self._profiles.append(browser_profile) 179 | 180 | return self._profiles 181 | -------------------------------------------------------------------------------- /pybas_automation/fingerprint/__init__.py: -------------------------------------------------------------------------------- 1 | """Module for interacting with the BAS fingerprint API.""" 2 | 3 | from .fingerprint import FingerprintRequestException, get_fingerprint 4 | from .models import BasFingerprintRequest 5 | 6 | __all__ = [ 7 | "BasFingerprintRequest", 8 | "FingerprintRequestException", 9 | "get_fingerprint", 10 | ] 11 | -------------------------------------------------------------------------------- /pybas_automation/fingerprint/fingerprint.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions for getting fingerprints from the fingerprint API. 
FINGERPRINT_BASE_URL = "https://fingerprints.bablosoft.com/prepare?version=5"


class FingerprintRequestException(Exception):
    """Raised when a fingerprint request fails."""


def get_fingerprint(
    request_data: BasFingerprintRequest,
) -> str:
    """
    Get a fingerprint for the given fingerprint request.

    :param request_data: The request model; its fields become query parameters.
    :return: The raw response body, stripped of surrounding whitespace.

    :raises FingerprintRequestException: If the service does not answer with HTTP 200,
        if the body is not a JSON object, or if the payload is not marked as valid.
    """

    query = dict(request_data.model_dump())

    # The API expects the tags as one comma-separated value.
    query["tags"] = ",".join(request_data.tags)
    query["returnpc"] = "true"

    url = f"{FINGERPRINT_BASE_URL}&{urlencode(query)}"

    response = httpx.get(url, timeout=10)
    if response.status_code != 200:
        raise FingerprintRequestException(f"Failed to get fingerprint: {response.text}")

    try:
        payload = response.json()
    except ValueError as err:
        # A 200 response with a non-JSON body is still a failed fingerprint request.
        raise FingerprintRequestException(f"Failed to get fingerprint: {response.text}") from err

    if not isinstance(payload, dict) or not payload.get("valid"):
        raise FingerprintRequestException(f"Failed to get fingerprint: {payload}")

    return response.text.strip()
class BrightdataCredentialsModel(BaseModel):
    """Username and password pair for a Brightdata account."""

    model_config = default_model_config

    username: str
    password: str


class BrightDataProxyModel(BaseModel):
    """Connection details of a BrightData proxy endpoint."""

    model_config = default_model_config

    hostname: str = Field(default="brd.superproxy.io")
    port: int = Field(default=22225)
    credentials: BrightdataCredentialsModel

    def to_bas_proxy(self, keep_session: bool = True) -> BasActionBrowserProxy:
        """
        Build the BasActionBrowserProxy model for this proxy.

        :param keep_session: If True, a random session suffix is appended to the login so the
            proxy is used with the same session to avoid ip changes.
        :return: An HTTP proxy description using this model's host, port and credentials.
        """

        username = self.credentials.username
        login = f"{username}-session-{random_string(10)}" if keep_session else username

        proxy = BasActionBrowserProxy(
            server=self.hostname,
            port=self.port,
            type=BasActionBrowserProxyTypeEnum.HTTP,
            login=login,
            password=self.credentials.password,
        )
        return proxy
(Browser Automation Studio). 15 | 16 | This model holds all the essential details required to execute 17 | a task through the BAS GUI. 18 | """ 19 | 20 | model_config = default_model_config 21 | # Unique identifier for the task 22 | task_id: UUID = Field(default_factory=uuid4) 23 | # Port number, updated when task is invoked by a BAS compiled script 24 | remote_debugging_port: Union[int, None] = None 25 | 26 | # Unique process ID, updated when task is invoked by a BAS compiled script 27 | unique_process_id: Union[str, None] = None 28 | 29 | # Browser settings associated with the task 30 | browser_settings: BasActionBrowserSettings = Field(default_factory=BasActionBrowserSettings) 31 | -------------------------------------------------------------------------------- /pybas_automation/task/settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Settings for the task module. 3 | """ 4 | 5 | 6 | from pydantic import DirectoryPath, FilePath 7 | 8 | _storage_dir = DirectoryPath("PyBASProfilesTasks") 9 | 10 | _task_filename = FilePath("tasks.json") 11 | _filelock_filename = FilePath("tasks.lock") 12 | -------------------------------------------------------------------------------- /pybas_automation/task/storage.py: -------------------------------------------------------------------------------- 1 | """Task storage module. 
class TaskStorageModeEnum(str, Enum):
    """Task storage is used to specify the mode to open the tasks file in."""

    READ = "r"
    READ_WRITE = "rw"


class TaskDuplicateError(Exception):
    """Raised when a task already exists in the storage."""


class TaskStorage:
    """TaskStorage is responsible for storing tasks to disk and loading tasks from disk into memory."""

    storage_dir: DirectoryPath
    mode: TaskStorageModeEnum = TaskStorageModeEnum.READ
    task_file_path: FilePath

    _tasks: Union[list[BasTask], None] = None
    _tasks_unique_id: Set[UUID]
    _lock: filelock.FileLock

    def __init__(
        self,
        storage_dir: Union[None, DirectoryPath] = None,
        task_filename: Union[None, FilePath] = None,
        mode: Union[TaskStorageModeEnum, None] = None,
    ) -> None:
        """
        Initialize TaskStorage. If the storage_dir is not provided, the default storage directory will be used.

        Tasks already present in the tasks file are loaded into memory.

        :returns: None

        :param storage_dir: The directory to store the tasks file.
        :param task_filename: The filename of the tasks file (a bare file name, without directories).
        :param mode: The mode to open the tasks file in. Defaults to read-only.

        :raises ValueError: If the storage_dir is not a directory, if task_filename contains a
            directory part, or if the mode is not a valid value.
        """

        if storage_dir is None:
            self.storage_dir = create_storage_dir_in_app_data(storage_dir=_storage_dir)
        else:
            if not os.path.isdir(storage_dir):
                raise ValueError(f"storage_dir is not a directory: {storage_dir}")
            self.storage_dir = DirectoryPath(storage_dir)

        if task_filename is None:
            self.task_file_path = FilePath(os.path.join(self.storage_dir, _task_filename))
        else:
            task_filename = FilePath(task_filename)
            # Only a bare file name is accepted; the file always lives in storage_dir.
            if str(task_filename.parent) != ".":
                raise ValueError(f"task_filename is not a relative path: {task_filename}")

            self.task_file_path = FilePath(os.path.join(self.storage_dir, task_filename))

        # Set the mode of the task storage
        if mode is None:
            self.mode = TaskStorageModeEnum.READ
        else:
            try:
                self.mode = TaskStorageModeEnum(mode)
            except ValueError as err:
                raise ValueError(f"mode is not a valid value: {mode}") from err

        self._tasks_unique_id = set()
        self._lock = filelock.FileLock(os.path.join(self.storage_dir, _filelock_filename))

        self.load_all()

    def __repr__(self) -> str:
        """Return a string representation of the TaskStorage."""
        return (
            f"<TaskStorage storage_dir={self.storage_dir} "
            f"task_file_path={self.task_file_path} mode={self.mode.value}>"
        )

    def _ensure_writable(self, action: str) -> None:
        """Raise ValueError unless the storage is writable and its lock exists."""
        if self.mode == TaskStorageModeEnum.READ:
            raise ValueError(f"Cannot {action} tasks in read mode.")
        if self._lock is None:
            raise ValueError("Lock is not initialized.")

    def _write_tasks(self, payload: list) -> None:
        """Write the JSON-ready task list to the tasks file."""
        with self.task_file_path.open(mode="w", encoding="utf-8") as f:
            json.dump(payload, f, indent=4)

    def clear(self) -> bool:
        """
        Clear all tasks from the storage. This will also delete the tasks file and clear the tasks in memory.

        :return: True if the tasks file existed and was deleted, False otherwise.

        :raises ValueError: If the task storage is in read-only mode.
        """
        self._ensure_writable("clear")

        with self._lock:
            self._tasks = None
            self._tasks_unique_id = set()
            if self.task_file_path.exists():
                self.task_file_path.unlink()
                return True

        return False

    def save(self, task: BasTask) -> None:
        """
        Save a task to the storage.

        :return: None

        :param task: The task to save.

        :raises ValueError: If the task storage is in read-only mode.
        :raises TaskDuplicateError: If a task with the same id already exists.
        """

        self._ensure_writable("store")

        with self._lock:
            if self._tasks is None:
                self._tasks = []
            if task.task_id in self._tasks_unique_id:
                raise TaskDuplicateError(f"Task with id {task.task_id} already exists.")

            self._tasks.append(task)
            self._tasks_unique_id.add(task.task_id)

            self._write_tasks(jsonable_encoder(self._tasks))

    def update(self, task: BasTask) -> None:
        """
        Replace the stored task that has the same id and rewrite the tasks file.

        :return: None

        :param task: The new version of an already stored task.

        :raises ValueError: If the task storage is in read-only mode, if no tasks are loaded,
            or if no stored task has this id.
        """
        self._ensure_writable("store")
        if self._tasks is None:
            raise ValueError("No tasks to update.")

        with self._lock:
            if task.task_id not in self._tasks_unique_id:
                raise ValueError(f"Task with id {task.task_id} does not exist.")

            for position, stored in enumerate(self._tasks):
                if stored.task_id == task.task_id:
                    self._tasks[position] = task
                    break
            else:
                # The id set and the list disagree; report it the same way.
                raise ValueError(f"Task with id {task.task_id} does not exist.")

            self._write_tasks(jsonable_encoder(self._tasks))

    def save_all(self) -> bool:
        """
        Save all tasks to the storage.

        :return: True once the tasks were written.

        :raises ValueError: If the task storage is in read-only mode or no tasks are loaded.
        """
        self._ensure_writable("store")

        if self._tasks is None:
            raise ValueError("No tasks to save.")

        with self._lock:
            self._write_tasks([t.model_dump(mode="json") for t in self._tasks])

        return True

    def get(self, task_id: Union[UUID, str]) -> Union[BasTask, None]:
        """
        Get a task from the storage.

        :param task_id: The task id to get, as a UUID or as its string form.

        :return: The task if it exists, None otherwise.
        """
        if self._tasks is None:
            return None

        for task in self._tasks:
            if task.task_id == task_id or str(task.task_id) == task_id:
                return task

        return None

    def get_all(self) -> Union[list[BasTask], None]:
        """
        Get all tasks from the storage.

        :return: A list of tasks if they exist, None otherwise.
        """
        return self._tasks

    def count(self) -> int:
        """
        Get the number of tasks in the storage.

        :return: The number of tasks in memory; 0 when nothing has been loaded.
        """
        return 0 if self._tasks is None else len(self._tasks)

    def load_all(self) -> bool:
        """
        Load all tasks from the storage into memory.

        Tasks already in memory are replaced by the content of the tasks file.
        When the file does not exist, the in-memory state is left untouched.

        :return: True if the tasks were loaded, False if the tasks file does not exist.

        :raises ValueError: If the lock is not initialized.
        """

        # Check if the task file exists.
        if not os.path.exists(self.task_file_path):
            return False

        # Ensure the lock has been initialized.
        if self._lock is None:
            raise ValueError("Lock is not initialized.")

        with self._lock:  # Acquire the lock.
            with self.task_file_path.open(mode="r", encoding="utf-8") as f:
                tasks_from_file = json.load(f)

            # Clear existing tasks in memory.
            self._tasks = []
            self._tasks_unique_id = set()

            # Populate tasks from the file into memory.
            for task_data in tasks_from_file:
                task = BasTask(**task_data)
                self._tasks.append(task)
                self._tasks_unique_id.add(task.task_id)

        return True
def create_storage_dir_in_app_data(storage_dir: DirectoryPath) -> DirectoryPath:
    """
    Create a storage directory in the local app data directory.

    The base directory is read from the LOCALAPPDATA environment variable.

    :param storage_dir: The name of the storage directory.
    :return: Directory path to the created storage directory in the local app data directory.

    :raises ValueError: If LOCALAPPDATA is not set or does not point to a directory.
    """
    # Plain pathlib is used for construction; the pydantic path aliases are
    # only needed for the annotations.
    from pathlib import Path

    env_path = os.getenv("LOCALAPPDATA")
    if not env_path:
        raise ValueError("Environment variable LOCALAPPDATA is not set.")

    local_app_data_path = Path(env_path)

    # is_dir() is already False for a missing path, so one check is enough.
    if not local_app_data_path.is_dir():
        raise ValueError("Cannot find local app data path.")

    target_dir = local_app_data_path.joinpath(storage_dir)
    # exist_ok avoids the race between checking for the directory and creating it.
    target_dir.mkdir(exist_ok=True)

    return target_dir


def get_logger() -> logging.Logger:
    """
    Return a logger named based on the caller's full module path.
    Format: "[<module name>]"

    :raises ValueError: If the caller's module cannot be determined.
    """

    # Look only at the direct caller's frame instead of materialising the whole stack.
    current = inspect.currentframe()
    caller = current.f_back if current is not None else None
    try:
        module = inspect.getmodule(caller) if caller is not None else None
    finally:
        # Drop the frame references so they do not keep the callers' locals alive.
        del current, caller

    if module is None:
        raise ValueError("Could not determine the caller's module.")

    return logging.getLogger(f"[{module.__name__}]")
def timing(f):  # type: ignore
    """
    Time functions for debugging purposes.

    The wrapped function's name, arguments and duration are logged at DEBUG level
    after every successful call.
    """
    # A monotonic clock is not affected by system clock adjustments.
    from time import perf_counter

    @wraps(f)
    def wrap(*args, **kw):  # type: ignore
        started = perf_counter()
        result = f(*args, **kw)
        elapsed = perf_counter() - started
        # Lazy %-formatting: the message is only built when DEBUG is enabled.
        logger.debug("func:%s args:[%s, %s] took: %.4f sec", f.__name__, args, kw, elapsed)

        return result

    return wrap


def random_string(length: int = 10) -> str:
    """
    Generate a random string of lowercase ASCII letters and digits.

    :param length: The length of the string.
    :return: The generated string; empty when length is 0.
    """

    alphabet = string.ascii_lowercase + string.digits
    return "".join(random.choices(alphabet, k=length))
5 | authors = ["sergerdn <64213648+sergerdn@users.noreply.github.com>"] 6 | homepage = "https://github.com/sergerdn/py-bas-automation" 7 | repository = "https://github.com/sergerdn/py-bas-automation" 8 | keywords = [ 9 | "headless Chromium", "Python browser automation", "browser fingerprint protection", 10 | "prevent browser fingerprinting" 11 | ] 12 | classifiers = [ 13 | "Topic :: Internet :: WWW/HTTP :: Browsers", 14 | "Topic :: Software Development :: Libraries :: Python Modules" 15 | ] 16 | readme = ".pypi/README.md" 17 | license = "MIT" 18 | include = ["CHANGELOG.md"] 19 | 20 | [tool.commitizen] 21 | name = "cz_conventional_commits" 22 | version = "0.1.16" 23 | version_files = [ 24 | "pyproject.toml:version", 25 | ] 26 | 27 | [tool.poetry.dependencies] 28 | python = "^3.11" 29 | httpx = { extras = ["socks"], version = "^0.25.1" } 30 | pydantic = "^2.5.0" 31 | python-dotenv = "^1.0.0" 32 | websockets = "^12.0" 33 | filelock = "^3.13.1" 34 | fastapi = "^0.104.1" 35 | 36 | [tool.poetry.group.dev.dependencies] 37 | pytest = "^7.4.3" 38 | pytest-vcr = "^1.0.2" 39 | pycountry = "^23.12.11" 40 | black = "^23.11.0" 41 | mypy = "^1.7.0" 42 | isort = "^5.12.0" 43 | flake8 = "^6.1.0" 44 | pytest-cov = "^4.1.0" 45 | pytest-asyncio = "^0.21.1" 46 | pydocstyle = "^6.3.0" 47 | pylint = { extras = ["spelling"], version = "^3.0.2" } 48 | pylint-pydantic = "^0.3.0" 49 | autopep8 = "^2.0.4" 50 | pydeps = "^1.12.17" 51 | commitizen = "^3.12.0" 52 | nest-asyncio = "^1.5.8" 53 | 54 | [tool.poetry.group.dev-e2e-windows.dependencies] 55 | psutil = "^5.9.6" 56 | types-psutil = "^5.9.5.17" 57 | pywinauto = "^0.6.8" 58 | 59 | [tool.poetry.group.cmd.dependencies] 60 | click = "^8.1.7" 61 | playwright = {extras = ["chromium"], version = "^1.39.0"} 62 | 63 | [tool.poetry.group.cmd-dev.dependencies] 64 | pytest-playwright = "^0.4.2" 65 | 66 | [tool.poetry.group.docs.dependencies] 67 | humanmark = "^0.5.1" 68 | 69 | [tool.black] 70 | line-length = 120 71 | 72 | [tool.isort] 73 | line_length 
PROJECT_PATH = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../"))
DIST_FOLDER = os.path.join(PROJECT_PATH, "./.pypi")


def _has_no_reference(node):
    """Return True for nodes that carry their URL inline instead of via a reference."""
    return node.reference is None


def _absolutize_urls(nodes, base_url, message_prefix, postprocess=None):
    """Rewrite every relative URL in nodes against base_url and report each change."""
    for node in nodes:
        if node.url.startswith(("http://", "https://")):
            continue

        old_url = str(node.url)
        new_url = urljoin(base_url, node.url)
        if postprocess is not None:
            new_url = postprocess(new_url)
        node.url = new_url

        print(f"{message_prefix} from: {old_url}, to: {node.url}")


def update_readme_links(readme_path, target_readme_path, base_url, image_base_url):
    """
    Write a copy of the README whose relative links and images are absolute.

    :param readme_path: Path of the README to read.
    :param target_readme_path: Path the rewritten README is written to.
    :param base_url: Base URL for relative links.
    :param image_base_url: Base URL for relative image links.
    """
    # newline="" keeps line endings untouched on read and on write.
    with open(readme_path, "r", encoding="utf-8", newline="") as f:
        doc = humanmark.load(f)

    print("Readme content has been loaded.")

    # Link nodes without a reference; a negative depth searches the entire tree.
    links = doc.find(humanmark.ast.Link, f=_has_no_reference, depth=-1)
    _absolutize_urls(links, base_url, "Updating link")

    # Image nodes without a reference; images are published without the "docs" prefix.
    img_links = doc.find(humanmark.ast.Image, f=_has_no_reference, depth=-1)
    _absolutize_urls(
        img_links,
        image_base_url,
        "Updating image link",
        postprocess=lambda url: url.replace("/docs/images/", "/images/"),
    )

    with open(target_readme_path, "w", encoding="utf-8", newline="") as f:
        humanmark.dump(doc, f)


def main():
    """Generate the PyPI README with absolute links inside the distribution folder."""
    os.makedirs(DIST_FOLDER, exist_ok=True)

    base_url = "https://github.com/sergerdn/py-bas-automation/blob/develop/"
    # https://raw.githubusercontent.com/greyli/flask-share/master/images/demo.png
    image_base_url = "https://sergerdn.github.io/py-bas-automation/"

    readme_path = os.path.join(PROJECT_PATH, "README.md")
    target_readme_path = os.path.join(DIST_FOLDER, "README.md")
    update_readme_links(readme_path, target_readme_path, base_url, image_base_url)

    print("README links have been updated.")


if __name__ == "__main__":
    main()
def _find_free_port() -> int:
    """
    Return a port number that was free on the local machine at call time.

    A TCP socket is bound to port 0 so the operating system picks the port;
    the number is read back and the socket is closed before returning. Useful
    for obtaining a port for temporary use (e.g., in tests).

    :return: An integer representing the free port number.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with closing(sock):
        sock.bind(("", 0))
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        _address, port = sock.getsockname()
        return int(port)
44 | monkeypatch.undo() 45 | if os.path.exists(test_dir): 46 | for _x in range(0, 60): 47 | try: 48 | shutil.rmtree(test_dir) 49 | except Exception: # type: ignore 50 | time.sleep(1) 51 | continue 52 | break 53 | 54 | shutil.rmtree(test_dir, ignore_errors=True) 55 | 56 | 57 | @pytest.fixture(scope="module") 58 | def fingerprint_key() -> Union[str, None]: 59 | """ 60 | Returns the fingerprint key from the environment variables. 61 | 62 | :return: Fingerprint key 63 | :raises ValueError: If the fingerprint key is not set 64 | """ 65 | 66 | fingerprint_key = os.environ.get("FINGERPRINT_KEY", None) 67 | if not fingerprint_key: 68 | raise ValueError("FINGERPRINT_KEY not set") 69 | 70 | return fingerprint_key 71 | -------------------------------------------------------------------------------- /tests/contrib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sergerdn/py-bas-automation/de2afa685c4578383b600f6748448c26dd67c8cc/tests/contrib/__init__.py -------------------------------------------------------------------------------- /tests/contrib/socks5_server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sergerdn/py-bas-automation/de2afa685c4578383b600f6748448c26dd67c8cc/tests/contrib/socks5_server/__init__.py -------------------------------------------------------------------------------- /tests/contrib/socks5_server/server.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import select 3 | import socket 4 | import struct 5 | import sys 6 | import time 7 | from socketserver import BaseRequestHandler, TCPServer, ThreadingMixIn 8 | 9 | logger = logging.getLogger("[socks5-server]") 10 | # Constants 11 | SOCKS_VERSION = 5 12 | CONNECT = 1 13 | RESERVED = 0 14 | FAILURE = 0xFF 15 | USERNAME_PASSWORD_VERSION = 1 16 | CONNECTION_TIMEOUT = 60 * 15 * 1000 17 | 
COPY_LOOP_BUFFER_SIZE = 4096
BIND_PORT = 0  # set to 0 if we are binding an address, lets the kernel decide a free port

# Buffer sizes
GREETING_SIZE = 2
AUTH_METHOD_SIZE = 1
VERSION_SIZE = 1
ID_LEN_SIZE = 1
PW_LEN_SIZE = 1
CONN_NO_PORT_SIZE = 4
CONN_PORT_SIZE = 2
DOMAIN_SIZE = 1


class AuthMethod:
    """Authentication method identifiers exchanged in the SOCKS5 greeting."""

    NoAuth = 0
    GSSAPI = 1
    UsernamePassword = 2
    Invalid = 0xFF


class StatusCode:
    """Reply codes sent back to the client for a connection request."""

    Success = 0
    GeneralFailure = 1
    NotAllowed = 2
    NetUnreachable = 3
    HostUnreachable = 4
    ConnRefused = 5
    TTLExpired = 6
    CommandNotSupported = 7
    AddressTypeNotSupported = 8


class AddressDataType:
    """Address type (ATYP) identifiers used in requests and replies."""

    IPv4 = 1
    DomainName = 3
    IPv6 = 4


class SOCKS5ProxyServer(ThreadingMixIn, TCPServer):
    """Just the server which will process a dictionary of options and initialise the socket server"""

    def __init__(self, options):
        """Validate ``options`` and start listening.

        Recognised keys: ``auth`` (tuple ``(username, password)`` of ``str``),
        ``bind_address`` (local address for outgoing sockets), ``listen_ip``
        (default ``"0.0.0.0"``) and ``port`` (default ``1080``).

        Invalid options are logged and terminate via ``sys.exit(1)``; the exit
        status is non-zero so a misconfiguration is not reported as success.
        """
        options = options or {}
        # Check types, if the options are valid
        if "auth" in options:
            auth = options["auth"]
            if not isinstance(auth, tuple):
                logger.error("Auth must be a tuple with 2 items (username, password) or not set")
                sys.exit(1)
            if len(auth) != 2:
                logger.error("Auth must be a tuple with 2 items (username, password)")
                sys.exit(1)
            if not all(isinstance(item, str) for item in auth):
                logger.error("Tuple item must be a string (type str)")
                sys.exit(1)
            self._auth = auth

        if "bind_address" in options:
            # This should error out if invalid
            # This allows us to parse the address given by a user on the start of the server
            bind_addr_info = socket.getaddrinfo(
                options["bind_address"],
                BIND_PORT,
                family=socket.AF_UNSPEC,
                type=socket.SOCK_STREAM,
                flags=socket.AI_PASSIVE,
            )
            if len(bind_addr_info) > 0:
                self._bind = bind_addr_info[0][4]  # Is picking first a good idea?
            else:
                logger.error("Failed to resolve bind address")
                sys.exit(1)

        port = int(options["port"]) if "port" in options else 1080
        host_port_tuple = (options["listen_ip"] if "listen_ip" in options else "0.0.0.0", port)
        super().__init__(host_port_tuple, SOCKS5ProxyHandler)


class SOCKS5ProxyHandler(BaseRequestHandler):
    """The handler used for a request from a client.
    Make sure _bind and _auth is set in self.server (like in SOCKS5ProxyServer) if a custom server uses this handler
    in order to use username and password authentication or use binding for the request socket

    Control flow note: every ``_send_*failure`` helper ends in ``_exit()``,
    which raises ``SystemExit`` to terminate the handler thread. Code following
    a failure call is therefore never reached.
    """

    def handle(self):
        """Run greeting, optional authentication, CONNECT request and the relay loop."""
        logger.info("Accepting connection from %s:%s", *self.client_address[:2])

        # Handle the greeting
        # Greeting header
        header = self._recv(GREETING_SIZE, self._send_greeting_failure, AuthMethod.Invalid)
        version, nmethods = struct.unpack("!BB", header)
        # Only accept SOCKS5. Reject with "no acceptable method"; answering with
        # our own auth method would look like a successful negotiation.
        if version != SOCKS_VERSION:
            self._send_greeting_failure(AuthMethod.Invalid)
        # We need at least one method
        if nmethods < 1:
            self._send_greeting_failure(AuthMethod.Invalid)

        # Get available methods
        methods = self._get_available_methods(nmethods)
        logger.debug(f"Received methods {methods}")

        # Accept only USERNAME/PASSWORD auth if we are asking for auth
        # Accept only no auth if we are not asking for USERNAME/PASSWORD
        if self.auth_method not in methods:
            self._send_greeting_failure(AuthMethod.Invalid)

        # Choose an authentication method and send it to the client
        self._send(struct.pack("!BB", SOCKS_VERSION, self.auth_method))

        # If we are asking for USERNAME/PASSWORD auth verify it
        if self.auth_method:
            self._verify_credentials()

        # Auth/greeting handled...
        logger.debug("Successfully authenticated")

        # Handle the request
        conn_buffer = self._recv(CONN_NO_PORT_SIZE, self._send_failure, StatusCode.GeneralFailure)
        version, cmd, rsv, address_type = struct.unpack("!BBBB", conn_buffer)
        # Validate the address type first so later code only sees supported values
        if address_type in (AddressDataType.IPv4, AddressDataType.IPv6, AddressDataType.DomainName):
            self._address_type = address_type
        else:
            self._send_failure(StatusCode.AddressTypeNotSupported)

        if version != SOCKS_VERSION:
            self._send_failure(StatusCode.GeneralFailure)
        if cmd != CONNECT:  # We only support connect
            self._send_failure(StatusCode.CommandNotSupported)
        if rsv != RESERVED:  # Malformed packet
            self._send_failure(StatusCode.GeneralFailure)

        logger.debug(f"Handling request with address type: {address_type}")

        if address_type == AddressDataType.DomainName:  # Domain name
            domain_buffer = self._recv(DOMAIN_SIZE, self._send_failure, StatusCode.GeneralFailure)
            domain_length = domain_buffer[0]
            # A one-byte length can never exceed 255; the invalid case is an empty name
            if domain_length == 0:
                self._send_failure(StatusCode.GeneralFailure)
            address = self._recv(domain_length, self._send_failure, StatusCode.GeneralFailure)
        else:  # IPv4 or IPv6
            address_family = socket.AF_INET if address_type == AddressDataType.IPv4 else socket.AF_INET6
            minlen = 4 if address_type == AddressDataType.IPv4 else 16
            raw = self._recv(minlen, self._send_failure, StatusCode.GeneralFailure)  # Raw IP address bytes

            # Convert the IP address from binary to text
            try:
                address = socket.inet_ntop(address_family, raw)
            except (OSError, ValueError) as err:
                logger.debug(f"Could not convert packed IP {raw} to string")
                logger.error(err)
                self._send_failure(StatusCode.GeneralFailure)

        port_buffer = self._recv(CONN_PORT_SIZE, self._send_failure, StatusCode.GeneralFailure)
        port = struct.unpack("!H", port_buffer)[0]

        # Translate our address and port into data from which we can create a socket connection
        try:
            remote_info = socket.getaddrinfo(
                address, port, family=socket.AF_UNSPEC, type=socket.SOCK_STREAM, flags=socket.AI_PASSIVE
            )
            # Pick the first one returned, probably IPv6 if IPv6 is available or IPv4 if not
            # TO-DO: Try as many as possible in a loop instead of picking only the first returned
            remote_info = remote_info[0]
        except Exception as err:  # There's no suitable errorcode in RFC1928 for DNS lookup failure
            logger.error(err)
            self._send_failure(StatusCode.GeneralFailure)

        af, socktype, proto, _, sa = remote_info

        # Connect to the socket
        try:
            # Make the socket
            self._remote = socket.socket(af, socktype, proto)
            # Bind it to an IP
            if hasattr(self.server, "_bind"):
                self._remote.bind(self.server._bind)
            self._remote.connect(sa)
            bound_host, bound_port = self._remote.getsockname()[:2]
            logger.info(f"Connected to {address} {port}")

            # Build BND.ADDR/BND.PORT from the local end of the upstream socket,
            # using the address family that was actually connected (IPv4 or IPv6).
            reply = self._pack_reply(StatusCode.Success, af, bound_host, bound_port)
            logger.debug(f"Bind address {bound_host} {bound_port}")
        except Exception as err:
            logger.error(err)
            # TO-DO: Get the actual failure code instead of giving ConnRefused each time
            self._send_failure(StatusCode.ConnRefused)

        self._send(reply)

        # Run the copy loop
        self._copy_loop(self.request, self._remote)
        self._exit(True)

    @property
    def auth_method(self):
        """Gives us the authentication method we will use"""
        return AuthMethod.UsernamePassword if hasattr(self.server, "_auth") else AuthMethod.NoAuth

    @staticmethod
    def _pack_reply(code, family, host, port):
        """Build a SOCKS5 reply whose ATYP and BND.ADDR match ``family``.

        ``host`` is a textual IP address; any ``%zone`` suffix is dropped for IPv6.
        """
        if family == socket.AF_INET6:
            atyp = AddressDataType.IPv6
            packed_host = socket.inet_pton(socket.AF_INET6, host.split("%", 1)[0])
        else:
            atyp = AddressDataType.IPv4
            packed_host = socket.inet_aton(host)
        return struct.pack("!BBBB", SOCKS_VERSION, code, RESERVED, atyp) + packed_host + struct.pack("!H", port)

    def _send(self, data):
        """Convenience method to send bytes to a client"""
        return self.request.sendall(data)

    def _recv(self, bufsize, failure_method=False, code=False):
        """Receive exactly ``bufsize`` bytes from the client.

        TCP may deliver a message in several pieces, so reading continues until
        ``bufsize`` bytes have arrived or the peer closes the connection. If
        fewer bytes arrive, ``failure_method`` is called (with ``code`` when one
        was given); without a failure method the thread is ended via ``_exit()``.
        """
        chunks = []
        remaining = bufsize
        while remaining > 0:
            chunk = self.request.recv(remaining)
            if not chunk:  # peer closed the connection
                break
            chunks.append(chunk)
            remaining -= len(chunk)
        buf = b"".join(chunks)

        if len(buf) < bufsize:
            if failure_method and code is not False:
                failure_method(code)
            elif failure_method:
                failure_method()
            else:
                self._exit()  # Kill thread if we aren't calling the failure methods (they already do this)
        return buf

    def _shutdown_client(self):
        """Convenience method to shutdown and close the connection with a client"""
        self.server.shutdown_request(self.request)

    def _exit(self, dontExit=False):
        """Close client and upstream sockets; raise ``SystemExit`` unless ``dontExit`` is true."""
        self._shutdown_client()
        if hasattr(self, "_remote"):
            self._remote.close()
        if not dontExit:
            sys.exit()

    def _get_available_methods(self, n):
        """Receive the ``n`` method bytes a client offered and return them as a list of ints"""
        return list(self._recv(n * AUTH_METHOD_SIZE, self._send_greeting_failure, AuthMethod.Invalid))

    def _verify_credentials(self):
        """Verify the credentials of a client and send a relevant response,
        closing the connection + thread if unauthenticated.

        :return: True when the credentials match ``self.server._auth``
        """
        version = self._recv(VERSION_SIZE)[0]
        if version != USERNAME_PASSWORD_VERSION:
            logger.error("USERNAME_PASSWORD_VERSION did not match")
            self._send_authentication_failure(FAILURE)

        username_len = self._recv(ID_LEN_SIZE, self._send_authentication_failure, FAILURE)
        username = self._recv(username_len[0], self._send_authentication_failure, FAILURE)

        password_len = self._recv(PW_LEN_SIZE, self._send_authentication_failure, FAILURE)
        password = self._recv(password_len[0], self._send_authentication_failure, FAILURE)

        server_username, server_password = self.server._auth

        # Compare as bytes: client input that is not valid UTF-8 is then simply a
        # mismatch instead of an unhandled UnicodeDecodeError.
        if username == server_username.encode("utf-8") and password == server_password.encode("utf-8"):
            self._send(struct.pack("!BB", USERNAME_PASSWORD_VERSION, StatusCode.Success))
            return True

        logger.error("Authentication failed")
        self._send_authentication_failure(FAILURE)

    def _send_greeting_failure(self, code):
        """Convenience method to send a failure message to a client in the greeting stage"""
        try:
            self._send(struct.pack("!BB", SOCKS_VERSION, code))
        finally:
            self._exit()  # always clean up, even if the client is already gone

    def _send_authentication_failure(self, code):
        """Convenience method to send a failure message to a client in the authentication stage"""
        try:
            self._send(struct.pack("!BB", USERNAME_PASSWORD_VERSION, code))
        finally:
            self._exit()

    def _send_failure(self, code):
        """Convenience method to send a failure message to a client in the socket stage.

        The reply always carries ATYP=IPv4 with a zero address and port: the
        payload packed here is 4 address bytes, so any other ATYP would produce
        a malformed message.
        """
        try:
            self._send(struct.pack("!BBBBIH", SOCKS_VERSION, code, RESERVED, AddressDataType.IPv4, 0, 0))
        finally:
            self._exit()

    def _copy_loop(self, client, remote):
        """Waits for network activity and forwards it to the other connection.

        Returns when either side closes, on a socket error, or after the
        connection has been idle for the configured timeout.
        """
        # CONNECTION_TIMEOUT is written as milliseconds (60 * 15 * 1000) while
        # select() expects seconds.
        idle_timeout = CONNECTION_TIMEOUT / 1000

        while True:
            # Wait until client or remote is available for read
            #
            # Alternatively use poll() instead of select() due to these reasons
            # https://github.com/rofl0r/microsocks/commit/31557857ccce5e4fdd2cfdae7ab640d589aa2b41
            # May not be ideal for a cross platform implementation however
            readable, _, _ = select.select([client, remote], [], [], idle_timeout)

            # Kill inactive/unused connections. The tunnel is already established,
            # so just stop relaying; a SOCKS reply here would corrupt the stream.
            if not readable:
                logger.debug("Copy loop idle timeout")
                return

            for sock in readable:
                try:
                    data = sock.recv(COPY_LOOP_BUFFER_SIZE)
                except Exception as err:
                    logger.debug("Copy loop failed to read")
                    logger.error(err)
                    return

                if not data:
                    return

                outfd = remote if sock is client else client
                try:
                    outfd.sendall(data)  # Python has its own sendall implemented
                except Exception as exc:
                    logger.debug("Copy loop failed to send all data")
                    logger.error(exc)
                    return


if __name__ == "__main__":
    import threading

    import httpx

    logging.basicConfig(level=logging.DEBUG)

    server = SOCKS5ProxyServer(
        {
            "auth": ("username", "password"),
            "listen_ip": "127.0.0.1",
            "port": 1080,
            "bind_address": "0.0.0.0",
        }
    )

    def run_server(s) -> None:
        print("Starting server")
        s.serve_forever()

    p = threading.Thread(
        target=run_server,
        name="socks5-server",
        args=[
            server,
        ],
    )
    p.start()
    proxies = {"all://": "socks5://username:password@127.0.0.1:1080"}

    time.sleep(1)

    try:
        response = httpx.get(url="https://lumtest.com/echo.json", proxies=proxies)
        print(response.content)
        print(response.status_code)
        print(response.headers)
    finally:
        print("Killing background process")
server.shutdown() 380 | p.join() 381 | -------------------------------------------------------------------------------- /tests/e2e/__init__.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import shutil 4 | import tempfile 5 | 6 | import filelock 7 | 8 | FIXTURES_TEMP_DIR = os.path.join(tempfile.gettempdir(), "pybas_automation_tests_e2e_fixtures") 9 | FILE_LOCK_FILENAME = filelock.FileLock(os.path.join(FIXTURES_TEMP_DIR, ".lock")) 10 | 11 | 12 | async def clean_dir(dir_name: str) -> None: 13 | """Asynchronously cleans (deletes) the specified directory. 14 | 15 | :param dir_name: The name of the directory to be cleaned. 16 | """ 17 | 18 | # Sleep for a short while before attempting to remove the directory. 19 | # This might be necessary to let any processes that might be using the directory to release it. 20 | await asyncio.sleep(5) 21 | 22 | # Attempt to remove the directory. If unsuccessful, retries up to 5 times. 
23 | for _ in range(0, 5): 24 | try: 25 | shutil.rmtree(dir_name) 26 | except Exception as exc: # type: ignore 27 | print(exc) 28 | # Wait for a bit before the next attempt 29 | await asyncio.sleep(5) 30 | continue 31 | 32 | # If the directory was successfully removed, break out of the loop 33 | break 34 | 35 | # As a last attempt, try to remove the directory while ignoring any errors 36 | shutil.rmtree(dir_name, ignore_errors=True) 37 | -------------------------------------------------------------------------------- /tests/e2e/basic/test_basic.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | import pytest 5 | from pywinauto import Application # type: ignore 6 | 7 | from pybas_automation.browser_automator import BrowserAutomator 8 | from pybas_automation.browser_profile import BrowserProfileStorage 9 | from pybas_automation.task import TaskStorage, TaskStorageModeEnum 10 | from tests import is_windows 11 | 12 | 13 | @pytest.mark.skipif(not is_windows, reason="requires Windows") 14 | class TestBasic2e2: 15 | @pytest.mark.asyncio 16 | async def test_blah(self, bas_app: Application) -> None: 17 | print("test_blah") 18 | 19 | @pytest.mark.asyncio 20 | async def test_blah_another(self, bas_app: Application) -> None: 21 | print("test_blah_another") 22 | 23 | @pytest.mark.asyncio 24 | async def test_basic_compiled_app_works(self, bas_app: Application, fingerprint_key: str) -> None: 25 | """Test to check if the compiled app works as expected.""" 26 | 27 | # Start the application by simulating a click on 'OK Enter' button 28 | bas_app.window(title="OK Enter", top_level_only=False).wrapper_object().click() 29 | 30 | # Give the app some time to process after the click 31 | await asyncio.sleep(5) 32 | 33 | # Wait for a new window to appear indicating the app has finished its operation 34 | while len(bas_app.windows()) != 2: 35 | print("Waiting for the app to finished...") 36 | await asyncio.sleep(5) 
37 | 38 | # Print the local application data directory for debugging purposes 39 | print(os.environ["LOCALAPPDATA"]) 40 | 41 | # Load tasks from the storage 42 | task_storage = TaskStorage(mode=TaskStorageModeEnum.READ_WRITE) 43 | assert task_storage.load_all() is True 44 | tasks = task_storage.get_all() 45 | assert tasks is not None 46 | assert len(tasks) == 1 47 | 48 | # Load browser profiles using the provided fingerprint key 49 | browser_profile_storage = BrowserProfileStorage(fingerprint_key=fingerprint_key) 50 | assert browser_profile_storage.count() == 1 51 | profiles = browser_profile_storage.load_all() 52 | profile = profiles[0] 53 | 54 | # Validate that the profile directory exists 55 | assert os.path.exists(profile.profile_dir) 56 | 57 | # Validate that our specific directory and fingerprint file exist within the profile directory 58 | assert os.path.exists(os.path.join(profile.profile_dir, ".pybas")) is True 59 | assert os.path.exists(os.path.join(profile.profile_dir, ".pybas", "fingerprint_raw.json")) is True 60 | 61 | # Ensure that the browser started by checking for the 'Default' directory within the profile directory 62 | assert os.path.exists(os.path.join(profile.profile_dir, "Default")) is True 63 | 64 | @pytest.mark.asyncio 65 | async def test_basic(self, bas_app: Application) -> None: 66 | # Find the main window or parent which contains the DEBUG Static control 67 | debug_window = bas_app.window(title="DEBUG", control_type="Text", top_level_only=False) 68 | 69 | # Search for the "true" RadioButton among all descendants of the main window 70 | true_radiobutton = debug_window.parent().descendants(title="true", control_type="RadioButton")[0] 71 | 72 | # Interact with the found RadioButton 73 | true_radiobutton.click() 74 | 75 | # Start the application by simulating a click on 'OK Enter' button 76 | bas_app.window(title="OK Enter", top_level_only=False).wrapper_object().click() 77 | 78 | # Give the app some time to process after the click 79 | await 
asyncio.sleep(15) 80 | 81 | # Continuously check if the browser is clickable by evaluating the number of children of the browser's parent 82 | # window. In DEBUG mode, once the script finishes execution, the count of children should change, 83 | # signaling that the browser is clickable. 84 | browser_1_panel = bas_app.window(title="Show browser", control_type="Text", top_level_only=False) 85 | 86 | while len(browser_1_panel.wrapper_object().parent().children()) == 3: 87 | print("Waiting for the app to finished...") 88 | # Pause for 1 second before rechecking to prevent excessive CPU usage. 89 | await asyncio.sleep(1) 90 | 91 | # Print the local application data directory for debugging purposes 92 | print(os.environ["LOCALAPPDATA"]) 93 | 94 | # Load tasks from the storage 95 | task_storage = TaskStorage(mode=TaskStorageModeEnum.READ) 96 | assert task_storage.load_all() is True 97 | tasks = task_storage.get_all() 98 | assert tasks is not None 99 | assert len(tasks) == 1 100 | 101 | task = tasks[0] 102 | task_id, unique_process_id, remote_debugging_port = ( 103 | task.task_id, 104 | task.unique_process_id, 105 | task.remote_debugging_port, 106 | ) 107 | assert task_id is not None 108 | assert unique_process_id is not None 109 | assert remote_debugging_port is not None 110 | 111 | print(task_id, unique_process_id, remote_debugging_port) 112 | 113 | browser_profile_storage = BrowserProfileStorage() 114 | browser_profile_storage.load_all() 115 | 116 | profile_name = os.path.basename(task.browser_settings.profile.profile_folder_path) 117 | browser_profile = browser_profile_storage.load(profile_name=profile_name) 118 | # print(browser_profile) 119 | 120 | async with BrowserAutomator( 121 | browser_profile=browser_profile, 122 | remote_debugging_port=remote_debugging_port, 123 | unique_process_id=unique_process_id, 124 | ) as automator: 125 | ws_endpoint = automator.get_ws_endpoint() 126 | 127 | assert ws_endpoint is not None 128 | assert ws_endpoint.startswith("ws://") 129 
| 130 | assert automator.browser_version is not None 131 | assert automator.browser_version.startswith("Chrome/") 132 | print(automator.browser_version) 133 | 134 | await automator.page.goto("https://playwright.dev/python/") 135 | elem = automator.page.locator("xpath=//a[@class='getStarted_Sjon']") 136 | await automator.bas_move_mouse_to_elem(elem=elem) 137 | await elem.click() 138 | 139 | page_content = await automator.bas_get_page_content() 140 | 141 | assert page_content is not None 142 | assert "html" in page_content 143 | -------------------------------------------------------------------------------- /tests/e2e/conftest.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import codecs 3 | import os 4 | import zipfile 5 | from typing import AsyncGenerator, Generator 6 | 7 | import pytest 8 | import pytest_asyncio 9 | from pydantic import FilePath 10 | from pywinauto import Application # type: ignore 11 | from pywinauto.timings import TimeoutError # type: ignore 12 | 13 | from tests import ABS_PATH, FIXTURES_DIR 14 | from tests.e2e import FILE_LOCK_FILENAME, FIXTURES_TEMP_DIR 15 | 16 | 17 | @pytest.fixture(scope="module") 18 | def event_loop() -> Generator[asyncio.AbstractEventLoop, None, None]: 19 | loop = asyncio.get_event_loop() 20 | yield loop 21 | loop.close() 22 | 23 | 24 | def get_bas_app_name() -> str: 25 | bas_app_name = os.environ.get("BAS_APP_NAME", "PyBasFree").strip() 26 | if not bas_app_name: 27 | raise ValueError("BAS_APP_NAME not set") 28 | 29 | return bas_app_name 30 | 31 | 32 | def generate_xml_config() -> str: 33 | """ 34 | Prepares the XML config for the e2e tests. 
35 | """ 36 | 37 | # Fetch and validate the fingerprint key from environment variables 38 | fingerprint_key = os.environ.get("FINGERPRINT_KEY", None) 39 | assert fingerprint_key is not None 40 | assert len(fingerprint_key) == 64 41 | 42 | # Validate the presence of XML config and command files 43 | xml_config_path = os.path.join(FIXTURES_DIR, "Actual.PyBasFreeTemplate.xml.default.xml") 44 | assert os.path.exists(xml_config_path) 45 | assert os.path.isfile(xml_config_path) 46 | 47 | cmd_initial_filename = os.path.join(ABS_PATH, "cmd_initial.py") 48 | assert os.path.exists(cmd_initial_filename) 49 | assert os.path.isfile(cmd_initial_filename) 50 | cmd_worker_filename = os.path.join(ABS_PATH, "cmd_worker.py") 51 | assert os.path.exists(cmd_worker_filename) 52 | assert os.path.isfile(cmd_worker_filename) 53 | 54 | # Fetch and validate the proxy credentials from environment variables 55 | proxy_username = os.environ.get("BRIGHTDATA_USERNAME", None) 56 | proxy_password = os.environ.get("BRIGHTDATA_PASSWORD", None) 57 | if proxy_username is None or proxy_password is None: 58 | raise ValueError("BRIGHTDATA_USERNAME or BRIGHTDATA_PASSWORD not set") 59 | 60 | # Read the xml config and replace placeholders with respective values 61 | with codecs.open(xml_config_path, "r", "utf-8") as f: 62 | xml_config = f.read() 63 | 64 | # Setting various configurations in the xml 65 | xml_config = xml_config.replace("{{PYTHON_RUN_COMMANDS}}", "poetry run python") 66 | xml_config = xml_config.replace("{{FINGERPRINT_KEY}}", fingerprint_key) 67 | xml_config = xml_config.replace("{{CMD_INITIAL}}", cmd_initial_filename) 68 | xml_config = xml_config.replace("{{CMD_WORKER}}", cmd_worker_filename) 69 | xml_config = xml_config.replace("{{BRIGHTDATA_USERNAME}}", proxy_username) 70 | xml_config = xml_config.replace("{{BRIGHTDATA_PASSWORD}}", proxy_password) 71 | xml_config = xml_config.replace("{{LIMIT_TASKS}}", "1") 72 | xml_config = xml_config.replace("{{THREADS}}", "1") 73 | 74 | # Ensure all 
placeholders are replaced 75 | assert "{{" not in xml_config 76 | assert "}}" not in xml_config 77 | 78 | return xml_config 79 | 80 | 81 | def write_xml_config() -> None: 82 | bas_app_name = get_bas_app_name() 83 | 84 | # Generate xml configuration required to run the application 85 | xml_config = generate_xml_config() 86 | 87 | # Define the directory path where the application was extracted 88 | working_dir = os.path.join(FIXTURES_TEMP_DIR, f"{bas_app_name}") 89 | 90 | assert os.path.exists(working_dir) 91 | assert os.path.isdir(working_dir) 92 | 93 | # List directories under 'appsremote' to find the application's session directory 94 | created_dirs = os.listdir(os.path.join(working_dir, "appsremote", f"{bas_app_name}")) 95 | 96 | # Get the first directory starting with 'SID' which represents the application's session 97 | matching_dir = next((x for x in created_dirs if x.startswith("SID")), None) 98 | assert matching_dir is not None 99 | 100 | # Define the path to the XML configuration file of the application 101 | xml_config_filename = os.path.join( 102 | working_dir, "appsremote", f"{bas_app_name}", matching_dir, "engine", f"Actual.{bas_app_name}.xml" 103 | ) 104 | 105 | # Write the generated xml configuration to the appropriate location 106 | with codecs.open(xml_config_filename, "w", "utf-8") as f: 107 | f.write(xml_config) 108 | 109 | assert os.path.exists(xml_config_filename) 110 | 111 | 112 | async def start_app(exe_path: FilePath) -> Application: 113 | assert exe_path.exists() and exe_path.is_file() 114 | 115 | app = Application(backend="uia").start(str(exe_path)) 116 | 117 | # Periodically check the application until its initial setup 118 | # (e.g., dependency download) is complete and detached from the process 119 | await ensure_process_not_running(app=app) 120 | 121 | return app 122 | 123 | 124 | async def ensure_process_not_running(app: Application, timeout: int = 60) -> None: 125 | """ 126 | Asynchronously ensures that the application process is not 
running. 127 | :param app: The application instance. 128 | :param timeout: The timeout in seconds. 129 | 130 | :raises ValueError: If the application process is still running after the timeout. 131 | """ 132 | 133 | for _ in range(0, timeout): 134 | if app.is_process_running(): 135 | await asyncio.sleep(1) 136 | continue 137 | break 138 | 139 | if app.is_process_running(): 140 | raise ValueError("Application process is still running") 141 | 142 | 143 | @pytest_asyncio.fixture(scope="module", autouse=True) 144 | async def _prepare_bas_app() -> AsyncGenerator[None, None]: 145 | """ 146 | Asynchronous pytest fixture that sets up, runs, and tears down the application for e2e tests. 147 | """ 148 | 149 | bas_app_name = get_bas_app_name() 150 | 151 | print("") 152 | print("Preparing the application for e2e tests...") 153 | 154 | if not os.path.exists(FIXTURES_TEMP_DIR): 155 | os.mkdir(FIXTURES_TEMP_DIR) 156 | 157 | with FILE_LOCK_FILENAME: 158 | # Define the path to the zipped application release 159 | src_zip_filename = os.path.join(ABS_PATH, "bas_release", f"{bas_app_name}.zip") 160 | assert os.path.exists(src_zip_filename) 161 | assert os.path.isfile(src_zip_filename) 162 | 163 | # Create a temporary directory to extract and run the application 164 | assert os.path.exists(FIXTURES_TEMP_DIR) 165 | 166 | # Extract the zipped application to the temporary directory 167 | with zipfile.ZipFile(src_zip_filename, "r") as zip_ref: 168 | zip_ref.extractall(FIXTURES_TEMP_DIR) 169 | 170 | # Define the directory path where the application was extracted 171 | working_dir = os.path.join(FIXTURES_TEMP_DIR, f"{bas_app_name}") 172 | assert os.path.exists(working_dir) 173 | assert os.path.isdir(working_dir) 174 | 175 | # Define the path to the executable of the application 176 | exe_path = os.path.join(working_dir, f"{bas_app_name}.exe") 177 | 178 | # Start the application 179 | await start_app(exe_path=FilePath(exe_path)) 180 | 181 | try: 182 | # Reconnect to the application after its 
setup is complete 183 | app = Application(backend="uia").connect(title="Language chooser", timeout=10) 184 | except TimeoutError: 185 | # not first run 186 | pass 187 | else: 188 | await asyncio.sleep(5) 189 | assert app.is_process_running() is True 190 | 191 | # Simulate a user click on the 'OK Enter' button in the application 192 | btn_wrapper = app.window(title="OK Enter", top_level_only=False).wrapper_object() 193 | btn_wrapper.click() 194 | assert app.is_process_running() is True 195 | 196 | # Give the application some time to process after the click 197 | await asyncio.sleep(5) 198 | 199 | # Close the application 200 | app.kill() 201 | await ensure_process_not_running(app=app) 202 | 203 | write_xml_config() 204 | 205 | # Restart the application with the new configurations 206 | await start_app(exe_path=FilePath(exe_path)) 207 | 208 | # write fresh new config 209 | write_xml_config() 210 | 211 | # Reconnect to the application main window 212 | app = Application(backend="uia").connect(title_re=f"^{bas_app_name}", timeout=10) 213 | assert app.is_process_running() is True 214 | 215 | app.kill() 216 | await ensure_process_not_running(app=app) 217 | await asyncio.sleep(5) 218 | 219 | yield None # run tests 220 | 221 | print("") 222 | print("Teardown the application for e2e tests...") 223 | 224 | 225 | @pytest_asyncio.fixture(scope="function") 226 | async def bas_app() -> AsyncGenerator[Application, None]: 227 | bas_app_name = get_bas_app_name() 228 | 229 | exe_path = os.path.join(FIXTURES_TEMP_DIR, f"{bas_app_name}", f"{bas_app_name}.exe") 230 | 231 | with FILE_LOCK_FILENAME: 232 | write_xml_config() 233 | 234 | # Restart the application with the new configurations 235 | app = await start_app(exe_path=FilePath(exe_path)) 236 | await ensure_process_not_running(app=app) 237 | 238 | # Reconnect to the application main window 239 | app = Application(backend="uia").connect(title_re=f"^{bas_app_name}", timeout=10) 240 | assert app.is_process_running() is True 241 | 242 | 
try: 243 | # Yield the connected application instance for the tests to run 244 | yield app 245 | finally: 246 | # Close the application after the tests are done 247 | app.kill() 248 | await ensure_process_not_running(app=app) 249 | -------------------------------------------------------------------------------- /tests/fixtures/Actual.PyBasFreeTemplate.xml.default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | false 10 | 11 | 12 | 13 | 14 | 15 | 4 16 | 17 | 18 | false 19 | false 20 | false 21 | false 22 | false 23 | false 24 | NoProtection 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 1 37 | 1 38 | 1 39 | 1 40 | 1 41 | 1 42 | 0 43 | 0 44 | 1 45 | 46 | 47 | ProxyProvider 48 | 49 | 50 | 51 | 52 | Select 53 | 0 54 | 1 55 | 1 56 | Select 57 | NoProxy 58 | Brightdata 59 | 60 | 1 61 | 1 62 | 63 | 64 | 65 | ProxyUsername 66 | 67 | 68 | Brightdata 69 | ProxyProvider 70 | FixedString 71 | 0 72 | 1 73 | 1 74 | FixedString 75 | {{BRIGHTDATA_USERNAME}} 76 | 1 77 | 78 | 79 | 80 | ProxyPassword 81 | 82 | 83 | Brightdata 84 | ProxyProvider 85 | FixedString 86 | 0 87 | 1 88 | 1 89 | FixedString 90 | {{BRIGHTDATA_PASSWORD}} 91 | 1 92 | 93 | 94 | 95 | PythonRunCommands 96 | 97 | 98 | 99 | 100 | FixedString 101 | 0 102 | 1 103 | 1 104 | FixedString 105 | {{PYTHON_RUN_COMMANDS}} 106 | 1 107 | 108 | 109 | 110 | PythonInitialScriptPath 111 | 112 | 113 | 114 | 115 | LinesFromFile 116 | 0 117 | 1 118 | 1 119 | LinesFromFile 120 | {{CMD_INITIAL}} 121 | 0 122 | 1 123 | 0 124 | 1 125 | 1 126 | 1 127 | 5000 128 | 0 129 | -1 130 | 0 131 | 132 | 133 | 134 | PythonWorkerScriptPath 135 | 136 | 137 | 138 | 139 | LinesFromFile 140 | 0 141 | 1 142 | 1 143 | LinesFromFile 144 | {{CMD_WORKER}} 145 | 0 146 | 1 147 | 0 148 | 1 149 | 1 150 | 1 151 | 5000 152 | 0 153 | -1 154 | 0 155 | 156 | 157 | 158 | LimitTasks 159 | 160 | 161 | 162 | 163 | FixedInteger 164 | 0 165 | 1 166 | 1 167 | FixedInteger 168 | 
class TestBrowserAutomator:
    """Functional tests for BrowserAutomator running against a browser with remote debugging enabled."""

    @pytest.mark.asyncio
    async def test_expected_failed(self, free_port: int) -> None:
        """
        Test the scenario where connecting to the browser fails.
        This should raise a BrowserWsConnectError.
        """
        remote_debugging_port = free_port
        browser_profile = BrowserProfile()

        # Expect an error when trying to connect to the browser without a proper WebSocket debugging endpoint.
        with pytest.raises(BrowserWsConnectError):
            async with BrowserAutomator(
                browser_profile=browser_profile, remote_debugging_port=remote_debugging_port
            ) as automator:
                # NOTE(review): this call is not awaited. If `connect` is a coroutine function the line only
                # creates a coroutine object; presumably the expected error is already raised while entering
                # the context manager - confirm against BrowserAutomator.
                automator.connect()

    @pytest.mark.asyncio
    async def test_basic(self, browser_data: tuple[BrowserContext, DirectoryPath, int]) -> None:
        """
        Test the BrowserAutomator's ability to communicate with a browser instance.
        """
        # Unpack browser data; the browser handle itself is not needed here.
        _, profile_folder_path, remote_debugging_port = browser_data
        browser_profile = BrowserProfile(profile_dir=profile_folder_path)

        # Create an instance of BrowserAutomator and communicate with the browser.
        async with BrowserAutomator(
            browser_profile=browser_profile, remote_debugging_port=remote_debugging_port
        ) as automator:
            # Send a command to the browser and ensure a valid response.
            data = await automator.cdp_client.send_command("Browser.getVersion")
            assert data is not None
            assert data.get("product", None) is not None

            # Ensure that the browser product version contains "Chrome/"
            assert "Chrome/" in data["product"]

            # Use the automator to navigate to a specific webpage.
            await automator.page.goto("https://lumtest.com/echo.json")

    @pytest.mark.asyncio
    async def test_local_storage_with_cdp_and_js(self, browser_data: tuple[BrowserContext, DirectoryPath, int]) -> None:
        """
        Verify local storage interactions within a browser context using the BrowserAutomator.

        For each test URL the page is loaded, then a local storage item is set, read back and cleared
        through page JavaScript, and finally another item is written through the Chrome DevTools Protocol
        (DOMStorage.setDOMStorageItem) and read back through page JavaScript.
        """

        # Extract the profile directory path and remote debugging port from the provided tuple.
        _, profile_folder_path, remote_debugging_port = browser_data

        # Initialize the browser profile with the specified directory path.
        browser_profile = BrowserProfile(profile_dir=profile_folder_path)

        # Use the BrowserAutomator to establish a communication channel with the browser.
        async with BrowserAutomator(
            browser_profile=browser_profile, remote_debugging_port=remote_debugging_port
        ) as automator:
            # URLs to visit, each paired with the domain used to build unique storage keys and values.
            url_domain_pairs = [
                ("https://playwright.dev/python/", "playwright.dev"),
                ("https://lumtest.com/echo.json", "lumtest.com"),
            ]

            for url, domain in url_domain_pairs:
                # Navigate to the URL and wait until network activity is idle.
                await automator.page.goto(url=url, wait_until="networkidle")

                # Additionally wait for the 'domcontentloaded' load state before touching the page.
                await automator.page.wait_for_load_state("domcontentloaded")

                # The origin of the current page identifies its storage in the CDP command below.
                security_origin = await automator.page.evaluate("window.location.origin")
                assert security_origin is not None, "Failed to retrieve the security origin."

                # Set a local storage item and verify its presence and value.
                await automator.page.evaluate(f"localStorage.setItem('key_{domain}', 'value_{domain}');")
                item = await automator.page.evaluate(f"localStorage.getItem('key_{domain}');")
                assert (
                    item == f"value_{domain}"
                ), f"Expected local storage item 'key_{domain}' to have value 'value_{domain}'."

                # Clear all local storage items and verify the specified item is no longer present.
                await automator.page.evaluate("localStorage.clear();")
                item = await automator.page.evaluate(f"localStorage.getItem('key_{domain}');")
                assert item is None, "Expected local storage to be empty after clearing."

                # Set a local storage item using Chrome DevTools Protocol and verify its presence and value.
                await automator.cdp_session.send(
                    "DOMStorage.setDOMStorageItem",
                    {
                        "storageId": {
                            "securityOrigin": security_origin,
                            "isLocalStorage": True,
                        },
                        "key": f"new_key_{domain}",
                        "value": f"new_value_{domain}",
                    },
                )
                item = await automator.page.evaluate(f"localStorage.getItem('new_key_{domain}');")
                assert (
                    item == f"new_value_{domain}"
                ), f"Expected local storage item 'new_key_{domain}' to have value 'new_value_{domain}'."
-------------------------------------------------------------------------------- /tests/functional/browser_profile/cassettes/TestBrowserProfileStorage.test_serialize_deserialize.yaml: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:46a99dd32bcb7fce7d873746a1e2badbf87353316fce5753b8024924acdfe424 3 | size 8297973 4 | -------------------------------------------------------------------------------- /tests/functional/browser_profile/test_browser_profile.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import pytest 5 | 6 | from pybas_automation.bas_actions.browser.proxy import BasActionBrowserProxy, BasActionBrowserProxyTypeEnum 7 | from pybas_automation.browser_profile import BrowserProfile, BrowserProfileStorage 8 | from pybas_automation.fingerprint import BasFingerprintRequest, get_fingerprint 9 | 10 | 11 | @pytest.mark.vcr() 12 | class TestBrowserProfile: 13 | def test_save_fingerprint(self, fingerprint_key: str) -> None: 14 | """ 15 | Test saving a browser fingerprint to a profile. The method 16 | verifies that the fingerprint is valid and can be saved 17 | successfully to the profile directory. 
def test_save_proxy(self) -> None:
    """
    Attach a proxy configuration to a fresh browser profile and persist it.

    The save call must report success and a `proxy.json` file must exist in the
    `.pybas` folder of the profile directory afterwards.
    """

    # Start from a profile created with default settings.
    profile = BrowserProfile()

    # Attach the proxy configuration directly to the profile.
    profile.proxy = BasActionBrowserProxy(
        server="127.0.0.1",
        port=9999,
        type=BasActionBrowserProxyTypeEnum.HTTP,
        login="user",
        password="pass",
    )

    # Persist the proxy and check both the return value and the file on disk.
    saved = profile.save_proxy_to_profile()
    assert saved is True

    proxy_file = os.path.join(profile.profile_dir, ".pybas", "proxy.json")
    assert os.path.exists(proxy_file) is True
class TestBrowserProfileProxy:
    """Functional tests for proxy lookups, executed against a local SOCKS5 test server."""

    @pytest.fixture(autouse=True)
    def run_around_tests(self) -> Generator[None, None, None]:
        """
        Setup and teardown fixture for the tests in this class. Starts and shuts down a SOCKS5 server.
        The server is run in a separate thread for the duration of each test.

        Teardown is placed in a `finally` block and the server thread is joined, so the listener
        is stopped and the thread reclaimed even if the generator is closed abnormally.
        """
        import threading

        from tests.contrib.socks5_server.server import SOCKS5ProxyServer

        # Configuration options for the SOCKS5 server
        opts = {
            "auth": ("test_user", "test_pass"),
            "listen_ip": "127.0.0.1",
            "port": 9999,
            "bind_address": "0.0.0.0",
        }

        server = SOCKS5ProxyServer(opts)  # type: ignore

        def run_server(s) -> None:  # type: ignore
            """Helper function to run the server."""
            print("Starting server")
            s.serve_forever()

        # Start the SOCKS5 server in a separate thread. The thread is a daemon so that a server
        # which fails to stop cannot keep the test process alive at interpreter exit.
        t = threading.Thread(
            target=run_server,
            name="socks5-server",
            args=[
                server,
            ],
            daemon=True,
        )
        t.start()
        # Give the server some time to start up
        time.sleep(1)

        try:
            # run tests
            yield
        finally:
            # teardown: stop the server, then wait (bounded) for its thread to finish
            server.shutdown()
            t.join(timeout=5)

    def test_proxy(self) -> None:
        """
        Test to verify the behavior and functionality of a proxy. This test asserts the validity of the
        returned IP and checks if the returned country code is known.
        """
        # Create a proxy configuration matching the server started by the fixture
        proxy = BasActionBrowserProxy(
            server="127.0.0.1",
            port=9999,
            type=BasActionBrowserProxyTypeEnum.SOCKS5,
            login="test_user",
            password="test_pass",
        )

        # Fetch external info through the proxy
        result = get_external_info_ip(proxy)
        # Ensure there is an IP and country in the response
        assert result.get("ip", None) is not None
        assert result.get("country", None) is not None
        assert result.get("country", None) != ""

        ip = result.get("ip")
        # Validate the IP address (raises ValueError if it is not a valid IPv4/IPv6 address)
        ipaddress.ip_address(ip)  # type: ignore

        # Check if the country returned is in the known countries list
        assert result.get("country") in countries

        print(result)
def test_create_with_profile_name(self, fingerprint_key: str, fingerprint_str: str) -> None:
    """Create a profile under an explicit name and check the resulting directory layout."""

    storage = BrowserProfileStorage(fingerprint_key=fingerprint_key)
    # The storage is expected to start out empty.
    assert storage.count() == 0

    # Ask the storage for a new profile with a caller-chosen name.
    profile = storage.new(profile_name="cool_profile")

    # The last path component of the profile directory must be the requested name.
    assert os.path.basename(profile.profile_dir) == "cool_profile"
    assert profile.profile_dir.is_dir() is True
    assert profile.profile_dir.exists() is True
    assert storage.count() == 1

    # A fingerprint file must have been written into the profile's `.pybas` folder.
    fingerprint_path = profile.profile_dir.joinpath(".pybas", "fingerprint_raw.json")
    assert fingerprint_path.exists() is True
def test_serialize_deserialize(self, fingerprint_key: str, fingerprint_str: str) -> None:
    """Round-trip a browser profile through JSON and rebuild a BrowserProfile from the result."""

    storage = BrowserProfileStorage()
    profile = storage.new(fingerprint_raw=fingerprint_str, profile_name="cool_profile_1")

    # Dump the model in JSON mode, then encode it to a JSON string.
    payload = profile.model_dump(mode="json")
    serialized = json.dumps(payload)
    assert serialized is not None

    # Decode the string and feed the fields back into the model constructor.
    restored_fields = json.loads(serialized)
    deserialized = BrowserProfile(**restored_fields)
    assert deserialized is not None
@pytest.mark.vcr()
class TestCmdInitial:
    """Command-line tests for the `cmd_initial` entry point, driven through click's CliRunner."""

    @pytest.fixture()
    def runner(self) -> Generator[CliRunner, None, None]:
        """Yield a CliRunner used to invoke the command in-process."""
        cli_runner = CliRunner()
        yield cli_runner

    def test_main(self, runner: CliRunner, fingerprint_key: str) -> None:
        """Run cmd_initial with a fingerprint key and a task limit of one, then check its JSON output."""

        cli_args = ["--bas_fingerprint_key", f"{fingerprint_key}", "--limit_tasks", "1"]
        outcome = runner.invoke(cmd_initial.main, cli_args)

        # The command must finish cleanly.
        assert outcome.exit_code == 0
        assert outcome.exception is None

        # The command prints JSON; it must name a tasks file.
        payload = json.loads(outcome.output.strip())
        tasks_file = payload.get("tasks_file", None)
        assert tasks_file is not None

        # The named tasks file must exist on disk.
        assert os.path.exists(tasks_file) is True

    def test_main_proxy(self, runner: CliRunner, fingerprint_key: str, brightdata_credentials: Dict[str, str]) -> None:
        """Run cmd_initial with Bright Data proxy options in addition to the basic arguments."""

        # Pull the proxy credentials out of the fixture dictionary.
        proxy_username = brightdata_credentials["username"]
        proxy_password = brightdata_credentials["password"]
        assert proxy_username is not None
        assert proxy_password is not None

        # Basic arguments followed by the proxy provider and its credentials.
        cli_args = [
            "--bas_fingerprint_key",
            f"{fingerprint_key}",
            "--limit_tasks",
            "1",
            "--proxy_provider",
            "brightdata",
            "--proxy_username",
            proxy_username,
            "--proxy_password",
            proxy_password,
        ]
        outcome = runner.invoke(cmd_initial.main, cli_args)

        # The command must finish cleanly with the proxy configuration.
        assert outcome.exception is None
        assert outcome.exit_code == 0
@pytest.mark.vcr()
class TestCmdWorker:
    """Command-line tests for the `cmd_worker` entry point, driven through click's CliRunner."""

    @pytest.fixture()
    def runner(self) -> Generator[CliRunner, None, None]:
        """Fixture to provide a CliRunner instance for invoking command-line applications."""
        yield CliRunner()

    def test_main(
        self, runner: CliRunner, fingerprint_key: str, browser_data: tuple[BrowserContext, DirectoryPath, int]
    ) -> None:
        """
        Test the main function of cmd_worker module.

        A task is first created through cmd_initial, then cmd_worker is invoked for that task and
        the tasks file is re-read to check that the remote debugging port was stored on the task.
        """

        # Extract the remote debugging port from the browser data tuple
        _, _, remote_debugging_port = browser_data

        # Create tasks and get the path to the tasks file
        task_file = create_tasks(runner, fingerprint_key)
        assert os.path.exists(task_file) is True

        # Load the tasks from the file and retrieve the task ID.
        # The file is opened in a `with` block so the handle is closed deterministically.
        with codecs.open(task_file, "r", "utf-8") as tasks_fh:
            tasks_json = json.load(tasks_fh)
        task_id = tasks_json[0].get("task_id")
        assert task_id is not None

        # Invoke the main function of cmd_worker with the specified task ID and remote debugging port
        result = runner.invoke(
            cmd_worker.main, ["--remote_debugging_port", f"{remote_debugging_port}", "--task_id", task_id]
        )

        # Ensure the command executed successfully
        assert result.exception is None
        assert result.exit_code == 0

        # Re-read the tasks file: the worker run is expected to have recorded the port on the task
        with codecs.open(task_file, "r", "utf-8") as tasks_fh:
            tasks_json = json.load(tasks_fh)
        task_remote_debugging_port = tasks_json[0].get("remote_debugging_port")

        assert task_remote_debugging_port is not None
        assert task_remote_debugging_port == remote_debugging_port
35 | """ 36 | 37 | fingerprint_zip_path = os.path.join(FIXTURES_DIR, "fingerprint_raw.zip") 38 | assert os.path.exists(fingerprint_zip_path) 39 | 40 | with ZipFile(fingerprint_zip_path) as zf: 41 | for file in zf.namelist(): 42 | if file == "fingerprint_raw.json": 43 | with zf.open(file) as f: 44 | return f.read().decode("utf-8") 45 | 46 | raise FileNotFoundError("fingerprint_raw.json not found in the zip file.") 47 | 48 | 49 | @pytest.fixture(scope="session") 50 | def vcr_config() -> Dict[str, Callable]: 51 | """ 52 | Configure VCR to selectively record and replay HTTP requests for tests. 53 | 54 | :return: VCR configuration dictionary. 55 | """ 56 | 57 | def before_record_response(response): # type: ignore 58 | return response if response["status_code"] == 200 else None 59 | 60 | def before_record_request(request): # type: ignore 61 | if urlparse(request.uri).hostname in ["localhost", "127.0.0.1"]: 62 | return None # Skip localhost requests 63 | 64 | # Mask sensitive query params (like API keys) for recording. 65 | parsed = urlparse(request.uri) 66 | parsed_qs = parse_qs(parsed.query) 67 | if "key" in parsed_qs: 68 | parsed_qs["key"] = ["dummy_key"] 69 | request.uri = parsed._replace(query=urlencode(parsed_qs, doseq=True)).geturl() 70 | 71 | return request 72 | 73 | return {"before_record_response": before_record_response, "before_record_request": before_record_request} 74 | 75 | 76 | @pytest.fixture(scope="session") 77 | def browser_profile_folder_path() -> Generator[DirectoryPath, None, None]: 78 | """ 79 | Provide a temporary directory for the browser profile. 80 | 81 | :return: Path to the temporary directory. 82 | """ 83 | 84 | temp_dir = DirectoryPath(tempfile.mkdtemp()) 85 | yield temp_dir 86 | shutil.rmtree(temp_dir, ignore_errors=True) 87 | 88 | 89 | @pytest.fixture(scope="session") 90 | def free_port() -> int: 91 | """ 92 | Fetch an available port on the host machine. 93 | 94 | :return: Free port number. 
@pytest_asyncio.fixture(scope="session")
async def browser_data() -> AsyncGenerator[tuple[BrowserContext, DirectoryPath, int], None]:
    """
    Initialize a browser with a temporary user data directory and a remote debugging port.

    A persistent Chromium context is launched with `--remote-debugging-port`, then a second
    connection is opened to it over CDP. The browser runs headless unless the DEBUG_TESTS
    environment variable is set to "true".

    The temporary user data directory is removed in a `finally` block, after Playwright has
    been shut down, so it is cleaned up even when launching or connecting fails.

    :return: Tuple containing the object returned by `connect_over_cdp`, the user data directory
        path, and the remote debugging port.
    """

    user_data_dir = DirectoryPath(tempfile.mkdtemp())
    try:
        port = _find_free_port()
        debug_tests = os.environ.get("DEBUG_TESTS", "false").lower() == "true"

        async with async_playwright() as playwright:
            # The launched context is not used directly; tests talk to the browser through CDP.
            _ = await playwright.chromium.launch_persistent_context(
                user_data_dir=user_data_dir,
                headless=not debug_tests,
                args=[f"--remote-debugging-port={port}"],
            )

            cdp_browser = await playwright.chromium.connect_over_cdp(f"http://localhost:{port}")
            yield cdp_browser, user_data_dir, port  # type: ignore
    finally:
        # Best-effort cleanup; errors (e.g. files still locked) are deliberately ignored.
        shutil.rmtree(user_data_dir, ignore_errors=True)
@pytest.mark.vcr()
class TestFingerprint:
    def test_get_fingerprint(self, fingerprint_key: str) -> None:
        """Fetch a fingerprint via `get_fingerprint` and check the decoded JSON payload."""

        # The request below is meaningless without a key, so fail early on a missing or empty one
        assert fingerprint_key is not None and fingerprint_key != ""

        # Build the request, fetch the raw fingerprint and decode it as JSON in one go
        payload = json.loads(get_fingerprint(BasFingerprintRequest(key=fingerprint_key)))

        # The service must flag the fingerprint as valid
        assert payload.get("valid") is True

        # Exact type checks on purpose: `type(...) is int` rejects booleans, unlike isinstance
        expected_types = (
            ("width", int),
            ("height", int),
            ("perfectcanvas", dict),
        )
        for field, expected in expected_types:
            assert type(payload.get(field)) is expected
def _temporary_directory(prefix: str) -> Generator[DirectoryPath, None, None]:
    """
    Create a temporary directory, hand it to the caller and remove it afterwards.

    :param prefix: Prefix passed to ``tempfile.mkdtemp`` for the directory name.
    :return: Generator yielding the path of the created directory exactly once.
    """

    # mkdtemp() creates the directory itself, so no extra existence check or mkdir is needed.
    dir_name = DirectoryPath(tempfile.mkdtemp(prefix=prefix))
    try:
        yield dir_name
    finally:
        # Cleanup: Remove the temporary directory after tests are done
        shutil.rmtree(dir_name, ignore_errors=True)


@pytest.fixture(scope="function")
def storage_dir() -> Generator[DirectoryPath, None, None]:
    """
    Provide a fresh temporary directory to be used as the task storage directory.

    :return: Generator yielding the path to the temporary directory.
    """

    yield from _temporary_directory(prefix="pybas-storage_dir_test_")


@pytest.fixture(scope="function")
def profiles_dir() -> Generator[DirectoryPath, None, None]:
    """
    Provide a fresh temporary directory to be used as the parent of browser profile directories.

    :return: Generator yielding the path to the temporary directory.
    """

    yield from _temporary_directory(prefix="pybas-profiles_dir_test_")
class TestTaskStorage:
    """Functional tests for `TaskStorage`: construction, read/write modes, persistence and updates."""

    def test_fail_storage_dir(self) -> None:
        """
        Test if initializing TaskStorage with an invalid directory raises a ValueError.

        Both the explicit `storage_dir` argument and the `LOCALAPPDATA`-based default are checked.
        """
        storage_dir = DirectoryPath("some_dir")
        with pytest.raises(ValueError):
            TaskStorage(storage_dir=storage_dir)

        monkeypatch = MonkeyPatch()
        monkeypatch.setenv("LOCALAPPDATA", "some_dir")

        try:
            with pytest.raises(ValueError):
                TaskStorage()
        finally:
            monkeypatch.undo()

    def test_custom_task_filename(self, storage_dir: DirectoryPath, profiles_dir: DirectoryPath) -> None:
        """
        Test if using a custom task filename works as expected.
        """
        # NOTE(review): no storage_dir is passed here, so the storage location comes from TaskStorage's
        # default - confirm this is intended, since the storage_dir fixture is requested but unused.
        task_storage = TaskStorage(task_filename=FilePath("custom_tasks.json"))
        assert task_storage.get_all() is None

    def test_failed_custom_task_filename(self, storage_dir: DirectoryPath, profiles_dir: DirectoryPath) -> None:
        """
        Test if initializing TaskStorage with an invalid custom filename raises a ValueError.
        """
        with pytest.raises(ValueError):
            TaskStorage(task_filename=FilePath(os.path.join("some_dir", "custom_tasks.json")))

    def tests_default_storage_dir(self, storage_dir: DirectoryPath) -> None:
        """
        Test if initializing TaskStorage with the default storage directory works as expected.
        """

        # Patch the environment through MonkeyPatch and undo it afterwards, so the temporary
        # LOCALAPPDATA value does not leak into tests that run later in the same process.
        monkeypatch = MonkeyPatch()
        monkeypatch.setenv("LOCALAPPDATA", str(storage_dir))

        try:
            task_storage = TaskStorage()
            assert task_storage.get_all() is None
        finally:
            monkeypatch.undo()

    def test_read_mode(self, storage_dir: DirectoryPath, profiles_dir: DirectoryPath, fingerprint_str: str) -> None:
        """
        Test the read-only mode of the TaskStorage.

        This test ensures that the read-only mode of TaskStorage prohibits any write operations
        while still allowing for the tasks to be loaded and read.
        """
        # Initialize TaskStorage in read-only mode and read-write mode
        task_storage_read_mode = TaskStorage(storage_dir=storage_dir, mode=TaskStorageModeEnum.READ)
        task_storage_write_mode = TaskStorage(storage_dir=storage_dir, mode=TaskStorageModeEnum.READ_WRITE)

        # Assert that initially, the read mode storage is empty
        assert task_storage_read_mode.get_all() is None
        task_storage_read_mode.load_all()

        # Test that clear operation raises a ValueError in read mode
        with pytest.raises(ValueError):
            task_storage_read_mode.clear()

        # Create a new task for testing
        task = create_task(profiles_dir=profiles_dir, fingerprint_str=fingerprint_str)
        assert task is not None

        # Test that save operation raises a ValueError in read mode
        with pytest.raises(ValueError):
            task_storage_read_mode.save(task=task)

        # Save the task in write mode and then load it in read mode
        task_storage_write_mode.save(task=task)
        task_storage_read_mode.load_all()

        # Check that the read mode storage now contains the task
        assert task_storage_read_mode.get_all() is not None
        assert task_storage_read_mode.count() == 1

        # Test again that clear operation raises a ValueError in read mode
        with pytest.raises(ValueError):
            task_storage_read_mode.clear()

    def test_saved(self, storage_dir: DirectoryPath, profiles_dir: DirectoryPath, fingerprint_str: str) -> None:
        """
        Test that tasks are saved and can be retrieved from the storage.

        This test checks the ability to save a task in write mode and then retrieve it in read mode.
        Additionally, it tests that attempting to save in read-only mode raises an error.
        """

        # Initialize TaskStorage in write mode
        task_storage_write = TaskStorage(storage_dir=storage_dir, mode=TaskStorageModeEnum.READ_WRITE)

        # Create a new task and save it in write mode
        task = create_task(profiles_dir=profiles_dir, fingerprint_str=fingerprint_str)
        assert task is not None
        task_storage_write.save(task=task)

        # Initialize TaskStorage in read mode and load tasks
        task_storage_read = TaskStorage(storage_dir=storage_dir, mode=TaskStorageModeEnum.READ)
        assert task_storage_read.load_all() is True

        # Ensure the tasks are loaded and are of the correct type
        assert task_storage_read.get_all() is not None
        assert isinstance(task_storage_read.get_all(), List)

        # Check if the task is present in the storage
        tasks = task_storage_read.get_all()
        assert task in tasks  # type: ignore
        assert task_storage_read.count() == 1

        # Create another new task for testing
        new_task = create_task(profiles_dir=profiles_dir, fingerprint_str=fingerprint_str)
        assert new_task is not None

        # Test that save operation raises a ValueError in read mode and that the count remains unchanged
        with pytest.raises(ValueError):
            task_storage_read.save(task=new_task)
        assert task_storage_read.count() == 1

    def test_basic(self, storage_dir: DirectoryPath, profiles_dir: DirectoryPath, fingerprint_str: str) -> None:
        """
        Test basic functionality of TaskStorage.

        This test checks:
        - Initialization of an empty storage
        - Basic storage operations such as saving and loading of tasks
        - The correct handling of duplicate tasks
        """

        # Initialize TaskStorage in read-write mode and ensure it's empty
        task_storage = TaskStorage(storage_dir=storage_dir, mode=TaskStorageModeEnum.READ_WRITE)
        assert task_storage.get_all() is None
        assert task_storage.count() == 0

        # Verify that saving an empty list of tasks raises a ValueError
        with pytest.raises(ValueError):
            task_storage.save_all()

        # Confirm that clearing and loading an empty storage returns False
        assert task_storage.clear() is False  # no tasks to clear
        assert task_storage.load_all() is False  # no tasks to load

        # Create and save multiple tasks
        tasks_id = []
        for _ in range(10):
            task = create_task(profiles_dir=profiles_dir, fingerprint_str=fingerprint_str)
            assert task is not None
            assert isinstance(task, BasTask)
            task_storage.save(task=task)
            tasks_id.append(str(task.task_id))

        # Ensure all tasks are saved successfully
        assert task_storage.save_all() is True

        # Validate that each saved task can be retrieved
        for one in tasks_id:
            exists_task = task_storage.get(task_id=UUID(one))
            assert exists_task is not None
            assert isinstance(exists_task, BasTask)

        # Confirm tasks are loaded and storage is in expected state
        assert task_storage.load_all() is True
        assert task_storage.get_all() is not None
        assert isinstance(task_storage.get_all(), List)
        assert len(task_storage.get_all()) == 10  # type: ignore
        assert task_storage.count() == 10
        assert task_storage.get(task_id=uuid4()) is None  # Ensure a random ID doesn't return a task

        # Validate that clearing the storage works and the count is zero
        assert task_storage.clear() is True
        assert task_storage.count() == 0
        assert task_storage.load_all() is False  # No tasks to load after clearing
        assert task_storage.get(task_id=uuid4()) is None  # No tasks to retrieve after clearing

        # Verify handling of duplicate tasks
        task = create_task(profiles_dir=profiles_dir, fingerprint_str=fingerprint_str)
        task_storage.save(task=task)
        with pytest.raises(TaskDuplicateError):
            task_storage.save(task=task)

        # Representation of task storage (for debug purposes)
        print(task_storage)  # repr

    def test_proxy(self, storage_dir: DirectoryPath, profiles_dir: DirectoryPath, fingerprint_str: str) -> None:
        """
        Test the saving functionality of TaskStorage with a task that uses a proxy.

        This test checks:
        - Creation of a task with proxy settings
        - The proper presence of proxy settings in the created task
        - Saving the proxy-enabled task in storage
        """

        # Initialize TaskStorage in read-write mode for saving tasks
        task_storage_write = TaskStorage(storage_dir=storage_dir, mode=TaskStorageModeEnum.READ_WRITE)

        # Create a task with proxy settings enabled
        task = create_task(profiles_dir=profiles_dir, fingerprint_str=fingerprint_str, with_proxy=True)

        # Validate the task itself first, then the presence of proxy settings in it
        assert task is not None
        assert task.browser_settings.proxy is not None

        # Save the task with proxy settings to the storage
        task_storage_write.save(task=task)

    def test_update_remote_debugging_port(
        self, storage_dir: DirectoryPath, profiles_dir: DirectoryPath, fingerprint_str: str
    ) -> None:
        """
        Test that an updated `remote_debugging_port` is persisted.

        A task is saved, its port is changed and written with `update`, and a separate read-only
        storage instance must then load the task with the new port.
        """
        # Initialize TaskStorage in read-write mode for saving tasks
        task_storage_write = TaskStorage(storage_dir=storage_dir, mode=TaskStorageModeEnum.READ_WRITE)
        task = create_task(profiles_dir=profiles_dir, fingerprint_str=fingerprint_str)

        # Save the task to the storage
        task_storage_write.save(task=task)

        # Change the port on the in-memory task and persist the change
        task.remote_debugging_port = 9022
        task_storage_write.update(task=task)

        # Initialize TaskStorage in read mode and load tasks
        task_storage_read = TaskStorage(storage_dir=storage_dir, mode=TaskStorageModeEnum.READ)
        assert task_storage_read.load_all() is True
        task_saved = task_storage_read.get(task_id=task.task_id)

        assert task_saved is not None
        assert task_saved.remote_debugging_port is not None
        assert task_saved.remote_debugging_port == 9022