├── .ci └── opensearch │ ├── Dockerfile.opensearch │ ├── docker-compose.yml │ └── opensearch.yml ├── .github ├── CODEOWNERS ├── dependabot.yml └── workflows │ ├── add-untriaged.yml │ ├── backport.yml │ ├── changelog_verifier.yml │ ├── ci.yml │ ├── delete_backport_branch.yml │ ├── dependabot_pr.yml │ ├── integration-unreleased.yml │ ├── integration.yml │ ├── links.yml │ └── release-drafter.yml ├── .gitignore ├── .whitesource ├── ADMINS.md ├── AUTHORS ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── COMPATIBILITY.md ├── CONTRIBUTING.md ├── LICENSE.txt ├── MAINTAINERS.md ├── MANIFEST.in ├── NOTICE.txt ├── OpenSearch.svg ├── README.md ├── RELEASING.md ├── SECURITY.md ├── USER_GUIDE.md ├── jenkins └── release.JenkinsFile ├── noxfile.py ├── opensearch_dsl ├── __init__.py ├── aggs.py ├── analysis.py ├── connections.py ├── document.py ├── exceptions.py ├── faceted_search.py ├── field.py ├── function.py ├── index.py ├── mapping.py ├── query.py ├── response │ ├── __init__.py │ ├── aggs.py │ └── hit.py ├── search.py ├── serializer.py ├── update_by_query.py ├── utils.py └── wrappers.py ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── conftest.py ├── test_aggs.py ├── test_analysis.py ├── test_connections.py ├── test_document.py ├── test_faceted_search.py ├── test_field.py ├── test_index.py ├── test_integration │ ├── __init__.py │ ├── test_analysis.py │ ├── test_count.py │ ├── test_data.py │ ├── test_document.py │ ├── test_faceted_search.py │ ├── test_index.py │ ├── test_mapping.py │ ├── test_search.py │ └── test_update_by_query.py ├── test_mapping.py ├── test_package.py ├── test_query.py ├── test_result.py ├── test_search.py ├── test_update_by_query.py ├── test_utils.py ├── test_validation.py └── test_wrappers.py └── utils ├── build-dists.py └── license-headers.py /.ci/opensearch/Dockerfile.opensearch: -------------------------------------------------------------------------------- 1 | ARG OPENSEARCH_VERSION 2 | FROM opensearchproject/opensearch:${OPENSEARCH_VERSION} 3 | 4 | COPY --chown=opensearch:opensearch opensearch.yml /usr/share/opensearch/config/ 5 | 6 | ARG opensearch_path=/usr/share/opensearch 7 | ARG opensearch_yml=$opensearch_path/config/opensearch.yml 8 | 9 | ARG SECURE_INTEGRATION 10 | RUN if [ "$SECURE_INTEGRATION" != "true" ] ; then /usr/share/opensearch/bin/opensearch-plugin remove opensearch-security; fi 11 | -------------------------------------------------------------------------------- /.ci/opensearch/docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | 3 | opensearch: 4 | build: 5 | context: . 
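      # SECURE_INTEGRATION and OPENSEARCH_VERSION below are forwarded to
      # Dockerfile.opensearch: the former controls whether the security plugin
      # is removed, the latter picks the base image tag.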
6 | dockerfile: Dockerfile.opensearch 7 | args: 8 | - SECURE_INTEGRATION=${SECURE_INTEGRATION:-false} 9 | - OPENSEARCH_VERSION=${OPENSEARCH_VERSION:-latest} 10 | environment: 11 | - discovery.type=single-node 12 | - bootstrap.memory_lock=true 13 | ports: 14 | - "9200:9200" 15 | user: opensearch 16 | -------------------------------------------------------------------------------- /.ci/opensearch/opensearch.yml: -------------------------------------------------------------------------------- 1 | cluster.name: "docker-cluster" 2 | network.host: 0.0.0.0 3 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # This should match the owning team set up in https://github.com/orgs/opensearch-project/teams 2 | * @VachaShah @dblock @harshavamsi @Yury-Fridlyand -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | updates: 2 | - directory: / 3 | open-pull-requests-limit: 3 4 | package-ecosystem: pip 5 | schedule: 6 | interval: weekly 7 | labels: 8 | - "dependabot" 9 | - "dependencies" 10 | version: 2 11 | -------------------------------------------------------------------------------- /.github/workflows/add-untriaged.yml: -------------------------------------------------------------------------------- 1 | name: Apply 'untriaged' label during issue lifecycle 2 | 3 | on: 4 | issues: 5 | types: [opened, reopened, transferred] 6 | 7 | jobs: 8 | apply-label: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/github-script@v6 12 | with: 13 | script: | 14 | github.rest.issues.addLabels({ 15 | issue_number: context.issue.number, 16 | owner: context.repo.owner, 17 | repo: context.repo.repo, 18 | labels: ['untriaged'] 19 | }) 20 | -------------------------------------------------------------------------------- /.github/workflows/backport.yml: -------------------------------------------------------------------------------- 1 | name: Backport 2 | on: 3 | pull_request_target: 4 | types: 5 | - closed 6 | - labeled 7 | 8 | jobs: 9 | backport: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | contents: write 13 | pull-requests: write 14 | name: Backport 15 | steps: 16 | - name: GitHub App token 17 | id: github_app_token 18 | uses: tibdex/github-app-token@v1.5.0 19 | with: 20 | app_id: ${{ secrets.APP_ID }} 21 | private_key: ${{ secrets.APP_PRIVATE_KEY }} 22 | installation_id: 22958780 23 | 24 | - name: Backport 25 | uses: VachaShah/backport@v1.1.4 26 | with: 27 | github_token: ${{ steps.github_app_token.outputs.token }} 28 | branch_name: backport/backport-${{ github.event.number }} 29 | -------------------------------------------------------------------------------- /.github/workflows/changelog_verifier.yml: -------------------------------------------------------------------------------- 1 | name: "Changelog Verifier" 2 | on: 3 | pull_request: 4 | types: [opened, edited, review_requested, synchronize, reopened, ready_for_review, labeled, unlabeled] 5 | 6 | jobs: 7 | # Enforces the update of a changelog file on every pull request 8 | verify-changelog: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v3 12 | with: 13 | token: ${{ secrets.GITHUB_TOKEN }} 14 | ref: ${{ github.event.pull_request.head.sha }} 15 | 16 | - uses: dangoslen/changelog-enforcer@v3 17 | with: 18 | skipLabels: "autocut, skip-changelog" 19 | 
-------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: CI 3 | 4 | on: [push, pull_request] 5 | 6 | jobs: 7 | package: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout Repository 11 | uses: actions/checkout@v3 12 | - name: Set up Python 3.8 13 | uses: actions/setup-python@v4 14 | with: 15 | python-version: 3.8 16 | - name: Install dependencies 17 | run: | 18 | python3.8 -m pip install setuptools wheel twine 19 | - name: Build packages 20 | run: | 21 | python3.8 utils/build-dists.py 22 | - name: Check packages 23 | run: | 24 | set -exo pipefail; 25 | if [ $(python3.8 -m twine check dist/* | grep -c 'warning') != 0 ]; then exit 1; fi 26 | 27 | lint: 28 | runs-on: ubuntu-latest 29 | steps: 30 | - name: Checkout Repository 31 | uses: actions/checkout@v3 32 | - name: Set up Python 3.8 33 | uses: actions/setup-python@v4 34 | with: 35 | python-version: 3.8 36 | - name: Install dependencies 37 | run: | 38 | python3.8 -m pip install nox 39 | - name: Lint the code 40 | run: nox --no-error-on-missing-interpreter -s lint 41 | 42 | test-build-distribution: 43 | runs-on: ubuntu-latest 44 | steps: 45 | - name: Checkout Repository 46 | uses: actions/checkout@v3 47 | - name: Set up Python 3.7 48 | uses: actions/setup-python@v4 49 | with: 50 | python-version: 3.7 51 | - name: Install build tools 52 | run: | 53 | python3.7 -m pip install --upgrade build 54 | - name: Build project for distribution 55 | run: | 56 | python3.7 -m build 57 | 58 | test-linux: 59 | runs-on: ${{ matrix.runner }} 60 | 61 | strategy: 62 | fail-fast: false 63 | matrix: 64 | python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] 65 | runner: ['ubuntu-latest'] 66 | 67 | steps: 68 | - name: Checkout Repository 69 | uses: actions/checkout@v3 70 | - name: Setup Python - ${{ matrix.python-version }} 71 | uses: actions/setup-python@v4 72 | with: 73 | python-version: ${{ matrix.python-version }} 74 | env: 75 | PIP_NO_PYTHON_VERSION_WARNING: 1 76 | PIP_DISABLE_PIP_VERSION_CHECK: 1 77 | - name: Set up Python 3.8 for Nox 78 | if: matrix.python-version != '3.8' 79 | uses: actions/setup-python@v4 80 | with: 81 | python-version: 3.8 82 | - name: Install nox 83 | run: | 84 | python3.8 -m pip install nox 85 | - name: Run Tests 86 | run: | 87 | nox --no-error-on-missing-interpreter -rs test 88 | -------------------------------------------------------------------------------- /.github/workflows/delete_backport_branch.yml: -------------------------------------------------------------------------------- 1 | name: Delete merged branch of the backport PRs 2 | on: 3 | pull_request: 4 | types: 5 | - closed 6 | 7 | jobs: 8 | delete-branch: 9 | runs-on: ubuntu-latest 10 | if: startsWith(github.event.pull_request.head.ref,'backport/') 11 | steps: 12 | - name: Delete merged branch 13 | uses: SvanBoxel/delete-merged-branch@main 14 | env: 15 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /.github/workflows/dependabot_pr.yml: -------------------------------------------------------------------------------- 1 | name: Dependabot PR actions 2 | on: pull_request 3 | 4 | jobs: 5 | dependabot: 6 | runs-on: ubuntu-latest 7 | permissions: 8 | pull-requests: write 9 | contents: write 10 | if: ${{ github.actor == 'dependabot[bot]' }} 11 | steps: 12 | - name: GitHub App token 13 | id: github_app_token 14 | uses: tibdex/github-app-token@v1.5.0 
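        # A GitHub App token is used instead of the default GITHUB_TOKEN so that
        # commits pushed by later steps can still trigger follow-up workflow runs.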
15 | with: 16 | app_id: ${{ secrets.APP_ID }} 17 | private_key: ${{ secrets.APP_PRIVATE_KEY }} 18 | installation_id: 22958780 19 | 20 | - name: Check out code 21 | uses: actions/checkout@v2 22 | with: 23 | token: ${{ steps.github_app_token.outputs.token }} 24 | 25 | - name: Update the changelog 26 | uses: dangoslen/dependabot-changelog-helper@v1 27 | with: 28 | version: 'Unreleased' 29 | 30 | - name: Commit the changes 31 | uses: stefanzweifel/git-auto-commit-action@v4 32 | with: 33 | commit_message: "Update changelog" 34 | branch: ${{ github.head_ref }} 35 | commit_user_name: dependabot[bot] 36 | commit_user_email: support@github.com 37 | commit_options: '--signoff' -------------------------------------------------------------------------------- /.github/workflows/integration-unreleased.yml: -------------------------------------------------------------------------------- 1 | name: Integration with Unreleased OpenSearch 2 | 3 | on: 4 | push: 5 | pull_request: 6 | branches: 7 | - "main" 8 | 9 | jobs: 10 | integ-test: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | opensearch_ref: [ '1.x', '2.x', '2.0', 'main' ] 16 | python-version: [ '3.8' ] 17 | 18 | steps: 19 | - name: Checkout OpenSearch 20 | uses: actions/checkout@v3 21 | with: 22 | repository: opensearch-project/opensearch 23 | ref: ${{ matrix.opensearch_ref }} 24 | path: opensearch 25 | 26 | - name: Get OpenSearch branch top 27 | id: get-key 28 | working-directory: opensearch 29 | run: echo key=`git log -1 --format='%H'` >> $GITHUB_OUTPUT 30 | 31 | - name: Restore cached build 32 | id: cache-restore 33 | uses: actions/cache/restore@v3 34 | with: 35 | path: opensearch/distribution/archives/linux-tar/build/distributions 36 | key: ${{ steps.get-key.outputs.key }} 37 | 38 | - name: Assemble OpenSearch 39 | if: steps.cache-restore.outputs.cache-hit != 'true' 40 | working-directory: opensearch 41 | run: ./gradlew :distribution:archives:linux-tar:assemble 42 | 43 | - name: Save cached build 44 | if: steps.cache-restore.outputs.cache-hit != 'true' 45 | uses: actions/cache/save@v3 46 | with: 47 | path: opensearch/distribution/archives/linux-tar/build/distributions 48 | key: ${{ steps.get-key.outputs.key }} 49 | 50 | - name: Run OpenSearch 51 | working-directory: opensearch/distribution/archives/linux-tar/build/distributions 52 | run: | 53 | tar xf opensearch-min-* 54 | ./opensearch-*/bin/opensearch & 55 | for attempt in {1..20}; do sleep 5; if curl -s localhost:9200; then echo '=====> ready'; break; fi; echo '=====> waiting...'; done 56 | 57 | - name: Checkout High Level Python Client 58 | uses: actions/checkout@v3 59 | with: 60 | path: dsl-py 61 | 62 | - name: Setup Python - ${{ matrix.python-version }} 63 | uses: actions/setup-python@v4 64 | with: 65 | python-version: ${{ matrix.python-version }} 66 | env: 67 | PIP_NO_PYTHON_VERSION_WARNING: 1 68 | 69 | - name: Install nox 70 | run: | 71 | python -m pip install nox 72 | 73 | - name: Run integration tests 74 | working-directory: dsl-py 75 | run: | 76 | nox --no-error-on-missing-interpreter -rs test 77 | 78 | - name: Save server logs 79 | if: failure() 80 | uses: actions/upload-artifact@v3 81 | with: 82 | name: opensearch-logs-${{ matrix.opensearch_ref }}-python-${{ matrix.python-version }} 83 | path: | 84 | opensearch/distribution/archives/linux-tar/build/distributions/**/logs/* 85 | -------------------------------------------------------------------------------- /.github/workflows/integration.yml: 
--------------------------------------------------------------------------------
1 | name: Integration tests
2 | 
3 | on: [push, pull_request]
4 | 
5 | jobs:
6 |   integration:
7 |     name: Integ
8 |     runs-on: ubuntu-latest
9 |     strategy:
10 |       fail-fast: false
11 |       matrix:
12 |         version: [ '1.0.1', '1.1.0', '1.2.4', '1.3.7', '2.0.1', '2.1.0', '2.2.1', '2.3.0', '2.4.0', '2.5.0' ]
13 |         secured: [ "true", "false" ]
14 |         python-version: [ '3.8' ]
15 | 
16 |     steps:
17 |       - name: Checkout
18 |         uses: actions/checkout@v3
19 | 
20 |       - name: Clean docker containers
21 |         run: |
22 |           docker volume prune --force
23 |           docker network prune --force
24 |           docker system prune --volumes --force
25 | 
26 |       - name: Launch OpenSearch cluster
27 |         run: |
28 |           export OPENSEARCH_VERSION=${{ matrix.version }}
29 |           export SECURE_INTEGRATION=${{ matrix.secured }}
30 |           docker-compose --project-directory .ci/opensearch build
31 |           docker-compose --project-directory .ci/opensearch up -d
32 | 
33 |       - name: Setup Python - ${{ matrix.python-version }}
34 |         uses: actions/setup-python@v4
35 |         with:
36 |           python-version: ${{ matrix.python-version }}
37 |         env:
38 |           PIP_DISABLE_PIP_VERSION_CHECK: 1
39 |           PIP_NO_PYTHON_VERSION_WARNING: 1
40 | 
41 |       - name: Install nox
42 |         run: |
43 |           python -m pip install --upgrade pip nox
44 | 
45 |       - name: Integ OpenSearch secured=${{ matrix.secured }}
46 |         run: |
47 |           export SECURE_INTEGRATION=${{ matrix.secured }}
48 |           nox --no-error-on-missing-interpreter -rs test
--------------------------------------------------------------------------------
/.github/workflows/links.yml:
--------------------------------------------------------------------------------
1 | name: Link Checker
2 | on:
3 |   push:
4 |     branches:
5 |       - "*"
6 |   pull_request:
7 |     branches:
8 |       - "*"
9 | 
10 | jobs:
11 |   linkchecker:
12 | 
13 |     runs-on: ubuntu-latest
14 | 
15 |     steps:
16 |       - uses: actions/checkout@v2
17 |       - name: lychee Link Checker
18 |         id: lychee
19 |         uses: lycheeverse/lychee-action@v1.0.8
20 |         with:
21 |           args: --accept=200,403,429 "**/*.html" "**/*.md" "**/*.txt" "**/*.json" --exclude "https://localhost:9200" --exclude-mail
22 |         env:
23 |           GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
24 |       - name: Fail if there were link errors
25 |         run: exit ${{ steps.lychee.outputs.exit_code }}
--------------------------------------------------------------------------------
/.github/workflows/release-drafter.yml:
--------------------------------------------------------------------------------
1 | name: Release drafter
2 | 
3 | on:
4 |   push:
5 |     tags:
6 |       - "*"
7 | 
8 | jobs:
9 |   lint:
10 |     runs-on: ubuntu-20.04
11 |     steps:
12 |       - name: Checkout Repository
13 |         uses: actions/checkout@v3
14 |       - id: get_approvers
15 |         run: |
16 |           echo "approvers=$(cat .github/CODEOWNERS | grep @ | tr -d '* ' | sed 's/@/,/g' | sed 's/,//1')" >> $GITHUB_OUTPUT
17 |       - uses: trstringer/manual-approval@v1
18 |         with:
19 |           secret: ${{ github.TOKEN }}
20 |           approvers: ${{ steps.get_approvers.outputs.approvers }}
21 |           minimum-approvals: 2
22 |           issue-title: 'Release opensearch-dsl-py'
23 |           issue-body: "Please approve or deny the release of opensearch-dsl-py. **Tag**: ${{ github.ref_name }} **Commit**: ${{ github.sha }}"
24 |           exclude-workflow-initiator-as-approver: true
25 |       - name: Set up Python 3
26 |         uses: actions/setup-python@v3
27 |         with:
28 |           python-version: '3.x'
29 |       - name: Install build tools
30 |         run: |
31 |           python -m pip install --upgrade build
32 |       - name: Build project for distribution
33 |         run: |
34 |           python -m build
35 |           tar -zvcf artifacts.tar.gz dist
36 |       - name: Draft a release
37 |         uses: softprops/action-gh-release@v1
38 |         with:
39 |           draft: true
40 |           generate_release_notes: true
41 |           files: |
42 |             artifacts.tar.gz
43 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .*.swp
2 | *~
3 | *.py[co]
4 | .coverage
5 | *.egg-info
6 | dist
7 | build
8 | *.egg
9 | coverage.xml
10 | junit.xml
11 | docs/_build
12 | .cache
13 | venv
14 | .idea
15 | .pytest_cache
16 | .DS_Store
--------------------------------------------------------------------------------
/.whitesource:
--------------------------------------------------------------------------------
1 | {
2 |   "scanSettings": {
3 |     "configMode": "AUTO",
4 |     "configExternalURL": "",
5 |     "projectToken": "",
6 |     "baseBranches": []
7 |   },
8 |   "checkRunSettings": {
9 |     "vulnerableCheckRunConclusionLevel": "failure",
10 |     "displayMode": "diff"
11 |   },
12 |   "issueSettings": {
13 |     "minSeverityLevel": "LOW",
14 |     "issueType": "DEPENDENCY"
15 |   },
16 |   "remediateSettings": {
17 |     "workflowRules": {
18 |       "enabled": true
19 |     }
20 |   }
21 | }
--------------------------------------------------------------------------------
/ADMINS.md:
--------------------------------------------------------------------------------
1 | - [Current Admins](#current-admins)
2 | - [Admin Responsibilities](#admin-responsibilities)
3 | 
4 | ## Current Admins
5 | 
6 | | Admin            | GitHub ID                               | Affiliation |
7 | | ---------------- | --------------------------------------- | ----------- |
8 | | Charlotte Henkle | [CEHENKLE](https://github.com/CEHENKLE) | Amazon      |
9 | | Henri Yandell    | [hyandell](https://github.com/hyandell) | Amazon      |
10 | 
11 | 
12 | ## Admin Responsibilities
13 | 
14 | [This document](https://github.com/opensearch-project/.github/blob/main/ADMINS.md#admin-responsibilities) explains what admins do in this repo, and how they should be doing it.
--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
1 | For a list of all our amazing authors please see the contributors page:
2 | https://github.com/opensearch-project/opensearch-dsl-py/graphs/contributors
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # CHANGELOG
2 | Inspired by [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
3 | 
4 | ## [Unreleased]
5 | ### Added
6 | - Github workflow for changelog verification ([#81](https://github.com/opensearch-project/opensearch-dsl-py/pull/81))
7 | - Add AttrDict .get(...) method ([#90](https://github.com/opensearch-project/opensearch-dsl-py/pull/90))
8 | - Add release workflows ([#84](https://github.com/opensearch-project/opensearch-dsl-py/pull/84))
9 | - Update README regarding deprecation ([#100](https://github.com/opensearch-project/opensearch-dsl-py/pull/100))
10 | - Update README regarding archiving opensearch-dsl-py ([#113](https://github.com/opensearch-project/opensearch-dsl-py/pull/113))
11 | 
12 | ### Changed
13 | - Update maintainers and workflows for dependabot changelog ([#82](https://github.com/opensearch-project/opensearch-dsl-py/pull/82))
14 | - Update jenkins file to use updated docker image ([#108](https://github.com/opensearch-project/opensearch-dsl-py/pull/108))
15 | 
16 | ### Deprecated
17 | 
18 | ### Removed
19 | 
20 | ### Fixed
21 | - Update CI workflow python versions ([#92](https://github.com/opensearch-project/opensearch-dsl-py/pull/92))
22 | - Fix security vulnerability from opensearch-py ([#104](https://github.com/opensearch-project/opensearch-dsl-py/pull/104))
23 | - Remove unsupported versions from CI. ([#105](https://github.com/opensearch-project/opensearch-dsl-py/pull/105))
24 | 
25 | ### Security
26 | 
27 | 
28 | [Unreleased]: https://github.com/opensearch-project/opensearch-dsl-py/compare/2.0...HEAD
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | 
2 | This code of conduct applies to all spaces provided by the OpenSource project including in code, documentation, issue trackers, mailing lists, chat channels, wikis, blogs, social media and any other communication channels used by the project.
3 | 
4 | 
5 | **Our open source communities endeavor to:**
6 | 
7 | * Be Inclusive: We are committed to being a community where everyone can join and contribute. This means using inclusive and welcoming language.
8 | * Be Welcoming: We are committed to maintaining a safe space for everyone to be able to contribute.
9 | * Be Respectful: We are committed to encouraging differing viewpoints, accepting constructive criticism and working collaboratively towards decisions that help the project grow. Disrespectful and unacceptable behavior will not be tolerated.
10 | * Be Collaborative: We are committed to supporting what is best for our community and users. When we build anything for the benefit of the project, we should document the work we do and communicate to others on how this affects their work.
11 | 
12 | 
13 | **Our Responsibility. As contributors, members, or bystanders we each individually have the responsibility to behave professionally and respectfully at all times. Disrespectful and unacceptable behaviors include, but are not limited to:**
14 | 
15 | * The use of violent threats, abusive, discriminatory, or derogatory language;
16 | * Offensive comments related to gender, gender identity and expression, sexual orientation, disability, mental illness, race, political or religious affiliation;
17 | * Posting of sexually explicit or violent content;
18 | * The use of sexualized language and unwelcome sexual attention or advances;
19 | * Public or private harassment of any kind;
20 | * Publishing private information, such as physical or electronic address, without permission;
21 | * Other conduct which could reasonably be considered inappropriate in a professional setting;
22 | * Advocating for or encouraging any of the above behaviors.
23 | **Enforcement and Reporting Code of Conduct Issues:**
24 | 
25 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported. [Contact us](mailto:opensource-codeofconduct@amazon.com). All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances.
26 | 
--------------------------------------------------------------------------------
/COMPATIBILITY.md:
--------------------------------------------------------------------------------
1 | - [Compatibility with OpenSearch](#compatibility-with-opensearch)
2 | - [Upgrading](#upgrading)
3 | 
4 | ## Compatibility with OpenSearch
5 | 
6 | The matrix below shows the compatibility of the [`opensearch-dsl-py`](https://pypi.org/project/opensearch-dsl/) client with versions of [`OpenSearch`](https://opensearch.org/downloads.html#opensearch).
7 | 
8 | | OpenSearch Version | Client Version |
9 | | --- | --- |
10 | | 1.0.0 | 1.0.0 |
11 | | 1.0.1 | 1.0.0 |
12 | | 1.1.0 | 1.0.0 |
13 | | 1.2.0 | 1.0.0 |
14 | | 1.2.1 | 1.0.0 |
15 | | 1.2.2 | 1.0.0 |
16 | | 1.2.3 | 1.0.0 |
17 | | 1.2.4 | 1.0.0 |
18 | | 1.3.0 | 1.0.0 |
19 | | 1.3.1 | 1.0.0 |
20 | | 1.3.2 | 1.0.0 |
21 | | 1.3.3 | 1.0.0 |
22 | | 2.0.0 | 2.0.1 |
23 | | 2.0.1 | 2.0.1 |
24 | 
25 | ## Upgrading
26 | 
27 | Major versions of OpenSearch introduce breaking changes that require careful upgrades of the client. While the `opensearch-dsl-py` client 2.0.1 works against the latest OpenSearch 1.x, certain deprecated features removed in OpenSearch 2.0 have also been removed from the client. Please refer to the [OpenSearch documentation](https://opensearch.org/docs/latest/clients/index/) for more information.
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | - [Contributing to OpenSearch](#contributing-to-opensearch)
2 |   - [First Things First](#first-things-first)
3 |   - [Ways to Contribute](#ways-to-contribute)
4 |     - [Bug Reports](#bug-reports)
5 |     - [Feature Requests](#feature-requests)
6 |     - [Contributing Code](#contributing-code)
7 |   - [Developer Certificate of Origin](#developer-certificate-of-origin)
8 |   - [Changelog](#changelog)
9 |     - [Adding Changes](#adding-changes)
10 |   - [Review Process](#review-process)
11 | 
12 | ## Contributing to OpenSearch
13 | 
14 | OpenSearch is a community project that is built and maintained by people just like **you**. We're glad you're interested in helping out. There are several different ways you can do it, but before we talk about that, let's talk about how to get started.
15 | 
16 | ## First Things First
17 | 
18 | 1. **When in doubt, open an issue** - For almost any type of contribution, the first step is opening an issue. Even if you think you already know what the solution is, writing down a description of the problem you're trying to solve will help everyone get context when they review your pull request. If it's truly a trivial change (e.g. a spelling error), you can skip this step -- but as the subject says, when in doubt, [open an issue](https://github.com/opensearch-project/opensearch-dsl-py/issues/new/choose).
19 | 
20 | 2. **Only submit your own work** (or work you have sufficient rights to submit) - Please make sure that any code or documentation you submit is your work or you have the rights to submit. We respect the intellectual property rights of others, and as part of contributing, we'll ask you to sign your contribution with a "Developer Certificate of Origin" (DCO) that states you have the rights to submit this work and you understand we'll use your contribution. There's more information about this topic in the [DCO section](#developer-certificate-of-origin).
21 | 
22 | ## Ways to Contribute
23 | 
24 | ### Bug Reports
25 | 
26 | Ugh! Bugs!
27 | 
28 | A bug is when software behaves in a way that you didn't expect and the developer didn't intend. To help us understand what's going on, we first want to make sure you're working from the latest version.
29 | 
30 | Once you've confirmed that the bug still exists in the latest version, you'll want to check to make sure it's not something we already know about on the [open issues GitHub page](https://github.com/opensearch-project/opensearch-dsl-py/issues).
31 | 
32 | If you've upgraded to the latest version and you can't find it in our open issues list, then you'll need to tell us how to reproduce it. Provide as much information as you can. You may think that the problem lies with your query, when actually it depends on how your data is indexed. The easier it is for us to recreate your problem, the faster it is likely to be fixed.
33 | 
34 | ### Feature Requests
35 | 
36 | If you've thought of a way that OpenSearch could be better, we want to hear about it. We track feature requests using GitHub, so please feel free to open an issue which describes the feature you would like to see, why you need it, and how it should work.
37 | 
38 | 
39 | ### Contributing Code
40 | 
41 | As with other types of contributions, the first step is to [open an issue on GitHub](https://github.com/opensearch-project/opensearch-dsl-py/issues/new/choose). Opening an issue before you make changes makes sure that someone else isn't already working on that particular problem. It also lets us all work together to find the right approach before you spend a bunch of time on a PR. So again, when in doubt, open an issue.
42 | 
43 | ## Developer Certificate of Origin
44 | 
45 | OpenSearch is an open source product released under the Apache 2.0 license (see either [the Apache site](https://www.apache.org/licenses/LICENSE-2.0) or the [LICENSE.txt file](LICENSE.txt)). The Apache 2.0 license allows you to freely use, modify, distribute, and sell your own products that include Apache 2.0 licensed software.
46 | 
47 | We respect the intellectual property rights of others and we want to make sure all incoming contributions are correctly attributed and licensed. A Developer Certificate of Origin (DCO) is a lightweight mechanism to do that.
48 | 
49 | The DCO is a declaration attached to every contribution made by every developer. In the commit message of the contribution, the developer simply adds a `Signed-off-by` statement and thereby agrees to the DCO, which you can find below or at [DeveloperCertificate.org](http://developercertificate.org/).
50 | 
51 | ```
52 | Developer's Certificate of Origin 1.1
53 | 
54 | By making a contribution to this project, I certify that:
55 | 
56 | (a) The contribution was created in whole or in part by me and I
57 |     have the right to submit it under the open source license
58 |     indicated in the file; or
59 | 
60 | (b) The contribution is based upon previous work that, to the
61 |     best of my knowledge, is covered under an appropriate open
62 |     source license and I have the right under that license to
63 |     submit that work with modifications, whether created in whole
64 |     or in part by me, under the same open source license (unless
65 |     I am permitted to submit under a different license), as
66 |     indicated in the file; or
67 | 
68 | (c) The contribution was provided directly to me by some other
69 |     person who certified (a), (b) or (c) and I have not modified
70 |     it.
71 | 
72 | (d) I understand and agree that this project and the contribution
73 |     are public and that a record of the contribution (including
74 |     all personal information I submit with it, including my
75 |     sign-off) is maintained indefinitely and may be redistributed
76 |     consistent with this project or the open source license(s)
77 |     involved.
78 | ```
79 | 
80 | We require that every contribution to OpenSearch is signed with a Developer Certificate of Origin. Additionally, please use your real name. We do not accept anonymous contributors nor those utilizing pseudonyms.
81 | 
82 | Each commit must include a DCO which looks like this:
83 | 
84 | ```
85 | Signed-off-by: Jane Smith <jane.smith@email.com>
86 | ```
87 | 
88 | You may type this line on your own when writing your commit messages. However, if your user.name and user.email are set in your git configs, you can use `-s` or `--signoff` to add the `Signed-off-by` line to the end of the commit message.
89 | 
90 | ## Changelog
91 | 
92 | OpenSearch-dsl-py maintains a version-specific changelog by enforcing a change to the ongoing [CHANGELOG](CHANGELOG.md) file adhering to the [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.
93 | 
94 | The changes are curated by version, with the changes to the main branch added chronologically to the `Unreleased` version. Each version has corresponding sections which list out the category of the change - `Added`, `Changed`, `Deprecated`, `Removed`, `Fixed`, `Security`.
95 | 
96 | 
97 | ### Adding Changes
98 | 
99 | As a contributor, you must ensure that every pull request has its changes listed out within the corresponding version and appropriate section of the [CHANGELOG](CHANGELOG.md) file.
100 | 
101 | Adding in the change is a two-step process:
102 | 1. Add your changes to the corresponding section within the CHANGELOG file with dummy pull request information, then publish the PR.
103 | 
104 | 2. Update the entry for your change in [`CHANGELOG.md`](CHANGELOG.md) and make sure that you reference the pull request there.
105 | 
106 | ## Review Process
107 | 
108 | We deeply appreciate everyone who takes the time to make a contribution. We will review all contributions as quickly as possible. As a reminder, [opening an issue](https://github.com/opensearch-project/opensearch-dsl-py/issues/new/choose) discussing your change before you make it is the best way to smooth the PR process. This will prevent a rejection because someone else is already working on the problem, or because the solution is incompatible with the architectural direction.
109 | 
110 | During the PR process, expect that there will be some back-and-forth.
Please try to respond to comments in a timely fashion, and if you don't wish to continue with the PR, let us know. If a PR takes too many iterations for its complexity or size, we may reject it. Additionally, if you stop responding we may close the PR as abandoned. In either case, if you feel this was done in error, please add a comment on the PR. 111 | 112 | If we accept the PR, a [maintainer](MAINTAINERS.md) will merge your change and usually take care of backporting it to appropriate branches ourselves. 113 | 114 | If we reject the PR, we will close the pull request with a comment explaining why. This decision isn't always final: if you feel we have misunderstood your intended change or otherwise think that we should reconsider then please continue the conversation with a comment on the PR and we'll do our best to address any further points you raise. 115 | -------------------------------------------------------------------------------- /MAINTAINERS.md: -------------------------------------------------------------------------------- 1 | - [Overview](#overview) 2 | - [Current Maintainers](#current-maintainers) 3 | - [Emeritus](#emeritus) 4 | 5 | ## Overview 6 | 7 | [This document](https://github.com/opensearch-project/.github/blob/main/MAINTAINERS.md#maintainer-responsibilities) explains who the maintainers are (see below), what they do in this repo, and how they should be doing it. If you're interested in contributing, see [CONTRIBUTING](CONTRIBUTING.md). 8 | 9 | ## Current Maintainers 10 | 11 | | Maintainer | GitHub ID | Affiliation | 12 | | -------------------- | --------------------------------------------------- | ----------- | 13 | | Vacha Shah | [VachaShah](https://github.com/VachaShah) | Amazon | 14 | | Daniel Doubrovkine | [dblock](https://github.com/dblock) | Amazon | 15 | | Harsha Vamsi Kalluri | [harshavamsi](https://github.com/harshavamsi) | Amazon | 16 | | Yury Fridlyand | [Yury-Fridlyand](https://github.com/Yury-Fridlyand) | Bit Quill | 17 | 18 | 19 | ## Emeritus 20 | | Maintainer | GitHub ID | Affiliation | 21 | | ------------ | ------------------------------------- | ----------- | 22 | | Guian Gumpac | [GumpacG](https://github.com/GumpacG) | Bit Quill | 23 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include AUTHORS 2 | include LICENSE 3 | include README 4 | include CONTRIBUTING.rst 5 | include Changelog.rst 6 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | OpenSearch (https://opensearch.org/) 2 | Copyright OpenSearch Contributors 3 | 4 | This product includes software developed by 5 | Elasticsearch (http://www.elastic.co). 
6 | 
--------------------------------------------------------------------------------
/OpenSearch.svg:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![CI](https://github.com/opensearch-project/opensearch-dsl-py/actions/workflows/ci.yml/badge.svg)](https://github.com/opensearch-project/opensearch-dsl-py/actions/workflows/ci.yml)
2 | [![Integration](https://github.com/opensearch-project/opensearch-dsl-py/actions/workflows/integration.yml/badge.svg)](https://github.com/opensearch-project/opensearch-dsl-py/actions/workflows/integration.yml)
3 | [![Chat](https://img.shields.io/badge/chat-on%20forums-blue)](https://forum.opensearch.org/c/clients)
4 | ![PRs welcome!](https://img.shields.io/badge/PRs-welcome!-success)
5 | 
6 | ![OpenSearch logo](https://github.com/opensearch-project/opensearch-dsl-py/raw/main/OpenSearch.svg)
7 | 
8 | > **Warning**
9 | >
10 | > The OpenSearch high-level Python client (opensearch-dsl-py) is now archived. We recommend switching to the OpenSearch Python client ([opensearch-py](https://github.com/opensearch-project/opensearch-py)), which now includes the functionality of opensearch-dsl-py.
11 | 
12 | OpenSearch DSL Python Client
13 | 
14 | - [Welcome!](https://github.com/opensearch-project/opensearch-dsl-py#welcome)
15 | - [Getting Started](https://github.com/opensearch-project/opensearch-dsl-py#getting-started)
16 | - [Compatibility with OpenSearch](https://github.com/opensearch-project/opensearch-dsl-py#compatibility-with-opensearch)
17 | - [Project Resources](https://github.com/opensearch-project/opensearch-dsl-py#project-resources)
18 | - [Code of Conduct](https://github.com/opensearch-project/opensearch-dsl-py#code-of-conduct)
19 | - [License](https://github.com/opensearch-project/opensearch-dsl-py#license)
20 | - [Copyright](https://github.com/opensearch-project/opensearch-dsl-py#copyright)
21 | 
22 | ## Welcome!
23 | 
24 | **opensearch-dsl-py** is [a community-driven, open source fork](https://aws.amazon.com/blogs/opensource/introducing-opensearch/) of elasticsearch-dsl-py licensed under the [Apache v2.0 License](LICENSE.txt). It is a Python client specifically used for building and running queries against OpenSearch, built on top of [`opensearch-py`](https://github.com/opensearch-project/opensearch-py). For more information, see [opensearch.org](https://opensearch.org/).
25 | 
26 | ## Getting Started
27 | 
28 | To get started with the OpenSearch DSL Python Client, see the [USER GUIDE](USER_GUIDE.md).
29 | 
30 | ## Compatibility with OpenSearch
31 | 
32 | See [COMPATIBILITY](COMPATIBILITY.md).
33 | 
34 | ## Project Resources
35 | 
36 | * [Project Website](https://opensearch.org/)
37 | * [Downloads](https://opensearch.org/downloads.html)
38 | * [Documentation](https://opensearch.org/docs/)
39 | * Need help? Try [Forums](https://forum.opensearch.org/)
40 | * [Project Principles](https://opensearch.org/#principles)
41 | * [Contributing to OpenSearch](CONTRIBUTING.md)
42 | * [Maintainer Responsibilities](MAINTAINERS.md)
43 | * [Release Management](RELEASING.md)
44 | * [Admin Responsibilities](ADMINS.md)
45 | * [Security](SECURITY.md)
46 | 
47 | ## Code of Conduct
48 | 
49 | This project has adopted the [Amazon Open Source Code of Conduct](CODE_OF_CONDUCT.md). For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq), or contact [opensource-codeofconduct@amazon.com](mailto:opensource-codeofconduct@amazon.com) with any additional questions or comments.
50 | 
51 | ## License
52 | 
53 | This project is licensed under the [Apache v2.0 License](LICENSE.txt).
54 | 
55 | ## Copyright
56 | 
57 | Copyright OpenSearch Contributors. See [NOTICE](NOTICE.txt) for details.
58 | 
--------------------------------------------------------------------------------
/RELEASING.md:
--------------------------------------------------------------------------------
1 | - [Overview](#overview)
2 | - [Branching](#branching)
3 |   - [Release Branching](#release-branching)
4 |   - [Feature Branches](#feature-branches)
5 | - [Release Labels](#release-labels)
6 | - [Releasing](#releasing)
7 | 
8 | ## Overview
9 | 
10 | This document explains the release strategy for artifacts in this organization.
11 | 
12 | ## Branching
13 | 
14 | ### Release Branching
15 | 
16 | Given the current major release of 1.0, projects in this organization maintain the following active branches.
17 | 
18 | * **main**: The next _major_ release. This is the branch where all merges take place and code moves fast.
19 | * **1.x**: The next _minor_ release. Once a change is merged into `main`, decide whether to backport it to `1.x`.
20 | * **1.0**: The _current_ release. In between minor releases, only hotfixes (e.g. security) are backported to `1.0`.
21 | 
22 | Label PRs with the next major version label (e.g. `2.0.0`) and merge changes into `main`. Label PRs that you believe need to be backported as `1.x` and `1.0`. Backport PRs by checking out the versioned branch, cherry-picking the changes, and opening a PR against each target backport branch.
23 | 
24 | ### Feature Branches
25 | 
26 | Do not create branches in the upstream repo; use your fork, with the exception of long-lasting feature branches that require active collaboration from multiple developers. Name feature branches with a `feature/` prefix. Once the work is merged to `main`, please make sure to delete the feature branch.
27 | 
28 | ## Release Labels
29 | 
30 | Repositories create consistent release labels, such as `v1.0.0`, `v1.1.0` and `v2.0.0`, as well as `patch` and `backport`. Use release labels to target an issue or a PR for a given release. See [MAINTAINERS](MAINTAINERS.md#triage-open-issues) for more information on triaging issues.
31 | 
32 | ## Releasing
33 | 
34 | The release process is standard across repositories in this org and is run by a release manager volunteering from amongst [maintainers](MAINTAINERS.md).
35 | 
36 | 1. Create a tag, e.g. v2.1.0, and push it to the GitHub repo.
37 | 1. The [release-drafter.yml](.github/workflows/release-drafter.yml) workflow will be automatically kicked off and is responsible for drafting a new release on GitHub containing release artifacts.
38 | 1. Before creating a draft release, this workflow creates a GitHub issue asking for approval from the [maintainers](MAINTAINERS.md). See a sample [issue](https://github.com/gaiksaya/opensearch-dsl-py/issues/6). The maintainers need to approve in order to continue the workflow run.
39 | 1. This draft release triggers the [jenkins release workflow](https://build.ci.opensearch.org/job/opensearch-dsl-py-release/), as a result of which the opensearch-dsl-py client is released on [PyPI](https://pypi.org/project/opensearch-dsl/).
40 | 1. Once the above release workflow is successful, the drafted release on GitHub is published automatically.
41 | 1. 
Increment "VERSION" in [__init__.py](./opensearch_dsl/__init__.py) and [setup.py](./setup.py) to the next patch release, e.g. v2.1.1. See [example](https://github.com/opensearch-project/opensearch-dsl-py/pull/55). 42 | 43 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | ## Reporting a Vulnerability 2 | 3 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/) or directly via email to aws-security@amazon.com. Please do **not** create a public GitHub issue. 4 | -------------------------------------------------------------------------------- /USER_GUIDE.md: -------------------------------------------------------------------------------- 1 | - [User Guide](#user-guide) 2 | - [Setup](#setup) 3 | - [Sample code](#sample-code) 4 | # User Guide 5 | 6 | This user guide specifies how to include and use the dsl-py client in your application. 7 | 8 | ## Setup 9 | 10 | To add the client to your project, install it using [pip](https://pip.pypa.io/): 11 | 12 | ```bash 13 | pip install opensearch-dsl 14 | ``` 15 | 16 | Then import it like any other module: 17 | 18 | ```python 19 | from opensearchpy import OpenSearch 20 | from opensearch_dsl import Search 21 | ``` 22 | 23 | If you prefer to add the client manually or just want to examine the source code, see [opensearch-dsl-py on GitHub](https://github.com/opensearch-project/opensearch-dsl-py). 24 | 25 | 26 | ## Sample code 27 | 28 | ```python 29 | from opensearchpy import OpenSearch 30 | from opensearch_dsl import Search 31 | 32 | host = 'localhost' 33 | port = 9200 34 | auth = ('admin', 'admin') # For testing only. Don't store credentials in code. 35 | ca_certs_path = '/full/path/to/root-ca.pem' # Provide a CA bundle if you use intermediate CAs with your root CA. 36 | 37 | # Optional client certificates if you don't want to use HTTP basic authentication. 38 | # client_cert_path = '/full/path/to/client.pem' 39 | # client_key_path = '/full/path/to/client-key.pem' 40 | 41 | # Create the client with SSL/TLS enabled, but hostname verification disabled. 42 | client = OpenSearch( 43 | hosts = [{'host': host, 'port': port}], 44 | http_compress = True, # enables gzip compression for request bodies 45 | http_auth = auth, 46 | # client_cert = client_cert_path, 47 | # client_key = client_key_path, 48 | use_ssl = True, 49 | verify_certs = True, 50 | ssl_assert_hostname = False, 51 | ssl_show_warn = False, 52 | ca_certs = ca_certs_path 53 | ) 54 | 55 | index_name = 'my-dsl-index' 56 | 57 | response = client.indices.create(index_name) 58 | print('\nCreating index:') 59 | print(response) 60 | 61 | # Add a document to the index. 62 | document = { 63 | 'title': 'python', 64 | 'description': 'beta', 65 | 'category': 'search' 66 | } 67 | id = '1' 68 | 69 | response = client.index( 70 | index = index_name, 71 | body = document, 72 | id = id, 73 | refresh = True 74 | ) 75 | 76 | print('\nAdding document:') 77 | print(response) 78 | 79 | # Search for the document. 80 | s = Search(using=client, index=index_name) \ 81 | .filter("term", category="search") \ 82 | .query("match", title="python") 83 | 84 | response = s.execute() 85 | 86 | print('\nSearch results:') 87 | for hit in response: 88 | print(hit.meta.score, hit.title) 89 | 90 | # Delete the document. 
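response = client.delete(
    index = index_name,
    id = id
)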
91 | print('\nDeleting document:') 92 | print(response) 93 | 94 | # Delete the index. 95 | response = client.indices.delete( 96 | index = index_name 97 | ) 98 | 99 | print('\nDeleting index:') 100 | print(response) 101 | ``` -------------------------------------------------------------------------------- /jenkins/release.JenkinsFile: -------------------------------------------------------------------------------- 1 | lib = library(identifier: 'jenkins@1.3.1', retriever: modernSCM([ 2 | $class: 'GitSCMSource', 3 | remote: 'https://github.com/opensearch-project/opensearch-build-libraries.git', 4 | ])) 5 | 6 | standardReleasePipelineWithGenericTrigger( 7 | overrideDockerImage: 'opensearchstaging/ci-runner:release-centos7-clients-v4', 8 | tokenIdCredential: 'jenkins-opensearch-dsl-py-generic-webhook-token', 9 | causeString: 'A tag was cut on opensearch-project/opensearch-dsl-py repository causing this workflow to run', 10 | downloadReleaseAsset: true, 11 | publishRelease: true) { 12 | publishToPyPi(credentialId: 'jenkins-opensearch-dsl-py-pypi-credentials') 13 | } 14 | -------------------------------------------------------------------------------- /noxfile.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 
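# Nox sessions defined below: "test" runs the pytest suite with coverage,
# "format" rewrites the sources with black/isort plus license headers, and
# "lint" runs the same tools in verification mode.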
26 | 27 | import nox 28 | 29 | SOURCE_FILES = ( 30 | "setup.py", 31 | "noxfile.py", 32 | "opensearch_dsl/", 33 | "tests/", 34 | "utils/", 35 | ) 36 | 37 | 38 | @nox.session(python=["2.7", "3.4", "3.5", "3.6", "3.7", "3.8", "3.9"]) 39 | def test(session): 40 | session.install(".[develop]") 41 | 42 | if session.posargs: 43 | argv = session.posargs 44 | else: 45 | argv = ( 46 | "-vvv", 47 | "--cov=opensearch_dsl", 48 | "tests/", 49 | ) 50 | session.run("pytest", *argv) 51 | 52 | 53 | @nox.session() 54 | def format(session): 55 | session.install("black", "isort") 56 | session.run( 57 | "black", 58 | "--skip-string-normalization", 59 | "--target-version=py33", 60 | "--target-version=py37", 61 | *SOURCE_FILES 62 | ) 63 | session.run("isort", *SOURCE_FILES) 64 | session.run("python", "utils/license-headers.py", "fix", *SOURCE_FILES) 65 | 66 | lint(session) 67 | 68 | 69 | @nox.session 70 | def lint(session): 71 | session.install("flake8", "black", "isort") 72 | session.run( 73 | "black", 74 | "--check", 75 | "--skip-string-normalization", 76 | "--target-version=py33", 77 | "--target-version=py37", 78 | *SOURCE_FILES 79 | ) 80 | session.run("isort", "--check", *SOURCE_FILES) 81 | session.run("flake8", "--ignore=E501,E741,W503", *SOURCE_FILES) 82 | session.run("python", "utils/license-headers.py", "check", *SOURCE_FILES) 83 | -------------------------------------------------------------------------------- /opensearch_dsl/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | from . 
import connections 28 | from .aggs import A 29 | from .analysis import analyzer, char_filter, normalizer, token_filter, tokenizer 30 | from .document import Document, InnerDoc, MetaField 31 | from .exceptions import ( 32 | IllegalOperation, 33 | OpenSearchDslException, 34 | UnknownDslObject, 35 | ValidationException, 36 | ) 37 | from .faceted_search import ( 38 | DateHistogramFacet, 39 | Facet, 40 | FacetedResponse, 41 | FacetedSearch, 42 | HistogramFacet, 43 | NestedFacet, 44 | RangeFacet, 45 | TermsFacet, 46 | ) 47 | from .field import ( 48 | Binary, 49 | Boolean, 50 | Byte, 51 | Completion, 52 | CustomField, 53 | Date, 54 | DateRange, 55 | DenseVector, 56 | Double, 57 | DoubleRange, 58 | Field, 59 | Float, 60 | FloatRange, 61 | GeoPoint, 62 | GeoShape, 63 | HalfFloat, 64 | Integer, 65 | IntegerRange, 66 | Ip, 67 | IpRange, 68 | Join, 69 | Keyword, 70 | Long, 71 | LongRange, 72 | Murmur3, 73 | Nested, 74 | Object, 75 | Percolator, 76 | RangeField, 77 | RankFeature, 78 | RankFeatures, 79 | ScaledFloat, 80 | SearchAsYouType, 81 | Short, 82 | SparseVector, 83 | Text, 84 | TokenCount, 85 | construct_field, 86 | ) 87 | from .function import SF 88 | from .index import Index, IndexTemplate 89 | from .mapping import Mapping 90 | from .query import Q 91 | from .search import MultiSearch, Search 92 | from .update_by_query import UpdateByQuery 93 | from .utils import AttrDict, AttrList, DslBase 94 | from .wrappers import Range 95 | 96 | VERSION = (2, 1, 0) 97 | __version__ = VERSION 98 | __versionstr__ = ".".join(map(str, VERSION)) 99 | __all__ = [ 100 | "A", 101 | "AttrDict", 102 | "AttrList", 103 | "Binary", 104 | "Boolean", 105 | "Byte", 106 | "Completion", 107 | "CustomField", 108 | "Date", 109 | "DateHistogramFacet", 110 | "DateRange", 111 | "DenseVector", 112 | "Document", 113 | "Double", 114 | "DoubleRange", 115 | "DslBase", 116 | "Facet", 117 | "FacetedResponse", 118 | "FacetedSearch", 119 | "Field", 120 | "Float", 121 | "FloatRange", 122 | "GeoPoint", 123 | "GeoShape", 124 | "HalfFloat", 125 | "HistogramFacet", 126 | "IllegalOperation", 127 | "Index", 128 | "IndexTemplate", 129 | "InnerDoc", 130 | "Integer", 131 | "IntegerRange", 132 | "Ip", 133 | "IpRange", 134 | "Join", 135 | "Keyword", 136 | "Long", 137 | "LongRange", 138 | "Mapping", 139 | "MetaField", 140 | "MultiSearch", 141 | "Murmur3", 142 | "Nested", 143 | "NestedFacet", 144 | "Object", 145 | "OpenSearchDslException", 146 | "Percolator", 147 | "Q", 148 | "Range", 149 | "RangeFacet", 150 | "RangeField", 151 | "RankFeature", 152 | "RankFeatures", 153 | "SF", 154 | "ScaledFloat", 155 | "Search", 156 | "SearchAsYouType", 157 | "Short", 158 | "SparseVector", 159 | "TermsFacet", 160 | "Text", 161 | "TokenCount", 162 | "UnknownDslObject", 163 | "UpdateByQuery", 164 | "ValidationException", 165 | "analyzer", 166 | "char_filter", 167 | "connections", 168 | "construct_field", 169 | "normalizer", 170 | "token_filter", 171 | "tokenizer", 172 | ] 173 | -------------------------------------------------------------------------------- /opensearch_dsl/analysis.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. 
See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | import six 28 | 29 | from .connections import get_connection 30 | from .utils import AttrDict, DslBase, merge 31 | 32 | __all__ = ["tokenizer", "analyzer", "char_filter", "token_filter", "normalizer"] 33 | 34 | 35 | class AnalysisBase(object): 36 | @classmethod 37 | def _type_shortcut(cls, name_or_instance, type=None, **kwargs): 38 | if isinstance(name_or_instance, cls): 39 | if type or kwargs: 40 | raise ValueError("%s() cannot accept parameters." % cls.__name__) 41 | return name_or_instance 42 | 43 | if not (type or kwargs): 44 | return cls.get_dsl_class("builtin")(name_or_instance) 45 | 46 | return cls.get_dsl_class(type, "custom")( 47 | name_or_instance, type or "custom", **kwargs 48 | ) 49 | 50 | 51 | class CustomAnalysis(object): 52 | name = "custom" 53 | 54 | def __init__(self, filter_name, builtin_type="custom", **kwargs): 55 | self._builtin_type = builtin_type 56 | self._name = filter_name 57 | super(CustomAnalysis, self).__init__(**kwargs) 58 | 59 | def to_dict(self): 60 | # only name to present in lists 61 | return self._name 62 | 63 | def get_definition(self): 64 | d = super(CustomAnalysis, self).to_dict() 65 | d = d.pop(self.name) 66 | d["type"] = self._builtin_type 67 | return d 68 | 69 | 70 | class CustomAnalysisDefinition(CustomAnalysis): 71 | def get_analysis_definition(self): 72 | out = {self._type_name: {self._name: self.get_definition()}} 73 | 74 | t = getattr(self, "tokenizer", None) 75 | if "tokenizer" in self._param_defs and hasattr(t, "get_definition"): 76 | out["tokenizer"] = {t._name: t.get_definition()} 77 | 78 | filters = { 79 | f._name: f.get_definition() 80 | for f in self.filter 81 | if hasattr(f, "get_definition") 82 | } 83 | if filters: 84 | out["filter"] = filters 85 | 86 | # any sub filter definitions like multiplexers etc? 
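        # (e.g. a multiplexer embeds other token filters; whatever such a filter
        # reports via get_analysis_definition() is merged into `out` below)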
87 | for f in self.filter: 88 | if hasattr(f, "get_analysis_definition"): 89 | d = f.get_analysis_definition() 90 | if d: 91 | merge(out, d, True) 92 | 93 | char_filters = { 94 | f._name: f.get_definition() 95 | for f in self.char_filter 96 | if hasattr(f, "get_definition") 97 | } 98 | if char_filters: 99 | out["char_filter"] = char_filters 100 | 101 | return out 102 | 103 | 104 | class BuiltinAnalysis(object): 105 | name = "builtin" 106 | 107 | def __init__(self, name): 108 | self._name = name 109 | super(BuiltinAnalysis, self).__init__() 110 | 111 | def to_dict(self): 112 | # only name to present in lists 113 | return self._name 114 | 115 | 116 | class Analyzer(AnalysisBase, DslBase): 117 | _type_name = "analyzer" 118 | name = None 119 | 120 | 121 | class BuiltinAnalyzer(BuiltinAnalysis, Analyzer): 122 | def get_analysis_definition(self): 123 | return {} 124 | 125 | 126 | class CustomAnalyzer(CustomAnalysisDefinition, Analyzer): 127 | _param_defs = { 128 | "filter": {"type": "token_filter", "multi": True}, 129 | "char_filter": {"type": "char_filter", "multi": True}, 130 | "tokenizer": {"type": "tokenizer"}, 131 | } 132 | 133 | def simulate(self, text, using="default", explain=False, attributes=None): 134 | """ 135 | Use the Analyze API of opensearch to test the outcome of this analyzer. 136 | 137 | :arg text: Text to be analyzed 138 | :arg using: connection alias to use, defaults to ``'default'`` 139 | :arg explain: will output all token attributes for each token. You can 140 | filter token attributes you want to output by setting ``attributes`` 141 | option. 142 | :arg attributes: if ``explain`` is specified, filter the token 143 | attributes to return. 144 | """ 145 | opensearch = get_connection(using) 146 | 147 | body = {"text": text, "explain": explain} 148 | if attributes: 149 | body["attributes"] = attributes 150 | 151 | definition = self.get_analysis_definition() 152 | analyzer_def = self.get_definition() 153 | 154 | for section in ("tokenizer", "char_filter", "filter"): 155 | if section not in analyzer_def: 156 | continue 157 | sec_def = definition.get(section, {}) 158 | sec_names = analyzer_def[section] 159 | 160 | if isinstance(sec_names, six.string_types): 161 | body[section] = sec_def.get(sec_names, sec_names) 162 | else: 163 | body[section] = [ 164 | sec_def.get(sec_name, sec_name) for sec_name in sec_names 165 | ] 166 | 167 | if self._builtin_type != "custom": 168 | body["analyzer"] = self._builtin_type 169 | 170 | return AttrDict(opensearch.indices.analyze(body=body)) 171 | 172 | 173 | class Normalizer(AnalysisBase, DslBase): 174 | _type_name = "normalizer" 175 | name = None 176 | 177 | 178 | class BuiltinNormalizer(BuiltinAnalysis, Normalizer): 179 | def get_analysis_definition(self): 180 | return {} 181 | 182 | 183 | class CustomNormalizer(CustomAnalysisDefinition, Normalizer): 184 | _param_defs = { 185 | "filter": {"type": "token_filter", "multi": True}, 186 | "char_filter": {"type": "char_filter", "multi": True}, 187 | } 188 | 189 | 190 | class Tokenizer(AnalysisBase, DslBase): 191 | _type_name = "tokenizer" 192 | name = None 193 | 194 | 195 | class BuiltinTokenizer(BuiltinAnalysis, Tokenizer): 196 | pass 197 | 198 | 199 | class CustomTokenizer(CustomAnalysis, Tokenizer): 200 | pass 201 | 202 | 203 | class TokenFilter(AnalysisBase, DslBase): 204 | _type_name = "token_filter" 205 | name = None 206 | 207 | 208 | class BuiltinTokenFilter(BuiltinAnalysis, TokenFilter): 209 | pass 210 | 211 | 212 | class CustomTokenFilter(CustomAnalysis, TokenFilter): 213 | pass 214 | 215 | 
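# A brief usage sketch of this module's shortcut factories (declared at the
# bottom of the file); the names "my_stop" and "my_analyzer" are illustrative
# and not part of the original source. Passing only a name returns a builtin
# reference that serializes to that name, while passing a type plus parameters
# returns a full custom definition:
#
#     from opensearch_dsl import analysis
#
#     std = analysis.analyzer("standard")  # builtin, to_dict() == "standard"
#     my_stop = analysis.token_filter("my_stop", "stop", stopwords=["a", "the"])
#     my_analyzer = analysis.analyzer(
#         "my_analyzer", tokenizer="keyword", filter=["lowercase", my_stop]
#     )
#     my_analyzer.get_analysis_definition()  # analyzer + custom filter definitions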
216 | class MultiplexerTokenFilter(CustomTokenFilter): 217 | name = "multiplexer" 218 | 219 | def get_definition(self): 220 | d = super(CustomTokenFilter, self).get_definition() 221 | 222 | if "filters" in d: 223 | d["filters"] = [ 224 | # comma delimited string given by user 225 | fs if isinstance(fs, six.string_types) else 226 | # list of strings or TokenFilter objects 227 | ", ".join(f.to_dict() if hasattr(f, "to_dict") else f for f in fs) 228 | for fs in self.filters 229 | ] 230 | return d 231 | 232 | def get_analysis_definition(self): 233 | if not hasattr(self, "filters"): 234 | return {} 235 | 236 | fs = {} 237 | d = {"filter": fs} 238 | for filters in self.filters: 239 | if isinstance(filters, six.string_types): 240 | continue 241 | fs.update( 242 | { 243 | f._name: f.get_definition() 244 | for f in filters 245 | if hasattr(f, "get_definition") 246 | } 247 | ) 248 | return d 249 | 250 | 251 | class ConditionalTokenFilter(CustomTokenFilter): 252 | name = "condition" 253 | 254 | def get_definition(self): 255 | d = super(CustomTokenFilter, self).get_definition() 256 | if "filter" in d: 257 | d["filter"] = [ 258 | f.to_dict() if hasattr(f, "to_dict") else f for f in self.filter 259 | ] 260 | return d 261 | 262 | def get_analysis_definition(self): 263 | if not hasattr(self, "filter"): 264 | return {} 265 | 266 | return { 267 | "filter": { 268 | f._name: f.get_definition() 269 | for f in self.filter 270 | if hasattr(f, "get_definition") 271 | } 272 | } 273 | 274 | 275 | class CharFilter(AnalysisBase, DslBase): 276 | _type_name = "char_filter" 277 | name = None 278 | 279 | 280 | class BuiltinCharFilter(BuiltinAnalysis, CharFilter): 281 | pass 282 | 283 | 284 | class CustomCharFilter(CustomAnalysis, CharFilter): 285 | pass 286 | 287 | 288 | # shortcuts for direct use 289 | analyzer = Analyzer._type_shortcut 290 | tokenizer = Tokenizer._type_shortcut 291 | token_filter = TokenFilter._type_shortcut 292 | char_filter = CharFilter._type_shortcut 293 | normalizer = Normalizer._type_shortcut 294 | -------------------------------------------------------------------------------- /opensearch_dsl/connections.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 
26 | 27 | from opensearchpy import OpenSearch 28 | from six import string_types 29 | 30 | from .serializer import serializer 31 | 32 | 33 | class Connections(object): 34 | """ 35 | Class responsible for holding connections to different clusters. Used as a 36 | singleton in this module. 37 | """ 38 | 39 | def __init__(self): 40 | self._kwargs = {} 41 | self._conns = {} 42 | 43 | def configure(self, **kwargs): 44 | """ 45 | Configure multiple connections at once, useful for passing in config 46 | dictionaries obtained from other sources, like Django's settings or a 47 | configuration management tool. 48 | 49 | Example:: 50 | 51 | connections.configure( 52 | default={'hosts': 'localhost'}, 53 | dev={'hosts': ['opensearchdev1.example.com:9200'], 'sniff_on_start': True}, 54 | ) 55 | 56 | Connections will only be constructed lazily when requested through 57 | ``get_connection``. 58 | """ 59 | for k in list(self._conns): 60 | # try and preserve existing client to keep the persistent connections alive 61 | if k in self._kwargs and kwargs.get(k, None) == self._kwargs[k]: 62 | continue 63 | del self._conns[k] 64 | self._kwargs = kwargs 65 | 66 | def add_connection(self, alias, conn): 67 | """ 68 | Add a connection object, it will be passed through as-is. 69 | """ 70 | self._conns[alias] = conn 71 | 72 | def remove_connection(self, alias): 73 | """ 74 | Remove connection from the registry. Raises ``KeyError`` if connection 75 | wasn't found. 76 | """ 77 | errors = 0 78 | for d in (self._conns, self._kwargs): 79 | try: 80 | del d[alias] 81 | except KeyError: 82 | errors += 1 83 | 84 | if errors == 2: 85 | raise KeyError("There is no connection with alias %r." % alias) 86 | 87 | def create_connection(self, alias="default", **kwargs): 88 | """ 89 | Construct an instance of ``opensearchpy.OpenSearch`` and register 90 | it under given alias. 91 | """ 92 | kwargs.setdefault("serializer", serializer) 93 | conn = self._conns[alias] = OpenSearch(**kwargs) 94 | return conn 95 | 96 | def get_connection(self, alias="default"): 97 | """ 98 | Retrieve a connection, construct it if necessary (only configuration 99 | was passed to us). If a non-string alias has been passed through we 100 | assume it's already a client instance and will just return it as-is. 101 | 102 | Raises ``KeyError`` if no client (or its definition) is registered 103 | under the alias. 104 | """ 105 | # do not check isinstance(OpenSearch) so that people can wrap their 106 | # clients 107 | if not isinstance(alias, string_types): 108 | return alias 109 | 110 | # connection already established 111 | try: 112 | return self._conns[alias] 113 | except KeyError: 114 | pass 115 | 116 | # if not, try to create it 117 | try: 118 | return self.create_connection(alias, **self._kwargs[alias]) 119 | except KeyError: 120 | # no connection and no kwargs to set one up 121 | raise KeyError("There is no connection with alias %r." 
% alias) 122 | 123 | 124 | connections = Connections() 125 | configure = connections.configure 126 | add_connection = connections.add_connection 127 | remove_connection = connections.remove_connection 128 | create_connection = connections.create_connection 129 | get_connection = connections.get_connection 130 | -------------------------------------------------------------------------------- /opensearch_dsl/exceptions.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | 28 | class OpenSearchDslException(Exception): 29 | pass 30 | 31 | 32 | class UnknownDslObject(OpenSearchDslException): 33 | pass 34 | 35 | 36 | class ValidationException(ValueError, OpenSearchDslException): 37 | pass 38 | 39 | 40 | class IllegalOperation(OpenSearchDslException): 41 | pass 42 | -------------------------------------------------------------------------------- /opensearch_dsl/function.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 
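# Usage sketch for the SF shortcut defined below (the field and parameter
# values are illustrative): SF accepts a function name with keyword
# parameters, a raw dict, or an existing ScoreFunction instance, and
# normalizes all three into the same DSL object. Q is the query shortcut
# exported by this package.
#
#     from opensearch_dsl import Q, SF
#
#     sf = SF("field_value_factor", field="likes", modifier="log1p")
#     same = SF({"field_value_factor": {"field": "likes", "modifier": "log1p"}})
#     weighted = SF({"weight": 2, "filter": Q("term", published=True)})  # boost_factor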
26 | 27 | try: 28 | import collections.abc as collections_abc # only works on python 3.3+ 29 | except ImportError: 30 | import collections as collections_abc 31 | 32 | from .utils import DslBase 33 | 34 | 35 | def SF(name_or_sf, **params): 36 | # {"script_score": {"script": "_score"}, "filter": {}} 37 | if isinstance(name_or_sf, collections_abc.Mapping): 38 | if params: 39 | raise ValueError("SF() cannot accept parameters when passing in a dict.") 40 | kwargs = {} 41 | sf = name_or_sf.copy() 42 | for k in ScoreFunction._param_defs: 43 | if k in name_or_sf: 44 | kwargs[k] = sf.pop(k) 45 | 46 | # not sf, so just filter+weight, which used to be boost factor 47 | if not sf: 48 | name = "boost_factor" 49 | # {'FUNCTION': {...}} 50 | elif len(sf) == 1: 51 | name, params = sf.popitem() 52 | else: 53 | raise ValueError("SF() got unexpected fields in the dictionary: %r" % sf) 54 | 55 | # boost factor special case, see https://github.com/elastic/elasticsearch/issues/6343 56 | if not isinstance(params, collections_abc.Mapping): 57 | params = {"value": params} 58 | 59 | # mix known params (from _param_defs) and from inside the function 60 | kwargs.update(params) 61 | return ScoreFunction.get_dsl_class(name)(**kwargs) 62 | 63 | # ScriptScore(script="_score", filter=Q()) 64 | if isinstance(name_or_sf, ScoreFunction): 65 | if params: 66 | raise ValueError( 67 | "SF() cannot accept parameters when passing in a ScoreFunction object." 68 | ) 69 | return name_or_sf 70 | 71 | # "script_score", script="_score", filter=Q() 72 | return ScoreFunction.get_dsl_class(name_or_sf)(**params) 73 | 74 | 75 | class ScoreFunction(DslBase): 76 | _type_name = "score_function" 77 | _type_shortcut = staticmethod(SF) 78 | _param_defs = { 79 | "query": {"type": "query"}, 80 | "filter": {"type": "query"}, 81 | "weight": {}, 82 | } 83 | name = None 84 | 85 | def to_dict(self): 86 | d = super(ScoreFunction, self).to_dict() 87 | # filter and query dicts should be at the same level as us 88 | for k in self._param_defs: 89 | if k in d[self.name]: 90 | d[k] = d[self.name].pop(k) 91 | return d 92 | 93 | 94 | class ScriptScore(ScoreFunction): 95 | name = "script_score" 96 | 97 | 98 | class BoostFactor(ScoreFunction): 99 | name = "boost_factor" 100 | 101 | def to_dict(self): 102 | d = super(BoostFactor, self).to_dict() 103 | if "value" in d[self.name]: 104 | d[self.name] = d[self.name].pop("value") 105 | else: 106 | del d[self.name] 107 | return d 108 | 109 | 110 | class RandomScore(ScoreFunction): 111 | name = "random_score" 112 | 113 | 114 | class FieldValueFactor(ScoreFunction): 115 | name = "field_value_factor" 116 | 117 | 118 | class Linear(ScoreFunction): 119 | name = "linear" 120 | 121 | 122 | class Gauss(ScoreFunction): 123 | name = "gauss" 124 | 125 | 126 | class Exp(ScoreFunction): 127 | name = "exp" 128 | -------------------------------------------------------------------------------- /opensearch_dsl/mapping.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership.
Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | try: 28 | import collections.abc as collections_abc # only works on python 3.3+ 29 | except ImportError: 30 | import collections as collections_abc 31 | 32 | from itertools import chain 33 | 34 | from six import iteritems, itervalues 35 | 36 | from .connections import get_connection 37 | from .field import Nested, Text, construct_field 38 | from .utils import DslBase 39 | 40 | META_FIELDS = frozenset( 41 | ( 42 | "dynamic", 43 | "transform", 44 | "dynamic_date_formats", 45 | "date_detection", 46 | "numeric_detection", 47 | "dynamic_templates", 48 | "enabled", 49 | ) 50 | ) 51 | 52 | 53 | class Properties(DslBase): 54 | name = "properties" 55 | _param_defs = {"properties": {"type": "field", "hash": True}} 56 | 57 | def __init__(self): 58 | super(Properties, self).__init__() 59 | 60 | def __repr__(self): 61 | return "Properties()" 62 | 63 | def __getitem__(self, name): 64 | return self.properties[name] 65 | 66 | def __contains__(self, name): 67 | return name in self.properties 68 | 69 | def to_dict(self): 70 | return super(Properties, self).to_dict()["properties"] 71 | 72 | def field(self, name, *args, **kwargs): 73 | self.properties[name] = construct_field(*args, **kwargs) 74 | return self 75 | 76 | def _collect_fields(self): 77 | """Iterate over all Field objects within, including multi fields.""" 78 | for f in itervalues(self.properties.to_dict()): 79 | yield f 80 | # multi fields 81 | if hasattr(f, "fields"): 82 | for inner_f in itervalues(f.fields.to_dict()): 83 | yield inner_f 84 | # nested and inner objects 85 | if hasattr(f, "_collect_fields"): 86 | for inner_f in f._collect_fields(): 87 | yield inner_f 88 | 89 | def update(self, other_object): 90 | if not hasattr(other_object, "properties"): 91 | # not an inner/nested object, no merge possible 92 | return 93 | 94 | our, other = self.properties, other_object.properties 95 | for name in other: 96 | if name in our: 97 | if hasattr(our[name], "update"): 98 | our[name].update(other[name]) 99 | continue 100 | our[name] = other[name] 101 | 102 | 103 | class Mapping(object): 104 | def __init__(self): 105 | self.properties = Properties() 106 | self._meta = {} 107 | 108 | def __repr__(self): 109 | return "Mapping()" 110 | 111 | def _clone(self): 112 | m = Mapping() 113 | m.properties._params = self.properties._params.copy() 114 | return m 115 | 116 | @classmethod 117 | def from_opensearch(cls, index, using="default"): 118 | m = cls() 119 | m.update_from_opensearch(index, using) 120 | return m 121 | 122 | def resolve_nested(self, field_path): 123 | field = self 124 | nested = [] 125 | parts = field_path.split(".") 126 | for i, step in enumerate(parts): 127 | try: 128 | field = field[step] 129 | except KeyError: 130 | return (), None 131 | if isinstance(field, Nested): 132 | nested.append(".".join(parts[: i + 1])) 133 | return nested, field 134 | 135 | def resolve_field(self, field_path): 136 | field = self 
137 | for step in field_path.split("."): 138 | try: 139 | field = field[step] 140 | except KeyError: 141 | return 142 | return field 143 | 144 | def _collect_analysis(self): 145 | analysis = {} 146 | fields = [] 147 | if "_all" in self._meta: 148 | fields.append(Text(**self._meta["_all"])) 149 | 150 | for f in chain(fields, self.properties._collect_fields()): 151 | for analyzer_name in ( 152 | "analyzer", 153 | "normalizer", 154 | "search_analyzer", 155 | "search_quote_analyzer", 156 | ): 157 | if not hasattr(f, analyzer_name): 158 | continue 159 | analyzer = getattr(f, analyzer_name) 160 | d = analyzer.get_analysis_definition() 161 | # empty custom analyzer, probably already defined out of our control 162 | if not d: 163 | continue 164 | 165 | # merge the definition 166 | # TODO: conflict detection/resolution 167 | for key in d: 168 | analysis.setdefault(key, {}).update(d[key]) 169 | 170 | return analysis 171 | 172 | def save(self, index, using="default"): 173 | from .index import Index 174 | 175 | index = Index(index, using=using) 176 | index.mapping(self) 177 | return index.save() 178 | 179 | def update_from_opensearch(self, index, using="default"): 180 | opensearch = get_connection(using) 181 | raw = opensearch.indices.get_mapping(index=index) 182 | _, raw = raw.popitem() 183 | self._update_from_dict(raw["mappings"]) 184 | 185 | def _update_from_dict(self, raw): 186 | for name, definition in iteritems(raw.get("properties", {})): 187 | self.field(name, definition) 188 | 189 | # metadata like _all etc 190 | for name, value in iteritems(raw): 191 | if name != "properties": 192 | if isinstance(value, collections_abc.Mapping): 193 | self.meta(name, **value) 194 | else: 195 | self.meta(name, value) 196 | 197 | def update(self, mapping, update_only=False): 198 | for name in mapping: 199 | if update_only and name in self: 200 | # nested and inner objects, merge recursively 201 | if hasattr(self[name], "update"): 202 | # FIXME only merge subfields, not the settings 203 | self[name].update(mapping[name], update_only) 204 | continue 205 | self.field(name, mapping[name]) 206 | 207 | if update_only: 208 | for name in mapping._meta: 209 | if name not in self._meta: 210 | self._meta[name] = mapping._meta[name] 211 | else: 212 | self._meta.update(mapping._meta) 213 | 214 | def __contains__(self, name): 215 | return name in self.properties.properties 216 | 217 | def __getitem__(self, name): 218 | return self.properties.properties[name] 219 | 220 | def __iter__(self): 221 | return iter(self.properties.properties) 222 | 223 | def field(self, *args, **kwargs): 224 | self.properties.field(*args, **kwargs) 225 | return self 226 | 227 | def meta(self, name, params=None, **kwargs): 228 | if not name.startswith("_") and name not in META_FIELDS: 229 | name = "_" + name 230 | 231 | if params and kwargs: 232 | raise ValueError("Meta configs cannot have both value and a dictionary.") 233 | 234 | self._meta[name] = kwargs if params is None else params 235 | return self 236 | 237 | def to_dict(self): 238 | meta = self._meta 239 | 240 | # hard coded serialization of analyzers in _all 241 | if "_all" in meta: 242 | meta = meta.copy() 243 | _all = meta["_all"] = meta["_all"].copy() 244 | for f in ("analyzer", "search_analyzer", "search_quote_analyzer"): 245 | if hasattr(_all.get(f, None), "to_dict"): 246 | _all[f] = _all[f].to_dict() 247 | meta.update(self.properties.to_dict()) 248 | return meta 249 | -------------------------------------------------------------------------------- /opensearch_dsl/response/__init__.py: 
-------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | from ..utils import AttrDict, AttrList, _wrap 28 | from .hit import Hit, HitMeta 29 | 30 | __all__ = ["Response", "AggResponse", "UpdateByQueryResponse", "Hit", "HitMeta"] 31 | 32 | 33 | class Response(AttrDict): 34 | def __init__(self, search, response, doc_class=None): 35 | super(AttrDict, self).__setattr__("_search", search) 36 | super(AttrDict, self).__setattr__("_doc_class", doc_class) 37 | super(Response, self).__init__(response) 38 | 39 | def __iter__(self): 40 | return iter(self.hits) 41 | 42 | def __getitem__(self, key): 43 | if isinstance(key, (slice, int)): 44 | # for slicing etc 45 | return self.hits[key] 46 | return super(Response, self).__getitem__(key) 47 | 48 | def __nonzero__(self): 49 | return bool(self.hits) 50 | 51 | __bool__ = __nonzero__ 52 | 53 | def __repr__(self): 54 | return "<Response: %r>" % (self.hits or self.aggregations) 55 | 56 | def __len__(self): 57 | return len(self.hits) 58 | 59 | def __getstate__(self): 60 | return self._d_, self._search, self._doc_class 61 | 62 | def __setstate__(self, state): 63 | super(AttrDict, self).__setattr__("_d_", state[0]) 64 | super(AttrDict, self).__setattr__("_search", state[1]) 65 | super(AttrDict, self).__setattr__("_doc_class", state[2]) 66 | 67 | def success(self): 68 | return self._shards.total == self._shards.successful and not self.timed_out 69 | 70 | @property 71 | def hits(self): 72 | if not hasattr(self, "_hits"): 73 | h = self._d_["hits"] 74 | 75 | try: 76 | hits = AttrList(map(self._search._get_result, h["hits"])) 77 | except AttributeError as e: 78 | # avoid raising AttributeError since it will be hidden by the property 79 | raise TypeError("Could not parse hits.", e) 80 | 81 | # avoid assigning _hits into self._d_ 82 | super(AttrDict, self).__setattr__("_hits", hits) 83 | for k in h: 84 | setattr(self._hits, k, _wrap(h[k])) 85 | return self._hits 86 | 87 | @property 88 | def aggregations(self): 89 | return self.aggs 90 | 91 | @property 92 | def aggs(self): 93 | if not hasattr(self, "_aggs"): 94 | aggs = AggResponse( 95 | self._search.aggs, self._search, self._d_.get("aggregations", {}) 96 | ) 97 | 98 | # avoid assigning _aggs into self._d_ 99 | super(AttrDict, self).__setattr__("_aggs", aggs) 100 | return self._aggs 101 | 102 | 103 | class AggResponse(AttrDict): 104 | def __init__(self,
aggs, search, data): 105 | super(AttrDict, self).__setattr__("_meta", {"search": search, "aggs": aggs}) 106 | super(AggResponse, self).__init__(data) 107 | 108 | def __getitem__(self, attr_name): 109 | if attr_name in self._meta["aggs"]: 110 | # don't do self._meta['aggs'][attr_name] to avoid copying 111 | agg = self._meta["aggs"].aggs[attr_name] 112 | return agg.result(self._meta["search"], self._d_[attr_name]) 113 | return super(AggResponse, self).__getitem__(attr_name) 114 | 115 | def __iter__(self): 116 | for name in self._meta["aggs"]: 117 | yield self[name] 118 | 119 | 120 | class UpdateByQueryResponse(AttrDict): 121 | def __init__(self, search, response, doc_class=None): 122 | super(AttrDict, self).__setattr__("_search", search) 123 | super(AttrDict, self).__setattr__("_doc_class", doc_class) 124 | super(UpdateByQueryResponse, self).__init__(response) 125 | 126 | def success(self): 127 | return not self.timed_out and not self.failures 128 | -------------------------------------------------------------------------------- /opensearch_dsl/response/aggs.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | from ..utils import AttrDict, AttrList 28 | from . 
import AggResponse, Response 29 | 30 | 31 | class Bucket(AggResponse): 32 | def __init__(self, aggs, search, data, field=None): 33 | super(Bucket, self).__init__(aggs, search, data) 34 | 35 | 36 | class FieldBucket(Bucket): 37 | def __init__(self, aggs, search, data, field=None): 38 | if field: 39 | data["key"] = field.deserialize(data["key"]) 40 | super(FieldBucket, self).__init__(aggs, search, data, field) 41 | 42 | 43 | class BucketData(AggResponse): 44 | _bucket_class = Bucket 45 | 46 | def _wrap_bucket(self, data): 47 | return self._bucket_class( 48 | self._meta["aggs"], 49 | self._meta["search"], 50 | data, 51 | field=self._meta.get("field"), 52 | ) 53 | 54 | def __iter__(self): 55 | return iter(self.buckets) 56 | 57 | def __len__(self): 58 | return len(self.buckets) 59 | 60 | def __getitem__(self, key): 61 | if isinstance(key, (int, slice)): 62 | return self.buckets[key] 63 | return super(BucketData, self).__getitem__(key) 64 | 65 | @property 66 | def buckets(self): 67 | if not hasattr(self, "_buckets"): 68 | field = getattr(self._meta["aggs"], "field", None) 69 | if field: 70 | self._meta["field"] = self._meta["search"]._resolve_field(field) 71 | bs = self._d_["buckets"] 72 | if isinstance(bs, list): 73 | bs = AttrList(bs, obj_wrapper=self._wrap_bucket) 74 | else: 75 | bs = AttrDict({k: self._wrap_bucket(bs[k]) for k in bs}) 76 | super(AttrDict, self).__setattr__("_buckets", bs) 77 | return self._buckets 78 | 79 | 80 | class FieldBucketData(BucketData): 81 | _bucket_class = FieldBucket 82 | 83 | 84 | class TopHitsData(Response): 85 | def __init__(self, agg, search, data): 86 | super(AttrDict, self).__setattr__( 87 | "meta", AttrDict({"agg": agg, "search": search}) 88 | ) 89 | super(TopHitsData, self).__init__(search, data) 90 | -------------------------------------------------------------------------------- /opensearch_dsl/response/hit.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 
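# Usage sketch for the Hit class below (the index and field names are
# illustrative): a hit merges the document's "_source" and "fields" into
# plain attribute access, while search metadata is kept on the separate
# ``meta`` attribute rather than in the document body.
#
#     from opensearch_dsl import Search
#
#     response = Search(index="blog").query("match", title="python").execute()
#     hit = response.hits[0]           # a Hit instance
#     hit.title                        # value taken from _source
#     hit.meta.id, hit.meta.score     # metadata such as _id and _score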
26 | 27 | from ..utils import AttrDict, HitMeta 28 | 29 | 30 | class Hit(AttrDict): 31 | def __init__(self, document): 32 | data = {} 33 | if "_source" in document: 34 | data = document["_source"] 35 | if "fields" in document: 36 | data.update(document["fields"]) 37 | 38 | super(Hit, self).__init__(data) 39 | # assign meta as attribute and not as key in self._d_ 40 | super(AttrDict, self).__setattr__("meta", HitMeta(document)) 41 | 42 | def __getstate__(self): 43 | # add self.meta since it is not in self.__dict__ 44 | return super(Hit, self).__getstate__() + (self.meta,) 45 | 46 | def __setstate__(self, state): 47 | super(AttrDict, self).__setattr__("meta", state[-1]) 48 | super(Hit, self).__setstate__(state[:-1]) 49 | 50 | def __dir__(self): 51 | # be sure to expose meta in dir(self) 52 | return super(Hit, self).__dir__() + ["meta"] 53 | 54 | def __repr__(self): 55 | return "<Hit({}): {}>".format( 56 | "/".join( 57 | getattr(self.meta, key) for key in ("index", "id") if key in self.meta 58 | ), 59 | super(Hit, self).__repr__(), 60 | ) 61 | -------------------------------------------------------------------------------- /opensearch_dsl/serializer.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | from opensearchpy.serializer import JSONSerializer 28 | 29 | from .utils import AttrList 30 | 31 | 32 | class AttrJSONSerializer(JSONSerializer): 33 | def default(self, data): 34 | if isinstance(data, AttrList): 35 | return data._l_ 36 | if hasattr(data, "to_dict"): 37 | return data.to_dict() 38 | return super(AttrJSONSerializer, self).default(data) 39 | 40 | 41 | serializer = AttrJSONSerializer() 42 | -------------------------------------------------------------------------------- /opensearch_dsl/update_by_query.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership.
Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | from .connections import get_connection 28 | from .query import Bool, Q 29 | from .response import UpdateByQueryResponse 30 | from .search import ProxyDescriptor, QueryProxy, Request 31 | from .utils import recursive_to_dict 32 | 33 | 34 | class UpdateByQuery(Request): 35 | query = ProxyDescriptor("query") 36 | 37 | def __init__(self, **kwargs): 38 | """ 39 | Update by query request to opensearch. 40 | 41 | :arg using: `OpenSearch` instance to use 42 | :arg index: limit the search to index 43 | :arg doc_type: only query this type. 44 | 45 | All the parameters supplied (or omitted) at creation type can be later 46 | overridden by methods (`using`, `index` and `doc_type` respectively). 47 | 48 | """ 49 | super(UpdateByQuery, self).__init__(**kwargs) 50 | self._response_class = UpdateByQueryResponse 51 | self._script = {} 52 | self._query_proxy = QueryProxy(self, "query") 53 | 54 | def filter(self, *args, **kwargs): 55 | return self.query(Bool(filter=[Q(*args, **kwargs)])) 56 | 57 | def exclude(self, *args, **kwargs): 58 | return self.query(Bool(filter=[~Q(*args, **kwargs)])) 59 | 60 | @classmethod 61 | def from_dict(cls, d): 62 | """ 63 | Construct a new `UpdateByQuery` instance from a raw dict containing the search 64 | body. Useful when migrating from raw dictionaries. 65 | 66 | Example:: 67 | 68 | ubq = UpdateByQuery.from_dict({ 69 | "query": { 70 | "bool": { 71 | "must": [...] 72 | } 73 | }, 74 | "script": {...} 75 | }) 76 | ubq = ubq.filter('term', published=True) 77 | """ 78 | u = cls() 79 | u.update_from_dict(d) 80 | return u 81 | 82 | def _clone(self): 83 | """ 84 | Return a clone of the current search request. Performs a shallow copy 85 | of all the underlying objects. Used internally by most state modifying 86 | APIs. 87 | """ 88 | ubq = super(UpdateByQuery, self)._clone() 89 | 90 | ubq._response_class = self._response_class 91 | ubq._script = self._script.copy() 92 | ubq.query._proxied = self.query._proxied 93 | return ubq 94 | 95 | def response_class(self, cls): 96 | """ 97 | Override the default wrapper used for the response. 98 | """ 99 | ubq = self._clone() 100 | ubq._response_class = cls 101 | return ubq 102 | 103 | def update_from_dict(self, d): 104 | """ 105 | Apply options from a serialized body to the current instance. Modifies 106 | the object in-place. Used mostly by ``from_dict``. 107 | """ 108 | d = d.copy() 109 | if "query" in d: 110 | self.query._proxied = Q(d.pop("query")) 111 | if "script" in d: 112 | self._script = d.pop("script") 113 | self._extra.update(d) 114 | return self 115 | 116 | def script(self, **kwargs): 117 | """ 118 | Define update action to take: 119 | 120 | Note: the API only accepts a single script, so 121 | calling the script multiple times will overwrite. 
122 | 123 | Example:: 124 | 125 | ubq = UpdateByQuery() 126 | ubq = ubq.script(source="ctx._source.likes++") 127 | ubq = ubq.script(source="ctx._source.likes += params.f", 128 | lang="expression", 129 | params={'f': 3}) 130 | """ 131 | ubq = self._clone() 132 | if ubq._script: 133 | ubq._script = {} 134 | ubq._script.update(kwargs) 135 | return ubq 136 | 137 | def to_dict(self, **kwargs): 138 | """ 139 | Serialize the search into the dictionary that will be sent over as the 140 | request's body. 141 | 142 | All additional keyword arguments will be included into the dictionary. 143 | """ 144 | d = {} 145 | if self.query: 146 | d["query"] = self.query.to_dict() 147 | 148 | if self._script: 149 | d["script"] = self._script 150 | 151 | d.update(recursive_to_dict(self._extra)) 152 | d.update(recursive_to_dict(kwargs)) 153 | return d 154 | 155 | def execute(self): 156 | """ 157 | Execute the search and return an instance of ``Response`` wrapping all 158 | the data. 159 | """ 160 | opensearch = get_connection(self._using) 161 | 162 | self._response = self._response_class( 163 | self, 164 | opensearch.update_by_query( 165 | index=self._index, body=self.to_dict(), **self._params 166 | ), 167 | ) 168 | return self._response 169 | -------------------------------------------------------------------------------- /opensearch_dsl/wrappers.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | import operator 28 | 29 | from six import iteritems, string_types 30 | 31 | from .utils import AttrDict 32 | 33 | __all__ = ["Range"] 34 | 35 | 36 | class Range(AttrDict): 37 | OPS = { 38 | "lt": operator.lt, 39 | "lte": operator.le, 40 | "gt": operator.gt, 41 | "gte": operator.ge, 42 | } 43 | 44 | def __init__(self, *args, **kwargs): 45 | if args and (len(args) > 1 or kwargs or not isinstance(args[0], dict)): 46 | raise ValueError( 47 | "Range accepts a single dictionary or a set of keyword arguments."
48 | ) 49 | data = args[0] if args else kwargs 50 | 51 | for k in data: 52 | if k not in self.OPS: 53 | raise ValueError("Range received an unknown operator %r" % k) 54 | 55 | if "gt" in data and "gte" in data: 56 | raise ValueError("You cannot specify both gt and gte for Range.") 57 | 58 | if "lt" in data and "lte" in data: 59 | raise ValueError("You cannot specify both lt and lte for Range.") 60 | 61 | super(Range, self).__init__(args[0] if args else kwargs) 62 | 63 | def __repr__(self): 64 | return "Range(%s)" % ", ".join("%s=%r" % op for op in iteritems(self._d_)) 65 | 66 | def __contains__(self, item): 67 | if isinstance(item, string_types): 68 | return super(Range, self).__contains__(item) 69 | 70 | for op in self.OPS: 71 | if op in self._d_ and not self.OPS[op](item, self._d_[op]): 72 | return False 73 | return True 74 | 75 | @property 76 | def upper(self): 77 | if "lt" in self._d_: 78 | return self._d_["lt"], False 79 | if "lte" in self._d_: 80 | return self._d_["lte"], True 81 | return None, False 82 | 83 | @property 84 | def lower(self): 85 | if "gt" in self._d_: 86 | return self._d_["gt"], False 87 | if "gte" in self._d_: 88 | return self._d_["gte"], True 89 | return None, False 90 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [build_sphinx] 2 | source-dir = docs/ 3 | build-dir = docs/_build 4 | all_files = 1 5 | 6 | [wheel] 7 | universal = 1 8 | 9 | [isort] 10 | profile = black 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # The OpenSearch Contributors require contributions made to 5 | # this file be licensed under the Apache-2.0 license or a 6 | # compatible open source license. 7 | # 8 | # Modifications Copyright OpenSearch Contributors. See 9 | # GitHub history for details. 10 | # 11 | # Licensed to Elasticsearch B.V. under one or more contributor 12 | # license agreements. See the NOTICE file distributed with 13 | # this work for additional information regarding copyright 14 | # ownership. Elasticsearch B.V. licenses this file to you under 15 | # the Apache License, Version 2.0 (the "License"); you may 16 | # not use this file except in compliance with the License. 17 | # You may obtain a copy of the License at 18 | # 19 | # http://www.apache.org/licenses/LICENSE-2.0 20 | # 21 | # Unless required by applicable law or agreed to in writing, 22 | # software distributed under the License is distributed on an 23 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 24 | # KIND, either express or implied. See the License for the 25 | # specific language governing permissions and limitations 26 | # under the License. 
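# Usage sketch for opensearch_dsl.wrappers.Range above (the bound values are
# illustrative): a Range validates its operators on construction and supports
# membership tests against its bounds.
#
#     from opensearch_dsl import Range
#
#     r = Range(gte=2, lt=10)
#     5 in r      # True: 2 <= 5 < 10
#     10 in r     # False: "lt" makes the upper bound exclusive
#     r.lower     # (2, True) - the bound and an "inclusive" flag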
27 | 28 | from os.path import dirname, join 29 | 30 | from setuptools import find_packages, setup 31 | 32 | VERSION = (2, 1, 0) 33 | __version__ = VERSION 34 | __versionstr__ = ".".join(map(str, VERSION)) 35 | 36 | f = open(join(dirname(__file__), "README.md")) 37 | long_description = f.read().strip() 38 | f.close() 39 | 40 | install_requires = [ 41 | "six", 42 | "python-dateutil", 43 | "opensearch-py>=2.2.0", 44 | # ipaddress is included in stdlib since python 3.3 45 | 'ipaddress; python_version<"3.3"', 46 | ] 47 | 48 | develop_requires = [ 49 | "mock", 50 | "pytest>=3.0.0", 51 | "pytest-cov", 52 | "pytest-mock<4.0.0", 53 | "pytz", 54 | "coverage<7.0.0", 55 | "sphinx", 56 | "sphinx_rtd_theme", 57 | ] 58 | 59 | setup( 60 | name="opensearch-dsl", 61 | description="Python client for OpenSearch", 62 | license="Apache-2.0", 63 | url="https://github.com/opensearch-project/opensearch-dsl-py", 64 | long_description=long_description, 65 | long_description_content_type="text/markdown", 66 | version=__versionstr__, 67 | author="Honza Král, Guian Gumpac", 68 | author_email="honza.kral@gmail.com, guiang@bitquilltech.com", 69 | maintainer="Charlene Solonynka, Yury Fridlyand, Guian Gumpac", 70 | maintainer_email="charlenes@bitquilltech.com, yuryf@bitquilltech.com, guiang@bitquilltech.com", 71 | packages=find_packages(where=".", exclude=("tests*",)), 72 | python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*", 73 | classifiers=[ 74 | "Development Status :: 5 - Production/Stable", 75 | "License :: OSI Approved :: Apache Software License", 76 | "Intended Audience :: Developers", 77 | "Operating System :: OS Independent", 78 | "Programming Language :: Python", 79 | "Programming Language :: Python :: 2", 80 | "Programming Language :: Python :: 2.7", 81 | "Programming Language :: Python :: 3", 82 | "Programming Language :: Python :: 3.4", 83 | "Programming Language :: Python :: 3.5", 84 | "Programming Language :: Python :: 3.6", 85 | "Programming Language :: Python :: 3.7", 86 | "Programming Language :: Python :: 3.8", 87 | "Programming Language :: Python :: 3.9", 88 | "Programming Language :: Python :: Implementation :: CPython", 89 | "Programming Language :: Python :: Implementation :: PyPy", 90 | ], 91 | install_requires=install_requires, 92 | extras_require={"develop": develop_requires}, 93 | ) 94 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 
16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | -------------------------------------------------------------------------------- /tests/test_analysis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # The OpenSearch Contributors require contributions made to 5 | # this file be licensed under the Apache-2.0 license or a 6 | # compatible open source license. 7 | # 8 | # Modifications Copyright OpenSearch Contributors. See 9 | # GitHub history for details. 10 | # 11 | # Licensed to Elasticsearch B.V. under one or more contributor 12 | # license agreements. See the NOTICE file distributed with 13 | # this work for additional information regarding copyright 14 | # ownership. Elasticsearch B.V. licenses this file to you under 15 | # the Apache License, Version 2.0 (the "License"); you may 16 | # not use this file except in compliance with the License. 17 | # You may obtain a copy of the License at 18 | # 19 | # http://www.apache.org/licenses/LICENSE-2.0 20 | # 21 | # Unless required by applicable law or agreed to in writing, 22 | # software distributed under the License is distributed on an 23 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 24 | # KIND, either express or implied. See the License for the 25 | # specific language governing permissions and limitations 26 | # under the License. 
27 | 28 | from pytest import raises 29 | 30 | from opensearch_dsl import analysis 31 | 32 | 33 | def test_analyzer_serializes_as_name(): 34 | a = analysis.analyzer("my_analyzer") 35 | 36 | assert "my_analyzer" == a.to_dict() 37 | 38 | 39 | def test_analyzer_has_definition(): 40 | a = analysis.CustomAnalyzer( 41 | "my_analyzer", tokenizer="keyword", filter=["lowercase"] 42 | ) 43 | 44 | assert { 45 | "type": "custom", 46 | "tokenizer": "keyword", 47 | "filter": ["lowercase"], 48 | } == a.get_definition() 49 | 50 | 51 | def test_simple_multiplexer_filter(): 52 | a = analysis.analyzer( 53 | "my_analyzer", 54 | tokenizer="keyword", 55 | filter=[ 56 | analysis.token_filter( 57 | "my_multi", "multiplexer", filters=["lowercase", "lowercase, stop"] 58 | ) 59 | ], 60 | ) 61 | 62 | assert { 63 | "analyzer": { 64 | "my_analyzer": { 65 | "filter": ["my_multi"], 66 | "tokenizer": "keyword", 67 | "type": "custom", 68 | } 69 | }, 70 | "filter": { 71 | "my_multi": { 72 | "filters": ["lowercase", "lowercase, stop"], 73 | "type": "multiplexer", 74 | } 75 | }, 76 | } == a.get_analysis_definition() 77 | 78 | 79 | def test_multiplexer_with_custom_filter(): 80 | a = analysis.analyzer( 81 | "my_analyzer", 82 | tokenizer="keyword", 83 | filter=[ 84 | analysis.token_filter( 85 | "my_multi", 86 | "multiplexer", 87 | filters=[ 88 | [analysis.token_filter("en", "snowball", language="English")], 89 | "lowercase, stop", 90 | ], 91 | ) 92 | ], 93 | ) 94 | 95 | assert { 96 | "analyzer": { 97 | "my_analyzer": { 98 | "filter": ["my_multi"], 99 | "tokenizer": "keyword", 100 | "type": "custom", 101 | } 102 | }, 103 | "filter": { 104 | "en": {"type": "snowball", "language": "English"}, 105 | "my_multi": {"filters": ["en", "lowercase, stop"], "type": "multiplexer"}, 106 | }, 107 | } == a.get_analysis_definition() 108 | 109 | 110 | def test_conditional_token_filter(): 111 | a = analysis.analyzer( 112 | "my_cond", 113 | tokenizer=analysis.tokenizer("keyword"), 114 | filter=[ 115 | analysis.token_filter( 116 | "testing", 117 | "condition", 118 | script={"source": "return true"}, 119 | filter=[ 120 | "lowercase", 121 | analysis.token_filter("en", "snowball", language="English"), 122 | ], 123 | ), 124 | "stop", 125 | ], 126 | ) 127 | 128 | assert { 129 | "analyzer": { 130 | "my_cond": { 131 | "filter": ["testing", "stop"], 132 | "tokenizer": "keyword", 133 | "type": "custom", 134 | } 135 | }, 136 | "filter": { 137 | "en": {"language": "English", "type": "snowball"}, 138 | "testing": { 139 | "script": {"source": "return true"}, 140 | "filter": ["lowercase", "en"], 141 | "type": "condition", 142 | }, 143 | }, 144 | } == a.get_analysis_definition() 145 | 146 | 147 | def test_conflicting_nested_filters_cause_error(): 148 | a = analysis.analyzer( 149 | "my_cond", 150 | tokenizer=analysis.tokenizer("keyword"), 151 | filter=[ 152 | analysis.token_filter("en", "stemmer", language="english"), 153 | analysis.token_filter( 154 | "testing", 155 | "condition", 156 | script={"source": "return true"}, 157 | filter=[ 158 | "lowercase", 159 | analysis.token_filter("en", "snowball", language="English"), 160 | ], 161 | ), 162 | ], 163 | ) 164 | 165 | with raises(ValueError): 166 | a.get_analysis_definition() 167 | 168 | 169 | def test_normalizer_serializes_as_name(): 170 | n = analysis.normalizer("my_normalizer") 171 | 172 | assert "my_normalizer" == n.to_dict() 173 | 174 | 175 | def test_normalizer_has_definition(): 176 | n = analysis.CustomNormalizer( 177 | "my_normalizer", filter=["lowercase", "asciifolding"], char_filter=["quote"] 178 | ) 179 | 
180 | assert { 181 | "type": "custom", 182 | "filter": ["lowercase", "asciifolding"], 183 | "char_filter": ["quote"], 184 | } == n.get_definition() 185 | 186 | 187 | def test_tokenizer(): 188 | t = analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3) 189 | 190 | assert t.to_dict() == "trigram" 191 | assert {"type": "nGram", "min_gram": 3, "max_gram": 3} == t.get_definition() 192 | 193 | 194 | def test_custom_analyzer_can_collect_custom_items(): 195 | trigram = analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3) 196 | my_stop = analysis.token_filter("my_stop", "stop", stopwords=["a", "b"]) 197 | umlauts = analysis.char_filter("umlauts", "pattern_replace", mappings=["ü=>ue"]) 198 | a = analysis.analyzer( 199 | "my_analyzer", 200 | tokenizer=trigram, 201 | filter=["lowercase", my_stop], 202 | char_filter=["html_strip", umlauts], 203 | ) 204 | 205 | assert a.to_dict() == "my_analyzer" 206 | assert { 207 | "analyzer": { 208 | "my_analyzer": { 209 | "type": "custom", 210 | "tokenizer": "trigram", 211 | "filter": ["lowercase", "my_stop"], 212 | "char_filter": ["html_strip", "umlauts"], 213 | } 214 | }, 215 | "tokenizer": {"trigram": trigram.get_definition()}, 216 | "filter": {"my_stop": my_stop.get_definition()}, 217 | "char_filter": {"umlauts": umlauts.get_definition()}, 218 | } == a.get_analysis_definition() 219 | 220 | 221 | def test_stemmer_analyzer_can_pass_name(): 222 | t = analysis.token_filter( 223 | "my_english_filter", name="minimal_english", type="stemmer" 224 | ) 225 | assert t.to_dict() == "my_english_filter" 226 | assert {"type": "stemmer", "name": "minimal_english"} == t.get_definition() 227 | -------------------------------------------------------------------------------- /tests/test_connections.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 
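# Usage sketch for opensearch_dsl.connections, which the tests below exercise
# (the hosts and alias names are illustrative): connections can be registered
# directly, or described via configure() and then built lazily on the first
# get_connection() call.
#
#     from opensearch_dsl import Search, connections
#
#     connections.configure(default={"hosts": ["localhost"]})
#     client = connections.get_connection()  # OpenSearch client built here
#     connections.create_connection("prod", hosts=["opensearch.example.com"])
#     Search(using="prod")  # address a named alias per request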
26 | 27 | from opensearchpy import OpenSearch 28 | from pytest import raises 29 | 30 | from opensearch_dsl import connections, serializer 31 | 32 | 33 | def test_default_connection_is_returned_by_default(): 34 | c = connections.Connections() 35 | 36 | con, con2 = object(), object() 37 | c.add_connection("default", con) 38 | 39 | c.add_connection("not-default", con2) 40 | 41 | assert c.get_connection() is con 42 | 43 | 44 | def test_get_connection_creates_connection_if_needed(): 45 | c = connections.Connections() 46 | c.configure(default={"hosts": ["opensearch.com"]}, local={"hosts": ["localhost"]}) 47 | 48 | default = c.get_connection() 49 | local = c.get_connection("local") 50 | 51 | assert isinstance(default, OpenSearch) 52 | assert isinstance(local, OpenSearch) 53 | 54 | assert [{"host": "opensearch.com"}] == default.transport.hosts 55 | assert [{"host": "localhost"}] == local.transport.hosts 56 | 57 | 58 | def test_configure_preserves_unchanged_connections(): 59 | c = connections.Connections() 60 | 61 | c.configure(default={"hosts": ["opensearch.com"]}, local={"hosts": ["localhost"]}) 62 | default = c.get_connection() 63 | local = c.get_connection("local") 64 | 65 | c.configure( 66 | default={"hosts": ["not-opensearch.com"]}, local={"hosts": ["localhost"]} 67 | ) 68 | new_default = c.get_connection() 69 | new_local = c.get_connection("local") 70 | 71 | assert new_local is local 72 | assert new_default is not default 73 | 74 | 75 | def test_remove_connection_removes_both_conn_and_conf(): 76 | c = connections.Connections() 77 | 78 | c.configure(default={"hosts": ["opensearch.com"]}, local={"hosts": ["localhost"]}) 79 | c.add_connection("local2", object()) 80 | 81 | c.remove_connection("default") 82 | c.get_connection("local2") 83 | c.remove_connection("local2") 84 | 85 | with raises(Exception): 86 | c.get_connection("local2") 87 | with raises(Exception): 88 | c.get_connection("default") 89 | 90 | 91 | def test_create_connection_constructs_client(): 92 | c = connections.Connections() 93 | c.create_connection("testing", hosts=["opensearch.com"]) 94 | 95 | con = c.get_connection("testing") 96 | assert [{"host": "opensearch.com"}] == con.transport.hosts 97 | 98 | 99 | def test_create_connection_adds_our_serializer(): 100 | c = connections.Connections() 101 | c.create_connection("testing", hosts=["opensearch.com"]) 102 | 103 | assert c.get_connection("testing").transport.serializer is serializer.serializer 104 | -------------------------------------------------------------------------------- /tests/test_faceted_search.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 
16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | from datetime import datetime 28 | 29 | import pytest 30 | 31 | from opensearch_dsl.faceted_search import DateHistogramFacet, FacetedSearch, TermsFacet 32 | 33 | 34 | class BlogSearch(FacetedSearch): 35 | doc_types = ["user", "post"] 36 | fields = ( 37 | "title^5", 38 | "body", 39 | ) 40 | 41 | facets = { 42 | "category": TermsFacet(field="category.raw"), 43 | "tags": TermsFacet(field="tags"), 44 | } 45 | 46 | 47 | def test_query_is_created_properly(): 48 | bs = BlogSearch("python search") 49 | s = bs.build_search() 50 | 51 | assert s._doc_type == ["user", "post"] 52 | assert { 53 | "aggs": { 54 | "_filter_tags": { 55 | "filter": {"match_all": {}}, 56 | "aggs": {"tags": {"terms": {"field": "tags"}}}, 57 | }, 58 | "_filter_category": { 59 | "filter": {"match_all": {}}, 60 | "aggs": {"category": {"terms": {"field": "category.raw"}}}, 61 | }, 62 | }, 63 | "query": { 64 | "multi_match": {"fields": ("title^5", "body"), "query": "python search"} 65 | }, 66 | "highlight": {"fields": {"body": {}, "title": {}}}, 67 | } == s.to_dict() 68 | 69 | 70 | def test_query_is_created_properly_with_sort_tuple(): 71 | bs = BlogSearch("python search", sort=("category", "-title")) 72 | s = bs.build_search() 73 | 74 | assert s._doc_type == ["user", "post"] 75 | assert { 76 | "aggs": { 77 | "_filter_tags": { 78 | "filter": {"match_all": {}}, 79 | "aggs": {"tags": {"terms": {"field": "tags"}}}, 80 | }, 81 | "_filter_category": { 82 | "filter": {"match_all": {}}, 83 | "aggs": {"category": {"terms": {"field": "category.raw"}}}, 84 | }, 85 | }, 86 | "query": { 87 | "multi_match": {"fields": ("title^5", "body"), "query": "python search"} 88 | }, 89 | "highlight": {"fields": {"body": {}, "title": {}}}, 90 | "sort": ["category", {"title": {"order": "desc"}}], 91 | } == s.to_dict() 92 | 93 | 94 | def test_filter_is_applied_to_search_but_not_relevant_facet(): 95 | bs = BlogSearch("python search", filters={"category": "opensearch"}) 96 | s = bs.build_search() 97 | 98 | assert { 99 | "aggs": { 100 | "_filter_tags": { 101 | "filter": {"terms": {"category.raw": ["opensearch"]}}, 102 | "aggs": {"tags": {"terms": {"field": "tags"}}}, 103 | }, 104 | "_filter_category": { 105 | "filter": {"match_all": {}}, 106 | "aggs": {"category": {"terms": {"field": "category.raw"}}}, 107 | }, 108 | }, 109 | "post_filter": {"terms": {"category.raw": ["opensearch"]}}, 110 | "query": { 111 | "multi_match": {"fields": ("title^5", "body"), "query": "python search"} 112 | }, 113 | "highlight": {"fields": {"body": {}, "title": {}}}, 114 | } == s.to_dict() 115 | 116 | 117 | def test_filters_are_applied_to_search_and_relevant_facets(): 118 | bs = BlogSearch( 119 | "python search", 120 | filters={"category": "opensearch", "tags": ["python", "django"]}, 121 | ) 122 | s = bs.build_search() 123 | 124 | d = s.to_dict() 125 | 126 | # we need to test post_filter without relying on order 127 | f = d["post_filter"]["bool"].pop("must") 128 | assert len(f) == 2 129 | assert {"terms": {"category.raw": ["opensearch"]}} in f 130 | assert {"terms": {"tags": ["python", "django"]}} in f 131 | 132 | assert {
133 | "aggs": { 134 | "_filter_tags": { 135 | "filter": {"terms": {"category.raw": ["opensearch"]}}, 136 | "aggs": {"tags": {"terms": {"field": "tags"}}}, 137 | }, 138 | "_filter_category": { 139 | "filter": {"terms": {"tags": ["python", "django"]}}, 140 | "aggs": {"category": {"terms": {"field": "category.raw"}}}, 141 | }, 142 | }, 143 | "query": { 144 | "multi_match": {"fields": ("title^5", "body"), "query": "python search"} 145 | }, 146 | "post_filter": {"bool": {}}, 147 | "highlight": {"fields": {"body": {}, "title": {}}}, 148 | } == d 149 | 150 | 151 | def test_date_histogram_facet_with_1970_01_01_date(): 152 | dhf = DateHistogramFacet() 153 | assert dhf.get_value({"key": None}) == datetime(1970, 1, 1, 0, 0) 154 | assert dhf.get_value({"key": 0}) == datetime(1970, 1, 1, 0, 0) 155 | 156 | 157 | @pytest.mark.parametrize( 158 | ["interval_type", "interval"], 159 | [ 160 | ("interval", "year"), 161 | ("calendar_interval", "year"), 162 | ("interval", "month"), 163 | ("calendar_interval", "month"), 164 | ("interval", "week"), 165 | ("calendar_interval", "week"), 166 | ("interval", "day"), 167 | ("calendar_interval", "day"), 168 | ("fixed_interval", "day"), 169 | ("interval", "hour"), 170 | ("fixed_interval", "hour"), 171 | ("interval", "1Y"), 172 | ("calendar_interval", "1Y"), 173 | ("interval", "1M"), 174 | ("calendar_interval", "1M"), 175 | ("interval", "1w"), 176 | ("calendar_interval", "1w"), 177 | ("interval", "1d"), 178 | ("calendar_interval", "1d"), 179 | ("fixed_interval", "1d"), 180 | ("interval", "1h"), 181 | ("fixed_interval", "1h"), 182 | ], 183 | ) 184 | def test_date_histogram_interval_types(interval_type, interval): 185 | dhf = DateHistogramFacet(field="@timestamp", **{interval_type: interval}) 186 | assert dhf.get_aggregation().to_dict() == { 187 | "date_histogram": { 188 | "field": "@timestamp", 189 | interval_type: interval, 190 | "min_doc_count": 0, 191 | } 192 | } 193 | dhf.get_value_filter(datetime.now()) 194 | 195 | 196 | def test_date_histogram_no_interval_keyerror(): 197 | dhf = DateHistogramFacet(field="@timestamp") 198 | with pytest.raises(KeyError) as e: 199 | dhf.get_value_filter(datetime.now()) 200 | assert str(e.value) == "'interval'" 201 | -------------------------------------------------------------------------------- /tests/test_field.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. 
See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | import base64 28 | import ipaddress 29 | from datetime import datetime 30 | 31 | import pytest 32 | from dateutil import tz 33 | 34 | from opensearch_dsl import InnerDoc, Range, ValidationException, field 35 | 36 | 37 | def test_date_range_deserialization(): 38 | data = {"lt": "2018-01-01T00:30:10"} 39 | 40 | r = field.DateRange().deserialize(data) 41 | 42 | assert isinstance(r, Range) 43 | assert r.lt == datetime(2018, 1, 1, 0, 30, 10) 44 | 45 | 46 | def test_boolean_deserialization(): 47 | bf = field.Boolean() 48 | 49 | assert not bf.deserialize("false") 50 | assert not bf.deserialize(False) 51 | assert not bf.deserialize("") 52 | assert not bf.deserialize(0) 53 | 54 | assert bf.deserialize(True) 55 | assert bf.deserialize("true") 56 | assert bf.deserialize(1) 57 | 58 | 59 | def test_date_field_can_have_default_tz(): 60 | f = field.Date(default_timezone="UTC") 61 | now = datetime.now() 62 | 63 | now_with_tz = f._deserialize(now) 64 | 65 | assert now_with_tz.tzinfo == tz.gettz("UTC") 66 | assert now.isoformat() + "+00:00" == now_with_tz.isoformat() 67 | 68 | now_with_tz = f._deserialize(now.isoformat()) 69 | 70 | assert now_with_tz.tzinfo == tz.gettz("UTC") 71 | assert now.isoformat() + "+00:00" == now_with_tz.isoformat() 72 | 73 | 74 | def test_custom_field_can_wrap_other_field(): 75 | class MyField(field.CustomField): 76 | @property 77 | def builtin_type(self): 78 | return field.Text(**self._params) 79 | 80 | assert {"type": "text", "index": "not_analyzed"} == MyField( 81 | index="not_analyzed" 82 | ).to_dict() 83 | 84 | 85 | def test_field_from_dict(): 86 | f = field.construct_field({"type": "text", "index": "not_analyzed"}) 87 | 88 | assert isinstance(f, field.Text) 89 | assert {"type": "text", "index": "not_analyzed"} == f.to_dict() 90 | 91 | 92 | def test_multi_fields_are_accepted_and_parsed(): 93 | f = field.construct_field( 94 | "text", 95 | fields={"raw": {"type": "keyword"}, "eng": field.Text(analyzer="english")}, 96 | ) 97 | 98 | assert isinstance(f, field.Text) 99 | assert { 100 | "type": "text", 101 | "fields": { 102 | "raw": {"type": "keyword"}, 103 | "eng": {"type": "text", "analyzer": "english"}, 104 | }, 105 | } == f.to_dict() 106 | 107 | 108 | def test_nested_provides_direct_access_to_its_fields(): 109 | f = field.Nested(properties={"name": {"type": "text", "index": "not_analyzed"}}) 110 | 111 | assert "name" in f 112 | assert f["name"] == field.Text(index="not_analyzed") 113 | 114 | 115 | def test_field_supports_multiple_analyzers(): 116 | f = field.Text(analyzer="snowball", search_analyzer="keyword") 117 | assert { 118 | "analyzer": "snowball", 119 | "search_analyzer": "keyword", 120 | "type": "text", 121 | } == f.to_dict() 122 | 123 | 124 | def test_multifield_supports_multiple_analyzers(): 125 | f = field.Text( 126 | fields={ 127 | "f1": field.Text(search_analyzer="keyword", analyzer="snowball"), 128 | "f2": field.Text(analyzer="keyword"), 129 | } 130 | ) 131 | assert { 132 | "fields": { 133 | "f1": { 134 | "analyzer": "snowball", 135 | "search_analyzer": "keyword", 136 | "type": "text", 137 | }, 138 | "f2": {"analyzer": "keyword", "type": "text"}, 139 | }, 140 | "type": "text", 141 | } == f.to_dict() 142 | 143 | 144 | def test_scaled_float(): 145 | with pytest.raises(TypeError): 146 | field.ScaledFloat() 147 | f = field.ScaledFloat(123) 148 | assert f.to_dict() == {"scaling_factor": 123, "type": "scaled_float"} 149 | 150 | 151 | def 
test_ipaddress(): 152 | f = field.Ip() 153 | assert f.deserialize("127.0.0.1") == ipaddress.ip_address(u"127.0.0.1") 154 | assert f.deserialize(u"::1") == ipaddress.ip_address(u"::1") 155 | assert f.serialize(f.deserialize("::1")) == "::1" 156 | assert f.deserialize(None) is None 157 | with pytest.raises(ValueError): 158 | assert f.deserialize("not_an_ipaddress") 159 | 160 | 161 | def test_float(): 162 | f = field.Float() 163 | assert f.deserialize("42") == 42.0 164 | assert f.deserialize(None) is None 165 | with pytest.raises(ValueError): 166 | assert f.deserialize("not_a_float") 167 | 168 | 169 | def test_integer(): 170 | f = field.Integer() 171 | assert f.deserialize("42") == 42 172 | assert f.deserialize(None) is None 173 | with pytest.raises(ValueError): 174 | assert f.deserialize("not_an_integer") 175 | 176 | 177 | def test_binary(): 178 | f = field.Binary() 179 | assert f.deserialize(base64.b64encode(b"42")) == b"42" 180 | assert f.deserialize(f.serialize(b"42")) == b"42" 181 | assert f.deserialize(None) is None 182 | 183 | 184 | def test_constant_keyword(): 185 | f = field.ConstantKeyword() 186 | assert f.to_dict() == {"type": "constant_keyword"} 187 | 188 | 189 | def test_rank_features(): 190 | f = field.RankFeatures() 191 | assert f.to_dict() == {"type": "rank_features"} 192 | 193 | 194 | def test_object_dynamic_values(): 195 | for dynamic in True, False, "strict": 196 | f = field.Object(dynamic=dynamic) 197 | assert f.to_dict()["dynamic"] == dynamic 198 | 199 | 200 | def test_object_disabled(): 201 | f = field.Object(enabled=False) 202 | assert f.to_dict() == {"type": "object", "enabled": False} 203 | 204 | 205 | def test_object_constructor(): 206 | expected = {"type": "object", "properties": {"inner_int": {"type": "integer"}}} 207 | 208 | class Inner(InnerDoc): 209 | inner_int = field.Integer() 210 | 211 | obj_from_doc = field.Object(doc_class=Inner) 212 | assert obj_from_doc.to_dict() == expected 213 | 214 | obj_from_props = field.Object(properties={"inner_int": field.Integer()}) 215 | assert obj_from_props.to_dict() == expected 216 | 217 | with pytest.raises(ValidationException): 218 | field.Object(doc_class=Inner, properties={"inner_int": field.Integer()}) 219 | 220 | with pytest.raises(ValidationException): 221 | field.Object(doc_class=Inner, dynamic=False) 222 | -------------------------------------------------------------------------------- /tests/test_index.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 
16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | import string 28 | from random import choice 29 | 30 | from pytest import raises 31 | 32 | from opensearch_dsl import Date, Document, Index, IndexTemplate, Text, analyzer 33 | 34 | 35 | class Post(Document): 36 | title = Text() 37 | published_from = Date() 38 | 39 | 40 | def test_multiple_doc_types_will_combine_mappings(): 41 | class User(Document): 42 | username = Text() 43 | 44 | i = Index("i") 45 | i.document(Post) 46 | i.document(User) 47 | assert { 48 | "mappings": { 49 | "properties": { 50 | "title": {"type": "text"}, 51 | "username": {"type": "text"}, 52 | "published_from": {"type": "date"}, 53 | } 54 | } 55 | } == i.to_dict() 56 | 57 | 58 | def test_search_is_limited_to_index_name(): 59 | i = Index("my-index") 60 | s = i.search() 61 | 62 | assert s._index == ["my-index"] 63 | 64 | 65 | def test_cloned_index_has_copied_settings_and_using(): 66 | client = object() 67 | i = Index("my-index", using=client) 68 | i.settings(number_of_shards=1) 69 | 70 | i2 = i.clone("my-other-index") 71 | 72 | assert "my-other-index" == i2._name 73 | assert client is i2._using 74 | assert i._settings == i2._settings 75 | assert i._settings is not i2._settings 76 | 77 | 78 | def test_cloned_index_has_analysis_attribute(): 79 | """ 80 | Regression test for Issue #582 in which `Index.clone()` was not copying 81 | over the `_analysis` attribute. 
82 | """ 83 | client = object() 84 | i = Index("my-index", using=client) 85 | 86 | random_analyzer_name = "".join((choice(string.ascii_letters) for _ in range(100))) 87 | random_analyzer = analyzer( 88 | random_analyzer_name, tokenizer="standard", filter="standard" 89 | ) 90 | 91 | i.analyzer(random_analyzer) 92 | 93 | i2 = i.clone("my-clone-index") 94 | 95 | assert i.to_dict()["settings"]["analysis"] == i2.to_dict()["settings"]["analysis"] 96 | 97 | 98 | def test_settings_are_saved(): 99 | i = Index("i") 100 | i.settings(number_of_replicas=0) 101 | i.settings(number_of_shards=1) 102 | 103 | assert {"settings": {"number_of_shards": 1, "number_of_replicas": 0}} == i.to_dict() 104 | 105 | 106 | def test_registered_doc_type_included_in_to_dict(): 107 | i = Index("i", using="alias") 108 | i.document(Post) 109 | 110 | assert { 111 | "mappings": { 112 | "properties": { 113 | "title": {"type": "text"}, 114 | "published_from": {"type": "date"}, 115 | } 116 | } 117 | } == i.to_dict() 118 | 119 | 120 | def test_registered_doc_type_included_in_search(): 121 | i = Index("i", using="alias") 122 | i.document(Post) 123 | 124 | s = i.search() 125 | 126 | assert s._doc_type == [Post] 127 | 128 | 129 | def test_aliases_add_to_object(): 130 | random_alias = "".join((choice(string.ascii_letters) for _ in range(100))) 131 | alias_dict = {random_alias: {}} 132 | 133 | index = Index("i", using="alias") 134 | index.aliases(**alias_dict) 135 | 136 | assert index._aliases == alias_dict 137 | 138 | 139 | def test_aliases_returned_from_to_dict(): 140 | random_alias = "".join((choice(string.ascii_letters) for _ in range(100))) 141 | alias_dict = {random_alias: {}} 142 | 143 | index = Index("i", using="alias") 144 | index.aliases(**alias_dict) 145 | 146 | assert index._aliases == index.to_dict()["aliases"] == alias_dict 147 | 148 | 149 | def test_analyzers_added_to_object(): 150 | random_analyzer_name = "".join((choice(string.ascii_letters) for _ in range(100))) 151 | random_analyzer = analyzer( 152 | random_analyzer_name, tokenizer="standard", filter="standard" 153 | ) 154 | 155 | index = Index("i", using="alias") 156 | index.analyzer(random_analyzer) 157 | 158 | assert index._analysis["analyzer"][random_analyzer_name] == { 159 | "filter": ["standard"], 160 | "type": "custom", 161 | "tokenizer": "standard", 162 | } 163 | 164 | 165 | def test_analyzers_returned_from_to_dict(): 166 | random_analyzer_name = "".join((choice(string.ascii_letters) for _ in range(100))) 167 | random_analyzer = analyzer( 168 | random_analyzer_name, tokenizer="standard", filter="standard" 169 | ) 170 | index = Index("i", using="alias") 171 | index.analyzer(random_analyzer) 172 | 173 | assert index.to_dict()["settings"]["analysis"]["analyzer"][ 174 | random_analyzer_name 175 | ] == {"filter": ["standard"], "type": "custom", "tokenizer": "standard"} 176 | 177 | 178 | def test_conflicting_analyzer_raises_error(): 179 | i = Index("i") 180 | i.analyzer("my_analyzer", tokenizer="whitespace", filter=["lowercase", "stop"]) 181 | 182 | with raises(ValueError): 183 | i.analyzer("my_analyzer", tokenizer="keyword", filter=["lowercase", "stop"]) 184 | 185 | 186 | def test_index_template_can_have_order(): 187 | i = Index("i-*") 188 | it = i.as_template("i", order=2) 189 | 190 | assert {"index_patterns": ["i-*"], "order": 2} == it.to_dict() 191 | 192 | 193 | def test_index_template_save_result(mock_client): 194 | it = IndexTemplate("test-template", "test-*") 195 | 196 | assert it.save(using="mock") == mock_client.indices.put_template() 197 | 
-------------------------------------------------------------------------------- /tests/test_integration/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | -------------------------------------------------------------------------------- /tests/test_integration/test_analysis.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | from opensearch_dsl import analyzer, token_filter, tokenizer 28 | 29 | 30 | def test_simulate_with_just_builtin_tokenizer(client): 31 | a = analyzer("my-analyzer", tokenizer="keyword") 32 | tokens = a.simulate("Hello World!", using=client).tokens 33 | 34 | assert len(tokens) == 1 35 | assert tokens[0].token == "Hello World!"
36 | 37 | 38 | def test_simulate_complex(client): 39 | a = analyzer( 40 | "my-analyzer", 41 | tokenizer=tokenizer("split_words", "simple_pattern_split", pattern=":"), 42 | filter=["lowercase", token_filter("no-ifs", "stop", stopwords=["if"])], 43 | ) 44 | 45 | tokens = a.simulate("if:this:works", using=client).tokens 46 | 47 | assert len(tokens) == 2 48 | assert ["this", "works"] == [t.token for t in tokens] 49 | 50 | 51 | def test_simulate_builtin(client): 52 | a = analyzer("my-analyzer", "english") 53 | tokens = a.simulate("fixes running").tokens 54 | 55 | assert ["fix", "run"] == [t.token for t in tokens] 56 | -------------------------------------------------------------------------------- /tests/test_integration/test_count.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | from opensearch_dsl.search import Q, Search 28 | 29 | 30 | def test_count_all(data_client): 31 | s = Search(using=data_client).index("git") 32 | assert 53 == s.count() 33 | 34 | 35 | def test_count_prefetch(data_client, mocker): 36 | mocker.spy(data_client, "count") 37 | 38 | search = Search(using=data_client).index("git") 39 | search.execute() 40 | assert search.count() == 53 41 | assert data_client.count.call_count == 0 42 | 43 | search._response.hits.total.relation = "gte" 44 | assert search.count() == 53 45 | assert data_client.count.call_count == 1 46 | 47 | 48 | def test_count_filter(data_client): 49 | s = Search(using=data_client).index("git").filter(~Q("exists", field="parent_shas")) 50 | # initial commit + repo document 51 | assert 2 == s.count() 52 | -------------------------------------------------------------------------------- /tests/test_integration/test_faceted_search.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. 
licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | from datetime import datetime 28 | 29 | import pytest 30 | 31 | from opensearch_dsl import A, Boolean, Date, Document, Keyword 32 | from opensearch_dsl.faceted_search import ( 33 | DateHistogramFacet, 34 | FacetedSearch, 35 | NestedFacet, 36 | RangeFacet, 37 | TermsFacet, 38 | ) 39 | 40 | from .test_document import PullRequest 41 | 42 | 43 | class Repos(Document): 44 | is_public = Boolean() 45 | created_at = Date() 46 | 47 | class Index: 48 | name = "git" 49 | 50 | 51 | class Commit(Document): 52 | files = Keyword() 53 | committed_date = Date() 54 | 55 | class Index: 56 | name = "git" 57 | 58 | 59 | class MetricSearch(FacetedSearch): 60 | index = "git" 61 | doc_types = [Commit] 62 | 63 | facets = { 64 | "files": TermsFacet(field="files", metric=A("max", field="committed_date")), 65 | } 66 | 67 | 68 | @pytest.fixture(scope="session") 69 | def commit_search_cls(opensearch_version): 70 | interval_kwargs = {"fixed_interval": "1d"} 71 | 72 | class CommitSearch(FacetedSearch): 73 | index = "flat-git" 74 | fields = ( 75 | "description", 76 | "files", 77 | ) 78 | 79 | facets = { 80 | "files": TermsFacet(field="files"), 81 | "frequency": DateHistogramFacet( 82 | field="authored_date", min_doc_count=1, **interval_kwargs 83 | ), 84 | "deletions": RangeFacet( 85 | field="stats.deletions", 86 | ranges=[("ok", (None, 1)), ("good", (1, 5)), ("better", (5, None))], 87 | ), 88 | } 89 | 90 | return CommitSearch 91 | 92 | 93 | @pytest.fixture(scope="session") 94 | def repo_search_cls(opensearch_version): 95 | interval_type = "calendar_interval" 96 | 97 | class RepoSearch(FacetedSearch): 98 | index = "git" 99 | doc_types = [Repos] 100 | facets = { 101 | "public": TermsFacet(field="is_public"), 102 | "created": DateHistogramFacet( 103 | field="created_at", **{interval_type: "month"} 104 | ), 105 | } 106 | 107 | def search(self): 108 | s = super(RepoSearch, self).search() 109 | return s.filter("term", commit_repo="repo") 110 | 111 | return RepoSearch 112 | 113 | 114 | @pytest.fixture(scope="session") 115 | def pr_search_cls(opensearch_version): 116 | interval_type = "calendar_interval" 117 | 118 | class PRSearch(FacetedSearch): 119 | index = "test-prs" 120 | doc_types = [PullRequest] 121 | facets = { 122 | "comments": NestedFacet( 123 | "comments", 124 | DateHistogramFacet( 125 | field="comments.created_at", **{interval_type: "month"} 126 | ), 127 | ) 128 | } 129 | 130 | return PRSearch 131 | 132 | 133 | def test_facet_with_custom_metric(data_client): 134 | ms = MetricSearch() 135 | r = ms.execute() 136 | 137 | dates = [f[1] for f in r.facets.files] 138 | assert dates == list(sorted(dates, reverse=True)) 139 | assert dates[0] == 1399038439000 140 | 141 | 142 | def test_nested_facet(pull_request, pr_search_cls): 143 | prs = pr_search_cls() 144 | r = prs.execute() 145 | 146 | assert r.hits.total.value == 1 147 | assert [(datetime(2018, 1, 1, 0, 0), 1, False)] == r.facets.comments 148 | 149 | 150 
| def test_nested_facet_with_filter(pull_request, pr_search_cls): 151 | prs = pr_search_cls(filters={"comments": datetime(2018, 1, 1, 0, 0)}) 152 | r = prs.execute() 153 | 154 | assert r.hits.total.value == 1 155 | assert [(datetime(2018, 1, 1, 0, 0), 1, True)] == r.facets.comments 156 | 157 | prs = pr_search_cls(filters={"comments": datetime(2018, 2, 1, 0, 0)}) 158 | r = prs.execute() 159 | assert not r.hits 160 | 161 | 162 | def test_datehistogram_facet(data_client, repo_search_cls): 163 | rs = repo_search_cls() 164 | r = rs.execute() 165 | 166 | assert r.hits.total.value == 1 167 | assert [(datetime(2014, 3, 1, 0, 0), 1, False)] == r.facets.created 168 | 169 | 170 | def test_boolean_facet(data_client, repo_search_cls): 171 | rs = repo_search_cls() 172 | r = rs.execute() 173 | 174 | assert r.hits.total.value == 1 175 | assert [(True, 1, False)] == r.facets.public 176 | value, count, selected = r.facets.public[0] 177 | assert value is True 178 | 179 | 180 | def test_empty_search_finds_everything( 181 | data_client, opensearch_version, commit_search_cls 182 | ): 183 | cs = commit_search_cls() 184 | r = cs.execute() 185 | 186 | assert r.hits.total.value == 52 187 | assert [ 188 | ("opensearch_dsl", 40, False), 189 | ("test_opensearch_dsl", 35, False), 190 | ("opensearch_dsl/query.py", 19, False), 191 | ("test_opensearch_dsl/test_search.py", 15, False), 192 | ("opensearch_dsl/utils.py", 14, False), 193 | ("test_opensearch_dsl/test_query.py", 13, False), 194 | ("opensearch_dsl/search.py", 12, False), 195 | ("opensearch_dsl/aggs.py", 11, False), 196 | ("test_opensearch_dsl/test_result.py", 5, False), 197 | ("opensearch_dsl/result.py", 3, False), 198 | ] == r.facets.files 199 | 200 | assert [ 201 | (datetime(2014, 3, 3, 0, 0), 2, False), 202 | (datetime(2014, 3, 4, 0, 0), 1, False), 203 | (datetime(2014, 3, 5, 0, 0), 3, False), 204 | (datetime(2014, 3, 6, 0, 0), 3, False), 205 | (datetime(2014, 3, 7, 0, 0), 9, False), 206 | (datetime(2014, 3, 10, 0, 0), 2, False), 207 | (datetime(2014, 3, 15, 0, 0), 4, False), 208 | (datetime(2014, 3, 21, 0, 0), 2, False), 209 | (datetime(2014, 3, 23, 0, 0), 2, False), 210 | (datetime(2014, 3, 24, 0, 0), 10, False), 211 | (datetime(2014, 4, 20, 0, 0), 2, False), 212 | (datetime(2014, 4, 22, 0, 0), 2, False), 213 | (datetime(2014, 4, 25, 0, 0), 3, False), 214 | (datetime(2014, 4, 26, 0, 0), 2, False), 215 | (datetime(2014, 4, 27, 0, 0), 2, False), 216 | (datetime(2014, 5, 1, 0, 0), 2, False), 217 | (datetime(2014, 5, 2, 0, 0), 1, False), 218 | ] == r.facets.frequency 219 | 220 | assert [ 221 | ("ok", 19, False), 222 | ("good", 14, False), 223 | ("better", 19, False), 224 | ] == r.facets.deletions 225 | 226 | 227 | def test_term_filters_are_shown_as_selected_and_data_is_filtered( 228 | data_client, commit_search_cls 229 | ): 230 | cs = commit_search_cls(filters={"files": "test_opensearch_dsl"}) 231 | 232 | r = cs.execute() 233 | 234 | assert 35 == r.hits.total.value 235 | assert [ 236 | ("opensearch_dsl", 40, False), 237 | ("test_opensearch_dsl", 35, True), # selected 238 | ("opensearch_dsl/query.py", 19, False), 239 | ("test_opensearch_dsl/test_search.py", 15, False), 240 | ("opensearch_dsl/utils.py", 14, False), 241 | ("test_opensearch_dsl/test_query.py", 13, False), 242 | ("opensearch_dsl/search.py", 12, False), 243 | ("opensearch_dsl/aggs.py", 11, False), 244 | ("test_opensearch_dsl/test_result.py", 5, False), 245 | ("opensearch_dsl/result.py", 3, False), 246 | ] == r.facets.files 247 | 248 | assert [ 249 | (datetime(2014, 3, 3, 0, 0), 1, False), 250 | 
(datetime(2014, 3, 5, 0, 0), 2, False), 251 | (datetime(2014, 3, 6, 0, 0), 3, False), 252 | (datetime(2014, 3, 7, 0, 0), 6, False), 253 | (datetime(2014, 3, 10, 0, 0), 1, False), 254 | (datetime(2014, 3, 15, 0, 0), 3, False), 255 | (datetime(2014, 3, 21, 0, 0), 2, False), 256 | (datetime(2014, 3, 23, 0, 0), 1, False), 257 | (datetime(2014, 3, 24, 0, 0), 7, False), 258 | (datetime(2014, 4, 20, 0, 0), 1, False), 259 | (datetime(2014, 4, 25, 0, 0), 3, False), 260 | (datetime(2014, 4, 26, 0, 0), 2, False), 261 | (datetime(2014, 4, 27, 0, 0), 1, False), 262 | (datetime(2014, 5, 1, 0, 0), 1, False), 263 | (datetime(2014, 5, 2, 0, 0), 1, False), 264 | ] == r.facets.frequency 265 | 266 | assert [ 267 | ("ok", 12, False), 268 | ("good", 10, False), 269 | ("better", 13, False), 270 | ] == r.facets.deletions 271 | 272 | 273 | def test_range_filters_are_shown_as_selected_and_data_is_filtered( 274 | data_client, commit_search_cls 275 | ): 276 | cs = commit_search_cls(filters={"deletions": "better"}) 277 | 278 | r = cs.execute() 279 | 280 | assert 19 == r.hits.total.value 281 | 282 | 283 | def test_pagination(data_client, commit_search_cls): 284 | cs = commit_search_cls() 285 | cs = cs[0:20] 286 | 287 | assert 52 == cs.count() 288 | assert 20 == len(cs.execute()) 289 | -------------------------------------------------------------------------------- /tests/test_integration/test_index.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 
26 | 27 | from opensearch_dsl import Date, Document, Index, IndexTemplate, Text, analysis 28 | 29 | 30 | class Post(Document): 31 | title = Text(analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword")) 32 | published_from = Date() 33 | 34 | 35 | def test_index_template_works(write_client): 36 | it = IndexTemplate("test-template", "test-*") 37 | it.document(Post) 38 | it.settings(number_of_replicas=0, number_of_shards=1) 39 | it.save() 40 | 41 | i = Index("test-blog") 42 | i.create() 43 | 44 | assert { 45 | "test-blog": { 46 | "mappings": { 47 | "properties": { 48 | "title": {"type": "text", "analyzer": "my_analyzer"}, 49 | "published_from": {"type": "date"}, 50 | } 51 | } 52 | } 53 | } == write_client.indices.get_mapping(index="test-blog") 54 | 55 | 56 | def test_index_can_be_saved_even_with_settings(write_client): 57 | i = Index("test-blog", using=write_client) 58 | i.settings(number_of_shards=3, number_of_replicas=0) 59 | i.save() 60 | i.settings(number_of_replicas=1) 61 | i.save() 62 | 63 | assert ( 64 | "1" == i.get_settings()["test-blog"]["settings"]["index"]["number_of_replicas"] 65 | ) 66 | 67 | 68 | def test_index_exists(data_client): 69 | assert Index("git").exists() 70 | assert not Index("not-there").exists() 71 | 72 | 73 | def test_index_can_be_created_with_settings_and_mappings(write_client): 74 | i = Index("test-blog", using=write_client) 75 | i.document(Post) 76 | i.settings(number_of_replicas=0, number_of_shards=1) 77 | i.create() 78 | 79 | assert { 80 | "test-blog": { 81 | "mappings": { 82 | "properties": { 83 | "title": {"type": "text", "analyzer": "my_analyzer"}, 84 | "published_from": {"type": "date"}, 85 | } 86 | } 87 | } 88 | } == write_client.indices.get_mapping(index="test-blog") 89 | 90 | settings = write_client.indices.get_settings(index="test-blog") 91 | assert settings["test-blog"]["settings"]["index"]["number_of_replicas"] == "0" 92 | assert settings["test-blog"]["settings"]["index"]["number_of_shards"] == "1" 93 | assert settings["test-blog"]["settings"]["index"]["analysis"] == { 94 | "analyzer": {"my_analyzer": {"type": "custom", "tokenizer": "keyword"}} 95 | } 96 | 97 | 98 | def test_delete(write_client): 99 | write_client.indices.create( 100 | index="test-index", 101 | body={"settings": {"number_of_replicas": 0, "number_of_shards": 1}}, 102 | ) 103 | 104 | i = Index("test-index", using=write_client) 105 | i.delete() 106 | assert not write_client.indices.exists(index="test-index") 107 | 108 | 109 | def test_multiple_indices_with_same_doc_type_work(write_client): 110 | i1 = Index("test-index-1", using=write_client) 111 | i2 = Index("test-index-2", using=write_client) 112 | 113 | for i in (i1, i2): 114 | i.document(Post) 115 | i.create() 116 | 117 | for i in ("test-index-1", "test-index-2"): 118 | settings = write_client.indices.get_settings(index=i) 119 | assert settings[i]["settings"]["index"]["analysis"] == { 120 | "analyzer": {"my_analyzer": {"type": "custom", "tokenizer": "keyword"}} 121 | } 122 | -------------------------------------------------------------------------------- /tests/test_integration/test_mapping.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. 
under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | from pytest import raises 28 | 29 | from opensearch_dsl import analysis, exceptions, mapping 30 | 31 | 32 | def test_mapping_saved_into_opensearch(write_client): 33 | m = mapping.Mapping() 34 | m.field( 35 | "name", "text", analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword") 36 | ) 37 | m.field("tags", "keyword") 38 | m.save("test-mapping", using=write_client) 39 | 40 | assert { 41 | "test-mapping": { 42 | "mappings": { 43 | "properties": { 44 | "name": {"type": "text", "analyzer": "my_analyzer"}, 45 | "tags": {"type": "keyword"}, 46 | } 47 | } 48 | } 49 | } == write_client.indices.get_mapping(index="test-mapping") 50 | 51 | 52 | def test_mapping_saved_into_opensearch_when_index_already_exists_closed(write_client): 53 | m = mapping.Mapping() 54 | m.field( 55 | "name", "text", analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword") 56 | ) 57 | write_client.indices.create(index="test-mapping") 58 | 59 | with raises(exceptions.IllegalOperation): 60 | m.save("test-mapping", using=write_client) 61 | 62 | write_client.cluster.health(index="test-mapping", wait_for_status="yellow") 63 | write_client.indices.close(index="test-mapping") 64 | m.save("test-mapping", using=write_client) 65 | 66 | assert { 67 | "test-mapping": { 68 | "mappings": { 69 | "properties": {"name": {"type": "text", "analyzer": "my_analyzer"}} 70 | } 71 | } 72 | } == write_client.indices.get_mapping(index="test-mapping") 73 | 74 | 75 | def test_mapping_saved_into_opensearch_when_index_already_exists_with_analysis( 76 | write_client, 77 | ): 78 | m = mapping.Mapping() 79 | analyzer = analysis.analyzer("my_analyzer", tokenizer="keyword") 80 | m.field("name", "text", analyzer=analyzer) 81 | 82 | new_analysis = analyzer.get_analysis_definition() 83 | new_analysis["analyzer"]["other_analyzer"] = { 84 | "type": "custom", 85 | "tokenizer": "whitespace", 86 | } 87 | write_client.indices.create( 88 | index="test-mapping", body={"settings": {"analysis": new_analysis}} 89 | ) 90 | 91 | m.field("title", "text", analyzer=analyzer) 92 | m.save("test-mapping", using=write_client) 93 | 94 | assert { 95 | "test-mapping": { 96 | "mappings": { 97 | "properties": { 98 | "name": {"type": "text", "analyzer": "my_analyzer"}, 99 | "title": {"type": "text", "analyzer": "my_analyzer"}, 100 | } 101 | } 102 | } 103 | } == write_client.indices.get_mapping(index="test-mapping") 104 | 105 | 106 | def test_mapping_gets_updated_from_opensearch(write_client): 107 | write_client.indices.create( 108 | index="test-mapping", 109 | body={ 110 | "settings": {"number_of_shards": 1, "number_of_replicas": 0}, 111 | "mappings": { 112 | "date_detection": False, 113 | "properties": { 114 | "title": { 115 | "type": "text", 116 | "analyzer": 
"snowball", 117 | "fields": {"raw": {"type": "keyword"}}, 118 | }, 119 | "created_at": {"type": "date"}, 120 | "comments": { 121 | "type": "nested", 122 | "properties": { 123 | "created": {"type": "date"}, 124 | "author": { 125 | "type": "text", 126 | "analyzer": "snowball", 127 | "fields": {"raw": {"type": "keyword"}}, 128 | }, 129 | }, 130 | }, 131 | }, 132 | }, 133 | }, 134 | ) 135 | 136 | m = mapping.Mapping.from_opensearch("test-mapping", using=write_client) 137 | 138 | assert ["comments", "created_at", "title"] == list( 139 | sorted(m.properties.properties._d_.keys()) 140 | ) 141 | assert { 142 | "date_detection": False, 143 | "properties": { 144 | "comments": { 145 | "type": "nested", 146 | "properties": { 147 | "created": {"type": "date"}, 148 | "author": { 149 | "analyzer": "snowball", 150 | "fields": {"raw": {"type": "keyword"}}, 151 | "type": "text", 152 | }, 153 | }, 154 | }, 155 | "created_at": {"type": "date"}, 156 | "title": { 157 | "analyzer": "snowball", 158 | "fields": {"raw": {"type": "keyword"}}, 159 | "type": "text", 160 | }, 161 | }, 162 | } == m.to_dict() 163 | 164 | # test same with alias 165 | write_client.indices.put_alias(index="test-mapping", name="test-alias") 166 | 167 | m2 = mapping.Mapping.from_opensearch("test-alias", using=write_client) 168 | assert m2.to_dict() == m.to_dict() 169 | -------------------------------------------------------------------------------- /tests/test_integration/test_search.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # The OpenSearch Contributors require contributions made to 5 | # this file be licensed under the Apache-2.0 license or a 6 | # compatible open source license. 7 | # 8 | # Modifications Copyright OpenSearch Contributors. See 9 | # GitHub history for details. 10 | # 11 | # Licensed to Elasticsearch B.V. under one or more contributor 12 | # license agreements. See the NOTICE file distributed with 13 | # this work for additional information regarding copyright 14 | # ownership. Elasticsearch B.V. licenses this file to you under 15 | # the Apache License, Version 2.0 (the "License"); you may 16 | # not use this file except in compliance with the License. 17 | # You may obtain a copy of the License at 18 | # 19 | # http://www.apache.org/licenses/LICENSE-2.0 20 | # 21 | # Unless required by applicable law or agreed to in writing, 22 | # software distributed under the License is distributed on an 23 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 24 | # KIND, either express or implied. See the License for the 25 | # specific language governing permissions and limitations 26 | # under the License. 
27 | 28 | from __future__ import unicode_literals 29 | 30 | from opensearchpy import TransportError 31 | from pytest import raises 32 | 33 | from opensearch_dsl import Date, Document, Keyword, MultiSearch, Q, Search, Text 34 | from opensearch_dsl.response import aggs 35 | 36 | from .test_data import FLAT_DATA 37 | 38 | 39 | class Repository(Document): 40 | created_at = Date() 41 | description = Text(analyzer="snowball") 42 | tags = Keyword() 43 | 44 | @classmethod 45 | def search(cls): 46 | return super(Repository, cls).search().filter("term", commit_repo="repo") 47 | 48 | class Index: 49 | name = "git" 50 | 51 | 52 | class Commit(Document): 53 | class Index: 54 | name = "flat-git" 55 | 56 | 57 | def test_filters_aggregation_buckets_are_accessible(data_client): 58 | has_tests_query = Q("term", files="test_opensearch_dsl") 59 | s = Commit.search()[0:0] 60 | s.aggs.bucket("top_authors", "terms", field="author.name.raw").bucket( 61 | "has_tests", "filters", filters={"yes": has_tests_query, "no": ~has_tests_query} 62 | ).metric("lines", "stats", field="stats.lines") 63 | response = s.execute() 64 | 65 | assert isinstance( 66 | response.aggregations.top_authors.buckets[0].has_tests.buckets.yes, aggs.Bucket 67 | ) 68 | assert ( 69 | 35 70 | == response.aggregations.top_authors.buckets[0].has_tests.buckets.yes.doc_count 71 | ) 72 | assert ( 73 | 228 74 | == response.aggregations.top_authors.buckets[0].has_tests.buckets.yes.lines.max 75 | ) 76 | 77 | 78 | def test_top_hits_are_wrapped_in_response(data_client): 79 | s = Commit.search()[0:0] 80 | s.aggs.bucket("top_authors", "terms", field="author.name.raw").metric( 81 | "top_commits", "top_hits", size=5 82 | ) 83 | response = s.execute() 84 | 85 | top_commits = response.aggregations.top_authors.buckets[0].top_commits 86 | assert isinstance(top_commits, aggs.TopHitsData) 87 | assert 5 == len(top_commits) 88 | 89 | hits = [h for h in top_commits] 90 | assert 5 == len(hits) 91 | assert isinstance(hits[0], Commit) 92 | 93 | 94 | def test_inner_hits_are_wrapped_in_response(data_client): 95 | s = Search(index="git")[0:1].query( 96 | "has_parent", parent_type="repo", inner_hits={}, query=Q("match_all") 97 | ) 98 | response = s.execute() 99 | 100 | commit = response.hits[0] 101 | assert isinstance(commit.meta.inner_hits.repo, response.__class__) 102 | assert repr(commit.meta.inner_hits.repo[0]).startswith( 103 | "<Hit(git/opensearch-dsl-py): " 104 | ) -------------------------------------------------------------------------------- /tests/test_integration/test_update_by_query.py: -------------------------------------------------------------------------------- 43 | assert not response.timed_out 44 | assert response.updated == 52 45 | assert response.deleted == 0 46 | assert response.took > 0 47 | assert response.success() 48 | 49 | 50 | def test_update_by_query_with_script(write_client, setup_ubq_tests): 51 | index = setup_ubq_tests 52 | 53 | ubq = ( 54 | UpdateByQuery(using=write_client) 55 | .index(index) 56 | .filter(~Q("exists", field="parent_shas")) 57 | .script(source="ctx._source.is_public = false") 58 | ) 59 | ubq = ubq.params(conflicts="proceed") 60 | 61 | response = ubq.execute() 62 | assert response.total == 2 63 | assert response.updated == 2 64 | assert response.version_conflicts == 0 65 | 66 | 67 | def test_delete_by_query_with_script(write_client, setup_ubq_tests): 68 | index = setup_ubq_tests 69 | 70 | ubq = ( 71 | UpdateByQuery(using=write_client) 72 | .index(index) 73 | .filter(Q("match", parent_shas="1dd19210b5be92b960f7db6f66ae526288edccc3")) 74 | .script(source='ctx.op = "delete"') 75 | ) 76 | ubq = ubq.params(conflicts="proceed") 77 | 78 | response = ubq.execute() 79 | 80 | assert response.total == 1 81 | assert response.deleted == 1 82 | assert response.success() 83 | 
-------------------------------------------------------------------------------- /tests/test_mapping.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | import json 28 | 29 | from opensearch_dsl import Keyword, Nested, Text, analysis, mapping 30 | 31 | 32 | def test_mapping_can_have_fields(): 33 | m = mapping.Mapping() 34 | m.field("name", "text").field("tags", "keyword") 35 | 36 | assert { 37 | "properties": {"name": {"type": "text"}, "tags": {"type": "keyword"}} 38 | } == m.to_dict() 39 | 40 | 41 | def test_mapping_update_is_recursive(): 42 | m1 = mapping.Mapping() 43 | m1.field("title", "text") 44 | m1.field("author", "object") 45 | m1.field("author", "object", properties={"name": {"type": "text"}}) 46 | m1.meta("_all", enabled=False) 47 | m1.meta("dynamic", False) 48 | 49 | m2 = mapping.Mapping() 50 | m2.field("published_from", "date") 51 | m2.field("author", "object", properties={"email": {"type": "text"}}) 52 | m2.field("title", "text") 53 | m2.field("lang", "keyword") 54 | m2.meta("_analyzer", path="lang") 55 | 56 | m1.update(m2, update_only=True) 57 | 58 | assert { 59 | "_all": {"enabled": False}, 60 | "_analyzer": {"path": "lang"}, 61 | "dynamic": False, 62 | "properties": { 63 | "published_from": {"type": "date"}, 64 | "title": {"type": "text"}, 65 | "lang": {"type": "keyword"}, 66 | "author": { 67 | "type": "object", 68 | "properties": {"name": {"type": "text"}, "email": {"type": "text"}}, 69 | }, 70 | }, 71 | } == m1.to_dict() 72 | 73 | 74 | def test_properties_can_iterate_over_all_the_fields(): 75 | m = mapping.Mapping() 76 | m.field("f1", "text", test_attr="f1", fields={"f2": Keyword(test_attr="f2")}) 77 | m.field("f3", Nested(test_attr="f3", properties={"f4": Text(test_attr="f4")})) 78 | 79 | assert {"f1", "f2", "f3", "f4"} == { 80 | f.test_attr for f in m.properties._collect_fields() 81 | } 82 | 83 | 84 | def test_mapping_can_collect_all_analyzers_and_normalizers(): 85 | a1 = analysis.analyzer( 86 | "my_analyzer1", 87 | tokenizer="keyword", 88 | filter=[ 89 | "lowercase", 90 | analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"]), 91 | ], 92 | ) 93 | a2 = analysis.analyzer("english") 94 | a3 = analysis.analyzer("unknown_custom") 95 | a4 = analysis.analyzer( 96 | "my_analyzer2", 97 | tokenizer=analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3), 98 | 
filter=[analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])], 99 | ) 100 | a5 = analysis.analyzer("my_analyzer3", tokenizer="keyword") 101 | n1 = analysis.normalizer("my_normalizer1", filter=["lowercase"]) 102 | n2 = analysis.normalizer( 103 | "my_normalizer2", 104 | filter=[ 105 | "my_filter1", 106 | "my_filter2", 107 | analysis.token_filter("my_filter3", "stop", stopwords=["e", "f"]), 108 | ], 109 | ) 110 | n3 = analysis.normalizer("unknown_custom") 111 | 112 | m = mapping.Mapping() 113 | m.field( 114 | "title", 115 | "text", 116 | analyzer=a1, 117 | fields={"english": Text(analyzer=a2), "unknown": Keyword(search_analyzer=a3)}, 118 | ) 119 | m.field("comments", Nested(properties={"author": Text(analyzer=a4)})) 120 | m.field("normalized_title", "keyword", normalizer=n1) 121 | m.field("normalized_comment", "keyword", normalizer=n2) 122 | m.field("unknown", "keyword", normalizer=n3) 123 | m.meta("_all", analyzer=a5) 124 | 125 | assert { 126 | "analyzer": { 127 | "my_analyzer1": { 128 | "filter": ["lowercase", "my_filter1"], 129 | "tokenizer": "keyword", 130 | "type": "custom", 131 | }, 132 | "my_analyzer2": { 133 | "filter": ["my_filter2"], 134 | "tokenizer": "trigram", 135 | "type": "custom", 136 | }, 137 | "my_analyzer3": {"tokenizer": "keyword", "type": "custom"}, 138 | }, 139 | "normalizer": { 140 | "my_normalizer1": {"filter": ["lowercase"], "type": "custom"}, 141 | "my_normalizer2": { 142 | "filter": ["my_filter1", "my_filter2", "my_filter3"], 143 | "type": "custom", 144 | }, 145 | }, 146 | "filter": { 147 | "my_filter1": {"stopwords": ["a", "b"], "type": "stop"}, 148 | "my_filter2": {"stopwords": ["c", "d"], "type": "stop"}, 149 | "my_filter3": {"stopwords": ["e", "f"], "type": "stop"}, 150 | }, 151 | "tokenizer": {"trigram": {"max_gram": 3, "min_gram": 3, "type": "nGram"}}, 152 | } == m._collect_analysis() 153 | 154 | assert json.loads(json.dumps(m.to_dict())) == m.to_dict() 155 | 156 | 157 | def test_mapping_can_collect_multiple_analyzers(): 158 | a1 = analysis.analyzer( 159 | "my_analyzer1", 160 | tokenizer="keyword", 161 | filter=[ 162 | "lowercase", 163 | analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"]), 164 | ], 165 | ) 166 | a2 = analysis.analyzer( 167 | "my_analyzer2", 168 | tokenizer=analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3), 169 | filter=[analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])], 170 | ) 171 | m = mapping.Mapping() 172 | m.field("title", "text", analyzer=a1, search_analyzer=a2) 173 | m.field( 174 | "text", 175 | "text", 176 | analyzer=a1, 177 | fields={ 178 | "english": Text(analyzer=a1), 179 | "unknown": Keyword(analyzer=a1, search_analyzer=a2), 180 | }, 181 | ) 182 | assert { 183 | "analyzer": { 184 | "my_analyzer1": { 185 | "filter": ["lowercase", "my_filter1"], 186 | "tokenizer": "keyword", 187 | "type": "custom", 188 | }, 189 | "my_analyzer2": { 190 | "filter": ["my_filter2"], 191 | "tokenizer": "trigram", 192 | "type": "custom", 193 | }, 194 | }, 195 | "filter": { 196 | "my_filter1": {"stopwords": ["a", "b"], "type": "stop"}, 197 | "my_filter2": {"stopwords": ["c", "d"], "type": "stop"}, 198 | }, 199 | "tokenizer": {"trigram": {"max_gram": 3, "min_gram": 3, "type": "nGram"}}, 200 | } == m._collect_analysis() 201 | 202 | 203 | def test_even_non_custom_analyzers_can_have_params(): 204 | a1 = analysis.analyzer("whitespace", type="pattern", pattern=r"\\s+") 205 | m = mapping.Mapping() 206 | m.field("title", "text", analyzer=a1) 207 | 208 | assert { 209 | "analyzer": {"whitespace": {"type": 
"pattern", "pattern": r"\\s+"}} 210 | } == m._collect_analysis() 211 | 212 | 213 | def test_resolve_field_can_resolve_multifields(): 214 | m = mapping.Mapping() 215 | m.field("title", "text", fields={"keyword": Keyword()}) 216 | 217 | assert isinstance(m.resolve_field("title.keyword"), Keyword) 218 | 219 | 220 | def test_resolve_nested(): 221 | m = mapping.Mapping() 222 | m.field("n1", "nested", properties={"n2": Nested(properties={"k1": Keyword()})}) 223 | m.field("k2", "keyword") 224 | 225 | nested, field = m.resolve_nested("n1.n2.k1") 226 | assert nested == ["n1", "n1.n2"] 227 | assert isinstance(field, Keyword) 228 | 229 | nested, field = m.resolve_nested("k2") 230 | assert nested == [] 231 | assert isinstance(field, Keyword) 232 | -------------------------------------------------------------------------------- /tests/test_package.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | import opensearch_dsl 28 | 29 | 30 | def test__all__is_sorted(): 31 | assert opensearch_dsl.__all__ == sorted(opensearch_dsl.__all__) 32 | -------------------------------------------------------------------------------- /tests/test_result.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. 
See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | import pickle 28 | from datetime import date 29 | 30 | from pytest import fixture, raises 31 | 32 | from opensearch_dsl import Date, Document, Object, Search, response 33 | from opensearch_dsl.aggs import Terms 34 | from opensearch_dsl.response.aggs import AggResponse, Bucket, BucketData 35 | 36 | 37 | @fixture 38 | def agg_response(aggs_search, aggs_data): 39 | return response.Response(aggs_search, aggs_data) 40 | 41 | 42 | def test_agg_response_is_pickleable(agg_response): 43 | agg_response.hits 44 | r = pickle.loads(pickle.dumps(agg_response)) 45 | 46 | assert r == agg_response 47 | assert r._search == agg_response._search 48 | assert r.hits == agg_response.hits 49 | 50 | 51 | def test_response_is_pickleable(dummy_response): 52 | res = response.Response(Search(), dummy_response) 53 | res.hits 54 | r = pickle.loads(pickle.dumps(res)) 55 | 56 | assert r == res 57 | assert r._search == res._search 58 | assert r.hits == res.hits 59 | 60 | 61 | def test_hit_is_pickleable(dummy_response): 62 | res = response.Response(Search(), dummy_response) 63 | hits = pickle.loads(pickle.dumps(res.hits)) 64 | 65 | assert hits == res.hits 66 | assert hits[0].meta == res.hits[0].meta 67 | 68 | 69 | def test_response_stores_search(dummy_response): 70 | s = Search() 71 | r = response.Response(s, dummy_response) 72 | 73 | assert r._search is s 74 | 75 | 76 | def test_interactive_helpers(dummy_response): 77 | res = response.Response(Search(), dummy_response) 78 | hits = res.hits 79 | h = hits[0] 80 | 81 | rhits = ( 82 | "[<Hit(test-index/opensearch): {}>, <Hit(test-index/42): {}...}}>, " 83 | "<Hit(test-index/47): {}...}}>, <Hit(test-index/53): {{}}>]" 84 | ).format( 85 | repr(dummy_response["hits"]["hits"][0]["_source"]), 86 | repr(dummy_response["hits"]["hits"][1]["_source"])[:60], 87 | repr(dummy_response["hits"]["hits"][2]["_source"])[:60], 88 | ) 89 | 90 | assert res 91 | assert "<Response: %s>" % rhits == repr(res) 92 | assert rhits == repr(hits) 93 | assert {"meta", "city", "name"} == set(dir(h)) 94 | assert "<Hit(test-index/opensearch): %r>" % dummy_response["hits"]["hits"][0][ 95 | "_source" 96 | ] == repr(h) 97 | 98 | 99 | def test_empty_response_is_false(dummy_response): 100 | dummy_response["hits"]["hits"] = [] 101 | res = response.Response(Search(), dummy_response) 102 | 103 | assert not res 104 | 105 | 106 | def test_len_response(dummy_response): 107 | res = response.Response(Search(), dummy_response) 108 | assert len(res) == 4 109 | 110 | 111 | def test_iterating_over_response_gives_you_hits(dummy_response): 112 | res = response.Response(Search(), dummy_response) 113 | hits = list(h for h in res) 114 | 115 | assert res.success() 116 | assert 123 == res.took 117 | assert 4 == len(hits) 118 | assert all(isinstance(h, response.Hit) for h in hits) 119 | h = hits[0] 120 | 121 | assert "test-index" == h.meta.index 122 | assert "opensearch" == h.meta.id 123 | assert 12 == h.meta.score 124 | 125 | assert hits[1].meta.routing == "opensearch" 126 | 127 | 128 | def test_hits_get_wrapped_to_contain_additional_attrs(dummy_response): 129 | res = response.Response(Search(), dummy_response) 130 | hits = res.hits 131 | 132 | assert 123 == hits.total 133 | assert 12.0 == hits.max_score 134 | 135 | 136 | def test_hits_provide_dot_and_bracket_access_to_attrs(dummy_response): 137 | res = response.Response(Search(), dummy_response) 138 | h = res.hits[0] 139 | 140 | assert "OpenSearch" == h.name 141 | assert "OpenSearch" == h["name"] 142 | 143 | assert "Honza" == res.hits[2].name.first 144 | 145 | with raises(KeyError): 146 | h["not_there"] 147 | 148 | with
raises(AttributeError): 149 | h.not_there 150 | 151 | 152 | def test_slicing_on_response_slices_on_hits(dummy_response): 153 | res = response.Response(Search(), dummy_response) 154 | 155 | assert res[0] is res.hits[0] 156 | assert res[::-1] == res.hits[::-1] 157 | 158 | 159 | def test_aggregation_base(agg_response): 160 | assert agg_response.aggs is agg_response.aggregations 161 | assert isinstance(agg_response.aggs, response.AggResponse) 162 | 163 | 164 | def test_metric_agg_works(agg_response): 165 | assert 25052.0 == agg_response.aggs.sum_lines.value 166 | 167 | 168 | def test_aggregations_can_be_iterated_over(agg_response): 169 | aggs = [a for a in agg_response.aggs] 170 | 171 | assert len(aggs) == 3 172 | assert all(map(lambda a: isinstance(a, AggResponse), aggs)) 173 | 174 | 175 | def test_aggregations_can_be_retrieved_by_name(agg_response, aggs_search): 176 | a = agg_response.aggs["popular_files"] 177 | 178 | assert isinstance(a, BucketData) 179 | assert isinstance(a._meta["aggs"], Terms) 180 | assert a._meta["aggs"] is aggs_search.aggs.aggs["popular_files"] 181 | 182 | 183 | def test_bucket_response_can_be_iterated_over(agg_response): 184 | popular_files = agg_response.aggregations.popular_files 185 | 186 | buckets = [b for b in popular_files] 187 | assert all(isinstance(b, Bucket) for b in buckets) 188 | assert buckets == popular_files.buckets 189 | 190 | 191 | def test_bucket_keys_get_deserialized(aggs_data, aggs_search): 192 | class Commit(Document): 193 | info = Object(properties={"committed_date": Date()}) 194 | 195 | class Index: 196 | name = "test-commit" 197 | 198 | aggs_search = aggs_search.doc_type(Commit) 199 | agg_response = response.Response(aggs_search, aggs_data) 200 | 201 | per_month = agg_response.aggregations.per_month 202 | for b in per_month: 203 | assert isinstance(b.key, date) 204 | -------------------------------------------------------------------------------- /tests/test_update_by_query.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 
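# A minimal usage sketch of the UpdateByQuery API exercised by the tests
# below (illustrative only: the index name "blog" and the presence of a
# configured default connection are assumptions, not part of this suite):
#
#   from opensearch_dsl import UpdateByQuery
#
#   ubq = (
#       UpdateByQuery(index="blog")
#       .query("match", title="python")
#       .script(source="ctx._source.views += params.n", params={"n": 1})
#   )
#   response = ubq.execute()
#   assert response.success()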
26 | 27 | from copy import deepcopy 28 | 29 | from opensearch_dsl import Q, UpdateByQuery 30 | from opensearch_dsl.response import UpdateByQueryResponse 31 | 32 | 33 | def test_ubq_starts_with_no_query(): 34 | ubq = UpdateByQuery() 35 | 36 | assert ubq.query._proxied is None 37 | 38 | 39 | def test_ubq_to_dict(): 40 | ubq = UpdateByQuery() 41 | assert {} == ubq.to_dict() 42 | 43 | ubq = ubq.query("match", f=42) 44 | assert {"query": {"match": {"f": 42}}} == ubq.to_dict() 45 | 46 | assert {"query": {"match": {"f": 42}}, "size": 10} == ubq.to_dict(size=10) 47 | 48 | ubq = UpdateByQuery(extra={"size": 5}) 49 | assert {"size": 5} == ubq.to_dict() 50 | 51 | ubq = UpdateByQuery(extra={"extra_q": Q("term", category="conference")}) 52 | assert {"extra_q": {"term": {"category": "conference"}}} == ubq.to_dict() 53 | 54 | 55 | def test_complex_example(): 56 | ubq = UpdateByQuery() 57 | ubq = ( 58 | ubq.query("match", title="python") 59 | .query(~Q("match", title="ruby")) 60 | .filter(Q("term", category="meetup") | Q("term", category="conference")) 61 | .script( 62 | source="ctx._source.likes += params.f", lang="painless", params={"f": 3} 63 | ) 64 | ) 65 | 66 | ubq.query.minimum_should_match = 2 67 | assert { 68 | "query": { 69 | "bool": { 70 | "filter": [ 71 | { 72 | "bool": { 73 | "should": [ 74 | {"term": {"category": "meetup"}}, 75 | {"term": {"category": "conference"}}, 76 | ] 77 | } 78 | } 79 | ], 80 | "must": [{"match": {"title": "python"}}], 81 | "must_not": [{"match": {"title": "ruby"}}], 82 | "minimum_should_match": 2, 83 | } 84 | }, 85 | "script": { 86 | "source": "ctx._source.likes += params.f", 87 | "lang": "painless", 88 | "params": {"f": 3}, 89 | }, 90 | } == ubq.to_dict() 91 | 92 | 93 | def test_exclude(): 94 | ubq = UpdateByQuery() 95 | ubq = ubq.exclude("match", title="python") 96 | 97 | assert { 98 | "query": { 99 | "bool": { 100 | "filter": [{"bool": {"must_not": [{"match": {"title": "python"}}]}}] 101 | } 102 | } 103 | } == ubq.to_dict() 104 | 105 | 106 | def test_reverse(): 107 | d = { 108 | "query": { 109 | "filtered": { 110 | "filter": { 111 | "bool": { 112 | "should": [ 113 | {"term": {"category": "meetup"}}, 114 | {"term": {"category": "conference"}}, 115 | ] 116 | } 117 | }, 118 | "query": { 119 | "bool": { 120 | "must": [{"match": {"title": "python"}}], 121 | "must_not": [{"match": {"title": "ruby"}}], 122 | "minimum_should_match": 2, 123 | } 124 | }, 125 | } 126 | }, 127 | "script": { 128 | "source": "ctx._source.likes += params.f", 129 | "lang": "painless", 130 | "params": {"f": 3}, 131 | }, 132 | } 133 | 134 | d2 = deepcopy(d) 135 | 136 | ubq = UpdateByQuery.from_dict(d) 137 | 138 | assert d == d2 139 | assert d == ubq.to_dict() 140 | 141 | 142 | def test_from_dict_doesnt_need_query(): 143 | ubq = UpdateByQuery.from_dict({"script": {"source": "test"}}) 144 | 145 | assert {"script": {"source": "test"}} == ubq.to_dict() 146 | 147 | 148 | def test_params_being_passed_to_search(mock_client): 149 | ubq = UpdateByQuery(using="mock") 150 | ubq = ubq.params(routing="42") 151 | ubq.execute() 152 | 153 | mock_client.update_by_query.assert_called_once_with( 154 | index=None, body={}, routing="42" 155 | ) 156 | 157 | 158 | def test_overwrite_script(): 159 | ubq = UpdateByQuery() 160 | ubq = ubq.script( 161 | source="ctx._source.likes += params.f", lang="painless", params={"f": 3} 162 | ) 163 | assert { 164 | "script": { 165 | "source": "ctx._source.likes += params.f", 166 | "lang": "painless", 167 | "params": {"f": 3}, 168 | } 169 | } == ubq.to_dict() 170 | ubq = 
ubq.script(source="ctx._source.likes++") 171 | assert {"script": {"source": "ctx._source.likes++"}} == ubq.to_dict() 172 | 173 | 174 | def test_update_by_query_response_success(): 175 | ubqr = UpdateByQueryResponse({}, {"timed_out": False, "failures": []}) 176 | assert ubqr.success() 177 | 178 | ubqr = UpdateByQueryResponse({}, {"timed_out": True, "failures": []}) 179 | assert not ubqr.success() 180 | 181 | ubqr = UpdateByQueryResponse({}, {"timed_out": False, "failures": [{}]}) 182 | assert not ubqr.success() 183 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 
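# A quick orientation sketch for the wrappers tested below (illustrative
# values): AttrDict and AttrList wrap the plain dicts/lists returned by the
# client so keys are also reachable as attributes, recursively.
#
#   from opensearch_dsl import utils
#
#   d = utils.AttrDict({"author": {"name": "Honza"}})
#   assert d.author.name == "Honza"        # attribute access, nested wrapping
#   assert d["author"]["name"] == "Honza"  # item access still works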
26 | 27 | import pickle 28 | 29 | from pytest import raises 30 | 31 | from opensearch_dsl import Q, serializer, utils 32 | 33 | 34 | def test_attrdict_pickle(): 35 | ad = utils.AttrDict({}) 36 | 37 | pickled_ad = pickle.dumps(ad) 38 | assert ad == pickle.loads(pickled_ad) 39 | 40 | 41 | def test_attrlist_pickle(): 42 | al = utils.AttrList([]) 43 | 44 | pickled_al = pickle.dumps(al) 45 | assert al == pickle.loads(pickled_al) 46 | 47 | 48 | def test_attrlist_slice(): 49 | class MyAttrDict(utils.AttrDict): 50 | pass 51 | 52 | l = utils.AttrList([{}, {}], obj_wrapper=MyAttrDict) 53 | assert isinstance(l[:][0], MyAttrDict) 54 | 55 | 56 | def test_merge(): 57 | a = utils.AttrDict({"a": {"b": 42, "c": 47}}) 58 | b = {"a": {"b": 123, "d": -12}, "e": [1, 2, 3]} 59 | 60 | utils.merge(a, b) 61 | 62 | assert a == {"a": {"b": 123, "c": 47, "d": -12}, "e": [1, 2, 3]} 63 | 64 | 65 | def test_merge_conflict(): 66 | for d in ( 67 | {"a": 42}, 68 | {"a": {"b": 47}}, 69 | ): 70 | utils.merge({"a": {"b": 42}}, d) 71 | with raises(ValueError): 72 | utils.merge({"a": {"b": 42}}, d, True) 73 | 74 | 75 | def test_attrdict_bool(): 76 | d = utils.AttrDict({}) 77 | 78 | assert not d 79 | d.title = "Title" 80 | assert d 81 | 82 | 83 | def test_attrlist_items_get_wrapped_during_iteration(): 84 | al = utils.AttrList([1, object(), [1], {}]) 85 | 86 | l = list(iter(al)) 87 | 88 | assert isinstance(l[2], utils.AttrList) 89 | assert isinstance(l[3], utils.AttrDict) 90 | 91 | 92 | def test_serializer_deals_with_Attr_versions(): 93 | d = utils.AttrDict({"key": utils.AttrList([1, 2, 3])}) 94 | 95 | assert serializer.serializer.dumps(d) == serializer.serializer.dumps( 96 | {"key": [1, 2, 3]} 97 | ) 98 | 99 | 100 | def test_serializer_deals_with_objects_with_to_dict(): 101 | class MyClass(object): 102 | def to_dict(self): 103 | return 42 104 | 105 | assert serializer.serializer.dumps(MyClass()) == "42" 106 | 107 | 108 | def test_recursive_to_dict(): 109 | assert utils.recursive_to_dict({"k": [1, (1.0, {"v": Q("match", key="val")})]}) == { 110 | "k": [1, (1.0, {"v": {"match": {"key": "val"}}})] 111 | } 112 | 113 | 114 | def test_attrdict_get(): 115 | a = utils.AttrDict({"a": {"b": 42, "c": 47}}) 116 | assert a.get("a", {}).get("b", 0) == 42 117 | assert a.get("a", {}).get("e", 0) == 0 118 | assert a.get("d", {}) == {} 119 | with raises(AttributeError): 120 | assert a.get("d") 121 | -------------------------------------------------------------------------------- /tests/test_validation.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 
16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | from datetime import datetime 28 | 29 | from pytest import raises 30 | 31 | from opensearch_dsl import ( 32 | Boolean, 33 | Date, 34 | Document, 35 | InnerDoc, 36 | Integer, 37 | Nested, 38 | Object, 39 | Text, 40 | ) 41 | from opensearch_dsl.exceptions import ValidationException 42 | 43 | 44 | class Author(InnerDoc): 45 | name = Text(required=True) 46 | email = Text(required=True) 47 | 48 | def clean(self): 49 | print(self, type(self), self.name) 50 | if self.name.lower() not in self.email: 51 | raise ValidationException("Invalid email!") 52 | 53 | 54 | class BlogPost(Document): 55 | authors = Nested(Author, required=True) 56 | created = Date() 57 | inner = Object() 58 | 59 | 60 | class BlogPostWithStatus(Document): 61 | published = Boolean(required=True) 62 | 63 | 64 | class AutoNowDate(Date): 65 | def clean(self, data): 66 | if data is None: 67 | data = datetime.now() 68 | return super(AutoNowDate, self).clean(data) 69 | 70 | 71 | class Log(Document): 72 | timestamp = AutoNowDate(required=True) 73 | data = Text() 74 | 75 | 76 | def test_required_int_can_be_0(): 77 | class DT(Document): 78 | i = Integer(required=True) 79 | 80 | dt = DT(i=0) 81 | assert dt.full_clean() is None 82 | 83 | 84 | def test_required_field_cannot_be_empty_list(): 85 | class DT(Document): 86 | i = Integer(required=True) 87 | 88 | dt = DT(i=[]) 89 | with raises(ValidationException): 90 | dt.full_clean() 91 | 92 | 93 | def test_validation_works_for_lists_of_values(): 94 | class DT(Document): 95 | i = Date(required=True) 96 | 97 | dt = DT(i=[datetime.now(), "not date"]) 98 | with raises(ValidationException): 99 | dt.full_clean() 100 | 101 | dt = DT(i=[datetime.now(), datetime.now()]) 102 | assert None is dt.full_clean() 103 | 104 | 105 | def test_field_with_custom_clean(): 106 | l = Log() 107 | l.full_clean() 108 | 109 | assert isinstance(l.timestamp, datetime) 110 | 111 | 112 | def test_empty_object(): 113 | d = BlogPost(authors=[{"name": "Guian", "email": "guiang@bitquilltech.com"}]) 114 | d.inner = {} 115 | 116 | d.full_clean() 117 | 118 | 119 | def test_missing_required_field_raises_validation_exception(): 120 | d = BlogPost() 121 | with raises(ValidationException): 122 | d.full_clean() 123 | 124 | d = BlogPost() 125 | d.authors.append({"name": "Guian"}) 126 | with raises(ValidationException): 127 | d.full_clean() 128 | 129 | d = BlogPost() 130 | d.authors.append({"name": "Guian", "email": "guiang@bitquilltech.com"}) 131 | d.full_clean() 132 | 133 | 134 | def test_boolean_doesnt_treat_false_as_empty(): 135 | d = BlogPostWithStatus() 136 | with raises(ValidationException): 137 | d.full_clean() 138 | d.published = False 139 | d.full_clean() 140 | d.published = True 141 | d.full_clean() 142 | 143 | 144 | def test_custom_validation_on_nested_gets_run(): 145 | d = BlogPost(authors=[Author(name="Guian", email="king@example.com")], created=None) 146 | 147 | assert isinstance(d.authors[0], Author) 148 | 149 | with raises(ValidationException): 150 | d.full_clean() 151 | 152 | 153 | def test_accessing_known_fields_returns_empty_value(): 154 | d = BlogPost() 155 | 
156 | assert [] == d.authors 157 | 158 | d.authors.append({}) 159 | assert None is d.authors[0].name 160 | assert None is d.authors[0].email 161 | 162 | 163 | def test_empty_values_are_not_serialized(): 164 | d = BlogPost( 165 | authors=[{"name": "Guian", "email": "guiang@bitquilltech.com"}], created=None 166 | ) 167 | 168 | d.full_clean() 169 | assert d.to_dict() == { 170 | "authors": [{"name": "Guian", "email": "guiang@bitquilltech.com"}] 171 | } 172 | -------------------------------------------------------------------------------- /tests/test_wrappers.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 
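# A minimal sketch of the Range wrapper tested below (illustrative bounds):
# Range models an interval via gt/gte/lt/lte, supports membership tests, and
# exposes (bound, inclusive) tuples on .lower/.upper.
#
#   from opensearch_dsl import Range
#
#   r = Range(gte=2, lt=5)
#   assert 2 in r and 4 in r
#   assert 5 not in r
#   assert r.lower == (2, True)
#   assert r.upper == (5, False)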
26 | 27 | from datetime import datetime, timedelta 28 | 29 | import pytest 30 | 31 | from opensearch_dsl import Range 32 | 33 | 34 | @pytest.mark.parametrize( 35 | "kwargs, item", 36 | [ 37 | ({}, 1), 38 | ({}, -1), 39 | ({"gte": -1}, -1), 40 | ({"lte": 4}, 4), 41 | ({"lte": 4, "gte": 2}, 4), 42 | ({"lte": 4, "gte": 2}, 2), 43 | ({"gt": datetime.now() - timedelta(seconds=10)}, datetime.now()), 44 | ], 45 | ) 46 | def test_range_contains(kwargs, item): 47 | assert item in Range(**kwargs) 48 | 49 | 50 | @pytest.mark.parametrize( 51 | "kwargs, item", 52 | [ 53 | ({"gt": -1}, -1), 54 | ({"lt": 4}, 4), 55 | ({"lt": 4}, 42), 56 | ({"lte": 4, "gte": 2}, 1), 57 | ({"lte": datetime.now() - timedelta(seconds=10)}, datetime.now()), 58 | ], 59 | ) 60 | def test_range_not_contains(kwargs, item): 61 | assert item not in Range(**kwargs) 62 | 63 | 64 | @pytest.mark.parametrize( 65 | "args,kwargs", 66 | [ 67 | (({},), {"lt": 42}), 68 | ((), {"not_lt": 42}), 69 | ((object(),), {}), 70 | ((), {"lt": 1, "lte": 1}), 71 | ((), {"gt": 1, "gte": 1}), 72 | ], 73 | ) 74 | def test_range_raises_value_error_on_wrong_params(args, kwargs): 75 | with pytest.raises(ValueError): 76 | Range(*args, **kwargs) 77 | 78 | 79 | @pytest.mark.parametrize( 80 | "range,lower,inclusive", 81 | [ 82 | (Range(gt=1), 1, False), 83 | (Range(gte=1), 1, True), 84 | (Range(), None, False), 85 | (Range(lt=42), None, False), 86 | ], 87 | ) 88 | def test_range_lower(range, lower, inclusive): 89 | assert (lower, inclusive) == range.lower 90 | 91 | 92 | @pytest.mark.parametrize( 93 | "range,upper,inclusive", 94 | [ 95 | (Range(lt=1), 1, False), 96 | (Range(lte=1), 1, True), 97 | (Range(), None, False), 98 | (Range(gt=42), None, False), 99 | ], 100 | ) 101 | def test_range_upper(range, upper, inclusive): 102 | assert (upper, inclusive) == range.upper 103 | -------------------------------------------------------------------------------- /utils/build-dists.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | """A command line tool for building and verifying releases 28 | Can be used for building both 'opensearch' and 'opensearchX' dists. 29 | Only requires 'name' in 'setup.py' and the directory to be changed. 
30 | """ 31 | 32 | import contextlib 33 | import os 34 | import re 35 | import shlex 36 | import shutil 37 | import tempfile 38 | 39 | base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 40 | tmp_dir = None 41 | 42 | 43 | @contextlib.contextmanager 44 | def set_tmp_dir(): 45 | global tmp_dir 46 | tmp_dir = tempfile.mkdtemp() 47 | yield tmp_dir 48 | shutil.rmtree(tmp_dir) 49 | tmp_dir = None 50 | 51 | 52 | def run(*argv, expect_exit_code=0): 53 | global tmp_dir 54 | if tmp_dir is None: 55 | os.chdir(base_dir) 56 | else: 57 | os.chdir(tmp_dir) 58 | 59 | cmd = " ".join(shlex.quote(x) for x in argv) 60 | print("$ " + cmd) 61 | exit_code = os.system(cmd) 62 | if exit_code != expect_exit_code: 63 | print( 64 | "Command exited incorrectly: should have been %d was %d" 65 | % (expect_exit_code, exit_code) 66 | ) 67 | exit(exit_code or 1) 68 | 69 | 70 | def test_dist(dist): 71 | with set_tmp_dir() as tmp_dir: 72 | dist_name = ( 73 | re.match(r"^(opensearch\d*[_-]dsl)-", os.path.basename(dist)) 74 | .group(1) 75 | .replace("-", "_") 76 | ) 77 | 78 | # Build the venv and install the dist 79 | run("python", "-m", "venv", os.path.join(tmp_dir, "venv")) 80 | venv_python = os.path.join(tmp_dir, "venv/bin/python") 81 | run(venv_python, "-m", "pip", "install", "-U", "pip") 82 | run(venv_python, "-m", "pip", "install", dist) 83 | 84 | # Test the sync namespaces 85 | run(venv_python, "-c", f"from {dist_name} import Q") 86 | 87 | # Ensure that the namespaces are correct for the dist 88 | for suffix in ("", "1", "2", "5", "6", "7", "8", "9", "10"): 89 | distx_name = f"opensearch{suffix}_dsl" 90 | run( 91 | venv_python, 92 | "-c", 93 | f"import {distx_name}", 94 | expect_exit_code=256 if distx_name != dist_name else 0, 95 | ) 96 | # Tests the dependencies of the dist 97 | run( 98 | venv_python, 99 | "-c", 100 | f"import opensearch{suffix}", 101 | expect_exit_code=256 if distx_name != dist_name else 0, 102 | ) 103 | 104 | # Uninstall the dist, see that we can't import things anymore 105 | run(venv_python, "-m", "pip", "uninstall", "--yes", dist_name) 106 | run( 107 | venv_python, 108 | "-c", 109 | f"from {dist_name} import Q", 110 | expect_exit_code=256, 111 | ) 112 | 113 | 114 | def main(): 115 | run("rm", "-rf", "build/", "dist/", "*.egg-info", ".eggs") 116 | run("python", "setup.py", "sdist", "bdist_wheel") 117 | 118 | for dist in os.listdir(os.path.join(base_dir, "dist")): 119 | test_dist(os.path.join(base_dir, "dist", dist)) 120 | 121 | # After this run 'python -m twine upload dist/*' 122 | print( 123 | "\n\n" 124 | "===============================\n\n" 125 | " * Releases are ready! *\n\n" 126 | "$ python -m twine upload dist/*\n\n" 127 | "===============================" 128 | ) 129 | 130 | 131 | if __name__ == "__main__": 132 | main() 133 | -------------------------------------------------------------------------------- /utils/license-headers.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # The OpenSearch Contributors require contributions made to 4 | # this file be licensed under the Apache-2.0 license or a 5 | # compatible open source license. 6 | # 7 | # Modifications Copyright OpenSearch Contributors. See 8 | # GitHub history for details. 9 | # 10 | # Licensed to Elasticsearch B.V. under one or more contributor 11 | # license agreements. See the NOTICE file distributed with 12 | # this work for additional information regarding copyright 13 | # ownership. Elasticsearch B.V. 
licenses this file to you under 14 | # the Apache License, Version 2.0 (the "License"); you may 15 | # not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | # KIND, either express or implied. See the License for the 24 | # specific language governing permissions and limitations 25 | # under the License. 26 | 27 | """Script which verifies that all source files have a license header. 28 | Has two modes: 'fix' and 'check'. 'fix' fixes problems, 'check' will 29 | error out if 'fix' would have changed the file. 30 | """ 31 | 32 | import os 33 | import sys 34 | from itertools import chain 35 | from typing import Iterator, List 36 | 37 | lines_to_keep = ["# -*- coding: utf-8 -*-\n", "#!/usr/bin/env python\n"] 38 | license_header_lines = [ 39 | "# SPDX-License-Identifier: Apache-2.0\n", 40 | "#\n", 41 | "# The OpenSearch Contributors require contributions made to\n", 42 | "# this file be licensed under the Apache-2.0 license or a\n", 43 | "# compatible open source license.\n", 44 | "#\n", 45 | "# Modifications Copyright OpenSearch Contributors. See\n", 46 | "# GitHub history for details.\n", 47 | "#\n", 48 | "# Licensed to Elasticsearch B.V. under one or more contributor\n", 49 | "# license agreements. See the NOTICE file distributed with\n", 50 | "# this work for additional information regarding copyright\n", 51 | "# ownership. Elasticsearch B.V. licenses this file to you under\n", 52 | '# the Apache License, Version 2.0 (the "License"); you may\n', 53 | "# not use this file except in compliance with the License.\n", 54 | "# You may obtain a copy of the License at\n", 55 | "#\n", 56 | "# http://www.apache.org/licenses/LICENSE-2.0\n", 57 | "#\n", 58 | "# Unless required by applicable law or agreed to in writing,\n", 59 | "# software distributed under the License is distributed on an\n", 60 | '# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n', 61 | "# KIND, either express or implied. See the License for the\n", 62 | "# specific language governing permissions and limitations\n", 63 | "# under the License.\n", 64 | "\n", 65 | ] 66 | 67 | 68 | def find_files_to_fix(sources: List[str]) -> Iterator[str]: 69 | """Iterates over all files and dirs in 'sources' and returns 70 | only the filepaths that need fixing. 
71 | """ 72 | for source in sources: 73 | if os.path.isfile(source) and does_file_need_fix(source): 74 | yield source 75 | elif os.path.isdir(source): 76 | for root, _, filenames in os.walk(source): 77 | for filename in filenames: 78 | filepath = os.path.join(root, filename) 79 | if does_file_need_fix(filepath): 80 | yield filepath 81 | 82 | 83 | def does_file_need_fix(filepath: str) -> bool: 84 | if not filepath.endswith(".py"): 85 | return False 86 | with open(filepath, mode="r") as f: 87 | first_license_line = None 88 | for line in f: 89 | if line == license_header_lines[0]: 90 | first_license_line = line 91 | break 92 | elif line not in lines_to_keep: 93 | return True 94 | for header_line, line in zip( 95 | license_header_lines, chain((first_license_line,), f) 96 | ): 97 | if line != header_line: 98 | return True 99 | return False 100 | 101 | 102 | def add_header_to_file(filepath: str) -> None: 103 | with open(filepath, mode="r") as f: 104 | lines = list(f) 105 | i = 0 106 | for i, line in enumerate(lines): 107 | if line not in lines_to_keep: 108 | break 109 | lines = lines[:i] + license_header_lines + lines[i:] 110 | with open(filepath, mode="w") as f: 111 | f.truncate() 112 | f.write("".join(lines)) 113 | print(f"Fixed {os.path.relpath(filepath, os.getcwd())}") 114 | 115 | 116 | def main(): 117 | mode = sys.argv[1] 118 | assert mode in ("fix", "check") 119 | sources = [os.path.abspath(x) for x in sys.argv[2:]] 120 | files_to_fix = find_files_to_fix(sources) 121 | 122 | if mode == "fix": 123 | for filepath in files_to_fix: 124 | add_header_to_file(filepath) 125 | else: 126 | no_license_headers = list(files_to_fix) 127 | if no_license_headers: 128 | print("No license header found in:") 129 | cwd = os.getcwd() 130 | [ 131 | print(f" - {os.path.relpath(filepath, cwd)}") 132 | for filepath in no_license_headers 133 | ] 134 | sys.exit(1) 135 | else: 136 | print("All files had license header") 137 | 138 | 139 | if __name__ == "__main__": 140 | main() 141 | --------------------------------------------------------------------------------
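# Usage sketch for the two utility scripts above (paths are illustrative;
# both scripts are meant to be run from the repository root):
#
#   python utils/license-headers.py fix opensearch_dsl tests utils
#   python utils/license-headers.py check opensearch_dsl tests utils
#   python utils/build-dists.py   # builds the dists, then smoke-tests each one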