├── .backportrc.json ├── .buildkite ├── README.md ├── nightly.py ├── nightly_aarch64.py ├── nightly_docker.yml ├── nightly_steps.yml ├── pipeline.yml ├── publish_docker.sh ├── pull-requests.json └── run_nigthly.sh ├── .coveragerc ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ ├── enhancement.md │ └── qa_connector_package.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── add-labels-main.yml │ └── backport.yml ├── .gitignore ├── .isort.cfg ├── .ruff.toml ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── Makefile ├── NOTICE.txt ├── README.md ├── catalog-info.yaml ├── config.yml ├── connectors ├── VERSION ├── __init__.py ├── cli.py ├── config.py ├── es │ ├── __init__.py │ ├── client.py │ ├── document.py │ ├── index.py │ ├── language_data.yml │ ├── license.py │ ├── settings.py │ └── sink.py ├── filtering │ ├── __init__.py │ ├── basic_rule.py │ └── validation.py ├── kibana.py ├── logger.py ├── preflight_check.py ├── protocol │ ├── __init__.py │ └── connectors.py ├── services │ ├── __init__.py │ ├── base.py │ ├── job_cleanup.py │ ├── job_execution.py │ └── job_scheduling.py ├── source.py ├── sources │ ├── __init__.py │ ├── atlassian.py │ ├── azure_blob_storage.py │ ├── confluence.py │ ├── directory.py │ ├── dropbox.py │ ├── generic_database.py │ ├── github.py │ ├── google_cloud_storage.py │ ├── google_drive.py │ ├── jira.py │ ├── mongo.py │ ├── mssql.py │ ├── mysql.py │ ├── network_drive.py │ ├── oracle.py │ ├── postgresql.py │ ├── s3.py │ ├── servicenow.py │ ├── sharepoint_online.py │ └── sharepoint_server.py ├── sync_job_runner.py └── utils.py ├── docs ├── CODE_OF_CONDUCT.md ├── CONFIG.md ├── CONNECTOR_PROTOCOL.md ├── CONTRIBUTING.md ├── DEVELOPING.md ├── DOCKER.md ├── INTERNAL.md ├── REFERENCE.md ├── RELEASING.md ├── SECURITY.md ├── SUPPORT.md ├── UPGRADING.md ├── reference │ ├── dropbox.md │ └── github.md └── sync-rules │ ├── CONFLUENCE.md │ ├── DROPBOX.md │ ├── GITHUB.md │ ├── JIRA.md │ ├── MYSQL.md │ └── SERVICENOW.md ├── logo-enterprise-search.png ├── pyrightconfig.json ├── requirements ├── aarch64.txt ├── arm64.txt ├── framework.txt ├── tests.txt └── x86_64.txt ├── scripts └── verify.py ├── setup.cfg ├── setup.py └── tests ├── commons.py ├── conftest.py ├── es ├── test_client.py ├── test_document.py ├── test_index.py ├── test_license.py └── test_settings.py ├── fake_sources.py ├── filtering ├── test_basic_rule.py └── test_validation.py ├── fixtures ├── config.yml ├── config_2.yml ├── config_https.yml ├── config_mem.yml ├── connector.json ├── entsearch.yml ├── entsearch_invalid_log_level.yml └── memconfig.yml ├── ftest.sh ├── protocol └── test_connectors.py ├── services ├── test_base.py ├── test_job_cleanup.py ├── test_job_execution.py └── test_job_scheduling.py ├── sources ├── fixtures │ ├── README.md │ ├── azure_blob_storage │ │ ├── config.yml │ │ ├── docker-compose.yml │ │ ├── fixture.py │ │ └── requirements.txt │ ├── confluence │ │ ├── .env │ │ ├── Dockerfile │ │ ├── config.yml │ │ ├── docker-compose.yml │ │ ├── fixture.py │ │ └── requirements.txt │ ├── dir │ │ ├── .env │ │ ├── config.yml │ │ ├── docker-compose.yml │ │ └── fixture.py │ ├── dropbox │ │ ├── .env │ │ ├── Dockerfile │ │ ├── config.yml │ │ ├── connector.json │ │ ├── docker-compose.yml │ │ ├── fixture.py │ │ └── requirements.txt │ ├── fixture.py │ ├── github │ │ ├── .env │ │ ├── Dockerfile │ │ ├── config.yml │ │ ├── docker-compose.yml │ │ ├── fixture.py │ │ └── requirements.txt │ ├── google_cloud_storage │ │ ├── .env │ │ ├── Dockerfile │ │ ├── config.yml │ │ ├── docker-compose.yml │ │ ├── 
fixture.py │ │ ├── mocker.py │ │ ├── requirements.txt │ │ └── service_account_dummy_cert.pem │ ├── google_drive │ │ ├── .env │ │ ├── Dockerfile │ │ ├── config.yml │ │ ├── connector.json │ │ ├── docker-compose.yml │ │ ├── fixture.py │ │ └── requirements.txt │ ├── jira │ │ ├── .env │ │ ├── Dockerfile │ │ ├── config.yml │ │ ├── docker-compose.yml │ │ ├── fixture.py │ │ └── requirements.txt │ ├── mongodb │ │ ├── config.yml │ │ ├── connector.json │ │ ├── docker-compose.yml │ │ ├── fixture.py │ │ └── requirements.txt │ ├── mongodb_serverless │ │ ├── config.yml │ │ ├── connector.json │ │ ├── docker-compose.yml │ │ ├── fixture.py │ │ └── requirements.txt │ ├── mssql │ │ ├── .env │ │ ├── config.yml │ │ ├── docker-compose.yml │ │ ├── fixture.py │ │ └── requirements.txt │ ├── mysql │ │ ├── .env │ │ ├── config.yml │ │ ├── connector.json │ │ ├── docker-compose.yml │ │ ├── fixture.py │ │ └── requirements.txt │ ├── network_drive │ │ ├── config.yml │ │ ├── docker-compose.yml │ │ ├── fixture.py │ │ └── requirements.txt │ ├── oracle │ │ ├── .env │ │ ├── config.yml │ │ ├── docker-compose.yml │ │ ├── fixture.py │ │ └── requirements.txt │ ├── postgresql │ │ ├── .env │ │ ├── config.yml │ │ ├── docker-compose.yml │ │ ├── fixture.py │ │ └── requirements.txt │ ├── s3 │ │ ├── .env │ │ ├── config.yml │ │ ├── docker-compose.yml │ │ ├── fixture.py │ │ └── requirements.txt │ ├── servicenow │ │ ├── .env │ │ ├── Dockerfile │ │ ├── config.yml │ │ ├── connector.json │ │ ├── docker-compose.yml │ │ ├── fixture.py │ │ └── requirements.txt │ ├── sharepoint_online │ │ ├── .env │ │ ├── Dockerfile │ │ ├── config.yml │ │ ├── connector.json │ │ ├── docker-compose.yml │ │ ├── fixture.py │ │ ├── nginx │ │ │ └── conf │ │ │ │ └── sharepoint.com │ │ └── requirements.txt │ └── sharepoint_server │ │ ├── Dockerfile │ │ ├── config.yml │ │ ├── docker-compose.yml │ │ ├── fixture.py │ │ └── requirements.txt ├── support.py ├── test_atlassian.py ├── test_azure_blob_storage.py ├── test_confluence.py ├── test_directory.py ├── test_dropbox.py ├── test_generic_database.py ├── test_github.py ├── test_google_cloud_storage.py ├── test_google_drive.py ├── test_jira.py ├── test_mongo.py ├── test_mssql.py ├── test_mysql.py ├── test_network_drive.py ├── test_oracle.py ├── test_postgresql.py ├── test_s3.py ├── test_servicenow.py ├── test_sharepoint_online.py └── test_sharepoint_server.py ├── test_cli.py ├── test_commons.py ├── test_config.py ├── test_kibana.py ├── test_logger.py ├── test_preflight_check.py ├── test_sink.py ├── test_source.py ├── test_sync_job_runner.py └── test_utils.py /.backportrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "targetBranchChoices": [ 3 | { "name": "main", "checked": true }, 4 | "pre-8.10-stable", 5 | "8.9", 6 | "8.8" 7 | ], 8 | "fork": false, 9 | "targetPRLabels": ["backport"], 10 | "branchLabelMapping": { 11 | "^v8.10.0(.0)?$": "main", 12 | "^vpre-8.10-stable$": "pre-8.10-stable", 13 | "^v(\\d+).(\\d+)(.\\d+)+$": "$1.$2" 14 | }, 15 | "upstream": "elastic/connectors-python" 16 | } 17 | -------------------------------------------------------------------------------- /.buildkite/README.md: -------------------------------------------------------------------------------- 1 | ## Here we define our Buildkite pipelines 2 | 3 | We use our own custom image. 
The image definition can be found here: https://github.com/elastic/ci-agent-images/pull/132 4 | 5 | The image is built weekly; see the cron definition: https://github.com/elastic/ci/pull/1813/files 6 | 7 | The image and cron job were built following instructions from several sources: 8 | 9 | - https://docs.elastic.dev/ci/agent-images-for-buildkite 10 | - https://github.com/elastic/ci/blob/main/vm-images/README.md 11 | - https://github.com/elastic/ci-agent-images/README.md 12 | 13 | In case something is unclear, don't hesitate to ask in the #buildkite Slack channel. 14 | -------------------------------------------------------------------------------- /.buildkite/nightly.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import os 3 | 4 | _AGENTS = """\ 5 | agents: 6 | provider: "gcp" 7 | machineType: "n1-standard-8" 8 | useVault: true 9 | image: family/enterprise-search-ubuntu-2204-connectors-py 10 | """ 11 | 12 | with open(os.path.join(os.path.dirname(__file__), "nightly_steps.yml")) as f: 13 | steps = f.read().strip() 14 | 15 | 16 | print(_AGENTS) 17 | print() 18 | print(steps) 19 | -------------------------------------------------------------------------------- /.buildkite/nightly_aarch64.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import os 3 | 4 | _AGENTS = """\ 5 | agents: 6 | provider: aws 7 | instanceType: m6g.xlarge 8 | imagePrefix: enterprise-search-ubuntu-2204-aarch64-connectors-py 9 | """ 10 | 11 | with open(os.path.join(os.path.dirname(__file__), "nightly_steps.yml")) as f: 12 | steps = f.read().strip() 13 | 14 | 15 | print(_AGENTS) 16 | print() 17 | print(steps) 18 | -------------------------------------------------------------------------------- /.buildkite/nightly_docker.yml: -------------------------------------------------------------------------------- 1 | agents: 2 | provider: "gcp" 3 | machineType: "n1-standard-8" 4 | useVault: true 5 | image: family/enterprise-search-ubuntu-2204-connectors-py 6 | 7 | steps: 8 | - label: "🏗️ Docker images" 9 | command: 10 | - ".buildkite/publish_docker.sh" 11 | -------------------------------------------------------------------------------- /.buildkite/nightly_steps.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - label: "🔨 MySQL" 3 | command: 4 | - ".buildkite/run_nigthly.sh mysql" 5 | artifact_paths: 6 | - "perf8-report-*/**/*" 7 | - label: "🔨 Network Drive" 8 | command: 9 | - ".buildkite/run_nigthly.sh network_drive" 10 | artifact_paths: 11 | - "perf8-report-*/**/*" 12 | - label: "🔨 Amazon S3" 13 | command: 14 | - ".buildkite/run_nigthly.sh s3" 15 | artifact_paths: 16 | - "perf8-report-*/**/*" 17 | - label: "🔨 Google Cloud Storage" 18 | command: 19 | - ".buildkite/run_nigthly.sh google_cloud_storage" 20 | artifact_paths: 21 | - "perf8-report-*/**/*" 22 | - label: "🔨 Azure Blob Storage" 23 | command: 24 | - ".buildkite/run_nigthly.sh azure_blob_storage" 25 | artifact_paths: 26 | - "perf8-report-*/**/*" 27 | - label: "🔨 Postgresql" 28 | command: 29 | - ".buildkite/run_nigthly.sh postgresql" 30 | artifact_paths: 31 | - "perf8-report-*/**/*" 32 | - label: "🔨 System Directory" 33 | command: 34 | - ".buildkite/run_nigthly.sh dir" 35 | artifact_paths: 36 | - "perf8-report-*/**/*" 37 | - label: "🔨 Oracle Database" 38 | command: 39 | - ".buildkite/run_nigthly.sh oracle" 40 | env: 41 | SKIP_AARCH64: "true" 42 | artifact_paths: 43 | - "perf8-report-*/**/*" 44 | - 
label: "🔨 Sharepoint Server" 45 | command: 46 | - ".buildkite/run_nigthly.sh sharepoint_server extra_small" 47 | artifact_paths: 48 | - "perf8-report-*/**/*" 49 | - label: "🔨 Sharepoint Online" 50 | command: 51 | - ".buildkite/run_nigthly.sh sharepoint_online medium" 52 | artifact_paths: 53 | - "perf8-report-*/**/*" 54 | - label: "🔨 Microsoft SQL" 55 | command: 56 | - ".buildkite/run_nigthly.sh mssql" 57 | artifact_paths: 58 | - "perf8-report-*/**/*" 59 | - label: "🔨 Jira" 60 | command: 61 | - ".buildkite/run_nigthly.sh jira" 62 | artifact_paths: 63 | - "perf8-report-*/**/*" 64 | - label: "🔨 Confluence" 65 | command: 66 | - ".buildkite/run_nigthly.sh confluence" 67 | artifact_paths: 68 | - "perf8-report-*/**/*" 69 | - label: "🔨 ServiceNow" 70 | command: 71 | - ".buildkite/run_nigthly.sh servicenow" 72 | artifact_paths: 73 | - "perf8-report-*/**/*" 74 | - label: "🔨 MongoDB" 75 | command: 76 | - ".buildkite/run_nigthly.sh mongodb" 77 | artifact_paths: 78 | - "perf8-report-*/**/*" 79 | # - label: "🔨 MongoDB Serverless" 80 | # command: 81 | # - ".buildkite/run_nigthly.sh mongodb_serverless" 82 | # artifact_paths: 83 | # - "perf8-report-*/**/*" 84 | - label: "🔨 GitHub" 85 | command: 86 | - ".buildkite/run_nigthly.sh github" 87 | artifact_paths: 88 | - "perf8-report-*/**/*" 89 | - label: "🔨 Google Drive" 90 | command: 91 | - ".buildkite/run_nigthly.sh google_drive" 92 | artifact_paths: 93 | - "perf8-report-*/**/*" 94 | - label: "🔨 Dropbox" 95 | command: 96 | - ".buildkite/run_nigthly.sh dropbox" 97 | artifact_paths: 98 | - "perf8-report-*/**/*" 99 | -------------------------------------------------------------------------------- /.buildkite/pipeline.yml: -------------------------------------------------------------------------------- 1 | agents: 2 | provider: "gcp" 3 | machineType: "n1-standard-2" 4 | useVault: false 5 | image: family/enterprise-search-ubuntu-2204-connectors-py 6 | 7 | steps: 8 | - label: ":face_with_peeking_eye: Lint" 9 | command: "make lint" 10 | 11 | - label: ":pytest: Test" 12 | command: "make test" 13 | 14 | - label: ":shipit: Smoke test" 15 | command: "make ftest NAME=dir DATA_SIZE=small" 16 | agents: 17 | machineType: "n1-standard-8" 18 | -------------------------------------------------------------------------------- /.buildkite/publish_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # !!! WARNING DO NOT add -x to avoid leaking vault passwords 4 | set -euo pipefail 5 | 6 | sudo apt-get update 7 | sudo DEBIAN_FRONTEND=noninteractive apt-get install ca-certificates curl gnupg lsb-release -y 8 | sudo mkdir -p /etc/apt/keyrings 9 | 10 | echo "Installing Docker & Docker Compose" 11 | ARCH=`dpkg --print-architecture` 12 | RELEASE=`lsb_release -cs` 13 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg 14 | echo "deb [arch=$ARCH signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $RELEASE stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null 15 | sudo apt-get update 16 | sudo DEBIAN_FRONTEND=noninteractive apt-get install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin 17 | sudo systemctl start docker 18 | 19 | echo "Starting test task" 20 | BASEDIR=$(realpath $(dirname $0)) 21 | ROOT=$(realpath $BASEDIR/../) 22 | 23 | cd $ROOT 24 | 25 | # docker snapshot publication 26 | echo "Building the image" 27 | make docker-build 28 | 29 | 30 | # !!! 
WARNING: be cautious with the following lines to avoid leaking secrets in the CI logs 31 | 32 | set +x # Do not remove so we don't leak passwords 33 | VAULT_ADDR=${VAULT_ADDR:-https://vault-ci-prod.elastic.dev} 34 | VAULT_USER="docker-swiftypeadmin" 35 | echo "Fetching Docker credentials for '$VAULT_USER' from Vault..." 36 | DOCKER_USER=$(vault read -address "${VAULT_ADDR}" -field user_20230609 secret/ci/elastic-connectors-python/${VAULT_USER}) 37 | DOCKER_PASSWORD=$(vault read -address "${VAULT_ADDR}" -field secret_20230609 secret/ci/elastic-connectors-python/${VAULT_USER}) 38 | echo "Done!" 39 | echo 40 | 41 | echo "Logging into Docker as '$DOCKER_USER'..." 42 | docker login -u "${DOCKER_USER}" -p "${DOCKER_PASSWORD}" docker.elastic.co 43 | echo "Done!" 44 | echo 45 | echo "Pushing the image to docker.elastic.co" 46 | make docker-push 47 | -------------------------------------------------------------------------------- /.buildkite/pull-requests.json: -------------------------------------------------------------------------------- 1 | { 2 | "jobs": [ 3 | { 4 | "enabled": true, 5 | "pipelineSlug": "connectors-python", 6 | "allow_org_users": true, 7 | "allowed_repo_permissions": ["admin", "write"], 8 | "allowed_list": ["praveen-elastic", "moxarth-elastic", "khusbu-crest", "akanshi-crest"], 9 | "set_commit_status": true, 10 | "commit_status_context": "buildkite/connectors-python", 11 | "build_on_commit": true, 12 | "build_on_comment": true, 13 | "trigger_comment_regex": "^(?:(?:buildkite\\W+)?(?:build|test)\\W+(?:this|it))", 14 | "always_trigger_comment_regex": "^(?:(?:buildkite\\W+)?(?:build|test)\\W+(?:this|it))", 15 | "skip_ci_labels": ["skip-ci"], 16 | "skip_target_branches": [], 17 | "always_require_ci_on_changed": [] 18 | } 19 | ] 20 | } 21 | 22 | -------------------------------------------------------------------------------- /.buildkite/run_nigthly.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # !!! WARNING DO NOT add -x to avoid leaking vault passwords 4 | set -euo pipefail 5 | 6 | MACHINE_TYPE=`uname -m` 7 | 8 | if [ "$MACHINE_TYPE" != "x86_64" ] && [ -v SKIP_AARCH64 ]; then 9 | echo "Running on aarch64 and skipping" 10 | exit 11 | fi 12 | 13 | 14 | BASEDIR=$(realpath $(dirname $0)) 15 | ROOT=$(realpath $BASEDIR/../) 16 | 17 | # TODO: to be moved into the image at https://github.com/elastic/ci-agent-images/blob/main/vm-images/enterprise-search/scripts/connectors-python/install-deps.sh#L6 18 | sudo apt-get -y install liblz4-dev libunwind-dev 19 | 20 | cd $ROOT 21 | 22 | make install 23 | 24 | export PIP=$ROOT/bin/pip 25 | 26 | $PIP install py-spy 27 | DATA_SIZE="${2:-small}" 28 | 29 | # If we run on Buildkite, we connect to Docker so we can pull private images 30 | # !!! WARNING: be cautious with the following lines to avoid leaking secrets in the CI logs 31 | set +x # Do not remove so we don't leak passwords 32 | if [ -v BUILDKITE ]; then 33 | echo "Connecting to Vault" 34 | VAULT_ADDR=${VAULT_ADDR:-https://vault-ci-prod.elastic.dev} 35 | VAULT_USER="docker-swiftypeadmin" 36 | echo "Fetching Docker credentials for '$VAULT_USER' from Vault..." 37 | DOCKER_USER=$(vault read -address "${VAULT_ADDR}" -field user_20230609 secret/ci/elastic-connectors-python/${VAULT_USER}) 38 | DOCKER_PASSWORD=$(vault read -address "${VAULT_ADDR}" -field secret_20230609 secret/ci/elastic-connectors-python/${VAULT_USER}) 39 | echo "Done!" 
40 | 41 | # required by serverless 42 | sudo sysctl -w vm.max_map_count=262144 43 | fi 44 | 45 | PERF8=yes NAME=$1 DATA_SIZE=$DATA_SIZE make ftest 46 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | omit = connectors/quartz.py,connectors/conftest.py,tests/* 3 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Global rule 2 | * @elastic/ingestion-team 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve. 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Bug Description 11 | 12 | 13 | ### To Reproduce 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | ## Expected behavior 21 | 22 | 23 | ## Screenshots 24 | 26 | 27 | ## Environment 28 | 29 | 30 | 31 | - OS: [e.g. iOS] 32 | - Browser [e.g. chrome, safari] 33 | - Version [e.g. 22] 34 | 35 | 36 | ## Additional context 37 | 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Question or Discussion 4 | url: https://discuss.elastic.co/c/enterprise-search/workplace-search/ 5 | about: Please ask and answer questions here. 6 | - name: Security Vulnerability 7 | url: https://www.elastic.co/community/security 8 | about: DO NOT file issues related to security. Instead, please follow our security policy here. 
9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/enhancement.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Enhancement 3 | about: It's not a bug, but some desired feature is missing 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | ### Problem Description 11 | 14 | 15 | ### Proposed Solution 16 | 18 | 19 | 20 | ### Alternatives 21 | 23 | 24 | ### Additional Context 25 | 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/qa_connector_package.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Non-regression QA 3 | about: Create a non-regression QA issue for one connector client 4 | title: '[8.x QA] Validate connector client' 5 | labels: testing 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Non-regression QA 11 | 12 | **Note:** always test with the latest Build Candidate on Elastic Cloud, using the full Elastic stack 13 | 14 | - [ ] Start the whole stack from scratch and navigate to Enterprise Search 15 | - [ ] Check that no indices are shown in the top-level Indices list 16 | - [ ] Click on "Create an Elasticsearch index" - a new page opens where you can select an ingestion method 17 | - [ ] Choose Connector -> Use a connector 18 | - [ ] Choose the connector you want to test and Continue 19 | - [ ] Create an index with a valid name and Universal language 20 | ------- 21 | 22 | - [ ] Connector name and description are editable on the Configurations page 23 | - [ ] Connector can be deleted from the Indices page 24 | - [ ] Connector can be deleted from the Indices page and recreated with the same name afterwards 25 | - [ ] Pull the connectors repository and run `make install`, but do not run the connector yet 26 | 27 | 28 | - [ ] Verify that you are redirected to the "configuration" page, where you can create an API key and copy the connector id / whole config section into the connector 29 | - [ ] Update the connector configuration with the api_key and connector_id, choose a service_type to test, and set it in the config 30 | - [ ] Start the connector by running `make run` - verify that it starts and does not actually do anything yet 31 | - [ ] Wait for the Kibana page with the connector configuration to update and verify that it's possible to edit the connector configuration now 32 | - [ ] Edit and save the connector configuration, then reload the page and verify that the configuration is properly saved 33 | - [ ] Click on "Set schedule and sync" and verify that you're redirected to the scheduling tab 34 | - [ ] Enable scheduling with frequency = every minute and save the schedule; refresh the page and verify that the changes were stored 35 | - [ ] Switch to the connector, wait for a minute or two, and verify that the connector starts to ingest data 36 | - [ ] Verify that the data from the connector appears in the expected index 37 | - [ ] Verify that on the index list page the index information is updated properly, showing the expected number of documents and the new index size 38 | - [ ] Verify that on the connector overview page "Document Count" is updated to reflect the number of documents in the index 39 | - [ ] Verify that you can see ingested documents in the `documents` tab 40 | - [ ] Verify that the index mappings are correct on the `index mappings` tab 41 | 42 | 43 | **Record a short demo showing the connector's configuration and that documents were ingested** 44 | 
-------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Closes https://github.com/elastic/connectors-py/issues/### 2 | 3 | 4 | 10 | 11 | ## Checklists 12 | 13 | 15 | 16 | #### Pre-Review Checklist 17 | - [ ] this PR has a meaningful title 18 | - [ ] this PR links to all relevant github issues that it fixes or partially addresses 19 | - [ ] if there is no GH issue, please create it. Each PR should have a link to an issue 20 | - [ ] this PR has a thorough description 21 | - [ ] Covered the changes with automated tests 22 | - [ ] Tested the changes locally 23 | - [ ] Added a label for each target release version (example: `v7.13.2`, `v7.14.0`, `v8.0.0`) 24 | - [ ] Considered corresponding documentation changes 25 | - [ ] Contributed any configuration settings changes to the configuration reference 26 | 27 | #### Changes Requiring Extra Attention 28 | 29 | 32 | 33 | - [ ] Security-related changes (encryption, TLS, SSRF, etc) 34 | - [ ] New external service dependencies added. 35 | 36 | ## Related Pull Requests 37 | 38 | 41 | 42 | ## Release Note 43 | 44 | 48 | -------------------------------------------------------------------------------- /.github/workflows/add-labels-main.yml: -------------------------------------------------------------------------------- 1 | name: Force backport labels for main 2 | 3 | on: 4 | pull_request_target: 5 | branches: 6 | - main 7 | types: 8 | - opened 9 | 10 | jobs: 11 | add_labels: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | - id: version 16 | uses: juliangruber/read-file-action@ebfa650188272343fef925480eb4d18c5d49b925 17 | with: 18 | path: ./connectors/VERSION 19 | - uses: actions-ecosystem/action-add-labels@v1 20 | with: 21 | labels: | 22 | auto-backport 23 | v${{ steps.version.outputs.content }} 24 | -------------------------------------------------------------------------------- /.github/workflows/backport.yml: -------------------------------------------------------------------------------- 1 | name: Backport PR 2 | 3 | on: 4 | pull_request_target: 5 | branches: 6 | - main 7 | types: 8 | - labeled 9 | - closed 10 | 11 | jobs: 12 | backport: 13 | if: | 14 | github.event.pull_request.merged == true 15 | && contains(github.event.pull_request.labels.*.name, 'auto-backport') 16 | && ( 17 | (github.event.action == 'labeled' && github.event.label.name == 'auto-backport') 18 | || (github.event.action == 'closed') 19 | ) 20 | runs-on: ubuntu-latest 21 | steps: 22 | - name: Checkout Actions 23 | uses: actions/checkout@v2 24 | with: 25 | repository: 'swiftype/kibana-github-actions' 26 | ref: main 27 | path: ./actions 28 | 29 | - name: Install Actions 30 | run: npm install --production --prefix ./actions 31 | 32 | - name: Run Backport 33 | uses: ./actions/backport 34 | with: 35 | github_token: ${{ secrets.GITHUB_TOKEN }} 36 | approver_token: ${{ secrets.REPO_SCOPED_TOKEN }} 37 | auto_approve: 'true' 38 | commit_user: elastic 39 | commit_email: ent-search-backport@users.noreply.github.com 40 | auto_merge: 'true' 41 | auto_merge_method: 'squash' 42 | manual_backport_command_template: 'backport --pr %pullNumber% --autoMerge --autoMergeMethod squash' 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | lib 3 | include 4 | .coverage 5 | *.un~ 6 | *.swp 7 | 
pyvenv.cfg 8 | *.egg-info 9 | __pycache__ 10 | 11 | # jetbrains files 12 | .idea 13 | *.iml 14 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | profile = black 3 | skip=lib,bin,include,pyodbc.pyi 4 | -------------------------------------------------------------------------------- /.ruff.toml: -------------------------------------------------------------------------------- 1 | select = ["E", "F", "B"] 2 | ignore = ["E501"] 3 | 4 | # Allow autofix for all enabled rules (when `--fix` is provided). 5 | fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"] 6 | unfixable = [] 7 | 8 | exclude = [ 9 | ".git", 10 | "__pycache__", 11 | "lib", 12 | "bin", 13 | "include" 14 | ] 15 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" 16 | target-version = "py310" 17 | 18 | [per-file-ignores] 19 | "tests/*" = ["B017"] 20 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10 2 | 3 | COPY . /app 4 | WORKDIR /app 5 | RUN make clean install 6 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include Makefile README.rst config.yml CONTRIBUTE.rst LICENSE 2 | recursive-include connectors/ *.yml 3 | recursive-include requirements/ *.txt 4 | include connectors/VERSION 5 | include tests/sources/fixtures/google_cloud_storage/service_account_dummy_cert.pem 6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test lint autoformat run ftest install dev release docker-build docker-run docker-push 2 | 3 | PYTHON=python3.10 4 | ARCH=$(shell uname -m) 5 | PERF8?=no 6 | SLOW_TEST_THRESHOLD=1 # seconds 7 | VERSION=$(shell cat connectors/VERSION) 8 | 9 | 10 | bin/python: 11 | $(PYTHON) -m venv . 12 | bin/pip install --upgrade pip 13 | 14 | install: bin/python bin/elastic-ingest 15 | 16 | dev: install 17 | bin/pip install -r requirements/tests.txt 18 | 19 | bin/elastic-ingest: bin/python 20 | bin/pip install -r requirements/$(ARCH).txt 21 | bin/python setup.py develop 22 | 23 | bin/black: bin/python 24 | bin/pip install -r requirements/$(ARCH).txt 25 | bin/pip install -r requirements/tests.txt 26 | 27 | 28 | bin/pytest: bin/python 29 | bin/pip install -r requirements/$(ARCH).txt 30 | bin/pip install -r requirements/tests.txt 31 | 32 | clean: 33 | rm -rf bin lib include 34 | 35 | lint: bin/python bin/black bin/elastic-ingest 36 | bin/isort --check . --sp .isort.cfg 37 | bin/black --check connectors 38 | bin/black --check tests 39 | bin/black --check setup.py 40 | bin/black --check scripts 41 | bin/ruff connectors 42 | bin/ruff tests 43 | bin/ruff setup.py 44 | bin/ruff scripts 45 | bin/pyright connectors 46 | bin/pyright tests 47 | 48 | autoformat: bin/python bin/black bin/elastic-ingest 49 | bin/isort . 
--sp .isort.cfg 50 | bin/black connectors 51 | bin/black tests 52 | bin/black setup.py 53 | bin/black scripts 54 | 55 | test: bin/pytest bin/elastic-ingest 56 | bin/pytest --cov-report term-missing --cov-fail-under 92 --cov-report html --cov=connectors --fail-slow=$(SLOW_TEST_THRESHOLD) -sv tests 57 | 58 | release: install 59 | bin/python setup.py sdist 60 | 61 | ftest: bin/pytest bin/elastic-ingest 62 | tests/ftest.sh $(NAME) $(PERF8) 63 | 64 | ftrace: bin/pytest bin/elastic-ingest 65 | PERF8_TRACE=true tests/ftest.sh $(NAME) $(PERF8) 66 | 67 | run: install 68 | bin/elastic-ingest 69 | 70 | default-config: install 71 | bin/elastic-ingest --action config --service-type $(SERVICE_TYPE) 72 | 73 | docker-build: 74 | docker build -t docker.elastic.co/enterprise-search/elastic-connectors:$(VERSION)-SNAPSHOT . 75 | 76 | docker-run: 77 | docker run -v $(PWD):/config docker.elastic.co/enterprise-search/elastic-connectors:$(VERSION)-SNAPSHOT /app/bin/elastic-ingest -c /config/config.yml --log-level=DEBUG 78 | 79 | docker-push: 80 | docker push docker.elastic.co/enterprise-search/elastic-connectors:$(VERSION)-SNAPSHOT 81 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | connectors-py 2 | Copyright 2022 Elasticsearch B.V. 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Elastic Python connectors 2 | 3 | ![logo](logo-enterprise-search.png) 4 | 5 | The home of the Elastic connector service and connectors written in Python. This repository contains the framework for customizing Elastic connectors or writing your own connectors for advanced use cases. 6 | 7 | **The connector will be operated by an administrative user from within Kibana.** 8 | 9 | See the [official end-user documentation](https://www.elastic.co/guide/en/enterprise-search/current/build-connector.html). 
10 | 11 | ## Guides 12 | 13 | - [Code of Conduct](https://www.elastic.co/community/codeofconduct) 14 | - [Getting Support](docs/SUPPORT.md) 15 | - [Releasing](docs/RELEASING.md) 16 | - [Developer guide](docs/DEVELOPING.md) 17 | - [Connectors Reference](docs/REFERENCE.md) 18 | - [Security Policy](docs/SECURITY.md) 19 | - [Elastic-internal guide](docs/INTERNAL.md) 20 | - [Connector Protocol](docs/CONNECTOR_PROTOCOL.md) 21 | - [Configuration](docs/CONFIG.md) 22 | - [Contribution guide](docs/CONTRIBUTING.md) 23 | - [Upgrading](docs/UPGRADING.md) 24 | 25 | ### Advanced sync rules 26 | 27 | - [MySQL](docs/sync-rules/MYSQL.md) 28 | - [JIRA](docs/sync-rules/JIRA.md) 29 | - [CONFLUENCE](docs/sync-rules/CONFLUENCE.md) 30 | - [SERVICENOW](docs/sync-rules/SERVICENOW.md) 31 | - [GITHUB](docs/sync-rules/GITHUB.md) 32 | -------------------------------------------------------------------------------- /config.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://localhost:9200 3 | username: elastic 4 | password: changeme 5 | ssl: true 6 | bulk: 7 | queue_max_size: 1024 8 | queue_max_mem_size: 25 9 | display_every: 100 10 | chunk_size: 1000 11 | max_concurrency: 5 12 | chunk_max_mem_size: 5 13 | concurrent_downloads: 10 14 | request_timeout: 120 15 | max_wait_duration: 120 16 | initial_backoff_duration: 1 17 | backoff_multiplier: 2 18 | log_level: info 19 | 20 | service: 21 | idling: 30 22 | heartbeat: 300 23 | max_errors: 20 24 | max_errors_span: 600 25 | max_concurrent_content_syncs: 1 26 | max_concurrent_access_control_syncs: 1 27 | job_cleanup_interval: 300 28 | log_level: INFO 29 | 30 | #connectors: 31 | # - 32 | # connector_id: changeme 33 | # service_type: changeme 34 | # api_key: changeme 35 | 36 | sources: 37 | mongodb: connectors.sources.mongo:MongoDataSource 38 | s3: connectors.sources.s3:S3DataSource 39 | dir: connectors.sources.directory:DirectoryDataSource 40 | mysql: connectors.sources.mysql:MySqlDataSource 41 | network_drive: connectors.sources.network_drive:NASDataSource 42 | google_cloud_storage: connectors.sources.google_cloud_storage:GoogleCloudStorageDataSource 43 | google_drive: connectors.sources.google_drive:GoogleDriveDataSource 44 | azure_blob_storage: connectors.sources.azure_blob_storage:AzureBlobStorageDataSource 45 | postgresql: connectors.sources.postgresql:PostgreSQLDataSource 46 | oracle: connectors.sources.oracle:OracleDataSource 47 | sharepoint_server: connectors.sources.sharepoint_server:SharepointServerDataSource 48 | mssql: connectors.sources.mssql:MSSQLDataSource 49 | jira: connectors.sources.jira:JiraDataSource 50 | confluence: connectors.sources.confluence:ConfluenceDataSource 51 | dropbox: connectors.sources.dropbox:DropboxDataSource 52 | servicenow: connectors.sources.servicenow:ServiceNowDataSource 53 | sharepoint_online: connectors.sources.sharepoint_online:SharepointOnlineDataSource 54 | github: connectors.sources.github:GitHubDataSource 55 | -------------------------------------------------------------------------------- /connectors/VERSION: -------------------------------------------------------------------------------- 1 | 8.10.0.0 2 | -------------------------------------------------------------------------------- /connectors/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. 
Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | import os 7 | 8 | with open(os.path.join(os.path.dirname(__file__), "VERSION")) as f: 9 | __version__ = f.read().strip() 10 | -------------------------------------------------------------------------------- /connectors/config.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | 7 | import os 8 | 9 | from envyaml import EnvYAML 10 | 11 | from connectors.logger import logger 12 | 13 | 14 | def load_config(config_file): 15 | logger.info(f"Loading config from {config_file}") 16 | configuration = EnvYAML(config_file) 17 | _ent_search_config(configuration) 18 | return configuration 19 | 20 | 21 | # Left - in Enterprise Search; Right - in Connectors 22 | config_mappings = { 23 | "elasticsearch.host": "elasticsearch.host", 24 | "elasticsearch.username": "elasticsearch.username", 25 | "elasticsearch.password": "elasticsearch.password", 26 | "elasticsearch.headers": "elasticsearch.headers", 27 | "log_level": "service.log_level", 28 | } 29 | 30 | # Enterprise Search uses Ruby and is in lower case always, so hacking it here for now 31 | # Ruby-supported log levels: 'debug', 'info', 'warn', 'error', 'fatal', 'unknown' 32 | # Python-supported log levels: 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL', 'NOTSET' 33 | log_level_mappings = { 34 | "debug": "DEBUG", 35 | "info": "INFO", 36 | "warn": "WARNING", 37 | "error": "ERROR", 38 | "fatal": "CRITICAL", 39 | "unknown": "NOTSET", 40 | } 41 | 42 | 43 | def _ent_search_config(configuration): 44 | if "ENT_SEARCH_CONFIG_PATH" not in os.environ: 45 | return 46 | logger.info("Found ENT_SEARCH_CONFIG_PATH, loading ent-search config") 47 | ent_search_config = EnvYAML(os.environ["ENT_SEARCH_CONFIG_PATH"]) 48 | for es_field in config_mappings.keys(): 49 | if es_field not in ent_search_config: 50 | continue 51 | 52 | connector_field = config_mappings[es_field] 53 | es_field_value = ent_search_config[es_field] 54 | 55 | if es_field == "log_level": 56 | if es_field_value not in log_level_mappings: 57 | raise ValueError( 58 | f"Unexpected log level: {es_field_value}. Allowed values: {', '.join(log_level_mappings.keys())}" 59 | ) 60 | es_field_value = log_level_mappings[es_field_value] 61 | 62 | _update_config_field(configuration, connector_field, es_field_value) 63 | 64 | logger.debug(f"Overridden {connector_field}") 65 | 66 | 67 | def _update_config_field(configuration, field, value): 68 | """ 69 | Update configuration field value taking into account the nesting. 70 | 71 | Configuration is a hash of hashes, so we need to dive inside to do proper assignment. 72 | 73 | E.g. 
_update_config_field({}, "elasticsearch.bulk.queuesize", 20) will result in the following config: 74 | { 75 | "elasticsearch": { 76 | "bulk": { 77 | "queuesize": 20 78 | } 79 | } 80 | } 81 | """ 82 | subfields = field.split(".") 83 | 84 | current_leaf = configuration 85 | for subfield in subfields[:-1]: 86 | if subfield not in current_leaf: 87 | current_leaf[subfield] = {} 88 | current_leaf = current_leaf[subfield] 89 | 90 | current_leaf[subfields[-1]] = value 91 | -------------------------------------------------------------------------------- /connectors/es/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from connectors.es.client import ESClient # NOQA 7 | from connectors.es.document import ESDocument, InvalidDocumentSourceError # NOQA 8 | from connectors.es.index import ESIndex # NOQA 9 | from connectors.es.settings import DEFAULT_LANGUAGE, Mappings # NOQA 10 | -------------------------------------------------------------------------------- /connectors/es/language_data.yml: -------------------------------------------------------------------------------- 1 | --- 2 | da: 3 | name: Danish 4 | stemmer: danish 5 | stop_words: _danish_ 6 | de: 7 | name: German 8 | stemmer: light_german 9 | stop_words: _german_ 10 | en: 11 | name: English 12 | stemmer: light_english 13 | stop_words: _english_ 14 | es: 15 | name: Spanish 16 | stemmer: light_spanish 17 | stop_words: _spanish_ 18 | fr: 19 | name: French 20 | stemmer: light_french 21 | stop_words: _french_ 22 | custom_filter_definitions: 23 | fr-elision: 24 | type: elision 25 | articles: 26 | - l 27 | - m 28 | - t 29 | - qu 30 | - n 31 | - s 32 | - j 33 | - d 34 | - c 35 | - jusqu 36 | - quoiqu 37 | - lorsqu 38 | - puisqu 39 | articles_case: true 40 | prepended_filters: 41 | - fr-elision 42 | it: 43 | name: Italian 44 | stemmer: light_italian 45 | stop_words: _italian_ 46 | custom_filter_definitions: 47 | it-elision: 48 | type: elision 49 | articles: 50 | - c 51 | - l 52 | - all 53 | - dall 54 | - dell 55 | - nell 56 | - sull 57 | - coll 58 | - pell 59 | - gl 60 | - agl 61 | - dagl 62 | - degl 63 | - negl 64 | - sugl 65 | - un 66 | - m 67 | - t 68 | - s 69 | - v 70 | - d 71 | articles_case: true 72 | prepended_filters: 73 | - it-elision 74 | ja: 75 | name: Japanese 76 | stemmer: light_english 77 | stop_words: _english_ 78 | postpended_filters: 79 | - cjk_bigram 80 | ko: 81 | name: Korean 82 | stemmer: light_english 83 | stop_words: _english_ 84 | postpended_filters: 85 | - cjk_bigram 86 | nl: 87 | name: Dutch 88 | stemmer: dutch 89 | stop_words: _dutch_ 90 | pt: 91 | name: Portuguese 92 | stemmer: light_portuguese 93 | stop_words: _portuguese_ 94 | pt-br: 95 | name: Portuguese (Brazil) 96 | stemmer: brazilian 97 | stop_words: _brazilian_ 98 | ru: 99 | name: Russian 100 | stemmer: russian 101 | stop_words: _russian_ 102 | th: 103 | name: Thai 104 | stemmer: light_english 105 | stop_words: _thai_ 106 | zh: 107 | name: Chinese 108 | stemmer: light_english 109 | stop_words: _english_ 110 | postpended_filters: 111 | - cjk_bigram 112 | -------------------------------------------------------------------------------- /connectors/es/license.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 
Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from connectors.protocol import JobType 7 | 8 | 9 | def requires_platinum_license(sync_job, connector, source_klass): 10 | """Returns whether this scenario requires a Platinum license""" 11 | return ( 12 | sync_job.job_type == JobType.ACCESS_CONTROL 13 | and connector.features.document_level_security_enabled() 14 | ) or source_klass.is_premium() 15 | -------------------------------------------------------------------------------- /connectors/filtering/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | -------------------------------------------------------------------------------- /connectors/protocol/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from .connectors import * # NOQA 7 | -------------------------------------------------------------------------------- /connectors/services/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from connectors.services.base import get_services # NOQA 7 | from connectors.services.job_cleanup import JobCleanUpService # NOQA 8 | from connectors.services.job_execution import JobExecutionService # NOQA 9 | from connectors.services.job_scheduling import JobSchedulingService # NOQA 10 | -------------------------------------------------------------------------------- /connectors/sources/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | -------------------------------------------------------------------------------- /connectors/sources/atlassian.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | 7 | import fastjsonschema 8 | from fastjsonschema import JsonSchemaValueException 9 | 10 | from connectors.filtering.validation import ( 11 | AdvancedRulesValidator, 12 | SyncRuleValidationResult, 13 | ) 14 | from connectors.utils import RetryStrategy, retryable 15 | 16 | RETRIES = 3 17 | RETRY_INTERVAL = 2 18 | 19 | 20 | class AtlassianAdvancedRulesValidator(AdvancedRulesValidator): 21 | QUERY_OBJECT_SCHEMA_DEFINITION = { 22 | "type": "object", 23 | "properties": { 24 | "query": {"type": "string", "minLength": 1}, 25 | }, 26 | "required": ["query"], 27 | "additionalProperties": False, 28 | } 29 | 30 | SCHEMA_DEFINITION = {"type": "array", "items": QUERY_OBJECT_SCHEMA_DEFINITION} 31 | 32 | SCHEMA = fastjsonschema.compile(definition=SCHEMA_DEFINITION) 33 | 34 | def __init__(self, source): 35 | self.source = source 36 | 37 | async def validate(self, advanced_rules): 38 | if len(advanced_rules) == 0: 39 | return SyncRuleValidationResult.valid_result( 40 | SyncRuleValidationResult.ADVANCED_RULES 41 | ) 42 | 43 | return await self._remote_validation(advanced_rules) 44 | 45 | @retryable( 46 | retries=RETRIES, 47 | interval=RETRY_INTERVAL, 48 | strategy=RetryStrategy.EXPONENTIAL_BACKOFF, 49 | ) 50 | async def _remote_validation(self, advanced_rules): 51 | try: 52 | AtlassianAdvancedRulesValidator.SCHEMA(advanced_rules) 53 | except JsonSchemaValueException as e: 54 | return SyncRuleValidationResult( 55 | rule_id=SyncRuleValidationResult.ADVANCED_RULES, 56 | is_valid=False, 57 | validation_message=e.message, 58 | ) 59 | 60 | return SyncRuleValidationResult.valid_result( 61 | SyncRuleValidationResult.ADVANCED_RULES 62 | ) 63 | -------------------------------------------------------------------------------- /connectors/sources/directory.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | """ 7 | Demo of a standalone source 8 | """ 9 | import functools 10 | import hashlib 11 | import os 12 | from datetime import datetime, timezone 13 | from pathlib import Path 14 | 15 | from connectors.source import BaseDataSource 16 | from connectors.utils import TIKA_SUPPORTED_FILETYPES, get_base64_value 17 | 18 | DEFAULT_DIR = os.environ.get("SYSTEM_DIR", os.path.dirname(__file__)) 19 | 20 | 21 | class DirectoryDataSource(BaseDataSource): 22 | """Directory""" 23 | 24 | name = "System Directory" 25 | service_type = "dir" 26 | 27 | def __init__(self, configuration): 28 | super().__init__(configuration=configuration) 29 | self.directory = os.path.abspath(self.configuration["directory"]) 30 | self.pattern = self.configuration["pattern"] 31 | 32 | @classmethod 33 | def get_default_configuration(cls): 34 | return { 35 | "directory": { 36 | "label": "Directory path", 37 | "order": 1, 38 | "type": "str", 39 | "validations": [], 40 | "value": DEFAULT_DIR, 41 | }, 42 | "pattern": { 43 | "display": "text", 44 | "label": "File glob-like pattern", 45 | "order": 2, 46 | "type": "str", 47 | "value": "**/*.*", 48 | }, 49 | } 50 | 51 | async def ping(self): 52 | return True 53 | 54 | async def changed(self): 55 | return True 56 | 57 | def get_id(self, path): 58 | return hashlib.md5(str(path).encode("utf8")).hexdigest() 59 | 60 | async def _download(self, path, timestamp=None, doit=None): 61 | if not (doit and os.path.splitext(path)[-1] in TIKA_SUPPORTED_FILETYPES): 62 | return 63 | 64 | self._logger.info(f"Reading {path}") 65 | with open(file=path, mode="rb") as f: 66 | return { 67 | "_id": self.get_id(path), 68 | "_timestamp": timestamp, 69 | "_attachment": get_base64_value(f.read()), 70 | } 71 | 72 | async def get_docs(self, filtering=None): 73 | self._logger.debug(f"Reading {self.directory}...") 74 | root_directory = Path(self.directory) 75 | 76 | for path_object in root_directory.glob(self.pattern): 77 | if not path_object.is_file(): 78 | continue 79 | 80 | # download coroutine 81 | download_coro = functools.partial(self._download, str(path_object)) 82 | 83 | # get the last modified value of the file 84 | stat = path_object.stat() 85 | ts = stat.st_mtime 86 | ts = datetime.fromtimestamp(ts, tz=timezone.utc) 87 | 88 | # send back as a doc 89 | doc = { 90 | "path": str(path_object), 91 | "last_modified_time": ts, 92 | "inode_protection_mode": stat.st_mode, 93 | "inode_number": stat.st_ino, 94 | "device_inode_reside": stat.st_dev, 95 | "number_of_links": stat.st_nlink, 96 | "uid": stat.st_uid, 97 | "gid": stat.st_gid, 98 | "ctime": stat.st_ctime, 99 | "last_access_time": stat.st_atime, 100 | "size": stat.st_size, 101 | "_timestamp": ts.isoformat(), 102 | "_id": self.get_id(path_object), 103 | } 104 | 105 | yield doc, download_coro 106 | -------------------------------------------------------------------------------- /docs/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 303 See Other 2 | 3 | Location: https://www.elastic.co/community/codeofconduct 4 | -------------------------------------------------------------------------------- /docs/INTERNAL.md: -------------------------------------------------------------------------------- 1 | # Elastic Internal Documentation 2 | 3 | ### Testing locally with Enterprise Search and Kibana 4 | 5 | ##### Setup 6 | * clone [kibana](https://github.com/elastic/kibana) 7 | * `cd` into your kibana checkout 8 | * install kibana dependencies with: 9 | ```shell 10 | nvm use && yarn kbn clean && yarn kbn 
bootstrap 11 | ``` 12 | * clone [ent-search](https://github.com/elastic/ent-search/) 13 | * follow the ent-search [setup steps](https://github.com/elastic/ent-search/#set-up) 14 | 15 | ##### Start Elasticsearch 16 | * `cd` into your kibana checkout 17 | * start elasticsearch with: 18 | ```shell 19 | nvm use && yarn es snapshot -E xpack.security.authc.api_key.enabled=true 20 | ``` 21 | 22 | ##### Start Kibana 23 | * `cd` into your kibana checkout 24 | * start kibana with: 25 | ```shell 26 | nvm use && yarn start --no-base-path 27 | ``` 28 | 29 | ##### Start Enterprise Search 30 | * `cd` into your ent-search checkout 31 | * start Enterprise Search with: 32 | ```shell 33 | script/togo/development start 34 | ``` 35 | 36 | ##### Start Connectors 37 | * `cd` into your connectors checkout 38 | * run `make install` to get the latest dependencies 39 | * run `make run` to start Connectors. 40 | -------------------------------------------------------------------------------- /docs/REFERENCE.md: -------------------------------------------------------------------------------- 1 | # Connector Reference (Home) 2 | ℹ️ Find documentation for the following connector clients in the Elastic Enterprise Search docs: 3 | 4 | - [Azure Blob Storage](https://www.elastic.co/guide/en/enterprise-search/master/connectors-azure-blob.html) 5 | - [Confluence](https://www.elastic.co/guide/en/enterprise-search/master/connectors-confluence.html) 6 | - [Dropbox](https://www.elastic.co/guide/en/enterprise-search/master/connectors-dropbox.html) 7 | - [Google Cloud Storage](https://www.elastic.co/guide/en/enterprise-search/master/connectors-google-cloud.html) 8 | - [Google Drive](https://www.elastic.co/guide/en/enterprise-search/master/connectors-google-drive.html) 9 | - [Jira](https://www.elastic.co/guide/en/enterprise-search/master/connectors-jira.html) 10 | - [Microsoft SQL](https://www.elastic.co/guide/en/enterprise-search/master/connectors-ms-sql.html) 11 | - [MongoDB](https://www.elastic.co/guide/en/enterprise-search/master/connectors-mongodb.html) 12 | - [MySQL](https://www.elastic.co/guide/en/enterprise-search/master/connectors-mysql.html) 13 | - [Network drive](https://www.elastic.co/guide/en/enterprise-search/master/connectors-network-drive.html) 14 | - [Oracle](https://www.elastic.co/guide/en/enterprise-search/master/connectors-oracle.html) 15 | - [PostgreSQL](https://www.elastic.co/guide/en/enterprise-search/master/connectors-postgresql.html) 16 | - [S3](https://www.elastic.co/guide/en/enterprise-search/master/connectors-s3.html) 17 | - [ServiceNow](https://www.elastic.co/guide/en/enterprise-search/master/connectors-servicenow.html) 18 | - [SharePoint Online](https://www.elastic.co/guide/en/enterprise-search/master/connectors-sharepoint-online.html) 19 | - [SharePoint Server](https://www.elastic.co/guide/en/enterprise-search/master/connectors-sharepoint.html) 20 | - [GitHub](./reference/github.md) 21 | -------------------------------------------------------------------------------- /docs/RELEASING.md: -------------------------------------------------------------------------------- 1 | # Releasing the Connectors project 2 | 3 | The version scheme we use is **MAJOR.MINOR.PATCH.BUILD** and stored in the [VERSION](https://github.com/elastic/connectors-python/blob/main/connectors/VERSION) file at the root of this repository. 
4 | 5 | ## Unified release 6 | 7 | **MAJOR.MINOR.PATCH** should match the Elastic and Enterprise Search version it targets, and the *BUILD* number should be set to **0** the day the Connectors release is created to be included with the Enterprise Search distribution. 8 | 9 | For example, when shipping for `8.1.2`, the version is `8.1.2.0`. 10 | 11 | To release Connectors: 12 | 13 | 1. Make sure all tests and the linter pass with `make lint test` 14 | 2. Run `make release` 15 | 3. Set the [VERSION](../connectors/VERSION) file to the new/incremented version on the release branch 16 | 4. PR this change to the appropriate Connectors release branch 17 | 18 | A package will be generated in `dist/`. 19 | 20 | Take care of the branching (minor releases only): 21 | 22 | - Increment the VERSION on main to match the next minor release 23 | - Create a new maintenance branch 24 | - Make sure the `.backportrc.json` is updated. The previous minor is added to `targetBranchChoices` and the new minor is used in `branchLabelMapping` 25 | 26 | After the Elastic unified release is complete: 27 | 28 | - Update the **BUILD** version ([example PR](https://github.com/elastic/connectors-python/pull/122)). Note that the Connectors project does not immediately bump to the next **PATCH** version. That won't happen until that patch release's FF date. 29 | 30 | ## In-Between releases 31 | 32 | Sometimes, we need to release Connectors independently from Enterprise Search. For instance, if someone wants to use the project as an HTTP service and we have a bug fix we want them to have as soon as possible. 33 | 34 | In that case, we increment the **BUILD** number and follow the same release process as for the unified release. 35 | 36 | So `8.1.2.1`, `8.1.2.2`, etc. On the next unified release, the version will be bumped to the next **PATCH** value, and **BUILD** set to `0`. 37 | 38 | **In-Between releases should never introduce new features since they will eventually be merged into the next PATCH release. New features are always done in Developer previews.** 39 | 40 | ## Developer preview releases 41 | 42 | For developer previews, we add a `pre` tag using an ISO8601 date. 43 | You can use `make release_dev` instead of `make release` in that case. 44 | -------------------------------------------------------------------------------- /docs/SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | Thanks for your interest in the security of our products. Our security policy can be found at [https://www.elastic.co/community/security](https://www.elastic.co/community/security). 4 | 5 | ## Reporting a Vulnerability 6 | Please send security vulnerability reports to security@elastic.co. 7 | -------------------------------------------------------------------------------- /docs/SUPPORT.md: -------------------------------------------------------------------------------- 1 | # Getting Support 2 | 3 | ### Official Support Services 4 | If you have an Elastic subscription, you are entitled to Support services. See our welcome page for [working with our support team](https://www.elastic.co/support/welcome). 5 | 6 | ### Where do I report issues with Connectors? 7 | If something is not working as expected, please open an [issue](https://github.com/elastic/connectors-python/issues/new). 8 | 9 | ### Where else can I go to get help? 10 | The Ingestion team at Elastic maintains this repository and is happy to help. 
Try posting your question to the [Elastic discuss forums](https://discuss.elastic.co/c/enterprise-search/84). Be sure to mention that you're using Connectors, let us know which service type you're trying to use, and include any errors/issues you are encountering. You can also find us in the `#enterprise-search` channel of the [Elastic Community Slack](http://elasticstack.slack.com). 11 | -------------------------------------------------------------------------------- /docs/sync-rules/CONFLUENCE.md: -------------------------------------------------------------------------------- 1 | ### Setting up the Confluence connector 2 | 3 | See the [Developer guide](../../docs/DEVELOPING.md) for setting up connectors. 4 | 5 | ### Example advanced sync rules 6 | 7 | #### One query for indexing data that is in a particular Space with key 'DEV' 8 | 9 | ```json 10 | [ 11 | { 12 | "query": "space = DEV" 13 | } 14 | ] 15 | ``` 16 | 17 | #### Two queries for indexing data based on created and lastmodified time 18 | 19 | ```json 20 | [ 21 | { 22 | "query": "created >= now('-5w')" 23 | }, 24 | { 25 | "query": "lastmodified < startOfYear()" 26 | } 27 | ] 28 | ``` 29 | 30 | #### One query for indexing only given types in a space with space key 'SD' 31 | ```json 32 | [ 33 | { 34 | "query": "type in ('page', 'attachment') AND space.key = 'SD'" 35 | } 36 | ] 37 | ``` 38 | 39 | ### Limitations 40 | 41 | - Syncing recently created/updated items in Confluence may be delayed when using advanced sync rules, since the search endpoint used for CQL queries returns stale results in the response. For more details, refer to the following issue: https://jira.atlassian.com/browse/CONFCLOUD-73997 42 | -------------------------------------------------------------------------------- /docs/sync-rules/DROPBOX.md: -------------------------------------------------------------------------------- 1 | ### Setting up the Dropbox connector 2 | 3 | See the [Developer guide](../../docs/DEVELOPING.md) for setting up connectors. 4 | 5 | ### Example advanced sync rules 6 | 7 | #### Two rules for indexing content based on queries only 8 | 9 | ```json 10 | [ 11 | { 12 | "query": "confidential" 13 | }, 14 | { 15 | "query": "dropbox" 16 | } 17 | ] 18 | ``` 19 | 20 | #### Single query for indexing data based on file extensions 21 | 22 | ```json 23 | [ 24 | { 25 | "query": "dropbox", 26 | "options": { 27 | "file_extensions": [ 28 | "txt", 29 | "pdf" 30 | ] 31 | } 32 | } 33 | ] 34 | ``` 35 | 36 | #### Single query for indexing data based on file categories 37 | ```json 38 | [ 39 | { 40 | "query": "test", 41 | "options": { 42 | "file_categories": [ 43 | { 44 | ".tag": "paper" 45 | }, 46 | { 47 | ".tag": "png" 48 | } 49 | ] 50 | } 51 | } 52 | ] 53 | ``` 54 | **Note** 55 | 56 | - `query` contains a string that matches words in the filename. 57 | - If both `file_extensions` and `file_categories` are provided, priority is given to `file_categories`. 58 | 59 | ### Limitations 60 | 61 | - Content extraction is not supported for Dropbox Paper files when advanced sync rules are enabled. 62 | -------------------------------------------------------------------------------- /docs/sync-rules/GITHUB.md: -------------------------------------------------------------------------------- 1 | ### Setting up the GitHub connector 2 | 3 | See the [Developer guide](../../docs/DEVELOPING.md) for setting up connectors.
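The `issue` and `pr` filter strings in the examples below look like GitHub search qualifiers, so you can get a feel for what a filter will match by querying GitHub's search API directly before adding it to a rule. A minimal sketch, not how the connector itself evaluates rules; `owner/repo_name` is a placeholder, and private repositories need an `Authorization` header:

```shell
# Preview what the pr filter "is:open" matches in a repository
curl -s "https://api.github.com/search/issues?q=repo:owner/repo_name+is:open"
```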
4 | 5 | ### Example advanced sync rules 6 | 7 | #### Advanced rules for indexing documents and files based on the branch name configured via the branch key 8 | 9 | ```json 10 | [ 11 | { 12 | "repository": "repo_name", 13 | "filter": { 14 | "branch": "sync-rules-feature" 15 | } 16 | } 17 | ] 18 | ``` 19 | 20 | #### Advanced rules for indexing documents based on an issue query related to bugs via the issue key 21 | 22 | ```json 23 | [ 24 | { 25 | "repository": "repo_name", 26 | "filter": { 27 | "issue": "is:bug" 28 | } 29 | } 30 | ] 31 | ``` 32 | 33 | #### Advanced rules for indexing documents based on a PR query related to open PRs via the pr key 34 | 35 | ```json 36 | [ 37 | { 38 | "repository": "repo_name", 39 | "filter": { 40 | "pr": "is:open" 41 | } 42 | } 43 | ] 44 | ``` 45 | 46 | #### Advanced rules for indexing documents and files based on queries and branch name 47 | 48 | ```json 49 | [ 50 | { 51 | "repository": "repo_name", 52 | "filter": { 53 | "issue": "is:bug", 54 | "pr": "is:open", 55 | "branch": "sync-rules-feature" 56 | } 57 | } 58 | ] 59 | ``` 60 | 61 | **NOTE**: All documents pulled by a given rule are indexed regardless of whether a document has already been indexed by a previous rule. In such cases the same document is pulled more than once, so the document counts reported in the logs will differ; the correct count of indexed documents can be obtained from the Elasticsearch index. 62 | 63 | #### Advanced rules with overlapping filters 64 | 65 | ```json 66 | [ 67 | { 68 | "filter": { 69 | "pr": "is:pr is:merged label:auto-backport merged:>=2023-07-20" 70 | }, 71 | "repository": "repo_name" 72 | }, 73 | { 74 | "filter": { 75 | "pr": "is:pr is:merged label:auto-backport merged:>=2023-07-15" 76 | }, 77 | "repository": "repo_name" 78 | } 79 | ] 80 | ``` 81 | -------------------------------------------------------------------------------- /docs/sync-rules/JIRA.md: -------------------------------------------------------------------------------- 1 | ### Setting up the Jira connector 2 | 3 | See the [Developer guide](../../docs/DEVELOPING.md) for setting up connectors. 4 | 5 | ### Example advanced sync rules 6 | 7 | #### Two queries for indexing content based on the status of Jira issues 8 | 9 | ```json 10 | [ 11 | { 12 | "query": "project = Collaboration AND status = 'In Progress'" 13 | }, 14 | { 15 | "query": "status IN ('To Do', 'In Progress', 'Closed')" 16 | } 17 | ] 18 | ``` 19 | 20 | #### One query for indexing data based on the priority of issues for the projects ProjA, ProjB and ProjC 21 | 22 | ```json 23 | [ 24 | { 25 | "query": "priority in (Blocker, Critical) AND project in (ProjA, ProjB, ProjC)" 26 | } 27 | ] 28 | ``` 29 | 30 | #### One query for indexing data based on assignee and created time 31 | ```json 32 | [ 33 | { 34 | "query": "assignee is EMPTY and created < -1d" 35 | } 36 | ] 37 | ``` 38 | -------------------------------------------------------------------------------- /docs/sync-rules/MYSQL.md: -------------------------------------------------------------------------------- 1 | ### Setting up the MySQL connector 2 | 3 | See the [Developer guide](../../docs/DEVELOPING.md) for setting up connectors.
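Advanced sync rules for MySQL pair a `tables` list with a plain SQL `query` (see the examples further below). Once the container from the Docker setup below is running, a rule's query can be previewed directly with the mysql client; a sketch reusing the container name and credentials configured below:

```shell
# Preview the rows an advanced-rule query would return
docker exec mysql_container mysql -u elastic -pchangeme -e "SELECT * FROM sample_db.person LIMIT 1;"
```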
4 | 5 | ### MySQL Docker setup 6 | 7 | #### Run MySQL container 8 | ```shell 9 | docker run --name mysql_container -p 3306:3306 -e MYSQL_ROOT_PASSWORD=changeme -e MYSQL_USER=elastic -e MYSQL_PASSWORD=changeme -d mysql:latest 10 | ``` 11 | 12 | #### Grant privileges to user 13 | 14 | ```shell 15 | docker exec -it mysql_container mysql -u root -p 16 | ``` 17 | 18 | ```mysql 19 | GRANT ALL PRIVILEGES ON sample_db.* TO 'elastic'@'%'; 20 | FLUSH PRIVILEGES; 21 | ``` 22 | 23 | ### Example data 24 | ```mysql 25 | CREATE DATABASE sample_db; 26 | USE sample_db; 27 | 28 | CREATE TABLE person ( 29 | person_id INT AUTO_INCREMENT PRIMARY KEY, 30 | name VARCHAR(255), 31 | age INT 32 | ); 33 | 34 | CREATE TABLE address ( 35 | address_id INT AUTO_INCREMENT PRIMARY KEY, 36 | address VARCHAR(255) 37 | ); 38 | 39 | INSERT INTO person (name, age) VALUES ('Alice', 30); 40 | INSERT INTO person (name, age) VALUES ('Bob', 25); 41 | INSERT INTO person (name, age) VALUES ('Carol', 35); 42 | 43 | INSERT INTO address (address) VALUES ('123 Elm St'); 44 | INSERT INTO address (address) VALUES ('456 Oak St'); 45 | INSERT INTO address (address) VALUES ('789 Pine St'); 46 | ``` 47 | 48 | ### Example advanced sync rules 49 | 50 | #### Two LIMIT queries 51 | 52 | ```json 53 | [ 54 | { 55 | "tables": [ 56 | "person" 57 | ], 58 | "query": "SELECT * FROM sample_db.person LIMIT 1;" 59 | }, 60 | { 61 | "tables": [ 62 | "address" 63 | ], 64 | "query": "SELECT * FROM sample_db.address LIMIT 1;" 65 | } 66 | ] 67 | ``` 68 | 69 | #### One WHERE query 70 | 71 | ```json 72 | [ 73 | { 74 | "tables": ["person"], 75 | "query": "SELECT * FROM sample_db.person WHERE sample_db.person.age > 25;" 76 | } 77 | ] 78 | ``` 79 | 80 | #### One JOIN query 81 | ```json 82 | [ 83 | { 84 | "tables": ["person", "address"], 85 | "query": "SELECT * FROM sample_db.person INNER JOIN sample_db.address ON sample_db.person.person_id = sample_db.address.address_id;" 86 | } 87 | ] 88 | ``` -------------------------------------------------------------------------------- /docs/sync-rules/SERVICENOW.md: -------------------------------------------------------------------------------- 1 | ### Setting up the ServiceNow connector 2 | 3 | See the [Developer guide](../../docs/DEVELOPING.md) for setting up connectors.
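The `query` values in the examples below are ServiceNow encoded queries. An encoded query can be sanity-checked against the ServiceNow Table API before it is added to a sync rule; a minimal sketch, not how the connector runs the query, where the instance URL and credentials are placeholders:

```shell
# Preview the records an encoded query matches on the incident table
curl -s -u "admin:<password>" \
  "https://<instance>.service-now.com/api/now/table/incident?sysparm_query=numberSTARTSWITHINC001"
```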
4 | 5 | ### Example advanced sync rules 6 | 7 | #### Query for indexing documents whose incident number starts with INC001, for the Incident service 8 | 9 | ```json 10 | [ 11 | { 12 | "service": "Incident", 13 | "query": "numberSTARTSWITHINC001" 14 | } 15 | ] 16 | ``` 17 | 18 | #### Query for indexing documents whose user active state is false, for the User service 19 | 20 | ```json 21 | [ 22 | { 23 | "service": "User", 24 | "query": "active=False" 25 | } 26 | ] 27 | ``` 28 | 29 | #### Query for indexing documents authored by the administrator, for the Knowledge service 30 | 31 | ```json 32 | [ 33 | { 34 | "service": "Knowledge", 35 | "query": "author.nameSTARTSWITHSystem Administrator" 36 | } 37 | ] 38 | ``` 39 | -------------------------------------------------------------------------------- /logo-enterprise-search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jenniferlb63/connectors-python-script/7590b84d9310827b0a1516c5ce65b971c5793681/logo-enterprise-search.png -------------------------------------------------------------------------------- /pyrightconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "pythonVersion": "3.10", 3 | "include": [ 4 | "connectors" 5 | ], 6 | "reportMissingImports": false, 7 | "reportMissingModuleSource": false, 8 | "reportOptionalMemberAccess": false, 9 | "exclude": [ 10 | "**/tests", 11 | "**/__pycache__" 12 | ], 13 | "executionEnvironments": [ 14 | { 15 | "root": "./", 16 | "venv": "./" 17 | } 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /requirements/aarch64.txt: -------------------------------------------------------------------------------- 1 | # Linux ARM 2 | -r framework.txt 3 | 4 | aiomysql==0.0.22 5 | aioboto3==10.3.0 6 | motor==2.5.1 7 | smbprotocol==1.9.0 8 | pymongo[srv]==3.13.0 9 | -------------------------------------------------------------------------------- /requirements/arm64.txt: -------------------------------------------------------------------------------- 1 | # Apple M1 2 | 3 | -r aarch64.txt 4 | 5 | SQLAlchemy[asyncio]==2.0.1 6 | pymongo[srv]==3.13.0 7 | -------------------------------------------------------------------------------- /requirements/framework.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.8.4 2 | elasticsearch[async]==8.8.0 3 | elastic-transport==8.4.0 4 | pyyaml==6.0 5 | envyaml==1.10.211231 6 | ecs-logging==2.0.0 7 | pympler==1.0.1 8 | cron-schedule-triggers==0.0.11 9 | pytz==2019.3 10 | python-dateutil==2.8.2 11 | aiogoogle==5.3.0 12 | uvloop==0.17.0; sys_platform != 'win32' 13 | fastjsonschema==2.16.2 14 | base64io==1.0.3 15 | azure-storage-blob==12.13.0 16 | SQLAlchemy==2.0.1 17 | oracledb==1.2.2 18 | asyncpg==0.27.0 19 | python-tds==1.12.0 20 | sqlalchemy-pytds==0.3.5 21 | pyOpenSSL==23.1.1 22 | dropbox==11.36.2 23 | beautifulsoup4==4.12.2 24 | gidgethub==5.2.1 25 | -------------------------------------------------------------------------------- /requirements/tests.txt: -------------------------------------------------------------------------------- 1 | # tests 2 | black==23.7.0 3 | ruff==0.0.278 4 | isort==5.12.0 5 | aioresponses==0.7.4 6 | pytest==7.4.0 7 | pytest-cov==4.1.0 8 | pytest-asyncio==0.21.1 9 | pytest-randomly==3.13.0 10 | git+https://github.com/elastic/perf8#egg=perf8 11 | freezegun==1.2.2 12 | pytest-fail-slow==0.3.0 13 | pyright==1.1.317 14 | requests==2.31.0 15 | retry==0.9.2
16 | -------------------------------------------------------------------------------- /requirements/x86_64.txt: -------------------------------------------------------------------------------- 1 | # X86 Linux or Mac 2 | -r framework.txt 3 | 4 | aiomysql==0.1.1 5 | motor==3.0.0 6 | aioboto3==10.3.0 7 | smbprotocol==1.9.0 8 | -------------------------------------------------------------------------------- /scripts/verify.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | import asyncio 7 | import os 8 | from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser 9 | 10 | import yaml 11 | from elasticsearch import AsyncElasticsearch 12 | 13 | DEFAULT_CONFIG = os.path.join(os.path.dirname(__file__), "..", "config.yml") 14 | SERVERLESS = "SERVERLESS" in os.environ 15 | 16 | 17 | async def verify(service_type, index_name, size, config): 18 | config = config["elasticsearch"] 19 | host = config["host"] 20 | auth = config["username"], config["password"] 21 | client = AsyncElasticsearch(hosts=[host], basic_auth=auth, request_timeout=120) 22 | 23 | if not SERVERLESS: 24 | await client.indices.refresh(index=index_name) 25 | 26 | try: 27 | print(f"Verifying {index_name}...") 28 | resp = await client.count(index=index_name) 29 | count = resp["count"] 30 | 31 | print(f"Found {count} documents") 32 | if count < size: 33 | raise Exception(f"We want {size} docs") 34 | 35 | # checking one doc 36 | res = await client.search(index=index_name, query={"match_all": {}}) 37 | first_doc = res["hits"]["hits"][0]["_source"] 38 | print("First doc") 39 | print(first_doc) 40 | 41 | if len(first_doc.keys()) < 3: 42 | raise Exception("The doc does not look right") 43 | 44 | if "_extract_binary_content" in first_doc: 45 | raise Exception("The pipeline did not run") 46 | 47 | if "_attachment" in first_doc: 48 | raise Exception("Content extraction was not successful") 49 | 50 | print("🤗") 51 | finally: 52 | await client.close() 53 | 54 | 55 | def _parser(): 56 | parser = ArgumentParser( 57 | prog="verify", formatter_class=ArgumentDefaultsHelpFormatter 58 | ) 59 | parser.add_argument( 60 | "--config-file", type=str, help="Configuration file", default=DEFAULT_CONFIG 61 | ) 62 | parser.add_argument( 63 | "--service-type", type=str, help="Service type", default="mongodb" 64 | ) 65 | parser.add_argument( 66 | "--index-name", type=str, help="Elasticsearch index", default="search-mongo" 67 | ) 68 | parser.add_argument("--size", type=int, help="How many docs", default=10001) 69 | return parser 70 | 71 | 72 | def main(args=None): 73 | parser = _parser() 74 | args = parser.parse_args(args=args) 75 | config_file = args.config_file 76 | 77 | if not os.path.exists(config_file): 78 | raise IOError(f"{config_file} does not exist") 79 | 80 | with open(config_file) as f: 81 | config = yaml.safe_load(f) 82 | 83 | try: 84 | asyncio.run(verify(args.service_type, args.index_name, args.size, config)) 85 | print("Bye") 86 | except (asyncio.CancelledError, KeyboardInterrupt): 87 | print("Bye") 88 | 89 | 90 | if __name__ == "__main__": 91 | main() 92 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [tool:pytest] 2 |
asyncio_mode = auto 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | import os 7 | import sys 8 | 9 | from setuptools import find_packages, setup 10 | from setuptools._vendor.packaging.markers import Marker 11 | 12 | try: 13 | ARCH = os.uname().machine 14 | except Exception: 15 | ARCH = "x86_64" 16 | 17 | if sys.version_info.major != 3: 18 | raise ValueError("Requires Python 3") 19 | if sys.version_info.minor < 10: 20 | raise ValueError("Requires Python 3.10 or higher.") 21 | 22 | from connectors import __version__ # NOQA 23 | 24 | # We feed install_requires from `requirements.txt`, but we unpin the versions so we 25 | # don't enforce them and trap folks in dependency hell (the unpinning only handles `==` pins). 26 | # 27 | # A proper production installation will do the following sequence: 28 | # 29 | # $ pip install -r requirements/`uname -m`.txt 30 | # $ pip install elasticsearch-connectors 31 | # 32 | # Because the *pinned* dependencies are what we tested. 33 | # 34 | 35 | 36 | def extract_req(req): 37 | req = req.strip().split(";") 38 | if len(req) > 1: 39 | env_marker = req[-1].strip() 40 | marker = Marker(env_marker) 41 | if not marker.evaluate(): 42 | return None 43 | req = req[0] 44 | req = req.split("=") 45 | return req[0] 46 | 47 | 48 | def read_reqs(req_file): 49 | deps = [] 50 | reqs_dir, __ = os.path.split(req_file) 51 | 52 | with open(req_file) as f: 53 | reqs = f.readlines() 54 | for req in reqs: 55 | req = req.strip() 56 | if req == "" or req.startswith("#"): 57 | continue 58 | if req.startswith("-r"): 59 | subreq_file = req.split("-r")[-1].strip() 60 | subreq_file = os.path.join(reqs_dir, subreq_file) 61 | for subreq in read_reqs(subreq_file): 62 | dep = extract_req(subreq) 63 | if dep is not None and dep not in deps: 64 | deps.append(dep) 65 | else: 66 | dep = extract_req(req) 67 | if dep is not None and dep not in deps: 68 | deps.append(dep) 69 | return deps 70 | 71 | 72 | install_requires = read_reqs(os.path.join("requirements", f"{ARCH}.txt")) 73 | 74 | 75 | with open("README.md") as f: 76 | long_description = f.read() 77 | 78 | 79 | classifiers = [ 80 | "Programming Language :: Python", 81 | "License :: OSI Approved :: Apache Software License", 82 | "Programming Language :: Python :: 3 :: Only", 83 | ] 84 | 85 | 86 | setup( 87 | name="elasticsearch-connectors", 88 | version=__version__, 89 | packages=find_packages(), 90 | description=("Elasticsearch Connectors."), 91 | long_description=long_description, 92 | author="Ingestion Team", 93 | author_email="tarek@ziade.org", 94 | include_package_data=True, 95 | zip_safe=False, 96 | classifiers=classifiers, 97 | install_requires=install_requires, 98 | entry_points=""" 99 | [console_scripts] 100 | elastic-ingest = connectors.cli:main 101 | fake-kibana = connectors.kibana:main 102 | """, 103 | ) 104 | -------------------------------------------------------------------------------- /tests/commons.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements.
Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | class AsyncIterator: 7 | """ 8 | Async documents generator fake class, which records the args and kwargs it was called with. 9 | """ 10 | 11 | def __init__(self, items): 12 | self.items = items 13 | self.call_args = [] 14 | self.call_kwargs = [] 15 | self.i = 0 16 | self.call_count = 0 17 | 18 | def __aiter__(self): 19 | return self 20 | 21 | async def __anext__(self): 22 | if self.i >= len(self.items): 23 | raise StopAsyncIteration 24 | 25 | item = self.items[self.i] 26 | self.i += 1 27 | return item 28 | 29 | def __call__(self, *args, **kwargs): 30 | self.call_count += 1 31 | 32 | if args: 33 | self.call_args.append(args) 34 | 35 | if kwargs: 36 | self.call_kwargs.append(kwargs) 37 | 38 | return self 39 | 40 | def assert_not_called(self): 41 | assert self.call_count == 0 42 | 43 | def assert_called_once(self): 44 | assert self.call_count == 1 45 | -------------------------------------------------------------------------------- /tests/es/test_document.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from unittest.mock import AsyncMock, Mock 7 | 8 | import pytest 9 | 10 | from connectors.es import ESDocument, InvalidDocumentSourceError 11 | 12 | 13 | @pytest.mark.parametrize( 14 | "doc_source", 15 | [ 16 | None, 17 | "hahaha", 18 | {}, 19 | {"_id": {}}, 20 | {"_id": "1", "_source": "hahaha"}, 21 | ], 22 | ) 23 | def test_es_document_raise(doc_source): 24 | with pytest.raises(InvalidDocumentSourceError): 25 | ESDocument(elastic_index=None, doc_source=doc_source) 26 | 27 | 28 | def test_es_document_ok(): 29 | doc_source = {"_id": "1", "_source": {}} 30 | es_document = ESDocument(elastic_index=None, doc_source=doc_source) 31 | assert isinstance(es_document, ESDocument) 32 | 33 | 34 | def test_es_document_get(): 35 | source = { 36 | "_id": "test", 37 | "_seq_no": 1, 38 | "_primary_term": 2, 39 | "_source": { 40 | "string": "string_value", 41 | "none_value": None, 42 | "empty_dict": {}, 43 | "nested_dict": {"string": "string_value"}, 44 | }, 45 | } 46 | default_value = "default" 47 | es_doc = ESDocument(elastic_index=None, doc_source=source) 48 | assert es_doc.id == "test" 49 | assert es_doc._seq_no == 1 50 | assert es_doc._primary_term == 2 51 | assert es_doc.get("string", default=default_value) == "string_value" 52 | assert es_doc.get("non_existing") is None 53 | assert es_doc.get("non_existing", default=default_value) == default_value 54 | assert es_doc.get("empty_dict", default=default_value) == {} 55 | assert es_doc.get("empty_dict", "string") is None 56 | assert es_doc.get("empty_dict", "string", default=default_value) == default_value 57 | assert es_doc.get("nested_dict", "non_existing") is None 58 | assert ( 59 | es_doc.get("nested_dict", "non_existing", default=default_value) 60 | == default_value 61 | ) 62 | 63 | 64 | @pytest.mark.asyncio 65 | async def test_reload(): 66 | source = { 67 | "_id": "test", 68 | "_seq_no": 1, 69 | "_primary_term": 1, 70 | "_source": { 71 | "status": "pending", 72 | }, 73 | } 74 | updated_source = { 75 | "_id": "test", 76 | "_seq_no": 2, 77 | "_primary_term": 2, 78 | "_source": { 79 | "status": "in_progress", 80 | }, 81 | } 82
| 83 | index = Mock() 84 | index.fetch_response_by_id = AsyncMock(return_value=updated_source) 85 | doc = ESDocument(index, source) 86 | assert doc.id == "test" 87 | assert doc._seq_no == 1 88 | assert doc._primary_term == 1 89 | assert doc.get("status") == "pending" 90 | await doc.reload() 91 | assert doc.id == "test" 92 | assert doc._seq_no == 2 93 | assert doc._primary_term == 2 94 | assert doc.get("status") == "in_progress" 95 | -------------------------------------------------------------------------------- /tests/es/test_license.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from unittest.mock import Mock 7 | 8 | import pytest 9 | 10 | from connectors.es.license import requires_platinum_license 11 | from connectors.protocol import JobType 12 | 13 | 14 | def mock_source_klass(is_premium): 15 | source_klass = Mock() 16 | source_klass.is_premium = Mock(return_value=is_premium) 17 | 18 | return source_klass 19 | 20 | 21 | def mock_connector(document_level_security_enabled): 22 | connector = Mock() 23 | connector.features = Mock() 24 | connector.features.document_level_security_enabled = Mock( 25 | return_value=document_level_security_enabled 26 | ) 27 | 28 | return connector 29 | 30 | 31 | def mock_sync_job(job_type): 32 | sync_job = Mock() 33 | sync_job.job_type = job_type 34 | 35 | return sync_job 36 | 37 | 38 | @pytest.mark.parametrize( 39 | "job_type, document_level_security_enabled, is_premium", 40 | [ 41 | (JobType.UNSET, False, True), 42 | (JobType.ACCESS_CONTROL, True, False), 43 | (JobType.ACCESS_CONTROL, True, True), 44 | ], 45 | ) 46 | def test_requires_platinum_license( 47 | job_type, document_level_security_enabled, is_premium 48 | ): 49 | sync_job = mock_sync_job(job_type) 50 | connector = mock_connector(document_level_security_enabled) 51 | source_klass = mock_source_klass(is_premium) 52 | 53 | assert requires_platinum_license(sync_job, connector, source_klass) 54 | 55 | 56 | @pytest.mark.parametrize( 57 | "job_type, document_level_security_enabled, is_premium", 58 | [ 59 | (JobType.FULL, True, False), 60 | (JobType.INCREMENTAL, True, False), 61 | (JobType.ACCESS_CONTROL, False, False), 62 | ], 63 | ) 64 | def test_does_not_require_platinum_license( 65 | job_type, document_level_security_enabled, is_premium 66 | ): 67 | sync_job = mock_sync_job(job_type) 68 | connector = mock_connector(document_level_security_enabled) 69 | source_klass = mock_source_klass(is_premium) 70 | 71 | assert not requires_platinum_license(sync_job, connector, source_klass) 72 | -------------------------------------------------------------------------------- /tests/fixtures/config.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://nowhere.com:9200 3 | user: elastic 4 | password: ${elasticsearch.password} 5 | bulk: 6 | queue_max_size: 1024 7 | chunck_size: 250 8 | max_wait_duration: 1 9 | initial_backoff_duration: 0 10 | backoff_multiplier: 0 11 | 12 | service: 13 | idling: 0.5 14 | heartbeat: 300 15 | max_errors: 20 16 | max_errors_span: 600 17 | max_concurrent_content_syncs: 10 18 | max_concurrent_access_control_syncs: 10 19 | log_level: INFO 20 | 21 | connectors: 22 | - 23 | connector_id: '1' 24 | 25 | sources: 26 | fake: 
fake_sources:FakeSource 27 | fake_with_incremental: fake_sources:FakeSourceWithIncrementalSync 28 | large_fake: fake_sources:LargeFakeSource 29 | fail_once: fake_sources:FailsThenWork 30 | fake_ts: fake_sources:FakeSourceTS 31 | filtering_state_valid: fake_sources:FakeSourceFilteringValid 32 | filtering_state_invalid: fake_sources:FakeSourceFilteringStateInvalid 33 | filtering_state_edited: fake_sources:FakeSourceFilteringStateEdited 34 | filtering_errors_present: fake_sources:FakeSourceFilteringErrorsPresent 35 | -------------------------------------------------------------------------------- /tests/fixtures/config_2.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://nowhere.com:9200 3 | user: elastic 4 | password: ${elasticsearch.password} 5 | bulk: 6 | queue_max_size: 1024 7 | max_wait_duration: 1 8 | initial_backoff_duration: 0 9 | backoff_multiplier: 0 10 | 11 | service: 12 | idling: 0.5 13 | heartbeat: 300 14 | max_errors: 20 15 | max_errors_span: 600 16 | 17 | connectors: 18 | - 19 | connector_id: 'blah' 20 | 21 | sources: 22 | fake: fake_sources:FakeSource 23 | 24 | -------------------------------------------------------------------------------- /tests/fixtures/config_https.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: https://safenowhere.com 3 | user: elastic 4 | password: ${elasticsearch.password} 5 | bulk: 6 | queue_max_size: 1024 7 | chunck_size: 250 8 | max_wait_duration: 1 9 | initial_backoff_duration: 0 10 | backoff_multiplier: 0 11 | 12 | service: 13 | idling: 0.5 14 | heartbeat: 300 15 | max_errors: 20 16 | max_errors_span: 600 17 | 18 | connectors: 19 | - 20 | connector_id: '1' 21 | 22 | sources: 23 | fake: fake_sources:FakeSource 24 | large_fake: fake_sources:LargeFakeSource 25 | -------------------------------------------------------------------------------- /tests/fixtures/config_mem.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://nowhere.com:9200 3 | user: elastic 4 | password: ${elasticsearch.password} 5 | bulk: 6 | queue_max_size: 1024 7 | chunk_size: 250 8 | max_wait_duration: 1 9 | initial_backoff_duration: 0 10 | backoff_multiplier: 0 11 | 12 | service: 13 | idling: 0.5 14 | heartbeat: 300 15 | max_errors: 20 16 | max_errors_span: 600 17 | trace_mem: true 18 | 19 | connectors: 20 | - 21 | connector_id: '1' 22 | 23 | sources: 24 | fake: fake_sources:FakeSource 25 | large_fake: fake_sources:LargeFakeSource 26 | fail_once: fake_sources:FailsThenWork 27 | -------------------------------------------------------------------------------- /tests/fixtures/entsearch.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://nowhere.com:9200 3 | user: elastic 4 | password: ${elasticsearch.password} 5 | headers: 6 | X-Elastic-Auth: SomeYeahValue 7 | X-Something: 1 8 | 9 | log_level: debug 10 | -------------------------------------------------------------------------------- /tests/fixtures/entsearch_invalid_log_level.yml: -------------------------------------------------------------------------------- 1 | 2 | elasticsearch: 3 | host: http://nowhere.com:9200 4 | user: elastic 5 | password: ${elasticsearch.password} 6 | headers: 7 | X-Elastic-Auth: SomeYeahValue 8 | X-Something: 1 9 | 10 | log_level: WHAT 11 | -------------------------------------------------------------------------------- 
/tests/fixtures/memconfig.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://nowhere.com:9200 3 | user: elastic 4 | password: ${elasticsearch.password} 5 | bulk: 6 | queue_max_size: 1024 7 | chunk_size: 500 8 | chunk_max_mem_size: 0.5 9 | queue_max_mem_size: 25 10 | max_wait_duration: 1 11 | initial_backoff_duration: 0 12 | backoff_multiplier: 0 13 | 14 | service: 15 | idling: 0.5 16 | heartbeat: 300 17 | max_errors: 20 18 | max_errors_span: 600 19 | 20 | connectors: 21 | - 22 | connector_id: '1' 23 | 24 | sources: 25 | fake: fake_sources:FakeSource 26 | large_fake: fake_sources:LargeFakeSource 27 | fail_once: fake_sources:FailsThenWork 28 | -------------------------------------------------------------------------------- /tests/services/test_job_cleanup.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | 7 | import asyncio 8 | from unittest.mock import AsyncMock, Mock, patch 9 | 10 | import pytest 11 | 12 | from connectors.services.job_cleanup import IDLE_JOB_ERROR, JobCleanUpService 13 | from tests.commons import AsyncIterator 14 | 15 | CONFIG = { 16 | "elasticsearch": { 17 | "host": "http://nowhere.com:9200", 18 | "user": "elastic", 19 | "password": "changeme", 20 | }, 21 | "service": { 22 | "max_errors": 20, 23 | "max_errors_span": 600, 24 | "job_cleanup_interval": 1, 25 | }, 26 | "native_service_types": ["mongodb"], 27 | } 28 | 29 | 30 | def create_service(): 31 | return JobCleanUpService(CONFIG) 32 | 33 | 34 | def mock_connector(id="1", index_name="index_name"): 35 | connector = Mock() 36 | connector.id = id 37 | connector.index_name = index_name 38 | connector.sync_done = AsyncMock() 39 | return connector 40 | 41 | 42 | def mock_sync_job(id="1", connector_id="1", index_name="index_name"): 43 | job = Mock() 44 | job.job_id = id 45 | job.connector_id = connector_id 46 | job.index_name = index_name 47 | job.fail = AsyncMock() 48 | job.reload = AsyncMock() 49 | return job 50 | 51 | 52 | async def run_service_with_stop_after(service, stop_after): 53 | async def _terminate(): 54 | await asyncio.sleep(stop_after) 55 | service.stop() 56 | 57 | await asyncio.gather(service.run(), _terminate()) 58 | 59 | 60 | @pytest.mark.asyncio 61 | @patch("connectors.protocol.SyncJobIndex.delete_jobs") 62 | @patch("connectors.protocol.SyncJobIndex.delete_indices") 63 | @patch("connectors.protocol.SyncJobIndex.idle_jobs") 64 | @patch("connectors.protocol.SyncJobIndex.orphaned_jobs") 65 | @patch("connectors.protocol.ConnectorIndex.fetch_by_id") 66 | @patch("connectors.protocol.ConnectorIndex.supported_connectors") 67 | @patch("connectors.protocol.ConnectorIndex.all_connectors") 68 | async def test_cleanup_jobs( 69 | all_connectors, 70 | supported_connectors, 71 | connector_fetch_by_id, 72 | orphaned_jobs, 73 | idle_jobs, 74 | delete_indices, 75 | delete_jobs, 76 | ): 77 | existing_index_name = "foo" 78 | to_be_deleted_index_name = "bar" 79 | connector = mock_connector(index_name=existing_index_name) 80 | sync_job = mock_sync_job(index_name=to_be_deleted_index_name) 81 | another_sync_job = mock_sync_job(index_name=existing_index_name) 82 | 83 | all_connectors.return_value = AsyncIterator([connector]) 84 | supported_connectors.return_value = 
AsyncIterator([connector]) 85 | connector_fetch_by_id.return_value = connector 86 | orphaned_jobs.return_value = AsyncIterator([sync_job, another_sync_job]) 87 | idle_jobs.return_value = AsyncIterator([sync_job]) 88 | delete_jobs.return_value = {"deleted": 1, "failures": [], "total": 1} 89 | 90 | service = create_service() 91 | await run_service_with_stop_after(service, 0.1) 92 | 93 | delete_indices.assert_called_with(indices=[to_be_deleted_index_name]) 94 | delete_jobs.assert_called_with(job_ids=[sync_job.id, another_sync_job.id]) 95 | sync_job.fail.assert_called_with(message=IDLE_JOB_ERROR) 96 | connector.sync_done.assert_called_with(job=sync_job) 97 | -------------------------------------------------------------------------------- /tests/sources/fixtures/README.md: -------------------------------------------------------------------------------- 1 | e2e fixtures 2 | ------------ 3 | 4 | Each fixture needs to implement the following: 5 | 6 | - create a directory here that matches the service type 7 | - add in it the following files: 8 | 9 | - config.yml 10 | - fixture.py 11 | - requirements.txt 12 | - docker-compose.yml 13 | 14 | config.yml 15 | ========== 16 | 17 | The config file necessary to run the connector for the ftest. 18 | Specifically, this must set the `connector_id` and `service_type` for the connector. 19 | Other configuration changes are optional. 20 | 21 | fixture.py 22 | ========== 23 | 24 | This file may contain four functions (all optional): 25 | 26 | - load -- loads data in the backend 27 | - remove -- removes random data in the backend 28 | - setup -- called before the docker is started 29 | - teardown -- called after the docker has been torn down 30 | 31 | requirements.txt 32 | ================ 33 | 34 | pip requirements. Lists all libs needed for `fixture.py` to run 35 | 36 | 37 | docker-compose.yml 38 | ================== 39 | 40 | A Docker compose file that needs to run the whole stack: 41 | 42 | - Elasticsearch 43 | - Kibana 44 | - Enterprise Search 45 | - Any backend server like MySQL 46 | 47 | 48 | -------------------------------------------------------------------------------- /tests/sources/fixtures/azure_blob_storage/config.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://localhost:9200 3 | username: elastic 4 | password: changeme 5 | ssl: true 6 | bulk: 7 | queue_max_size: 1024 8 | queue_max_mem_size: 25 9 | display_every: 100 10 | chunk_size: 1000 11 | max_concurrency: 5 12 | chunk_max_mem_size: 5 13 | concurrent_downloads: 10 14 | request_timeout: 120 15 | max_wait_duration: 120 16 | initial_backoff_duration: 1 17 | backoff_multiplier: 2 18 | log_level: info 19 | 20 | service: 21 | idling: 5 22 | heartbeat: 300 23 | max_errors: 20 24 | max_errors_span: 600 25 | max_concurrent_syncs: 1 26 | job_cleanup_interval: 300 27 | log_level: INFO 28 | 29 | connectors: 30 | - 31 | connector_id: 'abs' 32 | service_type: 'azure_blob_storage' 33 | 34 | sources: 35 | azure_blob_storage: connectors.sources.azure_blob_storage:AzureBlobStorageDataSource 36 | -------------------------------------------------------------------------------- /tests/sources/fixtures/azure_blob_storage/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:${VERSION} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - 
bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | kibana: 28 | image: docker.elastic.co/kibana/kibana:${VERSION} 29 | ports: 30 | - 5601:5601 31 | extra_hosts: 32 | - "host.docker.internal:host-gateway" 33 | depends_on: 34 | - "elasticsearch" 35 | profiles: 36 | - "enterprise-search" 37 | environment: 38 | ELASTICSEARCH_URL: http://host.docker.internal:9200 39 | ELASTICSEARCH_HOSTS: http://host.docker.internal:9200 40 | ENTERPRISESEARCH_HOST: http://host.docker.internal:3002 41 | ELASTICSEARCH_USERNAME: kibana_system 42 | ELASTICSEARCH_PASSWORD: changeme 43 | networks: 44 | - esnet 45 | 46 | enterprise_search: 47 | image: docker.elastic.co/enterprise-search/enterprise-search:${VERSION} 48 | profiles: 49 | - "enterprise-search" 50 | depends_on: 51 | - "elasticsearch" 52 | environment: 53 | - ENT_SEARCH_DEFAULT_PASSWORD=changeme 54 | - elasticsearch.username=elastic 55 | - elasticsearch.password=changeme 56 | - elasticsearch.host=http://host.docker.internal:9200 57 | - allow_es_settings_modification=true 58 | - kibana.host=http://host.docker.internal:5601 59 | - kibana.external_url=http://localhost:5601 60 | - secret_management.encryption_keys=["4a2cd3f81d39bf28738c10db0ca782095ffac07279561809eecc722e0c20eb09"] 61 | - JAVA_OPTS=-Xms2g -Xmx2g 62 | - email.account.enabled=true 63 | - email.account.smtp.auth=plain 64 | - email.account.smtp.starttls.enable=false 65 | - email.account.smtp.host=host.docker.internal 66 | - email.account.smtp.port=1025 67 | - email.account.email_defaults.from=local@example.com 68 | - DEBUG=true 69 | ports: 70 | - 3002:3002 71 | extra_hosts: 72 | - "host.docker.internal:host-gateway" 73 | networks: 74 | - esnet 75 | 76 | azureblobstorage: 77 | image: mcr.microsoft.com/azure-storage/azurite 78 | ports: 79 | - 10000:10000 80 | command: ["azurite-blob","--blobHost","0.0.0.0","--blobPort","10000"] 81 | 82 | networks: 83 | esnet: 84 | driver: bridge 85 | 86 | volumes: 87 | esdata: 88 | driver: local 89 | -------------------------------------------------------------------------------- /tests/sources/fixtures/azure_blob_storage/fixture.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | import random 7 | import string 8 | 9 | from azure.storage.blob import BlobServiceClient 10 | 11 | CONTAINER = 2 12 | LARGE_CONTAINER = 3 13 | SMALL_CONTAINER = 7 14 | BLOB_COUNT = 1000 15 | CONNECTION_STRING = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;" 16 | 17 | 18 | def random_text(k=1024 * 20): 19 | return "".join(random.choices(string.ascii_uppercase + string.digits, k=k)) 20 | 21 | 22 | BIG_TEXT = random_text() 23 | 24 | 25 | def load(): 26 | """Generates 10k documents in the Azurite emulator""" 27 | try: 28 | blob_service_client = BlobServiceClient.from_connection_string( 29 | CONNECTION_STRING 30 | ) 31 | 32 | for container_id in range(0, SMALL_CONTAINER): 33 | container_client = blob_service_client.get_container_client( 34 | f"containersmall{container_id}" 35 | ) 36 | container_client.create_container() 37 | 38 | for blob_id in range(0, BLOB_COUNT): 39 | blob_client = container_client.get_blob_client(f"file{blob_id}.txt") 40 | blob_client.upload_blob( 41 | f"Testing blob{blob_id} document for container{container_id}", 42 | blob_type="BlockBlob", 43 | ) 44 | 45 | for container_id in range(0, LARGE_CONTAINER): 46 | container_client = blob_service_client.get_container_client( 47 | f"containerlarge{container_id}" 48 | ) 49 | container_client.create_container() 50 | 51 | for blob_id in range(0, BLOB_COUNT): 52 | blob_client = container_client.get_blob_client(f"file{blob_id}.txt") 53 | blob_client.upload_blob( 54 | BIG_TEXT, 55 | blob_type="BlockBlob", 56 | ) 57 | except Exception as exception: 58 | print(f"Exception: {exception}") 59 | 60 | 61 | def remove(): 62 | """Removes 2k documents from the Azurite emulator""" 63 | try: 64 | blob_service_client = BlobServiceClient.from_connection_string( 65 | CONNECTION_STRING 66 | ) 67 | 68 | for container_id in range(0, CONTAINER): 69 | container_client = blob_service_client.get_container_client( 70 | f"containersmall{container_id}" 71 | ) 72 | container_client.delete_container() 73 | except Exception as exception: 74 | print(f"Exception: {exception}") 75 | -------------------------------------------------------------------------------- /tests/sources/fixtures/azure_blob_storage/requirements.txt: -------------------------------------------------------------------------------- 1 | azure-storage-blob 2 | -------------------------------------------------------------------------------- /tests/sources/fixtures/confluence/.env: -------------------------------------------------------------------------------- 1 | MAX_RSS="240M" 2 | -------------------------------------------------------------------------------- /tests/sources/fixtures/confluence/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10.8 2 | 3 | ADD .
/python-flask 4 | WORKDIR /python-flask 5 | RUN pip install -r requirements.txt 6 | -------------------------------------------------------------------------------- /tests/sources/fixtures/confluence/config.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://localhost:9200 3 | username: elastic 4 | password: changeme 5 | ssl: true 6 | bulk: 7 | queue_max_size: 1024 8 | queue_max_mem_size: 25 9 | display_every: 100 10 | chunk_size: 1000 11 | max_concurrency: 5 12 | chunk_max_mem_size: 5 13 | concurrent_downloads: 10 14 | request_timeout: 120 15 | max_wait_duration: 120 16 | initial_backoff_duration: 1 17 | backoff_multiplier: 2 18 | log_level: info 19 | 20 | service: 21 | idling: 5 22 | heartbeat: 300 23 | max_errors: 20 24 | max_errors_span: 600 25 | max_concurrent_syncs: 1 26 | job_cleanup_interval: 300 27 | log_level: INFO 28 | 29 | connectors: 30 | - 31 | connector_id: 'confluence' 32 | service_type: 'confluence' 33 | 34 | sources: 35 | confluence: connectors.sources.confluence:ConfluenceDataSource 36 | -------------------------------------------------------------------------------- /tests/sources/fixtures/confluence/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:${VERSION} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | kibana: 28 | image: docker.elastic.co/kibana/kibana:${VERSION} 29 | ports: 30 | - 5601:5601 31 | extra_hosts: 32 | - "host.docker.internal:host-gateway" 33 | depends_on: 34 | - "elasticsearch" 35 | profiles: 36 | - "enterprise-search" 37 | environment: 38 | ELASTICSEARCH_URL: http://host.docker.internal:9200 39 | ELASTICSEARCH_HOSTS: http://host.docker.internal:9200 40 | ENTERPRISESEARCH_HOST: http://host.docker.internal:3002 41 | ELASTICSEARCH_USERNAME: kibana_system 42 | ELASTICSEARCH_PASSWORD: changeme 43 | networks: 44 | - esnet 45 | 46 | enterprise_search: 47 | image: docker.elastic.co/enterprise-search/enterprise-search:${VERSION} 48 | profiles: 49 | - "enterprise-search" 50 | depends_on: 51 | - "elasticsearch" 52 | environment: 53 | - ENT_SEARCH_DEFAULT_PASSWORD=changeme 54 | - elasticsearch.username=elastic 55 | - elasticsearch.password=changeme 56 | - elasticsearch.host=http://host.docker.internal:9200 57 | - allow_es_settings_modification=true 58 | - kibana.host=http://host.docker.internal:5601 59 | - kibana.external_url=http://localhost:5601 60 | - secret_management.encryption_keys=["4a2cd3f81d39bf28738c10db0ca782095ffac07279561809eecc722e0c20eb09"] 61 | - JAVA_OPTS=-Xms2g -Xmx2g 62 | - email.account.enabled=true 63 | - email.account.smtp.auth=plain 64 | - email.account.smtp.starttls.enable=false 65 | - email.account.smtp.host=host.docker.internal 66 | - email.account.smtp.port=1025 67 | - email.account.email_defaults.from=local@example.com 68 | - DEBUG=true 69 | ports: 70 | - 3002:3002 71 | extra_hosts: 72 | - "host.docker.internal:host-gateway" 73 | 
networks: 74 | - esnet 75 | 76 | confluence: 77 | build: . 78 | command: python fixture.py 79 | ports: 80 | - "5000:5000" 81 | volumes: 82 | - .:/python-flask 83 | restart: always 84 | 85 | networks: 86 | esnet: 87 | driver: bridge 88 | 89 | volumes: 90 | esdata: 91 | driver: local 92 | -------------------------------------------------------------------------------- /tests/sources/fixtures/confluence/requirements.txt: -------------------------------------------------------------------------------- 1 | flask==2.2.3 -------------------------------------------------------------------------------- /tests/sources/fixtures/dir/.env: -------------------------------------------------------------------------------- 1 | SYSTEM_DIR=dir/data 2 | -------------------------------------------------------------------------------- /tests/sources/fixtures/dir/config.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://localhost:9200 3 | username: elastic 4 | password: changeme 5 | ssl: true 6 | bulk: 7 | queue_max_size: 1024 8 | queue_max_mem_size: 25 9 | display_every: 100 10 | chunk_size: 1000 11 | max_concurrency: 5 12 | chunk_max_mem_size: 5 13 | concurrent_downloads: 10 14 | request_timeout: 120 15 | max_wait_duration: 120 16 | initial_backoff_duration: 1 17 | backoff_multiplier: 2 18 | log_level: info 19 | 20 | service: 21 | idling: 5 22 | heartbeat: 300 23 | max_errors: 20 24 | max_errors_span: 600 25 | max_concurrent_syncs: 1 26 | job_cleanup_interval: 300 27 | log_level: INFO 28 | 29 | connectors: 30 | - 31 | connector_id: 'dir' 32 | service_type: 'dir' 33 | 34 | sources: 35 | dir: connectors.sources.directory:DirectoryDataSource 36 | -------------------------------------------------------------------------------- /tests/sources/fixtures/dir/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:${VERSION} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | kibana: 28 | image: docker.elastic.co/kibana/kibana:${VERSION} 29 | ports: 30 | - 5601:5601 31 | extra_hosts: 32 | - "host.docker.internal:host-gateway" 33 | depends_on: 34 | - "elasticsearch" 35 | profiles: 36 | - "enterprise-search" 37 | environment: 38 | ELASTICSEARCH_URL: http://host.docker.internal:9200 39 | ELASTICSEARCH_HOSTS: http://host.docker.internal:9200 40 | ENTERPRISESEARCH_HOST: http://host.docker.internal:3002 41 | ELASTICSEARCH_USERNAME: kibana_system 42 | ELASTICSEARCH_PASSWORD: changeme 43 | networks: 44 | - esnet 45 | 46 | enterprise_search: 47 | image: docker.elastic.co/enterprise-search/enterprise-search:${VERSION} 48 | profiles: 49 | - "enterprise-search" 50 | depends_on: 51 | - "elasticsearch" 52 | environment: 53 | - ENT_SEARCH_DEFAULT_PASSWORD=changeme 54 | - elasticsearch.username=elastic 55 | - elasticsearch.password=changeme 56 | - elasticsearch.host=http://host.docker.internal:9200 57 | - allow_es_settings_modification=true 
58 | - kibana.host=http://host.docker.internal:5601 59 | - kibana.external_url=http://localhost:5601 60 | - secret_management.encryption_keys=["4a2cd3f81d39bf28738c10db0ca782095ffac07279561809eecc722e0c20eb09"] 61 | - JAVA_OPTS=-Xms2g -Xmx2g 62 | - email.account.enabled=true 63 | - email.account.smtp.auth=plain 64 | - email.account.smtp.starttls.enable=false 65 | - email.account.smtp.host=host.docker.internal 66 | - email.account.smtp.port=1025 67 | - email.account.email_defaults.from=local@example.com 68 | - DEBUG=true 69 | ports: 70 | - 3002:3002 71 | extra_hosts: 72 | - "host.docker.internal:host-gateway" 73 | networks: 74 | - esnet 75 | 76 | networks: 77 | esnet: 78 | driver: bridge 79 | 80 | volumes: 81 | esdata: 82 | driver: local 83 | -------------------------------------------------------------------------------- /tests/sources/fixtures/dir/fixture.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | import os 7 | import random 8 | import shutil 9 | import urllib.request 10 | import zipfile 11 | 12 | SYSTEM_DIR = os.path.join(os.path.dirname(__file__), "data") 13 | DATA_SIZE = os.environ.get("DATA_SIZE", "small").lower() 14 | 15 | if DATA_SIZE == "small": 16 | REPO = "connectors-python" 17 | elif DATA_SIZE == "medium": 18 | REPO = "elasticsearch" 19 | else: 20 | REPO = "kibana" 21 | 22 | 23 | def get_num_docs(): 24 | match os.environ.get("DATA_SIZE", "medium"): 25 | case "small": 26 | print("100") 27 | case "medium": 28 | print("200") 29 | case _: 30 | print("300") 31 | 32 | 33 | def load(): 34 | if os.path.exists(SYSTEM_DIR): 35 | teardown() 36 | print(f"Working in {SYSTEM_DIR}") 37 | os.makedirs(SYSTEM_DIR) 38 | repo_zip = os.path.join(SYSTEM_DIR, "repo.zip") 39 | 40 | # lazy tree generator: we download the elasticsearch repo and unzip it 41 | print(f"Downloading some source from {REPO} this may take a while...") 42 | urllib.request.urlretrieve( 43 | f"https://github.com/elastic/{REPO}/zipball/main", repo_zip 44 | ) 45 | 46 | print("Unzipping the tree") 47 | with zipfile.ZipFile(repo_zip) as zip_ref: 48 | zip_ref.extractall(SYSTEM_DIR) 49 | 50 | os.unlink(repo_zip) 51 | 52 | 53 | def remove(): 54 | # removing 10 files 55 | files = [] 56 | for root, __, filenames in os.walk(SYSTEM_DIR): 57 | for filename in filenames: 58 | files.append(os.path.join(root, filename)) 59 | 60 | random.shuffle(files) 61 | for i in range(10): 62 | print(f"deleting {files[i]}") 63 | os.unlink(files[i]) 64 | 65 | 66 | def teardown(): 67 | shutil.rmtree(SYSTEM_DIR) 68 | -------------------------------------------------------------------------------- /tests/sources/fixtures/dropbox/.env: -------------------------------------------------------------------------------- 1 | MAX_RSS="240M" 2 | DROPBOX_API_URL="http://127.0.0.1:8085/" 3 | DROPBOX_API_URL_V2="http://127.0.0.1:8085/2/" 4 | -------------------------------------------------------------------------------- /tests/sources/fixtures/dropbox/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10.8 2 | 3 | ADD . 
/python-flask 4 | WORKDIR /python-flask 5 | RUN pip install -r requirements.txt 6 | -------------------------------------------------------------------------------- /tests/sources/fixtures/dropbox/config.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://localhost:9200 3 | username: elastic 4 | password: changeme 5 | ssl: true 6 | bulk: 7 | queue_max_size: 1024 8 | queue_max_mem_size: 25 9 | display_every: 100 10 | chunk_size: 1000 11 | max_concurrency: 5 12 | chunk_max_mem_size: 5 13 | concurrent_downloads: 10 14 | request_timeout: 120 15 | max_wait_duration: 120 16 | initial_backoff_duration: 1 17 | backoff_multiplier: 2 18 | log_level: info 19 | 20 | service: 21 | idling: 5 22 | heartbeat: 300 23 | max_errors: 20 24 | max_errors_span: 600 25 | max_concurrent_syncs: 1 26 | job_cleanup_interval: 300 27 | log_level: INFO 28 | 29 | connectors: 30 | - 31 | connector_id: 'dropbox' 32 | service_type: 'dropbox' 33 | 34 | sources: 35 | dropbox: connectors.sources.dropbox:DropboxDataSource 36 | -------------------------------------------------------------------------------- /tests/sources/fixtures/dropbox/connector.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "dropbox", 3 | "service_type": "dropbox", 4 | "index_name": "search-dropbox", 5 | "sync_cursor": null, 6 | "is_native": false, 7 | "api_key_id": null, 8 | "status": "configured", 9 | "language": "en", 10 | "last_access_control_sync_error": null, 11 | "last_access_control_sync_status": null, 12 | "last_sync_status": null, 13 | "last_sync_error": null, 14 | "last_synced": null, 15 | "last_seen": null, 16 | "created_at": null, 17 | "updated_at": null, 18 | "configuration": { 19 | "path": { 20 | "label": "Path to fetch files/folders", 21 | "order": 1, 22 | "required": false, 23 | "type": "str", 24 | "value": "/" 25 | }, 26 | "app_key": { 27 | "label": "Dropbox App Key", 28 | "sensitive": true, 29 | "order": 2, 30 | "type": "str", 31 | "value": "abc#123" 32 | }, 33 | "app_secret": { 34 | "label": "Dropbox App Secret", 35 | "sensitive": true, 36 | "order": 3, 37 | "type": "str", 38 | "value": "abc#123" 39 | }, 40 | "refresh_token": { 41 | "label": "Dropbox Refresh Token", 42 | "sensitive": true, 43 | "order": 4, 44 | "type": "str", 45 | "value": "abc#123" 46 | }, 47 | "retry_count": { 48 | "default_value": 3, 49 | "display": "numeric", 50 | "label": "Retries per request", 51 | "order": 5, 52 | "required": false, 53 | "type": "int", 54 | "ui_restrictions": ["advanced"], 55 | "value": 3 56 | }, 57 | "concurrent_downloads": { 58 | "default_value": 100, 59 | "display": "numeric", 60 | "label": "Maximum concurrent downloads", 61 | "order": 6, 62 | "required": false, 63 | "type": "int", 64 | "ui_restrictions": ["advanced"], 65 | "value": 100 66 | } 67 | }, 68 | "scheduling": {"full": {"enabled": true, "interval": "1 * * * * *"}}, 69 | "pipeline": { 70 | "extract_binary_content": true, 71 | "name": "ent-search-generic-ingestion", 72 | "reduce_whitespace": true, 73 | "run_ml_inference": true 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /tests/sources/fixtures/dropbox/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | elasticsearch: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:${VERSION} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 
| - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | kibana: 28 | image: docker.elastic.co/kibana/kibana:${VERSION} 29 | ports: 30 | - 5601:5601 31 | extra_hosts: 32 | - "host.docker.internal:host-gateway" 33 | depends_on: 34 | - "elasticsearch" 35 | profiles: 36 | - "enterprise-search" 37 | environment: 38 | ELASTICSEARCH_URL: http://host.docker.internal:9200 39 | ELASTICSEARCH_HOSTS: http://host.docker.internal:9200 40 | ENTERPRISESEARCH_HOST: http://host.docker.internal:3002 41 | ELASTICSEARCH_USERNAME: kibana_system 42 | ELASTICSEARCH_PASSWORD: changeme 43 | networks: 44 | - esnet 45 | 46 | enterprise_search: 47 | image: docker.elastic.co/enterprise-search/enterprise-search:${VERSION} 48 | profiles: 49 | - "enterprise-search" 50 | depends_on: 51 | - "elasticsearch" 52 | environment: 53 | - ENT_SEARCH_DEFAULT_PASSWORD=changeme 54 | - elasticsearch.username=elastic 55 | - elasticsearch.password=changeme 56 | - elasticsearch.host=http://host.docker.internal:9200 57 | - allow_es_settings_modification=true 58 | - kibana.host=http://host.docker.internal:5601 59 | - kibana.external_url=http://localhost:5601 60 | - secret_management.encryption_keys=["4a2cd3f81d39bf28738c10db0ca782095ffac07279561809eecc722e0c20eb09"] 61 | - JAVA_OPTS=-Xms2g -Xmx2g 62 | - email.account.enabled=true 63 | - email.account.smtp.auth=plain 64 | - email.account.smtp.starttls.enable=false 65 | - email.account.smtp.host=host.docker.internal 66 | - email.account.smtp.port=1025 67 | - email.account.email_defaults.from=local@example.com 68 | - DEBUG=true 69 | ports: 70 | - 3002:3002 71 | extra_hosts: 72 | - "host.docker.internal:host-gateway" 73 | networks: 74 | - esnet 75 | 76 | dropbox: 77 | build: . 78 | command: python fixture.py 79 | ports: 80 | - "8085:8085" 81 | volumes: 82 | - .:/python-flask 83 | restart: always 84 | 85 | volumes: 86 | esdata: 87 | driver: local 88 | 89 | networks: 90 | esnet: 91 | driver: bridge 92 | -------------------------------------------------------------------------------- /tests/sources/fixtures/dropbox/requirements.txt: -------------------------------------------------------------------------------- 1 | flask==2.3.2 -------------------------------------------------------------------------------- /tests/sources/fixtures/github/.env: -------------------------------------------------------------------------------- 1 | MAX_RSS="240M" 2 | -------------------------------------------------------------------------------- /tests/sources/fixtures/github/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10.8 2 | 3 | ADD . 
/python-flask 4 | WORKDIR /python-flask 5 | RUN pip install -r requirements.txt 6 | -------------------------------------------------------------------------------- /tests/sources/fixtures/github/config.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://localhost:9200 3 | username: elastic 4 | password: changeme 5 | ssl: true 6 | bulk: 7 | queue_max_size: 1024 8 | queue_max_mem_size: 25 9 | display_every: 100 10 | chunk_size: 1000 11 | max_concurrency: 5 12 | chunk_max_mem_size: 5 13 | concurrent_downloads: 10 14 | request_timeout: 120 15 | max_wait_duration: 120 16 | initial_backoff_duration: 1 17 | backoff_multiplier: 2 18 | log_level: info 19 | 20 | service: 21 | idling: 5 22 | heartbeat: 300 23 | max_errors: 20 24 | max_errors_span: 600 25 | max_concurrent_syncs: 1 26 | job_cleanup_interval: 300 27 | log_level: INFO 28 | 29 | connectors: 30 | - 31 | connector_id: 'github' 32 | service_type: 'github' 33 | 34 | sources: 35 | github: connectors.sources.github:GitHubDataSource 36 | -------------------------------------------------------------------------------- /tests/sources/fixtures/github/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | elasticsearch: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:${VERSION} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | kibana: 28 | image: docker.elastic.co/kibana/kibana:${VERSION} 29 | ports: 30 | - 5601:5601 31 | extra_hosts: 32 | - "host.docker.internal:host-gateway" 33 | depends_on: 34 | - "elasticsearch" 35 | profiles: 36 | - "enterprise-search" 37 | environment: 38 | ELASTICSEARCH_URL: http://host.docker.internal:9200 39 | ELASTICSEARCH_HOSTS: http://host.docker.internal:9200 40 | ENTERPRISESEARCH_HOST: http://host.docker.internal:3002 41 | ELASTICSEARCH_USERNAME: kibana_system 42 | ELASTICSEARCH_PASSWORD: changeme 43 | networks: 44 | - esnet 45 | 46 | enterprise_search: 47 | image: docker.elastic.co/enterprise-search/enterprise-search:${VERSION} 48 | profiles: 49 | - "enterprise-search" 50 | depends_on: 51 | - "elasticsearch" 52 | environment: 53 | - ENT_SEARCH_DEFAULT_PASSWORD=changeme 54 | - elasticsearch.username=elastic 55 | - elasticsearch.password=changeme 56 | - elasticsearch.host=http://host.docker.internal:9200 57 | - allow_es_settings_modification=true 58 | - kibana.host=http://host.docker.internal:5601 59 | - kibana.external_url=http://localhost:5601 60 | - secret_management.encryption_keys=["4a2cd3f81d39bf28738c10db0ca782095ffac07279561809eecc722e0c20eb09"] 61 | - JAVA_OPTS=-Xms2g -Xmx2g 62 | - email.account.enabled=true 63 | - email.account.smtp.auth=plain 64 | - email.account.smtp.starttls.enable=false 65 | - email.account.smtp.host=host.docker.internal 66 | - email.account.smtp.port=1025 67 | - email.account.email_defaults.from=local@example.com 68 | - DEBUG=true 69 | ports: 70 | - 3002:3002 71 | extra_hosts: 72 | - "host.docker.internal:host-gateway" 73 | networks: 74 | - esnet 75 | 76 
| github: 77 | build: . 78 | command: python fixture.py 79 | ports: 80 | - "9091:9091" 81 | volumes: 82 | - .:/python-flask 83 | restart: always 84 | 85 | volumes: 86 | esdata: 87 | driver: local 88 | 89 | networks: 90 | esnet: 91 | driver: bridge 92 | -------------------------------------------------------------------------------- /tests/sources/fixtures/github/requirements.txt: -------------------------------------------------------------------------------- 1 | flask==2.3.2 2 | -------------------------------------------------------------------------------- /tests/sources/fixtures/google_cloud_storage/.env: -------------------------------------------------------------------------------- 1 | STORAGE_EMULATOR_HOST="http://localhost:4443" 2 | MAX_RSS="290M" 3 | -------------------------------------------------------------------------------- /tests/sources/fixtures/google_cloud_storage/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10.8 2 | ADD . /python-flask 3 | WORKDIR /python-flask 4 | RUN pip install -r requirements.txt 5 | -------------------------------------------------------------------------------- /tests/sources/fixtures/google_cloud_storage/config.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://localhost:9200 3 | username: elastic 4 | password: changeme 5 | ssl: true 6 | bulk: 7 | queue_max_size: 1024 8 | queue_max_mem_size: 25 9 | display_every: 100 10 | chunk_size: 1000 11 | max_concurrency: 5 12 | chunk_max_mem_size: 5 13 | concurrent_downloads: 10 14 | request_timeout: 120 15 | max_wait_duration: 120 16 | initial_backoff_duration: 1 17 | backoff_multiplier: 2 18 | log_level: info 19 | 20 | service: 21 | idling: 5 22 | heartbeat: 300 23 | max_errors: 20 24 | max_errors_span: 600 25 | max_concurrent_syncs: 1 26 | job_cleanup_interval: 300 27 | log_level: INFO 28 | 29 | connectors: 30 | - 31 | connector_id: 'gcs' 32 | service_type: 'google_cloud_storage' 33 | 34 | sources: 35 | google_cloud_storage: connectors.sources.google_cloud_storage:GoogleCloudStorageDataSource 36 | -------------------------------------------------------------------------------- /tests/sources/fixtures/google_cloud_storage/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:${VERSION} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | gcs-mocker: 28 | build: . 
29 | command: python mocker.py 30 | ports: 31 | - "4444:4444" 32 | volumes: 33 | - .:/python-flask 34 | restart: always 35 | 36 | google_cloud_storage: 37 | container_name: google_cloud_storage 38 | image: fsouza/fake-gcs-server 39 | ports: 40 | - "4443:4443" 41 | command: ["-scheme", "http", "-port", "4443"] 42 | 43 | kibana: 44 | image: docker.elastic.co/kibana/kibana:${VERSION} 45 | ports: 46 | - 5601:5601 47 | extra_hosts: 48 | - "host.docker.internal:host-gateway" 49 | depends_on: 50 | - "elasticsearch" 51 | profiles: 52 | - "enterprise-search" 53 | environment: 54 | ELASTICSEARCH_URL: http://host.docker.internal:9200 55 | ELASTICSEARCH_HOSTS: http://host.docker.internal:9200 56 | ENTERPRISESEARCH_HOST: http://host.docker.internal:3002 57 | ELASTICSEARCH_USERNAME: kibana_system 58 | ELASTICSEARCH_PASSWORD: changeme 59 | networks: 60 | - esnet 61 | 62 | enterprise_search: 63 | image: docker.elastic.co/enterprise-search/enterprise-search:${VERSION} 64 | profiles: 65 | - "enterprise-search" 66 | depends_on: 67 | - "elasticsearch" 68 | environment: 69 | - ENT_SEARCH_DEFAULT_PASSWORD=changeme 70 | - elasticsearch.username=elastic 71 | - elasticsearch.password=changeme 72 | - elasticsearch.host=http://host.docker.internal:9200 73 | - allow_es_settings_modification=true 74 | - kibana.host=http://host.docker.internal:5601 75 | - kibana.external_url=http://localhost:5601 76 | - secret_management.encryption_keys=["4a2cd3f81d39bf28738c10db0ca782095ffac07279561809eecc722e0c20eb09"] 77 | - JAVA_OPTS=-Xms2g -Xmx2g 78 | - email.account.enabled=true 79 | - email.account.smtp.auth=plain 80 | - email.account.smtp.starttls.enable=false 81 | - email.account.smtp.host=host.docker.internal 82 | - email.account.smtp.port=1025 83 | - email.account.email_defaults.from=local@example.com 84 | - DEBUG=true 85 | ports: 86 | - 3002:3002 87 | extra_hosts: 88 | - "host.docker.internal:host-gateway" 89 | networks: 90 | - esnet 91 | 92 | networks: 93 | esnet: 94 | 95 | volumes: 96 | esdata: 97 | driver: local 98 | -------------------------------------------------------------------------------- /tests/sources/fixtures/google_cloud_storage/mocker.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | """Module responsible for mocking POST call to Google Cloud Storage Data Source 7 | """ 8 | from flask import Flask 9 | 10 | app = Flask(__name__) 11 | 12 | 13 | @app.route("/token", methods=["POST"]) 14 | def post_auth_token(): 15 | """Function to load""" 16 | return { 17 | "access_token": "XXXXXXStBkRnGyZ2mUYOLgls7QVBxOg82XhBCFo8UIT5gM", 18 | "token_type": "Bearer", 19 | "expires_in": 3600, 20 | "refresh_token": "XXXXXX3SEBX7F2cfrHcqJEa3KoAHYeXES6nmho", 21 | } 22 | 23 | 24 | if __name__ == "__main__": 25 | app.run(host="0.0.0.0", port=4444) 26 | -------------------------------------------------------------------------------- /tests/sources/fixtures/google_cloud_storage/requirements.txt: -------------------------------------------------------------------------------- 1 | google-auth 2 | google-cloud-storage 3 | flask==2.2.3 4 | -------------------------------------------------------------------------------- /tests/sources/fixtures/google_cloud_storage/service_account_dummy_cert.pem: -------------------------------------------------------------------------------- 1 | -----BEGIN PRIVATE KEY----- 2 | MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDY3E8o1NEFcjMM 3 | HW/5ZfFJw29/8NEqpViNjQIx95Xx5KDtJ+nWn9+OW0uqsSqKlKGhAdAo+Q6bjx2c 4 | uXVsXTu7XrZUY5Kltvj94DvUa1wjNXs606r/RxWTJ58bfdC+gLLxBfGnB6CwK0YQ 5 | xnfpjNbkUfVVzO0MQD7UP0Hl5ZcY0Puvxd/yHuONQn/rIAieTHH1pqgW+zrH/y3c 6 | 59IGThC9PPtugI9ea8RSnVj3PWz1bX2UkCDpy9IRh9LzJLaYYX9RUd7++dULUlat 7 | AaXBh1U6emUDzhrIsgApjDVtimOPbmQWmX1S60mqQikRpVYZ8u+NDD+LNw+/Eovn 8 | xCj2Y3z1AgMBAAECggEAWDBzoqO1IvVXjBA2lqId10T6hXmN3j1ifyH+aAqK+FVl 9 | GjyWjDj0xWQcJ9ync7bQ6fSeTeNGzP0M6kzDU1+w6FgyZqwdmXWI2VmEizRjwk+/ 10 | /uLQUcL7I55Dxn7KUoZs/rZPmQDxmGLoue60Gg6z3yLzVcKiDc7cnhzhdBgDc8vd 11 | QorNAlqGPRnm3EqKQ6VQp6fyQmCAxrr45kspRXNLddat3AMsuqImDkqGKBmF3Q1y 12 | xWGe81LphUiRqvqbyUlh6cdSZ8pLBpc9m0c3qWPKs9paqBIvgUPlvOZMqec6x4S6 13 | ChbdkkTRLnbsRr0Yg/nDeEPlkhRBhasXpxpMUBgPywKBgQDs2axNkFjbU94uXvd5 14 | znUhDVxPFBuxyUHtsJNqW4p/ujLNimGet5E/YthCnQeC2P3Ym7c3fiz68amM6hiA 15 | OnW7HYPZ+jKFnefpAtjyOOs46AkftEg07T9XjwWNPt8+8l0DYawPoJgbM5iE0L2O 16 | x8TU1Vs4mXc+ql9F90GzI0x3VwKBgQDqZOOqWw3hTnNT07Ixqnmd3dugV9S7eW6o 17 | U9OoUgJB4rYTpG+yFqNqbRT8bkx37iKBMEReppqonOqGm4wtuRR6LSLlgcIU9Iwx 18 | yfH12UWqVmFSHsgZFqM/cK3wGev38h1WBIOx3/djKn7BdlKVh8kWyx6uC8bmV+E6 19 | OoK0vJD6kwKBgHAySOnROBZlqzkiKW8c+uU2VATtzJSydrWm0J4wUPJifNBa/hVW 20 | dcqmAzXC9xznt5AVa3wxHBOfyKaE+ig8CSsjNyNZ3vbmr0X04FoV1m91k2TeXNod 21 | jMTobkPThaNm4eLJMN2SQJuaHGTGERWC0l3T18t+/zrDMDCPiSLX1NAvAoGBAN1T 22 | VLJYdjvIMxf1bm59VYcepbK7HLHFkRq6xMJMZbtG0ryraZjUzYvB4q4VjHk2UDiC 23 | lhx13tXWDZH7MJtABzjyg+AI7XWSEQs2cBXACos0M4Myc6lU+eL+iA+OuoUOhmrh 24 | qmT8YYGu76/IBWUSqWuvcpHPpwl7871i4Ga/I3qnAoGBANNkKAcMoeAbJQK7a/Rn 25 | wPEJB+dPgNDIaboAsh1nZhVhN5cvdvCWuEYgOGCPQLYQF0zmTLcM+sVxOYgfy8mV 26 | fbNgPgsP5xmu6dw2COBKdtozw0HrWSRjACd1N4yGu75+wPCcX/gQarcjRcXXZeEa 27 | NtBLSfcqPULqD+h7br9lEJio 28 | -----END PRIVATE KEY----- 29 | -------------------------------------------------------------------------------- /tests/sources/fixtures/google_drive/.env: -------------------------------------------------------------------------------- 1 | GOOGLE_DRIVE_EMULATOR_HOST="http://localhost:10339" 2 | -------------------------------------------------------------------------------- /tests/sources/fixtures/google_drive/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10.8 2 | ADD . 
/python-flask 3 | WORKDIR /python-flask 4 | RUN pip install -r requirements.txt 5 | -------------------------------------------------------------------------------- /tests/sources/fixtures/google_drive/config.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://localhost:9200 3 | username: elastic 4 | password: changeme 5 | ssl: true 6 | bulk: 7 | queue_max_size: 1024 8 | queue_max_mem_size: 25 9 | display_every: 100 10 | chunk_size: 1000 11 | max_concurrency: 5 12 | chunk_max_mem_size: 5 13 | concurrent_downloads: 10 14 | request_timeout: 120 15 | max_wait_duration: 120 16 | initial_backoff_duration: 1 17 | backoff_multiplier: 2 18 | log_level: info 19 | 20 | service: 21 | idling: 5 22 | heartbeat: 300 23 | max_errors: 20 24 | max_errors_span: 600 25 | max_concurrent_syncs: 1 26 | job_cleanup_interval: 300 27 | log_level: INFO 28 | 29 | connectors: 30 | - 31 | connector_id: 'google_drive' 32 | service_type: 'google_drive' 33 | 34 | sources: 35 | google_drive: connectors.sources.google_drive:GoogleDriveDataSource 36 | -------------------------------------------------------------------------------- /tests/sources/fixtures/google_drive/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:${VERSION} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | kibana: 28 | image: docker.elastic.co/kibana/kibana:${VERSION} 29 | ports: 30 | - 5601:5601 31 | extra_hosts: 32 | - "host.docker.internal:host-gateway" 33 | depends_on: 34 | - "elasticsearch" 35 | profiles: 36 | - "enterprise-search" 37 | environment: 38 | ELASTICSEARCH_URL: http://host.docker.internal:9200 39 | ELASTICSEARCH_HOSTS: http://host.docker.internal:9200 40 | ENTERPRISESEARCH_HOST: http://host.docker.internal:3002 41 | ELASTICSEARCH_USERNAME: kibana_system 42 | ELASTICSEARCH_PASSWORD: changeme 43 | networks: 44 | - esnet 45 | 46 | enterprise_search: 47 | image: docker.elastic.co/enterprise-search/enterprise-search:${VERSION} 48 | profiles: 49 | - "enterprise-search" 50 | depends_on: 51 | - "elasticsearch" 52 | environment: 53 | - ENT_SEARCH_DEFAULT_PASSWORD=changeme 54 | - elasticsearch.username=elastic 55 | - elasticsearch.password=changeme 56 | - elasticsearch.host=http://host.docker.internal:9200 57 | - allow_es_settings_modification=true 58 | - kibana.host=http://host.docker.internal:5601 59 | - kibana.external_url=http://localhost:5601 60 | - secret_management.encryption_keys=["4a2cd3f81d39bf28738c10db0ca782095ffac07279561809eecc722e0c20eb09"] 61 | - JAVA_OPTS=-Xms2g -Xmx2g 62 | - email.account.enabled=true 63 | - email.account.smtp.auth=plain 64 | - email.account.smtp.starttls.enable=false 65 | - email.account.smtp.host=host.docker.internal 66 | - email.account.smtp.port=1025 67 | - email.account.email_defaults.from=local@example.com 68 | - DEBUG=true 69 | ports: 70 | - 3002:3002 71 | extra_hosts: 72 | - 
"host.docker.internal:host-gateway" 73 | networks: 74 | - esnet 75 | 76 | google_drive: 77 | build: . 78 | command: python fixture.py 79 | ports: 80 | - "10339:10339" 81 | volumes: 82 | - .:/python-flask 83 | restart: always 84 | environment: 85 | - DATA_SIZE=${DATA_SIZE} 86 | 87 | volumes: 88 | esdata: 89 | driver: local 90 | 91 | networks: 92 | esnet: 93 | driver: bridge 94 | -------------------------------------------------------------------------------- /tests/sources/fixtures/google_drive/fixture.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | """Module to handle api calls received from connector.""" 7 | 8 | import io 9 | import os 10 | import random 11 | import string 12 | 13 | from flask import Flask, request 14 | 15 | DOCS_COUNT = {"small": 750, "medium": 1500, "large": 3000} 16 | 17 | DATA_SIZE = os.environ.get("DATA_SIZE") 18 | 19 | 20 | def generate_random_string(length): 21 | """Function that generates random string with fixed lenght. 22 | 23 | Args: 24 | length (int): Length of generated string 25 | 26 | Returns: 27 | str: Random string 28 | """ 29 | return "".join([random.choice(string.ascii_letters) for _ in range(length)]) 30 | 31 | 32 | def generate_document_data(): 33 | """Function to generate random data content. 34 | 35 | Returns: 36 | io.BytesIO: Dummy attachment content 37 | """ 38 | # 1KB text file 39 | file_content = generate_random_string(1000) 40 | return io.BytesIO(bytes(file_content, encoding="utf-8")) 41 | 42 | 43 | app = Flask(__name__) 44 | 45 | 46 | @app.route("/drive/v3/about", methods=["GET"]) 47 | def about_get(): 48 | return {"kind": "drive#about"} 49 | 50 | 51 | @app.route("/drive/v3/drives", methods=["GET"]) 52 | def drives_list(): 53 | return { 54 | "nextPageToken": "dummyToken", 55 | "kind": "drive#driveList", 56 | "drives": [ 57 | {"id": "id1", "name": "Drive1 [Internal]", "kind": "drive#drive"}, 58 | {"id": "id2", "name": "Drive2 [Internal]", "kind": "drive#drive"}, 59 | ], 60 | } 61 | 62 | 63 | @app.route("/drive/v3/files", methods=["GET"]) 64 | def files_list(): 65 | files_list = [ 66 | { 67 | "kind": "drive#file", 68 | "mimeType": "text/plain", 69 | "id": generate_random_string(length=16), 70 | "name": f"file_name_{id}", 71 | "fileExtension": "txt", 72 | "size": 12345, 73 | "modifiedTime": 1687860674, 74 | "parents": [], 75 | } 76 | for id in range(DOCS_COUNT.get(DATA_SIZE, "small")) 77 | ] 78 | return {"nextPageToken": "dummyToken", "files": files_list} 79 | 80 | 81 | @app.route("/drive/v3/files/", methods=["GET"]) 82 | def files_get(file_id): 83 | req_params = request.args.to_dict() 84 | 85 | # response includes the file contents in the response body 86 | if req_params.get("alt", None) == "media": 87 | return generate_document_data() 88 | # response includes file metadata 89 | else: 90 | return { 91 | "kind": "drive#file", 92 | "id": "file_0", 93 | "name": "file_name_0", 94 | "mimeType": "text/plain", 95 | } 96 | 97 | 98 | @app.route("/token", methods=["POST"]) 99 | def post_auth_token(): 100 | """Function to load""" 101 | return { 102 | "access_token": "XXXXXXStBkRnGyZ2mUYOLgls7QVBxOg82XhBCFo8UIT5gM", 103 | "token_type": "Bearer", 104 | "expires_in": 3600, 105 | "refresh_token": "XXXXXX3SEBX7F2cfrHcqJEa3KoAHYeXES6nmho", 106 | } 107 | 108 | 109 
| if __name__ == "__main__": 110 | app.run(host="0.0.0.0", port=10339) 111 | -------------------------------------------------------------------------------- /tests/sources/fixtures/google_drive/requirements.txt: -------------------------------------------------------------------------------- 1 | flask==2.2.3 2 | -------------------------------------------------------------------------------- /tests/sources/fixtures/jira/.env: -------------------------------------------------------------------------------- 1 | MAX_RSS="240M" 2 | -------------------------------------------------------------------------------- /tests/sources/fixtures/jira/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10.8 2 | 3 | ADD . /python-flask 4 | WORKDIR /python-flask 5 | RUN pip install -r requirements.txt 6 | -------------------------------------------------------------------------------- /tests/sources/fixtures/jira/config.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://localhost:9200 3 | username: elastic 4 | password: changeme 5 | ssl: true 6 | bulk: 7 | queue_max_size: 1024 8 | queue_max_mem_size: 25 9 | display_every: 100 10 | chunk_size: 1000 11 | max_concurrency: 5 12 | chunk_max_mem_size: 5 13 | concurrent_downloads: 10 14 | request_timeout: 120 15 | max_wait_duration: 120 16 | initial_backoff_duration: 1 17 | backoff_multiplier: 2 18 | log_level: info 19 | 20 | service: 21 | idling: 5 22 | heartbeat: 300 23 | max_errors: 20 24 | max_errors_span: 600 25 | max_concurrent_syncs: 1 26 | job_cleanup_interval: 300 27 | log_level: INFO 28 | 29 | connectors: 30 | - 31 | connector_id: 'jira' 32 | service_type: 'jira' 33 | 34 | sources: 35 | jira: connectors.sources.jira:JiraDataSource 36 | -------------------------------------------------------------------------------- /tests/sources/fixtures/jira/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | elasticsearch: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:${VERSION} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | kibana: 28 | image: docker.elastic.co/kibana/kibana:${VERSION} 29 | ports: 30 | - 5601:5601 31 | extra_hosts: 32 | - "host.docker.internal:host-gateway" 33 | depends_on: 34 | - "elasticsearch" 35 | profiles: 36 | - "enterprise-search" 37 | environment: 38 | ELASTICSEARCH_URL: http://host.docker.internal:9200 39 | ELASTICSEARCH_HOSTS: http://host.docker.internal:9200 40 | ENTERPRISESEARCH_HOST: http://host.docker.internal:3002 41 | ELASTICSEARCH_USERNAME: kibana_system 42 | ELASTICSEARCH_PASSWORD: changeme 43 | networks: 44 | - esnet 45 | 46 | enterprise_search: 47 | image: docker.elastic.co/enterprise-search/enterprise-search:${VERSION} 48 | profiles: 49 | - "enterprise-search" 50 | depends_on: 51 | - "elasticsearch" 52 | environment: 53 | - ENT_SEARCH_DEFAULT_PASSWORD=changeme 54 | - elasticsearch.username=elastic 55 | - 
elasticsearch.password=changeme 56 | - elasticsearch.host=http://host.docker.internal:9200 57 | - allow_es_settings_modification=true 58 | - kibana.host=http://host.docker.internal:5601 59 | - kibana.external_url=http://localhost:5601 60 | - secret_management.encryption_keys=["4a2cd3f81d39bf28738c10db0ca782095ffac07279561809eecc722e0c20eb09"] 61 | - JAVA_OPTS=-Xms2g -Xmx2g 62 | - email.account.enabled=true 63 | - email.account.smtp.auth=plain 64 | - email.account.smtp.starttls.enable=false 65 | - email.account.smtp.host=host.docker.internal 66 | - email.account.smtp.port=1025 67 | - email.account.email_defaults.from=local@example.com 68 | - DEBUG=true 69 | ports: 70 | - 3002:3002 71 | extra_hosts: 72 | - "host.docker.internal:host-gateway" 73 | networks: 74 | - esnet 75 | 76 | jira: 77 | build: . 78 | command: python fixture.py 79 | ports: 80 | - "8080:8080" 81 | volumes: 82 | - .:/python-flask 83 | restart: always 84 | 85 | volumes: 86 | esdata: 87 | driver: local 88 | 89 | networks: 90 | esnet: 91 | driver: bridge 92 | -------------------------------------------------------------------------------- /tests/sources/fixtures/jira/requirements.txt: -------------------------------------------------------------------------------- 1 | flask==2.2.3 -------------------------------------------------------------------------------- /tests/sources/fixtures/mongodb/config.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://localhost:9200 3 | username: elastic 4 | password: changeme 5 | ssl: true 6 | bulk: 7 | queue_max_size: 1024 8 | queue_max_mem_size: 25 9 | display_every: 100 10 | chunk_size: 1000 11 | max_concurrency: 5 12 | chunk_max_mem_size: 5 13 | concurrent_downloads: 10 14 | request_timeout: 120 15 | max_wait_duration: 120 16 | initial_backoff_duration: 1 17 | backoff_multiplier: 2 18 | log_level: info 19 | 20 | service: 21 | idling: 5 22 | heartbeat: 300 23 | max_errors: 20 24 | max_errors_span: 600 25 | max_concurrent_syncs: 1 26 | job_cleanup_interval: 300 27 | log_level: INFO 28 | 29 | connectors: 30 | - 31 | connector_id: 'mongo' 32 | service_type: 'mongodb' 33 | 34 | sources: 35 | mongodb: connectors.sources.mongo:MongoDataSource 36 | -------------------------------------------------------------------------------- /tests/sources/fixtures/mongodb/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:${VERSION} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | mongo: 28 | container_name: mongo 29 | image: mongo:latest 30 | volumes: 31 | - mongo:/data/db 32 | networks: 33 | - mongo-network 34 | ports: 35 | - 27021:27017 36 | restart: always 37 | environment: 38 | # provide your credentials here 39 | - MONGO_INITDB_ROOT_USERNAME=admin 40 | - MONGO_INITDB_ROOT_PASSWORD=justtesting 41 | kibana: 42 | image: docker.elastic.co/kibana/kibana:${VERSION} 43 | ports: 44 | - 5601:5601 45 | extra_hosts: 
46 | - "host.docker.internal:host-gateway" 47 | depends_on: 48 | - "elasticsearch" 49 | profiles: 50 | - "enterprise-search" 51 | environment: 52 | ELASTICSEARCH_URL: http://host.docker.internal:9200 53 | ELASTICSEARCH_HOSTS: http://host.docker.internal:9200 54 | ENTERPRISESEARCH_HOST: http://host.docker.internal:3002 55 | ELASTICSEARCH_USERNAME: kibana_system 56 | ELASTICSEARCH_PASSWORD: changeme 57 | networks: 58 | - esnet 59 | 60 | enterprise_search: 61 | image: docker.elastic.co/enterprise-search/enterprise-search:${VERSION} 62 | profiles: 63 | - "enterprise-search" 64 | depends_on: 65 | - "elasticsearch" 66 | environment: 67 | - ENT_SEARCH_DEFAULT_PASSWORD=changeme 68 | - elasticsearch.username=elastic 69 | - elasticsearch.password=changeme 70 | - elasticsearch.host=http://host.docker.internal:9200 71 | - allow_es_settings_modification=true 72 | - kibana.host=http://host.docker.internal:5601 73 | - kibana.external_url=http://localhost:5601 74 | - secret_management.encryption_keys=["4a2cd3f81d39bf28738c10db0ca782095ffac07279561809eecc722e0c20eb09"] 75 | - JAVA_OPTS=-Xms2g -Xmx2g 76 | - email.account.enabled=true 77 | - email.account.smtp.auth=plain 78 | - email.account.smtp.starttls.enable=false 79 | - email.account.smtp.host=host.docker.internal 80 | - email.account.smtp.port=1025 81 | - email.account.email_defaults.from=local@example.com 82 | - DEBUG=true 83 | ports: 84 | - 3002:3002 85 | extra_hosts: 86 | - "host.docker.internal:host-gateway" 87 | networks: 88 | - esnet 89 | 90 | networks: 91 | mongo-network: 92 | driver: bridge 93 | esnet: 94 | 95 | volumes: 96 | esdata: 97 | driver: local 98 | mongo: 99 | driver: local 100 | -------------------------------------------------------------------------------- /tests/sources/fixtures/mongodb/fixture.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import bson 4 | from faker import Faker 5 | from pymongo import MongoClient 6 | 7 | DATA_SIZE = os.environ.get("DATA_SIZE", "small").lower() 8 | _SIZES = {"small": 750, "medium": 1500, "large": 3000} 9 | NUMBER_OF_RECORDS_TO_DELETE = 50 10 | 11 | fake = Faker() 12 | client = MongoClient("mongodb://admin:justtesting@127.0.0.1:27021") 13 | 14 | 15 | def setup(): 16 | pass 17 | 18 | 19 | def load(): 20 | def _random_record(): 21 | return { 22 | "id": bson.ObjectId(), 23 | "name": fake.name(), 24 | "address": fake.address(), 25 | "birthdate": fake.date(), 26 | "time": fake.time(), 27 | "comment": fake.sentence(), 28 | } 29 | 30 | record_number = _SIZES[DATA_SIZE] + NUMBER_OF_RECORDS_TO_DELETE 31 | 32 | print(f"Generating {record_number} random records") 33 | db = client.sample_database 34 | collection = db.sample_collection 35 | 36 | data = [] 37 | for _ in range(record_number): 38 | data.append(_random_record()) 39 | collection.insert_many(data) 40 | 41 | 42 | def remove(): 43 | db = client.sample_database 44 | collection = db.sample_collection 45 | 46 | records = collection.find().limit(NUMBER_OF_RECORDS_TO_DELETE) 47 | doc_ids = [rec.get("_id") for rec in records] 48 | 49 | query = {"_id": {"$in": doc_ids}} 50 | collection.delete_many(query) 51 | 52 | 53 | def teardown(): 54 | pass 55 | -------------------------------------------------------------------------------- /tests/sources/fixtures/mongodb/requirements.txt: -------------------------------------------------------------------------------- 1 | pymongo 2 | faker 3 | -------------------------------------------------------------------------------- 
/tests/sources/fixtures/mongodb_serverless/config.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://localhost:9200 3 | username: elastic 4 | password: changeme 5 | ssl: true 6 | bulk: 7 | queue_max_size: 1024 8 | queue_max_mem_size: 25 9 | display_every: 100 10 | chunk_size: 1000 11 | max_concurrency: 5 12 | chunk_max_mem_size: 5 13 | concurrent_downloads: 10 14 | request_timeout: 120 15 | max_wait_duration: 120 16 | initial_backoff_duration: 1 17 | backoff_multiplier: 2 18 | log_level: info 19 | 20 | service: 21 | idling: 5 22 | heartbeat: 300 23 | max_errors: 20 24 | max_errors_span: 600 25 | max_concurrent_syncs: 1 26 | job_cleanup_interval: 300 27 | log_level: INFO 28 | 29 | connectors: 30 | - 31 | connector_id: 'mongo_serverless' 32 | service_type: 'mongodb' 33 | 34 | sources: 35 | mongodb: connectors.sources.mongo:MongoDataSource 36 | -------------------------------------------------------------------------------- /tests/sources/fixtures/mongodb_serverless/connector.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mongodb", 3 | "service_type": "mongodb", 4 | "index_name": "search-mongodb", 5 | "sync_cursor": null, 6 | "is_native": false, 7 | "api_key_id": null, 8 | "status": "configured", 9 | "language": "en", 10 | "last_sync_status": null, 11 | "last_permissions_sync_status": null, 12 | "last_sync_error": null, 13 | "last_synced": null, 14 | "last_seen": null, 15 | "created_at": null, 16 | "updated_at": null, 17 | "configuration": { 18 | "host": { 19 | "label": "Server Hostname", 20 | "order": 1, 21 | "type": "str", 22 | "value": "mongodb://127.0.0.1:27021" 23 | }, 24 | "user": { 25 | "default_value": "", 26 | "label": "Username", 27 | "order": 2, 28 | "type": "str", 29 | "value": "admin", 30 | "required": false 31 | }, 32 | "password": { 33 | "default_value": "", 34 | "label": "Password", 35 | "order": 3, 36 | "sensitive": true, 37 | "type": "str", 38 | "value": "justtesting", 39 | "required": false 40 | }, 41 | "database": { 42 | "label": "Database", 43 | "order": 4, 44 | "type": "str", 45 | "value": "sample_database" 46 | }, 47 | "collection": { 48 | "label": "Collection", 49 | "order": 5, 50 | "type": "str", 51 | "value": "sample_collection" 52 | }, 53 | "direct_connection": { 54 | "display": "toggle", 55 | "label": "Direct connection", 56 | "order": 6, 57 | "type": "bool", 58 | "value": true 59 | } 60 | }, 61 | "filtering": [ 62 | { 63 | "domain": "DEFAULT", 64 | "draft": { 65 | "advanced_snippet": { 66 | "updated_at": "2023-01-31T16:41:27.341Z", 67 | "created_at": "2023-01-31T16:38:49.244Z", 68 | "value": {} 69 | }, 70 | "rules": [ 71 | { 72 | "field": "_", 73 | "updated_at": "2023-01-31T16:41:27.341Z", 74 | "created_at": "2023-01-31T16:38:49.244Z", 75 | "rule": "regex", 76 | "id": "DEFAULT", 77 | "value": ".*", 78 | "order": 1, 79 | "policy": "include" 80 | } 81 | ], 82 | "validation": { 83 | "state": "valid", 84 | "errors": [] 85 | } 86 | }, 87 | "active": { 88 | "advanced_snippet": { 89 | "updated_at": "2023-01-31T16:41:27.341Z", 90 | "created_at": "2023-01-31T16:38:49.244Z", 91 | "value": {} 92 | }, 93 | "rules": [ 94 | { 95 | "field": "_", 96 | "updated_at": "2023-01-31T16:41:27.341Z", 97 | "created_at": "2023-01-31T16:38:49.244Z", 98 | "rule": "regex", 99 | "id": "DEFAULT", 100 | "value": ".*", 101 | "order": 1, 102 | "policy": "include" 103 | } 104 | ], 105 | "validation": { 106 | "state": "valid", 107 | "errors": [] 108 | } 109 | } 110 | } 
111 | ], 112 | "scheduling": { 113 | "full": { 114 | "enabled": true, 115 | "interval": "1 * * * * *" 116 | } 117 | }, 118 | "pipeline": { 119 | "extract_binary_content": true, 120 | "name": "ent-search-generic-ingestion", 121 | "reduce_whitespace": true, 122 | "run_ml_inference": true 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /tests/sources/fixtures/mongodb_serverless/fixture.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | 4 | import bson 5 | from faker import Faker 6 | from pymongo import MongoClient 7 | 8 | DATA_SIZE = os.environ.get("DATA_SIZE", "small").lower() 9 | _SIZES = {"small": 750, "medium": 1500, "large": 3000} 10 | NUMBER_OF_RECORDS_TO_DELETE = 50 11 | 12 | fake = Faker() 13 | client = MongoClient("mongodb://admin:justtesting@127.0.0.1:27021") 14 | OB_STORE = "/tmp/objectstore" 15 | 16 | 17 | def setup(): 18 | print(f"preparing {OB_STORE}") 19 | # creating the file storage for es 20 | if os.path.exists(OB_STORE): 21 | shutil.rmtree(OB_STORE) 22 | os.makedirs(OB_STORE, exist_ok=True) 23 | for r, _, _ in os.walk(OB_STORE): 24 | os.chmod(r, 0o777) 25 | os.chmod(OB_STORE, 0o777) 26 | print(f"{OB_STORE} ready") 27 | 28 | 29 | def load(): 30 | def _random_record(): 31 | return { 32 | "id": bson.ObjectId(), 33 | "name": fake.name(), 34 | "address": fake.address(), 35 | "birthdate": fake.date(), 36 | "time": fake.time(), 37 | "comment": fake.sentence(), 38 | } 39 | 40 | record_number = _SIZES[DATA_SIZE] + NUMBER_OF_RECORDS_TO_DELETE 41 | 42 | print(f"Generating {record_number} random records") 43 | db = client.sample_database 44 | collection = db.sample_collection 45 | 46 | data = [] 47 | for _ in range(record_number): 48 | data.append(_random_record()) 49 | collection.insert_many(data) 50 | 51 | 52 | def remove(): 53 | db = client.sample_database 54 | collection = db.sample_collection 55 | 56 | records = collection.find().limit(NUMBER_OF_RECORDS_TO_DELETE) 57 | doc_ids = [rec.get("_id") for rec in records] 58 | 59 | query = {"_id": {"$in": doc_ids}} 60 | collection.delete_many(query) 61 | 62 | 63 | def teardown(): 64 | pass 65 | -------------------------------------------------------------------------------- /tests/sources/fixtures/mongodb_serverless/requirements.txt: -------------------------------------------------------------------------------- 1 | pymongo 2 | faker 3 | -------------------------------------------------------------------------------- /tests/sources/fixtures/mssql/.env: -------------------------------------------------------------------------------- 1 | MAX_RSS="230M" 2 | -------------------------------------------------------------------------------- /tests/sources/fixtures/mssql/config.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://localhost:9200 3 | username: elastic 4 | password: changeme 5 | ssl: true 6 | bulk: 7 | queue_max_size: 1024 8 | queue_max_mem_size: 25 9 | display_every: 100 10 | chunk_size: 1000 11 | max_concurrency: 5 12 | chunk_max_mem_size: 5 13 | concurrent_downloads: 10 14 | request_timeout: 120 15 | max_wait_duration: 120 16 | initial_backoff_duration: 1 17 | backoff_multiplier: 2 18 | log_level: info 19 | 20 | service: 21 | idling: 5 22 | heartbeat: 300 23 | max_errors: 20 24 | max_errors_span: 600 25 | max_concurrent_syncs: 1 26 | job_cleanup_interval: 300 27 | log_level: INFO 28 | 29 | connectors: 30 | - 31 | connector_id: 'mssql' 32 | 
service_type: 'mssql' 33 | 34 | sources: 35 | mssql: connectors.sources.mssql:MSSQLDataSource 36 | -------------------------------------------------------------------------------- /tests/sources/fixtures/mssql/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:${VERSION} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | mssql: 28 | container_name: mssql 29 | image: mcr.microsoft.com/azure-sql-edge:latest 30 | environment: 31 | ACCEPT_EULA: Y 32 | MSSQL_SA_PASSWORD: Password_123 33 | ports: 34 | - 9090:1433 35 | 36 | kibana: 37 | image: docker.elastic.co/kibana/kibana:${VERSION} 38 | ports: 39 | - 5601:5601 40 | extra_hosts: 41 | - "host.docker.internal:host-gateway" 42 | depends_on: 43 | - "elasticsearch" 44 | profiles: 45 | - "enterprise-search" 46 | environment: 47 | ELASTICSEARCH_URL: http://host.docker.internal:9200 48 | ELASTICSEARCH_HOSTS: http://host.docker.internal:9200 49 | ENTERPRISESEARCH_HOST: http://host.docker.internal:3002 50 | ELASTICSEARCH_USERNAME: kibana_system 51 | ELASTICSEARCH_PASSWORD: changeme 52 | networks: 53 | - esnet 54 | 55 | enterprise_search: 56 | image: docker.elastic.co/enterprise-search/enterprise-search:${VERSION} 57 | profiles: 58 | - "enterprise-search" 59 | depends_on: 60 | - "elasticsearch" 61 | environment: 62 | - ENT_SEARCH_DEFAULT_PASSWORD=changeme 63 | - elasticsearch.username=elastic 64 | - elasticsearch.password=changeme 65 | - elasticsearch.host=http://host.docker.internal:9200 66 | - allow_es_settings_modification=true 67 | - kibana.host=http://host.docker.internal:5601 68 | - kibana.external_url=http://localhost:5601 69 | - secret_management.encryption_keys=["4a2cd3f81d39bf28738c10db0ca782095ffac07279561809eecc722e0c20eb09"] 70 | - JAVA_OPTS=-Xms2g -Xmx2g 71 | - email.account.enabled=true 72 | - email.account.smtp.auth=plain 73 | - email.account.smtp.starttls.enable=false 74 | - email.account.smtp.host=host.docker.internal 75 | - email.account.smtp.port=1025 76 | - email.account.email_defaults.from=local@example.com 77 | - DEBUG=true 78 | ports: 79 | - 3002:3002 80 | extra_hosts: 81 | - "host.docker.internal:host-gateway" 82 | networks: 83 | - esnet 84 | 85 | networks: 86 | esnet: 87 | 88 | volumes: 89 | esdata: 90 | driver: local 91 | -------------------------------------------------------------------------------- /tests/sources/fixtures/mssql/fixture.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | import os 7 | import random 8 | import string 9 | 10 | import pytds 11 | 12 | DATA_SIZE = os.environ.get("DATA_SIZE", "small").lower() 13 | DATABASE_NAME = "xe" 14 | _SIZES = {"small": 5, "medium": 10, "large": 30} 15 | NUM_TABLES = _SIZES[DATA_SIZE] 16 | HOST = "127.0.0.1" 17 | PORT = 9090 18 | USER = "admin" 19 | PASSWORD = "Password_123" 20 | 21 | 22 | def random_text(k=1024 * 20): 23 | """Function to generate random text 24 | 25 | Args: 26 | k (int, optional): size of data in bytes. Defaults to 1024*20. 27 | 28 | Returns: 29 | string: random text 30 | """ 31 | return "".join(random.choices(string.ascii_uppercase + string.digits, k=k)) 32 | 33 | 34 | BIG_TEXT = random_text() 35 | 36 | 37 | def inject_lines(table, cursor, start, lines): 38 | """Ingest rows in table 39 | 40 | Args: 41 | table (str): Name of table 42 | cursor (cursor): Cursor to execute query 43 | start (int): Starting row 44 | lines (int): Number of rows 45 | """ 46 | rows = [] 47 | for row_id in range(lines): 48 | row_id += start 49 | rows.append((f"user_{row_id}", row_id, BIG_TEXT)) 50 | sql_query = ( 51 | f"INSERT INTO customers_{table} (name, age, description) VALUES (%s, %s, %s)" 52 | ) 53 | cursor.executemany(sql_query, rows) 54 | 55 | 56 | def load(): 57 | """Load N tables of 10,000 rows each; each row is ~ 1024*20 bytes""" 58 | 59 | database_sa = pytds.connect( 60 | server=HOST, port=PORT, user="sa", password=PASSWORD, autocommit=True 61 | ) 62 | cursor = database_sa.cursor() 63 | cursor.execute("CREATE LOGIN admin WITH PASSWORD = 'Password_123'") 64 | cursor.execute("ALTER SERVER ROLE [sysadmin] ADD MEMBER [admin]") 65 | cursor.close() 66 | database_sa.close() 67 | database = pytds.connect(server=HOST, port=PORT, user=USER, password=PASSWORD) 68 | database.autocommit = True 69 | cursor = database.cursor() 70 | cursor.execute(f"DROP DATABASE IF EXISTS {DATABASE_NAME}") 71 | cursor.execute(f"CREATE DATABASE {DATABASE_NAME}") 72 | cursor.execute(f"USE {DATABASE_NAME}") 73 | database.autocommit = False 74 | 75 | for table in range(NUM_TABLES): 76 | print(f"Adding data to table #{table}...") 77 | sql_query = f"CREATE TABLE customers_{table} (name VARCHAR(255), age int, description TEXT, PRIMARY KEY (name))" 78 | cursor.execute(sql_query) 79 | for i in range(10): 80 | inject_lines(table, cursor, i * 1000, 1000) 81 | database.commit() 82 | 83 | 84 | def remove(): 85 | """Removes 10 random items per table""" 86 | 87 | database = pytds.connect(server=HOST, port=PORT, user=USER, password=PASSWORD) 88 | cursor = database.cursor() 89 | cursor.execute(f"USE {DATABASE_NAME}") 90 | for table in range(NUM_TABLES): 91 | rows = [(f"user_{row_id}",) for row_id in random.sample(range(1, 1000), 10)] 92 | sql_query = f"DELETE from customers_{table} where name=%s" 93 | cursor.executemany(sql_query, rows) 94 | database.commit() 95 | -------------------------------------------------------------------------------- /tests/sources/fixtures/mssql/requirements.txt: -------------------------------------------------------------------------------- 1 | python-tds==1.12.0 2 | -------------------------------------------------------------------------------- /tests/sources/fixtures/mysql/.env: -------------------------------------------------------------------------------- 1 | MAX_RSS="240M" 2 | -------------------------------------------------------------------------------- /tests/sources/fixtures/mysql/config.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: 
http://localhost:9200 3 | username: elastic 4 | password: changeme 5 | ssl: true 6 | bulk: 7 | queue_max_size: 1024 8 | queue_max_mem_size: 25 9 | display_every: 100 10 | chunk_size: 1000 11 | max_concurrency: 5 12 | chunk_max_mem_size: 5 13 | concurrent_downloads: 10 14 | request_timeout: 120 15 | max_wait_duration: 120 16 | initial_backoff_duration: 1 17 | backoff_multiplier: 2 18 | log_level: info 19 | 20 | service: 21 | idling: 5 22 | heartbeat: 300 23 | max_errors: 20 24 | max_errors_span: 600 25 | max_concurrent_syncs: 1 26 | job_cleanup_interval: 300 27 | log_level: INFO 28 | 29 | connectors: 30 | - 31 | connector_id: 'mysql' 32 | service_type: 'mysql' 33 | 34 | sources: 35 | mysql: connectors.sources.mysql:MySqlDataSource 36 | -------------------------------------------------------------------------------- /tests/sources/fixtures/mysql/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:${VERSION} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | mysql: 28 | container_name: mysql 29 | image: mysql:latest 30 | environment: 31 | MYSQL_ROOT_PASSWORD: changeme 32 | command: --default-authentication-plugin=mysql_native_password 33 | volumes: 34 | - mysql:/data/mysql 35 | networks: 36 | - mysql-network 37 | ports: 38 | - 3306:3306 39 | restart: always 40 | 41 | kibana: 42 | image: docker.elastic.co/kibana/kibana:${VERSION} 43 | ports: 44 | - 5601:5601 45 | extra_hosts: 46 | - "host.docker.internal:host-gateway" 47 | depends_on: 48 | - "elasticsearch" 49 | profiles: 50 | - "enterprise-search" 51 | environment: 52 | ELASTICSEARCH_URL: http://host.docker.internal:9200 53 | ELASTICSEARCH_HOSTS: http://host.docker.internal:9200 54 | ENTERPRISESEARCH_HOST: http://host.docker.internal:3002 55 | ELASTICSEARCH_USERNAME: kibana_system 56 | ELASTICSEARCH_PASSWORD: changeme 57 | networks: 58 | - esnet 59 | 60 | enterprise_search: 61 | image: docker.elastic.co/enterprise-search/enterprise-search:${VERSION} 62 | profiles: 63 | - "enterprise-search" 64 | depends_on: 65 | - "elasticsearch" 66 | environment: 67 | - ENT_SEARCH_DEFAULT_PASSWORD=changeme 68 | - elasticsearch.username=elastic 69 | - elasticsearch.password=changeme 70 | - elasticsearch.host=http://host.docker.internal:9200 71 | - allow_es_settings_modification=true 72 | - kibana.host=http://host.docker.internal:5601 73 | - kibana.external_url=http://localhost:5601 74 | - secret_management.encryption_keys=["4a2cd3f81d39bf28738c10db0ca782095ffac07279561809eecc722e0c20eb09"] 75 | - JAVA_OPTS=-Xms2g -Xmx2g 76 | - email.account.enabled=true 77 | - email.account.smtp.auth=plain 78 | - email.account.smtp.starttls.enable=false 79 | - email.account.smtp.host=host.docker.internal 80 | - email.account.smtp.port=1025 81 | - email.account.email_defaults.from=local@example.com 82 | - DEBUG=true 83 | ports: 84 | - 3002:3002 85 | extra_hosts: 86 | - "host.docker.internal:host-gateway" 87 | networks: 88 | - esnet 89 | 90 | 
networks: 91 | mysql-network: 92 | driver: bridge 93 | esnet: 94 | 95 | volumes: 96 | esdata: 97 | driver: local 98 | mysql: 99 | driver: local 100 | -------------------------------------------------------------------------------- /tests/sources/fixtures/mysql/fixture.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | import os 7 | import random 8 | import string 9 | 10 | from mysql.connector import connect 11 | 12 | DATA_SIZE = os.environ.get("DATA_SIZE", "small").lower() 13 | DATABASE_NAME = "customerinfo" 14 | _SIZES = {"small": 5, "medium": 10, "large": 30} 15 | NUM_TABLES = _SIZES[DATA_SIZE] 16 | 17 | 18 | def random_text(k=1024 * 20): 19 | return "".join(random.choices(string.ascii_uppercase + string.digits, k=k)) 20 | 21 | 22 | BIG_TEXT = random_text() 23 | 24 | 25 | def inject_lines(table, cursor, start, lines): 26 | rows = [] 27 | for row_id in range(lines): 28 | row_id += start 29 | rows.append((f"user_{row_id}", row_id, BIG_TEXT)) 30 | sql_query = ( 31 | f"INSERT INTO customers_{table}" 32 | + "(name, age, description) VALUES (%s, %s, %s)" 33 | ) 34 | cursor.executemany(sql_query, rows) 35 | 36 | 37 | def load(): 38 | """Load N tables of 10,000 rows each; each row is ~ 1024*20 bytes""" 39 | database = connect(host="127.0.0.1", port=3306, user="root", password="changeme") 40 | cursor = database.cursor() 41 | cursor.execute(f"DROP DATABASE IF EXISTS {DATABASE_NAME}") 42 | cursor.execute(f"CREATE DATABASE {DATABASE_NAME}") 43 | cursor.execute(f"USE {DATABASE_NAME}") 44 | for table in range(NUM_TABLES): 45 | print(f"Adding data to table #{table}...") 46 | sql_query = f"CREATE TABLE IF NOT EXISTS customers_{table} (name VARCHAR(255), age int, description LONGTEXT, PRIMARY KEY (name))" 47 | cursor.execute(sql_query) 48 | for i in range(10): 49 | inject_lines(table, cursor, i * 1000, 1000) 50 | 51 | database.commit() 52 | 53 | 54 | def remove(): 55 | """Removes 10 random items per table""" 56 | database = connect(host="127.0.0.1", port=3306, user="root", password="changeme") 57 | cursor = database.cursor() 58 | cursor.execute(f"USE {DATABASE_NAME}") 59 | for table in range(NUM_TABLES): 60 | print(f"Working on table {table}...") 61 | rows = [(f"user_{row_id}",) for row_id in random.sample(range(1, 1000), 10)] 62 | print(rows) 63 | sql_query = f"DELETE from customers_{table} where name=%s" 64 | cursor.executemany(sql_query, rows) 65 | database.commit() 66 | -------------------------------------------------------------------------------- /tests/sources/fixtures/mysql/requirements.txt: -------------------------------------------------------------------------------- 1 | mysql-connector-python 2 | -------------------------------------------------------------------------------- /tests/sources/fixtures/network_drive/config.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://localhost:9200 3 | username: elastic 4 | password: changeme 5 | ssl: true 6 | bulk: 7 | queue_max_size: 1024 8 | queue_max_mem_size: 25 9 | display_every: 100 10 | chunk_size: 1000 11 | max_concurrency: 5 12 | chunk_max_mem_size: 5 13 | concurrent_downloads: 10 14 | request_timeout: 120 15 | max_wait_duration: 120 16 | initial_backoff_duration: 1 17 | 
backoff_multiplier: 2 18 | log_level: info 19 | 20 | service: 21 | idling: 5 22 | heartbeat: 300 23 | max_errors: 20 24 | max_errors_span: 600 25 | max_concurrent_syncs: 1 26 | job_cleanup_interval: 300 27 | log_level: INFO 28 | 29 | connectors: 30 | - 31 | connector_id: 'network_drive' 32 | service_type: 'network_drive' 33 | 34 | sources: 35 | network_drive: connectors.sources.network_drive:NASDataSource 36 | -------------------------------------------------------------------------------- /tests/sources/fixtures/network_drive/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:${VERSION} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | samba: 28 | image: dperson/samba 29 | networks: 30 | - default 31 | ports: 32 | - "445:445/tcp" 33 | restart: unless-stopped 34 | command: '-s "Folder1;/mnt;yes;no;yes;admin" -u "admin;abc@123" -p' 35 | 36 | 37 | kibana: 38 | image: docker.elastic.co/kibana/kibana:${VERSION} 39 | ports: 40 | - 5601:5601 41 | extra_hosts: 42 | - "host.docker.internal:host-gateway" 43 | depends_on: 44 | - "elasticsearch" 45 | profiles: 46 | - "enterprise-search" 47 | environment: 48 | ELASTICSEARCH_URL: http://host.docker.internal:9200 49 | ELASTICSEARCH_HOSTS: http://host.docker.internal:9200 50 | ENTERPRISESEARCH_HOST: http://host.docker.internal:3002 51 | ELASTICSEARCH_USERNAME: kibana_system 52 | ELASTICSEARCH_PASSWORD: changeme 53 | networks: 54 | - esnet 55 | 56 | enterprise_search: 57 | image: docker.elastic.co/enterprise-search/enterprise-search:${VERSION} 58 | profiles: 59 | - "enterprise-search" 60 | depends_on: 61 | - "elasticsearch" 62 | environment: 63 | - ENT_SEARCH_DEFAULT_PASSWORD=changeme 64 | - elasticsearch.username=elastic 65 | - elasticsearch.password=changeme 66 | - elasticsearch.host=http://host.docker.internal:9200 67 | - allow_es_settings_modification=true 68 | - kibana.host=http://host.docker.internal:5601 69 | - kibana.external_url=http://localhost:5601 70 | - secret_management.encryption_keys=["4a2cd3f81d39bf28738c10db0ca782095ffac07279561809eecc722e0c20eb09"] 71 | - JAVA_OPTS=-Xms2g -Xmx2g 72 | - email.account.enabled=true 73 | - email.account.smtp.auth=plain 74 | - email.account.smtp.starttls.enable=false 75 | - email.account.smtp.host=host.docker.internal 76 | - email.account.smtp.port=1025 77 | - email.account.email_defaults.from=local@example.com 78 | - DEBUG=true 79 | ports: 80 | - 3002:3002 81 | extra_hosts: 82 | - "host.docker.internal:host-gateway" 83 | networks: 84 | - esnet 85 | 86 | networks: 87 | esnet: 88 | default: 89 | 90 | volumes: 91 | esdata: 92 | driver: local 93 | 94 | -------------------------------------------------------------------------------- /tests/sources/fixtures/network_drive/requirements.txt: -------------------------------------------------------------------------------- 1 | smbprotocol 2 | -------------------------------------------------------------------------------- 
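A note on the samba service in the network_drive compose file above: read against the dperson/samba image's documented field order (name;path;browse;readonly;guest;users), the command -s "Folder1;/mnt;yes;no;yes;admin" exposes a browseable, writable share named Folder1 backed by /mnt, restricted to the admin user created by -u "admin;abc@123". A minimal sketch of reaching that share with the smbprotocol package pinned in requirements.txt (host and credentials assumed from the compose command, not taken from any test code shown here):

    # Sketch only: list the fixture share via smbprotocol's high-level smbclient API.
    import smbclient

    smbclient.register_session("127.0.0.1", username="admin", password="abc@123")
    for name in smbclient.listdir(r"\\127.0.0.1\Folder1"):
        print(name)
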
/tests/sources/fixtures/oracle/.env: -------------------------------------------------------------------------------- 1 | MAX_RSS="240M" 2 | -------------------------------------------------------------------------------- /tests/sources/fixtures/oracle/config.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://localhost:9200 3 | username: elastic 4 | password: changeme 5 | ssl: true 6 | bulk: 7 | queue_max_size: 1024 8 | queue_max_mem_size: 25 9 | display_every: 100 10 | chunk_size: 1000 11 | max_concurrency: 5 12 | chunk_max_mem_size: 5 13 | concurrent_downloads: 10 14 | request_timeout: 120 15 | max_wait_duration: 120 16 | initial_backoff_duration: 1 17 | backoff_multiplier: 2 18 | log_level: info 19 | 20 | service: 21 | idling: 5 22 | heartbeat: 300 23 | max_errors: 20 24 | max_errors_span: 600 25 | max_concurrent_syncs: 1 26 | job_cleanup_interval: 300 27 | log_level: INFO 28 | 29 | connectors: 30 | - 31 | connector_id: 'oracle' 32 | service_type: 'oracle' 33 | 34 | sources: 35 | oracle: connectors.sources.oracle:OracleDataSource 36 | 37 | -------------------------------------------------------------------------------- /tests/sources/fixtures/oracle/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:${VERSION} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | kibana: 28 | image: docker.elastic.co/kibana/kibana:${VERSION} 29 | ports: 30 | - 5601:5601 31 | extra_hosts: 32 | - "host.docker.internal:host-gateway" 33 | depends_on: 34 | - "elasticsearch" 35 | profiles: 36 | - "enterprise-search" 37 | environment: 38 | ELASTICSEARCH_URL: http://host.docker.internal:9200 39 | ELASTICSEARCH_HOSTS: http://host.docker.internal:9200 40 | ENTERPRISESEARCH_HOST: http://host.docker.internal:3002 41 | ELASTICSEARCH_USERNAME: kibana_system 42 | ELASTICSEARCH_PASSWORD: changeme 43 | networks: 44 | - esnet 45 | 46 | enterprise_search: 47 | image: docker.elastic.co/enterprise-search/enterprise-search:${VERSION} 48 | profiles: 49 | - "enterprise-search" 50 | depends_on: 51 | - "elasticsearch" 52 | environment: 53 | - ENT_SEARCH_DEFAULT_PASSWORD=changeme 54 | - elasticsearch.username=elastic 55 | - elasticsearch.password=changeme 56 | - elasticsearch.host=http://host.docker.internal:9200 57 | - allow_es_settings_modification=true 58 | - kibana.host=http://host.docker.internal:5601 59 | - kibana.external_url=http://localhost:5601 60 | - secret_management.encryption_keys=["4a2cd3f81d39bf28738c10db0ca782095ffac07279561809eecc722e0c20eb09"] 61 | - JAVA_OPTS=-Xms2g -Xmx2g 62 | - email.account.enabled=true 63 | - email.account.smtp.auth=plain 64 | - email.account.smtp.starttls.enable=false 65 | - email.account.smtp.host=host.docker.internal 66 | - email.account.smtp.port=1025 67 | - email.account.email_defaults.from=local@example.com 68 | - DEBUG=true 69 | ports: 70 | - 3002:3002 71 | extra_hosts: 72 | - 
"host.docker.internal:host-gateway" 73 | networks: 74 | - esnet 75 | 76 | oracle: 77 | image: gvenzl/oracle-xe:latest 78 | ports: 79 | - 9090:1521 80 | environment: 81 | - ORACLE_PASSWORD=Password_123 82 | restart: always 83 | 84 | networks: 85 | esnet: 86 | driver: bridge 87 | 88 | volumes: 89 | esdata: 90 | driver: local -------------------------------------------------------------------------------- /tests/sources/fixtures/oracle/requirements.txt: -------------------------------------------------------------------------------- 1 | oracledb==1.2.2 2 | -------------------------------------------------------------------------------- /tests/sources/fixtures/postgresql/.env: -------------------------------------------------------------------------------- 1 | MAX_RSS="410M" 2 | -------------------------------------------------------------------------------- /tests/sources/fixtures/postgresql/config.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://localhost:9200 3 | username: elastic 4 | password: changeme 5 | ssl: true 6 | bulk: 7 | queue_max_size: 1024 8 | queue_max_mem_size: 25 9 | display_every: 100 10 | chunk_size: 1000 11 | max_concurrency: 5 12 | chunk_max_mem_size: 5 13 | concurrent_downloads: 10 14 | request_timeout: 120 15 | max_wait_duration: 120 16 | initial_backoff_duration: 1 17 | backoff_multiplier: 2 18 | log_level: info 19 | 20 | service: 21 | idling: 5 22 | heartbeat: 300 23 | max_errors: 20 24 | max_errors_span: 600 25 | max_concurrent_syncs: 1 26 | job_cleanup_interval: 300 27 | log_level: INFO 28 | 29 | connectors: 30 | - 31 | connector_id: 'postgres' 32 | service_type: 'postgresql' 33 | 34 | sources: 35 | postgresql: connectors.sources.postgresql:PostgreSQLDataSource 36 | -------------------------------------------------------------------------------- /tests/sources/fixtures/postgresql/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:${VERSION} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | postgresql: 28 | container_name: postgresql 29 | image: postgres:14.1-alpine 30 | environment: 31 | POSTGRES_USER: admin 32 | POSTGRES_PASSWORD: Password_123 33 | POSTGRES_DB: xe 34 | PGDATA: /var/lib/postgresql/data/pgdata 35 | ports: 36 | - 9090:5432 37 | command: ["-c", "track_commit_timestamp=on"] 38 | restart: always 39 | 40 | kibana: 41 | image: docker.elastic.co/kibana/kibana:${VERSION} 42 | ports: 43 | - 5601:5601 44 | extra_hosts: 45 | - "host.docker.internal:host-gateway" 46 | depends_on: 47 | - "elasticsearch" 48 | profiles: 49 | - "enterprise-search" 50 | environment: 51 | ELASTICSEARCH_URL: http://host.docker.internal:9200 52 | ELASTICSEARCH_HOSTS: http://host.docker.internal:9200 53 | ENTERPRISESEARCH_HOST: http://host.docker.internal:3002 54 | ELASTICSEARCH_USERNAME: kibana_system 55 | ELASTICSEARCH_PASSWORD: changeme 56 | networks: 57 | - esnet 58 | 59 | 
enterprise_search: 60 | image: docker.elastic.co/enterprise-search/enterprise-search:${VERSION} 61 | profiles: 62 | - "enterprise-search" 63 | depends_on: 64 | - "elasticsearch" 65 | environment: 66 | - ENT_SEARCH_DEFAULT_PASSWORD=changeme 67 | - elasticsearch.username=elastic 68 | - elasticsearch.password=changeme 69 | - elasticsearch.host=http://host.docker.internal:9200 70 | - allow_es_settings_modification=true 71 | - kibana.host=http://host.docker.internal:5601 72 | - kibana.external_url=http://localhost:5601 73 | - secret_management.encryption_keys=["4a2cd3f81d39bf28738c10db0ca782095ffac07279561809eecc722e0c20eb09"] 74 | - JAVA_OPTS=-Xms2g -Xmx2g 75 | - email.account.enabled=true 76 | - email.account.smtp.auth=plain 77 | - email.account.smtp.starttls.enable=false 78 | - email.account.smtp.host=host.docker.internal 79 | - email.account.smtp.port=1025 80 | - email.account.email_defaults.from=local@example.com 81 | - DEBUG=true 82 | ports: 83 | - 3002:3002 84 | extra_hosts: 85 | - "host.docker.internal:host-gateway" 86 | networks: 87 | - esnet 88 | 89 | networks: 90 | esnet: 91 | 92 | volumes: 93 | esdata: 94 | driver: local 95 | -------------------------------------------------------------------------------- /tests/sources/fixtures/postgresql/fixture.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | import asyncio 7 | import os 8 | import random 9 | import string 10 | 11 | import asyncpg 12 | 13 | DATA_SIZE = os.environ.get("DATA_SIZE", "small").lower() 14 | CONNECTION_STRING = "postgresql://admin:Password_123@127.0.0.1:9090/xe" 15 | _SIZES = {"small": 5, "medium": 10, "large": 30} 16 | NUM_TABLES = _SIZES[DATA_SIZE] 17 | 18 | 19 | def random_text(k=1024 * 20): 20 | """Function to generate random text 21 | 22 | Args: 23 | k (int, optional): size of data in bytes. Defaults to 1024*20. 24 | 25 | Returns: 26 | string: random text 27 | """ 28 | return "".join(random.choices(string.ascii_uppercase + string.digits, k=k)) 29 | 30 | 31 | BIG_TEXT = random_text() 32 | 33 | 34 | def load(): 35 | """Generates tables and loads table data into the PostgreSQL server.""" 36 | 37 | async def inject_lines(table, connect, start, lines): 38 | """Ingest rows into a table 39 | 40 | Args: 41 | table (str): Name of table 42 | connect (connection): Connection to execute query 43 | start (int): Starting row 44 | lines (int): Number of rows 45 | """ 46 | rows = [] 47 | for row_id in range(lines): 48 | row_id += start 49 | rows.append((f"user_{row_id}", row_id, BIG_TEXT)) 50 | sql_query = ( 51 | f"INSERT INTO customers_{table}" 52 | + "(name, age, description) VALUES ($1, $2, $3)" 53 | ) 54 | await connect.executemany(sql_query, rows) 55 | 56 | async def load_rows(): 57 | """N tables of 10,000 rows each. 
Each row is ~ 1024*20 bytes.""" 58 | connect = await asyncpg.connect(CONNECTION_STRING) 59 | for table in range(NUM_TABLES): 60 | print(f"Adding data to table #{table}...") 61 | sql_query = f"CREATE TABLE IF NOT EXISTS customers_{table} (name VARCHAR(255), age int, description TEXT, PRIMARY KEY (name))" 62 | await connect.execute(sql_query) 63 | for i in range(10): 64 | await inject_lines(table, connect, i * 1000, 1000) 65 | await connect.close() 66 | 67 | asyncio.run(load_rows()) 68 | 69 | 70 | def remove(): 71 | """Remove documents from tables""" 72 | 73 | async def remove_rows(): 74 | """Removes 10 random items per table""" 75 | connect = await asyncpg.connect(CONNECTION_STRING) 76 | for table in range(NUM_TABLES): 77 | rows = [(f"user_{row_id}",) for row_id in random.sample(range(1, 1000), 10)] 78 | sql_query = f"DELETE FROM customers_{table} WHERE name=$1" 79 | await connect.executemany(sql_query, rows) 80 | await connect.close() 81 | 82 | asyncio.run(remove_rows()) 83 | -------------------------------------------------------------------------------- /tests/sources/fixtures/postgresql/requirements.txt: -------------------------------------------------------------------------------- 1 | asyncpg 2 | -------------------------------------------------------------------------------- /tests/sources/fixtures/s3/.env: -------------------------------------------------------------------------------- 1 | AWS_ENDPOINT_URL="http://127.0.0.1" 2 | AWS_PORT=5001 -------------------------------------------------------------------------------- /tests/sources/fixtures/s3/config.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://localhost:9200 3 | username: elastic 4 | password: changeme 5 | ssl: true 6 | bulk: 7 | queue_max_size: 1024 8 | queue_max_mem_size: 25 9 | display_every: 100 10 | chunk_size: 1000 11 | max_concurrency: 5 12 | chunk_max_mem_size: 5 13 | concurrent_downloads: 10 14 | request_timeout: 120 15 | max_wait_duration: 120 16 | initial_backoff_duration: 1 17 | backoff_multiplier: 2 18 | log_level: info 19 | 20 | service: 21 | idling: 5 22 | heartbeat: 300 23 | max_errors: 20 24 | max_errors_span: 600 25 | max_concurrent_syncs: 1 26 | job_cleanup_interval: 300 27 | log_level: INFO 28 | 29 | connectors: 30 | - 31 | connector_id: 's3' 32 | service_type: 's3' 33 | 34 | sources: 35 | s3: connectors.sources.s3:S3DataSource 36 | -------------------------------------------------------------------------------- /tests/sources/fixtures/s3/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:${VERSION} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | kibana: 28 | image: docker.elastic.co/kibana/kibana:${VERSION} 29 | ports: 30 | - 5601:5601 31 | extra_hosts: 32 | - "host.docker.internal:host-gateway" 33 | depends_on: 34 | - 
"elasticsearch" 35 | profiles: 36 | - "enterprise-search" 37 | environment: 38 | ELASTICSEARCH_URL: http://host.docker.internal:9200 39 | ELASTICSEARCH_HOSTS: http://host.docker.internal:9200 40 | ENTERPRISESEARCH_HOST: http://host.docker.internal:3002 41 | ELASTICSEARCH_USERNAME: kibana_system 42 | ELASTICSEARCH_PASSWORD: changeme 43 | networks: 44 | - esnet 45 | 46 | enterprise_search: 47 | image: docker.elastic.co/enterprise-search/enterprise-search:${VERSION} 48 | profiles: 49 | - "enterprise-search" 50 | depends_on: 51 | - "elasticsearch" 52 | environment: 53 | - ENT_SEARCH_DEFAULT_PASSWORD=changeme 54 | - elasticsearch.username=elastic 55 | - elasticsearch.password=changeme 56 | - elasticsearch.host=http://host.docker.internal:9200 57 | - allow_es_settings_modification=true 58 | - kibana.host=http://host.docker.internal:5601 59 | - kibana.external_url=http://localhost:5601 60 | - secret_management.encryption_keys=["4a2cd3f81d39bf28738c10db0ca782095ffac07279561809eecc722e0c20eb09"] 61 | - JAVA_OPTS=-Xms2g -Xmx2g 62 | - email.account.enabled=true 63 | - email.account.smtp.auth=plain 64 | - email.account.smtp.starttls.enable=false 65 | - email.account.smtp.host=host.docker.internal 66 | - email.account.smtp.port=1025 67 | - email.account.email_defaults.from=local@example.com 68 | - DEBUG=true 69 | ports: 70 | - 3002:3002 71 | extra_hosts: 72 | - "host.docker.internal:host-gateway" 73 | networks: 74 | - esnet 75 | 76 | s3: 77 | image: motoserver/moto 78 | ports: 79 | - 5001:5000 80 | 81 | networks: 82 | esnet: 83 | driver: bridge 84 | 85 | volumes: 86 | esdata: 87 | driver: local -------------------------------------------------------------------------------- /tests/sources/fixtures/s3/fixture.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import string 4 | 5 | BUCKET_NAME = "ent-search-ingest-dev" 6 | REGION_NAME = "us-west-2" 7 | AWS_ENDPOINT_URL = "http://127.0.0.1" 8 | AWS_PORT = int(os.environ.get("AWS_PORT", "5001")) 9 | DATA_SIZE = os.environ.get("DATA_SIZE", "small").lower() 10 | AWS_SECRET_KEY = "dummy_secret_key" 11 | AWS_ACCESS_KEY_ID = "dummy_access_key" 12 | 13 | if DATA_SIZE == "small": 14 | FOLDER_COUNT = 400 15 | SMALL_TEXT_COUNT = 500 16 | BIG_TEXT_COUNT = 100 17 | OBJECT_COUNT = 5 18 | elif DATA_SIZE == "medium": 19 | FOLDER_COUNT = 2000 20 | SMALL_TEXT_COUNT = 2500 21 | BIG_TEXT_COUNT = 500 22 | OBJECT_COUNT = 10 23 | else: 24 | FOLDER_COUNT = 4000 25 | SMALL_TEXT_COUNT = 5000 26 | BIG_TEXT_COUNT = 1000 27 | OBJECT_COUNT = 15 28 | 29 | 30 | def random_text(k=0): 31 | return "".join(random.choices(string.ascii_uppercase + string.digits, k=k)) 32 | 33 | 34 | BIG_TEXT = random_text(k=1024 * 20) 35 | 36 | 37 | def setup(): 38 | os.environ["AWS_ENDPOINT_URL"] = AWS_ENDPOINT_URL 39 | os.environ["AWS_PORT"] = str(AWS_PORT) 40 | 41 | 42 | def load(): 43 | """Method for generating 10k document for aws s3 emulator""" 44 | import boto3 45 | 46 | try: 47 | s3_client = boto3.client( 48 | "s3", 49 | endpoint_url=f"{AWS_ENDPOINT_URL}:{AWS_PORT}", 50 | region_name=REGION_NAME, 51 | aws_access_key_id=AWS_ACCESS_KEY_ID, 52 | aws_secret_access_key=AWS_SECRET_KEY, 53 | ) 54 | s3_client.create_bucket( 55 | Bucket=BUCKET_NAME, 56 | CreateBucketConfiguration={ 57 | "LocationConstraint": REGION_NAME, 58 | }, 59 | ) 60 | print("Creating objects on the aws-moto server") 61 | # add folders to the bucket 62 | for object_id in range(0, FOLDER_COUNT): 63 | s3_client.put_object( 64 | Key=f"{BUCKET_NAME}/{object_id}/", 65 | 
Bucket=BUCKET_NAME, 66 | StorageClass="STANDARD", 67 | ) 68 | # add small text files to the bucket 69 | for object_id in range(0, SMALL_TEXT_COUNT): 70 | s3_client.put_object( 71 | Key=f"{BUCKET_NAME}/small_file_{object_id}.txt", 72 | Bucket=BUCKET_NAME, 73 | Body=f"Testing object{object_id} document for bucket: {BUCKET_NAME}", 74 | StorageClass="STANDARD", 75 | ) 76 | # add big text files to the bucket 77 | for object_id in range(0, BIG_TEXT_COUNT): 78 | s3_client.put_object( 79 | Key=f"{BUCKET_NAME}/big_file_{object_id}.txt", 80 | Bucket=BUCKET_NAME, 81 | Body=BIG_TEXT, 82 | StorageClass="STANDARD", 83 | ) 84 | except Exception: 85 | raise 86 | 87 | 88 | def remove(): 89 | """Method for removing the first OBJECT_COUNT folder objects from the aws s3 emulator""" 90 | import boto3 91 | 92 | try: 93 | s3_client = boto3.client( 94 | "s3", 95 | endpoint_url=f"{AWS_ENDPOINT_URL}:{AWS_PORT}", 96 | region_name=REGION_NAME, 97 | aws_access_key_id=AWS_ACCESS_KEY_ID, 98 | aws_secret_access_key=AWS_SECRET_KEY, 99 | ) 100 | print("Removing data from aws-moto server.") 101 | for object_id in range(0, OBJECT_COUNT): 102 | s3_client.delete_object( 103 | Bucket=BUCKET_NAME, Key=f"{BUCKET_NAME}/{object_id}/" 104 | ) 105 | except Exception: 106 | raise 107 | -------------------------------------------------------------------------------- /tests/sources/fixtures/s3/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | -------------------------------------------------------------------------------- /tests/sources/fixtures/servicenow/.env: -------------------------------------------------------------------------------- 1 | MAX_RSS="240M" 2 | -------------------------------------------------------------------------------- /tests/sources/fixtures/servicenow/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10.8 2 | 3 | ADD . 
/python-flask 4 | WORKDIR /python-flask 5 | RUN pip install -r requirements.txt 6 | -------------------------------------------------------------------------------- /tests/sources/fixtures/servicenow/config.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://localhost:9200 3 | username: elastic 4 | password: changeme 5 | ssl: true 6 | bulk: 7 | queue_max_size: 1024 8 | queue_max_mem_size: 25 9 | display_every: 100 10 | chunk_size: 1000 11 | max_concurrency: 5 12 | chunk_max_mem_size: 5 13 | concurrent_downloads: 10 14 | request_timeout: 120 15 | max_wait_duration: 120 16 | initial_backoff_duration: 1 17 | backoff_multiplier: 2 18 | log_level: info 19 | 20 | service: 21 | idling: 30 22 | heartbeat: 300 23 | max_errors: 20 24 | max_errors_span: 600 25 | max_concurrent_syncs: 1 26 | job_cleanup_interval: 300 27 | log_level: INFO 28 | 29 | connectors: 30 | - 31 | connector_id: 'servicenow' 32 | service_type: 'servicenow' 33 | 34 | sources: 35 | servicenow: connectors.sources.servicenow:ServiceNowDataSource 36 | -------------------------------------------------------------------------------- /tests/sources/fixtures/servicenow/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | elasticsearch: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:${VERSION} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms512m -Xmx512m 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | kibana: 28 | image: docker.elastic.co/kibana/kibana:${VERSION} 29 | ports: 30 | - 5601:5601 31 | extra_hosts: 32 | - "host.docker.internal:host-gateway" 33 | depends_on: 34 | - "elasticsearch" 35 | profiles: 36 | - "enterprise-search" 37 | environment: 38 | ELASTICSEARCH_URL: http://host.docker.internal:9200 39 | ELASTICSEARCH_HOSTS: http://host.docker.internal:9200 40 | ENTERPRISESEARCH_HOST: http://host.docker.internal:3002 41 | ELASTICSEARCH_USERNAME: kibana_system 42 | ELASTICSEARCH_PASSWORD: changeme 43 | networks: 44 | - esnet 45 | 46 | enterprise_search: 47 | image: docker.elastic.co/enterprise-search/enterprise-search:${VERSION} 48 | profiles: 49 | - "enterprise-search" 50 | depends_on: 51 | - "elasticsearch" 52 | environment: 53 | - ENT_SEARCH_DEFAULT_PASSWORD=changeme 54 | - elasticsearch.username=elastic 55 | - elasticsearch.password=changeme 56 | - elasticsearch.host=http://host.docker.internal:9200 57 | - allow_es_settings_modification=true 58 | - kibana.host=http://host.docker.internal:5601 59 | - kibana.external_url=http://localhost:5601 60 | - secret_management.encryption_keys=["4a2cd3f81d39bf28738c10db0ca782095ffac07279561809eecc722e0c20eb09"] 61 | - JAVA_OPTS=-Xms2g -Xmx2g 62 | - email.account.enabled=true 63 | - email.account.smtp.auth=plain 64 | - email.account.smtp.starttls.enable=false 65 | - email.account.smtp.host=host.docker.internal 66 | - email.account.smtp.port=1025 67 | - email.account.email_defaults.from=local@example.com 68 | - DEBUG=true 69 | ports: 70 | - 3002:3002 71 | extra_hosts: 72 | - "host.docker.internal:host-gateway" 73 
| networks: 74 | - esnet 75 | 76 | servicenow: 77 | build: . 78 | command: python fixture.py 79 | ports: 80 | - "9318:9318" 81 | volumes: 82 | - .:/python-flask 83 | restart: always 84 | 85 | volumes: 86 | esdata: 87 | driver: local 88 | 89 | networks: 90 | esnet: 91 | driver: bridge 92 | -------------------------------------------------------------------------------- /tests/sources/fixtures/servicenow/requirements.txt: -------------------------------------------------------------------------------- 1 | flask==2.2.3 -------------------------------------------------------------------------------- /tests/sources/fixtures/sharepoint_online/.env: -------------------------------------------------------------------------------- 1 | OVERRIDE_URL="http://localhost:10337" 2 | -------------------------------------------------------------------------------- /tests/sources/fixtures/sharepoint_online/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10.8 2 | ADD . /python-flask 3 | WORKDIR /python-flask 4 | RUN pip install -r requirements.txt 5 | -------------------------------------------------------------------------------- /tests/sources/fixtures/sharepoint_online/config.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://localhost:9200 3 | username: elastic 4 | password: changeme 5 | ssl: true 6 | bulk: 7 | queue_max_size: 1024 8 | queue_max_mem_size: 25 9 | display_every: 100 10 | chunk_size: 1000 11 | max_concurrency: 5 12 | chunk_max_mem_size: 5 13 | concurrent_downloads: 10 14 | request_timeout: 120 15 | max_wait_duration: 120 16 | initial_backoff_duration: 1 17 | backoff_multiplier: 2 18 | log_level: info 19 | 20 | service: 21 | idling: 5 22 | heartbeat: 300 23 | max_errors: 20 24 | max_errors_span: 600 25 | max_concurrent_syncs: 1 26 | job_cleanup_interval: 300 27 | log_level: INFO 28 | 29 | connectors: 30 | - 31 | connector_id: 'sharepoint_online' 32 | service_type: 'sharepoint_online' 33 | 34 | sources: 35 | sharepoint_online: connectors.sources.sharepoint_online:SharepointOnlineDataSource 36 | 37 | -------------------------------------------------------------------------------- /tests/sources/fixtures/sharepoint_online/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | elasticsearch: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:${VERSION} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | kibana: 28 | image: docker.elastic.co/kibana/kibana:${VERSION} 29 | ports: 30 | - 5601:5601 31 | extra_hosts: 32 | - "host.docker.internal:host-gateway" 33 | depends_on: 34 | - "elasticsearch" 35 | profiles: 36 | - "enterprise-search" 37 | environment: 38 | ELASTICSEARCH_URL: http://host.docker.internal:9200 39 | ELASTICSEARCH_HOSTS: http://host.docker.internal:9200 40 | ENTERPRISESEARCH_HOST: http://host.docker.internal:3002 41 | ELASTICSEARCH_USERNAME: kibana_system 42 | ELASTICSEARCH_PASSWORD: changeme 
43 | networks: 44 | - esnet 45 | 46 | enterprise_search: 47 | image: docker.elastic.co/enterprise-search/enterprise-search:${VERSION} 48 | profiles: 49 | - "enterprise-search" 50 | depends_on: 51 | - "elasticsearch" 52 | environment: 53 | - ENT_SEARCH_DEFAULT_PASSWORD=changeme 54 | - elasticsearch.username=elastic 55 | - elasticsearch.password=changeme 56 | - elasticsearch.host=http://host.docker.internal:9200 57 | - allow_es_settings_modification=true 58 | - kibana.host=http://host.docker.internal:5601 59 | - kibana.external_url=http://localhost:5601 60 | - secret_management.encryption_keys=["4a2cd3f81d39bf28738c10db0ca782095ffac07279561809eecc722e0c20eb09"] 61 | - JAVA_OPTS=-Xms2g -Xmx2g 62 | - email.account.enabled=true 63 | - email.account.smtp.auth=plain 64 | - email.account.smtp.starttls.enable=false 65 | - email.account.smtp.host=host.docker.internal 66 | - email.account.smtp.port=1025 67 | - email.account.email_defaults.from=local@example.com 68 | - DEBUG=true 69 | ports: 70 | - 3002:3002 71 | extra_hosts: 72 | - "host.docker.internal:host-gateway" 73 | networks: 74 | - esnet 75 | 76 | sharepoint_online: 77 | build: . 78 | command: python fixture.py 79 | ports: 80 | - 10337:10337 81 | volumes: 82 | - .:/python-flask 83 | restart: always 84 | environment: 85 | - DATA_SIZE=${DATA_SIZE} 86 | 87 | volumes: 88 | esdata: 89 | driver: local 90 | 91 | networks: 92 | esnet: 93 | driver: bridge 94 | -------------------------------------------------------------------------------- /tests/sources/fixtures/sharepoint_online/nginx/conf/sharepoint.com: -------------------------------------------------------------------------------- 1 | server { 2 | listen 80; 3 | listen [::]:80; 4 | 5 | server_name example.org www.example.org; 6 | server_tokens off; 7 | 8 | location /.well-known/acme-challenge/ { 9 | root /var/www/certbot; 10 | } 11 | 12 | location / { 13 | return 301 https://www.sharepoint.com$request_uri; 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /tests/sources/fixtures/sharepoint_online/requirements.txt: -------------------------------------------------------------------------------- 1 | flask==2.2.3 2 | flask_limiter==3.3.1 3 | faker==18.11.2 4 | yattag==1.15.1 5 | -------------------------------------------------------------------------------- /tests/sources/fixtures/sharepoint_server/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10.8 2 | 3 | ADD . 
/python-flask 4 | WORKDIR /python-flask 5 | RUN pip install -r requirements.txt 6 | -------------------------------------------------------------------------------- /tests/sources/fixtures/sharepoint_server/config.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://localhost:9200 3 | username: elastic 4 | password: changeme 5 | ssl: true 6 | bulk: 7 | queue_max_size: 1024 8 | queue_max_mem_size: 25 9 | display_every: 100 10 | chunk_size: 1000 11 | max_concurrency: 5 12 | chunk_max_mem_size: 5 13 | concurrent_downloads: 10 14 | request_timeout: 120 15 | max_wait_duration: 120 16 | initial_backoff_duration: 1 17 | backoff_multiplier: 2 18 | log_level: info 19 | 20 | service: 21 | idling: 5 22 | heartbeat: 300 23 | max_errors: 20 24 | max_errors_span: 600 25 | max_concurrent_syncs: 1 26 | job_cleanup_interval: 300 27 | log_level: INFO 28 | 29 | connectors: 30 | - 31 | connector_id: 'sharepoint_server' 32 | service_type: 'sharepoint_server' 33 | 34 | sources: 35 | sharepoint_server: connectors.sources.sharepoint_server:SharepointServerDataSource 36 | 37 | -------------------------------------------------------------------------------- /tests/sources/fixtures/sharepoint_server/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | elasticsearch: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:${VERSION} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | kibana: 28 | image: docker.elastic.co/kibana/kibana:${VERSION} 29 | ports: 30 | - 5601:5601 31 | extra_hosts: 32 | - "host.docker.internal:host-gateway" 33 | depends_on: 34 | - "elasticsearch" 35 | profiles: 36 | - "enterprise-search" 37 | environment: 38 | ELASTICSEARCH_URL: http://host.docker.internal:9200 39 | ELASTICSEARCH_HOSTS: http://host.docker.internal:9200 40 | ENTERPRISESEARCH_HOST: http://host.docker.internal:3002 41 | ELASTICSEARCH_USERNAME: kibana_system 42 | ELASTICSEARCH_PASSWORD: changeme 43 | networks: 44 | - esnet 45 | 46 | enterprise_search: 47 | image: docker.elastic.co/enterprise-search/enterprise-search:${VERSION} 48 | profiles: 49 | - "enterprise-search" 50 | depends_on: 51 | - "elasticsearch" 52 | environment: 53 | - ENT_SEARCH_DEFAULT_PASSWORD=changeme 54 | - elasticsearch.username=elastic 55 | - elasticsearch.password=changeme 56 | - elasticsearch.host=http://host.docker.internal:9200 57 | - allow_es_settings_modification=true 58 | - kibana.host=http://host.docker.internal:5601 59 | - kibana.external_url=http://localhost:5601 60 | - secret_management.encryption_keys=["4a2cd3f81d39bf28738c10db0ca782095ffac07279561809eecc722e0c20eb09"] 61 | - JAVA_OPTS=-Xms2g -Xmx2g 62 | - email.account.enabled=true 63 | - email.account.smtp.auth=plain 64 | - email.account.smtp.starttls.enable=false 65 | - email.account.smtp.host=host.docker.internal 66 | - email.account.smtp.port=1025 67 | - email.account.email_defaults.from=local@example.com 68 | - DEBUG=true 69 | ports: 70 | - 3002:3002 71 | 
extra_hosts: 72 | - "host.docker.internal:host-gateway" 73 | networks: 74 | - esnet 75 | 76 | sharepoint: 77 | build: . 78 | command: python fixture.py 79 | ports: 80 | - "8491:8491" 81 | volumes: 82 | - .:/python-flask 83 | restart: always 84 | environment: 85 | - DATA_SIZE=${DATA_SIZE} 86 | 87 | volumes: 88 | esdata: 89 | driver: local 90 | 91 | networks: 92 | esnet: 93 | driver: bridge 94 | -------------------------------------------------------------------------------- /tests/sources/fixtures/sharepoint_server/requirements.txt: -------------------------------------------------------------------------------- 1 | flask==2.2.3 2 | flask_limiter==3.3.1 3 | -------------------------------------------------------------------------------- /tests/sources/support.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from connectors.source import DEFAULT_CONFIGURATION, DataSourceConfiguration 7 | 8 | 9 | def create_source(klass, **extras): 10 | config = klass.get_default_configuration() 11 | for k, v in extras.items(): 12 | if k in config: 13 | config[k].update({"value": v}) 14 | else: 15 | config[k] = DEFAULT_CONFIGURATION.copy() | {"value": v} 16 | 17 | return klass(configuration=DataSourceConfiguration(config)) 18 | 19 | 20 | async def assert_basics(klass, field, value): 21 | config = DataSourceConfiguration(klass.get_default_configuration()) 22 | assert config[field] == value 23 | source = create_source(klass) 24 | await source.ping() 25 | await source.changed() 26 | -------------------------------------------------------------------------------- /tests/sources/test_atlassian.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | from unittest.mock import ANY 7 | 8 | import pytest 9 | 10 | from connectors.filtering.validation import ( 11 | AdvancedRulesValidator, 12 | SyncRuleValidationResult, 13 | ) 14 | from connectors.sources.atlassian import AtlassianAdvancedRulesValidator 15 | 16 | 17 | @pytest.mark.parametrize( 18 | "advanced_rules, expected_validation_result", 19 | [ 20 | ( 21 | # valid: empty array should be valid 22 | [], 23 | SyncRuleValidationResult.valid_result( 24 | SyncRuleValidationResult.ADVANCED_RULES 25 | ), 26 | ), 27 | ( 28 | # valid: empty object should also be valid -> default value in Kibana 29 | {}, 30 | SyncRuleValidationResult.valid_result( 31 | SyncRuleValidationResult.ADVANCED_RULES 32 | ), 33 | ), 34 | ( 35 | # valid: one custom query 36 | [{"query": "type=A"}], 37 | SyncRuleValidationResult.valid_result( 38 | SyncRuleValidationResult.ADVANCED_RULES 39 | ), 40 | ), 41 | ( 42 | # valid: two custom queries 43 | [{"query": "type=A"}, {"query": "type=B"}], 44 | SyncRuleValidationResult.valid_result( 45 | SyncRuleValidationResult.ADVANCED_RULES 46 | ), 47 | ), 48 | ( 49 | # invalid: query empty 50 | [{"query": "type=A"}, {"query": ""}], 51 | SyncRuleValidationResult( 52 | SyncRuleValidationResult.ADVANCED_RULES, 53 | is_valid=False, 54 | validation_message=ANY, 55 | ), 56 | ), 57 | ( 58 | # invalid: disallowed key 59 | [{"query": "type=A"}, {"queries": "type=B"}], 60 | SyncRuleValidationResult( 61 | SyncRuleValidationResult.ADVANCED_RULES, 62 | is_valid=False, 63 | validation_message=ANY, 64 | ), 65 | ), 66 | ( 67 | # invalid: query value is a list of strings -> wrong type 68 | {"query": ["type=A"]}, 69 | SyncRuleValidationResult( 70 | SyncRuleValidationResult.ADVANCED_RULES, 71 | is_valid=False, 72 | validation_message=ANY, 73 | ), 74 | ), 75 | ( 76 | # invalid: query value is a list containing an empty string -> wrong type 77 | {"query": ["type=A", ""]}, 78 | SyncRuleValidationResult( 79 | SyncRuleValidationResult.ADVANCED_RULES, 80 | is_valid=False, 81 | validation_message=ANY, 82 | ), 83 | ), 84 | ], 85 | ) 86 | @pytest.mark.asyncio 87 | async def test_advanced_rules_validation(advanced_rules, expected_validation_result): 88 | validation_result = await AtlassianAdvancedRulesValidator( 89 | AdvancedRulesValidator 90 | ).validate(advanced_rules) 91 | 92 | assert validation_result == expected_validation_result 93 | -------------------------------------------------------------------------------- /tests/sources/test_directory.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | import pytest 7 | 8 | from connectors.sources.directory import DEFAULT_DIR, DirectoryDataSource 9 | from tests.sources.support import assert_basics, create_source 10 | 11 | 12 | @pytest.mark.asyncio 13 | async def test_basics(): 14 | await assert_basics(DirectoryDataSource, "directory", DEFAULT_DIR) 15 | 16 | 17 | @pytest.mark.asyncio 18 | async def test_get_docs(catch_stdout): 19 | source = create_source(DirectoryDataSource) 20 | num = 0 21 | async for (doc, dl) in source.get_docs(): 22 | num += 1 23 | if doc["path"].endswith("__init__.py"): 24 | continue 25 | data = await dl(doit=True, timestamp="xx") 26 | if data is not None: 27 | assert len(data["_attachment"]) > 0 28 | if num > 100: 29 | break 30 | 31 | assert num > 3 32 | -------------------------------------------------------------------------------- /tests/sources/test_mssql.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | """Tests the microsoft sql database source class methods""" 7 | from unittest.mock import patch 8 | 9 | import pytest 10 | from freezegun import freeze_time 11 | 12 | from connectors.sources.mssql import MSSQLDataSource, MSSQLQueries 13 | from tests.sources.support import create_source 14 | from tests.sources.test_generic_database import ConnectionSync 15 | 16 | MSSQL_CONNECTION_STRING = "mssql+pytds://admin:Password_123@127.0.0.1:9090/xe" 17 | 18 | 19 | class MockEngine: 20 | """This class creates a mock engine for the mssql dialect""" 21 | 22 | def connect(self): 23 | """Make a connection 24 | 25 | Returns: 26 | connection: Instance of ConnectionSync 27 | """ 28 | return ConnectionSync(MSSQLQueries()) 29 | 30 | 31 | @freeze_time("2023-01-24T04:07:19") 32 | @patch("connectors.sources.mssql.create_engine") 33 | @patch("connectors.sources.mssql.URL.create") 34 | @pytest.mark.asyncio 35 | async def test_create_engine(mock_create_url, mock_create_engine): 36 | # Setup 37 | source = create_source(MSSQLDataSource) 38 | mock_create_engine.return_value = "Mock engine" 39 | mock_create_url.return_value = MSSQL_CONNECTION_STRING 40 | 41 | # Execute 42 | source._create_engine() 43 | 44 | # Assert 45 | mock_create_engine.assert_called_with(MSSQL_CONNECTION_STRING, connect_args={}) 46 | 47 | # Setup 48 | source.ssl_enabled = True 49 | source.ssl_ca = "-----BEGIN CERTIFICATE----- Certificate -----END CERTIFICATE-----" 50 | 51 | # Execute 52 | source._create_engine() 53 | 54 | # Assert 55 | mock_create_engine.assert_called_with( 56 | MSSQL_CONNECTION_STRING, 57 | connect_args={ 58 | "cafile": source.certfile, 59 | "validate_host": False, 60 | }, 61 | ) 62 | 63 | # Cleanup 64 | await source.close() 65 | 66 | 67 | @pytest.mark.asyncio 68 | async def test_get_docs_mssql(): 69 | # Setup 70 | source = create_source(MSSQLDataSource) 71 | source.engine = MockEngine() 72 | actual_response = [] 73 | expected_response = [ 74 | { 75 | "dbo_emp_table_ids": 1, 76 | "dbo_emp_table_names": "abcd", 77 | "_id": "xe_dbo_emp_table_1_", 78 | "_timestamp": "2023-02-21T08:37:15+00:00", 79 | "Database": "xe", 80 | "Table": "emp_table", 81 | "schema": "dbo", 82 | }, 83 | { 84 | "dbo_emp_table_ids": 2, 85 | "dbo_emp_table_names": "xyz", 86 | "_id": "xe_dbo_emp_table_2_", 87 | "_timestamp": "2023-02-21T08:37:15+00:00", 88 | "Database": "xe", 89 | 
"Table": "emp_table", 90 | "schema": "dbo", 91 | }, 92 | ] 93 | 94 | # Execute 95 | async for doc in source.get_docs(): 96 | actual_response.append(doc[0]) 97 | 98 | # Assert 99 | assert actual_response == expected_response 100 | 101 | 102 | @pytest.mark.asyncio 103 | async def test_close(): 104 | source = create_source(MSSQLDataSource) 105 | source.create_pem_file() 106 | await source.close() 107 | -------------------------------------------------------------------------------- /tests/sources/test_oracle.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | """Tests the Oracle Database source class methods""" 7 | from unittest.mock import patch 8 | 9 | import pytest 10 | from sqlalchemy import create_engine 11 | from sqlalchemy.engine import Engine 12 | 13 | from connectors.sources.oracle import OracleDataSource, OracleQueries 14 | from tests.sources.support import create_source 15 | from tests.sources.test_generic_database import ConnectionSync 16 | 17 | DSN = "oracle+oracledb://admin:Password_123@(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST=127.0.0.1)(PORT=9090))(CONNECT_DATA=(SID=xe)))" 18 | 19 | 20 | @pytest.mark.asyncio 21 | @patch("connectors.sources.oracle.create_engine") 22 | async def test_create_engine_in_thick_mode(mock_fun): 23 | """Test create_engine method of OracleDataSource class in thick mode""" 24 | # Setup 25 | source = create_source(OracleDataSource) 26 | config_file_path = {"lib_dir": "/home/devuser/lib", "config_dir": ""} 27 | source.oracle_home = "/home/devuser" 28 | mock_fun.return_value = "Mock Response" 29 | 30 | # Execute 31 | source._create_engine() 32 | 33 | # Assert 34 | mock_fun.assert_called_with(DSN, thick_mode=config_file_path) 35 | 36 | 37 | @pytest.mark.asyncio 38 | @patch("connectors.sources.oracle.create_engine") 39 | async def test_create_engine_in_thin_mode(mock_fun): 40 | """Test create_engine method of OracleDataSource class in thin mode""" 41 | # Setup 42 | source = create_source(OracleDataSource) 43 | 44 | # Execute 45 | source._create_engine() 46 | 47 | # Assert 48 | mock_fun.assert_called_with(DSN) 49 | 50 | 51 | @pytest.mark.asyncio 52 | async def test_get_docs_oracle(): 53 | # Setup 54 | source = create_source(OracleDataSource) 55 | 56 | with patch.object(Engine, "connect", return_value=ConnectionSync(OracleQueries())): 57 | source.engine = create_engine(DSN) 58 | actual_response = [] 59 | expected_response = [ 60 | { 61 | "emp_table_ids": 1, 62 | "emp_table_names": "abcd", 63 | "_id": "xe_emp_table_1_", 64 | "_timestamp": "2023-02-21T08:37:15+00:00", 65 | "Database": "xe", 66 | "Table": "emp_table", 67 | }, 68 | { 69 | "emp_table_ids": 2, 70 | "emp_table_names": "xyz", 71 | "_id": "xe_emp_table_2_", 72 | "_timestamp": "2023-02-21T08:37:15+00:00", 73 | "Database": "xe", 74 | "Table": "emp_table", 75 | }, 76 | ] 77 | 78 | # Execute 79 | async for doc in source.get_docs(): 80 | actual_response.append(doc[0]) 81 | 82 | # Assert 83 | assert actual_response == expected_response 84 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | import asyncio 7 | import logging 8 | import os 9 | import signal 10 | from io import StringIO 11 | from unittest import mock 12 | from unittest.mock import patch 13 | 14 | import pytest 15 | 16 | from connectors import __version__ 17 | from connectors.cli import main, run 18 | 19 | HERE = os.path.dirname(__file__) 20 | FIXTURES_DIR = os.path.abspath(os.path.join(HERE, "fixtures")) 21 | CONFIG = os.path.join(FIXTURES_DIR, "config.yml") 22 | 23 | 24 | def test_main(catch_stdout): 25 | assert main(["--version"]) == 0 26 | catch_stdout.seek(0) 27 | assert catch_stdout.read().strip() == __version__ 28 | 29 | 30 | def test_main_and_kill(mock_responses): 31 | headers = {"X-Elastic-Product": "Elasticsearch"} 32 | host = "http://localhost:9200" 33 | 34 | mock_responses.get(host, headers=headers) 35 | mock_responses.head(f"{host}/.elastic-connectors", headers=headers) 36 | mock_responses.head(f"{host}/.elastic-connectors-sync-jobs", headers=headers) 37 | mock_responses.get( 38 | f"{host}/_ingest/pipeline/ent-search-generic-ingestion", headers=headers 39 | ) 40 | 41 | async def kill(): 42 | await asyncio.sleep(0.2) 43 | os.kill(os.getpid(), signal.SIGTERM) 44 | 45 | loop = asyncio.new_event_loop() 46 | asyncio.set_event_loop(loop) 47 | loop.create_task(kill()) 48 | 49 | main([]) 50 | 51 | 52 | def test_run(mock_responses, set_env): 53 | args = mock.MagicMock() 54 | args.log_level = "DEBUG" 55 | args.config_file = CONFIG 56 | args.action = ["list"] 57 | with patch("sys.stdout", new=StringIO()) as patched_stdout: 58 | assert run(args) == 0 59 | 60 | output = patched_stdout.getvalue().strip() 61 | 62 | assert "Registered connectors:" in output 63 | assert "- Fakey" in output 64 | assert "- Phatey" in output 65 | assert "Bye" in output 66 | 67 | 68 | def test_config_action(mock_responses, set_env): 69 | args = mock.MagicMock() 70 | args.log_level = "DEBUG" 71 | args.config_file = CONFIG 72 | args.action = ["config"] 73 | args.service_type = "fake" 74 | with patch("sys.stdout", new=StringIO()) as patched_stdout: 75 | result = run(args) 76 | output = patched_stdout.getvalue().strip() 77 | assert result == 0 78 | assert "Could not find a connector for service type" not in output 79 | assert "Getting default configuration for service type fake" in output 80 | 81 | 82 | def test_run_list_with_other_actions(mock_responses, set_env): 83 | args = mock.MagicMock() 84 | args.log_level = "DEBUG" 85 | args.config_file = CONFIG 86 | args.action = ["list", "poll"] 87 | with patch("sys.stdout", new=StringIO()) as patched_stdout: 88 | assert run(args) == -1 89 | output = patched_stdout.getvalue().strip() 90 | assert "Cannot use the `list` action with other actions" in output 91 | 92 | 93 | @patch("connectors.cli.set_logger") 94 | @patch("connectors.cli.load_config", side_effect=Exception("something went wrong")) 95 | def test_main_with_invalid_configuration(load_config, set_logger): 96 | args = mock.MagicMock() 97 | args.log_level = logging.DEBUG # should be ignored! 98 | args.filebeat = True 99 | 100 | with pytest.raises(Exception): 101 | run(args) 102 | 103 | set_logger.assert_called_with(logging.INFO, filebeat=True) 104 | -------------------------------------------------------------------------------- /tests/test_commons.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. 
and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | import pytest 7 | 8 | from tests.commons import AsyncIterator 9 | 10 | 11 | @pytest.mark.asyncio 12 | async def test_async_generation(): 13 | items = [1, 2, 3] 14 | 15 | async_generator = AsyncIterator(items) 16 | 17 | yielded_items = [] 18 | async for item in async_generator: 19 | yielded_items.append(item) 20 | 21 | assert yielded_items == items 22 | 23 | 24 | @pytest.mark.asyncio 25 | async def test_call_args(): 26 | items = [1] 27 | 28 | async_generator = AsyncIterator(items) 29 | 30 | arg_one = "arg one" 31 | arg_two = "arg two" 32 | 33 | # first call 34 | async for _ in async_generator(arg_one, arg_two): 35 | pass 36 | 37 | arg_three = "arg three" 38 | arg_four = "arg four" 39 | 40 | # second call 41 | async for _ in async_generator(arg_three, arg_four): 42 | pass 43 | 44 | first_call_args = async_generator.call_args[0] 45 | second_call_args = async_generator.call_args[1] 46 | 47 | assert len(async_generator.call_args) == 2 48 | 49 | assert first_call_args[0] == arg_one 50 | assert first_call_args[1] == arg_two 51 | 52 | assert second_call_args[0] == arg_three 53 | assert second_call_args[1] == arg_four 54 | 55 | 56 | @pytest.mark.asyncio 57 | async def test_call_kwargs(): 58 | items = [1] 59 | 60 | async_generator = AsyncIterator(items) 61 | 62 | kwarg_one_value = "kwarg one value" 63 | kwarg_two_value = "kwarg two value" 64 | 65 | # first call 66 | async for _ in async_generator( 67 | kwarg_one_key=kwarg_one_value, kwarg_two_key=kwarg_two_value 68 | ): 69 | pass 70 | 71 | kwarg_three_value = "kwarg three value" 72 | kwarg_four_value = "kwarg four value" 73 | 74 | # second call 75 | async for _ in async_generator( 76 | kwarg_three_key=kwarg_three_value, kwarg_four_key=kwarg_four_value 77 | ): 78 | pass 79 | 80 | first_call_kwargs = async_generator.call_kwargs[0] 81 | second_call_kwargs = async_generator.call_kwargs[1] 82 | 83 | assert len(async_generator.call_kwargs) == 2 84 | 85 | assert first_call_kwargs["kwarg_one_key"] == kwarg_one_value 86 | assert first_call_kwargs["kwarg_two_key"] == kwarg_two_value 87 | 88 | assert second_call_kwargs["kwarg_three_key"] == kwarg_three_value 89 | assert second_call_kwargs["kwarg_four_key"] == kwarg_four_value 90 | 91 | 92 | @pytest.mark.asyncio 93 | async def test_assert_not_called(): 94 | items = [] 95 | 96 | async_generator = AsyncIterator(items) 97 | assert async_generator.assert_not_called() 98 | 99 | 100 | @pytest.mark.asyncio 101 | async def test_assert_called_once(): 102 | items = [] 103 | 104 | async_generator = AsyncIterator(items) 105 | 106 | async for _ in async_generator(): 107 | pass 108 | 109 | # not a direct call on the generator -> call count still 1 110 | async for _ in async_generator: 111 | pass 112 | 113 | assert async_generator.assert_called_once() 114 | -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | 7 | import os 8 | from unittest import mock 9 | 10 | import pytest 11 | from envyaml import EnvYAML 12 | 13 | from connectors.config import _update_config_field, load_config 14 | 15 | HERE = os.path.dirname(__file__) 16 | FIXTURES_DIR = os.path.abspath(os.path.join(HERE, "fixtures")) 17 | 18 | CONFIG_FILE = os.path.join(FIXTURES_DIR, "config.yml") 19 | ES_CONFIG_FILE = os.path.join(FIXTURES_DIR, "entsearch.yml") 20 | ES_CONFIG_INVALID_LOG_LEVEL_FILE = os.path.join( 21 | FIXTURES_DIR, "entsearch_invalid_log_level.yml" 22 | ) 23 | 24 | 25 | def test_bad_config_file(): 26 | with pytest.raises(FileNotFoundError): 27 | load_config("BEEUUUAH") 28 | 29 | 30 | def test_config(set_env): 31 | config = load_config(CONFIG_FILE) 32 | assert isinstance(config, EnvYAML) 33 | 34 | 35 | def test_config_with_ent_search(set_env): 36 | with mock.patch.dict(os.environ, {"ENT_SEARCH_CONFIG_PATH": ES_CONFIG_FILE}): 37 | config = load_config(CONFIG_FILE) 38 | assert config["elasticsearch"]["headers"]["X-Elastic-Auth"] == "SomeYeahValue" 39 | assert config["service"]["log_level"] == "DEBUG" 40 | 41 | 42 | def test_config_with_invalid_log_level(set_env): 43 | with mock.patch.dict( 44 | os.environ, {"ENT_SEARCH_CONFIG_PATH": ES_CONFIG_INVALID_LOG_LEVEL_FILE} 45 | ): 46 | with pytest.raises(ValueError) as e: 47 | _ = load_config(CONFIG_FILE) 48 | 49 | assert e.match("Unexpected log level.*") 50 | 51 | 52 | def test_update_config_when_nested_field_does_not_exist(): 53 | config = {} 54 | 55 | _update_config_field(config, "test.nested.property", 50) 56 | 57 | assert config["test"]["nested"]["property"] == 50 58 | 59 | 60 | def test_update_config_when_nested_field_exists(): 61 | config = {"test": {"nested": {"property": 25}}} 62 | 63 | _update_config_field(config, "test.nested.property", 50) 64 | 65 | assert config["test"]["nested"]["property"] == 50 66 | 67 | 68 | def test_update_config_when_root_field_does_not_exist(): 69 | config = {} 70 | 71 | _update_config_field(config, "test", 50) 72 | 73 | assert config["test"] == 50 74 | 75 | 76 | def test_update_config_when_root_field_exists(): 77 | config = {"test": 10} 78 | 79 | _update_config_field(config, "test", 50) 80 | 81 | assert config["test"] == 50 82 | --------------------------------------------------------------------------------
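The four _update_config_field tests above pin down its contract: a dotted-path setter that creates intermediate dicts on demand and overwrites existing leaves, for nested and root keys alike. A minimal sketch consistent with those tests (the shipped implementation lives in connectors/config.py and may differ in detail):

    def _update_config_field(config, key, value):
        # Split "a.b.c" into parents ["a", "b"] and leaf "c",
        # materialising each missing parent as an empty dict on the way down.
        *parents, leaf = key.split(".")
        current = config
        for segment in parents:
            current = current.setdefault(segment, {})
        current[leaf] = value

This shape is also consistent with test_config_with_ent_search above, where Enterprise Search overrides replace individual settings (a header, a log level) rather than whole config sections.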