├── app
│   └── connectors_service
│       ├── connectors
│       │   ├── cli
│       │   │   ├── .gitkeep
│       │   │   ├── README.md
│       │   │   ├── __init__.py
│       │   │   └── auth.py
│       │   ├── VERSION
│       │   ├── agent
│       │   │   ├── __init__.py
│       │   │   ├── pipelines
│       │   │   │   ├── googledrive_pipeline.json
│       │   │   │   └── salesforce_pipeline.json
│       │   │   ├── logger.py
│       │   │   └── cli.py
│       │   ├── sources
│       │   │   ├── __init__.py
│       │   │   ├── atlassian
│       │   │   │   ├── __init__.py
│       │   │   │   ├── jira
│       │   │   │   │   └── __init__.py
│       │   │   │   ├── confluence
│       │   │   │   │   └── __init__.py
│       │   │   │   └── utils.py
│       │   │   ├── shared
│       │   │   │   ├── __init__.py
│       │   │   │   ├── database
│       │   │   │   │   └── __init__.py
│       │   │   │   └── google
│       │   │   │       └── __init__.py
│       │   │   ├── sharepoint
│       │   │   │   ├── __init__.py
│       │   │   │   ├── sharepoint_server
│       │   │   │   │   └── __init__.py
│       │   │   │   └── sharepoint_online
│       │   │   │       ├── __init__.py
│       │   │   │       ├── validator.py
│       │   │   │       └── utils.py
│       │   │   ├── azure_blob_storage
│       │   │   │   └── __init__.py
│       │   │   ├── directory
│       │   │   │   └── __init__.py
│       │   │   ├── box
│       │   │   │   ├── __init__.py
│       │   │   │   └── constants.py
│       │   │   ├── zoom
│       │   │   │   └── __init__.py
│       │   │   ├── slack
│       │   │   │   └── __init__.py
│       │   │   ├── graphql
│       │   │   │   ├── __init__.py
│       │   │   │   └── constants.py
│       │   │   ├── outlook
│       │   │   │   ├── __init__.py
│       │   │   │   └── utils.py
│       │   │   ├── sandfly
│       │   │   │   └── __init__.py
│       │   │   ├── mysql
│       │   │   │   ├── common.py
│       │   │   │   └── __init__.py
│       │   │   ├── gmail
│       │   │   │   ├── __init__.py
│       │   │   │   └── validator.py
│       │   │   ├── mongo
│       │   │   │   └── __init__.py
│       │   │   ├── microsoft_teams
│       │   │   │   └── __init__.py
│       │   │   ├── s3
│       │   │   │   ├── __init__.py
│       │   │   │   └── validator.py
│       │   │   ├── google_cloud_storage
│       │   │   │   └── __init__.py
│       │   │   ├── oracle
│       │   │   │   ├── __init__.py
│       │   │   │   └── queries.py
│       │   │   ├── notion
│       │   │   │   └── __init__.py
│       │   │   ├── redis
│       │   │   │   └── __init__.py
│       │   │   ├── github
│       │   │   │   ├── __init__.py
│       │   │   │   └── utils.py
│       │   │   ├── onedrive
│       │   │   │   ├── __init__.py
│       │   │   │   ├── constants.py
│       │   │   │   └── validator.py
│       │   │   ├── servicenow
│       │   │   │   └── __init__.py
│       │   │   ├── salesforce
│       │   │   │   └── __init__.py
│       │   │   ├── mssql
│       │   │   │   ├── __init__.py
│       │   │   │   └── queries.py
│       │   │   ├── gitlab
│       │   │   │   └── __init__.py
│       │   │   ├── postgresql
│       │   │   │   ├── __init__.py
│       │   │   │   └── queries.py
│       │   │   ├── google_drive
│       │   │   │   └── __init__.py
│       │   │   ├── network_drive
│       │   │   │   └── __init__.py
│       │   │   └── dropbox
│       │   │       └── __init__.py
│       │   ├── protocol
│       │   │   └── __init__.py
│       │   ├── __init__.py
│       │   ├── es
│       │   │   ├── cli_client.py
│       │   │   ├── license.py
│       │   │   ├── __init__.py
│       │   │   └── language_data.yml
│       │   ├── services
│       │   │   ├── __init__.py
│       │   │   ├── content_sync_job_execution.py
│       │   │   └── access_control_sync_job_execution.py
│       │   ├── build_info.py
│       │   └── access_control.py
│       ├── tests
│       │   ├── sources
│       │   │   ├── fixtures
│       │   │   │   ├── dir
│       │   │   │   │   ├── .env
│       │   │   │   │   ├── config.yml
│       │   │   │   │   ├── docker-compose.yml
│       │   │   │   │   └── fixture.py
│       │   │   │   ├── github
│       │   │   │   │   ├── .env
│       │   │   │   │   ├── config.yml
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── jira
│       │   │   │   │   ├── .env
│       │   │   │   │   ├── config.yml
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── mssql
│       │   │   │   │   ├── .env
│       │   │   │   │   ├── config.yml
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── mysql
│       │   │   │   │   ├── .env
│       │   │   │   │   ├── config.yml
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── oracle
│       │   │   │   │   ├── .env
│       │   │   │   │   ├── config.yml
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── confluence
│       │   │   │   │   ├── .env
│       │   │   │   │   ├── config.yml
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── postgresql
│       │   │   │   │   ├── .env
│       │   │   │   │   ├── config.yml
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── redis
│       │   │   │   │   ├── requirements.txt
│       │   │   │   │   ├── config.yml
│       │   │   │   │   ├── docker-compose.yml
│       │   │   │   │   └── fixture.py
│       │   │   │   ├── servicenow
│       │   │   │   │   ├── .env
│       │   │   │   │   ├── config.yml
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── notion
│       │   │   │   │   ├── .env
│       │   │   │   │   ├── config.yml
│       │   │   │   │   ├── docker-compose.yml
│       │   │   │   │   └── connector.json
│       │   │   │   ├── onedrive
│       │   │   │   │   ├── .env
│       │   │   │   │   ├── config.yml
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── gitlab
│       │   │   │   │   ├── .env
│       │   │   │   │   ├── config.yml
│       │   │   │   │   ├── connector.json
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── microsoft_teams
│       │   │   │   │   ├── .env
│       │   │   │   │   ├── config.yml
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── s3
│       │   │   │   │   ├── .env
│       │   │   │   │   ├── config.yml
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── sharepoint_online
│       │   │   │   │   ├── .env
│       │   │   │   │   ├── config.yml
│       │   │   │   │   ├── nginx
│       │   │   │   │   │   └── conf
│       │   │   │   │   │       └── sharepoint.com
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── box
│       │   │   │   │   ├── .env
│       │   │   │   │   ├── config.yml
│       │   │   │   │   ├── docker-compose.yml
│       │   │   │   │   └── connector.json
│       │   │   │   ├── google_drive
│       │   │   │   │   ├── .env
│       │   │   │   │   ├── config.yml
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── salesforce
│       │   │   │   │   ├── .env
│       │   │   │   │   ├── config.yml
│       │   │   │   │   ├── connector.json
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── zoom
│       │   │   │   │   ├── .env
│       │   │   │   │   ├── config.yml
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── description.txt
│       │   │   │   ├── google_cloud_storage
│       │   │   │   │   ├── .env
│       │   │   │   │   ├── config.yml
│       │   │   │   │   ├── mocker.py
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── dropbox
│       │   │   │   │   ├── .env
│       │   │   │   │   ├── config.yml
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── mongodb
│       │   │   │   │   ├── config.yml
│       │   │   │   │   ├── docker-compose.yml
│       │   │   │   │   └── fixture.py
│       │   │   │   ├── graphql
│       │   │   │   │   ├── config.yml
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── sandfly
│       │   │   │   │   ├── config.yml
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── azure_blob_storage
│       │   │   │   │   ├── config.yml
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── network_drive
│       │   │   │   │   ├── config.yml
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   ├── mongodb_serverless
│       │   │   │   │   ├── config.yml
│       │   │   │   │   └── fixture.py
│       │   │   │   ├── sharepoint_server
│       │   │   │   │   ├── config.yml
│       │   │   │   │   └── docker-compose.yml
│       │   │   │   └── README.md
│       │   │   ├── test_directory.py
│       │   │   └── support.py
│       │   ├── Dockerfile.ftest
│       │   ├── fixtures
│       │   │   ├── entsearch.yml
│       │   │   ├── entsearch_invalid_log_level.yml
│       │   │   ├── config_2.yml
│       │   │   ├── config_https.yml
│       │   │   ├── config_mem.yml
│       │   │   ├── memconfig.yml
│       │   │   └── config.yml
│       │   ├── __init__.py
│       │   ├── es
│       │   │   ├── test_cli_client.py
│       │   │   └── test_license.py
│       │   ├── agent
│       │   │   ├── test_cli.py
│       │   │   └── test_component.py
│       │   └── test_access_control.py
│       ├── scripts
│       │   ├── testing
│       │   │   ├── startup_scipt.sh
│       │   │   ├── pull-connectors.sh
│       │   │   ├── scenarios
│       │   │   │   └── clients
│       │   │   │       ├── spo_full_and_incremental_syncs.yml
│       │   │   │       └── spo_automated_testing_site.json
│       │   │   └── docker-compose.yml
│       │   ├── __init__.py
│       │   └── deps-csv.py
│       ├── MANIFEST.in
│       └── README.md
├── libs
│   └── connectors_sdk
│       ├── connectors_sdk
│       │   ├── VERSION
│       │   ├── filtering
│       │   │   └── __init__.py
│       │   ├── __init__.py
│       │   └── config.py
│       ├── .gitignore
│       ├── tests
│       │   └── __init__.py
│       ├── pyrightconfig.json
│       ├── pytest.ini
│       ├── .ruff.toml
│       ├── pyproject.toml
│       ├── Makefile
│       └── README.md
├── docs
│   ├── reference
│   │   └── README.md
│   ├── CODE_OF_CONDUCT.md
│   ├── SECURITY.md
│   ├── SUPPORT.md
│   └── sync-rules
│       └── s3.md
├── .github
│   ├── CODEOWNERS
│   ├── ISSUE_TEMPLATE
│   │   ├── config.yml
│   │   ├── enhancement.md
│   │   └── bug_report.md
│   └── workflows
│       ├── add-labels-main.yml
│       ├── label-community-issues.yml
│       ├── scripts
│       │   └── label_community_issues.py
│       └── backport.yml
├── logo-enterprise-search.png
├── .coveragerc
├── .buildkite
│   ├── run_tests.sh
│   ├── publish
│   │   ├── git-setup.sh
│   │   ├── manual-release
│   │   │   ├── restore-stack-version.sh
│   │   │   └── update-release-version.sh
│   │   ├── build-docker.sh
│   │   ├── push-docker.sh
│   │   └── build-multiarch-docker.sh
│   ├── nightly_aarch64.py
│   ├── nightly.py
│   ├── README.md
│   ├── pull-requests.json
│   ├── run_functional_test.sh
│   ├── publish_to_pypi.sh
│   ├── diff
│   ├── run_notice_check.sh
│   ├── run_linter.sh
│   ├── publish_docker.sh
│   ├── shared.sh
│   └── test_python_packages.sh
├── resources
│   ├── agent
│   │   ├── python-elastic-agent-client
│   │   └── python-elastic-agent-client.spec.yml
│   └── connectors_api
│       └── README.md
├── Dockerfile.wolfi
├── .gitignore
├── .backportrc.json
├── scripts
│   └── stack
│       ├── wait-for-kibana.sh
│       ├── wait-for-elasticsearch.sh
│       ├── update-kibana-user-password.sh
│       ├── view-connectors-logs.sh
│       ├── stop-stack.sh
│       ├── parse-params.sh
│       ├── copy-config.sh
│       ├── set-env.sh
│       ├── docker
│       │   └── docker-compose.yml
│       └── configure-connectors.sh
├── Dockerfile
├── renovate.json
└── Dockerfile.agent

/app/connectors_service/connectors/cli/.gitkeep:

/app/connectors_service/connectors/VERSION:
9.4.0

/libs/connectors_sdk/connectors_sdk/VERSION:
9.4.0

/docs/reference/README.md:
See [REFERENCE.md](../REFERENCE.md).

/app/connectors_service/tests/sources/fixtures/dir/.env:
SYSTEM_DIR=dir/data

/app/connectors_service/tests/sources/fixtures/github/.env:
MAX_RSS="240M"

/app/connectors_service/tests/sources/fixtures/jira/.env:
MAX_RSS="240M"

/app/connectors_service/tests/sources/fixtures/mssql/.env:
MAX_RSS="230M"

/app/connectors_service/tests/sources/fixtures/mysql/.env:
MAX_RSS="240M"

/app/connectors_service/tests/sources/fixtures/oracle/.env:
MAX_RSS="240M"

/.github/CODEOWNERS:
# Global rule
* @elastic/search-extract-and-transform

/app/connectors_service/tests/sources/fixtures/confluence/.env:
MAX_RSS="240M"

/app/connectors_service/tests/sources/fixtures/postgresql/.env:
MAX_RSS="410M"

/app/connectors_service/tests/sources/fixtures/redis/requirements.txt:
redis==5.0.1

/app/connectors_service/tests/sources/fixtures/servicenow/.env:
MAX_RSS="240M"

/libs/connectors_sdk/.gitignore:
htmlcov
build
.venv
.ruff_cache
.coverage

/app/connectors_service/tests/sources/fixtures/notion/.env:
OVERRIDE_URL="http://localhost:9096"

/app/connectors_service/tests/sources/fixtures/onedrive/.env:
OVERRIDE_URL="http://localhost:10972"

/app/connectors_service/tests/sources/fixtures/gitlab/.env:
GITLAB_FTEST_HOST="https://127.0.0.1:9091"

/app/connectors_service/tests/sources/fixtures/microsoft_teams/.env:
OVERRIDE_URL="http://localhost:10971"

/docs/CODE_OF_CONDUCT.md:
303 See Other

Location: https://www.elastic.co/community/codeofconduct

/logo-enterprise-search.png:
https://raw.githubusercontent.com/elastic/connectors/HEAD/logo-enterprise-search.png

/app/connectors_service/scripts/testing/startup_scipt.sh:
set -e

touch /var/log/startup-is-finished

/app/connectors_service/tests/sources/fixtures/s3/.env:
AWS_ENDPOINT_URL="http://127.0.0.1"
AWS_PORT=5001

/app/connectors_service/tests/sources/fixtures/sharepoint_online/.env:
OVERRIDE_URL="http://localhost:10337"

/app/connectors_service/tests/sources/fixtures/box/.env:
MAX_RSS="240M"
BOX_BASE_URL="http://127.0.0.1:9092"

/app/connectors_service/tests/sources/fixtures/google_drive/.env:
GOOGLE_API_FTEST_HOST="http://localhost:10339"

/app/connectors_service/tests/sources/fixtures/salesforce/.env:
SALESFORCE_EMULATOR_HOST="http://localhost:10338"

/app/connectors_service/tests/sources/fixtures/zoom/.env:
MAX_RSS="240M"
OVERRIDE_URL="http://127.0.0.1:10971"

/.coveragerc:
[report]
omit = connectors/quartz.py,connectors/conftest.py,tests/*,connectors/agent/*,connectors/cli/*

/app/connectors_service/tests/sources/fixtures/description.txt:
Running an e2e test for google_drive with a medium corpus.

/app/connectors_service/connectors/cli/README.md:
# Docs

Refer to [CLI.md](../../docs/CLI.md) for the connectors CLI documentation.
/app/connectors_service/tests/sources/fixtures/google_cloud_storage/.env:
STORAGE_EMULATOR_HOST="http://localhost:4443"
MAX_RSS="290M"

/app/connectors_service/tests/Dockerfile.ftest:
FROM connectors-base

WORKDIR app/connectors_service
RUN .venv/bin/pip install ".[ftest]"

/app/connectors_service/MANIFEST.in:
include Makefile README.md config.yml LICENSE
recursive-include connectors/ *.yml
include connectors/VERSION

/app/connectors_service/tests/sources/fixtures/box/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'box'
    service_type: 'box'

/app/connectors_service/tests/sources/fixtures/dir/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'dir'
    service_type: 'dir'

/app/connectors_service/tests/sources/fixtures/s3/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 's3'
    service_type: 's3'

/app/connectors_service/tests/sources/fixtures/dropbox/.env:
MAX_RSS="240M"
DROPBOX_API_URL="http://127.0.0.1:8085/"
DROPBOX_API_URL_V2="http://127.0.0.1:8085/2/"

/app/connectors_service/tests/sources/fixtures/jira/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'jira'
    service_type: 'jira'

/app/connectors_service/tests/sources/fixtures/mssql/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'mssql'
    service_type: 'mssql'

/app/connectors_service/tests/sources/fixtures/mysql/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'mysql'
    service_type: 'mysql'

/app/connectors_service/tests/sources/fixtures/redis/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'redis'
    service_type: 'redis'

/app/connectors_service/tests/sources/fixtures/zoom/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'zoom'
    service_type: 'zoom'

/app/connectors_service/tests/sources/fixtures/github/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'github'
    service_type: 'github'

/app/connectors_service/tests/sources/fixtures/gitlab/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'gitlab'
    service_type: 'gitlab'

/app/connectors_service/tests/sources/fixtures/mongodb/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'mongo'
    service_type: 'mongodb'

/app/connectors_service/tests/sources/fixtures/notion/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'notion'
    service_type: 'notion'

/app/connectors_service/tests/sources/fixtures/dropbox/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'dropbox'
    service_type: 'dropbox'

/app/connectors_service/tests/sources/fixtures/graphql/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'graphql'
    service_type: 'graphql'

/app/connectors_service/tests/sources/fixtures/onedrive/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'onedrive'
    service_type: 'onedrive'

/app/connectors_service/tests/sources/fixtures/oracle/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'oracle'
    service_type: 'oracle'

/app/connectors_service/tests/sources/fixtures/sandfly/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'sandfly'
    service_type: 'sandfly'

/app/connectors_service/tests/sources/fixtures/confluence/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'confluence'
    service_type: 'confluence'

/app/connectors_service/tests/sources/fixtures/postgresql/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'postgres'
    service_type: 'postgresql'

/app/connectors_service/tests/sources/fixtures/salesforce/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'salesforce'
    service_type: 'salesforce'

/app/connectors_service/tests/sources/fixtures/servicenow/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'servicenow'
    service_type: 'servicenow'

/app/connectors_service/tests/sources/fixtures/azure_blob_storage/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'abs'
    service_type: 'azure_blob_storage'

/app/connectors_service/tests/sources/fixtures/google_drive/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'google_drive'
    service_type: 'google_drive'

/app/connectors_service/tests/sources/fixtures/network_drive/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'network_drive'
    service_type: 'network_drive'

/app/connectors_service/tests/sources/fixtures/google_cloud_storage/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'gcs'
    service_type: 'google_cloud_storage'

/app/connectors_service/tests/sources/fixtures/microsoft_teams/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'microsoft_teams'
    service_type: 'microsoft_teams'

/app/connectors_service/tests/sources/fixtures/mongodb_serverless/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'mongo_serverless'
    service_type: 'mongodb'

/app/connectors_service/tests/sources/fixtures/sharepoint_server/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'sharepoint_server'
    service_type: 'sharepoint_server'

/app/connectors_service/tests/sources/fixtures/sharepoint_online/config.yml:
service.idling: 1

connectors:
  -
    connector_id: 'sharepoint_online'
    service_type: 'sharepoint_online'

/.buildkite/run_tests.sh:
#!/bin/bash

# !!! WARNING DO NOT add -x to avoid leaking vault passwords
set -euo pipefail

source .buildkite/shared.sh

init_python

make test

/app/connectors_service/tests/fixtures/entsearch.yml:
elasticsearch:
  host: http://nowhere.com:9200
  user: elastic
  password: ${elasticsearch.password}
  headers:
    X-Elastic-Auth: SomeYeahValue
    X-Something: 1

log_level: debug

/resources/agent/python-elastic-agent-client:
#!/bin/bash
PY_AGENT_CLIENT_PATH=/usr/share/connectors/app/connectors_service
PYTHON_PATH=$PY_AGENT_CLIENT_PATH/.venv/bin/python
COMPONENT_PATH=$PY_AGENT_CLIENT_PATH/connectors/agent/cli.py
$PYTHON_PATH $COMPONENT_PATH

/Dockerfile.wolfi:
FROM docker.elastic.co/wolfi/python:3.11-dev@sha256:e2d3d2bba33963144b9a88fd23285de3acb316ecd00b01dcbe8ac8b806d1ce3d
USER root
COPY . /app
WORKDIR /app
RUN make clean install-package
RUN ln -s app/connectors_service/.venv/bin /app/bin
ENTRYPOINT []

/app/connectors_service/tests/fixtures/entsearch_invalid_log_level.yml:
elasticsearch:
  host: http://nowhere.com:9200
  user: elastic
  password: ${elasticsearch.password}
  headers:
    X-Elastic-Auth: SomeYeahValue
    X-Something: 1

log_level: WHAT

/libs/connectors_sdk/tests/__init__.py:
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#

/app/connectors_service/scripts/__init__.py:
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#
/app/connectors_service/tests/__init__.py:
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#

/app/connectors_service/connectors/cli/__init__.py:
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#

/app/connectors_service/connectors/agent/__init__.py:
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#

/app/connectors_service/connectors/sources/__init__.py:
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#

/app/connectors_service/scripts/testing/pull-connectors.sh:
mkdir -p /var/app/

git clone https://github.com/elastic/connectors.git /var/app
cd /var/app
git clean -ffxdq
git fetch -v --prune -- origin $1
git checkout -f $1

make clean install
chmod a+rxw /var/app -R

/.buildkite/publish/git-setup.sh:
#!/bin/bash
set -ex

export GIT_BRANCH=${BUILDKITE_BRANCH}

git switch -
git checkout $GIT_BRANCH
git pull origin $GIT_BRANCH
git config --local user.email 'elasticmachine@users.noreply.github.com'
git config --local user.name 'Elastic Machine'

/libs/connectors_sdk/connectors_sdk/filtering/__init__.py:
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#

/app/connectors_service/connectors/sources/atlassian/__init__.py:
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#

/app/connectors_service/connectors/sources/shared/__init__.py:
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#

/app/connectors_service/connectors/sources/sharepoint/__init__.py:
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#

/docs/SECURITY.md:
# Security Policy

Thanks for your interest in the security of our products. Our security policy can be found at [https://www.elastic.co/community/security](https://www.elastic.co/community/security).

## Reporting a Vulnerability
Please send security vulnerability reports to security@elastic.co.

/app/connectors_service/connectors/protocol/__init__.py:
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#
from .connectors import * # NOQA

/.gitignore:
lib
lib64
dist
include
.coverage
*.un~
*.swp
pyvenv.cfg
*.egg-info
__pycache__

# real configurations
app/connectors_service/config.yml

# jetbrains files
.idea
*.iml
.cli

htmlcov
.venv
**/venv
scripts/stack/connectors-config

.python-version

/.backportrc.json:
{
  "targetBranchChoices": [
    { "name": "main", "checked": true },
    "9.3",
    "9.2",
    "9.1",
    "8.19"
  ],
  "fork": false,
  "targetPRLabels": ["backport"],
  "branchLabelMapping": {
    "^v9.4.0$": "main",
    "^v(\\d+).(\\d+)(.\\d+)*$": "$1.$2"
  },
  "upstream": "elastic/connectors"
}

/scripts/stack/wait-for-kibana.sh:
#!/bin/bash

if [ $# -eq 0 ]; then
  KIBANA_URL="http://localhost:5601"
else
  KIBANA_URL="$1"
  shift
fi

echo "Connecting to Kibana on $KIBANA_URL"
until curl -XGET --silent --output /dev/null --max-time 1 "$@" ${KIBANA_URL}/status -I; do
  echo 'Waiting for Kibana to be running...'
  sleep 2
done

/app/connectors_service/connectors/agent/pipelines/googledrive_pipeline.json:
{
  "description": "ingest pipeline for google drive content",
  "processors": [
    {
      "rename": {
        "field": "name",
        "target_field": "title",
        "description": "renames the name field to title",
        "ignore_failure": true
      }
    }
  ]
}

/.github/ISSUE_TEMPLATE/config.yml:
blank_issues_enabled: false
contact_links:
  - name: Question or Discussion
    url: https://discuss.elastic.co/c/search
    about: Please ask and answer questions here.
  - name: Security Vulnerability
    url: https://www.elastic.co/community/security
    about: DO NOT file issues related to security. Instead, please follow our security policy here.

/app/connectors_service/connectors/sources/shared/database/__init__.py:
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#

from .generic_database import Queries

__all__ = ["Queries"]

/.buildkite/nightly_aarch64.py:
#!/usr/bin/python3
import os

_AGENTS = """\
agents:
  provider: aws
  instanceType: m6g.xlarge
  imagePrefix: enterprise-search-ubuntu-2204-aarch64-connectors-py
"""

with open(os.path.join(os.path.dirname(__file__), "nightly_steps.yml")) as f:
    steps = f.read().strip()


print(_AGENTS)
print()
print(steps)

/.buildkite/nightly.py:
#!/usr/bin/python3
import os

_AGENTS = """\
agents:
  provider: "gcp"
  machineType: "n1-standard-8"
  useVault: true
  image: family/enterprise-search-ubuntu-2204-connectors-py
"""

with open(os.path.join(os.path.dirname(__file__), "nightly_steps.yml")) as f:
    steps = f.read().strip()


print(_AGENTS)
print()
print(steps)

/app/connectors_service/connectors/sources/azure_blob_storage/__init__.py:
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#
from .datasource import AzureBlobStorageDataSource

__all__ = ["AzureBlobStorageDataSource"]

/Dockerfile:
FROM cgr.dev/chainguard/wolfi-base:latest
ARG python_version=3.11

USER root
RUN apk add --no-cache python3=~${python_version} make git

COPY --chown=nonroot:nonroot . /app

USER nonroot
WORKDIR /app
RUN make clean install-package
RUN ln -s app/connectors_service/.venv/bin /app/bin

USER root
RUN apk del make git

USER nonroot
ENTRYPOINT []

/app/connectors_service/connectors/sources/directory/__init__.py:
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#
from .datasource import DEFAULT_DIR, DirectoryDataSource

__all__ = ["DirectoryDataSource", "DEFAULT_DIR"]

/app/connectors_service/tests/sources/fixtures/sharepoint_online/nginx/conf/sharepoint.com:
server {
    listen 80;
    listen [::]:80;

    server_name example.org www.example.org;
    server_tokens off;

    location /.well-known/acme-challenge/ {
        root /var/www/certbot;
    }

    location / {
        return 301 https://www.sharepoint.com$request_uri;
    }
}

/app/connectors_service/connectors/__init__.py:
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#
import os

with open(os.path.join(os.path.dirname(__file__), "VERSION")) as f:
    __version__ = f.read().strip()

/libs/connectors_sdk/connectors_sdk/__init__.py:
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#
import os

with open(os.path.join(os.path.dirname(__file__), "VERSION")) as f:
    __version__ = f.read().strip()

/libs/connectors_sdk/pyrightconfig.json:
{
  "pythonVersion": "3.10",
  "include": [
    "connectors_sdk"
  ],
  "reportMissingImports": false,
  "reportMissingModuleSource": false,
  "reportOptionalMemberAccess": false,
  "exclude": [
    "**/tests",
    "**/__pycache__"
  ],
  "executionEnvironments": [
    {
      "root": "./",
      "venv": "./"
    }
  ]
}

/app/connectors_service/connectors/sources/box/__init__.py:
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#

from .client import BoxClient
from .datasource import BoxDataSource

__all__ = ["BoxDataSource", "BoxClient"]

/app/connectors_service/connectors/sources/zoom/__init__.py:
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#
from .client import ZoomClient
from .datasource import ZoomDataSource

__all__ = ["ZoomClient", "ZoomDataSource"]
Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from .client import ZoomClient 7 | from .datasource import ZoomDataSource 8 | 9 | __all__ = ["ZoomClient", "ZoomDataSource"] 10 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/slack/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from .client import SlackClient 7 | from .datasource import SlackDataSource 8 | 9 | __all__ = ["SlackClient", "SlackDataSource"] 10 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/atlassian/jira/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from .client import JiraClient 7 | from .datasource import JiraDataSource 8 | 9 | __all__ = ["JiraClient", "JiraDataSource"] 10 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/graphql/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from .client import GraphQLClient 7 | from .datasource import GraphQLDataSource 8 | 9 | __all__ = ["GraphQLDataSource", "GraphQLClient"] 10 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/outlook/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | 7 | from .client import OutlookClient 8 | from .datasource import OutlookDataSource 9 | 10 | __all__ = ["OutlookDataSource", "OutlookClient"] 11 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/sandfly/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | 7 | from .client import SandflyClient 8 | from .datasource import SandflyDataSource 9 | 10 | __all__ = ["SandflyDataSource", "SandflyClient"] 11 | -------------------------------------------------------------------------------- /scripts/stack/wait-for-elasticsearch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# -eq 0 ]; then 4 | ELASTICSEARCH_URL="http://localhost:9200" 5 | else 6 | ELASTICSEARCH_URL="$1" 7 | shift 8 | fi 9 | 10 | echo "Connecting to Elasticsearch on $ELASTICSEARCH_URL" 11 | until curl -u elastic:${ELASTIC_PASSWORD:-"changeme"} --silent --output /dev/null --max-time 1 "$@" ${ELASTICSEARCH_URL}; do 12 | echo 'Waiting for Elasticsearch to be running...' 13 | sleep 2 14 | done 15 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/mysql/common.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | MAX_POOL_SIZE = 10 7 | DEFAULT_FETCH_SIZE = 5000 8 | RETRIES = 3 9 | RETRY_INTERVAL = 2 10 | 11 | 12 | def format_list(list_): 13 | return ", ".join(list_) 14 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/atlassian/confluence/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from .client import ConfluenceClient 7 | from .datasource import ConfluenceDataSource 8 | 9 | __all__ = ["ConfluenceClient", "ConfluenceDataSource"] 10 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/gmail/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from .datasource import GMailDataSource 7 | from .validator import GMailAdvancedRulesValidator 8 | 9 | __all__ = ["GMailDataSource", "GMailAdvancedRulesValidator"] 10 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/mongo/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | from .datasource import MongoDataSource 7 | from .validator import MongoAdvancedRulesValidator 8 | 9 | __all__ = ["MongoDataSource", "MongoAdvancedRulesValidator"] 10 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/microsoft_teams/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from .client import MicrosoftTeamsClient 7 | from .datasource import MicrosoftTeamsDataSource 8 | 9 | __all__ = ["MicrosoftTeamsClient", "MicrosoftTeamsDataSource"] 10 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/s3/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from .client import S3Client 7 | from .datasource import S3DataSource 8 | from .validator import S3AdvancedRulesValidator 9 | 10 | __all__ = ["S3Client", "S3DataSource", "S3AdvancedRulesValidator"] 11 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/google_cloud_storage/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from .client import GoogleCloudStorageClient 7 | from .datasource import GoogleCloudStorageDataSource 8 | 9 | __all__ = ["GoogleCloudStorageClient", "GoogleCloudStorageDataSource"] 10 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/oracle/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | 7 | from .client import OracleClient 8 | from .datasource import OracleDataSource 9 | from .queries import OracleQueries 10 | 11 | __all__ = ["OracleDataSource", "OracleClient", "OracleQueries"] 12 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/es/cli_client.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | from connectors.es.client import USER_AGENT_BASE 7 | from connectors.es.management_client import ESManagementClient 8 | 9 | 10 | class CLIClient(ESManagementClient): 11 | user_agent = f"{USER_AGENT_BASE}/cli" 12 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/sharepoint/sharepoint_server/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | 7 | from .client import SharepointServerClient 8 | from .datasource import SharepointServerDataSource 9 | 10 | __all__ = ["SharepointServerDataSource", "SharepointServerClient"] 11 | -------------------------------------------------------------------------------- /app/connectors_service/tests/fixtures/config_2.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://nowhere.com:9200 3 | user: elastic 4 | password: ${elasticsearch.password} 5 | bulk: 6 | queue_max_size: 1024 7 | max_wait_duration: 1 8 | initial_backoff_duration: 0 9 | backoff_multiplier: 0 10 | 11 | service: 12 | idling: 0.5 13 | heartbeat: 300 14 | max_errors: 20 15 | max_errors_span: 600 16 | 17 | connectors: 18 | - 19 | connector_id: 'blah' 20 | 21 | sources: 22 | fake: tests.fake_sources:FakeSource 23 | 24 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/notion/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from .client import NotionClient 7 | from .datasource import NotionDataSource 8 | from .validator import NotionAdvancedRulesValidator 9 | 10 | __all__ = ["NotionClient", "NotionDataSource", "NotionAdvancedRulesValidator"] 11 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/redis/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | 7 | from .client import RedisClient 8 | from .datasource import RedisDataSource 9 | from .validator import RedisAdvancedRulesValidator 10 | 11 | __all__ = ["RedisDataSource", "RedisClient", "RedisAdvancedRulesValidator"] 12 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/github/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | 7 | from .client import GitHubClient 8 | from .datasource import GitHubDataSource 9 | from .validator import GitHubAdvancedRulesValidator 10 | 11 | __all__ = ["GitHubDataSource", "GitHubClient", "GitHubAdvancedRulesValidator"] 12 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/onedrive/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | 7 | from .client import OneDriveClient 8 | from .datasource import OneDriveDataSource 9 | from .validator import OneDriveAdvancedRulesValidator 10 | 11 | __all__ = ["OneDriveDataSource", "OneDriveClient", "OneDriveAdvancedRulesValidator"] 12 | -------------------------------------------------------------------------------- /resources/connectors_api/README.md: -------------------------------------------------------------------------------- 1 | ### Setup 2 | 3 | You need to specify values for the following variables: 4 | 5 | - `ES_HOST_PORT`: Specifies the host and port of the Elasticsearch instance you want to test against (example: `http://localhost:9200`). 6 | - `ES_USER`: Specifies the user used to authenticate against the Elasticsearch instance (example: `elastic`). 7 | - `ES_PASSWORD`: Specifies the password used to authenticate against the Elasticsearch instance (example: `password`). 8 | 9 | **Note: the Postman collection uses basic auth by default.** -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/servicenow/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from .client import ServiceNowClient 7 | from .datasource import ServiceNowDataSource 8 | from .validator import ServiceNowAdvancedRulesValidator 9 | 10 | __all__ = [ 11 | "ServiceNowClient", 12 | "ServiceNowDataSource", 13 | "ServiceNowAdvancedRulesValidator", 14 | ] 15 | -------------------------------------------------------------------------------- /app/connectors_service/tests/fixtures/config_https.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: https://safenowhere.com 3 | user: elastic 4 | password: ${elasticsearch.password} 5 | bulk: 6 | queue_max_size: 1024 7 | chunk_size: 250 8 | max_wait_duration: 1 9 | initial_backoff_duration: 0 10 | backoff_multiplier: 0 11 | 12 | service: 13 | idling: 0.5 14 | heartbeat: 300 15 | max_errors: 20 16 | max_errors_span: 600 17 | 18 | connectors: 19 | - 20 | connector_id: '1' 21 | 22 | sources: 23 | fake: tests.fake_sources:FakeSource 24 | large_fake: tests.fake_sources:LargeFakeSource 25 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/salesforce/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V.
and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | 7 | from .client import SalesforceClient 8 | from .datasource import SalesforceDataSource 9 | from .validator import SalesforceAdvancedRulesValidator 10 | 11 | __all__ = [ 12 | "SalesforceDataSource", 13 | "SalesforceClient", 14 | "SalesforceAdvancedRulesValidator", 15 | ] 16 | -------------------------------------------------------------------------------- /.buildkite/publish/manual-release/restore-stack-version.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex 3 | 4 | # Load our common environment variables for publishing 5 | export REL_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 6 | CURDIR="$(dirname "$REL_DIR")" 7 | 8 | source $CURDIR/publish-common.sh 9 | 10 | echo $ORIG_VERSION > $PROJECT_ROOT/app/connectors_service/connectors/VERSION # removes the timestamp suffix 11 | UPDATED_VERSION=`cat $PROJECT_ROOT/app/connectors_service/connectors/VERSION` 12 | 13 | git add $PROJECT_ROOT/app/connectors_service/connectors/VERSION 14 | git commit -m "Restoring version from ${VERSION} to ${UPDATED_VERSION}" 15 | git push origin ${GIT_BRANCH} 16 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/mssql/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from .client import MSSQLClient 7 | from .datasource import MSSQLDataSource 8 | from .queries import MSSQLQueries 9 | from .validator import MSSQLAdvancedRulesValidator 10 | 11 | __all__ = [ 12 | "MSSQLDataSource", 13 | "MSSQLAdvancedRulesValidator", 14 | "MSSQLClient", 15 | "MSSQLQueries", 16 | ] 17 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/gitlab/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | """GitLab connector package. 7 | 8 | This package provides integration with GitLab Cloud for syncing projects, issues, 9 | merge requests, epics, releases, and README files to Elasticsearch. 10 | """ 11 | 12 | from connectors.sources.gitlab.datasource import GitLabDataSource 13 | 14 | __all__ = ["GitLabDataSource"] 15 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/mysql/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0.
5 | # 6 | from .client import MySQLClient, generate_id, row2doc 7 | from .datasource import MySqlDataSource 8 | from .validator import MySQLAdvancedRulesValidator 9 | 10 | __all__ = [ 11 | "MySqlDataSource", 12 | "MySQLAdvancedRulesValidator", 13 | "MySQLClient", 14 | "row2doc", 15 | "generate_id", 16 | ] 17 | -------------------------------------------------------------------------------- /resources/agent/python-elastic-agent-client.spec.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | inputs: 3 | - name: connectors-py 4 | description: "Connectors Py component input" 5 | platforms: &platforms 6 | - linux/amd64 7 | - linux/arm64 8 | - darwin/amd64 9 | - darwin/arm64 10 | - windows/amd64 11 | - container/amd64 12 | - container/arm64 13 | outputs: &outputs 14 | - elasticsearch 15 | shippers: &shippers 16 | - shipper 17 | command: &command 18 | restart_monitoring_period: 5s 19 | maximum_restarts_per_period: 1 20 | timeouts: 21 | restart: 1s 22 | args: [] 23 | -------------------------------------------------------------------------------- /scripts/stack/update-kibana-user-password.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# -eq 0 ]; then 4 | ELASTICSEARCH_URL="http://localhost:9200" 5 | else 6 | ELASTICSEARCH_URL="$1" 7 | shift 8 | fi 9 | 10 | if [[ ${CURDIR:-} == "" ]]; then 11 | export CURDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 12 | fi 13 | 14 | echo "Updating Kibana password in Elasticsearch running on $ELASTICSEARCH_URL" 15 | change_data="{ \"password\": \"${ELASTIC_PASSWORD}\" }" 16 | curl -u elastic:$ELASTIC_PASSWORD "$@" -X POST "${ELASTICSEARCH_URL}/_security/user/kibana_system/_password?pretty" -H 'Content-Type: application/json' -d"${change_data}" 17 | -------------------------------------------------------------------------------- /.buildkite/README.md: -------------------------------------------------------------------------------- 1 | ## Here we define our Buildkite pipelines 2 | 3 | We use our own custom image. The image definition can be found here: https://github.com/elastic/ci-agent-images/pull/132 4 | 5 | The image is built weekly; see the cron definition: https://github.com/elastic/ci/pull/1813/files 6 | 7 | The image and cron job were built following instructions from several sources: 8 | 9 | - https://docs.elastic.dev/ci/agent-images-for-buildkite 10 | - https://github.com/elastic/ci/blob/main/vm-images/README.md 11 | - https://github.com/elastic/ci-agent-images/README.md 12 | 13 | If anything is unclear, don't hesitate to reach out in the #buildkite Slack channel. 14 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/sharepoint/sharepoint_online/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0.
5 | # 6 | 7 | from .client import SharepointOnlineClient 8 | from .datasource import SharepointOnlineDataSource 9 | from .validator import SharepointOnlineAdvancedRulesValidator 10 | 11 | __all__ = [ 12 | "SharepointOnlineDataSource", 13 | "SharepointOnlineAdvancedRulesValidator", 14 | "SharepointOnlineClient", 15 | ] 16 | -------------------------------------------------------------------------------- /app/connectors_service/tests/fixtures/config_mem.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://nowhere.com:9200 3 | user: elastic 4 | password: ${elasticsearch.password} 5 | bulk: 6 | queue_max_size: 1024 7 | chunk_size: 250 8 | max_wait_duration: 1 9 | initial_backoff_duration: 0 10 | backoff_multiplier: 0 11 | 12 | service: 13 | idling: 0.5 14 | heartbeat: 300 15 | max_errors: 20 16 | max_errors_span: 600 17 | trace_mem: true 18 | 19 | connectors: 20 | - 21 | connector_id: '1' 22 | 23 | sources: 24 | fake: tests.fake_sources:FakeSource 25 | large_fake: tests.fake_sources:LargeFakeSource 26 | fail_once: tests.fake_sources:FailsThenWork 27 | -------------------------------------------------------------------------------- /app/connectors_service/README.md: -------------------------------------------------------------------------------- 1 | # Connectors service 2 | 3 | The connectors service is what powers the Elastic Connector experience. It handles the connection to Elasticsearch, content syncs, scheduling and final cleanup. 4 | 5 | The source code implementations for individual data sources also live here. If you are looking to contribute a new data source implementation, this is the place to write it. 6 | 7 | ## What's here? 8 | - A `pyproject.toml` file 9 | - Connectors service definition and its entry points under `connectors/` 10 | - The source code implementation for individual data sources under `connectors/sources/` 11 | - Relevant testing code and fixtures under `tests/` 12 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/es/license.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | from connectors.protocol import JobType 7 | 8 | 9 | def requires_platinum_license(sync_job, connector, source_klass): 10 | """Returns whether this scenario requires a Platinum license""" 11 | return ( 12 | sync_job.job_type == JobType.ACCESS_CONTROL 13 | and connector.features.document_level_security_enabled() 14 | ) or source_klass.is_premium() 15 | -------------------------------------------------------------------------------- /.github/workflows/add-labels-main.yml: -------------------------------------------------------------------------------- 1 | name: Force backport labels for main 2 | 3 | on: 4 | pull_request_target: 5 | branches: 6 | - main 7 | types: 8 | - opened 9 | 10 | jobs: 11 | add_labels: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | - id: version 16 | uses: juliangruber/read-file-action@ebfa650188272343fef925480eb4d18c5d49b925 17 | with: 18 | path: ./app/connectors_service/connectors/VERSION 19 | - uses: actions-ecosystem/action-add-labels@v1 20 | with: 21 | labels: | 22 | auto-backport 23 | v${{ steps.version.outputs.content }} 24 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/postgresql/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | 7 | from .client import PostgreSQLClient 8 | from .datasource import PostgreSQLDataSource 9 | from .queries import PostgreSQLQueries 10 | from .validator import PostgreSQLAdvancedRulesValidator 11 | 12 | __all__ = [ 13 | "PostgreSQLDataSource", 14 | "PostgreSQLClient", 15 | "PostgreSQLAdvancedRulesValidator", 16 | "PostgreSQLQueries", 17 | ] 18 | -------------------------------------------------------------------------------- /app/connectors_service/scripts/testing/scenarios/clients/spo_full_and_incremental_syncs.yml: -------------------------------------------------------------------------------- 1 | --- 2 | scenarios: 3 | - index_name: search-demo-index-001 4 | connector_name: spo 5 | service_type: sharepoint_online 6 | index_language: en 7 | connector_configuration: scenarios/clients/spo_automated_testing_site.json 8 | native: false 9 | tests: 10 | - name: Full sync job is performed without errors 11 | job_type: full 12 | timeout: 60 13 | match: { status: 'completed'} 14 | - name: Incremental sync job is performed without errors 15 | job_type: incremental 16 | timeout: 20 17 | match: { status: 'completed'} 18 | -------------------------------------------------------------------------------- /app/connectors_service/tests/fixtures/memconfig.yml: -------------------------------------------------------------------------------- 1 | elasticsearch: 2 | host: http://nowhere.com:9200 3 | user: elastic 4 | password: ${elasticsearch.password} 5 | bulk: 6 | queue_max_size: 1024 7 | chunk_size: 500 8 | chunk_max_mem_size: 0.5 9 | queue_max_mem_size: 25 10 | max_wait_duration: 1 11 | initial_backoff_duration: 0 12 | backoff_multiplier: 0 13 | 14 | service: 15 | idling: 0.5 16 | heartbeat: 300 17 | max_errors: 20 18 | max_errors_span: 600 19 | 20 | connectors: 21 | - 22 | connector_id: '1' 23 | 24 | sources: 25 | fake: fake_sources:FakeSource 26 | large_fake: fake_sources:LargeFakeSource 27 | fail_once: 
fake_sources:FailsThenWork 28 | -------------------------------------------------------------------------------- /.buildkite/publish/manual-release/update-release-version.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex 3 | 4 | # Load our common environment variables for publishing 5 | export REL_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 6 | CURDIR="$(dirname "$REL_DIR")" 7 | 8 | source $CURDIR/publish-common.sh 9 | 10 | echo $VERSION > $PROJECT_ROOT/app/connectors_service/connectors/VERSION # adds the timestamp suffix 11 | UPDATED_VERSION=`cat $PROJECT_ROOT/app/connectors_service/connectors/VERSION` 12 | 13 | git add $PROJECT_ROOT/app/connectors_service/connectors/VERSION 14 | git commit -m "Bumping version from ${ORIG_VERSION} to ${UPDATED_VERSION}" 15 | git push origin ${GIT_BRANCH} 16 | 17 | echo "Tagging the release" 18 | git tag "v${UPDATED_VERSION}" 19 | git push origin --tags 20 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/es/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | import warnings 7 | 8 | from elasticsearch.exceptions import GeneralAvailabilityWarning 9 | 10 | from connectors.es.client import ESClient # NOQA 11 | from connectors.es.document import ESDocument, InvalidDocumentSourceError # NOQA 12 | from connectors.es.index import ESIndex # NOQA 13 | 14 | warnings.filterwarnings("ignore", category=GeneralAvailabilityWarning) 15 | 16 | TIMESTAMP_FIELD = "_timestamp" 17 | DEFAULT_LANGUAGE = "en" 18 | -------------------------------------------------------------------------------- /.buildkite/pull-requests.json: -------------------------------------------------------------------------------- 1 | { 2 | "jobs": [ 3 | { 4 | "enabled": true, 5 | "pipelineSlug": "connectors", 6 | "allow_org_users": true, 7 | "allowed_repo_permissions": ["admin", "write"], 8 | "allowed_list": [], 9 | "set_commit_status": true, 10 | "commit_status_context": "buildkite/connectors", 11 | "build_on_commit": false, 12 | "build_on_comment": true, 13 | "trigger_comment_regex": "^(?:(?:buildkite\\W+)?(?:build|test)\\W+(?:this|it))", 14 | "always_trigger_comment_regex": "^(?:(?:buildkite\\W+)?(?:build|test)\\W+(?:this|it))", 15 | "skip_ci_labels": ["skip-ci"], 16 | "skip_target_branches": [], 17 | "always_require_ci_on_changed": [] 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /app/connectors_service/scripts/testing/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | 3 | services: 4 | elasticsearch: 5 | image: docker.elastic.co/${NAMESPACE:-elasticsearch}/elasticsearch:${ES_VERSION} 6 | environment: 7 | - "discovery.type=single-node" 8 | - "ES_JAVA_OPTS=-Xms1024m -Xmx1024m" 9 | - "xpack.security.enabled=true" 10 | - "xpack.security.authc.api_key.enabled=true" 11 | - "xpack.security.authc.token.enabled=true" 12 | - "ELASTIC_PASSWORD=changeme" 13 | - "action.destructive_requires_name=false" 14 | ulimits: 15 | memlock: 16 | soft: -1 17 | hard: -1 18 | ports: 19 | - 9200:9200 20 | volumes: 21 | - es-data:/usr/share/elasticsearch/data 22 | volumes: 23 | es-data: 24 |
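# A minimal usage sketch, assuming a local Docker daemon; the stack version
# shown is illustrative, not pinned by this file, and must match an available
# image tag:
#   ES_VERSION=8.15.0 docker compose up -d elasticsearch
#   curl -u elastic:changeme http://localhost:9200
# NAMESPACE only needs overriding for a non-default image namespace; it falls
# back to "elasticsearch" above.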
-------------------------------------------------------------------------------- /app/connectors_service/connectors/services/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | 7 | from connectors.services.access_control_sync_job_execution import ( 8 | AccessControlSyncJobExecutionService, # NOQA 9 | ) 10 | from connectors.services.base import get_services # NOQA 11 | from connectors.services.content_sync_job_execution import ( 12 | ContentSyncJobExecutionService, # NOQA 13 | ) 14 | from connectors.services.job_cleanup import JobCleanUpService # NOQA 15 | from connectors.services.job_scheduling import JobSchedulingService # NOQA 16 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/google_drive/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from .clients import ( 7 | GoogleAdminDirectoryClient, 8 | GoogleDriveClient, 9 | GoogleServiceAccountClient, 10 | ) 11 | from .datasource import ( 12 | RETRIES, 13 | GoogleDriveDataSource, 14 | SyncCursorEmpty, 15 | ) 16 | 17 | __all__ = [ 18 | "GoogleAdminDirectoryClient", 19 | "GoogleDriveClient", 20 | "GoogleServiceAccountClient", 21 | "GoogleDriveDataSource", 22 | "SyncCursorEmpty", 23 | "RETRIES", 24 | ] 25 | -------------------------------------------------------------------------------- /app/connectors_service/tests/es/test_cli_client.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from connectors import __version__ 7 | from connectors.es.cli_client import CLIClient 8 | 9 | 10 | def test_overrides_user_agent_header(): 11 | config = { 12 | "username": "elastic", 13 | "password": "changeme", 14 | "host": "http://nowhere.com:9200", 15 | } 16 | cli_client = CLIClient(config) 17 | 18 | assert ( 19 | cli_client.client._headers["user-agent"] 20 | == f"elastic-connectors-{__version__}/cli" 21 | ) 22 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/gitlab/connector.json: -------------------------------------------------------------------------------- 1 | { 2 | "configuration": { 3 | "token": { 4 | "label": "Personal Access Token", 5 | "order": 1, 6 | "sensitive": true, 7 | "type": "str", 8 | "tooltip": "GitLab Personal Access Token with api, read_api, and read_repository scopes.", 9 | "value": "test-token-changeme" 10 | }, 11 | "projects": { 12 | "display": "textarea", 13 | "label": "List of projects", 14 | "order": 2, 15 | "tooltip": "List of project paths (e.g., 'group/project'). 
Use '*' to sync all accessible projects.", 16 | "type": "list", 17 | "value": "*" 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /.buildkite/run_functional_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # !!! WARNING DO NOT add -x to avoid leaking vault passwords 4 | set -euo pipefail 5 | 6 | MACHINE_TYPE=`uname -m` 7 | 8 | if [ "$MACHINE_TYPE" != "x86_64" ] && [ -v SKIP_AARCH64 ]; then 9 | echo "Running on aarch64 and skipping" 10 | exit 11 | fi 12 | 13 | source .buildkite/shared.sh 14 | 15 | init_python 16 | 17 | BASEDIR=$(realpath $(dirname $0)) 18 | ROOT=$(realpath $BASEDIR/../) 19 | VENV_ROOT=$ROOT/app/connectors_service/.venv 20 | 21 | cd $ROOT 22 | 23 | make install 24 | 25 | export PIP=$VENV_ROOT/bin/pip 26 | 27 | $PIP install py-spy 28 | 29 | 30 | if [ -v BUILDKITE ]; then 31 | # required by serverless 32 | sudo sysctl -w vm.max_map_count=262144 33 | fi 34 | 35 | PERF8=yes NAME=$CONNECTOR DATA_SIZE=$DATA_SIZE make ftest 36 | -------------------------------------------------------------------------------- /scripts/stack/view-connectors-logs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eo pipefail 4 | 5 | export CURDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 6 | if ! which docker-compose > /dev/null; then 7 | echo "Could not find 'docker-compose'. Make sure it is installed and available via your PATH" 8 | exit 2 9 | fi 10 | 11 | pushd "$CURDIR" 12 | 13 | compose_file=$CURDIR/docker/docker-compose.yml 14 | echo "Using compose file at: $compose_file" 15 | 16 | . $CURDIR/parse-params.sh 17 | parse_params $@ 18 | eval set -- "$parsed_params" 19 | 20 | source $CURDIR/set-env.sh $CURDIR/.env 21 | 22 | if [ "${watch_logs:-}" = true ] 23 | then 24 | docker-compose -f "$compose_file" logs -f elastic-connectors 25 | else 26 | docker-compose -f "$compose_file" logs -n20 elastic-connectors 27 | fi 28 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/graphql/constants.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | PING_QUERY = """ 7 | { 8 | __schema { 9 | queryType { 10 | name 11 | } 12 | } 13 | } 14 | """ 15 | RETRIES = 3 16 | RETRY_INTERVAL = 2 17 | BASIC = "basic" 18 | BEARER = "bearer" 19 | CURSOR_PAGINATION = "cursor_pagination" 20 | GET = "get" 21 | NO_PAGINATION = "no_pagination" 22 | POST = "post" 23 | 24 | # Regular expression to validate the Base URL 25 | URL_REGEX = "^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$" 26 | -------------------------------------------------------------------------------- /app/connectors_service/scripts/testing/scenarios/clients/spo_automated_testing_site.json: -------------------------------------------------------------------------------- 1 | { 2 | "tenant_id": "vault:connectors-sources/sharepoint-online-onedrive:tenant-id", 3 | "tenant_name": "vault:connectors-sources/sharepoint-online-onedrive:tenant-name", 4 | "use_text_extraction_service": false, 5 | "fetch_drive_item_permissions": true, 6 | "fetch_subsites": true, 7 | "client_id": "vault:connectors-sources/sharepoint-online-onedrive:client-id", 8 | "fetch_unique_page_permissions": true, 9 | "secret_value": "vault:connectors-sources/sharepoint-online-onedrive:secret-value", 10 | "enumerate_all_sites": false, 11 | "fetch_unique_list_item_permissions": true, 12 | "fetch_unique_list_permissions": true, 13 | "site_collections": "Automatedtesting", 14 | "use_document_level_security": false 15 | } 16 | -------------------------------------------------------------------------------- /libs/connectors_sdk/connectors_sdk/config.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | class DataSourceFrameworkConfig: 7 | """ 8 | The configs that will be exposed to DataSource instances. 9 | This abstraction prevents DataSource instances from having access to all configuration, while also 10 | preventing them from requiring substantial changes to access new configs that may be added. 11 | """ 12 | 13 | def __init__(self, max_file_size): 14 | """ 15 | Should not be called directly. Use the Builder. 
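A minimal sketch of the intended call path, assuming a Builder with
`with_max_file_size(...)` and `build()` methods (these names are an
assumption for illustration, not a documented contract):

    config = (
        DataSourceFrameworkConfig.Builder()
        .with_max_file_size(10 * 1024 * 1024)  # assumed builder setter
        .build()
    )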
16 | """ 17 | self.max_file_size = max_file_size 18 | -------------------------------------------------------------------------------- /.github/workflows/label-community-issues.yml: -------------------------------------------------------------------------------- 1 | name: Label Community Issues 2 | 3 | on: 4 | issues: 5 | types: [opened] 6 | 7 | jobs: 8 | run-python-script: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - name: Checkout repository 13 | uses: actions/checkout@v4 14 | 15 | - name: Set up Python 16 | uses: actions/setup-python@v5 17 | with: 18 | python-version: '3.10' 19 | 20 | - name: Install dependencies 21 | run: python3 -m pip install aiohttp gidgethub 22 | 23 | - name: Run Python script 24 | run: python .github/workflows/scripts/label_community_issues.py 25 | env: 26 | ACTOR: ${{ github.actor }} 27 | NUMBER: ${{ github.event.issue.number }} 28 | REPO: ${{ github.repository }} 29 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 30 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/enhancement.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Enhancement 3 | about: It's not a bug, but some desired feature is missing 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | ### Problem Description 11 | 14 | 15 | ### Proposed Solution 16 | 18 | 19 | 20 | ### Alternatives 21 | 23 | 24 | ### Additional Context 25 | 26 | -------------------------------------------------------------------------------- /.buildkite/publish_to_pypi.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # !!! WARNING DO NOT add -x to avoid leaking vault passwords 4 | set -euo pipefail 5 | 6 | source .buildkite/shared.sh 7 | 8 | init_python 9 | cd "$PACKAGE_PATH" 10 | python -m pip install --upgrade build twine 11 | python -m build 12 | 13 | export TWINE_USERNAME="__token__" 14 | 15 | # upload to test or real PyPI based on TEST_PYPI=1 env var or arg 16 | if [[ "${1:-}" == "TEST_PYPI=1" ]] || [[ "${TEST_PYPI:-}" =~ ^(1|TRUE|true)$ ]]; then 17 | TWINE_PASSWORD=$(vault read -field publishing-api-key secret/ci/elastic-connectors/test-pypi) 18 | export TWINE_PASSWORD 19 | python -m twine upload --repository testpypi dist/* 20 | else 21 | TWINE_PASSWORD=$(vault read -field publishing-api-key secret/ci/elastic-connectors/pypi) 22 | export TWINE_PASSWORD 23 | python -m twine upload --repository pypi dist/* 24 | fi 25 | -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": [ 4 | "local>elastic/renovate-config", 5 | "github>elastic/renovate-config:only-chainguard" 6 | ], 7 | "schedule": [ 8 | "* * * * 0,6" 9 | ], 10 | "labels": [ 11 | "renovate", 12 | "auto-backport", 13 | "v8.19.0", 14 | "v9.1.0", 15 | "v9.2.0", 16 | "v9.3.0", 17 | "v9.4.0" 18 | ], 19 | "packageRules": [ 20 | { 21 | "enabled": false, 22 | "matchPackageNames": [ 23 | "/^cgr.dev//" 24 | ] 25 | }, 26 | { 27 | "matchPackageNames": [ 28 | "docker.elastic.co/wolfi/python" 29 | ], 30 | "matchCurrentValue": "/3\\.11(-dev)?/", 31 | "matchUpdateTypes": [ 32 | "major", 33 | "minor" 34 | ], 35 | "enabled": false 36 | } 37 | ] 38 | } 39 | -------------------------------------------------------------------------------- /.buildkite/diff: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Produces a list of changed files between two commits (works for merges and 3 | # regular commits). 4 | # Used in conjunction with the monorepo-diff-buildkite-plugin to determine 5 | # which pipelines to upload/trigger based on the files changed. 6 | 7 | [ $# -lt 1 ] && { echo "argument is missing."; exit 1; } 8 | 9 | COMMIT=$1 10 | 11 | if [ -n "$BUILDKITE_PULL_REQUEST_BASE_BRANCH" ]; then 12 | HEAD_BRANCH="origin/$BUILDKITE_PULL_REQUEST_BASE_BRANCH" 13 | MERGE_BASE=$(git merge-base "$HEAD_BRANCH" "$COMMIT") 14 | echo "Checking against a base branch: $BUILDKITE_PULL_REQUEST_BASE_BRANCH with merge base at $MERGE_BASE" 15 | git diff --raw "$MERGE_BASE".."$COMMIT" | awk '{print $6; if($7) {print $7}}' 16 | else 17 | echo "Checking against the head of the current branch" 18 | git diff --raw HEAD~1 | awk '{print $6; if($7) {print $7}}' 19 | fi 20 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/build_info.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | import os 7 | 8 | import yaml 9 | 10 | from connectors import __version__ 11 | 12 | # This references a file that's built in .buildkite/publish/publish-common.sh 13 | # See https://github.com/elastic/connectors/pull/3154 for more info 14 | yaml_path = os.path.join(os.path.dirname(__file__), "build.yaml") 15 | if os.path.exists(yaml_path): 16 | __build_info__ = "" 17 | with open(yaml_path) as f: 18 | data = yaml.safe_load(f) 19 | for key in data: 20 | __build_info__ += f"{key}: {data[key]}\n" 21 | else: 22 | __build_info__ = __version__ 23 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/agent/logger.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | import logging 7 | 8 | import ecs_logging 9 | 10 | root_logger = logging.getLogger("agent_component") 11 | handler = logging.StreamHandler() 12 | handler.setFormatter(ecs_logging.StdlibFormatter()) 13 | root_logger.addHandler(handler) 14 | root_logger.setLevel(logging.INFO) 15 | 16 | 17 | def get_logger(module): 18 | logger = root_logger.getChild(module) 19 | 20 | if logger.hasHandlers(): 21 | return logger 22 | 23 | logger.addHandler(handler) 24 | 25 | return logger 26 | 27 | 28 | def update_logger_level(log_level): 29 | root_logger.setLevel(log_level) 30 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/dir/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | networks: 28 | esnet: 29 | driver: bridge 30 | 31 | volumes: 32 | esdata: 33 | driver: local 34 | -------------------------------------------------------------------------------- /docs/SUPPORT.md: -------------------------------------------------------------------------------- 1 | # Getting Support 2 | 3 | ### Official Support Services 4 | If you have an Elastic subscription, you are entitled to Support services. See our welcome page for [working with our support team](https://www.elastic.co/support/welcome). 5 | 6 | ### Where do I report issues with Connectors? 7 | If something is not working as expected, please open an [issue](https://github.com/elastic/connectors/issues/new). 8 | 9 | ### Where else can I go to get help? 10 | The Ingestion team at Elastic maintains this repository and is happy to help. Try posting your question to the [Elastic discuss forums](https://discuss.elastic.co/c/enterprise-search/84). Be sure to mention that you're using Connectors and also let us know what service type you're trying to use, and any errors/issues you are encountering. You can also find us in the `#search-connectors` channel of the [Elastic Community Slack](http://elasticstack.slack.com). 11 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/shared/google/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | from .google import ( 7 | GMailClient, 8 | GoogleDirectoryClient, 9 | GoogleServiceAccountClient, 10 | MessageFields, 11 | RetryableAiohttpSession, 12 | UserFields, 13 | load_service_account_json, 14 | remove_universe_domain, 15 | validate_service_account_json, 16 | ) 17 | 18 | __all__ = [ 19 | "GMailClient", 20 | "GoogleDirectoryClient", 21 | "GoogleServiceAccountClient", 22 | "MessageFields", 23 | "RetryableAiohttpSession", 24 | "UserFields", 25 | "load_service_account_json", 26 | "remove_universe_domain", 27 | "validate_service_account_json", 28 | ] 29 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/google_cloud_storage/mocker.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | # ruff: noqa: T201 7 | """Module responsible for mocking POST call to Google Cloud Storage Data Source""" 8 | 9 | from flask import Flask 10 | 11 | app = Flask(__name__) 12 | 13 | 14 | @app.route("/token", methods=["POST"]) 15 | def post_auth_token(): 16 | """Function to load""" 17 | return { 18 | "access_token": "XXXXXXStBkRnGyZ2mUYOLgls7QVBxOg82XhBCFo8UIT5gM", 19 | "token_type": "Bearer", 20 | "expires_in": 3600, 21 | "refresh_token": "XXXXXX3SEBX7F2cfrHcqJEa3KoAHYeXES6nmho", 22 | } 23 | 24 | 25 | if __name__ == "__main__": 26 | app.run(host="0.0.0.0", port=4444) 27 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/agent/pipelines/salesforce_pipeline.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "ingest pipeline for salesforce content", 3 | "processors": [ 4 | { 5 | "remove": { 6 | "field": [ 7 | "AccountId", 8 | "version_url", 9 | "version_number", 10 | "ConvertedAccount", 11 | "ConvertedContact", 12 | "ConvertedOpportunity", 13 | "Feeds.CommentCount", 14 | "Feeds.attributes", 15 | "FeedComments.attributes", 16 | "FeedComments.ParentId", 17 | "FeedComments.IsDeleted", 18 | "FeedComments.totalSize", 19 | "PhotoUrl", 20 | "StageName", 21 | "OwnerId", 22 | "content_size", 23 | "linked_ids" 24 | ], 25 | "description": "removes fields that are not needed in the index", 26 | "ignore_failure": true, 27 | "ignore_missing": true 28 | } 29 | } 30 | ] 31 | } 32 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/s3/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | s3: 28 | image: motoserver/moto 29 | ports: 30 | - 5001:5000 31 | 32 | networks: 33 | esnet: 34 
| driver: bridge 35 | 36 | volumes: 37 | esdata: 38 | driver: local 39 | -------------------------------------------------------------------------------- /scripts/stack/stop-stack.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eo pipefail 4 | 5 | export CURDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 6 | 7 | if ! which docker-compose > /dev/null; then 8 | echo "Could not find 'docker-compose'. Make sure it is installed and available via your PATH" 9 | exit 2 10 | fi 11 | 12 | source $CURDIR/set-env.sh 13 | compose_file=$CURDIR/docker/docker-compose.yml 14 | 15 | . $CURDIR/parse-params.sh 16 | parse_params $@ 17 | eval set -- "$parsed_params" 18 | 19 | echo "Stopping running containers..." 20 | if [ "${remove_volumes:-}" == true ]; then 21 | echo "... also removing data volumes..." 22 | docker-compose -f $compose_file down -v 23 | else 24 | docker-compose -f $compose_file down 25 | fi 26 | 27 | if [ "${reset_config:-}" == true ]; then 28 | config_path="$PROJECT_ROOT/scripts/stack/connectors-config" 29 | echo "... removing configuration file..." 30 | if [ -d "$config_path" ]; then 31 | rm -rf "$config_path" 32 | fi 33 | fi 34 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/network_drive/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from .datasource import NASDataSource 7 | from .netdrive import ( 8 | ClientPermissionException, 9 | InvalidLogonHoursException, 10 | NetworkDriveAdvancedRulesValidator, 11 | NoLogonServerException, 12 | PasswordChangeRequiredException, 13 | SecurityInfo, 14 | SMBSession, 15 | UserAccountDisabledException, 16 | ) 17 | 18 | __all__ = [ 19 | "ClientPermissionException", 20 | "InvalidLogonHoursException", 21 | "NetworkDriveAdvancedRulesValidator", 22 | "NoLogonServerException", 23 | "PasswordChangeRequiredException", 24 | "SecurityInfo", 25 | "SMBSession", 26 | "UserAccountDisabledException", 27 | "NASDataSource", 28 | ] 29 | -------------------------------------------------------------------------------- /.buildkite/run_notice_check.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # !!! WARNING DO NOT add -x to avoid leaking vault passwords 4 | set -euo pipefail 5 | 6 | source .buildkite/shared.sh 7 | 8 | init_python 9 | 10 | if is_pr && ! 
is_fork; then 11 | echo 'Running on a PR that is not a fork, will commit changes' 12 | 13 | export GH_TOKEN="$VAULT_GITHUB_TOKEN" 14 | source .buildkite/publish/git-setup.sh 15 | make notice 16 | 17 | if [ -z "$(git status --porcelain | grep NOTICE.txt)" ]; then 18 | echo 'Nothing changed' 19 | exit 0 20 | else 21 | echo 'New changes to NOTICE.txt:' 22 | git --no-pager diff 23 | 24 | git status --porcelain | grep app/connectors_service/NOTICE.txt && git add app/connectors_service/NOTICE.txt 25 | git status --porcelain | grep libs/connectors_sdk/NOTICE.txt && git add libs/connectors_sdk/NOTICE.txt 26 | git commit -m "Update NOTICE.txt" 27 | git push 28 | 29 | exit 1 30 | fi 31 | else 32 | echo 'Skipping autofix' 33 | make notice 34 | exit 0 35 | fi 36 | -------------------------------------------------------------------------------- /app/connectors_service/tests/agent/test_cli.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | import asyncio 7 | import os 8 | import signal 9 | from unittest.mock import AsyncMock, patch 10 | 11 | from connectors.agent.cli import main 12 | 13 | 14 | @patch("connectors.agent.cli.ConnectorsAgentComponent", return_value=AsyncMock()) 15 | def test_main_responds_to_sigterm(patch_component): 16 | async def kill(): 17 | await asyncio.sleep(0.2) 18 | os.kill(os.getpid(), signal.SIGTERM) 19 | 20 | loop = asyncio.new_event_loop() 21 | loop.create_task(kill()) 22 | 23 | # No asserts here. 24 | # main() will block forever unless it's killed with a signal 25 | # This test succeeds if it exits, if it hangs it'll be killed by a timeout 26 | main() 27 | 28 | loop.close() 29 | -------------------------------------------------------------------------------- /.buildkite/publish/build-docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ######## 4 | # Builds the docker image and saves it to an archive file 5 | # so it can be stored as an artifact in Buildkite 6 | ######## 7 | 8 | set -exu 9 | set -o pipefail 10 | 11 | if [[ "${ARCHITECTURE:-}" == "" ]]; then 12 | echo "!! ARCHITECTURE is not set. Exiting." 13 | exit 2 14 | fi 15 | 16 | # Load our common environment variables for publishing 17 | export CURDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 18 | source $CURDIR/publish-common.sh 19 | 20 | pushd $PROJECT_ROOT 21 | 22 | # set our complete tag name and build the image 23 | TAG_NAME="$BASE_TAG_NAME:${DOCKER_TAG_VERSION}-${ARCHITECTURE}" 24 | docker build -f $DOCKERFILE_PATH -t $TAG_NAME . 
25 | 26 | # save the image to an archive file 27 | OUTPUT_PATH="$PROJECT_ROOT/.artifacts" 28 | OUTPUT_FILE="$OUTPUT_PATH/${DOCKER_ARTIFACT_KEY}-${DOCKER_TAG_VERSION}-${ARCHITECTURE}.tar.gz" 29 | mkdir -p $OUTPUT_PATH 30 | docker save $TAG_NAME | gzip > $OUTPUT_FILE 31 | 32 | popd 33 | -------------------------------------------------------------------------------- /libs/connectors_sdk/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | asyncio_mode = auto 3 | addopts = 4 | -v 5 | filterwarnings = 6 | error 7 | ; botocore has this warning that is reported by them to be irrelevant 8 | ignore:.*urllib3.contrib.pyopenssl.*:DeprecationWarning:botocore.* 9 | ; latest main of aioresponses does not have this problem, but current package uses deprecated pkg_resources API 10 | ignore:.*pkg_resources.*:DeprecationWarning 11 | ; SQLAlchemy uses deprecated APIs internally 12 | ignore:.*dbapi().*:DeprecationWarning 13 | ; aiogoogle inherits on top of AioHttpSession, which is not recommended by aiohttp 14 | ignore:Inheritance class AiohttpSession from ClientSession is discouraged:DeprecationWarning 15 | ; aiogoogle inherits on top of RetryableAioHttpSession, which is not recommended by aiohttp 16 | ignore:Inheritance class RetryableAiohttpSession from ClientSession is discouraged:DeprecationWarning 17 | ; pytest may generate its own warnings in some situations, such as improper usage or deprecated features. 18 | ignore::pytest.PytestUnraisableExceptionWarning 19 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve. 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Bug Description 11 | 12 | 13 | ### To Reproduce 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | ## Expected behavior 21 | 22 | 23 | ## Screenshots 24 | 26 | 27 | ## Environment 28 | 29 | 30 | 31 | - OS: [e.g. iOS] 32 | - Browser [e.g. chrome, safari] 33 | - Version [e.g. 
22] 34 | 35 | 36 | ## Additional context 37 | 39 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/mssql/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | mssql: 28 | container_name: mssql 29 | image: mcr.microsoft.com/azure-sql-edge:latest 30 | environment: 31 | ACCEPT_EULA: Y 32 | MSSQL_SA_PASSWORD: Password_123 33 | ports: 34 | - 9090:1433 35 | 36 | networks: 37 | esnet: 38 | 39 | volumes: 40 | esdata: 41 | driver: local 42 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/oracle/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | oracle: 28 | image: container-registry.oracle.com/database/free:latest 29 | ports: 30 | - 1521:1521 31 | environment: 32 | - ORACLE_PWD=Password_123 33 | restart: always 34 | 35 | networks: 36 | esnet: 37 | driver: bridge 38 | 39 | volumes: 40 | esdata: 41 | driver: local 42 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/azure_blob_storage/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | azureblobstorage: 28 | image: mcr.microsoft.com/azure-storage/azurite 29 | ports: 30 | - 10000:10000 31 | command: ["azurite-blob","--blobHost","0.0.0.0","--blobPort","10000"] 32 | 33 | networks: 34 | esnet: 35 | driver: bridge 36 | 37 | volumes: 38 | esdata: 39 | driver: local 40 | 
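# Connection note, assuming Azurite's documented development-storage defaults
# (nothing in this compose file overrides them): the well-known dev account is
# "devstoreaccount1", so a client would target
# http://localhost:10000/devstoreaccount1 as the blob endpoint.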
-------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/network_drive/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | samba: 28 | image: dperson/samba 29 | networks: 30 | - default 31 | ports: 32 | - "445:445/tcp" 33 | restart: unless-stopped 34 | command: '-s "Folder1;/mnt;yes;no;yes;admin" -u "admin;abc@123" -p' 35 | 36 | networks: 37 | esnet: 38 | default: 39 | 40 | volumes: 41 | esdata: 42 | driver: local 43 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/test_directory.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | import pytest 7 | 8 | from connectors.sources.directory import DEFAULT_DIR, DirectoryDataSource 9 | from tests.sources.support import assert_basics, create_source 10 | 11 | 12 | @pytest.mark.asyncio 13 | async def test_basics(): 14 | await assert_basics(DirectoryDataSource, "directory", DEFAULT_DIR) 15 | 16 | 17 | @pytest.mark.asyncio 18 | async def test_get_docs(catch_stdout): 19 | async with create_source(DirectoryDataSource) as source: 20 | num = 0 21 | async for doc, dl in source.get_docs(): 22 | num += 1 23 | if doc["path"].endswith("__init__.py"): 24 | continue 25 | data = await dl(doit=True, timestamp="xx") 26 | if data is not None: 27 | assert len(data["_attachment"]) > 0 28 | if num > 100: 29 | break 30 | 31 | assert num > 3 32 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/redis/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms512m -Xmx512m 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | redis: 28 | container_name: redis 29 | image: redis:latest 30 | volumes: 31 | - redis:/data 32 | networks: 33 | - redis-network 34 | ports: 35 | - 6379:6379 36 | restart: always 37 | 38 | networks: 39 | redis-network: 40 | driver: bridge 41 | esnet: 42 | 43 | volumes: 44 | 
esdata: 45 | driver: local 46 | redis: 47 | driver: local 48 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/box/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | box: 28 | build: 29 | context: ../../../../ 30 | dockerfile: ${DOCKERFILE_FTEST_PATH} 31 | command: .venv/bin/python -m tests.sources.fixtures.box.fixture 32 | ports: 33 | - "9092:9092" 34 | volumes: 35 | - .:/python-flask 36 | restart: always 37 | 38 | volumes: 39 | esdata: 40 | driver: local 41 | 42 | networks: 43 | esnet: 44 | driver: bridge 45 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/salesforce/connector.json: -------------------------------------------------------------------------------- 1 | { 2 | "configuration": { 3 | "client_id": { 4 | "label": "Client ID", 5 | "type": "str", 6 | "value": "1234" 7 | }, 8 | "client_secret": { 9 | "label": "Client Secret", 10 | "type": "str", 11 | "value": "abcd" 12 | }, 13 | "domain": { 14 | "label": "Domain", 15 | "type": "str", 16 | "value": "fake.sandbox" 17 | }, 18 | "use_text_extraction_service": { 19 | "default_value": null, 20 | "depends_on": [], 21 | "display": "toggle", 22 | "label": "Use text extraction service", 23 | "options": [], 24 | "order": 7, 25 | "required": true, 26 | "sensitive": false, 27 | "tooltip": "Requires a separate deployment of the Elastic Text Extraction Service. 
Requires that pipeline settings disable text extraction.", 28 | "type": "bool", 29 | "ui_restrictions": [], 30 | "validations": [], 31 | "value": false 32 | } 33 | } 34 | } -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/jira/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | jira: 28 | build: 29 | context: ../../../../ 30 | dockerfile: ${DOCKERFILE_FTEST_PATH} 31 | command: .venv/bin/python -m tests.sources.fixtures.jira.fixture 32 | ports: 33 | - "8080:8080" 34 | volumes: 35 | - .:/python-flask 36 | restart: always 37 | 38 | volumes: 39 | esdata: 40 | driver: local 41 | 42 | networks: 43 | esnet: 44 | driver: bridge 45 | -------------------------------------------------------------------------------- /app/connectors_service/tests/fixtures/config.yml: -------------------------------------------------------------------------------- 1 | 2 | elasticsearch.host: http://nowhere.com:9200 3 | elasticsearch: 4 | user: elastic 5 | password: ${elasticsearch.password} 6 | bulk: 7 | queue_max_size: 1024 8 | chunk_size: 250 9 | max_wait_duration: 1 10 | initial_backoff_duration: 0 11 | backoff_multiplier: 0 12 | 13 | service: 14 | idling: 0.5 15 | heartbeat: 300 16 | max_errors: 20 17 | max_errors_span: 600 18 | max_concurrent_content_syncs: 10 19 | max_concurrent_access_control_syncs: 10 20 | log_level: INFO 21 | 22 | connectors: 23 | - 24 | connector_id: '1' 25 | 26 | sources: 27 | fake: tests.fake_sources:FakeSource 28 | fake_with_incremental: tests.fake_sources:FakeSourceWithIncrementalSync 29 | large_fake: tests.fake_sources:LargeFakeSource 30 | fail_once: tests.fake_sources:FailsThenWork 31 | fake_ts: tests.fake_sources:FakeSourceTS 32 | filtering_state_valid: tests.fake_sources:FakeSourceFilteringValid 33 | filtering_state_invalid: tests.fake_sources:FakeSourceFilteringStateInvalid 34 | filtering_state_edited: tests.fake_sources:FakeSourceFilteringStateEdited 35 | filtering_errors_present: tests.fake_sources:FakeSourceFilteringErrorsPresent 36 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/dropbox/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 |
- 9200:9200 24 | networks: 25 | - esnet 26 | 27 | dropbox: 28 | build: 29 | context: ../../../../ 30 | dockerfile: ${DOCKERFILE_FTEST_PATH} 31 | command: .venv/bin/python -m tests.sources.fixtures.dropbox.fixture 32 | ports: 33 | - "8085:8085" 34 | volumes: 35 | - .:/python-flask 36 | restart: always 37 | 38 | volumes: 39 | esdata: 40 | driver: local 41 | 42 | networks: 43 | esnet: 44 | driver: bridge 45 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/github/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | github: 28 | build: 29 | context: ../../../../ 30 | dockerfile: ${DOCKERFILE_FTEST_PATH} 31 | command: .venv/bin/python -m tests.sources.fixtures.github.fixture 32 | ports: 33 | - "9091:9091" 34 | volumes: 35 | - .:/python-flask 36 | restart: always 37 | 38 | volumes: 39 | esdata: 40 | driver: local 41 | 42 | networks: 43 | esnet: 44 | driver: bridge 45 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/graphql/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | graphql: 28 | build: 29 | context: ../../../../ 30 | dockerfile: ${DOCKERFILE_FTEST_PATH} 31 | command: .venv/bin/python -m tests.sources.fixtures.graphql.fixture 32 | ports: 33 | - "9094:9094" 34 | volumes: 35 | - .:/python-flask 36 | restart: always 37 | 38 | volumes: 39 | esdata: 40 | driver: local 41 | 42 | networks: 43 | esnet: 44 | driver: bridge 45 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/notion/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | 
ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | notion: 28 | build: 29 | context: ../../../../ 30 | dockerfile: ${DOCKERFILE_FTEST_PATH} 31 | command: .venv/bin/python -m tests.sources.fixtures.notion.fixture 32 | ports: 33 | - "9096:9096" 34 | volumes: 35 | - .:/python-flask 36 | restart: always 37 | 38 | volumes: 39 | esdata: 40 | driver: local 41 | 42 | networks: 43 | esnet: 44 | driver: bridge 45 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/atlassian/utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from connectors.access_control import prefix_identity 7 | 8 | 9 | def prefix_account_id(account_id): 10 | return prefix_identity("account_id", account_id) 11 | 12 | 13 | def prefix_group_id(group_id): 14 | return prefix_identity("group_id", group_id) 15 | 16 | 17 | def prefix_role_key(role_key): 18 | return prefix_identity("role_key", role_key) 19 | 20 | 21 | def prefix_account_name(account_name): 22 | return prefix_identity("name", account_name.replace(" ", "-")) 23 | 24 | 25 | def prefix_account_email(email): 26 | return prefix_identity("email_address", email) 27 | 28 | 29 | def prefix_account_locale(locale): 30 | return prefix_identity("locale", locale) 31 | 32 | 33 | def prefix_user(user): 34 | if not user: 35 | return 36 | return prefix_identity("user", user) 37 | 38 | 39 | def prefix_group(group): 40 | return prefix_identity("group", group) 41 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/confluence/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | confluence: 28 | build: 29 | context: ../../../../ 30 | dockerfile: ${DOCKERFILE_FTEST_PATH} 31 | command: .venv/bin/python -m tests.sources.fixtures.confluence.fixture 32 | ports: 33 | - "9696:9696" 34 | volumes: 35 | - .:/python-flask 36 | restart: always 37 | 38 | networks: 39 | esnet: 40 | driver: bridge 41 | 42 | volumes: 43 | esdata: 44 | driver: local 45 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/dropbox/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. 
Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from .client import DropboxClient 7 | from .common import ( 8 | AUTHENTICATED_ADMIN_URL, 9 | ENDPOINTS, 10 | FILE, 11 | FOLDER, 12 | MAX_CONCURRENT_DOWNLOADS, 13 | PAPER, 14 | REQUEST_BATCH_SIZE, 15 | RETRY_COUNT, 16 | InvalidClientCredentialException, 17 | InvalidPathException, 18 | InvalidRefreshTokenException, 19 | ) 20 | from .datasource import DropboxDataSource 21 | from .validator import DropBoxAdvancedRulesValidator 22 | 23 | __all__ = [ 24 | "DropboxClient", 25 | "DropboxDataSource", 26 | "DropBoxAdvancedRulesValidator", 27 | "AUTHENTICATED_ADMIN_URL", 28 | "ENDPOINTS", 29 | "FILE", 30 | "FOLDER", 31 | "MAX_CONCURRENT_DOWNLOADS", 32 | "PAPER", 33 | "REQUEST_BATCH_SIZE", 34 | "RETRY_COUNT", 35 | "InvalidPathException", 36 | "InvalidClientCredentialException", 37 | "InvalidRefreshTokenException", 38 | ] 39 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/servicenow/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms512m -Xmx512m 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | servicenow: 28 | build: 29 | context: ../../../../ 30 | dockerfile: ${DOCKERFILE_FTEST_PATH} 31 | command: .venv/bin/python -m tests.sources.fixtures.servicenow.fixture 32 | ports: 33 | - "9318:9318" 34 | volumes: 35 | - .:/python-flask 36 | restart: always 37 | 38 | volumes: 39 | esdata: 40 | driver: local 41 | 42 | networks: 43 | esnet: 44 | driver: bridge 45 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/box/constants.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | 7 | import os 8 | 9 | FINISHED = "FINISHED" 10 | 11 | ENDPOINTS = { 12 | "TOKEN": "/oauth2/token", 13 | "PING": "/2.0/users/me", 14 | "FOLDER": "/2.0/folders/{folder_id}/items", 15 | "CONTENT": "/2.0/files/{file_id}/content", 16 | "USERS": "/2.0/users", 17 | } 18 | RETRIES = 3 19 | RETRY_INTERVAL = 2 20 | CHUNK_SIZE = 1024 21 | FETCH_LIMIT = 1000 22 | QUEUE_MEM_SIZE = 5 * 1024 * 1024 # ~ 5 MB 23 | MAX_CONCURRENCY = 2000 24 | MAX_CONCURRENT_DOWNLOADS = 15 25 | FIELDS = "name,modified_at,size,type,sequence_id,etag,created_at,modified_at,content_created_at,content_modified_at,description,created_by,modified_by,owned_by,parent,item_status" 26 | FILE = "file" 27 | BOX_FREE = "box_free" 28 | BOX_ENTERPRISE = "box_enterprise" 29 | 30 | refresh_token = None 31 | 32 | if "BOX_BASE_URL" in os.environ: 33 | BASE_URL = os.environ.get("BOX_BASE_URL") 34 | else: 35 | BASE_URL = "https://api.box.com" 36 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/gitlab/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | gitlab: 28 | build: 29 | context: ../../../../ 30 | dockerfile: ${DOCKERFILE_FTEST_PATH} 31 | command: .venv/bin/python -m tests.sources.fixtures.gitlab.fixture 32 | ports: 33 | - "9091:9091" 34 | volumes: 35 | - .:/python-flask 36 | restart: always 37 | networks: 38 | - esnet 39 | 40 | volumes: 41 | esdata: 42 | driver: local 43 | 44 | networks: 45 | esnet: 46 | driver: bridge 47 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/zoom/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | zoom: 28 | build: 29 | context: ../../../../ 30 | dockerfile: ${DOCKERFILE_FTEST_PATH} 31 | command: .venv/bin/python -m tests.sources.fixtures.zoom.fixture 32 | ports: 33 | - 10971:10971 34 | volumes: 35 | - .:/python-flask 36 | restart: always 37 | environment: 38 | - DATA_SIZE=${DATA_SIZE} 39 | 40 | volumes: 41 | esdata: 42 | driver: local 43 | 44 | networks: 45 | esnet: 46 | driver: bridge 47 | -------------------------------------------------------------------------------- 
/app/connectors_service/tests/sources/fixtures/sandfly/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | sandfly: 28 | build: 29 | context: ../../../../ 30 | dockerfile: ${DOCKERFILE_FTEST_PATH} 31 | command: .venv/bin/python -m tests.sources.fixtures.sandfly.fixture 32 | ports: 33 | - "8080:8080" 34 | volumes: 35 | - .:/python-flask 36 | restart: always 37 | environment: 38 | - DATA_SIZE=${DATA_SIZE} 39 | 40 | volumes: 41 | esdata: 42 | driver: local 43 | 44 | networks: 45 | esnet: 46 | driver: bridge 47 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/onedrive/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | onedrive: 28 | build: 29 | context: ../../../../ 30 | dockerfile: ${DOCKERFILE_FTEST_PATH} 31 | command: .venv/bin/python -m tests.sources.fixtures.onedrive.fixture 32 | ports: 33 | - 10972:10972 34 | volumes: 35 | - .:/python-flask 36 | restart: always 37 | environment: 38 | - DATA_SIZE=${DATA_SIZE} 39 | 40 | volumes: 41 | esdata: 42 | driver: local 43 | 44 | networks: 45 | esnet: 46 | driver: bridge 47 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/postgresql/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | postgresql: 28 | container_name: postgresql 29 | image: postgres:14.1-alpine 30 | environment: 31 | POSTGRES_USER: admin 32 | POSTGRES_PASSWORD: Password_123 33 | POSTGRES_DB: xe 34 | PGDATA: 
/var/lib/postgresql/data/pgdata 35 | ports: 36 | - 9090:5432 37 | command: ["-c", "track_commit_timestamp=on"] 38 | restart: always 39 | 40 | networks: 41 | esnet: 42 | 43 | volumes: 44 | esdata: 45 | driver: local 46 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/salesforce/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | salesforce: 28 | build: 29 | context: ../../../../ 30 | dockerfile: ${DOCKERFILE_FTEST_PATH} 31 | command: .venv/bin/python -m tests.sources.fixtures.salesforce.fixture 32 | ports: 33 | - "10338:10338" 34 | volumes: 35 | - .:/python-flask 36 | restart: always 37 | environment: 38 | - DATA_SIZE=${DATA_SIZE} 39 | 40 | volumes: 41 | esdata: 42 | driver: local 43 | 44 | networks: 45 | esnet: 46 | driver: bridge 47 | -------------------------------------------------------------------------------- /.buildkite/run_linter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # !!! WARNING DO NOT add -x to avoid leaking vault passwords 4 | set -euo pipefail 5 | 6 | source .buildkite/shared.sh 7 | 8 | init_python 9 | 10 | if is_pr && ! is_fork; then 11 | echo "We're on PR, running autoformat" 12 | 13 | export GH_TOKEN="$VAULT_GITHUB_TOKEN" 14 | source .buildkite/publish/git-setup.sh 15 | 16 | if ! make autoformat ; then 17 | echo "make autoformat ran with errors, exiting" 18 | exit 1 19 | fi 20 | 21 | if [ -z "$(git status --porcelain)" ]; then 22 | echo "Nothing to be fixed by autoformat" 23 | else 24 | 25 | git --no-pager diff 26 | echo "linting errors are fixed, pushing the diff" 27 | 28 | git add . 29 | git commit -m "make autoformat" 30 | git push 31 | # exit 1 to re-trigger the build 32 | exit 1 33 | fi 34 | 35 | echo "Running type checking" 36 | if ! make typecheck ; then 37 | echo "Type checking failed" 38 | exit 1 39 | fi 40 | else 41 | echo "We're not on PR or running against a fork, running only linter" 42 | # On non-PR branches the bot has no permissions to open PRs. 43 | # In theory this should never fail, since lint must pass before a PR 44 | # can merge, but it can still fail intermittently.
45 | make lint 46 | fi 47 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/google_drive/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | google_drive: 28 | build: 29 | context: ../../../../ 30 | dockerfile: ${DOCKERFILE_FTEST_PATH} 31 | command: .venv/bin/python -m tests.sources.fixtures.google_drive.fixture 32 | ports: 33 | - "10339:10339" 34 | volumes: 35 | - .:/python-flask 36 | restart: always 37 | environment: 38 | - DATA_SIZE=${DATA_SIZE} 39 | 40 | volumes: 41 | esdata: 42 | driver: local 43 | 44 | networks: 45 | esnet: 46 | driver: bridge 47 | -------------------------------------------------------------------------------- /.buildkite/publish_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # !!! WARNING DO NOT add -x to avoid leaking vault passwords 4 | set -euo pipefail 5 | 6 | sudo apt-get update 7 | sudo DEBIAN_FRONTEND=noninteractive apt-get install ca-certificates curl gnupg lsb-release -y 8 | 9 | BASEDIR=$(realpath $(dirname $0)) 10 | ROOT=$(realpath $BASEDIR/../) 11 | 12 | cd $ROOT 13 | 14 | # docker snapshot publication 15 | echo "Building the image" 16 | make docker-build 17 | 18 | # !!! WARNING be cautious about the following lines, to avoid leaking the secrets in the CI logs 19 | 20 | set +x # Do not remove so we don't leak passwords 21 | VAULT_ADDR=${VAULT_ADDR:-https://vault-ci-prod.elastic.dev} 22 | VAULT_USER="docker-swiftypeadmin" 23 | echo "Fetching Docker credentials for '$VAULT_USER' from Vault..." 24 | DOCKER_USER=$(vault read -address "${VAULT_ADDR}" -field user_20230609 secret/ci/elastic-connectors/${VAULT_USER}) 25 | DOCKER_PASSWORD=$(vault read -address "${VAULT_ADDR}" -field secret_20230609 secret/ci/elastic-connectors/${VAULT_USER}) 26 | echo "Done!" 27 | echo 28 | 29 | echo "Logging into Docker as '$DOCKER_USER'..." 30 | docker login -u "${DOCKER_USER}" -p ${DOCKER_PASSWORD} docker.elastic.co 31 | echo "Done!" 
32 | echo 33 | echo "Pushing the image to docker.elastic.co" 34 | make docker-push 35 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/microsoft_teams/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | microsoft_teams: 28 | build: 29 | context: ../../../../ 30 | dockerfile: ${DOCKERFILE_FTEST_PATH} 31 | command: .venv/bin/python -m tests.sources.fixtures.microsoft_teams.fixture 32 | ports: 33 | - 10971:10971 34 | volumes: 35 | - .:/python-flask 36 | restart: always 37 | environment: 38 | - DATA_SIZE=${DATA_SIZE} 39 | 40 | volumes: 41 | esdata: 42 | driver: local 43 | 44 | networks: 45 | esnet: 46 | driver: bridge 47 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/sharepoint_server/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | sharepoint: 28 | build: 29 | context: ../../../../ 30 | dockerfile: ${DOCKERFILE_FTEST_PATH} 31 | command: .venv/bin/python -m tests.sources.fixtures.sharepoint_server.fixture 32 | ports: 33 | - "8491:8491" 34 | volumes: 35 | - .:/python-flask 36 | restart: always 37 | environment: 38 | - DATA_SIZE=${DATA_SIZE} 39 | 40 | volumes: 41 | esdata: 42 | driver: local 43 | 44 | networks: 45 | esnet: 46 | driver: bridge 47 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/sharepoint_online/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 
26 | 27 | sharepoint_online: 28 | build: 29 | context: ../../../../ 30 | dockerfile: ${DOCKERFILE_FTEST_PATH} 31 | command: .venv/bin/python -m tests.sources.fixtures.sharepoint_online.fixture 32 | ports: 33 | - 10337:10337 34 | volumes: 35 | - .:/python-flask 36 | restart: always 37 | environment: 38 | - DATA_SIZE=${DATA_SIZE} 39 | 40 | volumes: 41 | esdata: 42 | driver: local 43 | 44 | networks: 45 | esnet: 46 | driver: bridge 47 | -------------------------------------------------------------------------------- /.buildkite/shared.sh: -------------------------------------------------------------------------------- 1 | init_python() { 2 | source ~/.bash_profile 3 | 4 | pyenv global $PYTHON_VERSION 5 | echo "Python version:" 6 | pyenv global 7 | } 8 | 9 | retry() { 10 | local retries=$1; shift 11 | local delay=$1; shift 12 | local attempts=1 13 | 14 | until "$@"; do 15 | retry_exit_status=$? 16 | echo "Exited with $retry_exit_status" >&2 17 | if (( retries == "0" )); then 18 | return $retry_exit_status 19 | elif (( attempts == retries )); then 20 | echo "Failed $attempts retries" >&2 21 | return $retry_exit_status 22 | else 23 | echo "Retrying $((retries - attempts)) more times..." >&2 24 | attempts=$((attempts + 1)) 25 | sleep "$delay" 26 | fi 27 | done 28 | } 29 | 30 | is_pr() { 31 | if [ -z "$BUILDKITE_PULL_REQUEST" ] || [ "$BUILDKITE_PULL_REQUEST" = "false" ]; then 32 | echo "Running against a non-PR change" 33 | return 1 # false 34 | else 35 | echo "Running against a PR" 36 | return 0 # true 37 | fi 38 | } 39 | 40 | is_fork() { 41 | if [ "$BUILDKITE_PULL_REQUEST_REPO" = "https://github.com/elastic/connectors.git" ]; then 42 | echo "Running against real connectors repo" 43 | return 1 # false 44 | else 45 | echo "Running against a fork" 46 | return 0 # true 47 | fi 48 | } 49 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/mysql/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | mysql: 28 | container_name: mysql 29 | image: mysql:8.3 30 | environment: 31 | MYSQL_ROOT_PASSWORD: changeme 32 | command: --default-authentication-plugin=mysql_native_password 33 | volumes: 34 | - mysql:/data/mysql 35 | networks: 36 | - mysql-network 37 | ports: 38 | - 3306:3306 39 | restart: always 40 | 41 | 42 | 43 | 44 | 45 | networks: 46 | mysql-network: 47 | driver: bridge 48 | esnet: 49 | 50 | volumes: 51 | esdata: 52 | driver: local 53 | mysql: 54 | driver: local 55 | -------------------------------------------------------------------------------- /.buildkite/publish/push-docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ######## 4 | # Pushes the docker image to the docker registry 5 | ######## 6 | 7 | set -exu 8 | set -o pipefail 9 | 10 | if [[ "${ARCHITECTURE:-}" == "" ]]; then 11 | echo "!! 
ARCHITECTURE is not set. Exiting." 12 | exit 2 13 | fi 14 | 15 | # Load our common environment variables for publishing 16 | export CURDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 17 | source $CURDIR/publish-common.sh 18 | 19 | # Load the image from the artifact created in build-docker.sh 20 | echo "Loading image from archive file..." 21 | docker load < "$PROJECT_ROOT/.artifacts/${DOCKER_ARTIFACT_KEY}-${DOCKER_TAG_VERSION}-${ARCHITECTURE}.tar.gz" 22 | 23 | # ensure +x is set to avoid writing any sensitive information to the console 24 | set +x 25 | 26 | # Log into Docker 27 | echo "Logging into docker..." 28 | DOCKER_USER=$(vault read -address "${VAULT_ADDR}" -field user_20230609 secret/ci/elastic-connectors/${VAULT_USER}) 29 | vault read -address "${VAULT_ADDR}" -field secret_20230609 secret/ci/elastic-connectors/${VAULT_USER} | \ 30 | docker login -u $DOCKER_USER --password-stdin docker.elastic.co 31 | 32 | # Set our tag name and push the image 33 | TAG_NAME="$BASE_TAG_NAME:${DOCKER_TAG_VERSION}-${ARCHITECTURE}" 34 | echo "Pushing image to docker with tag: $TAG_NAME" 35 | docker push $TAG_NAME 36 | -------------------------------------------------------------------------------- /libs/connectors_sdk/.ruff.toml: -------------------------------------------------------------------------------- 1 | target-version = "py310" 2 | 3 | [lint] 4 | select = ["A", "ASYNC", "I", "E", "F", "B", "C4", "T10", "T20", "EM", "ISC", "S", "CPY001"] 5 | ignore = ["E501", "ISC001"] 6 | preview = true 7 | 8 | # Allow autofix for all enabled rules (when `--fix`) is provided. 9 | fixable = ["A", "B", "C", "C4", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT", "T10", "T20"] 10 | unfixable = [] 11 | 12 | exclude = [ 13 | ".git", 14 | "__pycache__", 15 | "lib", 16 | "bin", 17 | "include" 18 | ] 19 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" 20 | 21 | [lint.per-file-ignores] 22 | "connectors/*" = ["S608"] 23 | "scripts/verify.py" = [ "EM" ] 24 | "tests/*" = ["B017", "S101", "S", "ASYNC110"] 25 | "tests/conftest.py" = [ "EM" ] 26 | 27 | [lint.isort] 28 | known-first-party=["connectors_sdk", "tests"] 29 | 30 | [lint.flake8-copyright] 31 | notice-rgx = "#\n# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n# or more contributor license agreements. 
Licensed under the Elastic License 2.0;\n# you may not use this file except in compliance with the Elastic License 2.0.\n#" 32 | 33 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/mongodb/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | mongo: 28 | container_name: mongo 29 | image: mongo:latest 30 | volumes: 31 | - mongo:/data/db 32 | networks: 33 | - mongo-network 34 | ports: 35 | - 27021:27017 36 | restart: always 37 | environment: 38 | # provide your credentials here 39 | - MONGO_INITDB_ROOT_USERNAME=admin 40 | - MONGO_INITDB_ROOT_PASSWORD=justtesting 41 | 42 | 43 | 44 | 45 | networks: 46 | mongo-network: 47 | driver: bridge 48 | esnet: 49 | 50 | volumes: 51 | esdata: 52 | driver: local 53 | mongo: 54 | driver: local 55 | -------------------------------------------------------------------------------- /.github/workflows/scripts/label_community_issues.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import aiohttp 4 | import asyncio 5 | import os 6 | from gidgethub.aiohttp import GitHubAPI 7 | from gidgethub import BadRequest 8 | 9 | ACTOR = os.getenv("ACTOR") 10 | NUMBER = os.getenv("NUMBER") 11 | REPO = os.getenv("REPO") 12 | GITHUB_TOKEN = os.getenv("GITHUB_TOKEN") 13 | 14 | LABELS = ["community-driven", "needs-triage"] 15 | 16 | async def main(): 17 | async with aiohttp.ClientSession() as session: 18 | gh = GitHubAPI(session, requester="", base_url="https://api.github.com", oauth_token=GITHUB_TOKEN) 19 | 20 | print("********") 21 | print(f"ACTOR: {ACTOR}") 22 | print(f"NUMBER: {NUMBER}") 23 | print(f"REPO: {REPO}") 24 | print("********") 25 | 26 | try: 27 | # this API returns a None response, but will raise if the user isn't a collaborator 28 | await gh.getitem(f"/repos/{REPO}/collaborators/{ACTOR}") 29 | print("User is a collaborator, not applying labels.") 30 | except BadRequest: 31 | # if this fails we want it to be noisy, so no try/except 32 | print("User is not a collaborator, applying labels...") 33 | await gh.post(f"/repos/{REPO}/issues/{NUMBER}/labels", data={"labels": LABELS}) 34 | 35 | if __name__ == "__main__": 36 | asyncio.run(main()) 37 | -------------------------------------------------------------------------------- /.github/workflows/backport.yml: -------------------------------------------------------------------------------- 1 | name: Backport PR 2 | 3 | on: 4 | pull_request_target: 5 | branches: 6 | - main 7 | types: 8 | - labeled 9 | - closed 10 | 11 | jobs: 12 | backport: 13 | if: | 14 | github.event.pull_request.merged == true 15 | && contains(github.event.pull_request.labels.*.name, 'auto-backport') 16 | && ( 17 | (github.event.action == 'labeled' && github.event.label.name == 'auto-backport') 18 | || (github.event.action == 'closed') 19
| ) 20 | runs-on: ubuntu-latest 21 | steps: 22 | - name: Checkout Actions 23 | uses: actions/checkout@v2 24 | with: 25 | repository: 'swiftype/kibana-github-actions' 26 | ref: main 27 | path: ./actions 28 | 29 | - name: Install Actions 30 | run: npm install --production --prefix ./actions 31 | 32 | - name: Run Backport 33 | uses: ./actions/backport 34 | with: 35 | github_token: ${{ secrets.GITHUB_TOKEN }} 36 | approver_token: ${{ secrets.REPO_SCOPED_TOKEN }} 37 | auto_approve: 'true' 38 | commit_user: elastic 39 | commit_email: ent-search-backport@users.noreply.github.com 40 | auto_merge: 'true' 41 | auto_merge_method: 'squash' 42 | manual_backport_command_template: 'backport --pr %pullNumber% --autoMerge --autoMergeMethod squash' 43 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/google_cloud_storage/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | elasticsearch: 5 | image: ${ELASTICSEARCH_DRA_DOCKER_IMAGE} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms2g -Xmx2g 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | gcs-mocker: 28 | build: 29 | context: ../../../../ 30 | dockerfile: ${DOCKERFILE_FTEST_PATH} 31 | command: .venv/bin/python -m tests.sources.fixtures.google_cloud_storage.mocker 32 | ports: 33 | - "4444:4444" 34 | volumes: 35 | - .:/python-flask 36 | restart: always 37 | 38 | google_cloud_storage: 39 | container_name: google_cloud_storage 40 | image: fsouza/fake-gcs-server 41 | ports: 42 | - "4443:4443" 43 | command: ["-scheme", "http", "-port", "4443"] 44 | 45 | networks: 46 | esnet: 47 | 48 | volumes: 49 | esdata: 50 | driver: local 51 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/support.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | from contextlib import asynccontextmanager 7 | 8 | from connectors_sdk.config import DataSourceFrameworkConfig 9 | from connectors_sdk.source import DEFAULT_CONFIGURATION, DataSourceConfiguration 10 | 11 | 12 | @asynccontextmanager 13 | async def create_source(klass, **extras): 14 | config = klass.get_default_configuration() 15 | for k, v in extras.items(): 16 | if k in config: 17 | config[k].update({"value": v}) 18 | else: 19 | config[k] = DEFAULT_CONFIGURATION.copy() | {"value": v} 20 | 21 | source = klass(configuration=DataSourceConfiguration(config)) 22 | data_source_config = DataSourceFrameworkConfig(5 * 1024 * 1024) 23 | 24 | source.set_framework_config(data_source_config) 25 | try: 26 | yield source 27 | finally: 28 | await source.close() 29 | 30 | 31 | async def assert_basics(klass, field, value): 32 | config = DataSourceConfiguration(klass.get_default_configuration()) 33 | assert config[field] == value 34 | async with create_source(klass) as source: 35 | await source.ping() 36 | await source.changed() 37 | -------------------------------------------------------------------------------- /docs/sync-rules/s3.md: -------------------------------------------------------------------------------- 1 | ### Setting up the Amazon S3 connector 2 | 3 | See the [Developer guide](../../docs/DEVELOPING.md) for setting up connectors. 4 | 5 | ### Overview 6 | 7 | Advanced Sync Rules help manage data in Amazon S3 buckets. They take the following parameters: 8 | 9 | 1. `bucket`: This is the Amazon S3 bucket the rule applies to. 10 | 2. `extension` (optional): Lists which file types to sync. Defaults to syncing all types. 11 | 3. `prefix` (optional): A key prefix string. The connector will index files and folders whose paths match this prefix. Defaults to `""` (i.e. sync all bucket objects). 12 | 13 | ### Advanced sync rules examples 14 | 15 | ### Indexing files and folders recursively by prefix 16 | 17 | #### Example 18 | Fetch files/folders in `folder1/docs`. 19 | 20 | ```json 21 | [ 22 | { 23 | "bucket": "bucket1", 24 | "prefix": "folder1/docs" 25 | } 26 | 27 | ] 28 | 29 | ``` 30 | 31 | #### Example 32 | 33 | Fetch files/folders starting with `folder1`. 34 | 35 | ```json 36 | [ 37 | { 38 | "bucket": "bucket2", 39 | "prefix": "folder1" 40 | } 41 | ] 42 | 43 | ``` 44 | 45 | ### Indexing files and folders by specifying extensions 46 | 47 | #### Example 48 | 49 | Fetch all objects which start with `abc` and then filter using file extensions.
50 | 51 | ```json 52 | [ 53 | { 54 | "bucket": "bucket2", 55 | "prefix": "abc", 56 | "extension": [".txt", ".png"] 57 | } 58 | ] 59 | 60 | ``` 61 | -------------------------------------------------------------------------------- /libs/connectors_sdk/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "elasticsearch-connectors-sdk" 7 | description = "Elasticsearch Connectors SDK" 8 | dynamic = ["version"] 9 | readme = "README.md" 10 | authors = [ 11 | { name = "Search Extract and Transform Team", email = "enterprise-search-ingestion-team@elastic.co" } 12 | ] 13 | license = "Elastic-2.0" 14 | classifiers = [ 15 | "Programming Language :: Python", 16 | "Programming Language :: Python :: 3.10", 17 | "Programming Language :: Python :: 3.11", 18 | "Programming Language :: Python :: 3 :: Only", 19 | ] 20 | requires-python = ">=3.10,<3.12" 21 | dependencies = [ 22 | "aiofiles==23.2.1", 23 | "aiohttp==3.12.14", 24 | "base64io==1.0.3", 25 | "fastjsonschema==2.16.2", 26 | "ecs-logging==2.0.0", 27 | ] 28 | 29 | [project.optional-dependencies] 30 | tests = [ 31 | "aioresponses==0.7.6", 32 | "faker==18.11.2", 33 | "freezegun==1.2.2", 34 | "ruff==0.6.3", 35 | "pytest==7.4.0", 36 | "pytest-cov==4.1.0", 37 | "pytest-asyncio==0.21.1", 38 | "pytest-mock==3.11.1", 39 | "pytest-randomly==3.13.0", 40 | "pytest-fail-slow==0.3.0", 41 | "pyright==1.1.317", 42 | "requests==2.32.4", 43 | ] 44 | 45 | [tool.setuptools.packages.find] 46 | where = ["."] 47 | include = ["connectors_sdk*"] 48 | 49 | [tool.setuptools.dynamic] 50 | version = {file = "connectors_sdk/VERSION"} 51 | 52 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/agent/cli.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | import asyncio 7 | import functools 8 | import signal 9 | 10 | from elastic_agent_client.util.async_tools import ( 11 | sleeps_for_retryable, 12 | ) 13 | 14 | from connectors.agent.component import ConnectorsAgentComponent 15 | from connectors.agent.logger import get_logger 16 | 17 | logger = get_logger("cli") 18 | 19 | 20 | def main(args=None): 21 | """Script entry point into running Connectors Service on Agent. 22 | 23 | It initialises an event loop, creates a component and runs the component. 24 | Additionally, signals are handled for graceful termination of the component. 
25 | """ 26 | loop = asyncio.get_event_loop() 27 | logger.info("Running agent") 28 | component = ConnectorsAgentComponent() 29 | 30 | def _shutdown(signal_name): 31 | sleeps_for_retryable.cancel(signal_name) 32 | component.stop(signal_name) 33 | 34 | for sig in (signal.SIGINT, signal.SIGTERM): 35 | loop.add_signal_handler(sig, functools.partial(_shutdown, sig.name)) 36 | 37 | return loop.run_until_complete(component.run()) 38 | 39 | 40 | if __name__ == "__main__": 41 | try: 42 | main() 43 | finally: 44 | logger.info("Bye") 45 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/services/content_sync_job_execution.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | 7 | from functools import cached_property 8 | 9 | from connectors.protocol import JobStatus, JobType 10 | from connectors.services.job_execution import JobExecutionService 11 | 12 | 13 | class ContentSyncJobExecutionService(JobExecutionService): 14 | name = "sync_content" 15 | 16 | def __init__(self, config): 17 | super().__init__(config, "content_sync_job_execution_service") 18 | 19 | @cached_property 20 | def display_name(self): 21 | return "content sync job execution" 22 | 23 | @cached_property 24 | def max_concurrency_config(self): 25 | return "service.max_concurrent_content_syncs" 26 | 27 | @cached_property 28 | def job_types(self): 29 | return [JobType.FULL.value, JobType.INCREMENTAL.value] 30 | 31 | @cached_property 32 | def max_concurrency(self): 33 | return self.service_config.get("max_concurrent_content_syncs") 34 | 35 | def should_execute(self, connector, sync_job): 36 | if connector.last_sync_status == JobStatus.IN_PROGRESS: 37 | sync_job.log_debug("Connector is still syncing content, skip the job...") 38 | return False 39 | 40 | return True 41 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/README.md: -------------------------------------------------------------------------------- 1 | e2e fixtures 2 | ------------ 3 | 4 | Each fixture needs to implement the following: 5 | 6 | - create a directory here that matches the service type 7 | - add in it the following files: 8 | 9 | - config.yml 10 | - fixture.py 11 | - requirements.txt 12 | - docker-compose.yml 13 | - connector.json 14 | 15 | config.yml 16 | ========== 17 | 18 | The config file necessary to run the connector for the ftest. 19 | Specifically, this must set the `connector_id` and `service_type` for the connector. 20 | Other configuration changes are optional. 21 | 22 | fixture.py 23 | ========== 24 | 25 | This file may contain four functions (all optional): 26 | 27 | - load -- loads data in the backend 28 | - remove -- removes random data in the backend 29 | - setup -- called before the docker is started 30 | - teardown -- called after the docker has been torn down 31 | 32 | requirements.txt 33 | ================ 34 | 35 | pip requirements. 
Lists all libs needed for `fixture.py` to run. 36 | 37 | docker-compose.yml 38 | ================== 39 | 40 | A Docker compose file that needs to run the whole stack: 41 | 42 | - Elasticsearch 43 | - Kibana 44 | - Enterprise Search 45 | - Any backend server like MySQL 46 | 47 | connector.json 48 | ============== 49 | 50 | This file should be a JSON representation of the connector’s `configuration`, with the schema populated as it would appear in an Elastic document. See the [example connector.json file](../fixtures/sharepoint_online/connector.json) for reference. 51 | -------------------------------------------------------------------------------- /scripts/stack/parse-params.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | parse_params() { 4 | update_images=false 5 | remove_volumes=false 6 | no_connectors=false 7 | connectors_only=false 8 | bypass_config=false 9 | reset_config=false 10 | use_snapshot=true 11 | watch_logs=false 12 | 13 | # Boilerplate parameter parsing 14 | PARAMS="" 15 | while (( "$#" )); do 16 | case "$1" in 17 | -n|--no-connectors) 18 | no_connectors=true 19 | shift 1 20 | ;; 21 | -x|--no-configuration) 22 | bypass_config=true 23 | shift 1 24 | ;; 25 | -c|--connectors-only) 26 | connectors_only=true 27 | shift 1 28 | ;; 29 | -u|--update-images) 30 | update_images=true 31 | shift 1 32 | ;; 33 | -v|--remove-volumes) 34 | remove_volumes=true 35 | shift 1 36 | ;; 37 | -r|--reset-configuration) 38 | reset_config=true 39 | shift 1 40 | ;; 41 | -s|--no-snapshot) 42 | use_snapshot=false 43 | shift 1 44 | ;; 45 | -w|--watch-logs) 46 | watch_logs=true 47 | shift 1 48 | ;; 49 | --) # end argument parsing 50 | shift 51 | break 52 | ;; 53 | -*|--*=) # unsupported flags 54 | echo "Error: Unsupported flag $1" >&2 55 | exit 1 56 | ;; 57 | *) # preserve positional arguments 58 | PARAMS="$PARAMS $1" 59 | shift 60 | ;; 61 | esac 62 | done 63 | 64 | 65 | parsed_params=$PARAMS 66 | } 67 | -------------------------------------------------------------------------------- /app/connectors_service/tests/agent/test_component.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0.
5 | # 6 | import asyncio 7 | from unittest.mock import MagicMock, patch 8 | 9 | import pytest 10 | 11 | from connectors.agent.component import ConnectorsAgentComponent 12 | 13 | 14 | class StubMultiService: 15 | def __init__(self): 16 | self.running_stop = asyncio.Event() 17 | self.has_ran = False 18 | self.has_shutdown = False 19 | 20 | async def run(self): 21 | self.has_ran = True 22 | self.running_stop.clear() 23 | await self.running_stop.wait() 24 | 25 | def shutdown(self, sig): 26 | self.has_shutdown = True 27 | self.running_stop.set() 28 | 29 | 30 | @pytest.mark.asyncio 31 | @patch("connectors.agent.component.MultiService", return_value=StubMultiService()) 32 | @patch("connectors.agent.component.new_v2_from_reader", return_value=MagicMock()) 33 | async def test_try_update_without_auth_data( 34 | patch_new_v2_from_reader, patch_multi_service 35 | ): 36 | component = ConnectorsAgentComponent() 37 | 38 | async def stop_after_timeout(): 39 | await asyncio.sleep(0.1) 40 | component.stop("SIGINT") 41 | 42 | await asyncio.gather(component.run(), stop_after_timeout()) 43 | 44 | assert patch_multi_service.return_value.has_ran 45 | assert patch_multi_service.return_value.has_shutdown 46 | -------------------------------------------------------------------------------- /scripts/stack/copy-config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CONFIG_PATH=${1:-} 4 | 5 | if [[ ${CURDIR:-} == "" ]]; then 6 | export CURDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 7 | fi 8 | source $CURDIR/set-env.sh 9 | 10 | config_dir="$PROJECT_ROOT/scripts/stack/connectors-config" 11 | script_config="$config_dir/config.yml" 12 | 13 | if [ -f "$script_config" ]; then 14 | echo "config.yml already exists in $config_dir. Not overwriting." 15 | return 0 16 | fi 17 | 18 | is_example_config=false 19 | if [[ "${CONFIG_PATH:-}" == "" ]]; then 20 | cp -n "$PROJECT_ROOT/config.yml.example" "$PROJECT_ROOT/config.yml" 21 | CONFIG_PATH="$PROJECT_ROOT/config.yml" 22 | is_example_config=true 23 | fi 24 | 25 | mkdir -p "$config_dir" 26 | cp "$CONFIG_PATH" "$script_config" 27 | echo "copied config from $CONFIG_PATH to $config_dir" 28 | 29 | if [[ "$is_example_config" == true ]]; then 30 | export CONFIG_FILE="$script_config" 31 | sed_cmd="sed -i" 32 | if [[ "$MACHINE_OS" == "MacOS" || "$MACHINE_OS" == "FreeBSD" ]]; then 33 | sed_cmd="sed -i -e" 34 | fi 35 | $sed_cmd '/connectors:/s/^#//g' "$script_config" 36 | $sed_cmd '/elasticsearch.host/s/^#//g' "$script_config" 37 | $sed_cmd '/elasticsearch.username/s/^#//g' "$script_config" 38 | $sed_cmd '/elasticsearch.password/s/^#//g' "$script_config" 39 | 40 | if [[ "${ELASTIC_PASSWORD:-}" != "" ]]; then 41 | esc_pass=$(printf '%s' "$ELASTIC_PASSWORD" | sed 's/[&|\\]/\\&/g') 42 | $sed_cmd "/^elasticsearch\.password:/s|:.*|: ${esc_pass}|" "$script_config"; 43 | fi 44 | fi 45 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/sharepoint/sharepoint_online/validator.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0.
5 | # 6 | 7 | import fastjsonschema 8 | from connectors_sdk.filtering.validation import ( 9 | AdvancedRulesValidator, 10 | SyncRuleValidationResult, 11 | ) 12 | 13 | 14 | class SharepointOnlineAdvancedRulesValidator(AdvancedRulesValidator): 15 | SCHEMA_DEFINITION = { 16 | "type": "object", 17 | "properties": { 18 | "skipExtractingDriveItemsOlderThan": {"type": "integer"}, # in Days 19 | }, 20 | "additionalProperties": False, 21 | } 22 | 23 | SCHEMA = fastjsonschema.compile(definition=SCHEMA_DEFINITION) 24 | 25 | async def validate(self, advanced_rules): 26 | try: 27 | SharepointOnlineAdvancedRulesValidator.SCHEMA(advanced_rules) 28 | 29 | return SyncRuleValidationResult.valid_result( 30 | rule_id=SyncRuleValidationResult.ADVANCED_RULES 31 | ) 32 | except fastjsonschema.JsonSchemaValueException as e: 33 | return SyncRuleValidationResult( 34 | rule_id=SyncRuleValidationResult.ADVANCED_RULES, 35 | is_valid=False, 36 | validation_message=f"{e.message}. Make sure advanced filtering rules follow the following schema: {SharepointOnlineAdvancedRulesValidator.SCHEMA_DEFINITION['properties']}", 37 | ) 38 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/notion/connector.json: -------------------------------------------------------------------------------- 1 | { 2 | "configuration": { 3 | "notion_secret_key": { 4 | "display": "text", 5 | "label": "Notion Secret Key", 6 | "order": 1, 7 | "required": true, 8 | "sensitive": true, 9 | "type": "str", 10 | "value": "secret_1234" 11 | }, 12 | "databases": { 13 | "label": "List of Databases", 14 | "display": "text", 15 | "order": 2, 16 | "required": true, 17 | "type": "list", 18 | "value": "*" 19 | }, 20 | "pages": { 21 | "label": "List of Pages", 22 | "display": "text", 23 | "order": 3, 24 | "required": true, 25 | "type": "list", 26 | "value": "*" 27 | }, 28 | "index_comments": { 29 | "display": "toggle", 30 | "label": "Enable indexing comments", 31 | "order": 4, 32 | "tooltip": "Enabling this would significantly degrade the connector performance due to increased amount of network calls to the source", 33 | "type": "bool", 34 | "value": false 35 | }, 36 | "concurrent_downloads": { 37 | "default_value": 20, 38 | "display": "numeric", 39 | "label": "Maximum concurrent downloads", 40 | "order": 6, 41 | "required": false, 42 | "type": "int", 43 | "ui_restrictions": [ 44 | "advanced" 45 | ] 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /.buildkite/test_python_packages.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # !!! WARNING DO NOT add -x to avoid leaking vault passwords 4 | set -euo pipefail 5 | 6 | source .buildkite/shared.sh 7 | 8 | init_python 9 | 10 | python -m pip install --upgrade build twine 11 | python -m build "$PACKAGE_PATH" 12 | ls -lah "$PACKAGE_PATH/dist/" 13 | python -m twine check "$PACKAGE_PATH/dist/*" 14 | 15 | # If this is the connectors_service package, test the installation and CLI 16 | if [[ "$PACKAGE_PATH" == *app/connectors_service* ]]; then 17 | echo "Testing connectors_service package installation and CLI..." 
18 | 19 | # Install the connectors_sdk package first 20 | LIB_PATH="libs/connectors_sdk" 21 | python -m build "$LIB_PATH" 22 | python -m pip install "$LIB_PATH"/dist/*.whl 23 | 24 | python -m pip install "$PACKAGE_PATH"/dist/*.whl 25 | connectors --help 26 | elastic-ingest --help 27 | # elastic-agent-connectors --help 28 | test-connectors --help 29 | else 30 | python -m pip install "$PACKAGE_PATH"/dist/*.whl 31 | python -c "import connectors_sdk; print(f'🎉 Success! connectors_sdk version: {connectors_sdk.__version__}')" 32 | fi 33 | 34 | if [[ "${PYTHON_VERSION:-}" == "${DRA_PYTHON_VERSION:-}" ]]; then 35 | if [[ "$PACKAGE_PATH" == *app/connectors_service* ]]; then 36 | buildkite-agent artifact upload 'app/connectors_service/dist/*.whl' 37 | buildkite-agent artifact upload 'app/connectors_service/dist/*.tar.gz' 38 | elif [[ "$PACKAGE_PATH" == *libs/connectors_sdk* ]]; then 39 | buildkite-agent artifact upload 'libs/connectors_sdk/dist/*.whl' 40 | buildkite-agent artifact upload 'libs/connectors_sdk/dist/*.tar.gz' 41 | fi 42 | fi 43 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/github/utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | 7 | from enum import Enum 8 | 9 | WILDCARD = "*" 10 | BLOB = "blob" 11 | FILE = "file" 12 | GITHUB_CLOUD = "github_cloud" 13 | GITHUB_SERVER = "github_server" 14 | PERSONAL_ACCESS_TOKEN = "personal_access_token" # noqa: S105 15 | GITHUB_APP = "github_app" 16 | PULL_REQUEST_OBJECT = "pullRequest" 17 | REPOSITORY_OBJECT = "repository" 18 | 19 | RETRIES = 3 20 | RETRY_INTERVAL = 2 21 | FORBIDDEN = 403 22 | UNAUTHORIZED = 401 23 | NODE_SIZE = 100 24 | REVIEWS_COUNT = 45 25 | 26 | SUPPORTED_EXTENSION = [".markdown", ".md", ".rst"] 27 | 28 | FILE_SCHEMA = { 29 | "name": "name", 30 | "size": "size", 31 | "type": "type", 32 | "path": "path", 33 | "mode": "mode", 34 | "extension": "extension", 35 | "_timestamp": "_timestamp", 36 | } 37 | PATH_SCHEMA = { 38 | "name": "name", 39 | "size": "size", 40 | "type": "type", 41 | "path": "path", 42 | "extension": "extension", 43 | "_timestamp": "_timestamp", 44 | } 45 | 46 | 47 | class ObjectType(Enum): 48 | REPOSITORY = "Repository" 49 | ISSUE = "Issue" 50 | PULL_REQUEST = "Pull request" 51 | PR = "pr" 52 | BRANCH = "branch" 53 | PATH = "path" 54 | 55 | 56 | class UnauthorizedException(Exception): 57 | pass 58 | 59 | 60 | class NoInstallationAccessTokenException(Exception): 61 | pass 62 | 63 | 64 | class ForbiddenException(Exception): 65 | pass 66 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/onedrive/constants.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | 7 | 8 | import os 9 | 10 | from connectors_sdk.logger import logger 11 | 12 | RETRIES = 3 13 | RETRY_INTERVAL = 2 14 | DEFAULT_RETRY_SECONDS = 30 15 | FETCH_SIZE = 999 16 | DEFAULT_PARALLEL_CONNECTION_COUNT = 15 17 | REQUEST_TIMEOUT = 300 18 | FILE = "file" 19 | FOLDER = "folder" 20 | 21 | USERS = "users" 22 | GROUPS = "groups" 23 | PERMISSIONS = "permissions" 24 | DELTA = "delta" 25 | PING = "ping" 26 | BATCH = "batch" 27 | ITEM_FIELDS = "id,name,lastModifiedDateTime,content.downloadUrl,createdDateTime,size,webUrl,parentReference,file,folder" 28 | 29 | ENDPOINTS = { 30 | PING: "drives", 31 | USERS: "users", 32 | GROUPS: "users/{user_id}/transitiveMemberOf", 33 | PERMISSIONS: "users/{user_id}/drive/items/{item_id}/permissions", 34 | DELTA: "users/{user_id}/drive/root/delta", 35 | BATCH: "$batch", 36 | } 37 | 38 | GRAPH_API_MAX_BATCH_SIZE = 20 39 | 40 | if "OVERRIDE_URL" in os.environ: 41 | logger.warning("x" * 50) 42 | logger.warning( 43 | f"ONEDRIVE CONNECTOR CALLS ARE REDIRECTED TO {os.environ['OVERRIDE_URL']}" 44 | ) 45 | logger.warning("IT'S SUPPOSED TO BE USED ONLY FOR TESTING") 46 | logger.warning("x" * 50) 47 | override_url = os.environ["OVERRIDE_URL"] 48 | BASE_URL = override_url 49 | GRAPH_API_AUTH_URL = override_url 50 | else: 51 | BASE_URL = "https://graph.microsoft.com/v1.0/" 52 | GRAPH_API_AUTH_URL = "https://login.microsoftonline.com" 53 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/outlook/utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | """Microsoft Outlook source module, responsible for fetching documents from an Outlook server or the cloud platform.""" 7 | 8 | from datetime import date 9 | 10 | import exchangelib 11 | 12 | from connectors.access_control import prefix_identity 13 | 14 | 15 | def ews_format_to_datetime(source_datetime, timezone): 16 | """Convert a datetime to the user account timezone 17 | Args: 18 | source_datetime: Datetime in UTC format 19 | timezone: User account timezone 20 | Returns: 21 | Datetime: Datetime formatted in the user account timezone 22 | """ 23 | if isinstance(source_datetime, exchangelib.ewsdatetime.EWSDateTime) and isinstance( 24 | timezone, exchangelib.ewsdatetime.EWSTimeZone 25 | ): 26 | return (source_datetime.astimezone(timezone)).strftime("%Y-%m-%dT%H:%M:%SZ") 27 | elif isinstance(source_datetime, exchangelib.ewsdatetime.EWSDate) or isinstance( 28 | source_datetime, date 29 | ): 30 | return source_datetime.strftime("%Y-%m-%d") 31 | else: 32 | return source_datetime 33 | 34 | 35 | def _prefix_email(email): 36 | return prefix_identity("email", email) 37 | 38 | 39 | def _prefix_display_name(user): 40 | return prefix_identity("name", user) 41 | 42 | 43 | def _prefix_user_id(user_id): 44 | return prefix_identity("user_id", user_id) 45 | 46 | 47 | def _prefix_job(job_title): 48 | return prefix_identity("job_title", job_title) 49 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/services/access_control_sync_job_execution.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V.
under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from functools import cached_property 7 | 8 | from connectors.protocol import JobStatus, JobType 9 | from connectors.services.job_execution import JobExecutionService 10 | 11 | 12 | class AccessControlSyncJobExecutionService(JobExecutionService): 13 | name = "sync_access_control" 14 | 15 | def __init__(self, config): 16 | super().__init__(config, "access_control_sync_job_execution_service") 17 | 18 | @cached_property 19 | def display_name(self): 20 | return "access control sync job execution" 21 | 22 | @cached_property 23 | def max_concurrency_config(self): 24 | return "service.max_concurrent_access_control_syncs" 25 | 26 | @cached_property 27 | def job_types(self): 28 | return JobType.ACCESS_CONTROL.value 29 | 30 | @cached_property 31 | def max_concurrency(self): 32 | return self.service_config.get("max_concurrent_access_control_syncs") 33 | 34 | def should_execute(self, connector, sync_job): 35 | if not connector.features.document_level_security_enabled(): 36 | sync_job.log_debug("DLS is not enabled for the connector, skip the job...") 37 | return False 38 | 39 | if connector.last_access_control_sync_status == JobStatus.IN_PROGRESS: 40 | sync_job.log_debug( 41 | "Connector is still syncing access control, skip the job..." 42 | ) 43 | return False 44 | 45 | return True 46 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/gmail/validator.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | 7 | import fastjsonschema 8 | from connectors_sdk.filtering.validation import ( 9 | AdvancedRulesValidator, 10 | SyncRuleValidationResult, 11 | ) 12 | from fastjsonschema import JsonSchemaValueException 13 | 14 | 15 | class GMailAdvancedRulesValidator(AdvancedRulesValidator): 16 | MESSAGES_SCHEMA_DEFINITION = { 17 | "type": "array", 18 | "items": {"type": "string"}, 19 | "minItems": 1, 20 | } 21 | 22 | SCHEMA_DEFINITION = { 23 | "type": "object", 24 | "properties": {"messages": MESSAGES_SCHEMA_DEFINITION}, 25 | "additionalProperties": False, 26 | } 27 | 28 | SCHEMA = fastjsonschema.compile( 29 | definition=SCHEMA_DEFINITION, 30 | ) 31 | 32 | async def validate(self, advanced_rules): 33 | if len(advanced_rules) == 0: 34 | return SyncRuleValidationResult.valid_result( 35 | SyncRuleValidationResult.ADVANCED_RULES 36 | ) 37 | 38 | try: 39 | GMailAdvancedRulesValidator.SCHEMA(advanced_rules) 40 | 41 | return SyncRuleValidationResult.valid_result( 42 | rule_id=SyncRuleValidationResult.ADVANCED_RULES 43 | ) 44 | except JsonSchemaValueException as e: 45 | return SyncRuleValidationResult( 46 | rule_id=SyncRuleValidationResult.ADVANCED_RULES, 47 | is_valid=False, 48 | validation_message=e.message, 49 | ) 50 | -------------------------------------------------------------------------------- /scripts/stack/set-env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ "${CURDIR:-}" == "" ]]; then 4 | export CURDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 5 | fi 6 | 7 | function realpath { 8 | echo "$(cd "$(dirname "$1")"; pwd)"/"$(basename "$1")"; 9 | } 10 | 11 | if [[ "${CURDIR:-}" != "" && "${PROJECT_ROOT:-}" == "" ]]; then 12 | SCRIPT_DIR=$(realpath "$(dirname "$CURDIR")") 13 | export PROJECT_ROOT=$(realpath "$(dirname "$SCRIPT_DIR")") 14 | echo "set PROJECT_ROOT to $PROJECT_ROOT" 15 | fi 16 | 17 | if [[ "${CONNECTORS_VERSION:-}" == "" ]]; then 18 | SET_CONNECTORS_VERSION=`head -1 $PROJECT_ROOT/connectors/VERSION` 19 | else 20 | SET_CONNECTORS_VERSION="$CONNECTORS_VERSION" 21 | fi 22 | 23 | SET_STACK_VERSION="$SET_CONNECTORS_VERSION" 24 | 25 | if [ "${use_snapshot:-}" == true ]; then 26 | SET_CONNECTORS_VERSION="$SET_CONNECTORS_VERSION-SNAPSHOT" 27 | SET_STACK_VERSION="$SET_STACK_VERSION-SNAPSHOT" 28 | fi 29 | 30 | if [ -z "${ELASTICSEARCH_VERSION:-}" ] 31 | then 32 | export ELASTICSEARCH_VERSION="$SET_STACK_VERSION" 33 | fi 34 | echo "ELASTICSEARCH_VERSION=$ELASTICSEARCH_VERSION" 35 | 36 | if [ -z "${KIBANA_VERSION:-}" ] 37 | then 38 | export KIBANA_VERSION="$SET_STACK_VERSION" 39 | fi 40 | echo "KIBANA_VERSION=$KIBANA_VERSION" 41 | 42 | if [ -z "${CONNECTORS_VERSION:-}" ] 43 | then 44 | export CONNECTORS_VERSION="$SET_CONNECTORS_VERSION" 45 | fi 46 | echo "CONNECTORS_VERSION=$CONNECTORS_VERSION" 47 | 48 | if [ -z "${ELASTIC_PASSWORD:-}" ] 49 | then 50 | export ELASTIC_PASSWORD="changeme" 51 | fi 52 | 53 | uname_value=`uname` 54 | case "${uname_value:-none}" in 55 | Linux*) machine_os="Linux";; 56 | Darwin*) machine_os="MacOS";; 57 | FreeBSD*) machine_os="FreeBSD";; 58 | *) machine_os="UNKNOWN:${uname_value}" 59 | esac 60 | export MACHINE_OS="$machine_os" 61 | -------------------------------------------------------------------------------- /scripts/stack/docker/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | networks: 4 | elastic: 5 | name: 'elastic' 6 | external: true 7 | 8 | services: 9 | elasticsearch: 10 | image:
docker.elastic.co/elasticsearch/elasticsearch:${ELASTICSEARCH_VERSION} 11 | environment: 12 | - "discovery.type=single-node" 13 | - "ES_JAVA_OPTS=-Xms2048m -Xmx2048m" 14 | - "xpack.security.enabled=true" 15 | - "xpack.security.authc.api_key.enabled=true" 16 | - "xpack.security.authc.token.enabled=true" 17 | - "ELASTIC_PASSWORD=${ELASTIC_PASSWORD}" 18 | - "action.destructive_requires_name=false" 19 | ulimits: 20 | memlock: 21 | soft: -1 22 | hard: -1 23 | networks: 24 | - elastic 25 | ports: 26 | - 9200:9200 27 | volumes: 28 | - conn-es-data:/usr/share/elasticsearch/data 29 | 30 | kibana: 31 | image: docker.elastic.co/kibana/kibana:${KIBANA_VERSION} 32 | ports: 33 | - 5601:5601 34 | depends_on: 35 | - elasticsearch 36 | networks: 37 | - elastic 38 | environment: 39 | ELASTICSEARCH_URL: http://elasticsearch:9200 40 | ELASTICSEARCH_HOSTS: http://elasticsearch:9200 41 | ELASTICSEARCH_USERNAME: kibana_system 42 | ELASTICSEARCH_PASSWORD: ${ELASTIC_PASSWORD} 43 | XPACK_ENCRYPTEDSAVEDOBJECTS_ENCRYPTIONKEY: c69548d9027afcf4d55146b1d425a9f4c69548d9027afcf4d55146b1d425a9f4 44 | 45 | elastic-connectors: 46 | image: docker.elastic.co/integrations/elastic-connectors:${CONNECTORS_VERSION} 47 | depends_on: 48 | - elasticsearch 49 | - kibana 50 | volumes: 51 | - ${CURDIR}/connectors-config:/config 52 | command: /app/bin/elastic-ingest -c /config/config.yml 53 | network_mode: "elastic" 54 | 55 | volumes: 56 | conn-es-data: 57 | -------------------------------------------------------------------------------- /.buildkite/publish/build-multiarch-docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ######## 4 | # Builds the multiarch docker image and pushes it to the docker registry 5 | ######## 6 | 7 | set -exu 8 | set -o pipefail 9 | 10 | # Load our common environment variables for publishing 11 | export CURDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 12 | source $CURDIR/publish-common.sh 13 | 14 | # Set our tag name as well as the tag names of the individual platform images 15 | TAG_NAME="${BASE_TAG_NAME}:${DOCKER_TAG_VERSION}" 16 | AMD64_TAG="${BASE_TAG_NAME}:${DOCKER_TAG_VERSION}-amd64" 17 | ARM64_TAG="${BASE_TAG_NAME}:${DOCKER_TAG_VERSION}-arm64" 18 | 19 | # Pull the images from the registry 20 | buildah pull $AMD64_TAG 21 | buildah pull $ARM64_TAG 22 | 23 | # ensure +x is set to avoid writing any sensitive information to the console 24 | set +x 25 | 26 | # The registry password is read from vault below and piped straight into buildah login, so it is never stored in a shell variable. 27 | 28 | # Log into Docker 29 | echo "Logging into docker..." 30 | DOCKER_USER=$(vault read -address "${VAULT_ADDR}" -field user_20230609 secret/ci/elastic-connectors/${VAULT_USER}) 31 | vault read -address "${VAULT_ADDR}" -field secret_20230609 secret/ci/elastic-connectors/${VAULT_USER} | \ 32 | buildah login --username="${DOCKER_USER}" --password-stdin docker.elastic.co 33 | 34 | # Create the manifest for the multiarch image 35 | echo "Creating manifest..." 36 | buildah manifest create $TAG_NAME \ 37 | $AMD64_TAG \ 38 | $ARM64_TAG 39 | 40 | # ... and push it 41 | echo "Pushing manifest..." 42 | buildah manifest push $TAG_NAME docker://$TAG_NAME 43 | 44 | # Write out the final manifest for debugging purposes 45 | echo "Built and pushed multiarch image... dumping final manifest..."
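# (Expect one manifests[] entry per platform in the printed image index, linux/amd64 and linux/arm64, each with its own digest.)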
46 | buildah manifest inspect $TAG_NAME 47 | -------------------------------------------------------------------------------- /Dockerfile.agent: -------------------------------------------------------------------------------- 1 | # This file is for internal experimental purposes only. 2 | # Please do not use this file for any real-world workloads. 3 | FROM docker.elastic.co/elastic-agent/elastic-agent:9.3.0-SNAPSHOT 4 | 5 | USER root 6 | 7 | # Install basic dependencies 8 | RUN microdnf update -y && microdnf install -y \ 9 | vim \ 10 | wget \ 11 | git \ 12 | make \ 13 | python3.11 \ 14 | python3.11-pip \ 15 | && microdnf clean all 16 | 17 | 18 | # Install Go-based yq separately 19 | RUN wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/bin/yq && \ 20 | chmod +x /usr/bin/yq 21 | 22 | # Copy project files 23 | COPY ./ /usr/share/connectors 24 | 25 | # Set working directory 26 | WORKDIR /usr/share/connectors 27 | 28 | # Install Python agent client 29 | RUN PYTHON=python3.11 make clean install install-agent 30 | 31 | # Copy and move the component files into the dynamic agent directory 32 | COPY ./resources/agent/python-elastic-agent-client /tmp/python-elastic-agent-client 33 | COPY ./resources/agent/python-elastic-agent-client.spec.yml /tmp/python-elastic-agent-client.spec.yml 34 | 35 | RUN BUILD_DIR=$(cat /usr/share/elastic-agent/.build_hash.txt | cut -c 1-6) && \ 36 | mv /tmp/python-elastic-agent-client \ 37 | /usr/share/elastic-agent/data/elastic-agent-${BUILD_DIR}/components/python-elastic-agent-client && \ 38 | mv /tmp/python-elastic-agent-client.spec.yml \ 39 | /usr/share/elastic-agent/data/elastic-agent-${BUILD_DIR}/components/python-elastic-agent-client.spec.yml 40 | 41 | # Modify the elastic-agent.yml file 42 | RUN yq eval --inplace '.inputs += { "type": "connectors-py", "id": "connectors-py", "use_output": "default"}' \ 43 | /usr/share/elastic-agent/elastic-agent.yml 44 | 45 | # Set the final working directory 46 | WORKDIR /usr/share/elastic-agent 47 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/cli/auth.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | import asyncio 7 | import os 8 | 9 | import yaml 10 | from elasticsearch import ApiError 11 | 12 | from connectors.es.cli_client import CLIClient 13 | 14 | CONFIG_FILE_PATH = ".cli/config.yml" 15 | 16 | 17 | class Auth: 18 | def __init__(self, host, username=None, password=None, api_key=None): 19 | elastic_config = { 20 | "host": host, 21 | "username": username, 22 | "password": password, 23 | "api_key": api_key, 24 | } 25 | 26 | # remove empty values 27 | self.elastic_config = {k: v for k, v in elastic_config.items() if v is not None} 28 | 29 | self.cli_client = CLIClient(self.elastic_config) 30 | 31 | def authenticate(self): 32 | if asyncio.run(self.__ping_es_client()): 33 | self.__save_config() 34 | return True 35 | else: 36 | return False 37 | 38 | def is_config_present(self): 39 | return os.path.isfile(CONFIG_FILE_PATH) 40 | 41 | async def __ping_es_client(self): 42 | try: 43 | return await self.cli_client.ping() 44 | except ApiError: 45 | return False 46 | finally: 47 | await self.cli_client.close() 48 | 49 | def __save_config(self): 50 | yaml_content = yaml.dump({"elasticsearch": self.elastic_config}) 51 | os.makedirs(os.path.dirname(CONFIG_FILE_PATH), exist_ok=True) 52 | 53 | with open(CONFIG_FILE_PATH, "w") as f: 54 | f.write(yaml_content) 55 | -------------------------------------------------------------------------------- /libs/connectors_sdk/Makefile: -------------------------------------------------------------------------------- 1 | PYTHON ?= python3 2 | VENV_DIR ?= .venv/bin 3 | SLOW_TEST_THRESHOLD = 1 4 | 5 | .venv/bin/python: 6 | $(PYTHON) -m venv .venv 7 | $(VENV_DIR)/pip install --upgrade pip 8 | $(VENV_DIR)/python -m pip install build 9 | $(VENV_DIR)/pip install -e . 10 | 11 | install: .venv/bin/python 12 | 13 | install-package: 14 | $(PYTHON) -m venv .venv 15 | $(VENV_DIR)/pip install --upgrade pip 16 | $(VENV_DIR)/python -m pip install build 17 | $(VENV_DIR)/python -m build 18 | $(VENV_DIR)/pip install dist/*.whl 19 | 20 | clean: 21 | rm -rf bin lib .venv include elasticsearch_connectors_sdk.egg-info .coverage site-packages pyvenv.cfg include.site.python*.greenlet dist build htmlcov 22 | 23 | 24 | .venv/bin/pytest: .venv/bin/python 25 | .venv/bin/pip install ".[tests]" 26 | 27 | .venv/bin/ruff: .venv/bin/python 28 | .venv/bin/pip install ".[tests]" 29 | 30 | .venv/bin/pip-licenses: .venv/bin/python 31 | .venv/bin/pip install pip-licenses 32 | 33 | typecheck: install 34 | .venv/bin/pyright connectors_sdk 35 | .venv/bin/pyright tests 36 | 37 | lint: install .venv/bin/ruff typecheck 38 | .venv/bin/ruff check connectors_sdk 39 | .venv/bin/ruff format connectors_sdk --check 40 | .venv/bin/ruff check tests 41 | .venv/bin/ruff format tests --check 42 | 43 | test: .venv/bin/pytest 44 | $(VENV_DIR)/pytest --cov-report term-missing --cov-fail-under 80 --cov-report html --cov=connectors_sdk --fail-slow=$(SLOW_TEST_THRESHOLD) -sv tests 45 | 46 | autoformat: install .venv/bin/ruff 47 | .venv/bin/ruff check connectors_sdk --fix 48 | .venv/bin/ruff format connectors_sdk 49 | .venv/bin/ruff check tests --fix 50 | .venv/bin/ruff format tests 51 | 52 | notice: .venv/bin/python .venv/bin/pip-licenses 53 | .venv/bin/pip-licenses --format=plain-vertical --with-license-file --no-license-path > NOTICE.txt 54 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/mongodb/fixture.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. 
and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | # ruff: noqa: T201 7 | import os 8 | from uuid import uuid4 9 | 10 | import bson 11 | from faker import Faker 12 | from pymongo import MongoClient 13 | 14 | DATA_SIZE = os.environ.get("DATA_SIZE", "small").lower() 15 | _SIZES = {"small": 750, "medium": 1500, "large": 3000} 16 | NUMBER_OF_RECORDS_TO_DELETE = 50 17 | 18 | fake = Faker() 19 | client = MongoClient( 20 | "mongodb://admin:justtesting@127.0.0.1:27021?uuidRepresentation=standard" 21 | ) 22 | 23 | 24 | async def load(): 25 | def _random_record(): 26 | return { 27 | "id": bson.ObjectId(), 28 | "name": fake.name(), 29 | "address": fake.address(), 30 | "birthdate": fake.date(), 31 | "time": fake.time(), 32 | "comment": fake.sentence(), 33 | "unique_id": uuid4(), 34 | } 35 | 36 | record_number = _SIZES[DATA_SIZE] + NUMBER_OF_RECORDS_TO_DELETE 37 | 38 | print(f"Generating {record_number} random records") 39 | db = client.sample_database 40 | collection = db.sample_collection 41 | 42 | data = [] 43 | for _ in range(record_number): 44 | data.append(_random_record()) 45 | collection.insert_many(data) 46 | 47 | 48 | async def remove(): 49 | db = client.sample_database 50 | collection = db.sample_collection 51 | 52 | records = collection.find().limit(NUMBER_OF_RECORDS_TO_DELETE) 53 | doc_ids = [rec.get("_id") for rec in records] 54 | 55 | query = {"_id": {"$in": doc_ids}} 56 | collection.delete_many(query) 57 | -------------------------------------------------------------------------------- /app/connectors_service/scripts/deps-csv.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | 7 | import csv 8 | import sys 9 | 10 | # input csv column indices 11 | NAME = 0 12 | VERSION = 1 13 | LICENSE = 2 14 | URL = 3 15 | 16 | 17 | def main(dependencies_csv): 18 | """ 19 | The input is what we get from `pip-licenses --format=csv --with-urls` 20 | See: https://pypi.org/project/pip-licenses/#csv 21 | Unfortunately, our DRA requires a few more columns that `pip-licenses` does not understand. 22 | This function reorders each row. 23 | :param dependencies_csv: 24 | :return: 25 | """ 26 | rows = [] 27 | 28 | # read the csv rows into memory 29 | with open(dependencies_csv) as csv_file: 30 | reader = csv.reader(csv_file) 31 | for row in reader: 32 | rows.append(row) 33 | 34 | # overwrite the original file 35 | with open(dependencies_csv, "w") as csv_file: 36 | writer = csv.writer(csv_file, quoting=csv.QUOTE_MINIMAL) 37 | 38 | # The expected column order (this row is the CSV header) 39 | writer.writerow(["name", "url", "version", "revision", "license", "sourceURL"]) 40 | 41 | # reorder each row using the expected column order. 
(leaves 'revision' and 'sourceURL' empty) 42 | for row in rows[1:]: # skip the header row 43 | writer.writerow([row[NAME], row[URL], row[VERSION], "", row[LICENSE], ""]) 44 | 45 | 46 | if __name__ == "__main__": 47 | dependencies_csv = sys.argv[1] 48 | print(f"post-processing {dependencies_csv}") # noqa 49 | main(dependencies_csv) 50 | print(f"wrote output to {dependencies_csv}") # noqa 51 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/redis/fixture.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | # ruff: noqa: T201 7 | import os 8 | import random 9 | 10 | import redis.asyncio as redis 11 | 12 | from tests.commons import WeightedFakeProvider 13 | 14 | DATA_SIZE = os.environ.get("DATA_SIZE", "small").lower() 15 | _NUM_DB = {"small": 2, "medium": 4, "large": 16} 16 | NUM_DB = _NUM_DB[DATA_SIZE] 17 | RECORDS_TO_DELETE = 10 18 | EACH_ROW_ITEMS = 500 19 | ENDPOINT = "redis://localhost:6379/" 20 | 21 | fake_provider = WeightedFakeProvider(weights=[0.65, 0.3, 0.05, 0]) 22 | 23 | 24 | async def inject_lines(redis_client, lines): 25 | text = fake_provider.get_text() 26 | rows = {} 27 | for row_id in range(lines): 28 | key = f"user_{row_id}" 29 | rows[key] = text 30 | await redis_client.mset(rows) 31 | 32 | 33 | async def load(): 34 | """Loads N databases of 500 rows each; each row is ~ 1024*20 bytes.""" 35 | redis_client = await redis.from_url(ENDPOINT) 36 | for db in range(NUM_DB): 37 | print(f"Adding data in {db}...") 38 | await redis_client.execute_command("SELECT", db) 39 | await inject_lines(redis_client, EACH_ROW_ITEMS) 40 | 41 | 42 | async def remove(): 43 | """Removes 10 random items per db""" 44 | redis_client = await redis.from_url(ENDPOINT) 45 | for db in range(NUM_DB): 46 | print(f"Working on db {db}...") 47 | await redis_client.execute_command("SELECT", db) 48 | keys = [ 49 | f"user_{row_id}" 50 | for row_id in random.sample(range(1, 100), RECORDS_TO_DELETE) 51 | ] 52 | await redis_client.delete(*keys) 53 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/access_control.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0.
5 | # 6 | 7 | ACCESS_CONTROL = "_allow_access_control" 8 | DLS_QUERY = """{ 9 | "bool": { 10 | "should": [ 11 | { 12 | "bool": { 13 | "must_not": { 14 | "exists": { 15 | "field": "_allow_access_control" 16 | } 17 | } 18 | } 19 | }, 20 | { 21 | "terms": { 22 | "_allow_access_control.enum": {{#toJson}}access_control{{/toJson}} 23 | } 24 | } 25 | ] 26 | } 27 | }""" 28 | 29 | 30 | def prefix_identity(prefix, identity): 31 | if prefix is None or identity is None: 32 | return None 33 | 34 | return f"{prefix}:{identity}" 35 | 36 | 37 | def es_access_control_query(access_control): 38 | # filter out 'None' values 39 | filtered_access_control = list( 40 | filter( 41 | lambda access_control_entity: access_control_entity is not None, 42 | access_control, 43 | ) 44 | ) 45 | 46 | return { 47 | "query": { 48 | "template": { 49 | "params": {"access_control": filtered_access_control}, 50 | "source": DLS_QUERY, 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/oracle/queries.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | 7 | from connectors.sources.shared.database.generic_database import Queries 8 | 9 | 10 | class OracleQueries(Queries): 11 | """Class contains methods which return query""" 12 | 13 | def ping(self): 14 | """Query to ping source""" 15 | return "SELECT 1+1 FROM DUAL" 16 | 17 | def all_tables(self, **kwargs): 18 | """Query to get all tables""" 19 | return ( 20 | f"SELECT TABLE_NAME FROM all_tables where OWNER = UPPER('{kwargs['user']}')" 21 | ) 22 | 23 | def table_primary_key(self, **kwargs): 24 | """Query to get the primary key""" 25 | return f"SELECT cols.column_name FROM all_constraints cons, all_cons_columns cols WHERE cols.table_name = '{kwargs['table']}' AND cons.constraint_type = 'P' AND cons.constraint_name = cols.constraint_name AND cons.owner = UPPER('{kwargs['user']}') AND cons.owner = cols.owner ORDER BY cols.table_name, cols.position" 26 | 27 | def table_data(self, **kwargs): 28 | """Query to get the table data""" 29 | return f"SELECT * FROM {kwargs['table']}" 30 | 31 | def table_last_update_time(self, **kwargs): 32 | """Query to get the last update time of the table""" 33 | return f"SELECT SCN_TO_TIMESTAMP(MAX(ora_rowscn)) from {kwargs['table']}" 34 | 35 | def table_data_count(self, **kwargs): 36 | """Query to get the number of rows in the table""" 37 | return f"SELECT COUNT(*) FROM {kwargs['table']}" 38 | 39 | def all_schemas(self): 40 | """Query to get all schemas of database""" 41 | pass # Multiple schemas not supported in Oracle 42 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/s3/validator.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | import fastjsonschema 7 | from connectors_sdk.filtering.validation import ( 8 | AdvancedRulesValidator, 9 | SyncRuleValidationResult, 10 | ) 11 | from fastjsonschema import JsonSchemaValueException 12 | 13 | 14 | class S3AdvancedRulesValidator(AdvancedRulesValidator): 15 | RULES_OBJECT_SCHEMA_DEFINITION = { 16 | "type": "object", 17 | "properties": { 18 | "bucket": {"type": "string", "minLength": 1}, 19 | "prefix": {"type": "string"}, 20 | "extension": {"type": "array"}, 21 | }, 22 | "required": ["bucket"], 23 | "additionalProperties": False, 24 | } 25 | 26 | SCHEMA_DEFINITION = {"type": "array", "items": RULES_OBJECT_SCHEMA_DEFINITION} 27 | 28 | SCHEMA = fastjsonschema.compile(definition=SCHEMA_DEFINITION) 29 | 30 | def __init__(self, source): 31 | self.source = source 32 | 33 | async def validate(self, advanced_rules): 34 | if len(advanced_rules) == 0: 35 | return SyncRuleValidationResult.valid_result( 36 | SyncRuleValidationResult.ADVANCED_RULES 37 | ) 38 | try: 39 | S3AdvancedRulesValidator.SCHEMA(advanced_rules) 40 | return SyncRuleValidationResult.valid_result( 41 | rule_id=SyncRuleValidationResult.ADVANCED_RULES 42 | ) 43 | except JsonSchemaValueException as e: 44 | return SyncRuleValidationResult( 45 | rule_id=SyncRuleValidationResult.ADVANCED_RULES, 46 | is_valid=False, 47 | validation_message=e.message, 48 | ) 49 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/mssql/queries.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | 7 | from connectors.sources.shared.database.generic_database import Queries 8 | 9 | # Connector will skip the below tables if it gets from the input 10 | TABLES_TO_SKIP = {"msdb": ["sysutility_ucp_configuration_internal"]} 11 | 12 | 13 | class MSSQLQueries(Queries): 14 | """Class contains methods which return query""" 15 | 16 | def ping(self): 17 | """Query to ping source""" 18 | return "SELECT 1+1" 19 | 20 | def all_tables(self, **kwargs): 21 | """Query to get all tables""" 22 | return f"SELECT table_name FROM information_schema.tables WHERE TABLE_SCHEMA = '{ kwargs['schema'] }'" 23 | 24 | def table_primary_key(self, **kwargs): 25 | """Query to get the primary key""" 26 | return f"SELECT C.COLUMN_NAME FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS T JOIN INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE C ON C.CONSTRAINT_NAME=T.CONSTRAINT_NAME WHERE C.TABLE_NAME='{kwargs['table']}' and C.TABLE_SCHEMA='{kwargs['schema']}' and T.CONSTRAINT_TYPE='PRIMARY KEY'" 27 | 28 | def table_data(self, **kwargs): 29 | """Query to get the table data""" 30 | return f'SELECT * FROM {kwargs["schema"]}."{kwargs["table"]}"' 31 | 32 | def table_last_update_time(self, **kwargs): 33 | """Query to get the last update time of the table""" 34 | return f"SELECT last_user_update FROM sys.dm_db_index_usage_stats WHERE object_id=object_id('{kwargs['schema']}.{kwargs['table']}')" 35 | 36 | def table_data_count(self, **kwargs): 37 | """Query to get the number of rows in the table""" 38 | return f'SELECT COUNT(*) FROM {kwargs["schema"]}."{kwargs["table"]}"' 39 | 40 | def all_schemas(self): 41 | """Query to get all schemas of database""" 42 | pass 43 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/sharepoint/sharepoint_online/utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | 7 | from datetime import datetime 8 | 9 | from connectors.access_control import prefix_identity 10 | from connectors.sources.sharepoint.sharepoint_online.constants import ( 11 | EXCLUDED_SHAREPOINT_PATH_SEGMENTS, 12 | TIMESTAMP_FORMAT, 13 | ) 14 | 15 | 16 | class SyncCursorEmpty(Exception): 17 | """Exception class to notify that incremental sync can't run because sync_cursor is empty. 
18 | See: https://learn.microsoft.com/en-us/graph/delta-query-overview 19 | """ 20 | 21 | pass 22 | 23 | 24 | def _prefix_group(group): 25 | return prefix_identity("group", group) 26 | 27 | 28 | def _prefix_user(user): 29 | return prefix_identity("user", user) 30 | 31 | 32 | def _prefix_user_id(user_id): 33 | return prefix_identity("user_id", user_id) 34 | 35 | 36 | def _prefix_email(email): 37 | return prefix_identity("email", email) 38 | 39 | 40 | def _get_login_name(raw_login_name): 41 | if raw_login_name and ( 42 | raw_login_name.startswith("i:0#.f|membership|") 43 | or raw_login_name.startswith("c:0o.c|federateddirectoryclaimprovider|") 44 | or raw_login_name.startswith("c:0t.c|tenant|") 45 | ): 46 | parts = raw_login_name.split("|") 47 | 48 | if len(parts) > 2: 49 | return parts[2] 50 | 51 | return None 52 | 53 | 54 | def _parse_created_date_time(created_date_time): 55 | if created_date_time is None: 56 | return None 57 | return datetime.strptime(created_date_time, TIMESTAMP_FORMAT) 58 | 59 | 60 | def _is_excluded_sharepoint_url(url: str) -> bool: 61 | try: 62 | return any( 63 | segment in url.lower() for segment in EXCLUDED_SHAREPOINT_PATH_SEGMENTS 64 | ) 65 | except Exception: 66 | return False 67 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/dir/fixture.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | # ruff: noqa: T201 7 | import os 8 | import random 9 | import shutil 10 | import urllib.request 11 | import zipfile 12 | 13 | SYSTEM_DIR = os.path.join(os.path.dirname(__file__), "data") 14 | DATA_SIZE = os.environ.get("DATA_SIZE", "small").lower() 15 | 16 | if DATA_SIZE == "small": 17 | REPO = "connectors-python" 18 | elif DATA_SIZE == "medium": 19 | REPO = "elasticsearch" 20 | else: 21 | REPO = "kibana" 22 | 23 | 24 | def get_num_docs(): 25 | match os.environ.get("DATA_SIZE", "small"): 26 | case "small": 27 | print("100") 28 | case "medium": 29 | print("200") 30 | case _: 31 | print("300") 32 | 33 | 34 | async def load(): 35 | if os.path.exists(SYSTEM_DIR): 36 | await teardown() 37 | print(f"Working in {SYSTEM_DIR}") 38 | os.makedirs(SYSTEM_DIR) 39 | repo_zip = os.path.join(SYSTEM_DIR, "repo.zip") 40 | 41 | # lazy tree generator: we download a GitHub repo and unzip it 42 | print(f"Downloading some source from {REPO}, this may take a while...") 43 | urllib.request.urlretrieve( 44 | f"https://github.com/elastic/{REPO}/zipball/main", repo_zip 45 | ) 46 | 47 | print("Unzipping the tree") 48 | with zipfile.ZipFile(repo_zip) as zip_ref: 49 | zip_ref.extractall(SYSTEM_DIR) 50 | 51 | os.unlink(repo_zip) 52 | 53 | 54 | async def remove(): 55 | # removing 10 files 56 | files = [] 57 | for root, __, filenames in os.walk(SYSTEM_DIR): 58 | for filename in filenames: 59 | files.append(os.path.join(root, filename)) 60 | 61 | random.shuffle(files) 62 | for i in range(10): 63 | print(f"deleting {files[i]}") 64 | os.unlink(files[i]) 65 | 66 | 67 | async def teardown(): 68 | shutil.rmtree(SYSTEM_DIR) 69 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/mongodb_serverless/fixture.py:
-------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | # ruff: noqa: T201 7 | import os 8 | import shutil 9 | 10 | import bson 11 | from faker import Faker 12 | from pymongo import MongoClient 13 | 14 | DATA_SIZE = os.environ.get("DATA_SIZE", "small").lower() 15 | _SIZES = {"small": 750, "medium": 1500, "large": 3000} 16 | NUMBER_OF_RECORDS_TO_DELETE = 50 17 | 18 | fake = Faker() 19 | client = MongoClient("mongodb://admin:justtesting@127.0.0.1:27021") 20 | OB_STORE = "/tmp/objectstore" 21 | 22 | 23 | async def setup(): 24 | print(f"preparing {OB_STORE}") 25 | # creating the file storage for es 26 | if os.path.exists(OB_STORE): 27 | shutil.rmtree(OB_STORE) 28 | os.makedirs(OB_STORE, exist_ok=True) 29 | for r, _, _ in os.walk(OB_STORE): 30 | os.chmod(r, 0o777) 31 | os.chmod(OB_STORE, 0o777) 32 | print(f"{OB_STORE} ready") 33 | 34 | 35 | async def load(): 36 | def _random_record(): 37 | return { 38 | "id": bson.ObjectId(), 39 | "name": fake.name(), 40 | "address": fake.address(), 41 | "birthdate": fake.date(), 42 | "time": fake.time(), 43 | "comment": fake.sentence(), 44 | } 45 | 46 | record_number = _SIZES[DATA_SIZE] + NUMBER_OF_RECORDS_TO_DELETE 47 | 48 | print(f"Generating {record_number} random records") 49 | db = client.sample_database 50 | collection = db.sample_collection 51 | 52 | data = [] 53 | for _ in range(record_number): 54 | data.append(_random_record()) 55 | collection.insert_many(data) 56 | 57 | 58 | async def remove(): 59 | db = client.sample_database 60 | collection = db.sample_collection 61 | 62 | records = collection.find().limit(NUMBER_OF_RECORDS_TO_DELETE) 63 | doc_ids = [rec.get("_id") for rec in records] 64 | 65 | query = {"_id": {"$in": doc_ids}} 66 | collection.delete_many(query) 67 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/onedrive/validator.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | # 6 | 7 | 8 | import fastjsonschema 9 | from connectors_sdk.filtering.validation import ( 10 | AdvancedRulesValidator, 11 | SyncRuleValidationResult, 12 | ) 13 | 14 | 15 | class OneDriveAdvancedRulesValidator(AdvancedRulesValidator): 16 | RULES_OBJECT_SCHEMA_DEFINITION = { 17 | "type": "object", 18 | "properties": { 19 | "skipFilesWithExtensions": { 20 | "type": "array", 21 | "minItems": 1, 22 | "items": {"type": "string"}, 23 | }, 24 | "parentPathPattern": {"type": "string", "minLength": 1}, 25 | "owners": { 26 | "type": "array", 27 | "minItems": 1, 28 | "items": {"type": "string", "format": "email", "minLength": 1}, 29 | }, 30 | }, 31 | "minProperties": 1, 32 | "additionalProperties": False, 33 | } 34 | 35 | SCHEMA_DEFINITION = {"type": "array", "items": RULES_OBJECT_SCHEMA_DEFINITION} 36 | SCHEMA = fastjsonschema.compile(definition=SCHEMA_DEFINITION) 37 | 38 | def __init__(self, source): 39 | self.source = source 40 | 41 | async def validate(self, advanced_rules): 42 | if len(advanced_rules) == 0: 43 | return SyncRuleValidationResult.valid_result( 44 | SyncRuleValidationResult.ADVANCED_RULES 45 | ) 46 | 47 | try: 48 | OneDriveAdvancedRulesValidator.SCHEMA(advanced_rules) 49 | except fastjsonschema.JsonSchemaValueException as e: 50 | return SyncRuleValidationResult( 51 | rule_id=SyncRuleValidationResult.ADVANCED_RULES, 52 | is_valid=False, 53 | validation_message=e.message, 54 | ) 55 | 56 | return SyncRuleValidationResult.valid_result( 57 | SyncRuleValidationResult.ADVANCED_RULES 58 | ) 59 | -------------------------------------------------------------------------------- /libs/connectors_sdk/README.md: -------------------------------------------------------------------------------- 1 | # Connectors SDK 2 | 3 | The Connectors SDK is a framework for writing data connectors. This library is a dependency of the Connectors service found under `app/connectors_service`. 4 | 5 | Furthermore, you can use this SDK as a standalone framework to author simple data source connectors without having to ingest data directly into Elasticsearch. 6 | 7 | ## What's here? 8 | - A `pyproject.toml` file 9 | - The connectors framework code 10 | 11 | ## Simple code example 12 | ```python 13 | from connectors_sdk.source import ( 14 | BaseDataSource, 15 | DataSourceConfiguration 16 | ) 17 | 18 | class CustomDataSource(BaseDataSource): 19 | def __init__(self, configuration): 20 | super().__init__(configuration=configuration) 21 | 22 | @classmethod 23 | def get_default_configuration(cls): 24 | # return a default configuration 25 | return { 26 | "max_doc_count": { 27 | "label": "Maximum number of documents", 28 | "order": "1", 29 | "tooltip": "Maximum number of documents to return", 30 | "type": "int", 31 | "value": 1 32 | } 33 | } 34 | 35 | def get_docs(self): 36 | # get your data 37 | data = { 38 | "document_0451": "A shock to the system.", 39 | "document_0452": "A Foundation for knowledge.", 40 | "document_0453": "We CAN count to three.", 41 | "document_0454": "Gather artifacts from anomalies.", 42 | "document_0455": "Security is not optional.", 43 | "document_0456": "The Invincible." 44 | } 45 | 46 | docs_to_return = [] 47 | for v in data.values(): 48 | if len(docs_to_return) < self.configuration["max_doc_count"]: 49 | docs_to_return.append(v) 50 | continue 51 | break 52 | 53 | return docs_to_return 54 | 55 | 56 | if __name__ == "__main__": 57 | # Initialize the base configuration.
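# (A plain dict of {field_name: value} pairs is assumed to be enough here: DataSourceConfiguration wraps it so that lookups like self.configuration["max_doc_count"] inside get_docs() resolve to the raw values.)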
58 | base_config = {"max_doc_count": 3} 59 | data_source_config = DataSourceConfiguration(base_config) 60 | # Create the CustomDataSource by passing our DataSourceConfiguration object to it 61 | data_source = CustomDataSource(data_source_config) 62 | # Get docs and print them 63 | docs = data_source.get_docs() 64 | print(docs) 65 | ``` 66 | -------------------------------------------------------------------------------- /app/connectors_service/tests/test_access_control.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | 7 | import pytest 8 | 9 | from connectors.access_control import ( 10 | es_access_control_query, 11 | prefix_identity, 12 | ) 13 | 14 | 15 | @pytest.mark.asyncio 16 | async def test_access_control_query(): 17 | access_control = ["user_1"] 18 | access_control_query = es_access_control_query(access_control) 19 | 20 | assert access_control_query == { 21 | "query": { 22 | "template": { 23 | "params": {"access_control": access_control}, 24 | "source": """{ 25 | "bool": { 26 | "should": [ 27 | { 28 | "bool": { 29 | "must_not": { 30 | "exists": { 31 | "field": "_allow_access_control" 32 | } 33 | } 34 | } 35 | }, 36 | { 37 | "terms": { 38 | "_allow_access_control.enum": {{#toJson}}access_control{{/toJson}} 39 | } 40 | } 41 | ] 42 | } 43 | }""", 44 | } 45 | } 46 | } 47 | 48 | 49 | def test_prefix_identity(): 50 | prefix = "prefix" 51 | identity = "identity" 52 | 53 | assert prefix_identity(prefix, identity) == f"{prefix}:{identity}" 54 | 55 | 56 | def test_prefix_identity_with_prefix_none(): 57 | prefix = None 58 | identity = "identity" 59 | 60 | assert prefix_identity(prefix, identity) is None 61 | 62 | 63 | def test_prefix_identity_with_identity_none(): 64 | prefix = "prefix" 65 | identity = None 66 | 67 | assert prefix_identity(prefix, identity) is None 68 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/sources/postgresql/queries.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0.
5 | # 6 | from connectors.sources.shared.database.generic_database import Queries 7 | 8 | 9 | class PostgreSQLQueries(Queries): 10 | """Class contains methods which return query""" 11 | 12 | def ping(self): 13 | """Query to ping source""" 14 | return "SELECT 1+1" 15 | 16 | def all_tables(self, **kwargs): 17 | """Query to get all tables""" 18 | return f"SELECT table_name FROM information_schema.tables WHERE table_catalog = '{kwargs['database']}' and table_schema = '{kwargs['schema']}'" 19 | 20 | def table_primary_key(self, **kwargs): 21 | """Query to get the primary key""" 22 | return ( 23 | f"SELECT a.attname AS c " 24 | f"FROM pg_index i " 25 | f"JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey) " 26 | f"JOIN pg_class t ON t.oid = i.indrelid " 27 | f"JOIN pg_constraint c ON c.conindid = i.indexrelid " 28 | f"WHERE i.indrelid = '\"{kwargs['schema']}\".\"{kwargs['table']}\"'::regclass " 29 | f"AND t.relkind = 'r' " 30 | f"AND c.contype = 'p' " 31 | f"ORDER BY array_position(i.indkey, a.attnum)" 32 | ) 33 | 34 | def table_data(self, **kwargs): 35 | """Query to get the table data""" 36 | return f'SELECT * FROM "{kwargs["schema"]}"."{kwargs["table"]}" ORDER BY {kwargs["columns"]} LIMIT {kwargs["limit"]} OFFSET {kwargs["offset"]}' 37 | 38 | def table_last_update_time(self, **kwargs): 39 | """Query to get the last update time of the table""" 40 | return f'SELECT MAX(pg_xact_commit_timestamp(xmin)) FROM "{kwargs["schema"]}"."{kwargs["table"]}"' 41 | 42 | def table_data_count(self, **kwargs): 43 | """Query to get the number of rows in the table""" 44 | return f'SELECT COUNT(*) FROM "{kwargs["schema"]}"."{kwargs["table"]}"' 45 | 46 | def all_schemas(self): 47 | """Query to get all schemas of database""" 48 | pass 49 | -------------------------------------------------------------------------------- /scripts/stack/configure-connectors.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o pipefail 4 | 5 | if [[ ${CURDIR:-} == "" ]]; then 6 | export CURDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 7 | fi 8 | 9 | source "$CURDIR/set-env.sh" 10 | 11 | PYTHON_EXECUTABLE="" 12 | 13 | if which python3 > /dev/null; then 14 | PY_VERSION=`python3 --version` 15 | PYTHON_EXECUTABLE="python3" 16 | elif which python > /dev/null; then 17 | PY_VERSION=`python --version` 18 | PYTHON_EXECUTABLE="python" 19 | fi 20 | 21 | pushd $PROJECT_ROOT 22 | if [[ "${CONFIG_FILE:-}" == "" ]]; then 23 | CONFIG_FILE="${PROJECT_ROOT}/scripts/stack/connectors-config/config.yml" 24 | fi 25 | CLI_CONFIG="${PROJECT_ROOT}/scripts/stack/connectors-config/cli_config.yml" 26 | 27 | # ensure our Connectors CLI config exists and has the correct information 28 | if [ ! -f "$CLI_CONFIG" ]; then 29 | cliConfigText=' 30 | elasticsearch: 31 | host: http://localhost:9200 32 | password: '"${ELASTIC_PASSWORD}"' 33 | username: elastic 34 | ' 35 | echo "${cliConfigText}" > "$CLI_CONFIG" 36 | fi 37 | 38 | CONNECTORS_EXE="${PROJECT_ROOT}/bin/connectors" 39 | if [ ! -f "$CONNECTORS_EXE" ]; then 40 | echo "Could not find a connectors executable, running 'make clean install'" 41 | 42 | if [ "$PYTHON_EXECUTABLE" == "" ]; then 43 | echo "Could not find a suitable Python 3 executable..."
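# PYTHON_EXECUTABLE is still empty at this point: neither python3 nor python was found on PATH, so bail out instead of letting make fail.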
44 | exit 2 45 | fi 46 | 47 | make clean install PYTHON=$PYTHON_EXECUTABLE 48 | fi 49 | 50 | keep_configuring=true 51 | while [ $keep_configuring == true ]; do 52 | echo 53 | echo "Currently configured connectors:" 54 | $CONNECTORS_EXE --config "$CLI_CONFIG" connector list 55 | echo 56 | while true; do 57 | read -p "Do you want to set up a new connector? (y/N) " yn 58 | case $yn in 59 | [yY] ) break;; 60 | [nN] ) keep_configuring=false; break;; 61 | * ) keep_configuring=false; break;; 62 | esac 63 | done 64 | 65 | if [ $keep_configuring == true ]; then 66 | $CONNECTORS_EXE --config "${CLI_CONFIG}" connector create --connector-service-config "$CONFIG_FILE" --update-config 67 | fi 68 | done 69 | popd 70 | -------------------------------------------------------------------------------- /app/connectors_service/tests/es/test_license.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | # 6 | from unittest.mock import Mock 7 | 8 | import pytest 9 | 10 | from connectors.es.license import requires_platinum_license 11 | from connectors.protocol import JobType 12 | 13 | 14 | def mock_source_klass(is_premium): 15 | source_klass = Mock() 16 | source_klass.is_premium = Mock(return_value=is_premium) 17 | 18 | return source_klass 19 | 20 | 21 | def mock_connector(document_level_security_enabled): 22 | connector = Mock() 23 | connector.features = Mock() 24 | connector.features.document_level_security_enabled = Mock( 25 | return_value=document_level_security_enabled 26 | ) 27 | 28 | return connector 29 | 30 | 31 | def mock_sync_job(job_type): 32 | sync_job = Mock() 33 | sync_job.job_type = job_type 34 | 35 | return sync_job 36 | 37 | 38 | @pytest.mark.parametrize( 39 | "job_type, document_level_security_enabled, is_premium", 40 | [ 41 | (JobType.UNSET, False, True), 42 | (JobType.ACCESS_CONTROL, True, False), 43 | (JobType.ACCESS_CONTROL, True, True), 44 | ], 45 | ) 46 | def test_requires_platinum_license( 47 | job_type, document_level_security_enabled, is_premium 48 | ): 49 | sync_job = mock_sync_job(job_type) 50 | connector = mock_connector(document_level_security_enabled) 51 | source_klass = mock_source_klass(is_premium) 52 | 53 | assert requires_platinum_license(sync_job, connector, source_klass) 54 | 55 | 56 | @pytest.mark.parametrize( 57 | "job_type, document_level_security_enabled, is_premium", 58 | [ 59 | (JobType.FULL, True, False), 60 | (JobType.INCREMENTAL, True, False), 61 | (JobType.ACCESS_CONTROL, False, False), 62 | ], 63 | ) 64 | def test_does_not_require_platinum_license( 65 | job_type, document_level_security_enabled, is_premium 66 | ): 67 | sync_job = mock_sync_job(job_type) 68 | connector = mock_connector(document_level_security_enabled) 69 | source_klass = mock_source_klass(is_premium) 70 | 71 | assert not requires_platinum_license(sync_job, connector, source_klass) 72 | -------------------------------------------------------------------------------- /app/connectors_service/connectors/es/language_data.yml: -------------------------------------------------------------------------------- 1 | --- 2 | da: 3 | name: Danish 4 | stemmer: danish 5 | stop_words: _danish_ 6 | de: 7 | name: German 8 | stemmer: light_german 9 | stop_words: _german_ 10 | en: 11 | name: English 12 | stemmer: light_english 13 | 
stop_words: _english_ 14 | es: 15 | name: Spanish 16 | stemmer: light_spanish 17 | stop_words: _spanish_ 18 | fr: 19 | name: French 20 | stemmer: light_french 21 | stop_words: _french_ 22 | custom_filter_definitions: 23 | fr-elision: 24 | type: elision 25 | articles: 26 | - l 27 | - m 28 | - t 29 | - qu 30 | - n 31 | - s 32 | - j 33 | - d 34 | - c 35 | - jusqu 36 | - quoiqu 37 | - lorsqu 38 | - puisqu 39 | articles_case: true 40 | prepended_filters: 41 | - fr-elision 42 | it: 43 | name: Italian 44 | stemmer: light_italian 45 | stop_words: _italian_ 46 | custom_filter_definitions: 47 | it-elision: 48 | type: elision 49 | articles: 50 | - c 51 | - l 52 | - all 53 | - dall 54 | - dell 55 | - nell 56 | - sull 57 | - coll 58 | - pell 59 | - gl 60 | - agl 61 | - dagl 62 | - degl 63 | - negl 64 | - sugl 65 | - un 66 | - m 67 | - t 68 | - s 69 | - v 70 | - d 71 | articles_case: true 72 | prepended_filters: 73 | - it-elision 74 | ja: 75 | name: Japanese 76 | stemmer: light_english 77 | stop_words: _english_ 78 | postpended_filters: 79 | - cjk_bigram 80 | ko: 81 | name: Korean 82 | stemmer: light_english 83 | stop_words: _english_ 84 | postpended_filters: 85 | - cjk_bigram 86 | nl: 87 | name: Dutch 88 | stemmer: dutch 89 | stop_words: _dutch_ 90 | pt: 91 | name: Portuguese 92 | stemmer: light_portuguese 93 | stop_words: _portuguese_ 94 | pt-br: 95 | name: Portuguese (Brazil) 96 | stemmer: brazilian 97 | stop_words: _brazilian_ 98 | ru: 99 | name: Russian 100 | stemmer: russian 101 | stop_words: _russian_ 102 | th: 103 | name: Thai 104 | stemmer: light_english 105 | stop_words: _thai_ 106 | zh: 107 | name: Chinese 108 | stemmer: light_english 109 | stop_words: _english_ 110 | postpended_filters: 111 | - cjk_bigram 112 | -------------------------------------------------------------------------------- /app/connectors_service/tests/sources/fixtures/box/connector.json: -------------------------------------------------------------------------------- 1 | { 2 | "configuration": { 3 | "is_enterprise": { 4 | "display": "dropdown", 5 | "label": "Box data source", 6 | "options": [ 7 | { 8 | "label": "Box Free", 9 | "value": "box_free" 10 | }, 11 | { 12 | "label": "Box Enterprise", 13 | "value": "box_enterprise" 14 | } 15 | ], 16 | "order": 1, 17 | "type": "str", 18 | "value": "box_free" 19 | }, 20 | "client_id": { 21 | "label": "Client ID", 22 | "order": 2, 23 | "type": "str", 24 | "value": "0000000000000000000000000000" 25 | }, 26 | "client_secret": { 27 | "label": "Client Secret", 28 | "order": 3, 29 | "sensitive": true, 30 | "type": "str", 31 | "value": "0000000000000000000000000" 32 | }, 33 | "refresh_token": { 34 | "depends_on": [ 35 | { 36 | "field": "is_enterprise", 37 | "value": "box_free" 38 | } 39 | ], 40 | "label": "Refresh Token", 41 | "order": 4, 42 | "sensitive": true, 43 | "type": "str", 44 | "value": "xxxxxxxxxxxxxxxxxxxx" 45 | }, 46 | "enterprise_id": { 47 | "depends_on": [ 48 | { 49 | "field": "is_enterprise", 50 | "value": "box_enterprise" 51 | } 52 | ], 53 | "label": "Enterprise ID", 54 | "order": 5, 55 | "type": "int", 56 | "value": 999999 57 | }, 58 | "concurrent_downloads": { 59 | "default_value": 15, 60 | "display": "numeric", 61 | "label": "Maximum concurrent downloads", 62 | "order": 6, 63 | "required": false, 64 | "type": "int", 65 | "ui_restrictions": [ 66 | "advanced" 67 | ], 68 | "validations": [ 69 | { 70 | "type": "less_than", 71 | "constraint": 16 72 | } 73 | ] 74 | } 75 | } 76 | } --------------------------------------------------------------------------------