├── .makecache └── .keepdir ├── pytest.ini ├── tests ├── handlers │ ├── aws │ │ ├── testdata │ │ │ ├── cloudwatch-log-3.json │ │ │ ├── cloudwatch-log-1.json │ │ │ └── cloudwatch-log-2.json │ │ ├── __init__.py │ │ └── test_replay_trigger.py │ └── __init__.py ├── __init__.py ├── share │ ├── __init__.py │ ├── test_environment.py │ ├── test_json.py │ ├── test_factory.py │ └── test_include_exclude.py ├── shippers │ ├── __init__.py │ ├── ssl │ │ ├── localhost.crt │ │ └── localhost.pkcs8.key │ ├── test_composite.py │ ├── test_logstash.py │ └── test_factory.py ├── storage │ ├── __init__.py │ ├── test_factory.py │ └── test_payload.py ├── scripts │ ├── flake8.sh │ ├── mypy.sh │ ├── black.sh │ ├── isort.sh │ ├── run_tests.sh │ ├── docker │ │ ├── black.sh │ │ ├── flake8.sh │ │ ├── mypy.sh │ │ ├── run_tests.sh │ │ └── isort.sh │ └── license_headers_check.sh ├── entrypoint.sh ├── Dockerfile └── testcontainers │ └── es.py ├── docs ├── reference │ ├── images │ │ ├── false-after-multi.png │ │ ├── true-after-multi.png │ │ ├── true-before-multi.png │ │ ├── false-before-multi.png │ │ ├── aws-serverless-lambda-flow.png │ │ ├── multiline-regexp-test-repl-main.png │ │ ├── multiline-regexp-test-repl-run.png │ │ ├── aws-serverless-forwarder-install-assets.png │ │ └── aws-serverless-forwarder-create-function.png │ └── toc.yml ├── docset.yml └── README-AWS.md ├── requirements-lint.txt ├── .flake8 ├── .coveragerc ├── .github ├── ISSUE_TEMPLATE.md ├── ISSUE_TEMPLATE │ ├── bug-report.md │ ├── feature-request.md │ ├── flaky-test.md │ └── input-output-checklist.md ├── workflows │ ├── docs-cleanup.yml │ ├── github-commands-comment.yml │ ├── docs-build.yml │ ├── version-update.yml │ ├── test-reporter.yml │ ├── test.yml │ ├── create-tag.yml │ └── releases-production.yml ├── dependabot.yml └── PULL_REQUEST_TEMPLATE.md ├── requirements.txt ├── handlers ├── __init__.py └── aws │ ├── __init__.py │ ├── exceptions.py │ ├── replay_trigger.py │ ├── kinesis_trigger.py │ ├── cloudwatch_logs_trigger.py │ └── s3_sqs_trigger.py ├── pyproject.toml ├── share ├── version.py ├── environment.py ├── json.py ├── utils.py ├── logger.py ├── __init__.py ├── events.py ├── factory.py ├── include_exlude.py ├── expand_event_list_from_field.py └── secretsmanager.py ├── requirements-tests.txt ├── dev-corner └── how-to-test-locally │ ├── .env │ ├── Taskfile.yaml │ └── README.md ├── .editorconfig ├── README.md ├── storage ├── __init__.py ├── factory.py ├── storage.py ├── s3.py └── payload.py ├── .gitignore ├── shippers ├── __init__.py ├── shipper.py ├── composite.py ├── factory.py └── logstash.py ├── mypy.ini ├── main_aws.py ├── LICENSE.txt ├── Makefile ├── .internal └── aws │ ├── cloudformation │ └── application.yaml │ └── scripts │ └── dist.sh └── CONTRIBUTING.md /.makecache/.keepdir: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | unit: unit test 4 | integration: integration test 5 | -------------------------------------------------------------------------------- /tests/handlers/aws/testdata/cloudwatch-log-3.json: -------------------------------------------------------------------------------- 1 | { 2 | "another": "continuation", 3 | "from": "the", 4 | "continuing": "queue" 5 | } 6 | -------------------------------------------------------------------------------- /docs/reference/images/false-after-multi.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elastic-serverless-forwarder/HEAD/docs/reference/images/false-after-multi.png -------------------------------------------------------------------------------- /docs/reference/images/true-after-multi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elastic-serverless-forwarder/HEAD/docs/reference/images/true-after-multi.png -------------------------------------------------------------------------------- /docs/reference/images/true-before-multi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elastic-serverless-forwarder/HEAD/docs/reference/images/true-before-multi.png -------------------------------------------------------------------------------- /requirements-lint.txt: -------------------------------------------------------------------------------- 1 | black 2 | flake8 3 | flake8-per-file-ignores 4 | isort 5 | mypy==1.10.1 6 | types-PyYAML 7 | types-mock 8 | pyflakes>=3.0.0,<3.3.0 9 | -------------------------------------------------------------------------------- /docs/reference/images/false-before-multi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elastic-serverless-forwarder/HEAD/docs/reference/images/false-before-multi.png -------------------------------------------------------------------------------- /tests/handlers/aws/testdata/cloudwatch-log-1.json: -------------------------------------------------------------------------------- 1 | { 2 | "@timestamp": "2021-12-28T11:33:08.160Z", 3 | "log.level": "info", 4 | "message": "trigger" 5 | } 6 | -------------------------------------------------------------------------------- /docs/reference/images/aws-serverless-lambda-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elastic-serverless-forwarder/HEAD/docs/reference/images/aws-serverless-lambda-flow.png -------------------------------------------------------------------------------- /docs/reference/images/multiline-regexp-test-repl-main.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elastic-serverless-forwarder/HEAD/docs/reference/images/multiline-regexp-test-repl-main.png -------------------------------------------------------------------------------- /docs/reference/images/multiline-regexp-test-repl-run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elastic-serverless-forwarder/HEAD/docs/reference/images/multiline-regexp-test-repl-run.png -------------------------------------------------------------------------------- /docs/reference/images/aws-serverless-forwarder-install-assets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elastic-serverless-forwarder/HEAD/docs/reference/images/aws-serverless-forwarder-install-assets.png -------------------------------------------------------------------------------- /docs/reference/images/aws-serverless-forwarder-create-function.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/elastic/elastic-serverless-forwarder/HEAD/docs/reference/images/aws-serverless-forwarder-create-function.png -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude= 3 | venv/**, 4 | max-line-length=120 5 | per-file-ignores = __init__.py:F401 6 | extend-ignore = 7 | # See https://github.com/PyCQA/pycodestyle/issues/373 8 | E203, 9 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [coverage:run] 2 | omit = 3 | tests/* 4 | 5 | [coverage:paths] 6 | source = 7 | ./ 8 | /app/ 9 | C:\Users\jenkins\workspace\*\src\github.com\elastic\elastic-serverless-forwarder 10 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug 3 | about: "Report confirmed bugs." 4 | 5 | --- 6 | 7 | Please include configurations and logs if available. 8 | 9 | For confirmed bugs, please report: 10 | - Version: 11 | - Steps to Reproduce: 12 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | elastic-apm==6.23.0 2 | boto3==1.39.0 3 | ecs_logging==2.2.0 4 | elasticsearch==7.17.12 5 | PyYAML==6.0.2 6 | aws_lambda_typing==2.20.0 7 | orjson==3.10.18 8 | requests==2.32.3 9 | urllib3==1.26.20 10 | typing-extensions==4.13.2 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug 3 | about: "Report confirmed bugs." 4 | 5 | --- 6 | 7 | Please include configurations and logs if available. 8 | 9 | For confirmed bugs, please report: 10 | - Version: 11 | - Steps to Reproduce: 12 | -------------------------------------------------------------------------------- /handlers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 120 3 | include = '\.pyi?$' 4 | exclude = ''' 5 | /( 6 | \.git 7 | | venv 8 | 9 | # The following are specific to Black, you probably don't want those. 
10 | | blib2to3 11 | | tests/data 12 | )/ 13 | ''' 14 | -------------------------------------------------------------------------------- /tests/handlers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | -------------------------------------------------------------------------------- /tests/share/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | -------------------------------------------------------------------------------- /tests/shippers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | -------------------------------------------------------------------------------- /tests/storage/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | -------------------------------------------------------------------------------- /tests/handlers/aws/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | -------------------------------------------------------------------------------- /share/version.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | version = "1.21.1" 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Enhancement request 3 | about: Elastic Serverless Forwarder can't do all the things, but maybe it can do your things. 4 | 5 | --- 6 | 7 | **Describe the enhancement:** 8 | 9 | **Describe a specific use case for the enhancement or feature:** 10 | 11 | -------------------------------------------------------------------------------- /tests/scripts/flake8.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | 6 | set -e 7 | 8 | flake8 . 9 | -------------------------------------------------------------------------------- /requirements-tests.txt: -------------------------------------------------------------------------------- 1 | mock==5.2.0 2 | pytest==8.4.2 3 | pytest-cov==6.1.1 4 | pytest-benchmark==5.1.0 5 | coverage==7.9.1 6 | simplejson==3.19.2 7 | ujson==5.9.0 8 | pysimdjson==6.0.2 9 | python-rapidjson==1.14 10 | cysimdjson==23.8 11 | responses==0.25.7 12 | types-requests<2.31.0.7 13 | testcontainers==3.7.1 14 | pyOpenSSL==24.0.0 15 | -------------------------------------------------------------------------------- /.github/workflows/docs-cleanup.yml: -------------------------------------------------------------------------------- 1 | name: docs-cleanup 2 | 3 | on: 4 | pull_request_target: 5 | types: 6 | - closed 7 | 8 | jobs: 9 | docs-preview: 10 | uses: elastic/docs-builder/.github/workflows/preview-cleanup.yml@main 11 | permissions: 12 | contents: none 13 | id-token: write 14 | deployments: write 15 | -------------------------------------------------------------------------------- /tests/scripts/mypy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | 6 | set -e 7 | 8 | mypy --install-types --non-interactive . 9 | -------------------------------------------------------------------------------- /tests/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | 6 | export HOME=/home/user 7 | exec /usr/local/bin/gosu user "$@" 8 | -------------------------------------------------------------------------------- /tests/handlers/aws/testdata/cloudwatch-log-2.json: -------------------------------------------------------------------------------- 1 | { 2 | "ecs": { 3 | "version": "1.6.0" 4 | }, 5 | "log": { 6 | "logger": "root", 7 | "origin": { 8 | "file": { 9 | "line": 30, 10 | "name": "handler.py" 11 | }, 12 | "function": "lambda_handler" 13 | }, 14 | "original": "trigger" 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /docs/reference/toc.yml: -------------------------------------------------------------------------------- 1 | project: 'Elastic Serverless Forwarder for AWS reference' 2 | toc: 3 | - file: index.md 4 | - file: aws-deploy-elastic-serverless-forwarder.md 5 | - file: aws-elastic-serverless-forwarder-configuration.md 6 | # - file: aws-serverless-troubleshooting.md 7 | # - file: deploy-elastic-serverless-forwarder.md 8 | # - file: configuration-options-for-elastic-serverless-forwarder.md -------------------------------------------------------------------------------- /.github/workflows/github-commands-comment.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: github-commands-comment 3 | 4 | on: 5 | pull_request_target: 6 | types: 7 | - opened 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | comment: 14 | runs-on: ubuntu-latest 15 | permissions: 16 | pull-requests: write 17 | steps: 18 | - uses: elastic/oblt-actions/elastic/github-commands@v1 19 | -------------------------------------------------------------------------------- /docs/docset.yml: -------------------------------------------------------------------------------- 1 | project: 'Elastic Serverless Forwarder for AWS' 2 | products: 3 | - id: elastic-serverless-forwarder 4 | exclude: 5 | - README-AWS.md 6 | cross_links: 7 | - docs-content 8 | - elasticsearch 9 | - integration-docs 10 | - logstash 11 | - logstash-docs-md 12 | toc: 13 | - toc: reference 14 | subs: 15 | stack: "Elastic Stack" 16 | es: "Elasticsearch" 17 | kib: "Kibana" 18 | ls: "Logstash" 19 | -------------------------------------------------------------------------------- /dev-corner/how-to-test-locally/.env: -------------------------------------------------------------------------------- 1 | # List of requirement files. 2 | # Split them with , and without space, like this: example1.txt,example2.txt 3 | REQUIREMENTS=requirements.txt 4 | 5 | # List of python files/directories to add to the zip file. 
6 | # Split them with , and without space, like this: example1.txt,example2.txt 7 | DEPENDENCIES=main_aws.py,handlers,share,storage,shippers 8 | 9 | # Zip filename 10 | FILENAME=local_esf.zip 11 | 12 | -------------------------------------------------------------------------------- /.github/workflows/docs-build.yml: -------------------------------------------------------------------------------- 1 | name: docs-build 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request_target: ~ 8 | merge_group: ~ 9 | 10 | jobs: 11 | docs-preview: 12 | uses: elastic/docs-builder/.github/workflows/preview-build.yml@main 13 | with: 14 | path-pattern: docs/** 15 | permissions: 16 | deployments: write 17 | id-token: write 18 | contents: read 19 | pull-requests: write 20 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # editorconfig.org 2 | root = true 3 | 4 | [*] 5 | indent_style = space 6 | indent_size = 4 7 | end_of_line = lf 8 | charset = utf-8 9 | trim_trailing_whitespace = true 10 | insert_final_newline = true 11 | 12 | [*.asciidoc] 13 | trim_trailing_whitespace = false 14 | 15 | [Makefile] 16 | indent_style = tab 17 | 18 | [Jenkinsfile] 19 | indent_size = 2 20 | 21 | [*.groovy] 22 | indent_size = 2 23 | 24 | [*.feature] 25 | indent_size = 2 26 | 27 | [*.yml] 28 | indent_size = 2 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://github.com/elastic/elastic-serverless-forwarder/actions/workflows/test.yml/badge.svg)](https://github.com/elastic/elastic-serverless-forwarder/actions/workflows/test.yml) 2 | 3 | # elastic-serverless-forwarder 4 | Elastic Serverless Forwarder 5 | 6 | ### Changelog [link](https://github.com/elastic/elastic-serverless-forwarder/blob/main/CHANGELOG.md) 7 | ### For AWS documentation, [go here](https://github.com/elastic/elastic-serverless-forwarder/blob/main/docs/README-AWS.md) 8 | -------------------------------------------------------------------------------- /handlers/aws/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from .exceptions import ( 6 | ConfigFileException, 7 | InputConfigException, 8 | OutputConfigException, 9 | ReplayHandlerException, 10 | TriggerTypeException, 11 | ) 12 | from .handler import lambda_handler 13 | -------------------------------------------------------------------------------- /storage/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | 5 | from .decorator import by_lines, inflate, json_collector, multi_line 6 | from .factory import StorageFactory 7 | from .payload import PayloadStorage 8 | from .s3 import S3Storage 9 | from .storage import CommonStorage, GetByLinesIterator, ProtocolStorage, StorageDecoratorIterator, StorageReader 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .makecache 2 | !.makecache/.keepdir 3 | *.pyc 4 | *.log 5 | *.egg 6 | *.db 7 | *.pid 8 | *.zip 9 | .coverage* 10 | .DS_Store 11 | .idea 12 | .vscode 13 | .benchmarks 14 | pip-log.txt 15 | /*.egg-info 16 | /build 17 | /cover 18 | /example_project/local_settings.py 19 | /docs/html 20 | /docs/doctrees 21 | /example_project/*.db 22 | tests/.schemacache 23 | coverage 24 | .tox 25 | .eggs 26 | .cache 27 | /testdb.sql 28 | venv 29 | benchmarks/result* 30 | coverage.xml 31 | tests/elastic-serverless-forwarder-junit.xml 32 | *.code-workspace 33 | .pytest_cache/ 34 | .python-version 35 | htmlcov/ 36 | -------------------------------------------------------------------------------- /tests/scripts/black.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | set -e 6 | if [[ $# -eq 0 ]] 7 | then 8 | echo "Usage: $0 diff|fix" 9 | exit 1 10 | fi 11 | 12 | if [[ "$1" = "diff" ]] 13 | then 14 | OPTIONS="--diff --check --line-length=120" 15 | elif [[ "$1" = "fix" ]] 16 | then 17 | OPTIONS="--line-length=120" 18 | fi 19 | 20 | black -t py39 ${OPTIONS} . 21 | -------------------------------------------------------------------------------- /share/environment.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | import os 6 | import platform 7 | 8 | 9 | def is_aws() -> bool: 10 | return os.getenv("AWS_EXECUTION_ENV") is not None 11 | 12 | 13 | def get_environment() -> str: 14 | if is_aws(): 15 | return os.environ["AWS_EXECUTION_ENV"] 16 | else: 17 | return f"Python/{platform.python_version()} {platform.system()}/{platform.machine()}" 18 | -------------------------------------------------------------------------------- /share/json.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | 5 | from typing import Any, AnyStr 6 | 7 | import orjson 8 | 9 | 10 | def json_dumper(json_object: Any) -> str: 11 | if isinstance(json_object, bytes): 12 | json_object = json_object.decode("utf-8") 13 | 14 | return orjson.dumps(json_object).decode("utf-8") 15 | 16 | 17 | def json_parser(payload: AnyStr) -> Any: 18 | return orjson.loads(payload) 19 | -------------------------------------------------------------------------------- /shippers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from .composite import CompositeShipper 6 | from .es import ElasticsearchShipper, JSONSerializer 7 | from .factory import ShipperFactory 8 | from .logstash import LogstashShipper 9 | from .shipper import ( 10 | EVENT_IS_EMPTY, 11 | EVENT_IS_FILTERED, 12 | EVENT_IS_SENT, 13 | EventIdGeneratorCallable, 14 | ProtocolShipper, 15 | ReplayHandlerCallable, 16 | ) 17 | -------------------------------------------------------------------------------- /tests/scripts/isort.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | 6 | set -e 7 | if [[ $# -eq 0 ]] 8 | then 9 | echo "Usage: $0 diff|fix" 10 | exit 1 11 | fi 12 | 13 | if [[ "$1" = "diff" ]] 14 | then 15 | OPTIONS="--diff --check --py 39 --profile black --line-length=120" 16 | elif [[ "$1" = "fix" ]] 17 | then 18 | OPTIONS="-v --py 39 --profile black --line-length=120" 19 | fi 20 | 21 | isort ${OPTIONS} . 22 | -------------------------------------------------------------------------------- /tests/share/test_environment.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | import os 6 | from unittest import mock 7 | 8 | import pytest 9 | 10 | from share.environment import get_environment 11 | 12 | 13 | @pytest.mark.unit 14 | @mock.patch.dict(os.environ, {"AWS_EXECUTION_ENV": "AWS_Lambda_Python3.12"}) 15 | def test_aws_environment() -> None: 16 | environment = get_environment() 17 | assert environment == "AWS_Lambda_Python3.12" 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/flaky-test.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Flaky Test 3 | about: Report a flaky test (one that doesn't pass consistently) 4 | labels: flaky-test 5 | --- 6 | 7 | ## Flaky Test 8 | 9 | * **Test Name:** Name of the failing test. 10 | * **Link:** Link to file/line number in github. 11 | * **Branch:** Git branch the test was seen in. If a PR, the branch the PR was based off. 12 | * **Artifact Link:** If available, attach the generated zip artifact associated with the stack trace for this failure. 13 | * **Notes:** Additional details about the test. e.g. 
theory as to failure cause 14 | 15 | ### Stack Trace 16 | 17 | ``` 18 | paste stack trace here 19 | ``` 20 | -------------------------------------------------------------------------------- /share/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | import hashlib 5 | import sys 6 | 7 | 8 | def get_hex_prefix(src: str) -> str: 9 | return hashlib.sha3_384(src.encode("utf-8")).hexdigest() 10 | 11 | 12 | def create_user_agent(esf_version: str, environment: str = sys.version) -> str: 13 | """Creates the 'User-Agent' header given ESF version and running environment""" 14 | return f"ElasticServerlessForwarder/{esf_version} ({environment})" 15 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | # Global options: 2 | [mypy] 3 | python_version = 3.12 4 | warn_return_any = True 5 | warn_unused_configs = True 6 | strict = True 7 | disallow_untyped_defs = True 8 | no_implicit_reexport = False 9 | exclude = venv/.* 10 | 11 | [mypy-elasticapm.*] 12 | ignore_missing_imports = True 13 | 14 | [mypy-boto3.*] 15 | ignore_missing_imports = True 16 | 17 | [mypy-botocore.*] 18 | ignore_missing_imports = True 19 | 20 | [mypy-testcontainers.*] 21 | ignore_missing_imports = True 22 | 23 | [mypy-pytest_benchmark.*] 24 | ignore_missing_imports = True 25 | 26 | [mypy-rapidjson.*] 27 | ignore_missing_imports = True 28 | 29 | [mypy-cysimdjson.*] 30 | ignore_missing_imports = True 31 | -------------------------------------------------------------------------------- /main_aws.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from typing import Any 6 | 7 | from aws_lambda_typing import context as context_ 8 | 9 | from handlers.aws import lambda_handler 10 | 11 | 12 | def handler(lambda_event: dict[str, Any], lambda_context: context_.Context) -> Any: 13 | """ 14 | AWS Lambda handler as main entrypoint 15 | This is just a wrapper to handlers.aws.lambda_handler 16 | """ 17 | return lambda_handler(lambda_event, lambda_context) 18 | -------------------------------------------------------------------------------- /tests/scripts/run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | 6 | set -ex 7 | 8 | # delete any __pycache__ folders to avoid hard-to-debug caching issues 9 | find . 
-name __pycache__ -type d -exec rm -r '{}' + || true 10 | PYTEST_ARGS=("${PYTEST_ARGS}") 11 | py.test -vv "${PYTEST_ARGS[*]}" "${PYTEST_JUNIT}" tests -s 12 | 13 | if [[ "${PYTEST_ADDOPTS}" == *"--cov"* ]]; then 14 | # Transform coverage to xml so Jenkins can parse and report it 15 | coverage xml 16 | coverage html 17 | fi 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/input-output-checklist.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: New Input / Output 3 | about: "Meta issue to track the creation or updating of a new input or output." 4 | 5 | --- 6 | 7 | # Elastic Serverless Forwarder Input / Output release checklist 8 | 9 | This checklist is intended for Devs who create or update a module to make sure input/output are consistent. 10 | 11 | ## Input 12 | 13 | For an input to be supported, the following criteria should be met: 14 | 15 | * [ ] Config for the input is defined 16 | * [ ] Handler for the input is defined 17 | * [ ] Unit tests exist 18 | * [ ] Integration tests exist 19 | * [ ] Documentation 20 | 21 | ## Output 22 | 23 | * [ ] Config for the output is defined 24 | * [ ] Implementation for the output is defined 25 | * [ ] Unit tests exist 26 | * [ ] Integration tests exist 27 | * [ ] Documentation 28 | -------------------------------------------------------------------------------- /share/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | import logging 6 | import os 7 | 8 | import ecs_logging 9 | from elasticapm.handlers.logging import LoggingFilter 10 | 11 | log_level = logging.getLevelName(os.getenv("LOG_LEVEL", "INFO").upper()) 12 | 13 | # Get the Logger 14 | logger = logging.getLogger() 15 | logger.setLevel(log_level) 16 | logger.propagate = False 17 | 18 | # Add an ECS formatter to the Handler 19 | handler = logging.StreamHandler() 20 | handler.setFormatter(ecs_logging.StdlibFormatter()) 21 | 22 | # Add an APM log correlation 23 | handler.addFilter(LoggingFilter()) # type: ignore 24 | logger.handlers = [handler] 25 | -------------------------------------------------------------------------------- /share/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0.
4 | 5 | from .config import Config, ElasticsearchOutput, Input, LogstashOutput, Output, parse_config 6 | from .events import normalise_event 7 | from .expand_event_list_from_field import ExpandEventListFromField 8 | from .factory import MultilineFactory 9 | from .include_exlude import IncludeExcludeFilter, IncludeExcludeRule 10 | from .json import json_dumper, json_parser 11 | from .logger import logger as shared_logger 12 | from .multiline import CollectBuffer, CountMultiline, FeedIterator, PatternMultiline, ProtocolMultiline, WhileMultiline 13 | from .secretsmanager import aws_sm_expander 14 | from .utils import get_hex_prefix 15 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "daily" 12 | ignore: 13 | - dependency-name: "elasticsearch" 14 | 15 | - package-ecosystem: "github-actions" 16 | directories: 17 | - "/" 18 | - "/.github/actions/*" 19 | schedule: 20 | interval: "weekly" 21 | day: "sunday" 22 | time: "22:00" 23 | groups: 24 | github-actions: 25 | patterns: 26 | - "*" 27 | -------------------------------------------------------------------------------- /share/events.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from typing import Any 6 | 7 | 8 | def normalise_event(event_payload: dict[str, Any]) -> dict[str, Any]: 9 | """ 10 | This method moves the fields payload to the root level of the event and then removes it, together with the meta payload. 11 | It has to be called as the last step, after any other operation on the event payload, just before sending to the cluster. 12 | """ 13 | if "fields" in event_payload: 14 | fields: dict[str, Any] = event_payload["fields"] 15 | for field_key in fields.keys(): 16 | event_payload[field_key] = fields[field_key] 17 | 18 | del event_payload["fields"] 19 | 20 | if "meta" in event_payload: 21 | del event_payload["meta"] 22 | 23 | return event_payload 24 | -------------------------------------------------------------------------------- /handlers/aws/exceptions.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0.
4 | 5 | 6 | class TriggerTypeException(Exception): 7 | """Raised when there is an error related to the trigger type""" 8 | 9 | pass 10 | 11 | 12 | class ConfigFileException(Exception): 13 | """Raised when there is an error related to the config file""" 14 | 15 | pass 16 | 17 | 18 | class InputConfigException(Exception): 19 | """Raised when there is an error related to the configured input""" 20 | 21 | pass 22 | 23 | 24 | class OutputConfigException(Exception): 25 | """Raised when there is an error related to the configured output""" 26 | 27 | pass 28 | 29 | 30 | class ReplayHandlerException(Exception): 31 | """Raised when there is an error in ingestion in the replay queue""" 32 | 33 | pass 34 | -------------------------------------------------------------------------------- /tests/scripts/docker/black.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | 6 | pip_cache="$HOME/.cache" 7 | docker_pip_cache="/tmp/cache/pip" 8 | 9 | cd tests 10 | 11 | docker build --build-arg UID=$UID --build-arg PYTHON_IMAGE=python:3.12 -t python-linters --file Dockerfile .. 12 | docker run \ 13 | -e LOCAL_USER_ID=$UID \ 14 | -e PIP_CACHE=${docker_pip_cache} \ 15 | -v ${pip_cache}:$(dirname ${docker_pip_cache}) \ 16 | -v "$(dirname $(pwd))":/app \ 17 | -w /app \ 18 | --rm python-linters \ 19 | /bin/bash \ 20 | -c "pip install --user -U pip 21 | pip install --user -r requirements-lint.txt --cache-dir ${docker_pip_cache} 22 | pip install --user -r requirements.txt --cache-dir ${docker_pip_cache} 23 | PATH=\${PATH}:\${HOME}/.local/bin/ /bin/bash ./tests/scripts/black.sh $*" 24 | -------------------------------------------------------------------------------- /tests/scripts/docker/flake8.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | 6 | set -e 7 | 8 | pip_cache="$HOME/.cache" 9 | docker_pip_cache="/tmp/cache/pip" 10 | 11 | cd tests 12 | 13 | docker build --build-arg UID=$UID --build-arg PYTHON_IMAGE=python:3.12 -t lint_flake8 --file Dockerfile .. 14 | docker run \ 15 | -e LOCAL_USER_ID=$UID \ 16 | -e PIP_CACHE=${docker_pip_cache} \ 17 | -v ${pip_cache}:$(dirname ${docker_pip_cache}) \ 18 | -v "$(dirname $(pwd))":/app \ 19 | -w /app \ 20 | --rm lint_flake8 \ 21 | /bin/bash \ 22 | -c "pip install --user -U pip 23 | pip install --user -r requirements-lint.txt --cache-dir ${docker_pip_cache} 24 | pip install --user -r requirements.txt --cache-dir ${docker_pip_cache} 25 | PATH=\${PATH}:\${HOME}/.local/bin/ /bin/bash ./tests/scripts/flake8.sh $*" 26 | -------------------------------------------------------------------------------- /tests/scripts/docker/mypy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. 
Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | 6 | set -e 7 | 8 | pip_cache="$HOME/.cache" 9 | docker_pip_cache="/tmp/cache/pip" 10 | 11 | cd tests 12 | 13 | docker build --build-arg UID=$UID --build-arg PYTHON_IMAGE=python:3.12 -t lint_mypy --file Dockerfile .. 14 | docker run \ 15 | -e LOCAL_USER_ID=$UID \ 16 | -e PIP_CACHE=${docker_pip_cache} \ 17 | -v ${pip_cache}:$(dirname ${docker_pip_cache}) \ 18 | -v "$(dirname $(pwd))":/app \ 19 | -w /app \ 20 | --rm lint_mypy \ 21 | /bin/bash \ 22 | -c "pip install --user -U pip 23 | pip install --user -r requirements-lint.txt --cache-dir ${docker_pip_cache} 24 | pip install --user -r requirements-tests.txt --cache-dir ${docker_pip_cache} 25 | pip install --user -r requirements.txt --cache-dir ${docker_pip_cache} 26 | PATH=\${PATH}:\${HOME}/.local/bin/ /bin/bash ./tests/scripts/mypy.sh $*" 27 | -------------------------------------------------------------------------------- /tests/shippers/ssl/localhost.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIDBDCCAeygAwIBAgIVAKHPEVe18psDxZqv8JlUJTAENPmoMA0GCSqGSIb3DQEB 3 | CwUAMBQxEjAQBgNVBAMTCWxvY2FsaG9zdDAeFw0yMjExMTgxNTA0MDlaFw0yNTEx 4 | MTcxNTA0MDlaMBQxEjAQBgNVBAMTCWxvY2FsaG9zdDCCASIwDQYJKoZIhvcNAQEB 5 | BQADggEPADCCAQoCggEBAMb4JyxYDA7oFFxi2HsaGOs0Ti9B1JFVdOxX27nbGOUK 6 | 3KAlqUDKYvCZGfrDrZxCFIVndOOfu/pKa3sg+I8naYnt2f7CLBbgYfL6Lli0FM1A 7 | L7MQUExGRO/F3WpkBElCO5I2NFqoPHYHwk0mwfInMFJSb645wFYvPqyLsqjK44L1 8 | ItubBE7tQp1+BNb2OmJYqerCX1H+DaK3azH3IJX3HnD15mHfccMm9PBLN47lsr/C 9 | E7gGUb0E4ypFu9G6hsClriwdHNIqJvO2lesf6ZJaYwDq5G5opNijeN032UmhT4NA 10 | FJlUDT+/1K3ZgzN8cRI9vEtWu2UZ0LzviaYDVhy/0CUCAwEAAaNNMEswHQYDVR0O 11 | BBYEFJkk5RLf05LzU3QEFSzAG4F7ODS8MB8GA1UdIwQYMBaAFJkk5RLf05LzU3QE 12 | FSzAG4F7ODS8MAkGA1UdEwQCMAAwDQYJKoZIhvcNAQELBQADggEBAKmfSyWAuklK 13 | u0D02JugnCNC6Ea+Ug0zBVqeNWiMHnREwZ+R0CLXvhaGJviOCIeYL+M2MO8KM41S 14 | 5n2FmQCvX/cNiaVW3qZZKQD6p0hN34luenxyJv21+Zx7CdkLZo7OT3JfCKcEN+zP 15 | nvPR6ynBqAhfy0GB+3B1M7dK3DxP73zNG8TJ3XFNWWYSLvfY8wEO/tCq6wxxttYd 16 | fcjv0LYJBpsYGiE0Ll1ZIZgjLTUfCHuQCDU0XHaZF01jb5ttpxyWVoUjSbPoFnp0 17 | nIJsGTwHZXU3Y+BQdL3DRLOdnfrdJwpXpEeTRCHfBSWNcP0p2rfWz/2Nl6gtT6zo 18 | gJw8IvJqQvw= 19 | -----END CERTIFICATE----- 20 | -------------------------------------------------------------------------------- /tests/scripts/docker/run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | set -ex 6 | 7 | pip_cache="$HOME/.cache" 8 | docker_pip_cache="/tmp/cache/pip" 9 | 10 | cd tests 11 | 12 | docker build --build-arg UID=$UID --build-arg PYTHON_IMAGE=python:3.12 -t run_tests --file Dockerfile .. 
13 | docker run \ 14 | --privileged \ 15 | -e LOCAL_USER_ID=$UID \ 16 | -e PIP_CACHE=${docker_pip_cache} \ 17 | -e PYTEST_ARGS="${PYTEST_ARGS}" \ 18 | -e PYTEST_ADDOPTS="${PYTEST_ADDOPTS}" \ 19 | -e PYTEST_JUNIT="--junitxml=/app/tests/elastic-serverless-forwarder-junit.xml" \ 20 | -e AWS_ACCESS_KEY_ID=AWS_ACCESS_KEY_ID \ 21 | -e AWS_SECRET_ACCESS_KEY=AWS_SECRET_ACCESS_KEY \ 22 | -v "$(dirname $(pwd))":/app \ 23 | -w /app \ 24 | --rm run_tests \ 25 | /bin/bash \ 26 | -c "sudo service docker start 27 | pip install --user -U pip 28 | pip install --user -r requirements-tests.txt --cache-dir ${docker_pip_cache} 29 | pip install --user -r requirements.txt --cache-dir ${docker_pip_cache} 30 | PATH=\${PATH}:\${HOME}/.local/bin/ timeout 60m /bin/bash ./tests/scripts/run_tests.sh" 31 | -------------------------------------------------------------------------------- /tests/scripts/docker/isort.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | 6 | set -e 7 | if [[ $# -eq 0 ]] 8 | then 9 | echo "Usage: $0 diff|fix" 10 | exit 1 11 | fi 12 | 13 | if [[ "$1" = "diff" ]] 14 | then 15 | OPTIONS="--diff --check --profile black --line-length=120" 16 | elif [[ "$1" = "fix" ]] 17 | then 18 | OPTIONS="--profile black --line-length=120" 19 | fi 20 | 21 | 22 | pip_cache="$HOME/.cache" 23 | docker_pip_cache="/tmp/cache/pip" 24 | 25 | cd tests 26 | 27 | docker build --build-arg UID=$UID --build-arg PYTHON_IMAGE=python:3.12 -t python-linters --file Dockerfile .. 28 | docker run \ 29 | -e LOCAL_USER_ID=$UID \ 30 | -e PIP_CACHE=${docker_pip_cache} \ 31 | -v ${pip_cache}:$(dirname ${docker_pip_cache}) \ 32 | -v "$(dirname $(pwd))":/app \ 33 | -w /app \ 34 | --rm python-linters \ 35 | /bin/bash \ 36 | -c "pip install --user -U pip 37 | pip install --user -r requirements-lint.txt --cache-dir ${docker_pip_cache} 38 | pip install --user -r requirements.txt --cache-dir ${docker_pip_cache} 39 | PATH=\${PATH}:\${HOME}/.local/bin/ /bin/bash ./tests/scripts/isort.sh $*" 40 | -------------------------------------------------------------------------------- /shippers/shipper.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from typing import Any, Callable, Protocol 6 | 7 | # ReplayHandlerCallable accepts the output type, a dict of arguments for the output and the event to be replayed. 8 | # It does not return anything. 9 | ReplayHandlerCallable = Callable[[str, dict[str, Any], dict[str, Any]], None] 10 | 11 | # EventIdGeneratorCallable accepts a dict of the events as argument. It returns the _id of that event. 
12 | EventIdGeneratorCallable = Callable[[dict[str, Any]], str] 13 | 14 | EVENT_IS_EMPTY = "EVENT_IS_EMPTY" 15 | EVENT_IS_FILTERED = "EVENT_IS_FILTERED" 16 | EVENT_IS_SENT = "EVENT_IS_SENT" 17 | 18 | 19 | class ProtocolShipper(Protocol): 20 | """ 21 | Protocol for Shipper components 22 | """ 23 | 24 | def send(self, event: dict[str, Any]) -> str: 25 | pass # pragma: no cover 26 | 27 | def set_event_id_generator(self, event_id_generator: EventIdGeneratorCallable) -> None: 28 | pass # pragma: no cover 29 | 30 | def set_replay_handler(self, replay_handler: ReplayHandlerCallable) -> None: 31 | pass # pragma: no cover 32 | 33 | def flush(self) -> None: 34 | pass # pragma: no cover 35 | -------------------------------------------------------------------------------- /dev-corner/how-to-test-locally/Taskfile.yaml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | env: 4 | # Directory to place the dependencies - just internal to this taskfile 5 | DIR: dependencies 6 | 7 | dotenv: ['.env'] 8 | 9 | tasks: 10 | default: 11 | cmds: 12 | - task: install-requirements 13 | - task: build-zip-file 14 | - task: remove-dependencies-dir 15 | - task: add-to-zip 16 | 17 | install-requirements: 18 | desc: "Install requirements from $REQUIREMENTS." 19 | internal: true 20 | requires: 21 | var: REQUIREMENTS 22 | cmds: 23 | - rm -rf $DIR 24 | - for: 25 | var: REQUIREMENTS 26 | split: ',' 27 | cmd: pip3.12 install -r ../../{{ .ITEM }} -t $DIR 28 | 29 | build-zip-file: 30 | desc: "Zip $DIR to build $FILENAME." 31 | internal: true 32 | cmds: 33 | - rm -rf $FILENAME 34 | - cd $DIR && zip -r ../$FILENAME . 35 | 36 | remove-dependencies-dir: 37 | desc: "Delete $DIR." 38 | internal: true 39 | cmds: 40 | - rm -rf $DIR 41 | 42 | add-to-zip: 43 | desc: "Add $DEPENDENCIES to zip file." 44 | internal: true 45 | cmds: 46 | - for: 47 | var: DEPENDENCIES 48 | split: ',' 49 | cmd: zip -r $FILENAME ../../{{ .ITEM }} 50 | -------------------------------------------------------------------------------- /.github/workflows/version-update.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Workflow to check that the version inside share/version.py matches the version in the last entry 3 | # of CHANGELOG 4 | name: version-update 5 | 6 | on: 7 | pull_request: 8 | paths: 9 | - 'share/version.py' 10 | - 'CHANGELOG.md' 11 | 12 | jobs: 13 | 14 | version-increase: 15 | runs-on: ubuntu-latest 16 | timeout-minutes: 5 17 | 18 | steps: 19 | 20 | - uses: actions/checkout@v5 21 | 22 | - name: Compare versions in share/version.py and CHANGELOG last entry 23 | shell: bash 24 | run: | 25 | # Get the version inside share/version.py 26 | version_py=$(grep -oE '[0-9]+\.[0-9]+\.[0-9]+(\-[a-zA-Z]+[0-9]+)?' share/version.py) 27 | echo "::notice::Version inside share/version.py is $version_py." 28 | 29 | # Assumes the first line of the CHANGELOG file follows a format like this: '### v1.17.1 - 2024/09/23' 30 | # Example: 31 | # Input: '### v1.17.1 - 2024/09/23' 32 | # Output: '1.17.1' 33 | version_changelog=$(awk 'NR==1' CHANGELOG.md | awk '{print substr($2,2)}') 34 | echo "::notice::Version in CHANGELOG last entry is $version_changelog." 35 | 36 | if [ "$version_changelog" != "$version_py" ]; then 37 | error="Versions in share/version.py and CHANGELOG do not match." 38 | reminder="Make sure CHANGELOG first line follows format '### v - '." 
39 | echo "::error::$error $reminder" 40 | exit 1 41 | fi 42 | -------------------------------------------------------------------------------- /.github/workflows/test-reporter.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ## Workflow to process the JUnit test results and add a report to the checks. 3 | name: test-reporter 4 | on: 5 | workflow_run: 6 | workflows: 7 | - test 8 | types: 9 | - completed 10 | 11 | permissions: 12 | contents: read 13 | actions: read 14 | checks: write 15 | pull-requests: write 16 | 17 | jobs: 18 | report: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - uses: elastic/oblt-actions/test-report@v1 22 | with: 23 | artifact: /test-results(.*)/ # artifact name pattern 24 | name: JUnit Tests # Name of the check run which will be created 25 | path: "**/elastic-serverless-forwarder-junit.xml" # Path to test results (inside artifact .zip) 26 | reporter: java-junit # Format of test results 27 | output-to: step-summary # Write summary in the PR 28 | 29 | coverage: 30 | if: ${{ github.event.workflow_run.event == 'pull_request' }} 31 | runs-on: ubuntu-latest 32 | steps: 33 | - uses: actions/download-artifact@v6 34 | with: 35 | pattern: test-results* 36 | merge-multiple: true 37 | run-id: ${{ github.event.workflow_run.id }} 38 | - uses: 5monkeys/cobertura-action@ee5787cc56634acddedc51f21c7947985531e6eb 39 | with: 40 | path: "**/coverage.xml" 41 | skip_covered: false 42 | minimum_coverage: 100 43 | fail_below_threshold: true 44 | show_line: true 45 | show_branch: true 46 | show_missing: true 47 | -------------------------------------------------------------------------------- /share/factory.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from typing import Any, Callable 6 | 7 | from .multiline import CountMultiline, PatternMultiline, ProtocolMultiline, WhileMultiline 8 | 9 | _init_definition_by_multiline_type: dict[str, dict[str, Any]] = { 10 | "count": { 11 | "class": CountMultiline, 12 | }, 13 | "pattern": { 14 | "class": PatternMultiline, 15 | }, 16 | "while_pattern": { 17 | "class": WhileMultiline, 18 | }, 19 | } 20 | 21 | 22 | class MultilineFactory: 23 | """ 24 | Multiline factory. 25 | Provides a static method to instantiate a multiline processor 26 | """ 27 | 28 | @staticmethod 29 | def create(multiline_type: str, **kwargs: Any) -> ProtocolMultiline: 30 | """ 31 | Instantiates a concrete Multiline processor given a multiline type and args 32 | """ 33 | 34 | if multiline_type not in _init_definition_by_multiline_type: 35 | raise ValueError( 36 | "You must provide one of the following multiline types: " 37 | + f"{', '.join(_init_definition_by_multiline_type.keys())}. 
{multiline_type} given" 38 | ) 39 | 40 | multiline_definition = _init_definition_by_multiline_type[multiline_type] 41 | 42 | multiline_builder: Callable[..., ProtocolMultiline] = multiline_definition["class"] 43 | 44 | return multiline_builder(**kwargs) 45 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | permissions: 10 | contents: read 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.ref }} 14 | cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} 15 | 16 | jobs: 17 | license: 18 | runs-on: ubuntu-latest 19 | timeout-minutes: 5 20 | steps: 21 | - uses: actions/checkout@v5 22 | - run: make license 23 | 24 | lint: 25 | runs-on: ubuntu-latest 26 | timeout-minutes: 5 27 | steps: 28 | - uses: actions/checkout@v5 29 | - run: make docker-lint 30 | 31 | coverage: 32 | runs-on: ubuntu-latest 33 | timeout-minutes: 60 34 | 35 | steps: 36 | 37 | - uses: actions/checkout@v5 38 | 39 | - uses: actions/setup-python@v6 40 | with: 41 | python-version: '3.12' # As defined in tests/scripts/docker/run_tests.sh 42 | cache: 'pip' # caching pip dependencies 43 | 44 | - run: make all-requirements 45 | 46 | - run: make coverage 47 | env: 48 | # See https://github.com/elastic/elastic-serverless-forwarder/pull/280#issuecomment-1461554126 49 | AWS_ACCESS_KEY_ID: AWS_ACCESS_KEY_ID 50 | AWS_SECRET_ACCESS_KEY: AWS_SECRET_ACCESS_KEY 51 | PYTEST_JUNIT: "--junitxml=./elastic-serverless-forwarder-junit.xml" 52 | 53 | - name: Store test results 54 | if: success() || failure() 55 | uses: actions/upload-artifact@v5 56 | with: 57 | name: test-results 58 | path: | 59 | **/elastic-serverless-forwarder-junit.xml 60 | **/coverage.xml 61 | -------------------------------------------------------------------------------- /docs/README-AWS.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | The Elastic Serverless Forwarder is an Amazon Web Services (AWS) Lambda function that ships logs from an AWS environment to Elastic. 4 | 5 | Please refer to the official [Elastic documentation for Elastic Serverless Forwarder](https://www.elastic.co/docs/reference/aws-forwarder) for detailed instructions on how to deploy and configure the forwarder. 6 | 7 | ## Overview 8 | 9 | - Amazon S3 (via SQS event notifications) 10 | - Amazon Kinesis Data Streams 11 | - Amazon CloudWatch Logs subscription filters 12 | - Amazon SQS message payload 13 | 14 | ![Lambda flow](https://github.com/elastic/elastic-serverless-forwarder/raw/lambda-v0.25.0/docs/lambda-flow.png) 15 | 16 | ## Important - v1.6.0 17 | 18 | #### Version 1.6.0 introduces a new event ID format which is backwards incompatible with previously indexed events. Be aware that previously indexed events would be duplicated if they trigger the forwarder again after upgrading to this version. More information is available at [our troubleshooting documentation](https://www.elastic.co/guide/en/observability/master/aws-serverless-troubleshooting.html#aws-serverless-troubleshooting-event-id-format). 
19 | 20 | ## Resources and links 21 | 22 | * [Elastic documentation for Elastic Serverless Forwarder](https://www.elastic.co/docs/reference/aws-forwarder) 23 | * [Elastic documentation for integrations](https://docs.elastic.co/en/integrations) 24 | * [Blog: Elastic and AWS Serverless Application Repository (SAR): Speed time to actionable insights with frictionless log ingestion from Amazon S3](https://www.elastic.co/blog/elastic-and-aws-serverless-application-repository-speed-time-to-actionable-insights-with-frictionless-log-ingestion-from-amazon-s3) 25 | -------------------------------------------------------------------------------- /tests/share/test_json.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from unittest import TestCase 6 | 7 | import pytest 8 | 9 | from share import json_dumper, json_parser 10 | 11 | 12 | @pytest.mark.unit 13 | class TestJsonParser(TestCase): 14 | def test_json_parser(self) -> None: 15 | with self.subTest("loads raises"): 16 | with self.assertRaises(Exception): 17 | json_parser("[") 18 | 19 | with self.subTest("loads array"): 20 | loaded = json_parser("[1, 2, 3]") 21 | assert [1, 2, 3] == loaded 22 | 23 | with self.subTest("loads dict"): 24 | loaded = json_parser('{"key":"value"}') 25 | assert {"key": "value"} == loaded 26 | 27 | with self.subTest("loads scalar"): 28 | loaded = json_parser('"a string"') 29 | assert "a string" == loaded 30 | 31 | 32 | @pytest.mark.unit 33 | class TestJsonDumper(TestCase): 34 | def test_json_dumper(self) -> None: 35 | with self.subTest("dumps raises"): 36 | with self.assertRaises(Exception): 37 | json_dumper(set()) 38 | 39 | with self.subTest("dumps bytes"): 40 | dumped = json_dumper(b"bytes") 41 | assert '"bytes"' == dumped 42 | 43 | with self.subTest("dumps str"): 44 | dumped = json_dumper("string") 45 | assert '"string"' == dumped 46 | 47 | with self.subTest("dumps dict"): 48 | dumped = json_dumper({"key": "value"}) 49 | assert '{"key":"value"}' == dumped 50 | -------------------------------------------------------------------------------- /tests/shippers/ssl/localhost.pkcs8.key: -------------------------------------------------------------------------------- 1 | -----BEGIN PRIVATE KEY----- 2 | MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQDG+CcsWAwO6BRc 3 | Yth7GhjrNE4vQdSRVXTsV9u52xjlCtygJalAymLwmRn6w62cQhSFZ3Tjn7v6Smt7 4 | IPiPJ2mJ7dn+wiwW4GHy+i5YtBTNQC+zEFBMRkTvxd1qZARJQjuSNjRaqDx2B8JN 5 | JsHyJzBSUm+uOcBWLz6si7KoyuOC9SLbmwRO7UKdfgTW9jpiWKnqwl9R/g2it2sx 6 | 9yCV9x5w9eZh33HDJvTwSzeO5bK/whO4BlG9BOMqRbvRuobApa4sHRzSKibztpXr 7 | H+mSWmMA6uRuaKTYo3jdN9lJoU+DQBSZVA0/v9St2YMzfHESPbxLVrtlGdC874mm 8 | A1Ycv9AlAgMBAAECggEAFGyx3L/3EOqq+GbfMRfXOgHHCtvXncmndIF6WURP9Gce 9 | icQD8VY1PNyCibbfkfxIjf82RQ2HJuTPzrZuPYe+sj171MaK7xg+hco/yrRFtGIj 10 | 415H1+SYvAJUPdEhjYGiitpMmofNmRIn1HKnkxuJvtGejER6ZX55EQ5QGzqkizM/ 11 | dfBxK7VirLD48WyOpbni4paow9155BVSp8gyt0gKyfDWwojmwFJhAcJk79HoPRvG 12 | 450lhzPt2Twdzv5r9FbRL93pA0OnWrY5VoEAlKRxIeDr/pHm3anTJpzd3gHbAkXF 13 | Yk5+8wy2h5BqbyVDhrST8unWhdTwDuS+EfFXY5FwcQKBgQDmQm2OZ09nbkb8iv46 14 | VMp37WoFrevwvqFnN9rdgdPPdaJgrxO/86XltqZtlNhQQokTqxHtUT7/7EQh23P/ 15 | Yo8whccIMCd/2BmKK/vaK8b/lt0y5hRDepQkJd9Yy99sZCZIq+GqdqDdyl3KUF5n 16 | v/9uqkd/GPGaeg33PxrqccmH2QKBgQDdNkNbEmLDwpGXx1X4A1ZFwk33opPYEf0Q 17 | 
F8JTCuZ6fuhqRGYBtn+et4apqWylyO2UZ0pi2PeQR+ua8nsBaW+vfeumzlEElvCD 18 | Qv/x4qB8sCnlOj/QtRRNyv/SGaatySEAaxoEghbbLPcbvvWsxzHQ6VlOxAF6ei3t 19 | F/1k4PwHLQKBgQCdXABrNYc6dE7nAEZFrvS/iOzP/NAlsuGzHKTihHswaIAT+xJQ 20 | OD7EAlMyyGocT5xGaL7TpJadZ+YYDVX0znJ1ArfGjMIxyImLuAtiSlCxE3UP99UZ 21 | WIgtPASrNoj2FmtjdrO+P1wotsfqH4qk9L2n7470+SMEIy2wLtxCJIlJ4QKBgQCU 22 | vh7uRt+YJ+VD/GoG+R9yiqNoZq4otHfH8WHd+s9dAKaAhftdHXyUmWz3+g8vLnrp 23 | tcZjzuYv4tw+dNtW6LGLfA0PPV1my6Nvb4av+6XUEZQqKU+to4TChkQb3tmfs0T4 24 | hguZimuZ9pM12eJRyiLqCW3es5cW5r6o1N+yjEGLBQKBgQDhgIvw+Ug2KORKND3C 25 | 7d7WlujRnGgUBygR4dLl//xfu9wrE9aSO+auCn4AXfukeeNn+a7V1Nh8j2Fsw9dF 26 | HomK3swZO4ab/DKDelNOHd0zyyjdFxJeSWg8CXDv5j1WR+Soh7Xqv+sO4RtQWNXQ 27 | nMfcUb52goDx+rT5ZPmjOeucSA== 28 | -----END PRIVATE KEY----- 29 | -------------------------------------------------------------------------------- /tests/scripts/license_headers_check.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | 6 | if [[ $# -eq 0 ]] 7 | then 8 | echo "Usage: $0 check|fix" 9 | exit 1 10 | fi 11 | 12 | FILES=$(find . \( -iname "*.py" -or -iname "*.sh" \) -not -path "./venv/*") 13 | for FILE in $FILES 14 | do 15 | MISSING=$(grep --files-without-match "Licensed under the Elastic License 2.0" "$FILE") 16 | if [[ -n "$MISSING" ]] 17 | then 18 | if [[ "$1" = "fix" ]] 19 | then 20 | echo fix "$FILE" 21 | TMPFILE=$(mktemp /tmp/license.XXXXXXXXXX) 22 | if [[ "$FILE" == *".sh" && $(grep "#!/usr/bin/env bash" "$FILE") ]] 23 | then 24 | cat <<EOF > "$TMPFILE" 25 | #!/usr/bin/env bash 26 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 27 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 28 | # you may not use this file except in compliance with the Elastic License 2.0. 29 | EOF 30 | tail -n +2 "$FILE" >> "$TMPFILE" 31 | mv "$TMPFILE" "$FILE" 32 | chmod 755 "$FILE" 33 | else 34 | cat <<EOF > "$TMPFILE" 35 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 36 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 37 | # you may not use this file except in compliance with the Elastic License 2.0. 
38 | 39 | EOF 40 | cat "$FILE" >> "$TMPFILE" 41 | mv "$TMPFILE" "$FILE" 42 | fi 43 | else 44 | echo "File with missing copyright header:" 45 | echo "$MISSING" 46 | exit 1 47 | fi 48 | fi 49 | done 50 | -------------------------------------------------------------------------------- /tests/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PYTHON_IMAGE 2 | FROM ${PYTHON_IMAGE} 3 | 4 | RUN apt-get -qq update && apt-get -qq -y --no-install-recommends install \ 5 | ca-certificates \ 6 | curl \ 7 | docker.io \ 8 | libsasl2-dev \ 9 | sudo \ 10 | zip && \ 11 | rm -rf /var/lib/apt/lists/* 12 | 13 | ARG UID={${UID}:-1001} 14 | ENV USER_ID=${LOCAL_USER_ID:-${UID}} 15 | 16 | RUN echo '{"storage-driver": "vfs"}' > /etc/docker/daemon.json 17 | 18 | # setup user 19 | RUN useradd --shell /bin/bash -u $USER_ID --gid 0 --non-unique --comment "" --create-home user 20 | RUN usermod -a -G sudo user 21 | RUN usermod -a -G docker user 22 | RUN echo "user ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/user 23 | 24 | 25 | # connection to ha.pool.sks-keyservers.net fails sometimes, so let's retry with couple different servers 26 | RUN for server in $(shuf -e ha.pool.sks-keyservers.net \ 27 | hkp://p80.pool.sks-keyservers.net:80 \ 28 | keyserver.ubuntu.com \ 29 | hkp://keyserver.ubuntu.com:80 \ 30 | pgp.mit.edu) ; do gpg --no-tty --keyserver "$server" --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 && s=0 && break || s=$?; done; (exit $s) 31 | 32 | RUN curl -o /usr/local/bin/gosu -sSL "https://github.com/tianon/gosu/releases/download/1.14/gosu-$(dpkg --print-architecture)" \ 33 | && curl -o /usr/local/bin/gosu.asc -sSL "https://github.com/tianon/gosu/releases/download/1.14/gosu-$(dpkg --print-architecture).asc" \ 34 | && gpg --verify /usr/local/bin/gosu.asc \ 35 | && rm /usr/local/bin/gosu.asc \ 36 | && chmod +x /usr/local/bin/gosu 37 | 38 | COPY tests/entrypoint.sh /usr/local/bin/entrypoint.sh 39 | 40 | COPY requirements.txt / 41 | 42 | # if we're in a pypy image, link pypy/pypy3 to /usr/local/bin/python 43 | RUN if command -v pypy3; then ln -s $(command -v pypy3) /usr/local/bin/python; elif command -v pypy; then ln -s $(command -v pypy) /usr/local/bin/python; fi 44 | 45 | RUN chmod +x /usr/local/bin/entrypoint.sh 46 | 47 | WORKDIR /app 48 | 49 | ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] 50 | -------------------------------------------------------------------------------- /tests/share/test_factory.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | 5 | from unittest import TestCase 6 | 7 | import pytest 8 | 9 | from share import CountMultiline, MultilineFactory, PatternMultiline, WhileMultiline 10 | 11 | 12 | @pytest.mark.unit 13 | class TestMultilineFactory(TestCase): 14 | def test_create(self) -> None: 15 | with self.subTest("create count multiline success"): 16 | multiline = MultilineFactory.create(multiline_type="count", count_lines=1) 17 | 18 | assert isinstance(multiline, CountMultiline) 19 | 20 | with self.subTest("create count multiline error"): 21 | with self.assertRaises(TypeError): 22 | MultilineFactory.create(multiline_type="count") 23 | 24 | with self.subTest("create pattern multiline success"): 25 | multiline = MultilineFactory.create(multiline_type="pattern", pattern=".+", match="after") 26 | 27 | assert isinstance(multiline, PatternMultiline) 28 | 29 | with self.subTest("create pattern multiline error"): 30 | with self.assertRaises(TypeError): 31 | MultilineFactory.create(multiline_type="pattern") 32 | 33 | with self.subTest("create while_pattern multiline success"): 34 | multiline = MultilineFactory.create(multiline_type="while_pattern", pattern=".+") 35 | 36 | assert isinstance(multiline, WhileMultiline) 37 | 38 | with self.subTest("create while_pattern multiline error"): 39 | with self.assertRaises(TypeError): 40 | MultilineFactory.create(multiline_type="while_pattern") 41 | 42 | with self.subTest("create invalid type"): 43 | with self.assertRaisesRegex( 44 | ValueError, 45 | "^You must provide one of the following multiline types: " 46 | "count, pattern, while_pattern. invalid type given$", 47 | ): 48 | MultilineFactory.create(multiline_type="invalid type") 49 | -------------------------------------------------------------------------------- /storage/factory.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from typing import Any, Callable, Optional 6 | 7 | from share import ExpandEventListFromField, ProtocolMultiline, json_dumper 8 | 9 | from .payload import PayloadStorage 10 | from .s3 import S3Storage 11 | from .storage import ProtocolStorage 12 | 13 | _init_definition_by_storage_type: dict[str, dict[str, Any]] = { 14 | "s3": {"class": S3Storage, "kwargs": ["bucket_name", "object_key"]}, 15 | "payload": {"class": PayloadStorage, "kwargs": ["payload"]}, 16 | } 17 | 18 | 19 | class StorageFactory: 20 | """ 21 | Storage factory. 
22 | Provides static methods to instantiate a Storage 23 | """ 24 | 25 | @staticmethod 26 | def create( 27 | storage_type: str, 28 | json_content_type: Optional[str] = None, 29 | event_list_from_field_expander: Optional[ExpandEventListFromField] = None, 30 | multiline_processor: Optional[ProtocolMultiline] = None, 31 | **kwargs: Any, 32 | ) -> ProtocolStorage: 33 | """ 34 | Instantiates a concrete Storage given its type and the storage init kwargs 35 | """ 36 | 37 | if storage_type not in _init_definition_by_storage_type: 38 | raise ValueError( 39 | "You must provide one of the following storage types: " 40 | + f"{', '.join(_init_definition_by_storage_type.keys())}" 41 | ) 42 | 43 | storage_definition = _init_definition_by_storage_type[storage_type] 44 | storage_kwargs = storage_definition["kwargs"] 45 | 46 | init_kwargs: list[str] = [key for key in kwargs.keys() if key in storage_kwargs and kwargs[key]] 47 | if len(init_kwargs) != len(storage_kwargs): 48 | raise ValueError( 49 | f"You must provide the following not empty init kwargs for {storage_type}: " 50 | + f"{', '.join(storage_kwargs)}. (provided: {json_dumper(kwargs)})" 51 | ) 52 | 53 | kwargs["json_content_type"] = json_content_type 54 | kwargs["multiline_processor"] = multiline_processor 55 | kwargs["event_list_from_field_expander"] = event_list_from_field_expander 56 | 57 | storage_builder: Callable[..., ProtocolStorage] = storage_definition["class"] 58 | return storage_builder(**kwargs) 59 | -------------------------------------------------------------------------------- /.github/workflows/create-tag.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Workflow to create a new git tag if version.py variable version gets updated 3 | name: create-tag 4 | 5 | permissions: 6 | contents: write # write permission is required to create a GitHub release 7 | 8 | on: 9 | push: 10 | branches: 11 | - 'main' 12 | paths: 13 | - 'share/version.py' 14 | 15 | jobs: 16 | 17 | create-tag: 18 | runs-on: ubuntu-latest 19 | timeout-minutes: 5 20 | 21 | steps: 22 | 23 | - uses: actions/checkout@v5 24 | 25 | - name: Get version number 26 | shell: bash 27 | run: | 28 | VERSION=$(grep -oE '[0-9]+\.[0-9]+\.[0-9]+(\-[a-zA-Z]+[0-9]+)?' share/version.py) 29 | echo "VERSION=${VERSION}" >> $GITHUB_ENV 30 | echo "::notice::ESF version is $VERSION." 31 | 32 | - name: Check if version increased 33 | id: version 34 | shell: bash 35 | run: | 36 | git fetch --tags 37 | 38 | # We will list all tags sorted by the version after the prefix lambda-v. 39 | # We retrieve only the first line - that is, the most recent version. 40 | # After that, we remove the prefix to only get the version number. 41 | old_version=$(git tag --list --sort=-version:refname "lambda-v*" | awk 'NR==1{print $1}' | awk -F"lambda-v" '{ print $NF }') 42 | 43 | # We now need to compare the current version inside version.py. 44 | IFS='.' read -a new_numbers <<< ${{ env.VERSION }} 45 | IFS='.' read -a old_numbers <<< $old_version 46 | 47 | CREATE_TAG=false # only create tag if version increased 48 | for i in 0 1 2 49 | do 50 | if [[ ${new_numbers[i]} > ${old_numbers[i]} ]] 51 | then 52 | CREATE_TAG=true 53 | break 54 | elif [[ ${new_numbers[i]} < ${old_numbers[i]} ]] 55 | then 56 | break 57 | fi 58 | done 59 | 60 | echo "CREATE_TAG=${CREATE_TAG}" >> $GITHUB_ENV 61 | echo "::notice::Latest version is $old_version." 62 | echo "::notice::Current version is ${{ env.VERSION }}." 63 | echo "::notice::The result for creating tag is $CREATE_TAG." 
64 | 65 | - name: Create tag 66 | if: env.CREATE_TAG == 'true' # run only in case CREATE_TAG is true 67 | uses: actions/github-script@v8 68 | with: 69 | script: | 70 | github.rest.git.createRef({ 71 | owner: context.repo.owner, 72 | repo: context.repo.repo, 73 | ref: 'refs/tags/lambda-v' + "${{ env.VERSION }}", 74 | sha: context.sha 75 | }) 76 | -------------------------------------------------------------------------------- /dev-corner/how-to-test-locally/README.md: -------------------------------------------------------------------------------- 1 | This document contains details about how to build ESF Lambda locally. 2 | Once built, the Lambda can be deployed to validate functionality. 3 | 4 | ## Building lambda 5 | 6 | To build the Lambda, you may use one of the options below, 7 | 8 | ### Using Makefile 9 | 10 | To build, 11 | 12 | ```shell 13 | make package 14 | ``` 15 | 16 | This will generate a Lambda zip named `local_esf.zip`. 17 | 18 | To clean up any leftover resources, 19 | 20 | ```shell 21 | make clean 22 | ``` 23 | 24 | ### Using Task file 25 | 26 | #### Requirements 27 | 28 | - [Terraform](https://www.terraform.io/) 29 | - (Optional) [Taskfile](https://taskfile.dev/installation/) 30 | 31 | 32 | #### Building 33 | 34 | **Important note**: ESF dependencies have been tested on architecture `x86_64`. Make sure to use it as well. 35 | 36 | You can build your own, or you can choose to run: 37 | ```bash 38 | task 39 | ``` 40 | To build it automatically. 41 | 42 | You can update the task variables in the `.env` file: 43 | - The list of python dependencies, `DEPENDENCIES`. 44 | - The list of python requirement files, `REQUIREMENTS`. 45 | - The name of the zip file, `FILENAME`. 46 | 47 | 48 | ## Deploying Lambda 49 | 50 | Once Lambda zip is ready, you should use the code in [ESF terraform repository](https://github.com/elastic/terraform-elastic-esf). 51 | 52 | > **NOTE**: ESF lambda function is using architecture `x86_64`. 53 | 54 | Place your `local_esf.zip` (or `` if you changed the value) in the same directory as ESF terraform. 55 | 56 | Go to `esf.tf` file and edit: 57 | 58 | ```terraform 59 | locals { 60 | ... 61 | dependencies-file = "local_esf.zip" # value of FILENAME in .env 62 | ... 63 | } 64 | ``` 65 | 66 | Remove/comment these lines from `esf.tf` file: 67 | 68 | ```terraform 69 | #resource "terraform_data" "curl-dependencies-zip" { 70 | # provisioner "local-exec" { 71 | # command = "curl -L -O ${local.dependencies-bucket-url}/${local.dependencies-file}" 72 | # } 73 | #} 74 | ``` 75 | 76 | And fix the now missing dependency in `dependencies-file`: 77 | 78 | ```terraform 79 | resource "aws_s3_object" "dependencies-file" { 80 | bucket = local.config-bucket-name 81 | key = local.dependencies-file 82 | source = local.dependencies-file 83 | 84 | depends_on = [aws_s3_bucket.esf-config-bucket] #, terraform_data.curl-dependencies-zip] 85 | } 86 | ``` 87 | 88 | Now follow the README file from [ESF terraform repository](https://github.com/elastic/terraform-elastic-esf) on how to configure the remaining necessary variables. You will have to configure `release-version` variable, but it will not be relevant to this. You can set any value you want for it. 89 | 90 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 10 | 11 | ## What does this PR do? 12 | 13 | 16 | 17 | ## Why is it important? 
18 | 19 | 22 | 23 | ## Checklist 24 | 25 | 30 | 31 | - [ ] My code follows the style guidelines of this project 32 | - [ ] I have commented my code, particularly in hard-to-understand areas 33 | - [ ] I have made corresponding changes to the documentation 34 | - [ ] I have made corresponding change to the default configuration files 35 | - [ ] I have added tests that prove my fix is effective or that my feature works 36 | - [ ] I have added an entry in `CHANGELOG.md` and updated `share/version.py`, if my change requires a new release. 37 | 38 | ## Author's Checklist 39 | 40 | 43 | - [ ] 44 | 45 | ## How to test this PR locally 46 | 47 | 50 | 51 | ## Related issues 52 | 53 | 61 | - 62 | 63 | ## Use cases 64 | 65 | 70 | 71 | ## Screenshots 72 | 73 | 76 | 77 | ## Logs 78 | 79 | 82 | -------------------------------------------------------------------------------- /tests/share/test_include_exclude.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from __future__ import annotations 6 | 7 | from unittest import TestCase 8 | 9 | import pytest 10 | 11 | from share import IncludeExcludeFilter, IncludeExcludeRule 12 | 13 | _message = "a message" 14 | 15 | 16 | @pytest.mark.unit 17 | class TestIncludeExclude(TestCase): 18 | def test_include_exclude(self) -> None: 19 | with self.subTest("no rules"): 20 | include_exclude_filter = IncludeExcludeFilter() 21 | assert include_exclude_filter.filter(_message) is True 22 | 23 | with self.subTest("exclude rule match"): 24 | include_exclude_filter = IncludeExcludeFilter(exclude_patterns=[IncludeExcludeRule(pattern="message")]) 25 | assert include_exclude_filter.filter(_message) is False 26 | 27 | with self.subTest("exclude rule not match"): 28 | include_exclude_filter = IncludeExcludeFilter(exclude_patterns=[IncludeExcludeRule(pattern="not matching")]) 29 | assert include_exclude_filter.filter(_message) is True 30 | 31 | with self.subTest("include rule match"): 32 | include_exclude_filter = IncludeExcludeFilter(include_patterns=[IncludeExcludeRule(pattern="message")]) 33 | assert include_exclude_filter.filter(_message) is True 34 | 35 | with self.subTest("include rule not match"): 36 | include_exclude_filter = IncludeExcludeFilter(include_patterns=[IncludeExcludeRule(pattern="not matching")]) 37 | assert include_exclude_filter.filter(_message) is False 38 | 39 | with self.subTest("both rules exclude priority"): 40 | include_exclude_filter = IncludeExcludeFilter( 41 | include_patterns=[IncludeExcludeRule(pattern="message")], 42 | exclude_patterns=[IncludeExcludeRule(pattern="message")], 43 | ) 44 | assert include_exclude_filter.filter(_message) is False 45 | 46 | with self.subTest("both rules include match"): 47 | include_exclude_filter = IncludeExcludeFilter( 48 | include_patterns=[IncludeExcludeRule(pattern="message")], 49 | exclude_patterns=[IncludeExcludeRule(pattern="not matching")], 50 | ) 51 | assert include_exclude_filter.filter(_message) is True 52 | 53 | with self.subTest("both rules no match"): 54 | include_exclude_filter = IncludeExcludeFilter( 55 | include_patterns=[IncludeExcludeRule(pattern="not matching")], 56 | exclude_patterns=[IncludeExcludeRule(pattern="not matching")], 57 | ) 58 | assert include_exclude_filter.filter(_message) is False 59 | 
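The include/exclude tests above pin down the filter's precedence rules: with no patterns configured every message passes, exclude patterns always take priority over include patterns, and an include-only filter drops any message that matches none of its patterns. As a minimal illustrative sketch (not part of the repository; it only uses the `IncludeExcludeFilter` and `IncludeExcludeRule` API exercised by these tests), the same behaviour can be demonstrated directly:

```python
# Illustrative sketch only: mirrors the precedence covered by the tests above.
from share import IncludeExcludeFilter, IncludeExcludeRule

# No rules configured: every message passes through.
assert IncludeExcludeFilter().filter("a message") is True

# A message matching both an include and an exclude pattern is dropped: exclude wins.
both = IncludeExcludeFilter(
    include_patterns=[IncludeExcludeRule(pattern="message")],
    exclude_patterns=[IncludeExcludeRule(pattern="message")],
)
assert both.filter("a message") is False

# An include-only filter drops messages that match none of its patterns.
include_only = IncludeExcludeFilter(include_patterns=[IncludeExcludeRule(pattern="^ERROR")])
assert include_only.filter("ERROR: something failed") is True
assert include_only.filter("INFO: all good") is False
```

The boolean returned by `filter()` is what `CompositeShipper.send` (in `shippers/composite.py`, below) relies on to decide whether to forward an event or return `EVENT_IS_FILTERED`.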
-------------------------------------------------------------------------------- /shippers/composite.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from copy import deepcopy 6 | from typing import Any, Optional 7 | 8 | from share import IncludeExcludeFilter, shared_logger 9 | 10 | from .shipper import ( 11 | EVENT_IS_EMPTY, 12 | EVENT_IS_FILTERED, 13 | EVENT_IS_SENT, 14 | EventIdGeneratorCallable, 15 | ProtocolShipper, 16 | ReplayHandlerCallable, 17 | ) 18 | 19 | 20 | class CompositeShipper: 21 | """ 22 | Composite Shipper. 23 | This class implements composite pattern for shippers 24 | """ 25 | 26 | def __init__(self, **kwargs: Any): 27 | self._shippers: list[ProtocolShipper] = [] 28 | self._include_exclude_filter: Optional[IncludeExcludeFilter] = None 29 | 30 | def add_include_exclude_filter(self, include_exclude_filter: Optional[IncludeExcludeFilter]) -> None: 31 | """ 32 | IncludeExcludeFilter setter. 33 | Add an includeExcludeFilter to the composite 34 | """ 35 | self._include_exclude_filter = include_exclude_filter 36 | 37 | def add_shipper(self, shipper: ProtocolShipper) -> None: 38 | """ 39 | Shipper setter. 40 | Add a shipper to the composite 41 | """ 42 | self._shippers.append(shipper) 43 | 44 | def set_event_id_generator(self, event_id_generator: EventIdGeneratorCallable) -> None: 45 | for shipper in self._shippers: 46 | shipper.set_event_id_generator(event_id_generator=event_id_generator) 47 | 48 | def set_replay_handler(self, replay_handler: ReplayHandlerCallable) -> None: 49 | for shipper in self._shippers: 50 | shipper.set_replay_handler(replay_handler=replay_handler) 51 | 52 | def send(self, event: dict[str, Any]) -> str: 53 | message: str = "" 54 | if "fields" in event and "message" in event["fields"]: 55 | message = event["fields"]["message"] 56 | elif "message" in event: 57 | message = event["message"] 58 | 59 | if len(message.strip()) == 0: 60 | shared_logger.debug("event is empty: message is zero length") 61 | return EVENT_IS_EMPTY 62 | 63 | if self._include_exclude_filter is not None and not self._include_exclude_filter.filter(message): 64 | shared_logger.debug("event is filtered according to filter rules") 65 | return EVENT_IS_FILTERED 66 | 67 | for shipper in self._shippers: 68 | # dict are mutated if not deep copied, every shipper can mutate the 69 | # events it receives without affecting the events of other shippers 70 | sent_event = deepcopy(event) 71 | shipper.send(sent_event) 72 | 73 | return EVENT_IS_SENT 74 | 75 | def flush(self) -> None: 76 | for shipper in self._shippers: 77 | shipper.flush() 78 | -------------------------------------------------------------------------------- /tests/storage/test_factory.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | 5 | import re 6 | from unittest import TestCase 7 | 8 | import pytest 9 | 10 | from storage import PayloadStorage, S3Storage, StorageFactory 11 | 12 | 13 | @pytest.mark.unit 14 | class TestStorageFactory(TestCase): 15 | def test_create(self) -> None: 16 | with self.subTest("create s3 storage success"): 17 | storage = StorageFactory.create(storage_type="s3", bucket_name="bucket_name", object_key="object_key") 18 | 19 | assert isinstance(storage, S3Storage) 20 | 21 | with self.subTest("create s3 storage error"): 22 | with self.assertRaisesRegex( 23 | ValueError, 24 | re.escape( 25 | "You must provide the following not empty init kwargs for" 26 | + " s3: bucket_name, object_key. (provided: {})" 27 | ), 28 | ): 29 | StorageFactory.create(storage_type="s3") 30 | 31 | with self.subTest("create s3 storage empty kwargs"): 32 | with self.assertRaisesRegex( 33 | ValueError, 34 | re.escape( 35 | "You must provide the following not empty init kwargs for s3: bucket_name, object_key." 36 | + ' (provided: {"bucket_name":"","object_key":""})' 37 | ), 38 | ): 39 | StorageFactory.create(storage_type="s3", bucket_name="", object_key="") 40 | 41 | with self.subTest("create payload storage success"): 42 | storage = StorageFactory.create(storage_type="payload", payload="payload") 43 | 44 | assert isinstance(storage, PayloadStorage) 45 | 46 | with self.subTest("create payload storage error"): 47 | with self.assertRaisesRegex( 48 | ValueError, 49 | re.escape( 50 | "You must provide the following not empty init kwargs for" + " payload: payload. (provided: {})" 51 | ), 52 | ): 53 | StorageFactory.create(storage_type="payload") 54 | 55 | with self.subTest("create payload storage empty kwargs"): 56 | with self.assertRaisesRegex( 57 | ValueError, 58 | re.escape( 59 | "You must provide the following not empty init kwargs for payload: payload." 60 | + ' (provided: {"payload":""})' 61 | ), 62 | ): 63 | StorageFactory.create(storage_type="payload", payload="") 64 | 65 | with self.subTest("create invalid type"): 66 | with self.assertRaisesRegex( 67 | ValueError, "^You must provide one of the following storage types: s3, payload$" 68 | ): 69 | StorageFactory.create(storage_type="invalid type") 70 | -------------------------------------------------------------------------------- /storage/storage.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | 5 | from abc import ABCMeta 6 | from io import BytesIO 7 | from typing import Any, Callable, Iterator, Optional, Protocol, TypeVar, Union 8 | 9 | from typing_extensions import TypeAlias 10 | 11 | from share import ExpandEventListFromField, ProtocolMultiline 12 | 13 | # CHUNK_SIZE is how much we read from the gzip stream at every iteration in the inflate decorator 14 | # BEWARE, this CHUNK_SIZE has a huge impact on performance, contrary to what we stated here: 15 | # https://github.com/elastic/elastic-serverless-forwarder/pull/11#discussion_r732587976 16 | # Reinstating to 1M from 1K resulted on 6.2M gzip of 35.1 of inflated content 17 | # to be ingested in 45 secs instead of having the lambda timing out 18 | CHUNK_SIZE: int = 1024**2 19 | 20 | 21 | def is_gzip_content(content: bytes) -> bool: 22 | return content.startswith(b"\037\213") # gzip compression method 23 | 24 | 25 | class StorageReader: 26 | """ 27 | StorageReader is an interface for contents returned by storage. 28 | It wraps the underlying type and forward to it 29 | """ 30 | 31 | def __init__(self, raw: Any): 32 | self._raw = raw 33 | 34 | def __getattr__(self, item: str) -> Any: 35 | return getattr(self._raw, item) 36 | 37 | 38 | # GetByLinesIterator yields a tuple of content, starting offset, ending offset 39 | # and optional offset of a list of expanded events 40 | GetByLinesIterator: TypeAlias = Iterator[tuple[bytes, int, int, Optional[int]]] 41 | 42 | 43 | class ProtocolStorage(Protocol): 44 | """ 45 | Protocol for Storage components 46 | """ 47 | 48 | json_content_type: Optional[str] 49 | multiline_processor: Optional[ProtocolMultiline] 50 | event_list_from_field_expander: Optional[ExpandEventListFromField] 51 | 52 | def get_by_lines(self, range_start: int) -> GetByLinesIterator: 53 | pass # pragma: no cover 54 | 55 | def get_as_string(self) -> str: 56 | pass # pragma: no cover 57 | 58 | 59 | class CommonStorage(metaclass=ABCMeta): 60 | """ 61 | Common class for Storage components 62 | """ 63 | 64 | json_content_type: Optional[str] = None 65 | multiline_processor: Optional[ProtocolMultiline] = None 66 | event_list_from_field_expander: Optional[ExpandEventListFromField] = None 67 | 68 | 69 | ProtocolStorageType = TypeVar("ProtocolStorageType", bound=ProtocolStorage) 70 | 71 | # StorageDecoratorIterator yields a tuple of content (expressed as `StorageReader` or bytes), starting offset, 72 | # ending offset, newline and optional offset of a list of expanded events 73 | StorageDecoratorIterator: TypeAlias = Iterator[tuple[Union[StorageReader, bytes], int, int, bytes, Optional[int]]] 74 | 75 | # StorageDecoratorCallable accepts a `ProtocolStorageType`, the range start offset, the content as BytesIO and a boolean 76 | # flag indicating if the content is gzipped as arguments. It returns a `StorageDecoratorIterator` 77 | StorageDecoratorCallable = Callable[[ProtocolStorageType, int, BytesIO, bool], StorageDecoratorIterator] 78 | -------------------------------------------------------------------------------- /shippers/factory.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | 5 | from typing import Any, Callable 6 | 7 | from share.config import ElasticsearchOutput, LogstashOutput, Output 8 | 9 | from .es import ElasticsearchShipper 10 | from .logstash import LogstashShipper 11 | from .shipper import ProtocolShipper 12 | 13 | _init_definition_by_output: dict[str, dict[str, Any]] = { 14 | "elasticsearch": { 15 | "class": ElasticsearchShipper, 16 | }, 17 | "logstash": { 18 | "class": LogstashShipper, 19 | }, 20 | } 21 | 22 | 23 | class ShipperFactory: 24 | """ 25 | Shipper factory. 26 | Provides static methods to instantiate a shipper 27 | """ 28 | 29 | @staticmethod 30 | def create_from_output(output_type: str, output: Output) -> ProtocolShipper: 31 | """ 32 | Instantiates a concrete Shipper given an output type and an Output instance 33 | """ 34 | 35 | if output_type == "elasticsearch": 36 | if not isinstance(output, ElasticsearchOutput): 37 | raise ValueError(f"output expected to be ElasticsearchOutput type, given {type(output)}") 38 | 39 | return ShipperFactory.create( 40 | output_type="elasticsearch", 41 | elasticsearch_url=output.elasticsearch_url, 42 | username=output.username, 43 | password=output.password, 44 | cloud_id=output.cloud_id, 45 | api_key=output.api_key, 46 | es_datastream_name=output.es_datastream_name, 47 | tags=output.tags, 48 | batch_max_actions=output.batch_max_actions, 49 | batch_max_bytes=output.batch_max_bytes, 50 | ssl_assert_fingerprint=output.ssl_assert_fingerprint, 51 | es_dead_letter_index=output.es_dead_letter_index, 52 | ) 53 | 54 | if output_type == "logstash": 55 | if not isinstance(output, LogstashOutput): 56 | raise ValueError(f"output expected to be LogstashOutput type, given {type(output)}") 57 | 58 | return ShipperFactory.create( 59 | output_type="logstash", 60 | logstash_url=output.logstash_url, 61 | username=output.username, 62 | password=output.password, 63 | max_batch_size=output.max_batch_size, 64 | compression_level=output.compression_level, 65 | ssl_assert_fingerprint=output.ssl_assert_fingerprint, 66 | tags=output.tags, 67 | ) 68 | 69 | raise ValueError( 70 | f"You must provide one of the following outputs: " f"{', '.join(_init_definition_by_output.keys())}" 71 | ) 72 | 73 | @staticmethod 74 | def create(output_type: str, **kwargs: Any) -> ProtocolShipper: 75 | """ 76 | Instantiates a concrete Shipper given an output type and the shipper init kwargs 77 | """ 78 | 79 | if output_type not in _init_definition_by_output: 80 | raise ValueError( 81 | f"You must provide one of the following outputs: " f"{', '.join(_init_definition_by_output.keys())}" 82 | ) 83 | 84 | output_definition = _init_definition_by_output[output_type] 85 | 86 | output_builder: Callable[..., ProtocolShipper] = output_definition["class"] 87 | 88 | return output_builder(**kwargs) 89 | -------------------------------------------------------------------------------- /handlers/aws/replay_trigger.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | 5 | from typing import Any, Optional 6 | 7 | from share import Config, ElasticsearchOutput, Input, LogstashOutput, Output, shared_logger 8 | from shippers import CompositeShipper, ProtocolShipper, ShipperFactory 9 | 10 | from .exceptions import InputConfigException, OutputConfigException, ReplayHandlerException 11 | from .utils import delete_sqs_record 12 | 13 | 14 | class ReplayedEventReplayHandler: 15 | def __init__(self, replay_queue_arn: str): 16 | self._replay_queue_arn = replay_queue_arn 17 | self._failed_event_ids: list[str] = [] 18 | self._events_with_receipt_handle: dict[str, str] = {} 19 | 20 | def add_event_with_receipt_handle(self, event_uniq_id: str, receipt_handle: str) -> None: 21 | self._events_with_receipt_handle[event_uniq_id] = receipt_handle 22 | 23 | def replay_handler( 24 | self, output_destination: str, output_args: dict[str, Any], event_payload: dict[str, Any] 25 | ) -> None: 26 | event_uniq_id: str = event_payload["_id"] + output_destination 27 | self._failed_event_ids.append(event_uniq_id) 28 | 29 | def flush(self) -> None: 30 | for failed_event_uniq_id in self._failed_event_ids: 31 | del self._events_with_receipt_handle[failed_event_uniq_id] 32 | 33 | for receipt_handle in self._events_with_receipt_handle.values(): 34 | delete_sqs_record(self._replay_queue_arn, receipt_handle) 35 | 36 | if len(self._failed_event_ids) > 0: 37 | raise ReplayHandlerException() 38 | 39 | 40 | def get_shipper_for_replay_event( 41 | config: Config, 42 | output_destination: str, 43 | output_args: dict[str, Any], 44 | event_input_id: str, 45 | replay_handler: ReplayedEventReplayHandler, 46 | ) -> Optional[CompositeShipper]: 47 | event_input: Optional[Input] = config.get_input_by_id(event_input_id) 48 | if event_input is None: 49 | raise InputConfigException(f"Cannot load input for input id {event_input_id}") 50 | 51 | output: Optional[Output] = event_input.get_output_by_destination(output_destination) 52 | if output is None: 53 | raise OutputConfigException(f"Cannot load output with destination {output_destination}") 54 | 55 | # Let's wrap the specific output shipper in the composite one, since the composite deepcopy the mutating events 56 | shipper: CompositeShipper = CompositeShipper() 57 | 58 | if output.type == "elasticsearch": 59 | assert isinstance(output, ElasticsearchOutput) 60 | output.es_datastream_name = output_args["es_datastream_name"] 61 | shared_logger.debug("setting ElasticSearch shipper") 62 | elasticsearch: ProtocolShipper = ShipperFactory.create_from_output(output_type=output.type, output=output) 63 | 64 | shipper.add_shipper(elasticsearch) 65 | shipper.set_replay_handler(replay_handler=replay_handler.replay_handler) 66 | 67 | return shipper 68 | 69 | if output.type == "logstash": 70 | assert isinstance(output, LogstashOutput) 71 | shared_logger.debug("setting Logstash shipper") 72 | logstash: ProtocolShipper = ShipperFactory.create_from_output(output_type=output.type, output=output) 73 | 74 | shipper.add_shipper(logstash) 75 | shipper.set_replay_handler(replay_handler=replay_handler.replay_handler) 76 | 77 | return shipper 78 | 79 | return None 80 | -------------------------------------------------------------------------------- /share/include_exlude.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. 
Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from __future__ import annotations 6 | 7 | import re 8 | from typing import Optional 9 | 10 | 11 | class IncludeExcludeRule: 12 | """ 13 | IncludeExcludeRule represents a pattern rule 14 | """ 15 | 16 | def __init__(self, pattern: str): 17 | self.pattern = re.compile(pattern) 18 | 19 | def __eq__(self, other: object) -> bool: 20 | assert isinstance(other, IncludeExcludeRule) 21 | 22 | return self.pattern == other.pattern 23 | 24 | 25 | class IncludeExcludeFilter: 26 | """ 27 | Base class for IncludeExclude filter 28 | """ 29 | 30 | def __init__( 31 | self, 32 | include_patterns: Optional[list[IncludeExcludeRule]] = None, 33 | exclude_patterns: Optional[list[IncludeExcludeRule]] = None, 34 | ): 35 | self._include_rules: Optional[list[IncludeExcludeRule]] = None 36 | self._exclude_rules: Optional[list[IncludeExcludeRule]] = None 37 | 38 | if include_patterns is not None and len(include_patterns) > 0: 39 | self.include_rules = include_patterns 40 | 41 | if exclude_patterns is not None and len(exclude_patterns) > 0: 42 | self.exclude_rules = exclude_patterns 43 | 44 | self._always_yield = self._include_rules is None and self._exclude_rules is None 45 | 46 | self._include_only = self._include_rules is not None and self._exclude_rules is None 47 | self._exclude_only = self._exclude_rules is not None and self._include_rules is None 48 | 49 | def _is_included(self, message: str) -> bool: 50 | assert self._include_rules is not None 51 | 52 | for include_rule in self._include_rules: 53 | if include_rule.pattern.search(message) is not None: 54 | return True 55 | 56 | return False 57 | 58 | def _is_excluded(self, message: str) -> bool: 59 | assert self._exclude_rules is not None 60 | 61 | for exclude_rule in self._exclude_rules: 62 | if exclude_rule.pattern.search(message) is not None: 63 | return True 64 | 65 | return False 66 | 67 | def filter(self, message: str) -> bool: 68 | """ 69 | filter returns True if the event is included or not excluded 70 | """ 71 | 72 | if self._always_yield: 73 | return True 74 | 75 | if self._include_only: 76 | return self._is_included(message) 77 | 78 | if self._exclude_only: 79 | return not self._is_excluded(message) 80 | 81 | if self._is_excluded(message): 82 | return False 83 | 84 | return self._is_included(message) 85 | 86 | def __eq__(self, other: object) -> bool: 87 | assert isinstance(other, IncludeExcludeFilter) 88 | 89 | return self.include_rules == other.include_rules and self.exclude_rules == other.exclude_rules 90 | 91 | @property 92 | def include_rules(self) -> Optional[list[IncludeExcludeRule]]: 93 | return self._include_rules 94 | 95 | @include_rules.setter 96 | def include_rules(self, value: list[IncludeExcludeRule]) -> None: 97 | self._include_rules = value 98 | 99 | @property 100 | def exclude_rules(self) -> Optional[list[IncludeExcludeRule]]: 101 | return self._exclude_rules 102 | 103 | @exclude_rules.setter 104 | def exclude_rules(self, value: list[IncludeExcludeRule]) -> None: 105 | self._exclude_rules = value 106 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Elastic License 2.0 2 | 3 | URL: https://www.elastic.co/licensing/elastic-license 4 | 5 | ## Acceptance 6 | 7 | By using the software, you agree to all of the terms and conditions below. 
8 | 9 | ## Copyright License 10 | 11 | The licensor grants you a non-exclusive, royalty-free, worldwide, 12 | non-sublicensable, non-transferable license to use, copy, distribute, make 13 | available, and prepare derivative works of the software, in each case subject to 14 | the limitations and conditions below. 15 | 16 | ## Limitations 17 | 18 | You may not provide the software to third parties as a hosted or managed 19 | service, where the service provides users with access to any substantial set of 20 | the features or functionality of the software. 21 | 22 | You may not move, change, disable, or circumvent the license key functionality 23 | in the software, and you may not remove or obscure any functionality in the 24 | software that is protected by the license key. 25 | 26 | You may not alter, remove, or obscure any licensing, copyright, or other notices 27 | of the licensor in the software. Any use of the licensor’s trademarks is subject 28 | to applicable law. 29 | 30 | ## Patents 31 | 32 | The licensor grants you a license, under any patent claims the licensor can 33 | license, or becomes able to license, to make, have made, use, sell, offer for 34 | sale, import and have imported the software, in each case subject to the 35 | limitations and conditions in this license. This license does not cover any 36 | patent claims that you cause to be infringed by modifications or additions to 37 | the software. If you or your company make any written claim that the software 38 | infringes or contributes to infringement of any patent, your patent license for 39 | the software granted under these terms ends immediately. If your company makes 40 | such a claim, your patent license ends immediately for work on behalf of your 41 | company. 42 | 43 | ## Notices 44 | 45 | You must ensure that anyone who gets a copy of any part of the software from you 46 | also gets a copy of these terms. 47 | 48 | If you modify the software, you must include in any modified copies of the 49 | software prominent notices stating that you have modified the software. 50 | 51 | ## No Other Rights 52 | 53 | These terms do not imply any licenses other than those expressly granted in 54 | these terms. 55 | 56 | ## Termination 57 | 58 | If you use the software in violation of these terms, such use is not licensed, 59 | and your licenses will automatically terminate. If the licensor provides you 60 | with a notice of your violation, and you cease all violation of this license no 61 | later than 30 days after you receive that notice, your licenses will be 62 | reinstated retroactively. However, if you violate these terms after such 63 | reinstatement, any additional violation of these terms will cause your licenses 64 | to terminate automatically and permanently. 65 | 66 | ## No Liability 67 | 68 | *As far as the law allows, the software comes as is, without any warranty or 69 | condition, and the licensor will not be liable to you for any damages arising 70 | out of these terms or the use or nature of the software, under any kind of 71 | legal claim.* 72 | 73 | ## Definitions 74 | 75 | The **licensor** is the entity offering these terms, and the **software** is the 76 | software the licensor makes available under these terms, including any portion 77 | of it. 78 | 79 | **you** refers to the individual or entity agreeing to these terms. 
80 | 81 | **your company** is any legal entity, sole proprietorship, or other kind of 82 | organization that you work for, plus all organizations that have control over, 83 | are under the control of, or are under common control with that 84 | organization. **control** means ownership of substantially all the assets of an 85 | entity, or the power to direct its management and policies by vote, contract, or 86 | otherwise. Control can be direct or indirect. 87 | 88 | **your licenses** are all the licenses granted to you for the software under 89 | these terms. 90 | 91 | **use** means anything you do with the software requiring one of your licenses. 92 | 93 | **trademark** means trademarks, service marks, and similar rights. 94 | -------------------------------------------------------------------------------- /.github/workflows/releases-production.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # IMPORTANT: 3 | # If you change the name of this file, you will have to update 4 | # https://github.com/elastic/oblt-infra/blob/main/conf/resources/repos/elastic-serverless-forwarder/01-aws-oidc-github.tf 5 | # to include the current one! 6 | 7 | # Workflow to push zip with dependencies to S3 bucket every time the ESF version is updated 8 | # (we need this for ESF terraform), and to publish the new SAR version 9 | name: releases-production 10 | 11 | on: 12 | workflow_run: 13 | workflows: [create-tag] 14 | types: 15 | - completed 16 | 17 | permissions: 18 | id-token: write # This is required for requesting the JWT 19 | contents: read # This is required for actions/checkout 20 | 21 | env: 22 | AWS_REGION: "eu-central-1" 23 | AWS_ACCOUNT_ID: "267093732750" # account 'elastic-observability-prod' 24 | 25 | jobs: 26 | 27 | get-esf-version: 28 | runs-on: ubuntu-latest 29 | timeout-minutes: 5 30 | 31 | outputs: 32 | version: ${{ steps.get-version.outputs.version }} 33 | 34 | steps: 35 | - uses: actions/checkout@v5 36 | 37 | - name: Get version number 38 | id: get-version 39 | shell: bash 40 | run: | 41 | version=$(grep -oE '[0-9]+\.[0-9]+\.[0-9]+(\-[a-zA-Z]+[0-9]+)?' share/version.py) 42 | echo "version=${version}" >> $GITHUB_OUTPUT 43 | echo "::notice::ESF version is ${version}." 44 | 45 | 46 | build-and-upload-dependencies: 47 | runs-on: ubuntu-latest 48 | timeout-minutes: 30 49 | needs: get-esf-version 50 | 51 | env: 52 | BUCKET_NAME: "esf-dependencies" 53 | 54 | steps: 55 | # See https://docs.aws.amazon.com/lambda/latest/dg/python-package.html#python-package-create-dependencies 56 | 57 | - uses: actions/checkout@v5 58 | with: 59 | ref: 'lambda-v${{ needs.get-esf-version.outputs.version }}' 60 | 61 | - uses: actions/setup-python@v6 62 | with: 63 | python-version: '3.12' 64 | cache: 'pip' # caching pip dependencies 65 | 66 | - name: Install requirements in a directory and zip it. 67 | shell: bash 68 | run: | 69 | pip3 install -r requirements.txt -t ./dependencies 70 | cd dependencies && zip -r ../lambda-v${{ needs.get-esf-version.outputs.version }}.zip . 71 | 72 | - name: Place handlers in the zip file. 
73 | shell: bash 74 | run: | 75 | zip -r ./lambda-v${{ needs.get-esf-version.outputs.version }}.zip main_aws.py 76 | zip -r ./lambda-v${{ needs.get-esf-version.outputs.version }}.zip handlers 77 | zip -r ./lambda-v${{ needs.get-esf-version.outputs.version }}.zip share 78 | zip -r ./lambda-v${{ needs.get-esf-version.outputs.version }}.zip storage 79 | zip -r ./lambda-v${{ needs.get-esf-version.outputs.version }}.zip shippers 80 | 81 | - name: Configure AWS credentials 82 | uses: elastic/oblt-actions/aws/auth@v1 83 | with: 84 | aws-account-id: "${{ env.AWS_ACCOUNT_ID }}" 85 | aws-region: "${{ env.AWS_REGION }}" 86 | 87 | - name: Copy file to s3 88 | run: | 89 | aws s3 cp ./lambda-v${{ needs.get-esf-version.outputs.version }}.zip s3://${{ env.BUCKET_NAME }}/ 90 | 91 | 92 | release-sar: 93 | runs-on: ubuntu-latest 94 | timeout-minutes: 30 95 | needs: get-esf-version 96 | 97 | env: 98 | BUCKET_NAME: "elastic-serverless-forwarder" 99 | 100 | steps: 101 | - uses: actions/checkout@v5 102 | with: 103 | ref: 'lambda-v${{ needs.get-esf-version.outputs.version }}' 104 | 105 | - uses: elastic/oblt-actions/aws/auth@v1 106 | with: 107 | aws-account-id: "${{ env.AWS_ACCOUNT_ID }}" 108 | aws-region: "${{ env.AWS_REGION }}" 109 | 110 | - uses: aws-actions/setup-sam@c71dd89d980e49367c70391e8ada4353f52f2800 # v2 111 | with: 112 | use-installer: true 113 | token: ${{ secrets.GITHUB_TOKEN }} 114 | 115 | - name: Build and package 116 | run: | 117 | .internal/aws/scripts/dist.sh \ 118 | elastic-serverless-forwarder \ 119 | ${{ needs.get-esf-version.outputs.version }} \ 120 | ${{ env.BUCKET_NAME }} \ 121 | ${{ env.AWS_ACCOUNT_ID }} \ 122 | ${{ env.AWS_REGION }} \ 123 | "Elastic" 124 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help license all-requirements requirements requirements-lint requirements-tests benchmark black coverage flake8 integration-test isort lint mypy test unit-test docker-benchmark docker-black docker-coverage docker-flake8 docker-integration-test docker-isort docker-lint docker-mypy docker-test docker-unit-test 2 | SHELL := /bin/bash 3 | 4 | help: ## Display this help text 5 | @grep -E '^[a-zA-Z_-]+[%]?:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' 6 | 7 | benchmark: PYTEST_ARGS=-m benchmark ## Run benchmarks on the host 8 | benchmark: export PYTEST_ADDOPTS=--benchmark-group-by=group 9 | benchmark: test 10 | 11 | unit-test: PYTEST_ARGS=-m unit ## Run unit tests on the host 12 | unit-test: test 13 | 14 | integration-test: PYTEST_ARGS=-m integration ## Run integration tests on the host 15 | integration-test: test 16 | 17 | test: PYTEST_ARGS_FLAGS=$(if $(PYTEST_ARGS),$(PYTEST_ARGS),-m not benchmark) ## Run unit tests on the host 18 | test: 19 | PYTEST_ARGS="${PYTEST_ARGS_FLAGS}" tests/scripts/${SCRIPTS_BASE_DIR}run_tests.sh 20 | 21 | coverage: export PYTEST_ADDOPTS=--cov=. 
--cov-context=test --cov-config=.coveragerc --cov-branch ## Run tests with coverage on the host 22 | coverage: export COVERAGE_FILE=.coverage 23 | coverage: test 24 | 25 | lint: black flake8 isort mypy ## Lint the project on the host 26 | 27 | black: ## Run black in the project on the host 28 | tests/scripts/${SCRIPTS_BASE_DIR}black.sh diff 29 | 30 | flake8: ## Run flake8 in the project on the host 31 | tests/scripts/${SCRIPTS_BASE_DIR}flake8.sh 32 | 33 | isort: ## Run isort in the project on the host 34 | tests/scripts/${SCRIPTS_BASE_DIR}isort.sh diff 35 | 36 | mypy: ## Run mypy in the project on the host 37 | tests/scripts/${SCRIPTS_BASE_DIR}mypy.sh 38 | 39 | package: ## Package lambda by installing python dependencies matching x86_64 40 | mkdir deps && \ 41 | pip install --target=./deps --platform manylinux2014_x86_64 --implementation cp --python-version 3.12 --only-binary=:all: --upgrade -r requirements.txt && \ 42 | cd ./deps && \ 43 | zip -r ../local_esf.zip . && \ 44 | cd .. && \ 45 | zip -r local_esf.zip main_aws.py handlers share storage shippers && \ 46 | rm -r ./deps 47 | 48 | clean: ## cleanup any leftover resources 49 | rm -f -r ./deps 50 | rm -f local_esf.zip 51 | 52 | docker-test: ## Run all tests on docker 53 | docker-test: SCRIPTS_BASE_DIR=docker/ 54 | docker-test: test 55 | 56 | docker-benchmark: ## Run benchmarks on docker 57 | docker-benchmark: SCRIPTS_BASE_DIR=docker/ 58 | docker-benchmark: benchmark 59 | 60 | docker-unit-test: ## Run unit tests on docker 61 | docker-unit-test: SCRIPTS_BASE_DIR=docker/ 62 | docker-unit-test: unit-test 63 | 64 | docker-integration-test: ## Run integration tests on docker 65 | docker-integration-test: SCRIPTS_BASE_DIR=docker/ 66 | docker-integration-test: integration-test 67 | 68 | docker-coverage: ## Run tests with coverage on docker 69 | docker-coverage: SCRIPTS_BASE_DIR=docker/ 70 | docker-coverage: coverage 71 | 72 | docker-lint: docker-black docker-flake8 docker-isort docker-mypy ## Lint the project on docker 73 | 74 | docker-black: ## Run black in the project on docker 75 | docker-black: SCRIPTS_BASE_DIR=docker/ 76 | docker-black: black 77 | 78 | docker-flake8: ## Run flake8 in the project on docker 79 | docker-flake8: SCRIPTS_BASE_DIR=docker/ 80 | docker-flake8: flake8 81 | 82 | docker-isort: ## Run isort in the project on docker 83 | docker-isort: SCRIPTS_BASE_DIR=docker/ 84 | docker-isort: isort 85 | 86 | docker-mypy: ## Run mypy in the project on docker 87 | docker-mypy: SCRIPTS_BASE_DIR=docker/ 88 | docker-mypy: mypy 89 | 90 | license: ## Run license validation in the project 91 | tests/scripts/license_headers_check.sh check 92 | 93 | all-requirements: requirements-lint requirements-tests requirements ## Install all requirements on the host 94 | 95 | requirements: .makecache/requirements.txt ## Install app requirements on the host 96 | 97 | requirements-lint: .makecache/requirements-lint.txt ## Install all linting requirements on the host 98 | 99 | requirements-tests: .makecache/requirements-tests.txt ## Install tests requirements on the host 100 | 101 | .makecache/requirements.txt: requirements.txt 102 | pip3 install -r requirements.txt 103 | touch .makecache/requirements.txt 104 | 105 | .makecache/requirements-lint.txt: requirements-lint.txt 106 | pip3 install -r requirements-lint.txt 107 | touch .makecache/requirements-lint.txt 108 | 109 | .makecache/requirements-tests.txt: requirements-tests.txt 110 | pip3 install -r requirements-tests.txt 111 | touch .makecache/requirements-tests.txt 112 | 
-------------------------------------------------------------------------------- /tests/handlers/aws/test_replay_trigger.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from typing import Optional 6 | from unittest import TestCase 7 | 8 | import mock 9 | import pytest 10 | 11 | from handlers.aws import OutputConfigException 12 | from handlers.aws.replay_trigger import ReplayedEventReplayHandler, get_shipper_for_replay_event 13 | from share import parse_config 14 | from shippers import CompositeShipper, ElasticsearchShipper, LogstashShipper 15 | 16 | 17 | @pytest.mark.unit 18 | class TestReplayTrigger(TestCase): 19 | @mock.patch("share.config._available_output_types", new=["elasticsearch", "logstash", "output_type"]) 20 | def test_get_shipper_for_replay_event(self) -> None: 21 | with self.subTest("Logstash shipper from replay event"): 22 | config_yaml_kinesis = """ 23 | inputs: 24 | - type: kinesis-data-stream 25 | id: arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream 26 | outputs: 27 | - type: logstash 28 | args: 29 | logstash_url: logstash_url 30 | """ 31 | config = parse_config(config_yaml_kinesis) 32 | replay_handler = ReplayedEventReplayHandler("arn:aws:sqs:eu-central-1:123456789:queue/replayqueue") 33 | logstash_shipper: Optional[CompositeShipper] = get_shipper_for_replay_event( 34 | config, 35 | "logstash_url", 36 | {}, 37 | "arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream", 38 | replay_handler, 39 | ) 40 | assert isinstance(logstash_shipper, CompositeShipper) 41 | assert isinstance(logstash_shipper._shippers[0], LogstashShipper) 42 | 43 | with self.subTest("Elasticsearch shipper from replay event"): 44 | config_yaml_kinesis = """ 45 | inputs: 46 | - type: kinesis-data-stream 47 | id: arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream 48 | outputs: 49 | - type: elasticsearch 50 | args: 51 | elasticsearch_url: "elasticsearch_url" 52 | username: "username" 53 | password: "password" 54 | es_datastream_name: "es_datastream_name" 55 | """ 56 | config = parse_config(config_yaml_kinesis) 57 | replay_handler = ReplayedEventReplayHandler("arn:aws:sqs:eu-central-1:123456789:queue/replayqueue") 58 | elasticsearch_shipper: Optional[CompositeShipper] = get_shipper_for_replay_event( 59 | config, 60 | "elasticsearch_url", 61 | {"es_datastream_name": "es_datastream_name"}, 62 | "arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream", 63 | replay_handler, 64 | ) 65 | 66 | assert isinstance(elasticsearch_shipper, CompositeShipper) 67 | assert isinstance(elasticsearch_shipper._shippers[0], ElasticsearchShipper) 68 | 69 | with self.subTest("Exception from output destination"): 70 | config_yaml_kinesis = """ 71 | inputs: 72 | - type: kinesis-data-stream 73 | id: arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream 74 | outputs: 75 | - type: output_type 76 | args: 77 | output_arg: output_arg 78 | """ 79 | config = parse_config(config_yaml_kinesis) 80 | replay_handler = ReplayedEventReplayHandler("arn:aws:sqs:eu-central-1:123456789:queue/replayqueue") 81 | with self.assertRaisesRegex(OutputConfigException, "test"): 82 | get_shipper_for_replay_event( 83 | config, 84 | "test", 85 | {}, 86 | 
"arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream", 87 | replay_handler, 88 | ) 89 | -------------------------------------------------------------------------------- /storage/s3.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from io import SEEK_SET, BytesIO 6 | from typing import Any, Optional 7 | 8 | import boto3 9 | import botocore.client 10 | import elasticapm # noqa: F401 11 | from botocore.response import StreamingBody 12 | 13 | from share import ExpandEventListFromField, ProtocolMultiline, shared_logger 14 | 15 | from .decorator import by_lines, inflate, json_collector, multi_line 16 | from .storage import ( 17 | CHUNK_SIZE, 18 | CommonStorage, 19 | GetByLinesIterator, 20 | StorageDecoratorIterator, 21 | StorageReader, 22 | is_gzip_content, 23 | ) 24 | 25 | 26 | class S3Storage(CommonStorage): 27 | """ 28 | S3 Storage. 29 | This class implements concrete S3 Storage 30 | """ 31 | 32 | _s3_client = boto3.client( 33 | "s3", config=botocore.client.Config(retries={"total_max_attempts": 10, "mode": "standard"}) 34 | ) 35 | 36 | def __init__( 37 | self, 38 | bucket_name: str, 39 | object_key: str, 40 | json_content_type: Optional[str] = None, 41 | multiline_processor: Optional[ProtocolMultiline] = None, 42 | event_list_from_field_expander: Optional[ExpandEventListFromField] = None, 43 | ): 44 | self._bucket_name: str = bucket_name 45 | self._object_key: str = object_key 46 | self.json_content_type = json_content_type 47 | self.multiline_processor = multiline_processor 48 | self.event_list_from_field_expander = event_list_from_field_expander 49 | 50 | @multi_line 51 | @json_collector 52 | @by_lines 53 | @inflate 54 | def _generate(self, range_start: int, body: BytesIO, is_gzipped: bool) -> StorageDecoratorIterator: 55 | """ 56 | Concrete implementation of the iterator for get_by_lines 57 | """ 58 | 59 | file_ending_offset: int = range_start 60 | 61 | def chunk_lambda() -> Any: 62 | return body.read(CHUNK_SIZE) 63 | 64 | if is_gzipped: 65 | reader: StorageReader = StorageReader(raw=body) 66 | yield reader, 0, 0, b"", None 67 | else: 68 | for chunk in iter(chunk_lambda, b""): 69 | file_starting_offset = file_ending_offset 70 | file_ending_offset += len(chunk) 71 | 72 | shared_logger.debug("_generate flat", extra={"offset": file_ending_offset}) 73 | yield chunk, file_ending_offset, file_starting_offset, b"", None 74 | 75 | def get_by_lines(self, range_start: int) -> GetByLinesIterator: 76 | original_range_start: int = range_start 77 | 78 | s3_object_head = self._s3_client.head_object(Bucket=self._bucket_name, Key=self._object_key) 79 | 80 | content_type: str = s3_object_head["ContentType"] 81 | content_length: int = s3_object_head["ContentLength"] 82 | shared_logger.debug( 83 | "get_by_lines", 84 | extra={ 85 | "content_type": content_type, 86 | "range_start": range_start, 87 | "bucket_name": self._bucket_name, 88 | "object_key": self._object_key, 89 | }, 90 | ) 91 | 92 | file_content: BytesIO = BytesIO(b"") 93 | self._s3_client.download_fileobj(self._bucket_name, self._object_key, file_content) 94 | 95 | file_content.flush() 96 | file_content.seek(0, SEEK_SET) 97 | is_gzipped: bool = False 98 | if is_gzip_content(file_content.readline()): 99 | is_gzipped = True 100 | range_start = 
0 101 | 102 | if range_start < content_length: 103 | file_content.seek(range_start, SEEK_SET) 104 | 105 | for log_event, line_starting_offset, line_ending_offset, _, event_expanded_offset in self._generate( 106 | original_range_start, file_content, is_gzipped 107 | ): 108 | assert isinstance(log_event, bytes) 109 | yield log_event, line_starting_offset, line_ending_offset, event_expanded_offset 110 | else: 111 | shared_logger.info(f"requested file content from {range_start}, file size {content_length}: skip it") 112 | 113 | def get_as_string(self) -> str: 114 | shared_logger.debug("get_as_string", extra={"bucket_name": self._bucket_name, "object_key": self._object_key}) 115 | s3_object = self._s3_client.get_object(Bucket=self._bucket_name, Key=self._object_key, Range="bytes=0-") 116 | 117 | body: StreamingBody = s3_object["Body"] 118 | return str(body.read(s3_object["ContentLength"]).decode("utf-8")) 119 | -------------------------------------------------------------------------------- /.internal/aws/cloudformation/application.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | Transform: AWS::Serverless-2016-10-31 3 | Description: > 4 | Elastic Serverless Forwarder 5 | 6 | SAM Template for the application, not intended to be deployed on its own 7 | 8 | Parameters: 9 | ElasticServerlessForwarderS3ConfigFile: 10 | Type: String 11 | ElasticServerlessForwarderSSMSecrets: 12 | Type: CommaDelimitedList 13 | ElasticServerlessForwarderKMSKeys: 14 | Type: CommaDelimitedList 15 | ElasticServerlessForwarderSQSEvents: 16 | Type: CommaDelimitedList 17 | ElasticServerlessForwarderSQSEvents2: 18 | Type: CommaDelimitedList 19 | ElasticServerlessForwarderS3SQSEvents: 20 | Type: CommaDelimitedList 21 | ElasticServerlessForwarderS3SQSEvents2: 22 | Type: CommaDelimitedList 23 | ElasticServerlessForwarderKinesisEvents: 24 | Type: CommaDelimitedList 25 | ElasticServerlessForwarderKinesisEvents2: 26 | Type: CommaDelimitedList 27 | ElasticServerlessForwarderCloudWatchLogsEvents: 28 | Type: CommaDelimitedList 29 | ElasticServerlessForwarderCloudWatchLogsEvents2: 30 | Type: CommaDelimitedList 31 | ElasticServerlessForwarderS3Buckets: 32 | Type: CommaDelimitedList 33 | ElasticServerlessForwarderSecurityGroups: 34 | Type: CommaDelimitedList 35 | ElasticServerlessForwarderSubnets: 36 | Type: CommaDelimitedList 37 | Resources: 38 | ElasticServerlessForwarderContinuingDLQ: 39 | Type: AWS::SQS::Queue 40 | Properties: 41 | DelaySeconds: 0 42 | QueueName: !Join [ "-", ["elastic-serverless-forwarder-continuing-dlq", !Select [4, !Split ['-', !Select [2, !Split ['/', !Ref AWS::StackId]]]]]] 43 | VisibilityTimeout: 910 44 | SqsManagedSseEnabled: true 45 | ElasticServerlessForwarderContinuingQueue: 46 | Type: AWS::SQS::Queue 47 | Properties: 48 | DelaySeconds: 0 49 | QueueName: !Join [ "-", ["elastic-serverless-forwarder-continuing-queue", !Select [4, !Split ['-', !Select [2, !Split ['/', !Ref AWS::StackId]]]]]] 50 | RedrivePolicy: { "deadLetterTargetArn" : !GetAtt ElasticServerlessForwarderContinuingDLQ.Arn, "maxReceiveCount" : 1 } 51 | VisibilityTimeout: 910 52 | SqsManagedSseEnabled: true 53 | ElasticServerlessForwarderReplayDLQ: 54 | Type: AWS::SQS::Queue 55 | Properties: 56 | DelaySeconds: 0 57 | QueueName: !Join [ "-", ["elastic-serverless-forwarder-replay-dlq", !Select [4, !Split ['-', !Select [2, !Split ['/', !Ref AWS::StackId]]]]]] 58 | VisibilityTimeout: 910 59 | SqsManagedSseEnabled: true 60 | ElasticServerlessForwarderReplayQueue: 61 
| Type: AWS::SQS::Queue 62 | Properties: 63 | DelaySeconds: 0 64 | QueueName: !Join [ "-", ["elastic-serverless-forwarder-replay-queue", !Select [4, !Split ['-', !Select [2, !Split ['/', !Ref AWS::StackId]]]]]] 65 | RedrivePolicy: { "deadLetterTargetArn" : !GetAtt ElasticServerlessForwarderReplayDLQ.Arn, "maxReceiveCount" : 3 } 66 | VisibilityTimeout: 910 67 | SqsManagedSseEnabled: true 68 | ApplicationElasticServerlessForwarder: 69 | Type: AWS::Serverless::Function 70 | Properties: 71 | Timeout: 900 72 | MemorySize: 512 73 | CodeUri: %codeUri% 74 | Runtime: python3.12 75 | Architectures: 76 | - x86_64 77 | Handler: main_aws.handler 78 | Environment: 79 | Variables: 80 | S3_CONFIG_FILE: !Ref ElasticServerlessForwarderS3ConfigFile 81 | SQS_CONTINUE_URL: !Ref ElasticServerlessForwarderContinuingQueue 82 | SQS_REPLAY_URL: !Ref ElasticServerlessForwarderReplayQueue 83 | Events: 84 | SQSContinuingEvent: 85 | Type: SQS 86 | Properties: 87 | Queue: !GetAtt ElasticServerlessForwarderContinuingQueue.Arn 88 | BatchSize: 10 89 | Enabled: true 90 | Fn::Transform: 91 | Type: AWS::CloudFormation::Macro 92 | Name: %sarAppName%-macro 93 | Metadata: 94 | AWS::ServerlessRepo::Application: 95 | Name: helper-application-%sarAppName% 96 | Description: | 97 | NOTE: DO NOT DEPLOY 98 | Deploy elastic-serverless-forwarder instead. This is a helper SAM template for the application and not intended to be deployed on its own. 99 | Author: %sarAuthorName% 100 | SemanticVersion: %semanticVersion% 101 | LicenseUrl: %codeUri%/LICENSE.txt 102 | HomePageUrl: https://github.com/elastic/elastic-serverless-forwarder 103 | SourceCodeUrl: https://github.com/elastic/elastic-serverless-forwarder 104 | Outputs: 105 | EsfLambdaFunctionARN: 106 | Description: ARN of the ESF Lambda Function 107 | Value: !GetAtt ApplicationElasticServerlessForwarder.Arn 108 | EsfLambdaFunctionRoleARN: 109 | Description: ARN of the IAM role associated with the ESF Lambda function 110 | Value: !GetAtt ApplicationElasticServerlessForwarderRole.Arn 111 | -------------------------------------------------------------------------------- /storage/payload.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | import base64 5 | import binascii 6 | import gzip 7 | from io import SEEK_SET, BytesIO 8 | from typing import Any, Optional 9 | 10 | from share import ExpandEventListFromField, ProtocolMultiline, shared_logger 11 | 12 | from .decorator import by_lines, inflate, json_collector, multi_line 13 | from .storage import ( 14 | CHUNK_SIZE, 15 | CommonStorage, 16 | GetByLinesIterator, 17 | StorageDecoratorIterator, 18 | StorageReader, 19 | is_gzip_content, 20 | ) 21 | 22 | 23 | class PayloadStorage(CommonStorage): 24 | """ 25 | PayloadStorage Storage. 26 | This class implements concrete Payload Storage. 
27 | The payload might be base64 and gzip encoded 28 | """ 29 | 30 | def __init__( 31 | self, 32 | payload: str, 33 | json_content_type: Optional[str] = None, 34 | multiline_processor: Optional[ProtocolMultiline] = None, 35 | event_list_from_field_expander: Optional[ExpandEventListFromField] = None, 36 | ): 37 | self._payload: str = payload 38 | self.json_content_type = json_content_type 39 | self.multiline_processor = multiline_processor 40 | self.event_list_from_field_expander = event_list_from_field_expander 41 | 42 | @multi_line 43 | @json_collector 44 | @by_lines 45 | @inflate 46 | def _generate(self, range_start: int, body: BytesIO, is_gzipped: bool) -> StorageDecoratorIterator: 47 | """ 48 | Concrete implementation of the iterator for get_by_lines 49 | """ 50 | 51 | file_ending_offset: int = range_start 52 | 53 | def chunk_lambda() -> Any: 54 | return body.read(CHUNK_SIZE) 55 | 56 | if is_gzipped: 57 | reader: StorageReader = StorageReader(raw=body) 58 | yield reader, 0, 0, b"", None 59 | else: 60 | for chunk in iter(chunk_lambda, b""): 61 | file_starting_offset = file_ending_offset 62 | file_ending_offset += len(chunk) 63 | 64 | shared_logger.debug("_generate flat", extra={"offset": file_ending_offset}) 65 | yield chunk, file_starting_offset, file_ending_offset, b"", None 66 | 67 | def get_by_lines(self, range_start: int) -> GetByLinesIterator: 68 | original_range_start: int = range_start 69 | 70 | is_gzipped: bool = False 71 | is_b64encoded: bool = False 72 | try: 73 | base64_decoded = base64.b64decode(self._payload, validate=True) 74 | # we try to unicode decode to catch if `base64.b64decode` decoded to non-valid unicode: 75 | # in this case `UnicodeDecodeError` will be thrown, this mean that the original was not base64 encoded 76 | # we try this only if it's not gzipped, because in that case `UnicodeDecodeError` will be thrown anyway 77 | if not is_gzip_content(base64_decoded): 78 | base64_decoded.decode("utf-8") 79 | # if `UnicodeDecodeError` was thrown, the content was not base64 encoded 80 | # and the below assignment will not be executed 81 | is_b64encoded = True 82 | else: 83 | # we have gzip content that was base64 encoded 84 | # let's do the proper assignment 85 | is_b64encoded = True 86 | except (UnicodeDecodeError, ValueError, binascii.Error): 87 | # it was not valid unicode base64 encoded value or is it bare gzip content 88 | # just take as it is and encode to unicode bytes 89 | base64_decoded = self._payload.encode("utf-8") 90 | 91 | if is_gzip_content(base64_decoded): 92 | is_gzipped = True 93 | range_start = 0 94 | 95 | shared_logger.debug( 96 | "get_by_lines", 97 | extra={ 98 | "range_start": original_range_start, 99 | "is_b64encoded": is_b64encoded, 100 | "is_gzipped": is_gzipped, 101 | }, 102 | ) 103 | 104 | content_length = len(base64_decoded) 105 | if range_start < content_length: 106 | file_content: BytesIO = BytesIO(base64_decoded) 107 | 108 | file_content.flush() 109 | file_content.seek(range_start, SEEK_SET) 110 | 111 | for log_event, line_starting_offset, line_ending_offset, _, event_expanded_offset in self._generate( 112 | original_range_start, file_content, is_gzipped 113 | ): 114 | assert isinstance(log_event, bytes) 115 | yield log_event, line_starting_offset, line_ending_offset, event_expanded_offset 116 | else: 117 | shared_logger.info(f"requested payload content from {range_start}, payload size {content_length}: skip it") 118 | 119 | def get_as_string(self) -> str: 120 | try: 121 | base64_decoded = base64.b64decode(self._payload, validate=True) 122 
| if not is_gzip_content(base64_decoded): 123 | base64_decoded.decode("utf-8") 124 | except (UnicodeDecodeError, ValueError, binascii.Error): 125 | base64_decoded = self._payload.encode("utf-8") 126 | 127 | if is_gzip_content(base64_decoded): 128 | return gzip.decompress(base64_decoded).decode("utf-8") 129 | 130 | return base64_decoded.decode("utf-8") 131 | -------------------------------------------------------------------------------- /.internal/aws/scripts/dist.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | 6 | set -ex 7 | 8 | echo " AWS CLI (https://aws.amazon.com/cli/), AWS SAM CLI (https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/install-sam-cli.html), docker and Python3.12 with pip3 required" 9 | echo " Please, execute from root folder of the repo" 10 | 11 | if [[ $# -ne 5 && $# -ne 6 ]] 12 | then 13 | echo "Usage: $0 sar-app-name semantic-version bucket-name account-id region" 14 | echo " Arguments:" 15 | echo " sar-app-name: name of the app to be deployed in SAR" 16 | echo " semantic-version: semantic version of the app to deploy in SAR" 17 | echo " bucket-name: bucket name where to store the zip artifact for SAR code" 18 | echo " (it will be created if it doesn't exists, otherwise " 19 | echo " you need already to have proper access to it)" 20 | echo " account-id: AWS account id to use for deploying" 21 | echo " region: region where to deploy in SAR for" 22 | echo " sar-author-name: name of the author of the app to be deployed in SAR" 23 | echo " (default to Elastic))" 24 | exit 1 25 | fi 26 | 27 | SAR_APP_NAME="$1" 28 | SEMANTIC_VERSION="$2" 29 | BUCKET="$3" 30 | ACCOUNT_ID="$4" 31 | REGION="$5" 32 | SAR_AUTHOR_NAME="${6:-Elastic}" 33 | TMPDIR=$(mktemp -d /tmp/dist.XXXXXXXXXX) 34 | CODE_URI="${TMPDIR}/sources" 35 | 36 | trap "rm -rf ${TMPDIR}" EXIT 37 | 38 | aws s3api get-bucket-location --bucket "${BUCKET}" --region "${REGION}" || aws s3api create-bucket --acl private --bucket "${BUCKET}" --region "${REGION}" --create-bucket-configuration LocationConstraint="${REGION}" || aws s3api create-bucket --acl private --bucket "${BUCKET}" --region "${REGION}" 39 | 40 | # Check if region is in AWS GovCloud and create bucket arn 41 | if [[ "${REGION}" == *gov* ]]; then 42 | BUCKET_ARN="arn:aws-us-gov:s3:::${BUCKET}" 43 | AWS_OR_AWS_GOV="aws-us-gov" 44 | else 45 | BUCKET_ARN="arn:aws:s3:::${BUCKET}" 46 | AWS_OR_AWS_GOV="aws" 47 | fi 48 | 49 | BUCKET_RESOURCE="${BUCKET_ARN}/*" 50 | 51 | cat < "${TMPDIR}/policy.json" 52 | { 53 | "Version": "2012-10-17", 54 | "Statement": [ 55 | { 56 | "Effect": "Allow", 57 | "Principal": { 58 | "Service": "serverlessrepo.amazonaws.com" 59 | }, 60 | "Action": "s3:GetObject", 61 | "Resource": "${BUCKET_RESOURCE}", 62 | "Condition" : { 63 | "StringEquals": { 64 | "aws:SourceAccount": "${ACCOUNT_ID}" 65 | } 66 | } 67 | } 68 | ] 69 | } 70 | EOF 71 | 72 | aws s3api put-bucket-policy --bucket "${BUCKET}" --region "${REGION}" --policy "file://${TMPDIR}/policy.json" 73 | mkdir -v -p "${CODE_URI}" 74 | cp -v requirements.txt "${CODE_URI}/" 75 | cp -v main_aws.py "${CODE_URI}/" 76 | find {handlers,share,shippers,storage} -not -name "*__pycache__*" -type d -print0|xargs -t -0 -Idirname mkdir -v -p "${CODE_URI}/dirname" 77 | find 
{handlers,share,shippers,storage} -not -name "*__pycache__*" -name "*.py" -exec cp -v '{}' "${CODE_URI}/{}" \; 78 | cp -v LICENSE.txt "${CODE_URI}/LICENSE.txt" 79 | cp -v docs/README-AWS.md "${CODE_URI}/README.md" 80 | 81 | sed -e "s|%codeUri%|${CODE_URI}|g" -e "s/%sarAppName%/${SAR_APP_NAME}/g" -e "s/%sarAuthorName%/${SAR_AUTHOR_NAME}/g" -e "s/%semanticVersion%/${SEMANTIC_VERSION}/g" -e "s/%awsRegion%/${REGION}/g" -e "s/%awsOrGov%/${AWS_OR_AWS_GOV}/g" .internal/aws/cloudformation/macro.yaml > "${TMPDIR}/macro.yaml" 82 | sed -e "s|%codeUri%|${CODE_URI}|g" -e "s/%sarAppName%/${SAR_APP_NAME}/g" -e "s/%sarAuthorName%/${SAR_AUTHOR_NAME}/g" -e "s/%semanticVersion%/${SEMANTIC_VERSION}/g" -e "s/%awsRegion%/${REGION}/g" -e "s/%accountID%/${ACCOUNT_ID}/g" -e "s/%awsOrGov%/${AWS_OR_AWS_GOV}/g" .internal/aws/cloudformation/template.yaml > "${TMPDIR}/template.yaml" 83 | sed -e "s|%codeUri%|${CODE_URI}|g" -e "s/%sarAppName%/${SAR_APP_NAME}/g" -e "s/%sarAuthorName%/${SAR_AUTHOR_NAME}/g" -e "s/%semanticVersion%/${SEMANTIC_VERSION}/g" -e "s/%awsRegion%/${REGION}/g" -e "s/%codeURIBucket%/${BUCKET}/g" .internal/aws/cloudformation/application.yaml > "${TMPDIR}/application.yaml" 84 | 85 | sam build --debug --use-container --build-dir "${TMPDIR}/.aws-sam/build/macro" --template-file "${TMPDIR}/macro.yaml" --region "${REGION}" 86 | sam package --template-file "${TMPDIR}/.aws-sam/build/macro/template.yaml" --output-template-file "${TMPDIR}/.aws-sam/build/macro/packaged.yaml" --s3-bucket "${BUCKET}" --region "${REGION}" 87 | sam publish --template "${TMPDIR}/.aws-sam/build/macro/packaged.yaml" --region "${REGION}" 88 | 89 | sam build --debug --use-container --build-dir "${TMPDIR}/.aws-sam/build/application" --template-file "${TMPDIR}/application.yaml" --region "${REGION}" 90 | sam package --template-file "${TMPDIR}/.aws-sam/build/application/template.yaml" --output-template-file "${TMPDIR}/.aws-sam/build/application/packaged.yaml" --s3-bucket "${BUCKET}" --region "${REGION}" 91 | sam publish --template "${TMPDIR}/.aws-sam/build/application/packaged.yaml" --region "${REGION}" 92 | 93 | sam build --debug --use-container --build-dir "${TMPDIR}/.aws-sam/build/template" --template-file "${TMPDIR}/template.yaml" --region "${REGION}" 94 | sam package --template-file "${TMPDIR}/.aws-sam/build/template/template.yaml" --output-template-file "${TMPDIR}/.aws-sam/build/template/packaged.yaml" --s3-bucket "${BUCKET}" --region "${REGION}" 95 | sam publish --template "${TMPDIR}/.aws-sam/build/template/packaged.yaml" --region "${REGION}" 96 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to the Elastic Serverless Forwarder 2 | 3 | If you have a bugfix or new feature that you would like to contribute to 4 | elastic-serverless-forwarder, please find or open an issue about it first. Talk about what you would like to do. It may be that somebody is already working on it, or that there are particular issues that you should know about before implementing the change. 5 | 6 | We enjoy working with contributors to get their code accepted. There are many approaches to fixing a problem and it is important to find the best approach before writing too much code. 7 | 8 | ## Running Elastic Serverless Forwarder locally 9 | 10 | We don't provide yet a tool for running Elastic Serverless Forwarder locally. A good first contribution would be to add such support. 
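Until such a tool exists, one rough way to experiment is to import the Lambda entry point directly and call it with a hand-crafted trigger event. The sketch below is illustrative only and makes several assumptions: valid AWS credentials are available, the referenced config file and SQS queues exist (all URLs and ARNs shown are placeholders), and the fake context exposes everything the handler needs (the real handler may require more than shown here).

```python
import os

# The same variables the SAM template wires into the Lambda environment (placeholder values).
os.environ["S3_CONFIG_FILE"] = "s3://my-config-bucket/config.yaml"
os.environ["SQS_CONTINUE_URL"] = "https://sqs.eu-west-1.amazonaws.com/123456789012/esf-continuing-queue"
os.environ["SQS_REPLAY_URL"] = "https://sqs.eu-west-1.amazonaws.com/123456789012/esf-replay-queue"

from main_aws import handler  # the entry point the Lambda runtime invokes


class FakeLambdaContext:
    """Minimal stand-in for the AWS Lambda context object; adjust if the handler needs more attributes."""

    invoked_function_arn = "arn:aws:lambda:eu-west-1:123456789012:function:esf-local"
    aws_request_id = "00000000-0000-0000-0000-000000000000"

    @staticmethod
    def get_remaining_time_in_millis() -> int:
        return 300_000


# Replace with a real SQS, S3-SQS, Kinesis or CloudWatch Logs trigger payload you want to test.
lambda_event: dict = {"Records": []}

print(handler(lambda_event, FakeLambdaContext()))
```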
11 | 12 | ## Code structure 13 | 14 | The code in the repository is organised according to some conventions. 15 | The folders starting with a dot (`.`) are to be considered internal to the Elastic workflow and you should not usually be concerned about them. 16 | 17 | The `docs` folder contains the documentation specific to every serverless solution we support (at the moment only AWS Lambda). 18 | 19 | The `tests` folder contains both unit and integration tests for the whole code base, structured to mirror the folders/packages structure of the main code base. An exception is the `scripts` folder, where maintenance helper scripts (usually in `bash`) reside. 20 | 21 | So far we have identified three components of the project, on top of the serverless function handlers for every supported cloud solution (at the moment only AWS Lambda): 22 | * `shippers`: the package related to outputs. Whether you are sending data to Elasticsearch, Logstash, or anything else, your code must reside here. 23 | * `storage`: the package related to inputs. Whether you are reading data from S3, a bytes blob payload, or anything else, your code must reside here. 24 | * `share`: the package for common shared utilities that are not related to the above domains and don't contain code related to specific application handling. 25 | 26 | The `handlers` package contains the code with logic specific to each serverless solution, each in its own subpackage (at the moment only AWS Lambda): everything related to a specific cloud serverless solution must reside there. 27 | 28 | 29 | ## Contributing Code Changes 30 | 31 | The process for contributing to any of the Elastic repositories is similar. 32 | 33 | 1. Please make sure you have signed the [Contributor License Agreement](http://www.elastic.co/contributor-agreement/). We are not asking you to assign copyright to us, but to give us the right to distribute your code without restriction. We ask this of all contributors in order to assure our users of the origin and continuing existence of the code. You only need to sign the CLA once. 34 | 35 | 2. Install the required dependencies. We have three different dependency sets, respectively for the app, linting and tests. You can install them all together or separately, either in a virtualenv or not, according to your preferences. The `make` targets provided are the following: 36 | * `all-requirements` Install all requirements on the host 37 | * `requirements` Install app requirements on the host 38 | * `requirements-lint` Install all linting requirements on the host 39 | * `requirements-tests` Install tests requirements on the host 40 | 41 | 3. Run the linters, license check and test suite to ensure your changes do not break existing code. The `make` targets provided are the following: 42 | * `lint` Lint the project on the host 43 | * `black` Run black in the project on the host 44 | * `isort` Run isort in the project on the host 45 | * `mypy` Run mypy in the project on the host 46 | * `license` Run license validation in the project 47 | * `test` Run all tests on the host 48 | * `integration-test` Run integration tests on the host 49 | * `unit-test` Run unit tests on the host 50 | * `coverage` Run tests on the host with coverage 51 | 52 | 4. 
A subset of the previous tasks can be run in docker (that's the method used in CI); these are the equivalent `make` targets provided: 53 | * `docker-lint` Lint the project on docker 54 | * `docker-black` Run black in the project on docker 55 | * `docker-isort` Run isort in the project on docker 56 | * `docker-mypy` Run mypy in the project on docker 57 | * `docker-test` Run tests on docker 58 | * `docker-integration-test` Run integration tests on docker 59 | * `docker-unit-test` Run unit tests on docker 60 | * `docker-coverage` Run tests on docker with coverage 61 | 62 | 5. Scripts to automatically fix linting and license issues are provided where available. They are the following: 63 | * `./tests/scripts/black.sh fix` 64 | * `./tests/scripts/isort.sh fix` 65 | * `./tests/scripts/license_headers_check.sh fix` 66 | 67 | 6. Rebase your changes. Update your local repository with the most recent code from the main elastic-serverless-forwarder repository, and rebase your branch on top of the latest `main` elastic-serverless-forwarder branch. 68 | 69 | 7. Submit a pull request. Push your local changes to your forked copy of the repository and submit a pull request. In the pull request, describe what your changes do and mention the number of the issue where discussion has taken place, e.g. "Closes #123". Please add or modify tests related to your changes. We tend to reach 100% coverage for all the code outside the `handlers` folder. 70 | 71 | Then sit back and wait. There will probably be a discussion about the pull 72 | request and, if any changes are needed, we would love to work with you to get your pull request merged into elastic-serverless-forwarder. 73 | -------------------------------------------------------------------------------- /shippers/logstash.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | import gzip 6 | from typing import Any, Optional 7 | 8 | from requests import Session 9 | from requests.adapters import HTTPAdapter 10 | from requests.exceptions import RequestException 11 | from urllib3.util.retry import Retry 12 | 13 | from share import json_dumper, normalise_event, shared_logger 14 | from shippers.shipper import EventIdGeneratorCallable, ReplayHandlerCallable 15 | 16 | _EVENT_SENT = "_EVENT_SENT" 17 | _EVENT_BUFFERED = "_EVENT_BUFFERED" 18 | 19 | _TIMEOUT = 10 20 | _MAX_RETRIES = 4 21 | _STATUS_FORCE_LIST = [429, 500, 502, 503, 504] 22 | # A backoff factor to apply between attempts after the second try. urllib3 will sleep for: 23 | # {backoff factor} * (2 ** ({number of total retries} - 1)) 24 | # seconds. If the backoff_factor is 1, then sleep() will sleep for [0s, 2s, 4s, …] between retries.
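# For example, with the values below (_MAX_RETRIES = 4, _BACKOFF_FACTOR = 1) the waits between attempts
# are roughly 0s, 2s, 4s and 8s before the request finally fails (illustrative; the exact sequence
# depends on the urllib3 version in use).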
25 | _BACKOFF_FACTOR = 1 26 | 27 | 28 | class LogstashAdapter(HTTPAdapter): 29 | """ 30 | An HTTP adapter specific for Logstash that encapsulates the retry/backoff parameters and allows to verify 31 | certificates by SSL fingerprint 32 | """ 33 | 34 | def __init__(self, fingerprint: str, *args, **kwargs): # type: ignore 35 | self._fingerprint = fingerprint 36 | retry_strategy = Retry(total=_MAX_RETRIES, backoff_factor=_BACKOFF_FACTOR, status_forcelist=_STATUS_FORCE_LIST) 37 | HTTPAdapter.__init__(self, max_retries=retry_strategy, *args, **kwargs) # type: ignore 38 | 39 | def init_poolmanager(self, *args, **kwargs): # type: ignore 40 | if self._fingerprint: 41 | kwargs["assert_fingerprint"] = self._fingerprint 42 | return super().init_poolmanager(*args, **kwargs) # type: ignore 43 | 44 | 45 | class LogstashShipper: 46 | """ 47 | Logstash Shipper. 48 | This class implements concrete Logstash Shipper 49 | """ 50 | 51 | def __init__( 52 | self, 53 | logstash_url: str = "", 54 | username: str = "", 55 | password: str = "", 56 | max_batch_size: int = 1, 57 | compression_level: int = 9, 58 | ssl_assert_fingerprint: str = "", 59 | tags: list[str] = [], 60 | ) -> None: 61 | if logstash_url: 62 | self._logstash_url = logstash_url 63 | else: 64 | raise ValueError("You must provide logstash_url") 65 | 66 | self._replay_handler: Optional[ReplayHandlerCallable] = None 67 | self._event_id_generator: Optional[EventIdGeneratorCallable] = None 68 | self._events_batch: list[dict[str, Any]] = [] 69 | 70 | self._max_batch_size = max_batch_size 71 | 72 | self._tags = tags 73 | 74 | if 0 <= compression_level <= 9: 75 | self._compression_level = compression_level 76 | else: 77 | raise ValueError("compression_level must be an integer value between 0 and 9") 78 | 79 | self._replay_args: dict[str, Any] = {} 80 | 81 | self._session = self._get_session(self._logstash_url, username, password, ssl_assert_fingerprint) 82 | 83 | @staticmethod 84 | def _get_session(url: str, username: str, password: str, ssl_assert_fingerprint: str) -> Session: 85 | session = Session() 86 | 87 | if username: 88 | session.auth = (username, password) 89 | 90 | if ssl_assert_fingerprint: 91 | session.verify = False 92 | 93 | session.mount(url, LogstashAdapter(ssl_assert_fingerprint)) 94 | 95 | return session 96 | 97 | def send(self, event: dict[str, Any]) -> str: 98 | if "_id" not in event and self._event_id_generator is not None: 99 | event["_id"] = self._event_id_generator(event) 100 | 101 | event["tags"] = ["forwarded"] 102 | event["tags"] += self._tags 103 | 104 | event = normalise_event(event) 105 | 106 | # Let's move _id to @metadata._id for logstash 107 | if "_id" in event: 108 | event["@metadata"] = {"_id": event["_id"]} 109 | del event["_id"] 110 | 111 | self._events_batch.append(event) 112 | if len(self._events_batch) < self._max_batch_size: 113 | return _EVENT_BUFFERED 114 | 115 | self._send() 116 | 117 | return _EVENT_SENT 118 | 119 | def set_event_id_generator(self, event_id_generator: EventIdGeneratorCallable) -> None: 120 | self._event_id_generator = event_id_generator 121 | 122 | def set_replay_handler(self, replay_handler: ReplayHandlerCallable) -> None: 123 | self._replay_handler = replay_handler 124 | 125 | def flush(self) -> None: 126 | if len(self._events_batch) > 0: 127 | self._send() 128 | 129 | self._events_batch.clear() 130 | 131 | return 132 | 133 | def _send(self) -> None: 134 | ndjson = "\n".join(json_dumper(event) for event in self._events_batch) 135 | 136 | try: 137 | response = self._session.put( 138 | 
self._logstash_url, 139 | data=gzip.compress(ndjson.encode("utf-8"), self._compression_level), 140 | headers={"Content-Encoding": "gzip", "Content-Type": "application/x-ndjson"}, 141 | timeout=_TIMEOUT, 142 | ) 143 | 144 | if response.status_code == 401: 145 | raise RequestException("Authentication error") 146 | 147 | self._events_batch.clear() 148 | 149 | except RequestException as e: 150 | shared_logger.error( 151 | f"logstash shipper encountered an error while publishing events to logstash. Error: {str(e)}" 152 | ) 153 | 154 | if self._replay_handler is not None: 155 | for event in self._events_batch: 156 | # let's put back the _id field from @metadata._id 157 | if "@metadata" in event and "_id" in event["@metadata"]: 158 | event["_id"] = event["@metadata"]["_id"] 159 | del event["@metadata"] 160 | 161 | self._replay_handler(self._logstash_url, self._replay_args, event) 162 | -------------------------------------------------------------------------------- /tests/shippers/test_composite.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from typing import Any 6 | from unittest import TestCase 7 | 8 | import pytest 9 | 10 | from share import IncludeExcludeFilter, IncludeExcludeRule 11 | from shippers import ( 12 | EVENT_IS_EMPTY, 13 | EVENT_IS_FILTERED, 14 | EVENT_IS_SENT, 15 | CompositeShipper, 16 | EventIdGeneratorCallable, 17 | ReplayHandlerCallable, 18 | ) 19 | 20 | 21 | class DummyShipper: 22 | def send(self, event: dict[str, Any]) -> str: 23 | self._sent.append(event) 24 | return "dummy" 25 | 26 | def set_event_id_generator(self, event_id_generator: EventIdGeneratorCallable) -> None: 27 | self._event_id_generator = event_id_generator 28 | 29 | def set_replay_handler(self, replay_handler: ReplayHandlerCallable) -> None: 30 | self._replay_handler = replay_handler 31 | 32 | def flush(self) -> None: 33 | self._flushed = True 34 | 35 | def __init__(self, **kwargs: Any): 36 | self._sent: list[dict[str, Any]] = [] 37 | self._flushed = False 38 | 39 | 40 | @pytest.mark.unit 41 | class TestCompositeShipper(TestCase): 42 | def test_add_shipper(self) -> None: 43 | dummy_shipper = DummyShipper() 44 | composite_shipper = CompositeShipper() 45 | composite_shipper.add_shipper(dummy_shipper) 46 | assert composite_shipper._shippers == [dummy_shipper] 47 | 48 | def test_add_include_exclude_filter(self) -> None: 49 | composite_shipper = CompositeShipper() 50 | include_exclude_filter = IncludeExcludeFilter() 51 | composite_shipper.add_include_exclude_filter(include_exclude_filter) 52 | assert composite_shipper._include_exclude_filter == include_exclude_filter 53 | 54 | def test_send(self) -> None: 55 | dummy_shipper = DummyShipper() 56 | composite_shipper = CompositeShipper() 57 | composite_shipper.add_shipper(dummy_shipper) 58 | assert EVENT_IS_EMPTY == composite_shipper.send({"miss": "message field"}) 59 | assert dummy_shipper._sent == [] 60 | 61 | assert EVENT_IS_EMPTY == composite_shipper.send({"fields": {"message": ""}}) 62 | assert dummy_shipper._sent == [] 63 | 64 | assert EVENT_IS_EMPTY == composite_shipper.send({"message": ""}) 65 | assert dummy_shipper._sent == [] 66 | 67 | assert EVENT_IS_SENT == composite_shipper.send({"message": "will pass"}) 68 | assert dummy_shipper._sent == [{"message": "will 
pass"}] 69 | 70 | dummy_shipper._sent = [] 71 | 72 | assert EVENT_IS_SENT == composite_shipper.send({"fields": {"message": "will pass"}}) 73 | assert dummy_shipper._sent == [{"fields": {"message": "will pass"}}] 74 | 75 | dummy_shipper._sent = [] 76 | 77 | include_exclude_filter = IncludeExcludeFilter(include_patterns=[IncludeExcludeRule(pattern="match")]) 78 | composite_shipper.add_include_exclude_filter(include_exclude_filter) 79 | 80 | assert EVENT_IS_EMPTY == composite_shipper.send({"miss": "message field"}) 81 | assert dummy_shipper._sent == [] 82 | 83 | assert EVENT_IS_EMPTY == composite_shipper.send({"fields": {"message": ""}}) 84 | assert dummy_shipper._sent == [] 85 | 86 | assert EVENT_IS_EMPTY == composite_shipper.send({"message": ""}) 87 | assert dummy_shipper._sent == [] 88 | 89 | assert EVENT_IS_SENT == composite_shipper.send({"fields": {"message": "match"}}) 90 | assert dummy_shipper._sent == [{"fields": {"message": "match"}}] 91 | 92 | dummy_shipper._sent = [] 93 | 94 | include_exclude_filter = IncludeExcludeFilter(include_patterns=[IncludeExcludeRule(pattern="match")]) 95 | composite_shipper.add_include_exclude_filter(include_exclude_filter) 96 | assert EVENT_IS_SENT == composite_shipper.send({"message": "match"}) 97 | assert dummy_shipper._sent == [{"message": "match"}] 98 | 99 | dummy_shipper._sent = [] 100 | 101 | assert EVENT_IS_EMPTY == composite_shipper.send({"miss": "message field"}) 102 | assert dummy_shipper._sent == [] 103 | 104 | assert EVENT_IS_EMPTY == composite_shipper.send({"fields": {"message": ""}}) 105 | assert dummy_shipper._sent == [] 106 | 107 | assert EVENT_IS_EMPTY == composite_shipper.send({"message": ""}) 108 | assert dummy_shipper._sent == [] 109 | 110 | include_exclude_filter = IncludeExcludeFilter(include_patterns=[IncludeExcludeRule(pattern="not match")]) 111 | composite_shipper.add_include_exclude_filter(include_exclude_filter) 112 | 113 | assert EVENT_IS_EMPTY == composite_shipper.send({"miss": "message field"}) 114 | assert dummy_shipper._sent == [] 115 | 116 | assert EVENT_IS_EMPTY == composite_shipper.send({"fields": {"message": ""}}) 117 | assert dummy_shipper._sent == [] 118 | 119 | assert EVENT_IS_EMPTY == composite_shipper.send({"message": ""}) 120 | assert dummy_shipper._sent == [] 121 | 122 | assert EVENT_IS_FILTERED == composite_shipper.send({"fields": {"message": "a message"}}) 123 | assert dummy_shipper._sent == [] 124 | 125 | dummy_shipper._sent = [] 126 | 127 | assert EVENT_IS_FILTERED == composite_shipper.send({"message": "a message"}) 128 | assert dummy_shipper._sent == [] 129 | 130 | def test_set_event_id_generator(self) -> None: 131 | dummy_shipper = DummyShipper() 132 | composite_shipper = CompositeShipper() 133 | composite_shipper.add_shipper(dummy_shipper) 134 | 135 | def event_id_generator(event: dict[str, Any]) -> str: 136 | return "" 137 | 138 | composite_shipper.set_event_id_generator(event_id_generator=event_id_generator) 139 | assert dummy_shipper._event_id_generator == event_id_generator 140 | 141 | def test_set_replay_handler(self) -> None: 142 | dummy_shipper = DummyShipper() 143 | composite_shipper = CompositeShipper() 144 | composite_shipper.add_shipper(dummy_shipper) 145 | 146 | def replay_handler(output_type: str, output_args: dict[str, Any], payload: dict[str, Any]) -> None: 147 | return 148 | 149 | composite_shipper.set_replay_handler(replay_handler=replay_handler) 150 | assert dummy_shipper._replay_handler == replay_handler 151 | 152 | def test_flush(self) -> None: 153 | dummy_shipper = DummyShipper() 154 | 
composite_shipper = CompositeShipper() 155 | composite_shipper.add_shipper(dummy_shipper) 156 | composite_shipper.flush() 157 | assert dummy_shipper._flushed is True 158 | -------------------------------------------------------------------------------- /share/expand_event_list_from_field.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from copy import deepcopy 6 | from typing import Any, Callable, Iterator, Optional, Union 7 | 8 | from .json import json_dumper 9 | from .logger import logger as shared_logger 10 | 11 | # ExpandEventListFromFieldResolverCallable accepts an integration_scope and the field to expand events list from as 12 | # arguments. It returns the resolved name of the field to expand the events list from. 13 | ExpandEventListFromFieldResolverCallable = Callable[[str, str], str] 14 | 15 | 16 | class ExpandEventListFromField: 17 | def __init__( 18 | self, 19 | field_to_expand_event_list_from: str, 20 | integration_scope: str, 21 | field_resolver: ExpandEventListFromFieldResolverCallable, 22 | root_fields_to_add_to_expanded_event: Optional[Union[str, list[str]]] = None, 23 | last_event_expanded_offset: Optional[int] = None, 24 | ): 25 | self._last_event_expanded_offset: Optional[int] = last_event_expanded_offset 26 | self._root_fields_to_add_to_expanded_event = root_fields_to_add_to_expanded_event 27 | self._field_to_expand_event_list_from: str = field_resolver(integration_scope, field_to_expand_event_list_from) 28 | 29 | def _expand_event_list_from_field( 30 | self, json_object: dict[str, Any], starting_offset: int, ending_offset: int 31 | ) -> Iterator[tuple[Any, int, Optional[int], bool, bool]]: 32 | if len(self._field_to_expand_event_list_from) == 0 or self._field_to_expand_event_list_from not in json_object: 33 | yield None, starting_offset, 0, True, False 34 | else: 35 | events_list: list[Any] = json_object[self._field_to_expand_event_list_from] 36 | # let's set to 1 if empty list to avoid division by zero in the line below, 37 | # for loop will be not executed anyway 38 | offset_skew = 0 39 | events_list_length = max(1, len(events_list)) 40 | avg_event_length = (ending_offset - starting_offset) / events_list_length 41 | if self._last_event_expanded_offset is not None and len(events_list) > self._last_event_expanded_offset + 1: 42 | offset_skew = self._last_event_expanded_offset + 1 43 | events_list = events_list[offset_skew:] 44 | 45 | # Let's compute the root_fields_to_add_to_expanded_event only once per events to expand 46 | root_fields_to_add_to_expanded_event: dict[str, Any] = {} 47 | if self._root_fields_to_add_to_expanded_event == "all": 48 | root_fields_to_add_to_expanded_event = deepcopy(json_object) 49 | del root_fields_to_add_to_expanded_event[self._field_to_expand_event_list_from] 50 | # we want to add only a list of root fields 51 | elif isinstance(self._root_fields_to_add_to_expanded_event, list): 52 | for root_field_to_add_to_expanded_event in self._root_fields_to_add_to_expanded_event: 53 | if root_field_to_add_to_expanded_event in json_object: 54 | root_fields_to_add_to_expanded_event[root_field_to_add_to_expanded_event] = json_object[ 55 | root_field_to_add_to_expanded_event 56 | ] 57 | else: 58 | shared_logger.debug( 59 | 
f"`{root_field_to_add_to_expanded_event}` field specified in " 60 | f"`root_fields_to_add_to_expanded_event` parameter is not present at root level" 61 | f" to expanded event not present at root level" 62 | ) 63 | 64 | for event_n, event in enumerate(events_list): 65 | if self._root_fields_to_add_to_expanded_event: 66 | # we can and want to add the root fields only in case the event is a not empty json object 67 | if isinstance(event, dict) and len(event) > 0: 68 | # we want to add all the root fields 69 | event.update(root_fields_to_add_to_expanded_event) 70 | else: 71 | shared_logger.debug("root fields to be added on a non json object event") 72 | 73 | event_n += offset_skew 74 | yield event, int( 75 | starting_offset + (event_n * avg_event_length) 76 | ), event_n, event_n == events_list_length - 1, True 77 | 78 | def expand( 79 | self, log_event: bytes, json_object: Optional[dict[str, Any]], starting_offset: int, ending_offset: int 80 | ) -> Iterator[tuple[bytes, int, int, Optional[int]]]: 81 | if json_object is None: 82 | yield log_event, starting_offset, ending_offset, None 83 | else: 84 | # expanded_ending_offset is set to the starting_offset because if we want to set it to the beginning of the 85 | # json object in case of a message from the continuation queue. if we update it, if the payload is continued 86 | # we will fetch the content of the payload from the middle of the json object, failing to parse it 87 | expanded_ending_offset: int = starting_offset 88 | 89 | for ( 90 | expanded_event, 91 | expanded_starting_offset, 92 | expanded_event_n, 93 | is_last_expanded_event, 94 | event_was_expanded, 95 | ) in self._expand_event_list_from_field(json_object, starting_offset, ending_offset): 96 | if event_was_expanded: 97 | # empty values once json dumped might have a len() greater than 0, this will prevent 98 | # them to be skipped later as empty value, so we yield as zero length bytes string 99 | if not expanded_event: 100 | expanded_log_event = b"" 101 | else: 102 | expanded_log_event = json_dumper(expanded_event).encode("utf-8") 103 | 104 | if is_last_expanded_event: 105 | expanded_event_n = None 106 | # only when we reach the last expanded event we can move the ending offset 107 | expanded_ending_offset = ending_offset 108 | else: 109 | expanded_event_n = None 110 | expanded_log_event = log_event 111 | expanded_ending_offset = ending_offset 112 | 113 | yield expanded_log_event, expanded_starting_offset, expanded_ending_offset, expanded_event_n 114 | -------------------------------------------------------------------------------- /share/secretsmanager.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | 5 | import re 6 | from typing import Any, Union 7 | 8 | import boto3 9 | from botocore.client import BaseClient as BotoBaseClient 10 | from orjson import JSONDecodeError 11 | 12 | from .json import json_parser 13 | from .logger import logger as shared_logger 14 | 15 | 16 | def _get_aws_sm_client(region_name: str) -> BotoBaseClient: 17 | """ 18 | Getter for secrets manager client 19 | Extracted for mocking 20 | """ 21 | 22 | return boto3.client("secretsmanager", region_name=region_name) 23 | 24 | 25 | def aws_sm_expander(config_yaml: str) -> str: 26 | """ 27 | Secrets Manager expander for config file 28 | It scans the file for the secrets manager arn pattern, checks for correct configuration, 29 | retrieves the values from the secret manager and replaces them in the config file. 30 | Exceptions will be raised for the following scenarios: 31 | - Not respecting the arn pattern 32 | - Input is for both plain text and json keys for the same secret manager name 33 | - The fetched value is empty 34 | """ 35 | 36 | config_secret_entry_values: dict[str, str] = {} 37 | secret_arn_by_secret_name: dict[str, str] = {} 38 | secret_key_values_cache: dict[str, dict[str, Any]] = {} 39 | secret_consistency_len_check: dict[str, int] = {} 40 | 41 | re_pattern = r"arn:aws:secretsmanager:(?:[^:]+)?:(?:[^:]+)?:secret:(?:[^\"']+)?" 42 | found_secrets_entries = re.findall(re_pattern, config_yaml) 43 | 44 | for secret_arn in found_secrets_entries: 45 | splitted_secret_arn = secret_arn.split(":") 46 | 47 | if len(splitted_secret_arn) != 7 and len(splitted_secret_arn) != 8: 48 | raise SyntaxError("Invalid arn format: {}".format(secret_arn)) 49 | 50 | if secret_arn not in config_secret_entry_values: 51 | config_secret_entry_values[secret_arn] = "" 52 | 53 | region = splitted_secret_arn[3] 54 | secrets_manager_name = splitted_secret_arn[6] 55 | 56 | if region == "": 57 | raise ValueError("Must be provided region in arn: {}".format(secret_arn)) 58 | 59 | if secrets_manager_name == "": 60 | raise ValueError("Must be provided secrets manager name in arn: {}".format(secret_arn)) 61 | 62 | if secrets_manager_name not in secret_consistency_len_check: 63 | secret_consistency_len_check[secrets_manager_name] = len(splitted_secret_arn) 64 | else: 65 | if secret_consistency_len_check[secrets_manager_name] != len(splitted_secret_arn): 66 | raise ValueError( 67 | "You cannot have both plain text and json key for the same secret: {}".format(secret_arn) 68 | ) 69 | 70 | if region not in secret_key_values_cache: 71 | secret_key_values_cache[region] = {} 72 | 73 | if secrets_manager_name not in secret_key_values_cache[region]: 74 | secret_key_values_cache[region][secrets_manager_name] = {} 75 | 76 | secret_arn_by_secret_name[secrets_manager_name] = ":".join(splitted_secret_arn[0:7]) 77 | 78 | for region in secret_key_values_cache: 79 | for secrets_manager_name in secret_key_values_cache[region]: 80 | secret_arn = secret_arn_by_secret_name[secrets_manager_name] 81 | str_secrets = get_secret_values(secret_arn, region) 82 | parsed_secrets = parse_secrets_str(str_secrets, secret_arn) 83 | 84 | secret_key_values_cache[region][secrets_manager_name] = parsed_secrets 85 | 86 | for config_secret_entry in config_secret_entry_values: 87 | splitted_secret_arn = config_secret_entry.split(":") 88 | 89 | region = splitted_secret_arn[3] 90 | secrets_manager_name = splitted_secret_arn[6] 91 | 92 | if len(splitted_secret_arn) == 8: 93 | wanted_key = splitted_secret_arn[-1] 94 | if wanted_key == "": 95 | raise ValueError(f"Error for secret 
{config_secret_entry}: key must not be empty") 96 | 97 | if not isinstance(secret_key_values_cache[region][secrets_manager_name], dict): 98 | raise ValueError(f"Error for secret {config_secret_entry}: expected to be keys/values pair") 99 | 100 | if wanted_key in secret_key_values_cache[region][secrets_manager_name]: 101 | fetched_secret_entry_value = secret_key_values_cache[region][secrets_manager_name][wanted_key] 102 | if fetched_secret_entry_value == "": 103 | raise ValueError(f"Error for secret {config_secret_entry}: must not be empty") 104 | config_secret_entry_values[config_secret_entry] = fetched_secret_entry_value 105 | else: 106 | raise KeyError(f"Error for secret {config_secret_entry}: key not found") 107 | else: 108 | if secret_key_values_cache[region][secrets_manager_name] == "": 109 | raise ValueError(f"Error for secret {config_secret_entry}: must not be empty") 110 | elif not isinstance(secret_key_values_cache[region][secrets_manager_name], str): 111 | raise ValueError(f"Error for secret {config_secret_entry}: expected to be a string") 112 | 113 | config_secret_entry_values[config_secret_entry] = secret_key_values_cache[region][secrets_manager_name] 114 | 115 | config_yaml = config_yaml.replace(config_secret_entry, config_secret_entry_values[config_secret_entry]) 116 | 117 | return config_yaml 118 | 119 | 120 | def get_secret_values(secret_arn: str, region_name: str) -> str: 121 | """ 122 | Calls the get_secret_value api from secrets manager, and returns the values. 123 | If the secret is created in a binary format, it will be received as a byte string 124 | on the "BinarySecret" key (boto3 does the base64 decoding internally). 125 | Raises exceptions for ClientError errors. 126 | """ 127 | 128 | secrets: str = "" 129 | client = _get_aws_sm_client(region_name) 130 | 131 | try: 132 | get_secret_value_response = client.get_secret_value(SecretId=secret_arn) 133 | except Exception as e: 134 | raise e 135 | else: 136 | if "SecretString" in get_secret_value_response: 137 | secrets = get_secret_value_response["SecretString"] 138 | 139 | else: 140 | secrets = get_secret_value_response["SecretBinary"].decode("utf-8") 141 | 142 | return secrets 143 | 144 | 145 | def parse_secrets_str(secrets: str, secret_arn: str) -> Union[str, dict[str, Any]]: 146 | """ 147 | Helper function to determine if the secrets from secrets manager are json or plain text. 148 | Returns str or dict only. 149 | """ 150 | 151 | try: 152 | parsed_secrets: dict[str, str] = json_parser(secrets) 153 | except JSONDecodeError: 154 | shared_logger.debug("parsed secrets as plaintext") 155 | return secrets 156 | else: 157 | shared_logger.debug("parsed secrets as json") 158 | return parsed_secrets 159 | -------------------------------------------------------------------------------- /handlers/aws/kinesis_trigger.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
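# For reference, the handlers below rely only on this subset of the Lambda Kinesis trigger payload
# (values are illustrative placeholders; "data" is the base64-encoded record payload):
#   {
#     "Records": [
#       {
#         "eventSourceARN": "arn:aws:kinesis:eu-west-1:123456789012:stream/my-stream",
#         "kinesis": {
#           "partitionKey": "...",
#           "sequenceNumber": "...",
#           "approximateArrivalTimestamp": 1700000000.0,
#           "data": "<base64-encoded payload>"
#         }
#       }
#     ]
#   }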
4 | 5 | import datetime 6 | from typing import Any, Iterator, Optional 7 | 8 | from botocore.client import BaseClient as BotoBaseClient 9 | 10 | from share import ExpandEventListFromField, ProtocolMultiline, shared_logger 11 | from storage import ProtocolStorage, StorageFactory 12 | 13 | from .utils import get_account_id_from_arn, get_kinesis_stream_name_type_and_region_from_arn 14 | 15 | 16 | def _handle_kinesis_move( 17 | sqs_client: BotoBaseClient, 18 | sqs_destination_queue: str, 19 | kinesis_record: dict[str, Any], 20 | event_input_id: str, 21 | config_yaml: str, 22 | continuing_queue: bool = True, 23 | last_ending_offset: Optional[int] = None, 24 | last_event_expanded_offset: Optional[int] = None, 25 | ) -> None: 26 | """ 27 | Handler of the continuation/replay queue for kinesis data stream inputs. 28 | If a kinesis data stream records batch cannot be fully processed before the timeout of the lambda, the handler will 29 | be called for the continuation queue: it will send new sqs messages for the unprocessed records to the 30 | internal continuing sqs queue. 31 | If a sqs message has an eventSourceARN not present in the config.yaml ids, then the handler should be called, 32 | so it can get placed in the internal replay queue. 33 | 34 | :param continuing_queue: should be set to true if the sqs message is going to be placed in the continuing 35 | queue. Otherwise, we assume it will be placed in the replaying queue, and, in that case, it should be set to false. 36 | """ 37 | 38 | sequence_number = kinesis_record["kinesis"]["sequenceNumber"] 39 | partition_key = kinesis_record["kinesis"]["partitionKey"] 40 | approximate_arrival_timestamp = kinesis_record["kinesis"]["approximateArrivalTimestamp"] 41 | stream_type, stream_name, _ = get_kinesis_stream_name_type_and_region_from_arn(event_input_id) 42 | 43 | message_attributes = { 44 | "config": {"StringValue": config_yaml, "DataType": "String"}, 45 | "originalStreamType": {"StringValue": stream_type, "DataType": "String"}, 46 | "originalStreamName": {"StringValue": stream_name, "DataType": "String"}, 47 | "originalPartitionKey": {"StringValue": partition_key, "DataType": "String"}, 48 | "originalSequenceNumber": {"StringValue": sequence_number, "DataType": "String"}, 49 | "originalEventSourceARN": {"StringValue": event_input_id, "DataType": "String"}, 50 | "originalApproximateArrivalTimestamp": { 51 | "StringValue": str(approximate_arrival_timestamp), 52 | "DataType": "Number", 53 | }, 54 | } 55 | 56 | if last_ending_offset is not None: 57 | message_attributes["originalLastEndingOffset"] = {"StringValue": str(last_ending_offset), "DataType": "Number"} 58 | 59 | if last_event_expanded_offset is not None: 60 | message_attributes["originalLastEventExpandedOffset"] = { 61 | "StringValue": str(last_event_expanded_offset), 62 | "DataType": "Number", 63 | } 64 | 65 | kinesis_data: str = kinesis_record["kinesis"]["data"] 66 | 67 | sqs_client.send_message( 68 | QueueUrl=sqs_destination_queue, 69 | MessageBody=kinesis_data, 70 | MessageAttributes=message_attributes, 71 | ) 72 | 73 | if continuing_queue: 74 | shared_logger.debug( 75 | "continuing", 76 | extra={ 77 | "sqs_continuing_queue": sqs_destination_queue, 78 | "last_ending_offset": last_ending_offset, 79 | "last_event_expanded_offset": last_event_expanded_offset, 80 | "partition_key": partition_key, 81 | "approximate_arrival_timestamp": approximate_arrival_timestamp, 82 | "sequence_number": sequence_number, 83 | }, 84 | ) 85 | else: 86 | shared_logger.debug( 87 | "replaying", 88 | extra={ 89 | 
"sqs_replaying_queue": sqs_destination_queue, 90 | "partition_key": partition_key, 91 | "approximate_arrival_timestamp": approximate_arrival_timestamp, 92 | "sequence_number": sequence_number, 93 | }, 94 | ) 95 | 96 | 97 | def _handle_kinesis_record( 98 | event: dict[str, Any], 99 | input_id: str, 100 | event_list_from_field_expander: ExpandEventListFromField, 101 | json_content_type: Optional[str], 102 | multiline_processor: Optional[ProtocolMultiline], 103 | ) -> Iterator[tuple[dict[str, Any], int, Optional[int], int]]: 104 | """ 105 | Handler for kinesis data stream inputs. 106 | It iterates through kinesis records in the kinesis trigger and process 107 | the content of kinesis.data payload 108 | """ 109 | account_id = get_account_id_from_arn(input_id) 110 | for kinesis_record_n, kinesis_record in enumerate(event["Records"]): 111 | storage: ProtocolStorage = StorageFactory.create( 112 | storage_type="payload", 113 | payload=kinesis_record["kinesis"]["data"], 114 | json_content_type=json_content_type, 115 | event_list_from_field_expander=event_list_from_field_expander, 116 | multiline_processor=multiline_processor, 117 | ) 118 | 119 | stream_type, stream_name, aws_region = get_kinesis_stream_name_type_and_region_from_arn( 120 | kinesis_record["eventSourceARN"] 121 | ) 122 | 123 | events = storage.get_by_lines(range_start=0) 124 | 125 | for log_event, starting_offset, ending_offset, event_expanded_offset in events: 126 | assert isinstance(log_event, bytes) 127 | 128 | es_event: dict[str, Any] = { 129 | "@timestamp": datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%dT%H:%M:%S.%fZ"), 130 | "fields": { 131 | "message": log_event.decode("utf-8"), 132 | "log": { 133 | "offset": starting_offset, 134 | "file": { 135 | "path": kinesis_record["eventSourceARN"], 136 | }, 137 | }, 138 | "aws": { 139 | "kinesis": { 140 | "type": stream_type, 141 | "name": stream_name, 142 | "partition_key": kinesis_record["kinesis"]["partitionKey"], 143 | "sequence_number": kinesis_record["kinesis"]["sequenceNumber"], 144 | } 145 | }, 146 | "cloud": { 147 | "provider": "aws", 148 | "region": aws_region, 149 | "account": {"id": account_id}, 150 | }, 151 | }, 152 | "meta": { 153 | "approximate_arrival_timestamp": int( 154 | float(kinesis_record["kinesis"]["approximateArrivalTimestamp"]) * 1000 155 | ), 156 | }, 157 | } 158 | 159 | yield es_event, ending_offset, event_expanded_offset, kinesis_record_n 160 | -------------------------------------------------------------------------------- /handlers/aws/cloudwatch_logs_trigger.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | 5 | import datetime 6 | from typing import Any, Iterator, Optional 7 | 8 | from botocore.client import BaseClient as BotoBaseClient 9 | 10 | from share import ExpandEventListFromField, ProtocolMultiline, json_parser, shared_logger 11 | from storage import ProtocolStorage, StorageFactory 12 | 13 | from .utils import GZIP_ENCODING, PAYLOAD_ENCODING_KEY, get_account_id_from_arn, gzip_base64_encoded 14 | 15 | 16 | def _from_awslogs_data_to_event(awslogs_data: str) -> Any: 17 | """ 18 | Returns cloudwatch logs event from base64 encoded and gzipped payload 19 | """ 20 | storage: ProtocolStorage = StorageFactory.create(storage_type="payload", payload=awslogs_data) 21 | cloudwatch_logs_payload_plain = storage.get_as_string() 22 | return json_parser(cloudwatch_logs_payload_plain) 23 | 24 | 25 | def _handle_cloudwatch_logs_move( 26 | sqs_client: BotoBaseClient, 27 | sqs_destination_queue: str, 28 | cloudwatch_logs_event: dict[str, Any], 29 | input_id: str, 30 | config_yaml: str, 31 | continuing_queue: bool = True, 32 | current_log_event: int = 0, 33 | last_ending_offset: Optional[int] = None, 34 | last_event_expanded_offset: Optional[int] = None, 35 | ) -> None: 36 | """ 37 | Handler of the continuation queue for cloudwatch logs inputs 38 | If a cloudwatch logs data payload cannot be fully processed before the 39 | timeout of the lambda this handler will be called: it will 40 | send new sqs messages for the unprocessed payload to the 41 | internal continuing sqs queue 42 | """ 43 | 44 | log_group_name = cloudwatch_logs_event["logGroup"] 45 | log_stream_name = cloudwatch_logs_event["logStream"] 46 | logs_events = cloudwatch_logs_event["logEvents"][current_log_event:] 47 | 48 | for current_log_event, log_event in enumerate(logs_events): 49 | if current_log_event > 0: 50 | last_ending_offset = None 51 | 52 | message_attributes = { 53 | "config": {"StringValue": config_yaml, "DataType": "String"}, 54 | "originalEventId": {"StringValue": log_event["id"], "DataType": "String"}, 55 | "originalEventSourceARN": {"StringValue": input_id, "DataType": "String"}, 56 | "originalLogGroup": {"StringValue": log_group_name, "DataType": "String"}, 57 | "originalLogStream": {"StringValue": log_stream_name, "DataType": "String"}, 58 | "originalEventTimestamp": {"StringValue": str(log_event["timestamp"]), "DataType": "Number"}, 59 | PAYLOAD_ENCODING_KEY: {"StringValue": GZIP_ENCODING, "DataType": "String"}, 60 | } 61 | 62 | if last_ending_offset is not None: 63 | message_attributes["originalLastEndingOffset"] = { 64 | "StringValue": str(last_ending_offset), 65 | "DataType": "Number", 66 | } 67 | 68 | if last_event_expanded_offset is not None: 69 | message_attributes["originalLastEventExpandedOffset"] = { 70 | "StringValue": str(last_event_expanded_offset), 71 | "DataType": "Number", 72 | } 73 | 74 | # forward compressed message to sqs queue 75 | sqs_client.send_message( 76 | QueueUrl=sqs_destination_queue, 77 | MessageBody=gzip_base64_encoded(log_event["message"]), 78 | MessageAttributes=message_attributes, 79 | ) 80 | 81 | if continuing_queue: 82 | shared_logger.debug( 83 | "continuing", 84 | extra={ 85 | "sqs_continuing_queue": sqs_destination_queue, 86 | "last_ending_offset": last_ending_offset, 87 | "last_event_expanded_offset": last_event_expanded_offset, 88 | "event_id": log_event["id"], 89 | "event_timestamp": log_event["timestamp"], 90 | }, 91 | ) 92 | else: 93 | shared_logger.debug( 94 | "replaying", 95 | extra={ 96 | "sqs_replaying_queue": sqs_destination_queue, 97 | "event_id": log_event["id"], 98 | 
"event_timestamp": log_event["timestamp"], 99 | }, 100 | ) 101 | 102 | 103 | def _handle_cloudwatch_logs_event( 104 | event: dict[str, Any], 105 | aws_region: str, 106 | input_id: str, 107 | event_list_from_field_expander: ExpandEventListFromField, 108 | json_content_type: Optional[str], 109 | multiline_processor: Optional[ProtocolMultiline], 110 | ) -> Iterator[tuple[dict[str, Any], int, Optional[int], int]]: 111 | """ 112 | Handler for cloudwatch logs inputs. 113 | It iterates through the logEvents in cloudwatch logs trigger payload and process 114 | content of body payload in the log event. 115 | If a log event cannot be fully processed before the 116 | timeout of the lambda it will call the sqs continuing handler 117 | """ 118 | 119 | account_id = get_account_id_from_arn(input_id) 120 | 121 | log_group_name = event["logGroup"] 122 | log_stream_name = event["logStream"] 123 | 124 | for cloudwatch_log_event_n, cloudwatch_log_event in enumerate(event["logEvents"]): 125 | event_id = cloudwatch_log_event["id"] 126 | event_timestamp = cloudwatch_log_event["timestamp"] 127 | 128 | storage_message: ProtocolStorage = StorageFactory.create( 129 | storage_type="payload", 130 | payload=cloudwatch_log_event["message"], 131 | json_content_type=json_content_type, 132 | event_list_from_field_expander=event_list_from_field_expander, 133 | multiline_processor=multiline_processor, 134 | ) 135 | 136 | events = storage_message.get_by_lines(range_start=0) 137 | 138 | for log_event, starting_offset, ending_offset, event_expanded_offset in events: 139 | assert isinstance(log_event, bytes) 140 | 141 | es_event: dict[str, Any] = { 142 | "@timestamp": datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%dT%H:%M:%S.%fZ"), 143 | "fields": { 144 | "message": log_event.decode("utf-8"), 145 | "log": { 146 | "offset": starting_offset, 147 | "file": { 148 | "path": f"{log_group_name}/{log_stream_name}", 149 | }, 150 | }, 151 | "aws": { 152 | "cloudwatch": { 153 | "log_group": log_group_name, 154 | "log_stream": log_stream_name, 155 | "event_id": event_id, 156 | } 157 | }, 158 | "cloud": { 159 | "provider": "aws", 160 | "region": aws_region, 161 | "account": {"id": account_id}, 162 | }, 163 | }, 164 | "meta": {"event_timestamp": event_timestamp}, 165 | } 166 | 167 | yield es_event, ending_offset, event_expanded_offset, cloudwatch_log_event_n 168 | -------------------------------------------------------------------------------- /handlers/aws/s3_sqs_trigger.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | 5 | import datetime 6 | from typing import Any, Iterator, Optional, Union 7 | from urllib.parse import unquote_plus 8 | 9 | import elasticapm 10 | from botocore.client import BaseClient as BotoBaseClient 11 | 12 | from share import ExpandEventListFromField, ProtocolMultiline, json_dumper, json_parser, shared_logger 13 | from storage import ProtocolStorage, StorageFactory 14 | 15 | from .utils import ( 16 | discover_integration_scope, 17 | expand_event_list_from_field_resolver, 18 | get_account_id_from_arn, 19 | get_bucket_name_from_arn, 20 | ) 21 | 22 | 23 | def _handle_s3_sqs_move( 24 | sqs_client: BotoBaseClient, 25 | sqs_destination_queue: str, 26 | sqs_record: dict[str, Any], 27 | input_id: str, 28 | config_yaml: str, 29 | current_s3_record: int = 0, 30 | continuing_queue: bool = True, 31 | last_ending_offset: Optional[int] = None, 32 | last_event_expanded_offset: Optional[int] = None, 33 | ) -> None: 34 | """ 35 | Handler of the continuation/replay queue for s3-sqs inputs. 36 | If a sqs message cannot be fully processed before the timeout of the lambda, the handler will be called 37 | for the continuation queue: it will send new sqs messages for the unprocessed records to the 38 | internal continuing sqs queue. 39 | If a sqs message has an eventSourceARN not present in the config.yaml ids, then the handler should be called, 40 | so it can get placed in the internal replay queue. 41 | 42 | :param continuing_queue: should be set to true if the sqs message is going to be placed in the continuing 43 | queue. Otherwise, we assume it will be placed in the replaying queue, and, in that case, it should be set to false. 44 | """ 45 | 46 | body = json_parser(sqs_record["body"]) 47 | body["Records"] = body["Records"][current_s3_record:] 48 | if last_ending_offset is not None: 49 | body["Records"][0]["last_ending_offset"] = last_ending_offset 50 | 51 | if last_event_expanded_offset is not None: 52 | body["Records"][0]["last_event_expanded_offset"] = last_event_expanded_offset 53 | elif "last_event_expanded_offset" in body["Records"][0]: 54 | del body["Records"][0]["last_event_expanded_offset"] 55 | 56 | sqs_record["body"] = json_dumper(body) 57 | 58 | sqs_client.send_message( 59 | QueueUrl=sqs_destination_queue, 60 | MessageBody=sqs_record["body"], 61 | MessageAttributes={ 62 | "config": {"StringValue": config_yaml, "DataType": "String"}, 63 | "originalEventSourceARN": {"StringValue": input_id, "DataType": "String"}, 64 | }, 65 | ) 66 | 67 | if continuing_queue: 68 | shared_logger.debug( 69 | "continuing", 70 | extra={ 71 | "sqs_continuing_queue": sqs_destination_queue, 72 | "last_ending_offset": last_ending_offset, 73 | "last_event_expanded_offset": last_event_expanded_offset, 74 | "current_s3_record": current_s3_record, 75 | }, 76 | ) 77 | else: 78 | shared_logger.debug( 79 | "replaying", 80 | extra={ 81 | "sqs_replaying_queue": sqs_destination_queue, 82 | "input_id": input_id, 83 | "message_id": sqs_record["messageId"], 84 | }, 85 | ) 86 | 87 | 88 | def _handle_s3_sqs_event( 89 | sqs_record_body: dict[str, Any], 90 | input_id: str, 91 | field_to_expand_event_list_from: str, 92 | root_fields_to_add_to_expanded_event: Optional[Union[str, list[str]]], 93 | json_content_type: Optional[str], 94 | multiline_processor: Optional[ProtocolMultiline], 95 | ) -> Iterator[tuple[dict[str, Any], int, Optional[int], int]]: 96 | """ 97 | Handler for s3-sqs input. 98 | It takes an sqs record in the sqs trigger and process 99 | corresponding object in S3 buckets sending to the defined outputs. 
100 | """ 101 | 102 | account_id = get_account_id_from_arn(input_id) 103 | 104 | for s3_record_n, s3_record in enumerate(sqs_record_body["Records"]): 105 | aws_region = s3_record["awsRegion"] 106 | bucket_arn = unquote_plus(s3_record["s3"]["bucket"]["arn"], "utf-8") 107 | object_key = unquote_plus(s3_record["s3"]["object"]["key"], "utf-8") 108 | event_time = int(datetime.datetime.strptime(s3_record["eventTime"], "%Y-%m-%dT%H:%M:%S.%fZ").timestamp() * 1000) 109 | last_ending_offset = s3_record["last_ending_offset"] if "last_ending_offset" in s3_record else 0 110 | last_event_expanded_offset = ( 111 | s3_record["last_event_expanded_offset"] if "last_event_expanded_offset" in s3_record else None 112 | ) 113 | 114 | integration_scope = discover_integration_scope(object_key) 115 | 116 | event_list_from_field_expander = ExpandEventListFromField( 117 | field_to_expand_event_list_from, 118 | integration_scope, 119 | expand_event_list_from_field_resolver, 120 | root_fields_to_add_to_expanded_event, 121 | last_event_expanded_offset, 122 | ) 123 | 124 | assert len(bucket_arn) > 0 125 | assert len(object_key) > 0 126 | 127 | bucket_name: str = get_bucket_name_from_arn(bucket_arn) 128 | storage: ProtocolStorage = StorageFactory.create( 129 | storage_type="s3", 130 | bucket_name=bucket_name, 131 | object_key=object_key, 132 | json_content_type=json_content_type, 133 | event_list_from_field_expander=event_list_from_field_expander, 134 | multiline_processor=multiline_processor, 135 | ) 136 | 137 | span = elasticapm.capture_span(f"WAIT FOR OFFSET STARTING AT {last_ending_offset}") 138 | span.__enter__() 139 | events = storage.get_by_lines(range_start=last_ending_offset) 140 | 141 | for log_event, starting_offset, ending_offset, event_expanded_offset in events: 142 | assert isinstance(log_event, bytes) 143 | 144 | if span: 145 | span.__exit__(None, None, None) 146 | span = None # type: ignore 147 | 148 | es_event: dict[str, Any] = { 149 | "@timestamp": datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%dT%H:%M:%S.%fZ"), 150 | "fields": { 151 | "message": log_event.decode("utf-8"), 152 | "log": { 153 | "offset": starting_offset, 154 | "file": { 155 | "path": "https://{0}.s3.{1}.amazonaws.com/{2}".format(bucket_name, aws_region, object_key), 156 | }, 157 | }, 158 | "aws": { 159 | "s3": { 160 | "bucket": {"name": bucket_name, "arn": bucket_arn}, 161 | "object": {"key": object_key}, 162 | } 163 | }, 164 | "cloud": { 165 | "provider": "aws", 166 | "region": aws_region, 167 | "account": {"id": account_id}, 168 | }, 169 | }, 170 | "meta": {"event_time": event_time, "integration_scope": integration_scope}, 171 | } 172 | 173 | yield es_event, ending_offset, event_expanded_offset, s3_record_n 174 | -------------------------------------------------------------------------------- /tests/shippers/test_logstash.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | import datetime 5 | import gzip 6 | from copy import deepcopy 7 | from typing import Any 8 | from unittest import TestCase 9 | from unittest.mock import MagicMock 10 | 11 | import pytest 12 | import responses 13 | import ujson 14 | from requests import PreparedRequest 15 | 16 | from shippers.logstash import _EVENT_SENT, _MAX_RETRIES, LogstashShipper 17 | 18 | _now = datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%dT%H:%M:%S.%fZ") 19 | 20 | _dummy_event: dict[str, Any] = { 21 | "@timestamp": _now, 22 | "fields": { 23 | "message": "A dummy message", 24 | "log": { 25 | "offset": 10, 26 | "file": { 27 | "path": "https://bucket_name.s3.aws-region.amazonaws.com/file.key", 28 | }, 29 | }, 30 | "aws": { 31 | "s3": { 32 | "bucket": { 33 | "name": "arn:aws:s3:::bucket_name", 34 | "arn": "bucket_name", 35 | }, 36 | "object": { 37 | "key": "file.key", 38 | }, 39 | }, 40 | }, 41 | "cloud": { 42 | "provider": "aws", 43 | "region": "aws-region", 44 | }, 45 | }, 46 | "meta": {}, 47 | } 48 | 49 | _dummy_expected_event: dict[str, Any] = { 50 | "@timestamp": _now, 51 | "_id": "_id", 52 | "message": "A dummy message", 53 | "log": { 54 | "offset": 10, 55 | "file": { 56 | "path": "https://bucket_name.s3.aws-region.amazonaws.com/file.key", 57 | }, 58 | }, 59 | "aws": { 60 | "s3": { 61 | "bucket": { 62 | "name": "arn:aws:s3:::bucket_name", 63 | "arn": "bucket_name", 64 | }, 65 | "object": { 66 | "key": "file.key", 67 | }, 68 | }, 69 | }, 70 | "cloud": { 71 | "provider": "aws", 72 | "region": "aws-region", 73 | }, 74 | "tags": ["forwarded"], 75 | } 76 | 77 | 78 | def _dummy_replay_handler(output_type: str, output_args: dict[str, Any], event_payload: dict[str, Any]) -> None: 79 | pass 80 | 81 | 82 | @pytest.mark.unit 83 | class TestLogstashShipper(TestCase): 84 | @responses.activate 85 | def test_send_successful(self) -> None: 86 | def request_callback(request: PreparedRequest) -> tuple[int, dict[Any, Any], str]: 87 | _payload = [] 88 | assert request.headers["Content-Encoding"] == "gzip" 89 | assert request.headers["Content-Type"] == "application/x-ndjson" 90 | assert request.body is not None 91 | assert isinstance(request.body, bytes) 92 | 93 | events = gzip.decompress(request.body).decode("utf-8").split("\n") 94 | for event in events: 95 | _payload.append(ujson.loads(event)) 96 | 97 | expected_event = deepcopy(_dummy_expected_event) 98 | expected_event["@metadata"] = {"_id": "_id"} 99 | del expected_event["_id"] 100 | 101 | assert _payload == [expected_event, expected_event] 102 | 103 | return 200, {}, "okay" 104 | 105 | def event_id_generator(event: dict[str, Any]) -> str: 106 | return "_id" 107 | 108 | url = "http://logstash_url" 109 | event = deepcopy(_dummy_event) 110 | responses.add_callback(responses.PUT, url, callback=request_callback) 111 | logstash_shipper = LogstashShipper(logstash_url=url, max_batch_size=2) 112 | logstash_shipper.set_event_id_generator(event_id_generator) 113 | logstash_shipper.send(event) 114 | logstash_shipper.send(event) 115 | 116 | @responses.activate 117 | def test_send_failures(self) -> None: 118 | url = "http://logstash_url" 119 | with self.subTest("Does not exceed max_retries"): 120 | responses.put(url=url, status=429) 121 | responses.put(url=url, status=429) 122 | responses.put(url=url, status=429) 123 | responses.put(url=url, status=200) 124 | event = deepcopy(_dummy_event) 125 | logstash_shipper = LogstashShipper(logstash_url=url) 126 | assert logstash_shipper.send(event) == _EVENT_SENT 127 | with self.subTest("Exceeds max retries, replay handler set"): 128 | for i 
in range(_MAX_RETRIES): 129 | responses.put(url=url, status=429) 130 | responses.put(url=url, status=429) 131 | logstash_shipper = LogstashShipper(logstash_url=url) 132 | replay_handler = MagicMock(side_effect=_dummy_replay_handler) 133 | logstash_shipper.set_replay_handler(replay_handler) 134 | event = deepcopy(_dummy_event) 135 | assert logstash_shipper.send(event) == _EVENT_SENT 136 | replay_handler.assert_called_once_with(url, {}, event) 137 | with self.subTest("Exceeds max retries, replay handler not set"): 138 | for i in range(_MAX_RETRIES): 139 | responses.put(url=url, status=429) 140 | responses.put(url=url, status=429) 141 | replay_handler = MagicMock(side_effect=_dummy_replay_handler) 142 | logstash_shipper = LogstashShipper(logstash_url=url) 143 | event = deepcopy(_dummy_event) 144 | assert logstash_shipper.send(event) == _EVENT_SENT 145 | replay_handler.assert_not_called() 146 | with self.subTest("Authentication error, request is not retried"): 147 | responses.put(url=url, status=401) 148 | logstash_shipper = LogstashShipper(logstash_url=url) 149 | replay_handler = MagicMock(side_effect=_dummy_replay_handler) 150 | logstash_shipper.set_replay_handler(replay_handler) 151 | event = deepcopy(_dummy_event) 152 | assert logstash_shipper.send(event) == _EVENT_SENT 153 | replay_handler.assert_called_once_with(url, {}, event) 154 | 155 | @responses.activate 156 | def test_flush(self) -> None: 157 | url = "http://logstash_url" 158 | responses.put(url=url, status=200) 159 | responses.put(url=url, status=200) 160 | logstash_shipper = LogstashShipper(logstash_url=url, max_batch_size=2) 161 | event = deepcopy(_dummy_event) 162 | logstash_shipper.send(event) 163 | assert logstash_shipper._events_batch == [event] 164 | logstash_shipper.flush() 165 | assert logstash_shipper._events_batch == [] 166 | 167 | @responses.activate 168 | def test_buffer_handling_at_capacity(self) -> None: 169 | url = "http://logstash_url" 170 | responses.put(url=url, status=200) 171 | responses.put(url=url, status=200) 172 | responses.put(url=url, status=200) 173 | 174 | logstash_shipper = LogstashShipper(logstash_url=url, max_batch_size=2) 175 | event = deepcopy(_dummy_event) 176 | 177 | logstash_shipper.send(event) # this should not trigger the send 178 | assert logstash_shipper._events_batch == [event] 179 | logstash_shipper.send(event) # this should trigger the send and empty the buffer 180 | assert logstash_shipper._events_batch == [] 181 | logstash_shipper.send(event) # this should not trigger the send 182 | assert logstash_shipper._events_batch == [event] 183 | logstash_shipper.flush() # this should trigger the send and empty the buffer 184 | assert logstash_shipper._events_batch == [] 185 | -------------------------------------------------------------------------------- /tests/testcontainers/es.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
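The request callback in `test_send_successful` above implicitly defines the wire format the shipper is expected to produce: a PUT with a gzip-compressed NDJSON body, one JSON document per line, with the event id tucked under `@metadata`. A small standard-library sketch of building and unpacking such a body (the test uses ujson; json behaves the same here):

import gzip
import json
from typing import Any

events: list[dict[str, Any]] = [
    {"@metadata": {"_id": "_id"}, "message": "first"},
    {"@metadata": {"_id": "_id"}, "message": "second"},
]

# One JSON document per line, then gzip: this is what the callback decompresses and splits on "\n".
ndjson = "\n".join(json.dumps(event) for event in events)
body = gzip.compress(ndjson.encode("utf-8"))

decoded = [json.loads(line) for line in gzip.decompress(body).decode("utf-8").split("\n")]
assert decoded == events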
4 | 5 | from __future__ import annotations 6 | 7 | import ssl 8 | import time 9 | from typing import Any 10 | 11 | from elasticsearch import Elasticsearch 12 | from OpenSSL import crypto as OpenSSLCrypto 13 | from testcontainers.core.container import DockerContainer 14 | from testcontainers.core.waiting_utils import wait_container_is_ready 15 | 16 | DEFAULT_USERNAME = "elastic" 17 | DEFAULT_PASSWORD = "password" 18 | 19 | 20 | class ElasticsearchContainer(DockerContainer): # type: ignore 21 | """ 22 | Elasticsearch container. 23 | 24 | Example 25 | ------- 26 | :: 27 | 28 | with ElasticsearchContainer() as esc: 29 | # NOTE: container will terminate once out of this with statement 30 | url = esc.get_url() 31 | """ 32 | 33 | _DEFAULT_IMAGE = "docker.elastic.co/elasticsearch/elasticsearch" 34 | _DEFAULT_VERSION = "7.17.20" 35 | _DEFAULT_PORT = 9200 36 | _DEFAULT_USERNAME = DEFAULT_USERNAME 37 | _DEFAULT_PASSWORD = DEFAULT_PASSWORD 38 | 39 | def __init__( 40 | self, 41 | image: str = _DEFAULT_IMAGE, 42 | version: str = _DEFAULT_VERSION, 43 | port: int = _DEFAULT_PORT, 44 | username: str = _DEFAULT_USERNAME, 45 | password: str = _DEFAULT_PASSWORD, 46 | ): 47 | image = f"{image}:{version}" 48 | super(ElasticsearchContainer, self).__init__(image=image, entrypoint="sleep") 49 | self.with_command("infinity") 50 | 51 | self.port = port 52 | self.host = "" 53 | self.exposed_port = 0 54 | self.ssl_assert_fingerprint = "" 55 | 56 | self.elastic_user: str = username 57 | self.elastic_password: str = password 58 | 59 | self.with_exposed_ports(self.port) 60 | 61 | self._pipelines_ids: set[str] = set() 62 | self._index_indices: set[str] = set() 63 | 64 | def _configure(self) -> None: 65 | """ 66 | Values set here will override any value set by calling .with_env(...) 
67 | after initializing this class before .start() 68 | """ 69 | 70 | exit_code, _ = self.get_wrapped_container().exec_run( 71 | cmd="elasticsearch-certutil cert --silent --name localhost --dns localhost --keep-ca-key " 72 | "--out /usr/share/elasticsearch/elasticsearch-ssl-http.zip --self-signed --ca-pass '' --pass ''" 73 | ) 74 | assert exit_code == 0 75 | 76 | exit_code, _ = self.get_wrapped_container().exec_run( 77 | cmd="unzip /usr/share/elasticsearch/elasticsearch-ssl-http.zip -d /usr/share/elasticsearch/config/certs/" 78 | ) 79 | 80 | assert exit_code == 0 81 | 82 | self.get_wrapped_container().exec_run( 83 | cmd="/bin/tini -- /usr/local/bin/docker-entrypoint.sh", 84 | detach=True, 85 | environment={ 86 | "ES_JAVA_OPTS": "-Xms1g -Xmx1g", 87 | "ELASTIC_PASSWORD": self.elastic_password, 88 | "xpack.security.enabled": "true", 89 | "discovery.type": "single-node", 90 | "network.bind_host": "0.0.0.0", 91 | "network.publish_host": "0.0.0.0", 92 | "logger.org.elasticsearch": "DEBUG", 93 | "xpack.security.http.ssl.enabled": "true", 94 | "xpack.security.http.ssl.keystore.path": "/usr/share/elasticsearch/config/certs/localhost/" 95 | "localhost.p12", 96 | }, 97 | ) 98 | 99 | def get_url(self) -> str: 100 | return f"https://{self.host}:{self.exposed_port}" 101 | 102 | @wait_container_is_ready() # type: ignore 103 | def _connect(self) -> None: 104 | self.host = self.get_container_host_ip() 105 | self.exposed_port = int(self.get_exposed_port(self.port)) 106 | 107 | while True: 108 | try: 109 | pem_server_certificate: str = ssl.get_server_certificate((self.host, self.exposed_port)) 110 | openssl_certificate = OpenSSLCrypto.load_certificate( 111 | OpenSSLCrypto.FILETYPE_PEM, pem_server_certificate.encode("utf-8") 112 | ) 113 | except Exception: 114 | time.sleep(1) 115 | else: 116 | self.ssl_assert_fingerprint = str(openssl_certificate.digest("sha256").decode()) 117 | break 118 | 119 | assert len(self.ssl_assert_fingerprint) > 0 120 | 121 | self.es_client = Elasticsearch( 122 | hosts=[f"{self.host}:{self.exposed_port}"], 123 | scheme="https", 124 | http_auth=(self.elastic_user, self.elastic_password), 125 | ssl_assert_fingerprint=self.ssl_assert_fingerprint, 126 | verify_certs=False, 127 | timeout=30, 128 | max_retries=10, 129 | retry_on_timeout=True, 130 | raise_on_error=False, 131 | raise_on_exception=False, 132 | ) 133 | 134 | while not self.es_client.ping(): 135 | time.sleep(1) 136 | 137 | while True: 138 | cluster_health = self.es_client.cluster.health(wait_for_status="green") 139 | if "status" in cluster_health and cluster_health["status"] == "green": 140 | break 141 | 142 | time.sleep(1) 143 | 144 | def reset(self) -> None: 145 | for index in self._index_indices: 146 | self.es_client.indices.delete_data_stream(name=index) 147 | 148 | if self.es_client.indices.exists(index="logs-stash.elasticsearch-output"): 149 | self.es_client.indices.delete_data_stream(name="logs-stash.elasticsearch-output") 150 | 151 | self._index_indices = set() 152 | 153 | for pipeline_id in self._pipelines_ids: 154 | self.es_client.ingest.delete_pipeline(id=pipeline_id) 155 | 156 | self._pipelines_ids = set() 157 | 158 | def start(self) -> ElasticsearchContainer: 159 | super().start() 160 | self._configure() 161 | self._connect() 162 | return self 163 | 164 | def count(self, **kwargs: Any) -> dict[str, Any]: 165 | if "index" in kwargs and ("ignore_unavailable" not in kwargs or kwargs["ignore_unavailable"] is not True): 166 | self._index_indices.add(kwargs["index"]) 167 | 168 | return self.es_client.count(**kwargs) 169 
| 170 | def refresh(self, **kwargs: Any) -> dict[str, Any]: 171 | if "index" in kwargs and ("ignore_unavailable" not in kwargs or kwargs["ignore_unavailable"] is not True): 172 | self._index_indices.add(kwargs["index"]) 173 | 174 | return self.es_client.indices.refresh(**kwargs) 175 | 176 | def put_pipeline(self, **kwargs: Any) -> dict[str, Any]: 177 | if "id" in kwargs: 178 | self._pipelines_ids.add(kwargs["id"]) 179 | 180 | return self.es_client.ingest.put_pipeline(**kwargs) 181 | 182 | def delete_by_query(self, **kwargs: Any) -> dict[str, Any]: 183 | if "index" in kwargs: 184 | self._index_indices.add(kwargs["index"]) 185 | 186 | return self.es_client.delete_by_query(**kwargs) 187 | 188 | def put_settings(self, **kwargs: Any) -> dict[str, Any]: 189 | if "index" in kwargs: 190 | self._index_indices.add(kwargs["index"]) 191 | 192 | return self.es_client.indices.put_settings(**kwargs) 193 | 194 | def exists(self, **kwargs: Any) -> bool: 195 | exists = self.es_client.indices.exists(**kwargs) 196 | if exists and "index" in kwargs: 197 | self._index_indices.add(kwargs["index"]) 198 | 199 | return exists 200 | 201 | def search(self, **kwargs: Any) -> dict[str, Any]: 202 | if "index" in kwargs: 203 | self._index_indices.add(kwargs["index"]) 204 | 205 | return self.es_client.search(**kwargs) 206 | 207 | def index(self, **kwargs: Any) -> dict[str, Any]: 208 | if "index" in kwargs: 209 | self._index_indices.add(kwargs["index"]) 210 | 211 | return self.es_client.index(**kwargs) 212 | 213 | def create_data_stream(self, **kwargs: Any) -> dict[str, Any]: 214 | if "name" in kwargs: 215 | self._index_indices.add(kwargs["name"]) 216 | 217 | return self.es_client.indices.create_data_stream(**kwargs) 218 | -------------------------------------------------------------------------------- /tests/shippers/test_factory.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
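The fingerprint pinned by `ElasticsearchContainer._connect` above comes from pyOpenSSL's `cert.digest("sha256")`, which (to my understanding) is the SHA-256 of the DER-encoded certificate rendered as colon-separated uppercase hex. A sketch under that assumption, using only the standard library, in case pyOpenSSL is not available:

import hashlib
import ssl


def sha256_fingerprint(host: str, port: int) -> str:
    """Fetch the server certificate and return its SHA-256 fingerprint as AA:BB:... hex."""
    pem = ssl.get_server_certificate((host, port))
    der = ssl.PEM_cert_to_DER_cert(pem)
    digest = hashlib.sha256(der).hexdigest().upper()
    return ":".join(digest[i : i + 2] for i in range(0, len(digest), 2))


# e.g. passed to elasticsearch-py as ssl_assert_fingerprint=sha256_fingerprint(host, port)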
4 | 5 | import re 6 | from unittest import TestCase 7 | 8 | from share import ElasticsearchOutput, LogstashOutput, Output 9 | from shippers import ElasticsearchShipper, LogstashShipper, ProtocolShipper, ShipperFactory 10 | 11 | 12 | class TestShipperFactory(TestCase): 13 | def test_create(self) -> None: 14 | with self.subTest("create elasticsearch shipper success elasticsearch_url and http auth"): 15 | shipper: ProtocolShipper = ShipperFactory.create( 16 | output_type="elasticsearch", 17 | elasticsearch_url="elasticsearch_url", 18 | username="username", 19 | password="password", 20 | es_datastream_name="es_datastream_name", 21 | ) 22 | 23 | assert isinstance(shipper, ElasticsearchShipper) 24 | 25 | with self.subTest("create elasticsearch shipper success elasticsearch_url and api key"): 26 | shipper = ShipperFactory.create( 27 | output_type="elasticsearch", 28 | elasticsearch_url="elasticsearch_url", 29 | api_key="api_key", 30 | es_datastream_name="es_datastream_name", 31 | ) 32 | 33 | assert isinstance(shipper, ElasticsearchShipper) 34 | 35 | with self.subTest("create elasticsearch shipper success cloud id and http auth"): 36 | shipper = ShipperFactory.create( 37 | output_type="elasticsearch", 38 | cloud_id="cloud_id:bG9jYWxob3N0OjkyMDAkMA==", 39 | username="username", 40 | password="password", 41 | es_datastream_name="es_datastream_name", 42 | ) 43 | 44 | assert isinstance(shipper, ElasticsearchShipper) 45 | 46 | with self.subTest("create elasticsearch shipper success cloud id and api key"): 47 | shipper = ShipperFactory.create( 48 | output_type="elasticsearch", 49 | cloud_id="cloud_id:bG9jYWxob3N0OjkyMDAkMA==", 50 | api_key="api_key", 51 | es_datastream_name="es_datastream_name", 52 | ) 53 | 54 | assert isinstance(shipper, ElasticsearchShipper) 55 | 56 | with self.subTest("create logstash shipper success with only logstash_url"): 57 | shipper = ShipperFactory.create( 58 | output_type="logstash", 59 | logstash_url="http://myhost:8080", 60 | ) 61 | 62 | assert isinstance(shipper, LogstashShipper) 63 | 64 | with self.subTest("create logstash shipper success with logstash_url, batch size and compression level"): 65 | shipper = ShipperFactory.create( 66 | output_type="logstash", 67 | logstash_url="http://myhost:8080", 68 | max_batch_size=50, 69 | compression_level=9, 70 | ) 71 | 72 | assert isinstance(shipper, LogstashShipper) 73 | with self.subTest("create elasticsearch shipper no kwargs error"): 74 | with self.assertRaisesRegex(ValueError, "You must provide one between elasticsearch_url or cloud_id"): 75 | ShipperFactory.create(output_type="elasticsearch") 76 | 77 | with self.subTest("create logstash shipper no kwargs error"): 78 | with self.assertRaisesRegex(ValueError, "You must provide logstash_url"): 79 | ShipperFactory.create(output_type="logstash") 80 | 81 | with self.subTest("create elasticsearch shipper empty elasticsearch_url and no cloud_id"): 82 | with self.assertRaisesRegex(ValueError, "You must provide one between elasticsearch_url or cloud_id"): 83 | ShipperFactory.create(output_type="elasticsearch", elasticsearch_url="") 84 | 85 | with self.subTest("create elasticsearch shipper empty cloud_id and no elasticsearch_url"): 86 | with self.assertRaisesRegex(ValueError, "You must provide one between elasticsearch_url or cloud_id"): 87 | ShipperFactory.create(output_type="elasticsearch", cloud_id="") 88 | 89 | with self.subTest("create elasticsearch shipper empty username and no api_key"): 90 | with self.assertRaisesRegex(ValueError, "You must provide one between username and 
password or api_key"): 91 | ShipperFactory.create(output_type="elasticsearch", elasticsearch_url="elasticsearch_url", username="") 92 | 93 | with self.subTest("create elasticsearch shipper empty api_key and no username"): 94 | with self.assertRaisesRegex(ValueError, "You must provide one between username and password or api_key"): 95 | ShipperFactory.create(output_type="elasticsearch", elasticsearch_url="elasticsearch_url", api_key="") 96 | 97 | with self.subTest("create logstash shipper compression level lower than 0"): 98 | with self.assertRaisesRegex(ValueError, "compression_level must be an integer value between 0 and 9"): 99 | ShipperFactory.create(output_type="logstash", logstash_url="logstash_url", compression_level=-1) 100 | 101 | with self.subTest("create logstash shipper compression level higher than 9"): 102 | with self.assertRaisesRegex(ValueError, "compression_level must be an integer value between 0 and 9"): 103 | ShipperFactory.create(output_type="logstash", logstash_url="logstash_url", compression_level=10) 104 | 105 | with self.subTest("create invalid type"): 106 | with self.assertRaisesRegex( 107 | ValueError, re.escape("You must provide one of the following outputs: elasticsearch") 108 | ): 109 | ShipperFactory.create(output_type="invalid type") 110 | 111 | def test_create_from_output(self) -> None: 112 | elasticsearch_output = ElasticsearchOutput( 113 | elasticsearch_url="elasticsearch_url", 114 | username="username", 115 | password="password", 116 | es_datastream_name="es_datastream_name", 117 | ) 118 | 119 | with self.subTest("create output type elasticsearch"): 120 | with self.assertRaisesRegex( 121 | ValueError, 122 | re.escape("output expected to be ElasticsearchOutput type, given "), 123 | ): 124 | ShipperFactory.create_from_output( 125 | output_type="elasticsearch", output=Output(output_type="elasticsearch") 126 | ) 127 | 128 | with self.subTest("create from output elasticsearch shipper success"): 129 | shipper: ProtocolShipper = ShipperFactory.create_from_output( 130 | output_type=elasticsearch_output.type, output=elasticsearch_output 131 | ) 132 | 133 | assert isinstance(shipper, ElasticsearchShipper) 134 | 135 | with self.subTest("create from output invalid type"): 136 | with self.assertRaisesRegex( 137 | ValueError, re.escape("You must provide one of the following outputs: elasticsearch, logstash") 138 | ): 139 | ShipperFactory.create_from_output(output_type="invalid type", output=elasticsearch_output) 140 | 141 | logstash_output = LogstashOutput(logstash_url="logstash_url") 142 | 143 | with self.subTest("create output type logstash"): 144 | with self.assertRaisesRegex( 145 | ValueError, 146 | re.escape("output expected to be LogstashOutput type, given "), 147 | ): 148 | ShipperFactory.create_from_output(output_type="logstash", output=Output(output_type="logstash")) 149 | 150 | with self.subTest("create from output logstash shipper success"): 151 | logstash_shipper: ProtocolShipper = ShipperFactory.create_from_output( 152 | output_type=logstash_output.type, output=logstash_output 153 | ) 154 | 155 | assert isinstance(logstash_shipper, LogstashShipper) 156 | 157 | with self.subTest("create from output invalid type"): 158 | with self.assertRaisesRegex( 159 | ValueError, re.escape("You must provide one of the following outputs: elasticsearch, logstash") 160 | ): 161 | ShipperFactory.create_from_output(output_type="invalid type", output=logstash_output) 162 | -------------------------------------------------------------------------------- 
/tests/storage/test_payload.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | import base64 6 | import datetime 7 | import gzip 8 | import random 9 | from typing import Optional 10 | 11 | import mock 12 | import pytest 13 | 14 | from storage import PayloadStorage 15 | 16 | from .test_benchmark import ( 17 | _IS_PLAIN, 18 | _LENGTH_ABOVE_THRESHOLD, 19 | MockContentBase, 20 | get_by_lines_parameters, 21 | multiline_processor, 22 | ) 23 | 24 | 25 | class MockContent(MockContentBase): 26 | @staticmethod 27 | def init_content( 28 | content_type: str, 29 | newline: bytes, 30 | length_multiplier: int = _LENGTH_ABOVE_THRESHOLD, 31 | json_content_type: Optional[str] = None, 32 | ) -> None: 33 | MockContentBase.init_content( 34 | content_type=content_type, 35 | newline=newline, 36 | length_multiplier=length_multiplier, 37 | json_content_type=json_content_type, 38 | ) 39 | 40 | MockContent.f_content_gzip = base64.b64encode(gzip.compress(MockContentBase.mock_content)) 41 | MockContent.f_content_plain = base64.b64encode(MockContentBase.mock_content) 42 | MockContent.f_size_gzip = len(MockContent.f_content_gzip) 43 | MockContent.f_size_plain = len(MockContent.f_content_plain) 44 | 45 | 46 | @pytest.mark.unit 47 | def test_get_as_string_plain() -> None: 48 | MockContent.init_content(content_type=_IS_PLAIN, newline=b"\n") 49 | original = base64.b64decode(MockContent.f_content_plain).decode("utf-8") 50 | payload_storage = PayloadStorage(payload=original) 51 | content = payload_storage.get_as_string() 52 | assert content == original 53 | assert len(content) == len(original) 54 | 55 | 56 | @pytest.mark.unit 57 | def test_get_as_string_base64() -> None: 58 | MockContent.init_content(content_type=_IS_PLAIN, newline=b"\n") 59 | payload_storage = PayloadStorage(payload=MockContent.f_content_plain.decode("utf-8")) 60 | content = payload_storage.get_as_string() 61 | original = base64.b64decode(MockContent.f_content_plain).decode("utf-8") 62 | assert content == original 63 | assert len(content) == len(original) 64 | 65 | 66 | @pytest.mark.unit 67 | def test_get_as_string_gzip() -> None: 68 | MockContent.init_content(content_type=_IS_PLAIN, newline=b"\n") 69 | payload_storage = PayloadStorage(payload=MockContent.f_content_gzip.decode("utf-8")) 70 | content = payload_storage.get_as_string() 71 | original = gzip.decompress(base64.b64decode(MockContent.f_content_gzip)).decode("utf-8") 72 | 73 | assert content == original 74 | assert len(content) == len(original) 75 | 76 | 77 | @pytest.mark.unit 78 | @pytest.mark.parametrize("length_multiplier,content_type,newline,json_content_type", get_by_lines_parameters()) 79 | @mock.patch("share.multiline.timedelta_circuit_breaker", new=datetime.timedelta(days=1)) 80 | def test_get_by_lines( 81 | length_multiplier: int, content_type: str, newline: bytes, json_content_type: Optional[str] 82 | ) -> None: 83 | MockContent.init_content( 84 | content_type=content_type, 85 | newline=newline, 86 | length_multiplier=length_multiplier, 87 | json_content_type=json_content_type, 88 | ) 89 | 90 | payload_content_gzip = MockContent.f_content_gzip.decode("utf-8") 91 | payload_content_plain = MockContent.f_content_plain.decode("utf-8") 92 | 93 | joiner_token: bytes = newline 94 | 95 | original: bytes = 
base64.b64decode(MockContent.f_content_plain) 96 | original_length: int = len(original) 97 | 98 | payload_storage = PayloadStorage( 99 | payload=payload_content_gzip, 100 | json_content_type=json_content_type, 101 | multiline_processor=multiline_processor(content_type), 102 | ) 103 | gzip_full: list[tuple[bytes, int, int, Optional[int]]] = list(payload_storage.get_by_lines(range_start=0)) 104 | 105 | payload_storage = PayloadStorage( 106 | payload=payload_content_plain, 107 | json_content_type=json_content_type, 108 | multiline_processor=multiline_processor(content_type), 109 | ) 110 | plain_full: list[tuple[bytes, int, int, Optional[int]]] = list(payload_storage.get_by_lines(range_start=0)) 111 | 112 | diff = set(gzip_full) ^ set(plain_full) 113 | assert not diff 114 | assert plain_full == gzip_full 115 | assert gzip_full[-1][2] == original_length 116 | assert plain_full[-1][2] == original_length 117 | 118 | joined = joiner_token.join([x[0] for x in plain_full]) 119 | assert joined == original 120 | 121 | if len(newline) == 0 or (json_content_type == "single"): 122 | return 123 | 124 | gzip_full_01 = gzip_full[: int(len(gzip_full) / 2)] 125 | plain_full_01 = plain_full[: int(len(plain_full) / 2)] 126 | 127 | range_start = plain_full_01[-1][2] 128 | 129 | payload_storage = PayloadStorage( 130 | payload=payload_content_gzip, 131 | json_content_type=json_content_type, 132 | multiline_processor=multiline_processor(content_type), 133 | ) 134 | gzip_full_02: list[tuple[bytes, int, int, Optional[int]]] = list( 135 | payload_storage.get_by_lines(range_start=range_start) 136 | ) 137 | 138 | payload_storage = PayloadStorage( 139 | payload=payload_content_plain, 140 | json_content_type=json_content_type, 141 | multiline_processor=multiline_processor(content_type), 142 | ) 143 | plain_full_02: list[tuple[bytes, int, int, Optional[int]]] = list( 144 | payload_storage.get_by_lines(range_start=range_start) 145 | ) 146 | 147 | diff = set(gzip_full_01) ^ set(plain_full_01) 148 | assert not diff 149 | assert plain_full_01 == gzip_full_01 150 | 151 | diff = set(gzip_full_02) ^ set(plain_full_02) 152 | assert not diff 153 | assert plain_full_02 == gzip_full_02 154 | 155 | assert plain_full_01 + plain_full_02 == plain_full 156 | assert gzip_full_02[-1][2] == original_length 157 | assert plain_full_02[-1][2] == original_length 158 | 159 | joined = ( 160 | joiner_token.join([x[0] for x in plain_full_01]) 161 | + joiner_token 162 | + joiner_token.join([x[0] for x in plain_full_02]) 163 | ) 164 | 165 | assert joined == original 166 | 167 | gzip_full_02 = gzip_full_02[: int(len(gzip_full_02) / 2)] 168 | plain_full_02 = plain_full_02[: int(len(plain_full_02) / 2)] 169 | 170 | range_start = plain_full_02[-1][2] 171 | 172 | payload_storage = PayloadStorage( 173 | payload=payload_content_gzip, 174 | json_content_type=json_content_type, 175 | multiline_processor=multiline_processor(content_type), 176 | ) 177 | gzip_full_03: list[tuple[bytes, int, int, Optional[int]]] = list( 178 | payload_storage.get_by_lines(range_start=range_start) 179 | ) 180 | 181 | payload_storage = PayloadStorage( 182 | payload=payload_content_plain, 183 | json_content_type=json_content_type, 184 | multiline_processor=multiline_processor(content_type), 185 | ) 186 | plain_full_03: list[tuple[bytes, int, int, Optional[int]]] = list( 187 | payload_storage.get_by_lines(range_start=range_start) 188 | ) 189 | 190 | diff = set(gzip_full_02) ^ set(plain_full_02) 191 | assert not diff 192 | assert plain_full_02 == gzip_full_02 193 | 194 | diff = 
set(gzip_full_03) ^ set(plain_full_03) 195 | assert not diff 196 | assert plain_full_03 == gzip_full_03 197 | 198 | assert plain_full_01 + plain_full_02 + plain_full_03 == plain_full 199 | assert gzip_full_03[-1][2] == original_length 200 | assert plain_full_03[-1][2] == original_length 201 | 202 | joined = ( 203 | joiner_token.join([x[0] for x in plain_full_01]) 204 | + joiner_token 205 | + joiner_token.join([x[0] for x in plain_full_02]) 206 | + joiner_token 207 | + joiner_token.join([x[0] for x in plain_full_03]) 208 | ) 209 | 210 | assert joined == original 211 | 212 | range_start = plain_full[-1][2] + random.randint(1, 100) 213 | 214 | payload_storage = PayloadStorage( 215 | payload=payload_content_gzip, 216 | json_content_type=json_content_type, 217 | multiline_processor=multiline_processor(content_type), 218 | ) 219 | gzip_full_empty: list[tuple[bytes, int, int, Optional[int]]] = list( 220 | payload_storage.get_by_lines(range_start=range_start) 221 | ) 222 | 223 | payload_storage = PayloadStorage( 224 | payload=payload_content_plain, 225 | json_content_type=json_content_type, 226 | multiline_processor=multiline_processor(content_type), 227 | ) 228 | plain_full_empty: list[tuple[bytes, int, int, Optional[int]]] = list( 229 | payload_storage.get_by_lines(range_start=range_start) 230 | ) 231 | 232 | assert not gzip_full_empty 233 | assert not plain_full_empty 234 | --------------------------------------------------------------------------------
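The property these payload tests verify can be stated in a much smaller form: PayloadStorage accepts the payload as plain text, base64, or base64 of gzip, and `get_by_lines(range_start=offset)` resumes exactly where a previous pass stopped, where the offset is the ending offset (index 2 of each yielded tuple) reported for the last processed line. A minimal sketch with made-up content, mirroring the resumption pattern used in `test_get_by_lines` above:

import base64
import gzip

from storage import PayloadStorage

original = b"first line\nsecond line\nthird line"
payload = base64.b64encode(gzip.compress(original)).decode("utf-8")

storage = PayloadStorage(payload=payload)
lines = list(storage.get_by_lines(range_start=0))
assert [line for line, _, _, _ in lines] == [b"first line", b"second line", b"third line"]

# Resume after the first line: range_start is the ending offset reported for that line.
_, _, first_ending_offset, _ = lines[0]
resumed = list(PayloadStorage(payload=payload).get_by_lines(range_start=first_ending_offset))
assert [line for line, _, _, _ in resumed] == [b"second line", b"third line"]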