├── .makecache └── .keepdir ├── pytest.ini ├── tests ├── handlers │ ├── aws │ │ ├── testdata │ │ │ ├── cloudwatch-log-3.json │ │ │ ├── cloudwatch-log-1.json │ │ │ └── cloudwatch-log-2.json │ │ ├── __init__.py │ │ └── test_replay_trigger.py │ └── __init__.py ├── __init__.py ├── share │ ├── __init__.py │ ├── test_environment.py │ ├── test_json.py │ ├── test_factory.py │ └── test_include_exclude.py ├── shippers │ ├── __init__.py │ ├── ssl │ │ ├── localhost.crt │ │ └── localhost.pkcs8.key │ ├── test_composite.py │ ├── test_logstash.py │ └── test_factory.py ├── storage │ ├── __init__.py │ ├── test_factory.py │ └── test_payload.py ├── scripts │ ├── flake8.sh │ ├── mypy.sh │ ├── black.sh │ ├── isort.sh │ ├── run_tests.sh │ ├── docker │ │ ├── black.sh │ │ ├── flake8.sh │ │ ├── mypy.sh │ │ ├── run_tests.sh │ │ └── isort.sh │ └── license_headers_check.sh ├── entrypoint.sh ├── Dockerfile └── testcontainers │ └── es.py ├── docs ├── reference │ ├── images │ │ ├── false-after-multi.png │ │ ├── true-after-multi.png │ │ ├── true-before-multi.png │ │ ├── false-before-multi.png │ │ ├── aws-serverless-lambda-flow.png │ │ ├── multiline-regexp-test-repl-main.png │ │ ├── multiline-regexp-test-repl-run.png │ │ ├── aws-serverless-forwarder-install-assets.png │ │ └── aws-serverless-forwarder-create-function.png │ └── toc.yml ├── docset.yml └── README-AWS.md ├── requirements-lint.txt ├── .flake8 ├── .coveragerc ├── .github ├── ISSUE_TEMPLATE.md ├── ISSUE_TEMPLATE │ ├── bug-report.md │ ├── feature-request.md │ ├── flaky-test.md │ └── input-output-checklist.md ├── workflows │ ├── docs-cleanup.yml │ ├── github-commands-comment.yml │ ├── docs-build.yml │ ├── version-update.yml │ ├── test-reporter.yml │ ├── test.yml │ ├── create-tag.yml │ └── releases-production.yml ├── dependabot.yml └── PULL_REQUEST_TEMPLATE.md ├── requirements.txt ├── handlers ├── __init__.py └── aws │ ├── __init__.py │ ├── exceptions.py │ ├── replay_trigger.py │ ├── kinesis_trigger.py │ ├── cloudwatch_logs_trigger.py │ └── s3_sqs_trigger.py ├── pyproject.toml ├── share ├── version.py ├── environment.py ├── json.py ├── utils.py ├── logger.py ├── __init__.py ├── events.py ├── factory.py ├── include_exlude.py ├── expand_event_list_from_field.py └── secretsmanager.py ├── requirements-tests.txt ├── dev-corner └── how-to-test-locally │ ├── .env │ ├── Taskfile.yaml │ └── README.md ├── .editorconfig ├── README.md ├── storage ├── __init__.py ├── factory.py ├── storage.py ├── s3.py └── payload.py ├── .gitignore ├── shippers ├── __init__.py ├── shipper.py ├── composite.py ├── factory.py └── logstash.py ├── mypy.ini ├── main_aws.py ├── LICENSE.txt ├── Makefile ├── .internal └── aws │ ├── cloudformation │ └── application.yaml │ └── scripts │ └── dist.sh └── CONTRIBUTING.md /.makecache/.keepdir: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | unit: unit test 4 | integration: integration test 5 | -------------------------------------------------------------------------------- /tests/handlers/aws/testdata/cloudwatch-log-3.json: -------------------------------------------------------------------------------- 1 | { 2 | "another": "continuation", 3 | "from": "the", 4 | "continuing": "queue" 5 | } 6 | -------------------------------------------------------------------------------- /docs/reference/images/false-after-multi.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elastic-serverless-forwarder/HEAD/docs/reference/images/false-after-multi.png -------------------------------------------------------------------------------- /docs/reference/images/true-after-multi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elastic-serverless-forwarder/HEAD/docs/reference/images/true-after-multi.png -------------------------------------------------------------------------------- /docs/reference/images/true-before-multi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elastic-serverless-forwarder/HEAD/docs/reference/images/true-before-multi.png -------------------------------------------------------------------------------- /requirements-lint.txt: -------------------------------------------------------------------------------- 1 | black 2 | flake8 3 | flake8-per-file-ignores 4 | isort 5 | mypy==1.10.1 6 | types-PyYAML 7 | types-mock 8 | pyflakes>=3.0.0,<3.3.0 9 | -------------------------------------------------------------------------------- /docs/reference/images/false-before-multi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elastic-serverless-forwarder/HEAD/docs/reference/images/false-before-multi.png -------------------------------------------------------------------------------- /tests/handlers/aws/testdata/cloudwatch-log-1.json: -------------------------------------------------------------------------------- 1 | { 2 | "@timestamp": "2021-12-28T11:33:08.160Z", 3 | "log.level": "info", 4 | "message": "trigger" 5 | } 6 | -------------------------------------------------------------------------------- /docs/reference/images/aws-serverless-lambda-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elastic-serverless-forwarder/HEAD/docs/reference/images/aws-serverless-lambda-flow.png -------------------------------------------------------------------------------- /docs/reference/images/multiline-regexp-test-repl-main.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elastic-serverless-forwarder/HEAD/docs/reference/images/multiline-regexp-test-repl-main.png -------------------------------------------------------------------------------- /docs/reference/images/multiline-regexp-test-repl-run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elastic-serverless-forwarder/HEAD/docs/reference/images/multiline-regexp-test-repl-run.png -------------------------------------------------------------------------------- /docs/reference/images/aws-serverless-forwarder-install-assets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elastic-serverless-forwarder/HEAD/docs/reference/images/aws-serverless-forwarder-install-assets.png -------------------------------------------------------------------------------- /docs/reference/images/aws-serverless-forwarder-create-function.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/elastic/elastic-serverless-forwarder/HEAD/docs/reference/images/aws-serverless-forwarder-create-function.png -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude= 3 | venv/**, 4 | max-line-length=120 5 | per-file-ignores = __init__.py:F401 6 | extend-ignore = 7 | # See https://github.com/PyCQA/pycodestyle/issues/373 8 | E203, 9 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [coverage:run] 2 | omit = 3 | tests/* 4 | 5 | [coverage:paths] 6 | source = 7 | ./ 8 | /app/ 9 | C:\Users\jenkins\workspace\*\src\github.com\elastic\elastic-serverless-forwarder 10 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug 3 | about: "Report confirmed bugs." 4 | 5 | --- 6 | 7 | Please include configurations and logs if available. 8 | 9 | For confirmed bugs, please report: 10 | - Version: 11 | - Steps to Reproduce: 12 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | elastic-apm==6.23.0 2 | boto3==1.39.0 3 | ecs_logging==2.2.0 4 | elasticsearch==7.17.12 5 | PyYAML==6.0.2 6 | aws_lambda_typing==2.20.0 7 | orjson==3.10.18 8 | requests==2.32.3 9 | urllib3==1.26.20 10 | typing-extensions==4.13.2 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug 3 | about: "Report confirmed bugs." 4 | 5 | --- 6 | 7 | Please include configurations and logs if available. 8 | 9 | For confirmed bugs, please report: 10 | - Version: 11 | - Steps to Reproduce: 12 | -------------------------------------------------------------------------------- /handlers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 120 3 | include = '\.pyi?$' 4 | exclude = ''' 5 | /( 6 | \.git 7 | | venv 8 | 9 | # The following are specific to Black, you probably don't want those. 
10 | | blib2to3 11 | | tests/data 12 | )/ 13 | ''' 14 | -------------------------------------------------------------------------------- /tests/handlers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | -------------------------------------------------------------------------------- /tests/share/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | -------------------------------------------------------------------------------- /tests/shippers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | -------------------------------------------------------------------------------- /tests/storage/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | -------------------------------------------------------------------------------- /tests/handlers/aws/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | -------------------------------------------------------------------------------- /share/version.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | version = "1.21.1" 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Enhancement request 3 | about: Elastic Serverless Forwarder can't do all the things, but maybe it can do your things. 4 | 5 | --- 6 | 7 | **Describe the enhancement:** 8 | 9 | **Describe a specific use case for the enhancement or feature:** 10 | 11 | -------------------------------------------------------------------------------- /tests/scripts/flake8.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | 6 | set -e 7 | 8 | flake8 . 9 | -------------------------------------------------------------------------------- /requirements-tests.txt: -------------------------------------------------------------------------------- 1 | mock==5.2.0 2 | pytest==8.4.2 3 | pytest-cov==6.1.1 4 | pytest-benchmark==5.1.0 5 | coverage==7.9.1 6 | simplejson==3.19.2 7 | ujson==5.9.0 8 | pysimdjson==6.0.2 9 | python-rapidjson==1.14 10 | cysimdjson==23.8 11 | responses==0.25.7 12 | types-requests<2.31.0.7 13 | testcontainers==3.7.1 14 | pyOpenSSL==24.0.0 15 | -------------------------------------------------------------------------------- /.github/workflows/docs-cleanup.yml: -------------------------------------------------------------------------------- 1 | name: docs-cleanup 2 | 3 | on: 4 | pull_request_target: 5 | types: 6 | - closed 7 | 8 | jobs: 9 | docs-preview: 10 | uses: elastic/docs-builder/.github/workflows/preview-cleanup.yml@main 11 | permissions: 12 | contents: none 13 | id-token: write 14 | deployments: write 15 | -------------------------------------------------------------------------------- /tests/scripts/mypy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | 6 | set -e 7 | 8 | mypy --install-types --non-interactive . 9 | -------------------------------------------------------------------------------- /tests/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 
5 | 6 | export HOME=/home/user 7 | exec /usr/local/bin/gosu user "$@" 8 | -------------------------------------------------------------------------------- /tests/handlers/aws/testdata/cloudwatch-log-2.json: -------------------------------------------------------------------------------- 1 | { 2 | "ecs": { 3 | "version": "1.6.0" 4 | }, 5 | "log": { 6 | "logger": "root", 7 | "origin": { 8 | "file": { 9 | "line": 30, 10 | "name": "handler.py" 11 | }, 12 | "function": "lambda_handler" 13 | }, 14 | "original": "trigger" 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /docs/reference/toc.yml: -------------------------------------------------------------------------------- 1 | project: 'Elastic Serverless Forwarder for AWS reference' 2 | toc: 3 | - file: index.md 4 | - file: aws-deploy-elastic-serverless-forwarder.md 5 | - file: aws-elastic-serverless-forwarder-configuration.md 6 | # - file: aws-serverless-troubleshooting.md 7 | # - file: deploy-elastic-serverless-forwarder.md 8 | # - file: configuration-options-for-elastic-serverless-forwarder.md -------------------------------------------------------------------------------- /.github/workflows/github-commands-comment.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: github-commands-comment 3 | 4 | on: 5 | pull_request_target: 6 | types: 7 | - opened 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | comment: 14 | runs-on: ubuntu-latest 15 | permissions: 16 | pull-requests: write 17 | steps: 18 | - uses: elastic/oblt-actions/elastic/github-commands@v1 19 | -------------------------------------------------------------------------------- /docs/docset.yml: -------------------------------------------------------------------------------- 1 | project: 'Elastic Serverless Forwarder for AWS' 2 | products: 3 | - id: elastic-serverless-forwarder 4 | exclude: 5 | - README-AWS.md 6 | cross_links: 7 | - docs-content 8 | - elasticsearch 9 | - integration-docs 10 | - logstash 11 | - logstash-docs-md 12 | toc: 13 | - toc: reference 14 | subs: 15 | stack: "Elastic Stack" 16 | es: "Elasticsearch" 17 | kib: "Kibana" 18 | ls: "Logstash" 19 | -------------------------------------------------------------------------------- /dev-corner/how-to-test-locally/.env: -------------------------------------------------------------------------------- 1 | # List of requirement files. 2 | # Split them with , and without space, like this: example1.txt,example2.txt 3 | REQUIREMENTS=requirements.txt 4 | 5 | # List of python files/directories to add to the zip file. 
6 | # Split them with , and without space, like this: example1.txt,example2.txt 7 | DEPENDENCIES=main_aws.py,handlers,share,storage,shippers 8 | 9 | # Zip filename 10 | FILENAME=local_esf.zip 11 | 12 | -------------------------------------------------------------------------------- /.github/workflows/docs-build.yml: -------------------------------------------------------------------------------- 1 | name: docs-build 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request_target: ~ 8 | merge_group: ~ 9 | 10 | jobs: 11 | docs-preview: 12 | uses: elastic/docs-builder/.github/workflows/preview-build.yml@main 13 | with: 14 | path-pattern: docs/** 15 | permissions: 16 | deployments: write 17 | id-token: write 18 | contents: read 19 | pull-requests: write 20 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # editorconfig.org 2 | root = true 3 | 4 | [*] 5 | indent_style = space 6 | indent_size = 4 7 | end_of_line = lf 8 | charset = utf-8 9 | trim_trailing_whitespace = true 10 | insert_final_newline = true 11 | 12 | [*.asciidoc] 13 | trim_trailing_whitespace = false 14 | 15 | [Makefile] 16 | indent_style = tab 17 | 18 | [Jenkinsfile] 19 | indent_size = 2 20 | 21 | [*.groovy] 22 | indent_size = 2 23 | 24 | [*.feature] 25 | indent_size = 2 26 | 27 | [*.yml] 28 | indent_size = 2 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://github.com/elastic/elastic-serverless-forwarder/actions/workflows/test.yml/badge.svg)](https://github.com/elastic/elastic-serverless-forwarder/actions/workflows/test.yml) 2 | 3 | # elastic-serverless-forwarder 4 | Elastic Serverless Forwarder 5 | 6 | ### Changelog [link](https://github.com/elastic/elastic-serverless-forwarder/blob/main/CHANGELOG.md) 7 | ### For AWS documentation, [go here](https://github.com/elastic/elastic-serverless-forwarder/blob/main/docs/README-AWS.md) 8 | -------------------------------------------------------------------------------- /handlers/aws/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from .exceptions import ( 6 | ConfigFileException, 7 | InputConfigException, 8 | OutputConfigException, 9 | ReplayHandlerException, 10 | TriggerTypeException, 11 | ) 12 | from .handler import lambda_handler 13 | -------------------------------------------------------------------------------- /storage/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | 5 | from .decorator import by_lines, inflate, json_collector, multi_line 6 | from .factory import StorageFactory 7 | from .payload import PayloadStorage 8 | from .s3 import S3Storage 9 | from .storage import CommonStorage, GetByLinesIterator, ProtocolStorage, StorageDecoratorIterator, StorageReader 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .makecache 2 | !.makecache/.keepdir 3 | *.pyc 4 | *.log 5 | *.egg 6 | *.db 7 | *.pid 8 | *.zip 9 | .coverage* 10 | .DS_Store 11 | .idea 12 | .vscode 13 | .benchmarks 14 | pip-log.txt 15 | /*.egg-info 16 | /build 17 | /cover 18 | /example_project/local_settings.py 19 | /docs/html 20 | /docs/doctrees 21 | /example_project/*.db 22 | tests/.schemacache 23 | coverage 24 | .tox 25 | .eggs 26 | .cache 27 | /testdb.sql 28 | venv 29 | benchmarks/result* 30 | coverage.xml 31 | tests/elastic-serverless-forwarder-junit.xml 32 | *.code-workspace 33 | .pytest_cache/ 34 | .python-version 35 | htmlcov/ 36 | -------------------------------------------------------------------------------- /tests/scripts/black.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | set -e 6 | if [[ $# -eq 0 ]] 7 | then 8 | echo "Usage: $0 diff|fix" 9 | exit 1 10 | fi 11 | 12 | if [[ "$1" = "diff" ]] 13 | then 14 | OPTIONS="--diff --check --line-length=120" 15 | elif [[ "$1" = "fix" ]] 16 | then 17 | OPTIONS="--line-length=120" 18 | fi 19 | 20 | black -t py39 ${OPTIONS} . 21 | -------------------------------------------------------------------------------- /share/environment.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | import os 6 | import platform 7 | 8 | 9 | def is_aws() -> bool: 10 | return os.getenv("AWS_EXECUTION_ENV") is not None 11 | 12 | 13 | def get_environment() -> str: 14 | if is_aws(): 15 | return os.environ["AWS_EXECUTION_ENV"] 16 | else: 17 | return f"Python/{platform.python_version()} {platform.system()}/{platform.machine()}" 18 | -------------------------------------------------------------------------------- /share/json.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | 5 | from typing import Any, AnyStr 6 | 7 | import orjson 8 | 9 | 10 | def json_dumper(json_object: Any) -> str: 11 | if isinstance(json_object, bytes): 12 | json_object = json_object.decode("utf-8") 13 | 14 | return orjson.dumps(json_object).decode("utf-8") 15 | 16 | 17 | def json_parser(payload: AnyStr) -> Any: 18 | return orjson.loads(payload) 19 | -------------------------------------------------------------------------------- /shippers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from .composite import CompositeShipper 6 | from .es import ElasticsearchShipper, JSONSerializer 7 | from .factory import ShipperFactory 8 | from .logstash import LogstashShipper 9 | from .shipper import ( 10 | EVENT_IS_EMPTY, 11 | EVENT_IS_FILTERED, 12 | EVENT_IS_SENT, 13 | EventIdGeneratorCallable, 14 | ProtocolShipper, 15 | ReplayHandlerCallable, 16 | ) 17 | -------------------------------------------------------------------------------- /tests/scripts/isort.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | 6 | set -e 7 | if [[ $# -eq 0 ]] 8 | then 9 | echo "Usage: $0 diff|fix" 10 | exit 1 11 | fi 12 | 13 | if [[ "$1" = "diff" ]] 14 | then 15 | OPTIONS="--diff --check --py 39 --profile black --line-length=120" 16 | elif [[ "$1" = "fix" ]] 17 | then 18 | OPTIONS="-v --py 39 --profile black --line-length=120" 19 | fi 20 | 21 | isort ${OPTIONS} . 22 | -------------------------------------------------------------------------------- /tests/share/test_environment.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | import os 6 | from unittest import mock 7 | 8 | import pytest 9 | 10 | from share.environment import get_environment 11 | 12 | 13 | @pytest.mark.unit 14 | @mock.patch.dict(os.environ, {"AWS_EXECUTION_ENV": "AWS_Lambda_Python3.12"}) 15 | def test_aws_environment() -> None: 16 | environment = get_environment() 17 | assert environment == "AWS_Lambda_Python3.12" 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/flaky-test.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Flaky Test 3 | about: Report a flaky test (one that doesn't pass consistently) 4 | labels: flaky-test 5 | --- 6 | 7 | ## Flaky Test 8 | 9 | * **Test Name:** Name of the failing test. 10 | * **Link:** Link to file/line number in github. 11 | * **Branch:** Git branch the test was seen in. If a PR, the branch the PR was based off. 12 | * **Artifact Link:** If available, attach the generated zip artifact associated with the stack trace for this failure. 13 | * **Notes:** Additional details about the test. e.g. 
theory as to failure cause 14 | 15 | ### Stack Trace 16 | 17 | ``` 18 | paste stack trace here 19 | ``` 20 | -------------------------------------------------------------------------------- /share/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | import hashlib 5 | import sys 6 | 7 | 8 | def get_hex_prefix(src: str) -> str: 9 | return hashlib.sha3_384(src.encode("utf-8")).hexdigest() 10 | 11 | 12 | def create_user_agent(esf_version: str, environment: str = sys.version) -> str: 13 | """Creates the 'User-Agent' header given ESF version and running environment""" 14 | return f"ElasticServerlessForwarder/{esf_version} ({environment})" 15 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | # Global options: 2 | [mypy] 3 | python_version = 3.12 4 | warn_return_any = True 5 | warn_unused_configs = True 6 | strict = True 7 | disallow_untyped_defs = True 8 | no_implicit_reexport = False 9 | exclude = venv/.* 10 | 11 | [mypy-elasticapm.*] 12 | ignore_missing_imports = True 13 | 14 | [mypy-boto3.*] 15 | ignore_missing_imports = True 16 | 17 | [mypy-botocore.*] 18 | ignore_missing_imports = True 19 | 20 | [mypy-testcontainers.*] 21 | ignore_missing_imports = True 22 | 23 | [mypy-pytest_benchmark.*] 24 | ignore_missing_imports = True 25 | 26 | [mypy-rapidjson.*] 27 | ignore_missing_imports = True 28 | 29 | [mypy-cysimdjson.*] 30 | ignore_missing_imports = True 31 | -------------------------------------------------------------------------------- /main_aws.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from typing import Any 6 | 7 | from aws_lambda_typing import context as context_ 8 | 9 | from handlers.aws import lambda_handler 10 | 11 | 12 | def handler(lambda_event: dict[str, Any], lambda_context: context_.Context) -> Any: 13 | """ 14 | AWS Lambda handler as main entrypoint 15 | This is just a wrapper to handlers.aws.lambda_handler 16 | """ 17 | return lambda_handler(lambda_event, lambda_context) 18 | -------------------------------------------------------------------------------- /tests/scripts/run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | 6 | set -ex 7 | 8 | # delete any __pycache__ folders to avoid hard-to-debug caching issues 9 | find . 
-name __pycache__ -type d -exec rm -r '{}' + || true 10 | PYTEST_ARGS=("${PYTEST_ARGS}") 11 | py.test -vv "${PYTEST_ARGS[*]}" "${PYTEST_JUNIT}" tests -s 12 | 13 | if [[ "${PYTEST_ADDOPTS}" == *"--cov"* ]]; then 14 | # Transform coverage to xml so Jenkins can parse and report it 15 | coverage xml 16 | coverage html 17 | fi 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/input-output-checklist.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: New Input / Output 3 | about: "Meta issue to track the creation or updating of a new input or output." 4 | 5 | --- 6 | 7 | # Elastic Serverless Forwarder Input / Output release checklist 8 | 9 | This checklist is intended for Devs who create or update a module to make sure input/output are consistent. 10 | 11 | ## Input 12 | 13 | For an input to be supported, the following criteria should be met: 14 | 15 | * [ ] Config for the input is defined 16 | * [ ] Handler for the input is defined 17 | * [ ] Unit tests exist 18 | * [ ] Integration tests exist 19 | * [ ] Documentation 20 | 21 | ## Output 22 | 23 | * [ ] Config for the output is defined 24 | * [ ] Implementation for the output is defined 25 | * [ ] Unit tests exist 26 | * [ ] Integration tests exist 27 | * [ ] Documentation 28 | -------------------------------------------------------------------------------- /share/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | import logging 6 | import os 7 | 8 | import ecs_logging 9 | from elasticapm.handlers.logging import LoggingFilter 10 | 11 | log_level = logging.getLevelName(os.getenv("LOG_LEVEL", "INFO").upper()) 12 | 13 | # Get the Logger 14 | logger = logging.getLogger() 15 | logger.setLevel(log_level) 16 | logger.propagate = False 17 | 18 | # Add an ECS formatter to the Handler 19 | handler = logging.StreamHandler() 20 | handler.setFormatter(ecs_logging.StdlibFormatter()) 21 | 22 | # Add an APM log correlation 23 | handler.addFilter(LoggingFilter()) # type: ignore 24 | logger.handlers = [handler] 25 | -------------------------------------------------------------------------------- /share/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0.
4 | 5 | from .config import Config, ElasticsearchOutput, Input, LogstashOutput, Output, parse_config 6 | from .events import normalise_event 7 | from .expand_event_list_from_field import ExpandEventListFromField 8 | from .factory import MultilineFactory 9 | from .include_exlude import IncludeExcludeFilter, IncludeExcludeRule 10 | from .json import json_dumper, json_parser 11 | from .logger import logger as shared_logger 12 | from .multiline import CollectBuffer, CountMultiline, FeedIterator, PatternMultiline, ProtocolMultiline, WhileMultiline 13 | from .secretsmanager import aws_sm_expander 14 | from .utils import get_hex_prefix 15 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "daily" 12 | ignore: 13 | - dependency-name: "elasticsearch" 14 | 15 | - package-ecosystem: "github-actions" 16 | directories: 17 | - "/" 18 | - "/.github/actions/*" 19 | schedule: 20 | interval: "weekly" 21 | day: "sunday" 22 | time: "22:00" 23 | groups: 24 | github-actions: 25 | patterns: 26 | - "*" 27 | -------------------------------------------------------------------------------- /share/events.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from typing import Any 6 | 7 | 8 | def normalise_event(event_payload: dict[str, Any]) -> dict[str, Any]: 9 | """ 10 | This method moves the fields payload to the root level of the event and then removes it, together with the meta payload. 11 | It has to be called as the last step, after any other operation on the event payload, just before sending to the cluster. 12 | """ 13 | if "fields" in event_payload: 14 | fields: dict[str, Any] = event_payload["fields"] 15 | for field_key in fields.keys(): 16 | event_payload[field_key] = fields[field_key] 17 | 18 | del event_payload["fields"] 19 | 20 | if "meta" in event_payload: 21 | del event_payload["meta"] 22 | 23 | return event_payload 24 | -------------------------------------------------------------------------------- /handlers/aws/exceptions.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0.
4 | 5 | 6 | class TriggerTypeException(Exception): 7 | """Raised when there is an error related to the trigger type""" 8 | 9 | pass 10 | 11 | 12 | class ConfigFileException(Exception): 13 | """Raised when there is an error related to the config file""" 14 | 15 | pass 16 | 17 | 18 | class InputConfigException(Exception): 19 | """Raised when there is an error related to the configured input""" 20 | 21 | pass 22 | 23 | 24 | class OutputConfigException(Exception): 25 | """Raised when there is an error related to the configured output""" 26 | 27 | pass 28 | 29 | 30 | class ReplayHandlerException(Exception): 31 | """Raised when there is an error in ingestion in the replay queue""" 32 | 33 | pass 34 | -------------------------------------------------------------------------------- /tests/scripts/docker/black.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | 6 | pip_cache="$HOME/.cache" 7 | docker_pip_cache="/tmp/cache/pip" 8 | 9 | cd tests 10 | 11 | docker build --build-arg UID=$UID --build-arg PYTHON_IMAGE=python:3.12 -t python-linters --file Dockerfile .. 12 | docker run \ 13 | -e LOCAL_USER_ID=$UID \ 14 | -e PIP_CACHE=${docker_pip_cache} \ 15 | -v ${pip_cache}:$(dirname ${docker_pip_cache}) \ 16 | -v "$(dirname $(pwd))":/app \ 17 | -w /app \ 18 | --rm python-linters \ 19 | /bin/bash \ 20 | -c "pip install --user -U pip 21 | pip install --user -r requirements-lint.txt --cache-dir ${docker_pip_cache} 22 | pip install --user -r requirements.txt --cache-dir ${docker_pip_cache} 23 | PATH=\${PATH}:\${HOME}/.local/bin/ /bin/bash ./tests/scripts/black.sh $*" 24 | -------------------------------------------------------------------------------- /tests/scripts/docker/flake8.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | 6 | set -e 7 | 8 | pip_cache="$HOME/.cache" 9 | docker_pip_cache="/tmp/cache/pip" 10 | 11 | cd tests 12 | 13 | docker build --build-arg UID=$UID --build-arg PYTHON_IMAGE=python:3.12 -t lint_flake8 --file Dockerfile .. 14 | docker run \ 15 | -e LOCAL_USER_ID=$UID \ 16 | -e PIP_CACHE=${docker_pip_cache} \ 17 | -v ${pip_cache}:$(dirname ${docker_pip_cache}) \ 18 | -v "$(dirname $(pwd))":/app \ 19 | -w /app \ 20 | --rm lint_flake8 \ 21 | /bin/bash \ 22 | -c "pip install --user -U pip 23 | pip install --user -r requirements-lint.txt --cache-dir ${docker_pip_cache} 24 | pip install --user -r requirements.txt --cache-dir ${docker_pip_cache} 25 | PATH=\${PATH}:\${HOME}/.local/bin/ /bin/bash ./tests/scripts/flake8.sh $*" 26 | -------------------------------------------------------------------------------- /tests/scripts/docker/mypy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. 
Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | 6 | set -e 7 | 8 | pip_cache="$HOME/.cache" 9 | docker_pip_cache="/tmp/cache/pip" 10 | 11 | cd tests 12 | 13 | docker build --build-arg UID=$UID --build-arg PYTHON_IMAGE=python:3.12 -t lint_mypy --file Dockerfile .. 14 | docker run \ 15 | -e LOCAL_USER_ID=$UID \ 16 | -e PIP_CACHE=${docker_pip_cache} \ 17 | -v ${pip_cache}:$(dirname ${docker_pip_cache}) \ 18 | -v "$(dirname $(pwd))":/app \ 19 | -w /app \ 20 | --rm lint_mypy \ 21 | /bin/bash \ 22 | -c "pip install --user -U pip 23 | pip install --user -r requirements-lint.txt --cache-dir ${docker_pip_cache} 24 | pip install --user -r requirements-tests.txt --cache-dir ${docker_pip_cache} 25 | pip install --user -r requirements.txt --cache-dir ${docker_pip_cache} 26 | PATH=\${PATH}:\${HOME}/.local/bin/ /bin/bash ./tests/scripts/mypy.sh $*" 27 | -------------------------------------------------------------------------------- /tests/shippers/ssl/localhost.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIDBDCCAeygAwIBAgIVAKHPEVe18psDxZqv8JlUJTAENPmoMA0GCSqGSIb3DQEB 3 | CwUAMBQxEjAQBgNVBAMTCWxvY2FsaG9zdDAeFw0yMjExMTgxNTA0MDlaFw0yNTEx 4 | MTcxNTA0MDlaMBQxEjAQBgNVBAMTCWxvY2FsaG9zdDCCASIwDQYJKoZIhvcNAQEB 5 | BQADggEPADCCAQoCggEBAMb4JyxYDA7oFFxi2HsaGOs0Ti9B1JFVdOxX27nbGOUK 6 | 3KAlqUDKYvCZGfrDrZxCFIVndOOfu/pKa3sg+I8naYnt2f7CLBbgYfL6Lli0FM1A 7 | L7MQUExGRO/F3WpkBElCO5I2NFqoPHYHwk0mwfInMFJSb645wFYvPqyLsqjK44L1 8 | ItubBE7tQp1+BNb2OmJYqerCX1H+DaK3azH3IJX3HnD15mHfccMm9PBLN47lsr/C 9 | E7gGUb0E4ypFu9G6hsClriwdHNIqJvO2lesf6ZJaYwDq5G5opNijeN032UmhT4NA 10 | FJlUDT+/1K3ZgzN8cRI9vEtWu2UZ0LzviaYDVhy/0CUCAwEAAaNNMEswHQYDVR0O 11 | BBYEFJkk5RLf05LzU3QEFSzAG4F7ODS8MB8GA1UdIwQYMBaAFJkk5RLf05LzU3QE 12 | FSzAG4F7ODS8MAkGA1UdEwQCMAAwDQYJKoZIhvcNAQELBQADggEBAKmfSyWAuklK 13 | u0D02JugnCNC6Ea+Ug0zBVqeNWiMHnREwZ+R0CLXvhaGJviOCIeYL+M2MO8KM41S 14 | 5n2FmQCvX/cNiaVW3qZZKQD6p0hN34luenxyJv21+Zx7CdkLZo7OT3JfCKcEN+zP 15 | nvPR6ynBqAhfy0GB+3B1M7dK3DxP73zNG8TJ3XFNWWYSLvfY8wEO/tCq6wxxttYd 16 | fcjv0LYJBpsYGiE0Ll1ZIZgjLTUfCHuQCDU0XHaZF01jb5ttpxyWVoUjSbPoFnp0 17 | nIJsGTwHZXU3Y+BQdL3DRLOdnfrdJwpXpEeTRCHfBSWNcP0p2rfWz/2Nl6gtT6zo 18 | gJw8IvJqQvw= 19 | -----END CERTIFICATE----- 20 | -------------------------------------------------------------------------------- /tests/scripts/docker/run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | set -ex 6 | 7 | pip_cache="$HOME/.cache" 8 | docker_pip_cache="/tmp/cache/pip" 9 | 10 | cd tests 11 | 12 | docker build --build-arg UID=$UID --build-arg PYTHON_IMAGE=python:3.12 -t run_tests --file Dockerfile .. 
13 | docker run \ 14 | --privileged \ 15 | -e LOCAL_USER_ID=$UID \ 16 | -e PIP_CACHE=${docker_pip_cache} \ 17 | -e PYTEST_ARGS="${PYTEST_ARGS}" \ 18 | -e PYTEST_ADDOPTS="${PYTEST_ADDOPTS}" \ 19 | -e PYTEST_JUNIT="--junitxml=/app/tests/elastic-serverless-forwarder-junit.xml" \ 20 | -e AWS_ACCESS_KEY_ID=AWS_ACCESS_KEY_ID \ 21 | -e AWS_SECRET_ACCESS_KEY=AWS_SECRET_ACCESS_KEY \ 22 | -v "$(dirname $(pwd))":/app \ 23 | -w /app \ 24 | --rm run_tests \ 25 | /bin/bash \ 26 | -c "sudo service docker start 27 | pip install --user -U pip 28 | pip install --user -r requirements-tests.txt --cache-dir ${docker_pip_cache} 29 | pip install --user -r requirements.txt --cache-dir ${docker_pip_cache} 30 | PATH=\${PATH}:\${HOME}/.local/bin/ timeout 60m /bin/bash ./tests/scripts/run_tests.sh" 31 | -------------------------------------------------------------------------------- /tests/scripts/docker/isort.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | 6 | set -e 7 | if [[ $# -eq 0 ]] 8 | then 9 | echo "Usage: $0 diff|fix" 10 | exit 1 11 | fi 12 | 13 | if [[ "$1" = "diff" ]] 14 | then 15 | OPTIONS="--diff --check --profile black --line-length=120" 16 | elif [[ "$1" = "fix" ]] 17 | then 18 | OPTIONS="--profile black --line-length=120" 19 | fi 20 | 21 | 22 | pip_cache="$HOME/.cache" 23 | docker_pip_cache="/tmp/cache/pip" 24 | 25 | cd tests 26 | 27 | docker build --build-arg UID=$UID --build-arg PYTHON_IMAGE=python:3.12 -t python-linters --file Dockerfile .. 28 | docker run \ 29 | -e LOCAL_USER_ID=$UID \ 30 | -e PIP_CACHE=${docker_pip_cache} \ 31 | -v ${pip_cache}:$(dirname ${docker_pip_cache}) \ 32 | -v "$(dirname $(pwd))":/app \ 33 | -w /app \ 34 | --rm python-linters \ 35 | /bin/bash \ 36 | -c "pip install --user -U pip 37 | pip install --user -r requirements-lint.txt --cache-dir ${docker_pip_cache} 38 | pip install --user -r requirements.txt --cache-dir ${docker_pip_cache} 39 | PATH=\${PATH}:\${HOME}/.local/bin/ /bin/bash ./tests/scripts/isort.sh $*" 40 | -------------------------------------------------------------------------------- /shippers/shipper.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from typing import Any, Callable, Protocol 6 | 7 | # ReplayHandlerCallable accepts the output type, a dict of arguments for the output and the event to be replayed. 8 | # It does not return anything. 9 | ReplayHandlerCallable = Callable[[str, dict[str, Any], dict[str, Any]], None] 10 | 11 | # EventIdGeneratorCallable accepts a dict of the events as argument. It returns the _id of that event. 
12 | EventIdGeneratorCallable = Callable[[dict[str, Any]], str] 13 | 14 | EVENT_IS_EMPTY = "EVENT_IS_EMPTY" 15 | EVENT_IS_FILTERED = "EVENT_IS_FILTERED" 16 | EVENT_IS_SENT = "EVENT_IS_SENT" 17 | 18 | 19 | class ProtocolShipper(Protocol): 20 | """ 21 | Protocol for Shipper components 22 | """ 23 | 24 | def send(self, event: dict[str, Any]) -> str: 25 | pass # pragma: no cover 26 | 27 | def set_event_id_generator(self, event_id_generator: EventIdGeneratorCallable) -> None: 28 | pass # pragma: no cover 29 | 30 | def set_replay_handler(self, replay_handler: ReplayHandlerCallable) -> None: 31 | pass # pragma: no cover 32 | 33 | def flush(self) -> None: 34 | pass # pragma: no cover 35 | -------------------------------------------------------------------------------- /dev-corner/how-to-test-locally/Taskfile.yaml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | env: 4 | # Directory to place the dependencies - just internal to this taskfile 5 | DIR: dependencies 6 | 7 | dotenv: ['.env'] 8 | 9 | tasks: 10 | default: 11 | cmds: 12 | - task: install-requirements 13 | - task: build-zip-file 14 | - task: remove-dependencies-dir 15 | - task: add-to-zip 16 | 17 | install-requirements: 18 | desc: "Install requirements from $REQUIREMENTS." 19 | internal: true 20 | requires: 21 | var: REQUIREMENTS 22 | cmds: 23 | - rm -rf $DIR 24 | - for: 25 | var: REQUIREMENTS 26 | split: ',' 27 | cmd: pip3.12 install -r ../../{{ .ITEM }} -t $DIR 28 | 29 | build-zip-file: 30 | desc: "Zip $DIR to build $FILENAME." 31 | internal: true 32 | cmds: 33 | - rm -rf $FILENAME 34 | - cd $DIR && zip -r ../$FILENAME . 35 | 36 | remove-dependencies-dir: 37 | desc: "Delete $DIR." 38 | internal: true 39 | cmds: 40 | - rm -rf $DIR 41 | 42 | add-to-zip: 43 | desc: "Add $DEPENDENCIES to zip file." 44 | internal: true 45 | cmds: 46 | - for: 47 | var: DEPENDENCIES 48 | split: ',' 49 | cmd: zip -r $FILENAME ../../{{ .ITEM }} 50 | -------------------------------------------------------------------------------- /.github/workflows/version-update.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Workflow to check that the version inside share/version.py matches the version in the last entry 3 | # of CHANGELOG 4 | name: version-update 5 | 6 | on: 7 | pull_request: 8 | paths: 9 | - 'share/version.py' 10 | - 'CHANGELOG.md' 11 | 12 | jobs: 13 | 14 | version-increase: 15 | runs-on: ubuntu-latest 16 | timeout-minutes: 5 17 | 18 | steps: 19 | 20 | - uses: actions/checkout@v5 21 | 22 | - name: Compare versions in share/version.py and CHANGELOG last entry 23 | shell: bash 24 | run: | 25 | # Get the version inside share/version.py 26 | version_py=$(grep -oE '[0-9]+\.[0-9]+\.[0-9]+(\-[a-zA-Z]+[0-9]+)?' share/version.py) 27 | echo "::notice::Version inside share/version.py is $version_py." 28 | 29 | # Assumes the first line of the CHANGELOG file follows a format like this: '### v1.17.1 - 2024/09/23' 30 | # Example: 31 | # Input: '### v1.17.1 - 2024/09/23' 32 | # Output: '1.17.1' 33 | version_changelog=$(awk 'NR==1' CHANGELOG.md | awk '{print substr($2,2)}') 34 | echo "::notice::Version in CHANGELOG last entry is $version_changelog." 35 | 36 | if [ "$version_changelog" != "$version_py" ]; then 37 | error="Versions in share/version.py and CHANGELOG do not match." 38 | reminder="Make sure CHANGELOG first line follows format '### v - '." 
39 | echo "::error::$error $reminder" 40 | exit 1 41 | fi 42 | -------------------------------------------------------------------------------- /.github/workflows/test-reporter.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ## Workflow to process the JUnit test results and add a report to the checks. 3 | name: test-reporter 4 | on: 5 | workflow_run: 6 | workflows: 7 | - test 8 | types: 9 | - completed 10 | 11 | permissions: 12 | contents: read 13 | actions: read 14 | checks: write 15 | pull-requests: write 16 | 17 | jobs: 18 | report: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - uses: elastic/oblt-actions/test-report@v1 22 | with: 23 | artifact: /test-results(.*)/ # artifact name pattern 24 | name: JUnit Tests # Name of the check run which will be created 25 | path: "**/elastic-serverless-forwarder-junit.xml" # Path to test results (inside artifact .zip) 26 | reporter: java-junit # Format of test results 27 | output-to: step-summary # Write summary in the PR 28 | 29 | coverage: 30 | if: ${{ github.event.workflow_run.event == 'pull_request' }} 31 | runs-on: ubuntu-latest 32 | steps: 33 | - uses: actions/download-artifact@v6 34 | with: 35 | pattern: test-results* 36 | merge-multiple: true 37 | run-id: ${{ github.event.workflow_run.id }} 38 | - uses: 5monkeys/cobertura-action@ee5787cc56634acddedc51f21c7947985531e6eb 39 | with: 40 | path: "**/coverage.xml" 41 | skip_covered: false 42 | minimum_coverage: 100 43 | fail_below_threshold: true 44 | show_line: true 45 | show_branch: true 46 | show_missing: true 47 | -------------------------------------------------------------------------------- /share/factory.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from typing import Any, Callable 6 | 7 | from .multiline import CountMultiline, PatternMultiline, ProtocolMultiline, WhileMultiline 8 | 9 | _init_definition_by_multiline_type: dict[str, dict[str, Any]] = { 10 | "count": { 11 | "class": CountMultiline, 12 | }, 13 | "pattern": { 14 | "class": PatternMultiline, 15 | }, 16 | "while_pattern": { 17 | "class": WhileMultiline, 18 | }, 19 | } 20 | 21 | 22 | class MultilineFactory: 23 | """ 24 | Multiline factory. 25 | Provides a static method to instantiate a multiline processor 26 | """ 27 | 28 | @staticmethod 29 | def create(multiline_type: str, **kwargs: Any) -> ProtocolMultiline: 30 | """ 31 | Instantiates a concrete Multiline processor given a multiline type and args 32 | """ 33 | 34 | if multiline_type not in _init_definition_by_multiline_type: 35 | raise ValueError( 36 | "You must provide one of the following multiline types: " 37 | + f"{', '.join(_init_definition_by_multiline_type.keys())}. 
{multiline_type} given" 38 | ) 39 | 40 | multiline_definition = _init_definition_by_multiline_type[multiline_type] 41 | 42 | multiline_builder: Callable[..., ProtocolMultiline] = multiline_definition["class"] 43 | 44 | return multiline_builder(**kwargs) 45 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | permissions: 10 | contents: read 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.ref }} 14 | cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} 15 | 16 | jobs: 17 | license: 18 | runs-on: ubuntu-latest 19 | timeout-minutes: 5 20 | steps: 21 | - uses: actions/checkout@v5 22 | - run: make license 23 | 24 | lint: 25 | runs-on: ubuntu-latest 26 | timeout-minutes: 5 27 | steps: 28 | - uses: actions/checkout@v5 29 | - run: make docker-lint 30 | 31 | coverage: 32 | runs-on: ubuntu-latest 33 | timeout-minutes: 60 34 | 35 | steps: 36 | 37 | - uses: actions/checkout@v5 38 | 39 | - uses: actions/setup-python@v6 40 | with: 41 | python-version: '3.12' # As defined in tests/scripts/docker/run_tests.sh 42 | cache: 'pip' # caching pip dependencies 43 | 44 | - run: make all-requirements 45 | 46 | - run: make coverage 47 | env: 48 | # See https://github.com/elastic/elastic-serverless-forwarder/pull/280#issuecomment-1461554126 49 | AWS_ACCESS_KEY_ID: AWS_ACCESS_KEY_ID 50 | AWS_SECRET_ACCESS_KEY: AWS_SECRET_ACCESS_KEY 51 | PYTEST_JUNIT: "--junitxml=./elastic-serverless-forwarder-junit.xml" 52 | 53 | - name: Store test results 54 | if: success() || failure() 55 | uses: actions/upload-artifact@v5 56 | with: 57 | name: test-results 58 | path: | 59 | **/elastic-serverless-forwarder-junit.xml 60 | **/coverage.xml 61 | -------------------------------------------------------------------------------- /docs/README-AWS.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | The Elastic Serverless Forwarder is an Amazon Web Services (AWS) Lambda function that ships logs from an AWS environment to Elastic. 4 | 5 | Please refer to the official [Elastic documentation for Elastic Serverless Forwarder](https://www.elastic.co/docs/reference/aws-forwarder) for detailed instructions on how to deploy and configure the forwarder. 6 | 7 | ## Overview 8 | 9 | - Amazon S3 (via SQS event notifications) 10 | - Amazon Kinesis Data Streams 11 | - Amazon CloudWatch Logs subscription filters 12 | - Amazon SQS message payload 13 | 14 | ![Lambda flow](https://github.com/elastic/elastic-serverless-forwarder/raw/lambda-v0.25.0/docs/lambda-flow.png) 15 | 16 | ## Important - v1.6.0 17 | 18 | #### Version 1.6.0 introduces a new event ID format which is backwards incompatible with previously indexed events. Be aware that previously indexed events would be duplicated if they trigger the forwarder again after upgrading to this version. More information is available at [our troubleshooting documentation](https://www.elastic.co/guide/en/observability/master/aws-serverless-troubleshooting.html#aws-serverless-troubleshooting-event-id-format). 
19 | 20 | ## Resources and links 21 | 22 | * [Elastic documentation for Elastic Serverless Forwarder](https://www.elastic.co/docs/reference/aws-forwarder) 23 | * [Elastic documentation for integrations](https://docs.elastic.co/en/integrations) 24 | * [Blog: Elastic and AWS Serverless Application Repository (SAR): Speed time to actionable insights with frictionless log ingestion from Amazon S3](https://www.elastic.co/blog/elastic-and-aws-serverless-application-repository-speed-time-to-actionable-insights-with-frictionless-log-ingestion-from-amazon-s3) 25 | -------------------------------------------------------------------------------- /tests/share/test_json.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from unittest import TestCase 6 | 7 | import pytest 8 | 9 | from share import json_dumper, json_parser 10 | 11 | 12 | @pytest.mark.unit 13 | class TestJsonParser(TestCase): 14 | def test_json_parser(self) -> None: 15 | with self.subTest("loads raises"): 16 | with self.assertRaises(Exception): 17 | json_parser("[") 18 | 19 | with self.subTest("loads array"): 20 | loaded = json_parser("[1, 2, 3]") 21 | assert [1, 2, 3] == loaded 22 | 23 | with self.subTest("loads dict"): 24 | loaded = json_parser('{"key":"value"}') 25 | assert {"key": "value"} == loaded 26 | 27 | with self.subTest("loads scalar"): 28 | loaded = json_parser('"a string"') 29 | assert "a string" == loaded 30 | 31 | 32 | @pytest.mark.unit 33 | class TestJsonDumper(TestCase): 34 | def test_json_dumper(self) -> None: 35 | with self.subTest("dumps raises"): 36 | with self.assertRaises(Exception): 37 | json_dumper(set()) 38 | 39 | with self.subTest("dumps bytes"): 40 | dumped = json_dumper(b"bytes") 41 | assert '"bytes"' == dumped 42 | 43 | with self.subTest("dumps str"): 44 | dumped = json_dumper("string") 45 | assert '"string"' == dumped 46 | 47 | with self.subTest("dumps dict"): 48 | dumped = json_dumper({"key": "value"}) 49 | assert '{"key":"value"}' == dumped 50 | -------------------------------------------------------------------------------- /tests/shippers/ssl/localhost.pkcs8.key: -------------------------------------------------------------------------------- 1 | -----BEGIN PRIVATE KEY----- 2 | MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQDG+CcsWAwO6BRc 3 | Yth7GhjrNE4vQdSRVXTsV9u52xjlCtygJalAymLwmRn6w62cQhSFZ3Tjn7v6Smt7 4 | IPiPJ2mJ7dn+wiwW4GHy+i5YtBTNQC+zEFBMRkTvxd1qZARJQjuSNjRaqDx2B8JN 5 | JsHyJzBSUm+uOcBWLz6si7KoyuOC9SLbmwRO7UKdfgTW9jpiWKnqwl9R/g2it2sx 6 | 9yCV9x5w9eZh33HDJvTwSzeO5bK/whO4BlG9BOMqRbvRuobApa4sHRzSKibztpXr 7 | H+mSWmMA6uRuaKTYo3jdN9lJoU+DQBSZVA0/v9St2YMzfHESPbxLVrtlGdC874mm 8 | A1Ycv9AlAgMBAAECggEAFGyx3L/3EOqq+GbfMRfXOgHHCtvXncmndIF6WURP9Gce 9 | icQD8VY1PNyCibbfkfxIjf82RQ2HJuTPzrZuPYe+sj171MaK7xg+hco/yrRFtGIj 10 | 415H1+SYvAJUPdEhjYGiitpMmofNmRIn1HKnkxuJvtGejER6ZX55EQ5QGzqkizM/ 11 | dfBxK7VirLD48WyOpbni4paow9155BVSp8gyt0gKyfDWwojmwFJhAcJk79HoPRvG 12 | 450lhzPt2Twdzv5r9FbRL93pA0OnWrY5VoEAlKRxIeDr/pHm3anTJpzd3gHbAkXF 13 | Yk5+8wy2h5BqbyVDhrST8unWhdTwDuS+EfFXY5FwcQKBgQDmQm2OZ09nbkb8iv46 14 | VMp37WoFrevwvqFnN9rdgdPPdaJgrxO/86XltqZtlNhQQokTqxHtUT7/7EQh23P/ 15 | Yo8whccIMCd/2BmKK/vaK8b/lt0y5hRDepQkJd9Yy99sZCZIq+GqdqDdyl3KUF5n 16 | v/9uqkd/GPGaeg33PxrqccmH2QKBgQDdNkNbEmLDwpGXx1X4A1ZFwk33opPYEf0Q 17 | 
F8JTCuZ6fuhqRGYBtn+et4apqWylyO2UZ0pi2PeQR+ua8nsBaW+vfeumzlEElvCD 18 | Qv/x4qB8sCnlOj/QtRRNyv/SGaatySEAaxoEghbbLPcbvvWsxzHQ6VlOxAF6ei3t 19 | F/1k4PwHLQKBgQCdXABrNYc6dE7nAEZFrvS/iOzP/NAlsuGzHKTihHswaIAT+xJQ 20 | OD7EAlMyyGocT5xGaL7TpJadZ+YYDVX0znJ1ArfGjMIxyImLuAtiSlCxE3UP99UZ 21 | WIgtPASrNoj2FmtjdrO+P1wotsfqH4qk9L2n7470+SMEIy2wLtxCJIlJ4QKBgQCU 22 | vh7uRt+YJ+VD/GoG+R9yiqNoZq4otHfH8WHd+s9dAKaAhftdHXyUmWz3+g8vLnrp 23 | tcZjzuYv4tw+dNtW6LGLfA0PPV1my6Nvb4av+6XUEZQqKU+to4TChkQb3tmfs0T4 24 | hguZimuZ9pM12eJRyiLqCW3es5cW5r6o1N+yjEGLBQKBgQDhgIvw+Ug2KORKND3C 25 | 7d7WlujRnGgUBygR4dLl//xfu9wrE9aSO+auCn4AXfukeeNn+a7V1Nh8j2Fsw9dF 26 | HomK3swZO4ab/DKDelNOHd0zyyjdFxJeSWg8CXDv5j1WR+Soh7Xqv+sO4RtQWNXQ 27 | nMfcUb52goDx+rT5ZPmjOeucSA== 28 | -----END PRIVATE KEY----- 29 | -------------------------------------------------------------------------------- /tests/scripts/license_headers_check.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | 6 | if [[ $# -eq 0 ]] 7 | then 8 | echo "Usage: $0 check|fix" 9 | exit 1 10 | fi 11 | 12 | FILES=$(find . \( -iname "*.py" -or -iname "*.sh" \) -not -path "./venv/*") 13 | for FILE in $FILES 14 | do 15 | MISSING=$(grep --files-without-match "Licensed under the Elastic License 2.0" "$FILE") 16 | if [[ -n "$MISSING" ]] 17 | then 18 | if [[ "$1" = "fix" ]] 19 | then 20 | echo fix "$FILE" 21 | TMPFILE=$(mktemp /tmp/license.XXXXXXXXXX) 22 | if [[ "$FILE" == *".sh" && $(grep "#!/usr/bin/env bash" "$FILE") ]] 23 | then 24 | cat <<EOF > "$TMPFILE" 25 | #!/usr/bin/env bash 26 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 27 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 28 | # you may not use this file except in compliance with the Elastic License 2.0. 29 | EOF 30 | tail -n +2 "$FILE" >> "$TMPFILE" 31 | mv "$TMPFILE" "$FILE" 32 | chmod 755 "$FILE" 33 | else 34 | cat <<EOF > "$TMPFILE" 35 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 36 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 37 | # you may not use this file except in compliance with the Elastic License 2.0. 
38 | 39 | EOF 40 | cat "$FILE" >> "$TMPFILE" 41 | mv "$TMPFILE" "$FILE" 42 | fi 43 | else 44 | echo "File with missing copyright header:" 45 | echo "$MISSING" 46 | exit 1 47 | fi 48 | fi 49 | done 50 | -------------------------------------------------------------------------------- /tests/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PYTHON_IMAGE 2 | FROM ${PYTHON_IMAGE} 3 | 4 | RUN apt-get -qq update && apt-get -qq -y --no-install-recommends install \ 5 | ca-certificates \ 6 | curl \ 7 | docker.io \ 8 | libsasl2-dev \ 9 | sudo \ 10 | zip && \ 11 | rm -rf /var/lib/apt/lists/* 12 | 13 | ARG UID={${UID}:-1001} 14 | ENV USER_ID=${LOCAL_USER_ID:-${UID}} 15 | 16 | RUN echo '{"storage-driver": "vfs"}' > /etc/docker/daemon.json 17 | 18 | # setup user 19 | RUN useradd --shell /bin/bash -u $USER_ID --gid 0 --non-unique --comment "" --create-home user 20 | RUN usermod -a -G sudo user 21 | RUN usermod -a -G docker user 22 | RUN echo "user ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/user 23 | 24 | 25 | # connection to ha.pool.sks-keyservers.net fails sometimes, so let's retry with couple different servers 26 | RUN for server in $(shuf -e ha.pool.sks-keyservers.net \ 27 | hkp://p80.pool.sks-keyservers.net:80 \ 28 | keyserver.ubuntu.com \ 29 | hkp://keyserver.ubuntu.com:80 \ 30 | pgp.mit.edu) ; do gpg --no-tty --keyserver "$server" --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 && s=0 && break || s=$?; done; (exit $s) 31 | 32 | RUN curl -o /usr/local/bin/gosu -sSL "https://github.com/tianon/gosu/releases/download/1.14/gosu-$(dpkg --print-architecture)" \ 33 | && curl -o /usr/local/bin/gosu.asc -sSL "https://github.com/tianon/gosu/releases/download/1.14/gosu-$(dpkg --print-architecture).asc" \ 34 | && gpg --verify /usr/local/bin/gosu.asc \ 35 | && rm /usr/local/bin/gosu.asc \ 36 | && chmod +x /usr/local/bin/gosu 37 | 38 | COPY tests/entrypoint.sh /usr/local/bin/entrypoint.sh 39 | 40 | COPY requirements.txt / 41 | 42 | # if we're in a pypy image, link pypy/pypy3 to /usr/local/bin/python 43 | RUN if command -v pypy3; then ln -s $(command -v pypy3) /usr/local/bin/python; elif command -v pypy; then ln -s $(command -v pypy) /usr/local/bin/python; fi 44 | 45 | RUN chmod +x /usr/local/bin/entrypoint.sh 46 | 47 | WORKDIR /app 48 | 49 | ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] 50 | -------------------------------------------------------------------------------- /tests/share/test_factory.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | 5 | from unittest import TestCase 6 | 7 | import pytest 8 | 9 | from share import CountMultiline, MultilineFactory, PatternMultiline, WhileMultiline 10 | 11 | 12 | @pytest.mark.unit 13 | class TestMultilineFactory(TestCase): 14 | def test_create(self) -> None: 15 | with self.subTest("create count multiline success"): 16 | multiline = MultilineFactory.create(multiline_type="count", count_lines=1) 17 | 18 | assert isinstance(multiline, CountMultiline) 19 | 20 | with self.subTest("create count multiline error"): 21 | with self.assertRaises(TypeError): 22 | MultilineFactory.create(multiline_type="count") 23 | 24 | with self.subTest("create pattern multiline success"): 25 | multiline = MultilineFactory.create(multiline_type="pattern", pattern=".+", match="after") 26 | 27 | assert isinstance(multiline, PatternMultiline) 28 | 29 | with self.subTest("create pattern multiline error"): 30 | with self.assertRaises(TypeError): 31 | MultilineFactory.create(multiline_type="pattern") 32 | 33 | with self.subTest("create while_pattern multiline success"): 34 | multiline = MultilineFactory.create(multiline_type="while_pattern", pattern=".+") 35 | 36 | assert isinstance(multiline, WhileMultiline) 37 | 38 | with self.subTest("create while_pattern multiline error"): 39 | with self.assertRaises(TypeError): 40 | MultilineFactory.create(multiline_type="while_pattern") 41 | 42 | with self.subTest("create invalid type"): 43 | with self.assertRaisesRegex( 44 | ValueError, 45 | "^You must provide one of the following multiline types: " 46 | "count, pattern, while_pattern. invalid type given$", 47 | ): 48 | MultilineFactory.create(multiline_type="invalid type") 49 | -------------------------------------------------------------------------------- /storage/factory.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from typing import Any, Callable, Optional 6 | 7 | from share import ExpandEventListFromField, ProtocolMultiline, json_dumper 8 | 9 | from .payload import PayloadStorage 10 | from .s3 import S3Storage 11 | from .storage import ProtocolStorage 12 | 13 | _init_definition_by_storage_type: dict[str, dict[str, Any]] = { 14 | "s3": {"class": S3Storage, "kwargs": ["bucket_name", "object_key"]}, 15 | "payload": {"class": PayloadStorage, "kwargs": ["payload"]}, 16 | } 17 | 18 | 19 | class StorageFactory: 20 | """ 21 | Storage factory. 
22 | Provides static methods to instantiate a Storage 23 | """ 24 | 25 | @staticmethod 26 | def create( 27 | storage_type: str, 28 | json_content_type: Optional[str] = None, 29 | event_list_from_field_expander: Optional[ExpandEventListFromField] = None, 30 | multiline_processor: Optional[ProtocolMultiline] = None, 31 | **kwargs: Any, 32 | ) -> ProtocolStorage: 33 | """ 34 | Instantiates a concrete Storage given its type and the storage init kwargs 35 | """ 36 | 37 | if storage_type not in _init_definition_by_storage_type: 38 | raise ValueError( 39 | "You must provide one of the following storage types: " 40 | + f"{', '.join(_init_definition_by_storage_type.keys())}" 41 | ) 42 | 43 | storage_definition = _init_definition_by_storage_type[storage_type] 44 | storage_kwargs = storage_definition["kwargs"] 45 | 46 | init_kwargs: list[str] = [key for key in kwargs.keys() if key in storage_kwargs and kwargs[key]] 47 | if len(init_kwargs) != len(storage_kwargs): 48 | raise ValueError( 49 | f"You must provide the following not empty init kwargs for {storage_type}: " 50 | + f"{', '.join(storage_kwargs)}. (provided: {json_dumper(kwargs)})" 51 | ) 52 | 53 | kwargs["json_content_type"] = json_content_type 54 | kwargs["multiline_processor"] = multiline_processor 55 | kwargs["event_list_from_field_expander"] = event_list_from_field_expander 56 | 57 | storage_builder: Callable[..., ProtocolStorage] = storage_definition["class"] 58 | return storage_builder(**kwargs) 59 | -------------------------------------------------------------------------------- /.github/workflows/create-tag.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Workflow to create a new git tag if version.py variable version gets updated 3 | name: create-tag 4 | 5 | permissions: 6 | contents: write # write permission is required to create a GitHub release 7 | 8 | on: 9 | push: 10 | branches: 11 | - 'main' 12 | paths: 13 | - 'share/version.py' 14 | 15 | jobs: 16 | 17 | create-tag: 18 | runs-on: ubuntu-latest 19 | timeout-minutes: 5 20 | 21 | steps: 22 | 23 | - uses: actions/checkout@v5 24 | 25 | - name: Get version number 26 | shell: bash 27 | run: | 28 | VERSION=$(grep -oE '[0-9]+\.[0-9]+\.[0-9]+(\-[a-zA-Z]+[0-9]+)?' share/version.py) 29 | echo "VERSION=${VERSION}" >> $GITHUB_ENV 30 | echo "::notice::ESF version is $VERSION." 31 | 32 | - name: Check if version increased 33 | id: version 34 | shell: bash 35 | run: | 36 | git fetch --tags 37 | 38 | # We will list all tags sorted by the version after the prefix lambda-v. 39 | # We retrieve only the first line - that is, the most recent version. 40 | # After that, we remove the prefix to only get the version number. 41 | old_version=$(git tag --list --sort=-version:refname "lambda-v*" | awk 'NR==1{print $1}' | awk -F"lambda-v" '{ print $NF }') 42 | 43 | # We now need to compare the current version inside version.py. 44 | IFS='.' read -a new_numbers <<< ${{ env.VERSION }} 45 | IFS='.' read -a old_numbers <<< $old_version 46 | 47 | CREATE_TAG=false # only create tag if version increased 48 | for i in 0 1 2 49 | do 50 | if [[ ${new_numbers[i]} > ${old_numbers[i]} ]] 51 | then 52 | CREATE_TAG=true 53 | break 54 | elif [[ ${new_numbers[i]} < ${old_numbers[i]} ]] 55 | then 56 | break 57 | fi 58 | done 59 | 60 | echo "CREATE_TAG=${CREATE_TAG}" >> $GITHUB_ENV 61 | echo "::notice::Latest version is $old_version." 62 | echo "::notice::Current version is ${{ env.VERSION }}." 63 | echo "::notice::The result for creating tag is $CREATE_TAG." 
64 | 65 | - name: Create tag 66 | if: env.CREATE_TAG == 'true' # run only in case CREATE_TAG is true 67 | uses: actions/github-script@v8 68 | with: 69 | script: | 70 | github.rest.git.createRef({ 71 | owner: context.repo.owner, 72 | repo: context.repo.repo, 73 | ref: 'refs/tags/lambda-v' + "${{ env.VERSION }}", 74 | sha: context.sha 75 | }) 76 | -------------------------------------------------------------------------------- /dev-corner/how-to-test-locally/README.md: -------------------------------------------------------------------------------- 1 | This document contains details about how to build ESF Lambda locally. 2 | Once built, the Lambda can be deployed to validate functionality. 3 | 4 | ## Building lambda 5 | 6 | To build the Lambda, you may use one of the options below, 7 | 8 | ### Using Makefile 9 | 10 | To build, 11 | 12 | ```shell 13 | make package 14 | ``` 15 | 16 | This will generate a Lambda zip named `local_esf.zip`. 17 | 18 | To clean up any leftover resources, 19 | 20 | ```shell 21 | make clean 22 | ``` 23 | 24 | ### Using Task file 25 | 26 | #### Requirements 27 | 28 | - [Terraform](https://www.terraform.io/) 29 | - (Optional) [Taskfile](https://taskfile.dev/installation/) 30 | 31 | 32 | #### Building 33 | 34 | **Important note**: ESF dependencies have been tested on architecture `x86_64`. Make sure to use it as well. 35 | 36 | You can build your own, or you can choose to run: 37 | ```bash 38 | task 39 | ``` 40 | To build it automatically. 41 | 42 | You can update the task variables in the `.env` file: 43 | - The list of python dependencies, `DEPENDENCIES`. 44 | - The list of python requirement files, `REQUIREMENTS`. 45 | - The name of the zip file, `FILENAME`. 46 | 47 | 48 | ## Deploying Lambda 49 | 50 | Once Lambda zip is ready, you should use the code in [ESF terraform repository](https://github.com/elastic/terraform-elastic-esf). 51 | 52 | > **NOTE**: ESF lambda function is using architecture `x86_64`. 53 | 54 | Place your `local_esf.zip` (or `` if you changed the value) in the same directory as ESF terraform. 55 | 56 | Go to `esf.tf` file and edit: 57 | 58 | ```terraform 59 | locals { 60 | ... 61 | dependencies-file = "local_esf.zip" # value of FILENAME in .env 62 | ... 63 | } 64 | ``` 65 | 66 | Remove/comment these lines from `esf.tf` file: 67 | 68 | ```terraform 69 | #resource "terraform_data" "curl-dependencies-zip" { 70 | # provisioner "local-exec" { 71 | # command = "curl -L -O ${local.dependencies-bucket-url}/${local.dependencies-file}" 72 | # } 73 | #} 74 | ``` 75 | 76 | And fix the now missing dependency in `dependencies-file`: 77 | 78 | ```terraform 79 | resource "aws_s3_object" "dependencies-file" { 80 | bucket = local.config-bucket-name 81 | key = local.dependencies-file 82 | source = local.dependencies-file 83 | 84 | depends_on = [aws_s3_bucket.esf-config-bucket] #, terraform_data.curl-dependencies-zip] 85 | } 86 | ``` 87 | 88 | Now follow the README file from [ESF terraform repository](https://github.com/elastic/terraform-elastic-esf) on how to configure the remaining necessary variables. You will have to configure `release-version` variable, but it will not be relevant to this. You can set any value you want for it. 89 | 90 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 10 | 11 | ## What does this PR do? 12 | 13 | 16 | 17 | ## Why is it important? 
18 | 19 | 22 | 23 | ## Checklist 24 | 25 | 30 | 31 | - [ ] My code follows the style guidelines of this project 32 | - [ ] I have commented my code, particularly in hard-to-understand areas 33 | - [ ] I have made corresponding changes to the documentation 34 | - [ ] I have made corresponding change to the default configuration files 35 | - [ ] I have added tests that prove my fix is effective or that my feature works 36 | - [ ] I have added an entry in `CHANGELOG.md` and updated `share/version.py`, if my change requires a new release. 37 | 38 | ## Author's Checklist 39 | 40 | 43 | - [ ] 44 | 45 | ## How to test this PR locally 46 | 47 | 50 | 51 | ## Related issues 52 | 53 | 61 | - 62 | 63 | ## Use cases 64 | 65 | 70 | 71 | ## Screenshots 72 | 73 | 76 | 77 | ## Logs 78 | 79 | 82 | -------------------------------------------------------------------------------- /tests/share/test_include_exclude.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from __future__ import annotations 6 | 7 | from unittest import TestCase 8 | 9 | import pytest 10 | 11 | from share import IncludeExcludeFilter, IncludeExcludeRule 12 | 13 | _message = "a message" 14 | 15 | 16 | @pytest.mark.unit 17 | class TestIncludeExclude(TestCase): 18 | def test_include_exclude(self) -> None: 19 | with self.subTest("no rules"): 20 | include_exclude_filter = IncludeExcludeFilter() 21 | assert include_exclude_filter.filter(_message) is True 22 | 23 | with self.subTest("exclude rule match"): 24 | include_exclude_filter = IncludeExcludeFilter(exclude_patterns=[IncludeExcludeRule(pattern="message")]) 25 | assert include_exclude_filter.filter(_message) is False 26 | 27 | with self.subTest("exclude rule not match"): 28 | include_exclude_filter = IncludeExcludeFilter(exclude_patterns=[IncludeExcludeRule(pattern="not matching")]) 29 | assert include_exclude_filter.filter(_message) is True 30 | 31 | with self.subTest("include rule match"): 32 | include_exclude_filter = IncludeExcludeFilter(include_patterns=[IncludeExcludeRule(pattern="message")]) 33 | assert include_exclude_filter.filter(_message) is True 34 | 35 | with self.subTest("include rule not match"): 36 | include_exclude_filter = IncludeExcludeFilter(include_patterns=[IncludeExcludeRule(pattern="not matching")]) 37 | assert include_exclude_filter.filter(_message) is False 38 | 39 | with self.subTest("both rules exclude priority"): 40 | include_exclude_filter = IncludeExcludeFilter( 41 | include_patterns=[IncludeExcludeRule(pattern="message")], 42 | exclude_patterns=[IncludeExcludeRule(pattern="message")], 43 | ) 44 | assert include_exclude_filter.filter(_message) is False 45 | 46 | with self.subTest("both rules include match"): 47 | include_exclude_filter = IncludeExcludeFilter( 48 | include_patterns=[IncludeExcludeRule(pattern="message")], 49 | exclude_patterns=[IncludeExcludeRule(pattern="not matching")], 50 | ) 51 | assert include_exclude_filter.filter(_message) is True 52 | 53 | with self.subTest("both rules no match"): 54 | include_exclude_filter = IncludeExcludeFilter( 55 | include_patterns=[IncludeExcludeRule(pattern="not matching")], 56 | exclude_patterns=[IncludeExcludeRule(pattern="not matching")], 57 | ) 58 | assert include_exclude_filter.filter(_message) is False 59 | 
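The include/exclude tests above pin down the filter's precedence rules: with no patterns configured every message passes, exclude patterns always take priority over include patterns, and an include-only filter drops any message that matches none of its patterns. As a minimal illustrative sketch (not part of the repository; it only uses the `IncludeExcludeFilter` and `IncludeExcludeRule` API exercised by these tests), the same behaviour can be demonstrated directly:

```python
# Illustrative sketch only: mirrors the precedence covered by the tests above.
from share import IncludeExcludeFilter, IncludeExcludeRule

# No rules configured: every message passes through.
assert IncludeExcludeFilter().filter("a message") is True

# A message matching both an include and an exclude pattern is dropped: exclude wins.
both = IncludeExcludeFilter(
    include_patterns=[IncludeExcludeRule(pattern="message")],
    exclude_patterns=[IncludeExcludeRule(pattern="message")],
)
assert both.filter("a message") is False

# An include-only filter drops messages that match none of its patterns.
include_only = IncludeExcludeFilter(include_patterns=[IncludeExcludeRule(pattern="^ERROR")])
assert include_only.filter("ERROR: something failed") is True
assert include_only.filter("INFO: all good") is False
```

The boolean returned by `filter()` is what `CompositeShipper.send` (in `shippers/composite.py`, below) relies on to decide whether to forward an event or return `EVENT_IS_FILTERED`.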
-------------------------------------------------------------------------------- /shippers/composite.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from copy import deepcopy 6 | from typing import Any, Optional 7 | 8 | from share import IncludeExcludeFilter, shared_logger 9 | 10 | from .shipper import ( 11 | EVENT_IS_EMPTY, 12 | EVENT_IS_FILTERED, 13 | EVENT_IS_SENT, 14 | EventIdGeneratorCallable, 15 | ProtocolShipper, 16 | ReplayHandlerCallable, 17 | ) 18 | 19 | 20 | class CompositeShipper: 21 | """ 22 | Composite Shipper. 23 | This class implements composite pattern for shippers 24 | """ 25 | 26 | def __init__(self, **kwargs: Any): 27 | self._shippers: list[ProtocolShipper] = [] 28 | self._include_exclude_filter: Optional[IncludeExcludeFilter] = None 29 | 30 | def add_include_exclude_filter(self, include_exclude_filter: Optional[IncludeExcludeFilter]) -> None: 31 | """ 32 | IncludeExcludeFilter setter. 33 | Add an includeExcludeFilter to the composite 34 | """ 35 | self._include_exclude_filter = include_exclude_filter 36 | 37 | def add_shipper(self, shipper: ProtocolShipper) -> None: 38 | """ 39 | Shipper setter. 40 | Add a shipper to the composite 41 | """ 42 | self._shippers.append(shipper) 43 | 44 | def set_event_id_generator(self, event_id_generator: EventIdGeneratorCallable) -> None: 45 | for shipper in self._shippers: 46 | shipper.set_event_id_generator(event_id_generator=event_id_generator) 47 | 48 | def set_replay_handler(self, replay_handler: ReplayHandlerCallable) -> None: 49 | for shipper in self._shippers: 50 | shipper.set_replay_handler(replay_handler=replay_handler) 51 | 52 | def send(self, event: dict[str, Any]) -> str: 53 | message: str = "" 54 | if "fields" in event and "message" in event["fields"]: 55 | message = event["fields"]["message"] 56 | elif "message" in event: 57 | message = event["message"] 58 | 59 | if len(message.strip()) == 0: 60 | shared_logger.debug("event is empty: message is zero length") 61 | return EVENT_IS_EMPTY 62 | 63 | if self._include_exclude_filter is not None and not self._include_exclude_filter.filter(message): 64 | shared_logger.debug("event is filtered according to filter rules") 65 | return EVENT_IS_FILTERED 66 | 67 | for shipper in self._shippers: 68 | # dict are mutated if not deep copied, every shipper can mutate the 69 | # events it receives without affecting the events of other shippers 70 | sent_event = deepcopy(event) 71 | shipper.send(sent_event) 72 | 73 | return EVENT_IS_SENT 74 | 75 | def flush(self) -> None: 76 | for shipper in self._shippers: 77 | shipper.flush() 78 | -------------------------------------------------------------------------------- /tests/storage/test_factory.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | 5 | import re 6 | from unittest import TestCase 7 | 8 | import pytest 9 | 10 | from storage import PayloadStorage, S3Storage, StorageFactory 11 | 12 | 13 | @pytest.mark.unit 14 | class TestStorageFactory(TestCase): 15 | def test_create(self) -> None: 16 | with self.subTest("create s3 storage success"): 17 | storage = StorageFactory.create(storage_type="s3", bucket_name="bucket_name", object_key="object_key") 18 | 19 | assert isinstance(storage, S3Storage) 20 | 21 | with self.subTest("create s3 storage error"): 22 | with self.assertRaisesRegex( 23 | ValueError, 24 | re.escape( 25 | "You must provide the following not empty init kwargs for" 26 | + " s3: bucket_name, object_key. (provided: {})" 27 | ), 28 | ): 29 | StorageFactory.create(storage_type="s3") 30 | 31 | with self.subTest("create s3 storage empty kwargs"): 32 | with self.assertRaisesRegex( 33 | ValueError, 34 | re.escape( 35 | "You must provide the following not empty init kwargs for s3: bucket_name, object_key." 36 | + ' (provided: {"bucket_name":"","object_key":""})' 37 | ), 38 | ): 39 | StorageFactory.create(storage_type="s3", bucket_name="", object_key="") 40 | 41 | with self.subTest("create payload storage success"): 42 | storage = StorageFactory.create(storage_type="payload", payload="payload") 43 | 44 | assert isinstance(storage, PayloadStorage) 45 | 46 | with self.subTest("create payload storage error"): 47 | with self.assertRaisesRegex( 48 | ValueError, 49 | re.escape( 50 | "You must provide the following not empty init kwargs for" + " payload: payload. (provided: {})" 51 | ), 52 | ): 53 | StorageFactory.create(storage_type="payload") 54 | 55 | with self.subTest("create payload storage empty kwargs"): 56 | with self.assertRaisesRegex( 57 | ValueError, 58 | re.escape( 59 | "You must provide the following not empty init kwargs for payload: payload." 60 | + ' (provided: {"payload":""})' 61 | ), 62 | ): 63 | StorageFactory.create(storage_type="payload", payload="") 64 | 65 | with self.subTest("create invalid type"): 66 | with self.assertRaisesRegex( 67 | ValueError, "^You must provide one of the following storage types: s3, payload$" 68 | ): 69 | StorageFactory.create(storage_type="invalid type") 70 | -------------------------------------------------------------------------------- /storage/storage.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | 5 | from abc import ABCMeta 6 | from io import BytesIO 7 | from typing import Any, Callable, Iterator, Optional, Protocol, TypeVar, Union 8 | 9 | from typing_extensions import TypeAlias 10 | 11 | from share import ExpandEventListFromField, ProtocolMultiline 12 | 13 | # CHUNK_SIZE is how much we read from the gzip stream at every iteration in the inflate decorator 14 | # BEWARE, this CHUNK_SIZE has a huge impact on performance, contrary to what we stated here: 15 | # https://github.com/elastic/elastic-serverless-forwarder/pull/11#discussion_r732587976 16 | # Reinstating to 1M from 1K resulted on 6.2M gzip of 35.1 of inflated content 17 | # to be ingested in 45 secs instead of having the lambda timing out 18 | CHUNK_SIZE: int = 1024**2 19 | 20 | 21 | def is_gzip_content(content: bytes) -> bool: 22 | return content.startswith(b"\037\213") # gzip compression method 23 | 24 | 25 | class StorageReader: 26 | """ 27 | StorageReader is an interface for contents returned by storage. 28 | It wraps the underlying type and forward to it 29 | """ 30 | 31 | def __init__(self, raw: Any): 32 | self._raw = raw 33 | 34 | def __getattr__(self, item: str) -> Any: 35 | return getattr(self._raw, item) 36 | 37 | 38 | # GetByLinesIterator yields a tuple of content, starting offset, ending offset 39 | # and optional offset of a list of expanded events 40 | GetByLinesIterator: TypeAlias = Iterator[tuple[bytes, int, int, Optional[int]]] 41 | 42 | 43 | class ProtocolStorage(Protocol): 44 | """ 45 | Protocol for Storage components 46 | """ 47 | 48 | json_content_type: Optional[str] 49 | multiline_processor: Optional[ProtocolMultiline] 50 | event_list_from_field_expander: Optional[ExpandEventListFromField] 51 | 52 | def get_by_lines(self, range_start: int) -> GetByLinesIterator: 53 | pass # pragma: no cover 54 | 55 | def get_as_string(self) -> str: 56 | pass # pragma: no cover 57 | 58 | 59 | class CommonStorage(metaclass=ABCMeta): 60 | """ 61 | Common class for Storage components 62 | """ 63 | 64 | json_content_type: Optional[str] = None 65 | multiline_processor: Optional[ProtocolMultiline] = None 66 | event_list_from_field_expander: Optional[ExpandEventListFromField] = None 67 | 68 | 69 | ProtocolStorageType = TypeVar("ProtocolStorageType", bound=ProtocolStorage) 70 | 71 | # StorageDecoratorIterator yields a tuple of content (expressed as `StorageReader` or bytes), starting offset, 72 | # ending offset, newline and optional offset of a list of expanded events 73 | StorageDecoratorIterator: TypeAlias = Iterator[tuple[Union[StorageReader, bytes], int, int, bytes, Optional[int]]] 74 | 75 | # StorageDecoratorCallable accepts a `ProtocolStorageType`, the range start offset, the content as BytesIO and a boolean 76 | # flag indicating if the content is gzipped as arguments. It returns a `StorageDecoratorIterator` 77 | StorageDecoratorCallable = Callable[[ProtocolStorageType, int, BytesIO, bool], StorageDecoratorIterator] 78 | -------------------------------------------------------------------------------- /shippers/factory.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | 5 | from typing import Any, Callable 6 | 7 | from share.config import ElasticsearchOutput, LogstashOutput, Output 8 | 9 | from .es import ElasticsearchShipper 10 | from .logstash import LogstashShipper 11 | from .shipper import ProtocolShipper 12 | 13 | _init_definition_by_output: dict[str, dict[str, Any]] = { 14 | "elasticsearch": { 15 | "class": ElasticsearchShipper, 16 | }, 17 | "logstash": { 18 | "class": LogstashShipper, 19 | }, 20 | } 21 | 22 | 23 | class ShipperFactory: 24 | """ 25 | Shipper factory. 26 | Provides static methods to instantiate a shipper 27 | """ 28 | 29 | @staticmethod 30 | def create_from_output(output_type: str, output: Output) -> ProtocolShipper: 31 | """ 32 | Instantiates a concrete Shipper given an output type and an Output instance 33 | """ 34 | 35 | if output_type == "elasticsearch": 36 | if not isinstance(output, ElasticsearchOutput): 37 | raise ValueError(f"output expected to be ElasticsearchOutput type, given {type(output)}") 38 | 39 | return ShipperFactory.create( 40 | output_type="elasticsearch", 41 | elasticsearch_url=output.elasticsearch_url, 42 | username=output.username, 43 | password=output.password, 44 | cloud_id=output.cloud_id, 45 | api_key=output.api_key, 46 | es_datastream_name=output.es_datastream_name, 47 | tags=output.tags, 48 | batch_max_actions=output.batch_max_actions, 49 | batch_max_bytes=output.batch_max_bytes, 50 | ssl_assert_fingerprint=output.ssl_assert_fingerprint, 51 | es_dead_letter_index=output.es_dead_letter_index, 52 | ) 53 | 54 | if output_type == "logstash": 55 | if not isinstance(output, LogstashOutput): 56 | raise ValueError(f"output expected to be LogstashOutput type, given {type(output)}") 57 | 58 | return ShipperFactory.create( 59 | output_type="logstash", 60 | logstash_url=output.logstash_url, 61 | username=output.username, 62 | password=output.password, 63 | max_batch_size=output.max_batch_size, 64 | compression_level=output.compression_level, 65 | ssl_assert_fingerprint=output.ssl_assert_fingerprint, 66 | tags=output.tags, 67 | ) 68 | 69 | raise ValueError( 70 | f"You must provide one of the following outputs: " f"{', '.join(_init_definition_by_output.keys())}" 71 | ) 72 | 73 | @staticmethod 74 | def create(output_type: str, **kwargs: Any) -> ProtocolShipper: 75 | """ 76 | Instantiates a concrete Shipper given an output type and the shipper init kwargs 77 | """ 78 | 79 | if output_type not in _init_definition_by_output: 80 | raise ValueError( 81 | f"You must provide one of the following outputs: " f"{', '.join(_init_definition_by_output.keys())}" 82 | ) 83 | 84 | output_definition = _init_definition_by_output[output_type] 85 | 86 | output_builder: Callable[..., ProtocolShipper] = output_definition["class"] 87 | 88 | return output_builder(**kwargs) 89 | -------------------------------------------------------------------------------- /handlers/aws/replay_trigger.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | 5 | from typing import Any, Optional 6 | 7 | from share import Config, ElasticsearchOutput, Input, LogstashOutput, Output, shared_logger 8 | from shippers import CompositeShipper, ProtocolShipper, ShipperFactory 9 | 10 | from .exceptions import InputConfigException, OutputConfigException, ReplayHandlerException 11 | from .utils import delete_sqs_record 12 | 13 | 14 | class ReplayedEventReplayHandler: 15 | def __init__(self, replay_queue_arn: str): 16 | self._replay_queue_arn = replay_queue_arn 17 | self._failed_event_ids: list[str] = [] 18 | self._events_with_receipt_handle: dict[str, str] = {} 19 | 20 | def add_event_with_receipt_handle(self, event_uniq_id: str, receipt_handle: str) -> None: 21 | self._events_with_receipt_handle[event_uniq_id] = receipt_handle 22 | 23 | def replay_handler( 24 | self, output_destination: str, output_args: dict[str, Any], event_payload: dict[str, Any] 25 | ) -> None: 26 | event_uniq_id: str = event_payload["_id"] + output_destination 27 | self._failed_event_ids.append(event_uniq_id) 28 | 29 | def flush(self) -> None: 30 | for failed_event_uniq_id in self._failed_event_ids: 31 | del self._events_with_receipt_handle[failed_event_uniq_id] 32 | 33 | for receipt_handle in self._events_with_receipt_handle.values(): 34 | delete_sqs_record(self._replay_queue_arn, receipt_handle) 35 | 36 | if len(self._failed_event_ids) > 0: 37 | raise ReplayHandlerException() 38 | 39 | 40 | def get_shipper_for_replay_event( 41 | config: Config, 42 | output_destination: str, 43 | output_args: dict[str, Any], 44 | event_input_id: str, 45 | replay_handler: ReplayedEventReplayHandler, 46 | ) -> Optional[CompositeShipper]: 47 | event_input: Optional[Input] = config.get_input_by_id(event_input_id) 48 | if event_input is None: 49 | raise InputConfigException(f"Cannot load input for input id {event_input_id}") 50 | 51 | output: Optional[Output] = event_input.get_output_by_destination(output_destination) 52 | if output is None: 53 | raise OutputConfigException(f"Cannot load output with destination {output_destination}") 54 | 55 | # Let's wrap the specific output shipper in the composite one, since the composite deepcopy the mutating events 56 | shipper: CompositeShipper = CompositeShipper() 57 | 58 | if output.type == "elasticsearch": 59 | assert isinstance(output, ElasticsearchOutput) 60 | output.es_datastream_name = output_args["es_datastream_name"] 61 | shared_logger.debug("setting ElasticSearch shipper") 62 | elasticsearch: ProtocolShipper = ShipperFactory.create_from_output(output_type=output.type, output=output) 63 | 64 | shipper.add_shipper(elasticsearch) 65 | shipper.set_replay_handler(replay_handler=replay_handler.replay_handler) 66 | 67 | return shipper 68 | 69 | if output.type == "logstash": 70 | assert isinstance(output, LogstashOutput) 71 | shared_logger.debug("setting Logstash shipper") 72 | logstash: ProtocolShipper = ShipperFactory.create_from_output(output_type=output.type, output=output) 73 | 74 | shipper.add_shipper(logstash) 75 | shipper.set_replay_handler(replay_handler=replay_handler.replay_handler) 76 | 77 | return shipper 78 | 79 | return None 80 | -------------------------------------------------------------------------------- /share/include_exlude.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. 
Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from __future__ import annotations 6 | 7 | import re 8 | from typing import Optional 9 | 10 | 11 | class IncludeExcludeRule: 12 | """ 13 | IncludeExcludeRule represents a pattern rule 14 | """ 15 | 16 | def __init__(self, pattern: str): 17 | self.pattern = re.compile(pattern) 18 | 19 | def __eq__(self, other: object) -> bool: 20 | assert isinstance(other, IncludeExcludeRule) 21 | 22 | return self.pattern == other.pattern 23 | 24 | 25 | class IncludeExcludeFilter: 26 | """ 27 | Base class for IncludeExclude filter 28 | """ 29 | 30 | def __init__( 31 | self, 32 | include_patterns: Optional[list[IncludeExcludeRule]] = None, 33 | exclude_patterns: Optional[list[IncludeExcludeRule]] = None, 34 | ): 35 | self._include_rules: Optional[list[IncludeExcludeRule]] = None 36 | self._exclude_rules: Optional[list[IncludeExcludeRule]] = None 37 | 38 | if include_patterns is not None and len(include_patterns) > 0: 39 | self.include_rules = include_patterns 40 | 41 | if exclude_patterns is not None and len(exclude_patterns) > 0: 42 | self.exclude_rules = exclude_patterns 43 | 44 | self._always_yield = self._include_rules is None and self._exclude_rules is None 45 | 46 | self._include_only = self._include_rules is not None and self._exclude_rules is None 47 | self._exclude_only = self._exclude_rules is not None and self._include_rules is None 48 | 49 | def _is_included(self, message: str) -> bool: 50 | assert self._include_rules is not None 51 | 52 | for include_rule in self._include_rules: 53 | if include_rule.pattern.search(message) is not None: 54 | return True 55 | 56 | return False 57 | 58 | def _is_excluded(self, message: str) -> bool: 59 | assert self._exclude_rules is not None 60 | 61 | for exclude_rule in self._exclude_rules: 62 | if exclude_rule.pattern.search(message) is not None: 63 | return True 64 | 65 | return False 66 | 67 | def filter(self, message: str) -> bool: 68 | """ 69 | filter returns True if the event is included or not excluded 70 | """ 71 | 72 | if self._always_yield: 73 | return True 74 | 75 | if self._include_only: 76 | return self._is_included(message) 77 | 78 | if self._exclude_only: 79 | return not self._is_excluded(message) 80 | 81 | if self._is_excluded(message): 82 | return False 83 | 84 | return self._is_included(message) 85 | 86 | def __eq__(self, other: object) -> bool: 87 | assert isinstance(other, IncludeExcludeFilter) 88 | 89 | return self.include_rules == other.include_rules and self.exclude_rules == other.exclude_rules 90 | 91 | @property 92 | def include_rules(self) -> Optional[list[IncludeExcludeRule]]: 93 | return self._include_rules 94 | 95 | @include_rules.setter 96 | def include_rules(self, value: list[IncludeExcludeRule]) -> None: 97 | self._include_rules = value 98 | 99 | @property 100 | def exclude_rules(self) -> Optional[list[IncludeExcludeRule]]: 101 | return self._exclude_rules 102 | 103 | @exclude_rules.setter 104 | def exclude_rules(self, value: list[IncludeExcludeRule]) -> None: 105 | self._exclude_rules = value 106 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Elastic License 2.0 2 | 3 | URL: https://www.elastic.co/licensing/elastic-license 4 | 5 | ## Acceptance 6 | 7 | By using the software, you agree to all of the terms and conditions below. 
8 | 9 | ## Copyright License 10 | 11 | The licensor grants you a non-exclusive, royalty-free, worldwide, 12 | non-sublicensable, non-transferable license to use, copy, distribute, make 13 | available, and prepare derivative works of the software, in each case subject to 14 | the limitations and conditions below. 15 | 16 | ## Limitations 17 | 18 | You may not provide the software to third parties as a hosted or managed 19 | service, where the service provides users with access to any substantial set of 20 | the features or functionality of the software. 21 | 22 | You may not move, change, disable, or circumvent the license key functionality 23 | in the software, and you may not remove or obscure any functionality in the 24 | software that is protected by the license key. 25 | 26 | You may not alter, remove, or obscure any licensing, copyright, or other notices 27 | of the licensor in the software. Any use of the licensor’s trademarks is subject 28 | to applicable law. 29 | 30 | ## Patents 31 | 32 | The licensor grants you a license, under any patent claims the licensor can 33 | license, or becomes able to license, to make, have made, use, sell, offer for 34 | sale, import and have imported the software, in each case subject to the 35 | limitations and conditions in this license. This license does not cover any 36 | patent claims that you cause to be infringed by modifications or additions to 37 | the software. If you or your company make any written claim that the software 38 | infringes or contributes to infringement of any patent, your patent license for 39 | the software granted under these terms ends immediately. If your company makes 40 | such a claim, your patent license ends immediately for work on behalf of your 41 | company. 42 | 43 | ## Notices 44 | 45 | You must ensure that anyone who gets a copy of any part of the software from you 46 | also gets a copy of these terms. 47 | 48 | If you modify the software, you must include in any modified copies of the 49 | software prominent notices stating that you have modified the software. 50 | 51 | ## No Other Rights 52 | 53 | These terms do not imply any licenses other than those expressly granted in 54 | these terms. 55 | 56 | ## Termination 57 | 58 | If you use the software in violation of these terms, such use is not licensed, 59 | and your licenses will automatically terminate. If the licensor provides you 60 | with a notice of your violation, and you cease all violation of this license no 61 | later than 30 days after you receive that notice, your licenses will be 62 | reinstated retroactively. However, if you violate these terms after such 63 | reinstatement, any additional violation of these terms will cause your licenses 64 | to terminate automatically and permanently. 65 | 66 | ## No Liability 67 | 68 | *As far as the law allows, the software comes as is, without any warranty or 69 | condition, and the licensor will not be liable to you for any damages arising 70 | out of these terms or the use or nature of the software, under any kind of 71 | legal claim.* 72 | 73 | ## Definitions 74 | 75 | The **licensor** is the entity offering these terms, and the **software** is the 76 | software the licensor makes available under these terms, including any portion 77 | of it. 78 | 79 | **you** refers to the individual or entity agreeing to these terms. 
80 | 81 | **your company** is any legal entity, sole proprietorship, or other kind of 82 | organization that you work for, plus all organizations that have control over, 83 | are under the control of, or are under common control with that 84 | organization. **control** means ownership of substantially all the assets of an 85 | entity, or the power to direct its management and policies by vote, contract, or 86 | otherwise. Control can be direct or indirect. 87 | 88 | **your licenses** are all the licenses granted to you for the software under 89 | these terms. 90 | 91 | **use** means anything you do with the software requiring one of your licenses. 92 | 93 | **trademark** means trademarks, service marks, and similar rights. 94 | -------------------------------------------------------------------------------- /.github/workflows/releases-production.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # IMPORTANT: 3 | # If you change the name of this file, you will have to update 4 | # https://github.com/elastic/oblt-infra/blob/main/conf/resources/repos/elastic-serverless-forwarder/01-aws-oidc-github.tf 5 | # to include the current one! 6 | 7 | # Workflow to push zip with dependencies to S3 bucket every time the ESF version is updated 8 | # (we need this for ESF terraform), and to publish the new SAR version 9 | name: releases-production 10 | 11 | on: 12 | workflow_run: 13 | workflows: [create-tag] 14 | types: 15 | - completed 16 | 17 | permissions: 18 | id-token: write # This is required for requesting the JWT 19 | contents: read # This is required for actions/checkout 20 | 21 | env: 22 | AWS_REGION: "eu-central-1" 23 | AWS_ACCOUNT_ID: "267093732750" # account 'elastic-observability-prod' 24 | 25 | jobs: 26 | 27 | get-esf-version: 28 | runs-on: ubuntu-latest 29 | timeout-minutes: 5 30 | 31 | outputs: 32 | version: ${{ steps.get-version.outputs.version }} 33 | 34 | steps: 35 | - uses: actions/checkout@v5 36 | 37 | - name: Get version number 38 | id: get-version 39 | shell: bash 40 | run: | 41 | version=$(grep -oE '[0-9]+\.[0-9]+\.[0-9]+(\-[a-zA-Z]+[0-9]+)?' share/version.py) 42 | echo "version=${version}" >> $GITHUB_OUTPUT 43 | echo "::notice::ESF version is ${version}." 44 | 45 | 46 | build-and-upload-dependencies: 47 | runs-on: ubuntu-latest 48 | timeout-minutes: 30 49 | needs: get-esf-version 50 | 51 | env: 52 | BUCKET_NAME: "esf-dependencies" 53 | 54 | steps: 55 | # See https://docs.aws.amazon.com/lambda/latest/dg/python-package.html#python-package-create-dependencies 56 | 57 | - uses: actions/checkout@v5 58 | with: 59 | ref: 'lambda-v${{ needs.get-esf-version.outputs.version }}' 60 | 61 | - uses: actions/setup-python@v6 62 | with: 63 | python-version: '3.12' 64 | cache: 'pip' # caching pip dependencies 65 | 66 | - name: Install requirements in a directory and zip it. 67 | shell: bash 68 | run: | 69 | pip3 install -r requirements.txt -t ./dependencies 70 | cd dependencies && zip -r ../lambda-v${{ needs.get-esf-version.outputs.version }}.zip . 71 | 72 | - name: Place handlers in the zip file. 
73 | shell: bash 74 | run: | 75 | zip -r ./lambda-v${{ needs.get-esf-version.outputs.version }}.zip main_aws.py 76 | zip -r ./lambda-v${{ needs.get-esf-version.outputs.version }}.zip handlers 77 | zip -r ./lambda-v${{ needs.get-esf-version.outputs.version }}.zip share 78 | zip -r ./lambda-v${{ needs.get-esf-version.outputs.version }}.zip storage 79 | zip -r ./lambda-v${{ needs.get-esf-version.outputs.version }}.zip shippers 80 | 81 | - name: Configure AWS credentials 82 | uses: elastic/oblt-actions/aws/auth@v1 83 | with: 84 | aws-account-id: "${{ env.AWS_ACCOUNT_ID }}" 85 | aws-region: "${{ env.AWS_REGION }}" 86 | 87 | - name: Copy file to s3 88 | run: | 89 | aws s3 cp ./lambda-v${{ needs.get-esf-version.outputs.version }}.zip s3://${{ env.BUCKET_NAME }}/ 90 | 91 | 92 | release-sar: 93 | runs-on: ubuntu-latest 94 | timeout-minutes: 30 95 | needs: get-esf-version 96 | 97 | env: 98 | BUCKET_NAME: "elastic-serverless-forwarder" 99 | 100 | steps: 101 | - uses: actions/checkout@v5 102 | with: 103 | ref: 'lambda-v${{ needs.get-esf-version.outputs.version }}' 104 | 105 | - uses: elastic/oblt-actions/aws/auth@v1 106 | with: 107 | aws-account-id: "${{ env.AWS_ACCOUNT_ID }}" 108 | aws-region: "${{ env.AWS_REGION }}" 109 | 110 | - uses: aws-actions/setup-sam@c71dd89d980e49367c70391e8ada4353f52f2800 # v2 111 | with: 112 | use-installer: true 113 | token: ${{ secrets.GITHUB_TOKEN }} 114 | 115 | - name: Build and package 116 | run: | 117 | .internal/aws/scripts/dist.sh \ 118 | elastic-serverless-forwarder \ 119 | ${{ needs.get-esf-version.outputs.version }} \ 120 | ${{ env.BUCKET_NAME }} \ 121 | ${{ env.AWS_ACCOUNT_ID }} \ 122 | ${{ env.AWS_REGION }} \ 123 | "Elastic" 124 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help license all-requirements requirements requirements-lint requirements-tests benchmark black coverage flake8 integration-test isort lint mypy test unit-test docker-benchmark docker-black docker-coverage docker-flake8 docker-integration-test docker-isort docker-lint docker-mypy docker-test docker-unit-test 2 | SHELL := /bin/bash 3 | 4 | help: ## Display this help text 5 | @grep -E '^[a-zA-Z_-]+[%]?:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' 6 | 7 | benchmark: PYTEST_ARGS=-m benchmark ## Run benchmarks on the host 8 | benchmark: export PYTEST_ADDOPTS=--benchmark-group-by=group 9 | benchmark: test 10 | 11 | unit-test: PYTEST_ARGS=-m unit ## Run unit tests on the host 12 | unit-test: test 13 | 14 | integration-test: PYTEST_ARGS=-m integration ## Run integration tests on the host 15 | integration-test: test 16 | 17 | test: PYTEST_ARGS_FLAGS=$(if $(PYTEST_ARGS),$(PYTEST_ARGS),-m not benchmark) ## Run unit tests on the host 18 | test: 19 | PYTEST_ARGS="${PYTEST_ARGS_FLAGS}" tests/scripts/${SCRIPTS_BASE_DIR}run_tests.sh 20 | 21 | coverage: export PYTEST_ADDOPTS=--cov=. 
--cov-context=test --cov-config=.coveragerc --cov-branch ## Run tests with coverage on the host 22 | coverage: export COVERAGE_FILE=.coverage 23 | coverage: test 24 | 25 | lint: black flake8 isort mypy ## Lint the project on the host 26 | 27 | black: ## Run black in the project on the host 28 | tests/scripts/${SCRIPTS_BASE_DIR}black.sh diff 29 | 30 | flake8: ## Run flake8 in the project on the host 31 | tests/scripts/${SCRIPTS_BASE_DIR}flake8.sh 32 | 33 | isort: ## Run isort in the project on the host 34 | tests/scripts/${SCRIPTS_BASE_DIR}isort.sh diff 35 | 36 | mypy: ## Run mypy in the project on the host 37 | tests/scripts/${SCRIPTS_BASE_DIR}mypy.sh 38 | 39 | package: ## Package lambda by installing python dependencies matching x86_64 40 | mkdir deps && \ 41 | pip install --target=./deps --platform manylinux2014_x86_64 --implementation cp --python-version 3.12 --only-binary=:all: --upgrade -r requirements.txt && \ 42 | cd ./deps && \ 43 | zip -r ../local_esf.zip . && \ 44 | cd .. && \ 45 | zip -r local_esf.zip main_aws.py handlers share storage shippers && \ 46 | rm -r ./deps 47 | 48 | clean: ## cleanup any leftover resources 49 | rm -f -r ./deps 50 | rm -f local_esf.zip 51 | 52 | docker-test: ## Run all tests on docker 53 | docker-test: SCRIPTS_BASE_DIR=docker/ 54 | docker-test: test 55 | 56 | docker-benchmark: ## Run benchmarks on docker 57 | docker-benchmark: SCRIPTS_BASE_DIR=docker/ 58 | docker-benchmark: benchmark 59 | 60 | docker-unit-test: ## Run unit tests on docker 61 | docker-unit-test: SCRIPTS_BASE_DIR=docker/ 62 | docker-unit-test: unit-test 63 | 64 | docker-integration-test: ## Run integration tests on docker 65 | docker-integration-test: SCRIPTS_BASE_DIR=docker/ 66 | docker-integration-test: integration-test 67 | 68 | docker-coverage: ## Run tests with coverage on docker 69 | docker-coverage: SCRIPTS_BASE_DIR=docker/ 70 | docker-coverage: coverage 71 | 72 | docker-lint: docker-black docker-flake8 docker-isort docker-mypy ## Lint the project on docker 73 | 74 | docker-black: ## Run black in the project on docker 75 | docker-black: SCRIPTS_BASE_DIR=docker/ 76 | docker-black: black 77 | 78 | docker-flake8: ## Run flake8 in the project on docker 79 | docker-flake8: SCRIPTS_BASE_DIR=docker/ 80 | docker-flake8: flake8 81 | 82 | docker-isort: ## Run isort in the project on docker 83 | docker-isort: SCRIPTS_BASE_DIR=docker/ 84 | docker-isort: isort 85 | 86 | docker-mypy: ## Run mypy in the project on docker 87 | docker-mypy: SCRIPTS_BASE_DIR=docker/ 88 | docker-mypy: mypy 89 | 90 | license: ## Run license validation in the project 91 | tests/scripts/license_headers_check.sh check 92 | 93 | all-requirements: requirements-lint requirements-tests requirements ## Install all requirements on the host 94 | 95 | requirements: .makecache/requirements.txt ## Install app requirements on the host 96 | 97 | requirements-lint: .makecache/requirements-lint.txt ## Install all linting requirements on the host 98 | 99 | requirements-tests: .makecache/requirements-tests.txt ## Install tests requirements on the host 100 | 101 | .makecache/requirements.txt: requirements.txt 102 | pip3 install -r requirements.txt 103 | touch .makecache/requirements.txt 104 | 105 | .makecache/requirements-lint.txt: requirements-lint.txt 106 | pip3 install -r requirements-lint.txt 107 | touch .makecache/requirements-lint.txt 108 | 109 | .makecache/requirements-tests.txt: requirements-tests.txt 110 | pip3 install -r requirements-tests.txt 111 | touch .makecache/requirements-tests.txt 112 | 
-------------------------------------------------------------------------------- /tests/handlers/aws/test_replay_trigger.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from typing import Optional 6 | from unittest import TestCase 7 | 8 | import mock 9 | import pytest 10 | 11 | from handlers.aws import OutputConfigException 12 | from handlers.aws.replay_trigger import ReplayedEventReplayHandler, get_shipper_for_replay_event 13 | from share import parse_config 14 | from shippers import CompositeShipper, ElasticsearchShipper, LogstashShipper 15 | 16 | 17 | @pytest.mark.unit 18 | class TestReplayTrigger(TestCase): 19 | @mock.patch("share.config._available_output_types", new=["elasticsearch", "logstash", "output_type"]) 20 | def test_get_shipper_for_replay_event(self) -> None: 21 | with self.subTest("Logstash shipper from replay event"): 22 | config_yaml_kinesis = """ 23 | inputs: 24 | - type: kinesis-data-stream 25 | id: arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream 26 | outputs: 27 | - type: logstash 28 | args: 29 | logstash_url: logstash_url 30 | """ 31 | config = parse_config(config_yaml_kinesis) 32 | replay_handler = ReplayedEventReplayHandler("arn:aws:sqs:eu-central-1:123456789:queue/replayqueue") 33 | logstash_shipper: Optional[CompositeShipper] = get_shipper_for_replay_event( 34 | config, 35 | "logstash_url", 36 | {}, 37 | "arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream", 38 | replay_handler, 39 | ) 40 | assert isinstance(logstash_shipper, CompositeShipper) 41 | assert isinstance(logstash_shipper._shippers[0], LogstashShipper) 42 | 43 | with self.subTest("Elasticsearch shipper from replay event"): 44 | config_yaml_kinesis = """ 45 | inputs: 46 | - type: kinesis-data-stream 47 | id: arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream 48 | outputs: 49 | - type: elasticsearch 50 | args: 51 | elasticsearch_url: "elasticsearch_url" 52 | username: "username" 53 | password: "password" 54 | es_datastream_name: "es_datastream_name" 55 | """ 56 | config = parse_config(config_yaml_kinesis) 57 | replay_handler = ReplayedEventReplayHandler("arn:aws:sqs:eu-central-1:123456789:queue/replayqueue") 58 | elasticsearch_shipper: Optional[CompositeShipper] = get_shipper_for_replay_event( 59 | config, 60 | "elasticsearch_url", 61 | {"es_datastream_name": "es_datastream_name"}, 62 | "arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream", 63 | replay_handler, 64 | ) 65 | 66 | assert isinstance(elasticsearch_shipper, CompositeShipper) 67 | assert isinstance(elasticsearch_shipper._shippers[0], ElasticsearchShipper) 68 | 69 | with self.subTest("Exception from output destination"): 70 | config_yaml_kinesis = """ 71 | inputs: 72 | - type: kinesis-data-stream 73 | id: arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream 74 | outputs: 75 | - type: output_type 76 | args: 77 | output_arg: output_arg 78 | """ 79 | config = parse_config(config_yaml_kinesis) 80 | replay_handler = ReplayedEventReplayHandler("arn:aws:sqs:eu-central-1:123456789:queue/replayqueue") 81 | with self.assertRaisesRegex(OutputConfigException, "test"): 82 | get_shipper_for_replay_event( 83 | config, 84 | "test", 85 | {}, 86 | 
"arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream", 87 | replay_handler, 88 | ) 89 | -------------------------------------------------------------------------------- /storage/s3.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from io import SEEK_SET, BytesIO 6 | from typing import Any, Optional 7 | 8 | import boto3 9 | import botocore.client 10 | import elasticapm # noqa: F401 11 | from botocore.response import StreamingBody 12 | 13 | from share import ExpandEventListFromField, ProtocolMultiline, shared_logger 14 | 15 | from .decorator import by_lines, inflate, json_collector, multi_line 16 | from .storage import ( 17 | CHUNK_SIZE, 18 | CommonStorage, 19 | GetByLinesIterator, 20 | StorageDecoratorIterator, 21 | StorageReader, 22 | is_gzip_content, 23 | ) 24 | 25 | 26 | class S3Storage(CommonStorage): 27 | """ 28 | S3 Storage. 29 | This class implements concrete S3 Storage 30 | """ 31 | 32 | _s3_client = boto3.client( 33 | "s3", config=botocore.client.Config(retries={"total_max_attempts": 10, "mode": "standard"}) 34 | ) 35 | 36 | def __init__( 37 | self, 38 | bucket_name: str, 39 | object_key: str, 40 | json_content_type: Optional[str] = None, 41 | multiline_processor: Optional[ProtocolMultiline] = None, 42 | event_list_from_field_expander: Optional[ExpandEventListFromField] = None, 43 | ): 44 | self._bucket_name: str = bucket_name 45 | self._object_key: str = object_key 46 | self.json_content_type = json_content_type 47 | self.multiline_processor = multiline_processor 48 | self.event_list_from_field_expander = event_list_from_field_expander 49 | 50 | @multi_line 51 | @json_collector 52 | @by_lines 53 | @inflate 54 | def _generate(self, range_start: int, body: BytesIO, is_gzipped: bool) -> StorageDecoratorIterator: 55 | """ 56 | Concrete implementation of the iterator for get_by_lines 57 | """ 58 | 59 | file_ending_offset: int = range_start 60 | 61 | def chunk_lambda() -> Any: 62 | return body.read(CHUNK_SIZE) 63 | 64 | if is_gzipped: 65 | reader: StorageReader = StorageReader(raw=body) 66 | yield reader, 0, 0, b"", None 67 | else: 68 | for chunk in iter(chunk_lambda, b""): 69 | file_starting_offset = file_ending_offset 70 | file_ending_offset += len(chunk) 71 | 72 | shared_logger.debug("_generate flat", extra={"offset": file_ending_offset}) 73 | yield chunk, file_ending_offset, file_starting_offset, b"", None 74 | 75 | def get_by_lines(self, range_start: int) -> GetByLinesIterator: 76 | original_range_start: int = range_start 77 | 78 | s3_object_head = self._s3_client.head_object(Bucket=self._bucket_name, Key=self._object_key) 79 | 80 | content_type: str = s3_object_head["ContentType"] 81 | content_length: int = s3_object_head["ContentLength"] 82 | shared_logger.debug( 83 | "get_by_lines", 84 | extra={ 85 | "content_type": content_type, 86 | "range_start": range_start, 87 | "bucket_name": self._bucket_name, 88 | "object_key": self._object_key, 89 | }, 90 | ) 91 | 92 | file_content: BytesIO = BytesIO(b"") 93 | self._s3_client.download_fileobj(self._bucket_name, self._object_key, file_content) 94 | 95 | file_content.flush() 96 | file_content.seek(0, SEEK_SET) 97 | is_gzipped: bool = False 98 | if is_gzip_content(file_content.readline()): 99 | is_gzipped = True 100 | range_start = 
0 101 | 102 | if range_start < content_length: 103 | file_content.seek(range_start, SEEK_SET) 104 | 105 | for log_event, line_starting_offset, line_ending_offset, _, event_expanded_offset in self._generate( 106 | original_range_start, file_content, is_gzipped 107 | ): 108 | assert isinstance(log_event, bytes) 109 | yield log_event, line_starting_offset, line_ending_offset, event_expanded_offset 110 | else: 111 | shared_logger.info(f"requested file content from {range_start}, file size {content_length}: skip it") 112 | 113 | def get_as_string(self) -> str: 114 | shared_logger.debug("get_as_string", extra={"bucket_name": self._bucket_name, "object_key": self._object_key}) 115 | s3_object = self._s3_client.get_object(Bucket=self._bucket_name, Key=self._object_key, Range="bytes=0-") 116 | 117 | body: StreamingBody = s3_object["Body"] 118 | return str(body.read(s3_object["ContentLength"]).decode("utf-8")) 119 | -------------------------------------------------------------------------------- /.internal/aws/cloudformation/application.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | Transform: AWS::Serverless-2016-10-31 3 | Description: > 4 | Elastic Serverless Forwarder 5 | 6 | SAM Template for the application, not intended to be deployed on its own 7 | 8 | Parameters: 9 | ElasticServerlessForwarderS3ConfigFile: 10 | Type: String 11 | ElasticServerlessForwarderSSMSecrets: 12 | Type: CommaDelimitedList 13 | ElasticServerlessForwarderKMSKeys: 14 | Type: CommaDelimitedList 15 | ElasticServerlessForwarderSQSEvents: 16 | Type: CommaDelimitedList 17 | ElasticServerlessForwarderSQSEvents2: 18 | Type: CommaDelimitedList 19 | ElasticServerlessForwarderS3SQSEvents: 20 | Type: CommaDelimitedList 21 | ElasticServerlessForwarderS3SQSEvents2: 22 | Type: CommaDelimitedList 23 | ElasticServerlessForwarderKinesisEvents: 24 | Type: CommaDelimitedList 25 | ElasticServerlessForwarderKinesisEvents2: 26 | Type: CommaDelimitedList 27 | ElasticServerlessForwarderCloudWatchLogsEvents: 28 | Type: CommaDelimitedList 29 | ElasticServerlessForwarderCloudWatchLogsEvents2: 30 | Type: CommaDelimitedList 31 | ElasticServerlessForwarderS3Buckets: 32 | Type: CommaDelimitedList 33 | ElasticServerlessForwarderSecurityGroups: 34 | Type: CommaDelimitedList 35 | ElasticServerlessForwarderSubnets: 36 | Type: CommaDelimitedList 37 | Resources: 38 | ElasticServerlessForwarderContinuingDLQ: 39 | Type: AWS::SQS::Queue 40 | Properties: 41 | DelaySeconds: 0 42 | QueueName: !Join [ "-", ["elastic-serverless-forwarder-continuing-dlq", !Select [4, !Split ['-', !Select [2, !Split ['/', !Ref AWS::StackId]]]]]] 43 | VisibilityTimeout: 910 44 | SqsManagedSseEnabled: true 45 | ElasticServerlessForwarderContinuingQueue: 46 | Type: AWS::SQS::Queue 47 | Properties: 48 | DelaySeconds: 0 49 | QueueName: !Join [ "-", ["elastic-serverless-forwarder-continuing-queue", !Select [4, !Split ['-', !Select [2, !Split ['/', !Ref AWS::StackId]]]]]] 50 | RedrivePolicy: { "deadLetterTargetArn" : !GetAtt ElasticServerlessForwarderContinuingDLQ.Arn, "maxReceiveCount" : 1 } 51 | VisibilityTimeout: 910 52 | SqsManagedSseEnabled: true 53 | ElasticServerlessForwarderReplayDLQ: 54 | Type: AWS::SQS::Queue 55 | Properties: 56 | DelaySeconds: 0 57 | QueueName: !Join [ "-", ["elastic-serverless-forwarder-replay-dlq", !Select [4, !Split ['-', !Select [2, !Split ['/', !Ref AWS::StackId]]]]]] 58 | VisibilityTimeout: 910 59 | SqsManagedSseEnabled: true 60 | ElasticServerlessForwarderReplayQueue: 61 
| Type: AWS::SQS::Queue 62 | Properties: 63 | DelaySeconds: 0 64 | QueueName: !Join [ "-", ["elastic-serverless-forwarder-replay-queue", !Select [4, !Split ['-', !Select [2, !Split ['/', !Ref AWS::StackId]]]]]] 65 | RedrivePolicy: { "deadLetterTargetArn" : !GetAtt ElasticServerlessForwarderReplayDLQ.Arn, "maxReceiveCount" : 3 } 66 | VisibilityTimeout: 910 67 | SqsManagedSseEnabled: true 68 | ApplicationElasticServerlessForwarder: 69 | Type: AWS::Serverless::Function 70 | Properties: 71 | Timeout: 900 72 | MemorySize: 512 73 | CodeUri: %codeUri% 74 | Runtime: python3.12 75 | Architectures: 76 | - x86_64 77 | Handler: main_aws.handler 78 | Environment: 79 | Variables: 80 | S3_CONFIG_FILE: !Ref ElasticServerlessForwarderS3ConfigFile 81 | SQS_CONTINUE_URL: !Ref ElasticServerlessForwarderContinuingQueue 82 | SQS_REPLAY_URL: !Ref ElasticServerlessForwarderReplayQueue 83 | Events: 84 | SQSContinuingEvent: 85 | Type: SQS 86 | Properties: 87 | Queue: !GetAtt ElasticServerlessForwarderContinuingQueue.Arn 88 | BatchSize: 10 89 | Enabled: true 90 | Fn::Transform: 91 | Type: AWS::CloudFormation::Macro 92 | Name: %sarAppName%-macro 93 | Metadata: 94 | AWS::ServerlessRepo::Application: 95 | Name: helper-application-%sarAppName% 96 | Description: | 97 | NOTE: DO NOT DEPLOY 98 | Deploy elastic-serverless-forwarder instead. This is a helper SAM template for the application and not intended to be deployed on its own. 99 | Author: %sarAuthorName% 100 | SemanticVersion: %semanticVersion% 101 | LicenseUrl: %codeUri%/LICENSE.txt 102 | HomePageUrl: https://github.com/elastic/elastic-serverless-forwarder 103 | SourceCodeUrl: https://github.com/elastic/elastic-serverless-forwarder 104 | Outputs: 105 | EsfLambdaFunctionARN: 106 | Description: ARN of the ESF Lambda Function 107 | Value: !GetAtt ApplicationElasticServerlessForwarder.Arn 108 | EsfLambdaFunctionRoleARN: 109 | Description: ARN of the IAM role associated with the ESF Lambda function 110 | Value: !GetAtt ApplicationElasticServerlessForwarderRole.Arn 111 | -------------------------------------------------------------------------------- /storage/payload.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | import base64 5 | import binascii 6 | import gzip 7 | from io import SEEK_SET, BytesIO 8 | from typing import Any, Optional 9 | 10 | from share import ExpandEventListFromField, ProtocolMultiline, shared_logger 11 | 12 | from .decorator import by_lines, inflate, json_collector, multi_line 13 | from .storage import ( 14 | CHUNK_SIZE, 15 | CommonStorage, 16 | GetByLinesIterator, 17 | StorageDecoratorIterator, 18 | StorageReader, 19 | is_gzip_content, 20 | ) 21 | 22 | 23 | class PayloadStorage(CommonStorage): 24 | """ 25 | PayloadStorage Storage. 26 | This class implements concrete Payload Storage. 
27 | The payload might be base64 and gzip encoded 28 | """ 29 | 30 | def __init__( 31 | self, 32 | payload: str, 33 | json_content_type: Optional[str] = None, 34 | multiline_processor: Optional[ProtocolMultiline] = None, 35 | event_list_from_field_expander: Optional[ExpandEventListFromField] = None, 36 | ): 37 | self._payload: str = payload 38 | self.json_content_type = json_content_type 39 | self.multiline_processor = multiline_processor 40 | self.event_list_from_field_expander = event_list_from_field_expander 41 | 42 | @multi_line 43 | @json_collector 44 | @by_lines 45 | @inflate 46 | def _generate(self, range_start: int, body: BytesIO, is_gzipped: bool) -> StorageDecoratorIterator: 47 | """ 48 | Concrete implementation of the iterator for get_by_lines 49 | """ 50 | 51 | file_ending_offset: int = range_start 52 | 53 | def chunk_lambda() -> Any: 54 | return body.read(CHUNK_SIZE) 55 | 56 | if is_gzipped: 57 | reader: StorageReader = StorageReader(raw=body) 58 | yield reader, 0, 0, b"", None 59 | else: 60 | for chunk in iter(chunk_lambda, b""): 61 | file_starting_offset = file_ending_offset 62 | file_ending_offset += len(chunk) 63 | 64 | shared_logger.debug("_generate flat", extra={"offset": file_ending_offset}) 65 | yield chunk, file_starting_offset, file_ending_offset, b"", None 66 | 67 | def get_by_lines(self, range_start: int) -> GetByLinesIterator: 68 | original_range_start: int = range_start 69 | 70 | is_gzipped: bool = False 71 | is_b64encoded: bool = False 72 | try: 73 | base64_decoded = base64.b64decode(self._payload, validate=True) 74 | # we try to unicode decode to catch if `base64.b64decode` decoded to non-valid unicode: 75 | # in this case `UnicodeDecodeError` will be thrown, this mean that the original was not base64 encoded 76 | # we try this only if it's not gzipped, because in that case `UnicodeDecodeError` will be thrown anyway 77 | if not is_gzip_content(base64_decoded): 78 | base64_decoded.decode("utf-8") 79 | # if `UnicodeDecodeError` was thrown, the content was not base64 encoded 80 | # and the below assignment will not be executed 81 | is_b64encoded = True 82 | else: 83 | # we have gzip content that was base64 encoded 84 | # let's do the proper assignment 85 | is_b64encoded = True 86 | except (UnicodeDecodeError, ValueError, binascii.Error): 87 | # it was not valid unicode base64 encoded value or is it bare gzip content 88 | # just take as it is and encode to unicode bytes 89 | base64_decoded = self._payload.encode("utf-8") 90 | 91 | if is_gzip_content(base64_decoded): 92 | is_gzipped = True 93 | range_start = 0 94 | 95 | shared_logger.debug( 96 | "get_by_lines", 97 | extra={ 98 | "range_start": original_range_start, 99 | "is_b64encoded": is_b64encoded, 100 | "is_gzipped": is_gzipped, 101 | }, 102 | ) 103 | 104 | content_length = len(base64_decoded) 105 | if range_start < content_length: 106 | file_content: BytesIO = BytesIO(base64_decoded) 107 | 108 | file_content.flush() 109 | file_content.seek(range_start, SEEK_SET) 110 | 111 | for log_event, line_starting_offset, line_ending_offset, _, event_expanded_offset in self._generate( 112 | original_range_start, file_content, is_gzipped 113 | ): 114 | assert isinstance(log_event, bytes) 115 | yield log_event, line_starting_offset, line_ending_offset, event_expanded_offset 116 | else: 117 | shared_logger.info(f"requested payload content from {range_start}, payload size {content_length}: skip it") 118 | 119 | def get_as_string(self) -> str: 120 | try: 121 | base64_decoded = base64.b64decode(self._payload, validate=True) 122 
| if not is_gzip_content(base64_decoded): 123 | base64_decoded.decode("utf-8") 124 | except (UnicodeDecodeError, ValueError, binascii.Error): 125 | base64_decoded = self._payload.encode("utf-8") 126 | 127 | if is_gzip_content(base64_decoded): 128 | return gzip.decompress(base64_decoded).decode("utf-8") 129 | 130 | return base64_decoded.decode("utf-8") 131 | -------------------------------------------------------------------------------- /.internal/aws/scripts/dist.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 4 | # you may not use this file except in compliance with the Elastic License 2.0. 5 | 6 | set -ex 7 | 8 | echo " AWS CLI (https://aws.amazon.com/cli/), AWS SAM CLI (https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/install-sam-cli.html), docker and Python3.12 with pip3 required" 9 | echo " Please, execute from root folder of the repo" 10 | 11 | if [[ $# -ne 5 && $# -ne 6 ]] 12 | then 13 | echo "Usage: $0 sar-app-name semantic-version bucket-name account-id region" 14 | echo " Arguments:" 15 | echo " sar-app-name: name of the app to be deployed in SAR" 16 | echo " semantic-version: semantic version of the app to deploy in SAR" 17 | echo " bucket-name: bucket name where to store the zip artifact for SAR code" 18 | echo " (it will be created if it doesn't exists, otherwise " 19 | echo " you need already to have proper access to it)" 20 | echo " account-id: AWS account id to use for deploying" 21 | echo " region: region where to deploy in SAR for" 22 | echo " sar-author-name: name of the author of the app to be deployed in SAR" 23 | echo " (default to Elastic))" 24 | exit 1 25 | fi 26 | 27 | SAR_APP_NAME="$1" 28 | SEMANTIC_VERSION="$2" 29 | BUCKET="$3" 30 | ACCOUNT_ID="$4" 31 | REGION="$5" 32 | SAR_AUTHOR_NAME="${6:-Elastic}" 33 | TMPDIR=$(mktemp -d /tmp/dist.XXXXXXXXXX) 34 | CODE_URI="${TMPDIR}/sources" 35 | 36 | trap "rm -rf ${TMPDIR}" EXIT 37 | 38 | aws s3api get-bucket-location --bucket "${BUCKET}" --region "${REGION}" || aws s3api create-bucket --acl private --bucket "${BUCKET}" --region "${REGION}" --create-bucket-configuration LocationConstraint="${REGION}" || aws s3api create-bucket --acl private --bucket "${BUCKET}" --region "${REGION}" 39 | 40 | # Check if region is in AWS GovCloud and create bucket arn 41 | if [[ "${REGION}" == *gov* ]]; then 42 | BUCKET_ARN="arn:aws-us-gov:s3:::${BUCKET}" 43 | AWS_OR_AWS_GOV="aws-us-gov" 44 | else 45 | BUCKET_ARN="arn:aws:s3:::${BUCKET}" 46 | AWS_OR_AWS_GOV="aws" 47 | fi 48 | 49 | BUCKET_RESOURCE="${BUCKET_ARN}/*" 50 | 51 | cat < "${TMPDIR}/policy.json" 52 | { 53 | "Version": "2012-10-17", 54 | "Statement": [ 55 | { 56 | "Effect": "Allow", 57 | "Principal": { 58 | "Service": "serverlessrepo.amazonaws.com" 59 | }, 60 | "Action": "s3:GetObject", 61 | "Resource": "${BUCKET_RESOURCE}", 62 | "Condition" : { 63 | "StringEquals": { 64 | "aws:SourceAccount": "${ACCOUNT_ID}" 65 | } 66 | } 67 | } 68 | ] 69 | } 70 | EOF 71 | 72 | aws s3api put-bucket-policy --bucket "${BUCKET}" --region "${REGION}" --policy "file://${TMPDIR}/policy.json" 73 | mkdir -v -p "${CODE_URI}" 74 | cp -v requirements.txt "${CODE_URI}/" 75 | cp -v main_aws.py "${CODE_URI}/" 76 | find {handlers,share,shippers,storage} -not -name "*__pycache__*" -type d -print0|xargs -t -0 -Idirname mkdir -v -p "${CODE_URI}/dirname" 77 | find 
{handlers,share,shippers,storage} -not -name "*__pycache__*" -name "*.py" -exec cp -v '{}' "${CODE_URI}/{}" \; 78 | cp -v LICENSE.txt "${CODE_URI}/LICENSE.txt" 79 | cp -v docs/README-AWS.md "${CODE_URI}/README.md" 80 | 81 | sed -e "s|%codeUri%|${CODE_URI}|g" -e "s/%sarAppName%/${SAR_APP_NAME}/g" -e "s/%sarAuthorName%/${SAR_AUTHOR_NAME}/g" -e "s/%semanticVersion%/${SEMANTIC_VERSION}/g" -e "s/%awsRegion%/${REGION}/g" -e "s/%awsOrGov%/${AWS_OR_AWS_GOV}/g" .internal/aws/cloudformation/macro.yaml > "${TMPDIR}/macro.yaml" 82 | sed -e "s|%codeUri%|${CODE_URI}|g" -e "s/%sarAppName%/${SAR_APP_NAME}/g" -e "s/%sarAuthorName%/${SAR_AUTHOR_NAME}/g" -e "s/%semanticVersion%/${SEMANTIC_VERSION}/g" -e "s/%awsRegion%/${REGION}/g" -e "s/%accountID%/${ACCOUNT_ID}/g" -e "s/%awsOrGov%/${AWS_OR_AWS_GOV}/g" .internal/aws/cloudformation/template.yaml > "${TMPDIR}/template.yaml" 83 | sed -e "s|%codeUri%|${CODE_URI}|g" -e "s/%sarAppName%/${SAR_APP_NAME}/g" -e "s/%sarAuthorName%/${SAR_AUTHOR_NAME}/g" -e "s/%semanticVersion%/${SEMANTIC_VERSION}/g" -e "s/%awsRegion%/${REGION}/g" -e "s/%codeURIBucket%/${BUCKET}/g" .internal/aws/cloudformation/application.yaml > "${TMPDIR}/application.yaml" 84 | 85 | sam build --debug --use-container --build-dir "${TMPDIR}/.aws-sam/build/macro" --template-file "${TMPDIR}/macro.yaml" --region "${REGION}" 86 | sam package --template-file "${TMPDIR}/.aws-sam/build/macro/template.yaml" --output-template-file "${TMPDIR}/.aws-sam/build/macro/packaged.yaml" --s3-bucket "${BUCKET}" --region "${REGION}" 87 | sam publish --template "${TMPDIR}/.aws-sam/build/macro/packaged.yaml" --region "${REGION}" 88 | 89 | sam build --debug --use-container --build-dir "${TMPDIR}/.aws-sam/build/application" --template-file "${TMPDIR}/application.yaml" --region "${REGION}" 90 | sam package --template-file "${TMPDIR}/.aws-sam/build/application/template.yaml" --output-template-file "${TMPDIR}/.aws-sam/build/application/packaged.yaml" --s3-bucket "${BUCKET}" --region "${REGION}" 91 | sam publish --template "${TMPDIR}/.aws-sam/build/application/packaged.yaml" --region "${REGION}" 92 | 93 | sam build --debug --use-container --build-dir "${TMPDIR}/.aws-sam/build/template" --template-file "${TMPDIR}/template.yaml" --region "${REGION}" 94 | sam package --template-file "${TMPDIR}/.aws-sam/build/template/template.yaml" --output-template-file "${TMPDIR}/.aws-sam/build/template/packaged.yaml" --s3-bucket "${BUCKET}" --region "${REGION}" 95 | sam publish --template "${TMPDIR}/.aws-sam/build/template/packaged.yaml" --region "${REGION}" 96 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to the Elastic Serverless Forwarder 2 | 3 | If you have a bugfix or new feature that you would like to contribute to 4 | elastic-serverless-forwarder, please find or open an issue about it first. Talk about what you would like to do. It may be that somebody is already working on it, or that there are particular issues that you should know about before implementing the change. 5 | 6 | We enjoy working with contributors to get their code accepted. There are many approaches to fixing a problem and it is important to find the best approach before writing too much code. 7 | 8 | ## Running Elastic Serverless Forwarder locally 9 | 10 | We don't provide yet a tool for running Elastic Serverless Forwarder locally. A good first contribution would be to add such support. 
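Until such a tool exists, one rough way to experiment is to import the Lambda entry point directly and call it with a hand-crafted trigger event. The sketch below is illustrative only and makes several assumptions: valid AWS credentials are available, the referenced config file and SQS queues exist (all URLs and ARNs shown are placeholders), and the fake context exposes everything the handler needs (the real handler may require more than shown here).

```python
import os

# The same variables the SAM template wires into the Lambda environment (placeholder values).
os.environ["S3_CONFIG_FILE"] = "s3://my-config-bucket/config.yaml"
os.environ["SQS_CONTINUE_URL"] = "https://sqs.eu-west-1.amazonaws.com/123456789012/esf-continuing-queue"
os.environ["SQS_REPLAY_URL"] = "https://sqs.eu-west-1.amazonaws.com/123456789012/esf-replay-queue"

from main_aws import handler  # the entry point the Lambda runtime invokes


class FakeLambdaContext:
    """Minimal stand-in for the AWS Lambda context object; adjust if the handler needs more attributes."""

    invoked_function_arn = "arn:aws:lambda:eu-west-1:123456789012:function:esf-local"
    aws_request_id = "00000000-0000-0000-0000-000000000000"

    @staticmethod
    def get_remaining_time_in_millis() -> int:
        return 300_000


# Replace with a real SQS, S3-SQS, Kinesis or CloudWatch Logs trigger payload you want to test.
lambda_event: dict = {"Records": []}

print(handler(lambda_event, FakeLambdaContext()))
```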
11 | 12 | ## Code structure 13 | 14 | The code in the repository is organised according to some conventions. 15 | The folders starting with a dot (`.`) are to be considered internal to the Elastic workflow and you should not usually be concerned about them. 16 | 17 | The `docs` folder contains the documentation specific to every serverless solution we support (at the moment only AWS Lambda). 18 | 19 | The `tests` folder contains both unit and integration tests for the whole code base, structured to mirror the folders/packages structure of the main code base. An exception is the `scripts` folder, where maintenance helper scripts (usually in `bash`) reside. 20 | 21 | So far we have identified three components of the project, on top of the serverless function handlers for every supported cloud solution (at the moment only AWS Lambda): 22 | * `shippers`: the package related to outputs. Whether you are sending data to Elasticsearch, Logstash, or anything else, your code must reside here. 23 | * `storage`: the package related to inputs. Whether you are reading data from S3, a bytes blob payload, or anything else, your code must reside here. 24 | * `share`: the package for common shared utilities that are not related to the above domains and don't contain code related to specific application handling. 25 | 26 | The `handlers` package contains the code with logic specific to each serverless solution, each in its own subpackage (at the moment only AWS Lambda): everything related to a specific cloud serverless solution must reside there. 27 | 28 | 29 | ## Contributing Code Changes 30 | 31 | The process for contributing to any of the Elastic repositories is similar. 32 | 33 | 1. Please make sure you have signed the [Contributor License Agreement](http://www.elastic.co/contributor-agreement/). We are not asking you to assign copyright to us, but to give us the right to distribute your code without restriction. We ask this of all contributors in order to assure our users of the origin and continuing existence of the code. You only need to sign the CLA once. 34 | 35 | 2. Install the required dependencies. We have three different dependency sets, respectively for the app, linting and tests. You can install them all together or separately, either in a virtualenv or not, according to your preferences. The `make` targets provided are the following: 36 | * `all-requirements` Install all requirements on the host 37 | * `requirements` Install app requirements on the host 38 | * `requirements-lint` Install all linting requirements on the host 39 | * `requirements-tests` Install tests requirements on the host 40 | 41 | 3. Run the linters, license check and test suite to ensure your changes do not break existing code. The `make` targets provided are the following: 42 | * `lint` Lint the project on the host 43 | * `black` Run black in the project on the host 44 | * `isort` Run isort in the project on the host 45 | * `mypy` Run mypy in the project on the host 46 | * `license` Run license validation in the project 47 | * `test` Run all tests on the host 48 | * `integration-test` Run integration tests on the host 49 | * `unit-test` Run unit tests on the host 50 | * `coverage` Run tests on the host with coverage 51 | 52 | 4. 
A subset of the previous tasks can be run in docker (that's the method used in CI); these are the equivalent `make` targets provided: 53 | * `docker-lint` Lint the project on docker 54 | * `docker-black` Run black in the project on docker 55 | * `docker-isort` Run isort in the project on docker 56 | * `docker-mypy` Run mypy in the project on docker 57 | * `docker-test` Run tests on docker 58 | * `docker-integration-test` Run integration tests on docker 59 | * `docker-unit-test` Run unit tests on docker 60 | * `docker-coverage` Run tests on docker with coverage 61 | 62 | 5. Scripts to automatically fix linting and license issues are provided where available. They are the following: 63 | * `./tests/scripts/black.sh fix` 64 | * `./tests/scripts/isort.sh fix` 65 | * `./tests/scripts/license_headers_check.sh fix` 66 | 67 | 6. Rebase your changes. Update your local repository with the most recent code from the main elastic-serverless-forwarder repository, and rebase your branch on top of the latest `main` elastic-serverless-forwarder branch. 68 | 69 | 7. Submit a pull request. Push your local changes to your forked copy of the repository and submit a pull request. In the pull request, describe what your changes do and mention the number of the issue where discussion has taken place, e.g. "Closes #123". Please add or modify tests related to your changes. We tend to reach 100% coverage for all the code outside the `handlers` folder. 70 | 71 | Then sit back and wait. There will probably be a discussion about the pull 72 | request and, if any changes are needed, we would love to work with you to get your pull request merged into elastic-serverless-forwarder. 73 | -------------------------------------------------------------------------------- /shippers/logstash.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | import gzip 6 | from typing import Any, Optional 7 | 8 | from requests import Session 9 | from requests.adapters import HTTPAdapter 10 | from requests.exceptions import RequestException 11 | from urllib3.util.retry import Retry 12 | 13 | from share import json_dumper, normalise_event, shared_logger 14 | from shippers.shipper import EventIdGeneratorCallable, ReplayHandlerCallable 15 | 16 | _EVENT_SENT = "_EVENT_SENT" 17 | _EVENT_BUFFERED = "_EVENT_BUFFERED" 18 | 19 | _TIMEOUT = 10 20 | _MAX_RETRIES = 4 21 | _STATUS_FORCE_LIST = [429, 500, 502, 503, 504] 22 | # A backoff factor to apply between attempts after the second try. urllib3 will sleep for: 23 | # {backoff factor} * (2 ** ({number of total retries} - 1)) 24 | # seconds. If the backoff_factor is 1, then sleep() will sleep for [0s, 2s, 4s, …] between retries.
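# For example, with the values below (_MAX_RETRIES = 4, _BACKOFF_FACTOR = 1) the waits between attempts
# are roughly 0s, 2s, 4s and 8s before the request finally fails (illustrative; the exact sequence
# depends on the urllib3 version in use).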
25 | _BACKOFF_FACTOR = 1 26 | 27 | 28 | class LogstashAdapter(HTTPAdapter): 29 | """ 30 | An HTTP adapter specific for Logstash that encapsulates the retry/backoff parameters and allows to verify 31 | certificates by SSL fingerprint 32 | """ 33 | 34 | def __init__(self, fingerprint: str, *args, **kwargs): # type: ignore 35 | self._fingerprint = fingerprint 36 | retry_strategy = Retry(total=_MAX_RETRIES, backoff_factor=_BACKOFF_FACTOR, status_forcelist=_STATUS_FORCE_LIST) 37 | HTTPAdapter.__init__(self, max_retries=retry_strategy, *args, **kwargs) # type: ignore 38 | 39 | def init_poolmanager(self, *args, **kwargs): # type: ignore 40 | if self._fingerprint: 41 | kwargs["assert_fingerprint"] = self._fingerprint 42 | return super().init_poolmanager(*args, **kwargs) # type: ignore 43 | 44 | 45 | class LogstashShipper: 46 | """ 47 | Logstash Shipper. 48 | This class implements concrete Logstash Shipper 49 | """ 50 | 51 | def __init__( 52 | self, 53 | logstash_url: str = "", 54 | username: str = "", 55 | password: str = "", 56 | max_batch_size: int = 1, 57 | compression_level: int = 9, 58 | ssl_assert_fingerprint: str = "", 59 | tags: list[str] = [], 60 | ) -> None: 61 | if logstash_url: 62 | self._logstash_url = logstash_url 63 | else: 64 | raise ValueError("You must provide logstash_url") 65 | 66 | self._replay_handler: Optional[ReplayHandlerCallable] = None 67 | self._event_id_generator: Optional[EventIdGeneratorCallable] = None 68 | self._events_batch: list[dict[str, Any]] = [] 69 | 70 | self._max_batch_size = max_batch_size 71 | 72 | self._tags = tags 73 | 74 | if 0 <= compression_level <= 9: 75 | self._compression_level = compression_level 76 | else: 77 | raise ValueError("compression_level must be an integer value between 0 and 9") 78 | 79 | self._replay_args: dict[str, Any] = {} 80 | 81 | self._session = self._get_session(self._logstash_url, username, password, ssl_assert_fingerprint) 82 | 83 | @staticmethod 84 | def _get_session(url: str, username: str, password: str, ssl_assert_fingerprint: str) -> Session: 85 | session = Session() 86 | 87 | if username: 88 | session.auth = (username, password) 89 | 90 | if ssl_assert_fingerprint: 91 | session.verify = False 92 | 93 | session.mount(url, LogstashAdapter(ssl_assert_fingerprint)) 94 | 95 | return session 96 | 97 | def send(self, event: dict[str, Any]) -> str: 98 | if "_id" not in event and self._event_id_generator is not None: 99 | event["_id"] = self._event_id_generator(event) 100 | 101 | event["tags"] = ["forwarded"] 102 | event["tags"] += self._tags 103 | 104 | event = normalise_event(event) 105 | 106 | # Let's move _id to @metadata._id for logstash 107 | if "_id" in event: 108 | event["@metadata"] = {"_id": event["_id"]} 109 | del event["_id"] 110 | 111 | self._events_batch.append(event) 112 | if len(self._events_batch) < self._max_batch_size: 113 | return _EVENT_BUFFERED 114 | 115 | self._send() 116 | 117 | return _EVENT_SENT 118 | 119 | def set_event_id_generator(self, event_id_generator: EventIdGeneratorCallable) -> None: 120 | self._event_id_generator = event_id_generator 121 | 122 | def set_replay_handler(self, replay_handler: ReplayHandlerCallable) -> None: 123 | self._replay_handler = replay_handler 124 | 125 | def flush(self) -> None: 126 | if len(self._events_batch) > 0: 127 | self._send() 128 | 129 | self._events_batch.clear() 130 | 131 | return 132 | 133 | def _send(self) -> None: 134 | ndjson = "\n".join(json_dumper(event) for event in self._events_batch) 135 | 136 | try: 137 | response = self._session.put( 138 | 
self._logstash_url, 139 | data=gzip.compress(ndjson.encode("utf-8"), self._compression_level), 140 | headers={"Content-Encoding": "gzip", "Content-Type": "application/x-ndjson"}, 141 | timeout=_TIMEOUT, 142 | ) 143 | 144 | if response.status_code == 401: 145 | raise RequestException("Authentication error") 146 | 147 | self._events_batch.clear() 148 | 149 | except RequestException as e: 150 | shared_logger.error( 151 | f"logstash shipper encountered an error while publishing events to logstash. Error: {str(e)}" 152 | ) 153 | 154 | if self._replay_handler is not None: 155 | for event in self._events_batch: 156 | # let's put back the _id field from @metadata._id 157 | if "@metadata" in event and "_id" in event["@metadata"]: 158 | event["_id"] = event["@metadata"]["_id"] 159 | del event["@metadata"] 160 | 161 | self._replay_handler(self._logstash_url, self._replay_args, event) 162 | -------------------------------------------------------------------------------- /tests/shippers/test_composite.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from typing import Any 6 | from unittest import TestCase 7 | 8 | import pytest 9 | 10 | from share import IncludeExcludeFilter, IncludeExcludeRule 11 | from shippers import ( 12 | EVENT_IS_EMPTY, 13 | EVENT_IS_FILTERED, 14 | EVENT_IS_SENT, 15 | CompositeShipper, 16 | EventIdGeneratorCallable, 17 | ReplayHandlerCallable, 18 | ) 19 | 20 | 21 | class DummyShipper: 22 | def send(self, event: dict[str, Any]) -> str: 23 | self._sent.append(event) 24 | return "dummy" 25 | 26 | def set_event_id_generator(self, event_id_generator: EventIdGeneratorCallable) -> None: 27 | self._event_id_generator = event_id_generator 28 | 29 | def set_replay_handler(self, replay_handler: ReplayHandlerCallable) -> None: 30 | self._replay_handler = replay_handler 31 | 32 | def flush(self) -> None: 33 | self._flushed = True 34 | 35 | def __init__(self, **kwargs: Any): 36 | self._sent: list[dict[str, Any]] = [] 37 | self._flushed = False 38 | 39 | 40 | @pytest.mark.unit 41 | class TestCompositeShipper(TestCase): 42 | def test_add_shipper(self) -> None: 43 | dummy_shipper = DummyShipper() 44 | composite_shipper = CompositeShipper() 45 | composite_shipper.add_shipper(dummy_shipper) 46 | assert composite_shipper._shippers == [dummy_shipper] 47 | 48 | def test_add_include_exclude_filter(self) -> None: 49 | composite_shipper = CompositeShipper() 50 | include_exclude_filter = IncludeExcludeFilter() 51 | composite_shipper.add_include_exclude_filter(include_exclude_filter) 52 | assert composite_shipper._include_exclude_filter == include_exclude_filter 53 | 54 | def test_send(self) -> None: 55 | dummy_shipper = DummyShipper() 56 | composite_shipper = CompositeShipper() 57 | composite_shipper.add_shipper(dummy_shipper) 58 | assert EVENT_IS_EMPTY == composite_shipper.send({"miss": "message field"}) 59 | assert dummy_shipper._sent == [] 60 | 61 | assert EVENT_IS_EMPTY == composite_shipper.send({"fields": {"message": ""}}) 62 | assert dummy_shipper._sent == [] 63 | 64 | assert EVENT_IS_EMPTY == composite_shipper.send({"message": ""}) 65 | assert dummy_shipper._sent == [] 66 | 67 | assert EVENT_IS_SENT == composite_shipper.send({"message": "will pass"}) 68 | assert dummy_shipper._sent == [{"message": "will 
pass"}] 69 | 70 | dummy_shipper._sent = [] 71 | 72 | assert EVENT_IS_SENT == composite_shipper.send({"fields": {"message": "will pass"}}) 73 | assert dummy_shipper._sent == [{"fields": {"message": "will pass"}}] 74 | 75 | dummy_shipper._sent = [] 76 | 77 | include_exclude_filter = IncludeExcludeFilter(include_patterns=[IncludeExcludeRule(pattern="match")]) 78 | composite_shipper.add_include_exclude_filter(include_exclude_filter) 79 | 80 | assert EVENT_IS_EMPTY == composite_shipper.send({"miss": "message field"}) 81 | assert dummy_shipper._sent == [] 82 | 83 | assert EVENT_IS_EMPTY == composite_shipper.send({"fields": {"message": ""}}) 84 | assert dummy_shipper._sent == [] 85 | 86 | assert EVENT_IS_EMPTY == composite_shipper.send({"message": ""}) 87 | assert dummy_shipper._sent == [] 88 | 89 | assert EVENT_IS_SENT == composite_shipper.send({"fields": {"message": "match"}}) 90 | assert dummy_shipper._sent == [{"fields": {"message": "match"}}] 91 | 92 | dummy_shipper._sent = [] 93 | 94 | include_exclude_filter = IncludeExcludeFilter(include_patterns=[IncludeExcludeRule(pattern="match")]) 95 | composite_shipper.add_include_exclude_filter(include_exclude_filter) 96 | assert EVENT_IS_SENT == composite_shipper.send({"message": "match"}) 97 | assert dummy_shipper._sent == [{"message": "match"}] 98 | 99 | dummy_shipper._sent = [] 100 | 101 | assert EVENT_IS_EMPTY == composite_shipper.send({"miss": "message field"}) 102 | assert dummy_shipper._sent == [] 103 | 104 | assert EVENT_IS_EMPTY == composite_shipper.send({"fields": {"message": ""}}) 105 | assert dummy_shipper._sent == [] 106 | 107 | assert EVENT_IS_EMPTY == composite_shipper.send({"message": ""}) 108 | assert dummy_shipper._sent == [] 109 | 110 | include_exclude_filter = IncludeExcludeFilter(include_patterns=[IncludeExcludeRule(pattern="not match")]) 111 | composite_shipper.add_include_exclude_filter(include_exclude_filter) 112 | 113 | assert EVENT_IS_EMPTY == composite_shipper.send({"miss": "message field"}) 114 | assert dummy_shipper._sent == [] 115 | 116 | assert EVENT_IS_EMPTY == composite_shipper.send({"fields": {"message": ""}}) 117 | assert dummy_shipper._sent == [] 118 | 119 | assert EVENT_IS_EMPTY == composite_shipper.send({"message": ""}) 120 | assert dummy_shipper._sent == [] 121 | 122 | assert EVENT_IS_FILTERED == composite_shipper.send({"fields": {"message": "a message"}}) 123 | assert dummy_shipper._sent == [] 124 | 125 | dummy_shipper._sent = [] 126 | 127 | assert EVENT_IS_FILTERED == composite_shipper.send({"message": "a message"}) 128 | assert dummy_shipper._sent == [] 129 | 130 | def test_set_event_id_generator(self) -> None: 131 | dummy_shipper = DummyShipper() 132 | composite_shipper = CompositeShipper() 133 | composite_shipper.add_shipper(dummy_shipper) 134 | 135 | def event_id_generator(event: dict[str, Any]) -> str: 136 | return "" 137 | 138 | composite_shipper.set_event_id_generator(event_id_generator=event_id_generator) 139 | assert dummy_shipper._event_id_generator == event_id_generator 140 | 141 | def test_set_replay_handler(self) -> None: 142 | dummy_shipper = DummyShipper() 143 | composite_shipper = CompositeShipper() 144 | composite_shipper.add_shipper(dummy_shipper) 145 | 146 | def replay_handler(output_type: str, output_args: dict[str, Any], payload: dict[str, Any]) -> None: 147 | return 148 | 149 | composite_shipper.set_replay_handler(replay_handler=replay_handler) 150 | assert dummy_shipper._replay_handler == replay_handler 151 | 152 | def test_flush(self) -> None: 153 | dummy_shipper = DummyShipper() 154 | 
composite_shipper = CompositeShipper() 155 | composite_shipper.add_shipper(dummy_shipper) 156 | composite_shipper.flush() 157 | assert dummy_shipper._flushed is True 158 | -------------------------------------------------------------------------------- /share/expand_event_list_from_field.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | from copy import deepcopy 6 | from typing import Any, Callable, Iterator, Optional, Union 7 | 8 | from .json import json_dumper 9 | from .logger import logger as shared_logger 10 | 11 | # ExpandEventListFromFieldResolverCallable accepts an integration_scope and the field to expand events list from as 12 | # arguments. It returns the resolved name of the field to expand the events list from. 13 | ExpandEventListFromFieldResolverCallable = Callable[[str, str], str] 14 | 15 | 16 | class ExpandEventListFromField: 17 | def __init__( 18 | self, 19 | field_to_expand_event_list_from: str, 20 | integration_scope: str, 21 | field_resolver: ExpandEventListFromFieldResolverCallable, 22 | root_fields_to_add_to_expanded_event: Optional[Union[str, list[str]]] = None, 23 | last_event_expanded_offset: Optional[int] = None, 24 | ): 25 | self._last_event_expanded_offset: Optional[int] = last_event_expanded_offset 26 | self._root_fields_to_add_to_expanded_event = root_fields_to_add_to_expanded_event 27 | self._field_to_expand_event_list_from: str = field_resolver(integration_scope, field_to_expand_event_list_from) 28 | 29 | def _expand_event_list_from_field( 30 | self, json_object: dict[str, Any], starting_offset: int, ending_offset: int 31 | ) -> Iterator[tuple[Any, int, Optional[int], bool, bool]]: 32 | if len(self._field_to_expand_event_list_from) == 0 or self._field_to_expand_event_list_from not in json_object: 33 | yield None, starting_offset, 0, True, False 34 | else: 35 | events_list: list[Any] = json_object[self._field_to_expand_event_list_from] 36 | # let's set to 1 if empty list to avoid division by zero in the line below, 37 | # for loop will be not executed anyway 38 | offset_skew = 0 39 | events_list_length = max(1, len(events_list)) 40 | avg_event_length = (ending_offset - starting_offset) / events_list_length 41 | if self._last_event_expanded_offset is not None and len(events_list) > self._last_event_expanded_offset + 1: 42 | offset_skew = self._last_event_expanded_offset + 1 43 | events_list = events_list[offset_skew:] 44 | 45 | # Let's compute the root_fields_to_add_to_expanded_event only once per events to expand 46 | root_fields_to_add_to_expanded_event: dict[str, Any] = {} 47 | if self._root_fields_to_add_to_expanded_event == "all": 48 | root_fields_to_add_to_expanded_event = deepcopy(json_object) 49 | del root_fields_to_add_to_expanded_event[self._field_to_expand_event_list_from] 50 | # we want to add only a list of root fields 51 | elif isinstance(self._root_fields_to_add_to_expanded_event, list): 52 | for root_field_to_add_to_expanded_event in self._root_fields_to_add_to_expanded_event: 53 | if root_field_to_add_to_expanded_event in json_object: 54 | root_fields_to_add_to_expanded_event[root_field_to_add_to_expanded_event] = json_object[ 55 | root_field_to_add_to_expanded_event 56 | ] 57 | else: 58 | shared_logger.debug( 59 | 
f"`{root_field_to_add_to_expanded_event}` field specified in " 60 | f"`root_fields_to_add_to_expanded_event` parameter is not present at root level" 61 | f" to expanded event not present at root level" 62 | ) 63 | 64 | for event_n, event in enumerate(events_list): 65 | if self._root_fields_to_add_to_expanded_event: 66 | # we can and want to add the root fields only in case the event is a not empty json object 67 | if isinstance(event, dict) and len(event) > 0: 68 | # we want to add all the root fields 69 | event.update(root_fields_to_add_to_expanded_event) 70 | else: 71 | shared_logger.debug("root fields to be added on a non json object event") 72 | 73 | event_n += offset_skew 74 | yield event, int( 75 | starting_offset + (event_n * avg_event_length) 76 | ), event_n, event_n == events_list_length - 1, True 77 | 78 | def expand( 79 | self, log_event: bytes, json_object: Optional[dict[str, Any]], starting_offset: int, ending_offset: int 80 | ) -> Iterator[tuple[bytes, int, int, Optional[int]]]: 81 | if json_object is None: 82 | yield log_event, starting_offset, ending_offset, None 83 | else: 84 | # expanded_ending_offset is set to the starting_offset because if we want to set it to the beginning of the 85 | # json object in case of a message from the continuation queue. if we update it, if the payload is continued 86 | # we will fetch the content of the payload from the middle of the json object, failing to parse it 87 | expanded_ending_offset: int = starting_offset 88 | 89 | for ( 90 | expanded_event, 91 | expanded_starting_offset, 92 | expanded_event_n, 93 | is_last_expanded_event, 94 | event_was_expanded, 95 | ) in self._expand_event_list_from_field(json_object, starting_offset, ending_offset): 96 | if event_was_expanded: 97 | # empty values once json dumped might have a len() greater than 0, this will prevent 98 | # them to be skipped later as empty value, so we yield as zero length bytes string 99 | if not expanded_event: 100 | expanded_log_event = b"" 101 | else: 102 | expanded_log_event = json_dumper(expanded_event).encode("utf-8") 103 | 104 | if is_last_expanded_event: 105 | expanded_event_n = None 106 | # only when we reach the last expanded event we can move the ending offset 107 | expanded_ending_offset = ending_offset 108 | else: 109 | expanded_event_n = None 110 | expanded_log_event = log_event 111 | expanded_ending_offset = ending_offset 112 | 113 | yield expanded_log_event, expanded_starting_offset, expanded_ending_offset, expanded_event_n 114 | -------------------------------------------------------------------------------- /share/secretsmanager.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | 5 | import re 6 | from typing import Any, Union 7 | 8 | import boto3 9 | from botocore.client import BaseClient as BotoBaseClient 10 | from orjson import JSONDecodeError 11 | 12 | from .json import json_parser 13 | from .logger import logger as shared_logger 14 | 15 | 16 | def _get_aws_sm_client(region_name: str) -> BotoBaseClient: 17 | """ 18 | Getter for secrets manager client 19 | Extracted for mocking 20 | """ 21 | 22 | return boto3.client("secretsmanager", region_name=region_name) 23 | 24 | 25 | def aws_sm_expander(config_yaml: str) -> str: 26 | """ 27 | Secrets Manager expander for config file 28 | It scans the file for the secrets manager arn pattern, checks for correct configuration, 29 | retrieves the values from the secret manager and replaces them in the config file. 30 | Exceptions will be raised for the following scenarios: 31 | - Not respecting the arn pattern 32 | - Input is for both plain text and json keys for the same secret manager name 33 | - The fetched value is empty 34 | """ 35 | 36 | config_secret_entry_values: dict[str, str] = {} 37 | secret_arn_by_secret_name: dict[str, str] = {} 38 | secret_key_values_cache: dict[str, dict[str, Any]] = {} 39 | secret_consistency_len_check: dict[str, int] = {} 40 | 41 | re_pattern = r"arn:aws:secretsmanager:(?:[^:]+)?:(?:[^:]+)?:secret:(?:[^\"']+)?" 42 | found_secrets_entries = re.findall(re_pattern, config_yaml) 43 | 44 | for secret_arn in found_secrets_entries: 45 | splitted_secret_arn = secret_arn.split(":") 46 | 47 | if len(splitted_secret_arn) != 7 and len(splitted_secret_arn) != 8: 48 | raise SyntaxError("Invalid arn format: {}".format(secret_arn)) 49 | 50 | if secret_arn not in config_secret_entry_values: 51 | config_secret_entry_values[secret_arn] = "" 52 | 53 | region = splitted_secret_arn[3] 54 | secrets_manager_name = splitted_secret_arn[6] 55 | 56 | if region == "": 57 | raise ValueError("Must be provided region in arn: {}".format(secret_arn)) 58 | 59 | if secrets_manager_name == "": 60 | raise ValueError("Must be provided secrets manager name in arn: {}".format(secret_arn)) 61 | 62 | if secrets_manager_name not in secret_consistency_len_check: 63 | secret_consistency_len_check[secrets_manager_name] = len(splitted_secret_arn) 64 | else: 65 | if secret_consistency_len_check[secrets_manager_name] != len(splitted_secret_arn): 66 | raise ValueError( 67 | "You cannot have both plain text and json key for the same secret: {}".format(secret_arn) 68 | ) 69 | 70 | if region not in secret_key_values_cache: 71 | secret_key_values_cache[region] = {} 72 | 73 | if secrets_manager_name not in secret_key_values_cache[region]: 74 | secret_key_values_cache[region][secrets_manager_name] = {} 75 | 76 | secret_arn_by_secret_name[secrets_manager_name] = ":".join(splitted_secret_arn[0:7]) 77 | 78 | for region in secret_key_values_cache: 79 | for secrets_manager_name in secret_key_values_cache[region]: 80 | secret_arn = secret_arn_by_secret_name[secrets_manager_name] 81 | str_secrets = get_secret_values(secret_arn, region) 82 | parsed_secrets = parse_secrets_str(str_secrets, secret_arn) 83 | 84 | secret_key_values_cache[region][secrets_manager_name] = parsed_secrets 85 | 86 | for config_secret_entry in config_secret_entry_values: 87 | splitted_secret_arn = config_secret_entry.split(":") 88 | 89 | region = splitted_secret_arn[3] 90 | secrets_manager_name = splitted_secret_arn[6] 91 | 92 | if len(splitted_secret_arn) == 8: 93 | wanted_key = splitted_secret_arn[-1] 94 | if wanted_key == "": 95 | raise ValueError(f"Error for secret 
{config_secret_entry}: key must not be empty") 96 | 97 | if not isinstance(secret_key_values_cache[region][secrets_manager_name], dict): 98 | raise ValueError(f"Error for secret {config_secret_entry}: expected to be keys/values pair") 99 | 100 | if wanted_key in secret_key_values_cache[region][secrets_manager_name]: 101 | fetched_secret_entry_value = secret_key_values_cache[region][secrets_manager_name][wanted_key] 102 | if fetched_secret_entry_value == "": 103 | raise ValueError(f"Error for secret {config_secret_entry}: must not be empty") 104 | config_secret_entry_values[config_secret_entry] = fetched_secret_entry_value 105 | else: 106 | raise KeyError(f"Error for secret {config_secret_entry}: key not found") 107 | else: 108 | if secret_key_values_cache[region][secrets_manager_name] == "": 109 | raise ValueError(f"Error for secret {config_secret_entry}: must not be empty") 110 | elif not isinstance(secret_key_values_cache[region][secrets_manager_name], str): 111 | raise ValueError(f"Error for secret {config_secret_entry}: expected to be a string") 112 | 113 | config_secret_entry_values[config_secret_entry] = secret_key_values_cache[region][secrets_manager_name] 114 | 115 | config_yaml = config_yaml.replace(config_secret_entry, config_secret_entry_values[config_secret_entry]) 116 | 117 | return config_yaml 118 | 119 | 120 | def get_secret_values(secret_arn: str, region_name: str) -> str: 121 | """ 122 | Calls the get_secret_value api from secrets manager, and returns the values. 123 | If the secret is created in a binary format, it will be received as a byte string 124 | on the "BinarySecret" key (boto3 does the base64 decoding internally). 125 | Raises exceptions for ClientError errors. 126 | """ 127 | 128 | secrets: str = "" 129 | client = _get_aws_sm_client(region_name) 130 | 131 | try: 132 | get_secret_value_response = client.get_secret_value(SecretId=secret_arn) 133 | except Exception as e: 134 | raise e 135 | else: 136 | if "SecretString" in get_secret_value_response: 137 | secrets = get_secret_value_response["SecretString"] 138 | 139 | else: 140 | secrets = get_secret_value_response["SecretBinary"].decode("utf-8") 141 | 142 | return secrets 143 | 144 | 145 | def parse_secrets_str(secrets: str, secret_arn: str) -> Union[str, dict[str, Any]]: 146 | """ 147 | Helper function to determine if the secrets from secrets manager are json or plain text. 148 | Returns str or dict only. 149 | """ 150 | 151 | try: 152 | parsed_secrets: dict[str, str] = json_parser(secrets) 153 | except JSONDecodeError: 154 | shared_logger.debug("parsed secrets as plaintext") 155 | return secrets 156 | else: 157 | shared_logger.debug("parsed secrets as json") 158 | return parsed_secrets 159 | -------------------------------------------------------------------------------- /handlers/aws/kinesis_trigger.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
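# For reference, the handlers below rely only on this subset of the Lambda Kinesis trigger payload
# (values are illustrative placeholders; "data" is the base64-encoded record payload):
#   {
#     "Records": [
#       {
#         "eventSourceARN": "arn:aws:kinesis:eu-west-1:123456789012:stream/my-stream",
#         "kinesis": {
#           "partitionKey": "...",
#           "sequenceNumber": "...",
#           "approximateArrivalTimestamp": 1700000000.0,
#           "data": "<base64-encoded payload>"
#         }
#       }
#     ]
#   }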
4 | 5 | import datetime 6 | from typing import Any, Iterator, Optional 7 | 8 | from botocore.client import BaseClient as BotoBaseClient 9 | 10 | from share import ExpandEventListFromField, ProtocolMultiline, shared_logger 11 | from storage import ProtocolStorage, StorageFactory 12 | 13 | from .utils import get_account_id_from_arn, get_kinesis_stream_name_type_and_region_from_arn 14 | 15 | 16 | def _handle_kinesis_move( 17 | sqs_client: BotoBaseClient, 18 | sqs_destination_queue: str, 19 | kinesis_record: dict[str, Any], 20 | event_input_id: str, 21 | config_yaml: str, 22 | continuing_queue: bool = True, 23 | last_ending_offset: Optional[int] = None, 24 | last_event_expanded_offset: Optional[int] = None, 25 | ) -> None: 26 | """ 27 | Handler of the continuation/replay queue for kinesis data stream inputs. 28 | If a kinesis data stream records batch cannot be fully processed before the timeout of the lambda, the handler will 29 | be called for the continuation queue: it will send new sqs messages for the unprocessed records to the 30 | internal continuing sqs queue. 31 | If a sqs message has an eventSourceARN not present in the config.yaml ids, then the handler should be called, 32 | so it can get placed in the internal replay queue. 33 | 34 | :param continuing_queue: should be set to true if the sqs message is going to be placed in the continuing 35 | queue. Otherwise, we assume it will be placed in the replaying queue, and, in that case, it should be set to false. 36 | """ 37 | 38 | sequence_number = kinesis_record["kinesis"]["sequenceNumber"] 39 | partition_key = kinesis_record["kinesis"]["partitionKey"] 40 | approximate_arrival_timestamp = kinesis_record["kinesis"]["approximateArrivalTimestamp"] 41 | stream_type, stream_name, _ = get_kinesis_stream_name_type_and_region_from_arn(event_input_id) 42 | 43 | message_attributes = { 44 | "config": {"StringValue": config_yaml, "DataType": "String"}, 45 | "originalStreamType": {"StringValue": stream_type, "DataType": "String"}, 46 | "originalStreamName": {"StringValue": stream_name, "DataType": "String"}, 47 | "originalPartitionKey": {"StringValue": partition_key, "DataType": "String"}, 48 | "originalSequenceNumber": {"StringValue": sequence_number, "DataType": "String"}, 49 | "originalEventSourceARN": {"StringValue": event_input_id, "DataType": "String"}, 50 | "originalApproximateArrivalTimestamp": { 51 | "StringValue": str(approximate_arrival_timestamp), 52 | "DataType": "Number", 53 | }, 54 | } 55 | 56 | if last_ending_offset is not None: 57 | message_attributes["originalLastEndingOffset"] = {"StringValue": str(last_ending_offset), "DataType": "Number"} 58 | 59 | if last_event_expanded_offset is not None: 60 | message_attributes["originalLastEventExpandedOffset"] = { 61 | "StringValue": str(last_event_expanded_offset), 62 | "DataType": "Number", 63 | } 64 | 65 | kinesis_data: str = kinesis_record["kinesis"]["data"] 66 | 67 | sqs_client.send_message( 68 | QueueUrl=sqs_destination_queue, 69 | MessageBody=kinesis_data, 70 | MessageAttributes=message_attributes, 71 | ) 72 | 73 | if continuing_queue: 74 | shared_logger.debug( 75 | "continuing", 76 | extra={ 77 | "sqs_continuing_queue": sqs_destination_queue, 78 | "last_ending_offset": last_ending_offset, 79 | "last_event_expanded_offset": last_event_expanded_offset, 80 | "partition_key": partition_key, 81 | "approximate_arrival_timestamp": approximate_arrival_timestamp, 82 | "sequence_number": sequence_number, 83 | }, 84 | ) 85 | else: 86 | shared_logger.debug( 87 | "replaying", 88 | extra={ 89 | 
"sqs_replaying_queue": sqs_destination_queue, 90 | "partition_key": partition_key, 91 | "approximate_arrival_timestamp": approximate_arrival_timestamp, 92 | "sequence_number": sequence_number, 93 | }, 94 | ) 95 | 96 | 97 | def _handle_kinesis_record( 98 | event: dict[str, Any], 99 | input_id: str, 100 | event_list_from_field_expander: ExpandEventListFromField, 101 | json_content_type: Optional[str], 102 | multiline_processor: Optional[ProtocolMultiline], 103 | ) -> Iterator[tuple[dict[str, Any], int, Optional[int], int]]: 104 | """ 105 | Handler for kinesis data stream inputs. 106 | It iterates through kinesis records in the kinesis trigger and process 107 | the content of kinesis.data payload 108 | """ 109 | account_id = get_account_id_from_arn(input_id) 110 | for kinesis_record_n, kinesis_record in enumerate(event["Records"]): 111 | storage: ProtocolStorage = StorageFactory.create( 112 | storage_type="payload", 113 | payload=kinesis_record["kinesis"]["data"], 114 | json_content_type=json_content_type, 115 | event_list_from_field_expander=event_list_from_field_expander, 116 | multiline_processor=multiline_processor, 117 | ) 118 | 119 | stream_type, stream_name, aws_region = get_kinesis_stream_name_type_and_region_from_arn( 120 | kinesis_record["eventSourceARN"] 121 | ) 122 | 123 | events = storage.get_by_lines(range_start=0) 124 | 125 | for log_event, starting_offset, ending_offset, event_expanded_offset in events: 126 | assert isinstance(log_event, bytes) 127 | 128 | es_event: dict[str, Any] = { 129 | "@timestamp": datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%dT%H:%M:%S.%fZ"), 130 | "fields": { 131 | "message": log_event.decode("utf-8"), 132 | "log": { 133 | "offset": starting_offset, 134 | "file": { 135 | "path": kinesis_record["eventSourceARN"], 136 | }, 137 | }, 138 | "aws": { 139 | "kinesis": { 140 | "type": stream_type, 141 | "name": stream_name, 142 | "partition_key": kinesis_record["kinesis"]["partitionKey"], 143 | "sequence_number": kinesis_record["kinesis"]["sequenceNumber"], 144 | } 145 | }, 146 | "cloud": { 147 | "provider": "aws", 148 | "region": aws_region, 149 | "account": {"id": account_id}, 150 | }, 151 | }, 152 | "meta": { 153 | "approximate_arrival_timestamp": int( 154 | float(kinesis_record["kinesis"]["approximateArrivalTimestamp"]) * 1000 155 | ), 156 | }, 157 | } 158 | 159 | yield es_event, ending_offset, event_expanded_offset, kinesis_record_n 160 | -------------------------------------------------------------------------------- /handlers/aws/cloudwatch_logs_trigger.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | 5 | import datetime 6 | from typing import Any, Iterator, Optional 7 | 8 | from botocore.client import BaseClient as BotoBaseClient 9 | 10 | from share import ExpandEventListFromField, ProtocolMultiline, json_parser, shared_logger 11 | from storage import ProtocolStorage, StorageFactory 12 | 13 | from .utils import GZIP_ENCODING, PAYLOAD_ENCODING_KEY, get_account_id_from_arn, gzip_base64_encoded 14 | 15 | 16 | def _from_awslogs_data_to_event(awslogs_data: str) -> Any: 17 | """ 18 | Returns cloudwatch logs event from base64 encoded and gzipped payload 19 | """ 20 | storage: ProtocolStorage = StorageFactory.create(storage_type="payload", payload=awslogs_data) 21 | cloudwatch_logs_payload_plain = storage.get_as_string() 22 | return json_parser(cloudwatch_logs_payload_plain) 23 | 24 | 25 | def _handle_cloudwatch_logs_move( 26 | sqs_client: BotoBaseClient, 27 | sqs_destination_queue: str, 28 | cloudwatch_logs_event: dict[str, Any], 29 | input_id: str, 30 | config_yaml: str, 31 | continuing_queue: bool = True, 32 | current_log_event: int = 0, 33 | last_ending_offset: Optional[int] = None, 34 | last_event_expanded_offset: Optional[int] = None, 35 | ) -> None: 36 | """ 37 | Handler of the continuation queue for cloudwatch logs inputs 38 | If a cloudwatch logs data payload cannot be fully processed before the 39 | timeout of the lambda this handler will be called: it will 40 | send new sqs messages for the unprocessed payload to the 41 | internal continuing sqs queue 42 | """ 43 | 44 | log_group_name = cloudwatch_logs_event["logGroup"] 45 | log_stream_name = cloudwatch_logs_event["logStream"] 46 | logs_events = cloudwatch_logs_event["logEvents"][current_log_event:] 47 | 48 | for current_log_event, log_event in enumerate(logs_events): 49 | if current_log_event > 0: 50 | last_ending_offset = None 51 | 52 | message_attributes = { 53 | "config": {"StringValue": config_yaml, "DataType": "String"}, 54 | "originalEventId": {"StringValue": log_event["id"], "DataType": "String"}, 55 | "originalEventSourceARN": {"StringValue": input_id, "DataType": "String"}, 56 | "originalLogGroup": {"StringValue": log_group_name, "DataType": "String"}, 57 | "originalLogStream": {"StringValue": log_stream_name, "DataType": "String"}, 58 | "originalEventTimestamp": {"StringValue": str(log_event["timestamp"]), "DataType": "Number"}, 59 | PAYLOAD_ENCODING_KEY: {"StringValue": GZIP_ENCODING, "DataType": "String"}, 60 | } 61 | 62 | if last_ending_offset is not None: 63 | message_attributes["originalLastEndingOffset"] = { 64 | "StringValue": str(last_ending_offset), 65 | "DataType": "Number", 66 | } 67 | 68 | if last_event_expanded_offset is not None: 69 | message_attributes["originalLastEventExpandedOffset"] = { 70 | "StringValue": str(last_event_expanded_offset), 71 | "DataType": "Number", 72 | } 73 | 74 | # forward compressed message to sqs queue 75 | sqs_client.send_message( 76 | QueueUrl=sqs_destination_queue, 77 | MessageBody=gzip_base64_encoded(log_event["message"]), 78 | MessageAttributes=message_attributes, 79 | ) 80 | 81 | if continuing_queue: 82 | shared_logger.debug( 83 | "continuing", 84 | extra={ 85 | "sqs_continuing_queue": sqs_destination_queue, 86 | "last_ending_offset": last_ending_offset, 87 | "last_event_expanded_offset": last_event_expanded_offset, 88 | "event_id": log_event["id"], 89 | "event_timestamp": log_event["timestamp"], 90 | }, 91 | ) 92 | else: 93 | shared_logger.debug( 94 | "replaying", 95 | extra={ 96 | "sqs_replaying_queue": sqs_destination_queue, 97 | "event_id": log_event["id"], 98 | 
"event_timestamp": log_event["timestamp"], 99 | }, 100 | ) 101 | 102 | 103 | def _handle_cloudwatch_logs_event( 104 | event: dict[str, Any], 105 | aws_region: str, 106 | input_id: str, 107 | event_list_from_field_expander: ExpandEventListFromField, 108 | json_content_type: Optional[str], 109 | multiline_processor: Optional[ProtocolMultiline], 110 | ) -> Iterator[tuple[dict[str, Any], int, Optional[int], int]]: 111 | """ 112 | Handler for cloudwatch logs inputs. 113 | It iterates through the logEvents in cloudwatch logs trigger payload and process 114 | content of body payload in the log event. 115 | If a log event cannot be fully processed before the 116 | timeout of the lambda it will call the sqs continuing handler 117 | """ 118 | 119 | account_id = get_account_id_from_arn(input_id) 120 | 121 | log_group_name = event["logGroup"] 122 | log_stream_name = event["logStream"] 123 | 124 | for cloudwatch_log_event_n, cloudwatch_log_event in enumerate(event["logEvents"]): 125 | event_id = cloudwatch_log_event["id"] 126 | event_timestamp = cloudwatch_log_event["timestamp"] 127 | 128 | storage_message: ProtocolStorage = StorageFactory.create( 129 | storage_type="payload", 130 | payload=cloudwatch_log_event["message"], 131 | json_content_type=json_content_type, 132 | event_list_from_field_expander=event_list_from_field_expander, 133 | multiline_processor=multiline_processor, 134 | ) 135 | 136 | events = storage_message.get_by_lines(range_start=0) 137 | 138 | for log_event, starting_offset, ending_offset, event_expanded_offset in events: 139 | assert isinstance(log_event, bytes) 140 | 141 | es_event: dict[str, Any] = { 142 | "@timestamp": datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%dT%H:%M:%S.%fZ"), 143 | "fields": { 144 | "message": log_event.decode("utf-8"), 145 | "log": { 146 | "offset": starting_offset, 147 | "file": { 148 | "path": f"{log_group_name}/{log_stream_name}", 149 | }, 150 | }, 151 | "aws": { 152 | "cloudwatch": { 153 | "log_group": log_group_name, 154 | "log_stream": log_stream_name, 155 | "event_id": event_id, 156 | } 157 | }, 158 | "cloud": { 159 | "provider": "aws", 160 | "region": aws_region, 161 | "account": {"id": account_id}, 162 | }, 163 | }, 164 | "meta": {"event_timestamp": event_timestamp}, 165 | } 166 | 167 | yield es_event, ending_offset, event_expanded_offset, cloudwatch_log_event_n 168 | -------------------------------------------------------------------------------- /handlers/aws/s3_sqs_trigger.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | 5 | import datetime 6 | from typing import Any, Iterator, Optional, Union 7 | from urllib.parse import unquote_plus 8 | 9 | import elasticapm 10 | from botocore.client import BaseClient as BotoBaseClient 11 | 12 | from share import ExpandEventListFromField, ProtocolMultiline, json_dumper, json_parser, shared_logger 13 | from storage import ProtocolStorage, StorageFactory 14 | 15 | from .utils import ( 16 | discover_integration_scope, 17 | expand_event_list_from_field_resolver, 18 | get_account_id_from_arn, 19 | get_bucket_name_from_arn, 20 | ) 21 | 22 | 23 | def _handle_s3_sqs_move( 24 | sqs_client: BotoBaseClient, 25 | sqs_destination_queue: str, 26 | sqs_record: dict[str, Any], 27 | input_id: str, 28 | config_yaml: str, 29 | current_s3_record: int = 0, 30 | continuing_queue: bool = True, 31 | last_ending_offset: Optional[int] = None, 32 | last_event_expanded_offset: Optional[int] = None, 33 | ) -> None: 34 | """ 35 | Handler of the continuation/replay queue for s3-sqs inputs. 36 | If a sqs message cannot be fully processed before the timeout of the lambda, the handler will be called 37 | for the continuation queue: it will send new sqs messages for the unprocessed records to the 38 | internal continuing sqs queue. 39 | If a sqs message has an eventSourceARN not present in the config.yaml ids, then the handler should be called, 40 | so it can get placed in the internal replay queue. 41 | 42 | :param continuing_queue: should be set to true if the sqs message is going to be placed in the continuing 43 | queue. Otherwise, we assume it will be placed in the replaying queue, and, in that case, it should be set to false. 44 | """ 45 | 46 | body = json_parser(sqs_record["body"]) 47 | body["Records"] = body["Records"][current_s3_record:] 48 | if last_ending_offset is not None: 49 | body["Records"][0]["last_ending_offset"] = last_ending_offset 50 | 51 | if last_event_expanded_offset is not None: 52 | body["Records"][0]["last_event_expanded_offset"] = last_event_expanded_offset 53 | elif "last_event_expanded_offset" in body["Records"][0]: 54 | del body["Records"][0]["last_event_expanded_offset"] 55 | 56 | sqs_record["body"] = json_dumper(body) 57 | 58 | sqs_client.send_message( 59 | QueueUrl=sqs_destination_queue, 60 | MessageBody=sqs_record["body"], 61 | MessageAttributes={ 62 | "config": {"StringValue": config_yaml, "DataType": "String"}, 63 | "originalEventSourceARN": {"StringValue": input_id, "DataType": "String"}, 64 | }, 65 | ) 66 | 67 | if continuing_queue: 68 | shared_logger.debug( 69 | "continuing", 70 | extra={ 71 | "sqs_continuing_queue": sqs_destination_queue, 72 | "last_ending_offset": last_ending_offset, 73 | "last_event_expanded_offset": last_event_expanded_offset, 74 | "current_s3_record": current_s3_record, 75 | }, 76 | ) 77 | else: 78 | shared_logger.debug( 79 | "replaying", 80 | extra={ 81 | "sqs_replaying_queue": sqs_destination_queue, 82 | "input_id": input_id, 83 | "message_id": sqs_record["messageId"], 84 | }, 85 | ) 86 | 87 | 88 | def _handle_s3_sqs_event( 89 | sqs_record_body: dict[str, Any], 90 | input_id: str, 91 | field_to_expand_event_list_from: str, 92 | root_fields_to_add_to_expanded_event: Optional[Union[str, list[str]]], 93 | json_content_type: Optional[str], 94 | multiline_processor: Optional[ProtocolMultiline], 95 | ) -> Iterator[tuple[dict[str, Any], int, Optional[int], int]]: 96 | """ 97 | Handler for s3-sqs input. 98 | It takes an sqs record in the sqs trigger and process 99 | corresponding object in S3 buckets sending to the defined outputs. 
100 | """ 101 | 102 | account_id = get_account_id_from_arn(input_id) 103 | 104 | for s3_record_n, s3_record in enumerate(sqs_record_body["Records"]): 105 | aws_region = s3_record["awsRegion"] 106 | bucket_arn = unquote_plus(s3_record["s3"]["bucket"]["arn"], "utf-8") 107 | object_key = unquote_plus(s3_record["s3"]["object"]["key"], "utf-8") 108 | event_time = int(datetime.datetime.strptime(s3_record["eventTime"], "%Y-%m-%dT%H:%M:%S.%fZ").timestamp() * 1000) 109 | last_ending_offset = s3_record["last_ending_offset"] if "last_ending_offset" in s3_record else 0 110 | last_event_expanded_offset = ( 111 | s3_record["last_event_expanded_offset"] if "last_event_expanded_offset" in s3_record else None 112 | ) 113 | 114 | integration_scope = discover_integration_scope(object_key) 115 | 116 | event_list_from_field_expander = ExpandEventListFromField( 117 | field_to_expand_event_list_from, 118 | integration_scope, 119 | expand_event_list_from_field_resolver, 120 | root_fields_to_add_to_expanded_event, 121 | last_event_expanded_offset, 122 | ) 123 | 124 | assert len(bucket_arn) > 0 125 | assert len(object_key) > 0 126 | 127 | bucket_name: str = get_bucket_name_from_arn(bucket_arn) 128 | storage: ProtocolStorage = StorageFactory.create( 129 | storage_type="s3", 130 | bucket_name=bucket_name, 131 | object_key=object_key, 132 | json_content_type=json_content_type, 133 | event_list_from_field_expander=event_list_from_field_expander, 134 | multiline_processor=multiline_processor, 135 | ) 136 | 137 | span = elasticapm.capture_span(f"WAIT FOR OFFSET STARTING AT {last_ending_offset}") 138 | span.__enter__() 139 | events = storage.get_by_lines(range_start=last_ending_offset) 140 | 141 | for log_event, starting_offset, ending_offset, event_expanded_offset in events: 142 | assert isinstance(log_event, bytes) 143 | 144 | if span: 145 | span.__exit__(None, None, None) 146 | span = None # type: ignore 147 | 148 | es_event: dict[str, Any] = { 149 | "@timestamp": datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%dT%H:%M:%S.%fZ"), 150 | "fields": { 151 | "message": log_event.decode("utf-8"), 152 | "log": { 153 | "offset": starting_offset, 154 | "file": { 155 | "path": "https://{0}.s3.{1}.amazonaws.com/{2}".format(bucket_name, aws_region, object_key), 156 | }, 157 | }, 158 | "aws": { 159 | "s3": { 160 | "bucket": {"name": bucket_name, "arn": bucket_arn}, 161 | "object": {"key": object_key}, 162 | } 163 | }, 164 | "cloud": { 165 | "provider": "aws", 166 | "region": aws_region, 167 | "account": {"id": account_id}, 168 | }, 169 | }, 170 | "meta": {"event_time": event_time, "integration_scope": integration_scope}, 171 | } 172 | 173 | yield es_event, ending_offset, event_expanded_offset, s3_record_n 174 | -------------------------------------------------------------------------------- /tests/shippers/test_logstash.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
4 | import datetime 5 | import gzip 6 | from copy import deepcopy 7 | from typing import Any 8 | from unittest import TestCase 9 | from unittest.mock import MagicMock 10 | 11 | import pytest 12 | import responses 13 | import ujson 14 | from requests import PreparedRequest 15 | 16 | from shippers.logstash import _EVENT_SENT, _MAX_RETRIES, LogstashShipper 17 | 18 | _now = datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%dT%H:%M:%S.%fZ") 19 | 20 | _dummy_event: dict[str, Any] = { 21 | "@timestamp": _now, 22 | "fields": { 23 | "message": "A dummy message", 24 | "log": { 25 | "offset": 10, 26 | "file": { 27 | "path": "https://bucket_name.s3.aws-region.amazonaws.com/file.key", 28 | }, 29 | }, 30 | "aws": { 31 | "s3": { 32 | "bucket": { 33 | "name": "arn:aws:s3:::bucket_name", 34 | "arn": "bucket_name", 35 | }, 36 | "object": { 37 | "key": "file.key", 38 | }, 39 | }, 40 | }, 41 | "cloud": { 42 | "provider": "aws", 43 | "region": "aws-region", 44 | }, 45 | }, 46 | "meta": {}, 47 | } 48 | 49 | _dummy_expected_event: dict[str, Any] = { 50 | "@timestamp": _now, 51 | "_id": "_id", 52 | "message": "A dummy message", 53 | "log": { 54 | "offset": 10, 55 | "file": { 56 | "path": "https://bucket_name.s3.aws-region.amazonaws.com/file.key", 57 | }, 58 | }, 59 | "aws": { 60 | "s3": { 61 | "bucket": { 62 | "name": "arn:aws:s3:::bucket_name", 63 | "arn": "bucket_name", 64 | }, 65 | "object": { 66 | "key": "file.key", 67 | }, 68 | }, 69 | }, 70 | "cloud": { 71 | "provider": "aws", 72 | "region": "aws-region", 73 | }, 74 | "tags": ["forwarded"], 75 | } 76 | 77 | 78 | def _dummy_replay_handler(output_type: str, output_args: dict[str, Any], event_payload: dict[str, Any]) -> None: 79 | pass 80 | 81 | 82 | @pytest.mark.unit 83 | class TestLogstashShipper(TestCase): 84 | @responses.activate 85 | def test_send_successful(self) -> None: 86 | def request_callback(request: PreparedRequest) -> tuple[int, dict[Any, Any], str]: 87 | _payload = [] 88 | assert request.headers["Content-Encoding"] == "gzip" 89 | assert request.headers["Content-Type"] == "application/x-ndjson" 90 | assert request.body is not None 91 | assert isinstance(request.body, bytes) 92 | 93 | events = gzip.decompress(request.body).decode("utf-8").split("\n") 94 | for event in events: 95 | _payload.append(ujson.loads(event)) 96 | 97 | expected_event = deepcopy(_dummy_expected_event) 98 | expected_event["@metadata"] = {"_id": "_id"} 99 | del expected_event["_id"] 100 | 101 | assert _payload == [expected_event, expected_event] 102 | 103 | return 200, {}, "okay" 104 | 105 | def event_id_generator(event: dict[str, Any]) -> str: 106 | return "_id" 107 | 108 | url = "http://logstash_url" 109 | event = deepcopy(_dummy_event) 110 | responses.add_callback(responses.PUT, url, callback=request_callback) 111 | logstash_shipper = LogstashShipper(logstash_url=url, max_batch_size=2) 112 | logstash_shipper.set_event_id_generator(event_id_generator) 113 | logstash_shipper.send(event) 114 | logstash_shipper.send(event) 115 | 116 | @responses.activate 117 | def test_send_failures(self) -> None: 118 | url = "http://logstash_url" 119 | with self.subTest("Does not exceed max_retries"): 120 | responses.put(url=url, status=429) 121 | responses.put(url=url, status=429) 122 | responses.put(url=url, status=429) 123 | responses.put(url=url, status=200) 124 | event = deepcopy(_dummy_event) 125 | logstash_shipper = LogstashShipper(logstash_url=url) 126 | assert logstash_shipper.send(event) == _EVENT_SENT 127 | with self.subTest("Exceeds max retries, replay handler set"): 128 | for i 
in range(_MAX_RETRIES): 129 | responses.put(url=url, status=429) 130 | responses.put(url=url, status=429) 131 | logstash_shipper = LogstashShipper(logstash_url=url) 132 | replay_handler = MagicMock(side_effect=_dummy_replay_handler) 133 | logstash_shipper.set_replay_handler(replay_handler) 134 | event = deepcopy(_dummy_event) 135 | assert logstash_shipper.send(event) == _EVENT_SENT 136 | replay_handler.assert_called_once_with(url, {}, event) 137 | with self.subTest("Exceeds max retries, replay handler not set"): 138 | for i in range(_MAX_RETRIES): 139 | responses.put(url=url, status=429) 140 | responses.put(url=url, status=429) 141 | replay_handler = MagicMock(side_effect=_dummy_replay_handler) 142 | logstash_shipper = LogstashShipper(logstash_url=url) 143 | event = deepcopy(_dummy_event) 144 | assert logstash_shipper.send(event) == _EVENT_SENT 145 | replay_handler.assert_not_called() 146 | with self.subTest("Authentication error, request is not retried"): 147 | responses.put(url=url, status=401) 148 | logstash_shipper = LogstashShipper(logstash_url=url) 149 | replay_handler = MagicMock(side_effect=_dummy_replay_handler) 150 | logstash_shipper.set_replay_handler(replay_handler) 151 | event = deepcopy(_dummy_event) 152 | assert logstash_shipper.send(event) == _EVENT_SENT 153 | replay_handler.assert_called_once_with(url, {}, event) 154 | 155 | @responses.activate 156 | def test_flush(self) -> None: 157 | url = "http://logstash_url" 158 | responses.put(url=url, status=200) 159 | responses.put(url=url, status=200) 160 | logstash_shipper = LogstashShipper(logstash_url=url, max_batch_size=2) 161 | event = deepcopy(_dummy_event) 162 | logstash_shipper.send(event) 163 | assert logstash_shipper._events_batch == [event] 164 | logstash_shipper.flush() 165 | assert logstash_shipper._events_batch == [] 166 | 167 | @responses.activate 168 | def test_buffer_handling_at_capacity(self) -> None: 169 | url = "http://logstash_url" 170 | responses.put(url=url, status=200) 171 | responses.put(url=url, status=200) 172 | responses.put(url=url, status=200) 173 | 174 | logstash_shipper = LogstashShipper(logstash_url=url, max_batch_size=2) 175 | event = deepcopy(_dummy_event) 176 | 177 | logstash_shipper.send(event) # this should not trigger the send 178 | assert logstash_shipper._events_batch == [event] 179 | logstash_shipper.send(event) # this should trigger the send and empty the buffer 180 | assert logstash_shipper._events_batch == [] 181 | logstash_shipper.send(event) # this should not trigger the send 182 | assert logstash_shipper._events_batch == [event] 183 | logstash_shipper.flush() # this should trigger the send and empty the buffer 184 | assert logstash_shipper._events_batch == [] 185 | -------------------------------------------------------------------------------- /tests/testcontainers/es.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
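The request callback in `test_send_successful` above implicitly defines the wire format the shipper is expected to produce: a PUT with a gzip-compressed NDJSON body, one JSON document per line, with the event id tucked under `@metadata`. A small standard-library sketch of building and unpacking such a body (the test uses ujson; json behaves the same here):

import gzip
import json
from typing import Any

events: list[dict[str, Any]] = [
    {"@metadata": {"_id": "_id"}, "message": "first"},
    {"@metadata": {"_id": "_id"}, "message": "second"},
]

# One JSON document per line, then gzip: this is what the callback decompresses and splits on "\n".
ndjson = "\n".join(json.dumps(event) for event in events)
body = gzip.compress(ndjson.encode("utf-8"))

decoded = [json.loads(line) for line in gzip.decompress(body).decode("utf-8").split("\n")]
assert decoded == events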
4 | 5 | from __future__ import annotations 6 | 7 | import ssl 8 | import time 9 | from typing import Any 10 | 11 | from elasticsearch import Elasticsearch 12 | from OpenSSL import crypto as OpenSSLCrypto 13 | from testcontainers.core.container import DockerContainer 14 | from testcontainers.core.waiting_utils import wait_container_is_ready 15 | 16 | DEFAULT_USERNAME = "elastic" 17 | DEFAULT_PASSWORD = "password" 18 | 19 | 20 | class ElasticsearchContainer(DockerContainer): # type: ignore 21 | """ 22 | Elasticsearch container. 23 | 24 | Example 25 | ------- 26 | :: 27 | 28 | with ElasticsearchContainer() as esc: 29 | # NOTE: container will terminate once out of this with statement 30 | url = esc.get_url() 31 | """ 32 | 33 | _DEFAULT_IMAGE = "docker.elastic.co/elasticsearch/elasticsearch" 34 | _DEFAULT_VERSION = "7.17.20" 35 | _DEFAULT_PORT = 9200 36 | _DEFAULT_USERNAME = DEFAULT_USERNAME 37 | _DEFAULT_PASSWORD = DEFAULT_PASSWORD 38 | 39 | def __init__( 40 | self, 41 | image: str = _DEFAULT_IMAGE, 42 | version: str = _DEFAULT_VERSION, 43 | port: int = _DEFAULT_PORT, 44 | username: str = _DEFAULT_USERNAME, 45 | password: str = _DEFAULT_PASSWORD, 46 | ): 47 | image = f"{image}:{version}" 48 | super(ElasticsearchContainer, self).__init__(image=image, entrypoint="sleep") 49 | self.with_command("infinity") 50 | 51 | self.port = port 52 | self.host = "" 53 | self.exposed_port = 0 54 | self.ssl_assert_fingerprint = "" 55 | 56 | self.elastic_user: str = username 57 | self.elastic_password: str = password 58 | 59 | self.with_exposed_ports(self.port) 60 | 61 | self._pipelines_ids: set[str] = set() 62 | self._index_indices: set[str] = set() 63 | 64 | def _configure(self) -> None: 65 | """ 66 | Values set here will override any value set by calling .with_env(...) 
67 | after initializing this class before .start() 68 | """ 69 | 70 | exit_code, _ = self.get_wrapped_container().exec_run( 71 | cmd="elasticsearch-certutil cert --silent --name localhost --dns localhost --keep-ca-key " 72 | "--out /usr/share/elasticsearch/elasticsearch-ssl-http.zip --self-signed --ca-pass '' --pass ''" 73 | ) 74 | assert exit_code == 0 75 | 76 | exit_code, _ = self.get_wrapped_container().exec_run( 77 | cmd="unzip /usr/share/elasticsearch/elasticsearch-ssl-http.zip -d /usr/share/elasticsearch/config/certs/" 78 | ) 79 | 80 | assert exit_code == 0 81 | 82 | self.get_wrapped_container().exec_run( 83 | cmd="/bin/tini -- /usr/local/bin/docker-entrypoint.sh", 84 | detach=True, 85 | environment={ 86 | "ES_JAVA_OPTS": "-Xms1g -Xmx1g", 87 | "ELASTIC_PASSWORD": self.elastic_password, 88 | "xpack.security.enabled": "true", 89 | "discovery.type": "single-node", 90 | "network.bind_host": "0.0.0.0", 91 | "network.publish_host": "0.0.0.0", 92 | "logger.org.elasticsearch": "DEBUG", 93 | "xpack.security.http.ssl.enabled": "true", 94 | "xpack.security.http.ssl.keystore.path": "/usr/share/elasticsearch/config/certs/localhost/" 95 | "localhost.p12", 96 | }, 97 | ) 98 | 99 | def get_url(self) -> str: 100 | return f"https://{self.host}:{self.exposed_port}" 101 | 102 | @wait_container_is_ready() # type: ignore 103 | def _connect(self) -> None: 104 | self.host = self.get_container_host_ip() 105 | self.exposed_port = int(self.get_exposed_port(self.port)) 106 | 107 | while True: 108 | try: 109 | pem_server_certificate: str = ssl.get_server_certificate((self.host, self.exposed_port)) 110 | openssl_certificate = OpenSSLCrypto.load_certificate( 111 | OpenSSLCrypto.FILETYPE_PEM, pem_server_certificate.encode("utf-8") 112 | ) 113 | except Exception: 114 | time.sleep(1) 115 | else: 116 | self.ssl_assert_fingerprint = str(openssl_certificate.digest("sha256").decode()) 117 | break 118 | 119 | assert len(self.ssl_assert_fingerprint) > 0 120 | 121 | self.es_client = Elasticsearch( 122 | hosts=[f"{self.host}:{self.exposed_port}"], 123 | scheme="https", 124 | http_auth=(self.elastic_user, self.elastic_password), 125 | ssl_assert_fingerprint=self.ssl_assert_fingerprint, 126 | verify_certs=False, 127 | timeout=30, 128 | max_retries=10, 129 | retry_on_timeout=True, 130 | raise_on_error=False, 131 | raise_on_exception=False, 132 | ) 133 | 134 | while not self.es_client.ping(): 135 | time.sleep(1) 136 | 137 | while True: 138 | cluster_health = self.es_client.cluster.health(wait_for_status="green") 139 | if "status" in cluster_health and cluster_health["status"] == "green": 140 | break 141 | 142 | time.sleep(1) 143 | 144 | def reset(self) -> None: 145 | for index in self._index_indices: 146 | self.es_client.indices.delete_data_stream(name=index) 147 | 148 | if self.es_client.indices.exists(index="logs-stash.elasticsearch-output"): 149 | self.es_client.indices.delete_data_stream(name="logs-stash.elasticsearch-output") 150 | 151 | self._index_indices = set() 152 | 153 | for pipeline_id in self._pipelines_ids: 154 | self.es_client.ingest.delete_pipeline(id=pipeline_id) 155 | 156 | self._pipelines_ids = set() 157 | 158 | def start(self) -> ElasticsearchContainer: 159 | super().start() 160 | self._configure() 161 | self._connect() 162 | return self 163 | 164 | def count(self, **kwargs: Any) -> dict[str, Any]: 165 | if "index" in kwargs and ("ignore_unavailable" not in kwargs or kwargs["ignore_unavailable"] is not True): 166 | self._index_indices.add(kwargs["index"]) 167 | 168 | return self.es_client.count(**kwargs) 169 
| 170 | def refresh(self, **kwargs: Any) -> dict[str, Any]: 171 | if "index" in kwargs and ("ignore_unavailable" not in kwargs or kwargs["ignore_unavailable"] is not True): 172 | self._index_indices.add(kwargs["index"]) 173 | 174 | return self.es_client.indices.refresh(**kwargs) 175 | 176 | def put_pipeline(self, **kwargs: Any) -> dict[str, Any]: 177 | if "id" in kwargs: 178 | self._pipelines_ids.add(kwargs["id"]) 179 | 180 | return self.es_client.ingest.put_pipeline(**kwargs) 181 | 182 | def delete_by_query(self, **kwargs: Any) -> dict[str, Any]: 183 | if "index" in kwargs: 184 | self._index_indices.add(kwargs["index"]) 185 | 186 | return self.es_client.delete_by_query(**kwargs) 187 | 188 | def put_settings(self, **kwargs: Any) -> dict[str, Any]: 189 | if "index" in kwargs: 190 | self._index_indices.add(kwargs["index"]) 191 | 192 | return self.es_client.indices.put_settings(**kwargs) 193 | 194 | def exists(self, **kwargs: Any) -> bool: 195 | exists = self.es_client.indices.exists(**kwargs) 196 | if exists and "index" in kwargs: 197 | self._index_indices.add(kwargs["index"]) 198 | 199 | return exists 200 | 201 | def search(self, **kwargs: Any) -> dict[str, Any]: 202 | if "index" in kwargs: 203 | self._index_indices.add(kwargs["index"]) 204 | 205 | return self.es_client.search(**kwargs) 206 | 207 | def index(self, **kwargs: Any) -> dict[str, Any]: 208 | if "index" in kwargs: 209 | self._index_indices.add(kwargs["index"]) 210 | 211 | return self.es_client.index(**kwargs) 212 | 213 | def create_data_stream(self, **kwargs: Any) -> dict[str, Any]: 214 | if "name" in kwargs: 215 | self._index_indices.add(kwargs["name"]) 216 | 217 | return self.es_client.indices.create_data_stream(**kwargs) 218 | -------------------------------------------------------------------------------- /tests/shippers/test_factory.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 
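The fingerprint pinned by `ElasticsearchContainer._connect` above comes from pyOpenSSL's `cert.digest("sha256")`, which (to my understanding) is the SHA-256 of the DER-encoded certificate rendered as colon-separated uppercase hex. A sketch under that assumption, using only the standard library, in case pyOpenSSL is not available:

import hashlib
import ssl


def sha256_fingerprint(host: str, port: int) -> str:
    """Fetch the server certificate and return its SHA-256 fingerprint as AA:BB:... hex."""
    pem = ssl.get_server_certificate((host, port))
    der = ssl.PEM_cert_to_DER_cert(pem)
    digest = hashlib.sha256(der).hexdigest().upper()
    return ":".join(digest[i : i + 2] for i in range(0, len(digest), 2))


# e.g. passed to elasticsearch-py as ssl_assert_fingerprint=sha256_fingerprint(host, port)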
4 | 5 | import re 6 | from unittest import TestCase 7 | 8 | from share import ElasticsearchOutput, LogstashOutput, Output 9 | from shippers import ElasticsearchShipper, LogstashShipper, ProtocolShipper, ShipperFactory 10 | 11 | 12 | class TestShipperFactory(TestCase): 13 | def test_create(self) -> None: 14 | with self.subTest("create elasticsearch shipper success elasticsearch_url and http auth"): 15 | shipper: ProtocolShipper = ShipperFactory.create( 16 | output_type="elasticsearch", 17 | elasticsearch_url="elasticsearch_url", 18 | username="username", 19 | password="password", 20 | es_datastream_name="es_datastream_name", 21 | ) 22 | 23 | assert isinstance(shipper, ElasticsearchShipper) 24 | 25 | with self.subTest("create elasticsearch shipper success elasticsearch_url and api key"): 26 | shipper = ShipperFactory.create( 27 | output_type="elasticsearch", 28 | elasticsearch_url="elasticsearch_url", 29 | api_key="api_key", 30 | es_datastream_name="es_datastream_name", 31 | ) 32 | 33 | assert isinstance(shipper, ElasticsearchShipper) 34 | 35 | with self.subTest("create elasticsearch shipper success cloud id and http auth"): 36 | shipper = ShipperFactory.create( 37 | output_type="elasticsearch", 38 | cloud_id="cloud_id:bG9jYWxob3N0OjkyMDAkMA==", 39 | username="username", 40 | password="password", 41 | es_datastream_name="es_datastream_name", 42 | ) 43 | 44 | assert isinstance(shipper, ElasticsearchShipper) 45 | 46 | with self.subTest("create elasticsearch shipper success cloud id and api key"): 47 | shipper = ShipperFactory.create( 48 | output_type="elasticsearch", 49 | cloud_id="cloud_id:bG9jYWxob3N0OjkyMDAkMA==", 50 | api_key="api_key", 51 | es_datastream_name="es_datastream_name", 52 | ) 53 | 54 | assert isinstance(shipper, ElasticsearchShipper) 55 | 56 | with self.subTest("create logstash shipper success with only logstash_url"): 57 | shipper = ShipperFactory.create( 58 | output_type="logstash", 59 | logstash_url="http://myhost:8080", 60 | ) 61 | 62 | assert isinstance(shipper, LogstashShipper) 63 | 64 | with self.subTest("create logstash shipper success with logstash_url, batch size and compression level"): 65 | shipper = ShipperFactory.create( 66 | output_type="logstash", 67 | logstash_url="http://myhost:8080", 68 | max_batch_size=50, 69 | compression_level=9, 70 | ) 71 | 72 | assert isinstance(shipper, LogstashShipper) 73 | with self.subTest("create elasticsearch shipper no kwargs error"): 74 | with self.assertRaisesRegex(ValueError, "You must provide one between elasticsearch_url or cloud_id"): 75 | ShipperFactory.create(output_type="elasticsearch") 76 | 77 | with self.subTest("create logstash shipper no kwargs error"): 78 | with self.assertRaisesRegex(ValueError, "You must provide logstash_url"): 79 | ShipperFactory.create(output_type="logstash") 80 | 81 | with self.subTest("create elasticsearch shipper empty elasticsearch_url and no cloud_id"): 82 | with self.assertRaisesRegex(ValueError, "You must provide one between elasticsearch_url or cloud_id"): 83 | ShipperFactory.create(output_type="elasticsearch", elasticsearch_url="") 84 | 85 | with self.subTest("create elasticsearch shipper empty cloud_id and no elasticsearch_url"): 86 | with self.assertRaisesRegex(ValueError, "You must provide one between elasticsearch_url or cloud_id"): 87 | ShipperFactory.create(output_type="elasticsearch", cloud_id="") 88 | 89 | with self.subTest("create elasticsearch shipper empty username and no api_key"): 90 | with self.assertRaisesRegex(ValueError, "You must provide one between username and 
password or api_key"): 91 | ShipperFactory.create(output_type="elasticsearch", elasticsearch_url="elasticsearch_url", username="") 92 | 93 | with self.subTest("create elasticsearch shipper empty api_key and no username"): 94 | with self.assertRaisesRegex(ValueError, "You must provide one between username and password or api_key"): 95 | ShipperFactory.create(output_type="elasticsearch", elasticsearch_url="elasticsearch_url", api_key="") 96 | 97 | with self.subTest("create logstash shipper compression level lower than 0"): 98 | with self.assertRaisesRegex(ValueError, "compression_level must be an integer value between 0 and 9"): 99 | ShipperFactory.create(output_type="logstash", logstash_url="logstash_url", compression_level=-1) 100 | 101 | with self.subTest("create logstash shipper compression level higher than 9"): 102 | with self.assertRaisesRegex(ValueError, "compression_level must be an integer value between 0 and 9"): 103 | ShipperFactory.create(output_type="logstash", logstash_url="logstash_url", compression_level=10) 104 | 105 | with self.subTest("create invalid type"): 106 | with self.assertRaisesRegex( 107 | ValueError, re.escape("You must provide one of the following outputs: elasticsearch") 108 | ): 109 | ShipperFactory.create(output_type="invalid type") 110 | 111 | def test_create_from_output(self) -> None: 112 | elasticsearch_output = ElasticsearchOutput( 113 | elasticsearch_url="elasticsearch_url", 114 | username="username", 115 | password="password", 116 | es_datastream_name="es_datastream_name", 117 | ) 118 | 119 | with self.subTest("create output type elasticsearch"): 120 | with self.assertRaisesRegex( 121 | ValueError, 122 | re.escape("output expected to be ElasticsearchOutput type, given "), 123 | ): 124 | ShipperFactory.create_from_output( 125 | output_type="elasticsearch", output=Output(output_type="elasticsearch") 126 | ) 127 | 128 | with self.subTest("create from output elasticsearch shipper success"): 129 | shipper: ProtocolShipper = ShipperFactory.create_from_output( 130 | output_type=elasticsearch_output.type, output=elasticsearch_output 131 | ) 132 | 133 | assert isinstance(shipper, ElasticsearchShipper) 134 | 135 | with self.subTest("create from output invalid type"): 136 | with self.assertRaisesRegex( 137 | ValueError, re.escape("You must provide one of the following outputs: elasticsearch, logstash") 138 | ): 139 | ShipperFactory.create_from_output(output_type="invalid type", output=elasticsearch_output) 140 | 141 | logstash_output = LogstashOutput(logstash_url="logstash_url") 142 | 143 | with self.subTest("create output type logstash"): 144 | with self.assertRaisesRegex( 145 | ValueError, 146 | re.escape("output expected to be LogstashOutput type, given "), 147 | ): 148 | ShipperFactory.create_from_output(output_type="logstash", output=Output(output_type="logstash")) 149 | 150 | with self.subTest("create from output logstash shipper success"): 151 | logstash_shipper: ProtocolShipper = ShipperFactory.create_from_output( 152 | output_type=logstash_output.type, output=logstash_output 153 | ) 154 | 155 | assert isinstance(logstash_shipper, LogstashShipper) 156 | 157 | with self.subTest("create from output invalid type"): 158 | with self.assertRaisesRegex( 159 | ValueError, re.escape("You must provide one of the following outputs: elasticsearch, logstash") 160 | ): 161 | ShipperFactory.create_from_output(output_type="invalid type", output=logstash_output) 162 | -------------------------------------------------------------------------------- 
/tests/storage/test_payload.py: -------------------------------------------------------------------------------- 1 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 2 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 3 | # you may not use this file except in compliance with the Elastic License 2.0. 4 | 5 | import base64 6 | import datetime 7 | import gzip 8 | import random 9 | from typing import Optional 10 | 11 | import mock 12 | import pytest 13 | 14 | from storage import PayloadStorage 15 | 16 | from .test_benchmark import ( 17 | _IS_PLAIN, 18 | _LENGTH_ABOVE_THRESHOLD, 19 | MockContentBase, 20 | get_by_lines_parameters, 21 | multiline_processor, 22 | ) 23 | 24 | 25 | class MockContent(MockContentBase): 26 | @staticmethod 27 | def init_content( 28 | content_type: str, 29 | newline: bytes, 30 | length_multiplier: int = _LENGTH_ABOVE_THRESHOLD, 31 | json_content_type: Optional[str] = None, 32 | ) -> None: 33 | MockContentBase.init_content( 34 | content_type=content_type, 35 | newline=newline, 36 | length_multiplier=length_multiplier, 37 | json_content_type=json_content_type, 38 | ) 39 | 40 | MockContent.f_content_gzip = base64.b64encode(gzip.compress(MockContentBase.mock_content)) 41 | MockContent.f_content_plain = base64.b64encode(MockContentBase.mock_content) 42 | MockContent.f_size_gzip = len(MockContent.f_content_gzip) 43 | MockContent.f_size_plain = len(MockContent.f_content_plain) 44 | 45 | 46 | @pytest.mark.unit 47 | def test_get_as_string_plain() -> None: 48 | MockContent.init_content(content_type=_IS_PLAIN, newline=b"\n") 49 | original = base64.b64decode(MockContent.f_content_plain).decode("utf-8") 50 | payload_storage = PayloadStorage(payload=original) 51 | content = payload_storage.get_as_string() 52 | assert content == original 53 | assert len(content) == len(original) 54 | 55 | 56 | @pytest.mark.unit 57 | def test_get_as_string_base64() -> None: 58 | MockContent.init_content(content_type=_IS_PLAIN, newline=b"\n") 59 | payload_storage = PayloadStorage(payload=MockContent.f_content_plain.decode("utf-8")) 60 | content = payload_storage.get_as_string() 61 | original = base64.b64decode(MockContent.f_content_plain).decode("utf-8") 62 | assert content == original 63 | assert len(content) == len(original) 64 | 65 | 66 | @pytest.mark.unit 67 | def test_get_as_string_gzip() -> None: 68 | MockContent.init_content(content_type=_IS_PLAIN, newline=b"\n") 69 | payload_storage = PayloadStorage(payload=MockContent.f_content_gzip.decode("utf-8")) 70 | content = payload_storage.get_as_string() 71 | original = gzip.decompress(base64.b64decode(MockContent.f_content_gzip)).decode("utf-8") 72 | 73 | assert content == original 74 | assert len(content) == len(original) 75 | 76 | 77 | @pytest.mark.unit 78 | @pytest.mark.parametrize("length_multiplier,content_type,newline,json_content_type", get_by_lines_parameters()) 79 | @mock.patch("share.multiline.timedelta_circuit_breaker", new=datetime.timedelta(days=1)) 80 | def test_get_by_lines( 81 | length_multiplier: int, content_type: str, newline: bytes, json_content_type: Optional[str] 82 | ) -> None: 83 | MockContent.init_content( 84 | content_type=content_type, 85 | newline=newline, 86 | length_multiplier=length_multiplier, 87 | json_content_type=json_content_type, 88 | ) 89 | 90 | payload_content_gzip = MockContent.f_content_gzip.decode("utf-8") 91 | payload_content_plain = MockContent.f_content_plain.decode("utf-8") 92 | 93 | joiner_token: bytes = newline 94 | 95 | original: bytes = 
base64.b64decode(MockContent.f_content_plain) 96 | original_length: int = len(original) 97 | 98 | payload_storage = PayloadStorage( 99 | payload=payload_content_gzip, 100 | json_content_type=json_content_type, 101 | multiline_processor=multiline_processor(content_type), 102 | ) 103 | gzip_full: list[tuple[bytes, int, int, Optional[int]]] = list(payload_storage.get_by_lines(range_start=0)) 104 | 105 | payload_storage = PayloadStorage( 106 | payload=payload_content_plain, 107 | json_content_type=json_content_type, 108 | multiline_processor=multiline_processor(content_type), 109 | ) 110 | plain_full: list[tuple[bytes, int, int, Optional[int]]] = list(payload_storage.get_by_lines(range_start=0)) 111 | 112 | diff = set(gzip_full) ^ set(plain_full) 113 | assert not diff 114 | assert plain_full == gzip_full 115 | assert gzip_full[-1][2] == original_length 116 | assert plain_full[-1][2] == original_length 117 | 118 | joined = joiner_token.join([x[0] for x in plain_full]) 119 | assert joined == original 120 | 121 | if len(newline) == 0 or (json_content_type == "single"): 122 | return 123 | 124 | gzip_full_01 = gzip_full[: int(len(gzip_full) / 2)] 125 | plain_full_01 = plain_full[: int(len(plain_full) / 2)] 126 | 127 | range_start = plain_full_01[-1][2] 128 | 129 | payload_storage = PayloadStorage( 130 | payload=payload_content_gzip, 131 | json_content_type=json_content_type, 132 | multiline_processor=multiline_processor(content_type), 133 | ) 134 | gzip_full_02: list[tuple[bytes, int, int, Optional[int]]] = list( 135 | payload_storage.get_by_lines(range_start=range_start) 136 | ) 137 | 138 | payload_storage = PayloadStorage( 139 | payload=payload_content_plain, 140 | json_content_type=json_content_type, 141 | multiline_processor=multiline_processor(content_type), 142 | ) 143 | plain_full_02: list[tuple[bytes, int, int, Optional[int]]] = list( 144 | payload_storage.get_by_lines(range_start=range_start) 145 | ) 146 | 147 | diff = set(gzip_full_01) ^ set(plain_full_01) 148 | assert not diff 149 | assert plain_full_01 == gzip_full_01 150 | 151 | diff = set(gzip_full_02) ^ set(plain_full_02) 152 | assert not diff 153 | assert plain_full_02 == gzip_full_02 154 | 155 | assert plain_full_01 + plain_full_02 == plain_full 156 | assert gzip_full_02[-1][2] == original_length 157 | assert plain_full_02[-1][2] == original_length 158 | 159 | joined = ( 160 | joiner_token.join([x[0] for x in plain_full_01]) 161 | + joiner_token 162 | + joiner_token.join([x[0] for x in plain_full_02]) 163 | ) 164 | 165 | assert joined == original 166 | 167 | gzip_full_02 = gzip_full_02[: int(len(gzip_full_02) / 2)] 168 | plain_full_02 = plain_full_02[: int(len(plain_full_02) / 2)] 169 | 170 | range_start = plain_full_02[-1][2] 171 | 172 | payload_storage = PayloadStorage( 173 | payload=payload_content_gzip, 174 | json_content_type=json_content_type, 175 | multiline_processor=multiline_processor(content_type), 176 | ) 177 | gzip_full_03: list[tuple[bytes, int, int, Optional[int]]] = list( 178 | payload_storage.get_by_lines(range_start=range_start) 179 | ) 180 | 181 | payload_storage = PayloadStorage( 182 | payload=payload_content_plain, 183 | json_content_type=json_content_type, 184 | multiline_processor=multiline_processor(content_type), 185 | ) 186 | plain_full_03: list[tuple[bytes, int, int, Optional[int]]] = list( 187 | payload_storage.get_by_lines(range_start=range_start) 188 | ) 189 | 190 | diff = set(gzip_full_02) ^ set(plain_full_02) 191 | assert not diff 192 | assert plain_full_02 == gzip_full_02 193 | 194 | diff = 
set(gzip_full_03) ^ set(plain_full_03) 195 | assert not diff 196 | assert plain_full_03 == gzip_full_03 197 | 198 | assert plain_full_01 + plain_full_02 + plain_full_03 == plain_full 199 | assert gzip_full_03[-1][2] == original_length 200 | assert plain_full_03[-1][2] == original_length 201 | 202 | joined = ( 203 | joiner_token.join([x[0] for x in plain_full_01]) 204 | + joiner_token 205 | + joiner_token.join([x[0] for x in plain_full_02]) 206 | + joiner_token 207 | + joiner_token.join([x[0] for x in plain_full_03]) 208 | ) 209 | 210 | assert joined == original 211 | 212 | range_start = plain_full[-1][2] + random.randint(1, 100) 213 | 214 | payload_storage = PayloadStorage( 215 | payload=payload_content_gzip, 216 | json_content_type=json_content_type, 217 | multiline_processor=multiline_processor(content_type), 218 | ) 219 | gzip_full_empty: list[tuple[bytes, int, int, Optional[int]]] = list( 220 | payload_storage.get_by_lines(range_start=range_start) 221 | ) 222 | 223 | payload_storage = PayloadStorage( 224 | payload=payload_content_plain, 225 | json_content_type=json_content_type, 226 | multiline_processor=multiline_processor(content_type), 227 | ) 228 | plain_full_empty: list[tuple[bytes, int, int, Optional[int]]] = list( 229 | payload_storage.get_by_lines(range_start=range_start) 230 | ) 231 | 232 | assert not gzip_full_empty 233 | assert not plain_full_empty 234 | --------------------------------------------------------------------------------
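The property these payload tests verify can be stated in a much smaller form: PayloadStorage accepts the payload as plain text, base64, or base64 of gzip, and `get_by_lines(range_start=offset)` resumes exactly where a previous pass stopped, where the offset is the ending offset (index 2 of each yielded tuple) reported for the last processed line. A minimal sketch with made-up content, mirroring the resumption pattern used in `test_get_by_lines` above:

import base64
import gzip

from storage import PayloadStorage

original = b"first line\nsecond line\nthird line"
payload = base64.b64encode(gzip.compress(original)).decode("utf-8")

storage = PayloadStorage(payload=payload)
lines = list(storage.get_by_lines(range_start=0))
assert [line for line, _, _, _ in lines] == [b"first line", b"second line", b"third line"]

# Resume after the first line: range_start is the ending offset reported for that line.
_, _, first_ending_offset, _ = lines[0]
resumed = list(PayloadStorage(payload=payload).get_by_lines(range_start=first_ending_offset))
assert [line for line, _, _, _ in resumed] == [b"second line", b"third line"]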