├── .gitignore
├── src
│   ├── util
│   │   ├── mod.rs
│   │   └── http.rs
│   ├── lifecycle
│   │   ├── mod.rs
│   │   ├── invocation_rate.rs
│   │   └── flush_control.rs
│   ├── lib.rs
│   ├── secrets
│   │   ├── mod.rs
│   │   ├── error.rs
│   │   ├── client.rs
│   │   ├── secretsmanager.rs
│   │   └── paramstore.rs
│   ├── lambda
│   │   ├── mod.rs
│   │   ├── constants.rs
│   │   ├── types.rs
│   │   ├── api.rs
│   │   ├── telemetry_api.rs
│   │   └── logs.rs
│   ├── test_util.rs
│   ├── env.rs
│   └── main.rs
├── contrib
│   └── coldstarts.png
├── rust-toolchain.toml
├── Makefile
├── scripts
│   ├── publish-lambda-version.sh
│   └── manual-deploy.sh
├── Cargo.toml
├── .github
│   └── workflows
│       ├── ci.yml
│       └── release.yml
├── DEVELOPING.md
├── LICENSE
└── README.md

/.gitignore:
--------------------------------------------------------------------------------
target/
tmp/
--------------------------------------------------------------------------------
/src/util/mod.rs:
--------------------------------------------------------------------------------
pub mod http;
--------------------------------------------------------------------------------
/src/lifecycle/mod.rs:
--------------------------------------------------------------------------------
pub mod flush_control;
mod invocation_rate;
--------------------------------------------------------------------------------
/contrib/coldstarts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/streamfold/rotel-lambda-extension/HEAD/contrib/coldstarts.png
--------------------------------------------------------------------------------
/rust-toolchain.toml:
--------------------------------------------------------------------------------
[toolchain]
channel = "1.91.1"
targets = ["x86_64-unknown-linux-gnu", "aarch64-unknown-linux-gnu"]
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
pub mod env;
pub mod lambda;
pub mod lifecycle;
pub mod secrets;
pub mod util;

#[cfg(test)]
mod test_util;
--------------------------------------------------------------------------------
/src/secrets/mod.rs:
--------------------------------------------------------------------------------
pub mod client;
mod error;
mod paramstore;
mod secretsmanager;

pub const SECRETS_MANAGER_SERVICE: &str = "secretsmanager";
pub const PARAM_STORE_SERVICE: &str = "ssm";

// This is the minimum of what SecretsManager and ParamStore support for
// batch calls. It would be surprising to have > 10 secrets.
pub const MAX_LOOKUP_LEN: usize = 10;
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
.PHONY: build test deploy

#
# These targets are used for local testing; the actual
# release targets are defined in the GitHub action files.
#

DEPLOY_NAME ?= rotel-extension-test

build:
	cargo lambda build --extension --release

test:
	cargo nextest run

deploy: build
	cargo lambda deploy --extension --compatible-runtimes provided.al2023 --binary-name rotel-extension ${DEPLOY_NAME}
--------------------------------------------------------------------------------
/src/util/http.rs:
--------------------------------------------------------------------------------
use http_body_util::BodyExt;
use hyper::body::Incoming;
use tower::BoxError;

pub async fn response_string(body: Incoming) -> Result<String, BoxError> {
    Ok(body
        .collect()
        .await
        .map_err(|e| format!("Failed to read response {}", e))
        .map(|c| c.to_bytes())
        .map(|s| String::from_utf8(s.to_vec()))?
        .map_err(|e| format!("Unable to convert response body to string: {}", e))?)
}
--------------------------------------------------------------------------------
/src/lambda/mod.rs:
--------------------------------------------------------------------------------
use opentelemetry_proto::tonic::common::v1::any_value::Value::StringValue;
use opentelemetry_proto::tonic::common::v1::{AnyValue, KeyValue};

pub mod api;
mod constants;
mod logs;
pub mod telemetry_api;
pub mod types;

pub(crate) fn otel_string_attr(key: &str, value: &str) -> KeyValue {
    KeyValue {
        key: key.to_string(),
        value: Some(AnyValue {
            value: Some(StringValue(value.to_string())),
        }),
    }
}
--------------------------------------------------------------------------------
/src/lambda/constants.rs:
--------------------------------------------------------------------------------
pub const REGISTER_PATH: &str = "/2020-01-01/extension/register";
pub const NEXT_PATH: &str = "/2020-01-01/extension/event/next";
pub const TELEMETRY_PATH: &str = "/2022-07-01/telemetry";

pub const TELEMETRY_API_SCHEMA: &str = "2022-12-13";

pub const EXTENSION_NAME_HEADER: &str = "Lambda-Extension-Name";
pub const EXTENSION_ACCEPT_FEATURE: &str = "Lambda-Extension-Accept-Feature";

pub const EXTENSION_FEATURE_ACCOUNTID: &str = "accountId";

pub const EXTENSION_ID_HEADER: &str = "Lambda-Extension-Identifier";
--------------------------------------------------------------------------------
/src/test_util.rs:
--------------------------------------------------------------------------------
use std::sync::Once;

static INIT_CRYPTO: Once = Once::new();
pub fn init_crypto() {
    INIT_CRYPTO.call_once(|| {
        rustls::crypto::aws_lc_rs::default_provider()
            .install_default()
            .unwrap()
    });
}

pub fn parse_test_arns(test_arns: String) -> Vec<(String, String)> {
    test_arns
        .split(",")
        .filter(|s| !s.is_empty())
        .filter_map(|pair| {
            let parts: Vec<&str> = pair.splitn(2, '=').collect();
            if parts.len() == 2 {
                Some((parts[0].trim().to_string(), parts[1].trim().to_string()))
            } else {
                None // Skip malformed pairs that don't have an equals sign
            }
        })
        .collect()
}
--------------------------------------------------------------------------------
/scripts/publish-lambda-version.sh:
--------------------------------------------------------------------------------
#!/bin/bash

if [ $# -lt 1 ]; then
  echo "Usage: $0 ARN [...ARN]"
  exit 1
fi

for ARN in "$@"; do
  # Extract region (us-east-1)
  LAYER_REGION=$(echo $ARN | cut -d':' -f4)

  # Extract layer name (rotel-extension)
  LAYER_NAME=$(echo $ARN | cut -d':' -f7)

  # Extract version number (2)
  LAYER_VERSION=$(echo $ARN | cut -d':' -f8)

  echo "Adding layer version permission for layer name: $LAYER_NAME, region: $LAYER_REGION, version: $LAYER_VERSION"

  aws lambda add-layer-version-permission \
    --layer-name "$LAYER_NAME" \
    --version-number "$LAYER_VERSION" \
    --statement-id add-public-access \
    --region "$LAYER_REGION" \
    --principal "*" \
    --action lambda:GetLayerVersion
done
--------------------------------------------------------------------------------
/scripts/manual-deploy.sh:
--------------------------------------------------------------------------------
#!/bin/bash

if [ $# -ne 4 ]; then
  echo "Usage: $0 arch layer-name region how-many"
  exit 1
fi

set -e

ARCH="$1"
shift

if [ "$ARCH" != "x86-64" -a "$ARCH" != "arm64" ]; then
  echo "Invalid arch: $ARCH"
  exit 1
fi

LAYER_NAME="$1"
shift

REGION="$1"
shift

HOW_MANY="$1"
shift

echo "Deploying arch $ARCH as $LAYER_NAME to $REGION $HOW_MANY times...sleeping 5 seconds"
sleep 5

export AWS_PROFILE=AdministratorAccess-418653438961

OUT=/tmp/lambda-deploy.out

rm -f $OUT
for ((i = 0; i < $HOW_MANY; ++i)); do
  echo "Deploying $LAYER_NAME for iter $i"
  echo
  AWS_REGION="$REGION" cargo lambda deploy --extension --region "$REGION" --lambda-dir "target/lambda/${ARCH}" \
    --binary-name rotel-extension "$LAYER_NAME" | tee -a $OUT
done

./scripts/publish-lambda-version.sh $( grep 'extension arn' "$OUT" | awk '{print $4}' )
--------------------------------------------------------------------------------
/src/lambda/types.rs:
--------------------------------------------------------------------------------
use serde::{Deserialize, Serialize};

#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RegisterResponseBody {
    pub function_name: String,
    pub function_version: String,
    pub handler: String,
    pub account_id: Option<String>,

    // This is returned in a header
    #[serde(skip_deserializing)]
    pub extension_id: String,
}

#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct TelemetryAPISubscribe {
    pub schema_version: String,
    pub types: Vec<String>,
    pub buffering: TelemetryAPISubscribeBuffering,
    pub destination: TelemetryAPISubscribeDestination,
}

#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct TelemetryAPISubscribeBuffering {
    pub max_items: u32,
    pub max_bytes: u32,
    pub timeout_ms: u32,
}

#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct TelemetryAPISubscribeDestination {
    pub protocol: String,

    #[serde(rename = "URI")]
    pub uri: String,
}
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "rotel-extension"
version = "0.1.0"
edition = "2024"
homepage = "https://github.com/streamfold/rotel-lambda-extension"
readme = "README.md"
rust-version = "1.91"

[dependencies]
hyper-util = { version = "0.1", features = ["full"] }
["full"] } 11 | hyper = { version = "1", features = ["full"] } 12 | lambda-extension = { git = "https://github.com/streamfold/aws-lambda-rust-runtime", branch = "json-record-types" } 13 | http-body-util = "0.1.2" 14 | bytes = "1.9.0" 15 | serde = "1" 16 | tokio-util = "0.7.13" 17 | serde_json = "1.0.135" 18 | tokio = { version = "1", features = ["macros"] } 19 | tracing = "0.1" 20 | http = "1.2.0" 21 | clap = { version = "4.5.23", features = ["derive", "env"] } 22 | futures = "0.3.31" 23 | tower-http = { version = "0.6.2", features = ["limit", "trace", "compression-gzip", "validate-request"] } 24 | dotenvy = { git = "https://github.com/streamfold/dotenvy", branch = "custom-substitution" } 25 | rustls = "0.23.20" 26 | tracing-subscriber = { version = "0.3.20", features = ["env-filter"] } 27 | tracing-appender = "0.2.3" 28 | tower = { version = "0.5.2", features = ["retry", "timeout"] } 29 | rotel = { git = "https://github.com/streamfold/rotel", rev = "ec30d9f4a4cf479316247231efb543a11bbfa70d", default-features = false} 30 | opentelemetry-proto = "0.30.0" 31 | chrono = "0.4.40" 32 | opentelemetry-semantic-conventions = { version = "0.30.0", features = ["semconv_experimental"] } 33 | hyper-rustls = "0.27.5" 34 | hmac = "0.12" 35 | sha2 = "0.10" 36 | hex = "0.4" 37 | regex = "1.11.1" 38 | 39 | [dev-dependencies] 40 | tempfile = "3.19.1" 41 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | 6 | env: 7 | CARGO_TERM_COLOR: always 8 | 9 | jobs: 10 | test: 11 | name: test - ${{ matrix.target }} 12 | strategy: 13 | fail-fast: true 14 | matrix: 15 | include: 16 | - target: x86_64-unknown-linux-gnu 17 | runner: ubuntu-latest 18 | os: ubuntu 19 | # - target: aarch64-unknown-linux-gnu 20 | # runner: ubuntu-24.04-arm 21 | # os: ubuntu 22 | runs-on: ${{ matrix.runner }} 23 | 24 | permissions: 25 | id-token: write 26 | contents: read 27 | 28 | steps: 29 | - uses: actions/checkout@v4 30 | - name: update apt cache 31 | run: sudo apt-get update 32 | - name: install protoc 33 | run: sudo apt-get install -y protobuf-compiler 34 | - uses: actions-rust-lang/setup-rust-toolchain@v1 35 | with: 36 | target: ${{ matrix.target }} 37 | rustflags: "" 38 | - name: Configure AWS credentials 39 | uses: aws-actions/configure-aws-credentials@v4 40 | with: 41 | role-to-assume: ${{ secrets.AWS_LAMBDA_CI_ROLE_ARN }} 42 | aws-region: us-east-1 43 | - name: run test 44 | env: 45 | TEST_SECRETSMANAGER_ARNS: ${{ secrets.TEST_SECRETSMANAGER_ARNS }} 46 | TEST_PARAMSTORE_ARNS: ${{ secrets.TEST_PARAMSTORE_ARNS }} 47 | TEST_ENVSECRET_ARNS: ${{ secrets.TEST_ENVSECRET_ARNS }} 48 | run: | 49 | cargo test --target ${{ matrix.target }} --verbose 50 | 51 | # Check formatting with rustfmt 52 | formatting: 53 | name: cargo fmt 54 | runs-on: ubuntu-latest 55 | steps: 56 | - uses: actions/checkout@v4 57 | # Ensure rustfmt is installed and setup problem matcher 58 | - uses: actions-rust-lang/setup-rust-toolchain@v1 59 | with: 60 | components: rustfmt 61 | rustflags: "" 62 | - name: Rustfmt Check 63 | uses: actions-rust-lang/rustfmt@v1 64 | -------------------------------------------------------------------------------- /src/secrets/error.rs: -------------------------------------------------------------------------------- 1 | use http::uri::InvalidUri; 2 | use std::fmt; 3 | use tower::BoxError; 4 | 5 | #[derive(Debug)] 6 | pub enum Error { 7 | InvalidService(String), 8 | 
    UriParseError(InvalidUri),
    HttpError(hyper_util::client::legacy::Error),
    HttpResponseError(hyper::Error),
    HttpResponseErrorParse(BoxError),
    AwsError { code: String, message: String },
    InvalidSecrets(Vec<String>),
    SigningError(rotel::aws_api::error::Error),
    SerdeError(serde_json::Error),
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Error::InvalidService(svc) => write!(f, "Invalid service: {}", svc),
            Error::AwsError { code, message } => write!(f, "AWS error [{}]: {}", code, message),
            Error::HttpError(e) => write!(f, "HTTP error: {}", e),
            Error::HttpResponseError(e) => write!(f, "Failed to parse HTTP response: {}", e),
            Error::HttpResponseErrorParse(e) => write!(f, "Failed to parse HTTP response: {}", e),
            Error::UriParseError(e) => write!(f, "Unable to parse endpoint url: {}", e),
            Error::InvalidSecrets(params) => {
                write!(f, "Unable to lookup secret values: {:?}", params)
            }
            Error::SigningError(e) => {
                write!(f, "Failed to sign request: {}", e)
            }
            Error::SerdeError(e) => write!(f, "Serialization error: {}", e),
        }
    }
}

impl std::error::Error for Error {}

impl From<InvalidUri> for Error {
    fn from(err: InvalidUri) -> Self {
        Error::UriParseError(err)
    }
}

impl From<BoxError> for Error {
    fn from(err: BoxError) -> Self {
        Error::HttpResponseErrorParse(err)
    }
}

impl From<hyper_util::client::legacy::Error> for Error {
    fn from(err: hyper_util::client::legacy::Error) -> Self {
        Error::HttpError(err)
    }
}

impl From<hyper::Error> for Error {
    fn from(err: hyper::Error) -> Self {
        Error::HttpResponseError(err)
    }
}

impl From<serde_json::Error> for Error {
    fn from(err: serde_json::Error) -> Self {
        Error::SerdeError(err)
    }
}

impl From<rotel::aws_api::error::Error> for Error {
    fn from(err: rotel::aws_api::error::Error) -> Self {
        Error::SigningError(err)
    }
}
--------------------------------------------------------------------------------
/src/secrets/client.rs:
--------------------------------------------------------------------------------
use crate::secrets::error::Error;
use crate::secrets::paramstore::ParameterStore;
use crate::secrets::secretsmanager::SecretsManager;
use crate::util::http::response_string;
use bytes::Bytes;
use http::Request;
use http_body_util::{BodyExt, Full};
use hyper_rustls::ConfigBuilderExt;
use hyper_rustls::HttpsConnector;
use hyper_util::client::legacy::Client as HyperClient;
use hyper_util::client::legacy::connect::HttpConnector;
use hyper_util::rt::{TokioExecutor, TokioTimer};
use rotel::aws_api::creds::AwsCreds;
use rustls::ClientConfig;
use std::time::Duration;
use tower::BoxError;

/// Main client for AWS services
pub struct AwsClient {
    pub(crate) creds: AwsCreds,
    client: HyperClient<HttpsConnector<HttpConnector>, Full<Bytes>>,
}

impl AwsClient {
    /// Create a new AWS client
    pub fn new(creds: AwsCreds) -> Result<Self, BoxError> {
        let client = build_hyper_client()?;

        Ok(Self { client, creds })
    }

    /// Get an instance of the SecretsManager service
    pub fn secrets_manager(&self) -> SecretsManager<'_> {
        SecretsManager::new(self)
    }

    /// Get an instance of the ParameterStore service
    pub fn parameter_store(&self) -> ParameterStore<'_> {
        ParameterStore::new(self)
    }

    pub async fn perform(&self, req: Request<Full<Bytes>>) -> Result<Bytes, Error> {
        let resp = self.client.request(req).await?;

        // Handle AWS errors
        let (parts, body) = resp.into_parts();
        if !parts.status.is_success() {
            let error_body = response_string(body).await?;

            return Err(Error::AwsError {
                code: parts.status.as_str().to_string(),
                message: error_body,
            });
        }

        // Parse success response
        Ok(body.collect().await?.to_bytes())
    }
}

fn build_hyper_client() -> Result<HyperClient<HttpsConnector<HttpConnector>, Full<Bytes>>, BoxError>
{
    let tls_config = ClientConfig::builder()
        .with_native_roots()?
        .with_no_client_auth();

    let https = hyper_rustls::HttpsConnectorBuilder::new()
        .with_tls_config(tls_config)
        .https_or_http()
        .enable_http2()
        .build();

    let client = hyper_util::client::legacy::Client::builder(TokioExecutor::new())
        .pool_idle_timeout(Duration::from_secs(30))
        .pool_max_idle_per_host(2)
        .timer(TokioTimer::new())
        .build::<_, Full<Bytes>>(https);

    Ok(client)
}
--------------------------------------------------------------------------------
/DEVELOPING.md:
--------------------------------------------------------------------------------
# Developing

Prerequisites:
- [Cargo Lambda](https://www.cargo-lambda.info/): Build and deploy tool

## Building

```shell
make build
```

This will perform a release build of the extension.

Read more about building your lambda extension in [the Cargo Lambda documentation](https://www.cargo-lambda.info/commands/build.html#extensions).

## Deploy testing

If you want to test a deployment, you can use the following command. By default it will publish as the layer name `rotel-extension-test`.

```shell
make deploy
```

In order to use the layer in other AWS accounts, you will need to run the following command to publish it. Pass the ARN of the layer output from the above command as an argument.

```shell
./scripts/publish-lambda-version.sh arn:aws:lambda:us-east-1:999999999999:layer:rotel-extension-test:29
```

_You must set valid AWS CLI credentials in your environment first._

## Production Deploy and Publish

When a release is created, the `release.yml` GitHub action will deploy and publish a new Lambda layer for both x86-64 and arm64 architectures.
The layer will be published to multiple regions, controlled by the regions matrix in the action script.

Release names/tags should follow a specific pattern:

For *alpha releases*:
- `v1-alpha`
- `v2-alpha`
- ...

This will result in layers: `rotel-extension-amd64-alpha:1`, `rotel-extension-amd64-alpha:2`, etc. The last value is the Lambda layer version.

For *production releases*:
- `v1`
- `v2`
- ...

This will result in layers: `rotel-extension:1`, `rotel-extension:2`, etc.

**NOTE**: There is no way to control the version number that AWS generates for a new Lambda layer. Therefore, we can only
rely on the auto-incrementing values to match the release name if we follow the same incrementing version scheme.

For the *arm64* architecture, the extension is named `rotel-extension-arm64-alpha` and `rotel-extension-arm64`.
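
For example, assuming the GitHub CLI is available, cutting the next alpha release might look like this (the tag `v3-alpha` is hypothetical):

```shell
# Hypothetical next tag; the tag number is what keeps layer versions aligned
gh release create v3-alpha --title v3-alpha
```

This should produce `rotel-extension-amd64-alpha:3` and `rotel-extension-arm64-alpha:3` in each deployed region, where the trailing `:3` is the AWS-assigned layer version.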

_There may be some gaps in release numbers due to trying to keep version numbers and lambda layer version numbers in sync._

## Manual deploy

The Lambda layer version numbers can sometimes require manual adjustment to ensure they align across regions. They can
be incremented by manually deploying versions of the layer until the version matches the required level. Follow this process
to raise a layer version number to a specific value.

1. Pick a Rotel release build that you want to deploy, including the right architecture.
1. Find the GitHub action run for that tag, for example: [v1-alpha](https://github.com/streamfold/rotel-lambda-extension/actions/runs/14323997150).
1. Download the artifact you want to deploy to raise the layer version number.
1. Run: `rm -rf target/lambda && mkdir -p target/lambda && unzip extensions-<...>.zip -d target/lambda`
1. Log in to the AWS CLI via SSO.
1. Run the following script:
```shell
./scripts/manual-deploy.sh <arch> <layer-name> <region> <how-many>
```
- `arch`: either _x86-64_ or _arm64_
- `layer-name`: full name of the layer, including arch and version suffix; examples: `rotel-extension-arm64-alpha`, `rotel-extension-amd64-alpha`, etc. (check the Lambda console if unsure)
- `region`: region to deploy to
- `how-many`: how many times to deploy. If the current version is 3 and you need it to be 10, you'd pass "7" to deploy 7 times (3 + 7 = 10)
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
name: release

on:
  release:
    types: [ created ]

jobs:
  build:
    name: build ${{ matrix.arch }}
    strategy:
      matrix:
        arch: [x86-64, arm64]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: update apt cache
        run: sudo apt-get update
      - name: install protoc
        run: sudo apt-get install -y protobuf-compiler
      - uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          target: x86_64-unknown-linux-gnu
          rustflags: ""
      - name: install cargo lambda
        run: |
          pip3 install cargo-lambda
      - name: Set build env
        run: echo "BUILD_SHORT_SHA=$(echo -n $GITHUB_SHA | cut -c 1-7)" >> $GITHUB_ENV
      - name: build
        run: cargo lambda build --extension --release --${{ matrix.arch }} --lambda-dir target/lambda/${{ matrix.arch }}
      - uses: actions/upload-artifact@v4
        with:
          name: extensions-${{github.ref_name}}-${{ matrix.arch }}
          path: target/lambda/
          overwrite: true

  release:
    name: release ${{ matrix.arch }} to ${{ matrix.region }}
    needs: [build]
    runs-on: ubuntu-latest
    strategy:
      matrix:
        arch: [ x86-64, arm64 ]
        # Note: As we expand these we may need to pre-create the old version numbers so that the
        # latest lambda layer version matches across all regions. See DEVELOPING.md.
        region: [ us-east-1, us-east-2, us-west-1, us-west-2, ca-central-1,
                  eu-central-1, eu-north-1, eu-west-1, eu-west-2, eu-west-3,
                  ap-southeast-1, ap-southeast-2, ap-northeast-1, ap-northeast-2, ap-south-1,
                  sa-east-1 ]

    permissions:
      id-token: write
      contents: read

    steps:
      - uses: actions/checkout@v4
      - uses: actions/download-artifact@v4
        with:
          name: extensions-${{github.ref_name}}-${{ matrix.arch }}
          path: target/lambda/
      - uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          target: x86_64-unknown-linux-gnu
          rustflags: ""
      - name: install cargo lambda
        run: |
          pip3 install cargo-lambda
      - name: set amd64 arch suffix
        if: matrix.arch == 'x86-64'
        run: echo 'EXT_ARCH_SUFFIX=-amd64' >> $GITHUB_ENV
      - name: set arm64 arch suffix
        if: matrix.arch == 'arm64'
        run: echo 'EXT_ARCH_SUFFIX=-arm64' >> $GITHUB_ENV
      - name: set version suffix
        if: contains(github.ref_name, 'alpha')
        run: echo 'EXT_VERSION_SUFFIX=-alpha' >> $GITHUB_ENV

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.AWS_LAMBDA_DEPLOY_ROLE_ARN }}
          aws-region: us-east-1

      - name: deploy
        env:
          AWS_REGION: ${{matrix.region}} # Shouldn't need this, but the CLI flag doesn't seem to be respected
        run: |
          cargo lambda deploy --extension --region ${{matrix.region}} --lambda-dir target/lambda/${{ matrix.arch }} \
            --compatible-runtimes provided.al2023 --binary-name rotel-extension \
            "rotel-extension${EXT_ARCH_SUFFIX}${EXT_VERSION_SUFFIX}" | tee -a /tmp/lambda-deploy.out

      - name: publish release
        run: |
          ./scripts/publish-lambda-version.sh $( grep 'extension arn' /tmp/lambda-deploy.out | awk '{print $4}' )
--------------------------------------------------------------------------------
/src/lambda/api.rs:
--------------------------------------------------------------------------------
use crate::lambda::constants;
use crate::lambda::constants::TELEMETRY_API_SCHEMA;
use crate::lambda::types::{
    RegisterResponseBody, TelemetryAPISubscribe, TelemetryAPISubscribeBuffering,
    TelemetryAPISubscribeDestination,
};
use crate::util::http::response_string;
use bytes::Bytes;
use http::header::CONTENT_TYPE;
use http::{Method, Request};
use http_body_util::BodyExt;
use http_body_util::Full;
use hyper_util::client::legacy::Client;
use hyper_util::client::legacy::connect::HttpConnector;
use lambda_extension::NextEvent;
use std::net::SocketAddr;
use tower::BoxError;

pub async fn register(
    client: Client<HttpConnector, Full<Bytes>>,
) -> Result<RegisterResponseBody, BoxError> {
    let events = serde_json::json!({"events": ["INVOKE", "SHUTDOWN"]});

    let url = lambda_api_url(constants::REGISTER_PATH)?;
    let req = Request::builder()
        .method(Method::POST)
        .uri(&url)
        // This value must match the binary name, or this call will 403
        .header(constants::EXTENSION_NAME_HEADER, "rotel-extension")
        .header(
            constants::EXTENSION_ACCEPT_FEATURE,
            constants::EXTENSION_FEATURE_ACCOUNTID,
        )
        .header(CONTENT_TYPE, "application/json")
        .body(Full::from(Bytes::from(serde_json::to_vec(&events)?)))?;

    let resp = client.request(req).await?;
    let (parts, body) = resp.into_parts();
    let status = parts.status;

    if status != 200 {
        let text = body
            .collect()
            .await
            .map_err(|e| format!("Failed to read response body from {}: {}", url, e))
            .map(|c| c.to_bytes())
            .map(|s| String::from_utf8(s.to_vec()))?
            .map_err(|e| format!("Unable to convert response body to string: {}", e))?;
        return Err(format!(
            "Can not register extension at {}, got {}: {}",
            url, status, text,
        )
        .into());
    }

    let ext_id = match parts.headers.get(constants::EXTENSION_ID_HEADER) {
        None => {
            return Err("Can not get extension id, got no header".into());
        }
        Some(v) => match v.to_str() {
            Ok(v) => v,
            Err(e) => {
                return Err(
                    format!("Can not get extension id, got invalid header value: {}", e).into(),
                );
            }
        },
    };

    let body = body.collect().await?.to_bytes();
    let mut reg_resp: RegisterResponseBody = serde_json::from_slice(&body)?;

    reg_resp.extension_id = ext_id.to_string();
    Ok(reg_resp)
}

// Sends a "next" request to the Lambda runtime API, which will wait until
// the next invocation request or shutdown. This request may block for an undetermined
// amount of time since Lambda may put the instance to sleep. Therefore, there should
// not be a timeout set on this request.
pub async fn next_request(
    client: Client<HttpConnector, Full<Bytes>>,
    ext_id: &str,
) -> Result<NextEvent, BoxError> {
    let url = lambda_api_url(constants::NEXT_PATH)?;
    let req = Request::builder()
        .method(Method::GET)
        .uri(&url)
        .header(constants::EXTENSION_ID_HEADER, ext_id)
        .body(Full::default())?;

    let resp = client.request(req).await?;

    let (parts, body) = resp.into_parts();
    let status = parts.status;
    let text = response_string(body).await?;

    if status != 200 {
        return Err(format!(
            "Runtime API next request failed at {}, returned: {}: {}",
            url, status, text
        )
        .into());
    }

    let event: NextEvent = serde_json::from_str(text.as_str())
        .map_err(|e| format!("Unable to deser next_event: {}", e))?;

    Ok(event)
}

pub async fn telemetry_subscribe(
    client: Client<HttpConnector, Full<Bytes>>,
    ext_id: &str,
    addr: &SocketAddr,
) -> Result<(), BoxError> {
    let sub = serde_json::json!(TelemetryAPISubscribe {
        schema_version: TELEMETRY_API_SCHEMA.to_string(),
        types: vec![
            "platform".to_string(),
            "function".to_string(),
            "extension".to_string()
        ],
        buffering: TelemetryAPISubscribeBuffering {
            // todo: these are the defaults from API ref, consider adjusting
            max_items: 1000,
            max_bytes: 256 * 1024,
            timeout_ms: 100,
        },
        destination: TelemetryAPISubscribeDestination {
            protocol: "HTTP".to_string(),
            uri: format!("http://sandbox.localdomain:{}/", addr.port()),
        },
    });

    let url = lambda_api_url(constants::TELEMETRY_PATH)?;
    let req = Request::builder()
        .method(Method::PUT)
        .uri(&url)
        .header(CONTENT_TYPE, "application/json")
        .header(constants::EXTENSION_ID_HEADER, ext_id)
        .body(Full::from(Bytes::from(serde_json::to_vec(&sub)?)))?;

    let resp = client.request(req).await?;
    if resp.status() != 200 {
        return Err(format!(
            "Can not subscribe to telemetry API at {}, got {}",
            url,
            resp.status()
        )
        .into());
    }

    Ok(())
}

fn lambda_api_url(path: &str) -> Result<String, BoxError> {
    let base_api = std::env::var("AWS_LAMBDA_RUNTIME_API")
        .map_err(|e| format!("Unable to read AWS_LAMBDA_RUNTIME_API: {:?}", e))?;

    if base_api.starts_with("http://") {
        Ok(format!("{}{}", base_api, path))
    } else {
        Ok(format!("http://{}{}", base_api, path))
    }
}
--------------------------------------------------------------------------------
/src/secrets/secretsmanager.rs:
--------------------------------------------------------------------------------
use crate::secrets::SECRETS_MANAGER_SERVICE;
use crate::secrets::client::AwsClient;
use crate::secrets::error::Error;
use bytes::Bytes;
use http::header::CONTENT_TYPE;
use http::{HeaderMap, HeaderValue, Method, Uri};
use rotel::aws_api::arn::AwsArn;
use rotel::aws_api::auth::{AwsRequestSigner, SystemClock};
use serde::Deserialize;
use serde_json::json;
use std::collections::HashMap;
use tracing::error;

pub struct SecretsManager<'a> {
    client: &'a AwsClient,
    service_name: &'static str,
}

#[derive(Debug, Deserialize)]
pub struct BatchResponse {
    #[serde(rename = "Errors")]
    pub errors: Vec<BatchResponseError>,

    #[serde(rename = "SecretValues")]
    pub secret_values: Vec<ResponseSecret>,
}

#[derive(Debug, Deserialize)]
pub struct BatchResponseError {
    // #[serde(rename = "ErrorCode")]
    // pub error_code: String,
    //
    #[serde(rename = "Message")]
    pub message: String,

    #[serde(rename = "SecretId")]
    pub secret_id: String,
}

#[derive(Debug, Deserialize)]
pub struct ResponseSecret {
    #[serde(rename = "ARN")]
    pub arn: Option<String>,

    #[serde(rename = "CreatedDate")]
    pub created_date: f64,

    #[serde(rename = "Name")]
    pub name: String,

    //
    // #[serde(rename = "SecretBinary")]
    // pub secret_binary: Option<String>,
    #[serde(rename = "SecretString")]
    pub secret_string: String,

    #[serde(rename = "VersionId")]
    pub version_id: String,
    // #[serde(rename = "VersionStages")]
    // pub version_stages: Vec<String>,
}

impl<'a> SecretsManager<'a> {
    pub(crate) fn new(client: &'a AwsClient) -> Self {
        Self {
            client,
            service_name: SECRETS_MANAGER_SERVICE,
        }
    }

    pub async fn batch_get_secret(
        &self,
        secret_arns: &[AwsArn],
    ) -> Result<HashMap<String, ResponseSecret>, Error> {
        let mut arns_by_endpoint = HashMap::new();
        for arn in secret_arns {
            if arn.service() != self.service_name {
                return Err(Error::InvalidService(arn.service().clone()));
            }

            arns_by_endpoint
                .entry(arn.get_endpoint())
                .or_insert_with(|| Vec::new())
                .push(arn);
        }

        let mut res = HashMap::new();
        for (endpoint, arns) in &arns_by_endpoint {
            let endpoint = endpoint.parse::<Uri>()?;

            let payload = json!({
                "SecretIdList": arns.iter().map(|arn| arn.to_string()).collect::<Vec<String>>(),
            });

            let payload_bytes = Bytes::from(serde_json::to_vec(&payload)?);

            let mut hdrs = HeaderMap::new();
            hdrs.insert(
                "X-Amz-Target",
                HeaderValue::from_static("secretsmanager.BatchGetSecretValue"),
            );
            hdrs.insert(
                CONTENT_TYPE,
                HeaderValue::from_static("application/x-amz-json-1.1"),
            );

            // Sign the request
            let signer = AwsRequestSigner::new(self.service_name, arns[0].region(), SystemClock);
            let signed_request = signer.sign(
                endpoint,
                Method::POST,
                hdrs,
                payload_bytes,
                &self.client.creds,
            )?;

            // Send the request
            let response = self.client.perform(signed_request).await?;

            let result: BatchResponse = serde_json::from_slice(response.as_ref())?;

            if !result.errors.is_empty() {
                let arns = result
                    .errors
                    .into_iter()
                    .map(|e| (e.secret_id, e.message))
                    .collect::<Vec<_>>();
                error!(arns = ?arns, "Unable to lookup secrets");
                return Err(Error::InvalidSecrets(
                    arns.into_iter().map(|arn| arn.0).collect(),
                ));
            }

            for secret in result.secret_values {
                if secret.arn.is_none() {
                    error!(secret = secret.name, "Secret was missing ARN");
                    return Err(Error::InvalidSecrets(
                        secret_arns.into_iter().map(|arn| arn.to_string()).collect(),
                    ));
                }

                let arn = secret.arn.clone().unwrap();
                res.insert(arn, secret);
            }
        }

        Ok(res)
    }
}

#[cfg(test)]
mod tests {
    use rotel::aws_api::creds::AwsCreds;

    use super::*;
    use crate::test_util::{init_crypto, parse_test_arns};

    #[tokio::test]
    async fn test_basic_secret_retrieval() {
        // TEST_SECRETSMANAGER_ARNS should be set to a comma-separated list of k=v pairs,
        // where k is an ARN of a secret and v is the secret value to test against.
        let test_secret_arns = std::env::var("TEST_SECRETSMANAGER_ARNS");
        if !test_secret_arns.is_ok() {
            println!("Skipping test_basic_secret_retrieval due to unset envvar");
            return;
        }

        let mut test_arns = parse_test_arns(test_secret_arns.unwrap());

        init_crypto();

        let client = AwsClient::new(AwsCreds::from_env()).unwrap();

        let ss = client.secrets_manager();

        let parsed_arns: Vec<AwsArn> = test_arns
            .iter()
            .map(|(arn, _)| arn.parse::<AwsArn>().unwrap())
            .collect();
        let res = ss.batch_get_secret(&parsed_arns).await.unwrap();

        for (test_arn, test_value) in &test_arns {
            let entry = res.get(test_arn).unwrap();
            assert_eq!(*test_value, entry.secret_string);
        }

        // Test for non-existent ARN
        test_arns.push((
            "arn:aws:secretsmanager:us-east-1:123345654789:secret:does-not-exist".to_string(),
            "foobar".to_string(),
        ));

        let parsed_arns: Vec<AwsArn> = test_arns
            .iter()
            .map(|(arn, _)| arn.parse::<AwsArn>().unwrap())
            .collect();
        let res = ss.batch_get_secret(&parsed_arns).await;

        assert!(res.is_err());
    }
}
--------------------------------------------------------------------------------
/src/lifecycle/invocation_rate.rs:
--------------------------------------------------------------------------------
// If we didn't execute for 5mins, reset
const RESET_LENGTH_MILLIS: u64 = 300 * 1_000;

const DECAY: f64 = 0.07;

const WARMUP_COUNT: u8 = 20;

#[derive(Default, Debug)]
pub struct InvocationRate {
    last_time_millis: u64,
    value: f64,
    count: u8,
}

impl InvocationRate {
    pub fn add(&mut self, now_millis: u64) {
        // invalid, discard
        if now_millis <= self.last_time_millis {
            return;
        }

        let delta_millis = now_millis - self.last_time_millis;

        // If we haven't run in a while, reset our state
        if delta_millis >= RESET_LENGTH_MILLIS {
            self.value = 0.0;
            self.last_time_millis = now_millis;
            self.count = 0;
            return;
        }

        // First time, start value at the first delta
        if self.count == 0 {
            self.value = delta_millis as f64;
            self.last_time_millis = now_millis;
            self.count = 1;
            return;
        }

        let delta_millis = delta_millis as f64;
        self.value = (delta_millis * DECAY) + (self.value * (1.0 - DECAY));
        self.last_time_millis = now_millis;

        if self.count < WARMUP_COUNT {
            self.count += 1;
        }
    }

    pub fn is_faster_than(&self, rate_millis: u64) -> Option<bool> {
        // not ready
        if self.count < WARMUP_COUNT {
            return None;
        }

        Some((self.value as u64) < rate_millis)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_initial_state() {
        let rate = InvocationRate::default();
        assert_eq!(rate.last_time_millis, 0);
        assert_eq!(rate.value, 0.0);
        assert_eq!(rate.count, 0);

        // Should return None when not warmed up
        assert_eq!(rate.is_faster_than(100), None);
    }

    #[test]
    fn test_first_invocation() {
        let mut rate = InvocationRate::default();
        rate.add(1000);

        assert_eq!(rate.last_time_millis, 1000);
        assert_eq!(rate.value, 1000.0);
        assert_eq!(rate.count, 1);
        assert_eq!(rate.is_faster_than(100), None); // Still not warmed up
    }

    #[test]
    fn test_warmup_phase() {
        let mut rate = InvocationRate::default();

        // Add 19 invocations (not enough to complete warmup)
        for i in 1..20 {
            rate.add(i * 100);
            assert_eq!(rate.count, i as u8);
            assert_eq!(rate.is_faster_than(50), None); // Still warming up
        }

        // Add the final invocation to complete warmup
        rate.add(2000);
        assert_eq!(rate.count, 20);

        // Now we should get a real result instead of None
        assert!(rate.is_faster_than(50).is_some());
    }

    #[test]
    fn test_reset_on_large_time_gap() {
        let mut rate = InvocationRate::default();

        // Add some initial invocations
        for i in 1..=20 {
            rate.add(i * 100);
        }

        // State before reset
        assert_eq!(rate.count, 20);
        assert!(rate.value > 0.0);

        // Add an invocation with a gap larger than RESET_LENGTH_MILLIS
        rate.add(2000 + RESET_LENGTH_MILLIS + 1);

        // Should have reset
        assert_eq!(rate.value, 0.0);
        assert_eq!(rate.count, 0);
    }

    #[test]
    fn test_steady_state_faster_than_threshold() {
        let mut rate = InvocationRate::default();

        // Complete warmup with small deltas (fast invocations)
        for i in 1..=WARMUP_COUNT {
            rate.add(i as u64 * 50); // 50ms intervals
        }

        // Should be faster than 100ms
        assert_eq!(rate.is_faster_than(100), Some(true));
    }

    #[test]
    fn test_steady_state_slower_than_threshold() {
        let mut rate = InvocationRate::default();

        // Complete warmup with larger deltas (slow invocations)
        for i in 1..=WARMUP_COUNT {
            rate.add(i as u64 * 200); // 200ms intervals
        }

        // Should NOT be faster than 100ms
        assert_eq!(rate.is_faster_than(100), Some(false));
    }

    #[test]
    fn test_discard_invalid_timestamp() {
        let mut rate = InvocationRate::default();

        // Set initial state
        rate.add(1000);
        assert_eq!(rate.last_time_millis, 1000);
        assert_eq!(rate.count, 1);

        // Try to add an earlier timestamp (should be discarded)
        rate.add(500);

        // State should remain unchanged
        assert_eq!(rate.last_time_millis, 1000);
        assert_eq!(rate.count, 1);

        // Same timestamp should also be discarded
        rate.add(1000);
        assert_eq!(rate.last_time_millis, 1000);
        assert_eq!(rate.count, 1);
    }

    #[test]
    fn test_exponential_decay() {
        let mut rate = InvocationRate::default();

        // Add first invocation
        rate.add(1000);
        assert_eq!(rate.value, 1000.0);

        // Add second invocation with 100ms delta
        rate.add(1100);
        let first_value = rate.value;
        assert!(first_value > 0.0);

        // Add third invocation with same delta
        rate.add(1200);
        let second_value = rate.value;

        // Value should be approaching the delta with exponential decay
        assert!(second_value > 0.0);
        assert_ne!(second_value, first_value); // Should have changed

        // After many iterations with the same delta, value should approach
        // a steady state related to that delta
        for i in 3..75 {
            rate.add(1000 + i * 100);
        }

        // With a constant delta d, the EWMA converges to
        // d * DECAY * (1 + (1 - DECAY) + (1 - DECAY)^2 + ...) = d * DECAY / DECAY = d,
        // so the final value should be close to the 100ms delta
        let expected_steady_state = 100.0 * DECAY / DECAY;
        let tolerance = 5.0; // Allow some numerical error

        assert!((rate.value - expected_steady_state).abs() < tolerance);
    }

    #[test]
    fn test_changing_rates() {
        let mut rate = InvocationRate::default();

        // Warm up with fast invocations
        for i in 1..=WARMUP_COUNT {
            rate.add(i as u64 * 50);
        }

        // Should be faster than 100ms
        assert_eq!(rate.is_faster_than(100), Some(true));

        // Switch to slow invocations
        for i in 0..10 {
            rate.add((WARMUP_COUNT as u64) * 50 + 1 + i * 200);
        }

        // Should now be slower than 100ms
        assert_eq!(rate.is_faster_than(100), Some(false));
    }
}
--------------------------------------------------------------------------------
/src/secrets/paramstore.rs:
--------------------------------------------------------------------------------
use crate::secrets::PARAM_STORE_SERVICE;
use crate::secrets::client::AwsClient;
use crate::secrets::error::Error;
use bytes::Bytes;
use http::header::CONTENT_TYPE;
use http::{HeaderMap, HeaderValue, Method, Uri};
use rotel::aws_api::arn::AwsArn;
use rotel::aws_api::auth::{AwsRequestSigner, SystemClock};
use serde::Deserialize;
use serde_json::json;
use std::collections::HashMap;
use tracing::error;

pub struct ParameterStore<'a> {
    client: &'a AwsClient,
    service_name: &'static str,
}

#[derive(Debug, Deserialize)]
pub struct GetParametersResponse {
    /// The parameter objects.
    #[serde(rename = "Parameters")]
    pub parameters: Vec<Parameter>,

    #[serde(rename = "InvalidParameters")]
    pub invalid_parameters: Vec<InvalidParameters>,
}

#[derive(Debug, Deserialize)]
pub struct InvalidParameters {
    #[serde(rename = "Name")]
    pub name: String,
}

#[derive(Debug, Deserialize)]
pub struct Parameter {
    /// The Amazon Resource Name (ARN) of the parameter.
    #[serde(rename = "ARN")]
    pub arn: Option<String>,

    // /// The data type of the parameter, such as text, aws:ec2:image, or aws:tag-specification.
    // #[serde(rename = "DataType")]
    // pub data_type: Option<String>,
    /// The last modification date of the parameter.
    #[serde(rename = "LastModifiedDate")]
    pub last_modified_date: Option<f64>,

    /// The name of the parameter.
    #[serde(rename = "Name")]
    pub name: String,

    // /// The unique identifier for the parameter version.
    // #[serde(rename = "Selector")]
    // pub selector: Option<String>,

    // /// The parameter source.
    // #[serde(rename = "SourceResult")]
    // pub source_result: Option<String>,
    /// The parameter type.
    #[serde(rename = "Type")]
    pub type_: String,

    /// The parameter value.
    #[serde(rename = "Value")]
    pub value: String,

    /// The parameter version.
    #[serde(rename = "Version")]
    pub version: Option<i64>,
    // /// Tags associated with the parameter.
    // #[serde(rename = "Tags")]
    // pub tags: Option<Vec<String>>,
}

impl<'a> ParameterStore<'a> {
    pub(crate) fn new(client: &'a AwsClient) -> Self {
        Self {
            client,
            service_name: PARAM_STORE_SERVICE,
        }
    }

    pub async fn get_parameters(
        &self,
        param_arns: &[AwsArn],
    ) -> Result<HashMap<String, Parameter>, Error> {
        let mut arns_by_endpoint = HashMap::new();
        for arn in param_arns {
            if arn.service() != self.service_name {
                return Err(Error::InvalidService(arn.service().clone()));
            }

            arns_by_endpoint
                .entry(arn.get_endpoint())
                .or_insert_with(|| Vec::new())
                .push(arn);
        }

        let mut res = HashMap::new();
        for (endpoint, arns) in &arns_by_endpoint {
            let endpoint = endpoint.parse::<Uri>()?;

            let payload = json!({
                "Names": arns.iter().map(|arn| arn.to_string()).collect::<Vec<String>>(),
                "WithDecryption": true,
            });

            let payload_bytes = Bytes::from(serde_json::to_vec(&payload)?);

            let mut hdrs = HeaderMap::new();
            hdrs.insert(
                "X-Amz-Target",
                HeaderValue::from_static("AmazonSSM.GetParameters"),
            );
            hdrs.insert(
                CONTENT_TYPE,
                HeaderValue::from_static("application/x-amz-json-1.1"),
            );

            // Sign the request
            let signer = AwsRequestSigner::new(self.service_name, arns[0].region(), SystemClock);
            let signed_request = signer.sign(
                endpoint,
                Method::POST,
                hdrs,
                payload_bytes,
                &self.client.creds,
            )?;

            // Send the request
            let response = self.client.perform(signed_request).await?;

            let result: GetParametersResponse = serde_json::from_slice(response.as_ref())?;

            if !result.invalid_parameters.is_empty() {
                return Err(Error::InvalidSecrets(
                    result
                        .invalid_parameters
                        .into_iter()
                        .map(|i| i.name)
                        .collect(),
                ));
            }

            for param in result.parameters {
                if param.arn.is_none() {
                    error!(parameter = param.name, "Parameter was missing ARN");
                    return Err(Error::InvalidSecrets(
                        arns.into_iter().map(|arn| arn.to_string()).collect(),
                    ));
                }

                let arn = param.arn.clone().unwrap();
                res.insert(arn, param);
            }
        }

        Ok(res)
    }
}

#[cfg(test)]
mod tests {
    use rotel::aws_api::creds::AwsCreds;

    use super::*;
    use crate::test_util::{init_crypto, parse_test_arns};

    #[tokio::test]
    async fn test_basic_paramstore_retrieval() {
        // TEST_PARAMSTORE_ARNS should be set to a comma-separated list of k=v pairs,
        // where k is an ARN of a parameter and v is the parameter value to test against.
        let test_paramstore_arns = std::env::var("TEST_PARAMSTORE_ARNS");
        if !test_paramstore_arns.is_ok() {
            println!("Skipping test_basic_paramstore_retrieval due to unset envvar");
            return;
        }

        let mut test_arns = parse_test_arns(test_paramstore_arns.unwrap());

        init_crypto();

        let client = AwsClient::new(AwsCreds::from_env()).unwrap();

        let ps = client.parameter_store();

        let arn_values: Vec<AwsArn> = test_arns
            .iter()
            .map(|(arn, _)| arn.parse::<AwsArn>().unwrap())
            .collect();
        let res = ps.get_parameters(&arn_values).await.unwrap();

        for test_arn in &test_arns {
            let entry = res.get(&test_arn.0).unwrap();

            assert_eq!(test_arn.1, entry.value);
        }

        // Test for non-existent ARN
        test_arns.push((
            "arn:aws:ssm:us-east-1:123374564789:parameter/invalid-param".to_string(),
            "foobar".to_string(),
        ));

        let arn_values: Vec<AwsArn> = test_arns
            .iter()
            .map(|(arn, _)| arn.parse::<AwsArn>().unwrap())
            .collect();
        let res = ps.get_parameters(&arn_values).await;

        assert!(res.is_err());
    }
}
--------------------------------------------------------------------------------
/src/lifecycle/flush_control.rs:
--------------------------------------------------------------------------------
use crate::lifecycle::flush_control::FlushMode::{AfterCall, Periodic};
use crate::lifecycle::invocation_rate::InvocationRate;
use std::sync::{Arc, Mutex};

// Default flush interval that captures any long duration
// lambda invocations. If we flush at the end or periodically at the
// beginning of an invocation, then this interval is reset
pub const DEFAULT_FLUSH_INTERVAL_MILLIS: u64 = 60 * 1_000;

// Interval used when flushing periodically at the beginning of an
// invocation.
const PERIODIC_FLUSH_RATE_MILLIS: u64 = 20 * 1_000;

// If the invocation rate is faster than this, switch to periodically
// flushing on an interval timer. Otherwise we'll flush at the end of
// an invocation.
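// Illustrative example of the thresholds: a function invoked every ~5s keeps
// the smoothed inter-arrival estimate well under this 60s threshold, so flushes
// happen on the 20s periodic timer above; one invoked every ~2min stays above
// the threshold and flushes after every call instead.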
const ACTIVE_INVOCATION_RATE_MILLIS: u64 = 60 * 1_000;

pub trait Clock {
    fn now(&self) -> u64;
}

pub struct FlushControl<C> {
    rate: InvocationRate,
    inner: Arc<Mutex<Inner>>,
    clock: C,
}

struct Inner {
    last_flush: u64,
}

pub enum FlushMode<C> {
    AfterCall,
    Periodic(PeriodicFlushControl<C>),
}

pub struct PeriodicFlushControl<C> {
    inner: Arc<Mutex<Inner>>,
    clock: C,
}

impl<C: Clock> PeriodicFlushControl<C> {
    pub fn should_flush(&mut self) -> bool {
        let now_millis = self.clock.now();
        let mut g = self.inner.lock().unwrap();

        if now_millis > g.last_flush && (now_millis - g.last_flush) > PERIODIC_FLUSH_RATE_MILLIS {
            g.last_flush = now_millis;
            true
        } else {
            false
        }
    }
}

impl<C: Clock + Clone> FlushControl<C> {
    pub fn new(clock: C) -> Self {
        Self {
            clock: clock.clone(),
            rate: InvocationRate::default(),
            inner: Arc::new(Mutex::new(Inner {
                last_flush: clock.now(),
            })),
        }
    }

    pub fn pick(&mut self) -> FlushMode<C> {
        let now_millis = self.clock.now();
        self.rate.add(now_millis);

        let mode = match self.rate.is_faster_than(ACTIVE_INVOCATION_RATE_MILLIS) {
            // Not initialized, stick to flush per call
            None => AfterCall,

            Some(is_faster) => match is_faster {
                true => Periodic(PeriodicFlushControl {
                    clock: self.clock.clone(),
                    inner: self.inner.clone(),
                }),
                false => AfterCall,
            },
        };

        match mode {
            AfterCall => {
                // Update last flush time so that if we switch to periodic, we don't
                // immediately attempt a flush because last_flush hasn't been updated
                let mut g = self.inner.lock().unwrap();
                g.last_flush = now_millis;
            }
            _ => {}
        }

        mode
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::cell::Cell;
    use std::rc::Rc;

    // Test implementation of the Clock trait
    #[derive(Clone)]
    struct TestClock {
        time: Rc<Cell<u64>>,
    }

    impl TestClock {
        fn new(initial_time: u64) -> Self {
            Self {
                time: Rc::new(Cell::new(initial_time)),
            }
        }

        fn advance(&self, millis: u64) {
            self.time.set(self.time.get() + millis);
        }
    }

    impl Clock for TestClock {
        fn now(&self) -> u64 {
            self.time.get()
        }
    }

    #[test]
    fn test_initial_state() {
        let clock = TestClock::new(1000);
        let mut flush_control = FlushControl::new(clock);

        // Initially, we should get AfterCall mode since InvocationRate isn't warmed up
        match flush_control.pick() {
            FlushMode::AfterCall => {}
            _ => panic!("Expected AfterCall mode initially"),
        }
    }

    #[test]
    fn test_after_call_mode_for_slow_invocations() {
        let clock = TestClock::new(1000);
        let mut flush_control = FlushControl::new(clock.clone());

        // Complete warmup with slow invocations (greater than ACTIVE_INVOCATION_RATE_MILLIS)
        for i in 1..=20 {
            clock.advance(ACTIVE_INVOCATION_RATE_MILLIS + 1000); // Very slow rate
            let mode = flush_control.pick();

            // During warmup, we should still get AfterCall
            if i < 20 {
                match mode {
                    FlushMode::AfterCall => {}
                    _ => panic!("Expected AfterCall mode during warmup"),
                }
            } else {
                // After warmup with slow invocations, we should still get AfterCall
                match mode {
                    FlushMode::AfterCall => {}
                    _ => panic!("Expected AfterCall mode for slow invocations"),
                }
            }
        }
    }

    #[test]
    fn test_periodic_mode_for_fast_invocations() {
        let clock = TestClock::new(1000);
        let mut flush_control = FlushControl::new(clock.clone());

        // Complete warmup with fast invocations (less than ACTIVE_INVOCATION_RATE_MILLIS)
        for _i in 1..=20 {
            clock.advance(ACTIVE_INVOCATION_RATE_MILLIS / 2); // Fast rate
            let _ = flush_control.pick();
        }

        // One more pick() after warmup should give us Periodic mode
        match flush_control.pick() {
            FlushMode::Periodic(_) => {}
            _ => panic!("Expected Periodic mode for fast invocations"),
        }
    }

    #[test]
    fn test_transition_from_periodic_to_after_call() {
        let clock = TestClock::new(1000);
        let mut flush_control = FlushControl::new(clock.clone());

        // Warm up with fast invocations
        for _ in 1..=20 {
            clock.advance(ACTIVE_INVOCATION_RATE_MILLIS / 2);
            let _ = flush_control.pick();
        }

        // Should be in Periodic mode now
        match flush_control.pick() {
            FlushMode::Periodic(_) => {}
            _ => panic!("Expected to be in Periodic mode"),
        }

        // Now switch to slow invocations
        for _ in 1..=10 {
            clock.advance(ACTIVE_INVOCATION_RATE_MILLIS * 2);
            let mode = flush_control.pick();

            // Eventually should switch back to AfterCall
            if let FlushMode::AfterCall = mode {
                return; // Test passed
            }
        }

        panic!("Failed to transition back to AfterCall mode");
    }

    #[test]
    fn test_periodic_flush_control() {
        let clock = TestClock::new(1000);
        let mut flush_control = FlushControl::new(clock.clone());

        // Warm up with fast invocations to get to Periodic mode
        for _ in 1..=20 {
            clock.advance(PERIODIC_FLUSH_RATE_MILLIS / 2);
            let _ = flush_control.pick();
        }

        // Get the PeriodicFlushControl
        let mut periodic_control = match flush_control.pick() {
            FlushMode::Periodic(control) => control,
            _ => panic!("Expected to get PeriodicFlushControl"),
        };

        // Initially, should not flush (time elapsed is 0)
        assert!(!periodic_control.should_flush());

        // Advance time but still below threshold
        clock.advance(100);
        assert!(!periodic_control.should_flush());

        // Advance time past threshold
        clock.advance(PERIODIC_FLUSH_RATE_MILLIS);
        assert!(periodic_control.should_flush());

        // After flushing, should not flush again immediately
        assert!(!periodic_control.should_flush());

        // After another interval, should flush again
        clock.advance(PERIODIC_FLUSH_RATE_MILLIS + 1);
        assert!(periodic_control.should_flush());
    }

    #[test]
    fn test_multiple_periodic_flush_controls_share_state() {
        let clock = TestClock::new(1000);
        let mut flush_control = FlushControl::new(clock.clone());

        // Warm up with fast invocations
        for _ in 1..=20 {
            clock.advance(ACTIVE_INVOCATION_RATE_MILLIS / 2);
            let _ = flush_control.pick();
        }

        // Get first periodic control
        let mut periodic_control1 = match flush_control.pick() {
            FlushMode::Periodic(control) => control,
            _ => panic!("Expected to get PeriodicFlushControl"),
        };

        // Get second periodic control
let mut periodic_control2 = match flush_control.pick() { 271 | FlushMode::Periodic(control) => control, 272 | _ => panic!("Expected to get PeriodicFlushControl"), 273 | }; 274 | 275 | // Advance time past threshold 276 | clock.advance(PERIODIC_FLUSH_RATE_MILLIS + 1); 277 | 278 | // First control should indicate a flush is needed 279 | assert!(periodic_control1.should_flush()); 280 | 281 | // Second control should not indicate a flush is needed 282 | // since the last_flush was updated by the first control 283 | assert!(!periodic_control2.should_flush()); 284 | 285 | // After waiting another interval, both should be able to flush 286 | clock.advance(PERIODIC_FLUSH_RATE_MILLIS + 1); 287 | assert!(periodic_control2.should_flush()); 288 | assert!(!periodic_control1.should_flush()); // First one affected by second one's flush 289 | } 290 | } 291 | -------------------------------------------------------------------------------- /src/lambda/telemetry_api.rs: -------------------------------------------------------------------------------- 1 | use crate::lambda::logs::{Log, parse_logs}; 2 | use crate::lambda::otel_string_attr; 3 | use bytes::Bytes; 4 | use http::header::CONTENT_TYPE; 5 | use http::{Method, Request, Response, StatusCode}; 6 | use http_body_util::{BodyExt, Full}; 7 | use hyper::body::Body; 8 | use hyper_util::rt::{TokioExecutor, TokioIo}; 9 | use hyper_util::server::conn::auto::Builder; 10 | use hyper_util::service::TowerToHyperService; 11 | use lambda_extension::{LambdaTelemetry, LambdaTelemetryRecord}; 12 | use opentelemetry_proto::tonic::logs::v1::ResourceLogs; 13 | use opentelemetry_proto::tonic::resource::v1::Resource; 14 | use opentelemetry_semantic_conventions::attribute::FAAS_INVOKED_PROVIDER; 15 | use opentelemetry_semantic_conventions::resource::{ 16 | FAAS_MAX_MEMORY, FAAS_NAME, FAAS_VERSION, SERVICE_NAME, 17 | }; 18 | use opentelemetry_semantic_conventions::trace::FAAS_INVOKED_REGION; 19 | use rotel::bounded_channel::BoundedSender; 20 | use rotel::listener::Listener; 21 | use std::fmt::{Debug, Display}; 22 | use std::future::Future; 23 | use std::net::SocketAddr; 24 | use std::ops::Add; 25 | use std::pin::Pin; 26 | use std::sync::{LazyLock, Mutex}; 27 | use std::task::{Context, Poll}; 28 | use std::time::{Duration, Instant}; 29 | use tokio_util::sync::CancellationToken; 30 | use tower::{BoxError, Service, ServiceBuilder}; 31 | use tracing::{debug, error, warn}; 32 | 33 | // We don't want to create a logging loop, so limit how often we log 34 | // failures in certain code paths that may loop. 
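// Failed sends in those paths are funneled through log_with_limit() (defined
// at the bottom of this file), e.g.:
//
//   log_with_limit(move || warn!("Failed to send logs: {}", e));
//
// which invokes the closure at most once per LOG_LIMIT_INTERVAL_SECS, shared
// across all call sites.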
35 | const LOG_LIMIT_INTERVAL_SECS: u64 = 60;
36 | static LOG_LIMIT_LAST_LOG: LazyLock<Mutex<Option<Instant>>> = LazyLock::new(|| Mutex::new(None));
37 | 
38 | pub struct TelemetryAPI {
39 |     pub listener: Listener,
40 |     pub logs_tx: BoundedSender<ResourceLogs>,
41 | }
42 | 
43 | impl TelemetryAPI {
44 |     pub fn new(listener: Listener, logs_tx: BoundedSender<ResourceLogs>) -> Self {
45 |         Self { listener, logs_tx }
46 |     }
47 | 
48 |     pub fn addr(&self) -> SocketAddr {
49 |         self.listener.bound_address().unwrap()
50 |     }
51 | 
52 |     // todo: abstract this with the server code in the otlp http receiver
53 |     pub async fn run(
54 |         self,
55 |         bus_tx: BoundedSender<LambdaTelemetry>,
56 |         cancellation: CancellationToken,
57 |     ) -> Result<(), BoxError> {
58 |         let resource = resource_from_env();
59 |         let svc =
60 |             ServiceBuilder::new().service(TelemetryService::new(resource, bus_tx, self.logs_tx));
61 |         let svc = TowerToHyperService::new(svc);
62 | 
63 |         let timer = hyper_util::rt::TokioTimer::new();
64 |         let graceful = hyper_util::server::graceful::GracefulShutdown::new();
65 | 
66 |         let mut builder = Builder::new(TokioExecutor::new());
67 |         builder
68 |             .http1()
69 |             .header_read_timeout(Some(std::time::Duration::from_secs(3)))
70 |             .timer(timer.clone());
71 |         builder.http2().timer(timer);
72 | 
73 |         let listener = self.listener.into_async()?;
74 |         loop {
75 |             let stream = tokio::select! {
76 |                 r = listener.accept() => {
77 |                     match r {
78 |                         Ok((stream, _)) => stream,
79 |                         Err(e) => return Err(e.into()),
80 |                     }
81 |                 },
82 |                 _ = cancellation.cancelled() => break
83 |             };
84 | 
85 |             let io = TokioIo::new(stream);
86 | 
87 |             let conn = builder.serve_connection(io, svc.clone());
88 |             let fut = graceful.watch(conn.into_owned());
89 | 
90 |             tokio::spawn(async move {
91 |                 let _ = fut.await.map_err(|e| {
92 |                     if let Some(hyper_err) = e.downcast_ref::<hyper::Error>() {
93 |                         // xxx: is there any way to get the error kind?
94 |                         let err_str = format!("{:?}", hyper_err);
95 | 
96 |                         // This may imply a client shutdown race: https://github.com/hyperium/hyper/issues/3775
97 |                         let err_not_connected = err_str.contains("NotConnected");
98 |                         // There is no idle timeout, so header timeout is hit first
99 |                         let err_hdr_timeout = err_str.contains("HeaderTimeout");
100 | 
101 |                         if !err_not_connected && !err_hdr_timeout {
102 |                             error!("error serving connection: {:?}", hyper_err);
103 |                         }
104 |                     } else {
105 |                         error!("error serving connection: {:?}", e);
106 |                     }
107 |                 });
108 |             });
109 |         }
110 | 
111 |         // gracefully shutdown existing connections
112 |         graceful.shutdown().await;
113 | 
114 |         Ok(())
115 |     }
116 | }
117 | 
118 | #[derive(Clone)]
119 | pub struct TelemetryService {
120 |     resource: Resource,
121 |     bus_tx: BoundedSender<LambdaTelemetry>,
122 |     logs_tx: BoundedSender<ResourceLogs>,
123 | }
124 | 
125 | impl TelemetryService {
126 |     fn new(
127 |         resource: Resource,
128 |         bus_tx: BoundedSender<LambdaTelemetry>,
129 |         logs_tx: BoundedSender<ResourceLogs>,
130 |     ) -> Self {
131 |         Self {
132 |             resource,
133 |             bus_tx,
134 |             logs_tx,
135 |         }
136 |     }
137 | }
138 | 
139 | impl<H> Service<Request<H>> for TelemetryService
140 | where
141 |     H: Body + Send + Sync + 'static,
142 |     <H as Body>::Data: Send + Sync + Clone,
143 |     <H as Body>::Error: Display + Debug + Send + Sync + ToString,
144 | {
145 |     type Response = Response<Full<Bytes>>;
146 |     type Error = BoxError;
147 |     type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send>>;
148 | 
149 |     fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
150 |         Poll::Ready(Ok(()))
151 |     }
152 | 
153 |     fn call(&mut self, req: Request<H>) -> Self::Future {
154 |         let (parts, body) = req.into_parts();
155 | 
156 |         // This part could be decoupled out to a layer, but they are complicated
157 |         // to set up, so inlining for now.
158 |         if parts.method != Method::POST {
159 |             return Box::pin(futures::future::ok(
160 |                 response_4xx(StatusCode::METHOD_NOT_ALLOWED).unwrap(),
161 |             ));
162 |         }
163 | 
164 |         if parts
165 |             .headers
166 |             .get(CONTENT_TYPE)
167 |             .is_none_or(|ct| ct != "application/json")
168 |         {
169 |             return Box::pin(futures::future::ok(
170 |                 response_4xx(StatusCode::BAD_REQUEST).unwrap(),
171 |             ));
172 |         }
173 | 
174 |         Box::pin(handle_request(
175 |             self.bus_tx.clone(),
176 |             self.logs_tx.clone(),
177 |             self.resource.clone(),
178 |             body,
179 |         ))
180 |     }
181 | }
182 | 
183 | async fn handle_request<H>(
184 |     bus_tx: BoundedSender<LambdaTelemetry>,
185 |     logs_tx: BoundedSender<ResourceLogs>,
186 |     resource: Resource,
187 |     body: H,
188 | ) -> Result<Response<Full<Bytes>>, BoxError>
189 | where
190 |     H: Body,
191 |     <H as Body>::Error: Debug,
192 | {
193 |     let buf = body.collect().await.unwrap().to_bytes();
194 | 
195 |     let events: Vec<LambdaTelemetry> = serde_json::from_slice(&buf.to_vec())
196 |         .map_err(|e| format!("unable to parse telemetry events from json: {}", e))?;
197 | 
198 |     let mut log_events = vec![];
199 |     for event in events {
200 |         // We should avoid logging on Extension or Function events, since it can cause a logging
201 |         // loop
202 |         match event.record {
203 |             LambdaTelemetryRecord::Extension(log) => {
204 |                 log_events.push(Log::Extension(event.time, log));
205 |                 continue;
206 |             }
207 |             LambdaTelemetryRecord::Function(log) => {
208 |                 log_events.push(Log::Function(event.time, log));
209 |                 continue;
210 |             }
211 |             _ => {
212 |                 // Keep this for debugging for now
213 |                 debug!("received telemetry event from lambda: {:?}", event);
214 |             }
215 |         }
216 | 
217 |         match event.record {
218 |             LambdaTelemetryRecord::PlatformRuntimeDone { .. } => {
219 |                 if let Err(e) = bus_tx.send(event.clone()).await {
220 |                     error!("unable to send telemetry event to bus: {}", e);
221 |                     // Should handle this?
222 |                 }
223 |             }
224 |             _ => {} // todo: handle more
225 |         }
226 |     }
227 | 
228 |     if !log_events.is_empty() {
229 |         // Error logging here could create a loop, make sure to rate limit
230 |         let logs = parse_logs(resource, log_events);
231 |         match logs {
232 |             Ok(rl) => {
233 |                 if let Err(e) = logs_tx.send(rl).await {
234 |                     log_with_limit(move || warn!("Failed to send logs: {}", e));
235 |                 }
236 |             }
237 |             Err(e) => {
238 |                 log_with_limit(move || warn!("Failed to convert log events: {}", e));
239 |             }
240 |         }
241 |     }
242 | 
243 |     Ok(Response::builder()
244 |         .status(StatusCode::OK)
245 |         .body(Full::default())
246 |         .unwrap())
247 | }
248 | 
249 | fn response_4xx(code: StatusCode) -> Result<Response<Full<Bytes>>, hyper::Error> {
250 |     response_4xx_with_body(code, Bytes::default())
251 | }
252 | 
253 | fn response_4xx_with_body(
254 |     code: StatusCode,
255 |     body: Bytes,
256 | ) -> Result<Response<Full<Bytes>>, hyper::Error> {
257 |     Ok(Response::builder()
258 |         .status(code)
259 |         .body(Full::new(body))
260 |         .unwrap())
261 | }
262 | 
263 | fn resource_from_env() -> Resource {
264 |     let mut r = Resource::default();
265 | 
266 |     r.attributes
267 |         .push(otel_string_attr(FAAS_INVOKED_PROVIDER, "aws"));
268 |     if let Ok(val) = std::env::var("AWS_LAMBDA_FUNCTION_NAME") {
269 |         r.attributes
270 |             .push(otel_string_attr(SERVICE_NAME, val.as_str()));
271 |         r.attributes.push(otel_string_attr(FAAS_NAME, val.as_str()));
272 |     } else {
273 |         r.attributes
274 |             .push(otel_string_attr(SERVICE_NAME, "unknown_service"));
275 |     }
276 | 
277 |     if let Ok(val) = std::env::var("AWS_LAMBDA_FUNCTION_MEMORY_SIZE") {
278 |         r.attributes
279 |             .push(otel_string_attr(FAAS_MAX_MEMORY, val.as_str()));
280 |     }
281 |     if let Ok(val) = std::env::var("AWS_LAMBDA_FUNCTION_VERSION") {
282 |         r.attributes
283 |             .push(otel_string_attr(FAAS_VERSION, val.as_str()));
284 |     }
285 |     if let Ok(val) = std::env::var("AWS_REGION") {
286 |         r.attributes
287 |             .push(otel_string_attr(FAAS_INVOKED_REGION, val.as_str()))
288 |     }
289 | 
290 |     r
291 | }
292 | 
293 | fn log_with_limit<F: FnOnce()>(f: F) {
294 |     // Don't block under any circumstance, prefer to just not log
295 |     match LOG_LIMIT_LAST_LOG.try_lock() {
296 |         Err(_) => return,
297 |         Ok(mut g) => {
298 |             let now = Instant::now();
299 |             if g.is_none() {
300 |                 f();
301 |                 *g = Some(now)
302 |             } else {
303 |                 if g.unwrap()
304 |                     .add(Duration::from_secs(LOG_LIMIT_INTERVAL_SECS))
305 |                     .lt(&now)
306 |                 {
307 |                     f();
308 |                     *g = Some(now);
309 |                 }
310 |             }
311 |         }
312 |     };
313 | }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |                                  Apache License
2 |                            Version 2.0, January 2004
3 |                         http://www.apache.org/licenses/
4 | 
5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 |    1. Definitions.
8 | 
9 |       "License" shall mean the terms and conditions for use, reproduction,
10 |       and distribution as defined by Sections 1 through 9 of this document.
11 | 
12 |       "Licensor" shall mean the copyright owner or entity authorized by
13 |       the copyright owner that is granting the License.
14 | 
15 |       "Legal Entity" shall mean the union of the acting entity and all
16 |       other entities that control, are controlled by, or are under common
17 |       control with that entity.
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Rotel Lambda Extension 2 | 3 | Rotel Lambda Extension is an advanced AWS Lambda extension layer, built on top of [Rotel](https://github.com/streamfold/rotel). Rotel is lightweight and perfectly suited for collecting OpenTelemetry data in resource-constrained environments. By minimizing binary size, reducing cold start latency, and lowering memory overhead, this extension optimizes performance and cost efficiency in AWS Lambda deployments. 4 | 5 | ![Coldstart Comparison](/contrib/coldstarts.png) 6 | 7 | _This chart compares cold start times between Rotel, the [OpenTelemetry Lambda](https://github.com/open-telemetry/opentelemetry-lambda/blob/main/collector/README.md), and the [Datadog OTEL Lambda](https://docs.datadoghq.com/serverless/aws_lambda/opentelemetry/?tab=python) layers. Check out the benchmark code [here](https://github.com/streamfold/python-lambda-benchmark)._ 8 | 9 | The Rotel Lambda Extension integrates with the Lambda [TelemetryAPI](https://docs.aws.amazon.com/lambda/latest/dg/telemetry-api.html) to collect **function logs** and **extension logs** and will export them to the configured exporter. This can reduce your Lambda observability costs if you combine it with [disabling CloudWatch Logs](#disabling-cloudwatch-logs). 10 | 11 | ## Using 12 | 13 | Choose the Lambda layer that matches your Lambda runtime architecture (**alpha** versions shown). The `{version}` field 14 | of the ARN should match the integer value from the latest [release](https://github.com/streamfold/rotel-lambda-extension/releases), 15 | so the version for `v12-alpha` would be `12`. 
16 | 17 | | Architecture | ARN | Version | 18 | |--------------|------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 19 | | x86-64/amd64 | `arn:aws:lambda:{region}:418653438961:layer:rotel-extension-amd64-alpha:{version}` | ![Version](https://img.shields.io/github/v/release/streamfold/rotel-lambda-extension?filter=*alpha&label=version&labelColor=%2338BDF8&color=%23312E81&cacheSeconds=600) | 20 | | arm64 | `arn:aws:lambda:{region}:418653438961:layer:rotel-extension-arm64-alpha:{version}` | ![Version](https://img.shields.io/github/v/release/streamfold/rotel-lambda-extension?filter=*alpha&label=version&labelColor=%2338BDF8&color=%23312E81&cacheSeconds=600) | 21 | 22 | The layer is deployed in the following AWS regions (if you don't see yours, let us know!): 23 | - us-east-{1, 2}, us-west-{1, 2} 24 | - eu-central-1, eu-north-1, eu-west-{1, 2, 3} 25 | - ca-central-1 26 | - ap-southeast-{1, 2}, ap-northeast-{1, 2} 27 | - ap-south-1 28 | - sa-east-1 29 | 30 | The layer supports the Amazon Linux 2023 31 | [Lambda runtime](https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html#runtimes-supported) 32 | (`provided.al2023`). 33 | 34 | ## Auto instrumentation 35 | 36 | The Rotel Lambda layer can be used alongside the language support extension layers, found [here](https://github.com/open-telemetry/opentelemetry-lambda?tab=readme-ov-file#extension-layer-language-support). The default Rotel OTLP receiver configuration matches the defaults used for OTEL auto-instrumentation. 37 | 38 | To use a language layer, pick the extension layer ARN that matches your runtime language and include it **in addition** to the Rotel layer ARN above. You will need to set `AWS_LAMBDA_EXEC_WRAPPER` so that your code is auto-instrumented on start up. Make sure to consult the documentation for your instrumentation layer. 39 | 40 | To see how this works in practice, check out this Node.js 41 | [example ✨](https://github.com/streamfold/nodejs-aws-lambda-example). 42 | 43 | ## Configuration 44 | 45 | The Rotel Lambda Extension is configured using the same environment variables documented 46 | for Rotel, 47 | [documented here](https://github.com/streamfold/rotel?tab=readme-ov-file#configuration). 48 | 49 | To ease configuration for Lambda environments, you can set `ROTEL_ENV_FILE` to the path 50 | name of a file and that file will be interpreted as an `.env` file. For example, set 51 | `ROTEL_ENV_FILE=/var/task/rotel.env` and include the following `rotel.env` file in your 52 | function bundle: 53 | ```shell 54 | ROTEL_OTLP_EXPORTER_ENDPOINT=https://api.axiom.co 55 | ROTEL_OTLP_EXPORTER_PROTOCOL=http 56 | ROTEL_OTLP_EXPORTER_CUSTOM_HEADERS="Authorization=Bearer ${AXIOM_API_KEY},X-Axiom-Dataset=${AXIOM_DATASET}" 57 | ``` 58 | 59 | The values `${AXIOM_API_KEY}` and `${AXIOM_DATASET}` will be resolved from the environment of the function, 60 | allowing you to set the secret values in your AWS Lambda function definition and out of the on-disk file. 61 | 62 | ### Secrets 63 | 64 | Secret values can be retrieved from **[AWS Secrets Manager](https://aws.amazon.com/secrets-manager/)** or from **[AWS Parameter Store](https://docs.aws.amazon.com/systems-manager/latest/userguide/systems-manager-parameter-store.html)** by specifying the full 65 | ARN of the stored secret as the environment variable name. 
This allows you to keep secret values out of configuration
66 | files.
67 | 
68 | **AWS Secrets Manager Example**
69 | 
70 | ```shell
71 | ROTEL_OTLP_EXPORTER_ENDPOINT=https://api.axiom.co
72 | ROTEL_OTLP_EXPORTER_PROTOCOL=http
73 | ROTEL_OTLP_EXPORTER_CUSTOM_HEADERS="Authorization=Bearer ${arn:aws:secretsmanager:us-east-1:123377354456:secret:axiom-api-key-r1l7G9},X-Axiom-Dataset=${AXIOM_DATASET}"
74 | ```
75 | 
76 | Secrets retrieved from AWS Secrets Manager also support JSON encoded secret key/value pairs. The secret
77 | value can be retrieved by suffixing the ARN with `#json-key`, where `json-key` is the top-level JSON key. For example,
78 | if the secret named `axiom-r1l7G9` contained:
79 | 
80 | ```json
81 | {
82 |   "key": "1234abcd",
83 |   "dataset": "my-dataset"
84 | }
85 | ```
86 | 
87 | Then the following example would extract those values:
88 | ```shell
89 | ROTEL_OTLP_EXPORTER_ENDPOINT=https://api.axiom.co
90 | ROTEL_OTLP_EXPORTER_PROTOCOL=http
91 | ROTEL_OTLP_EXPORTER_CUSTOM_HEADERS="Authorization=Bearer ${arn:aws:secretsmanager:us-east-1:123377354456:secret:axiom-r1l7G9#key},X-Axiom-Dataset=${arn:aws:secretsmanager:us-east-1:123377354456:secret:axiom-r1l7G9#dataset}"
92 | ```
93 | 
94 | 
95 | **AWS Parameter Store Example**
96 | 
97 | ```shell
98 | ROTEL_OTLP_EXPORTER_ENDPOINT=https://api.axiom.co
99 | ROTEL_OTLP_EXPORTER_PROTOCOL=http
100 | ROTEL_OTLP_EXPORTER_CUSTOM_HEADERS="Authorization=Bearer ${arn:aws:ssm:us-east-1:123377354456:parameter/axiom-api-key},X-Axiom-Dataset=${AXIOM_DATASET}"
101 | ```
102 | 
103 | **URI Format**
104 | 
105 | In addition to the `${arn:...}` format, you can also use a URI format with the prefix `secret://`. This can be easier to use in configuration
106 | formats that reserve the `${..}` syntax for variable interpolation. The `secret://` URI must appear at the beginning of the variable value, for example:
107 | ```shell
108 | ROTEL_CLICKHOUSE_EXPORTER_PASSWORD="secret://arn:aws:ssm:us-east-1:123377354456:parameter/clickhouse-password"
109 | ```
110 | 
111 | This supports the `#json-key` format as well to extract JSON secrets:
112 | ```shell
113 | ROTEL_CLICKHOUSE_EXPORTER_PASSWORD="secret://arn:aws:secretsmanager:us-east-1:123377354456:secret:ch-creds-r1l7G9#password"
114 | ```
115 | 
116 | **Permissions:**
117 | 
118 | You must ensure the following IAM permissions exist for your Lambda runtime execution role:
119 | 
120 | * Secrets Manager
121 |   - [`secretsmanager:GetSecretValue`](https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html)
122 |   - [`secretsmanager:BatchGetSecretValue`](https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_BatchGetSecretValue.html)
123 | * Parameter Store
124 |   - [`ssm:GetParameters`](https://docs.aws.amazon.com/systems-manager/latest/APIReference/API_GetParameters.html)
125 | 
126 | Secrets must be stored as a plaintext secret string value for AWS Secrets Manager and as a SecureString for AWS Parameter Store.
127 | 
128 | **NOTE**:
129 | 
130 | AWS API calls can increase cold start latency by 100-150 ms even when made within the same region, so be
131 | mindful of that impact when retrieving secrets. Secrets are retrieved in batches of up to 10, so resolving
132 | multiple secrets should take no longer than resolving a single one.
133 | 
134 | Secrets are only retrieved on initialization, so subsequent invocations are not impacted.
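Under the hood, the extension scans every `ROTEL_`-prefixed environment variable for these two reference forms before startup. The sketch below mirrors the matching logic of `EnvArnParser` in `src/env.rs`, but is simplified: the real implementation also batches the AWS lookups (up to 10 per call) and substitutes the resolved values back into the environment.

```rust
use regex::Regex;
use std::collections::HashMap;

/// Collect secret ARN references from ROTEL_* environment variables.
/// Matches ${arn:...} anywhere in a value, and secret://arn:... when it
/// prefixes the entire value.
fn collect_arn_refs() -> HashMap<String, String> {
    let arn_sub = Regex::new(r"\$\{(arn:[^}]+)}").unwrap();
    let secret_prefix = Regex::new(r"^secret://(arn:.+)$").unwrap();

    let mut refs = HashMap::new();
    for (key, value) in std::env::vars() {
        if !key.starts_with("ROTEL_") {
            continue;
        }
        for cap in arn_sub.captures_iter(&value) {
            refs.insert(cap[1].to_string(), String::new());
        }
        if let Some(cap) = secret_prefix.captures(&value) {
            refs.insert(cap[1].to_string(), String::new());
        }
    }
    refs // values are filled in later by the batched AWS lookups
}
```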
135 | 
136 | ### Default resource attributes
137 | 
138 | Log messages forwarded with the TelemetryAPI will automatically use a `service.name` equal to the AWS Lambda function name. Trace spans will default to the configured SDK value. You can set `service.name`, and any other resource attribute, with the following environment variable:
139 | 
140 | ```shell
141 | ROTEL_OTEL_RESOURCE_ATTRIBUTES="service.name=my-lambda-api,service.version=2.0.0"
142 | ```
143 | 
144 | This will insert or replace those resource attributes on all traces, logs, and metrics. See Rotel
145 | [docs](https://github.com/streamfold/rotel?tab=readme-ov-file#setting-resource-attributes) for more info.
146 | 
147 | ## Disabling CloudWatch Logs
148 | 
149 | By default, AWS Lambda will send all Lambda logs to Amazon CloudWatch. To reduce costs, you may want to disable those logs if you are forwarding your logs to an external logging provider.
150 | 
151 | 1. Open the AWS Console and navigate to AWS Lambda
152 | 2. Navigate to your Lambda function
153 | 3. Select Configuration -> Permissions
154 | 4. Click the execution role under "Role Name" to pop over to the IAM console
155 | 5. Edit the role in the IAM console and remove any `logs:*` actions
156 |    - if you are using a custom policy, edit the policy to remove `logs:*` actions
157 |    - if you are using an AWS Managed policy, like `AWSLambdaBasicExecutionRole`, remove it from the role
158 | 6. Save the role and your next execution should not send logs to CloudWatch
159 | 
160 | ## Adaptive Flushing
161 | 
162 | This extension uses an **adaptive flushing model**, similar to the one implemented in [Datadog's new Rust Lambda extension](https://www.datadoghq.com/blog/engineering/datadog-lambda-extension-rust/).
163 | 
164 | On the initial invocation after a cold start, the extension flushes all telemetry data **at the end** of each function invocation. This ensures minimal delay in telemetry availability. However, because the flush happens *after* the invocation completes, it can slightly increase the billed duration of the function.
165 | 
166 | If the extension detects a regular invocation pattern—such as invocations occurring at least once per minute—it will switch to **periodic flushing at the start** of each invocation. This overlaps the flush operation with the function’s execution time, reducing the likelihood of added billed duration due to telemetry flushing. A code-level sketch of this decision follows the Examples section below.
167 | 
168 | For long-running invocations, a **global backup timer** is used to flush telemetry periodically. This timer is reset whenever a regular flush occurs, ensuring that telemetry is still sent even if invocation patterns become irregular.
169 | 
170 | ## Examples
171 | 
172 | These are example repos demonstrating how to use the Rotel Lambda Extension.
173 | 
174 | * [Node.js Auto Instrumentation](https://github.com/streamfold/nodejs-aws-lambda-example): This uses the Node.js auto instrumentation [layer](https://github.com/open-telemetry/opentelemetry-lambda/blob/main/nodejs/README.md) to instrument a Node.js app and emit metrics, logs and traces to Honeycomb.
175 | * [Python + Clickhouse](https://github.com/streamfold/python-aws-lambda-clickhouse-example): Python application with manual OpenTelemetry instrumentation, sending OpenTelemetry traces and logs to Clickhouse. All Lambda logs are converted to OTel and immediately sent to Clickhouse, so this can avoid expensive Cloudwatch log costs. This example uses the JSON data type by default to improve the query support for OTel key/value attributes.
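For a code-level view of the adaptive flushing model described above, here is a minimal sketch of how a `FlushControl` (from `src/lifecycle/flush_control.rs`) drives the per-invocation decision. The `SystemClock` type and the invocation-handling shell are illustrative assumptions; the extension's actual loop lives in `src/main.rs`.

```rust
use rotel_extension::lifecycle::flush_control::{Clock, FlushControl, FlushMode};

// Illustrative wall-clock implementation of the Clock trait.
#[derive(Clone)]
struct SystemClock;

impl Clock for SystemClock {
    fn now(&self) -> u64 {
        std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_millis() as u64
    }
}

fn handle_invocation(flush_control: &mut FlushControl<SystemClock>) {
    match flush_control.pick() {
        // Cold start or irregular traffic: run the handler, then flush at
        // the end of the invocation.
        FlushMode::AfterCall => { /* run handler, then flush */ }
        // Steady traffic: flush (if due) at the start of the invocation,
        // overlapping the flush with the function's execution.
        FlushMode::Periodic(mut periodic) => {
            if periodic.should_flush() {
                /* trigger pipeline flush */
            }
            /* run handler */
        }
    }
}
```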
176 | 177 | ## Community 178 | 179 | Want to chat about this project, share feedback, or suggest improvements? Join our [Discord server](https://discord.gg/reUqNWTSGC)! Whether you're a user of this project or not, we'd love to hear your thoughts and ideas. See you there! 🚀 180 | 181 | ## Developing 182 | 183 | See [DEVELOPING](/DEVELOPING.md) for developer instructions. 184 | 185 | --- 186 | 187 | Built with ❤️ by Streamfold. 188 | -------------------------------------------------------------------------------- /src/lambda/logs.rs: -------------------------------------------------------------------------------- 1 | use crate::lambda::otel_string_attr; 2 | use chrono::{DateTime, Utc}; 3 | use opentelemetry_proto::tonic::common::v1::any_value::Value::StringValue; 4 | use opentelemetry_proto::tonic::common::v1::{AnyValue, InstrumentationScope}; 5 | use opentelemetry_proto::tonic::logs::v1::{LogRecord, ResourceLogs, ScopeLogs, SeverityNumber}; 6 | use opentelemetry_proto::tonic::resource::v1::Resource; 7 | use opentelemetry_semantic_conventions::attribute::FAAS_INVOCATION_ID; 8 | use serde_json::Value; 9 | use std::time::SystemTime; 10 | use tower::BoxError; 11 | 12 | const LOG_SCOPE: &str = "github.com/streamfold/rotel-lambda-extension"; 13 | 14 | pub(crate) enum Log { 15 | Function(DateTime, Value), 16 | Extension(DateTime, Value), 17 | } 18 | 19 | impl Log { 20 | fn get_type(&self) -> String { 21 | match self { 22 | Log::Function { .. } => "function".to_string(), 23 | Log::Extension { .. } => "extension".to_string(), 24 | } 25 | } 26 | 27 | fn into_parts(self) -> (DateTime, serde_json::Value) { 28 | match self { 29 | Log::Function(dt, l) => (dt, l), 30 | Log::Extension(dt, l) => (dt, l), 31 | } 32 | } 33 | } 34 | 35 | pub(crate) fn parse_logs(resource: Resource, logs: Vec) -> Result { 36 | let mut rl = ResourceLogs { 37 | resource: Some(resource), 38 | ..Default::default() 39 | }; 40 | 41 | let mut sl = ScopeLogs { 42 | scope: Some(InstrumentationScope { 43 | name: LOG_SCOPE.to_string(), 44 | ..Default::default() 45 | }), 46 | ..Default::default() 47 | }; 48 | 49 | let now = SystemTime::now() 50 | .duration_since(std::time::UNIX_EPOCH) 51 | .unwrap(); 52 | 53 | let log_records: Result, _> = logs 54 | .into_iter() 55 | .map(|log| { 56 | let log_type = log.get_type(); 57 | let (time, record) = log.into_parts(); 58 | 59 | let mut lr = LogRecord::default(); 60 | 61 | lr.attributes 62 | .push(otel_string_attr("type", log_type.as_str())); 63 | lr.time_unix_nano = time.timestamp_nanos_opt().unwrap_or(now.as_nanos() as i64) as u64; 64 | lr.observed_time_unix_nano = now.as_nanos() as u64; 65 | 66 | // Logs can be JSON or String 67 | // https://docs.aws.amazon.com/lambda/latest/dg/telemetry-schema-reference.html#telemetry-api-function 68 | match record { 69 | Value::Object(mut rec) => { 70 | if let Some(Value::String(ts)) = rec.get("timestamp") { 71 | if let Ok(dt) = DateTime::parse_from_rfc3339(ts.as_str()) { 72 | if let Some(nanos) = dt.timestamp_nanos_opt() { 73 | lr.time_unix_nano = nanos as u64; 74 | } 75 | } 76 | } 77 | if let Some(Value::String(level)) = rec.get("level") { 78 | lr.severity_number = i32::from(severity_text_to_number(level)); 79 | lr.severity_text = lr.severity_number().as_str_name().to_string(); 80 | } 81 | if let Some(Value::String(request_id)) = rec.get("requestId") { 82 | lr.attributes 83 | .push(otel_string_attr(FAAS_INVOCATION_ID, request_id)); 84 | } 85 | if let Some(Value::String(msg)) = rec.remove("message") { 86 | lr.body = Some(AnyValue { 87 | value: 
Some(StringValue(msg)), 88 | }) 89 | } else if let Some(Value::Object(mut fields)) = rec.remove("fields") { 90 | if let Some(Value::String(msg)) = fields.remove("message") { 91 | lr.body = Some(AnyValue { 92 | value: Some(StringValue(msg)), 93 | }) 94 | } 95 | } 96 | } 97 | Value::String(rec) => { 98 | lr.body = Some(AnyValue { 99 | value: Some(StringValue(rec)), 100 | }) 101 | } 102 | _ => { 103 | return Err(format!("invalid log record type: {:?}", record)); 104 | } 105 | }; 106 | 107 | Ok(lr) 108 | }) 109 | .collect(); 110 | 111 | match log_records { 112 | Ok(lr) => sl.log_records = lr, 113 | Err(e) => return Err(format!("Failed to parse log records: {}", e).into()), 114 | } 115 | 116 | rl.scope_logs = vec![sl]; 117 | 118 | Ok(rl) 119 | } 120 | 121 | fn severity_text_to_number(level: &String) -> SeverityNumber { 122 | let upper = level.to_uppercase(); 123 | 124 | match upper.as_str() { 125 | "TRACE" => SeverityNumber::Trace, 126 | "TRACE2" => SeverityNumber::Trace2, 127 | "TRACE3" => SeverityNumber::Trace3, 128 | "TRACE4" => SeverityNumber::Trace4, 129 | "DEBUG" => SeverityNumber::Debug, 130 | "DEBUG2" => SeverityNumber::Debug2, 131 | "DEBUG3" => SeverityNumber::Debug3, 132 | "DEBUG4" => SeverityNumber::Debug4, 133 | "INFO" => SeverityNumber::Info, 134 | "INFO2" => SeverityNumber::Info2, 135 | "INFO3" => SeverityNumber::Info3, 136 | "INFO4" => SeverityNumber::Info4, 137 | "WARN" => SeverityNumber::Warn, 138 | "WARN2" => SeverityNumber::Warn2, 139 | "WARN3" => SeverityNumber::Warn3, 140 | "WARN4" => SeverityNumber::Warn4, 141 | "ERROR" => SeverityNumber::Error, 142 | "ERROR2" => SeverityNumber::Error2, 143 | "ERROR3" => SeverityNumber::Error3, 144 | "ERROR4" => SeverityNumber::Error4, 145 | "FATAL" => SeverityNumber::Fatal, 146 | "FATAL2" => SeverityNumber::Fatal2, 147 | "FATAL3" => SeverityNumber::Fatal3, 148 | "FATAL4" => SeverityNumber::Fatal4, 149 | "CRITICAL" => SeverityNumber::Fatal, 150 | "ALL" => SeverityNumber::Trace, 151 | "WARNING" => SeverityNumber::Warn, 152 | _ => SeverityNumber::Unspecified, 153 | } 154 | } 155 | 156 | #[cfg(test)] 157 | mod tests { 158 | use crate::lambda::logs::{Log, parse_logs}; 159 | use crate::lambda::otel_string_attr; 160 | use chrono::DateTime; 161 | use lambda_extension::LambdaTelemetryRecord; 162 | use opentelemetry_proto::tonic::common::v1::KeyValue; 163 | use opentelemetry_proto::tonic::common::v1::any_value::Value::StringValue; 164 | use opentelemetry_proto::tonic::logs::v1::SeverityNumber; 165 | use opentelemetry_proto::tonic::resource::v1::Resource; 166 | use opentelemetry_semantic_conventions::attribute::FAAS_INVOCATION_ID; 167 | use opentelemetry_semantic_conventions::resource::SERVICE_NAME; 168 | use serde_json::Value; 169 | use std::collections::HashMap; 170 | use std::ops::{Add, Sub}; 171 | use std::time::{Duration, SystemTime}; 172 | 173 | #[test] 174 | fn test_json_record() { 175 | let json_rec = r#"{ 176 | "time": "2022-10-12T00:03:50.000Z", 177 | "type": "extension", 178 | "record": { 179 | "timestamp": "2022-10-12T00:03:50.000Z", 180 | "level": "INFO", 181 | "requestId": "79b4f56e-95b1-4643-9700-2807f4e68189", 182 | "message": "Hello world, I am an extension!" 183 | } 184 | }"#; 185 | let str_rec = r#"{ 186 | "time": "2022-10-12T00:03:50.000Z", 187 | "type": "function", 188 | "record": "[INFO] Hello world, I am an extension!" 
189 | }"#; 190 | 191 | let as_json: LambdaTelemetryRecord = serde_json::from_str(json_rec).unwrap(); 192 | println!("as json: {:?}", as_json); 193 | 194 | let as_str: LambdaTelemetryRecord = serde_json::from_str(str_rec).unwrap(); 195 | println!("as str: {:?}", as_str); 196 | } 197 | 198 | #[test] 199 | fn test_log_parse() { 200 | let now = SystemTime::now(); 201 | let tm1 = DateTime::from(now.sub(Duration::from_secs(3600))); 202 | let tm2 = tm1.add(Duration::from_secs(60)); 203 | let tm3 = tm2.add(Duration::from_secs(60)); 204 | let mut r = Resource::default(); 205 | r.attributes 206 | .push(otel_string_attr(SERVICE_NAME, "test_log_parse")); 207 | 208 | let logs = vec![ 209 | Log::Function( 210 | tm1, 211 | Value::Object(json_map(HashMap::from([ 212 | ("timestamp", Value::String(tm2.to_rfc3339())), 213 | ("level", Value::String("warn".to_string())), 214 | ("requestId", Value::String("1234abcd".to_string())), 215 | ("message", Value::String("the message".to_string())), 216 | ]))), 217 | ), 218 | Log::Extension(tm3, Value::String("INFO Plain text message".to_string())), 219 | ]; 220 | 221 | let mut res = parse_logs(r, logs).unwrap(); 222 | 223 | assert_eq!(1, res.scope_logs.len()); 224 | assert_eq!(2, res.scope_logs[0].log_records.len()); 225 | 226 | assert_eq!( 227 | Some("test_log_parse".to_string()), 228 | find_str_attr(&res.resource.unwrap().attributes, SERVICE_NAME) 229 | ); 230 | 231 | let log2 = res.scope_logs[0].log_records.pop().unwrap(); 232 | let log1 = res.scope_logs[0].log_records.pop().unwrap(); 233 | 234 | // log 1 235 | assert_eq!( 236 | tm2.timestamp_nanos_opt().unwrap() as u64, 237 | log1.time_unix_nano 238 | ); 239 | assert_eq!(SeverityNumber::Warn as i32, log1.severity_number); 240 | assert_eq!(SeverityNumber::Warn.as_str_name(), log1.severity_text); 241 | assert_eq!(2, log1.attributes.len()); 242 | assert_eq!( 243 | Some("1234abcd".to_string()), 244 | find_str_attr(&log1.attributes, FAAS_INVOCATION_ID) 245 | ); 246 | assert_eq!( 247 | Some("function".to_string()), 248 | find_str_attr(&log1.attributes, "type") 249 | ); 250 | assert_eq!( 251 | StringValue("the message".to_string()), 252 | log1.body.unwrap().value.unwrap() 253 | ); 254 | 255 | // log 2 256 | assert_eq!( 257 | Some("extension".to_string()), 258 | find_str_attr(&log2.attributes, "type") 259 | ); 260 | assert_eq!( 261 | StringValue("INFO Plain text message".to_string()), 262 | log2.body.unwrap().value.unwrap() 263 | ); 264 | } 265 | 266 | #[test] 267 | fn test_log_parse_invalid() { 268 | let tm1 = DateTime::from(SystemTime::now().sub(Duration::from_secs(3600))); 269 | let mut r = Resource::default(); 270 | r.attributes 271 | .push(otel_string_attr(SERVICE_NAME, "test_log_parse")); 272 | 273 | let logs = vec![Log::Extension( 274 | tm1, 275 | Value::Array(vec![Value::String("invalid".to_string())]), 276 | )]; 277 | 278 | let res = parse_logs(r, logs); 279 | assert!(res.is_err()) 280 | } 281 | 282 | #[test] 283 | fn test_log_parse_fields() { 284 | let now = SystemTime::now(); 285 | let tm1 = DateTime::from(now.sub(Duration::from_secs(3600))); 286 | let tm2 = tm1.add(Duration::from_secs(60)); 287 | let mut r = Resource::default(); 288 | r.attributes 289 | .push(otel_string_attr(SERVICE_NAME, "test_log_parse")); 290 | 291 | let logs = vec![Log::Function( 292 | tm1, 293 | Value::Object(json_map(HashMap::from([ 294 | ("timestamp", Value::String(tm2.to_rfc3339())), 295 | ("level", Value::String("warn".to_string())), 296 | ("requestId", Value::String("1234abcd".to_string())), 297 | ( 298 | "fields", 299 | 
Value::Object(json_map(HashMap::from([( 300 | "message", 301 | Value::String("the message".to_string()), 302 | )]))), 303 | ), 304 | ]))), 305 | )]; 306 | 307 | let mut res = parse_logs(r, logs).unwrap(); 308 | 309 | assert_eq!(1, res.scope_logs.len()); 310 | assert_eq!(1, res.scope_logs[0].log_records.len()); 311 | 312 | assert_eq!( 313 | Some("test_log_parse".to_string()), 314 | find_str_attr(&res.resource.unwrap().attributes, SERVICE_NAME) 315 | ); 316 | 317 | let log1 = res.scope_logs[0].log_records.pop().unwrap(); 318 | 319 | assert_eq!( 320 | tm2.timestamp_nanos_opt().unwrap() as u64, 321 | log1.time_unix_nano 322 | ); 323 | assert_eq!( 324 | Some("1234abcd".to_string()), 325 | find_str_attr(&log1.attributes, FAAS_INVOCATION_ID) 326 | ); 327 | assert_eq!( 328 | StringValue("the message".to_string()), 329 | log1.body.unwrap().value.unwrap() 330 | ); 331 | } 332 | 333 | fn json_map(m: HashMap<&str, Value>) -> serde_json::Map { 334 | let mut new_map = serde_json::Map::new(); 335 | for (k, v) in m.into_iter() { 336 | new_map.insert(k.to_string(), v); 337 | } 338 | new_map 339 | } 340 | 341 | fn find_str_attr(attrs: &Vec, key: &str) -> Option { 342 | attrs 343 | .iter() 344 | .find(|kv| kv.key.eq(key)) 345 | .map(|kv| match kv.value.clone().unwrap().value.unwrap() { 346 | StringValue(v) => Some(v), 347 | _ => None, 348 | }) 349 | .flatten() 350 | } 351 | } 352 | -------------------------------------------------------------------------------- /src/env.rs: -------------------------------------------------------------------------------- 1 | use crate::secrets::client::AwsClient; 2 | use crate::secrets::{MAX_LOOKUP_LEN, PARAM_STORE_SERVICE, SECRETS_MANAGER_SERVICE}; 3 | use regex::Regex; 4 | use rotel::aws_api::arn::AwsArn; 5 | use rotel::aws_api::creds::AwsCreds; 6 | use std::collections::HashMap; 7 | use tokio::time::Instant; 8 | use tower::BoxError; 9 | use tracing::{debug, warn}; 10 | 11 | pub struct EnvArnParser { 12 | arn_sub_re: Regex, 13 | secret_prefix_re: Regex, 14 | } 15 | 16 | impl EnvArnParser { 17 | pub fn new() -> Self { 18 | Self { 19 | arn_sub_re: Regex::new(r"\$\{(arn:[^}]+)}").unwrap(), 20 | secret_prefix_re: Regex::new(r"^secret://(arn:.+)$").unwrap(), 21 | } 22 | } 23 | 24 | pub fn extract_arns_from_env(&self) -> HashMap { 25 | let mut sec_subs = HashMap::new(); 26 | for (k, v) in std::env::vars() { 27 | if !k.starts_with("ROTEL_") { 28 | continue; 29 | } 30 | 31 | // Check for ${arn:...} format 32 | for capture in self.arn_sub_re.captures_iter(v.as_str()) { 33 | let matched = capture.get(1).unwrap().as_str().to_string(); 34 | sec_subs.insert(matched, "".to_string()); 35 | } 36 | 37 | // Check for secret://arn:... format 38 | if let Some(capture) = self.secret_prefix_re.captures(v.as_str()) { 39 | let matched = capture.get(1).unwrap().as_str().to_string(); 40 | sec_subs.insert(matched, "".to_string()); 41 | } 42 | } 43 | 44 | sec_subs 45 | } 46 | 47 | pub fn update_env_arn_secrets(&self, arn_map: HashMap) { 48 | let mut updates = HashMap::new(); 49 | for (k, v) in std::env::vars() { 50 | if !k.starts_with("ROTEL_") { 51 | continue; 52 | } 53 | 54 | let mut result = v.clone(); 55 | 56 | // Handle ${arn:...} format 57 | result = self 58 | .arn_sub_re 59 | .replace_all(result.as_str(), |caps: ®ex::Captures| { 60 | let matched = caps.get(1).unwrap().as_str(); 61 | 62 | match arn_map.get(matched) { 63 | None => "", 64 | Some(v) => v, 65 | } 66 | }) 67 | .into_owned(); 68 | 69 | // Handle secret://arn:... 
format 70 | if let Some(capture) = self.secret_prefix_re.captures(result.as_str()) { 71 | let matched = capture.get(1).unwrap().as_str(); 72 | if let Some(secret_value) = arn_map.get(matched) { 73 | result = secret_value.clone(); 74 | } 75 | } 76 | 77 | if v != result { 78 | updates.insert(k, result); 79 | } 80 | } 81 | 82 | for (k, v) in updates { 83 | unsafe { std::env::set_var(k, v.to_string()) } 84 | } 85 | } 86 | } 87 | 88 | pub async fn resolve_secrets( 89 | aws_creds: AwsCreds, 90 | secure_arns: &mut HashMap, 91 | ) -> Result<(), BoxError> { 92 | let secrets_start = Instant::now(); 93 | 94 | let client = AwsClient::new(aws_creds)?; 95 | 96 | let mut arns_by_svc = HashMap::new(); 97 | for (arn_str, _) in secure_arns.iter() { 98 | let arn = arn_str.parse::()?; 99 | 100 | if arn.service() != SECRETS_MANAGER_SERVICE && arn.service() != PARAM_STORE_SERVICE { 101 | return Err(format!("Unknown secret ARN service name: {}", arn.service()).into()); 102 | } 103 | 104 | if arn.service() == PARAM_STORE_SERVICE && arn.resource_field() != "" { 105 | return Err(format!( 106 | "JSON field selection not allowed for parameter store: {}", 107 | arn.to_string() 108 | ) 109 | .into()); 110 | } 111 | 112 | // This should never happen, but avoid silent bugs later 113 | if arn.to_string() != *arn_str { 114 | return Err(format!( 115 | "ARN value did not match input string: {} != {}", 116 | arn.to_string(), 117 | arn_str 118 | ) 119 | .into()); 120 | } 121 | 122 | let arn_without_field = arn.clone().set_resource_field("".to_string()); 123 | 124 | arns_by_svc 125 | .entry(arn.service().clone()) 126 | .or_insert_with(|| HashMap::new()) 127 | .entry(arn_without_field) 128 | .or_insert_with(|| Vec::new()) 129 | .push(arn); 130 | } 131 | 132 | for (svc, arns_by_base) in arns_by_svc { 133 | for arn_chunk in arns_by_base 134 | .keys() 135 | .cloned() 136 | .collect::>() 137 | .chunks(MAX_LOOKUP_LEN) 138 | { 139 | if svc == SECRETS_MANAGER_SERVICE { 140 | let sm = client.secrets_manager(); 141 | 142 | match sm.batch_get_secret(arn_chunk).await { 143 | Ok(res) => { 144 | for (arn, secret) in res { 145 | let aws_arn = arn.parse::()?; 146 | match arns_by_base.get(&aws_arn) { 147 | None => { 148 | return Err(format!( 149 | "Returned secret ARN was not found: {}", 150 | arn 151 | ) 152 | .into()); 153 | } 154 | Some(entry) => { 155 | for full_arn in entry { 156 | if full_arn.resource_field() == "" { 157 | secure_arns.insert( 158 | full_arn.to_string(), 159 | secret.secret_string.clone(), 160 | ); 161 | continue; 162 | } 163 | 164 | match serde_json::from_str::>( 165 | secret.secret_string.as_str(), 166 | ) { 167 | Ok(json) => match json.get(full_arn.resource_field()) { 168 | None => return Err(format!( 169 | "Secret JSON did not contain field {}: {:?}", 170 | full_arn.resource_field(), 171 | full_arn 172 | ) 173 | .into()), 174 | Some(value) => { 175 | secure_arns.insert( 176 | full_arn.to_string(), 177 | value.to_string(), 178 | ); 179 | } 180 | }, 181 | Err(_) => { 182 | return Err(format!( 183 | "Unable to parse secret string as JSON: {:?}", 184 | full_arn 185 | ) 186 | .into()); 187 | } 188 | } 189 | } 190 | } 191 | } 192 | } 193 | } 194 | Err(err) => { 195 | warn!( 196 | "Unable to resolve ARNs from secrets manager: {:?}: {:?}", 197 | arn_chunk, err, 198 | ); 199 | return Err("Unable to resolve ARNs from secrets manager".into()); 200 | } 201 | } 202 | } else { 203 | let ps = client.parameter_store(); 204 | 205 | match ps.get_parameters(arn_chunk).await { 206 | Ok(res) => { 207 | for (arn, param) in res { 208 | 
secure_arns.insert(arn, param.value); 209 | } 210 | } 211 | Err(err) => { 212 | warn!( 213 | "Unable to resolve ARNs from parameter store: {:?}: {:?}", 214 | arn_chunk, err, 215 | ); 216 | return Err("Unable to resolve ARNs from parameter store".into()); 217 | } 218 | } 219 | } 220 | } 221 | } 222 | 223 | debug!( 224 | "Resolved all secrets in {} ms", 225 | Instant::now().duration_since(secrets_start).as_millis() 226 | ); 227 | Ok(()) 228 | } 229 | 230 | #[cfg(test)] 231 | mod tests { 232 | use rotel::aws_api::creds::AwsCreds; 233 | 234 | use crate::env::{EnvArnParser, resolve_secrets}; 235 | use crate::test_util::{init_crypto, parse_test_arns}; 236 | use std::collections::HashMap; 237 | 238 | #[test] 239 | fn test_extract_and_update_arns_from_env() { 240 | unsafe { std::env::set_var("ROTEL_DONT_EXPAND", "${SOMETHING}") } 241 | unsafe { std::env::set_var("ROTEL_SINGLE", "${arn:test1}") } 242 | unsafe { std::env::set_var("ROTEL_MULTI", "${arn:test2} - ${arn:test3}") } 243 | unsafe { std::env::set_var("ROTEL_ALREADY_EXISTS", "Bearer ${arn:test2}") } 244 | unsafe { std::env::set_var("ROTEL_WONT_UPDATE", "empty:${arn:test4}") } 245 | unsafe { std::env::set_var("ROTEL_SECRET_PREFIX", "secret://arn:test5") } 246 | 247 | let es = EnvArnParser::new(); 248 | let mut hm = es.extract_arns_from_env(); 249 | 250 | assert_eq!(5, hm.len()); 251 | assert!(hm.contains_key("arn:test1")); 252 | assert!(hm.contains_key("arn:test2")); 253 | assert!(hm.contains_key("arn:test3")); 254 | assert!(hm.contains_key("arn:test4")); 255 | assert!(hm.contains_key("arn:test5")); 256 | 257 | hm.insert("arn:test1".to_string(), "result-1".to_string()); 258 | hm.insert("arn:test2".to_string(), "result-2".to_string()); 259 | hm.insert("arn:test3".to_string(), "result-3".to_string()); 260 | hm.insert("arn:test5".to_string(), "secret-result".to_string()); 261 | 262 | es.update_env_arn_secrets(hm); 263 | 264 | assert_eq!("${SOMETHING}", std::env::var("ROTEL_DONT_EXPAND").unwrap()); 265 | assert_eq!("result-1", std::env::var("ROTEL_SINGLE").unwrap()); 266 | assert_eq!("result-2 - result-3", std::env::var("ROTEL_MULTI").unwrap()); 267 | assert_eq!( 268 | "Bearer result-2", 269 | std::env::var("ROTEL_ALREADY_EXISTS").unwrap() 270 | ); 271 | assert_eq!("empty:", std::env::var("ROTEL_WONT_UPDATE").unwrap()); 272 | assert_eq!( 273 | "secret-result", 274 | std::env::var("ROTEL_SECRET_PREFIX").unwrap() 275 | ); 276 | 277 | unsafe { std::env::remove_var("ROTEL_DONT_EXPAND") } 278 | unsafe { std::env::remove_var("ROTEL_SINGLE") } 279 | unsafe { std::env::remove_var("ROTEL_MULTI") } 280 | unsafe { std::env::remove_var("ROTEL_ALREADY_EXISTS") } 281 | unsafe { std::env::remove_var("ROTEL_WONT_UPDATE") } 282 | unsafe { std::env::remove_var("ROTEL_SECRET_PREFIX") } 283 | } 284 | 285 | #[tokio::test] 286 | async fn test_resolve_multiple_secrets() { 287 | // TEST_ENVSECRET_ARNS should be set to a comma-separated list of k=v pairs, 288 | // where k is an ARN of a secret and v is the secret value to test against. 
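// e.g. TEST_ENVSECRET_ARNS="arn:aws:ssm:...:parameter/p1=value-1,arn:aws:secretsmanager:...:secret:s1=value-2"
// (illustrative ARNs; pairs are split by parse_test_arns in src/test_util.rs)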
289 | let test_envsecret_arns = std::env::var("TEST_ENVSECRET_ARNS"); 290 | if !test_envsecret_arns.is_ok() { 291 | println!("Skipping test_resolve_multiple_secrets due to unset envvar"); 292 | return; 293 | } 294 | 295 | let test_arns = parse_test_arns(test_envsecret_arns.unwrap()); 296 | 297 | init_crypto(); 298 | 299 | let mut test_arn_map = HashMap::new(); 300 | for (test_arn, _) in &test_arns { 301 | test_arn_map.insert(test_arn.clone(), "".to_string()); 302 | } 303 | 304 | let res = resolve_secrets(AwsCreds::from_env(), &mut test_arn_map).await; 305 | assert!(res.is_ok()); 306 | 307 | for (test_arn, test_value) in test_arns { 308 | let result = test_arn_map.get(&test_arn).unwrap(); 309 | assert_eq!(test_value, *result); 310 | } 311 | } 312 | 313 | #[tokio::test] 314 | async fn test_resolve_secrets_with_failures() { 315 | let test_envsecret_arns = std::env::var("TEST_ENVSECRET_FAIL_ARNS"); 316 | if !test_envsecret_arns.is_ok() { 317 | println!("Skipping test_resolve_secrets_with_failures due to unset envvar"); 318 | return; 319 | } 320 | 321 | let test_arns = parse_test_arns(test_envsecret_arns.unwrap()); 322 | 323 | init_crypto(); 324 | 325 | for (test_arn, _) in &test_arns { 326 | let mut test_arn_map = HashMap::new(); 327 | test_arn_map.insert(test_arn.clone(), "".to_string()); 328 | 329 | let res = resolve_secrets(AwsCreds::from_env(), &mut test_arn_map).await; 330 | assert!(res.is_err()); 331 | } 332 | } 333 | } 334 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate core; 2 | 3 | use bytes::Bytes; 4 | use clap::{Parser, ValueEnum}; 5 | use dotenvy::Substitutor; 6 | use http_body_util::Full; 7 | use hyper_util::client::legacy::Client; 8 | use hyper_util::client::legacy::connect::HttpConnector; 9 | use hyper_util::rt::{TokioExecutor, TokioTimer}; 10 | use lambda_extension::{LambdaTelemetryRecord, NextEvent}; 11 | use rotel::aws_api::creds::AwsCreds; 12 | use rotel::bounded_channel::bounded; 13 | use rotel::init::agent::Agent; 14 | use rotel::init::args::{AgentRun, Exporter}; 15 | use rotel::init::misc::bind_endpoints; 16 | use rotel::init::parse; 17 | use rotel::init::wait; 18 | use rotel::listener::Listener; 19 | use rotel::topology::flush_control::{FlushBroadcast, FlushSender}; 20 | use rotel_extension::env::{EnvArnParser, resolve_secrets}; 21 | use rotel_extension::lambda; 22 | use rotel_extension::lambda::telemetry_api::TelemetryAPI; 23 | use rotel_extension::lifecycle::flush_control::{ 24 | Clock, DEFAULT_FLUSH_INTERVAL_MILLIS, FlushControl, FlushMode, 25 | }; 26 | use rustls::crypto::CryptoProvider; 27 | use std::collections::HashMap; 28 | use std::env; 29 | use std::net::SocketAddr; 30 | use std::ops::Add; 31 | use std::process::ExitCode; 32 | use std::time::{Duration, SystemTime, UNIX_EPOCH}; 33 | use tokio::task::JoinSet; 34 | use tokio::time::{Instant, Interval, timeout}; 35 | use tokio::{pin, select}; 36 | use tokio_util::sync::CancellationToken; 37 | use tower_http::BoxError; 38 | use tracing::level_filters::LevelFilter; 39 | use tracing::{debug, error, info, warn}; 40 | use tracing_subscriber::layer::SubscriberExt; 41 | use tracing_subscriber::{EnvFilter, Registry}; 42 | 43 | pub const SENDING_QUEUE_SIZE: usize = 10; 44 | 45 | // 46 | // todo: these constants should be configurable 47 | 48 | pub const LOGS_QUEUE_SIZE: usize = 50; 49 | 50 | pub const FLUSH_PIPELINE_TIMEOUT_MILLIS: u64 = 500; 51 | pub const 
FLUSH_EXPORTERS_TIMEOUT_MILLIS: u64 = 3_000; 52 | 53 | #[derive(Debug, Parser)] 54 | #[command(name = "rotel-lambda-extension")] 55 | #[command(bin_name = "rotel-lambda-extension")] 56 | struct Arguments { 57 | #[arg(long, env = "ROTEL_TELEMETRY_ENDPOINT", default_value = "0.0.0.0:8990", value_parser = parse::parse_endpoint)] 58 | telemetry_endpoint: SocketAddr, 59 | 60 | #[arg( 61 | value_enum, 62 | long, 63 | global = true, 64 | env = "ROTEL_LOG_FORMAT", 65 | default_value = "text" 66 | )] 67 | /// Log format 68 | log_format: LogFormatArg, 69 | 70 | #[arg(long, global = true, env = "ROTEL_ENVIRONMENT", default_value = "dev")] 71 | /// Environment 72 | environment: String, 73 | 74 | // This is ignored here, but we keep it so that an unknown 75 | // option does not cause an error 76 | #[arg(long)] 77 | env_file: Option<String>, 78 | 79 | #[command(flatten)] 80 | agent_args: Box<AgentRun>, 81 | } 82 | 83 | // Minimal options struct that lets us parse out the env file path on its own 84 | #[derive(Debug, Parser)] 85 | #[clap(ignore_errors = true)] 86 | struct EnvFileArguments { 87 | #[arg(long, env = "ROTEL_ENV_FILE")] 88 | env_file: Option<String>, 89 | } 90 | 91 | #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, ValueEnum)] 92 | pub enum LogFormatArg { 93 | Text, 94 | Json, 95 | } 96 | 97 | fn main() -> ExitCode { 98 | let start_time = Instant::now(); 99 | 100 | let env_opt = EnvFileArguments::parse(); 101 | if let Some(env_file) = env_opt.env_file { 102 | if let Err(e) = load_env_file(&env_file) { 103 | eprintln!("Cannot load env file: {}", e); 104 | return ExitCode::FAILURE; 105 | } 106 | } 107 | 108 | let opt = Arguments::parse(); 109 | 110 | let _guard = match setup_logging() { 111 | Ok(guard) => guard, 112 | Err(e) => { 113 | eprintln!("ERROR: failed to set up logging: {}", e); 114 | return ExitCode::FAILURE; 115 | } 116 | }; 117 | 118 | let agent = opt.agent_args; 119 | let mut port_map = match bind_endpoints(&[ 120 | agent.otlp_receiver.otlp_grpc_endpoint, 121 | agent.otlp_receiver.otlp_http_endpoint, 122 | opt.telemetry_endpoint, 123 | ]) { 124 | Ok(ports) => ports, 125 | Err(e) => { 126 | eprintln!("ERROR: {}", e); 127 | 128 | return ExitCode::from(1); 129 | } 130 | }; 131 | 132 | // Remove the telemetry listener; the rest are passed to the agent 133 | let telemetry_listener = port_map.remove(&opt.telemetry_endpoint).unwrap(); 134 | 135 | match run_extension( 136 | start_time, 137 | agent, 138 | port_map, 139 | telemetry_listener, 140 | &opt.environment, 141 | ) { 142 | Ok(_) => {} 143 | Err(e) => { 144 | error!(error = ?e, "Failed to run agent."); 145 | return ExitCode::from(1); 146 | } 147 | } 148 | 149 | ExitCode::SUCCESS 150 | } 151 | 152 | fn load_env_file(env_file: &String) -> Result<(), BoxError> { 153 | let subs = load_env_file_updates(env_file)?; 154 | 155 | for (key, val) in subs { 156 | unsafe { env::set_var(key, val) } 157 | } 158 | 159 | Ok(()) 160 | } 161 | 162 | fn load_env_file_updates(env_file: &String) -> Result<Vec<(String, String)>, BoxError> { 163 | let mut updates = Vec::new(); 164 | for item in dotenvy::from_filename_iter_custom_sub(env_file, ArnEnvSubstitutor {}) 165 | .map_err(|e| format!("failed to open env file {}: {}", env_file, e))?
166 | { 167 | let (key, val) = item.map_err(|e| format!("unable to parse line: {}", e))?; 168 | updates.push((key, val)) 169 | } 170 | 171 | Ok(updates) 172 | } 173 | 174 | #[derive(Clone)] 175 | struct ArnEnvSubstitutor; 176 | impl Substitutor for ArnEnvSubstitutor { 177 | fn substitute(&self, val: &str) -> Option<String> { 178 | // Defer ARN references; they are expanded later during secret resolution 179 | if val.starts_with("arn:") { 180 | // re-wrap the value in ${...}; the braces must be escaped in format! 181 | return Some(format!("${{{}}}", val)); 182 | } 183 | 184 | // Fall back to normal env expansion 185 | match std::env::var(val) { 186 | Ok(s) => Some(s), 187 | Err(_) => None, 188 | } 189 | } 190 | } 191 | 192 | #[tokio::main] 193 | async fn run_extension( 194 | start_time: Instant, 195 | mut agent_args: Box<AgentRun>, 196 | port_map: HashMap<SocketAddr, Listener>, 197 | telemetry_listener: Listener, 198 | env: &String, 199 | ) -> Result<(), BoxError> { 200 | let mut tapi_join_set = JoinSet::new(); 201 | let mut agent_join_set = JoinSet::new(); 202 | 203 | let client = build_hyper_client(); 204 | 205 | let (bus_tx, mut bus_rx) = bounded(10); 206 | let (logs_tx, logs_rx) = bounded(LOGS_QUEUE_SIZE); 207 | 208 | let aws_creds = AwsCreds::from_env(); 209 | 210 | // 211 | // Resolve secrets 212 | // 213 | let es = EnvArnParser::new(); 214 | let mut secure_arns = es.extract_arns_from_env(); 215 | if !secure_arns.is_empty() { 216 | if CryptoProvider::get_default().is_none() { 217 | rustls::crypto::aws_lc_rs::default_provider() 218 | .install_default() 219 | .unwrap(); 220 | } 221 | 222 | resolve_secrets(aws_creds.clone(), &mut secure_arns).await?; 223 | es.update_env_arn_secrets(secure_arns); 224 | 225 | // We must reparse arguments now that the environment has been updated 226 | agent_args = Arguments::parse().agent_args; 227 | } 228 | 229 | let r = match lambda::api::register(client.clone()).await { 230 | Ok(r) => r, 231 | Err(e) => return Err(format!("Failed to register extension: {}", e).into()), 232 | }; 233 | 234 | let (mut flush_pipeline_tx, flush_pipeline_sub) = FlushBroadcast::new().into_parts(); 235 | let (mut flush_exporters_tx, flush_exporters_sub) = FlushBroadcast::new().into_parts(); 236 | 237 | let agent_cancel = CancellationToken::new(); 238 | { 239 | // We control flushing manually, so set this to zero to disable the batch timer 240 | agent_args.batch.batch_timeout = Duration::ZERO; 241 | 242 | // Catch the default no-config mode and fall back to the blackhole exporter 243 | // instead of failing to start 244 | if agent_args.exporter.is_none() && agent_args.exporters.is_none() { 245 | if agent_args.otlp_exporter.base.endpoint.is_none() 246 | && agent_args.otlp_exporter.base.traces_endpoint.is_none() 247 | && agent_args.otlp_exporter.base.metrics_endpoint.is_none() 248 | && agent_args.otlp_exporter.base.logs_endpoint.is_none() 249 | { 250 | // todo: We should be able to start up with no config and not fail; identify the best 251 | // default mode.
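// (The blackhole exporter simply discards all telemetry, so starting with no endpoints configured is safe but exports nothing.)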
252 | info!("Automatically selecting blackhole exporter due to missing endpoint configs"); 253 | agent_args.exporter = Some(Exporter::Blackhole); 254 | } 255 | } 256 | 257 | let agent = Agent::new(agent_args, port_map, SENDING_QUEUE_SIZE, env.clone()) 258 | .with_logs_rx(logs_rx) 259 | .with_pipeline_flush(flush_pipeline_sub) 260 | .with_exporters_flush(flush_exporters_sub); 261 | let token = agent_cancel.clone(); 262 | let agent_fut = async move { agent.run(token).await }; 263 | 264 | agent_join_set.spawn(agent_fut); 265 | }; 266 | 267 | if let Err(e) = lambda::api::telemetry_subscribe( 268 | client.clone(), 269 | &r.extension_id, 270 | &telemetry_listener.bound_address()?, 271 | ) 272 | .await 273 | { 274 | return Err(format!("Failed to subscribe to telemetry: {}", e).into()); 275 | } 276 | 277 | let telemetry = TelemetryAPI::new(telemetry_listener, logs_tx); 278 | let telemetry_cancel = CancellationToken::new(); 279 | { 280 | let token = telemetry_cancel.clone(); 281 | let telemetry_fut = async move { telemetry.run(bus_tx.clone(), token).await }; 282 | tapi_join_set.spawn(telemetry_fut) 283 | }; 284 | 285 | // Set up our global flush interval; it will be reset whenever we flush 286 | let mut default_flush_interval = 287 | tokio::time::interval(Duration::from_millis(DEFAULT_FLUSH_INTERVAL_MILLIS)); 288 | default_flush_interval.tick().await; // first tick is instant 289 | 290 | info!( 291 | "Rotel Lambda Extension started in {}ms", 292 | start_time.elapsed().as_millis() 293 | ); 294 | 295 | // Must perform next_request to get the first INVOKE call 296 | let next_evt = match lambda::api::next_request(client.clone(), &r.extension_id).await { 297 | Ok(evt) => evt, 298 | Err(e) => return Err(format!("Failed to read next event: {}", e).into()), 299 | }; 300 | handle_next_response(next_evt); 301 | 302 | let mut flush_control = FlushControl::new(SystemClock {}); 303 | 304 | 'outer: loop { 305 | let mode = flush_control.pick(); 306 | let should_shutdown; 307 | 308 | match mode { 309 | FlushMode::AfterCall => { 310 | 'inner: loop { 311 | // 312 | // We must flush after every invocation 313 | // 314 | select!
{ 315 | msg = bus_rx.next() => { 316 | if let Some(evt) = msg { 317 | if let LambdaTelemetryRecord::PlatformRuntimeDone {..} = evt.record { 318 | break 'inner; 319 | } 320 | } 321 | }, 322 | e = wait::wait_for_any_task(&mut tapi_join_set) => { 323 | match e { 324 | Ok(()) => warn!("Unexpected early exit of TelemetryAPI."), 325 | Err(e) => return Err(e), 326 | } 327 | }, 328 | e = wait::wait_for_any_task(&mut agent_join_set) => { 329 | match e { 330 | Ok(()) => warn!("Unexpected early exit of agent."), 331 | Err(e) => return Err(e), 332 | } 333 | }, 334 | _ = default_flush_interval.tick() => { 335 | force_flush(&mut flush_pipeline_tx, &mut flush_exporters_tx, &mut default_flush_interval).await; 336 | } 337 | } 338 | } 339 | 340 | // 341 | // Force a flush 342 | // 343 | force_flush( 344 | &mut flush_pipeline_tx, 345 | &mut flush_exporters_tx, 346 | &mut default_flush_interval, 347 | ) 348 | .await; 349 | 350 | debug!("Received a platform runtime done message, invoking next request"); 351 | let next_evt = 352 | match lambda::api::next_request(client.clone(), &r.extension_id).await { 353 | Ok(evt) => evt, 354 | Err(e) => return Err(format!("Failed to read next event: {}", e).into()), 355 | }; 356 | 357 | should_shutdown = handle_next_response(next_evt); 358 | } 359 | FlushMode::Periodic(mut control) => { 360 | // Check if we need to force a flush; this should happen concurrently with the 361 | // function invocation. 362 | if control.should_flush() { 363 | force_flush( 364 | &mut flush_pipeline_tx, 365 | &mut flush_exporters_tx, 366 | &mut default_flush_interval, 367 | ) 368 | .await; 369 | } 370 | 371 | let next_event_fut = lambda::api::next_request(client.clone(), &r.extension_id); 372 | pin!(next_event_fut); 373 | 374 | 'periodic_inner: loop { 375 | select!
{ 376 | biased; 377 | 378 | next_resp = &mut next_event_fut => { 379 | // Reset the default flush timer on invocation, since we are checking whether to flush 380 | // at the top of the invocation anyway 381 | default_flush_interval.reset(); 382 | 383 | match next_resp { 384 | Err(e) => return Err(format!("Failed to read next event: {}", e).into()), 385 | Ok(next_evt) => { 386 | should_shutdown = handle_next_response(next_evt); 387 | 388 | break 'periodic_inner; 389 | } 390 | 391 | } 392 | } 393 | 394 | _ = bus_rx.next() => { 395 | // Mostly ignore these here for now 396 | }, 397 | 398 | e = wait::wait_for_any_task(&mut tapi_join_set) => { 399 | match e { 400 | Ok(()) => warn!("Unexpected early exit of TelemetryAPI."), 401 | Err(e) => return Err(e), 402 | } 403 | }, 404 | 405 | e = wait::wait_for_any_task(&mut agent_join_set) => { 406 | match e { 407 | Ok(()) => warn!("Unexpected early exit of agent."), 408 | Err(e) => return Err(e), 409 | } 410 | }, 411 | 412 | _ = default_flush_interval.tick() => { 413 | force_flush(&mut flush_pipeline_tx, &mut flush_exporters_tx, &mut default_flush_interval).await; 414 | } 415 | } 416 | } 417 | } 418 | } 419 | 420 | if should_shutdown { 421 | info!("Shutdown received, exiting"); 422 | break 'outer; 423 | } 424 | } 425 | 426 | // We have two seconds to shut down completely 427 | let final_stop = Instant::now().add(Duration::from_secs(2)); 428 | 429 | // Wait up to 500ms for the TelemetryAPI to shut down; this stops the logs pipeline 430 | telemetry_cancel.cancel(); 431 | wait::wait_for_tasks_with_timeout(&mut tapi_join_set, Duration::from_millis(500)).await?; 432 | 433 | agent_cancel.cancel(); 434 | 435 | // Wait for agent 436 | wait::wait_for_tasks_with_deadline(&mut agent_join_set, final_stop).await?; 437 | 438 | Ok(()) 439 | } 440 | 441 | async fn force_flush( 442 | pipeline_tx: &mut FlushSender, 443 | exporters_tx: &mut FlushSender, 444 | default_flush: &mut Interval, 445 | ) { 446 | let start = Instant::now(); 447 | match timeout( 448 | Duration::from_millis(FLUSH_PIPELINE_TIMEOUT_MILLIS), 449 | pipeline_tx.broadcast(), 450 | ) 451 | .await 452 | { 453 | Err(_) => { 454 | warn!("timeout waiting to flush pipelines"); 455 | return; 456 | } 457 | Ok(Err(e)) => { 458 | warn!("failed to flush pipelines: {}", e); 459 | return; 460 | } 461 | _ => {} 462 | } 463 | let duration = Instant::now().duration_since(start); 464 | debug!(?duration, "finished flushing pipeline"); 465 | 466 | let start = Instant::now(); 467 | match timeout( 468 | Duration::from_millis(FLUSH_EXPORTERS_TIMEOUT_MILLIS), 469 | exporters_tx.broadcast(), 470 | ) 471 | .await 472 | { 473 | Err(_) => { 474 | warn!("timeout waiting to flush exporters"); 475 | return; 476 | } 477 | Ok(Err(e)) => { 478 | warn!("failed to flush exporters: {}", e); 479 | return; 480 | } 481 | _ => {} 482 | } 483 | let duration = Instant::now().duration_since(start); 484 | debug!(?duration, "finished flushing exporters"); 485 | default_flush.reset(); 486 | } 487 | 488 | fn handle_next_response(evt: NextEvent) -> bool { 489 | match evt { 490 | NextEvent::Invoke(invoke) => debug!("Received an invoke request: {:?}", invoke), 491 | NextEvent::Shutdown(_) => return true, 492 | } 493 | 494 | false 495 | } 496 | 497 | type LoggerGuard = tracing_appender::non_blocking::WorkerGuard; 498 | 499 | // todo: match logging to the recommended lambda extension approach 500 | fn setup_logging() -> Result<LoggerGuard, BoxError> { 501 | let (non_blocking_writer, guard) = tracing_appender::non_blocking(std::io::stdout()); 502 | 503 | let filter =
EnvFilter::builder() 504 | .with_default_directive(LevelFilter::INFO.into()) 505 | .from_env()? 506 | .add_directive("opentelemetry=warn".parse()?) 507 | .add_directive("opentelemetry_sdk=warn".parse()?); 508 | 509 | let is_json = env::var("AWS_LAMBDA_LOG_FORMAT") 510 | .unwrap_or_default() 511 | .to_uppercase() 512 | == "JSON"; 513 | 514 | let layer = tracing_subscriber::fmt::layer() 515 | .with_writer(non_blocking_writer) 516 | // disable printing of the module path 517 | .with_target(false) 518 | // CloudWatch will add the time 519 | .without_time() 520 | // CloudWatch doesn't play nice with escape codes 521 | .with_ansi(false); 522 | 523 | if is_json { 524 | let file_layer = layer.json(); 525 | 526 | let subscriber = Registry::default().with(filter).with(file_layer); 527 | tracing::subscriber::set_global_default(subscriber).unwrap(); 528 | } else { 529 | let file_layer = layer.compact(); 530 | 531 | let subscriber = Registry::default().with(filter).with(file_layer); 532 | tracing::subscriber::set_global_default(subscriber).unwrap(); 533 | } 534 | 535 | Ok(guard) 536 | } 537 | 538 | fn build_hyper_client() -> Client<HttpConnector, Full<Bytes>> { 539 | hyper_util::client::legacy::Client::builder(TokioExecutor::new()) 540 | // todo: make configurable 541 | .pool_idle_timeout(Duration::from_secs(30)) 542 | .pool_max_idle_per_host(5) 543 | .timer(TokioTimer::new()) 544 | .build::<_, Full<Bytes>>(HttpConnector::new()) 545 | } 546 | 547 | #[cfg(test)] 548 | mod test { 549 | use super::*; 550 | use std::io::Write; 551 | use tempfile::NamedTempFile; 552 | 553 | #[test] 554 | fn test_env_var_subs() { 555 | let tf = write_env_file(vec![ 556 | "ROTEL_FOO=nottouched", 557 | "ROTEL_SUB=\"Bearer ${TOKEN}\"", 558 | "ROTEL_DOUBLE_SUB=${TEAM}-${TOKEN}", 559 | "ROTEL_ESCAPED=\"NotMe\\${TEAM}\"", 560 | ]); 561 | 562 | unsafe { std::env::set_var("TOKEN", "123abc") }; 563 | unsafe { std::env::set_var("TEAM", "frontend") }; 564 | 565 | let tf_path = tf.path().to_str().unwrap().to_string(); 566 | let updates = load_env_file_updates(&tf_path).unwrap(); 567 | 568 | assert_eq!( 569 | vec![ 570 | ("ROTEL_FOO".to_string(), "nottouched".to_string()), 571 | ("ROTEL_SUB".to_string(), "Bearer 123abc".to_string()), 572 | ( 573 | "ROTEL_DOUBLE_SUB".to_string(), 574 | "frontend-123abc".to_string() 575 | ), 576 | ("ROTEL_ESCAPED".to_string(), "NotMe${TEAM}".to_string()) 577 | ], 578 | updates 579 | ); 580 | } 581 | 582 | fn write_env_file(envs: Vec<&str>) -> NamedTempFile { 583 | let mut tf = NamedTempFile::new().unwrap(); 584 | 585 | for env in envs { 586 | tf.write_all(format!("{}\n", env).as_ref()).unwrap(); 587 | } 588 | tf.flush().unwrap(); 589 | 590 | tf 591 | } 592 | } 593 | 594 | #[derive(Clone)] 595 | struct SystemClock; 596 | 597 | impl Clock for SystemClock { 598 | fn now(&self) -> u64 { 599 | SystemTime::now() 600 | .duration_since(UNIX_EPOCH) 601 | .unwrap() 602 | .as_millis() as u64 603 | } 604 | } 605 | --------------------------------------------------------------------------------