├── .dockerignore
├── .editorconfig
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── bug-report.yml
    │   ├── config.yml
    │   └── issue.yml
    ├── PULL_REQUEST_TEMPLATE.md
    ├── gh-runner-setup.sh
    └── workflows
    │   ├── chappy.yaml
    │   ├── engines.yaml
    │   ├── helper-ballista.yaml
    │   ├── helper-clickhouse.yaml
    │   ├── helper-hadoop.yaml
    │   ├── helper-trino.yaml
    │   ├── labels.yaml
    │   ├── semantic-pr.yaml
    │   ├── shell.yaml
    │   └── style-check.yaml
├── .gitignore
├── .isort.cfg
├── .markdownlint.yaml
├── .terraformignore
├── .yamllint.yaml
├── LICENSE
├── README.md
├── chappy
    ├── .dockerignore
    ├── Cargo.lock
    ├── Cargo.toml
    ├── examples
    │   ├── Cargo.toml
    │   ├── client_async.rs
    │   ├── client_sync.rs
    │   ├── helpers.rs
    │   ├── multi_clients.rs
    │   ├── n_to_n.rs
    │   ├── server.rs
    │   └── slow_client.rs
    ├── interceptor
    │   ├── Cargo.toml
    │   └── src
    │   │   ├── bindings.rs
    │   │   ├── conf.rs
    │   │   ├── debug_fmt.rs
    │   │   ├── lib.rs
    │   │   └── utils.rs
    ├── perforator
    │   ├── Cargo.toml
    │   └── src
    │   │   ├── binding_service.rs
    │   │   ├── conf.rs
    │   │   ├── forwarder.rs
    │   │   ├── fwd_protocol.rs
    │   │   ├── lib.rs
    │   │   ├── main.rs
    │   │   ├── metrics.rs
    │   │   ├── perforator.rs
    │   │   ├── quic_utils.rs
    │   │   ├── shutdown.rs
    │   │   └── spawn.rs
    ├── seed
    │   ├── Cargo.toml
    │   ├── build.rs
    │   ├── seed.proto
    │   └── src
    │   │   ├── address_stream.rs
    │   │   ├── cluster_manager
    │   │       ├── manager.rs
    │   │       ├── message.rs
    │   │       ├── mod.rs
    │   │       ├── state.rs
    │   │       └── summary.rs
    │   │   ├── lib.rs
    │   │   ├── main.rs
    │   │   ├── registered_endpoints.rs
    │   │   └── seed_service.rs
    └── util
    │   ├── Cargo.toml
    │   └── src
    │       ├── awaitable_map.rs
    │       ├── lib.rs
    │       ├── protocol.rs
    │       ├── tcp_connect.rs
    │       ├── test.rs
    │       └── tracing_helpers.rs
├── cli
    ├── common.py
    ├── core.py
    ├── flags.py
    ├── main.py
    ├── plugins
    │   ├── ballista.py
    │   ├── chappy.py
    │   ├── clickhouse.py
    │   ├── dask.py
    │   ├── databend.py
    │   ├── dremio.py
    │   ├── lambdacli.py
    │   ├── monitoring.py
    │   ├── scaling.py
    │   ├── scheduler.py
    │   ├── spark.py
    │   ├── tfcloud.py
    │   └── trino.py
    └── requirements.txt
├── docker
    ├── ballista
    │   ├── README.md
    │   ├── distributed-handler.py
    │   ├── distributed.Dockerfile
    │   ├── docker-compose.yaml
    │   ├── standalone-handler.py
    │   └── standalone.Dockerfile
    ├── chappy
    │   ├── build.Dockerfile
    │   ├── dev-handler.py
    │   ├── dev.Dockerfile
    │   └── docker-compose.yaml
    ├── cli
    │   ├── Dockerfile
    │   ├── README.md
    │   ├── docker-compose.yaml
    │   └── entrypoint.sh
    ├── clickhouse
    │   ├── Dockerfile
    │   ├── README.md
    │   ├── config.xml
    │   ├── docker-compose.yaml
    │   └── lambda-handler.py
    ├── dask
    │   ├── Dockerfile
    │   ├── README.md
    │   ├── docker-compose.yaml
    │   └── lambda-handler.py
    ├── databend
    │   ├── Dockerfile
    │   ├── README.md
    │   ├── bootstrap.sh
    │   ├── databend-meta.toml
    │   ├── databend-query.toml
    │   ├── docker-compose.yaml
    │   └── lambda-handler.py
    ├── dremio
    │   ├── Dockerfile
    │   ├── README.md
    │   ├── docker-compose.yaml
    │   ├── dremio-env
    │   ├── dremio.conf
    │   └── lambda-handler.py
    ├── scaling
    │   ├── Dockerfile
    │   ├── docker-compose.yaml
    │   └── lambda-handler.py
    ├── spark
    │   ├── Dockerfile
    │   ├── README.md
    │   ├── docker-compose.yaml
    │   ├── lambda-handler.py
    │   ├── spark-class
    │   └── spark-defaults.conf
    └── trino
    │   ├── Dockerfile
    │   ├── README.md
    │   ├── docker-compose.yaml
    │   ├── lambda-handler.py
    │   ├── metastore-site.xml
    │   └── trino-etc
    │       ├── catalog
    │           └── hive.properties
    │       ├── config.properties
    │       ├── jvm.config
    │       └── node.properties
├── docs
    ├── scaling_metrics.md
    └── standalone_engine_metrics.md
├── infra
    ├── common.hcl
    ├── common
    │   ├── env
    │   │   └── conf.tf
    │   └── lambda
    │   │   ├── iam.tf
    │   │   ├── inputs.tf
    │   │   ├── lambda.tf
    │   │   └── outputs.tf
    ├── monitoring
    │   ├── README.md
    │   └── bigquery
    │   │   ├── .terraform.lock.hcl
    │   │   ├── engine_durations.json
    │   │   ├── main.tf
    │   │   ├── scaling_durations.json
    │   │   └── terragrunt.hcl
    ├── runtime
    │   ├── README.md
    │   ├── ballista
    │   │   ├── .terraform.lock.hcl
    │   │   ├── main.tf
    │   │   └── terragrunt.hcl
    │   ├── build_and_print.sh
    │   ├── chappy
    │   │   ├── .terraform.lock.hcl
    │   │   ├── ecs.tf
    │   │   ├── main.tf
    │   │   └── terragrunt.hcl
    │   ├── clickhouse
    │   │   ├── .terraform.lock.hcl
    │   │   ├── main.tf
    │   │   └── terragrunt.hcl
    │   ├── core
    │   │   ├── .terraform.lock.hcl
    │   │   ├── main.tf
    │   │   └── terragrunt.hcl
    │   ├── dask
    │   │   ├── .terraform.lock.hcl
    │   │   ├── main.tf
    │   │   └── terragrunt.hcl
    │   ├── databend
    │   │   ├── .terraform.lock.hcl
    │   │   ├── main.tf
    │   │   └── terragrunt.hcl
    │   ├── dremio
    │   │   ├── .terraform.lock.hcl
    │   │   ├── main.tf
    │   │   └── terragrunt.hcl
    │   ├── lambdacli
    │   │   ├── .terraform.lock.hcl
    │   │   ├── main.tf
    │   │   └── terragrunt.hcl
    │   ├── scaling
    │   │   ├── .terraform.lock.hcl
    │   │   ├── main.tf
    │   │   └── terragrunt.hcl
    │   ├── scheduler
    │   │   ├── .terraform.lock.hcl
    │   │   ├── main.tf
    │   │   └── terragrunt.hcl
    │   ├── spark
    │   │   ├── .terraform.lock.hcl
    │   │   ├── main.tf
    │   │   └── terragrunt.hcl
    │   └── trino
    │   │   ├── .terraform.lock.hcl
    │   │   ├── main.tf
    │   │   └── terragrunt.hcl
    ├── terragrunt.cloud.hcl
    └── terragrunt.local.hcl
├── l12n
└── l12n-shell


/.dockerignore:
--------------------------------------------------------------------------------
1 | **/.terraform
2 | *.generated.tf
3 | *.generated.tfvars
4 | __pycache__
5 | *.pyc
6 | .env
7 | **/target
8 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | # top-most EditorConfig file
 2 | root = true
 3 | 
 4 | [*]
 5 | charset = utf-8
 6 | end_of_line = lf
 7 | indent_size = 2
 8 | indent_style = space
 9 | insert_final_newline = true
10 | max_line_length = 80
11 | 
12 | [*.py]
13 | indent_size = 4
14 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug-report.yml:
--------------------------------------------------------------------------------
 1 | name: Bug Report
 2 | description: File a bug report to help us improve
 3 | title: "Bug Report Title"
 4 | body:
 5 |   - type: textarea
 6 |     attributes:
 7 |       label: Current Behavior
 8 |       description: A concise description of what you're experiencing.
 9 |     validations:
10 |       required: true
11 |   - type: textarea
12 |     attributes:
13 |       label: Expected Behavior
14 |       description: A concise description of what you expected to happen.
15 |     validations:
16 |       required: true
17 |   - type: textarea
18 |     attributes:
19 |       label: Steps To Reproduce
20 |       description: Steps to reproduce the behavior.
21 |     validations:
22 |       required: true
23 |   - type: textarea
24 |     attributes:
25 |       label: Environment
26 |       description: |
27 |         Describe the environment you've encountered the bug in, e.g., the operating system, the config file and the version
28 |     validations:
29 |       required: true
30 |   - type: textarea
31 |     attributes:
32 |       label: Anything else?
33 |       description: |
34 |         Links? References? Anything that will give us more context about the issue you are encountering!
35 | 
36 |         Tip: You can attach images, recordings, or log files by clicking this area to highlight it and then dragging files in.
37 |     validations:
38 |       required: false
39 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false
2 | contact_links:
3 |   - name: Usage Question
4 |     url: https://github.com/cloudfuse-io/lambdatization/discussions/new?category=q-a
5 |     about: Just want to ask a question? Go on
6 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/issue.yml:
--------------------------------------------------------------------------------
 1 | name: Issue
 2 | description: File an issue with a definition of done
 3 | title: "Story Title"
 4 | body:
 5 |   - type: textarea
 6 |     attributes:
 7 |       label: Description
 8 |       description: A detailed user-facing description of the story.
 9 |     validations:
10 |       required: true
11 |   - type: textarea
12 |     attributes:
13 |       label: Definition of Done
14 |       description: |
15 |         A concise list of tasks that must be addressed before this story can be considered done.
16 |     validations:
17 |       required: true
18 |   - type: textarea
19 |     attributes:
20 |       label: Anything else?
21 |       description: |
22 |         Links? References? Anything that will give us more context about the issue you are encountering!
23 | 
24 |         Tip: You can attach images, recordings, or log files by clicking this area to highlight it and then dragging files in.
25 |     validations:
26 |       required: false
27 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | ### :newspaper: PR description
 2 | 
 3 | <!--
 4 | Describe the objective(s) of this PR.
 5 | 
 6 | If this PR is linked to an issue, you can specify its reference instead:
 7 | Closes #
 8 | -->
 9 | 
10 | ### :memo: Reviewer instructions
11 | 
12 | <!--
13 | Provide indications to the reviewer:
14 | - where should he start?
15 | - are their changes that are side effects to the original PR objective?
16 | -->
17 | 


--------------------------------------------------------------------------------
/.github/gh-runner-setup.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | echo "Target distribution should be Ubuntu Focal 20.04"
 4 | 
 5 | if [ -z "$2" ]
 6 |   then
 7 |     echo "Provide runner address as first argument and Github token as second"
 8 | fi
 9 | 
10 | 
11 | REMOTE_ADDRESS=$1
12 | GITHUB_TOKEN=$2
13 | REMOTE_HOME="/home/ubuntu"
14 | REMOTE_USER=ubuntu
15 | REMOTE=$REMOTE_USER@$REMOTE_ADDRESS
16 | 
17 | ssh -o "StrictHostKeyChecking=no" $REMOTE 'bash -s' << ENDSSH
18 | set -e
19 | 
20 | # install docker
21 | sudo apt-get update
22 | sudo apt-get install -y \
23 |     ca-certificates \
24 |     curl \
25 |     gnupg \
26 | 	unzip \
27 | 	jq \
28 | 	python3-pip \
29 |     lsb-release
30 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
31 | sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu focal stable"
32 | sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin
33 | sudo usermod -aG docker \$USER
34 | 
35 | 
36 | mkdir actions-runner && cd actions-runner
37 | curl -o actions-runner-linux-x64-2.299.1.tar.gz -L https://github.com/actions/runner/releases/download/v2.299.1/actions-runner-linux-x64-2.299.1.tar.gz
38 | tar xzf ./actions-runner-linux-x64-2.299.1.tar.gz
39 | ./config.sh --url https://github.com/cloudfuse-io/lambdatization --token $GITHUB_TOKEN --unattended --name github-action-runner-1-renamed
40 | mkdir /home/ubuntu/hostedtoolcache
41 | sudo ./svc.sh install
42 | sudo ./svc.sh start
43 | ENDSSH
44 | 


--------------------------------------------------------------------------------
/.github/workflows/chappy.yaml:
--------------------------------------------------------------------------------
 1 | name: "Chappy"
 2 | on: pull_request
 3 | env:
 4 |   DEBIAN_FRONTEND: noninteractive
 5 | jobs:
 6 |   chappy:
 7 |     name: Lint and tests
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |       - uses: actions/checkout@v3
11 |       - uses: actions-rs/toolchain@v1
12 |         with:
13 |           toolchain: stable
14 |       - name: Install Protoc
15 |         uses: arduino/setup-protoc@v1
16 |         with:
17 |           version: '3.x'
18 |       - uses: actions-rs/cargo@v1
19 |         with:
20 |           command: clippy
21 |           args: --manifest-path chappy/Cargo.toml
22 |       - uses: actions-rs/cargo@v1
23 |         with:
24 |           command: test
25 |           args: --manifest-path chappy/Cargo.toml
26 | 


--------------------------------------------------------------------------------
/.github/workflows/engines.yaml:
--------------------------------------------------------------------------------
 1 | name: Lambdatized Engines
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - main
 6 |   pull_request:
 7 |     types:
 8 |       - opened
 9 |       - synchronize
10 | 
11 | concurrency:
12 |   group: ${{ github.workflow }}-${{ github.ref }}
13 |   cancel-in-progress: ${{ github.event_name == 'pull_request' }}
14 | 
15 | jobs:
16 |   engine:
17 |     strategy:
18 |       matrix:
19 |         engine_name: [spark, dremio, databend, dask, trino, ballista, clickhouse]
20 |     runs-on: ubuntu-20.04
21 |     steps:
22 |       - name: Checkout
23 |         uses: actions/checkout@v3
24 |       - name: Build CLI
25 |         run: |
26 |           L12N_BUILD=1 ./l12n-shell
27 |       - name: Build Engine
28 |         run: |
29 |           COMPOSE_FILE=docker/${{ matrix.engine_name }}/docker-compose.yaml
30 |           ./l12n-shell l12n build-images --compose-file=$COMPOSE_FILE
31 | 
32 |   branch-protection:
33 |     needs:
34 |       - engine
35 |     if: always()
36 |     runs-on: ubuntu-latest
37 |     name: Branch Protection
38 |     steps:
39 |       - name: Failure
40 |         if: contains(join(needs.*.result, ','), 'failure') || contains(join(needs.*.result, ','), 'cancelled')
41 |         run: |
42 |           # This check runs after any other job failed.
43 |           exit 1
44 |       - name: Success
45 |         run: |
46 |           # This check runs after all other jobs are done or skipped
47 |           exit 0
48 | 


--------------------------------------------------------------------------------
/.github/workflows/helper-ballista.yaml:
--------------------------------------------------------------------------------
 1 | name: Ballista
 2 | on:
 3 |   workflow_dispatch:
 4 |     inputs:
 5 |       ballista-ref:
 6 |         required: true
 7 |         description: Branch, tag or SHA to checkout (e.g. 0.9.0)
 8 | 
 9 | 
10 | jobs:
11 |   build:
12 |     name: Ballista release
13 |     runs-on: ubuntu-20.04
14 |     steps:
15 |       - uses: actions/checkout@v3
16 |         with:
17 |           repository: 'apache/arrow-ballista'
18 |           ref: ${{ github.event.inputs.ballista-ref }}
19 |       - name: Dependencies
20 |         run: |
21 |           sudo apt-get -y install \
22 |             libssl-dev \
23 |             openssl \
24 |             zlib1g \
25 |             zlib1g-dev \
26 |             libpq-dev \
27 |             cmake \
28 |             protobuf-compiler \
29 |             curl \
30 |             unzip
31 |       - uses: actions-rs/toolchain@v1
32 |         with:
33 |           toolchain: stable
34 |       - uses: actions-rs/cargo@v1
35 |         with:
36 |           command: build
37 |           args: --features "flight-sql s3" --release --bin ballista-scheduler --bin ballista-executor --bin ballista-cli
38 |       - name: Release
39 |         uses: softprops/action-gh-release@v1
40 |         with:
41 |           tag_name: ballista-${{ github.event.inputs.ballista-ref }}
42 |           body: Plain build of the ${{ github.event.inputs.ballista-ref }} ref of Ballista
43 |           files: |
44 |             target/release/ballista-scheduler
45 |             target/release/ballista-executor
46 |             target/release/ballista-cli
47 | 


--------------------------------------------------------------------------------
/.github/workflows/helper-clickhouse.yaml:
--------------------------------------------------------------------------------
 1 | name: Clickhouse
 2 | on:
 3 |   workflow_dispatch:
 4 |     inputs:
 5 |       clickhouse-ref:
 6 |         required: true
 7 |         description: Ref to checkout from cloudfuse-io/ClickHouse, should use clang-15 (e.g. v22.10.2.11-patch)
 8 | 
 9 | 
10 | env:
11 |   AWS_ACCESS_KEY_ID: "${{ secrets.BBTOOL_KEY }}"
12 |   AWS_SECRET_ACCESS_KEY: "${{ secrets.BBTOOL_SECRET }}"
13 |   S3_BUILDS_BUCKET: "cloudfuse-builds"
14 |   S3_TEST_REPORTS_BUCKET: "cloudfuse-builds"
15 |   S3_URL: "https://s3.us-east-2.amazonaws.com"
16 | 
17 | jobs:
18 |   build:
19 |     name: clickhouse release
20 |     runs-on: self-hosted
21 |     steps:
22 |       - uses: actions/checkout@v3
23 |         with:
24 |           repository: 'cloudfuse-io/ClickHouse'
25 |           ref: ${{ github.event.inputs.clickhouse-ref }}
26 |           fetch-depth: 0
27 |           submodules: 'recursive'
28 |       - uses: actions/setup-python@v4
29 |         with:
30 |           python-version: '3.10'
31 |           architecture: 'x64'
32 |         env:
33 |           AGENT_TOOLSDIRECTORY: /home/ubuntu/hostedtoolcache
34 |       - name: Pip install
35 |         run: pip install boto3 boto3 unidiff pygithub
36 |       - name: Build deb
37 |         run: |
38 |           cd tests/ci
39 |           python3 build_check.py "package_release"
40 |       - name: Build image
41 |         run: |
42 |           cd tests/ci
43 |           python3 docker_server.py --no-push-images --no-alpine --no-reports
44 |       - name: Login to GitHub Container Registry
45 |         uses: docker/login-action@v2
46 |         with:
47 |           registry: ghcr.io
48 |           username: ${{ github.actor }}
49 |           password: ${{ secrets.GITHUB_TOKEN }}
50 |       - name: Push imgage to ghcr
51 |         run: |
52 |           CLI_IMAGE="ghcr.io/${{ github.repository }}:clickhouse-${{ github.event.inputs.clickhouse-ref }}"
53 |           docker tag clickhouse/clickhouse-server:head-amd64 "$CLI_IMAGE"
54 |           docker push "$CLI_IMAGE"
55 | 


--------------------------------------------------------------------------------
/.github/workflows/helper-hadoop.yaml:
--------------------------------------------------------------------------------
 1 | name: Hadoop
 2 | on:
 3 |   workflow_dispatch:
 4 |     inputs:
 5 |       hadoop-version:
 6 |         required: true
 7 |         description: The Hadoop version (e.g. 3.2.0), should be in apache.archive.org
 8 | 
 9 | 
10 | jobs:
11 |   build:
12 |     name: Hadoop mirroring
13 |     runs-on: ubuntu-20.04
14 |     steps:
15 |       - name: Download
16 |         run: curl -L https://archive.apache.org/dist/hadoop/common/hadoop-${{ github.event.inputs.hadoop-version }}/hadoop-${{ github.event.inputs.hadoop-version }}.tar.gz -o hadoop-${{ github.event.inputs.hadoop-version }}.tar.gz
17 |       - name: Release
18 |         uses: softprops/action-gh-release@v1
19 |         with:
20 |           tag_name: hadoop-${{ github.event.inputs.hadoop-version }}
21 |           body: Mirroring of the Hadoop ${{ github.event.inputs.hadoop-version }} package
22 |           files: hadoop-${{ github.event.inputs.hadoop-version }}.tar.gz
23 | 


--------------------------------------------------------------------------------
/.github/workflows/helper-trino.yaml:
--------------------------------------------------------------------------------
 1 | name: Trino
 2 | on:
 3 |   workflow_dispatch:
 4 |     inputs:
 5 |       trino-version:
 6 |         required: true
 7 |         description: The Trino version (e.g. 378), should be compatible with the patch
 8 | 
 9 | 
10 | jobs:
11 |   build:
12 |     name: Trino Build
13 |     runs-on: ubuntu-20.04
14 |     steps:
15 |       - uses: actions/checkout@v3
16 |         with:
17 |           repository: 'cloudfuse-io/trino'
18 |           ref: ${{ github.event.inputs.trino-version }}-patch
19 |       - uses: actions/setup-java@v3
20 |         with:
21 |           distribution: 'zulu'
22 |           java-version: 17
23 |           cache: 'maven'
24 |       - name: Build
25 |         run: ./mvnw -pl core/trino-main,core/trino-server clean install -DskipTests
26 |       - name: Release
27 |         uses: softprops/action-gh-release@v1
28 |         with:
29 |           tag_name: trino-server-${{ github.event.inputs.trino-version }}
30 |           body: Custom build of Trino Server version ${{ github.event.inputs.trino-version }} disabling file descriptor checks
31 |           files: core/trino-server/target/trino-server-${{ github.event.inputs.trino-version }}.tar.gz
32 | 


--------------------------------------------------------------------------------
/.github/workflows/labels.yaml:
--------------------------------------------------------------------------------
 1 | name: Labels
 2 | on:
 3 |   pull_request:
 4 |     types:
 5 |       - opened
 6 |       - labeled
 7 |       - unlabeled
 8 |       - synchronize
 9 | 
10 | jobs:
11 |   check-labels:
12 |     name: Check Labels
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |       - name: No Labels
16 |         run: |
17 |           # Fail if the PR has no labels.
18 |           echo '${{ toJson(github.event.pull_request.labels.*.name) }}' |
19 |             jq -re 'length > 0'
20 |       - name: Blocked
21 |         run: |
22 |           # Fail if the PR has the 'blocked' label.
23 |           echo '${{ toJson(github.event.pull_request.labels.*.name) }}' |
24 |             jq -re 'all(. != "blocked")'
25 | 


--------------------------------------------------------------------------------
/.github/workflows/semantic-pr.yaml:
--------------------------------------------------------------------------------
 1 | name: Semantic PR
 2 | on:
 3 |   pull_request_target:
 4 |     types:
 5 |       - opened
 6 |       - reopened
 7 |       - edited
 8 |       - synchronize
 9 | 
10 | jobs:
11 |   validate-title:
12 |     name: Validate PR title
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |       - uses: amannn/action-semantic-pull-request@v5
16 |         env:
17 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
18 | 


--------------------------------------------------------------------------------
/.github/workflows/shell.yaml:
--------------------------------------------------------------------------------
 1 | name: l12n-shell tests
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - main
 6 |   pull_request:
 7 |     types:
 8 |       - opened
 9 |       - synchronize
10 | 
11 | concurrency:
12 |   group: ${{ github.workflow }}-${{ github.ref }}
13 |   cancel-in-progress: ${{ github.event_name == 'pull_request' }}
14 | 
15 | jobs:
16 |   tests:
17 |     runs-on: ubuntu-20.04
18 |     steps:
19 |       - name: Checkout
20 |         uses: actions/checkout@v3
21 |       - name: Create env dir
22 |         run: |
23 |           mkdir envs
24 |           sudo ln -s $(pwd)/l12n-shell /usr/local/bin/l12n-shell
25 |           cd envs
26 |           echo "L12N_VAR=myvar" > .env
27 |       - name: Build CLI
28 |         run: |
29 |           cd envs
30 |           L12N_BUILD=1 l12n-shell
31 |       - name: Command as argument
32 |         run: |
33 |           cd envs
34 |           result=$(l12n-shell echo hello)
35 |           [[ "$result" = "hello" ]]
36 |       - name: Command piped
37 |         run: |
38 |           cd envs
39 |           result=$(echo "echo world" | l12n-shell )
40 |           [[ "$result" = "world" ]]
41 |       - name: Check env
42 |         run: |
43 |           cd envs
44 |           result=$(echo 'echo $L12N_VAR' | l12n-shell )
45 |           [[ "$result" = "myvar" ]]
46 |       - name: Check l12n
47 |         run: |
48 |           l12n-shell l12n
49 |       - name: Recursive l12n-shell
50 |         run: |
51 |           cd envs
52 |           l12n-shell ../l12n-shell env | grep L12N_VAR=myvar
53 | 


--------------------------------------------------------------------------------
/.github/workflows/style-check.yaml:
--------------------------------------------------------------------------------
 1 | name: "Style Check"
 2 | on: pull_request
 3 | env:
 4 |   DEBIAN_FRONTEND: noninteractive
 5 | jobs:
 6 |   style:
 7 |     name: Style Check
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 | 
11 |       - uses: actions/checkout@v3
12 |         with:
13 |           fetch-depth: 0
14 | 
15 |       - uses: actions/setup-python@v4
16 |         with:
17 |           python-version: '3.9'
18 | 
19 |       - name: Black
20 |         run: |
21 |           pip install --upgrade black
22 |           black --check --diff --include "(\\.pyi?|\\.ipynb)$" --exclude "" .
23 | 
24 |       - name: isort
25 |         run: |
26 |           pip install --upgrade isort
27 |           isort . -c
28 | 
29 |       - name: yamllint
30 |         run: |
31 |           pip install --upgrade yamllint
32 |           git ls-files '*.yaml' '.yml' | xargs yamllint
33 | 
34 |       - name: Terraform fmt
35 |         run: |
36 |           terraform fmt -check -recursive -diff
37 | 
38 |       - name: Markdown Lint
39 |         run: |
40 |           npm install --no-save markdownlint-cli
41 |           npx markdownlint --ignore node_modules .
42 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Local .terraform directories
 2 | **/.terraform
 3 | 
 4 | # .tfstate files
 5 | *.tfstate
 6 | *.tfstate.*
 7 | 
 8 | # Crash log files
 9 | crash.log
10 | 
11 | # Ignore any .tfvars files that are generated automatically for each Terraform run. Most
12 | # .tfvars files are managed as part of configuration and so should be included in
13 | # version control.
14 | #
15 | # example.tfvars
16 | 
17 | # Ignore generated files 
18 | *.generated.tf
19 | *.generated.tfvars
20 | 
21 | # Include override files you do wish to add to version control using negated pattern
22 | #
23 | # !example_override.tf
24 | 
25 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan
26 | # example: *tfplan*
27 | 
28 | .env
29 | envs
30 | 
31 | __pycache__
32 | *.pyc 
33 | 
34 | nohup.out
35 | 
36 | .vscode
37 | .venv
38 | 
39 | target
40 | 


--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | profile=black
3 | 


--------------------------------------------------------------------------------
/.markdownlint.yaml:
--------------------------------------------------------------------------------
1 | default: false
2 | 
3 | # Tags
4 | html: false
5 | indentation: false
6 | links: false
7 | spelling: true
8 | whitespace: true
9 | 


--------------------------------------------------------------------------------
/.terraformignore:
--------------------------------------------------------------------------------
1 | *
2 | !infra/
3 | 


--------------------------------------------------------------------------------
/.yamllint.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | 
 3 | yaml-files:
 4 |   - '*.yaml'
 5 |   - '*.yml'
 6 | 
 7 | rules:
 8 |   braces: enable
 9 |   brackets: enable
10 |   colons: enable
11 |   commas: enable
12 |   comments:
13 |     level: warning
14 |   comments-indentation:
15 |     level: warning
16 |   document-end: disable
17 |   document-start: disable
18 |   empty-lines: enable
19 |   empty-values: disable
20 |   hyphens: enable
21 |   indentation: enable
22 |   key-duplicates: enable
23 |   key-ordering: disable
24 |   line-length: disable
25 |   new-line-at-end-of-file: enable
26 |   new-lines: enable
27 |   octal-values: disable
28 |   quoted-strings: disable
29 |   trailing-spaces: enable
30 |   truthy: disable
31 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2022 cloudfuse
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/chappy/.dockerignore:
--------------------------------------------------------------------------------
1 | target
2 | 


--------------------------------------------------------------------------------
/chappy/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [workspace]
 2 | members = ["examples", "interceptor", "perforator", "seed", "util"]
 3 | 
 4 | [workspace.dependencies]
 5 | anyhow = "1.0.71"
 6 | chrono = "0.4"
 7 | futures = "0.3.25"
 8 | ipnet = "2.7.1"
 9 | lazy_static = "1.4.0"
10 | libloading = "0.8.0"
11 | nix = "0.26.1"
12 | opentelemetry = "0.19.0"
13 | opentelemetry-otlp = { version = "0.12.0", features = [
14 |     "reqwest-client",
15 |     "reqwest-rustls",
16 |     "http-proto",
17 | ] }
18 | prost = "0.11.7"
19 | quinn = "0.10.1"
20 | quinn-proto = "0.10.1"
21 | rand = "0.8.5"
22 | rcgen = "0.10.0"
23 | rustls = "0.21.1"
24 | socket2 = "0.5.1"
25 | tokio = { version = "1.24.2", features = ["macros"] }
26 | tokio-metrics = "0.2.2"
27 | tokio-stream = "0.1.11"
28 | tonic = "0.9.1"
29 | tower = "0.4.13"
30 | tracing = "0.1.37"
31 | tracing-opentelemetry = "0.19.0"
32 | tracing-subscriber = { version = "0.3.16", features = ["env-filter"] }
33 | 


--------------------------------------------------------------------------------
/chappy/examples/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "chappy-examples"
 3 | version = "0.1.0"
 4 | edition = "2021"
 5 | 
 6 | [[bin]]
 7 | name = "example-client-sync"
 8 | path = "client_sync.rs"
 9 | 
10 | [[bin]]
11 | name = "example-client-async"
12 | path = "client_async.rs"
13 | 
14 | [[bin]]
15 | name = "example-client-slow"
16 | path = "slow_client.rs"
17 | 
18 | [[bin]]
19 | name = "example-multi-clients"
20 | path = "multi_clients.rs"
21 | 
22 | [[bin]]
23 | name = "example-n-to-n"
24 | path = "n_to_n.rs"
25 | 
26 | [[bin]]
27 | name = "example-server"
28 | path = "server.rs"
29 | 
30 | [lib]
31 | name = "helpers"
32 | path = "helpers.rs"
33 | 
34 | [dependencies]
35 | env_logger = "0.10.0"
36 | log = "0.4.17"
37 | rand = "0.8.5"
38 | tokio = { version = "1.28.1" }
39 | tokio-metrics = "0.2.2"
40 | 


--------------------------------------------------------------------------------
/chappy/examples/client_async.rs:
--------------------------------------------------------------------------------
 1 | use helpers::send_pseudo_random_async;
 2 | 
 3 | use log::info;
 4 | use std::env;
 5 | use tokio::net::TcpStream;
 6 | 
 7 | #[allow(non_snake_case)]
 8 | #[tokio::main(flavor = "current_thread")]
 9 | async fn main() {
10 |     env_logger::Builder::from_default_env()
11 |         .format_timestamp_millis()
12 |         .init();
13 |     let monitor = tokio_metrics::TaskMonitor::new();
14 | 
15 |     let SERVER_VIRTUAL_IP: String = env::var("SERVER_VIRTUAL_IP").unwrap();
16 |     let BATCH_SIZE: usize = env::var("BATCH_SIZE").unwrap().parse().unwrap();
17 |     let BYTES_SENT: usize = env::var("BYTES_SENT").unwrap().parse().unwrap();
18 |     let NB_BATCH: usize = BYTES_SENT / BATCH_SIZE;
19 |     let DEBUG_EVERY_BATCH: usize = NB_BATCH / 10;
20 | 
21 |     let server_address = format!("{}:8080", SERVER_VIRTUAL_IP);
22 |     info!("Connecting to echo server {}", server_address);
23 |     let stream = monitor
24 |         .instrument(TcpStream::connect(&server_address))
25 |         .await
26 |         .unwrap();
27 |     info!("Connection to echo server successful");
28 | 
29 |     let (write_handle, read_handle) = send_pseudo_random_async(
30 |         stream,
31 |         BATCH_SIZE,
32 |         NB_BATCH,
33 |         DEBUG_EVERY_BATCH,
34 |         Some(&monitor),
35 |     )
36 |     .await;
37 |     write_handle.await.unwrap();
38 |     info!("Write {} Bytes completed", BYTES_SENT);
39 |     read_handle.await.unwrap();
40 |     info!("Read {} Bytes completed", BYTES_SENT);
41 |     info!("Monitor: {:?}", monitor);
42 | }
43 | 


--------------------------------------------------------------------------------
/chappy/examples/client_sync.rs:
--------------------------------------------------------------------------------
 1 | use helpers::send_pseudo_random;
 2 | 
 3 | use log::info;
 4 | use std::env;
 5 | use std::net::TcpStream;
 6 | 
 7 | #[allow(non_snake_case)]
 8 | fn main() {
 9 |     env_logger::Builder::from_default_env()
10 |         .format_timestamp_millis()
11 |         .init();
12 | 
13 |     let SERVER_VIRTUAL_IP: String = env::var("SERVER_VIRTUAL_IP").unwrap();
14 |     let BATCH_SIZE: usize = env::var("BATCH_SIZE").unwrap().parse().unwrap();
15 |     let BYTES_SENT: usize = env::var("BYTES_SENT").unwrap().parse().unwrap();
16 |     let NB_BATCH: usize = BYTES_SENT / BATCH_SIZE;
17 |     let DEBUG_EVERY_BATCH: usize = NB_BATCH / 10;
18 | 
19 |     let server_address = format!("{}:8080", SERVER_VIRTUAL_IP);
20 |     info!("Connecting to echo server {}", server_address);
21 |     let stream = TcpStream::connect(&server_address).unwrap();
22 |     info!("Connection to echo server successful");
23 | 
24 |     let (write_handle, read_handle) =
25 |         send_pseudo_random(stream, BATCH_SIZE, NB_BATCH, DEBUG_EVERY_BATCH);
26 |     write_handle.join().unwrap();
27 |     info!("Write {} Bytes completed", BYTES_SENT);
28 |     read_handle.join().unwrap();
29 |     info!("Read {} Bytes completed", BYTES_SENT);
30 | }
31 | 


--------------------------------------------------------------------------------
/chappy/examples/multi_clients.rs:
--------------------------------------------------------------------------------
 1 | use helpers::send_pseudo_random;
 2 | use log::info;
 3 | use std::env;
 4 | use std::net::TcpStream;
 5 | 
 6 | #[allow(non_snake_case)]
 7 | fn main() {
 8 |     env_logger::Builder::from_default_env()
 9 |         .format_timestamp_millis()
10 |         .init();
11 | 
12 |     let SERVER_VIRTUAL_IP: String = env::var("SERVER_VIRTUAL_IP").unwrap();
13 |     let BATCH_SIZE: usize = env::var("BATCH_SIZE").unwrap().parse().unwrap();
14 |     let BYTES_SENT: usize = env::var("BYTES_SENT").unwrap().parse().unwrap();
15 |     let NB_BATCH: usize = BYTES_SENT / BATCH_SIZE;
16 |     let DEBUG_EVERY_BATCH: usize = NB_BATCH / 10;
17 | 
18 |     let server_address = format!("{}:8080", SERVER_VIRTUAL_IP);
19 |     info!("Connecting to echo server {}", server_address);
20 |     let stream1 = TcpStream::connect(&server_address).unwrap();
21 |     info!("Connecting to echo server {} again", server_address);
22 |     let stream2 = TcpStream::connect(&server_address).unwrap();
23 |     info!("Connections to echo server successful");
24 | 
25 |     let (write1, read1) = send_pseudo_random(stream1, BATCH_SIZE, NB_BATCH / 2, DEBUG_EVERY_BATCH);
26 |     let (write2, read2) = send_pseudo_random(stream2, BATCH_SIZE, NB_BATCH / 2, DEBUG_EVERY_BATCH);
27 |     write1.join().unwrap();
28 |     info!("Write {} Bytes completed", BYTES_SENT / 2);
29 |     read1.join().unwrap();
30 |     info!("Read {} Bytes completed", BYTES_SENT / 2);
31 |     write2.join().unwrap();
32 |     info!("Write {} Bytes completed", BYTES_SENT / 2);
33 |     read2.join().unwrap();
34 |     info!("Read {} Bytes completed", BYTES_SENT / 2);
35 | }
36 | 


--------------------------------------------------------------------------------
/chappy/examples/server.rs:
--------------------------------------------------------------------------------
 1 | use log::{debug, error, info};
 2 | use std::io::Read;
 3 | use std::io::Write;
 4 | use std::net::{TcpListener, TcpStream};
 5 | use std::thread;
 6 | 
 7 | fn handle_client(mut stream: TcpStream) {
 8 |     // read 20 bytes at a time from stream echoing back to stream
 9 |     let mut bytes_echoed = 0;
10 |     loop {
11 |         let mut read = [0; 16 * 1028];
12 |         match stream.read(&mut read) {
13 |             Ok(n) => {
14 |                 bytes_echoed += n;
15 |                 if n == 0 {
16 |                     stream.flush().unwrap();
17 |                     debug!("Stream EOF, bytes echoed: {}", bytes_echoed);
18 |                     break;
19 |                 }
20 |                 stream.write_all(&read[0..n]).unwrap();
21 |             }
22 |             Err(err) => {
23 |                 panic!("{:?}", err);
24 |             }
25 |         }
26 |     }
27 | }
28 | 
29 | fn run() {
30 |     let listener = TcpListener::bind("localhost:8080").unwrap();
31 | 
32 |     for stream in listener.incoming() {
33 |         match stream {
34 |             Ok(stream) => {
35 |                 info!(
36 |                     "New incomming request on {} from {}",
37 |                     stream.local_addr().unwrap(),
38 |                     stream.peer_addr().unwrap()
39 |                 );
40 |                 thread::spawn(move || {
41 |                     handle_client(stream);
42 |                 });
43 |             }
44 |             Err(_) => {
45 |                 error!("TcpListener incoming() failed.");
46 |             }
47 |         }
48 |     }
49 | }
50 | 
51 | fn main() {
52 |     env_logger::Builder::from_default_env()
53 |         .format_timestamp_millis()
54 |         .init();
55 |     info!("Starting server...");
56 |     run()
57 | }
58 | 


--------------------------------------------------------------------------------
/chappy/examples/slow_client.rs:
--------------------------------------------------------------------------------
 1 | use helpers::send_slow;
 2 | 
 3 | use log::info;
 4 | use std::{env, time::Duration};
 5 | use tokio::net::TcpStream;
 6 | 
 7 | #[allow(non_snake_case)]
 8 | #[tokio::main(flavor = "current_thread")]
 9 | async fn main() {
10 |     env_logger::Builder::from_default_env()
11 |         .format_timestamp_millis()
12 |         .init();
13 |     let monitor = tokio_metrics::TaskMonitor::new();
14 | 
15 |     let SERVER_VIRTUAL_IP: String = env::var("SERVER_VIRTUAL_IP").unwrap();
16 |     let NB_BATCH: usize = 2;
17 |     let TIME_BETWEEN_BATCH_SEC = 10;
18 | 
19 |     let server_address = format!("{}:8080", SERVER_VIRTUAL_IP);
20 |     info!("Connecting to echo server {}", server_address);
21 |     let stream = monitor
22 |         .instrument(TcpStream::connect(&server_address))
23 |         .await
24 |         .unwrap();
25 |     info!("Connection to echo server successful");
26 | 
27 |     let (write_handle, read_handle) = send_slow(
28 |         stream,
29 |         NB_BATCH,
30 |         Duration::from_secs(TIME_BETWEEN_BATCH_SEC),
31 |         Some(&monitor),
32 |     )
33 |     .await;
34 |     write_handle.await.unwrap();
35 |     info!("Write {} batches completed", NB_BATCH);
36 |     read_handle.await.unwrap();
37 |     info!("Read {} batches completed", NB_BATCH);
38 |     info!("Monitor: {:?}", monitor);
39 | }
40 | 


--------------------------------------------------------------------------------
/chappy/interceptor/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "chappy-interceptor"
 3 | version = "0.1.0"
 4 | edition = "2021"
 5 | 
 6 | [lib]
 7 | name = "chappy"
 8 | crate-type = ["cdylib"]
 9 | 
10 | [dependencies]
11 | chappy-util = { path = "../util" }
12 | futures = { workspace = true }
13 | ipnet = { workspace = true }
14 | lazy_static = { workspace = true }
15 | libloading = { workspace = true }
16 | nix = { workspace = true }
17 | tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
18 | tracing = { workspace = true }
19 | 


--------------------------------------------------------------------------------
/chappy/interceptor/src/bindings.rs:
--------------------------------------------------------------------------------
 1 | use crate::{
 2 |     debug_fmt,
 3 |     utils::{
 4 |         self,
 5 |         ParsedAddress::{LocalVirtual, NotVirtual, RemoteVirtual, Unknown},
 6 |     },
 7 |     LIBC_LOADED,
 8 | };
 9 | use chappy_util::init_tracing_shared_lib;
10 | use nix::{
11 |     libc::{__errno_location, c_int, sockaddr, socklen_t, ECONNREFUSED},
12 |     sys::socket::{SockaddrIn, SockaddrLike},
13 | };
14 | use std::ptr;
15 | use tracing::debug_span;
16 | 
17 | use utils::{parse_virtual, request_punch};
18 | 
19 | type ConnectSymbol<'a> =
20 |     libloading::Symbol<'a, unsafe extern "C" fn(c_int, *const sockaddr, socklen_t) -> c_int>;
21 | 
22 | /// # Safety
23 | ///
24 | /// This function can be called the same way the libc `connect` function is called
25 | #[no_mangle]
26 | pub unsafe extern "C" fn connect(sockfd: c_int, addr: *const sockaddr, len: socklen_t) -> c_int {
27 |     init_tracing_shared_lib();
28 |     let span = debug_span!("connect", sock = sockfd);
29 |     let _entered = span.enter();
30 |     let libc_connect: ConnectSymbol = LIBC_LOADED.get(b"connect").unwrap();
31 |     let code = match parse_virtual(addr, len) {
32 |         RemoteVirtual(addr_in) => {
33 |             if let Ok(new_addr) = request_punch(sockfd, addr_in) {
34 |                 debug_fmt::dst_rewrite("connect", sockfd, &new_addr, &addr_in);
35 |                 libc_connect(sockfd, ptr::addr_of!(new_addr).cast(), new_addr.len())
36 |             } else {
37 |                 *__errno_location() = ECONNREFUSED;
38 |                 -1
39 |             }
40 |         }
41 |         LocalVirtual(addr_in) => {
42 |             let local = SockaddrIn::new(127, 0, 0, 1, addr_in.port());
43 |             debug_fmt::dst_rewrite("connect", sockfd, &local, &addr_in);
44 |             libc_connect(sockfd, ptr::addr_of!(local).cast(), local.len())
45 |         }
46 |         NotVirtual | Unknown => {
47 |             debug_fmt::dst("connect", sockfd, addr, len);
48 |             libc_connect(sockfd, addr, len)
49 |         }
50 |     };
51 |     debug_fmt::return_code("connect", sockfd, code);
52 |     code
53 | }
54 | 


--------------------------------------------------------------------------------
/chappy/interceptor/src/conf.rs:
--------------------------------------------------------------------------------
 1 | use std::env::var;
 2 | 
 3 | pub(crate) fn virtual_subnet() -> Option<ipnet::Ipv4Net> {
 4 |     var("CHAPPY_VIRTUAL_SUBNET")
 5 |         .map(|v| v.parse().unwrap())
 6 |         .ok()
 7 | }
 8 | 
 9 | pub(crate) fn virtual_ip() -> Option<String> {
10 |     var("CHAPPY_VIRTUAL_IP").ok()
11 | }
12 | 


--------------------------------------------------------------------------------
/chappy/interceptor/src/debug_fmt.rs:
--------------------------------------------------------------------------------
 1 | use nix::{
 2 |     libc::{c_int, sockaddr, socklen_t},
 3 |     sys::socket::{SockaddrIn, SockaddrLike},
 4 | };
 5 | use std::net::Ipv4Addr;
 6 | use tracing::trace;
 7 | 
 8 | pub(crate) fn dst_rewrite(func: &str, fd: c_int, new_addr: &SockaddrIn, old_addr: &SockaddrIn) {
 9 |     trace!(
10 |         "Calling libc.{}({}, {}:{}) instead of ({}, {}:{})",
11 |         func,
12 |         fd,
13 |         Ipv4Addr::from(new_addr.ip()),
14 |         new_addr.port(),
15 |         fd,
16 |         Ipv4Addr::from(old_addr.ip()),
17 |         old_addr.port()
18 |     );
19 | }
20 | 
21 | pub(crate) unsafe fn dst(func: &str, fd: c_int, addr: *const sockaddr, len: socklen_t) {
22 |     let addr_stor = nix::sys::socket::SockaddrStorage::from_raw(addr, Some(len)).unwrap();
23 |     let addr = if let Some(addr) = addr_stor.as_sockaddr_in() {
24 |         format!("{}:{}", Ipv4Addr::from(addr.ip()), addr.port())
25 |     } else {
26 |         String::from("not-ipv4")
27 |     };
28 |     trace!("Calling libc.{}({}, {})", func, fd, addr);
29 | }
30 | 
31 | pub(crate) fn return_code(func: &str, fd: c_int, code: c_int) {
32 |     if code == -1 {
33 |         trace!("libc.{}({}): errno {}", func, fd, nix::errno::errno())
34 |     } else {
35 |         trace!("libc.{}({}): success", func, fd)
36 |     }
37 | }
38 | 


--------------------------------------------------------------------------------
/chappy/interceptor/src/lib.rs:
--------------------------------------------------------------------------------
 1 | mod bindings;
 2 | mod conf;
 3 | mod debug_fmt;
 4 | mod utils;
 5 | 
 6 | #[macro_use]
 7 | extern crate lazy_static;
 8 | 
 9 | pub use bindings::connect;
10 | 
11 | lazy_static! {
12 |     pub(crate) static ref RUNTIME: tokio::runtime::Runtime =
13 |         tokio::runtime::Builder::new_multi_thread()
14 |             .worker_threads(1)
15 |             .enable_all()
16 |             .build()
17 |             .unwrap();
18 |     pub(crate) static ref LIBC_LOADED: libloading::Library =
19 |         unsafe { libloading::Library::new("/lib/x86_64-linux-gnu/libc.so.6").unwrap() };
20 | }
21 | 


--------------------------------------------------------------------------------
/chappy/perforator/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "chappy-perforator"
 3 | version = "0.1.0"
 4 | edition = "2021"
 5 | 
 6 | [dependencies]
 7 | anyhow = { workspace = true }
 8 | chappy-seed = { path = "../seed" }
 9 | chappy-util = { path = "../util" }
10 | futures = { workspace = true }
11 | lazy_static = { workspace = true }
12 | quinn = { workspace = true }
13 | quinn-proto = { workspace = true }
14 | rand = { workspace = true }
15 | rcgen = { workspace = true }
16 | rustls = { workspace = true, features = ["quic"] }
17 | socket2 = { workspace = true, features = ["all"] }
18 | tokio = { workspace = true, features = ["rt", "signal"] }
19 | tokio-metrics = { workspace = true }
20 | tokio-stream = { workspace = true }
21 | tonic = { workspace = true }
22 | tower = { workspace = true }
23 | tracing = { workspace = true }
24 | 


--------------------------------------------------------------------------------
/chappy/perforator/src/conf.rs:
--------------------------------------------------------------------------------
 1 | use std::env::var;
 2 | 
 3 | pub struct ChappyConf {
 4 |     pub cluster_id: String,
 5 |     pub cluster_size: u32,
 6 |     pub connection_timeout_ms: u64,
 7 |     pub seed_hostname: String,
 8 |     pub seed_port: String,
 9 |     pub virtual_ip: String,
10 | }
11 | 
12 | impl ChappyConf {
13 |     pub(crate) fn load() -> Self {
14 |         Self {
15 |             cluster_id: var("CHAPPY_CLUSTER_ID").unwrap_or_else(|_| String::from("default")),
16 |             cluster_size: var("CHAPPY_CLUSTER_SIZE").unwrap().parse().unwrap(),
17 |             connection_timeout_ms: 3000,
18 |             seed_hostname: var("CHAPPY_SEED_HOSTNAME").unwrap(),
19 | 
20 |             seed_port: var("CHAPPY_SEED_PORT").unwrap(),
21 |             virtual_ip: var("CHAPPY_VIRTUAL_IP").unwrap(),
22 |         }
23 |     }
24 | }
25 | 


--------------------------------------------------------------------------------
/chappy/perforator/src/fwd_protocol.rs:
--------------------------------------------------------------------------------
 1 | use core::panic;
 2 | use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
 3 | use tracing::{debug, error};
 4 | 
 5 | #[derive(Clone, Debug, PartialEq)]
 6 | pub struct InitQuery {
 7 |     pub target_port: u16,
 8 |     pub connect_only: bool,
 9 | }
10 | 
11 | impl InitQuery {
12 |     pub async fn read<R: AsyncRead + Unpin>(recv: &mut R) -> Self {
13 |         let target_port = recv.read_u16().await.unwrap();
14 |         let connect_only = match recv.read_u8().await.unwrap() {
15 |             1 => true,
16 |             0 => false,
17 |             _ => panic!("expect 0 or 1"),
18 |         };
19 |         Self {
20 |             target_port,
21 |             connect_only,
22 |         }
23 |     }
24 | 
25 |     pub async fn write<W: AsyncWrite + Unpin>(self, send: &mut W) {
26 |         send.write_u16(self.target_port).await.unwrap();
27 |         send.write_u8(u8::from(self.connect_only)).await.unwrap();
28 |     }
29 | }
30 | 
31 | #[derive(Clone, Debug, PartialEq)]
32 | pub struct InitResponse {
33 |     pub code: u8,
34 | }
35 | 
36 | impl InitResponse {
37 |     pub async fn read<R: AsyncRead + Unpin>(recv: &mut R) -> std::io::Result<Self> {
38 |         let code = recv.read_u8().await?;
39 |         Ok(InitResponse { code })
40 |     }
41 | 
42 |     pub async fn write<W: AsyncWrite + Unpin>(self, send: &mut W) {
43 |         send.write_u8(self.code).await.unwrap();
44 |     }
45 | }
46 | 
47 | /// Async copy then shutdown writer.
48 | pub async fn copy<R, W>(mut reader: R, mut writer: W) -> std::io::Result<()>
49 | where
50 |     R: AsyncRead + Unpin,
51 |     W: AsyncWrite + Unpin,
52 | {
53 |     match tokio::io::copy(&mut reader, &mut writer).await {
54 |         Ok(bytes_read) => {
55 |             debug!(bytes_read, "completed");
56 |             writer.shutdown().await?;
57 |             Ok(())
58 |         }
59 |         Err(err) => {
60 |             error!(%err, "error while copying");
61 |             Err(err)
62 |         }
63 |     }
64 | }
65 | 
66 | #[cfg(test)]
67 | mod tests {
68 |     use super::*;
69 | 
70 |     #[tokio::test]
71 |     async fn query_roundtrip() {
72 |         let original = InitQuery {
73 |             target_port: 80,
74 |             connect_only: true,
75 |         };
76 |         let mut buf = vec![];
77 |         original.clone().write(&mut buf).await;
78 |         let result = InitQuery::read(&mut buf.as_slice()).await;
79 |         assert_eq!(original, result);
80 |     }
81 | 
82 |     #[tokio::test]
83 |     async fn response_roundtrip() {
84 |         let original = InitResponse { code: 1 };
85 |         let mut buf = vec![];
86 |         original.clone().write(&mut buf).await;
87 |         let result = InitResponse::read(&mut buf.as_slice()).await.unwrap();
88 |         assert_eq!(original, result);
89 |     }
90 | }
91 | 


--------------------------------------------------------------------------------
/chappy/perforator/src/lib.rs:
--------------------------------------------------------------------------------
 1 | pub mod binding_service;
 2 | mod conf;
 3 | pub mod forwarder;
 4 | pub mod fwd_protocol;
 5 | pub mod metrics;
 6 | pub mod perforator;
 7 | pub mod quic_utils;
 8 | pub mod shutdown;
 9 | pub mod spawn;
10 | 
11 | #[macro_use]
12 | extern crate lazy_static;
13 | 
14 | /// The name of all certificates are issued for
15 | pub const SERVER_NAME: &str = "chappy";
16 | 
17 | /// A fictive name to issue punch connections against
18 | pub const PUNCH_SERVER_NAME: &str = "chappy-punch";
19 | 
20 | lazy_static! {
21 |     pub static ref CHAPPY_CONF: conf::ChappyConf = conf::ChappyConf::load();
22 | }
23 | 


--------------------------------------------------------------------------------
/chappy/perforator/src/main.rs:
--------------------------------------------------------------------------------
 1 | use chappy_perforator::{
 2 |     binding_service::BindingService,
 3 |     forwarder::Forwarder,
 4 |     metrics::{meter, print_metrics},
 5 |     perforator::Perforator,
 6 |     shutdown::{gracefull, GracefullyRunnable, Shutdown},
 7 |     CHAPPY_CONF,
 8 | };
 9 | use chappy_util::{close_tracing, init_tracing};
10 | use futures::FutureExt;
11 | use std::{sync::Arc, time::Duration};
12 | use tonic::async_trait;
13 | use tracing::{info, info_span, Instrument};
14 | 
15 | struct SrvRunnable;
16 | 
17 | #[async_trait]
18 | impl GracefullyRunnable for SrvRunnable {
19 |     async fn run(&self, shutdown: &Shutdown) {
20 |         let tcp_port = 5000;
21 |         let seed_addr = format!("{}:{}", CHAPPY_CONF.seed_hostname, CHAPPY_CONF.seed_port);
22 |         let quic_port = 5001;
23 |         info!(
24 |             perforator_tcp_port = tcp_port,
25 |             perforator_quic_port = quic_port,
26 |             seed_address = %seed_addr
27 |         );
28 | 
29 |         let forwarder = Arc::new(Forwarder::new(quic_port));
30 |         let binding_service = Arc::new(BindingService::new(quic_port));
31 |         let perforator = Arc::new(Perforator::new(
32 |             Arc::clone(&forwarder),
33 |             binding_service,
34 |             tcp_port,
35 |         ));
36 |         let node_binding = perforator.bind_node(shutdown.create_guard()).await;
37 | 
38 |         let mut shtdwn_hook_guard = shutdown.create_guard();
39 |         tokio::spawn(async move {
40 |             shtdwn_hook_guard.wait_shutdown().await;
41 |             print_metrics();
42 |             // TODO -> this call seems to be stuck
43 |             node_binding.close().await;
44 |         });
45 | 
46 |         tokio::join!(
47 |             shutdown
48 |                 .create_guard()
49 |                 .run_cancellable(
50 |                     perforator.run_tcp_server(shutdown),
51 |                     Duration::from_millis(10)
52 |                 )
53 |                 .map(|o| o.ok()),
54 |             shutdown
55 |                 .create_guard()
56 |                 .run_cancellable(
57 |                     forwarder.run_quic_server(shutdown),
58 |                     Duration::from_millis(10)
59 |                 )
60 |                 .map(|o| o.ok()),
61 |         );
62 |     }
63 | }
64 | 
65 | #[tokio::main(flavor = "current_thread")]
66 | async fn main() {
67 |     init_tracing(&format!("perf-{}", CHAPPY_CONF.virtual_ip));
68 | 
69 |     meter(
70 |         gracefull(SrvRunnable, Duration::from_secs(1))
71 |             .instrument(info_span!("perforator", virt_ip = CHAPPY_CONF.virtual_ip)),
72 |     )
73 |     .await;
74 |     close_tracing();
75 | }
76 | 


--------------------------------------------------------------------------------
/chappy/perforator/src/metrics.rs:
--------------------------------------------------------------------------------
 1 | use std::future::Future;
 2 | 
 3 | lazy_static! {
 4 |     static ref TASK_MONITOR: tokio_metrics::TaskMonitor = tokio_metrics::TaskMonitor::new();
 5 | }
 6 | 
 7 | pub fn meter<T>(fut: T) -> impl Future<Output = T::Output> + Send + 'static
 8 | where
 9 |     T: Future + Send + 'static,
10 |     T::Output: Send + 'static,
11 | {
12 |     TASK_MONITOR.instrument(fut)
13 | }
14 | 
15 | pub fn print_metrics() {
16 |     tracing::info!("Monitor: {:?}", *TASK_MONITOR);
17 | }
18 | 


--------------------------------------------------------------------------------
/chappy/perforator/src/spawn.rs:
--------------------------------------------------------------------------------
 1 | use crate::metrics::meter;
 2 | use crate::shutdown::{Cancelled, ShutdownGuard};
 3 | use std::future::Future;
 4 | use std::time::Duration;
 5 | use tokio::task::JoinHandle;
 6 | use tracing::{Instrument, Span};
 7 | 
 8 | pub fn spawn_task<T>(
 9 |     shutdown_guard: ShutdownGuard,
10 |     span: Span,
11 |     future: T,
12 | ) -> JoinHandle<Result<T::Output, Cancelled>>
13 | where
14 |     T: Future + Send + 'static,
15 |     T::Output: Send + 'static,
16 | {
17 |     tokio::spawn(meter(
18 |         shutdown_guard
19 |             .run_cancellable(future, Duration::from_millis(50))
20 |             .instrument(span),
21 |     ))
22 | }
23 | 


--------------------------------------------------------------------------------
/chappy/seed/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "chappy-seed"
 3 | version = "0.1.0"
 4 | edition = "2021"
 5 | build = "build.rs"
 6 | 
 7 | [dependencies]
 8 | chappy-util = { path = "../util" }
 9 | chrono = { workspace = true }
10 | futures = { workspace = true }
11 | prost = { workspace = true }
12 | tokio = { workspace = true, features = ["rt"] }
13 | tokio-stream = { workspace = true }
14 | tonic = { workspace = true }
15 | tracing = { workspace = true }
16 | 
17 | 
18 | [build-dependencies]
19 | tonic-build = "0.9.2"
20 | 


--------------------------------------------------------------------------------
/chappy/seed/build.rs:
--------------------------------------------------------------------------------
1 | fn main() {
2 |     tonic_build::configure()
3 |         .compile(&["seed.proto"], &["."])
4 |         .unwrap();
5 | }
6 | 


--------------------------------------------------------------------------------
/chappy/seed/seed.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | package seed;
 4 | 
 5 | message Address {
 6 |     string ip = 1;
 7 |     int32 port = 2;
 8 | }
 9 | 
10 | message ClientBindingRequest {
11 |     string cluster_id = 1;
12 |     string target_virtual_ip = 2;
13 |     string source_virtual_ip = 3;
14 | }
15 | 
16 | message ClientBindingResponse {
17 |     Address target_nated_addr = 1;
18 |     bytes server_certificate = 2;
19 |     bool failed_punch_request = 3;
20 | }
21 | 
22 | message ServerBindingRequest {
23 |     string cluster_id = 1;
24 |     string virtual_ip = 2;
25 |     bytes server_certificate = 3;
26 | }
27 | 
28 | message ServerPunchRequest {
29 |     Address client_nated_addr = 1;
30 |     string client_virtual_ip = 2;
31 | }
32 | 
33 | message NodeBindingRequest {
34 |     string cluster_id = 1;
35 |     uint32 cluster_size = 2;
36 |     string source_virtual_ip = 3;
37 | }
38 | 
39 | message NodeBindingResponse {}
40 | 
41 | service Seed {
42 |     rpc BindClient(ClientBindingRequest) returns (ClientBindingResponse) {}
43 |     rpc BindServer(ServerBindingRequest) returns (stream ServerPunchRequest) {}
44 |     rpc BindNode(stream NodeBindingRequest) returns (NodeBindingResponse) {}
45 | }
46 | 


--------------------------------------------------------------------------------
/chappy/seed/src/address_stream.rs:
--------------------------------------------------------------------------------
 1 | use crate::{AddressConv, ServerPunchRequest};
 2 | use futures::{Stream, StreamExt};
 3 | use std::pin::Pin;
 4 | use std::task::{Context, Poll};
 5 | use tokio::sync::mpsc::UnboundedReceiver;
 6 | use tokio_stream::wrappers::UnboundedReceiverStream;
 7 | use tracing::{debug, Span};
 8 | 
 9 | type PunchRequestResult = Result<ServerPunchRequest, tonic::Status>;
10 | 
11 | pub struct PunchRequestStream {
12 |     // TODO: Avoid boxing here (nit)
13 |     inner: Pin<Box<dyn Stream<Item = PunchRequestResult> + Send>>,
14 |     parent_span: Span,
15 | }
16 | 
17 | impl PunchRequestStream {
18 |     pub fn new(recv: UnboundedReceiver<ServerPunchRequest>, parent_span: Span) -> Self {
19 |         let span = parent_span.clone();
20 |         let inner = UnboundedReceiverStream::new(recv)
21 |             .map(move |preq| {
22 |                 let addr = AddressConv(preq.client_nated_addr.as_ref().unwrap().clone());
23 |                 debug!(parent: &span, tgt_nat=%addr, "forwarding punch request");
24 |                 Ok(preq)
25 |             })
26 |             .boxed();
27 |         Self { inner, parent_span }
28 |     }
29 | }
30 | 
31 | impl Stream for PunchRequestStream {
32 |     type Item = PunchRequestResult;
33 | 
34 |     fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
35 |         self.inner.poll_next_unpin(cx)
36 |     }
37 | }
38 | 
39 | impl Drop for PunchRequestStream {
40 |     fn drop(&mut self) {
41 |         debug!(parent: &self.parent_span, "PunchRequestStream dropped");
42 |     }
43 | }
44 | 
45 | #[cfg(test)]
46 | mod tests {
47 |     use super::*;
48 | 
49 |     #[tokio::test]
50 |     async fn test_manager() {
51 |         let (tx, rx) = tokio::sync::mpsc::unbounded_channel::<()>();
52 |         let _stream = UnboundedReceiverStream::new(rx);
53 |         assert!(!tx.is_closed());
54 |     }
55 | }
56 | 


--------------------------------------------------------------------------------
/chappy/seed/src/cluster_manager/message.rs:
--------------------------------------------------------------------------------
 1 | use chrono::{DateTime, Utc};
 2 | use tokio::sync::oneshot;
 3 | 
 4 | use super::summary::*;
 5 | 
 6 | #[derive(Debug)]
 7 | pub enum Message {
 8 |     BindNodeStart {
 9 |         time: DateTime<Utc>,
10 |         cluster_size: u32,
11 |         virt_ip: String,
12 |     },
13 |     BindNodeEnd {
14 |         time: DateTime<Utc>,
15 |         virt_ip: String,
16 |     },
17 |     BindServerStart {
18 |         virt_ip: String,
19 |     },
20 |     BindServerResponse {
21 |         virt_ip: String,
22 |     },
23 |     // BindServerEnd usually arrives too late
24 |     BindClientStart {
25 |         src_virt_ip: String,
26 |         tgt_virt_ip: String,
27 |     },
28 |     BindClientEnd {
29 |         src_virt_ip: String,
30 |         tgt_virt_ip: String,
31 |     },
32 |     GetSummary {
33 |         tx: oneshot::Sender<Summary>,
34 |     },
35 | }
36 | 
37 | impl Message {
38 |     pub fn now() -> DateTime<Utc> {
39 |         Utc::now()
40 |     }
41 | 
42 |     pub fn get_summary() -> (Message, oneshot::Receiver<Summary>) {
43 |         let (tx, rx) = oneshot::channel();
44 |         (Message::GetSummary { tx }, rx)
45 |     }
46 | }
47 | 


--------------------------------------------------------------------------------
/chappy/seed/src/cluster_manager/mod.rs:
--------------------------------------------------------------------------------
1 | mod manager;
2 | mod message;
3 | mod state;
4 | mod summary;
5 | 
6 | pub use manager::{ClusterManager, ClusterManagerTask};
7 | pub use message::Message;
8 | 


--------------------------------------------------------------------------------
/chappy/seed/src/cluster_manager/summary.rs:
--------------------------------------------------------------------------------
 1 | use chrono::{DateTime, Utc};
 2 | use std::fmt;
 3 | 
 4 | pub enum IntervalSummary {
 5 |     Some {
 6 |         first_node_start: DateTime<Utc>,
 7 |         last_node_start: DateTime<Utc>,
 8 |         first_node_end: Option<DateTime<Utc>>,
 9 |         last_node_end: Option<DateTime<Utc>>,
10 |     },
11 |     Empty,
12 | }
13 | 
14 | pub struct NodeSummary {
15 |     pub expected_size: u32,
16 |     pub nodes: u32,
17 |     pub finished_nodes: u32,
18 | }
19 | 
20 | pub struct Summary {
21 |     pub interval: IntervalSummary,
22 |     pub node: NodeSummary,
23 | }
24 | 
25 | impl fmt::Debug for IntervalSummary {
26 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
27 |         match self {
28 |             IntervalSummary::Some {
29 |                 first_node_start,
30 |                 last_node_start,
31 |                 first_node_end,
32 |                 last_node_end,
33 |             } => {
34 |                 let start_interval = last_node_start.signed_duration_since(*first_node_start);
35 |                 if let (Some(fne), Some(lne)) = (first_node_end, last_node_end) {
36 |                     let end_interval = lne.signed_duration_since(*fne);
37 |                     write!(
38 |                         f,
39 |                         "starts: {:?}, ends: {:?}",
40 |                         start_interval.to_std().unwrap(),
41 |                         end_interval.to_std().unwrap(),
42 |                     )
43 |                 } else {
44 |                     write!(f, "starts: {:?}, no end", start_interval.to_std().unwrap())
45 |                 }
46 |             }
47 |             IntervalSummary::Empty => f.write_str("Empty cluster"),
48 |         }
49 |     }
50 | }
51 | 
52 | impl fmt::Debug for NodeSummary {
53 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
54 |         write!(
55 |             f,
56 |             "{} expected, {} started, {} ended",
57 |             self.expected_size, self.nodes, self.finished_nodes
58 |         )
59 |     }
60 | }
61 | 
62 | impl fmt::Debug for Summary {
63 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
64 |         write!(f, "{:?} ({:?})", self.interval, self.node)
65 |     }
66 | }
67 | 


--------------------------------------------------------------------------------
/chappy/seed/src/lib.rs:
--------------------------------------------------------------------------------
 1 | mod seed {
 2 |     tonic::include_proto!("seed");
 3 | }
 4 | 
 5 | pub use seed::*;
 6 | mod address_stream;
 7 | mod cluster_manager;
 8 | mod registered_endpoints;
 9 | pub mod seed_service;
10 | 
11 | use std::{net::SocketAddr, str::FromStr};
12 | 
13 | /// Address conversion newtype
14 | pub struct AddressConv(pub Address);
15 | 
16 | impl From<AddressConv> for SocketAddr {
17 |     fn from(addr: AddressConv) -> Self {
18 |         SocketAddr::from_str(&format!("{}", addr)).unwrap()
19 |     }
20 | }
21 | 
22 | impl std::fmt::Display for AddressConv {
23 |     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
24 |         write!(f, "{}:{}", self.0.ip, self.0.port)
25 |     }
26 | }
27 | 


--------------------------------------------------------------------------------
/chappy/seed/src/main.rs:
--------------------------------------------------------------------------------
 1 | use chappy_seed::{seed_server::SeedServer, seed_service::SeedService};
 2 | use chappy_util::init_tracing;
 3 | use std::env;
 4 | use std::time::Duration;
 5 | use tokio::signal::unix::{signal, SignalKind};
 6 | use tokio::time::timeout;
 7 | use tonic::{transport::Server, Result};
 8 | use tracing::{debug, info, warn};
 9 | 
10 | #[tokio::main(flavor = "current_thread")]
11 | async fn main() -> Result<(), Box<dyn std::error::Error>> {
12 |     init_tracing("seed");
13 |     let port = env::var("PORT").unwrap();
14 |     debug!("Starting seed on port {}...", port);
15 |     let (service, task) = SeedService::new();
16 |     Server::builder()
17 |         .add_service(SeedServer::new(service))
18 |         .serve_with_shutdown(format!("0.0.0.0:{}", port).parse()?, async {
19 |             let mut sigterm = signal(SignalKind::terminate()).unwrap();
20 |             let mut sigint = signal(SignalKind::interrupt()).unwrap();
21 |             tokio::select! {
22 |                 _ = sigint.recv()=> {
23 |                     info!("SIGINT received, exiting gracefully...")
24 |                 }
25 |                 _ = sigterm.recv()=> {
26 |                     info!("SIGTERM received, exiting gracefully...")
27 |                 }
28 |             };
29 |         })
30 |         .await
31 |         .unwrap();
32 | 
33 |     match timeout(Duration::from_millis(1000), task.wait()).await {
34 |         Ok(_) => info!("Gracefull shutdown completed"),
35 |         Err(_) => warn!("Grace period elapsed, forcefully shutting down"),
36 |     };
37 |     Ok(())
38 | }
39 | 


--------------------------------------------------------------------------------
/chappy/seed/src/registered_endpoints.rs:
--------------------------------------------------------------------------------
 1 | use crate::ServerPunchRequest;
 2 | use chappy_util::awaitable_map::AwaitableMap;
 3 | use std::{net::SocketAddr, time::Duration};
 4 | use tokio::sync::mpsc::UnboundedSender;
 5 | use tokio::{sync::mpsc, time::timeout};
 6 | use tonic::{Result, Status};
 7 | use tracing::{error, info};
 8 | 
 9 | #[derive(PartialEq, Eq, Hash, Clone)]
10 | struct VirtualTarget {
11 |     cluster_id: String,
12 |     ip: String,
13 | }
14 | 
15 | #[derive(Clone)]
16 | pub struct ResolvedTarget {
17 |     pub natted_address: SocketAddr,
18 |     pub punch_req_stream: mpsc::UnboundedSender<ServerPunchRequest>,
19 |     pub server_certificate: Vec<u8>,
20 | }
21 | 
22 | /// Map virtual addresses to the NATed endpoint and punch request stream
23 | /// TODO: a cleanup mechanism of old endpoints
24 | pub struct RegisteredEndpoints(AwaitableMap<VirtualTarget, ResolvedTarget>);
25 | 
26 | impl RegisteredEndpoints {
27 |     pub fn new() -> Self {
28 |         Self(AwaitableMap::new())
29 |     }
30 | 
31 |     pub async fn get(
32 |         &self,
33 |         tgt_ip: &str,
34 |         cluster_id: &str,
35 |     ) -> Result<ResolvedTarget, tonic::Status> {
36 |         let virtual_target_key = VirtualTarget {
37 |             ip: tgt_ip.to_owned(),
38 |             cluster_id: cluster_id.to_owned(),
39 |         };
40 | 
41 |         // TODO adjust timeout duration
42 |         let resolved_target_timeout = timeout(
43 |             Duration::from_secs(10),
44 |             // Assume the value does not need to be reset because we use a
45 |             // different cluster each time
46 |             self.0.get(virtual_target_key, |_| false),
47 |         )
48 |         .await;
49 | 
50 |         let resolved_target = if let Ok(target) = resolved_target_timeout {
51 |             target
52 |         } else {
53 |             let msg = "Target ip could not be resolved";
54 |             error!(msg);
55 |             return Err(Status::not_found(msg));
56 |         };
57 | 
58 |         Ok(resolved_target)
59 |     }
60 | 
61 |     pub fn insert(
62 |         &self,
63 |         server_nated_addr: SocketAddr,
64 |         req_tx: UnboundedSender<ServerPunchRequest>,
65 |         server_certificate: &[u8],
66 |         registered_ip: &str,
67 |         cluster_id: &str,
68 |     ) {
69 |         let resolved_target = ResolvedTarget {
70 |             natted_address: server_nated_addr,
71 |             punch_req_stream: req_tx,
72 |             server_certificate: server_certificate.to_vec(),
73 |         };
74 |         let virtual_target_key = VirtualTarget {
75 |             ip: registered_ip.to_owned(),
76 |             cluster_id: cluster_id.to_owned(),
77 |         };
78 | 
79 |         // replace the new target in the registered endpoint map
80 |         if let Some(prev_tgt) = self.0.insert(virtual_target_key.clone(), resolved_target) {
81 |             if prev_tgt.punch_req_stream.is_closed() {
82 |                 info!(
83 |                     ip = virtual_target_key.ip,
84 |                     cluster = virtual_target_key.cluster_id,
85 |                     "replaced closed target"
86 |                 );
87 |             } else {
88 |                 error!(
89 |                     ip = virtual_target_key.ip,
90 |                     cluster = virtual_target_key.cluster_id,
91 |                     "replaced unclosed target"
92 |                 );
93 |             }
94 |         }
95 |     }
96 | }
97 | 


--------------------------------------------------------------------------------
/chappy/util/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "chappy-util"
 3 | version = "0.1.0"
 4 | edition = "2021"
 5 | 
 6 | [dependencies]
 7 | anyhow = { workspace = true }
 8 | chrono = { workspace = true }
 9 | opentelemetry = { workspace = true, features = ["rt-tokio-current-thread"] }
10 | opentelemetry-otlp = { workspace = true }
11 | tokio = { workspace = true }
12 | tracing = { workspace = true }
13 | tracing-opentelemetry = { workspace = true }
14 | tracing-subscriber = { workspace = true }
15 | 


--------------------------------------------------------------------------------
/chappy/util/src/lib.rs:
--------------------------------------------------------------------------------
1 | pub mod awaitable_map;
2 | pub mod protocol;
3 | pub mod tcp_connect;
4 | pub mod test;
5 | mod tracing_helpers;
6 | 
7 | pub use tracing_helpers::{close_tracing, init_tracing, init_tracing_shared_lib};
8 | 


--------------------------------------------------------------------------------
/chappy/util/src/protocol.rs:
--------------------------------------------------------------------------------
 1 | /// Protocol talked between the interceptor and the perforator
 2 | use crate::tcp_connect::connect_retry;
 3 | use std::io::{Error as IoError, ErrorKind as IoErrorKind, Result as IoResult};
 4 | use std::net::Ipv4Addr;
 5 | use std::time::Duration;
 6 | use tokio::io::{AsyncReadExt, AsyncWriteExt};
 7 | use tokio::net::TcpStream;
 8 | 
 9 | const REGISTER_HEADER_LENGTH: usize = 13;
10 | const REGISTER_CLIENT_HEADER_BYTES: [u8; REGISTER_HEADER_LENGTH] = *b"chappy_client";
11 | 
12 | #[derive(Debug)]
13 | pub enum ParsedTcpStream {
14 |     ClientRegistration {
15 |         source_port: u16,
16 |         target_virtual_ip: Ipv4Addr,
17 |         target_port: u16,
18 |         response_writer: ResponseWriter,
19 |     },
20 |     Raw(TcpStream),
21 | }
22 | 
23 | impl ParsedTcpStream {
24 |     pub async fn from(mut stream: TcpStream) -> Self {
25 |         let mut buff = [0; REGISTER_HEADER_LENGTH];
26 |         stream.peek(&mut buff).await.unwrap();
27 |         if buff == REGISTER_CLIENT_HEADER_BYTES {
28 |             stream.read_exact(&mut buff).await.unwrap();
29 |             let source_port = stream.read_u16().await.unwrap();
30 |             let target_virtual_ip: Ipv4Addr = stream.read_u32().await.unwrap().into();
31 |             let target_port = stream.read_u16().await.unwrap();
32 |             Self::ClientRegistration {
33 |                 source_port,
34 |                 target_virtual_ip,
35 |                 target_port,
36 |                 response_writer: ResponseWriter(stream),
37 |             }
38 |         } else {
39 |             Self::Raw(stream)
40 |         }
41 |     }
42 | }
43 | 
44 | #[derive(Debug)]
45 | pub struct ResponseWriter(TcpStream);
46 | 
47 | impl ResponseWriter {
48 |     pub async fn write_success(mut self) {
49 |         self.0.write_u8(0).await.unwrap();
50 |         self.0.flush().await.unwrap();
51 |     }
52 | 
53 |     pub async fn write_failure(mut self) {
54 |         self.0.write_u8(1).await.unwrap();
55 |         self.0.flush().await.unwrap();
56 |     }
57 | }
58 | 
59 | pub async fn register_client(
60 |     perforator_address: &str,
61 |     source_port: u16,
62 |     target_virtual_ip: Ipv4Addr,
63 |     target_port: u16,
64 | ) -> IoResult<()> {
65 |     let mut stream = connect_retry(perforator_address, Duration::from_secs(3)).await?;
66 |     stream.write_all(&REGISTER_CLIENT_HEADER_BYTES).await?;
67 |     stream.write_u16(source_port).await?;
68 |     stream.write_u32(target_virtual_ip.into()).await?;
69 |     stream.write_u16(target_port).await?;
70 |     stream.flush().await?;
71 |     if stream.read_u8().await? > 0 {
72 |         return Err(IoError::new(
73 |             IoErrorKind::AddrNotAvailable,
74 |             anyhow::anyhow!("Perforator could not reach target"),
75 |         ));
76 |     }
77 |     stream
78 |         .read_u8()
79 |         .await
80 |         .expect_err("Connection should have been closed by peer");
81 |     Ok(())
82 | }
83 | 


--------------------------------------------------------------------------------
/chappy/util/src/tcp_connect.rs:
--------------------------------------------------------------------------------
 1 | use std::io::{ErrorKind as IoErrorKind, Result as IoResult};
 2 | use std::time::{Duration, Instant};
 3 | use tokio::net::{TcpStream, ToSocketAddrs};
 4 | use tracing::{error, warn};
 5 | 
 6 | /// Due to cloud function provisioning and setup times, the target addresses
 7 | /// might not be available right way. This helper helps bridge that gap by
 8 | /// retrying a TCP connection for the specified duration.
 9 | pub async fn connect_retry<A: ToSocketAddrs>(addr: A, timeout: Duration) -> IoResult<TcpStream> {
10 |     let start = Instant::now();
11 |     let mut backoff = 0;
12 |     let mut first = true;
13 |     loop {
14 |         match TcpStream::connect(&addr).await {
15 |             Ok(stream) => return Ok(stream),
16 |             Err(err) if err.kind() == IoErrorKind::ConnectionRefused => {
17 |                 if start.elapsed() > timeout {
18 |                     error!("TCP connection and retries refused");
19 |                     return Err(err);
20 |                 }
21 |                 if first {
22 |                     warn!("TCP connection refused, retrying with linear backoff...");
23 |                     first = false;
24 |                 }
25 |                 tokio::time::sleep(Duration::from_millis(20 + backoff)).await;
26 |                 backoff += 5;
27 |                 continue;
28 |             }
29 |             Err(err) => return Err(err),
30 |         }
31 |     }
32 | }
33 | 


--------------------------------------------------------------------------------
/chappy/util/src/test.rs:
--------------------------------------------------------------------------------
 1 | use tokio::net::TcpListener;
 2 | use tokio::sync::{mpsc, oneshot};
 3 | use tokio::task::JoinHandle;
 4 | 
 5 | /// Get some available ports from the OS by probing with shortlived TCP servers.
 6 | ///
 7 | /// Note that the port being free at the time this function completes does not
 8 | /// guaranty the it will still be the case when the caller tries to bind to it.
 9 | pub async fn available_ports(number: usize) -> Vec<u16> {
10 |     let (tx, mut rx) = mpsc::channel(number);
11 | 
12 |     // Spawn as many servers as requested ports in parallel, to avoid that the
13 |     // OS reassigns the same port multiple times.
14 |     #[allow(clippy::needless_collect)]
15 |     let spawned_servers: Vec<(oneshot::Sender<()>, JoinHandle<()>)> = (0..number)
16 |         .map(|_| {
17 |             let tx = tx.clone();
18 |             let (resp_tx, resp_rx) = oneshot::channel();
19 |             let handle = tokio::spawn(async move {
20 |                 let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
21 |                 let port = listener.local_addr().unwrap().port();
22 |                 tx.send(port).await.unwrap();
23 |                 drop(tx);
24 |                 resp_rx.await.unwrap();
25 |             });
26 |             (resp_tx, handle)
27 |         })
28 |         .collect();
29 |     drop(tx);
30 | 
31 |     // First gather all the ports and only then send the shutdown signal to the
32 |     // servers
33 |     let mut ports = Vec::with_capacity(number);
34 |     while let Some(port) = rx.recv().await {
35 |         ports.push(port);
36 |     }
37 |     let handles = spawned_servers
38 |         .into_iter()
39 |         .map(|(tx, handle)| {
40 |             tx.send(()).unwrap();
41 |             handle
42 |         })
43 |         .collect::<Vec<_>>();
44 | 
45 |     // Wait for all the servers to be dropped to make sure the ports can now be
46 |     // reused
47 |     for handle in handles {
48 |         handle.await.unwrap();
49 |     }
50 |     ports
51 | }
52 | 
53 | #[cfg(test)]
54 | mod tests {
55 |     use super::*;
56 |     use std::collections::HashSet;
57 | 
58 |     #[tokio::test]
59 |     async fn test_available_ports() {
60 |         let target_len = 10;
61 |         let ports = available_ports(target_len).await;
62 |         assert_eq!(ports.len(), target_len);
63 |         ports
64 |             .iter()
65 |             .for_each(|&p| assert!(p > 1024, "not in the RFC 6056 ephemeral range"));
66 |         let set = HashSet::<_>::from_iter(ports.into_iter());
67 |         assert_eq!(set.len(), target_len);
68 |         for port in set {
69 |             TcpListener::bind(format!("127.0.0.1:{}", port))
70 |                 .await
71 |                 .unwrap();
72 |         }
73 |     }
74 | }
75 | 


--------------------------------------------------------------------------------
/chappy/util/src/tracing_helpers.rs:
--------------------------------------------------------------------------------
 1 | use chrono::prelude::{DateTime, Utc};
 2 | use opentelemetry_otlp::WithExportConfig;
 3 | use tracing_subscriber::{
 4 |     fmt::{format::Writer, time::FormatTime},
 5 |     layer::SubscriberExt,
 6 |     util::SubscriberInitExt,
 7 |     EnvFilter,
 8 | };
 9 | 
10 | struct CustomTime;
11 | 
12 | impl FormatTime for CustomTime {
13 |     fn format_time(&self, w: &mut Writer<'_>) -> core::fmt::Result {
14 |         let dt: DateTime<Utc> = std::time::SystemTime::now().into();
15 |         write!(w, "{}", dt.format("%H:%M:%S%.3f"))
16 |     }
17 | }
18 | 
19 | /// Configure and init tracing for executables
20 | pub fn init_tracing(service_name: &str) {
21 |     let reg = tracing_subscriber::registry().with(EnvFilter::from_default_env());
22 | 
23 |     let mut fmt_layer = tracing_subscriber::fmt::layer()
24 |         .with_writer(std::io::stderr)
25 |         .with_timer(CustomTime);
26 |     fmt_layer.set_ansi(false);
27 |     let reg = reg.with(fmt_layer);
28 | 
29 |     let otlp_config = (
30 |         std::env::var("CHAPPY_OPENTELEMETRY_URL"),
31 |         std::env::var("CHAPPY_OPENTELEMETRY_AUTHORIZATION"),
32 |     );
33 |     let otlp_layer = if let (Ok(ot_url), Ok(ot_auth)) = otlp_config {
34 |         let headers = std::collections::HashMap::from([("Authorization".into(), ot_auth)]);
35 |         let exporter = opentelemetry_otlp::new_exporter()
36 |             .http()
37 |             .with_endpoint(ot_url)
38 |             .with_headers(headers);
39 |         let otlp_tracer = opentelemetry_otlp::new_pipeline()
40 |             .tracing()
41 |             .with_exporter(exporter)
42 |             .with_trace_config(opentelemetry::sdk::trace::config().with_resource(
43 |                 opentelemetry::sdk::Resource::new(vec![opentelemetry::KeyValue::new(
44 |                     "service.name",
45 |                     service_name.to_owned(),
46 |                 )]),
47 |             ))
48 |             .install_batch(opentelemetry::runtime::TokioCurrentThread)
49 |             .unwrap();
50 |         let tracing_layer = tracing_opentelemetry::layer().with_tracer(otlp_tracer);
51 |         Some(tracing_layer)
52 |     } else {
53 |         None
54 |     };
55 |     let reg = reg.with(otlp_layer);
56 | 
57 |     reg.init();
58 | }
59 | 
60 | /// Configure and init tracing with some tweeks specific to shared libraries
61 | pub fn init_tracing_shared_lib() {
62 |     let mut fmt_layer = tracing_subscriber::fmt::layer()
63 |         .with_writer(std::io::stderr)
64 |         .with_target(false)
65 |         .with_timer(CustomTime);
66 |     fmt_layer.set_ansi(false);
67 | 
68 |     tracing_subscriber::registry()
69 |         .with(fmt_layer)
70 |         .with(EnvFilter::from_default_env())
71 |         .try_init()
72 |         .ok();
73 | }
74 | 
75 | pub fn close_tracing() {
76 |     opentelemetry::global::shutdown_tracer_provider();
77 | }
78 | 


--------------------------------------------------------------------------------
/cli/flags.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | TRACE_FLAG_VAR = "L12N_TRACE"
4 | TRACE = TRACE_FLAG_VAR in os.environ
5 | 


--------------------------------------------------------------------------------
/cli/main.py:
--------------------------------------------------------------------------------
 1 | import pkgutil
 2 | import sys
 3 | 
 4 | import common
 5 | import core
 6 | import plugins
 7 | from invoke import Collection, Program
 8 | 
 9 | 
10 | def list_plugins():
11 |     for importer, modname, _ in pkgutil.iter_modules(plugins.__path__):
12 |         mod = importer.find_module(modname).load_module(modname)
13 |         yield (modname, mod)
14 | 
15 | 
16 | class L12nProgram(Program):
17 |     """A custom Program that doesn't print useless core options"""
18 | 
19 |     def print_help(self) -> None:
20 |         print(
21 |             f"""Usage: {self.binary} [--core-opts] <subcommand> [--subcommand-opts] ...
22 | 
23 | Core options:
24 | 
25 |   -e, --echo                     Echo executed commands before running.
26 |   -h [STRING], --help[=STRING]   Show core or per-task help and exit.
27 |   -V, --version                  Show version and exit.
28 | 
29 | Plugins:
30 | """
31 |         )
32 |         active = common.active_plugins()
33 |         plugin_entry = lambda name: f"[{'x' if name in active else ' '}] {name}"
34 |         self.print_columns([(plugin_entry(p[0]), p[1].__doc__) for p in list_plugins()])
35 |         self.list_tasks()
36 | 
37 | 
38 | if __name__ == "__main__":
39 |     namespace = Collection.from_module(core)
40 |     namespace.configure({"run": {"env": common.conf(core.VALIDATORS)}})
41 | 
42 |     plugin_set = common.active_plugins()
43 |     # import all the modules in the plugins folder as collections
44 |     for modname, mod in list_plugins():
45 |         if modname in plugin_set:
46 |             namespace.add_collection(Collection.from_module(mod))
47 |             if hasattr(mod, "VALIDATORS"):
48 |                 namespace.configure({"run": {"env": common.conf(mod.VALIDATORS)}})
49 |             plugin_set.remove(modname)
50 | 
51 |     if len(plugin_set) > 0:
52 |         sys.exit(f"Unknown plugins: {plugin_set}")
53 | 
54 |     program = L12nProgram(
55 |         binary="l12n",
56 |         namespace=namespace,
57 |         version="0.1.0",
58 |     )
59 |     program.run()
60 | 


--------------------------------------------------------------------------------
/cli/plugins/clickhouse.py:
--------------------------------------------------------------------------------
 1 | """Clickhouse on AWS Lambda"""
 2 | 
 3 | import core
 4 | from common import AWS_REGION
 5 | from invoke import task
 6 | 
 7 | 
 8 | @task(autoprint=True)
 9 | def lambda_example(c, json_output=False, month="01"):
10 |     """SUM(trip_distance) GROUP_BY payment_type with direct FROM s3()"""
11 |     sql = f"""
12 | SELECT payment_type, SUM(trip_distance) 
13 | FROM s3('https://{core.bucket_name(c)}.s3.{AWS_REGION()}.amazonaws.com/nyc-taxi/2019/{month}/*', 'Parquet')
14 | GROUP BY payment_type"""
15 |     if not json_output:
16 |         print(sql)
17 |     return core.run_lambda(c, "clickhouse", sql, json_output=json_output)
18 | 


--------------------------------------------------------------------------------
/cli/plugins/dask.py:
--------------------------------------------------------------------------------
 1 | """Dask on AWS Lambda"""
 2 | 
 3 | import core
 4 | from invoke import task
 5 | 
 6 | 
 7 | @task(autoprint=True)
 8 | def lambda_example(c, json_output=False, month="01"):
 9 |     """SUM(trip_distance) GROUP_BY payment_type"""
10 |     sql = f"""
11 | CREATE TABLE nyctaxi2019{month} WITH (
12 |     location = "s3://{core.bucket_name(c)}/nyc-taxi/2019/{month}/*",
13 |     format = "parquet"
14 | );
15 | 
16 | SELECT payment_type, SUM(trip_distance) 
17 | FROM nyctaxi2019{month}
18 | GROUP BY payment_type
19 | """
20 |     if not json_output:
21 |         print(sql)
22 |     return core.run_lambda(c, "dask", sql, json_output=json_output)
23 | 


--------------------------------------------------------------------------------
/cli/plugins/databend.py:
--------------------------------------------------------------------------------
 1 | """Databend on AWS Lambda"""
 2 | 
 3 | import core
 4 | from common import AWS_REGION
 5 | from invoke import task
 6 | 
 7 | 
 8 | @task(autoprint=True)
 9 | def lambda_example(c, json_output=False, month="01"):
10 |     """SUM(trip_distance) GROUP_BY payment_type"""
11 |     # NOTE: __RUNTIME_PROVIDED__ is interpolated by the handler with actual credentials
12 |     sql = f"""
13 | CREATE STAGE IF NOT EXISTS taxi2019{month}
14 | URL = 's3://{core.bucket_name(c)}/nyc-taxi/2019/{month}/'
15 | CONNECTION = (
16 | __RUNTIME_PROVIDED__
17 | REGION = '{AWS_REGION()}'
18 | )
19 | FILE_FORMAT = (type = 'PARQUET');
20 | 
21 | SELECT payment_type, SUM(trip_distance) 
22 | FROM @taxi2019{month}
23 | GROUP BY payment_type;
24 | """
25 |     if not json_output:
26 |         print(sql)
27 |     return core.run_lambda(c, "databend", sql, json_output=json_output)
28 | 


--------------------------------------------------------------------------------
/cli/plugins/dremio.py:
--------------------------------------------------------------------------------
 1 | """Dremio on AWS Lambda"""
 2 | 
 3 | import core
 4 | from invoke import task
 5 | 
 6 | 
 7 | @task(autoprint=True)
 8 | def lambda_example(c, json_output=False, month="01"):
 9 |     """SUM(trip_distance) GROUP_BY payment_type"""
10 |     sql = f"""
11 | SELECT payment_type, SUM(trip_distance) 
12 | FROM s3source."{core.bucket_name(c)}"."nyc-taxi"."2019"."{month}"
13 | GROUP BY payment_type
14 | """
15 |     if not json_output:
16 |         print(sql)
17 |     return core.run_lambda(c, "dremio", sql, json_output=json_output)
18 | 


--------------------------------------------------------------------------------
/cli/plugins/lambdacli.py:
--------------------------------------------------------------------------------
 1 | """Deployment of the L12N CLI image in Lambda"""
 2 | 
 3 | import base64
 4 | import io
 5 | import json
 6 | import logging
 7 | import os
 8 | import random
 9 | import subprocess
10 | 
11 | import awslambdaric.bootstrap
12 | import dotenv
13 | from common import aws, format_lambda_output, terraform_output
14 | from invoke import Exit, task
15 | 
16 | logging.getLogger().setLevel(logging.INFO)
17 | 
18 | READ_ONLY_REPO_DIR = "/repo"
19 | 
20 | 
21 | @task
22 | def run_bootstrap(c):
23 |     """Call this as the lambda entrypoint"""
24 |     awslambdaric.bootstrap.run(
25 |         f"{READ_ONLY_REPO_DIR}/cli",
26 |         "plugins.lambdacli.handler",
27 |         os.getenv("AWS_LAMBDA_RUNTIME_API"),
28 |     )
29 | 
30 | 
31 | @task(autoprint=True)
32 | def invoke(c, command, sampling=None, json_output=False):
33 |     """Invoke the AWS Lambda function with the CLI image
34 | 
35 |     Commands that need to connect to a Docker server will fail. Local Terraform
36 |     states are not added to the image, so use a remote backend to enable
37 |     commands that use Terraform outputs."""
38 |     lambda_name = terraform_output(c, "lambdacli", "lambda_name")
39 |     cmd_b64 = base64.b64encode(command.encode()).decode()
40 |     body = {"cmd": cmd_b64}
41 |     if sampling is not None:
42 |         body["sampling"] = sampling
43 |     lambda_res = aws("lambda").invoke(
44 |         FunctionName=lambda_name,
45 |         Payload=json.dumps(body).encode(),
46 |         InvocationType="RequestResponse",
47 |     )
48 |     resp_payload = lambda_res["Payload"].read().decode()
49 |     if "FunctionError" in lambda_res:
50 |         raise Exit(message=resp_payload, code=1)
51 |     return format_lambda_output(resp_payload, json_output)
52 | 
53 | 
54 | def handler(event, context):
55 |     """Handler for the AWS Lambda function running the CLI image
56 | 
57 |     Fields in event object:
58 |     - cmd: base64 encoded command to run
59 |     - sampling: 0.25 means 3 out of 4 runs will be randomly canceled"""
60 | 
61 |     # Some gymnastic is required to have the repo in the writable location /tmp
62 |     os.system("rm -rf /tmp/*")
63 |     os.system(f"cp -r {READ_ONLY_REPO_DIR} /tmp")
64 |     os.environ["REPO_DIR"] = f"/tmp{READ_ONLY_REPO_DIR}"
65 |     os.environ["PATH"] = f"{os.environ['PATH']}:/tmp{READ_ONLY_REPO_DIR}"
66 | 
67 |     # Load envfile from secrets
68 |     envfile_str: str = aws("secretsmanager").get_secret_value(
69 |         SecretId=os.environ["ENV_FILE_SECRET_ID"],
70 |         VersionId=os.environ["ENV_FILE_SECRET_VERSION_ID"],
71 |     )["SecretString"]
72 |     dotenv.load_dotenv(stream=io.StringIO(envfile_str), override=True)
73 | 
74 |     cmd = base64.b64decode(event["cmd"]).decode("utf-8")
75 |     if random.uniform(0, 1) > float(event.get("sampling", 1)):
76 |         logging.info(f"Skipping run of CMD: {cmd}")
77 |         return {"stdout": "Run skipped"}
78 |     res = subprocess.Popen(
79 |         ["/bin/bash", "-c", cmd], stdout=subprocess.PIPE, stderr=subprocess.PIPE
80 |     )
81 |     logging.info("""=== CMD ===""")
82 |     logging.info(cmd)
83 |     stdout, stderr = res.communicate()
84 |     logging.info("""=== STDOUT ===""")
85 |     logging.info(stdout.decode())
86 |     logging.info("""=== STDERR ===""")
87 |     logging.info(stderr.decode())
88 |     logging.info("""=== RETURNCODE ===""")
89 |     logging.info(res.returncode)
90 |     if res.returncode != 0:
91 |         raise Exception(stderr.decode())
92 |     return {
93 |         "stdout": stdout.decode(),
94 |         "stderr": stderr.decode(),
95 |     }
96 | 


--------------------------------------------------------------------------------
/cli/plugins/scaling.py:
--------------------------------------------------------------------------------
 1 | """Benchmark scaling Docker based lambdas"""
 2 | 
 3 | import asyncio
 4 | import json
 5 | import random
 6 | import time
 7 | 
 8 | from common import AsyncAWS, aws, terraform_output, wait_deployment
 9 | from invoke import task
10 | 
11 | # Set a sleep duration to make sure every invocation is alocated to a new Lambda
12 | # container and doesn't trigger a warm start
13 | SLEEP_DURATION = 2
14 | 
15 | 
16 | def resize(lambda_name, size_mb) -> str:
17 |     wait_deployment(lambda_name)
18 |     aws("lambda").update_function_configuration(
19 |         FunctionName=lambda_name, MemorySize=size_mb
20 |     )
21 |     wait_deployment(lambda_name)
22 |     response = aws("lambda").publish_version(
23 |         FunctionName=lambda_name,
24 |     )
25 |     return response["Version"]
26 | 
27 | 
28 | async def invoke_batch(nb, lambda_name, version, memory_mb):
29 |     async with AsyncAWS("lambda") as s:
30 |         start_time = time.time()
31 |         cold_starts = 0
32 |         placeholder_size = None
33 |         p90 = None
34 |         p99 = None
35 |         error = None
36 |         # start all invocations at once
37 |         payload_data = json.dumps({"sleep": SLEEP_DURATION}).encode()
38 |         tasks = asyncio.as_completed(
39 |             [s.invoke_lambda(lambda_name, version, payload_data) for _ in range(nb)]
40 |         )
41 |         # iterate through results as they are generated
42 |         for cnt, task in enumerate(tasks, start=1):
43 |             try:
44 |                 res = await task
45 |             except Exception as e:
46 |                 if "We currently do not have sufficient capacity" in str(e):
47 |                     error = "insufficient_capacity"
48 |                     break
49 |                 else:
50 |                     raise e
51 |             if placeholder_size is None:
52 |                 placeholder_size = res["placeholder_size"]
53 |             else:
54 |                 assert placeholder_size == res["placeholder_size"]
55 |             assert memory_mb == res["memory_limit_in_mb"]
56 |             if res["cold_start"]:
57 |                 cold_starts += 1
58 |             # record quantiles when appropriate
59 |             if cnt == int(0.9 * nb):
60 |                 p90 = time.time() - start_time
61 |             elif cnt == int(0.99 * nb):
62 |                 p99 = time.time() - start_time
63 |         if error is None and cold_starts != nb:
64 |             error = "warm_starts"
65 |         external_duration_sec = time.time() - start_time
66 |         return {
67 |             "nb_run": nb,
68 |             "memory_size_mb": memory_mb,
69 |             "sleep_duration_sec": SLEEP_DURATION,
70 |             "placeholder_size": placeholder_size,
71 |             "nb_cold_start": cold_starts,
72 |             "total_duration_sec": external_duration_sec,
73 |             "p90_duration_sec": p90,
74 |             "p99_duration_sec": p99,
75 |             "error": error,
76 |         }
77 | 
78 | 
79 | @task(autoprint=True)
80 | def run(c, nb=128, memory_mb=2048):
81 |     """Run "nb" Lambdas with "memory_mb" size"""
82 |     lambda_names = terraform_output(c, "scaling", "lambda_names").split(",")
83 |     picked_lambda = random.choice(lambda_names)
84 |     version = resize(picked_lambda, memory_mb)
85 |     res = asyncio.run(invoke_batch(nb, picked_lambda, version, memory_mb))
86 |     return [res]
87 | 


--------------------------------------------------------------------------------
/cli/plugins/scheduler.py:
--------------------------------------------------------------------------------
1 | """Periodically execute benchmarks from AWS Lambda"""
2 | 


--------------------------------------------------------------------------------
/cli/plugins/spark.py:
--------------------------------------------------------------------------------
 1 | """Spark on AWS Lambda"""
 2 | 
 3 | import core
 4 | from invoke import task
 5 | 
 6 | 
 7 | @task(autoprint=True)
 8 | def lambda_example_hive(c, json_output=False, month="01"):
 9 |     """SUM(trip_distance) GROUP_BY payment_type with preliminary CREATE EXTERNAL TABLE"""
10 |     sql = f"""
11 | CREATE EXTERNAL TABLE taxi2019{month} (trip_distance FLOAT, payment_type STRING) 
12 | STORED AS PARQUET LOCATION 's3a://{core.bucket_name(c)}/nyc-taxi/2019/{month}/';
13 | SELECT payment_type, SUM(trip_distance) 
14 | FROM taxi2019{month} 
15 | GROUP BY payment_type
16 | """
17 |     if not json_output:
18 |         print(sql)
19 |     return core.run_lambda(c, "spark", sql, json_output=json_output)
20 | 
21 | 
22 | @task(autoprint=True)
23 | def lambda_example_direct(c, json_output=False, month="01"):
24 |     """SUM(trip_distance) GROUP_BY payment_type with direct FROM parquet.s3a://"""
25 |     sql = f"""
26 | SELECT payment_type, SUM(trip_distance) 
27 | FROM parquet.`s3a://{core.bucket_name(c)}/nyc-taxi/2019/{month}/` 
28 | GROUP BY payment_type
29 | """
30 |     if not json_output:
31 |         print(sql)
32 |     return core.run_lambda(c, "spark", sql, json_output=json_output)
33 | 


--------------------------------------------------------------------------------
/cli/plugins/trino.py:
--------------------------------------------------------------------------------
 1 | """Trino on AWS Lambda"""
 2 | 
 3 | import core
 4 | from invoke import task
 5 | 
 6 | 
 7 | @task(autoprint=True)
 8 | def lambda_example(c, json_output=False, month="01"):
 9 |     """SUM(trip_distance) GROUP_BY payment_type with preliminary CREATE EXTERNAL TABLE"""
10 |     sql = f"""
11 | CREATE TABLE hive.default.taxi2019{month} (trip_distance REAL, payment_type VARCHAR)
12 | WITH (
13 |   external_location = 's3a://{core.bucket_name(c)}/nyc-taxi/2019/{month}/',
14 |   format = 'PARQUET'
15 | );
16 | 
17 | SELECT payment_type, SUM(trip_distance)
18 | FROM hive.default.taxi2019{month}
19 | GROUP BY payment_type;
20 | """
21 |     if not json_output:
22 |         print(sql)
23 |     return core.run_lambda(c, "trino", sql, json_output=json_output)
24 | 


--------------------------------------------------------------------------------
/cli/requirements.txt:
--------------------------------------------------------------------------------
1 | aiohttp==3.8.3
2 | awslambdaric==2.0.4
3 | boto3==1.24.27
4 | dynaconf==3.1.9
5 | google-cloud-bigquery==3.3.5
6 | invoke==1.7.1
7 | python-dotenv==0.21.0
8 | requests==2.28.1
9 | 


--------------------------------------------------------------------------------
/docker/ballista/README.md:
--------------------------------------------------------------------------------
 1 | # Ballista lambdatization tricks
 2 | 
 3 | ## List of tricks
 4 | 
 5 | - Since there are not official docker images for ballista, we provide our own
 6 |   build of Ballista with the project's CI.
 7 | - the default config for the scheduler is on standalone using sled. Sled default
 8 |   directory is set to /dev/shm wich is not available in lambda. In order to cover
 9 |   for this we use the inline parameter --sled-dir to change the sled directory
10 |   towards /tmp/scheduler (/tmp being the only writable dir on lambda env)
11 | - For the excecutor the default working dir is on /tmp, when we try to change it
12 |   an internal tmp folder /tmp/executor. The executor failed to start with error:
13 |   `Failed to init Executor RuntimeEnv`
14 | 


--------------------------------------------------------------------------------
/docker/ballista/distributed.Dockerfile:
--------------------------------------------------------------------------------
 1 | 
 2 | ARG BALLISTA_VERSION=0.11.0
 3 | ARG FUNCTION_DIR="/function"
 4 | 
 5 | FROM rust:bullseye as chappy-build
 6 | RUN apt update && apt install -y protobuf-compiler
 7 | RUN mkdir /code
 8 | WORKDIR /code
 9 | COPY chappy/ .
10 | RUN cargo build --release
11 | 
12 | 
13 | FROM curlimages/curl as bin-dependencies
14 | ARG BALLISTA_VERSION
15 | 
16 | RUN curl -L https://github.com/cloudfuse-io/lambdatization/releases/download/ballista-${BALLISTA_VERSION}/ballista-scheduler -o /tmp/ballista-scheduler
17 | RUN curl -L https://github.com/cloudfuse-io/lambdatization/releases/download/ballista-${BALLISTA_VERSION}/ballista-executor -o /tmp/ballista-executor
18 | RUN curl -L https://github.com/cloudfuse-io/lambdatization/releases/download/ballista-${BALLISTA_VERSION}/ballista-cli -o /tmp/ballista-cli
19 | 
20 | 
21 | FROM python:3.10-bullseye as ric-dependency
22 | 
23 | ARG FUNCTION_DIR
24 | ENV DEBIAN_FRONTEND=noninteractive
25 | 
26 | # Install aws-lambda-cpp build dependencies
27 | RUN apt-get update && \
28 |     apt-get -y install\
29 |     g++ \
30 |     make \
31 |     cmake \
32 |     unzip \
33 |     libcurl4-openssl-dev
34 | 
35 | # Include global arg in this stage of the build
36 | ARG FUNCTION_DIR
37 | # Create function directory
38 | RUN mkdir -p ${FUNCTION_DIR}
39 | 
40 | # Copy function code
41 | COPY docker/ballista/distributed-handler.py ${FUNCTION_DIR}/lambda-handler.py
42 | 
43 | # Install the runtime interface client and lambda requirements
44 | RUN pip3 install \
45 |     --target ${FUNCTION_DIR} \
46 |     awslambdaric
47 | 
48 | 
49 | FROM python:3.10-slim-bullseye
50 | ARG FUNCTION_DIR
51 | 
52 | ENV RUST_LOG=warn
53 | ENV RUST_BACKTRACE=full
54 | ENV LD_PRELOAD=/opt/ballista/libchappy.so
55 | 
56 | COPY --from=bin-dependencies /tmp/ballista-scheduler /opt/ballista/ballista-scheduler
57 | COPY --from=bin-dependencies /tmp/ballista-executor /opt/ballista/ballista-executor
58 | COPY --from=bin-dependencies /tmp/ballista-cli /opt/ballista/ballista-cli
59 | COPY --from=chappy-build /code/target/release/libchappy.so /opt/ballista/libchappy.so
60 | COPY --from=chappy-build /code/target/release/chappy-perforator /opt/ballista/chappy-perforator
61 | 
62 | RUN chmod +x /opt/ballista/ballista-scheduler  && \
63 |   chmod +x /opt/ballista/ballista-executor  && \
64 |   chmod +x /opt/ballista/ballista-cli && \
65 |   chmod +x /opt/ballista/chappy-perforator
66 | 
67 | COPY --from=ric-dependency ${FUNCTION_DIR} ${FUNCTION_DIR}
68 | 
69 | WORKDIR ${FUNCTION_DIR}
70 | 
71 | ENTRYPOINT [ "python3", "-m", "awslambdaric" ]
72 | CMD [ "lambda-handler.handler" ]
73 | 


--------------------------------------------------------------------------------
/docker/ballista/docker-compose.yaml:
--------------------------------------------------------------------------------
 1 | version: "3.9"
 2 | services:
 3 |   ballista_standalone:
 4 |     build:
 5 |       context: .
 6 |       dockerfile: standalone.Dockerfile
 7 |     image: cloudfuse-io/l12n:ballista
 8 |     cap_drop:
 9 |       - ALL
10 |     read_only: true
11 |     volumes:
12 |       - ballista-tmp:/tmp
13 |     entrypoint:
14 |       - python3
15 |       - lambda-handler.py
16 |     environment:
17 |       - AWS_ACCESS_KEY_ID=$LAMBDA_ACCESS_KEY_ID
18 |       - AWS_SECRET_ACCESS_KEY=$LAMBDA_SECRET_ACCESS_KEY
19 |       - AWS_SESSION_TOKEN=$LAMBDA_SESSION_TOKEN
20 |       - AWS_DEFAULT_REGION=$L12N_AWS_REGION
21 |       - DATA_BUCKET_NAME
22 |   ballista_distributed:
23 |     build:
24 |       context: ../..
25 |       dockerfile: docker/ballista/distributed.Dockerfile
26 |     image: cloudfuse-io/l12n:ballista-distributed
27 |     cap_drop:
28 |       - ALL
29 |     read_only: true
30 |     volumes:
31 |       - ballista-tmp:/tmp
32 |     entrypoint:
33 |       - python3
34 |       - distributed-handler.py
35 |     environment:
36 |       - AWS_ACCESS_KEY_ID=$LAMBDA_ACCESS_KEY_ID
37 |       - AWS_SECRET_ACCESS_KEY=$LAMBDA_SECRET_ACCESS_KEY
38 |       - AWS_SESSION_TOKEN=$LAMBDA_SESSION_TOKEN
39 |       - AWS_DEFAULT_REGION=$L12N_AWS_REGION
40 |       - DATA_BUCKET_NAME
41 | 
42 | volumes:
43 |   ballista-tmp:
44 | 


--------------------------------------------------------------------------------
/docker/ballista/standalone.Dockerfile:
--------------------------------------------------------------------------------
 1 | 
 2 | ARG BALLISTA_VERSION=0.11.0
 3 | ARG RELEASE_FLAG=release
 4 | ARG FUNCTION_DIR="/function"
 5 | 
 6 | 
 7 | FROM curlimages/curl as bin-dependencies
 8 | ARG BALLISTA_VERSION
 9 | 
10 | RUN curl -L https://github.com/cloudfuse-io/lambdatization/releases/download/ballista-${BALLISTA_VERSION}/ballista-scheduler -o /tmp/ballista-scheduler
11 | RUN curl -L https://github.com/cloudfuse-io/lambdatization/releases/download/ballista-${BALLISTA_VERSION}/ballista-executor -o /tmp/ballista-executor
12 | RUN curl -L https://github.com/cloudfuse-io/lambdatization/releases/download/ballista-${BALLISTA_VERSION}/ballista-cli -o /tmp/ballista-cli
13 | 
14 | 
15 | FROM python:3.10-bullseye as ric-dependency
16 | 
17 | ARG FUNCTION_DIR
18 | ENV DEBIAN_FRONTEND=noninteractive
19 | 
20 | # Install aws-lambda-cpp build dependencies
21 | RUN apt-get update && \
22 |     apt-get -y install\
23 |     g++ \
24 |     make \
25 |     cmake \
26 |     unzip \
27 |     libcurl4-openssl-dev
28 | 
29 | # Include global arg in this stage of the build
30 | ARG FUNCTION_DIR
31 | # Create function directory
32 | RUN mkdir -p ${FUNCTION_DIR}
33 | 
34 | # Copy function code
35 | COPY standalone-handler.py ${FUNCTION_DIR}/lambda-handler.py
36 | 
37 | # Install the runtime interface client and lambda requirements
38 | RUN pip3 install \
39 |     --target ${FUNCTION_DIR} \
40 |     awslambdaric
41 | 
42 | 
43 | FROM python:3.10-slim-bullseye
44 | ARG FUNCTION_DIR
45 | 
46 | ENV RUST_LOG=warn
47 | ENV RUST_BACKTRACE=full
48 | 
49 | COPY --from=bin-dependencies /tmp/ballista-scheduler /opt/ballista/ballista-scheduler
50 | COPY --from=bin-dependencies /tmp/ballista-executor /opt/ballista/ballista-executor
51 | COPY --from=bin-dependencies /tmp/ballista-cli /opt/ballista/ballista-cli
52 | 
53 | RUN chmod +x /opt/ballista/ballista-scheduler  && \
54 |   chmod +x /opt/ballista/ballista-executor  && \
55 |   chmod +x /opt/ballista/ballista-cli
56 | 
57 | COPY --from=ric-dependency ${FUNCTION_DIR} ${FUNCTION_DIR}
58 | 
59 | WORKDIR ${FUNCTION_DIR}
60 | 
61 | ENTRYPOINT [ "python3", "-m", "awslambdaric" ]
62 | CMD [ "lambda-handler.handler" ]
63 | 


--------------------------------------------------------------------------------
/docker/chappy/build.Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM rust:bullseye as build
 2 | ARG BUILD_FLAG=""
 3 | 
 4 | RUN apt update && apt install -y protobuf-compiler
 5 | 
 6 | RUN mkdir /code
 7 | 
 8 | WORKDIR /code
 9 | COPY . .
10 | 
11 | RUN --mount=type=cache,target=./target \
12 |   --mount=type=cache,target=/usr/local/cargo/git \
13 |   --mount=type=cache,target=/usr/local/cargo/registry \
14 |   cargo build ${BUILD_FLAG} && \
15 |   cp -r ./target /target
16 | 


--------------------------------------------------------------------------------
/docker/chappy/dev.Dockerfile:
--------------------------------------------------------------------------------
 1 | 
 2 | ARG FUNCTION_DIR="/function"
 3 | 
 4 | FROM python:3.10-bullseye
 5 | RUN apt-get update && \
 6 |     apt-get install -y \
 7 |         g++ \
 8 |         make \
 9 |         cmake \
10 |         unzip \
11 |         apt-transport-https \
12 |         ca-certificates \
13 |         libcurl4-openssl-dev && \
14 |     apt-get clean && \
15 |     rm -rf /var/lib/apt/lists/* && \
16 |     rm -rf /var/cache/apt/*
17 | 
18 | RUN apt-get update && \
19 |     apt-get install -y alien && \
20 |     curl -O https://lambda-insights-extension.s3-ap-northeast-1.amazonaws.com/amazon_linux/lambda-insights-extension.rpm && \
21 |     alien --to-deb lambda-insights-extension.rpm -i && \
22 |     rm -f lambda-insights-extension.rpm && \
23 |     apt-get remove -y alien && \
24 |     apt-get clean && \
25 |     rm -rf /var/lib/apt/lists/* && \
26 |     rm -rf /var/cache/apt/*
27 | 
28 | ARG FUNCTION_DIR
29 | 
30 | RUN mkdir -p ${FUNCTION_DIR}
31 | 
32 | RUN pip3 install \
33 |     --target ${FUNCTION_DIR} \
34 |     awslambdaric \
35 |     boto3
36 | 
37 | WORKDIR ${FUNCTION_DIR}
38 | COPY dev-handler.py .
39 | 
40 | ENTRYPOINT [ "python3", "-m", "awslambdaric" ]
41 | CMD [ "dev-handler.handler" ]
42 | 


--------------------------------------------------------------------------------
/docker/chappy/docker-compose.yaml:
--------------------------------------------------------------------------------
 1 | version: "3.9"
 2 | services:
 3 |   chappydev:
 4 |     build:
 5 |       dockerfile: dev.Dockerfile
 6 |     image: cloudfuse-io/l12n:chappy-dev
 7 |     cap_drop:
 8 |       - ALL
 9 |     read_only: true
10 |     volumes:
11 |       - chappy-dev-tmp:/tmp
12 |     entrypoint:
13 |       - python3
14 |       - dev-handler.py
15 |     environment:
16 |       - AWS_REGION=$L12N_AWS_REGION
17 |       - AWS_ACCESS_KEY_ID=$LAMBDA_ACCESS_KEY_ID
18 |       - AWS_SECRET_ACCESS_KEY=$LAMBDA_SECRET_ACCESS_KEY
19 |       - AWS_SESSION_TOKEN=$LAMBDA_SESSION_TOKEN
20 |       - DATA_BUCKET_NAME
21 | 
22 | volumes:
23 |   chappy-dev-tmp:
24 | 


--------------------------------------------------------------------------------
/docker/cli/README.md:
--------------------------------------------------------------------------------
 1 | # L12N CLI image
 2 | 
 3 | This image provides an "all in one" environment to run the various operations
 4 | that are defined by the L12N CLI:
 5 | - it can be executed locally by running the `l12n-shell` or inside lambda using
 6 |   the `lambdacli` module
 7 | - it takes care of both pinning dependencies and setting the right environment
 8 |   variables with sensible defaults
 9 | 
10 | ## Configuration
11 | 
12 | The following environment variables are expected to be configured in the final
13 | runtime:
14 | - `REPO_DIR` the root directory of the repository
15 | - `CALLING_DIR` the current working directory when calling this script
16 | 
17 | If using the `cli` target (or `entrypoint.sh` in general), also provide:
18 | - `HOST_UID` user ID of the host system caller
19 | - `HOST_GID` group ID of the host system caller
20 | - `HOST_DOCKER_GID` group ID assigned to the docker socket
21 | 


--------------------------------------------------------------------------------
/docker/cli/docker-compose.yaml:
--------------------------------------------------------------------------------
 1 | version: "3.9"
 2 | services:
 3 |   lambdacli:
 4 |     build:
 5 |       context: ../..
 6 |       dockerfile: docker/cli/Dockerfile
 7 |       target: lambda
 8 |     image: cloudfuse-io/l12n:lambdacli
 9 |     cap_drop:
10 |       - ALL
11 |     read_only: true
12 |     user: nobody
13 |     environment:
14 |       - AWS_ACCESS_KEY_ID=$LAMBDA_ACCESS_KEY_ID
15 |       - AWS_SECRET_ACCESS_KEY=$LAMBDA_SECRET_ACCESS_KEY
16 |       - AWS_SESSION_TOKEN=$LAMBDA_SESSION_TOKEN
17 |       - L12N_AWS_REGION
18 |       - L12N_PLUGINS=lambdaric
19 |     ulimits:
20 |       nofile:
21 |         soft: 1024
22 |         hard: 1024
23 | 


--------------------------------------------------------------------------------
/docker/cli/entrypoint.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # This script must be executed with elevated priviledges (root)
 4 | 
 5 | # Using bindfs, we map the host UID/GID to the local container user defined with
 6 | # CONTAINER_USER and CONTAINER_GROUP. The volume to be shared with the host
 7 | # should be mounted on /mnt/host and will be made accessible to the container
 8 | # user at /host
 9 | 
10 | # Arguments:
11 | # - The first argument should be the path on the host
12 | # - The following arguments are commands that will be executed in the user space
13 | # mounted directory /host
14 | 
15 | set -e
16 | 
17 | if [[ -z "$CONTAINER_USER" || -z "$CONTAINER_GROUP" ]]; then
18 |     echo 'ERROR: set CONTAINER_USER and CONTAINER_GROUP variables to the target user' >&2
19 |     exit 1
20 | fi
21 | 
22 | if [[ -z "$HOST_UID" || -z "$HOST_GID" || -z "$HOST_DOCKER_GID" ]]; then
23 |     echo 'ERROR: set HOST_UID=$(id -u), HOST_GID=$(id -g) and HOST_DOCKER_GID=$(stat -c %g ${HOST_DOCKER_SOCKET})' >&2
24 |     exit 1
25 | fi
26 | 
27 | groupadd -g $HOST_DOCKER_GID -o hostdocker
28 | usermod --append --groups hostdocker $CONTAINER_USER
29 | 
30 | mkdir /host
31 | # bindfs enables us to maintain permission consistency on the bound host
32 | # the /host volume can be written to safely by the provided user
33 | bindfs --force-user=$CONTAINER_USER --force-group=$CONTAINER_GROUP \
34 |     --create-for-user=$HOST_UID --create-for-group=$HOST_GID \
35 |     --chown-ignore --chgrp-ignore \
36 |     /mnt/host /host
37 | 
38 | # Using Dockerfile WORKDIR creates a sort of race with bindfs so we use `cd` instead
39 | cd $CALLING_DIR
40 | ln -s $REPO_DIR/l12n /usr/local/bin/l12n
41 | 
42 | # Drop privileges and execute next container command, or 'bash' if not specified.
43 | EXEC_CMD="exec sudo --preserve-env=PATH --preserve-env --set-home --user=$CONTAINER_USER --"
44 | if [[ $# -gt 0 ]]; then
45 |     echo "$@" | $EXEC_CMD bash
46 | else
47 |     $EXEC_CMD bash
48 | fi
49 | 


--------------------------------------------------------------------------------
/docker/clickhouse/Dockerfile:
--------------------------------------------------------------------------------
 1 | 
 2 | ARG CLICKHOUSE_VERSION=22.10.2.11
 3 | ARG FUNCTION_DIR="/function"
 4 | 
 5 | 
 6 | FROM ubuntu:20.04 as ric-dependency
 7 | 
 8 | ENV DEBIAN_FRONTEND=noninteractive 
 9 | 
10 | RUN apt-get update && \
11 |     apt-get install -y \
12 |     g++ \
13 |     make \
14 |     cmake \
15 |     unzip \
16 |     python3 \
17 |     python3-pip \
18 |     libcurl4-openssl-dev
19 | ARG FUNCTION_DIR
20 | RUN mkdir -p ${FUNCTION_DIR}
21 | RUN pip3 install \
22 |     --target ${FUNCTION_DIR} \
23 |     awslambdaric
24 | COPY lambda-handler.py ${FUNCTION_DIR}
25 | 
26 | 
27 | FROM ghcr.io/cloudfuse-io/lambdatization:clickhouse-v$CLICKHOUSE_VERSION-patch
28 | ARG FUNCTION_DIR
29 | 
30 | RUN apt-get update -y && \
31 |     apt-get install -y python3 && \
32 |     apt-get clean && \
33 |     rm -rf /var/lib/apt/lists/* && \
34 |     rm -rf /var/cache/apt/*
35 | 
36 | COPY --from=ric-dependency ${FUNCTION_DIR} ${FUNCTION_DIR}
37 | COPY ./config.xml /etc/clickhouse-server/config.xml
38 | ENV CLICKHOUSE_WATCHDOG_ENABLE=0
39 | RUN rm /etc/clickhouse-server/config.d/docker_related_config.xml
40 | 
41 | WORKDIR ${FUNCTION_DIR}
42 | 
43 | ENTRYPOINT [ "python3", "-m", "awslambdaric" ]
44 | CMD [ "lambda-handler.handler" ]
45 | 


--------------------------------------------------------------------------------
/docker/clickhouse/README.md:
--------------------------------------------------------------------------------
 1 | # ClickHouse lambdatization tricks
 2 | 
 3 | ## List of tricks
 4 | 
 5 | - Clickhouse offers an Alpine version of its image, but we favor the Ubuntu one
 6 |   as we also used Debian/Ubuntu for the other images. The Alpine image is only
 7 |   30MB smaller so the gain wouldn't be huge anyway.
 8 | - Lambda does not support `prctl` with the `PR_SET_NAME` flag. We provide a
 9 |   [custom build](/.github/workflows/helper-clickhouse.yaml) that doesn't raise
10 |   an exception when that call fails. To create a new image:
11 |   - setup an [Ubuntu 20.04 runner](/.github/gh-runner-setup.sh)
12 |   - create a branch in [cloudfuse-io/ClickHouse][cloudfuse_clickhouse_fork] with
13 |     the patch
14 |   - run the ClickHouse [build action][clickhouse_build_action]
15 | - For some reason, `libunwind` doesn't seem to work on lambda. In the custom
16 |   build we tried to disable it (`-DUSE_UNWIND=0`), but then CMake didn't have
17 |   libgcc_eh available, so we force link to it instead
18 |   
19 | ```
20 | target_link_libraries(cxxabi PUBLIC /usr/lib/gcc/x86_64-linux-gnu/9/libgcc_eh.a)
21 | ```
22 | 
23 | [cloudfuse_clickhouse_fork]: https://github.com/cloudfuse-io/ClickHouse/branches
24 | [clickhouse_build_action]: https://github.com/cloudfuse-io/lambdatization/actions/workflows/helper-clickhouse.yaml
25 | 


--------------------------------------------------------------------------------
/docker/clickhouse/docker-compose.yaml:
--------------------------------------------------------------------------------
 1 | version: "3.9"
 2 | services:
 3 |   clickhouse:
 4 |     build: .
 5 |     image: cloudfuse-io/l12n:clickhouse
 6 |     cap_drop:
 7 |       - ALL
 8 |     read_only: true
 9 |     user: nobody
10 |     volumes:
11 |       - clickhouse-tmp:/tmp
12 |     entrypoint:
13 |       # - bash
14 |       - python3
15 |       - lambda-handler.py
16 |     environment:
17 |       - AWS_ACCESS_KEY_ID=$LAMBDA_ACCESS_KEY_ID
18 |       - AWS_SECRET_ACCESS_KEY=$LAMBDA_SECRET_ACCESS_KEY
19 |       - AWS_SESSION_TOKEN=$LAMBDA_SESSION_TOKEN
20 |       - AWS_REGION=$L12N_AWS_REGION
21 |       - DATA_BUCKET_NAME
22 |     ulimits:
23 |       nofile:
24 |         soft: 1024
25 |         hard: 1024
26 | 
27 | volumes:
28 |   clickhouse-tmp:
29 | 


--------------------------------------------------------------------------------
/docker/dask/Dockerfile:
--------------------------------------------------------------------------------
 1 | 
 2 | ARG DASK_VERSION=2022.10.0-py3.9
 3 | ARG FUNCTION_DIR="/function"
 4 | 
 5 | 
 6 | FROM daskdev/dask:$DASK_VERSION
 7 | ARG FUNCTION_DIR
 8 | 
 9 | RUN mamba install -y \
10 |         dask-sql \
11 |         pyarrow \
12 |         s3fs \
13 |         awslambdaric \
14 |     && mamba clean -tipy \
15 |     && find /opt/conda/ -type f,l -name '*.a' -delete \
16 |     && find /opt/conda/ -type f,l -name '*.pyc' -delete \
17 |     && find /opt/conda/ -type f,l -name '*.js.map' -delete \
18 |     && find /opt/conda/lib/python*/site-packages/bokeh/server/static -type f,l -name '*.js' -not -name '*.min.js' -delete \
19 |     && rm -rf /opt/conda/pkgs
20 | 
21 | RUN mkdir -p ${FUNCTION_DIR}
22 | COPY lambda-handler.py ${FUNCTION_DIR}
23 | WORKDIR ${FUNCTION_DIR}
24 | 
25 | ENTRYPOINT [ "python3", "-m", "awslambdaric" ]
26 | CMD [ "lambda-handler.handler" ]
27 | 


--------------------------------------------------------------------------------
/docker/dask/README.md:
--------------------------------------------------------------------------------
1 | # Dask lambdatization tricks
2 | 
3 | ## Tricks
4 | 
5 | - We needed to disable "nanny" (`--no-nanny`) because it's using
6 |   `multiprocessing` features that don't work on Lambda (because of missing
7 |   `/dev/shm`)
8 | 


--------------------------------------------------------------------------------
/docker/dask/docker-compose.yaml:
--------------------------------------------------------------------------------
 1 | version: "3.9"
 2 | services:
 3 |   dask:
 4 |     build: .
 5 |     image: cloudfuse-io/l12n:dask
 6 |     cap_drop:
 7 |       - ALL
 8 |     read_only: true
 9 |     volumes:
10 |       - dask-tmp:/tmp
11 |     entrypoint:
12 |       - python3
13 |       - lambda-handler.py
14 |     environment:
15 |       - AWS_REGION=$L12N_AWS_REGION
16 |       - AWS_ACCESS_KEY_ID=$LAMBDA_ACCESS_KEY_ID
17 |       - AWS_SECRET_ACCESS_KEY=$LAMBDA_SECRET_ACCESS_KEY
18 |       - AWS_SESSION_TOKEN=$LAMBDA_SESSION_TOKEN
19 |       - DATA_BUCKET_NAME
20 | 
21 | volumes:
22 |   dask-tmp:
23 | 


--------------------------------------------------------------------------------
/docker/dask/lambda-handler.py:
--------------------------------------------------------------------------------
  1 | import base64
  2 | import logging
  3 | import os
  4 | import socket
  5 | import subprocess
  6 | import sys
  7 | import time
  8 | from contextlib import closing
  9 | 
 10 | from dask_sql import Context
 11 | from distributed import Client
 12 | 
 13 | logging.getLogger().setLevel(logging.INFO)
 14 | 
 15 | 
 16 | IS_COLD_START = True
 17 | CLIENT = None
 18 | CONTEXT = None
 19 | 
 20 | 
 21 | def init():
 22 |     global CLIENT, CONTEXT
 23 |     subprocess.Popen(
 24 |         ["dask-scheduler"], stdout=sys.stdout, stderr=sys.stderr, bufsize=1
 25 |     )
 26 |     while True:
 27 |         with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
 28 |             if sock.connect_ex(("localhost", 8786)) == 0:
 29 |                 break
 30 | 
 31 |     subprocess.Popen(
 32 |         ["dask-worker", "tcp://localhost:8786", "--no-nanny"],
 33 |         stdout=sys.stdout,
 34 |         stderr=sys.stderr,
 35 |         bufsize=1,
 36 |     )
 37 |     # Client is somehow implicitely used by Dask
 38 |     CLIENT = Client("localhost:8786")
 39 |     CONTEXT = Context()
 40 | 
 41 | 
 42 | def query(sql: str) -> str:
 43 |     """Splits the sql statements and return the result of the last one"""
 44 |     plan = CONTEXT.sql(sql)
 45 |     # CREATE TABLE statements return None as plan
 46 |     if plan is not None:
 47 |         return str(CONTEXT.sql(sql).compute())
 48 |     else:
 49 |         return "No plan to compute"
 50 | 
 51 | 
 52 | def handler(event, context):
 53 |     """AWS Lambda handler"""
 54 |     start = time.time()
 55 |     global IS_COLD_START
 56 |     is_cold_start = IS_COLD_START
 57 |     IS_COLD_START = False
 58 |     if is_cold_start:
 59 |         init()
 60 |     src_command = base64.b64decode(event["query"]).decode("utf-8")
 61 | 
 62 |     logging.debug(CLIENT.scheduler_info())
 63 |     resp = ""
 64 |     parsed_queries = []
 65 |     for sql in src_command.split(";"):
 66 |         sql = sql.strip()
 67 |         if sql == "":
 68 |             continue
 69 |         parsed_queries.append(sql)
 70 |         resp = query(sql)
 71 |     logging.debug(CLIENT.scheduler_info())
 72 | 
 73 |     result = {
 74 |         "resp": resp,
 75 |         "logs": "",
 76 |         "parsed_queries": parsed_queries,
 77 |         "context": {
 78 |             "cold_start": is_cold_start,
 79 |             "handler_duration_sec": time.time() - start,
 80 |         },
 81 |     }
 82 |     return result
 83 | 
 84 | 
 85 | if __name__ == "__main__":
 86 |     query_str = f"""
 87 | CREATE TABLE nyctaxi WITH (
 88 |     location = "s3://{os.getenv("DATA_BUCKET_NAME")}/nyc-taxi/2019/01/*",
 89 |     format = "parquet"
 90 | );
 91 | 
 92 | SELECT payment_type, SUM(trip_distance) 
 93 | FROM nyctaxi
 94 | GROUP BY payment_type
 95 | """
 96 |     res = handler(
 97 |         {"query": base64.b64encode(query_str.encode("utf-8"))},
 98 |         {},
 99 |     )
100 |     print(res)
101 | 


--------------------------------------------------------------------------------
/docker/databend/Dockerfile:
--------------------------------------------------------------------------------
 1 | 
 2 | ARG DATABEND_VERSION=v1.1.55-nightly
 3 | ARG FUNCTION_DIR="/function"
 4 | 
 5 | 
 6 | FROM ubuntu:22.04 as download
 7 | ARG DATABEND_VERSION
 8 | RUN apt update && apt install -y curl
 9 | RUN DB_REL="https://github.com/datafuselabs/databend/releases" && \
10 |     curl -LO $DB_REL/download/${DATABEND_VERSION}/databend-${DATABEND_VERSION}-x86_64-unknown-linux-gnu.tar.gz
11 | RUN mkdir -p /databend/target/release/ && \
12 |     tar -xvf databend-${DATABEND_VERSION}-x86_64-unknown-linux-gnu.tar.gz -C /databend/target/release/ --strip-components=1 bin
13 | 
14 | 
15 | FROM ubuntu:22.04 as ric-dependency
16 | RUN apt-get update && \
17 |     apt-get install -y \
18 |     g++ \
19 |     make \
20 |     cmake \
21 |     unzip \
22 |     python3 \
23 |     python3-pip \
24 |     libcurl4-openssl-dev
25 | ARG FUNCTION_DIR
26 | RUN mkdir -p ${FUNCTION_DIR}
27 | RUN pip3 install \
28 |     --target ${FUNCTION_DIR} \
29 |     awslambdaric \
30 |     requests
31 | COPY lambda-handler.py ${FUNCTION_DIR}
32 | 
33 | 
34 | FROM ubuntu:22.04
35 | ARG FUNCTION_DIR
36 | 
37 | RUN apt-get update -y && \
38 |     apt-get install -y apt-transport-https ca-certificates python3 && \
39 |     apt-get clean && \
40 |     rm -rf /var/lib/apt/lists/* && \
41 |     rm -rf /var/cache/apt/*
42 | 
43 | COPY --from=download /databend/target/release/databend-query /databend-query
44 | COPY --from=download /databend/target/release/databend-meta /databend-meta
45 | COPY ./bootstrap.sh /bootstrap.sh
46 | COPY ./databend-query.toml /databend-query.toml
47 | COPY ./databend-meta.toml /databend-meta.toml
48 | COPY --from=ric-dependency ${FUNCTION_DIR} ${FUNCTION_DIR}
49 | WORKDIR ${FUNCTION_DIR}
50 | 
51 | ENTRYPOINT [ "python3", "-m", "awslambdaric" ]
52 | CMD [ "lambda-handler.handler" ]
53 | 


--------------------------------------------------------------------------------
/docker/databend/README.md:
--------------------------------------------------------------------------------
1 | # Databend lambdatization tricks
2 | 


--------------------------------------------------------------------------------
/docker/databend/bootstrap.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | /databend-meta -c /databend-meta.toml &> /tmp/databend-meta.log  &
 4 | P1=$!
 5 | # add health check to remove the race condition issue during databend-query bootstrap
 6 | sleep 1
 7 | /databend-query -c /databend-query.toml &> /tmp/databend-query.log  &
 8 | P2=$!
 9 | 
10 | tail -f /tmp/databend-query.log &
11 | P3=$!
12 | 
13 | tail -f /tmp/databend-meta.log &
14 | P4=$!
15 | wait $P1 $P2 $P3 $P4
16 | 


--------------------------------------------------------------------------------
/docker/databend/databend-meta.toml:
--------------------------------------------------------------------------------
 1 | log_dir = "/tmp/logs"
 2 | admin_api_address = "0.0.0.0:28101"
 3 | grpc_api_address = "0.0.0.0:9191"
 4 | # databend-query fetch this address to update its databend-meta endpoints list,
 5 | # in case databend-meta cluster changes.
 6 | grpc_api_advertise_host = "127.0.0.1"
 7 | 
 8 | [raft_config]
 9 | id = 1
10 | raft_dir = "/tmp/raft"
11 | raft_api_port = 28103
12 | 
13 | # Assign raft_{listen|advertise}_host in test config.
14 | # This allows you to catch a bug in unit tests when something goes wrong in raft meta nodes communication.
15 | raft_listen_host = "127.0.0.1"
16 | raft_advertise_host = "localhost"
17 | 
18 | # Start up mode: single node cluster
19 | single = true
20 | 


--------------------------------------------------------------------------------
/docker/databend/databend-query.toml:
--------------------------------------------------------------------------------
 1 | [query]
 2 | max_active_sessions = 256
 3 | wait_timeout_mills = 5000
 4 | 
 5 | # For flight rpc.
 6 | flight_api_address = "0.0.0.0:9090"
 7 | 
 8 | # Databend Query http address.
 9 | # For admin RESET API.
10 | admin_api_address = "0.0.0.0:8080"
11 | 
12 | # Databend Query metrics RESET API.
13 | metric_api_address = "0.0.0.0:7070"
14 | 
15 | # Databend Query MySQL Handler.
16 | mysql_handler_host = "0.0.0.0"
17 | mysql_handler_port = 3307
18 | 
19 | 
20 | # Databend Query ClickHouse HTTP Handler.
21 | clickhouse_http_handler_host = "0.0.0.0"
22 | clickhouse_http_handler_port = 8124
23 | 
24 | # Query Handler: HTTP API
25 | http_handler_host = "0.0.0.0"
26 | http_handler_port = 8000
27 | 
28 | tenant_id = "test_tenant"
29 | cluster_id = "test_cluster"
30 | 
31 | [log]
32 | level = "INFO"
33 | dir = "/tmp/logs"
34 | 
35 | [meta]
36 | # To enable embedded meta-store, set address to ""
37 | endpoints = ["0.0.0.0:9191"]
38 | username = "root"
39 | password = "root"
40 | client_timeout_in_second = 60
41 | 
42 | # Storage config.
43 | [storage]
44 | type = "fs"
45 | 
46 | # fs storage.
47 | [storage.fs]
48 | data_path = "/tmp/data"
49 | 
50 | [cache]
51 | # Type of storage to keep the table data cache
52 | data_cache_storage = "none"
53 | 


--------------------------------------------------------------------------------
/docker/databend/docker-compose.yaml:
--------------------------------------------------------------------------------
 1 | version: "3.9"
 2 | services:
 3 |   databend:
 4 |     build: .
 5 |     image: cloudfuse-io/l12n:databend
 6 |     cap_drop:
 7 |       - ALL
 8 |     read_only: true
 9 |     volumes:
10 |       - databend-tmp:/tmp
11 |     entrypoint:
12 |       - python3
13 |       - lambda-handler.py
14 |     environment:
15 |       - AWS_REGION=$L12N_AWS_REGION
16 |       - AWS_ACCESS_KEY_ID=$LAMBDA_ACCESS_KEY_ID
17 |       - AWS_SECRET_ACCESS_KEY=$LAMBDA_SECRET_ACCESS_KEY
18 |       - AWS_SESSION_TOKEN=$LAMBDA_SESSION_TOKEN
19 |       - DATA_BUCKET_NAME
20 | 
21 | volumes:
22 |   databend-tmp:
23 | 


--------------------------------------------------------------------------------
/docker/databend/lambda-handler.py:
--------------------------------------------------------------------------------
  1 | import base64
  2 | import logging
  3 | import os
  4 | import subprocess
  5 | import sys
  6 | import time
  7 | 
  8 | import requests
  9 | import requests.auth
 10 | 
 11 | logging.getLogger().setLevel(logging.INFO)
 12 | 
 13 | 
 14 | IS_COLD_START = True
 15 | SESSION_CREDENTIALS = f"""
 16 | ACCESS_KEY_ID ='{os.getenv("AWS_ACCESS_KEY_ID")}' 
 17 | SECRET_ACCESS_KEY ='{os.getenv("AWS_SECRET_ACCESS_KEY")}' 
 18 | SESSION_TOKEN ='{os.getenv("AWS_SESSION_TOKEN")}'
 19 | """
 20 | 
 21 | 
 22 | def init():
 23 |     """Start Databend server"""
 24 |     subprocess.Popen(["/bootstrap.sh"], stdout=sys.stdout, stderr=sys.stderr, bufsize=1)
 25 | 
 26 | 
 27 | def query(sql, timeout):
 28 |     """Try to connect to server until timeout is reached to run the query"""
 29 |     # Run query
 30 |     start_time = time.time()
 31 |     logging.info(f"Running {sql}")
 32 |     sql = sql.replace("__RUNTIME_PROVIDED__", SESSION_CREDENTIALS)
 33 |     while True:
 34 |         try:
 35 |             basic = requests.auth.HTTPBasicAuth("root", "root")
 36 |             resp = requests.post(
 37 |                 "http://localhost:8000/v1/query/",
 38 |                 headers={"Content-Type": "application/json"},
 39 |                 auth=basic,
 40 |                 json={"sql": sql, "pagination": {"wait_time_secs": 1000}},
 41 |             )
 42 |             json_resp = resp.json()
 43 |             if "error" in json_resp and json_resp["error"] is not None:
 44 |                 raise Exception(json_resp["error"]["message"])
 45 |             resp.raise_for_status()
 46 |             return json_resp
 47 |         except requests.exceptions.ConnectionError:
 48 |             if time.time() - start_time < timeout:
 49 |                 time.sleep(0.2)
 50 |             else:
 51 |                 raise Exception("Attempt to run SQL query timed out")
 52 | 
 53 | 
 54 | def handler(event, context):
 55 |     """AWS Lambda handler"""
 56 |     start = time.time()
 57 |     global IS_COLD_START
 58 |     is_cold_start = IS_COLD_START
 59 |     IS_COLD_START = False
 60 |     if is_cold_start:
 61 |         init()
 62 |     src_command = base64.b64decode(event["query"]).decode("utf-8")
 63 | 
 64 |     resp = ""
 65 |     parsed_queries = []
 66 |     for sql in src_command.split(";"):
 67 |         sql = sql.strip()
 68 |         if sql == "":
 69 |             continue
 70 |         resp = query(sql, 30)
 71 |         parsed_queries.append(sql)
 72 | 
 73 |     result = {
 74 |         "resp": resp,
 75 |         "logs": "",
 76 |         "parsed_queries": parsed_queries,
 77 |         "context": {
 78 |             "cold_start": is_cold_start,
 79 |             "handler_duration_sec": time.time() - start,
 80 |         },
 81 |     }
 82 |     return result
 83 | 
 84 | 
 85 | if __name__ == "__main__":
 86 |     query_str = f"""
 87 | CREATE TRANSIENT TABLE taxi201901
 88 | (
 89 |     payment_type VARCHAR,
 90 |     trip_distance FLOAT
 91 | );
 92 | 
 93 | COPY INTO taxi201901
 94 |   FROM 's3://{os.getenv("DATA_BUCKET_NAME")}/nyc-taxi/2019/01/'
 95 |   credentials=(__RUNTIME_PROVIDED__)
 96 |   pattern ='.*[.]parquet'
 97 |   file_format = (type = 'PARQUET');
 98 | 
 99 | SELECT payment_type, SUM(trip_distance) 
100 | FROM taxi201901
101 | GROUP BY payment_type;
102 | """
103 |     res = handler(
104 |         {"query": base64.b64encode(query_str.encode("utf-8"))},
105 |         {},
106 |     )
107 |     print(res)
108 | 


--------------------------------------------------------------------------------
/docker/dremio/Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG DREMIO_VERSION=22.1
 2 | ARG FUNCTION_DIR="/function"
 3 | 
 4 | FROM debian:11.4 as ric-dependency
 5 | RUN apt-get update && \
 6 |     apt-get install -y \
 7 |     g++ \
 8 |     make \
 9 |     cmake \
10 |     unzip \
11 |     python3 \
12 |     python3-pip \
13 |     libcurl4-openssl-dev
14 | ARG FUNCTION_DIR
15 | RUN mkdir -p ${FUNCTION_DIR}
16 | RUN pip3 install \
17 |     --target ${FUNCTION_DIR} \
18 |     awslambdaric requests
19 | COPY lambda-handler.py ${FUNCTION_DIR}
20 | 
21 | 
22 | FROM dremio/dremio-oss:$DREMIO_VERSION
23 | 
24 | ARG FUNCTION_DIR
25 | 
26 | COPY --from=ric-dependency ${FUNCTION_DIR} ${FUNCTION_DIR}
27 | COPY ./dremio.conf /opt/dremio/conf/
28 | COPY ./dremio-env /opt/dremio/conf/
29 | 
30 | USER root
31 | RUN  mkdir -p /var/lib/dremio/dremio/.aws && ln -s /tmp/aws/credentials /var/lib/dremio/dremio/.aws/credentials
32 | RUN  mkdir -p /home/sbx_user1051/.aws && ln -s /tmp/aws/credentials /home/sbx_user1051/.aws/credentials
33 | USER dremio
34 | 
35 | WORKDIR ${FUNCTION_DIR}
36 | 
37 | ENTRYPOINT [ "python3", "-m", "awslambdaric" ]
38 | CMD [ "lambda-handler.handler" ]
39 | 


--------------------------------------------------------------------------------
/docker/dremio/README.md:
--------------------------------------------------------------------------------
 1 | # Dremio lambdatization tricks
 2 | 
 3 | - Move the local path to `/tmp` as it is the only writeable one on lambda
 4 | - create a Dremio user and use its credentials to:
 5 |   - create a source
 6 |   - start the query
 7 |   - poll for the result
 8 | - By default Dremio tries to discover its private IP and uses that to
 9 |   communicate. We want to loopback on `localhost` instead, hence the
10 |   configuration `registration.publish-host: "localhost"`
11 | 


--------------------------------------------------------------------------------
/docker/dremio/docker-compose.yaml:
--------------------------------------------------------------------------------
 1 | version: "3.9"
 2 | services:
 3 |   dremio:
 4 |     build: .
 5 |     image: cloudfuse-io/l12n:dremio
 6 |     cap_drop:
 7 |       - ALL
 8 |     read_only: true
 9 |     volumes:
10 |       - dremio-tmp:/tmp
11 |     entrypoint:
12 |       - python3
13 |       - lambda-handler.py
14 |     environment:
15 |       - AWS_REGION=$L12N_AWS_REGION
16 |       - AWS_ACCESS_KEY_ID=$LAMBDA_ACCESS_KEY_ID
17 |       - AWS_SECRET_ACCESS_KEY=$LAMBDA_SECRET_ACCESS_KEY
18 |       - AWS_SESSION_TOKEN=$LAMBDA_SESSION_TOKEN
19 |       - DATA_BUCKET_NAME
20 | 
21 | volumes:
22 |   dremio-tmp:
23 | 


--------------------------------------------------------------------------------
/docker/dremio/dremio-env:
--------------------------------------------------------------------------------
  1 | #
  2 | # Dremio environment variables used by Dremio daemon
  3 | #
  4 | 
  5 | #
  6 | # Directory where Dremio logs are written
  7 | # Default to $DREMIO_HOME/log
  8 | #
  9 | DREMIO_LOG_DIR=/tmp/log
 10 | 
 11 | #
 12 | # Send logs to console and not to log files. The DREMIO_LOG_DIR is ignored if set.
 13 | #
 14 | # DREMIO_LOG_TO_CONSOLE=1
 15 | 
 16 | #
 17 | # Directory where Dremio pidfiles are written
 18 | # Default to $DREMIO_HOME/run
 19 | #
 20 | DREMIO_PID_DIR=/tmp/run
 21 | 
 22 | #
 23 | # Max total memory size (in MB) for the Dremio process
 24 | #
 25 | # If not set, default to using max heap and max direct.
 26 | #
 27 | # If both max heap and max direct are set, this is not used
 28 | # If one is set, the other is calculated as difference
 29 | # of max memory and the one that is set.
 30 | #
 31 | #DREMIO_MAX_MEMORY_SIZE_MB=
 32 | 
 33 | #
 34 | # Max heap memory size (in MB) for the Dremio process
 35 | #
 36 | # Default to 4096 for server
 37 | #
 38 | #DREMIO_MAX_HEAP_MEMORY_SIZE_MB=4096
 39 | 
 40 | #
 41 | # Max direct memory size (in MB) for the Dremio process
 42 | #
 43 | # Default to 8192 for server
 44 | #
 45 | #DREMIO_MAX_DIRECT_MEMORY_SIZE_MB=8192
 46 | 
 47 | #
 48 | # Max permanent generation memory size (in MB) for the Dremio process
 49 | # (Only used for Java 7)
 50 | #
 51 | # Default to 512 for server
 52 | #
 53 | #DREMIO_MAX_PERMGEN_MEMORY_SIZE_MB=512
 54 | 
 55 | #
 56 | # Garbage collection logging is enabled by default. Set the following
 57 | # parameter to "no" to disable garbage collection logging.
 58 | #
 59 | DREMIO_GC_LOGS_ENABLED="no"
 60 | 
 61 | #
 62 | # Send GC logs to console and not to log files. The DREMIO_LOG_DIR is ignored if set.
 63 | # Default is set to "no"
 64 | #
 65 | #DREMIO_GC_LOG_TO_CONSOLE="no"
 66 | 
 67 | #
 68 | # By default G1GC is used as java garbage collection.
 69 | # This can be overriden by changing this parameter
 70 | #
 71 | #DREMIO_GC_OPTS="-XX:+UseG1GC"
 72 | 
 73 | #
 74 | # Java version will be checked by default.
 75 | # Currently only java 8 is supported by dremio.
 76 | # This check can be disabled by changing value to false.
 77 | #
 78 | #DREMIO_JAVA_VERSION_CHECK="true"
 79 | 
 80 | #
 81 | # The scheduling priority for the server
 82 | #
 83 | # Default to 0
 84 | #
 85 | # DREMIO_NICENESS=0
 86 | #
 87 | 
 88 | #
 89 | # Number of seconds after which the server is killed forcibly it it hasn't stopped
 90 | #
 91 | # Default to 120
 92 | #
 93 | #DREMIO_STOP_TIMEOUT=120
 94 | 
 95 | # Extra Java options - shared between dremio and dremio-admin commands
 96 | #
 97 | #DREMIO_JAVA_EXTRA_OPTS=
 98 | 
 99 | # Extra Java options - client only (dremio-admin command)
100 | #
101 | #DREMIO_JAVA_CLIENT_EXTRA_OPTS=
102 | 
103 | # Extra Java options - server only (dremio command)
104 | #
105 | #DREMIO_JAVA_SERVER_EXTRA_OPTS=
106 | 


--------------------------------------------------------------------------------
/docker/dremio/dremio.conf:
--------------------------------------------------------------------------------
 1 | paths: {
 2 |   # the local path for dremio to store data.
 3 |   local: "/tmp/data"
 4 | }
 5 | 
 6 | services: {
 7 |   coordinator.enabled: true,
 8 |   coordinator.master.enabled: true,
 9 |   executor.enabled: true,
10 |   flight.use_session_service: true
11 | }
12 | 
13 | 
14 | registration.publish-host: "localhost"
15 | 


--------------------------------------------------------------------------------
/docker/scaling/Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG IMAGE_SIZE_MB
 2 | ARG FUNCTION_DIR="/function"
 3 | 
 4 | FROM ubuntu:20.04 as ric-dependency
 5 | 
 6 | ENV DEBIAN_FRONTEND=noninteractive
 7 | 
 8 | RUN apt-get update && \
 9 |     apt-get install -y \
10 |     g++ \
11 |     make \
12 |     cmake \
13 |     unzip \
14 |     python3 \
15 |     python3-pip \
16 |     libcurl4-openssl-dev
17 | ARG FUNCTION_DIR
18 | RUN mkdir -p ${FUNCTION_DIR}
19 | RUN pip3 install \
20 |     --target ${FUNCTION_DIR} \
21 |     awslambdaric
22 | COPY lambda-handler.py ${FUNCTION_DIR}
23 | 
24 | 
25 | FROM ubuntu:20.04
26 | 
27 | ENV DEBIAN_FRONTEND=noninteractive
28 | 
29 | ARG FUNCTION_DIR
30 | ARG IMAGE_SIZE_MB
31 | 
32 | RUN apt-get update \
33 |   && apt-get install -y python3 \
34 |   && rm -rf /var/lib/apt/lists/*
35 | 
36 | RUN head -c ${IMAGE_SIZE_MB}MB /dev/urandom > /placeholder.bin
37 | 
38 | COPY --from=ric-dependency ${FUNCTION_DIR} ${FUNCTION_DIR}
39 | WORKDIR ${FUNCTION_DIR}
40 | 
41 | ENTRYPOINT [ "python3", "-m", "awslambdaric" ]
42 | CMD [ "lambda-handler.handler" ]
43 | 


--------------------------------------------------------------------------------
/docker/scaling/docker-compose.yaml:
--------------------------------------------------------------------------------
 1 | version: "3.9"
 2 | services:
 3 |   placeholder_50:
 4 |     build:
 5 |       context: .
 6 |       args:
 7 |         - IMAGE_SIZE_MB=50
 8 |     image: cloudfuse-io/l12n:placeholder-50
 9 |     cap_drop:
10 |       - ALL
11 |     read_only: true
12 |     entrypoint:
13 |       - python3
14 |       - lambda-handler.py
15 |     environment:
16 |       - AWS_REGION=$L12N_AWS_REGION
17 |       - AWS_ACCESS_KEY_ID=$LAMBDA_ACCESS_KEY_ID
18 |       - AWS_SECRET_ACCESS_KEY=$LAMBDA_SECRET_ACCESS_KEY
19 |       - AWS_SESSION_TOKEN=$LAMBDA_SESSION_TOKEN
20 | 
21 |   placeholder_200:
22 |     build:
23 |       context: .
24 |       args:
25 |         - IMAGE_SIZE_MB=200
26 |     image: cloudfuse-io/l12n:placeholder-200
27 |     cap_drop:
28 |       - ALL
29 |     read_only: true
30 |     entrypoint:
31 |       - python3
32 |       - lambda-handler.py
33 |     environment:
34 |       - AWS_REGION=$L12N_AWS_REGION
35 |       - AWS_ACCESS_KEY_ID=$LAMBDA_ACCESS_KEY_ID
36 |       - AWS_SECRET_ACCESS_KEY=$LAMBDA_SECRET_ACCESS_KEY
37 |       - AWS_SESSION_TOKEN=$LAMBDA_SESSION_TOKEN
38 | 
39 |   placeholder_800:
40 |     build:
41 |       context: .
42 |       args:
43 |         - IMAGE_SIZE_MB=800
44 |     image: cloudfuse-io/l12n:placeholder-800
45 |     cap_drop:
46 |       - ALL
47 |     read_only: true
48 |     entrypoint:
49 |       - python3
50 |       - lambda-handler.py
51 |     environment:
52 |       - AWS_REGION=$L12N_AWS_REGION
53 |       - AWS_ACCESS_KEY_ID=$LAMBDA_ACCESS_KEY_ID
54 |       - AWS_SECRET_ACCESS_KEY=$LAMBDA_SECRET_ACCESS_KEY
55 |       - AWS_SESSION_TOKEN=$LAMBDA_SESSION_TOKEN
56 | 


--------------------------------------------------------------------------------
/docker/scaling/lambda-handler.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | import time
 4 | 
 5 | logging.getLogger().setLevel(logging.INFO)
 6 | 
 7 | IS_COLD_START = True
 8 | 
 9 | 
10 | def handler(event, context):
11 |     global IS_COLD_START
12 |     is_cold_start = IS_COLD_START
13 |     IS_COLD_START = False
14 |     placeholder_size = os.path.getsize("/placeholder.bin")
15 |     # We can set a sleep duration to make sure every invocation is alocated to a
16 |     # new Lambda container and doesn't trigger a warm start. Sleeping on warm
17 |     # start would bias the corrected duration (observed duration - sleep time)
18 |     if is_cold_start:
19 |         time.sleep(event.get("sleep", 0))
20 |     return {
21 |         "placeholder_size": placeholder_size,
22 |         "cold_start": is_cold_start,
23 |         "memory_limit_in_mb": int(context.memory_limit_in_mb),
24 |     }
25 | 
26 | 
27 | if __name__ == "__main__":
28 |     res = handler({}, {})
29 |     print(res)
30 | 


--------------------------------------------------------------------------------
/docker/spark/Dockerfile:
--------------------------------------------------------------------------------
 1 | # The Spark version must be compatible with the Hadoop and AWS SDK versions
 2 | # provided. 
 3 | ARG SPARK_VERSION=v3.2.2
 4 | # For Hadoop, a good way to figure out the version is to check
 5 | # existing Hadoop related dependencies in the /opt/spark/jars dir of the 
 6 | # Spark image.
 7 | ARG HADOOP_VERSION=3.3.1
 8 | # There is no clear rule for the SDK version required by Hadoop and Spark
 9 | ARG AWS_JAVA_SDK_VERSION=1.12.302
10 | 
11 | ARG FUNCTION_DIR="/function"
12 | 
13 | FROM curlimages/curl as jar-dependencies
14 | ARG AWS_JAVA_SDK_VERSION
15 | ARG HADOOP_VERSION
16 | 
17 | RUN curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-core/$AWS_JAVA_SDK_VERSION/aws-java-sdk-core-$AWS_JAVA_SDK_VERSION.jar -o /tmp/aws-java-sdk-core-$AWS_JAVA_SDK_VERSION.jar
18 | RUN curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-s3/$AWS_JAVA_SDK_VERSION/aws-java-sdk-s3-$AWS_JAVA_SDK_VERSION.jar -o /tmp/aws-java-sdk-s3-$AWS_JAVA_SDK_VERSION.jar
19 | RUN curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-dynamodb/$AWS_JAVA_SDK_VERSION/aws-java-sdk-dynamodb-$AWS_JAVA_SDK_VERSION.jar -o /tmp/aws-java-sdk-dynamodb-$AWS_JAVA_SDK_VERSION.jar
20 | RUN curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk/$AWS_JAVA_SDK_VERSION/aws-java-sdk-$AWS_JAVA_SDK_VERSION.jar -o /tmp/aws-java-sdk-$AWS_JAVA_SDK_VERSION.jar
21 | RUN curl https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-common/$HADOOP_VERSION/hadoop-common-$HADOOP_VERSION.jar -o /tmp/hadoop-common-$HADOOP_VERSION.jar
22 | RUN curl https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/$HADOOP_VERSION/hadoop-aws-$HADOOP_VERSION.jar -o /tmp/hadoop-aws-$HADOOP_VERSION.jar
23 | 
24 | 
25 | # The debian version used to get the Lambdaric should ideally match the one 
26 | # in the Spark image (cat /etc/debian_version)
27 | FROM debian:11.4 as ric-dependency
28 | RUN apt-get update && \
29 |     apt-get install -y \
30 |     g++ \
31 |     make \
32 |     cmake \
33 |     unzip \
34 |     python3 \
35 |     python3-pip \
36 |     libcurl4-openssl-dev
37 | ARG FUNCTION_DIR
38 | RUN mkdir -p ${FUNCTION_DIR}
39 | RUN pip3 install \
40 |     --target ${FUNCTION_DIR} \
41 |     awslambdaric
42 | COPY lambda-handler.py ${FUNCTION_DIR}
43 | 
44 | 
45 | FROM apache/spark:$SPARK_VERSION
46 | ARG AWS_JAVA_SDK_VERSION
47 | ARG HADOOP_VERSION
48 | ARG FUNCTION_DIR
49 | 
50 | COPY --from=jar-dependencies /tmp/aws-java-sdk-core-$AWS_JAVA_SDK_VERSION.jar /opt/spark/jars/aws-java-sdk-core-$AWS_JAVA_SDK_VERSION.jar
51 | COPY --from=jar-dependencies /tmp/aws-java-sdk-s3-$AWS_JAVA_SDK_VERSION.jar /opt/spark/jars/aws-java-sdk-s3-$AWS_JAVA_SDK_VERSION.jar
52 | COPY --from=jar-dependencies /tmp/aws-java-sdk-dynamodb-$AWS_JAVA_SDK_VERSION.jar /opt/spark/jars/aws-java-sdk-dynamodb-$AWS_JAVA_SDK_VERSION.jar
53 | COPY --from=jar-dependencies /tmp/aws-java-sdk-$AWS_JAVA_SDK_VERSION.jar /opt/spark/jars/aws-java-sdk-$AWS_JAVA_SDK_VERSION.jar
54 | COPY --from=jar-dependencies /tmp/hadoop-common-$HADOOP_VERSION.jar /opt/spark/jars/hadoop-common-$HADOOP_VERSION.jar
55 | COPY --from=jar-dependencies /tmp/hadoop-aws-$HADOOP_VERSION.jar /opt/spark/jars/hadoop-aws-$HADOOP_VERSION.jar
56 | 
57 | USER root
58 | 
59 | RUN apt update && \
60 |     apt install -y python3 && \
61 |     rm -rf /var/lib/apt/lists/*
62 | 
63 | COPY --from=ric-dependency ${FUNCTION_DIR} ${FUNCTION_DIR}
64 | COPY ./spark-class /opt/spark/bin/
65 | COPY ./spark-defaults.conf /opt/spark/conf/
66 | 
67 | WORKDIR ${FUNCTION_DIR}
68 | 
69 | ENTRYPOINT [ "python3", "-m", "awslambdaric" ]
70 | CMD [ "lambda-handler.handler" ]
71 | 


--------------------------------------------------------------------------------
/docker/spark/README.md:
--------------------------------------------------------------------------------
 1 | # Spark lambdatization tricks
 2 | 
 3 | ## List of tricks
 4 | 
 5 | - We use the official Spark image as base. It is pretty lightweight (could be
 6 |   even lighter?) but doesn't contain the jars necessary to read from S3
 7 | - We grab the Hadoop and AWS SDK jars from Maven Central by following the class
 8 |   not found errors.
 9 | - We need to `cd` to `/tmp` before executing the `spark-sql` command so that the
10 |   workdir is writeable
11 | - We override `spark-class` because it uses process substitution (`<(...)`)
12 |   which is using `/dev/fd/63` as a tmp file and that is not allowed inside
13 |   Lambda
14 | - We set `spark.driver.bindAddress` to `localhost`, otherwise the port binding
15 |   fails in Lambda
16 | 
17 | ## Changing Spark Version
18 | 
19 | Because of all the tweeks listed above, using a different Spark version would
20 | require reconfiguring many elements:
21 | 
22 | - set compatible Hadoop and AWS SDK versions
23 | - update `spark-class` file if it changed
24 | 


--------------------------------------------------------------------------------
/docker/spark/docker-compose.yaml:
--------------------------------------------------------------------------------
 1 | version: "3.9"
 2 | services:
 3 |   spark:
 4 |     build: .
 5 |     image: cloudfuse-io/l12n:spark
 6 |     cap_drop:
 7 |       - ALL
 8 |     read_only: true
 9 |     volumes:
10 |       - spark-tmp:/tmp
11 |     entrypoint:
12 |       - python3
13 |       - lambda-handler.py
14 |     environment:
15 |       - AWS_ACCESS_KEY_ID=$LAMBDA_ACCESS_KEY_ID
16 |       - AWS_SECRET_ACCESS_KEY=$LAMBDA_SECRET_ACCESS_KEY
17 |       - AWS_SESSION_TOKEN=$LAMBDA_SESSION_TOKEN
18 |       - AWS_REGION=$L12N_AWS_REGION
19 |       - DATA_BUCKET_NAME
20 | 
21 | volumes:
22 |   spark-tmp:
23 | 


--------------------------------------------------------------------------------
/docker/spark/spark-defaults.conf:
--------------------------------------------------------------------------------
1 | spark.driver.bindAddress  localhost
2 | 


--------------------------------------------------------------------------------
/docker/trino/Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG FUNCTION_DIR="/function"
 2 | ARG HADOOP_VERSION=3.2.0
 3 | # The SDK version must be the one in the Hadoop package
 4 | ARG AWS_JAVA_SDK_VERSION=1.11.375
 5 | ARG METASTORE_VERSION=3.0.0
 6 | # We use custom builds of trino-server
 7 | ARG TRINO_VERSION=378
 8 | 
 9 | 
10 | FROM ubuntu:20.04 as ric-dependency
11 | 
12 | ENV DEBIAN_FRONTEND=noninteractive 
13 | 
14 | RUN apt-get update && \
15 |     apt-get install -y \
16 |     g++ \
17 |     make \
18 |     cmake \
19 |     unzip \
20 |     python3 \
21 |     python3-pip \
22 |     libcurl4-openssl-dev
23 | ARG FUNCTION_DIR
24 | RUN mkdir -p ${FUNCTION_DIR}
25 | RUN pip3 install \
26 |     --target ${FUNCTION_DIR} \
27 |     awslambdaric
28 | COPY lambda-handler.py ${FUNCTION_DIR}
29 | 
30 | 
31 | FROM ubuntu:20.04
32 | ARG HADOOP_VERSION
33 | ARG METASTORE_VERSION
34 | ARG TRINO_VERSION
35 | ARG AWS_JAVA_SDK_VERSION
36 | 
37 | ENV DEBIAN_FRONTEND=noninteractive 
38 | 
39 | RUN apt-get update && apt-get install -y \
40 |   curl \
41 |   less \
42 |   openjdk-11-jdk \
43 |   python3 \
44 |   && rm -rf /var/lib/apt/lists/*
45 | ENV JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64/
46 | RUN ln -s /usr/bin/python3 /usr/bin/python
47 | 
48 | # HIVE METASTORE
49 | 
50 | WORKDIR /opt
51 | 
52 | ENV HADOOP_HOME=/opt/hadoop-${HADOOP_VERSION}
53 | ENV HIVE_HOME=/opt/apache-hive-metastore-${METASTORE_VERSION}-bin
54 | # jars used by Trino
55 | ENV HADOOP_CLASSPATH=${HADOOP_HOME}/share/hadoop/tools/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_VERSION}.jar:${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-aws-${HADOOP_VERSION}.jar
56 | 
57 | RUN curl -L https://repo1.maven.org/maven2/org/apache/hive/hive-standalone-metastore/${METASTORE_VERSION}/hive-standalone-metastore-${METASTORE_VERSION}-bin.tar.gz | tar zxf - && \
58 |   # Download from mirror and trim some unused libraries
59 |   curl -L https://github.com/cloudfuse-io/lambdatization/releases/download/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz | tar zxf - && \
60 |   cd hadoop-${HADOOP_VERSION}/share/hadoop/ && \
61 |   rm -r client/* && \
62 |   rm -r hdfs/* && \
63 |   rm -r mapreduce/* && \
64 |   rm -r yarn/* && \
65 |   find ./tools/lib -type f -not \( -name "aws-java-sdk-bundle-${AWS_JAVA_SDK_VERSION}.jar" -or -name "hadoop-aws-${HADOOP_VERSION}.jar" \) -delete
66 | 
67 | ENV PATH="${HIVE_HOME}/bin:${PATH}"
68 | COPY metastore-site.xml ${HIVE_HOME}/conf
69 | 
70 | # TRINO
71 | 
72 | ENV TRINO_HOME=/opt/trino-server-${TRINO_VERSION}
73 | RUN curl -L https://github.com/cloudfuse-io/lambdatization/releases/download/trino-server-${TRINO_VERSION}/trino-server-${TRINO_VERSION}.tar.gz | tar zxf - && \
74 |   curl -L https://repo1.maven.org/maven2/io/trino/trino-cli/${TRINO_VERSION}/trino-cli-${TRINO_VERSION}-executable.jar -o ${TRINO_HOME}/bin/trino && \
75 |   chmod +x ${TRINO_HOME}/bin/trino
76 | ENV PATH="${TRINO_HOME}/bin:${PATH}"
77 | COPY trino-etc ${TRINO_HOME}/etc
78 | 
79 | # LAMBDA ENTRYPOINT
80 | 
81 | ARG FUNCTION_DIR
82 | COPY --from=ric-dependency ${FUNCTION_DIR} ${FUNCTION_DIR}
83 | WORKDIR ${FUNCTION_DIR}
84 | ENTRYPOINT [ "python3", "-m", "awslambdaric" ]
85 | CMD [ "lambda-handler.handler" ]
86 | 


--------------------------------------------------------------------------------
/docker/trino/README.md:
--------------------------------------------------------------------------------
 1 | # Trino lambdatization tricks
 2 | 
 3 | ## List of tricks
 4 | 
 5 | - Trino loads many plugins by default, which implies opening many jar files in
 6 |   parallel. To make sure this process doesn't exceed the system's maximum number
 7 |   of file descriptors, it performs a check of the ulimit when starting. The
 8 |   minimum required is 4096, but unfortunately we have a hard limit on AWS Lambda
 9 |   at 1024. We had to [rebuild][trino_action] Trino with a patch that:
10 |     - loads less plugins
11 |     - removes the check on fileno
12 | - Trino, like Dremio, automatically detects its private IP and tries to use it
13 |   for internal connections. We didn't find a knob to disable this behaviour, so
14 |   we had to harcode it in the patch.
15 | - It seems you cannot query S3 without using the Hive metastore, so we had to
16 |   install a local version of it running on Derby which adds to the init time.
17 | - The container image is huge (>2GB):
18 |   - we are pulling in a full Hadoop distribution, in which most files won't be
19 |     used. We started removing some libraries from it but we could probably trim
20 |     a few more hundreds of MBs
21 |   - we could also use a remote Hive metastore (like Glue) instead of installing
22 |     a local one
23 |   - obviously, we could use a smaller base image
24 | 
25 | [trino_action]: https://github.com/cloudfuse-io/lambdatization/actions/workflows/helper-trino.yaml
26 | 
27 | ## Updating Trino version
28 | 
29 | To change the Trino version, the patch needs to be applied to that version (xxx):
30 | ```bash
31 | git clone cloudfuse-io/trino
32 | cd trino
33 | git checkout 378-patch
34 | git checkout -b xxx-patch
35 | git rebase xxx
36 | git push
37 | ```
38 | 
39 | Then run the build in the [Trino workflow][trino_workflow] with your new Trino
40 | version number xxx
41 | 
42 | [trino_workflow]: https://github.com/cloudfuse-io/lambdatization/actions/workflows/helper-trino.yaml
43 | 


--------------------------------------------------------------------------------
/docker/trino/docker-compose.yaml:
--------------------------------------------------------------------------------
 1 | version: "3.9"
 2 | services:
 3 |   trino:
 4 |     build: .
 5 |     image: cloudfuse-io/l12n:trino
 6 |     cap_drop:
 7 |       - ALL
 8 |     read_only: true
 9 |     volumes:
10 |       - trino-tmp:/tmp
11 |     user: nobody
12 |     entrypoint:
13 |       # - bash
14 |       - python3
15 |       - lambda-handler.py
16 |     environment:
17 |       - AWS_ACCESS_KEY_ID=$LAMBDA_ACCESS_KEY_ID
18 |       - AWS_SECRET_ACCESS_KEY=$LAMBDA_SECRET_ACCESS_KEY
19 |       - AWS_SESSION_TOKEN=$LAMBDA_SESSION_TOKEN
20 |       - AWS_REGION=$L12N_AWS_REGION
21 |       - DATA_BUCKET_NAME
22 |     networks:
23 |       - tmpengine
24 |     ulimits:
25 |       nofile:
26 |         soft: 1024
27 |         hard: 1024
28 | 
29 | volumes:
30 |   trino-tmp:
31 | 
32 | 
33 | networks:
34 |   tmpengine:
35 | 


--------------------------------------------------------------------------------
/docker/trino/metastore-site.xml:
--------------------------------------------------------------------------------
 1 | <configuration>
 2 |     <property>
 3 |         <name>metastore.thrift.uris</name>
 4 |         <value>thrift://localhost:9083</value>
 5 |         <description>Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore.</description>
 6 |     </property>
 7 |     <property>
 8 |         <name>metastore.task.threads.always</name>
 9 |         <value>org.apache.hadoop.hive.metastore.events.EventCleanerTask,org.apache.hadoop.hive.metastore.MaterializationsCacheCleanerTask</value>
10 |     </property>
11 |     <property>
12 |         <name>metastore.expression.proxy</name>
13 |         <value>org.apache.hadoop.hive.metastore.DefaultPartitionExpressionProxy</value>
14 |     </property>
15 |     <property>
16 |         <name>metastore.storage.schema.reader.impl</name>
17 |         <value>org.apache.hadoop.hive.metastore.SerDeStorageSchemaReader</value>
18 |     </property>
19 |     <property>
20 |         <name>datanucleus.autoCreateSchema</name>
21 |         <value>false</value>
22 |         <description>Creates necessary schema on a startup, we use init script instead</description>
23 |     </property> 
24 | </configuration>
25 | 


--------------------------------------------------------------------------------
/docker/trino/trino-etc/catalog/hive.properties:
--------------------------------------------------------------------------------
1 | connector.name=hive
2 | hive.metastore.uri=thrift://localhost:9083
3 | hive.storage-format=ORC
4 | 
5 | hive.allow-drop-table=true
6 | 


--------------------------------------------------------------------------------
/docker/trino/trino-etc/config.properties:
--------------------------------------------------------------------------------
1 | #single node install config
2 | coordinator=true
3 | node-scheduler.include-coordinator=true
4 | http-server.http.port=8080
5 | discovery.uri=http://localhost:8080
6 | log.path=/tmp/trino/var/log/server.log
7 | 


--------------------------------------------------------------------------------
/docker/trino/trino-etc/jvm.config:
--------------------------------------------------------------------------------
 1 | -server
 2 | -Xmx1G
 3 | -XX:-UseBiasedLocking
 4 | -XX:+UseG1GC
 5 | -XX:G1HeapRegionSize=32M
 6 | -XX:+ExplicitGCInvokesConcurrent
 7 | -XX:+HeapDumpOnOutOfMemoryError
 8 | -XX:+ExitOnOutOfMemoryError
 9 | -XX:-OmitStackTraceInFastThrow
10 | -XX:ReservedCodeCacheSize=256M
11 | -XX:PerMethodRecompilationCutoff=10000
12 | -XX:PerBytecodeRecompilationCutoff=10000
13 | -Djdk.attach.allowAttachSelf=true
14 | -Djdk.nio.maxCachedBufferSize=2000000
15 | 


--------------------------------------------------------------------------------
/docker/trino/trino-etc/node.properties:
--------------------------------------------------------------------------------
1 | node.environment=docker
2 | node.data-dir=/tmp/trino
3 | 


--------------------------------------------------------------------------------
/docs/scaling_metrics.md:
--------------------------------------------------------------------------------
 1 | # Tips about AWS Lambda scaling metrics
 2 | 
 3 | - The total duration represents the end to end execution time to complete
 4 |   the invocation of a given number of AWS Lambda functions in parallel.
 5 | - The percentiles `PX` represent the time to successfuly complete `X` % of the
 6 |   function runs.
 7 | - To avoid re-using existing Lambda containers (warm starts) during the
 8 |   invocation of a batch, and thus properly measure parallelism, we maintain the
 9 |   functions running during a given "sleep duration". This duration is then
10 |   discounted from the batch invocation duration.​
11 | - We run different images, all using the same lightweight base image (~40MB),
12 |   but embed a differently sized "payload file".
13 | 


--------------------------------------------------------------------------------
/docs/standalone_engine_metrics.md:
--------------------------------------------------------------------------------
 1 | # Tips about query engine metrics in AWS Lambda
 2 | 
 3 | - All engines are deployed in "single node" mode.
 4 | - They perform an aggregation on a Parquet file (cold start), then are invoked
 5 |   again right away on a different Parquet file (warm start). Both files are NYC
 6 |   Taxi Parquet archives of approximatively 120MB.
 7 | - The aggregation is a GROUP BY on the "payment type" column and a SUM on the
 8 |   "trip distance" column. This query is at the same time simple enough for all
 9 |   engines to easily support it but complex enough to force the engine to scan at
10 |   least 2 full columns from the Parquet file.
11 | - The durations displayed cover the entire invocation time of the lambda
12 |   function as perceived by the client.
13 | 


--------------------------------------------------------------------------------
/infra/common.hcl:
--------------------------------------------------------------------------------
 1 | generate "versions" {
 2 |   path      = "versions.generated.tf"
 3 |   if_exists = "overwrite"
 4 |   contents  = <<EOF
 5 | terraform {
 6 |   required_version = ">=1"
 7 |   required_providers {
 8 |     aws = {
 9 |       source  = "hashicorp/aws"
10 |       version = "~> 4.45"
11 |     }
12 |     google = {
13 |       source = "hashicorp/google"
14 |       version = "~> 3.0"
15 |     }
16 |   }
17 | }
18 | EOF
19 | }
20 | 
21 | locals {
22 |   extra_arguments = {
23 |     commands = [
24 |       "init",
25 |       "apply",
26 |       "destroy",
27 |       "output",
28 |       "fmt",
29 |     ]
30 | 
31 |     data_dir = "${get_env("CALLING_DIR")}/.terraform/data/"
32 |   }
33 | }
34 | 


--------------------------------------------------------------------------------
/infra/common/env/conf.tf:
--------------------------------------------------------------------------------
 1 | locals {
 2 |   module_name = "l12n"
 3 | }
 4 | 
 5 | # use the workspace name as stage
 6 | output "stage" {
 7 |   value = terraform.workspace
 8 | }
 9 | 
10 | output "module_name" {
11 |   value = local.module_name
12 | }
13 | 
14 | 
15 | output "default_tags" {
16 |   value = {
17 |     module      = local.module_name
18 |     provisioner = "terraform"
19 |     stage       = terraform.workspace
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/infra/common/lambda/iam.tf:
--------------------------------------------------------------------------------
 1 | resource "aws_iam_role" "lambda_role" {
 2 |   name = "${module.env.module_name}_${var.function_base_name}_${var.region_name}_${module.env.stage}"
 3 | 
 4 |   assume_role_policy = <<EOF
 5 | {
 6 |   "Version": "2012-10-17",
 7 |   "Statement": [
 8 |     {
 9 |       "Action": "sts:AssumeRole",
10 |       "Principal": {
11 |         "Service": "lambda.amazonaws.com"
12 |       },
13 |       "Effect": "Allow",
14 |       "Sid": ""
15 |     }
16 |   ]
17 | }
18 | EOF
19 | }
20 | 
21 | resource "aws_iam_role_policy" "lambda_default_policy" {
22 |   name = "${module.env.module_name}_${var.function_base_name}_${var.region_name}_${module.env.stage}"
23 |   role = aws_iam_role.lambda_role.id
24 | 
25 |   policy = <<EOF
26 | {
27 |   "Version": "2012-10-17",
28 |   "Statement": [
29 |     {
30 |       "Action": [
31 |         "logs:CreateLogStream",
32 |         "logs:PutLogEvents"
33 |       ],
34 |       "Resource": "${aws_cloudwatch_log_group.lambda_log_group.arn}",
35 |       "Effect": "Allow"
36 |     }
37 |   ]
38 | }
39 | EOF
40 | }
41 | 
42 | resource "aws_iam_role_policy_attachment" "lamba_exec_role_eni" {
43 |   role       = aws_iam_role.lambda_role.name
44 |   policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole"
45 | }
46 | 
47 | resource "aws_iam_role_policy_attachment" "additional-attachments" {
48 |   count = length(var.additional_policies)
49 | 
50 |   role       = aws_iam_role.lambda_role.name
51 |   policy_arn = var.additional_policies[count.index]
52 | }
53 | 


--------------------------------------------------------------------------------
/infra/common/lambda/inputs.tf:
--------------------------------------------------------------------------------
 1 | # global configuration
 2 | 
 3 | module "env" {
 4 |   source = "../env"
 5 | }
 6 | 
 7 | variable "region_name" {}
 8 | 
 9 | # function related configuration
10 | 
11 | variable "function_base_name" {}
12 | 
13 | variable "docker_image" {}
14 | 
15 | variable "memory_size" {}
16 | 
17 | variable "ephemeral_storage" {
18 |   default = 512
19 | }
20 | 
21 | variable "timeout" {}
22 | 
23 | variable "additional_policies" {
24 |   type    = list(any)
25 |   default = []
26 | }
27 | 
28 | variable "environment" {
29 |   type = map(any)
30 | }
31 | 


--------------------------------------------------------------------------------
/infra/common/lambda/lambda.tf:
--------------------------------------------------------------------------------
 1 | resource "aws_lambda_function" "lambda" {
 2 |   package_type  = "Image"
 3 |   image_uri     = var.docker_image
 4 |   function_name = "${module.env.module_name}-${var.function_base_name}-${module.env.stage}"
 5 |   role          = aws_iam_role.lambda_role.arn
 6 |   memory_size   = var.memory_size
 7 |   ephemeral_storage {
 8 |     size = var.ephemeral_storage
 9 |   }
10 |   timeout = var.timeout
11 | 
12 |   environment {
13 |     variables = merge(
14 |       {
15 |         STAGE = module.env.stage
16 |       },
17 |       var.environment
18 |     )
19 |   }
20 | 
21 |   depends_on = [aws_iam_role_policy_attachment.lamba_exec_role_eni]
22 | }
23 | 
24 | resource "aws_lambda_function_event_invoke_config" "lambda_conf" {
25 |   function_name                = aws_lambda_function.lambda.function_name
26 |   maximum_event_age_in_seconds = 60
27 |   maximum_retry_attempts       = 0
28 | }
29 | 
30 | 
31 | resource "aws_cloudwatch_log_group" "lambda_log_group" {
32 |   name              = "/aws/lambda/${aws_lambda_function.lambda.function_name}"
33 |   retention_in_days = 14
34 | }
35 | 


--------------------------------------------------------------------------------
/infra/common/lambda/outputs.tf:
--------------------------------------------------------------------------------
 1 | output "lambda_arn" {
 2 |   value = aws_lambda_function.lambda.arn
 3 | }
 4 | 
 5 | output "lambda_name" {
 6 |   value = aws_lambda_function.lambda.function_name
 7 | }
 8 | 
 9 | output "lambda_role_name" {
10 |   value = aws_iam_role.lambda_role.name
11 | }
12 | 
13 | output "role_arn" {
14 |   value = aws_iam_role.lambda_role.arn
15 | }
16 | 
17 | output "log_group_name" {
18 |   value = aws_cloudwatch_log_group.lambda_log_group.name
19 | }
20 | 
21 | output "log_group_arn" {
22 |   value = aws_cloudwatch_log_group.lambda_log_group.arn
23 | }
24 | 


--------------------------------------------------------------------------------
/infra/monitoring/README.md:
--------------------------------------------------------------------------------
 1 | # Monitoring Infra
 2 | 
 3 | To setup an instance of the monitoring infrastructure:
 4 | - create a GCP project and set it as L12N_GCP_PROJECT_ID
 5 | - choose a GCP region and set it as L12N_GCP_REGION
 6 | - before deploying, you must run `l12n monitoring.login` and authenticate
 7 |   yourself with an email that has access to the project ID specified above
 8 | - run `l12n monitoring.deploy`. Because of its different lifecycle, this
 9 |   deployment is independent of the rest of the infrastructure.
10 | - check out the other `l12n monitoring.*` commands to discover how you can push
11 |   metrics from the various execution setups.
12 | 


--------------------------------------------------------------------------------
/infra/monitoring/bigquery/.terraform.lock.hcl:
--------------------------------------------------------------------------------
 1 | # This file is maintained automatically by "terraform init".
 2 | # Manual edits may be lost in future updates.
 3 | 
 4 | provider "registry.terraform.io/hashicorp/aws" {
 5 |   version     = "4.46.0"
 6 |   constraints = "~> 4.45"
 7 |   hashes = [
 8 |     "h1:EZB4OgvytV38JpWyye9zoMQ0bfT9yB9xSXM5NY3Lrws=",
 9 |     "zh:1678e6a4bdb3d81a6713adc62ca0fdb8250c584e10c10d1daca72316e9db8df2",
10 |     "zh:329903acf86ef6072502736dff4c43c2b50f762a958f76aa924e2d74c7fca1e3",
11 |     "zh:33db8131fe0ec7e1d9f30bc9f65c2440e9c1f708d681b6062757a351f1df7ce6",
12 |     "zh:3a3b010bc393784c16f4b6cdce7f76db93d5efa323fce4920bfea9e9ba6abe44",
13 |     "zh:979e2713a5759a7483a065e149e3cb69db9225326fc0457fa3fc3a48aed0c63f",
14 |     "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425",
15 |     "zh:9efcf0067e16ad53da7504178a05eb2118770b4ae00c193c10ecad4cbfce308e",
16 |     "zh:a10655bf1b6376ab7f3e55efadf54dc70f7bd07ca11369557c312095076f9d62",
17 |     "zh:b0394dd42cbd2a718a7dd7ae0283f04769aaf8b3d52664e141da59c0171a11ab",
18 |     "zh:b958e614c2cf6d9c05a6ad5e94dc5c04b97ebfb84415da068be5a081b5ebbe24",
19 |     "zh:ba5069e624210c63ad9e633a8eb0108b21f2322bc4967ba2b82d09168c466888",
20 |     "zh:d7dfa597a17186e7f4d741dd7111849f1c0dd6f7ebc983043d8262d2fb37b408",
21 |     "zh:e8a641ca2c99f96d64fa2725875e797273984981d3e54772a2823541c44e3cd3",
22 |     "zh:f89898b7067c4246293a8007f59f5cfcac7b8dd251d39886c7a53ba596251466",
23 |     "zh:fb1e1df1d5cc208e08a850f8e84423bce080f01f5e901791c79df369d3ed52f2",
24 |   ]
25 | }
26 | 
27 | provider "registry.terraform.io/hashicorp/google" {
28 |   version     = "3.90.1"
29 |   constraints = "~> 3.0"
30 |   hashes = [
31 |     "h1:91QFfSGwMX4wKH5u+/FEMf2W3mToJxHtw/Ty0nvrDEU=",
32 |     "zh:07aabc8e46a5a2b29932e10677b23d4ce9d9a25f22ab61d3307a6b0e7998c84e",
33 |     "zh:0b63cd9534a98ed0fee794da495833046ad5319bd2da3102e21a941b7e2b857e",
34 |     "zh:17f815d57e1426edf8818323ab8e1022c8ec60dce0ced89a3b8e5dde5a95b3cc",
35 |     "zh:37855eae3542f2ebc6416984b124533d00299e0e01dcd7d2bc2205469cb9eceb",
36 |     "zh:579aa32a8e3fa317ddbd28c99a6449ae8864a5b7d10247bca6496f399cb36701",
37 |     "zh:703f71e0231cfe7a025c61db361d928189adba1d4fad2fe77f783dc73c8afe30",
38 |     "zh:afcd80c31cb1ed75ce6813269618e01ab29af68dae7aae1c51521c13acdaa678",
39 |     "zh:b21302f65a0d37045216912695d1ef718a1fe1732c30dc5654891fe2519b8e4e",
40 |     "zh:b69d0c8a74c2cd6233681db37e01aaaf1a6fb6bb24c83f7715bd2b456083e29d",
41 |     "zh:d4fb305816b143cb26c1827c79e56651347fd41809a57184e4807fb3f804f510",
42 |     "zh:fa24173ef9524bdfa1c5cada5188489554b08374f9519fe545f3fc1d3a9d9d4f",
43 |   ]
44 | }
45 | 


--------------------------------------------------------------------------------
/infra/monitoring/bigquery/engine_durations.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "name": "engine",
 4 |     "mode": "NULLABLE",
 5 |     "type": "STRING"
 6 |   },
 7 |   {
 8 |     "name": "external_duration_ms",
 9 |     "mode": "NULLABLE",
10 |     "type": "INTEGER"
11 |   },
12 |   {
13 |     "name": "cold_start",
14 |     "mode": "NULLABLE",
15 |     "type": "BOOLEAN"
16 |   },
17 |   {
18 |     "name": "timestamp",
19 |     "mode": "NULLABLE",
20 |     "type": "TIMESTAMP"
21 |   },
22 |   {
23 |     "name": "aws_region",
24 |     "mode": "NULLABLE",
25 |     "type": "STRING"
26 |   },
27 |   {
28 |     "name": "revision",
29 |     "mode": "NULLABLE",
30 |     "type": "STRING"
31 |   },
32 |   {
33 |     "name": "is_dirty",
34 |     "mode": "NULLABLE",
35 |     "type": "BOOLEAN"
36 |   },
37 |   {
38 |     "name": "branch",
39 |     "mode": "NULLABLE",
40 |     "type": "STRING"
41 |   }
42 | ]


--------------------------------------------------------------------------------
/infra/monitoring/bigquery/main.tf:
--------------------------------------------------------------------------------
 1 | variable "region_name" {}
 2 | variable "project_id" {}
 3 | 
 4 | module "env" {
 5 |   source = "../../common/env"
 6 | }
 7 | 
 8 | locals {
 9 |   account_id_raw                = "${module.env.module_name}${module.env.stage}"
10 |   stage_underscore              = replace(module.env.stage, "-", "_")
11 |   dataset_id                    = "${module.env.module_name}_metrics_${local.stage_underscore}"
12 |   standalone_durations_table_id = "${module.env.module_name}-standalone-engine-durations-${module.env.stage}"
13 |   scaling_durations_table_id    = "${module.env.module_name}-scaling-durations-${module.env.stage}"
14 | }
15 | 
16 | provider "google" {
17 |   project = var.project_id
18 |   region  = var.region_name
19 | }
20 | 
21 | resource "google_service_account" "bigquery_write" {
22 |   account_id   = "bigquery-write-${substr(md5(local.account_id_raw), 0, 6)}"
23 |   display_name = "BigQuery Write"
24 | }
25 | 
26 | resource "google_project_iam_member" "firestore_owner_binding" {
27 |   role   = "roles/bigquery.dataEditor"
28 |   member = "serviceAccount:${google_service_account.bigquery_write.email}"
29 | }
30 | 
31 | resource "google_service_account_key" "bigquery_write_key" {
32 |   service_account_id = google_service_account.bigquery_write.name
33 | }
34 | 
35 | resource "google_bigquery_dataset" "dataset" {
36 |   dataset_id                  = local.dataset_id
37 |   friendly_name               = "Lambdatization Metrics"
38 |   default_table_expiration_ms = 1576800000000 # 50 years ;-)
39 |   labels                      = module.env.default_tags
40 | }
41 | 
42 | resource "google_bigquery_table" "standalone_engine_durations" {
43 |   dataset_id          = google_bigquery_dataset.dataset.dataset_id
44 |   table_id            = local.standalone_durations_table_id
45 |   labels              = module.env.default_tags
46 |   deletion_protection = false
47 | 
48 |   time_partitioning {
49 |     type  = "DAY"
50 |     field = "timestamp"
51 |   }
52 | 
53 |   schema = file("${path.module}/engine_durations.json")
54 | }
55 | 
56 | resource "google_bigquery_table" "scaling_durations" {
57 |   dataset_id          = google_bigquery_dataset.dataset.dataset_id
58 |   table_id            = local.scaling_durations_table_id
59 |   labels              = module.env.default_tags
60 |   deletion_protection = false
61 | 
62 |   time_partitioning {
63 |     type  = "DAY"
64 |     field = "timestamp"
65 |   }
66 | 
67 |   schema = file("${path.module}/scaling_durations.json")
68 | }
69 | 
70 | output "service_account_key" {
71 |   value     = google_service_account_key.bigquery_write_key.private_key
72 |   sensitive = true
73 | }
74 | 
75 | output "standalone_durations_table_id" {
76 |   value = "${var.project_id}.${local.dataset_id}.${local.standalone_durations_table_id}"
77 | }
78 | 
79 | output "scaling_durations_table_id" {
80 |   value = "${var.project_id}.${local.dataset_id}.${local.scaling_durations_table_id}"
81 | }
82 | 


--------------------------------------------------------------------------------
/infra/monitoring/bigquery/scaling_durations.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "name": "placeholder_size_mb",
 4 |     "mode": "NULLABLE",
 5 |     "type": "INTEGER"
 6 |   },
 7 |   {
 8 |     "name": "sleep_duration_ms",
 9 |     "mode": "NULLABLE",
10 |     "type": "INTEGER"
11 |   },
12 |   {
13 |     "name": "total_duration_ms",
14 |     "mode": "NULLABLE",
15 |     "type": "INTEGER"
16 |   },
17 |   {
18 |     "name": "p90_duration_ms",
19 |     "mode": "NULLABLE",
20 |     "type": "INTEGER"
21 |   },
22 |   {
23 |     "name": "p99_duration_ms",
24 |     "mode": "NULLABLE",
25 |     "type": "INTEGER"
26 |   },
27 |   {
28 |     "name": "nb_run",
29 |     "mode": "NULLABLE",
30 |     "type": "INTEGER"
31 |   },
32 |   {
33 |     "name": "nb_cold_start",
34 |     "mode": "NULLABLE",
35 |     "type": "INTEGER"
36 |   },
37 |   {
38 |     "name": "memory_size_mb",
39 |     "mode": "NULLABLE",
40 |     "type": "INTEGER"
41 |   },
42 |   {
43 |     "name": "timestamp",
44 |     "mode": "NULLABLE",
45 |     "type": "TIMESTAMP"
46 |   },
47 |   {
48 |     "name": "aws_region",
49 |     "mode": "NULLABLE",
50 |     "type": "STRING"
51 |   },
52 |   {
53 |     "name": "revision",
54 |     "mode": "NULLABLE",
55 |     "type": "STRING"
56 |   },
57 |   {
58 |     "name": "is_dirty",
59 |     "mode": "NULLABLE",
60 |     "type": "BOOLEAN"
61 |   },
62 |   {
63 |     "name": "branch",
64 |     "mode": "NULLABLE",
65 |     "type": "STRING"
66 |   },
67 |   {
68 |     "name": "error",
69 |     "mode": "NULLABLE",
70 |     "type": "STRING"
71 |   }
72 | ]


--------------------------------------------------------------------------------
/infra/monitoring/bigquery/terragrunt.hcl:
--------------------------------------------------------------------------------
 1 | include "root" {
 2 |   path = find_in_parent_folders("terragrunt.${get_env("TF_STATE_BACKEND")}.hcl")
 3 | }
 4 | 
 5 | 
 6 | inputs = {
 7 |   region_name = get_env("L12N_GCP_REGION")
 8 |   project_id  = get_env("L12N_GCP_PROJECT_ID")
 9 | }
10 | 


--------------------------------------------------------------------------------
/infra/runtime/README.md:
--------------------------------------------------------------------------------
 1 | ## Notes:
 2 | 
 3 | - To simulate to the best the Lambda environement locally, we set flags such as
 4 |   `read_only` (except for `/tmp`) and a non root user in Docker Compose. We also
 5 |   set the relevant environment variables that are present in AWS Lambda:
 6 |   - token based credentials (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` and
 7 |     `AWS_SESSION_TOKEN`)
 8 |   - `AWS_REGION` or `AWS_DEFAULT_REGION` (both are set in Lambda)
 9 | - We use the Python AWS Lambda Runtime Interface Client, but it requires adding
10 |   Python to the base image. A Rust based Interface Client would spare us a few
11 |   dozen MBs for most engines. This is not critical for most engines as the image
12 |   loading (1 to 2 seconds) is by far dominated by the engine init duration and
13 |   choping off a few MBs will not decrease that duration significantly.
14 | 


--------------------------------------------------------------------------------
/infra/runtime/ballista/.terraform.lock.hcl:
--------------------------------------------------------------------------------
 1 | # This file is maintained automatically by "terraform init".
 2 | # Manual edits may be lost in future updates.
 3 | 
 4 | provider "registry.terraform.io/hashicorp/aws" {
 5 |   version     = "4.45.0"
 6 |   constraints = "~> 4.45"
 7 |   hashes = [
 8 |     "h1:J/XjRsEJIpxi+mczXQfnH3nvfACv3LRDtrthQJCIibY=",
 9 |     "zh:22da03786f25658a000d1bcc28c780816a97e7e8a1f59fff6eee7d452830e95e",
10 |     "zh:2543be56eee0491eb0c79ca1c901dcbf71da26625961fe719f088263fef062f4",
11 |     "zh:31a1da1e3beedfd88c3c152ab505bdcf330427f26b75835885526f7bb75c4857",
12 |     "zh:4409afe50f225659d5f378fe9303a45052953a1219f7f1acc82b69d07528b7ba",
13 |     "zh:4dadec3b783f10d2f8eef3dab5e817baae9c932a7967d45fe3d77fcbcbdaa438",
14 |     "zh:55be80d6e24828dcb0db7a0226fb275415c1c0ad63dd2f33b76f3ac0cd64e6a6",
15 |     "zh:560bba29efb7dbe0bfcc937369d88817aa31a8d18aa25395b1afe2576cb04495",
16 |     "zh:6caacc202e83438ff63d5d96733e283f44e349668d96c6b1c5c7df463ebf85cc",
17 |     "zh:6cabab83a61d5b4ac801c5a5d57556a0e76ec8dc879d28cf777509db5f6a657e",
18 |     "zh:96c4528bf9c16edb8841b68479ec51c499ed7fa680462fa28caeab3fc168bb43",
19 |     "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425",
20 |     "zh:cdc0b47ff840d708fbf75abfe86d23dc7f1dffdd233a771822a17b5c637f4769",
21 |     "zh:d9a9583e82776d1ebb6cf6c3d47acc2b302f8778f470ceffe7579dc794eb1feb",
22 |     "zh:e9367ca9f6f6418a23cdf8d01f29dd0c4f614e78499f52a767a422e4c334b915",
23 |     "zh:f6d355a2fb3bcebb597f68bbca4fa2aaa364efd29240236c582375e219d77656",
24 |   ]
25 | }
26 | 
27 | provider "registry.terraform.io/hashicorp/google" {
28 |   version     = "3.90.1"
29 |   constraints = "~> 3.0"
30 |   hashes = [
31 |     "h1:91QFfSGwMX4wKH5u+/FEMf2W3mToJxHtw/Ty0nvrDEU=",
32 |     "zh:07aabc8e46a5a2b29932e10677b23d4ce9d9a25f22ab61d3307a6b0e7998c84e",
33 |     "zh:0b63cd9534a98ed0fee794da495833046ad5319bd2da3102e21a941b7e2b857e",
34 |     "zh:17f815d57e1426edf8818323ab8e1022c8ec60dce0ced89a3b8e5dde5a95b3cc",
35 |     "zh:37855eae3542f2ebc6416984b124533d00299e0e01dcd7d2bc2205469cb9eceb",
36 |     "zh:579aa32a8e3fa317ddbd28c99a6449ae8864a5b7d10247bca6496f399cb36701",
37 |     "zh:703f71e0231cfe7a025c61db361d928189adba1d4fad2fe77f783dc73c8afe30",
38 |     "zh:afcd80c31cb1ed75ce6813269618e01ab29af68dae7aae1c51521c13acdaa678",
39 |     "zh:b21302f65a0d37045216912695d1ef718a1fe1732c30dc5654891fe2519b8e4e",
40 |     "zh:b69d0c8a74c2cd6233681db37e01aaaf1a6fb6bb24c83f7715bd2b456083e29d",
41 |     "zh:d4fb305816b143cb26c1827c79e56651347fd41809a57184e4807fb3f804f510",
42 |     "zh:fa24173ef9524bdfa1c5cada5188489554b08374f9519fe545f3fc1d3a9d9d4f",
43 |   ]
44 | }
45 | 


--------------------------------------------------------------------------------
/infra/runtime/ballista/main.tf:
--------------------------------------------------------------------------------
 1 | variable "region_name" {}
 2 | 
 3 | variable "ballista_standalone_image" {}
 4 | 
 5 | variable "ballista_distributed_image" {}
 6 | 
 7 | variable "bucket_arn" {}
 8 | 
 9 | module "env" {
10 |   source = "../../common/env"
11 | }
12 | 
13 | provider "aws" {
14 |   region = var.region_name
15 |   default_tags {
16 |     tags = module.env.default_tags
17 |   }
18 | }
19 | 
20 | resource "aws_iam_policy" "s3_access" {
21 |   name = "${module.env.module_name}-ballista-s3-access-${var.region_name}-${module.env.stage}"
22 | 
23 |   policy = <<EOF
24 | {
25 |     "Version": "2012-10-17",
26 |     "Statement": [
27 |         {
28 |             "Sid": "objectlevel",
29 |             "Effect": "Allow",
30 |             "Action": "s3:*",
31 |             "Resource": "${var.bucket_arn}/*"
32 |         },
33 |         {
34 |             "Sid": "bucketlevel",
35 |             "Effect": "Allow",
36 |             "Action": "s3:*",
37 |             "Resource": "${var.bucket_arn}"
38 |         }
39 |     ]
40 | }
41 | EOF
42 | }
43 | 
44 | module "engine" {
45 |   source = "../../common/lambda"
46 | 
47 |   function_base_name = "ballista"
48 |   region_name        = var.region_name
49 |   docker_image       = var.ballista_standalone_image
50 |   memory_size        = 2048
51 |   timeout            = 300
52 | 
53 |   additional_policies = [aws_iam_policy.s3_access.arn]
54 |   environment         = {}
55 | 
56 | }
57 | 
58 | module "distributed_engine" {
59 |   source = "../../common/lambda"
60 | 
61 |   function_base_name = "ballista-distributed"
62 |   region_name        = var.region_name
63 |   docker_image       = var.ballista_distributed_image
64 |   memory_size        = 4096
65 |   timeout            = 300
66 | 
67 |   additional_policies = [aws_iam_policy.s3_access.arn]
68 |   environment = {
69 |     CHAPPY_VIRTUAL_SUBNET : "172.28.0.0/16",
70 |   }
71 | 
72 | }
73 | 
74 | output "lambda_name" {
75 |   value = module.engine.lambda_name
76 | }
77 | 
78 | output "distributed_lambda_name" {
79 |   value = module.distributed_engine.lambda_name
80 | }
81 | 


--------------------------------------------------------------------------------
/infra/runtime/ballista/terragrunt.hcl:
--------------------------------------------------------------------------------
 1 | include "root" {
 2 |   path = find_in_parent_folders("terragrunt.${get_env("TF_STATE_BACKEND")}.hcl")
 3 | }
 4 | 
 5 | dependency "core" {
 6 |   config_path = "../core"
 7 | 
 8 |   mock_outputs = {
 9 |     bucket_arn = "arn:aws:s3:::mock"
10 |   }
11 | }
12 | 
13 | locals {
14 |   region_name = get_env("L12N_AWS_REGION")
15 | }
16 | 
17 | 
18 | terraform {
19 |   before_hook "deploy_images" {
20 |     commands = ["apply"]
21 |     execute  = ["../build_and_print.sh", "ballista"]
22 |   }
23 | 
24 |   extra_arguments "image_vars" {
25 |     commands  = ["apply"]
26 |     arguments = ["-var-file=${get_terragrunt_dir()}/images.generated.tfvars"]
27 |   }
28 | 
29 | }
30 | 
31 | inputs = {
32 |   region_name                = local.region_name
33 |   ballista_standalone_image  = "dummy_overriden_by_before_hook"
34 |   ballista_distributed_image = "dummy_overriden_by_before_hook"
35 |   bucket_arn                 = dependency.core.outputs.bucket_arn
36 | }
37 | 


--------------------------------------------------------------------------------
/infra/runtime/build_and_print.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | COMPOSE_FILE=../../../docker/$1/docker-compose.yaml
4 | l12n docker-login \
5 |     build-images --compose-file=$COMPOSE_FILE \
6 |     push-images --compose-file=$COMPOSE_FILE && \
7 | l12n print-image-vars --compose-file=$COMPOSE_FILE $2 > images.generated.tfvars
8 | 


--------------------------------------------------------------------------------
/infra/runtime/chappy/.terraform.lock.hcl:
--------------------------------------------------------------------------------
 1 | # This file is maintained automatically by "terraform init".
 2 | # Manual edits may be lost in future updates.
 3 | 
 4 | provider "registry.terraform.io/hashicorp/aws" {
 5 |   version     = "4.54.0"
 6 |   constraints = "~> 4.45"
 7 |   hashes = [
 8 |     "h1:j/L01+hlHVM2X2VrkQC2WtMZyu4ZLhDMw+HDJ7k0Y2Q=",
 9 |     "zh:24358aefc06b3f38878680fe606dab2570cb58ab952750c47e90b81d3b05e606",
10 |     "zh:3fc0ef459d6bb4fbb0e4eb7b8adadddd636efa6d975be6e70de7327d83e15729",
11 |     "zh:67e765119726f47b1916316ac95c3cd32ac074b454f2a67b6127120b476bc483",
12 |     "zh:71aed1300debac24f11263a6f8a231c6432497b25e623e8f34e27121af65f523",
13 |     "zh:722043077e63713d4e458f3228be30c21fcff5b6660c6de8b96967337cdc604a",
14 |     "zh:76d67be4220b93cfaca0882f46db9a42b4ca48285a64fe304f108dde85f4d611",
15 |     "zh:81534c18d9f02648b1644a7937e7bea56e91caef13b41de121ee51168faad680",
16 |     "zh:89983ab2596846d5f3413ff1b5b9b21424c3c757a54dcc5a4604d3ac34fea1a6",
17 |     "zh:8a603ac6884de5dc51c372f641f9613aefd87059ff6e6a74b671f6864226e06f",
18 |     "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425",
19 |     "zh:b6fae6c1cda6d842406066dac7803d24a597b62da5fae33bcd50c5dae70140c2",
20 |     "zh:bc4c3b4bfb715beecf5186dfeb91173ef1a9c0b68e8c45cbeee180195bbfa37f",
21 |     "zh:c741a3fe7d085593a160e79596bd237afc9503c836abcc95fd627554cdf16ec0",
22 |     "zh:f6763e96485e1ea5b67a33bbd04042e412508b2b06946acf957fb68a314d893e",
23 |     "zh:fc7144577ea7d6e05c276b54a9f8f8609be7b4d0a128aa45f233a4b0e5cbf090",
24 |   ]
25 | }
26 | 
27 | provider "registry.terraform.io/hashicorp/google" {
28 |   version     = "3.90.1"
29 |   constraints = "~> 3.0"
30 |   hashes = [
31 |     "h1:91QFfSGwMX4wKH5u+/FEMf2W3mToJxHtw/Ty0nvrDEU=",
32 |     "zh:07aabc8e46a5a2b29932e10677b23d4ce9d9a25f22ab61d3307a6b0e7998c84e",
33 |     "zh:0b63cd9534a98ed0fee794da495833046ad5319bd2da3102e21a941b7e2b857e",
34 |     "zh:17f815d57e1426edf8818323ab8e1022c8ec60dce0ced89a3b8e5dde5a95b3cc",
35 |     "zh:37855eae3542f2ebc6416984b124533d00299e0e01dcd7d2bc2205469cb9eceb",
36 |     "zh:579aa32a8e3fa317ddbd28c99a6449ae8864a5b7d10247bca6496f399cb36701",
37 |     "zh:703f71e0231cfe7a025c61db361d928189adba1d4fad2fe77f783dc73c8afe30",
38 |     "zh:afcd80c31cb1ed75ce6813269618e01ab29af68dae7aae1c51521c13acdaa678",
39 |     "zh:b21302f65a0d37045216912695d1ef718a1fe1732c30dc5654891fe2519b8e4e",
40 |     "zh:b69d0c8a74c2cd6233681db37e01aaaf1a6fb6bb24c83f7715bd2b456083e29d",
41 |     "zh:d4fb305816b143cb26c1827c79e56651347fd41809a57184e4807fb3f804f510",
42 |     "zh:fa24173ef9524bdfa1c5cada5188489554b08374f9519fe545f3fc1d3a9d9d4f",
43 |   ]
44 | }
45 | 


--------------------------------------------------------------------------------
/infra/runtime/chappy/main.tf:
--------------------------------------------------------------------------------
 1 | variable "region_name" {}
 2 | 
 3 | variable "chappydev_image" {}
 4 | 
 5 | variable "bucket_arn" {}
 6 | 
 7 | module "env" {
 8 |   source = "../../common/env"
 9 | }
10 | 
11 | provider "aws" {
12 |   region = var.region_name
13 |   default_tags {
14 |     tags = module.env.default_tags
15 |   }
16 | }
17 | 
18 | resource "aws_iam_policy" "s3_access" {
19 |   name = "${module.env.module_name}-chappydev-s3-access-${var.region_name}-${module.env.stage}"
20 | 
21 |   policy = <<EOF
22 | {
23 |     "Version": "2012-10-17",
24 |     "Statement": [
25 |         {
26 |             "Sid": "objectlevel",
27 |             "Effect": "Allow",
28 |             "Action": "s3:*",
29 |             "Resource": "${var.bucket_arn}/*"
30 |         },
31 |         {
32 |             "Sid": "bucketlevel",
33 |             "Effect": "Allow",
34 |             "Action": "s3:*",
35 |             "Resource": "${var.bucket_arn}"
36 |         }
37 |     ]
38 | }
39 | EOF
40 | }
41 | 
42 | resource "aws_iam_policy" "lambda_insights" {
43 |   name = "${module.env.module_name}-chappydev-lambda-insights-${var.region_name}-${module.env.stage}"
44 | 
45 |   policy = <<EOF
46 | {
47 |     "Version": "2012-10-17",
48 |     "Statement": [
49 |         {
50 |             "Effect": "Allow",
51 |             "Action": "logs:CreateLogGroup",
52 |             "Resource": "*"
53 |         },
54 |         {
55 |             "Effect": "Allow",
56 |             "Action": [
57 |                 "logs:CreateLogStream",
58 |                 "logs:PutLogEvents"
59 |             ],
60 |             "Resource": "arn:aws:logs:*:*:log-group:/aws/lambda-insights:*"
61 |         }
62 |     ]
63 | }
64 | EOF
65 | }
66 | 
67 | module "dev_lambda" {
68 |   source = "../../common/lambda"
69 | 
70 |   function_base_name = "chappydev"
71 |   region_name        = var.region_name
72 |   docker_image       = var.chappydev_image
73 |   memory_size        = 2048
74 |   timeout            = 300
75 | 
76 |   additional_policies = [aws_iam_policy.s3_access.arn, aws_iam_policy.lambda_insights.arn]
77 |   environment         = {}
78 | 
79 | }
80 | 
81 | output "dev_lambda_name" {
82 |   value = module.dev_lambda.lambda_name
83 | }
84 | 


--------------------------------------------------------------------------------
/infra/runtime/chappy/terragrunt.hcl:
--------------------------------------------------------------------------------
 1 | include "root" {
 2 |   path = find_in_parent_folders("terragrunt.${get_env("TF_STATE_BACKEND")}.hcl")
 3 | }
 4 | 
 5 | dependency "core" {
 6 |   config_path = "../core"
 7 | 
 8 |   mock_outputs = {
 9 |     bucket_arn = "arn:aws:s3:::mock"
10 |   }
11 | }
12 | 
13 | locals {
14 |   region_name = get_env("L12N_AWS_REGION")
15 | }
16 | 
17 | 
18 | terraform {
19 |   before_hook "deploy_images" {
20 |     commands = ["apply"]
21 |     execute  = ["../build_and_print.sh", "chappy"]
22 |   }
23 | 
24 |   extra_arguments "image_vars" {
25 |     commands  = ["apply"]
26 |     arguments = ["-var-file=${get_terragrunt_dir()}/images.generated.tfvars"]
27 |   }
28 | 
29 | }
30 | 
31 | inputs = {
32 |   region_name     = local.region_name
33 |   chappydev_image = "dummy_overriden_by_before_hook"
34 |   bucket_arn      = dependency.core.outputs.bucket_arn
35 | }
36 | 


--------------------------------------------------------------------------------
/infra/runtime/clickhouse/.terraform.lock.hcl:
--------------------------------------------------------------------------------
 1 | # This file is maintained automatically by "terraform init".
 2 | # Manual edits may be lost in future updates.
 3 | 
 4 | provider "registry.terraform.io/hashicorp/aws" {
 5 |   version     = "4.45.0"
 6 |   constraints = "~> 4.45"
 7 |   hashes = [
 8 |     "h1:J/XjRsEJIpxi+mczXQfnH3nvfACv3LRDtrthQJCIibY=",
 9 |     "zh:22da03786f25658a000d1bcc28c780816a97e7e8a1f59fff6eee7d452830e95e",
10 |     "zh:2543be56eee0491eb0c79ca1c901dcbf71da26625961fe719f088263fef062f4",
11 |     "zh:31a1da1e3beedfd88c3c152ab505bdcf330427f26b75835885526f7bb75c4857",
12 |     "zh:4409afe50f225659d5f378fe9303a45052953a1219f7f1acc82b69d07528b7ba",
13 |     "zh:4dadec3b783f10d2f8eef3dab5e817baae9c932a7967d45fe3d77fcbcbdaa438",
14 |     "zh:55be80d6e24828dcb0db7a0226fb275415c1c0ad63dd2f33b76f3ac0cd64e6a6",
15 |     "zh:560bba29efb7dbe0bfcc937369d88817aa31a8d18aa25395b1afe2576cb04495",
16 |     "zh:6caacc202e83438ff63d5d96733e283f44e349668d96c6b1c5c7df463ebf85cc",
17 |     "zh:6cabab83a61d5b4ac801c5a5d57556a0e76ec8dc879d28cf777509db5f6a657e",
18 |     "zh:96c4528bf9c16edb8841b68479ec51c499ed7fa680462fa28caeab3fc168bb43",
19 |     "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425",
20 |     "zh:cdc0b47ff840d708fbf75abfe86d23dc7f1dffdd233a771822a17b5c637f4769",
21 |     "zh:d9a9583e82776d1ebb6cf6c3d47acc2b302f8778f470ceffe7579dc794eb1feb",
22 |     "zh:e9367ca9f6f6418a23cdf8d01f29dd0c4f614e78499f52a767a422e4c334b915",
23 |     "zh:f6d355a2fb3bcebb597f68bbca4fa2aaa364efd29240236c582375e219d77656",
24 |   ]
25 | }
26 | 
27 | provider "registry.terraform.io/hashicorp/google" {
28 |   version     = "3.90.1"
29 |   constraints = "~> 3.0"
30 |   hashes = [
31 |     "h1:91QFfSGwMX4wKH5u+/FEMf2W3mToJxHtw/Ty0nvrDEU=",
32 |     "zh:07aabc8e46a5a2b29932e10677b23d4ce9d9a25f22ab61d3307a6b0e7998c84e",
33 |     "zh:0b63cd9534a98ed0fee794da495833046ad5319bd2da3102e21a941b7e2b857e",
34 |     "zh:17f815d57e1426edf8818323ab8e1022c8ec60dce0ced89a3b8e5dde5a95b3cc",
35 |     "zh:37855eae3542f2ebc6416984b124533d00299e0e01dcd7d2bc2205469cb9eceb",
36 |     "zh:579aa32a8e3fa317ddbd28c99a6449ae8864a5b7d10247bca6496f399cb36701",
37 |     "zh:703f71e0231cfe7a025c61db361d928189adba1d4fad2fe77f783dc73c8afe30",
38 |     "zh:afcd80c31cb1ed75ce6813269618e01ab29af68dae7aae1c51521c13acdaa678",
39 |     "zh:b21302f65a0d37045216912695d1ef718a1fe1732c30dc5654891fe2519b8e4e",
40 |     "zh:b69d0c8a74c2cd6233681db37e01aaaf1a6fb6bb24c83f7715bd2b456083e29d",
41 |     "zh:d4fb305816b143cb26c1827c79e56651347fd41809a57184e4807fb3f804f510",
42 |     "zh:fa24173ef9524bdfa1c5cada5188489554b08374f9519fe545f3fc1d3a9d9d4f",
43 |   ]
44 | }
45 | 


--------------------------------------------------------------------------------
/infra/runtime/clickhouse/main.tf:
--------------------------------------------------------------------------------
 1 | variable "region_name" {}
 2 | 
 3 | variable "clickhouse_image" {}
 4 | 
 5 | variable "bucket_arn" {}
 6 | 
 7 | module "env" {
 8 |   source = "../../common/env"
 9 | }
10 | 
11 | provider "aws" {
12 |   region = var.region_name
13 |   default_tags {
14 |     tags = module.env.default_tags
15 |   }
16 | }
17 | 
18 | resource "aws_iam_policy" "s3_access" {
19 |   name = "${module.env.module_name}-clickhouse-s3-access-${var.region_name}-${module.env.stage}"
20 | 
21 |   policy = <<EOF
22 | {
23 |     "Version": "2012-10-17",
24 |     "Statement": [
25 |         {
26 |             "Sid": "objectlevel",
27 |             "Effect": "Allow",
28 |             "Action": "s3:*",
29 |             "Resource": "${var.bucket_arn}/*"
30 |         },
31 |         {
32 |             "Sid": "bucketlevel",
33 |             "Effect": "Allow",
34 |             "Action": "s3:*",
35 |             "Resource": "${var.bucket_arn}"
36 |         }
37 |     ]
38 | }
39 | EOF
40 | }
41 | 
42 | module "engine" {
43 |   source = "../../common/lambda"
44 | 
45 |   function_base_name = "clickhouse"
46 |   region_name        = var.region_name
47 |   docker_image       = var.clickhouse_image
48 |   memory_size        = 2048
49 |   timeout            = 300
50 | 
51 |   additional_policies = [aws_iam_policy.s3_access.arn]
52 |   environment         = {}
53 | 
54 | }
55 | 
56 | output "lambda_name" {
57 |   value = module.engine.lambda_name
58 | }
59 | 


--------------------------------------------------------------------------------
/infra/runtime/clickhouse/terragrunt.hcl:
--------------------------------------------------------------------------------
 1 | include "root" {
 2 |   path = find_in_parent_folders("terragrunt.${get_env("TF_STATE_BACKEND")}.hcl")
 3 | }
 4 | 
 5 | dependency "core" {
 6 |   config_path = "../core"
 7 | 
 8 |   mock_outputs = {
 9 |     bucket_arn = "arn:aws:s3:::mock"
10 |   }
11 | }
12 | 
13 | locals {
14 |   region_name = get_env("L12N_AWS_REGION")
15 | }
16 | 
17 | 
18 | terraform {
19 |   before_hook "deploy_images" {
20 |     commands = ["apply"]
21 |     execute  = ["../build_and_print.sh", "clickhouse"]
22 |   }
23 | 
24 |   extra_arguments "image_vars" {
25 |     commands  = ["apply"]
26 |     arguments = ["-var-file=${get_terragrunt_dir()}/images.generated.tfvars"]
27 |   }
28 | 
29 | }
30 | 
31 | inputs = {
32 |   region_name      = local.region_name
33 |   clickhouse_image = "dummy_overriden_by_before_hook"
34 |   bucket_arn       = dependency.core.outputs.bucket_arn
35 | }
36 | 


--------------------------------------------------------------------------------
/infra/runtime/core/.terraform.lock.hcl:
--------------------------------------------------------------------------------
 1 | # This file is maintained automatically by "terraform init".
 2 | # Manual edits may be lost in future updates.
 3 | 
 4 | provider "registry.terraform.io/hashicorp/aws" {
 5 |   version     = "4.45.0"
 6 |   constraints = "~> 4.45"
 7 |   hashes = [
 8 |     "h1:J/XjRsEJIpxi+mczXQfnH3nvfACv3LRDtrthQJCIibY=",
 9 |     "zh:22da03786f25658a000d1bcc28c780816a97e7e8a1f59fff6eee7d452830e95e",
10 |     "zh:2543be56eee0491eb0c79ca1c901dcbf71da26625961fe719f088263fef062f4",
11 |     "zh:31a1da1e3beedfd88c3c152ab505bdcf330427f26b75835885526f7bb75c4857",
12 |     "zh:4409afe50f225659d5f378fe9303a45052953a1219f7f1acc82b69d07528b7ba",
13 |     "zh:4dadec3b783f10d2f8eef3dab5e817baae9c932a7967d45fe3d77fcbcbdaa438",
14 |     "zh:55be80d6e24828dcb0db7a0226fb275415c1c0ad63dd2f33b76f3ac0cd64e6a6",
15 |     "zh:560bba29efb7dbe0bfcc937369d88817aa31a8d18aa25395b1afe2576cb04495",
16 |     "zh:6caacc202e83438ff63d5d96733e283f44e349668d96c6b1c5c7df463ebf85cc",
17 |     "zh:6cabab83a61d5b4ac801c5a5d57556a0e76ec8dc879d28cf777509db5f6a657e",
18 |     "zh:96c4528bf9c16edb8841b68479ec51c499ed7fa680462fa28caeab3fc168bb43",
19 |     "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425",
20 |     "zh:cdc0b47ff840d708fbf75abfe86d23dc7f1dffdd233a771822a17b5c637f4769",
21 |     "zh:d9a9583e82776d1ebb6cf6c3d47acc2b302f8778f470ceffe7579dc794eb1feb",
22 |     "zh:e9367ca9f6f6418a23cdf8d01f29dd0c4f614e78499f52a767a422e4c334b915",
23 |     "zh:f6d355a2fb3bcebb597f68bbca4fa2aaa364efd29240236c582375e219d77656",
24 |   ]
25 | }
26 | 
27 | provider "registry.terraform.io/hashicorp/google" {
28 |   version     = "3.90.1"
29 |   constraints = "~> 3.0"
30 |   hashes = [
31 |     "h1:91QFfSGwMX4wKH5u+/FEMf2W3mToJxHtw/Ty0nvrDEU=",
32 |     "zh:07aabc8e46a5a2b29932e10677b23d4ce9d9a25f22ab61d3307a6b0e7998c84e",
33 |     "zh:0b63cd9534a98ed0fee794da495833046ad5319bd2da3102e21a941b7e2b857e",
34 |     "zh:17f815d57e1426edf8818323ab8e1022c8ec60dce0ced89a3b8e5dde5a95b3cc",
35 |     "zh:37855eae3542f2ebc6416984b124533d00299e0e01dcd7d2bc2205469cb9eceb",
36 |     "zh:579aa32a8e3fa317ddbd28c99a6449ae8864a5b7d10247bca6496f399cb36701",
37 |     "zh:703f71e0231cfe7a025c61db361d928189adba1d4fad2fe77f783dc73c8afe30",
38 |     "zh:afcd80c31cb1ed75ce6813269618e01ab29af68dae7aae1c51521c13acdaa678",
39 |     "zh:b21302f65a0d37045216912695d1ef718a1fe1732c30dc5654891fe2519b8e4e",
40 |     "zh:b69d0c8a74c2cd6233681db37e01aaaf1a6fb6bb24c83f7715bd2b456083e29d",
41 |     "zh:d4fb305816b143cb26c1827c79e56651347fd41809a57184e4807fb3f804f510",
42 |     "zh:fa24173ef9524bdfa1c5cada5188489554b08374f9519fe545f3fc1d3a9d9d4f",
43 |   ]
44 | }
45 | 


--------------------------------------------------------------------------------
/infra/runtime/core/main.tf:
--------------------------------------------------------------------------------
 1 | variable "region_name" {
 2 |   description = "The AWS region name (eu-west-1, us-east2...) in which the stack will be deployed"
 3 | }
 4 | 
 5 | module "env" {
 6 |   source = "../../common/env"
 7 | }
 8 | 
 9 | data "aws_caller_identity" "current" {}
10 | 
11 | 
12 | provider "aws" {
13 |   region = var.region_name
14 |   default_tags {
15 |     tags = module.env.default_tags
16 |   }
17 | }
18 | 
19 | resource "aws_ecr_repository" "main" {
20 |   name                 = "${module.env.module_name}-${module.env.stage}"
21 |   image_tag_mutability = "MUTABLE"
22 |   force_delete         = true
23 | 
24 |   image_scanning_configuration {
25 |     scan_on_push = false
26 |   }
27 | 
28 |   lifecycle {
29 |     ignore_changes = [tags]
30 |   }
31 | }
32 | 
33 | resource "aws_s3_bucket" "data" {
34 |   bucket        = "${module.env.module_name}-${data.aws_caller_identity.current.account_id}-${var.region_name}-${module.env.stage}"
35 |   force_destroy = true
36 | }
37 | 
38 | resource "aws_s3_object_copy" "nyc_taxi" {
39 |   count  = 6
40 |   bucket = aws_s3_bucket.data.id
41 |   key    = "nyc-taxi/2019/0${count.index + 1}/data.parquet"
42 |   source = "ursa-labs-taxi-data/2019/0${count.index + 1}/data.parquet"
43 | 
44 |   lifecycle {
45 |     ignore_changes = [tags_all]
46 |   }
47 | }
48 | 
49 | output "repository_url" {
50 |   value = aws_ecr_repository.main.repository_url
51 | }
52 | 
53 | output "repository_arn" {
54 |   value = aws_ecr_repository.main.arn
55 | }
56 | 
57 | output "bucket_name" {
58 |   value = aws_s3_bucket.data.id
59 | }
60 | 
61 | output "bucket_arn" {
62 |   value = aws_s3_bucket.data.arn
63 | }
64 | 


--------------------------------------------------------------------------------
/infra/runtime/core/terragrunt.hcl:
--------------------------------------------------------------------------------
1 | include "root" {
2 |   path = find_in_parent_folders("terragrunt.${get_env("TF_STATE_BACKEND")}.hcl")
3 | }
4 | 
5 | inputs = {
6 |   region_name = get_env("L12N_AWS_REGION")
7 | }
8 | 


--------------------------------------------------------------------------------
/infra/runtime/dask/.terraform.lock.hcl:
--------------------------------------------------------------------------------
 1 | # This file is maintained automatically by "terraform init".
 2 | # Manual edits may be lost in future updates.
 3 | 
 4 | provider "registry.terraform.io/hashicorp/aws" {
 5 |   version     = "4.45.0"
 6 |   constraints = "~> 4.45"
 7 |   hashes = [
 8 |     "h1:J/XjRsEJIpxi+mczXQfnH3nvfACv3LRDtrthQJCIibY=",
 9 |     "zh:22da03786f25658a000d1bcc28c780816a97e7e8a1f59fff6eee7d452830e95e",
10 |     "zh:2543be56eee0491eb0c79ca1c901dcbf71da26625961fe719f088263fef062f4",
11 |     "zh:31a1da1e3beedfd88c3c152ab505bdcf330427f26b75835885526f7bb75c4857",
12 |     "zh:4409afe50f225659d5f378fe9303a45052953a1219f7f1acc82b69d07528b7ba",
13 |     "zh:4dadec3b783f10d2f8eef3dab5e817baae9c932a7967d45fe3d77fcbcbdaa438",
14 |     "zh:55be80d6e24828dcb0db7a0226fb275415c1c0ad63dd2f33b76f3ac0cd64e6a6",
15 |     "zh:560bba29efb7dbe0bfcc937369d88817aa31a8d18aa25395b1afe2576cb04495",
16 |     "zh:6caacc202e83438ff63d5d96733e283f44e349668d96c6b1c5c7df463ebf85cc",
17 |     "zh:6cabab83a61d5b4ac801c5a5d57556a0e76ec8dc879d28cf777509db5f6a657e",
18 |     "zh:96c4528bf9c16edb8841b68479ec51c499ed7fa680462fa28caeab3fc168bb43",
19 |     "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425",
20 |     "zh:cdc0b47ff840d708fbf75abfe86d23dc7f1dffdd233a771822a17b5c637f4769",
21 |     "zh:d9a9583e82776d1ebb6cf6c3d47acc2b302f8778f470ceffe7579dc794eb1feb",
22 |     "zh:e9367ca9f6f6418a23cdf8d01f29dd0c4f614e78499f52a767a422e4c334b915",
23 |     "zh:f6d355a2fb3bcebb597f68bbca4fa2aaa364efd29240236c582375e219d77656",
24 |   ]
25 | }
26 | 
27 | provider "registry.terraform.io/hashicorp/google" {
28 |   version     = "3.90.1"
29 |   constraints = "~> 3.0"
30 |   hashes = [
31 |     "h1:91QFfSGwMX4wKH5u+/FEMf2W3mToJxHtw/Ty0nvrDEU=",
32 |     "zh:07aabc8e46a5a2b29932e10677b23d4ce9d9a25f22ab61d3307a6b0e7998c84e",
33 |     "zh:0b63cd9534a98ed0fee794da495833046ad5319bd2da3102e21a941b7e2b857e",
34 |     "zh:17f815d57e1426edf8818323ab8e1022c8ec60dce0ced89a3b8e5dde5a95b3cc",
35 |     "zh:37855eae3542f2ebc6416984b124533d00299e0e01dcd7d2bc2205469cb9eceb",
36 |     "zh:579aa32a8e3fa317ddbd28c99a6449ae8864a5b7d10247bca6496f399cb36701",
37 |     "zh:703f71e0231cfe7a025c61db361d928189adba1d4fad2fe77f783dc73c8afe30",
38 |     "zh:afcd80c31cb1ed75ce6813269618e01ab29af68dae7aae1c51521c13acdaa678",
39 |     "zh:b21302f65a0d37045216912695d1ef718a1fe1732c30dc5654891fe2519b8e4e",
40 |     "zh:b69d0c8a74c2cd6233681db37e01aaaf1a6fb6bb24c83f7715bd2b456083e29d",
41 |     "zh:d4fb305816b143cb26c1827c79e56651347fd41809a57184e4807fb3f804f510",
42 |     "zh:fa24173ef9524bdfa1c5cada5188489554b08374f9519fe545f3fc1d3a9d9d4f",
43 |   ]
44 | }
45 | 


--------------------------------------------------------------------------------
/infra/runtime/dask/main.tf:
--------------------------------------------------------------------------------
 1 | variable "region_name" {}
 2 | 
 3 | variable "dask_image" {}
 4 | 
 5 | variable "bucket_arn" {}
 6 | 
 7 | module "env" {
 8 |   source = "../../common/env"
 9 | }
10 | 
11 | provider "aws" {
12 |   region = var.region_name
13 |   default_tags {
14 |     tags = module.env.default_tags
15 |   }
16 | }
17 | 
18 | resource "aws_iam_policy" "s3_access" {
19 |   name = "${module.env.module_name}-dask-s3-access-${var.region_name}-${module.env.stage}"
20 | 
21 |   policy = <<EOF
22 | {
23 |     "Version": "2012-10-17",
24 |     "Statement": [
25 |         {
26 |             "Sid": "objectlevel",
27 |             "Effect": "Allow",
28 |             "Action": "s3:*",
29 |             "Resource": "${var.bucket_arn}/*"
30 |         },
31 |         {
32 |             "Sid": "bucketlevel",
33 |             "Effect": "Allow",
34 |             "Action": "s3:*",
35 |             "Resource": "${var.bucket_arn}"
36 |         }
37 |     ]
38 | }
39 | EOF
40 | }
41 | 
42 | module "engine" {
43 |   source = "../../common/lambda"
44 | 
45 |   function_base_name = "dask"
46 |   region_name        = var.region_name
47 |   docker_image       = var.dask_image
48 |   memory_size        = 2048
49 |   timeout            = 300
50 | 
51 |   additional_policies = [aws_iam_policy.s3_access.arn]
52 |   environment         = {}
53 | 
54 | }
55 | 
56 | output "lambda_name" {
57 |   value = module.engine.lambda_name
58 | }
59 | 


--------------------------------------------------------------------------------
/infra/runtime/dask/terragrunt.hcl:
--------------------------------------------------------------------------------
 1 | include "root" {
 2 |   path = find_in_parent_folders("terragrunt.${get_env("TF_STATE_BACKEND")}.hcl")
 3 | }
 4 | 
 5 | dependency "core" {
 6 |   config_path = "../core"
 7 | 
 8 |   mock_outputs = {
 9 |     bucket_arn = "arn:aws:s3:::mock"
10 |   }
11 | }
12 | 
13 | locals {
14 |   region_name = get_env("L12N_AWS_REGION")
15 | }
16 | 
17 | 
18 | terraform {
19 |   before_hook "deploy_images" {
20 |     commands = ["apply"]
21 |     execute  = ["../build_and_print.sh", "dask"]
22 |   }
23 | 
24 |   extra_arguments "image_vars" {
25 |     commands  = ["apply"]
26 |     arguments = ["-var-file=${get_terragrunt_dir()}/images.generated.tfvars"]
27 |   }
28 | 
29 | }
30 | 
31 | inputs = {
32 |   region_name = local.region_name
33 |   dask_image  = "dummy_overriden_by_before_hook"
34 |   bucket_arn  = dependency.core.outputs.bucket_arn
35 | }
36 | 


--------------------------------------------------------------------------------
/infra/runtime/databend/.terraform.lock.hcl:
--------------------------------------------------------------------------------
 1 | # This file is maintained automatically by "terraform init".
 2 | # Manual edits may be lost in future updates.
 3 | 
 4 | provider "registry.terraform.io/hashicorp/aws" {
 5 |   version     = "4.45.0"
 6 |   constraints = "~> 4.45"
 7 |   hashes = [
 8 |     "h1:J/XjRsEJIpxi+mczXQfnH3nvfACv3LRDtrthQJCIibY=",
 9 |     "zh:22da03786f25658a000d1bcc28c780816a97e7e8a1f59fff6eee7d452830e95e",
10 |     "zh:2543be56eee0491eb0c79ca1c901dcbf71da26625961fe719f088263fef062f4",
11 |     "zh:31a1da1e3beedfd88c3c152ab505bdcf330427f26b75835885526f7bb75c4857",
12 |     "zh:4409afe50f225659d5f378fe9303a45052953a1219f7f1acc82b69d07528b7ba",
13 |     "zh:4dadec3b783f10d2f8eef3dab5e817baae9c932a7967d45fe3d77fcbcbdaa438",
14 |     "zh:55be80d6e24828dcb0db7a0226fb275415c1c0ad63dd2f33b76f3ac0cd64e6a6",
15 |     "zh:560bba29efb7dbe0bfcc937369d88817aa31a8d18aa25395b1afe2576cb04495",
16 |     "zh:6caacc202e83438ff63d5d96733e283f44e349668d96c6b1c5c7df463ebf85cc",
17 |     "zh:6cabab83a61d5b4ac801c5a5d57556a0e76ec8dc879d28cf777509db5f6a657e",
18 |     "zh:96c4528bf9c16edb8841b68479ec51c499ed7fa680462fa28caeab3fc168bb43",
19 |     "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425",
20 |     "zh:cdc0b47ff840d708fbf75abfe86d23dc7f1dffdd233a771822a17b5c637f4769",
21 |     "zh:d9a9583e82776d1ebb6cf6c3d47acc2b302f8778f470ceffe7579dc794eb1feb",
22 |     "zh:e9367ca9f6f6418a23cdf8d01f29dd0c4f614e78499f52a767a422e4c334b915",
23 |     "zh:f6d355a2fb3bcebb597f68bbca4fa2aaa364efd29240236c582375e219d77656",
24 |   ]
25 | }
26 | 
27 | provider "registry.terraform.io/hashicorp/google" {
28 |   version     = "3.90.1"
29 |   constraints = "~> 3.0"
30 |   hashes = [
31 |     "h1:91QFfSGwMX4wKH5u+/FEMf2W3mToJxHtw/Ty0nvrDEU=",
32 |     "zh:07aabc8e46a5a2b29932e10677b23d4ce9d9a25f22ab61d3307a6b0e7998c84e",
33 |     "zh:0b63cd9534a98ed0fee794da495833046ad5319bd2da3102e21a941b7e2b857e",
34 |     "zh:17f815d57e1426edf8818323ab8e1022c8ec60dce0ced89a3b8e5dde5a95b3cc",
35 |     "zh:37855eae3542f2ebc6416984b124533d00299e0e01dcd7d2bc2205469cb9eceb",
36 |     "zh:579aa32a8e3fa317ddbd28c99a6449ae8864a5b7d10247bca6496f399cb36701",
37 |     "zh:703f71e0231cfe7a025c61db361d928189adba1d4fad2fe77f783dc73c8afe30",
38 |     "zh:afcd80c31cb1ed75ce6813269618e01ab29af68dae7aae1c51521c13acdaa678",
39 |     "zh:b21302f65a0d37045216912695d1ef718a1fe1732c30dc5654891fe2519b8e4e",
40 |     "zh:b69d0c8a74c2cd6233681db37e01aaaf1a6fb6bb24c83f7715bd2b456083e29d",
41 |     "zh:d4fb305816b143cb26c1827c79e56651347fd41809a57184e4807fb3f804f510",
42 |     "zh:fa24173ef9524bdfa1c5cada5188489554b08374f9519fe545f3fc1d3a9d9d4f",
43 |   ]
44 | }
45 | 


--------------------------------------------------------------------------------
/infra/runtime/databend/main.tf:
--------------------------------------------------------------------------------
 1 | variable "region_name" {}
 2 | 
 3 | variable "databend_image" {}
 4 | 
 5 | variable "bucket_arn" {}
 6 | 
 7 | module "env" {
 8 |   source = "../../common/env"
 9 | }
10 | 
11 | provider "aws" {
12 |   region = var.region_name
13 |   default_tags {
14 |     tags = module.env.default_tags
15 |   }
16 | }
17 | 
18 | resource "aws_iam_policy" "s3_access" {
19 |   name = "${module.env.module_name}-databend-s3-access-${var.region_name}-${module.env.stage}"
20 | 
21 |   policy = <<EOF
22 | {
23 |     "Version": "2012-10-17",
24 |     "Statement": [
25 |         {
26 |             "Sid": "objectlevel",
27 |             "Effect": "Allow",
28 |             "Action": "s3:*",
29 |             "Resource": "${var.bucket_arn}/*"
30 |         },
31 |         {
32 |             "Sid": "bucketlevel",
33 |             "Effect": "Allow",
34 |             "Action": "s3:*",
35 |             "Resource": "${var.bucket_arn}"
36 |         }
37 |     ]
38 | }
39 | EOF
40 | }
41 | 
42 | module "engine" {
43 |   source = "../../common/lambda"
44 | 
45 |   function_base_name = "databend"
46 |   region_name        = var.region_name
47 |   docker_image       = var.databend_image
48 |   memory_size        = 2048
49 |   timeout            = 300
50 | 
51 |   additional_policies = [aws_iam_policy.s3_access.arn]
52 |   environment         = {}
53 | 
54 | }
55 | 
56 | output "lambda_name" {
57 |   value = module.engine.lambda_name
58 | }
59 | 


--------------------------------------------------------------------------------
/infra/runtime/databend/terragrunt.hcl:
--------------------------------------------------------------------------------
 1 | include "root" {
 2 |   path = find_in_parent_folders("terragrunt.${get_env("TF_STATE_BACKEND")}.hcl")
 3 | }
 4 | 
 5 | dependency "core" {
 6 |   config_path = "../core"
 7 | 
 8 |   mock_outputs = {
 9 |     bucket_arn = "arn:aws:s3:::mock"
10 |   }
11 | }
12 | 
13 | locals {
14 |   region_name = get_env("L12N_AWS_REGION")
15 | }
16 | 
17 | 
18 | terraform {
19 |   before_hook "deploy_images" {
20 |     commands = ["apply"]
21 |     execute  = ["../build_and_print.sh", "databend"]
22 |   }
23 | 
24 |   extra_arguments "image_vars" {
25 |     commands  = ["apply"]
26 |     arguments = ["-var-file=${get_terragrunt_dir()}/images.generated.tfvars"]
27 |   }
28 | 
29 | }
30 | 
31 | inputs = {
32 |   region_name    = local.region_name
33 |   databend_image = "dummy_overriden_by_before_hook"
34 |   bucket_arn     = dependency.core.outputs.bucket_arn
35 | }
36 | 


--------------------------------------------------------------------------------
/infra/runtime/dremio/.terraform.lock.hcl:
--------------------------------------------------------------------------------
 1 | # This file is maintained automatically by "terraform init".
 2 | # Manual edits may be lost in future updates.
 3 | 
 4 | provider "registry.terraform.io/hashicorp/aws" {
 5 |   version     = "4.45.0"
 6 |   constraints = "~> 4.45"
 7 |   hashes = [
 8 |     "h1:J/XjRsEJIpxi+mczXQfnH3nvfACv3LRDtrthQJCIibY=",
 9 |     "zh:22da03786f25658a000d1bcc28c780816a97e7e8a1f59fff6eee7d452830e95e",
10 |     "zh:2543be56eee0491eb0c79ca1c901dcbf71da26625961fe719f088263fef062f4",
11 |     "zh:31a1da1e3beedfd88c3c152ab505bdcf330427f26b75835885526f7bb75c4857",
12 |     "zh:4409afe50f225659d5f378fe9303a45052953a1219f7f1acc82b69d07528b7ba",
13 |     "zh:4dadec3b783f10d2f8eef3dab5e817baae9c932a7967d45fe3d77fcbcbdaa438",
14 |     "zh:55be80d6e24828dcb0db7a0226fb275415c1c0ad63dd2f33b76f3ac0cd64e6a6",
15 |     "zh:560bba29efb7dbe0bfcc937369d88817aa31a8d18aa25395b1afe2576cb04495",
16 |     "zh:6caacc202e83438ff63d5d96733e283f44e349668d96c6b1c5c7df463ebf85cc",
17 |     "zh:6cabab83a61d5b4ac801c5a5d57556a0e76ec8dc879d28cf777509db5f6a657e",
18 |     "zh:96c4528bf9c16edb8841b68479ec51c499ed7fa680462fa28caeab3fc168bb43",
19 |     "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425",
20 |     "zh:cdc0b47ff840d708fbf75abfe86d23dc7f1dffdd233a771822a17b5c637f4769",
21 |     "zh:d9a9583e82776d1ebb6cf6c3d47acc2b302f8778f470ceffe7579dc794eb1feb",
22 |     "zh:e9367ca9f6f6418a23cdf8d01f29dd0c4f614e78499f52a767a422e4c334b915",
23 |     "zh:f6d355a2fb3bcebb597f68bbca4fa2aaa364efd29240236c582375e219d77656",
24 |   ]
25 | }
26 | 
27 | provider "registry.terraform.io/hashicorp/google" {
28 |   version     = "3.90.1"
29 |   constraints = "~> 3.0"
30 |   hashes = [
31 |     "h1:91QFfSGwMX4wKH5u+/FEMf2W3mToJxHtw/Ty0nvrDEU=",
32 |     "zh:07aabc8e46a5a2b29932e10677b23d4ce9d9a25f22ab61d3307a6b0e7998c84e",
33 |     "zh:0b63cd9534a98ed0fee794da495833046ad5319bd2da3102e21a941b7e2b857e",
34 |     "zh:17f815d57e1426edf8818323ab8e1022c8ec60dce0ced89a3b8e5dde5a95b3cc",
35 |     "zh:37855eae3542f2ebc6416984b124533d00299e0e01dcd7d2bc2205469cb9eceb",
36 |     "zh:579aa32a8e3fa317ddbd28c99a6449ae8864a5b7d10247bca6496f399cb36701",
37 |     "zh:703f71e0231cfe7a025c61db361d928189adba1d4fad2fe77f783dc73c8afe30",
38 |     "zh:afcd80c31cb1ed75ce6813269618e01ab29af68dae7aae1c51521c13acdaa678",
39 |     "zh:b21302f65a0d37045216912695d1ef718a1fe1732c30dc5654891fe2519b8e4e",
40 |     "zh:b69d0c8a74c2cd6233681db37e01aaaf1a6fb6bb24c83f7715bd2b456083e29d",
41 |     "zh:d4fb305816b143cb26c1827c79e56651347fd41809a57184e4807fb3f804f510",
42 |     "zh:fa24173ef9524bdfa1c5cada5188489554b08374f9519fe545f3fc1d3a9d9d4f",
43 |   ]
44 | }
45 | 


--------------------------------------------------------------------------------
/infra/runtime/dremio/main.tf:
--------------------------------------------------------------------------------
 1 | variable "region_name" {}
 2 | 
 3 | variable "dremio_image" {}
 4 | 
 5 | variable "bucket_arn" {}
 6 | 
 7 | module "env" {
 8 |   source = "../../common/env"
 9 | }
10 | 
11 | provider "aws" {
12 |   region = var.region_name
13 |   default_tags {
14 |     tags = module.env.default_tags
15 |   }
16 | }
17 | 
18 | resource "aws_iam_policy" "s3_access" {
19 |   name = "${module.env.module_name}-dremio-s3-access-${var.region_name}-${module.env.stage}"
20 | 
21 |   policy = <<EOF
22 | {
23 |     "Version": "2012-10-17",
24 |     "Statement": [
25 |         {
26 |             "Sid": "listbuckets",
27 |             "Effect": "Allow",
28 |             "Action": [
29 |                 "s3:GetBucketLocation",
30 |                 "s3:ListAllMyBuckets"
31 |             ],
32 |             "Resource": [
33 |                 "arn:aws:s3:::*"
34 |             ]
35 |         },
36 |         {
37 |             "Sid": "objectlevel",
38 |             "Effect": "Allow",
39 |             "Action": "s3:*",
40 |             "Resource": "${var.bucket_arn}/*"
41 |         },
42 |         {
43 |             "Sid": "bucketlevel",
44 |             "Effect": "Allow",
45 |             "Action": "s3:*",
46 |             "Resource": "${var.bucket_arn}"
47 |         }
48 |     ]
49 | }
50 | EOF
51 | }
52 | 
53 | module "engine" {
54 |   source = "../../common/lambda"
55 | 
56 |   function_base_name = "dremio"
57 |   region_name        = var.region_name
58 |   docker_image       = var.dremio_image
59 |   memory_size        = 2048
60 |   timeout            = 300
61 | 
62 |   additional_policies = [aws_iam_policy.s3_access.arn]
63 |   environment         = {}
64 | 
65 | }
66 | 
67 | output "lambda_name" {
68 |   value = module.engine.lambda_name
69 | }
70 | 


--------------------------------------------------------------------------------
/infra/runtime/dremio/terragrunt.hcl:
--------------------------------------------------------------------------------
 1 | include "root" {
 2 |   path = find_in_parent_folders("terragrunt.${get_env("TF_STATE_BACKEND")}.hcl")
 3 | }
 4 | 
 5 | dependency "core" {
 6 |   config_path = "../core"
 7 | 
 8 |   mock_outputs = {
 9 |     bucket_arn = "arn:aws:s3:::mock"
10 |   }
11 | }
12 | 
13 | locals {
14 |   region_name = get_env("L12N_AWS_REGION")
15 | }
16 | 
17 | 
18 | terraform {
19 |   before_hook "deploy_images" {
20 |     commands = ["apply"]
21 |     execute  = ["../build_and_print.sh", "dremio"]
22 |   }
23 | 
24 |   extra_arguments "image_vars" {
25 |     commands  = ["apply"]
26 |     arguments = ["-var-file=${get_terragrunt_dir()}/images.generated.tfvars"]
27 |   }
28 | 
29 | }
30 | 
31 | inputs = {
32 |   region_name  = local.region_name
33 |   dremio_image = "dummy_overriden_by_before_hook"
34 |   bucket_arn   = dependency.core.outputs.bucket_arn
35 | }
36 | 


--------------------------------------------------------------------------------
/infra/runtime/lambdacli/.terraform.lock.hcl:
--------------------------------------------------------------------------------
 1 | # This file is maintained automatically by "terraform init".
 2 | # Manual edits may be lost in future updates.
 3 | 
 4 | provider "registry.terraform.io/hashicorp/aws" {
 5 |   version     = "4.45.0"
 6 |   constraints = "~> 4.45"
 7 |   hashes = [
 8 |     "h1:J/XjRsEJIpxi+mczXQfnH3nvfACv3LRDtrthQJCIibY=",
 9 |     "zh:22da03786f25658a000d1bcc28c780816a97e7e8a1f59fff6eee7d452830e95e",
10 |     "zh:2543be56eee0491eb0c79ca1c901dcbf71da26625961fe719f088263fef062f4",
11 |     "zh:31a1da1e3beedfd88c3c152ab505bdcf330427f26b75835885526f7bb75c4857",
12 |     "zh:4409afe50f225659d5f378fe9303a45052953a1219f7f1acc82b69d07528b7ba",
13 |     "zh:4dadec3b783f10d2f8eef3dab5e817baae9c932a7967d45fe3d77fcbcbdaa438",
14 |     "zh:55be80d6e24828dcb0db7a0226fb275415c1c0ad63dd2f33b76f3ac0cd64e6a6",
15 |     "zh:560bba29efb7dbe0bfcc937369d88817aa31a8d18aa25395b1afe2576cb04495",
16 |     "zh:6caacc202e83438ff63d5d96733e283f44e349668d96c6b1c5c7df463ebf85cc",
17 |     "zh:6cabab83a61d5b4ac801c5a5d57556a0e76ec8dc879d28cf777509db5f6a657e",
18 |     "zh:96c4528bf9c16edb8841b68479ec51c499ed7fa680462fa28caeab3fc168bb43",
19 |     "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425",
20 |     "zh:cdc0b47ff840d708fbf75abfe86d23dc7f1dffdd233a771822a17b5c637f4769",
21 |     "zh:d9a9583e82776d1ebb6cf6c3d47acc2b302f8778f470ceffe7579dc794eb1feb",
22 |     "zh:e9367ca9f6f6418a23cdf8d01f29dd0c4f614e78499f52a767a422e4c334b915",
23 |     "zh:f6d355a2fb3bcebb597f68bbca4fa2aaa364efd29240236c582375e219d77656",
24 |   ]
25 | }
26 | 
27 | provider "registry.terraform.io/hashicorp/google" {
28 |   version     = "3.90.1"
29 |   constraints = "~> 3.0"
30 |   hashes = [
31 |     "h1:91QFfSGwMX4wKH5u+/FEMf2W3mToJxHtw/Ty0nvrDEU=",
32 |     "zh:07aabc8e46a5a2b29932e10677b23d4ce9d9a25f22ab61d3307a6b0e7998c84e",
33 |     "zh:0b63cd9534a98ed0fee794da495833046ad5319bd2da3102e21a941b7e2b857e",
34 |     "zh:17f815d57e1426edf8818323ab8e1022c8ec60dce0ced89a3b8e5dde5a95b3cc",
35 |     "zh:37855eae3542f2ebc6416984b124533d00299e0e01dcd7d2bc2205469cb9eceb",
36 |     "zh:579aa32a8e3fa317ddbd28c99a6449ae8864a5b7d10247bca6496f399cb36701",
37 |     "zh:703f71e0231cfe7a025c61db361d928189adba1d4fad2fe77f783dc73c8afe30",
38 |     "zh:afcd80c31cb1ed75ce6813269618e01ab29af68dae7aae1c51521c13acdaa678",
39 |     "zh:b21302f65a0d37045216912695d1ef718a1fe1732c30dc5654891fe2519b8e4e",
40 |     "zh:b69d0c8a74c2cd6233681db37e01aaaf1a6fb6bb24c83f7715bd2b456083e29d",
41 |     "zh:d4fb305816b143cb26c1827c79e56651347fd41809a57184e4807fb3f804f510",
42 |     "zh:fa24173ef9524bdfa1c5cada5188489554b08374f9519fe545f3fc1d3a9d9d4f",
43 |   ]
44 | }
45 | 


--------------------------------------------------------------------------------
/infra/runtime/lambdacli/main.tf:
--------------------------------------------------------------------------------
 1 | variable "region_name" {}
 2 | 
 3 | variable "lambdacli_image" {}
 4 | 
 5 | variable "bucket_arn" {}
 6 | 
 7 | variable "env_file" {
 8 |   sensitive = true
 9 | }
10 | 
11 | module "env" {
12 |   source = "../../common/env"
13 | }
14 | 
15 | provider "aws" {
16 |   region = var.region_name
17 |   default_tags {
18 |     tags = module.env.default_tags
19 |   }
20 | }
21 | 
22 | resource "aws_secretsmanager_secret" "envfile" {
23 |   name = "${module.env.module_name}-clienv-${module.env.stage}"
24 | }
25 | 
26 | resource "aws_secretsmanager_secret_version" "envfile" {
27 |   secret_id     = aws_secretsmanager_secret.envfile.id
28 |   secret_string = var.env_file
29 | }
30 | 
31 | resource "aws_iam_policy" "secret_access" {
32 |   name = "${module.env.module_name}-lambdacli-secret-${var.region_name}-${module.env.stage}"
33 | 
34 |   policy = <<EOF
35 | {
36 |   "Version": "2012-10-17",
37 |   "Statement": [
38 |     {
39 |       "Effect": "Allow",
40 |       "Action": "secretsmanager:GetSecretValue",
41 |       "Resource": "${aws_secretsmanager_secret.envfile.arn}"
42 |     }
43 |   ]
44 | }
45 | EOF
46 | }
47 | 
48 | resource "aws_iam_policy" "lambda_invoke" {
49 |   name = "${module.env.module_name}-lambdacli-invoke-${var.region_name}-${module.env.stage}"
50 | 
51 |   policy = <<EOF
52 | {
53 |   "Version": "2012-10-17",
54 |   "Statement": [
55 |     {
56 |       "Effect": "Allow",
57 |       "Action": [
58 |         "lambda:InvokeFunction",
59 |         "lambda:GetFunctionConfiguration",
60 |         "lambda:PublishVersion",
61 |         "lambda:UpdateFunctionConfiguration"
62 |       ],
63 |       "Resource": "*"
64 |     }
65 |   ]
66 | }
67 | EOF
68 | }
69 | 
70 | module "lambdacli" {
71 |   source = "../../common/lambda"
72 | 
73 |   function_base_name = "lambdacli"
74 |   region_name        = var.region_name
75 |   docker_image       = var.lambdacli_image
76 |   memory_size        = 3008
77 |   ephemeral_storage  = 4096
78 |   timeout            = 600
79 | 
80 |   additional_policies = [aws_iam_policy.secret_access.arn, aws_iam_policy.lambda_invoke.arn]
81 | 
82 |   environment = {
83 |     ENV_FILE_SECRET_ID         = aws_secretsmanager_secret.envfile.id
84 |     ENV_FILE_SECRET_VERSION_ID = aws_secretsmanager_secret_version.envfile.version_id
85 |     # region is required early to get the secret
86 |     L12N_AWS_REGION = var.region_name
87 |   }
88 | }
89 | 
90 | output "lambda_name" {
91 |   value = module.lambdacli.lambda_name
92 | }
93 | 
94 | output "lambda_arn" {
95 |   value = module.lambdacli.lambda_arn
96 | }
97 | 


--------------------------------------------------------------------------------
/infra/runtime/lambdacli/terragrunt.hcl:
--------------------------------------------------------------------------------
 1 | include "root" {
 2 |   path = find_in_parent_folders("terragrunt.${get_env("TF_STATE_BACKEND")}.hcl")
 3 | }
 4 | 
 5 | dependency "core" {
 6 |   config_path = "../core"
 7 | 
 8 |   mock_outputs = {
 9 |     bucket_arn = "arn:aws:s3:::mock"
10 |   }
11 | }
12 | 
13 | locals {
14 |   region_name = get_env("L12N_AWS_REGION")
15 | }
16 | 
17 | 
18 | terraform {
19 |   before_hook "deploy_images" {
20 |     commands = ["apply"]
21 |     execute  = ["../build_and_print.sh", "cli"]
22 |   }
23 | 
24 |   extra_arguments "image_vars" {
25 |     commands  = ["apply"]
26 |     arguments = ["-var-file=${get_terragrunt_dir()}/images.generated.tfvars"]
27 |   }
28 | 
29 | }
30 | 
31 | inputs = {
32 |   region_name     = local.region_name
33 |   lambdacli_image = ["dummy_overriden_by_before_hook"]
34 |   bucket_arn      = dependency.core.outputs.bucket_arn
35 |   # Remove AWS_ vars as creds are provided through role in AWS Lambda
36 |   env_file = run_cmd("--terragrunt-quiet", "bash", "-c", "cat ${get_env("CALLING_DIR")}/.env | sed '/^AWS_/d'")
37 | }
38 | 


--------------------------------------------------------------------------------
/infra/runtime/scaling/.terraform.lock.hcl:
--------------------------------------------------------------------------------
 1 | # This file is maintained automatically by "terraform init".
 2 | # Manual edits may be lost in future updates.
 3 | 
 4 | provider "registry.terraform.io/hashicorp/aws" {
 5 |   version     = "4.45.0"
 6 |   constraints = "~> 4.45"
 7 |   hashes = [
 8 |     "h1:J/XjRsEJIpxi+mczXQfnH3nvfACv3LRDtrthQJCIibY=",
 9 |     "zh:22da03786f25658a000d1bcc28c780816a97e7e8a1f59fff6eee7d452830e95e",
10 |     "zh:2543be56eee0491eb0c79ca1c901dcbf71da26625961fe719f088263fef062f4",
11 |     "zh:31a1da1e3beedfd88c3c152ab505bdcf330427f26b75835885526f7bb75c4857",
12 |     "zh:4409afe50f225659d5f378fe9303a45052953a1219f7f1acc82b69d07528b7ba",
13 |     "zh:4dadec3b783f10d2f8eef3dab5e817baae9c932a7967d45fe3d77fcbcbdaa438",
14 |     "zh:55be80d6e24828dcb0db7a0226fb275415c1c0ad63dd2f33b76f3ac0cd64e6a6",
15 |     "zh:560bba29efb7dbe0bfcc937369d88817aa31a8d18aa25395b1afe2576cb04495",
16 |     "zh:6caacc202e83438ff63d5d96733e283f44e349668d96c6b1c5c7df463ebf85cc",
17 |     "zh:6cabab83a61d5b4ac801c5a5d57556a0e76ec8dc879d28cf777509db5f6a657e",
18 |     "zh:96c4528bf9c16edb8841b68479ec51c499ed7fa680462fa28caeab3fc168bb43",
19 |     "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425",
20 |     "zh:cdc0b47ff840d708fbf75abfe86d23dc7f1dffdd233a771822a17b5c637f4769",
21 |     "zh:d9a9583e82776d1ebb6cf6c3d47acc2b302f8778f470ceffe7579dc794eb1feb",
22 |     "zh:e9367ca9f6f6418a23cdf8d01f29dd0c4f614e78499f52a767a422e4c334b915",
23 |     "zh:f6d355a2fb3bcebb597f68bbca4fa2aaa364efd29240236c582375e219d77656",
24 |   ]
25 | }
26 | 
27 | provider "registry.terraform.io/hashicorp/google" {
28 |   version     = "3.90.1"
29 |   constraints = "~> 3.0"
30 |   hashes = [
31 |     "h1:91QFfSGwMX4wKH5u+/FEMf2W3mToJxHtw/Ty0nvrDEU=",
32 |     "zh:07aabc8e46a5a2b29932e10677b23d4ce9d9a25f22ab61d3307a6b0e7998c84e",
33 |     "zh:0b63cd9534a98ed0fee794da495833046ad5319bd2da3102e21a941b7e2b857e",
34 |     "zh:17f815d57e1426edf8818323ab8e1022c8ec60dce0ced89a3b8e5dde5a95b3cc",
35 |     "zh:37855eae3542f2ebc6416984b124533d00299e0e01dcd7d2bc2205469cb9eceb",
36 |     "zh:579aa32a8e3fa317ddbd28c99a6449ae8864a5b7d10247bca6496f399cb36701",
37 |     "zh:703f71e0231cfe7a025c61db361d928189adba1d4fad2fe77f783dc73c8afe30",
38 |     "zh:afcd80c31cb1ed75ce6813269618e01ab29af68dae7aae1c51521c13acdaa678",
39 |     "zh:b21302f65a0d37045216912695d1ef718a1fe1732c30dc5654891fe2519b8e4e",
40 |     "zh:b69d0c8a74c2cd6233681db37e01aaaf1a6fb6bb24c83f7715bd2b456083e29d",
41 |     "zh:d4fb305816b143cb26c1827c79e56651347fd41809a57184e4807fb3f804f510",
42 |     "zh:fa24173ef9524bdfa1c5cada5188489554b08374f9519fe545f3fc1d3a9d9d4f",
43 |   ]
44 | }
45 | 


--------------------------------------------------------------------------------
/infra/runtime/scaling/main.tf:
--------------------------------------------------------------------------------
 1 | variable "region_name" {}
 2 | 
 3 | variable "images" {
 4 |   type = list(string)
 5 | }
 6 | 
 7 | variable "placeholder_sizes" {
 8 |   type = list(number)
 9 | }
10 | 
11 | locals {
12 |   # tflint-ignore: terraform_unused_declarations
13 |   validate_sizes = (length(var.placeholder_sizes) != length(var.images)) ? tobool("Placeholder size list and image name list must have the same size.") : true
14 | }
15 | 
16 | variable "bucket_arn" {}
17 | 
18 | module "env" {
19 |   source = "../../common/env"
20 | }
21 | 
22 | provider "aws" {
23 |   region = var.region_name
24 |   default_tags {
25 |     tags = module.env.default_tags
26 |   }
27 | }
28 | 
29 | module "placeholder" {
30 |   source = "../../common/lambda"
31 |   count  = length(var.placeholder_sizes)
32 | 
33 |   function_base_name = "placeholder-${element(var.placeholder_sizes, count.index)}"
34 |   region_name        = var.region_name
35 |   docker_image       = element(var.images, count.index)
36 |   memory_size        = 2048
37 |   timeout            = 300
38 | 
39 |   additional_policies = []
40 |   environment         = {}
41 | }
42 | 
43 | output "lambda_names" {
44 |   value = join(",", module.placeholder.*.lambda_name)
45 | }
46 | 


--------------------------------------------------------------------------------
/infra/runtime/scaling/terragrunt.hcl:
--------------------------------------------------------------------------------
 1 | include "root" {
 2 |   path = find_in_parent_folders("terragrunt.${get_env("TF_STATE_BACKEND")}.hcl")
 3 | }
 4 | 
 5 | dependency "core" {
 6 |   config_path = "../core"
 7 | 
 8 |   mock_outputs = {
 9 |     bucket_arn = "arn:aws:s3:::mock"
10 |   }
11 | }
12 | 
13 | locals {
14 |   region_name = get_env("L12N_AWS_REGION")
15 | }
16 | 
17 | 
18 | terraform {
19 |   before_hook "deploy_images" {
20 |     commands = ["apply"]
21 |     execute  = ["../build_and_print.sh", "scaling", "--list"]
22 |   }
23 | 
24 |   extra_arguments "image_vars" {
25 |     commands  = ["apply"]
26 |     arguments = ["-var-file=${get_terragrunt_dir()}/images.generated.tfvars"]
27 |   }
28 | 
29 | }
30 | 
31 | inputs = {
32 |   region_name       = local.region_name
33 |   images            = ["dummy-50", "dummy-200", "dummy-800"]
34 |   placeholder_sizes = [50, 200, 800]
35 |   bucket_arn        = dependency.core.outputs.bucket_arn
36 | }
37 | 


--------------------------------------------------------------------------------
/infra/runtime/scheduler/.terraform.lock.hcl:
--------------------------------------------------------------------------------
 1 | # This file is maintained automatically by "terraform init".
 2 | # Manual edits may be lost in future updates.
 3 | 
 4 | provider "registry.terraform.io/hashicorp/aws" {
 5 |   version     = "4.45.0"
 6 |   constraints = "~> 4.45"
 7 |   hashes = [
 8 |     "h1:J/XjRsEJIpxi+mczXQfnH3nvfACv3LRDtrthQJCIibY=",
 9 |     "zh:22da03786f25658a000d1bcc28c780816a97e7e8a1f59fff6eee7d452830e95e",
10 |     "zh:2543be56eee0491eb0c79ca1c901dcbf71da26625961fe719f088263fef062f4",
11 |     "zh:31a1da1e3beedfd88c3c152ab505bdcf330427f26b75835885526f7bb75c4857",
12 |     "zh:4409afe50f225659d5f378fe9303a45052953a1219f7f1acc82b69d07528b7ba",
13 |     "zh:4dadec3b783f10d2f8eef3dab5e817baae9c932a7967d45fe3d77fcbcbdaa438",
14 |     "zh:55be80d6e24828dcb0db7a0226fb275415c1c0ad63dd2f33b76f3ac0cd64e6a6",
15 |     "zh:560bba29efb7dbe0bfcc937369d88817aa31a8d18aa25395b1afe2576cb04495",
16 |     "zh:6caacc202e83438ff63d5d96733e283f44e349668d96c6b1c5c7df463ebf85cc",
17 |     "zh:6cabab83a61d5b4ac801c5a5d57556a0e76ec8dc879d28cf777509db5f6a657e",
18 |     "zh:96c4528bf9c16edb8841b68479ec51c499ed7fa680462fa28caeab3fc168bb43",
19 |     "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425",
20 |     "zh:cdc0b47ff840d708fbf75abfe86d23dc7f1dffdd233a771822a17b5c637f4769",
21 |     "zh:d9a9583e82776d1ebb6cf6c3d47acc2b302f8778f470ceffe7579dc794eb1feb",
22 |     "zh:e9367ca9f6f6418a23cdf8d01f29dd0c4f614e78499f52a767a422e4c334b915",
23 |     "zh:f6d355a2fb3bcebb597f68bbca4fa2aaa364efd29240236c582375e219d77656",
24 |   ]
25 | }
26 | 
27 | provider "registry.terraform.io/hashicorp/google" {
28 |   version     = "3.90.1"
29 |   constraints = "~> 3.0"
30 |   hashes = [
31 |     "h1:91QFfSGwMX4wKH5u+/FEMf2W3mToJxHtw/Ty0nvrDEU=",
32 |     "zh:07aabc8e46a5a2b29932e10677b23d4ce9d9a25f22ab61d3307a6b0e7998c84e",
33 |     "zh:0b63cd9534a98ed0fee794da495833046ad5319bd2da3102e21a941b7e2b857e",
34 |     "zh:17f815d57e1426edf8818323ab8e1022c8ec60dce0ced89a3b8e5dde5a95b3cc",
35 |     "zh:37855eae3542f2ebc6416984b124533d00299e0e01dcd7d2bc2205469cb9eceb",
36 |     "zh:579aa32a8e3fa317ddbd28c99a6449ae8864a5b7d10247bca6496f399cb36701",
37 |     "zh:703f71e0231cfe7a025c61db361d928189adba1d4fad2fe77f783dc73c8afe30",
38 |     "zh:afcd80c31cb1ed75ce6813269618e01ab29af68dae7aae1c51521c13acdaa678",
39 |     "zh:b21302f65a0d37045216912695d1ef718a1fe1732c30dc5654891fe2519b8e4e",
40 |     "zh:b69d0c8a74c2cd6233681db37e01aaaf1a6fb6bb24c83f7715bd2b456083e29d",
41 |     "zh:d4fb305816b143cb26c1827c79e56651347fd41809a57184e4807fb3f804f510",
42 |     "zh:fa24173ef9524bdfa1c5cada5188489554b08374f9519fe545f3fc1d3a9d9d4f",
43 |   ]
44 | }
45 | 


--------------------------------------------------------------------------------
/infra/runtime/scheduler/main.tf:
--------------------------------------------------------------------------------
 1 | variable "region_name" {}
 2 | 
 3 | variable "lambdacli_lambda_arn" {}
 4 | 
 5 | variable "lambdacli_lambda_name" {}
 6 | 
 7 | module "env" {
 8 |   source = "../../common/env"
 9 | }
10 | 
11 | provider "aws" {
12 |   region = var.region_name
13 |   default_tags {
14 |     tags = module.env.default_tags
15 |   }
16 | }
17 | 
18 | locals {
19 |   # avoid undesired updates of .terraform.lock.hcl that dirty the git status
20 |   init_flags = "--flags='-lockfile=readonly'"
21 | }
22 | 
23 | ## STANDALONE ENGINES
24 | 
25 | locals {
26 |   standalone_engine_cmd   = "l12n init ${local.init_flags} monitoring.bench-cold-warm"
27 |   standalone_engine_input = jsonencode({ "cmd" : base64encode(local.standalone_engine_cmd), "sampling" : 0.5 })
28 | }
29 | 
30 | resource "aws_cloudwatch_event_rule" "standalone_engine_schedule" {
31 |   name                = "${module.env.module_name}-standalone-engine-sched-${module.env.stage}"
32 |   description         = "Start standalone engine benchmark"
33 |   schedule_expression = "rate(30 minutes)"
34 | }
35 | 
36 | resource "aws_cloudwatch_event_target" "standalone_engine_schedule" {
37 |   rule  = aws_cloudwatch_event_rule.standalone_engine_schedule.name
38 |   arn   = var.lambdacli_lambda_arn
39 |   input = local.standalone_engine_input
40 | }
41 | 
42 | resource "aws_lambda_permission" "allow_standalone_engine" {
43 |   action        = "lambda:InvokeFunction"
44 |   function_name = var.lambdacli_lambda_name
45 |   principal     = "events.amazonaws.com"
46 |   source_arn    = aws_cloudwatch_event_rule.standalone_engine_schedule.arn
47 | }
48 | 
49 | ## LAMBDA SCALE UP
50 | 
51 | locals {
52 |   scales          = [64, 128, 256]
53 |   scaling_cmds    = [for sc in local.scales : "l12n init ${local.init_flags} monitoring.bench-scaling -n ${sc}"]
54 |   scaling_encoded = [for cmd in local.scaling_cmds : base64encode(cmd)]
55 |   samplings       = [for sc in local.scales : 8 / sc]
56 |   scaling_inputs  = [for i in range(length(local.scales)) : jsonencode({ "cmd" : local.scaling_encoded[i], "sampling" : local.samplings[i] })]
57 | }
58 | 
59 | resource "aws_cloudwatch_event_rule" "scaling_schedule" {
60 |   count               = length(local.scales)
61 |   name                = "${module.env.module_name}-scaling-sched-${local.scales[count.index]}-${module.env.stage}"
62 |   description         = "Start scaling benchmark with ${local.scales[count.index]} functions"
63 |   schedule_expression = "cron(${count.index * 10 + 4} * * * ? *)"
64 | }
65 | 
66 | resource "aws_cloudwatch_event_target" "scaling_schedule" {
67 |   count = length(local.scales)
68 |   rule  = aws_cloudwatch_event_rule.scaling_schedule[count.index].name
69 |   arn   = var.lambdacli_lambda_arn
70 |   input = local.scaling_inputs[count.index]
71 | }
72 | 
73 | resource "aws_lambda_permission" "allow_scaling" {
74 |   count         = length(local.scales)
75 |   action        = "lambda:InvokeFunction"
76 |   function_name = var.lambdacli_lambda_name
77 |   principal     = "events.amazonaws.com"
78 |   source_arn    = aws_cloudwatch_event_rule.scaling_schedule[count.index].arn
79 | }
80 | 


--------------------------------------------------------------------------------
/infra/runtime/scheduler/terragrunt.hcl:
--------------------------------------------------------------------------------
 1 | include "root" {
 2 |   path = find_in_parent_folders("terragrunt.${get_env("TF_STATE_BACKEND")}.hcl")
 3 | }
 4 | 
 5 | dependency "lambdacli" {
 6 |   config_path = "../lambdacli"
 7 | 
 8 |   mock_outputs = {
 9 |     lambda_name = "mock_name"
10 |     lambda_arn  = "arn:aws:lambda:us-west-1:123456789012:function:mock_name"
11 |   }
12 | }
13 | 
14 | dependency "bigquery" {
15 |   config_path = "../../monitoring/bigquery"
16 | }
17 | 
18 | locals {
19 |   region_name = get_env("L12N_AWS_REGION")
20 | }
21 | 
22 | inputs = {
23 |   region_name           = local.region_name
24 |   lambdacli_lambda_name = dependency.lambdacli.outputs.lambda_name
25 |   lambdacli_lambda_arn  = dependency.lambdacli.outputs.lambda_arn
26 | }
27 | 


--------------------------------------------------------------------------------
/infra/runtime/spark/.terraform.lock.hcl:
--------------------------------------------------------------------------------
 1 | # This file is maintained automatically by "terraform init".
 2 | # Manual edits may be lost in future updates.
 3 | 
 4 | provider "registry.terraform.io/hashicorp/aws" {
 5 |   version     = "4.45.0"
 6 |   constraints = "~> 4.45"
 7 |   hashes = [
 8 |     "h1:J/XjRsEJIpxi+mczXQfnH3nvfACv3LRDtrthQJCIibY=",
 9 |     "zh:22da03786f25658a000d1bcc28c780816a97e7e8a1f59fff6eee7d452830e95e",
10 |     "zh:2543be56eee0491eb0c79ca1c901dcbf71da26625961fe719f088263fef062f4",
11 |     "zh:31a1da1e3beedfd88c3c152ab505bdcf330427f26b75835885526f7bb75c4857",
12 |     "zh:4409afe50f225659d5f378fe9303a45052953a1219f7f1acc82b69d07528b7ba",
13 |     "zh:4dadec3b783f10d2f8eef3dab5e817baae9c932a7967d45fe3d77fcbcbdaa438",
14 |     "zh:55be80d6e24828dcb0db7a0226fb275415c1c0ad63dd2f33b76f3ac0cd64e6a6",
15 |     "zh:560bba29efb7dbe0bfcc937369d88817aa31a8d18aa25395b1afe2576cb04495",
16 |     "zh:6caacc202e83438ff63d5d96733e283f44e349668d96c6b1c5c7df463ebf85cc",
17 |     "zh:6cabab83a61d5b4ac801c5a5d57556a0e76ec8dc879d28cf777509db5f6a657e",
18 |     "zh:96c4528bf9c16edb8841b68479ec51c499ed7fa680462fa28caeab3fc168bb43",
19 |     "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425",
20 |     "zh:cdc0b47ff840d708fbf75abfe86d23dc7f1dffdd233a771822a17b5c637f4769",
21 |     "zh:d9a9583e82776d1ebb6cf6c3d47acc2b302f8778f470ceffe7579dc794eb1feb",
22 |     "zh:e9367ca9f6f6418a23cdf8d01f29dd0c4f614e78499f52a767a422e4c334b915",
23 |     "zh:f6d355a2fb3bcebb597f68bbca4fa2aaa364efd29240236c582375e219d77656",
24 |   ]
25 | }
26 | 
27 | provider "registry.terraform.io/hashicorp/google" {
28 |   version     = "3.90.1"
29 |   constraints = "~> 3.0"
30 |   hashes = [
31 |     "h1:91QFfSGwMX4wKH5u+/FEMf2W3mToJxHtw/Ty0nvrDEU=",
32 |     "zh:07aabc8e46a5a2b29932e10677b23d4ce9d9a25f22ab61d3307a6b0e7998c84e",
33 |     "zh:0b63cd9534a98ed0fee794da495833046ad5319bd2da3102e21a941b7e2b857e",
34 |     "zh:17f815d57e1426edf8818323ab8e1022c8ec60dce0ced89a3b8e5dde5a95b3cc",
35 |     "zh:37855eae3542f2ebc6416984b124533d00299e0e01dcd7d2bc2205469cb9eceb",
36 |     "zh:579aa32a8e3fa317ddbd28c99a6449ae8864a5b7d10247bca6496f399cb36701",
37 |     "zh:703f71e0231cfe7a025c61db361d928189adba1d4fad2fe77f783dc73c8afe30",
38 |     "zh:afcd80c31cb1ed75ce6813269618e01ab29af68dae7aae1c51521c13acdaa678",
39 |     "zh:b21302f65a0d37045216912695d1ef718a1fe1732c30dc5654891fe2519b8e4e",
40 |     "zh:b69d0c8a74c2cd6233681db37e01aaaf1a6fb6bb24c83f7715bd2b456083e29d",
41 |     "zh:d4fb305816b143cb26c1827c79e56651347fd41809a57184e4807fb3f804f510",
42 |     "zh:fa24173ef9524bdfa1c5cada5188489554b08374f9519fe545f3fc1d3a9d9d4f",
43 |   ]
44 | }
45 | 


--------------------------------------------------------------------------------
/infra/runtime/spark/main.tf:
--------------------------------------------------------------------------------
 1 | variable "region_name" {}
 2 | 
 3 | variable "spark_image" {}
 4 | 
 5 | variable "bucket_arn" {}
 6 | 
 7 | module "env" {
 8 |   source = "../../common/env"
 9 | }
10 | 
11 | provider "aws" {
12 |   region = var.region_name
13 |   default_tags {
14 |     tags = module.env.default_tags
15 |   }
16 | }
17 | 
18 | resource "aws_iam_policy" "s3_access" {
19 |   name = "${module.env.module_name}-spark-s3-access-${var.region_name}-${module.env.stage}"
20 | 
21 |   policy = <<EOF
22 | {
23 |     "Version": "2012-10-17",
24 |     "Statement": [
25 |         {
26 |             "Sid": "objectlevel",
27 |             "Effect": "Allow",
28 |             "Action": "s3:*",
29 |             "Resource": "${var.bucket_arn}/*"
30 |         },
31 |         {
32 |             "Sid": "bucketlevel",
33 |             "Effect": "Allow",
34 |             "Action": "s3:*",
35 |             "Resource": "${var.bucket_arn}"
36 |         }
37 |     ]
38 | }
39 | EOF
40 | }
41 | 
42 | module "engine" {
43 |   source = "../../common/lambda"
44 | 
45 |   function_base_name = "spark"
46 |   region_name        = var.region_name
47 |   docker_image       = var.spark_image
48 |   memory_size        = 2048
49 |   timeout            = 300
50 | 
51 |   additional_policies = [aws_iam_policy.s3_access.arn]
52 |   environment         = {}
53 | 
54 | }
55 | 
56 | output "lambda_name" {
57 |   value = module.engine.lambda_name
58 | }
59 | 


--------------------------------------------------------------------------------
/infra/runtime/spark/terragrunt.hcl:
--------------------------------------------------------------------------------
 1 | include "root" {
 2 |   path = find_in_parent_folders("terragrunt.${get_env("TF_STATE_BACKEND")}.hcl")
 3 | }
 4 | 
 5 | dependency "core" {
 6 |   config_path = "../core"
 7 | 
 8 |   mock_outputs = {
 9 |     bucket_arn = "arn:aws:s3:::mock"
10 |   }
11 | }
12 | 
13 | locals {
14 |   region_name = get_env("L12N_AWS_REGION")
15 | }
16 | 
17 | 
18 | terraform {
19 |   before_hook "deploy_images" {
20 |     commands = ["apply"]
21 |     execute  = ["../build_and_print.sh", "spark"]
22 |   }
23 | 
24 |   extra_arguments "image_vars" {
25 |     commands  = ["apply"]
26 |     arguments = ["-var-file=${get_terragrunt_dir()}/images.generated.tfvars"]
27 |   }
28 | 
29 | }
30 | 
31 | inputs = {
32 |   region_name = local.region_name
33 |   spark_image = "dummy_overriden_by_before_hook"
34 |   bucket_arn  = dependency.core.outputs.bucket_arn
35 | }
36 | 


--------------------------------------------------------------------------------
/infra/runtime/trino/.terraform.lock.hcl:
--------------------------------------------------------------------------------
 1 | # This file is maintained automatically by "terraform init".
 2 | # Manual edits may be lost in future updates.
 3 | 
 4 | provider "registry.terraform.io/hashicorp/aws" {
 5 |   version     = "4.45.0"
 6 |   constraints = "~> 4.45"
 7 |   hashes = [
 8 |     "h1:J/XjRsEJIpxi+mczXQfnH3nvfACv3LRDtrthQJCIibY=",
 9 |     "zh:22da03786f25658a000d1bcc28c780816a97e7e8a1f59fff6eee7d452830e95e",
10 |     "zh:2543be56eee0491eb0c79ca1c901dcbf71da26625961fe719f088263fef062f4",
11 |     "zh:31a1da1e3beedfd88c3c152ab505bdcf330427f26b75835885526f7bb75c4857",
12 |     "zh:4409afe50f225659d5f378fe9303a45052953a1219f7f1acc82b69d07528b7ba",
13 |     "zh:4dadec3b783f10d2f8eef3dab5e817baae9c932a7967d45fe3d77fcbcbdaa438",
14 |     "zh:55be80d6e24828dcb0db7a0226fb275415c1c0ad63dd2f33b76f3ac0cd64e6a6",
15 |     "zh:560bba29efb7dbe0bfcc937369d88817aa31a8d18aa25395b1afe2576cb04495",
16 |     "zh:6caacc202e83438ff63d5d96733e283f44e349668d96c6b1c5c7df463ebf85cc",
17 |     "zh:6cabab83a61d5b4ac801c5a5d57556a0e76ec8dc879d28cf777509db5f6a657e",
18 |     "zh:96c4528bf9c16edb8841b68479ec51c499ed7fa680462fa28caeab3fc168bb43",
19 |     "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425",
20 |     "zh:cdc0b47ff840d708fbf75abfe86d23dc7f1dffdd233a771822a17b5c637f4769",
21 |     "zh:d9a9583e82776d1ebb6cf6c3d47acc2b302f8778f470ceffe7579dc794eb1feb",
22 |     "zh:e9367ca9f6f6418a23cdf8d01f29dd0c4f614e78499f52a767a422e4c334b915",
23 |     "zh:f6d355a2fb3bcebb597f68bbca4fa2aaa364efd29240236c582375e219d77656",
24 |   ]
25 | }
26 | 
27 | provider "registry.terraform.io/hashicorp/google" {
28 |   version     = "3.90.1"
29 |   constraints = "~> 3.0"
30 |   hashes = [
31 |     "h1:91QFfSGwMX4wKH5u+/FEMf2W3mToJxHtw/Ty0nvrDEU=",
32 |     "zh:07aabc8e46a5a2b29932e10677b23d4ce9d9a25f22ab61d3307a6b0e7998c84e",
33 |     "zh:0b63cd9534a98ed0fee794da495833046ad5319bd2da3102e21a941b7e2b857e",
34 |     "zh:17f815d57e1426edf8818323ab8e1022c8ec60dce0ced89a3b8e5dde5a95b3cc",
35 |     "zh:37855eae3542f2ebc6416984b124533d00299e0e01dcd7d2bc2205469cb9eceb",
36 |     "zh:579aa32a8e3fa317ddbd28c99a6449ae8864a5b7d10247bca6496f399cb36701",
37 |     "zh:703f71e0231cfe7a025c61db361d928189adba1d4fad2fe77f783dc73c8afe30",
38 |     "zh:afcd80c31cb1ed75ce6813269618e01ab29af68dae7aae1c51521c13acdaa678",
39 |     "zh:b21302f65a0d37045216912695d1ef718a1fe1732c30dc5654891fe2519b8e4e",
40 |     "zh:b69d0c8a74c2cd6233681db37e01aaaf1a6fb6bb24c83f7715bd2b456083e29d",
41 |     "zh:d4fb305816b143cb26c1827c79e56651347fd41809a57184e4807fb3f804f510",
42 |     "zh:fa24173ef9524bdfa1c5cada5188489554b08374f9519fe545f3fc1d3a9d9d4f",
43 |   ]
44 | }
45 | 


--------------------------------------------------------------------------------
/infra/runtime/trino/main.tf:
--------------------------------------------------------------------------------
 1 | variable "region_name" {}
 2 | 
 3 | variable "trino_image" {}
 4 | 
 5 | variable "bucket_arn" {}
 6 | 
 7 | module "env" {
 8 |   source = "../../common/env"
 9 | }
10 | 
11 | provider "aws" {
12 |   region = var.region_name
13 |   default_tags {
14 |     tags = module.env.default_tags
15 |   }
16 | }
17 | 
18 | resource "aws_iam_policy" "s3_access" {
19 |   name = "${module.env.module_name}-trino-s3-access-${var.region_name}-${module.env.stage}"
20 | 
21 |   policy = <<EOF
22 | {
23 |     "Version": "2012-10-17",
24 |     "Statement": [
25 |         {
26 |             "Sid": "objectlevel",
27 |             "Effect": "Allow",
28 |             "Action": "s3:*",
29 |             "Resource": "${var.bucket_arn}/*"
30 |         },
31 |         {
32 |             "Sid": "bucketlevel",
33 |             "Effect": "Allow",
34 |             "Action": "s3:*",
35 |             "Resource": "${var.bucket_arn}"
36 |         }
37 |     ]
38 | }
39 | EOF
40 | }
41 | 
42 | module "engine" {
43 |   source = "../../common/lambda"
44 | 
45 |   function_base_name = "trino"
46 |   region_name        = var.region_name
47 |   docker_image       = var.trino_image
48 |   memory_size        = 2048
49 |   timeout            = 300
50 | 
51 |   additional_policies = [aws_iam_policy.s3_access.arn]
52 |   environment         = {}
53 | 
54 | }
55 | 
56 | output "lambda_name" {
57 |   value = module.engine.lambda_name
58 | }
59 | 


--------------------------------------------------------------------------------
/infra/runtime/trino/terragrunt.hcl:
--------------------------------------------------------------------------------
 1 | include "root" {
 2 |   path = find_in_parent_folders("terragrunt.${get_env("TF_STATE_BACKEND")}.hcl")
 3 | }
 4 | 
 5 | dependency "core" {
 6 |   config_path = "../core"
 7 | 
 8 |   mock_outputs = {
 9 |     bucket_arn = "arn:aws:s3:::mock"
10 |   }
11 | }
12 | 
13 | locals {
14 |   region_name = get_env("L12N_AWS_REGION")
15 | }
16 | 
17 | 
18 | terraform {
19 |   before_hook "deploy_images" {
20 |     commands = ["apply"]
21 |     execute  = ["../build_and_print.sh", "trino"]
22 |   }
23 | 
24 |   extra_arguments "image_vars" {
25 |     commands  = ["apply"]
26 |     arguments = ["-var-file=${get_terragrunt_dir()}/images.generated.tfvars"]
27 |   }
28 | 
29 | }
30 | 
31 | inputs = {
32 |   region_name = local.region_name
33 |   trino_image = "dummy_overriden_by_before_hook"
34 |   bucket_arn  = dependency.core.outputs.bucket_arn
35 | }
36 | 


--------------------------------------------------------------------------------
/infra/terragrunt.cloud.hcl:
--------------------------------------------------------------------------------
 1 | generate "backend" {
 2 |   path      = "backend.generated.tf"
 3 |   if_exists = "overwrite"
 4 |   contents  = <<EOC
 5 | terraform {
 6 |   cloud {
 7 |     organization = "${get_env("TF_ORGANIZATION")}"
 8 |     token        = "${get_env("TF_API_TOKEN")}"
 9 |     workspaces {
10 |       name = "${get_env("TF_WORKSPACE_PREFIX")}${replace(path_relative_to_include(), "/", "-")}"
11 |     }
12 |   }
13 | }
14 | EOC
15 | }
16 | 
17 | 
18 | locals {
19 |   common = read_terragrunt_config(find_in_parent_folders("common.hcl"))
20 | }
21 | 
22 | terraform {
23 |   extra_arguments "data_dir" {
24 |     commands = local.common.locals.extra_arguments.commands
25 |     env_vars = {
26 |       TF_DATA_DIR = "${local.common.locals.extra_arguments.data_dir}/${path_relative_to_include()}"
27 |     }
28 |   }
29 | }
30 | 
31 | generate = local.common.generate
32 | 


--------------------------------------------------------------------------------
/infra/terragrunt.local.hcl:
--------------------------------------------------------------------------------
 1 | remote_state {
 2 |   backend = "local"
 3 |   generate = {
 4 |     path      = "backend.generated.tf"
 5 |     if_exists = "overwrite"
 6 |   }
 7 |   config = {
 8 |     path = "${get_env("CALLING_DIR")}/.terraform/state/${path_relative_to_include()}/terraform.tfstate"
 9 |   }
10 | }
11 | 
12 | locals {
13 |   common = read_terragrunt_config(find_in_parent_folders("common.hcl"))
14 | }
15 | 
16 | terraform {
17 |   extra_arguments "data_dir" {
18 |     commands = local.common.locals.extra_arguments.commands
19 |     env_vars = {
20 |       TF_DATA_DIR = "${local.common.locals.extra_arguments.data_dir}/${path_relative_to_include()}"
21 |     }
22 |   }
23 | }
24 | 
25 | generate = local.common.generate
26 | 


--------------------------------------------------------------------------------
/l12n:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | python -u $REPO_DIR/cli/main.py "$@"
4 | 


--------------------------------------------------------------------------------