├── requirements-test.txt ├── cloud-init.yml ├── lambdas └── scale_out_runner │ ├── .chalice │ ├── config.json │ ├── deployed │ │ └── prod.json │ └── prod_iam.json │ ├── requirements.txt │ ├── .gitignore │ └── app.py ├── license-templates ├── LICENSE.txt └── LICENSE.rst ├── tests ├── conftest.py └── lambdas │ └── scale_out_runner │ ├── conftest.py │ └── test_app.py ├── arm-docker-ami └── packer │ ├── files │ ├── docker-permissions.sh │ └── install-dependencies.sh │ ├── vars │ └── variables.pkrvars.hcl │ └── linux-ami.pkr.hcl ├── README.rst ├── github-runner-ami └── packer │ ├── files │ ├── cloudwatch-metrics-github-runners │ ├── runner │ ├── install-subversion.sh │ ├── mounts_setup.sh │ ├── create-hostedtools-cache.sh │ ├── configure_kernel.sh │ ├── cleanup.sh │ ├── actions-runner-ec2-reporting.sh │ ├── source-list-additions.sh │ ├── rules.v4 │ ├── install-nodejs.sh │ ├── actions.runner-supervisor.service │ ├── install-github-cli.sh │ ├── regctl.sh │ ├── install-files.sh │ ├── actions.runner.service │ ├── stop-runner-if-no-job.sh │ ├── runner-cleanup-workdir.sh │ ├── timber.key │ ├── install-dependencies.sh │ ├── runner_bootstrap.sh │ ├── docker.sh │ ├── git.sh │ ├── vector.toml │ └── runner-supervisor.py │ ├── vars │ └── variables.pkrvars.hcl │ └── ubuntu2004.pkr.hcl ├── .flake8 ├── requirements.txt ├── pyproject.toml ├── .gitignore ├── .pre-commit-config.yaml ├── scripts ├── list_committers └── store-agent-creds.py └── LICENSE /requirements-test.txt: -------------------------------------------------------------------------------- 1 | pytest~=6.0 2 | moto 3 | -------------------------------------------------------------------------------- /cloud-init.yml: -------------------------------------------------------------------------------- 1 | #cloud-config 2 | # 2022-01-21 19:31 BMT 3 | 4 | mounts: 5 | - [tmpfs, /var/lib/docker, tmpfs, "defaults,noatime,size=85%"] 6 | - [tmpfs, /home/runner/actions-runner/_work, tmpfs, "defaults,noatime"] 7 | 8 | 9 | runcmd: 10 | - 11 
| - bash 12 | - -c 13 | - | 14 | set -eu -o pipefail 15 | echo "AWS_DEFAULT_REGION=$(cloud-init query region)" >> /etc/environment 16 | echo "AWS_DEFAULT_REGION=$(cloud-init query region)" >> /etc/default/vector 17 | #aws logs create-log-stream --log-group-name GitHubRunners --log-stream-name $(hostname) || true 18 | #systemctl enable --now vector 19 | systemctl enable --now actions.runner 20 | sysctl -w net.netfilter.nf_conntrack_tcp_be_liberal=1 21 | -------------------------------------------------------------------------------- /lambdas/scale_out_runner/.chalice/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0", 3 | "app_name": "scale_out_runner", 4 | "stages": { 5 | "dev": { 6 | "api_gateway_stage": "api" 7 | }, 8 | "prod": { 9 | "api_gateway_stage": "api", 10 | "autogen_policy": false, 11 | "iam_policy_file": "prod_iam.json", 12 | "environment_variables": { 13 | "GH_WEBHOOK_TOKEN_ENCRYPTED": "AQICAHg1MGVq8MAqYXSkkgy6iL19KSI14nJw8DelmHFuRpAfvQEh/mniWicOD0N1aVuHhp+VAAAAfDB6BgkqhkiG9w0BBwagbTBrAgEAMGYGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQMVqkAu70BaR2rDQ4qAgEQgDlIAxDrBlblL6XNKkJIs+zrNEMcGpS68JnoVh2s1oKEzbBXzYS16/9ZrW9CwUV4ULAd2EsGb90iz74=", 14 | "ASG_NAME": "RunnerASG", 15 | "ASG_REGION_NAME": "us-east-2" 16 | } 17 | } 18 | }, 19 | "automatic_layer": true 20 | } 21 | -------------------------------------------------------------------------------- /license-templates/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Licensed to the Apache Software Foundation (ASF) under one 2 | or more contributor license agreements. See the NOTICE file 3 | distributed with this work for additional information 4 | regarding copyright ownership. The ASF licenses this file 5 | to you under the Apache License, Version 2.0 (the 6 | "License"); you may not use this file except in compliance 7 | with the License. 
You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, 12 | software distributed under the License is distributed on an 13 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | KIND, either express or implied. See the License for the 15 | specific language governing permissions and limitations 16 | under the License. 17 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | -------------------------------------------------------------------------------- /license-templates/LICENSE.rst: -------------------------------------------------------------------------------- 1 | .. Licensed to the Apache Software Foundation (ASF) under one 2 | or more contributor license agreements. See the NOTICE file 3 | distributed with this work for additional information 4 | regarding copyright ownership. 
The ASF licenses this file 5 | to you under the Apache License, Version 2.0 (the 6 | "License"); you may not use this file except in compliance 7 | with the License. You may obtain a copy of the License at 8 | 9 | .. http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | .. Unless required by applicable law or agreed to in writing, 12 | software distributed under the License is distributed on an 13 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | KIND, either express or implied. See the License for the 15 | specific language governing permissions and limitations 16 | under the License. 17 | -------------------------------------------------------------------------------- /lambdas/scale_out_runner/requirements.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | boto3 19 | chalice 20 | -------------------------------------------------------------------------------- /lambdas/scale_out_runner/.gitignore: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | .chalice/deployments/ 19 | .chalice/venv/ 20 | __pycache__/ 21 | -------------------------------------------------------------------------------- /arm-docker-ami/packer/files/docker-permissions.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. 
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | sudo usermod -a -G docker ec2-user 19 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. Licensed to the Apache Software Foundation (ASF) under one 2 | or more contributor license agreements. See the NOTICE file 3 | distributed with this work for additional information 4 | regarding copyright ownership. The ASF licenses this file 5 | to you under the Apache License, Version 2.0 (the 6 | "License"); you may not use this file except in compliance 7 | with the License. You may obtain a copy of the License at 8 | 9 | .. http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | .. Unless required by applicable law or agreed to in writing, 12 | software distributed under the License is distributed on an 13 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | KIND, either express or implied. See the License for the 15 | specific language governing permissions and limitations 16 | under the License. 17 | 18 | CI Infrastructure for Apache Airflow 19 | ==================================== 20 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/cloudwatch-metrics-github-runners: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. 
See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | */1 * * * * nobody /usr/local/sbin/actions-runner-ec2-reporting.sh 19 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/runner: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | runner ALL=(ALL) NOPASSWD:/usr/sbin/swapoff -a, /usr/bin/rm -f /swapfile, /usr/bin/apt clean 19 | -------------------------------------------------------------------------------- /arm-docker-ami/packer/files/install-dependencies.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | 21 | yum update 22 | yum install docker --assumeyes 23 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/install-subversion.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. 
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -uo pipefail 21 | 22 | apt install subversion -y 23 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | [flake8] 18 | max-line-length = 110 19 | ignore = E203,E231,E731,W504,I001,W503 20 | exclude = .svn,CVS,.bzr,.hg,.git,__pycache__,.eggs,*.egg 21 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/mounts_setup.sh: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | sudo mount -t tmpfs -o size=85% tmpfs /var/lib/docker 19 | sudo mount -t tmpfs -o tmpfs /home/runner/actions-runner/_work 20 | -------------------------------------------------------------------------------- /lambdas/scale_out_runner/.chalice/deployed/prod.json: -------------------------------------------------------------------------------- 1 | { 2 | "resources": [ 3 | { 4 | "name": "managed-layer", 5 | "resource_type": "lambda_layer", 6 | "layer_version_arn": "arn:aws:lambda:eu-central-1:827901512104:layer:scale_out_runner-prod-managed-layer:35" 7 | }, 8 | { 9 | "name": "api_handler_role", 10 | "resource_type": "iam_role", 11 | "role_arn": "arn:aws:iam::827901512104:role/scale_out_runner-prod-api_handler", 12 | "role_name": "scale_out_runner-prod-api_handler" 13 | }, 14 | { 15 | "name": "api_handler", 16 | "resource_type": "lambda_function", 17 | "lambda_arn": "arn:aws:lambda:eu-central-1:827901512104:function:scale_out_runner-prod" 18 | }, 19 | { 20 | "name": "rest_api", 21 | "resource_type": "rest_api", 22 | "rest_api_id": "2onm92olq7", 23 | "rest_api_url": "https://2onm92olq7.execute-api.eu-central-1.amazonaws.com/api/" 24 | } 25 | ], 26 | "schema_version": "2.0", 27 | "backend": "api" 28 | } 29 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | boto3 19 | click~=7.1 20 | chalice 21 | pygithub 22 | pytest~=6.0 23 | python-dynamodb-lock-whatnick~=0.9.3 24 | psutil 25 | rich-click 26 | requests 27 | tenacity~=6.0 28 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/create-hostedtools-cache.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 
18 | 19 | set -eu -o pipefail 20 | echo "Creating hosted tools cache:" 21 | install --owner runner --directory /opt/hostedtoolcache 22 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/configure_kernel.sh: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | # Needed to run multiple kind instances 18 | cat >>/etc/sysctl.conf </dev/null; then 20 | # Only report metric when we're doing something -- no point paying to submit zeros 21 | aws cloudwatch put-metric-data --metric-name jobs-running --value "$(pgrep -c Runner.Worker)" --namespace github.actions 22 | fi 23 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | [tool.black] 18 | line-length = 110 19 | target-version = ['py36', 'py37', 'py38'] 20 | skip-string-normalization = true 21 | 22 | [tool.isort] 23 | line_length = 110 24 | combine_as_imports = true 25 | default_section = 'THIRDPARTY' 26 | # Need to be consistent with the exclude config defined in pre-commit-config.yaml 27 | skip = ['build','.tox','venv'] 28 | profile = 'black' 29 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/source-list-additions.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. 
See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | apt-key adv --recv-key 0x1646B01B86E50310 "1646B01B86E50310" 21 | echo "deb https://dl.yarnpkg.com/debian/ stable main" > /etc/apt/sources.list.d/yarn.list 22 | apt-key add /tmp/timber.key 23 | echo "deb https://repositories.timber.io/public/vector/deb/ubuntu focal main" > /etc/apt/sources.list.d/timber.list 24 | -------------------------------------------------------------------------------- /github-runner-ami/packer/vars/variables.pkrvars.hcl: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | vpc_id = "vpc-d73487bd" 19 | ami_name = "airflow-runner-ami" 20 | aws_regions = ["eu-central-1", "us-east-2"] 21 | packer_role_arn = "arn:aws:iam::827901512104:role/packer-role" 22 | runner_version = "2.309.0-airflow11" 23 | session_manager_instance_profile_name = "packer_ssm_instance_profile" 24 | runner_labels = "airflow-runner,vm-runner" 25 | -------------------------------------------------------------------------------- /lambdas/scale_out_runner/.chalice/prod_iam.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Sid": "VisualEditor0", 6 | "Effect": "Allow", 7 | "Action": [ 8 | "logs:CreateLogStream", 9 | "kms:Decrypt", 10 | "autoscaling:SetDesiredCapacity", 11 | "ssm:GetParameter", 12 | "logs:CreateLogGroup", 13 | "logs:PutLogEvents", 14 | "dynamodb:UpdateItem" 15 | ], 16 | "Resource": [ 17 | "arn:aws:ssm:*:827901512104:parameter/runners/*/configOverlay", 18 | "arn:aws:autoscaling:*:827901512104:autoScalingGroup:*:autoScalingGroupName/AshbRunnerASG", 19 | "arn:aws:kms:*:827901512104:key/48a58710-7ac6-4f88-995f-758a6a450faa", 20 | "arn:aws:dynamodb:*:827901512104:table/GithubRunnerQueue", 21 | "arn:*:logs:*:*:*" 22 | ] 23 | }, 24 | { 25 | "Sid": "VisualEditor1", 26 | "Effect": "Allow", 27 | "Action": [ 28 | "autoscaling:DescribeAutoScalingGroups" 29 | ], 30 | "Resource": "*" 31 | } 32 | ] 33 | } 34 | -------------------------------------------------------------------------------- /tests/lambdas/scale_out_runner/conftest.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | import os 19 | import sys 20 | 21 | import pytest 22 | from chalice.test import Client 23 | 24 | path = os.path.dirname(__file__) 25 | idx = path.rfind('/tests/') 26 | path = path[:idx] + path[idx + 6 :] 27 | sys.path.append(path) 28 | 29 | 30 | @pytest.fixture 31 | def client(request): 32 | app = getattr(request.module, "app") 33 | 34 | with Client(app) as client: 35 | yield client 36 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/rules.v4: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. 
See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | # Generated by iptables-save v1.8.4 on Thu Jan 14 13:59:27 2021 19 | *filter 20 | :INPUT ACCEPT [833:75929] 21 | :FORWARD DROP [0:0] 22 | :OUTPUT ACCEPT [794:143141] 23 | :DOCKER-USER - [0:0] 24 | -A FORWARD -j DOCKER-USER 25 | # Dis-allow any docker container to access the metadata service 26 | -A DOCKER-USER -d 169.254.169.254/32 -j REJECT --reject-with icmp-port-unreachable 27 | -A DOCKER-USER -j RETURN 28 | COMMIT 29 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/install-nodejs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 
18 | 19 | # https://github.com/actions/virtual-environments/blob/525f79f479cca77aef4e0a680548b65534c64a18/images/linux/scripts/installers/nodejs.sh 20 | curl -sL https://raw.githubusercontent.com/mklement0/n-install/stable/bin/n-install | bash -s -- -ny - 21 | ~/n/bin/n lts 22 | npm install -g grunt gulp n parcel-bundler typescript newman 23 | npm install -g --save-dev webpack webpack-cli 24 | npm install -g npm 25 | rm -rf ~/n 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .cache 2 | __pycache__/ 3 | .mypy_cache 4 | 5 | # Created by https://www.toptal.com/developers/gitignore/api/terraform 6 | # Edit at https://www.toptal.com/developers/gitignore?templates=terraform 7 | 8 | ### Terraform ### 9 | # Local .terraform directories 10 | **/.terraform/* 11 | 12 | # .tfstate files 13 | *.tfstate 14 | *.tfstate.* 15 | 16 | # Crash log files 17 | crash.log 18 | 19 | # Ignore any .tfvars files that are generated automatically for each Terraform run. Most 20 | # .tfvars files are managed as part of configuration and so should be included in 21 | # version control. 
22 | # 23 | # example.tfvars 24 | 25 | # Ignore override files as they are usually used to override resources locally and so 26 | # are not checked in 27 | override.tf 28 | override.tf.json 29 | *_override.tf 30 | *_override.tf.json 31 | 32 | # Include override files you do wish to add to version control using negated pattern 33 | # !example_override.tf 34 | 35 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan 36 | # example: *tfplan* 37 | 38 | # IDE Files 39 | .idea 40 | *.iml 41 | 42 | # Session manager binaries 43 | session-manager-plugin.* 44 | 45 | # End of https://www.toptal.com/developers/gitignore/api/terraform 46 | github-runner-ami/packer/session-manager-plugin.deb 47 | github-runner-ami/packer/session-manager-plugin.rpm 48 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/actions.runner-supervisor.service: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | [Unit] 19 | Description=Fetch credentials and supervise GitHub Actions Runner 20 | After=network.target 21 | Before=actions.runner.service 22 | 23 | [Service] 24 | Type=notify 25 | ExecStart=/opt/runner-supervisor/bin/python /opt/runner-supervisor/bin/runner-supervisor 26 | # We need to run as root to have the ability to open the netlink connector socket 27 | User=root 28 | WorkingDirectory=/home/runner/actions-runner 29 | Restart=always 30 | EnvironmentFile=/etc/environment 31 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/install-github-cli.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 
19 | 20 | set -uo pipefail 21 | 22 | # https://github.com/actions/virtual-environments/blob/f2fdcef0e020770b1b6cc58bda3b4a01f0286f5e/images/linux/scripts/installers/github-cli.sh 23 | url=$(curl -s https://api.github.com/repos/cli/cli/releases/latest | jq -r '.assets[].browser_download_url|select(contains("linux") and contains("amd64") and contains(".deb"))') 24 | 25 | cd /tmp 26 | curl -fsL --remote-name "$url" 27 | apt install /tmp/gh_*_linux_amd64.deb 28 | rm /tmp/gh_*_linux_amd64.deb 29 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/regctl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | set -exu -o pipefail 19 | 20 | architecture=$(uname -m) 21 | if [[ ${architecture} == "x86_64" ]] ; then 22 | # Well. 
Docker compose got it right, but regctl didn't ¯\_(ツ)_/¯ 23 | architecture="amd64" 24 | fi 25 | # Hard-code regctl version 26 | regctl_version="v0.4.3" 27 | regctl_binary="regctl-$(uname -s)-${architecture}" 28 | curl -L "https://github.com/regclient/regclient/releases/download/${regctl_version}/${regctl_binary}" -o "/usr/local/bin/regctl" 29 | chmod a+x "/usr/local/bin/regctl" 30 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/install-files.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 
18 | 19 | set -eu -o pipefail 20 | 21 | mkdir /etc/iptables/ /etc/vector 22 | 23 | install --owner root --mode=0644 --target-directory "/etc/systemd/system/" "/tmp/etc-systemd-system/"* 24 | install --owner root --mode=0755 --target-directory "/usr/local/sbin" "/tmp/usr-local-sbin/"* 25 | install --owner root --mode=0755 --target-directory "/usr/local/bin" "/tmp/usr-local-bin/"* 26 | install --owner root --mode=0644 --target-directory "/etc/iptables" "/tmp/etc-iptables/"* 27 | install --owner root --mode=0644 --target-directory "/etc/cron.d" "/tmp/etc-cron.d/"* 28 | install --owner root --mode=0644 --target-directory "/etc/sudoers.d" "/tmp/etc-sudoers.d/"* 29 | install --owner root --mode=0644 --target-directory "/etc/vector/" "/tmp/etc-vector/"* 30 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/actions.runner.service: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | [Unit] 19 | Description=GitHub Actions Runner 20 | After=network.target actions.runner-supervisor.service 21 | Requires=actions.runner-supervisor.service 22 | BindsTo=actions.runner-supervisor.service 23 | 24 | [Service] 25 | ExecStartPre=!/usr/local/sbin/runner-cleanup-workdir.sh 26 | ExecStart=/home/runner/actions-runner/run.sh --once --startuptype service --labels $RUNNER_LABELS 27 | ExecStop=/usr/local/bin/stop-runner-if-no-job.sh $MAINPID 28 | EnvironmentFile=/etc/environment 29 | Environment=GITHUB_ACTIONS_RUNNER_CHANNEL_TIMEOUT=300 30 | Environment=RUNNER_LABELS=airflow-runner,vm-runner 31 | User=runner 32 | WorkingDirectory=/home/runner/actions-runner 33 | KillMode=mixed 34 | KillSignal=SIGTERM 35 | TimeoutStopSec=30min 36 | Restart=on-success 37 | 38 | [Install] 39 | WantedBy=multi-user.target 40 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/stop-runner-if-no-job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 
18 | 19 | set -u 20 | 21 | MAINPID="${MAINPID:-${1:-}}" 22 | 23 | if [[ -z "$MAINPID" ]]; then 24 | echo "No MAINPID, assuming it already crashed!" 25 | exit 0 26 | fi 27 | 28 | if pgrep --ns $MAINPID -a Runner.Worker > /dev/null; then 29 | echo "Waiting for current job to finish" 30 | while pgrep --ns $MAINPID -a Runner.Worker; do 31 | # Job running -- just wait for it to exit 32 | sleep 10 33 | done 34 | 35 | else 36 | # If there were _no_ Workers running, ask the main process to stop. If there 37 | # were Workers running, then Runner.Listener would stop automatically because 38 | # of the `--once` 39 | pkill --ns $MAINPID Runner.Listener || true 40 | fi 41 | 42 | # Wait for it to shut down 43 | echo "Waiting for main Runner.Listener $MAINPID process to stop" 44 | while pgrep --ns $MAINPID -a Runner.Listener; do 45 | sleep 5 46 | done 47 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/runner-cleanup-workdir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 
18 | 19 | set -eu -o pipefail 20 | echo "Left-over containers:" 21 | docker ps -a 22 | docker ps -qa | xargs --verbose --no-run-if-empty docker rm -fv 23 | 24 | echo "Log in to a paid docker user to get unlimited docker pulls" 25 | aws ssm get-parameter --with-decryption --name /runners/apache/airflow/dockerPassword | \ 26 | jq .Parameter.Value -r | \ 27 | sudo -u runner docker login --username airflowcirunners --password-stdin 28 | 29 | if [[ -d ~runner/actions-runner/_work/airflow/airflow ]]; then 30 | cd ~runner/actions-runner/_work/airflow/airflow 31 | 32 | chown --changes -R runner: . 33 | if [[ -e .git ]]; then 34 | sudo -u runner bash -c " 35 | git reset --hard && \ 36 | git submodule deinit --all -f && \ 37 | git submodule foreach git clean -fxd && \ 38 | git clean -fxd \ 39 | " 40 | fi 41 | fi 42 | 43 | # Remove left over mssql data dirs 44 | find . -maxdepth 1 -name 'tmp-mssql-volume-*' -type d -printf 'Deleting %f\n' -exec sudo rm -r {} + 45 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/timber.key: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. 
See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | -----BEGIN PGP PUBLIC KEY BLOCK----- 19 | Version: GnuPG v2 20 | 21 | mQENBF9gFZ0BCADETtIHM8y5ehMoyNiZcriK+tHXyKnbZCKtMCKcC4ll94/6pekQ 22 | jKIPWg8OXojkCtwua/TsddtQmOhUxAUtv6K0jO8r6sJ8rezMhuNH8J8rMqWgzv9d 23 | 2+U7Z7GFgcP0OeD+KigtnR8uyp50suBmEDC8YytmmbESmG261Y38vZME0VvQ+CMy 24 | Yi/FvKXBXugaiCtaz0a5jVE86qSZbKbuaTHGiLn05xjTqc4FfyP4fi4oT2r6GGyL 25 | Bn5ob84OjXLQwfbZIIrNFR10BvL2SRLL0kKKVlMBBADodtkdwaTt0pGuyEJ+gVBz 26 | 629PZBtSrwVRU399jGSfsxoiLca9//c7OJzHABEBAAG0OkNsb3Vkc21pdGggUGFj 27 | a2FnZSAodGltYmVyL3ZlY3RvcikgPHN1cHBvcnRAY2xvdWRzbWl0aC5pbz6JATcE 28 | EwEIACEFAl9gFZ0CGy8FCwkIBwMFFQoJCAsFFgIDAQACHgECF4AACgkQNUPbLQor 29 | xLhf6gf8DyfIpKjvEeW/O8lRUTpkiPKezJbb+udZboCXJKDD02Q9PE3hfEfQRr5X 30 | muytL7YMPvzqBVuP3xV5CN3zvtiQQbZiDhstImVyd+t24pQTkjzkvy+A2yvUuIkE 31 | RWxuey41f5FNj/7wdfJnHoU9uJ/lvsb7DLXw7FBMZFNBR6LED/d+b61zMzVvmFZA 32 | gsrCGwr/jfySwnpShmKdJaMTHQx0qt2RfXwNm2V6i900tAuMUWnmUIz5/9vENPKm 33 | 0+31I43a/QgmIrKEePhwn2jfA1oRlYzdv+PbblSTfjTStem+GqQkj9bZsAuqVH8g 34 | 3vq0NvX0k2CLi/W9mTiSdHXFChI15A== 35 | =k36w 36 | -----END PGP PUBLIC KEY BLOCK----- 37 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/install-dependencies.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. 
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | 21 | debconf-set-selections <> /etc/environment 27 | 28 | useradd --create-home runner -G docker 29 | 30 | install --owner runner --directory ~runner/actions-runner 31 | 32 | cd ~runner/actions-runner 33 | curl -L "https://github.com/ashb/runner/releases/download/v${RUNNER_VERSION}/actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz" | tar -zx 34 | 35 | python3 -mvenv /opt/runner-supervisor 36 | /opt/runner-supervisor/bin/pip install -U pip python-dynamodb-lock-whatnick==0.9.3 click==7.1.2 psutil 'tenacity~=6.0' 37 | 38 | install --owner root --mode 0755 /tmp/runner-supervisor /opt/runner-supervisor/bin/runner-supervisor 39 | 40 | systemctl enable iptables.service 41 | systemctl enable vector.service 42 | 43 | # We don't enable actions.runner.service here, but instead in the user-data 44 | # script, as otherwise it would happen to early, before we have had a chance to 45 | # drop the AWS_DEFAULT_REGION in to /etc/environment 46 | -------------------------------------------------------------------------------- /tests/lambdas/scale_out_runner/test_app.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | import json 19 | 20 | import pytest 21 | from app import app # noqa 22 | 23 | 24 | @pytest.fixture(autouse=True) 25 | def no_requests(monkeypatch): 26 | monkeypatch.setenv("GH_WEBHOOK_TOKEN", "abc") 27 | 28 | 29 | def test_no_auth(client): 30 | response = client.http.post('/', body=json.dumps({'hello': 'world'})) 31 | assert response.status_code == 400 32 | 33 | 34 | @pytest.mark.parametrize( 35 | "sig", 36 | [ 37 | "md5=", 38 | # Valid, but not prefixed 39 | "160156e060356c9444613b224fc5613a0a25315b7898fd5d8c7656bd8a6654af", 40 | ], 41 | ) 42 | def test_bad_auth(sig, client): 43 | response = client.http.post( 44 | '/', 45 | headers={ 46 | 'X-Hub-Signature-256': sig, 47 | }, 48 | body=json.dumps({'hello': 'world'}), 49 | ) 50 | assert response.status_code == 400 51 | 52 | 53 | def test_auth(client): 54 | response = client.http.post( 55 | '/', 56 | headers={ 57 | 'X-Hub-Signature-256': 'sha256=160156e060356c9444613b224fc5613a0a25315b7898fd5d8c7656bd8a6654af' 58 | }, 59 | body=json.dumps({'hello': 'world'}), 60 | ) 61 | assert response.status_code == 200 62 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/docker.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under 
one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -exu -o pipefail 21 | 22 | for pkg in docker.io docker-doc docker-compose podman-docker containerd runc; do sudo apt-get remove $pkg --assume-yes || true; done 23 | 24 | sudo apt-get update 25 | sudo apt-get install ca-certificates curl gnupg 26 | 27 | sudo install -m 0755 -d /etc/apt/keyrings 28 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg 29 | sudo chmod a+r /etc/apt/keyrings/docker.gpg 30 | 31 | echo \ 32 | "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ 33 | "$(. 
/etc/os-release && echo "$VERSION_CODENAME")" stable" | \ 34 | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null 35 | 36 | sudo apt-get update 37 | 38 | # Same version as MIN_VERSIONS in Breeze 39 | DOCKER_VERSION_STRING="5:24.0.7-1~ubuntu.20.04~focal" 40 | DOCKER_COMPOSE_VERSION_STRING="2.20.2-1~ubuntu.20.04~focal" 41 | DOCKER_BUILDX_VERSION_STRING="0.11.2-1~ubuntu.20.04~focal" 42 | sudo apt-get install \ 43 | "docker-ce=${DOCKER_VERSION_STRING}" \ 44 | "docker-ce-cli=${DOCKER_VERSION_STRING}" \ 45 | containerd.io \ 46 | "docker-buildx-plugin=${DOCKER_BUILDX_VERSION_STRING}" \ 47 | "docker-compose-plugin=${DOCKER_COMPOSE_VERSION_STRING}" --assume-yes --allow-downgrades 48 | -------------------------------------------------------------------------------- /arm-docker-ami/packer/linux-ami.pkr.hcl: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | variable "vpc_id" { 19 | type = string 20 | } 21 | variable "ami_name" { 22 | type = string 23 | } 24 | variable "aws_regions" { 25 | type = list(string) 26 | } 27 | variable "packer_role_arn" { 28 | type = string 29 | } 30 | variable "session_manager_instance_profile_name" { 31 | type = string 32 | } 33 | 34 | source "amazon-ebs" "docker-runner" { 35 | region = var.aws_regions[0] 36 | ami_name = "${var.ami_name}-v2" 37 | ami_regions = var.aws_regions 38 | tag { 39 | key = "Name" 40 | value = "arm-docker-ami" 41 | } 42 | snapshot_tag { 43 | key = "Name" 44 | value = "arm-docker-ami-root" 45 | } 46 | encrypt_boot = false 47 | instance_type = "m6g.large" 48 | communicator = "ssh" 49 | ssh_username = "ec2-user" 50 | ssh_interface = "session_manager" 51 | iam_instance_profile = var.session_manager_instance_profile_name 52 | subnet_filter { 53 | # Just pick a random subnet in the VPC -- we only have the three defaults so this is fine! 54 | random = true 55 | } 56 | vpc_id = var.vpc_id 57 | source_ami_filter { 58 | filters = { 59 | virtualization-type = "hvm" 60 | architecture= "arm64", 61 | name = "amzn2-ami-kernel-5.10-hvm-*" 62 | root-device-type = "ebs" 63 | } 64 | owners = ["amazon"] 65 | most_recent = true 66 | } 67 | } 68 | 69 | build { 70 | sources = [ 71 | "source.amazon-ebs.docker-runner" 72 | ] 73 | 74 | provisioner "shell" { 75 | inline = [ 76 | "echo Connected via SSM at '${build.User}@${build.Host}:${build.Port}'" 77 | ] 78 | } 79 | 80 | # Since we connect as a non-root user, we have to "stage" the files to a writable folder, which we then move 81 | # in to place with the approriate permissions via install-files.sh provisioner step 82 | provisioner "shell" { 83 | inline = [ 84 | "mkdir -p /tmp/etc-systemd-system /tmp/usr-local-sbin /tmp/usr-local-bin /tmp/etc-sudoers.d /tmp/etc-iptables /tmp/etc-cron.d" 85 | ] 86 | } 87 | provisioner "shell" { 88 | scripts = [ 89 | "./files/install-dependencies.sh", 90 | "./files/docker-permissions.sh", 91 | ] 92 
| execute_command = "chmod +x '{{ .Path }}'; sudo sh -c '{{ .Vars }} {{ .Path }}'" 93 | environment_vars = [ 94 | ] 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | --- 18 | default_stages: [commit, push] 19 | default_language_version: 20 | # force all unspecified python hooks to run python3 21 | python: python3 22 | minimum_pre_commit_version: "1.20.0" 23 | repos: 24 | - repo: meta 25 | hooks: 26 | - id: identity 27 | - id: check-hooks-apply 28 | - repo: https://github.com/Lucas-C/pre-commit-hooks 29 | rev: v1.1.9 30 | hooks: 31 | - id: forbid-tabs 32 | - id: insert-license 33 | name: Add license 34 | exclude: ^\.github/.*$|^license-templates/|\.json$ 35 | args: 36 | - --comment-style 37 | - "|#|" 38 | - --license-filepath 39 | - license-templates/LICENSE.txt 40 | - --fuzzy-match-generates-todo 41 | - id: insert-license 42 | name: Add license for all rst files 43 | exclude: ^\.github/.*$ 44 | args: 45 | - --comment-style 46 | - "||" 47 | - --license-filepath 48 | - license-templates/LICENSE.rst 49 | - --fuzzy-match-generates-todo 50 | files: \.rst$ 51 | - repo: https://github.com/psf/black 52 | rev: 20.8b1 53 | hooks: 54 | - id: black 55 | args: [--config=./pyproject.toml] 56 | - repo: https://github.com/pre-commit/pre-commit-hooks 57 | rev: v3.4.0 58 | hooks: 59 | - id: check-merge-conflict 60 | - id: debug-statements 61 | - id: check-builtin-literals 62 | - id: detect-private-key 63 | - id: end-of-file-fixer 64 | - id: mixed-line-ending 65 | - id: trailing-whitespace 66 | - id: fix-encoding-pragma 67 | args: 68 | - --remove 69 | - repo: https://github.com/asottile/pyupgrade 70 | rev: v2.7.4 71 | hooks: 72 | - id: pyupgrade 73 | args: ["--py36-plus"] 74 | - repo: https://github.com/pre-commit/pygrep-hooks 75 | rev: v1.7.0 76 | hooks: 77 | - id: rst-backticks 78 | - id: python-no-log-warn 79 | - repo: https://github.com/timothycrosley/isort 80 | rev: 5.7.0 81 | hooks: 82 | - id: isort 83 | name: Run isort to sort imports 84 | files: \.py$ 85 | # To keep consistent with the global isort skip config defined in setup.cfg 86 | exclude: ^build/.*$|^.tox/.*$|^venv/.*$ 87 | - repo: https://gitlab.com/pycqa/flake8 88 | rev: 
3.8.4 89 | hooks: 90 | - id: flake8 91 | name: Run flake8 92 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/git.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | 19 | GIT_REPO="ppa:git-core/ppa" 20 | GIT_LFS_REPO="https://packagecloud.io/install/repositories/github/git-lfs" 21 | 22 | function download_with_retries() { 23 | # Due to restrictions of bash functions, positional arguments are used here. 24 | # In case if you using latest argument NAME, you should also set value to all previous parameters. 25 | # Example: download_with_retries $ANDROID_SDK_URL "." "android_sdk.zip" 26 | local URL="$1" 27 | local DEST="${2:-.}" 28 | local NAME="${3:-${URL##*/}}" 29 | local COMPRESSED="$4" 30 | 31 | if [[ $COMPRESSED == "compressed" ]]; then 32 | local COMMAND="curl $URL -4 -sL --compressed -o '$DEST/$NAME' -w '%{http_code}'" 33 | else 34 | local COMMAND="curl $URL -4 -sL -o '$DEST/$NAME' -w '%{http_code}'" 35 | fi 36 | 37 | echo "Downloading '$URL' to '${DEST}/${NAME}'..." 
38 | retries=20 39 | interval=30 40 | while [ $retries -gt 0 ]; do 41 | ((retries--)) 42 | # Temporary disable exit on error to retry on non-zero exit code 43 | set +e 44 | http_code=$(eval $COMMAND) 45 | exit_code=$? 46 | if [ $http_code -eq 200 ] && [ $exit_code -eq 0 ]; then 47 | echo "Download completed" 48 | return 0 49 | else 50 | echo "Error — Either HTTP response code for '$URL' is wrong - '$http_code' or exit code is not 0 - '$exit_code'. Waiting $interval seconds before the next attempt, $retries attempts left" 51 | sleep $interval 52 | fi 53 | # Enable exit on error back 54 | set -e 55 | done 56 | 57 | echo "Could not download $URL" 58 | return 1 59 | } 60 | 61 | ## Install git 62 | add-apt-repository $GIT_REPO -y 63 | apt-get update 64 | apt-get install --upgrade git -y 65 | git --version 66 | 67 | # Install git-lfs 68 | curl -s $GIT_LFS_REPO/script.deb.sh | bash 69 | apt-get install -y git-lfs=2.13.3 70 | 71 | # Install git-ftp 72 | apt-get install git-ftp -y 73 | 74 | # Remove source repo's 75 | add-apt-repository --remove $GIT_REPO 76 | rm /etc/apt/sources.list.d/github_git-lfs.list 77 | 78 | #Install hub 79 | tmp_hub="/tmp/hub" 80 | mkdir -p "$tmp_hub" 81 | url=$(curl --location -s https://api.github.com/repos/github/hub/releases/latest | jq -r '.assets[].browser_download_url | select(contains("hub-linux-amd64"))') 82 | download_with_retries "$url" "$tmp_hub" 83 | tar xzf "$tmp_hub"/hub-linux-amd64-*.tgz --strip-components 1 -C "$tmp_hub" 84 | mv "$tmp_hub"/bin/hub /usr/local/bin 85 | 86 | # Add well-known SSH host keys to known_hosts 87 | ssh-keyscan -t rsa github.com >> /etc/ssh/ssh_known_hosts 88 | ssh-keyscan -t rsa ssh.dev.azure.com >> /etc/ssh/ssh_known_hosts 89 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/vector.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or 
more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | data_dir = "/var/lib/vector" 19 | 20 | [api] 21 | enabled = true 22 | 23 | # Input data. Change me to a valid input source. 24 | [sources.logs] 25 | type = "journald" 26 | include_units = ["actions.runner.service", "actions.runner-supervisor.service"] 27 | 28 | [transforms.without_systemd_fields] 29 | type = "remove_fields" 30 | inputs = ["logs"] 31 | fields = ["_CAP_EFFECTIVE", "_SYSTEMD_SLICE", "_SYSTEMD_CGROUP", 32 | "_SYSTEMD_INVOCATION_ID", "_SELINUX_CONTEXT", "_COMM", "_BOOT_ID", 33 | "_MACHINE_ID", "_STREAM_ID", "_PID", "_GID", "_UID","_TRANSPORT", 34 | "__MONOTONIC_TIMESTAMP", "SYSLOG_IDENTIFIER", "SYSLOG_TIMESTAMP", 35 | "PRIORITY", "_EXE", "source_type"] 36 | 37 | [sources.runner-logs] 38 | type = "file" 39 | include = ["/home/runner/actions-runner/_diag/*.log"] 40 | 41 | [sources.runner-logs.multiline] 42 | start_pattern = '^\[[0-9]{4}-[0-9]{2}-[0-9]{2}' 43 | mode = "halt_before" 44 | condition_pattern = '^\[[0-9]{4}-[0-9]{2}-[0-9]{2}' 45 | timeout_ms = 250 46 | 47 | [transforms.grok-runner-logs] 48 | type = "remap" 49 | inputs=["runner-logs"] 50 | source = ''' 51 | structured, err = parse_grok(.message, "(?m)\\[%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{NOTSPACE:logger}\\] 
%{GREEDYDATA:message}") 52 | 53 | if err != null { 54 | .err = err 55 | } else { 56 | . = merge(., structured) 57 | } 58 | ''' 59 | [transforms.filter-runner-logs] 60 | type = "filter" 61 | inputs = ['grok-runner-logs'] 62 | condition = ''' 63 | if .logger == "JobServerQueue" { 64 | !match!(.message, r'Try to append \d+ batches web console lines for record') 65 | } else if .logger == "HostContext" { 66 | !starts_with!(.message, "Well known directory") 67 | } else if .logger == "JobDispatcher" { 68 | !starts_with!(.message, "Successfully renew job request") 69 | } 70 | } else { 71 | true 72 | } 73 | ''' 74 | 75 | [sources.job-logs] 76 | type = "file" 77 | include = ["/home/runner/actions-runner/_diag/pages/*.log"] 78 | 79 | [transforms.grok-job-logs] 80 | type = "remap" 81 | inputs = ["job-logs"] 82 | drop_on_abort = true 83 | source = ''' 84 | structured, err = parse_grok(.message, "%{TIMESTAMP_ISO8601:timestamp} %{GREEDYDATA:message}") 85 | 86 | if err == null { 87 | . = merge(., structured) 88 | .type = "job-output" 89 | 90 | if length(.message) == 0 ?? true { 91 | abort # Don't send empty messages to cloud watch! 92 | } 93 | } 94 | ''' 95 | 96 | # Output data 97 | [sinks.cloudwatch] 98 | inputs = ["without_systemd_fields", "filter-runner-logs", "grok-job-logs"] 99 | type = "aws_cloudwatch_logs" 100 | encoding = "json" 101 | create_missing_group = false 102 | create_missing_stream = true 103 | group_name = "GitHubRunners" 104 | stream_name = "{{ host }}" 105 | region = "${AWS_DEFAULT_REGION}" 106 | -------------------------------------------------------------------------------- /scripts/list_committers: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. 
The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | import rich_click as click 21 | from github import Github 22 | 23 | from rich.console import Console 24 | console = Console(color_system="standard", width=200) 25 | 26 | @click.command(short_help='List committer logins - used to sync list of committers in CI configuration') 27 | @click.option('--github-token', envvar='GITHUB_TOKEN', 28 | help="You can generate the token with readOrg permissions: " 29 | "https://github.com/settings/tokens/new?description=Read%20Org&scopes=read:org") 30 | def main(github_token): 31 | gh = Github(github_token) 32 | org = gh.get_organization('apache') 33 | committers = org.get_team_by_slug('airflow-committers') 34 | committer_usernames = sorted(f'"{c.login}"' for c in committers.get_members()) 35 | 36 | click.echo("Take the below list and:") 37 | click.echo(" - update the `/runners/apache/airflow/configOverlay` parameter in AWS SSM ParameterStore") 38 | click.echo(" - restart the self-hosted runners") 39 | click.echo( 40 | " - Inform the new committer, that it's time to open PR to update list of committers in dev/breeze/src/airflow_breeze/global_constants.py (COMMITTERS variable)" 41 | ) 42 | click.echo(',\n'.join(committer_usernames)) 43 | click.echo(""" 44 | 45 | Add the following explanation: 46 | 47 | As a new committer, adding yourself to committer list gives you automated 48 | acccess to our self-hosted 
runners for your PRs. Once you merge the changes, 49 | all your PRs by default will run using our self-hosted infrastructure, that is 50 | sponsored currently by AWS and Astronomer and involves running the builds on 51 | AWS spot instances that are started on-demand. 52 | 53 | Those instances are way bigger than the Public Runners available for all ASF 54 | projects. They have 8 cores and 64 GB memory and all the builds happen 55 | in-memory, and our builds are optimized to use parallelism and memory so that 56 | often what you will get is 3-4 times speed-up vs the Public instances. 57 | 58 | We currently have up to 35 such instances running at a time, so there is a 59 | possibility - if there are many committers pushing their changes in a short 60 | time that they will get into a queue so it's not always faster to get the 61 | feedback. Currently ASF has plenty (900) public runners, so the delays we 62 | experienced in the past for Public Runners is a distant pass so they should run 63 | relatively quickly as well, but this might change depending on other projects, 64 | because the runners are shared between all ASF projects. 65 | 66 | At any of your PRs you can apply the label "use public runners" before 67 | triggering the build and your build will switch to using Public Runners. 68 | 69 | Most PRs are "selective" - they will only run a subset of tests - the subeset 70 | is calculated by "Selective checks" job (you can open the job output and find 71 | out in details how the decisions are made - we have a simple rule engine that 72 | determines the set of tests to run. You can apply "full tests needed" label to 73 | PR before it starts - this way a complete set of tests will be executed - 74 | including all Python and DB versions. 75 | 76 | As a committer, you can also apply the labels to other's PRs and retrigger the 77 | build if you are in doubt if the selective checks decision was good-enough 78 | (looking at the content of the PR). 
79 | 80 | We also have "canary" builds that run a complete set of tests in "main". They 81 | are run every night and after merging PRs to main. Usually 3-5 canary builds 82 | complete during the day when there are a number of PRs merged - we cancel 83 | running build when new PR gets merged before canary build succeeds and we start 84 | a new one (saves a lot of build time). The canary builds are designed to signal 85 | that some problems passed through the net and need to be fixed in order to turn 86 | "main" into "green" so watch out for those - we sometimes revert culprit PRs or 87 | simply implement fix PRs to do so. If you merged a PR and "canary" build failed, 88 | you should receieve notification about it (but it could be one of the previous 89 | commits that is the root cause so you need to verify that when diagnosing it 90 | by looking at past builds and other PRs. 91 | 92 | """) 93 | 94 | 95 | if __name__ == "__main__": 96 | main() 97 | -------------------------------------------------------------------------------- /github-runner-ami/packer/ubuntu2004.pkr.hcl: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. 
See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | variable "vpc_id" { 19 | type = string 20 | } 21 | variable "ami_name" { 22 | type = string 23 | } 24 | variable "aws_regions" { 25 | type = list(string) 26 | } 27 | variable "packer_role_arn" { 28 | type = string 29 | } 30 | variable "runner_version" { 31 | type = string 32 | } 33 | variable "runner_labels" { 34 | type = string 35 | } 36 | variable "session_manager_instance_profile_name" { 37 | type = string 38 | } 39 | 40 | source "amazon-ebs" "runner_builder" { 41 | #access_key = "" 42 | #secret_key = "" 43 | region = var.aws_regions[0] 44 | ami_name = "${var.ami_name}-${var.runner_version}-v20" 45 | ami_regions = var.aws_regions 46 | tag { 47 | key = "Name" 48 | value = "github-runner-ami" 49 | } 50 | snapshot_tag { 51 | key = "Name" 52 | value = "github-runner-ami-root" 53 | } 54 | encrypt_boot = false 55 | instance_type = "t3.micro" 56 | communicator = "ssh" 57 | ssh_username = "ubuntu" 58 | ssh_interface = "session_manager" 59 | iam_instance_profile = var.session_manager_instance_profile_name 60 | subnet_filter { 61 | # Just pick a random subnet in the VPC -- we only have the three defaults so this is fine! 
62 | random = true 63 | } 64 | vpc_id = var.vpc_id 65 | source_ami_filter { 66 | filters = { 67 | virtualization-type = "hvm" 68 | name = "ubuntu/images/*buntu-focal-20.04-amd64-server-*" 69 | root-device-type = "ebs" 70 | } 71 | owners = ["099720109477"] 72 | most_recent = true 73 | } 74 | launch_block_device_mappings { 75 | device_name = "/dev/sda1" 76 | volume_size = 16 77 | volume_type = "gp3" 78 | delete_on_termination = true 79 | } 80 | } 81 | 82 | build { 83 | sources = [ 84 | "source.amazon-ebs.runner_builder" 85 | ] 86 | 87 | provisioner "shell" { 88 | inline = [ 89 | "echo Connected via SSM at '${build.User}@${build.Host}:${build.Port}'" 90 | ] 91 | } 92 | 93 | # Since we connect as a non-root user, we have to "stage" the files to a writable folder, which we then move 94 | # in to place with the approriate permissions via install-files.sh provisioner step 95 | provisioner "shell" { 96 | inline = [ 97 | "mkdir -p /tmp/etc-systemd-system /tmp/usr-local-sbin /tmp/usr-local-bin /tmp/etc-sudoers.d /tmp/etc-iptables /tmp/etc-cron.d /tmp/etc-vector /tmp/etc-systemd-system-vector.service.d" 98 | ] 99 | } 100 | provisioner "file" { 101 | destination = "/tmp/etc-systemd-system/actions.runner.service" 102 | source = "./files/actions.runner.service" 103 | } 104 | provisioner "file" { 105 | destination = "/tmp/etc-systemd-system/actions.runner-supervisor.service" 106 | source = "./files/actions.runner-supervisor.service" 107 | } 108 | provisioner "file" { 109 | destination = "/tmp/usr-local-sbin/runner-cleanup-workdir.sh" 110 | source = "./files/runner-cleanup-workdir.sh" 111 | } 112 | provisioner "file" { 113 | destination = "/tmp/usr-local-bin/stop-runner-if-no-job.sh" 114 | source = "./files/stop-runner-if-no-job.sh" 115 | } 116 | provisioner "file" { 117 | destination = "/tmp/etc-sudoers.d/runner" 118 | source = "./files/runner" 119 | } 120 | provisioner "file" { 121 | destination = "/tmp/etc-iptables/rules.v4" 122 | source = "./files/rules.v4" 123 | } 124 | 
provisioner "file" { 125 | destination = "/tmp/usr-local-sbin/actions-runner-ec2-reporting.sh" 126 | source = "./files/actions-runner-ec2-reporting.sh" 127 | } 128 | provisioner "file" { 129 | destination = "/tmp/etc-cron.d/cloudwatch-metrics-github-runners" 130 | source = "./files/cloudwatch-metrics-github-runners" 131 | } 132 | provisioner "file" { 133 | destination = "/tmp/timber.key" 134 | source = "./files/timber.key" 135 | } 136 | provisioner "file" { 137 | destination = "/tmp/runner-supervisor" 138 | source = "./files/runner-supervisor.py" 139 | } 140 | provisioner "file" { 141 | destination = "/tmp/etc-vector/vector.toml" 142 | source = "./files/vector.toml" 143 | } 144 | provisioner "shell" { 145 | scripts = [ 146 | "./files/install-files.sh", 147 | "./files/source-list-additions.sh", 148 | "./files/install-dependencies.sh", 149 | "./files/install-nodejs.sh", 150 | "./files/install-github-cli.sh", 151 | "./files/install-subversion.sh", 152 | "./files/docker.sh", 153 | "./files/configure_kernel.sh", 154 | "./files/git.sh", 155 | "./files/runner_bootstrap.sh", 156 | "./files/create-hostedtools-cache.sh", 157 | "./files/regctl.sh", 158 | "./files/cleanup.sh", 159 | ] 160 | execute_command = "chmod +x '{{ .Path }}'; sudo sh -c '{{ .Vars }} {{ .Path }}'" 161 | environment_vars = [ 162 | "RUNNER_VERSION=${var.runner_version}", 163 | "RUNNER_LABELS=${var.runner_labels}", 164 | ] 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /scripts/store-agent-creds.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. 
The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | import json 19 | import os 20 | import platform 21 | import subprocess 22 | import tempfile 23 | from typing import Optional, Tuple 24 | 25 | import boto3 26 | import click 27 | import requests 28 | from botocore.exceptions import NoCredentialsError 29 | 30 | 31 | @click.command() 32 | @click.option( 33 | "--runner-version", 34 | default="2.275.1", 35 | help="Runner version to register with", 36 | metavar="VER", 37 | ) 38 | @click.option("--repo", default="apache/airflow") 39 | @click.option("--store-as", default="apache/airflow") 40 | @click.option("--runnergroup") 41 | @click.option("--token", help="GitHub runner registration token", required=False) 42 | @click.option("--index", type=int, required=False) 43 | def main( 44 | token, runner_version, store_as: Optional[str], repo, runnergroup: Optional[str], index: Optional[int] 45 | ): 46 | check_aws_config() 47 | dir = make_runner_dir(runner_version) 48 | 49 | if not token: 50 | token = click.prompt("GitHub runner registration token") 51 | 52 | if store_as is None: 53 | store_as = repo 54 | 55 | if index is None: 56 | index = get_next_index(store_as) 57 | click.echo(f"Registering as runner {index}") 58 | 59 | register_runner(dir.name, token, repo, runnergroup, store_as, index) 60 | 61 | 62 | def check_aws_config(): 63 | click.echo("Checking AWS account credentials") 64 | try: 65 | whoami = 
boto3.client("sts").get_caller_identity() 66 | except NoCredentialsError: 67 | click.echo("No AWS credentials found -- maybe you need to set AWS_PROFILE?", err=True) 68 | exit(1) 69 | 70 | if whoami["Account"] != "827901512104": 71 | click.echo("Wrong AWS account in use -- maybe you need to set AWS_PROFILE?", err=True) 72 | exit(1) 73 | 74 | 75 | def make_runner_dir(version): 76 | """Extract the runner tar to a temporary directory""" 77 | dir = tempfile.TemporaryDirectory() 78 | 79 | tar = _get_runner_tar(version) 80 | 81 | subprocess.check_call( 82 | ["tar", "-xzf", tar], 83 | cwd=dir.name, 84 | ) 85 | 86 | return dir 87 | 88 | 89 | def get_next_index(repo: str) -> int: 90 | """Find the next available index to store the runner credentials in AWS SSM ParameterStore""" 91 | paginator = boto3.client("ssm").get_paginator("describe_parameters") 92 | 93 | path = os.path.join('/runners/', repo, '') 94 | 95 | pages = paginator.paginate(ParameterFilters=[{"Key": "Path", "Option": "Recursive", "Values": [path]}]) 96 | 97 | seen = set() 98 | 99 | for page in pages: 100 | for param in page['Parameters']: 101 | name = param['Name'] 102 | 103 | # '/runners/1/config' -> '1' 104 | index = os.path.basename(os.path.dirname(name)) 105 | seen.add(int(index)) 106 | 107 | if not seen: 108 | return 1 109 | 110 | # Fill in any gaps too. 
111 | for n in range(1, max(seen) + 2): 112 | if n not in seen: 113 | return n 114 | 115 | 116 | def register_runner(dir: str, token: str, repo: str, runnergroup: Optional[str], store_as: str, index: int): 117 | os.chdir(dir) 118 | 119 | cmd = [ 120 | "./config.sh", 121 | "--unattended", 122 | "--url", 123 | f"https://github.com/{repo}", 124 | "--token", 125 | token, 126 | "--name", 127 | f"Airflow Runner {index}", 128 | ] 129 | 130 | if runnergroup: 131 | cmd += ['--runnergroup', runnergroup] 132 | 133 | res = subprocess.call(cmd) 134 | 135 | if res != 0: 136 | exit(res) 137 | _put_runner_creds(store_as, index) 138 | 139 | 140 | def _put_runner_creds(repo: str, index: int): 141 | client = boto3.client("ssm") 142 | 143 | with open(".runner", encoding='utf-8-sig') as fh: 144 | # We want to adjust the config before storing it! 145 | config = json.load(fh) 146 | config["pullRequestSecurity"] = {} 147 | 148 | client.put_parameter( 149 | Name=f"/runners/{repo}/{index}/config", 150 | Type="String", 151 | Value=json.dumps(config, indent=2), 152 | ) 153 | 154 | with open(".credentials", encoding='utf-8-sig') as fh: 155 | client.put_parameter(Name=f"/runners/{repo}/{index}/credentials", Type="String", Value=fh.read()) 156 | 157 | with open(".credentials_rsaparams", encoding='utf-8-sig') as fh: 158 | client.put_parameter(Name=f"/runners/{repo}/{index}/rsaparams", Type="SecureString", Value=fh.read()) 159 | 160 | 161 | def _get_system_arch() -> Tuple[str, str]: 162 | uname = platform.uname() 163 | if uname.system == "Linux": 164 | system = "linux" 165 | elif uname.system == "Darwin": 166 | system = "osx" 167 | else: 168 | raise RuntimeError("Un-supported platform") 169 | 170 | if uname.machine == "x86_64": 171 | arch = "x64" 172 | else: 173 | raise RuntimeError("Un-supported architecture") 174 | 175 | return system, arch 176 | 177 | 178 | def _get_runner_tar(version) -> str: 179 | system, arch = _get_system_arch() 180 | 181 | cache = os.path.abspath(".cache") 182 | 183 | try: 
184 | os.mkdir(cache) 185 | except FileExistsError: 186 | pass 187 | 188 | fname = f"actions-runner-{system}-{arch}-{version}.tar.gz" 189 | local_file = os.path.join(cache, fname) 190 | 191 | if os.path.exists(local_file): 192 | return local_file 193 | 194 | url = f"https://github.com/actions/runner/releases/download/v{version}/{fname}" 195 | click.echo(f"Getting {url}") 196 | resp = requests.get(url, stream=True) 197 | resp.raise_for_status() 198 | with open(local_file, "wb") as fh, click.progressbar(length=int(resp.headers["content-length"])) as bar: 199 | for chunk in resp.iter_content(chunk_size=40960): 200 | fh.write(chunk) 201 | bar.update(len(chunk)) 202 | return local_file 203 | 204 | 205 | if __name__ == "__main__": 206 | main() 207 | -------------------------------------------------------------------------------- /lambdas/scale_out_runner/app.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | import codecs 19 | import hmac 20 | import json 21 | import logging 22 | import os 23 | from typing import cast 24 | 25 | import boto3 26 | from chalice import BadRequestError, Chalice, ForbiddenError 27 | from chalice.app import Request 28 | 29 | app = Chalice(app_name='scale_out_runner') 30 | app.log.setLevel(logging.INFO) 31 | 32 | ASG_GROUP_NAME = os.getenv('ASG_NAME', 'AshbRunnerASG') 33 | ASG_REGION_NAME = os.getenv('ASG_REGION_NAME', None) 34 | TABLE_NAME = os.getenv('COUNTER_TABLE', 'GithubRunnerQueue') 35 | _commiters = set() 36 | GH_WEBHOOK_TOKEN = None 37 | 38 | REPOS = os.getenv('REPOS') 39 | if REPOS: 40 | REPO_CONFIGURATION = json.loads(REPOS) 41 | else: 42 | REPO_CONFIGURATION = { 43 | # : [list-of-branches-to-use-self-hosted-on] 44 | 'apache/airflow': {'main', 'master'}, 45 | } 46 | del REPOS 47 | 48 | 49 | @app.route('/', methods=['POST']) 50 | def index(): 51 | validate_gh_sig(app.current_request) 52 | 53 | if app.current_request.headers.get('X-GitHub-Event', None) != "check_run": 54 | # Ignore things about installs/permissions etc 55 | return {'ignored': 'not about check_runs'} 56 | 57 | body = app.current_request.json_body 58 | 59 | repo = body['repository']['full_name'] 60 | 61 | sender = body['sender']['login'] 62 | 63 | # Other repos configured with this app, but we don't do anything with them 64 | # yet. 65 | if repo not in REPO_CONFIGURATION: 66 | app.log.info("Ignoring event for %r", repo) 67 | return {'ignored': 'Other repo'} 68 | 69 | interested_branches = REPO_CONFIGURATION[repo] 70 | 71 | branch = body['check_run']['check_suite']['head_branch'] 72 | 73 | use_self_hosted = sender in commiters() or branch in interested_branches 74 | payload = {'sender': sender, 'use_self_hosted': use_self_hosted} 75 | 76 | if body['action'] == 'completed' and body['check_run']['conclusion'] == 'cancelled': 77 | if use_self_hosted: 78 | # The only time we get a "cancelled" job is when it wasn't yet running. 
79 | queue_length = increment_dynamodb_counter(-1) 80 | # Don't scale in the ASG -- let the CloudWatch alarm do that. 81 | payload['new_queue'] = queue_length 82 | else: 83 | payload = {'ignored': 'unknown sender'} 84 | 85 | elif body['action'] != 'created': 86 | payload = {'ignored': "action is not 'created'"} 87 | 88 | elif body['check_run']['status'] != 'queued': 89 | # Skipped runs are "created", but are instantly completed. Ignore anything that is not queued 90 | payload = {'ignored': "check_run.status is not 'queued'"} 91 | else: 92 | if use_self_hosted: 93 | # Increment counter in DynamoDB 94 | queue_length = increment_dynamodb_counter() 95 | payload.update(**scale_asg_if_needed(queue_length)) 96 | app.log.info( 97 | "delivery=%s branch=%s: %r", 98 | app.current_request.headers.get('X-GitHub-Delivery', None), 99 | branch, 100 | payload, 101 | ) 102 | return payload 103 | 104 | 105 | def commiters(ssm_repo_name: str = os.getenv('SSM_REPO_NAME', 'apache/airflow')): 106 | global _commiters 107 | 108 | if not _commiters: 109 | client = boto3.client('ssm') 110 | param_path = os.path.join('/runners/', ssm_repo_name, 'configOverlay') 111 | app.log.info("Loading config overlay from %s", param_path) 112 | 113 | try: 114 | 115 | resp = client.get_parameter(Name=param_path, WithDecryption=True) 116 | except client.exceptions.ParameterNotFound: 117 | app.log.debug("Failed to load config overlay", exc_info=True) 118 | return set() 119 | 120 | try: 121 | overlay = json.loads(resp['Parameter']['Value']) 122 | except ValueError: 123 | app.log.debug("Failed to parse config overlay", exc_info=True) 124 | return set() 125 | 126 | _commiters = set(overlay['pullRequestSecurity']['allowedAuthors']) 127 | 128 | return _commiters 129 | 130 | 131 | def validate_gh_sig(request: Request): 132 | sig = request.headers.get('X-Hub-Signature-256', None) 133 | if not sig or not sig.startswith('sha256='): 134 | raise BadRequestError('X-Hub-Signature-256 not of expected format') 135 | 136 | 
sig = sig[len('sha256=') :] 137 | calculated_sig = sign_request_body(request) 138 | 139 | app.log.debug('Checksum verification - expected %s got %s', calculated_sig, sig) 140 | 141 | if not hmac.compare_digest(sig, calculated_sig): 142 | raise ForbiddenError('Spoofed request') 143 | 144 | 145 | def sign_request_body(request: Request) -> str: 146 | global GH_WEBHOOK_TOKEN 147 | if GH_WEBHOOK_TOKEN is None: 148 | if 'GH_WEBHOOK_TOKEN' in os.environ: 149 | # Local dev support: 150 | GH_WEBHOOK_TOKEN = os.environ['GH_WEBHOOK_TOKEN'].encode('utf-8') 151 | else: 152 | encrypted = os.environb[b'GH_WEBHOOK_TOKEN_ENCRYPTED'] 153 | 154 | kms = boto3.client('kms') 155 | response = kms.decrypt(CiphertextBlob=codecs.decode(encrypted, 'base64')) 156 | GH_WEBHOOK_TOKEN = response['Plaintext'] 157 | body = cast(bytes, request.raw_body) 158 | return hmac.new(GH_WEBHOOK_TOKEN, body, digestmod='SHA256').hexdigest() # type: ignore 159 | 160 | 161 | def increment_dynamodb_counter(delta: int = 1) -> int: 162 | dynamodb = boto3.client('dynamodb') 163 | args = dict( 164 | TableName=TABLE_NAME, 165 | Key={'id': {'S': 'queued_jobs'}}, 166 | ExpressionAttributeValues={':delta': {'N': str(delta)}}, 167 | UpdateExpression='ADD queued :delta', 168 | ReturnValues='UPDATED_NEW', 169 | ) 170 | 171 | if delta < 0: 172 | # Make sure it never goes below zero! 
173 | args['ExpressionAttributeValues'][':limit'] = {'N': str(-delta)} 174 | args['ConditionExpression'] = 'queued >= :limit' 175 | 176 | resp = dynamodb.update_item(**args) 177 | return int(resp['Attributes']['queued']['N']) 178 | 179 | 180 | def scale_asg_if_needed(num_queued_jobs: int) -> dict: 181 | asg = boto3.client('autoscaling', region_name=ASG_REGION_NAME) 182 | 183 | resp = asg.describe_auto_scaling_groups( 184 | AutoScalingGroupNames=[ASG_GROUP_NAME], 185 | ) 186 | 187 | asg_info = resp['AutoScalingGroups'][0] 188 | 189 | current = asg_info['DesiredCapacity'] 190 | max_size = asg_info['MaxSize'] 191 | 192 | busy = 0 193 | for instance in asg_info['Instances']: 194 | if instance['LifecycleState'] == 'InService' and instance['ProtectedFromScaleIn']: 195 | busy += 1 196 | app.log.info("Busy instances: %d, num_queued_jobs: %d, current_size: %d", busy, num_queued_jobs, current) 197 | 198 | new_size = num_queued_jobs + busy 199 | if new_size > current: 200 | if new_size <= max_size or current < max_size: 201 | try: 202 | new_size = min(new_size, max_size) 203 | asg.set_desired_capacity(AutoScalingGroupName=ASG_GROUP_NAME, DesiredCapacity=new_size) 204 | return {'new_capcity': new_size} 205 | except asg.exceptions.ScalingActivityInProgressFault as e: 206 | return {'error': str(e)} 207 | else: 208 | return {'capacity_at_max': True} 209 | else: 210 | return {'idle_instances': True} 211 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | ============================================================================ 204 | APACHE AIRFLOW SUBCOMPONENTS: 205 | 206 | The Apache Airflow project contains subcomponents with separate copyright 207 | notices and license terms. Your use of the source code for these 208 | subcomponents is subject to the terms and conditions of the following 209 | licenses. 210 | 211 | 212 | ======================================================================== 213 | Third party Apache 2.0 licenses 214 | ======================================================================== 215 | 216 | The following components are provided under the Apache 2.0 License. 217 | See project link for details. The text of each license is also included 218 | at licenses/LICENSE-[project].txt.
219 | 220 | (ALv2 License) hue v4.3.0 (https://github.com/cloudera/hue/) 221 | (ALv2 License) jqclock v2.3.0 (https://github.com/JohnRDOrazio/jQuery-Clock-Plugin) 222 | (ALv2 License) bootstrap3-typeahead v4.0.2 (https://github.com/bassjobsen/Bootstrap-3-Typeahead) 223 | 224 | ======================================================================== 225 | MIT licenses 226 | ======================================================================== 227 | 228 | The following components are provided under the MIT License. See project link for details. 229 | The text of each license is also included at licenses/LICENSE-[project].txt. 230 | 231 | (MIT License) jquery v3.4.1 (https://jquery.org/license/) 232 | (MIT License) dagre-d3 v0.6.4 (https://github.com/cpettitt/dagre-d3) 233 | (MIT License) bootstrap v3.2 (https://github.com/twbs/bootstrap/) 234 | (MIT License) d3-tip v0.9.1 (https://github.com/Caged/d3-tip) 235 | (MIT License) dataTables v1.10.20 (https://datatables.net) 236 | (MIT License) Bootstrap Toggle v2.2.2 (http://www.bootstraptoggle.com) 237 | (MIT License) normalize.css v3.0.2 (http://necolas.github.io/normalize.css/) 238 | (MIT License) ElasticMock v1.3.2 (https://github.com/vrcmarcos/elasticmock) 239 | (MIT License) MomentJS v2.24.0 (http://momentjs.com/) 240 | (MIT License) moment-strftime v0.5.0 (https://github.com/benjaminoakes/moment-strftime) 241 | (MIT License) python-slugify v4.0.0 (https://github.com/un33k/python-slugify) 242 | (MIT License) python-nvd3 v0.15.0 (https://github.com/areski/python-nvd3) 243 | (MIT License) rich v9.2.0 (https://github.com/willmcgugan/rich) 244 | (MIT License) eonasdan-bootstrap-datetimepicker v4.17.37 (https://github.com/eonasdan/bootstrap-datetimepicker/) 245 | 246 | ======================================================================== 247 | BSD 3-Clause licenses 248 | ======================================================================== 249 | The following components are provided under the BSD 3-Clause 
license. See project links for details. 250 | The text of each license is also included at licenses/LICENSE-[project].txt. 251 | 252 | (BSD 3 License) d3 v5.15.0 (https://d3js.org) 253 | -------------------------------------------------------------------------------- /github-runner-ami/packer/files/runner-supervisor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | """ 19 | Obtain credentials for Actions runner and co-operate with auto scaling group 20 | 21 | The purpose of this script is to enable the self-hosted runners to operate in 22 | an AutoScaling environment (without needing admin permissions on the GitHub 23 | repo to create and delete runners.) 24 | 25 | The order of operations is: 26 | 27 | 1. Obtain credentials 28 | 29 | We have pre-created a number of credentials and stored them in Amazon KMS. 30 | This script makes use of dynamodb to obtain an exclusive lock on a set of 31 | credentials. 
32 | 33 | We need the "locking" as if you use credentials that are already 34 | in use the new runner process will wait (but never error) until they are not 35 | in use. 36 | 37 | 2. Complete the ASG lifecycle action so the instance is marked as InService 38 | 39 | This might not be strictly necessary, we don't want the instance to be "in 40 | service" until the runner has started. 41 | 42 | 3. Emit metric saying whether instance is running a job or not 43 | 44 | This is used to drive the scale-in CloudWatch alarm 45 | 46 | 4. Monitor for the runner starting jobs, and protecting the instance from Scale-In when it is 47 | 48 | Since we are running in an autoscaling group we can't dictate which instance 49 | AWS choses to terminate, so we instead have to set scale-in protection when a job is running. 50 | 51 | The way we watch for jobs being executed is using the Netlink Process 52 | Connector, which is a datagram socket that a (root) process can open to the 53 | kernel, to receive push events for whenever a process starts or stops. 54 | 55 | There are more events than that send, and to limit it to the only ones we 56 | care about we use a BPF filter to drop everything else. 57 | 58 | Since it is a datagram socket it is possible we might miss a notification, so 59 | we also periodically check if the process is still alive 60 | 61 | 5. Watch for ASG instance state changing to Terminating:Wait 62 | 63 | When the ASG wants to terminate the instance, we have it configured to put 64 | the instance in to a "requested" state -- this is to avoid a race condition 65 | where the instance _isn't_ running a job (so isn't protected from scale in), 66 | gets set to Terminating, but before AWS shuts down the machine the runner 67 | process picks up and starts a Job, which leads to the job failing with "The 68 | self-hosted runner: Airflow Runner $N lost communication with the server". 
69 | 70 | When we notice being in this state, we _gracefully_ shut down the runner 71 | (letting it complete any job it might have), stop it from restarting, and 72 | then allow the termination lifecycle to continue 73 | 74 | """ 75 | import ctypes 76 | import datetime 77 | import enum 78 | import errno 79 | import json 80 | import logging 81 | import os 82 | import random 83 | import selectors 84 | import shutil 85 | import signal 86 | import socket 87 | from subprocess import check_call 88 | from typing import Callable, List, Tuple, Union 89 | 90 | import boto3 91 | import click 92 | import psutil 93 | from python_dynamodb_lock.python_dynamodb_lock import DynamoDBLockClient, DynamoDBLockError 94 | from tenacity import before_sleep_log, retry, stop_after_delay, wait_exponential 95 | 96 | logging.basicConfig(level=logging.INFO) 97 | log = logging.getLogger(__name__) 98 | log.setLevel(logging.DEBUG) 99 | 100 | logging.getLogger('python_dynamodb_lock').setLevel(logging.WARNING) 101 | 102 | 103 | TABLE_NAME = os.getenv('COUNTER_TABLE', 'GithubRunnerQueue') 104 | 105 | 106 | @click.command() 107 | @click.option('--repo', default='apache/airflow') 108 | @click.option('--user', default='runner') 109 | @click.option( 110 | '--output-folder', 111 | help="Folder to write credentials to. 
Default of ~runner/actions-runner", 112 | default='~runner/actions-runner', 113 | ) 114 | def main(repo, output_folder, user): 115 | global INSTANCE_ID 116 | # Notify the ASG LifeCycle hook that we are now In Service and ready to 117 | # process requests/safe to be shut down 118 | 119 | # Fetch current instance ID from where cloutinit writes it to 120 | if not INSTANCE_ID: 121 | with open('/var/lib/cloud/data/instance-id') as fh: 122 | INSTANCE_ID = fh.readline().strip() 123 | 124 | log.info("Starting on %s...", INSTANCE_ID) 125 | 126 | output_folder = os.path.expanduser(output_folder) 127 | 128 | short_time = datetime.timedelta(microseconds=1) 129 | 130 | dynamodb = boto3.resource('dynamodb') 131 | client = DynamoDBLockClient( 132 | dynamodb, 133 | table_name='GitHubRunnerLocks', 134 | expiry_period=datetime.timedelta(0, 300), 135 | heartbeat_period=datetime.timedelta(seconds=10), 136 | ) 137 | 138 | # Just keep trying until we get some credentials. 139 | while True: 140 | # Have each runner try to get a credential in a random order. 
141 | possibles = get_possible_credentials(repo) 142 | random.shuffle(possibles) 143 | 144 | log.info("Trying to get a set of credentials in this order: %r", possibles) 145 | 146 | notify = get_sd_notify_func() 147 | 148 | for index in possibles: 149 | try: 150 | lock = client.acquire_lock( 151 | f'{repo}/{index}', 152 | retry_period=short_time, 153 | retry_timeout=short_time, 154 | raise_context_exception=True, 155 | ) 156 | except DynamoDBLockError as e: 157 | log.info("Could not lock %s (%s)", index, e) 158 | continue 159 | 160 | with lock: 161 | log.info("Obtained lock on %s", index) 162 | write_credentials_to_files(repo, index, output_folder, user) 163 | merge_in_settings(repo, output_folder) 164 | notify(f"STATUS=Obtained lock on {index}") 165 | 166 | if get_lifecycle_state() == "Pending:Wait": 167 | complete_asg_lifecycle_hook() 168 | 169 | notify("READY=1") 170 | log.info("Watching for Runner.Worker processes") 171 | ProcessWatcher().run() 172 | 173 | client.close() 174 | 175 | exit() 176 | 177 | 178 | def get_sd_notify_func() -> Callable[[str], None]: 179 | # http://www.freedesktop.org/software/systemd/man/sd_notify.html 180 | addr = os.getenv('NOTIFY_SOCKET') 181 | if not addr: 182 | return lambda status: None 183 | 184 | sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) 185 | if addr[0] == '@': 186 | addr = '\0' + addr[1:] 187 | sock.connect(addr) 188 | 189 | def notify(status: str): 190 | sock.sendall(status.encode('utf-8')) 191 | 192 | return notify 193 | 194 | 195 | def write_credentials_to_files( 196 | repo: str, index: str, out_folder: str = '~runner/actions-runner', user: str = 'runner' 197 | ): 198 | param_path = os.path.join('/runners/', repo, index) 199 | 200 | resp = boto3.client("ssm").get_parameters_by_path(Path=param_path, Recursive=False, WithDecryption=True) 201 | 202 | param_to_file = { 203 | 'config': '.runner', 204 | 'credentials': '.credentials', 205 | 'rsaparams': '.credentials_rsaparams', 206 | } 207 | 208 | for param in 
def merge_in_settings(repo: str, out_folder: str) -> None:
    """Overlay optional settings from SSM onto the runner's ``.runner`` file.

    Reads ``/runners/<repo>/configOverlay`` (a JSON object) and merges its
    keys over the settings already written to ``<out_folder>/.runner``.
    A missing or unparsable overlay is silently ignored (best-effort).
    """
    client = boto3.client('ssm')

    param_path = os.path.join('/runners/', repo, 'configOverlay')
    log.info("Loading config overlay from %s", param_path)

    try:
        resp = client.get_parameter(Name=param_path, WithDecryption=True)
    except client.exceptions.ParameterNotFound:
        log.debug("Failed to load config overlay", exc_info=True)
        return

    try:
        overlay = json.loads(resp['Parameter']['Value'])
    except ValueError:
        log.debug("Failed to parse config overlay", exc_info=True)
        return

    with open(os.path.join(out_folder, ".runner"), "r+") as fh:
        settings = json.load(fh)

        # Overlay keys win over the baked-in settings
        settings.update(overlay)

        # Rewrite the file in place
        fh.seek(0, os.SEEK_SET)
        os.ftruncate(fh.fileno(), 0)
        json.dump(settings, fh, indent=2)


def get_possible_credentials(repo: str) -> List[str]:
    """Return the list of credential indexes available for ``repo``.

    First consults the pre-computed ``runnersList`` parameter; if it does not
    exist, walks the SSM parameter tree under ``/runners/<repo>/`` collecting
    numeric index directories, then stores the computed list back (unless
    another instance won the race).

    :raises RuntimeError: if no credentials exist for the repo at all.
    """
    client = boto3.client("ssm")
    paginator = client.get_paginator("describe_parameters")

    path = os.path.join('/runners/', repo, '')
    baked_path = os.path.join(path, 'runnersList')

    # Pre-compute the list, to avoid making lots of requests and getting throttled by SSM API in case of
    # thundering herd
    try:
        resp = client.get_parameter(Name=baked_path)
        # Only log success once we know the parameter actually exists
        log.info("Using pre-computed credentials indexes from %s", baked_path)
        return resp['Parameter']['Value'].split(',')
    except client.exceptions.ParameterNotFound:
        pass

    log.info("Looking at %s for possible credentials", path)

    pages = paginator.paginate(
        ParameterFilters=[{"Key": "Path", "Option": "Recursive", "Values": [path]}],
        PaginationConfig={
            "PageSize": 50,
        },
    )

    seen = set()

    for i, page in enumerate(pages):
        log.info("Page %d", i)
        for param in page['Parameters']:
            name = param['Name']
            log.info("%s", name)

            # '/runners/x/1/config' -> '1/config',
            # '/runners/x/y/1/config' -> 'y/1/config',
            local_name = name[len(path) :]

            try:
                # '1/config' -> '1'
                index, _ = local_name.split('/')
            except ValueError:
                # Ignore any 'x/y' when we asked for 'x'. There should only be an index and a filename
                log.debug("Ignoring nested path %s", name)
                continue

            try:
                # Check it's a number, but keep variable as string
                int(index)
            except ValueError:
                log.debug("Ignoring non-numeric index %s", name)
                continue

            seen.add(index)

    if not seen:
        raise RuntimeError(f'No credentials found in SSM ParameterStore for {repo!r}')

    try:
        # Overwrite=False means we only win if nobody else stored it first
        client.put_parameter(
            Name=baked_path, Type='StringList', Value=','.join(seen), Overwrite=False
        )
        log.info("Stored pre-computed credentials indexes at %s", baked_path)
    except client.exceptions.ParameterAlreadyExists:
        # Race, we lost, never mind!
        pass

    return list(seen)


# Cached instance metadata, populated lazily by main()/get_lifecycle_state()
OWN_ASG = None
INSTANCE_ID = None


def get_lifecycle_state() -> str:
    """Return this instance's ASG lifecycle state (e.g. ``"Pending:Wait"``).

    Also caches the owning AutoScalingGroup name in ``OWN_ASG`` on first
    success. Returns ``"UNKNOWN"`` when the instance cannot be found or the
    API call fails.
    """
    global INSTANCE_ID, OWN_ASG

    if not INSTANCE_ID:
        with open('/var/lib/cloud/data/instance-id') as fh:
            INSTANCE_ID = fh.readline().strip()

    asg_client = boto3.client('autoscaling')

    try:
        instances = asg_client.describe_auto_scaling_instances(
            InstanceIds=[INSTANCE_ID],
        )['AutoScalingInstances']
    except asg_client.exceptions.ClientError:
        return "UNKNOWN"

    if len(instances) != 1:
        return "UNKNOWN"

    details = instances[0]

    if not OWN_ASG:
        OWN_ASG = details['AutoScalingGroupName']

    return details['LifecycleState']


def complete_asg_lifecycle_hook(hook_name='WaitForInstanceReportReady', retry=False):
    """Mark the given ASG lifecycle hook as CONTINUE for this instance.

    ``retry`` is currently unused; kept for interface compatibility.
    """
    # Notify the ASG LifeCycle hook that we are now InService and ready to
    # process requests/safe to be shut down
    asg_client = boto3.client('autoscaling')

    try:
        asg_client.complete_lifecycle_action(
            AutoScalingGroupName=OWN_ASG,
            InstanceId=INSTANCE_ID,
            LifecycleHookName=hook_name,
            LifecycleActionResult='CONTINUE',
        )
        log.info("LifeCycle hook %s set to CONTINUE instance=%s", hook_name, INSTANCE_ID)
    except asg_client.exceptions.ClientError as e:
        # If the script fails for whatever reason and we re-run, the lifecycle
        # hook may have already been completed, so this would fail. That is
        # not an error.

        # We don't want the stacktrace here, just the message
        log.warning("Failed to complete lifecycle hook %s: %s", hook_name, str(e))
# Constants and types from
# https://github.com/torvalds/linux/blob/fcadab740480e0e0e9fa9bd272acd409884d431a/include/uapi/linux/cn_proc.h
class NlMsgFlag(enum.IntEnum):
    """Netlink message types (NLMSG_*)"""

    NoOp = 1
    Error = 2
    Done = 3
    Overrun = 4


class NLMsgHdr(ctypes.Structure):
    """Netlink Message Header (struct nlmsghdr, 16 bytes)"""

    _fields_ = [
        ("len", ctypes.c_uint32),
        ("type", ctypes.c_uint16),
        ("flags", ctypes.c_uint16),
        ("seq", ctypes.c_uint32),
        ("pid", ctypes.c_uint32),
    ]


class ProcConnectorOp(enum.IntEnum):
    """Proc connector subscribe/unsubscribe operations (PROC_CN_MCAST_*)"""

    MCAST_LISTEN = 1
    MCAST_IGNORE = 2


class cn_msg(ctypes.Structure):
    """Linux kernel Connector message (struct cn_msg)"""

    CN_IDX_PROC = 1
    CN_VAL_PROC = 1

    _fields_ = [
        ("cb_id_idx", ctypes.c_uint32),
        ("cb_id_val", ctypes.c_uint32),
        ("seq", ctypes.c_uint32),
        ("ack", ctypes.c_uint32),
        ("len", ctypes.c_uint16),
        ("flags", ctypes.c_uint16),
    ]

    def __init__(self, header, data, **kwargs):
        super().__init__(**kwargs)
        self.header = header
        self.len = ctypes.sizeof(data)
        self.data = data
        # Total on-the-wire length: netlink header + this struct + payload
        self.header.len = ctypes.sizeof(header) + ctypes.sizeof(self) + self.len

    def to_bytes(self):
        return bytes(self.header) + bytes(self) + bytes(self.data)  # type: ignore


class ProcEventWhat(enum.IntFlag):
    """Values of proc_event.what (PROC_EVENT_*)"""

    NONE = 0x0
    FORK = 0x1
    EXEC = 0x2
    UID = 0x4
    GID = 0x40
    SID = 0x80
    # Fixed to match cn_proc.h: PROC_EVENT_PTRACE is 0x100 and
    # PROC_EVENT_COMM is 0x200. The previous values (0x10/0x20) match no
    # kernel event, so a real PTRACE/COMM event would make
    # ProcEventWhat(event.what) raise ValueError and crash the watcher.
    PTRACE = 0x00000100
    COMM = 0x00000200
    COREDUMP = 0x40000000
    EXIT = 0x80000000


class proc_event(ctypes.Structure):
    """Common prefix of the kernel's struct proc_event"""

    _fields_ = [
        ("what", ctypes.c_uint32),
        ("cpu", ctypes.c_uint32),
        ("timestamp", ctypes.c_uint64),  # Number of nano seconds since system boot
    ]

    @classmethod
    def from_netlink_packet(
        cls, data
    ) -> Tuple["proc_event", Union[None, "exec_proc_event", "exit_proc_event"]]:
        """
        Parse a raw netlink datagram into a (proc_event, detail) pair.

        The detail struct is only populated for EXEC and EXIT events; for
        everything else it is None.
        """
        # Netlink message header (struct nlmsghdr)
        header = NLMsgHdr.from_buffer_copy(data)
        data = data[ctypes.sizeof(header) :]

        # We already checked/filtered on header.type == NlMsgFlag.Done

        # Connector message header (struct cn_msg)
        connector_msg = cn_msg.from_buffer_copy(data)

        # Ignore messages from other Netlink connector types: done via BPF

        data = data[ctypes.sizeof(connector_msg) :]

        event = proc_event.from_buffer_copy(data)
        data = data[ctypes.sizeof(event) :]
        event.what = ProcEventWhat(event.what)

        if event.what == ProcEventWhat.EXEC:
            return event, exec_proc_event.from_buffer_copy(data)
        elif event.what == ProcEventWhat.EXIT:
            return event, exit_proc_event.from_buffer_copy(data)
        return event, None


class exec_proc_event(ctypes.Structure):
    """Payload of a PROC_EVENT_EXEC notification"""

    _fields_ = [
        ("pid", ctypes.c_int32),
        ("tid", ctypes.c_int32),
    ]


class exit_proc_event(ctypes.Structure):
    """Payload of a PROC_EVENT_EXIT notification"""

    _fields_ = [
        ("pid", ctypes.c_int32),
        ("tid", ctypes.c_int32),
        ("exit_code", ctypes.c_int32),
        ("signal", ctypes.c_int32),
    ]


class ProcessWatcher:
    """Watch process start/exit events and manage ASG scale-in protection."""

    # pid -> psutil.Process for Runner.Worker processes currently running jobs.
    # Class-level state is fine here: only one watcher instance ever exists.
    interesting_processes = {}

    # Last value successfully sent to SetInstanceProtection (None = never set)
    protected = None
    # True once the ASG has moved us into the Terminating:Wait lifecycle state
    in_termating_lifecycle = False

    def run(self):
        # Create a signal pipe that we can poll on
        sig_read, sig_write = socket.socketpair()

        sel = selectors.DefaultSelector()

        def sig_handler(signum, frame):
            # no-op: signals are consumed via the wakeup fd in the select loop
            ...
516 | 517 | sig_read.setblocking(False) 518 | sig_write.setblocking(False) 519 | sel.register(sig_read, selectors.EVENT_READ, None) 520 | 521 | proc_socket = self.open_proc_connector_socket() 522 | proc_socket.setblocking(False) 523 | 524 | signal.signal(signal.SIGINT, sig_handler) 525 | signal.signal(signal.SIGALRM, sig_handler) 526 | signal.setitimer(signal.ITIMER_REAL, 30, 30.0) 527 | signal.set_wakeup_fd(sig_write.fileno(), warn_on_full_buffer=False) 528 | 529 | sel.register(proc_socket, selectors.EVENT_READ, self.handle_proc_event) 530 | 531 | self.pgrep() 532 | 533 | try: 534 | while True: 535 | for key, mask in sel.select(): 536 | 537 | if key.fileobj == sig_read: 538 | sig = signal.Signals(key.fileobj.recv(1)[0]) # type: ignore 539 | if sig == signal.SIGALRM: 540 | self.check_still_alive() 541 | continue 542 | else: 543 | log.info(f"Got {sig.name}, exiting") 544 | return 545 | callback = key.data 546 | callback(key.fileobj, mask) 547 | finally: 548 | # Disable the timers for any cleanup code to run 549 | signal.setitimer(signal.ITIMER_REAL, 0) 550 | signal.set_wakeup_fd(-1) 551 | 552 | def pgrep(self): 553 | """Check for any interesting processes we might have missed.""" 554 | listener_found = False 555 | 556 | for proc in psutil.process_iter(['name', 'cmdline']): 557 | try: 558 | if proc.name() == "Runner.Worker" and proc.pid not in self.interesting_processes: 559 | log.info( 560 | "Found existing interesting processes, protecting from scale in %d: %s", 561 | proc.pid, 562 | proc.cmdline(), 563 | ) 564 | self.interesting_processes[proc.pid] = proc 565 | self.protect_from_scale_in(protect=True) 566 | self.dynamodb_atomic_decrement() 567 | if proc.name() == "Runner.Listener": 568 | listener_found = True 569 | except psutil.NoSuchProcess: 570 | # Process went away before we could 571 | pass 572 | 573 | if not listener_found: 574 | if self.in_termating_lifecycle: 575 | log.info("Runner.Listener process not found - OkayToTerminate instance") 576 | 
complete_asg_lifecycle_hook('OkayToTerminate') 577 | else: 578 | # Unprotect ourselves if somehow the runner is no longer working 579 | self.protect_from_scale_in(protect=False) 580 | 581 | def check_still_alive(self): 582 | # Check ASG status 583 | if not self.in_termating_lifecycle: 584 | state = get_lifecycle_state() 585 | if state == 'Terminating:Wait': 586 | self.in_termating_lifecycle = True 587 | self.gracefully_terminate_runner() 588 | elif state == 'Pending:Wait': 589 | complete_asg_lifecycle_hook() 590 | 591 | # proc_connector is un-reliable (UDP) so periodically check if the processes are still alive 592 | if not self.interesting_processes: 593 | self.pgrep() 594 | return 595 | 596 | # list() is used to prevent "Dict changed size during iteration" during loop below 597 | pids = list(self.interesting_processes.keys()) 598 | log.info("Checking processes %r are still alive", pids) 599 | 600 | for pid in pids: 601 | proc = self.interesting_processes[pid] 602 | if not proc.is_running() or proc.status() == psutil.STATUS_ZOMBIE: 603 | log.info("Proc %d dead but we didn't notice!", pid) 604 | del self.interesting_processes[pid] 605 | 606 | if not self.interesting_processes: 607 | log.info("No interesting processes left, unprotecting from scale in") 608 | self.protect_from_scale_in(protect=False) 609 | elif not self.protected: 610 | # If we didn't manage to protect last time, try again 611 | self.protect_from_scale_in() 612 | 613 | def gracefully_terminate_runner(self): 614 | check_call(['systemctl', 'stop', 'actions.runner', '--no-block']) 615 | 616 | def protect_from_scale_in(self, protect: bool = True): 617 | """ Set (or unset) ProtectedFromScaleIn on our instance""" 618 | if not OWN_ASG: 619 | # Not part of an ASG 620 | return 621 | 622 | if self.in_termating_lifecycle: 623 | log.info("Not trying to SetInstanceProtection, we are already in the terminating lifecycle step") 624 | return 625 | 626 | asg_client = boto3.client('autoscaling') 627 | try: 628 | 
self._protect_from_scale_in(asg_client, protect) 629 | self.protected = protect 630 | except asg_client.exceptions.ClientError as e: 631 | # This can happen if this the runner picks up a job "too quick", and the ASG still has the state 632 | # as Pending:Proceed, so we can't yet set it as protected 633 | log.warning("Failed to set scale in protection: %s", str(e)) 634 | 635 | @retry( 636 | wait=wait_exponential(multiplier=1, max=10), 637 | stop=stop_after_delay(30), 638 | before_sleep=before_sleep_log(log, logging.INFO), 639 | reraise=True, 640 | ) 641 | def _protect_from_scale_in(self, asg_client, protect): 642 | asg_client.set_instance_protection( 643 | AutoScalingGroupName=OWN_ASG, 644 | InstanceIds=[INSTANCE_ID], 645 | ProtectedFromScaleIn=protect, 646 | ) 647 | 648 | def dynamodb_atomic_decrement(self): 649 | dynamodb = boto3.client('dynamodb') 650 | try: 651 | resp = dynamodb.update_item( 652 | TableName=TABLE_NAME, 653 | Key={'id': {'S': 'queued_jobs'}}, 654 | ExpressionAttributeValues={':delta': {'N': '-1'}, ':limit': {'N': '0'}}, 655 | UpdateExpression='ADD queued :delta', 656 | # Make sure it never goes below zero! 
657 | ConditionExpression='queued > :limit', 658 | ReturnValues='UPDATED_NEW', 659 | ) 660 | 661 | log.info("Updated DynamoDB queue length: %s", resp['Attributes']['queued']['N']) 662 | except dynamodb.exceptions.ConditionalCheckFailedException: 663 | log.warning("%s.queued was already 0, we won't decrease it any further!", TABLE_NAME) 664 | 665 | def handle_proc_event(self, sock, mask): 666 | try: 667 | data, (nlpid, nlgrps) = sock.recvfrom(1024) 668 | except OSError as e: 669 | if e.errno == errno.ENOBUFS: 670 | return 671 | raise 672 | if nlpid != 0: 673 | # Ignore messages from non-root processes 674 | return 675 | 676 | event, detail = proc_event.from_netlink_packet(data) 677 | if event.what == ProcEventWhat.EXEC: 678 | try: 679 | proc = psutil.Process(detail.pid) 680 | 681 | with proc.oneshot(): 682 | if proc.name() == "Runner.Worker": 683 | log.info( 684 | "Found new interesting processes, protecting from scale in %d: %s", 685 | detail.pid, 686 | proc.cmdline(), 687 | ) 688 | self.interesting_processes[detail.pid] = proc 689 | self.protect_from_scale_in(protect=True) 690 | self.dynamodb_atomic_decrement() 691 | 692 | except psutil.NoSuchProcess: 693 | # We lost the race, process has already exited. 
If it was that short lived it wasn't that 694 | # interesting anyway 695 | pass 696 | elif event.what == ProcEventWhat.EXIT: 697 | if detail.pid in self.interesting_processes: 698 | log.info("Interesting process %d exited", detail.pid) 699 | del self.interesting_processes[detail.pid] 700 | 701 | if not self.interesting_processes: 702 | log.info("Watching no processes, disabling termination protection") 703 | self.protect_from_scale_in(protect=False) 704 | elif self.in_termating_lifecycle: 705 | try: 706 | proc = psutil.Process(detail.pid) 707 | if proc.name() == "Runner.Listener": 708 | log.info("Runner.Listener process %d exited - OkayToTerminate instance", detail.pid) 709 | complete_asg_lifecycle_hook('OkayToTerminate') 710 | except psutil.NoSuchProcess: 711 | # We lost the race, process has already exited. If it was that short lived it wasn't that 712 | # interesting anyway 713 | pass 714 | 715 | def open_proc_connector_socket(self) -> socket.socket: 716 | """Open and set up a socket connected to the kernel's Proc Connector event stream 717 | 718 | This uses the Netlink family of socket, the Connector message type and the proc_event connector to get 719 | send a (UDP) message whenever a process starts or exits. 720 | 721 | We use this mechansim to get notified when processes start or stop, so we can watch for the 722 | "Runner.Worker" and enable/disable termination protection. 
723 | """ 724 | 725 | class bpf_insn(ctypes.Structure): 726 | """"The BPF instruction data structure""" 727 | 728 | _fields_ = [ 729 | ("code", ctypes.c_ushort), 730 | ("jt", ctypes.c_ubyte), 731 | ("jf", ctypes.c_ubyte), 732 | ("k", ctypes.c_uint32), 733 | ] 734 | 735 | class bpf_program(ctypes.Structure): 736 | """"Structure for BIOCSETF""" 737 | 738 | _fields_ = [("bf_len", ctypes.c_uint), ("bf_insns", ctypes.POINTER(bpf_insn))] 739 | 740 | def __init__(self, program): 741 | self.bf_len = len(program) 742 | bpf_insn_array = bpf_insn * self.bf_len 743 | self.bf_insns = bpf_insn_array() 744 | 745 | # Fill the pointer 746 | for i, insn in enumerate(program): 747 | self.bf_insns[i] = insn 748 | 749 | def bpf_jump(code, k, jt, jf) -> bpf_insn: 750 | """ 751 | :param code: BPF instruction op codes 752 | :param k: argument 753 | :param jt: jump offset if true 754 | :param jf: jump offset if false 755 | """ 756 | return bpf_insn(code, jt, jf, k) 757 | 758 | def bpf_stmt(code, k): 759 | return bpf_jump(code, k, 0, 0) 760 | 761 | def packet_filter_prog(): 762 | """ 763 | A Berkley Packet Filter program to filter down the "firehose" of info we receive over the netlink 764 | socket. 765 | 766 | The Proc Connector doesn't provide any easy way to filter out the firehose of package events, and 767 | while we could ignore the things we don't care about in Python, it's more efficient to never 768 | receive those packets. "Luckily" there is the BPF, or Berkley Packet Filter, which can operate on 769 | any socket. This BPF program was taken from 770 | https://web.archive.org/web/20130601175512/https://netsplit.com/2011/02/09/the-proc-connector-and-socket-filters/ 771 | """ 772 | # A subset of Berkeley Packet Filter constants and macros, as defined in linux/filter.h. 
773 | 774 | # Instruction classes 775 | BPF_LD = 0x00 776 | BPF_JMP = 0x05 777 | BPF_RET = 0x06 778 | 779 | # ld/ldx fields 780 | BPF_W = 0x00 781 | BPF_H = 0x08 782 | BPF_ABS = 0x20 783 | 784 | # alu/jmp fields 785 | BPF_JEQ = 0x10 786 | BPF_K = 0x00 787 | 788 | return bpf_program( 789 | [ 790 | # Load 16-bit ("half"-word) nlmsg.type field 791 | bpf_stmt(BPF_LD | BPF_H | BPF_ABS, NLMsgHdr.type.offset), 792 | bpf_jump(BPF_JMP | BPF_JEQ | BPF_K, socket.htons(NlMsgFlag.Done), 1, 0), 793 | # Not NlMsgFlag.Done, return whole packet 794 | bpf_stmt(BPF_RET | BPF_K, 0xFFFFFFFF), 795 | # 796 | # Load 32-bit (word) cb_id_idx field 797 | bpf_stmt(BPF_LD | BPF_W | BPF_ABS, ctypes.sizeof(NLMsgHdr) + cn_msg.cb_id_idx.offset), 798 | bpf_jump(BPF_JMP | BPF_JEQ | BPF_K, socket.htonl(cn_msg.CN_IDX_PROC), 1, 0), 799 | # If not CN_IDX_PROC, return whole packet 800 | bpf_stmt(BPF_RET | BPF_K, 0xFFFFFFFF), 801 | # 802 | # Load cb_id_val field 803 | bpf_stmt(BPF_LD | BPF_W | BPF_ABS, ctypes.sizeof(NLMsgHdr) + cn_msg.cb_id_val.offset), 804 | bpf_jump(BPF_JMP | BPF_JEQ | BPF_K, socket.htonl(cn_msg.CN_VAL_PROC), 1, 0), 805 | # If not CN_VAL_PROC, return whole packet 806 | bpf_stmt(BPF_RET | BPF_K, 0xFFFFFFFF), 807 | # 808 | # If not ProcEventWhat.EXEC or ProcEventWhat.EXIT, event, filter out the packet 809 | bpf_stmt( 810 | BPF_LD | BPF_W | BPF_ABS, 811 | ctypes.sizeof(NLMsgHdr) + ctypes.sizeof(cn_msg) + proc_event.what.offset, 812 | ), 813 | bpf_jump(BPF_JMP | BPF_JEQ | BPF_K, socket.htonl(ProcEventWhat.EXEC), 2, 0), 814 | bpf_jump(BPF_JMP | BPF_JEQ | BPF_K, socket.htonl(ProcEventWhat.EXIT), 1, 0), 815 | bpf_stmt(BPF_RET | BPF_K, 0x0), 816 | # Return everything 817 | bpf_stmt(BPF_RET | BPF_K, 0xFFFFFFFF), 818 | ] 819 | ) 820 | 821 | # Create Netlink socket 822 | 823 | # Missing from most/all pythons 824 | NETLINK_CONNECTOR = getattr(socket, "NETLINK_CONNECTOR", 11) 825 | SO_ATTACH_FILTER = getattr(socket, "SO_ATTACH_FILTER", 26) 826 | 827 | sock = socket.socket(socket.AF_NETLINK, 
socket.SOCK_DGRAM, NETLINK_CONNECTOR) 828 | 829 | filter_prog = packet_filter_prog() 830 | sock.setsockopt(socket.SOL_SOCKET, SO_ATTACH_FILTER, bytes(filter_prog)) # type: ignore 831 | 832 | sock.bind((os.getpid(), cn_msg.CN_IDX_PROC)) 833 | 834 | # Send PROC_CN_MCAST_LISTEN to start receiving messages 835 | msg = cn_msg( 836 | header=NLMsgHdr(type=NlMsgFlag.Done, pid=os.getpid()), 837 | cb_id_idx=cn_msg.CN_IDX_PROC, 838 | cb_id_val=cn_msg.CN_VAL_PROC, 839 | seq=0, 840 | ack=0, 841 | data=ctypes.c_uint32(ProcConnectorOp.MCAST_LISTEN), 842 | ) 843 | 844 | data = msg.to_bytes() 845 | if sock.send(data) != len(data): 846 | raise RuntimeError("Failed to send PROC_CN_MCAST_LISTEN") 847 | 848 | return sock 849 | 850 | 851 | if __name__ == "__main__": 852 | main() 853 | --------------------------------------------------------------------------------