├── .gitignore ├── bootstrap ├── runtime.R ├── library ├── instance_handling.py └── ssh_connection.py ├── r_package_layer.py ├── setup_r_instance.py └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.zip 2 | R/* 3 | .idea 4 | *test* 5 | -------------------------------------------------------------------------------- /bootstrap: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | while true 4 | do 5 | HEADERS="$(mktemp)" 6 | 7 | EVENT_DATA=$(curl -sS -LD "$HEADERS" -X GET "http://${AWS_LAMBDA_RUNTIME_API}/2018-06-01/runtime/invocation/next") 8 | REQUEST_ID=$(grep -Fi Lambda-Runtime-Aws-Request-Id "$HEADERS" | tr -d '[:space:]' | cut -d: -f2) 9 | 10 | RESPONSE=$(/opt/R/bin/Rscript /opt/runtime.R $EVENT_DATA) 11 | RESPONSE_CODE=$? 12 | 13 | if [ $RESPONSE_CODE = 0 ]; then 14 | OUT="response" 15 | elif [ $RESPONSE_CODE = 100 ]; then 16 | OUT="error" 17 | fi 18 | 19 | curl -X POST "http://${AWS_LAMBDA_RUNTIME_API}/2018-06-01/runtime/invocation/$REQUEST_ID/$OUT" -d "$RESPONSE" 20 | done 21 | -------------------------------------------------------------------------------- /runtime.R: -------------------------------------------------------------------------------- 1 | output <- tryCatch( 2 | { 3 | library(jsonlite) 4 | 5 | HANDLER <- Sys.getenv("_HANDLER") 6 | args <- commandArgs(trailingOnly = TRUE) 7 | EVENT_DATA <- args[1] 8 | 9 | HANDLER_split <- strsplit(HANDLER, ".", fixed = TRUE)[[1]] 10 | file_name <- paste0(HANDLER_split[1], ".R") 11 | function_name <- HANDLER_split[2] 12 | source(file_name) 13 | params <- fromJSON(EVENT_DATA) 14 | output <- tryCatch( 15 | list(out = do.call(function_name, params), quit_status = 0), 16 | error = function(e) { 17 | list(out = e$message, quit_status = 100) 18 | } 19 | ) 20 | 21 | list(out = output$out, quit_status = output$quit_status) 22 | }, 23 | error = function(e) { 24 | list(out = e$message, quit_status = 100) 25 | } 26 | ) 27 | 28 | output$out 29 | quit(status = output$quit_status) 30 | -------------------------------------------------------------------------------- /library/instance_handling.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | 5 | def check_server_status(instance_id): 6 | command = f"aws ec2 describe-instance-status --instance-id {instance_id} --query 'InstanceStatuses[0].SystemStatus.Status' --output text --output text" 7 | return os.popen(command).read().strip() 8 | 9 | 10 | def setup_instance(ami_id, instance_type, key_name): 11 | instance_id = os.popen( 12 | f"aws ec2 run-instances --image-id {ami_id} --count 1 --instance-type {instance_type} --key-name {key_name} --query 'Instances[0].InstanceId' --output text" 13 | ).read().strip() 14 | 15 | my_server_status = check_server_status(instance_id) 16 | 17 | while my_server_status != "ok": 18 | print("Waiting for instance") 19 | time.sleep(10) 20 | my_server_status = check_server_status(instance_id) 21 | 22 | instance_ip = os.popen( 23 | f"aws ec2 describe-instances --instance-id {instance_id} --query 'Reservations[0].Instances[0].PublicIpAddress' --output text" 24 | ).read().strip() 25 | 26 | return instance_ip, instance_id 27 | 28 | def terminate_instance(instance_id): 29 | os.system( 30 | f"aws ec2 terminate-instances --instance-ids {instance_id}" 31 | ) 32 | -------------------------------------------------------------------------------- /library/ssh_connection.py: -------------------------------------------------------------------------------- 1 | import paramiko 2 | 3 | class Ssh: 4 | client = None 5 | 6 | def __init__(self, ip, key_path): 7 | 8 | try: 9 | cert = paramiko.RSAKey.from_private_key_file(key_path) 10 | self.client = paramiko.SSHClient() 11 | self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) 12 | print("connecting...") 13 | self.client.connect(hostname=ip, username="ec2-user", pkey=cert) 14 | print("connected!!!") 15 | 16 | 17 | except: 18 | print("Connection Failed!!!") 19 | exit(1) 20 | 21 | def send_command(self, command, verbose=True): 22 | 23 | try: 24 | stdin, stdout, stderr = self.client.exec_command(command) 25 | if verbose: 26 | out = stdout.readlines() 27 | for outs in out: 28 | print(outs.split('\n')[0]) 29 | except: 30 | print("Command execution failed!") 31 | exit(1) 32 | 33 | def close(self): 34 | try: 35 | self.client.close() 36 | except: 37 | print("Closing connection failed!") 38 | exit(1) 39 | 40 | def upload_file(self, local_file, remote_destination): 41 | try: 42 | sftp = self.client.open_sftp() 43 | sftp.put(local_file, remote_destination) 44 | sftp.close() 45 | except: 46 | print("File upload failed!") 47 | exit(1) 48 | 49 | def download_file(self, remote_file, local_destination): 50 | try: 51 | sftp = self.client.open_sftp() 52 | sftp.get(remote_file, local_destination) 53 | sftp.close() 54 | except: 55 | print("File download failed!") 56 | exit(1) 57 | -------------------------------------------------------------------------------- /r_package_layer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import os 5 | from library.ssh_connection import Ssh 6 | import library.instance_handling as instance 7 | 8 | parser = argparse.ArgumentParser() 9 | 10 | parser.add_argument("-k", "--key-path", action="store", dest="key_path", 11 | help="Path to the AWS key", required=True) 12 | parser.add_argument("-m", "--ami-id", action="store", dest="ami_id", 13 | help="id of the R Lambda AMI", required=True) 14 | parser.add_argument("-p", "--package", action="store", dest="packages", 15 | help="R packages", required=True) 16 | parser.add_argument("-t", "--terminate", action="store", dest="terminate", 17 | default=True, help="terminate instance (default: %(default)s)") 18 | parser.add_argument("-i", "--instance-type", action="store", dest="instance_type", 19 | default="t2.micro", 20 | help="instance type (default: %(default)s)") 21 | 22 | arguments = parser.parse_args() 23 | 24 | key_path = os.path.expanduser(arguments.key_path) 25 | key_name = os.path.splitext(os.path.basename(key_path))[0] 26 | 27 | print("Instance setup") 28 | my_server_ip, my_server_id = instance.setup_instance(arguments.ami_id, arguments.instance_type, key_name) 29 | 30 | print("Connecting to server") 31 | 32 | connection = Ssh(ip = my_server_ip, key_path = key_path) 33 | 34 | print("Installing R packages") 35 | 36 | connection.send_command("mkdir -p /opt/R/new_library/R/library") 37 | 38 | packages = arguments.packages.replace(',', '').split() 39 | 40 | if os.path.isfile("tmp.R"): 41 | os.remove("tmp.R") 42 | 43 | with open("tmp.R", "a") as file: 44 | file.write("chooseCRANmirror(graphics=FALSE, ind=34)\n") 45 | for package in packages: 46 | file.write("install.packages(\'" + package + "\', lib = \'/opt/R/new_library/R/library\')\n") 47 | 48 | connection.upload_file("tmp.R", "/home/ec2-user/tmp.R") 49 | connection.send_command("/opt/R/bin/Rscript /home/ec2-user/tmp.R") 50 | 51 | os.remove("tmp.R") 52 | 53 | print("Download packages") 54 | 55 | connection.send_command("cd /opt/R/new_library && zip -r -q packages.zip R/") 56 | connection.download_file("/opt/R/new_library/packages.zip", "packages.zip") 57 | 58 | if arguments.terminate: 59 | instance.terminate_instance(my_server_id) 60 | -------------------------------------------------------------------------------- /setup_r_instance.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import os 5 | import time 6 | from library.ssh_connection import Ssh 7 | import library.instance_handling as instance 8 | 9 | parser = argparse.ArgumentParser() 10 | 11 | parser.add_argument("-r", "--r-version", action="store", default="3.5.1", 12 | dest="r_version", help="R version (default: %(default)s)") 13 | parser.add_argument("-k", "--key-path", action="store", dest="key_path", 14 | help="Path to the AWS key", required=True) 15 | parser.add_argument("-a", "--action", action="store", dest="action", 16 | default="build_r", 17 | choices=["build_r", "create_ami"], 18 | help="build R archive or create AMI (default: %(default)s; choices: build_r, create_ami)") 19 | parser.add_argument("-t", "--terminate", action="store", dest="terminate", 20 | default=True, help="terminate instance (default: %(default)s)") 21 | parser.add_argument("-i", "--instance-type", action="store", dest="instance_type", 22 | default="t2.micro", 23 | help="instance type [default: %(default)s]") 24 | parser.add_argument("-n", "--name-ami", action="store", dest="ami_name", help="name of the created AMI image (required only if --action=create_ami)") 25 | 26 | arguments = parser.parse_args() 27 | 28 | key_path = os.path.expanduser(arguments.key_path) 29 | key_name = os.path.splitext(os.path.basename(key_path))[0] 30 | 31 | if arguments.action == "create_ami": 32 | existing_ami_name = os.popen("aws ec2 describe-images --filters \'Name=name,Values=" + arguments.ami_name + "\' --query 'Images[0]' --output text").read().strip() 33 | if existing_ami_name != "None": 34 | print("AMI name not available") 35 | exit(-1) 36 | 37 | 38 | ami_id = os.popen("aws ec2 describe-images --filters 'Name=name,Values=amzn-ami-hvm-2017.03.1.20170812-x86_64-gp2' --query 'Images[0].ImageId'").read().strip() 39 | 40 | print("Instance setup") 41 | my_server_ip, my_server_id = instance.setup_instance(ami_id, arguments.instance_type, key_name) 42 | 43 | print("Connecting to server") 44 | 45 | connection = Ssh(ip = my_server_ip, key_path = key_path) 46 | 47 | print("Installing R") 48 | 49 | connection.upload_file("build_r.sh", "/home/ec2-user/build_r.sh") 50 | connection.send_command("chmod +x /home/ec2-user/build_r.sh") 51 | connection.send_command("cd /home/ec2-user && ./build_r.sh " + arguments.r_version) 52 | 53 | print("R installed") 54 | 55 | if arguments.action == "build_r": 56 | try: 57 | connection.download_file("/opt/R/R.zip", "R.zip") 58 | print("R downloaded") 59 | except: 60 | print("") 61 | elif arguments.action == "create_ami": 62 | r_lambda_ami_id = os.popen( 63 | f"aws ec2 create-image --instance-id {my_server_id} --name {arguments.ami_name} --description 'Lambda AMI with R' --query 'ImageId' --output text" 64 | ).read().strip() 65 | ami_state = os.popen( 66 | f"aws ec2 describe-images --image-id {r_lambda_ami_id} --query 'Images[0].State' --output text" 67 | ).read().strip() 68 | while ami_state != "available": 69 | print("Waiting for AMI") 70 | time.sleep(10) 71 | ami_state = os.popen( 72 | "aws ec2 describe-images --image-id " + r_lambda_ami_id + " --query 'Images[0].State' --output text" 73 | ).read().strip() 74 | print("AMI id: " + r_lambda_ami_id) 75 | else: 76 | print("Not a valid action") 77 | 78 | if arguments.terminate: 79 | instance.terminate_instance(my_server_id) 80 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AWS Lambda runtime for R 2 | 3 | This repository provides runtime for running `R` on AWS Lambda. It also provides a workflow for using dependecies like R packages by creating additional layers. 4 | 5 | AWS Lambda is a serverless solution for running scripts triggered by various events. It supports several runtimes by default (including `Python`, `Go`, `Java`, `C#`, `Node.js`, `Ruby` and `PowerShell`), but it is possible to create custom runtime environments to use almost any other, non-supported language. 6 | 7 | This approach uses AWS Lambda component, namely _layers_, as containers for `R` environment and provided packages. In your lambda function, you can use pre-built layers we provide or you can create your own ones with specific content (different `R` version, custom packages, etc.). 8 | 9 | ## Using R in AWS Lambda 10 | 11 | Using R scripts in AWS Lambda is just a few steps: 12 | 13 | 1. In [AWS Console](https://console.aws.amazon.com/lambda) create a new function (suggested region: `eu-central-1`). Choose `Custom runtime`. 14 | 2. After creating new function: 15 | 1. In _Function code_ create a new script with `.R` extension, for example `my_script.R`. 16 | 2. Paste function code inside, for example `hello_world <- function() { "Hello world!" }` 17 | 3. Change `Handler` to `[script file name].[function name]`. 18 | 4. Delete `bootstrap` and `hello.sh` files created by AWS. We don't need them, because the R runtime provides its own version of `bootstrap`. 19 | 5. In _Basic settings_, change `Timeout` to 30 seconds or more. Since `R` is not a speed demon, the default value (3 seconds) is too low. 20 | 3. Add R layer: 21 | 1. In _Designer_ panel click _Layers_ to open layers configuration for your Lambda. This is where we need to add layer containing the `R` runtime. 22 | 2. Go to `Currently available layers` below and choose the base layer in your region. Add a layer giving its ARN. For example, for `eu-central-1`, the ARN is `arn:aws:lambda:eu-central-1:599651763768:layer:basic-r:1`. 23 | 3. If your code needs any additonal packages, they should be added additional layers. Go to **Creating custom layers** below to learn how to create such layers. After you create a layers, simply add it by providing layer ARN. You can provide up to 5 layers. Lambda will look for required dependencies (e.g. packages) in them, in provided order. **Always use R layer as the first one.** Packages should be included as additional layers. 24 | 4. That's it! Now you can save and test your function. Remember to provide proper input data in JSON format - in our example it should be empty. 25 | 26 | ### Pre-built layers 27 | 28 | In your Lambda you can use published pre-built layers. 29 | 30 | ##### Currently available layers: 31 | 32 | | layer arn | region | content | 33 | | -------------------------------------------------------- | ------------ | ------------------------------------- | 34 | | arn:aws:lambda:eu-central-1:599651763768:layer:basic-r:1 | eu-central-1 | R 3.5.1 | 35 | | arn:aws:lambda:eu-central-1:599651763768:layer:dplyr | eu-central-1 | dplyr (with dependencies) for R 3.5.1 | 36 | 37 | **A layer can be used only in the provided region!** If there's no layer for your region, you can create one (for how to do that see _Basic R layer_ below) or open an issue in this repo asking for one. 38 | 39 | ## Creating custom R layers 40 | 41 | If you need packages that are not provided in the prebuilt layers, you neet to create additional layers containing them. See [R packages Layer](#r-packages-layer) for instructions how to do that. You'll create an instance, install packages and extract them into a Lambda layer. We provide an AMI and scripts that make this straightforward. 42 | 43 | If you need a different version of R than provided, you'll need to create a basic `R` layer, follow the instruction provided in [Basic R Layer](#basic-r-layer) section. Creating AMI (to use it later for `R` packages layers) is described in [Lambda AMI with R](#lambda-ami-with-r) section. 44 | 45 | #### Configuring the AWS services 46 | 47 | To be able to use this workflow, you have to configure AWS services. 48 | 49 | 1. Install/upgrade aws cli: `pip install awscli --upgrade --user` 50 | 2. Configure credentials: `aws configure` (provide: AWS Access Key ID, AWS Secret Access Key, Default region name) 51 | 3. You need a Key Pair to be able to connect to EC2 instances. If you do not have one, you can create it in the Amazon EC2 console or using `aws cli`: `aws ec2 create-key-pair --key-name [key name] --query 'KeyMaterial' --output text >> [file name].pem` (**Important:** Key file name has to be the same as the key name!). You will have to provide the path to the private key as a script argument (`-k` flag) in the next steps. 52 | 53 | Note: the instance will use your default security group. Make sure that it is open for incoming traffic from your IP on port 22 so that the script can connect and install needed packages on the instance. 54 | 55 | ### Basic R Layer 56 | 57 | 1. Run `./setup_r_instance.py -k [path to private key]`. It will create an EC2 instance, install R and download the archive `R.zip`. Check `./setup_r_instance.py --help` for options. You have to provide at least the path to the private key (`-k`). Script by default terminates the instance. If you want to prevent it set `-t=False`. 58 | 2. Run `./build_runtime.sh` script. It will create an archive `runtime.zip` with R runtime for AWS Lambda. 59 | 3. Create a new layer: `aws lambda publish-layer-version --layer-name [layer name] --zip-file fileb://runtime.zip` 60 | 61 | ### Lambda AMI with R 62 | 63 | 1. Run `./setup_r_instance.py -k [path to private key] -a create_ami -n [new ami name]`. It will create EC2 instance, install R and create AMI. Check `./setup_r_instance.py --help` for options. You have to provide at least three parameters: `-k` path to the private key (e.g. `~/.ssh/key.pem`); `-a create_ami` action; `-n` AMI name. Script by deafult terminates the instance. If you want to prevent it set `-t=False`. Script will create AMI. 64 | 65 | ### R packages Layer 66 | 67 | 1. Run `./r_package_layer.py -k [path to private key] -m [R Lambda AMI id] -p [packages to install]`. It will create an instance from AMI with R preinstalled, install required packages and download archive `packages.zip`. Check `./r_package_layer.py --help` for options. You have to provide at least three parameters: `-k` path to the private key (e.g. `~/.ssh/key.pem`); `-m` Lambda AMI with preinstalled R id; `-p` packages to install (if more than one, pass in quotes e.g. `"glue, stringr"`). Script by default terminates the instance. If you want to prevent it set `-t=False`. 68 | 2. Create a new layer: `aws lambda publish-layer-version --layer-name [layer name] --zip-file fileb://packages.zip` 69 | 70 | #### Pre-built AMI 71 | 72 | | AMI name | AMI id | region | conent | 73 | | --------------- | --------------------- | ------------ | ------- | 74 | | r-lambda-ami_id | ami-0a1147e8e86aa6175 | eu-central-1 | R 3.5.1 | 75 | | | | | | 76 | --------------------------------------------------------------------------------