├── README.md ├── file.txt.gz ├── pythonLambda └── python_main.py └── rustLambda ├── Cargo.toml └── src └── main.rs /README.md: -------------------------------------------------------------------------------- 1 | # PythonVsRustAWSLambda 2 | Testing the runtime difference between Python and Rust for AWS Lambda. 3 | 4 | All AWS Lambda's must include a handler function `lambda_handler`, this is what 5 | the lambda runtime will call be default. 6 | 7 | When triggering these lambdas off an s3 event, it's important to note more 8 | than one event might run though that lambda invocation, hence the 9 | inital loop. 10 | 11 | See the full blog post here https://www.confessionsofadataguy.com/aws-lambdas-python-vs-rust-performance-and-cost-savings/ 12 | 13 | `Rust` lambda. 14 | You will need to add the crate `cargo-lambda`. It helps you package and build 15 | the bootstrip binary needed for deployment. 16 | To build the `bootstrap.zip` that your Rust AWS Lambda needs, run `cargo lambda build --release --output-format zip` 17 | -------------------------------------------------------------------------------- /file.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danielbeach/PythonVsRustAWSLambda/7498c440187fb857ecd5d53cccdb2a4f20127b43/file.txt.gz -------------------------------------------------------------------------------- /pythonLambda/python_main.py: -------------------------------------------------------------------------------- 1 | from urllib.parse import unquote_plus 2 | import boto3 3 | from io import BytesIO 4 | import gzip 5 | 6 | fixed_widths = { 7 | "date": [0, 15], 8 | "serial_number": [15, 36], 9 | "model": [36, 79], 10 | "capacity_bytes": [79, 98], 11 | "failure": [98, 109] 12 | } 13 | 14 | 15 | def download(s3_client: object, bucket: str, key: str) -> BytesIO: 16 | file_object = BytesIO() 17 | s3_client.download_fileobj(bucket, key, file_object) 18 | file_object.seek(0) 19 | return 
file_object 20 | 21 | 22 | def read_mem_file(fo: BytesIO) -> list: 23 | with gzip.open(fo, mode="rt") as f: 24 | rows = f.readlines() 25 | return rows 26 | 27 | 28 | def convert_row_to_tab(raw_row: str, meta: dict) -> str: 29 | row = '' 30 | for k, v in meta.items(): 31 | column_value = raw_row[v[0]:v[1]] 32 | row += column_value.strip() + '\t' 33 | return row + '\n' 34 | 35 | 36 | def rows_to_file_object_gz(rws: list): 37 | with gzip.open('/tmp/file.gz', 'wt') as f: 38 | f.writelines(rws) 39 | 40 | 41 | def file_object_to_s3(s3_client: object, bucket: str, key: str) -> None: 42 | s3_client.upload_file('/tmp/file.gz', bucket, 43 | key.replace('.gz', '_tab.gz').replace('fixed_width_raw', 'tab_converted')) 44 | 45 | 46 | def lambda_handler(event, _): 47 | s3_client = boto3.client('s3') 48 | for record in event['Records']: 49 | bucket = record['s3']['bucket']['name'] 50 | key = unquote_plus(record['s3']['object']['key']) 51 | fo = download(s3_client, bucket, key) 52 | data = read_mem_file(fo) 53 | tab_rows = [] 54 | for row in data: 55 | tab_row = convert_row_to_tab(raw_row=row, meta=fixed_widths) 56 | tab_rows.append(tab_row) 57 | rows_to_file_object_gz(tab_rows) 58 | file_object_to_s3(s3_client, bucket, key) 59 | -------------------------------------------------------------------------------- /rustLambda/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rustLambda" 3 | version = "0.1.0" 4 | edition = "2021" 5 | autobins = false 6 | 7 | [[bin]] 8 | name = "bootstrap" 9 | path = "src/main.rs" 10 | 11 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 12 | 13 | [dependencies] 14 | aws-config = "0.54.1" 15 | aws-sdk-s3 = "0.24.0" 16 | aws_lambda_events = "0.7.3" 17 | flate2 = "1.0.25" 18 | lambda_runtime = "0.7.3" 19 | log = "0.4.17" 20 | serde = "1.0.152" 21 | serde_json = "1.0.93" 22 | tokio = {version = "1.25.0", features = ["full"]} 23 | 
tracing-subscriber = "0.3.16"


--------------------------------------------------------------------------------
/rustLambda/src/main.rs:
--------------------------------------------------------------------------------
//! AWS Lambda that converts gzip-compressed fixed-width files in S3 into
//! gzip-compressed tab-delimited files.

use lambda_runtime::{handler_fn, Error};
use aws_lambda_events::s3::S3Event;
use serde_json::Value;
use flate2::read::GzDecoder;
use flate2::write::GzEncoder;
use flate2::GzBuilder;
use flate2::Compression;
use std::io::Read;
use std::io::prelude::*;
use std::io::BufReader;
use std::fs::File;

/// Scratch file the compressed output is staged in before it is uploaded.
const TMP_PATH: &str = "/tmp/file.gz";

/// Byte ranges (start inclusive, end exclusive) of the fixed-width columns, in
/// output order: date, serial_number, model, capacity_bytes, failure.
///
/// These match the `fixed_widths` table used by the Python implementation.
const COLUMNS: [(usize, usize); 5] = [(0, 15), (15, 36), (36, 79), (79, 98), (98, 109)];

#[tokio::main]
async fn main() -> Result<(), lambda_runtime::Error> {
    // NOTE(review): `handler_fn` is the older lambda_runtime entry point; newer
    // releases expose `service_fn` + `LambdaEvent` instead. Confirm this builds
    // against the lambda_runtime version pinned in Cargo.toml.
    let func = handler_fn(handler);
    lambda_runtime::run(func).await?;
    Ok(())
}

/// Decodes an object key as delivered in an S3 event notification.
///
/// `+` becomes a space and `%XX` becomes the byte with hex value `XX`. A `%`
/// that is not followed by two hex digits is kept literally. Byte sequences
/// that are not valid UTF-8 after decoding are replaced with U+FFFD.
fn decode_s3_key(raw: &str) -> String {
    let bytes = raw.as_bytes();
    let mut decoded: Vec<u8> = Vec::with_capacity(bytes.len());
    let mut i = 0;
    while i < bytes.len() {
        let escaped = if bytes[i] == b'%' {
            bytes.get(i + 1).zip(bytes.get(i + 2)).and_then(|(&hi, &lo)| {
                let hi = char::from(hi).to_digit(16)?;
                let lo = char::from(lo).to_digit(16)?;
                u8::try_from(hi * 16 + lo).ok()
            })
        } else {
            None
        };
        match (escaped, bytes[i]) {
            (Some(byte), _) => {
                decoded.push(byte);
                i += 3;
            }
            (None, b'+') => {
                decoded.push(b' ');
                i += 1;
            }
            (None, other) => {
                decoded.push(other);
                i += 1;
            }
        }
    }
    String::from_utf8_lossy(&decoded).into_owned()
}

/// Returns the trimmed text of `line` between byte offsets `start` and `end`.
///
/// The range is clamped to the line length, so a short line yields a shorter
/// (possibly empty) field instead of panicking.
fn column(line: &str, start: usize, end: usize) -> &str {
    let end = end.min(line.len());
    let start = start.min(end);
    // `get` returns None when a bound falls inside a multi-byte character;
    // treat that as an empty field rather than panicking.
    line.get(start..end).unwrap_or("").trim()
}

/// Converts fixed-width text into tab-delimited text, one output line per input line.
///
/// The first input line is skipped (presumably a header row — confirm against
/// the input data). Fields are separated by a single tab and each line ends
/// with `\n`.
fn fixed_width_to_tab(data: &str) -> String {
    let mut out = String::with_capacity(data.len());
    for line in data.lines().skip(1) {
        for (idx, &(start, end)) in COLUMNS.iter().enumerate() {
            if idx > 0 {
                out.push('\t');
            }
            out.push_str(column(line, start, end));
        }
        out.push('\n');
    }
    out
}

/// Handles one Lambda invocation carrying an S3 event.
///
/// For every record in the event: downloads the object, gunzips it, converts
/// it with [`fixed_width_to_tab`], gzips the result via [`TMP_PATH`], and
/// uploads it to the same bucket under the key with `fixed_width_raw/`
/// replaced by `tab_converted/`.
///
/// # Errors
///
/// Returns an error (instead of panicking) when the payload is not an S3
/// event, a record has no bucket name or key, an S3 request fails, the object
/// is not valid gzip/UTF-8, or the scratch file cannot be written or read.
async fn handler(req: Value, _ctx: lambda_runtime::Context) -> Result<(), Error> {
    let events: S3Event = serde_json::from_value(req)?;

    // Build the client once; it does not depend on the individual record.
    let config: aws_config::SdkConfig = aws_config::load_from_env().await;
    let s3_client: aws_sdk_s3::Client = aws_sdk_s3::Client::new(&config);

    for e in events.records {
        let bucket_name: String = e.s3.bucket.name.ok_or("Unable to get s3 bucket name.")?;
        let raw_key: String = e.s3.object.key.ok_or("unable to get s3 file key")?;
        // Keys in S3 event notifications are URL-encoded; decode before use.
        let key = decode_s3_key(&raw_key);

        let data = s3_client
            .get_object()
            .bucket(&bucket_name)
            .key(&key)
            .send()
            .await?
            .body
            .collect()
            .await?
            .into_bytes();

        let mut csv_data = String::new();
        GzDecoder::new(&data[..]).read_to_string(&mut csv_data)?;

        let tab_converted = fixed_width_to_tab(&csv_data);

        let f: File = File::create(TMP_PATH)?;
        let mut gz: GzEncoder<File> = GzBuilder::new()
            .filename("tab_converted.txt")
            .write(f, Compression::default());
        gz.write_all(tab_converted.as_bytes())?;
        // `finish` writes the gzip trailer; without it the file is truncated.
        gz.finish()?;

        let mut reader: BufReader<File> = BufReader::new(File::open(TMP_PATH)?);
        let mut buffer: Vec<u8> = Vec::new();
        reader.read_to_end(&mut buffer)?;

        let remote_uri = key.replace("fixed_width_raw/", "tab_converted/");
        s3_client
            .put_object()
            .bucket(&bucket_name)
            .key(remote_uri)
            .body(buffer.into())
            .send()
            .await?;
    }
    Ok(())
}

--------------------------------------------------------------------------------