├── Banyan ├── deps │ ├── build.log │ └── build.jl ├── .swp ├── test │ ├── res │ │ ├── Banyanfile.json │ │ └── Banyanfile_badcluster.json │ ├── data │ │ ├── multiple_rowgroups.parquet │ │ └── iris.csv │ ├── runtests.jl │ └── test_cluster.jl ├── src │ ├── utils_abstract_types.jl │ ├── id.jl │ ├── job.jl │ ├── future.jl │ ├── tasks.jl │ ├── queues.jl │ ├── futures.jl │ ├── Banyan.jl │ ├── jobs.jl │ ├── samples.jl │ ├── partitions.jl │ ├── pt_lib_constructors.jl │ ├── requests.jl │ ├── utils.jl │ └── clusters.jl ├── res │ ├── Banyanfile.json │ ├── pt_lib_info.json │ └── utils.jl ├── scripts │ └── deploy_pt_lib.py ├── Project.toml └── LICENSE ├── .vscode └── settings.json ├── BanyanArrays ├── test │ ├── res │ │ └── Banyanfile.json │ ├── blas.jl │ ├── haversine.jl │ ├── test_simple.jl │ ├── shallow_water.jl │ ├── test_l3.jl │ ├── runtests.jl │ └── test_l1_l2.jl ├── res │ ├── Banyanfile.json │ └── utils_ba.jl ├── src │ └── BanyanArrays.jl ├── Project.toml └── LICENSE ├── BanyanDataFrames ├── res │ └── Banyanfile.json ├── test │ ├── res │ │ └── Banyanfile.json │ ├── test_simple.jl │ └── runtests.jl ├── Project.toml ├── src │ └── BanyanDataFrames.jl └── LICENSE ├── benchmarking ├── matmul_runtime.pdf ├── matmul_runtime.png ├── blackscholes_runtime.pdf ├── blackscholes_runtime.png ├── benchmark.py └── benchmark_results.md ├── docs ├── 1-Create_an_account_with_Banyan.md ├── 5-Cluster_management.md └── 3-Cluster_creation.md ├── .gitignore ├── README.md └── LICENSE /Banyan/deps/build.log: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "julia.enableTelemetry": false 3 | } -------------------------------------------------------------------------------- /Banyan/.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/banyan-julia/v0.1.0/Banyan/.swp -------------------------------------------------------------------------------- /Banyan/test/res/Banyanfile.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": ["file://../../res/Banyanfile.json"] 3 | } 4 | -------------------------------------------------------------------------------- /BanyanArrays/test/res/Banyanfile.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": ["file://../../res/Banyanfile.json"] 3 | } 4 | -------------------------------------------------------------------------------- /BanyanDataFrames/res/Banyanfile.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": ["file://../../BanyanArrays/res/Banyanfile.json"] 3 | } 4 | -------------------------------------------------------------------------------- /Banyan/deps/build.jl: -------------------------------------------------------------------------------- 1 | # using JuliaFormatter: format 2 | 3 | # format("..", margin = 79, remove_extra_newlines = true) 4 | -------------------------------------------------------------------------------- /benchmarking/matmul_runtime.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/banyan-julia/v0.1.0/benchmarking/matmul_runtime.pdf -------------------------------------------------------------------------------- 
/benchmarking/matmul_runtime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/banyan-julia/v0.1.0/benchmarking/matmul_runtime.png -------------------------------------------------------------------------------- /benchmarking/blackscholes_runtime.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/banyan-julia/v0.1.0/benchmarking/blackscholes_runtime.pdf -------------------------------------------------------------------------------- /benchmarking/blackscholes_runtime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/banyan-julia/v0.1.0/benchmarking/blackscholes_runtime.png -------------------------------------------------------------------------------- /Banyan/test/data/multiple_rowgroups.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/banyan-julia/v0.1.0/Banyan/test/data/multiple_rowgroups.parquet -------------------------------------------------------------------------------- /Banyan/src/utils_abstract_types.jl: -------------------------------------------------------------------------------- 1 | abstract type AbstractFuture end 2 | 3 | abstract type AbstractSample end 4 | abstract type AbstractSampleWithKeys <: AbstractSample end -------------------------------------------------------------------------------- /Banyan/src/id.jl: -------------------------------------------------------------------------------- 1 | const JobId = String 2 | const ValueId = String 3 | const MessageId = String 4 | 5 | generate_value_id() = randstring(8) 6 | generate_message_id() = randstring(8) 7 | -------------------------------------------------------------------------------- /BanyanArrays/test/blas.jl: -------------------------------------------------------------------------------- 1 | @testset "Level 1: Element-wise Vector Multiplication" begin end 2 | 3 | @testset "Level 3: Matrix Multiplication" begin end 4 | 5 | @testset "Level 3: Transpose" begin end 6 | -------------------------------------------------------------------------------- /BanyanArrays/res/Banyanfile.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": ["file://../../Banyan/res/Banyanfile.json"], 3 | "require": { 4 | "cluster": {"files": ["file://utils_ba.jl"]}, 5 | "job": {"code": ["include(\"utils_ba.jl\")"]} 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /BanyanArrays/src/BanyanArrays.jl: -------------------------------------------------------------------------------- 1 | module BanyanArrays 2 | 3 | using Banyan 4 | 5 | export Array, Vector, Matrix 6 | export read_hdf5, write_hdf5 7 | export ndims, size, length, eltype 8 | export fill 9 | export map, mapslices, reduce, sort, sortslices 10 | 11 | include("array.jl") 12 | 13 | end # module -------------------------------------------------------------------------------- /BanyanDataFrames/test/res/Banyanfile.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": ["file://../../res/Banyanfile.json"], 3 | "require": { 4 | "cluster": { 5 | "packages": ["Statistics"] 6 | }, 7 | "job": { 8 | "code": [ 9 | "using Statistics" 10 | ] 11 | } 12 | } 13 | } 14 |
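The `BanyanArrays` module above only declares its exports, so here is a minimal usage sketch. It mirrors `BanyanArrays/test/test_simple.jl` below; the cluster name is a placeholder, and it assumes your Banyan account, AWS credentials, and cluster are already configured (see `docs/`):

```julia
using Banyan
using BanyanArrays

# `with_job` (exported by Banyan) creates a job on the named cluster and
# guarantees the job is destroyed afterwards, even if an error is thrown.
with_job(cluster_name = "mycluster", nworkers = 2) do job
    x = BanyanArrays.fill(10.0, 2048)  # lazy, distributed analogue of Base.fill
    x = map(e -> e / 10, x)            # element-wise map; still lazy
    res = collect(sum(x))              # collect forces computation on the cluster
    println(res)                       # 2048.0
end
```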
-------------------------------------------------------------------------------- /BanyanArrays/test/haversine.jl: -------------------------------------------------------------------------------- 1 | # Expected output according to the reference Python implementation: an array filled with 4841.08061686 2 | lat_ref = 0.7098 3 | lon_ref = 1.2390 4 | 5 | lat_test = ones(10) * 0.069 6 | lon_test = ones(10) * 0.069 7 | 8 | dlat = sin.((lat_test .- lat_ref) / 2) .^ 2 9 | dlon = sin.((lon_test .- lon_ref) / 2) .^ 2 10 | 11 | a = cos.(lat_ref) * cos.(lat_test) .* dlon + dlat 12 | 13 | c = asin.(sqrt.(a)) * 2 * 3959.0 # 3959.0 is Earth's radius in miles 14 | print(c) -------------------------------------------------------------------------------- /BanyanArrays/test/test_simple.jl: -------------------------------------------------------------------------------- 1 | @testset "Simple usage of BanyanArrays" begin 2 | run_with_job("Filling") do job 3 | println(typeof(Base.fill(1.0, 2048))) 4 | x = BanyanArrays.fill(10.0, 2048) 5 | println(typeof(x)) 6 | x = map(e -> e / 10, x) 7 | println(typeof(x)) 8 | res = sum(x) 9 | 10 | res = collect(res) 11 | @test typeof(res) == Float64 12 | @test res == 2048.0 # sum of 2048 ones (was `@test res = 1024`, an assignment) 13 | end 14 | end 15 | -------------------------------------------------------------------------------- /BanyanArrays/Project.toml: -------------------------------------------------------------------------------- 1 | name = "BanyanArrays" 2 | uuid = "369465de-032e-4609-9dcf-82b89c370a7b" 3 | authors = ["Caleb Winston "] 4 | version = "0.1.0" 5 | 6 | [deps] 7 | Banyan = "706d138b-e922-45b9-a636-baf8ae0d5317" 8 | 9 | [compat] 10 | Banyan = "0.1.0" 11 | julia = "1.6.0" 12 | 13 | [extras] 14 | Banyan = "706d138b-e922-45b9-a636-baf8ae0d5317" 15 | Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" 16 | 17 | [targets] 18 | test = ["Banyan", "Test"] 19 | -------------------------------------------------------------------------------- /Banyan/test/res/Banyanfile_badcluster.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [], 3 | "require": { 4 | "language": "jl", 5 | "cluster": { 6 | "files": [], 7 | "scripts": [], 8 | "packages": ["DataFrames"], 9 | "pt_lib_info": "file://../../res/pt_lib_info.json", 10 | "pt_lib": "file://../../res/pt_lib.jl" 11 | }, 12 | "job": { 13 | "code": ["using DataFrames; println(\"using DataFrames\")"] 14 | } 15 | } 16 | } 17 | 18 | 19 | -------------------------------------------------------------------------------- /Banyan/res/Banyanfile.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [], 3 | "require": { 4 | "language": "jl", 5 | "cluster": { 6 | "files": ["file://utils.jl"], 7 | "scripts": [], 8 | "packages": ["DataFrames", "CSV", "Parquet", "Arrow"], 9 | "pt_lib_info": "file://pt_lib_info.json", 10 | "pt_lib": "file://pt_lib.jl" 11 | }, 12 | "job": { 13 | "code": [ 14 | "using DataFrames, CSV, Parquet, Arrow" 15 | ] 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /BanyanDataFrames/test/test_simple.jl: -------------------------------------------------------------------------------- 1 | @testset "Simple usage of BanyanDataFrames" begin 2 | run_with_job("Grouping") do job 3 | # TODO: Use `get_cluster()` and `wget` to automatically load in iris.csv 4 | iris = read_csv("s3://banyan-cluster-data-pumpkincluster0-3e15290827c0c584/iris.csv") 5 | gdf = groupby(iris, :species) 6 | lengths = collect(combine(gdf, :petal_length => mean)) 7 | counts =
collect(combine(gdf, nrow)) 8 | 9 | @test first(lengths)[:petal_length_mean] == 1.464 10 | @test first(counts)[:nrow] == 50 11 | end 12 | end 13 | -------------------------------------------------------------------------------- /Banyan/src/job.jl: -------------------------------------------------------------------------------- 1 | struct Job 2 | id::JobId 3 | nworkers::Int32 4 | sample_rate::Int32 5 | locations::Dict{ValueId,Location} 6 | pending_requests::Vector{Request} 7 | futures_on_client::WeakKeyDict{ValueId,Future} 8 | cluster_name::String 9 | 10 | # TODO: Ensure that this struct and constructor (which are just for storing 11 | # information about the job) does not conflict with the `Job` function that 12 | # calls `create_job` 13 | function Job(cluster_name::String, job_id::JobId, nworkers::Integer, sample_rate::Integer)::Job 14 | new(job_id, nworkers, sample_rate, Dict(), [], Dict(), cluster_name) 15 | end 16 | end 17 | -------------------------------------------------------------------------------- /Banyan/src/future.jl: -------------------------------------------------------------------------------- 1 | mutable struct Future <: AbstractFuture 2 | value::Any 3 | value_id::ValueId 4 | mutated::Bool 5 | stale::Bool 6 | 7 | function Future(value::Any, value_id::ValueId, mutated::Bool, stale::Bool) 8 | new_future = new(value, value_id, mutated, stale) 9 | 10 | # Create finalizer and register 11 | finalizer(new_future) do fut 12 | try 13 | record_request(DestroyRequest(fut.value_id)) 14 | catch e 15 | @warn "Failed to destroy value $(fut.value_id) because job has stopped: $e" 16 | end 17 | end 18 | 19 | new_future 20 | end 21 | end -------------------------------------------------------------------------------- /BanyanArrays/res/utils_ba.jl: -------------------------------------------------------------------------------- 1 | # NOTE: This function is shared between the client library and the PT library 2 | function indexapply(op, objs...; index::Union{Integer,Colon}=1) # `Colon` selects the last index (see below); the former `::Integer` annotation made that branch unreachable 3 | lists = [obj for obj in objs if obj isa AbstractVecOrTuple] 4 | length(lists) > 0 || throw(ArgumentError("Expected at least one tuple as input")) 5 | index = index isa Colon ? length(first(lists)) : index 6 | operands = [(obj isa AbstractVecOrTuple ? obj[index] : obj) for obj in objs] 7 | indexres = op(operands...) 8 | res = first(lists) 9 | if first(lists) isa Tuple 10 | res = [res...]
11 | res[index] = indexres 12 | Tuple(res) 13 | else 14 | res = copy(res) 15 | res[index] = indexres 16 | res # return the mutated copy (previously this branch returned `indexres` itself) 17 | end 18 | end -------------------------------------------------------------------------------- /BanyanDataFrames/Project.toml: -------------------------------------------------------------------------------- 1 | name = "BanyanDataFrames" 2 | uuid = "dfdd4555-3ad7-41cc-a503-a251c9c652f9" 3 | authors = ["Caleb Winston "] 4 | version = "0.1.0" 5 | 6 | [deps] 7 | Banyan = "706d138b-e922-45b9-a636-baf8ae0d5317" 8 | BanyanArrays = "369465de-032e-4609-9dcf-82b89c370a7b" 9 | DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" 10 | 11 | [compat] 12 | Banyan = "0.1.0" 13 | BanyanArrays = "0.1.0" 14 | DataFrames = "1.1.1" 15 | julia = "1.6.0" 16 | 17 | [extras] 18 | Banyan = "706d138b-e922-45b9-a636-baf8ae0d5317" 19 | BanyanArrays = "369465de-032e-4609-9dcf-82b89c370a7b" 20 | Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" 21 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 22 | 23 | [targets] 24 | test = ["Banyan", "BanyanArrays", "Test", "Statistics"] 25 | -------------------------------------------------------------------------------- /docs/1-Create_an_account_with_Banyan.md: -------------------------------------------------------------------------------- 1 | # Create an account with Banyan 2 | 3 | ## Step 1: Sign up on AWS Marketplace 4 | 1. Create an AWS account or sign into an existing AWS account. 5 | 2. Visit banyancomputing.com and click ... You will be redirected to the registration page in AWS Marketplace. Be sure to sign into AWS. Alternatively, you can directly visit the registration page in AWS Marketplace. 6 | 3. Subscribe. You will be redirected to the Banyan sign-up page. 7 | 8 | ## Step 2: Sign up on Banyan website 9 | 4. Fill out the fields with your information (username, email address) and check the box to accept the terms. Click sign up. 10 | 5. Important: Do not close your browser or the web page. 11 | 6. You will receive an activation link in your email. Open that email and click on the activation link. Reset your password and sign in.... -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Files generated by invoking Julia with --code-coverage 2 | *.jl.cov 3 | *.jl.*.cov 4 | 5 | # Files generated by invoking Julia with --track-allocation 6 | *.jl.mem 7 | 8 | # System-specific files and directories generated by the BinaryProvider and BinDeps packages 9 | # They contain absolute paths specific to the host computer, and so should not be committed 10 | deps/deps.jl 11 | deps/build.log 12 | deps/downloads/ 13 | deps/usr/ 14 | deps/src/ 15 | 16 | # Build artifacts for creating documentation generated by the Documenter package 17 | docs/build/ 18 | docs/site/ 19 | 20 | # File generated by Pkg, the package manager, based on a corresponding Project.toml 21 | # It records a fixed state of all packages used by the project. As such, it should not be 22 | # committed for packages, but should be committed for applications that require a static 23 | # environment.
24 | # Manifest.toml 25 | -------------------------------------------------------------------------------- /BanyanDataFrames/src/BanyanDataFrames.jl: -------------------------------------------------------------------------------- 1 | module BanyanDataFrames 2 | 3 | using Banyan 4 | using BanyanArrays 5 | 6 | using DataFrames 7 | 8 | include("df.jl") 9 | include("gdf.jl") 10 | 11 | # Types 12 | export DataFrame, GroupedDataFrame 13 | 14 | # I/O 15 | export read_csv, write_csv, read_parquet, write_parquet, read_arrow, write_arrow 16 | 17 | # Dataframe properties 18 | export nrow, ncol, size, names, propertynames 19 | 20 | # Dataframe filtering 21 | export dropmissing, filter, unique, nonunique 22 | 23 | # Dataframe selection and column manipulation 24 | export getindex, setindex!, rename 25 | 26 | # Dataframe sorting 27 | export sort 28 | 29 | # Dataframe joining 30 | export innerjoin 31 | 32 | # Grouped dataframe properties 33 | export length, groupcols, valuecols 34 | 35 | # Grouped dataframe methods 36 | export groupby, select, transform, combine, subset 37 | 38 | end # module 39 | -------------------------------------------------------------------------------- /Banyan/src/tasks.jl: -------------------------------------------------------------------------------- 1 | ######### 2 | # Tasks # 3 | ######### 4 | 5 | mutable struct DelayedTask 6 | # Fields for use in processed task ready to be recorded 7 | code::String 8 | value_names::Dict{ValueId,String} 9 | effects::Dict{ValueId,String} 10 | pa_union::Vector{PartitionAnnotation} # Enumeration of applicable PAs 11 | # Fields for use in task yet to be processed in a call to `compute` 12 | partitioned_using_func::Union{Function,Nothing} 13 | partitioned_with_func::Union{Function,Nothing} 14 | mutation::Dict{Future,Future} # This gets converted to `effects` 15 | end 16 | 17 | DelayedTask() = DelayedTask("", Dict(), Dict(), [PartitionAnnotation()], nothing, nothing, Dict()) 18 | 19 | function to_jl(task::DelayedTask) 20 | return Dict( 21 | "code" => task.code, 22 | "value_names" => task.value_names, 23 | "effects" => task.effects, 24 | "pa_union" => [to_jl(pa) for pa in task.pa_union], 25 | ) 26 | end 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Banyan Julia 2 | 3 | Banyan Julia is an extension to the Julia programming language that seamlessly scales existing libraries and code to massive data and compute. Banyan allows _partition types_ to be assigned to variables in existing code. Annotated code is automatically offloaded to run in a distributed fashion on managed clusters running in your AWS Virtual Private Cloud. Banyan optimizes code on the fly to take advantage of CPU caches and multicore parallelism in the clusters where the offloaded code runs. 4 | 5 | Software libraries can be annotated with partition types, and subsequent use of the annotated functions automatically runs at scale. Currently, we are developing two annotated libraries: 6 | 7 | - BanyanArrays.jl 8 | - BanyanDataFrames.jl 9 | 10 | Eventually, you will be able to use these libraries as drop-in replacements for Julia's standard arrays and the DataFrames.jl library. By changing an import statement, you can run your code as is, with Banyan scaling to arbitrary data or compute needs.
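For example, here is a sketch of the BanyanDataFrames workflow, adapted from this repository's test suite (the bucket and cluster names are placeholders, and a configured account and cluster are assumed; see the `docs/` directory):

```julia
using Banyan
using BanyanDataFrames  # drop-in for `using DataFrames`
using Statistics

with_job(cluster_name = "mycluster", nworkers = 2) do job
    # The same DataFrames.jl-style code, now executed on the cluster
    iris = read_csv("s3://your-bucket/iris.csv")
    gdf = groupby(iris, :species)
    lengths = collect(combine(gdf, :petal_length => mean))  # collect() brings the result back
end
```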
11 | -------------------------------------------------------------------------------- /docs/5-Cluster_management.md: -------------------------------------------------------------------------------- 1 | # Manage a cluster 2 | 3 | ## Updating a Cluster 4 | 5 | To update a cluster, you must provide the following information. 6 | 7 | Required Parameters 8 | * `cluster_name` - name of cluster to update 9 | * `username` - username under which this cluster was created 10 | 11 | Optional Parameters 12 | * `additional_policy` - ARN of AWS IAM policy to give additional permissions to the cluster 13 | * `s3_read_write_resource` - ARN of AWS S3 bucket to give cluster read/write permission to 14 | * `num_nodes` - maximum number of nodes for cluster 15 | * `banyanfile` - Banyanfile 16 | 17 | Updating a cluster will fail under the following scenarios. 18 | * Cluster is not currently in the `running` state 19 | * Cluster has jobs currently running 20 | 21 | 22 | ## Destroying a Cluster 23 | 24 | To destroy a cluster, you must provide the following information: 25 | * `cluster_name` - name of cluster to delete 26 | * `username` - username under which this cluster was created 27 | 28 | -------------------------------------------------------------------------------- /Banyan/scripts/deploy_pt_lib.py: -------------------------------------------------------------------------------- 1 | # This script is retired. 2 | # 3 | # import sys 4 | # import json 5 | # import subprocess 6 | 7 | # import boto3 8 | # from boto3.dynamodb.types import TypeSerializer 9 | 10 | # # Upload pt_lib.jl to S3 11 | # s3 = boto3.resource('s3') 12 | # s3_client = boto3.client('s3') 13 | # s3_client.upload_file("res/pt_lib.jl", "banyan-executor", "pt_lib.jl") 14 | 15 | # # Upload pt_lib_info.json to DynamoDB (to be downloaded by `evaluate`) 16 | # dynamodb = boto3.resource('dynamodb') 17 | # clusters = dynamodb.Table('Clusters') 18 | # serializer = TypeSerializer() 19 | # with open('res/pt_lib_info.json') as pt_lib_info_file: 20 | # pt_lib_info_json = json.load(pt_lib_info_file) 21 | # clusters.update_item( 22 | # Key={ 23 | # 'cluster_id': sys.argv[1], 24 | # }, 25 | # UpdateExpression='SET pt_lib_info = :pt_lib_info', 26 | # ExpressionAttributeValues={ 27 | # ':pt_lib_info': { 28 | # k: serializer.serialize(v) for k, v in pt_lib_info_json.items() 29 | # } 30 | # } 31 | # ) 32 | 33 | # # Download pt_lib.jl from S3 to the cluster 34 | # #commands = ["\"cd /home/ec2-user\"", "\"sudo yum update -y\"", "\"aws s3 cp s3://banyan-executor /home/ec2-user --recursive\""] 35 | # #for cmd in commands: 36 | # # subprocess.Popen("pcluster ssh {n} -i {f} {cmd}".format(n=sys.argv[1], f=sys.argv[2], cmd=cmd), shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() 37 | -------------------------------------------------------------------------------- /benchmarking/benchmark.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | num_workers = [1, 2, 4, 8, 16] 4 | 5 | ################# 6 | # BLACK SCHOLES # 7 | ################# 8 | 9 | wo_cache_optim = [237.947, 163.376, 85.370, 42.892, 22.544] 10 | 11 | w_l2cache_optim = [103.517, 71.561, 36.956, 18.404, 9.756] 12 | w_l1cache_optim = [93.652, 81.079, 40.485, 19.570, 10.368] 13 | 14 | plt.plot(num_workers, wo_cache_optim, marker='o', label="Without Cache Optimization") #, label="w/o cache optimization") 15 | plt.plot(num_workers, w_l2cache_optim, marker='o', label="L2 Cache Optimized") 16 | plt.plot(num_workers,
w_l1cache_optim, marker='o', label="L1 Cache Optimized") 17 | plt.title("Runtime for Black Scholes") 18 | plt.xlabel("Number of Workers") 19 | plt.ylabel("Execution Runtime (s)") 20 | plt.yscale("log") 21 | plt.legend() 22 | plt.grid(alpha=0.5) 23 | plt.savefig("blackscholes_runtime.pdf", dpi=200) 24 | plt.close() 25 | 26 | 27 | 28 | ########## 29 | # MATMUL # 30 | ########## 31 | 32 | wo_cache_optim = [31.543, 26.172, 13.388, 7.380, 3.964] 33 | w_cache_optim = [96.275, 68.332, 37.093, 18.993, 10.289] 34 | 35 | 36 | plt.plot(num_workers, wo_cache_optim, marker='o', label="Without Cache Optimization") #, label="w/o cache optimization") 37 | plt.plot(num_workers, w_cache_optim, marker='o', label="L2 Cache Optimized") 38 | plt.title("Runtime for Matrix Multiplication") 39 | plt.xlabel("Number of Workers") 40 | plt.ylabel("Execution Runtime (s)") 41 | plt.yscale("log") 42 | plt.legend() 43 | plt.grid(alpha=0.5) 44 | plt.savefig("matmul_runtime.pdf", dpi=200) 45 | plt.close() 46 | -------------------------------------------------------------------------------- /Banyan/Project.toml: -------------------------------------------------------------------------------- 1 | name = "Banyan" 2 | uuid = "706d138b-e922-45b9-a636-baf8ae0d5317" 3 | authors = ["Banyan Team "] 4 | version = "0.1.0" 5 | 6 | [deps] 7 | AWS = "fbe9abb3-538b-5e4e-ba9e-bc94f4f92ebc" 8 | AWSCore = "4f1ea46c-232b-54a6-9b17-cc2d0f3e6598" 9 | AWSS3 = "1c724243-ef5b-51ab-93f4-b0a88ac62a95" 10 | AWSSQS = "6e80b5ca-5733-51f9-999e-c18680912812" 11 | Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" 12 | Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" 13 | CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" 14 | DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" 15 | FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" 16 | FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f" 17 | HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" 18 | HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" 19 | IniFile = "83e8ac13-25f8-5344-8a64-a9f2b223428f" 20 | IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e" 21 | JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" 22 | Parquet = "626c502c-15b0-58ad-a749-f091afb673ae" 23 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" 24 | Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" 25 | TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76" 26 | 27 | [compat] 28 | AWS = "1.36.0" 29 | AWSCore = "0.6.17" 30 | AWSS3 = "0.8.3" 31 | AWSSQS = "0.6.4" 32 | Arrow = "1.5.0" 33 | CSV = "0.8.5" 34 | DataFrames = "1.1.1" 35 | FileIO = "1.9.1" 36 | FilePathsBase = "0.9.10" 37 | HDF5 = "0.15.5" 38 | HTTP = "0.8.19" 39 | IniFile = "0.5.0" 40 | IterTools = "1.3.0" 41 | JSON = "0.21.1" 42 | Parquet = "0.8.3" 43 | julia = "1.6.0" 44 | 45 | [extras] 46 | Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" 47 | 48 | [targets] 49 | test = ["Test"] 50 | -------------------------------------------------------------------------------- /BanyanArrays/test/shallow_water.jl: -------------------------------------------------------------------------------- 1 | G = 6.67384e-11 # m/(kg*s^2) 2 | dt = 60*60*24*365.25 # Years in seconds 3 | r_ly = 9.4607e15 # Lightyear in m 4 | m_sol = 1.9891e30 # Solar mass in kg 5 | b = 0.0; g = 9.81 # b: drag coefficient; g (gravitational acceleration) was used below but never defined, so 9.81 m/s^2 is assumed 6 | 7 | n = 10 8 | 9 | box_size = 1 10 | grid_spacing = 1.0 * box_size / n 11 | 12 | iterations = 10 13 | 14 | u = zeros((n,n)) # x-velocity (renamed from v_x; the update loop below uses u) 15 | v = zeros((n,n)) # y-velocity (renamed from v_y; the update loop below uses v) 16 | eta = ones((n,n)) # pressure 17 | for i=1:n 18 | eta[i] = eta[i] * 0.1 * i 19 | end 20 | 21 | #temps = [ones((n,n)), 22 | # ones((n,n)), 23 | # ones((n,n)), 24 | # ones((n,n))] 25 | 26 | tmp = ones((n,n)) 27
| du_dt = ones((n,n)) 28 | dv_dt = ones((n,n)) 29 | tmp1 = ones((n,n)) 30 | 31 | for i in 1:iterations 32 | # Compute derivatives with respect to x and y by using difference in z (by shifting) / difference in x or y (going both sides) 33 | # Derivative of eta with respect to x (the second dimension) 34 | roll1 = circshift(eta, (0,-1)) 35 | roll2 = circshift(eta, (0,1)) 36 | tmp1 = (roll1 .- roll2) ./ (grid_spacing * 2.0) 37 | 38 | # Derivative of eta with respect to y (the first dimension) 39 | roll1 = circshift(eta, (-1,0)) 40 | roll2 = circshift(eta, (1,0)) 41 | tmp = (roll1 .- roll2) ./ (grid_spacing * 2.0) 42 | 43 | du_dt = -g .* tmp1 - b * u 44 | dv_dt = -g .* tmp - b .* v 45 | 46 | H = 0 47 | tmp1 = ((eta .+ H) .* u) 48 | tmp2 = tmp1 .* v 49 | 50 | roll1 = circshift(tmp1, (0,-1)) 51 | roll2 = circshift(tmp1, (0,1)) 52 | tmp1 = -1 * (roll1 .- roll2) ./ (grid_spacing * 2.0) 53 | 54 | roll1 = circshift(tmp, (0,-1)) 55 | roll2 = circshift(tmp, (0,1)) 56 | tmp = tmp1 .- ((roll1 .- roll2) ./ (grid_spacing * 2.0)) 57 | 58 | # On to line 106 59 | end -------------------------------------------------------------------------------- /Banyan/src/queues.jl: -------------------------------------------------------------------------------- 1 | #using AWSCore 2 | #using AWSSQS 3 | 4 | function get_scatter_queue(job_id::JobId) 5 | return sqs_get_queue( 6 | get_aws_config(), 7 | string("banyan_", job_id, "_scatter.fifo"), 8 | ) 9 | end 10 | 11 | function get_gather_queue(job_id::JobId) 12 | return sqs_get_queue( 13 | get_aws_config(), 14 | string("banyan_", job_id, "_gather.fifo"), 15 | ) 16 | end 17 | 18 | function receive_next_message(queue_name) 19 | m = sqs_receive_message(queue_name) 20 | while isnothing(m) 21 | m = sqs_receive_message(queue_name) 22 | end 23 | content = m[:message] 24 | sqs_delete_message(queue_name, m) 25 | if startswith(content, "EVALUATION_END") 26 | @debug "Received evaluation end" 27 | println(content[15:end]) # index 15 skips the 14-character "EVALUATION_END" prefix 28 | response = Dict{String,Any}("kind" => "EVALUATION_END") 29 | response["end"] = (endswith(content, "MESSAGE_END")) 30 | # TODO: Maybe truncate by chopping off the MESSAGE_END 31 | response 32 | elseif startswith(content, "JOB_FAILURE") 33 | @debug "Job failed" 34 | global current_job_status 35 | current_job_status = "failed" 36 | # Index 12 skips the 11-character "JOB_FAILURE" prefix 37 | println(content[12:end]) 38 | error("Job failed; see preceding output") 39 | else 40 | @debug "Received scatter or gather request" 41 | JSON.parse(content) 42 | end 43 | end 44 | 45 | function send_message(queue_name, message) 46 | sqs_send_message( 47 | queue_name, 48 | message, 49 | (:MessageGroupId, "1"), 50 | (:MessageDeduplicationId, generate_message_id()), 51 | ) 52 | end 53 | -------------------------------------------------------------------------------- /BanyanArrays/test/test_l3.jl: -------------------------------------------------------------------------------- 1 | function matmul() 2 | 3 | # Create data 4 | n = future(Int32(15e2)) 5 | m = future(Int32(10e3)) 6 | p = future(Int32(5e3)) 7 | A = future() # n x m 8 | B = future() # m x p 9 | 10 | # Where the data is located 11 | val(n) 12 | val(m) 13 | val(p) 14 | mem(A, Int64(evaluate(n) * evaluate(m)), Float64) 15 | mem(B, Int64(evaluate(m) * evaluate(p)), Float64) 16 | 17 | # How the data is partitioned 18 | pt(n, Div()) 19 | pt(m, Replicate()) 20 | pt(p, Div()) 21 | pt(A, Block(1)) 22 | pt(B, Block(2)) 23 | pc(Cross(A, B)) 24 | pc(Co(A, n)) 25 | pc(Co(B, p)) 26 | 27 | mutated(A) 28 | mutated(B) 29 | 30 |
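# A note on the annotation API used in this test (names exported from Banyan/src/Banyan.jl;
# this summary is inferred from how they are used in this repository):
# - `pt(fut, ptype)` assigns a partition type such as `Block`, `Div`, or `Replicate` to a future,
# - `pc(constraint)` records a partitioning constraint such as `Cross` or `Co` between futures,
# - `mutated(fut)` marks a future that the next code region will assign to, and
# - `@partitioned ... begin ... end` records that code region as a task to be offloaded
#   and run with the partitioning declared above.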
@partitioned A B n m p begin 31 | A = fill(1, (Int64(n), Int64(m))) 32 | B = fill(2, (Int64(m), Int64(p))) 33 | end 34 | 35 | C = future() 36 | mem(C, Int64(evaluate(n) * evaluate(p)), Float64) 37 | 38 | pt(A, Block(1)) 39 | pt(B, Block(2)) 40 | pt(C, [Block(1), Block(2)]) 41 | pc(Cross(A, B)) 42 | pc(Cross((C, 1), (C, 2))) # Redundant but currently required to assert order of splitting 43 | pc(Equal((C, 1), (C, 2))) 44 | pc(Co((C, 1), A)) 45 | pc(Co((C, 2), B)) 46 | 47 | mutated(C) 48 | 49 | @partitioned A B C begin 50 | C = A * B 51 | end 52 | 53 | # C_new = future() 54 | # located(C_new, C.location) 55 | # mutated(C_new) 56 | 57 | # pt(C, [Block(1), Block(2)]) 58 | # pc(Cross((C, 1), (C, 2))) 59 | # pt(C_new, BlockMulti([1, 2])) 60 | 61 | # @partitioned C C_new begin 62 | # C_new = C 63 | # end 64 | # C = C_new 65 | 66 | evaluate(C) 67 | end 68 | 69 | @testset "Matrix multiplication" begin 70 | runtest("Matrix Multiplication", j -> begin 71 | matmul() 72 | end) 73 | end 74 | -------------------------------------------------------------------------------- /docs/3-Cluster_creation.md: -------------------------------------------------------------------------------- 1 | # Create a cluster 2 | 3 | ## Parameters for Cluster Creation 4 | 5 | To create a cluster, you must provide the following information: 6 | 7 | Required Parameters 8 | * `cluster_name` - name of cluster to create 9 | * `username` - username already registered with Banyan 10 | * `ec2_key_pair` - name of AWS EC2 Key Pair to SSH into the head node of the cluster 11 | * `num_nodes` - maximum number of nodes in cluster 12 | * `instance_type` - AWS EC2 instance type (one of `t3.large`, `t3.xlarge`, `t3.2xlarge`, `m4.4xlarge`, `m4.10xlarge`, `c5.2xlarge`, `c5.4xlarge`) 13 | 14 | Optional Parameters 15 | * `additional_policy` - name of AWS IAM Policy that user created for the cluster 16 | * `banyanfile` - Banyanfile describing how to set up cluster and jobs (format described below) 17 | * `s3_read_write_resource` - ARN of AWS S3 bucket in user account that cluster can access (e.g., to pull source code from or write logs/results back to) 18 | 19 | 20 | ## Format of Banyanfile 21 | 22 | The format of a Banyanfile is as follows: 23 | ```json 24 | { 25 | "include": [], 26 | "require": { 27 | "language": "jl"|"py", 28 | "cluster": { 29 | "commands": ["string"], 30 | "packages": ["string"], 31 | "pt_lib_info": "string", 32 | "pt_lib": "string" 33 | }, 34 | "job": { 35 | "code": ["string"] 36 | } 37 | } 38 | } 39 | ``` 40 | * **include** (list) 41 | List of paths to other Banyanfiles to include, or the actual Banyanfile dictionaries 42 | * **require** (dict) 43 | * **language** (string) 44 | Language used.
Currently supporting Julia ("jl") 45 | * **cluster** (dict) 46 | * **commands** (list) 47 | List of commands to execute on creation of cluster 48 | * **packages** (list) 49 | List of language-dependent packages to install 50 | * **pt_lib_info** (dict or string) 51 | Path to pt_lib_info json file or actual pt_lib_info dict 52 | * **pt_lib** (string) 53 | Optional path to pt_lib 54 | * **job** (dict) 55 | * **code** (list) 56 | List of lines of code to be executed on creation of a job in this cluster 57 | -------------------------------------------------------------------------------- /Banyan/test/runtests.jl: -------------------------------------------------------------------------------- 1 | # NOTE: This file is to be copied across the `runtests.jl` of all Banyan Julia 2 | # projects 3 | 4 | using Test 5 | 6 | function include_tests_to_run(args...) 7 | clear_jobs() 8 | for arg in args 9 | include(arg) 10 | end 11 | end 12 | 13 | get_enabled_tests() = lowercase.(ARGS) 14 | 15 | # NOTE: For testing, please provide the following: 16 | # - AWS_DEFAULT_PROFILE (if you don't already have the desired default AWS account) 17 | # - BANYAN_USERNAME 18 | # - BANYAN_USER_ID 19 | # - BANYAN_API_KEY 20 | # - BANYAN_CLUSTER_NAME 21 | # - BANYAN_NWORKERS 22 | # - BANYAN_NWORKERS_ALL 23 | # 24 | # If these are not specified, we will only run tests that don't require a 25 | # configured job to be created first. 26 | 27 | function run_with_job(test_fn, name) 28 | # This function should be used for tests that need a job to be already 29 | # created to run. We look at environment variables for a specification for 30 | # how to authenticate and what cluster to run on 31 | 32 | username = get(ENV, "BANYAN_USERNAME", nothing) 33 | user_id = get(ENV, "BANYAN_USER_ID", nothing) 34 | api_key = get(ENV, "BANYAN_API_KEY", nothing) 35 | cluster_name = get(ENV, "BANYAN_CLUSTER_NAME", nothing) 36 | nworkers = get(ENV, "BANYAN_NWORKERS", nothing) 37 | 38 | if isempty(get_enabled_tests()) || 39 | any([occursin(t, lowercase(name)) for t in get_enabled_tests()]) 40 | if get(ENV, "BANYAN_NWORKERS_ALL", "false") == "true" 41 | for nworkers in [16, 8, 4, 2, 1] 42 | with_job( 43 | username = username, 44 | user_id = user_id, 45 | api_key = api_key, 46 | cluster_name = cluster_name, 47 | nworkers = Int32(nworkers), # already an Int in this loop; `parse` would fail 48 | banyanfile_path = "file://res/Banyanfile.json", 49 | ) do j 50 | test_fn(j) 51 | end 52 | end 53 | elseif !isnothing(nworkers) 54 | with_job( 55 | username = username, 56 | api_key = api_key, 57 | cluster_name = cluster_name, 58 | nworkers = parse(Int32, nworkers), 59 | banyanfile_path = "file://res/Banyanfile.json", 60 | user_id = user_id, 61 | ) do j 62 | test_fn(j) 63 | end 64 | end 65 | end 66 | end 67 | 68 | function run(test_fn, name) 69 | # This function should be used for tests that test cluster/job management 70 | # and so they only need environment variables to dictate how to 71 | # authenticate. These can be read in from ENV on a per-test basis.
72 | 73 | if isempty(get_enabled_tests()) || 74 | any([occursin(t, lowercase(name)) for t in get_enabled_tests()]) 75 | test_fn() 76 | end 77 | end 78 | 79 | include_tests_to_run("test_cluster.jl") -------------------------------------------------------------------------------- /BanyanArrays/test/runtests.jl: -------------------------------------------------------------------------------- 1 | # NOTE: This file is to be copied across the `runtests.jl` of all Banyan Julia 2 | # projects 3 | 4 | using Test 5 | using Banyan 6 | using BanyanArrays 7 | 8 | function include_tests_to_run(args...) 9 | clear_jobs() 10 | for arg in args 11 | include(arg) 12 | end 13 | end 14 | 15 | get_enabled_tests() = lowercase.(ARGS) 16 | 17 | # NOTE: For testing, please provide the following: 18 | # - AWS_DEFAULT_PROFILE (if you don't already have the desired default AWS account) 19 | # - BANYAN_USERNAME 20 | # - BANYAN_USER_ID 21 | # - BANYAN_API_KEY 22 | # - BANYAN_CLUSTER_NAME 23 | # - BANYAN_NWORKERS 24 | # - BANYAN_NWORKERS_ALL 25 | # 26 | # If these are not specified, we will only run tests that don't require a 27 | # configured job to be created first. 28 | 29 | function run_with_job(test_fn, name) 30 | # This function should be used for tests that need a job to be already 31 | # created to run. We look at environment variables for a specification for 32 | # how to authenticate and what cluster to run on 33 | 34 | username = get(ENV, "BANYAN_USERNAME", nothing) 35 | user_id = get(ENV, "BANYAN_USER_ID", nothing) 36 | api_key = get(ENV, "BANYAN_API_KEY", nothing) 37 | cluster_name = get(ENV, "BANYAN_CLUSTER_NAME", nothing) 38 | nworkers = get(ENV, "BANYAN_NWORKERS", nothing) 39 | 40 | if isempty(get_enabled_tests()) || 41 | any([occursin(t, lowercase(name)) for t in get_enabled_tests()]) 42 | if get(ENV, "BANYAN_NWORKERS_ALL", "false") == "true" 43 | for nworkers in [16, 8, 4, 2, 1] 44 | with_job( 45 | username = username, 46 | user_id = user_id, 47 | api_key = api_key, 48 | cluster_name = cluster_name, 49 | nworkers = Int32(nworkers), # already an Int in this loop; `parse` would fail 50 | banyanfile_path = "file://res/Banyanfile.json", 51 | ) do j 52 | test_fn(j) 53 | end 54 | end 55 | elseif !isnothing(nworkers) 56 | with_job( 57 | username = username, 58 | api_key = api_key, 59 | cluster_name = cluster_name, 60 | nworkers = parse(Int32, nworkers), 61 | banyanfile_path = "file://res/Banyanfile.json", 62 | user_id = user_id, 63 | ) do j 64 | test_fn(j) 65 | end 66 | end 67 | end 68 | end 69 | 70 | function run(test_fn, name) 71 | # This function should be used for tests that test cluster/job management 72 | # and so they only need environment variables to dictate how to 73 | # authenticate. These can be read in from ENV on a per-test basis. 74 | 75 | if isempty(get_enabled_tests()) || 76 | any([occursin(t, lowercase(name)) for t in get_enabled_tests()]) 77 | test_fn() 78 | end 79 | end 80 | 81 | include_tests_to_run("test_simple.jl") -------------------------------------------------------------------------------- /BanyanDataFrames/test/runtests.jl: -------------------------------------------------------------------------------- 1 | # NOTE: This file is to be copied across the `runtests.jl` of all Banyan Julia 2 | # projects 3 | 4 | using Test 5 | using Banyan 6 | using BanyanArrays 7 | using BanyanDataFrames 8 | 9 | using Statistics 10 | 11 | function include_tests_to_run(args...)
12 | clear_jobs() 13 | for arg in args 14 | include(arg) 15 | end 16 | end 17 | 18 | get_enabled_tests() = lowercase.(ARGS) 19 | 20 | # NOTE: For testing, please provide the following: 21 | # - AWS_DEFAULT_PROFILE (if you don't already have the desired default AWS account) 22 | # - BANYAN_USERNAME 23 | # - BANYAN_USER_ID 24 | # - BANYAN_API_KEY 25 | # - BANYAN_CLUSTER_NAME 26 | # - BANYAN_NWORKERS 27 | # - BANYAN_NWORKERS_ALL 28 | # 29 | # If these are not specified, we will only run tests that don't require a 30 | # configured job to be created first. 31 | 32 | function run_with_job(test_fn, name) 33 | # This function should be used for tests that need a job to be already 34 | # created to run. We look at environment variables for a specification for 35 | # how to authenticate and what cluster to run on 36 | 37 | username = get(ENV, "BANYAN_USERNAME", nothing) 38 | user_id = get(ENV, "BANYAN_USER_ID", nothing) 39 | api_key = get(ENV, "BANYAN_API_KEY", nothing) 40 | cluster_name = get(ENV, "BANYAN_CLUSTER_NAME", nothing) 41 | nworkers = get(ENV, "BANYAN_NWORKERS", nothing) 42 | 43 | if isempty(get_enabled_tests()) || 44 | any([occursin(t, lowercase(name)) for t in get_enabled_tests()]) 45 | if get(ENV, "BANYAN_NWORKERS_ALL", "false") == "true" 46 | for nworkers in [16, 8, 4, 2, 1] 47 | with_job( 48 | username = username, 49 | user_id = user_id, 50 | api_key = api_key, 51 | cluster_name = cluster_name, 52 | nworkers = Int32(nworkers), # already an Int in this loop; `parse` would fail 53 | banyanfile_path = "file://res/Banyanfile.json", 54 | ) do j 55 | test_fn(j) 56 | end 57 | end 58 | elseif !isnothing(nworkers) 59 | with_job( 60 | username = username, 61 | api_key = api_key, 62 | cluster_name = cluster_name, 63 | nworkers = parse(Int32, nworkers), 64 | banyanfile_path = "file://res/Banyanfile.json", 65 | user_id = user_id, 66 | ) do j 67 | test_fn(j) 68 | end 69 | end 70 | end 71 | end 72 | 73 | function run(test_fn, name) 74 | # This function should be used for tests that test cluster/job management 75 | # and so they only need environment variables to dictate how to 76 | # authenticate. These can be read in from ENV on a per-test basis. 77 | 78 | if isempty(get_enabled_tests()) || 79 | any([occursin(t, lowercase(name)) for t in get_enabled_tests()]) 80 | test_fn() 81 | end 82 | end 83 | 84 | include_tests_to_run("test_simple.jl") -------------------------------------------------------------------------------- /benchmarking/benchmark_results.md: -------------------------------------------------------------------------------- 1 | # Benchmarking Results 2 | 3 | ## Black Scholes 4 | 5 | The following table shows the runtime (s) for Black Scholes, averaged over 5 trials, using an input data size of 1e9.
6 | 7 | | Num Workers | Sequential | Parallelized Without Cache Optimization | Parallelized With L2 Cache Optimization | Parallelized With L1 Cache Optimization | 8 | | :---: | :---: | :---: | :---: | :---: | 9 | | 1 | 232.878 | 237.947 | 103.517 | 93.652 | 10 | | 2 | __ | 163.376 | 71.561 | 81.079 | 11 | | 4 | __ | 85.370 | 36.956 | 40.485 | 12 | | 8 | __ | 42.892 | 18.404 | 19.570 | 13 | | 16 | __ | 22.544 | 9.756 | 10.368 | 14 | 15 | 16 | ![Black Scholes runtime](blackscholes_runtime.png) 17 | 18 | 59 | ## Matrix Multiplication 60 | 61 | The following table shows the runtime (s) for matrix multiplication A x B, averaged over 5 trials, where the dimensions of A and B are (1500, 10000) and (10000, 5000) respectively. 62 | 63 | 64 | 65 | | Num Workers | Sequential | Parallelized Without Cache Optimization | Parallelized With L2 Cache Optimization | 66 | | :---: | :---: | :---: | :---: | 67 | | 1 | 40.920 | 31.543 | 96.275 | 68 | | 2 | __ | 26.172 | 68.332 | 69 | | 4 | __ | 13.388 | 37.093 | 70 | | 8 | __ | 7.380 | 18.993 | 71 | | 16 | __ | 3.964 | 10.289 | 72 | 73 | ![Matrix multiplication runtime](matmul_runtime.png) 74 | 75 | -------------------------------------------------------------------------------- /Banyan/src/futures.jl: -------------------------------------------------------------------------------- 1 | ########### 2 | # Futures # 3 | ########### 4 | 5 | """ 6 | Future() 7 | Future(value::Any) 8 | Future(location::Location) 9 | Future(; kwargs...) 10 | 11 | Constructs a new future, representing a value that has not yet been evaluated. 12 | """ 13 | function Future(location::Location = None(); mutate_from::Union{<:AbstractFuture,Nothing}=nothing) 14 | # Generate new value id 15 | value_id = generate_value_id() 16 | 17 | # Create new Future and assign a location to it 18 | new_future = Future(nothing, value_id, false, false) 19 | sourced(new_future, location) 20 | destined(new_future, None()) 21 | 22 | # TODO: Add Size location here if needed 23 | # Handle locations that have an associated value 24 | if location.src_name in ["None", "Client", "Value"] 25 | new_future.value = location.sample.value 26 | new_future.stale = false 27 | end 28 | 29 | if !isnothing(mutate_from) 30 | # Indicate that this future is the result of an in-place mutation of 31 | # some other value 32 | mutated(mutate_from, new_future) 33 | elseif location.src_name == "None" 34 | # For convenience, if a future is constructed with no location to 35 | # split from, we assume it will be mutated in the next code region 36 | # and mark it as mutated. This is pretty common since often when 37 | # we are creating new futures with None location it is as an 38 | # intermediate variable to store the result of some code region. 39 | # 40 | # Mutation can also be specified manually with mutate=true|false in 41 | # `partition` or implicitly through `Future` constructors 42 | mutated(new_future) 43 | end 44 | 45 | new_future 46 | end 47 | 48 | function Future(value::Any) 49 | location = if Base.summarysize(value) ≤ 4 * 1024 50 | Value(value) 51 | else 52 | Client(value) 53 | end 54 | 55 | # Create future, store value, and return 56 | Future(location) 57 | end 58 | 59 | """ 60 | Future(future::AbstractFuture) 61 | 62 | Constructs a future from a future that was already created.
63 | 64 | If the given future has not had its value mutated (meaning that the value 65 | stored with it here on the client is the most up-to-date version of it), we use 66 | its value to construct a new future from a copy of the value. 67 | 68 | However, if the future has been mutated by some code region that has already 69 | been recorded, we construct a new future with location `None` and mark it as 70 | mutated. This is because presumably in the case that we _can't_ copy over the 71 | given future, we would want to assign to it in the upcoming code region where 72 | it's going to be used. 73 | """ 74 | function Future(fut::AbstractFuture, mutation::Function=identity) 75 | fut = convert(Future, fut) 76 | if !fut.stale 77 | # Copy over value 78 | new_future = Future( 79 | deepcopy(mutation(fut.value)), 80 | generate_value_id(), 81 | # If the future is not stale, it is not mutated in a way where 82 | # a further `compute` is needed. So we can just copy its value. 83 | false, 84 | false 85 | ) 86 | 87 | # Copy over location 88 | located(new_future, deepcopy(get_location(fut))) 89 | 90 | new_future 91 | else 92 | Future() 93 | end 94 | end 95 | 96 | # convert(::Type{Future}, value::Any) = Future(value) 97 | convert(::Type{Future}, fut::Future) = fut 98 | 99 | get_location(fut::AbstractFuture) = get(get_job().locations, convert(Future, fut).value_id, nothing) 100 | get_location(value_id::ValueId) = get(get_job().locations, value_id, nothing) 101 | get_future(value_id::ValueId) = get_job().futures_on_client[value_id] 102 | -------------------------------------------------------------------------------- /Banyan/src/Banyan.jl: -------------------------------------------------------------------------------- 1 | # The Banyan client for Julia has 5 key parts: 2 | # - Job 3 | # - Future 4 | # - Location, src, dst, loc 5 | # - pt, pc 6 | # - @partitioned 7 | 8 | module Banyan 9 | 10 | using FilePathsBase: joinpath, isempty 11 | using Base: notnothing, env_project_file 12 | global BANYAN_API_ENDPOINT 13 | 14 | # TODO: Remove this 15 | # export create_job, 16 | # destroy_job, 17 | # JobRequest, 18 | # set_cluster_id, 19 | # set_job_request, 20 | # get_job_id, 21 | # evaluate, 22 | # record_request, 23 | # send_request_get_response 24 | # export Future 25 | # export PartitionAnnotation, 26 | # PartitionType, 27 | # PartitioningConstraint, 28 | # PartitioningConstraints, 29 | # Partitions 30 | # export LocationType 31 | # export DelayedTask 32 | 33 | # export @pa, @pp, @lt, @src, @dst 34 | # export pa_noconstraints 35 | # export Div, Block, Stencil 36 | # export HDF5, Value, Client 37 | # export Cross 38 | # # export Const, Mut 39 | 40 | # include("id.jl") 41 | # include("utils.jl") 42 | # include("jobs.jl") 43 | # include("locations.jl") 44 | # include("futures.jl") 45 | # include("partitions.jl") 46 | # include("queues.jl") 47 | # include("tasks.jl") 48 | # include("pa_constructors.jl") 49 | # include("pt_constructors.jl") 50 | # include("lt_constructors.jl") 51 | # include("constraint_constructors.jl") 52 | # include("macros.jl") 53 | # include("evaluation.jl") 54 | 55 | # Account management 56 | export configure 57 | 58 | # Cluster management 59 | export Cluster, 60 | create_cluster, update_cluster, destroy_cluster, get_clusters, get_cluster, assert_cluster_is_ready 61 | 62 | # Job management 63 | export Job, with_job, create_job, destroy_job, destroy_all_jobs, clear_jobs, get_jobs 64 | 65 | # Futures 66 | export AbstractFuture, Future, compute, collect 67 | 68 | # Samples 69 | export Sample, 
ExactSample, sample, setsample! 70 | export sample_memory_usage, 71 | sample_axes, 72 | sample_keys, 73 | sample_divisions, 74 | sample_percentile, 75 | sample_max_ngroups, 76 | sample_min, 77 | sample_max 78 | 79 | # Locations 80 | export Location, LocationSource, LocationDestination, located, sourced, destined 81 | export Value, Size, Client, None, Remote 82 | 83 | # Partition types 84 | export PartitionType, pt, pc, mutated, @partitioned 85 | export Any, 86 | Replicating, 87 | Replicated, 88 | Divided, 89 | Syncing, 90 | Reducing, 91 | ReducingWithKey, 92 | Distributing, 93 | Blocked, 94 | Grouped, 95 | ScaledBySame, 96 | Drifted, 97 | Balanced, 98 | Unbalanced, 99 | Blocked, 100 | Grouped 101 | 102 | # Partitioning constraints 103 | export Co, Cross, Equal, Sequential, Match, MatchOn, AtMost, ScaleBy 104 | 105 | # Annotations 106 | export partitioned_using, 107 | partitioned_with, 108 | keep_all_sample_keys, 109 | keep_all_sample_keys_renamed, 110 | keep_sample_keys_named, 111 | keep_sample_keys, 112 | keep_sample_rate 113 | 114 | using AWS: _get_ini_value 115 | using AWSCore 116 | using AWSS3 117 | using AWSSQS 118 | using Base64 119 | using HTTP 120 | using JSON 121 | using Random 122 | using Serialization 123 | using TOML 124 | 125 | using FileIO 126 | using FilePathsBase 127 | using IniFile 128 | 129 | using IterTools 130 | 131 | # TODO: Move locations, samples, and parts of pt_lib.jl and pt_lib_info.json 132 | # into their respective libraries where they can be specialized 133 | using HDF5, CSV, Parquet, Arrow, DataFrames 134 | 135 | # Jobs 136 | include("id.jl") 137 | include("utils.jl") 138 | include("utils_abstract_types.jl") 139 | include("queues.jl") 140 | include("jobs.jl") 141 | include("clusters.jl") 142 | 143 | # Futures 144 | include("future.jl") 145 | include("samples.jl") 146 | include("locations.jl") 147 | include("futures.jl") 148 | 149 | # Annotation 150 | include("partitions.jl") 151 | include("pt_lib_constructors.jl") 152 | include("tasks.jl") 153 | include("annotation.jl") 154 | 155 | # Utilities 156 | include("requests.jl") 157 | 158 | # Job (using locations and futures) 159 | include("job.jl") 160 | 161 | function __init__() 162 | # The user must provide the following for authentication: 163 | # - Username 164 | # - API key 165 | # - AWS credentials 166 | # - SSH key pair (used in cluster creation) 167 | 168 | global BANYAN_API_ENDPOINT 169 | BANYAN_API_ENDPOINT = "https://hcohsbhhzf.execute-api.us-west-2.amazonaws.com/dev/" 170 | 171 | load_config() 172 | end 173 | 174 | end # module 175 | -------------------------------------------------------------------------------- /Banyan/test/data/iris.csv: -------------------------------------------------------------------------------- 1 | "sepal.length","sepal.width","petal.length","petal.width","variety" 2 | 5.1,3.5,1.4,.2,"Setosa" 3 | 4.9,3,1.4,.2,"Setosa" 4 | 4.7,3.2,1.3,.2,"Setosa" 5 | 4.6,3.1,1.5,.2,"Setosa" 6 | 5,3.6,1.4,.2,"Setosa" 7 | 5.4,3.9,1.7,.4,"Setosa" 8 | 4.6,3.4,1.4,.3,"Setosa" 9 | 5,3.4,1.5,.2,"Setosa" 10 | 4.4,2.9,1.4,.2,"Setosa" 11 | 4.9,3.1,1.5,.1,"Setosa" 12 | 5.4,3.7,1.5,.2,"Setosa" 13 | 4.8,3.4,1.6,.2,"Setosa" 14 | 4.8,3,1.4,.1,"Setosa" 15 | 4.3,3,1.1,.1,"Setosa" 16 | 5.8,4,1.2,.2,"Setosa" 17 | 5.7,4.4,1.5,.4,"Setosa" 18 | 5.4,3.9,1.3,.4,"Setosa" 19 | 5.1,3.5,1.4,.3,"Setosa" 20 | 5.7,3.8,1.7,.3,"Setosa" 21 | 5.1,3.8,1.5,.3,"Setosa" 22 | 5.4,3.4,1.7,.2,"Setosa" 23 | 5.1,3.7,1.5,.4,"Setosa" 24 | 4.6,3.6,1,.2,"Setosa" 25 | 5.1,3.3,1.7,.5,"Setosa" 26 | 4.8,3.4,1.9,.2,"Setosa" 27 | 5,3,1.6,.2,"Setosa" 
28 | 5,3.4,1.6,.4,"Setosa" 29 | 5.2,3.5,1.5,.2,"Setosa" 30 | 5.2,3.4,1.4,.2,"Setosa" 31 | 4.7,3.2,1.6,.2,"Setosa" 32 | 4.8,3.1,1.6,.2,"Setosa" 33 | 5.4,3.4,1.5,.4,"Setosa" 34 | 5.2,4.1,1.5,.1,"Setosa" 35 | 5.5,4.2,1.4,.2,"Setosa" 36 | 4.9,3.1,1.5,.2,"Setosa" 37 | 5,3.2,1.2,.2,"Setosa" 38 | 5.5,3.5,1.3,.2,"Setosa" 39 | 4.9,3.6,1.4,.1,"Setosa" 40 | 4.4,3,1.3,.2,"Setosa" 41 | 5.1,3.4,1.5,.2,"Setosa" 42 | 5,3.5,1.3,.3,"Setosa" 43 | 4.5,2.3,1.3,.3,"Setosa" 44 | 4.4,3.2,1.3,.2,"Setosa" 45 | 5,3.5,1.6,.6,"Setosa" 46 | 5.1,3.8,1.9,.4,"Setosa" 47 | 4.8,3,1.4,.3,"Setosa" 48 | 5.1,3.8,1.6,.2,"Setosa" 49 | 4.6,3.2,1.4,.2,"Setosa" 50 | 5.3,3.7,1.5,.2,"Setosa" 51 | 5,3.3,1.4,.2,"Setosa" 52 | 7,3.2,4.7,1.4,"Versicolor" 53 | 6.4,3.2,4.5,1.5,"Versicolor" 54 | 6.9,3.1,4.9,1.5,"Versicolor" 55 | 5.5,2.3,4,1.3,"Versicolor" 56 | 6.5,2.8,4.6,1.5,"Versicolor" 57 | 5.7,2.8,4.5,1.3,"Versicolor" 58 | 6.3,3.3,4.7,1.6,"Versicolor" 59 | 4.9,2.4,3.3,1,"Versicolor" 60 | 6.6,2.9,4.6,1.3,"Versicolor" 61 | 5.2,2.7,3.9,1.4,"Versicolor" 62 | 5,2,3.5,1,"Versicolor" 63 | 5.9,3,4.2,1.5,"Versicolor" 64 | 6,2.2,4,1,"Versicolor" 65 | 6.1,2.9,4.7,1.4,"Versicolor" 66 | 5.6,2.9,3.6,1.3,"Versicolor" 67 | 6.7,3.1,4.4,1.4,"Versicolor" 68 | 5.6,3,4.5,1.5,"Versicolor" 69 | 5.8,2.7,4.1,1,"Versicolor" 70 | 6.2,2.2,4.5,1.5,"Versicolor" 71 | 5.6,2.5,3.9,1.1,"Versicolor" 72 | 5.9,3.2,4.8,1.8,"Versicolor" 73 | 6.1,2.8,4,1.3,"Versicolor" 74 | 6.3,2.5,4.9,1.5,"Versicolor" 75 | 6.1,2.8,4.7,1.2,"Versicolor" 76 | 6.4,2.9,4.3,1.3,"Versicolor" 77 | 6.6,3,4.4,1.4,"Versicolor" 78 | 6.8,2.8,4.8,1.4,"Versicolor" 79 | 6.7,3,5,1.7,"Versicolor" 80 | 6,2.9,4.5,1.5,"Versicolor" 81 | 5.7,2.6,3.5,1,"Versicolor" 82 | 5.5,2.4,3.8,1.1,"Versicolor" 83 | 5.5,2.4,3.7,1,"Versicolor" 84 | 5.8,2.7,3.9,1.2,"Versicolor" 85 | 6,2.7,5.1,1.6,"Versicolor" 86 | 5.4,3,4.5,1.5,"Versicolor" 87 | 6,3.4,4.5,1.6,"Versicolor" 88 | 6.7,3.1,4.7,1.5,"Versicolor" 89 | 6.3,2.3,4.4,1.3,"Versicolor" 90 | 5.6,3,4.1,1.3,"Versicolor" 91 | 5.5,2.5,4,1.3,"Versicolor" 92 | 5.5,2.6,4.4,1.2,"Versicolor" 93 | 6.1,3,4.6,1.4,"Versicolor" 94 | 5.8,2.6,4,1.2,"Versicolor" 95 | 5,2.3,3.3,1,"Versicolor" 96 | 5.6,2.7,4.2,1.3,"Versicolor" 97 | 5.7,3,4.2,1.2,"Versicolor" 98 | 5.7,2.9,4.2,1.3,"Versicolor" 99 | 6.2,2.9,4.3,1.3,"Versicolor" 100 | 5.1,2.5,3,1.1,"Versicolor" 101 | 5.7,2.8,4.1,1.3,"Versicolor" 102 | 6.3,3.3,6,2.5,"Virginica" 103 | 5.8,2.7,5.1,1.9,"Virginica" 104 | 7.1,3,5.9,2.1,"Virginica" 105 | 6.3,2.9,5.6,1.8,"Virginica" 106 | 6.5,3,5.8,2.2,"Virginica" 107 | 7.6,3,6.6,2.1,"Virginica" 108 | 4.9,2.5,4.5,1.7,"Virginica" 109 | 7.3,2.9,6.3,1.8,"Virginica" 110 | 6.7,2.5,5.8,1.8,"Virginica" 111 | 7.2,3.6,6.1,2.5,"Virginica" 112 | 6.5,3.2,5.1,2,"Virginica" 113 | 6.4,2.7,5.3,1.9,"Virginica" 114 | 6.8,3,5.5,2.1,"Virginica" 115 | 5.7,2.5,5,2,"Virginica" 116 | 5.8,2.8,5.1,2.4,"Virginica" 117 | 6.4,3.2,5.3,2.3,"Virginica" 118 | 6.5,3,5.5,1.8,"Virginica" 119 | 7.7,3.8,6.7,2.2,"Virginica" 120 | 7.7,2.6,6.9,2.3,"Virginica" 121 | 6,2.2,5,1.5,"Virginica" 122 | 6.9,3.2,5.7,2.3,"Virginica" 123 | 5.6,2.8,4.9,2,"Virginica" 124 | 7.7,2.8,6.7,2,"Virginica" 125 | 6.3,2.7,4.9,1.8,"Virginica" 126 | 6.7,3.3,5.7,2.1,"Virginica" 127 | 7.2,3.2,6,1.8,"Virginica" 128 | 6.2,2.8,4.8,1.8,"Virginica" 129 | 6.1,3,4.9,1.8,"Virginica" 130 | 6.4,2.8,5.6,2.1,"Virginica" 131 | 7.2,3,5.8,1.6,"Virginica" 132 | 7.4,2.8,6.1,1.9,"Virginica" 133 | 7.9,3.8,6.4,2,"Virginica" 134 | 6.4,2.8,5.6,2.2,"Virginica" 135 | 6.3,2.8,5.1,1.5,"Virginica" 136 | 6.1,2.6,5.6,1.4,"Virginica" 137 | 7.7,3,6.1,2.3,"Virginica" 138 | 6.3,3.4,5.6,2.4,"Virginica" 139 | 
6.4,3.1,5.5,1.8,"Virginica" 140 | 6,3,4.8,1.8,"Virginica" 141 | 6.9,3.1,5.4,2.1,"Virginica" 142 | 6.7,3.1,5.6,2.4,"Virginica" 143 | 6.9,3.1,5.1,2.3,"Virginica" 144 | 5.8,2.7,5.1,1.9,"Virginica" 145 | 6.8,3.2,5.9,2.3,"Virginica" 146 | 6.7,3.3,5.7,2.5,"Virginica" 147 | 6.7,3,5.2,2.3,"Virginica" 148 | 6.3,2.5,5,1.9,"Virginica" 149 | 6.5,3,5.2,2,"Virginica" 150 | 6.2,3.4,5.4,2.3,"Virginica" 151 | 5.9,3,5.1,1.8,"Virginica" -------------------------------------------------------------------------------- /Banyan/res/pt_lib_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "splits": { 3 | "ReturnNull": { 4 | "location_names": ["None"], 5 | "partition_memory_usage": "all" 6 | }, 7 | "ReadBlock": { 8 | "expected": ["key"], 9 | "required": {"name": "Distributing", "distribution": "blocked", "id": "!"}, 10 | "default": {}, 11 | "location_names": ["Remote", "Disk"], 12 | "partition_memory_usage": "div" 13 | }, 14 | "ReadGroup": { 15 | "expected": ["key", "divisions"], 16 | "required": {"name": "Distributing", "distribution": "grouped", "id": "!"}, 17 | "default": {"rev": false}, 18 | "location_names": ["Remote", "Disk"], 19 | "partition_memory_usage": "div" 20 | }, 21 | "SplitBlock": { 22 | "expected": ["key"], 23 | "required": {"name": "Distributing", "distribution": "blocked", "id": "!"}, 24 | "default": {}, 25 | "location_names": ["Memory"], 26 | "partition_memory_usage": "div" 27 | }, 28 | "SplitGroup": { 29 | "expected": ["key"], 30 | "required": {"name": "Distributing", "distribution": "grouped", "id": "!"}, 31 | "default": {"rev": false}, 32 | "location_names": ["Memory"], 33 | "partition_memory_usage": "div" 34 | }, 35 | "CopyFrom": { 36 | "required": {"name": "Replicating", "dividing": false}, 37 | "default": {}, 38 | "location_names": ["Client", "Remote", "Value", "Disk", "Memory"], 39 | "partition_memory_usage": "all" 40 | }, 41 | "Divide": { 42 | "expected": ["key"], 43 | "required": {"name": "Replicating", "dividing": true}, 44 | "default": {}, 45 | "location_names": ["Value", "Disk", "Memory"], 46 | "partition_memory_usage": "all" 47 | } 48 | }, 49 | "merges": { 50 | "Write": { 51 | "required": {"name": "Distributing"}, 52 | "default": {}, 53 | "location_names": ["Remote", "Disk"] 54 | }, 55 | "Merge": { 56 | "expected": ["key"], 57 | "required": {"name": "Distributing"}, 58 | "default": {}, 59 | "location_names": ["Memory"] 60 | }, 61 | "CopyTo": { 62 | "required": {"name": "Replicating", "replication": "all"}, 63 | "default": {}, 64 | "location_names": ["Memory", "Disk", "Remote", "Client"] 65 | }, 66 | "ReduceAndCopyTo": { 67 | "expected": ["reducer"], 68 | "required": {"name": "Replicating", "replication": null, "with_key": false}, 69 | "default": {}, 70 | "location_names": ["Memory", "Disk", "Remote"] 71 | }, 72 | "ReduceWithKeyAndCopyTo": { 73 | "expected": ["key", "reducer"], 74 | "required": {"name": "Replicating", "replication": null, "with_key": true}, 75 | "default": {}, 76 | "location_names": ["Memory", "Disk", "Remote"] 77 | } 78 | }, 79 | "casts": { 80 | "Reduce": { 81 | "src_expected": ["reducer"], 82 | "src_required": {"name": "Replicating", "replication": null, "with_key": false}, 83 | "dst_required": {"name": "Replicating", "replication": "all"}, 84 | "dst_default": {}, 85 | "matching": [] 86 | }, 87 | "ReduceWithKey": { 88 | "src_expected": ["reducer", "key"], 89 | "src_required": {"name": "Replicating", "replication": null, "with_key": true}, 90 | "dst_required": {"name": "Replicating", "replication": "all"}, 91 | 
"dst_default": {}, 92 | "matching": [] 93 | }, 94 | "Rebalance": { 95 | "src_required": {"name": "Distributing"}, 96 | "dst_required": {"name": "Distributing", "distribution": "blocked", "balanced": true, "id": "!"}, 97 | "dst_default": {}, 98 | "matching": ["dim"] 99 | }, 100 | "Distribute": { 101 | "src_required": {"name": "Replicating", "replication": "all"}, 102 | "dst_required": {"name": "Distributing", "distribution": "blocked", "id": "!"}, 103 | "dst_expected": ["key"], 104 | "dst_default": {}, 105 | "matching": [] 106 | }, 107 | "Consolidate": { 108 | "src_expected": ["key"], 109 | "src_required": {"name": "Distributing"}, 110 | "dst_required": {"name": "Replicating", "replication": "all"}, 111 | "dst_default": {}, 112 | "matching": [] 113 | }, 114 | "Shuffle": { 115 | "src_required": {"name": "Distributing"}, 116 | "dst_required": {"name": "Distributing", "distribution": "grouped", "id": "!"}, 117 | "dst_default": {"rev": false}, 118 | "dst_expected": ["key", "divisions"], 119 | "matching": [] 120 | }, 121 | "DistributeAndShuffle": { 122 | "src_required": {"name": "Replicating", "replication": "all"}, 123 | "dst_required": {"name": "Distributing", "distribution": "grouped", "id": "!"}, 124 | "dst_default": {"rev": false}, 125 | "dst_expected": ["key", "divisions"], 126 | "matching": [] 127 | } 128 | } 129 | } -------------------------------------------------------------------------------- /Banyan/src/jobs.jl: -------------------------------------------------------------------------------- 1 | ######## 2 | # Jobs # 3 | ######## 4 | 5 | # Process-local dictionary mapping from job IDs to instances of `Job` 6 | global jobs = Dict() 7 | 8 | # TODO: Allow for different threads to use different jobs by making this 9 | # thread-local. For now, we only allow a single `Job` for each process 10 | # and no sharing between threads; i.e., the burden is on the user to make 11 | # sure they are synchronizing access to the `Job` if using the same one from 12 | # different threads. 13 | # TODO: Allow for different threads to use the same job by wrapping each 14 | # `Job` in `jobs` in a mutex to allow only one to use it at a time. Further 15 | # modifications would be required to make sharing a job between threads 16 | # ergonomic. 17 | global current_job_id = nothing 18 | global current_job_status = nothing 19 | 20 | function set_job_id(job_id::Union{JobId, Nothing}) 21 | global current_job_id 22 | current_job_id = job_id 23 | end 24 | 25 | function get_job_id()::JobId 26 | global current_job_id 27 | if isnothing(current_job_id) 28 | error("No job selected using `with_job` or `create_job` or `set_job_id`") 29 | end 30 | current_job_id 31 | end 32 | 33 | function get_job() 34 | global jobs 35 | jobs[get_job_id()] 36 | end 37 | 38 | get_cluster_name() = get_job().cluster_name 39 | 40 | function create_job(; 41 | cluster_name::String = nothing, 42 | nworkers::Integer = 2, 43 | banyanfile_path::String = "", 44 | logs_location::String = "", 45 | sample_rate::Integer = nworkers, 46 | kwargs..., 47 | ) 48 | global jobs 49 | global current_job_id 50 | global current_job_status 51 | 52 | @debug "Creating job" 53 | if cluster_name == "" 54 | cluster_name = nothing 55 | end 56 | if banyanfile_path == "" 57 | banyanfile_path = nothing 58 | end 59 | if logs_location == "" 60 | logs_location = "client" 61 | end 62 | 63 | # Configure 64 | configure(; kwargs...) 
65 | cluster_name = if isnothing(cluster_name)
66 | clusters = list_clusters()
67 | if length(clusters) == 0
68 | error("Failed to create job: you don't have any clusters created")
69 | end
70 | first(keys(clusters))
71 | else
72 | cluster_name
73 | end
74 |
75 | # Merge Banyanfile if provided
76 | job_configuration = Dict{String,Any}(
77 | "cluster_name" => cluster_name,
78 | "num_workers" => nworkers,
79 | "logs_location" => "s3", #logs_location,
80 | )
81 | if !isnothing(banyanfile_path)
82 | banyanfile = load_json(banyanfile_path)
83 | merge_banyanfile_with_defaults!(banyanfile)
84 | for included in banyanfile["include"]
85 | merge_banyanfile_with!(banyanfile, getnormpath(banyanfile_path, included), :job, :creation)
86 | end
87 | job_configuration["banyanfile"] = banyanfile
88 | end
89 |
90 | # Create the job
91 | @debug "Sending request for job creation"
92 | job_id = send_request_get_response(:create_job, job_configuration)
93 | job_id = job_id["job_id"]
94 | @debug "Creating job $job_id"
95 |
96 | # Store in global state
97 | current_job_id = job_id
98 | current_job_status = "running"
99 | jobs[current_job_id] = Job(cluster_name, current_job_id, nworkers, sample_rate)
100 |
101 | @debug "Finished creating job $job_id"
102 | return job_id
103 | end
104 |
105 | function destroy_job(job_id::JobId; failed = false, kwargs...)
106 | global current_job_id
107 | global current_job_status
108 |
109 | # Only override the keyword argument if the job is known to have failed
110 | if current_job_status == "failed"
111 | failed = true
112 | end
113 |
114 |
115 | # configure(; kwargs...)
116 |
117 | @debug "Destroying job $job_id"
118 | send_request_get_response(
119 | :destroy_job,
120 | Dict{String,Any}("job_id" => job_id, "failed" => failed),
121 | )
122 |
123 | # Remove from global state
124 | if !isnothing(current_job_id) && get_job_id() == job_id
125 | set_job_id(nothing)
126 | end
127 | delete!(jobs, job_id)
128 | end
129 |
130 | function get_jobs(cluster_name=nothing; kwargs...)
131 | @debug "Downloading description of jobs in each cluster"
132 | configure(; kwargs...)
133 | filters = Dict()
134 | if !isnothing(cluster_name)
135 | filters["cluster_name"] = cluster_name
136 | end
137 | response =
138 | send_request_get_response(:describe_jobs, Dict{String,Any}("filters"=>filters))
139 | response["jobs"]
140 | end
141 |
142 | function destroy_all_jobs(cluster_name::String; kwargs...)
143 | @debug "Destroying all jobs for cluster"
144 | configure(; kwargs...)
145 | jobs = get_jobs(cluster_name)
146 | for (job_id, job) in jobs
147 | if job["status"] == "running"
148 | destroy_job(job_id; kwargs...)
149 | end
150 | end
151 | end
152 |
153 | # destroy_job() = destroy_job(get_job_id())
154 |
155 | # mutable struct Job
156 | # job_id::JobId
157 | # failed::Bool
158 |
159 | # # function Job(; kwargs...)
160 | # # new_job_id = create_job(; kwargs...)
161 | # # #new_job_id = create_job(;cluster_name="banyancluster", nworkers=2)
162 | # # new_job = new(new_job_id)
163 | # # finalizer(new_job) do j
164 | # # destroy_job(j.job_id)
165 | # # end
166 |
167 | # # new_job
168 | # # end
169 | # end
170 |
171 | function with_job(f::Function; kwargs...)
172 | # This is not a constructor; this is just a function that ensures that
173 | # every job is always destroyed even in the case of an error
174 | j = create_job(;kwargs...)
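# Track whether the catch block has already destroyed the job so that the
# finally block doesn't destroy it a second time.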
175 | j_destroyed = false
176 | try
177 | f(j)
178 | catch err
179 | destroy_job(j)
180 | j_destroyed = true
181 | rethrow(err)
182 | finally
183 | if !j_destroyed
184 | destroy_job(j)
185 | end
186 | end
187 | end
188 |
189 | function clear_jobs()
190 | global jobs
191 | global current_job_id
192 | if !isnothing(current_job_id)
193 | empty!(jobs[current_job_id].pending_requests)
194 | end
195 | end
196 |
197 | # TODO: Fix bug causing nbatches to be 2 when it should be 25
198 | # TODO: Fix finalizer of Job
199 | -------------------------------------------------------------------------------- /Banyan/test/test_cluster.jl: --------------------------------------------------------------------------------
1 | @testset "Cluster Management" begin
2 | run("Configuration") do
3 | configure(
4 | username = "adminuser",
5 | api_key = "a41ef8a693682dd93189e71676b2cdc9",
6 | ec2_key_pair_name = "EC2ConnectKeyPairTest",
7 | region = "us-west-2",
8 | )
9 | end
10 |
11 | # TODO: Add tests for creating and destroying clusters and for updating with
12 | # more complex Banyanfiles. The point of additional tests for updating is to
13 | # ensure that we parse, merge, and load Banyanfiles correctly, and such
14 | # tests should cover all the different fields in a Banyanfile including
15 | # `includes` for example.
16 |
17 | run("Updating Cluster") do
18 | update_cluster(
19 | name = "c0416",
20 | banyanfile_path = "file://res/Banyanfile.json",
21 | )
22 | end
23 | end
24 |
25 |
26 | # Tests create_cluster and calls get_clusters to ensure correct behavior
27 | # expected_presence (bool): indicates whether the cluster should be listed
28 | # expected_status (string or symbol): the status the cluster should have if it is listed
29 | # kwargs : arguments for create_cluster
30 | function test_create_cluster(expected_presence, expected_status; kwargs...)
31 | create_cluster(; kwargs...)
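# Fetch the cluster list and check that the cluster's presence matches
# expected_presence and, if it is listed, that its status matches
# expected_status.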
32 | clusters = get_clusters()
33 | @test haskey(clusters, kwargs[:name]) == expected_presence
34 | if (haskey(clusters, kwargs[:name]))
35 | @test clusters[kwargs[:name]].status == expected_status
36 | end
37 | end
38 |
39 |
40 | @testset "Cluster Creation - Should Fail Immediately" begin
41 | run("createcluster_bad_username") do
42 | configure(
43 | username = "BadUser",
44 | api_key = "7FBKWAv3ld0eOfghSwhX_g",
45 | ec2_key_pair_name = "EC2ConnectKeyPairTest",
46 | region = "us-west-2",
47 | )
48 | test_create_cluster(
49 | false,
50 | "";
51 | name = "badcluster",
52 | instance_type = "t3.large",
53 | banyanfile_path = "file://res/Banyanfile.json",
54 | )
55 | end
56 | run("createcluster_bad_api_key") do
57 | configure(
58 | username = "BanyanTest",
59 | api_key = "invalidapikey",
60 | ec2_key_pair_name = "EC2ConnectKeyPairTest",
61 | region = "us-west-2",
62 | )
63 | test_create_cluster(
64 | false,
65 | "";
66 | name = "badcluster",
67 | instance_type = "t3.large",
68 | banyanfile_path = "file://res/Banyanfile.json",
69 | )
70 | end
71 | run("createcluster_bad_ec2_key_pair_name") do
72 | configure(
73 | username = "BanyanTest",
74 | api_key = "7FBKWAv3ld0eOfghSwhX_g",
75 | ec2_key_pair_name = "NoEC2KeyPair",
76 | region = "us-west-2",
77 | )
78 | test_create_cluster(
79 | false,
80 | "";
81 | name = "badcluster",
82 | instance_type = "t3.large",
83 | banyanfile_path = "file://res/Banyanfile.json",
84 | )
85 | end
86 | run("createcluster_bad_region") do
87 | configure(
88 | username = "BanyanTest",
89 | api_key = "7FBKWAv3ld0eOfghSwhX_g",
90 | ec2_key_pair_name = "EC2ConnectKeyPairTest",
91 | region = "noregion",
92 | )
93 | test_create_cluster(
94 | false,
95 | "";
96 | name = "badcluster",
97 | instance_type = "t3.large",
98 | banyanfile_path = "file://res/Banyanfile.json",
99 | )
100 | end
101 | run("createcluster_bad_instance_type") do
102 | configure(
103 | username = "BanyanTest",
104 | api_key = "7FBKWAv3ld0eOfghSwhX_g",
105 | ec2_key_pair_name = "EC2ConnectKeyPairTest",
106 | region = "us-west-2",
107 | )
108 | test_create_cluster(
109 | false,
110 | "";
111 | name = "badcluster",
112 | instance_type = "a1.metal",
113 | banyanfile_path = "file://res/Banyanfile.json",
114 | )
115 | end
116 | run("createcluster_bad_banyanfile") do
117 | configure(
118 | username = "BanyanTest",
119 | api_key = "7FBKWAv3ld0eOfghSwhX_g",
120 | ec2_key_pair_name = "EC2ConnectKeyPairTest",
121 | region = "us-west-2",
122 | )
123 | test_create_cluster(
124 | false,
125 | "";
126 | name = "badcluster",
127 | instance_type = "t3.large",
128 | banyanfile_path = "file://res/Banyanfile_badcluster.json",
129 | )
130 | end
131 | end
132 |
133 |
134 | @testset "Cluster Creation" begin
135 | run("createcluster_region") do
136 | configure(
137 | username = "BanyanTest",
138 | api_key = "7FBKWAv3ld0eOfghSwhX_g",
139 | ec2_key_pair_name = "EC2ConnectKeyPairTest",
140 | region = "us-east-1",
141 | )
142 | test_create_cluster(
143 | true,
144 | :creating;
145 | name = "cluster_useast1",
146 | instance_type = "t3.large",
147 | banyanfile_path = "file://res/Banyanfile.json",
148 | )
149 | end
150 | # run(
151 | # "createcluster_s3bucket",
152 | # () -> begin
153 | # configure(;
154 | # username = "BanyanTest",
155 | # api_key = "7FBKWAv3ld0eOfghSwhX_g",
156 | # ec2_key_pair_name = "EC2ConnectKeyPairTest",
157 | # region = "us-east-1",
158 | # )
159 | # test_create_cluster(
160 | # true,
161 | # :creating,
162 | # name = "cluster_useast1",
163 | # instance_type = "t3.large",
164 | # banyanfile_path =
"file://res/Banyanfile.json", 165 | # s3_bucket_arn = "TODOTODOTODTODOTO", 166 | # ) 167 | # end, 168 | # ) 169 | run("createcluster_iam") do 170 | configure( 171 | username = "BanyanTest", 172 | api_key = "7FBKWAv3ld0eOfghSwhX_g", 173 | ec2_key_pair_name = "EC2ConnectKeyPairTest", 174 | region = "us-east-1", 175 | ) 176 | test_create_cluster( 177 | true, 178 | :creating; 179 | name = "cluster_useast1", 180 | instance_type = "t3.large", 181 | banyanfile_path = "file://res/Banyanfile.json", 182 | s3_bucket_arn = "TODOTODOTODTODOTO", 183 | ) 184 | end 185 | end 186 | -------------------------------------------------------------------------------- /Banyan/src/samples.jl: -------------------------------------------------------------------------------- 1 | ############################################################### 2 | # Sample that caches properties returned by an AbstractSample # 3 | ############################################################### 4 | 5 | mutable struct Sample 6 | # The sample itself 7 | value::Any 8 | # Properties of the sample 9 | properties::Dict{Symbol,Any} 10 | 11 | function Sample( 12 | value::Any = nothing; 13 | properties::Dict{Symbol,Any} = Dict{Symbol,Any}(), 14 | sample_rate=get_job().sample_rate, 15 | total_memory_usage=nothing 16 | ) 17 | newsample = new(value, properties) 18 | 19 | # Fill in properties if possible 20 | if !isnothing(total_memory_usage) 21 | setsample!(newsample, :memory_usage, round(total_memory_usage / sample_rate)) 22 | end 23 | setsample!(newsample, :rate, sample_rate) 24 | 25 | newsample 26 | end 27 | # TODO: Un-comment if needed 28 | # Sample(sample::Sample, properties::Vector{String}) = 29 | # new(sample.value, sample.stale, Dict( 30 | # sample.sample_properties[prop] 31 | # for prop in properties 32 | # )) 33 | end 34 | 35 | ExactSample(value::Any = nothing; kwargs...) = Sample(value; sample_rate=1, kwargs...) 36 | 37 | # TODO: Lazily compute samples by storing sample computation in a DAG if its 38 | # getting too expensive 39 | sample(fut::AbstractFuture) = sample(get_location(fut).sample) 40 | sample(sample::Sample) = sample.value 41 | 42 | sample(fut::AbstractFuture, propertykeys...) = sample(get_location(fut).sample, propertykeys...) 43 | function sample(s::Sample, propertykeys...) 44 | properties = s.properties 45 | for (i, propertykey) in enumerate(propertykeys) 46 | properties = get!( 47 | properties, 48 | propertykey, 49 | if i < length(propertykeys) 50 | Dict() 51 | else 52 | sample(s.value, propertykeys...) 53 | end 54 | ) 55 | end 56 | properties 57 | end 58 | 59 | setsample!(fut::AbstractFuture, value) = setsample!(get_location(fut).sample, value) 60 | function setsample!(sample::Sample, value) 61 | sample.value = value 62 | end 63 | 64 | setsample!(fut::AbstractFuture, propertykeys...) = setsample!(get_location(fut).sample, propertykeys...) 65 | function setsample!(sample::Sample, propertykeys...) 
66 | if length(propertykeys) == 1 67 | setsample!(sample, first(propertykeys)) 68 | else 69 | properties = sample.properties 70 | propertyvalue = last(propertykeys) 71 | propertykeys = propertykeys[1:end-1] 72 | for (i, propertykey) in enumerate(propertykeys) 73 | if i < length(propertykeys) 74 | properties = get!(properties, propertykey, Dict()) 75 | end 76 | end 77 | properties[last(propertykeys)] = propertyvalue 78 | end 79 | end 80 | 81 | # TODO: For futures with locations like Size, "scale up" the computed sample 82 | # for a useful approximation of things like length of an array 83 | 84 | #################################################################### 85 | # AbstractSample to be implemented by anything that can be sampled # 86 | #################################################################### 87 | 88 | # NOTE: We use strings for things that will be serialized to JSON and symbols 89 | # for everything else 90 | 91 | # NOTE: We use upper-camel-case for user-facing names (like names of PTs) and 92 | # all-caps-snake-case for anything internal (like names of constraints) 93 | 94 | # The purpose of the `sample` function is to allow for computing various 95 | # properties of the sample by property key name instead of an explicit 96 | # function call. This makes it easier for the `sample` and `setsample!` 97 | # functions for `Future`s to compute and cache samples. 98 | 99 | sample(as::Any, properties...) = 100 | if length(properties) == 1 101 | if first(properties) == :memory_usage 102 | sample_memory_usage(as) 103 | elseif first(properties) == :rate 104 | # This is the default but the `Sample` constructor overrides this 105 | # before-hand to allow some samples to be "exact" with a sample 106 | # rate of 1 107 | get_job().sample_rate 108 | elseif first(properties) == :keys 109 | sample_keys(as) 110 | elseif first(properties) == :axes 111 | sample_axes(as) 112 | elseif first(properties) == :groupingkeys 113 | # This is just the initial value for grouping keys. Calls to 114 | # `keep_*` functions will expand it. 
115 | [] 116 | else 117 | # println(typeof(as)) 118 | # println(typeof(as) <: Any) 119 | throw(ArgumentError("Invalid sample properties: $properties")) 120 | end 121 | elseif length(properties) <= 2 && first(properties) == :statistics 122 | Dict() 123 | elseif length(properties) == 3 && first(properties) == :statistics 124 | key = properties[2] 125 | query = properties[3] 126 | if query == :max_ngroups 127 | sample_max_ngroups(as, key) 128 | elseif query == :divisions 129 | sample_divisions(as, key) 130 | elseif query == :min 131 | sample_min(as, key) 132 | elseif query == :max 133 | sample_max(as, key) 134 | else 135 | throw(ArgumentError("Invalid sample properties: $properties")) 136 | end 137 | elseif length(properties) == 5 && first(properties) == :statistics 138 | key, query, minvalue, maxvalue = properties[2:end] 139 | if query == :percentile 140 | sample_percentile(as, key, minvalue, maxvalue) 141 | else 142 | throw(ArgumentError("Invalid sample properties: $properties")) 143 | end 144 | else 145 | throw(ArgumentError("Invalid sample properties: $properties")) 146 | end 147 | 148 | sample_memory_usage(as::Any) = total_memory_usage(as) 149 | 150 | # Implementation error 151 | impl_error(fn_name, as) = error("$fn_name not implemented for $(typeof(as))") 152 | 153 | # Functions to implement for Any (e.g., for DataFrame or 154 | # Array) 155 | sample_axes(as::Any) = impl_error("sample_axes", as) 156 | sample_keys(as::Any) = impl_error("sample_keys", as) 157 | sample_divisions(as::Any, key) = impl_error("sample_divisions", as) 158 | sample_percentile(as::Any, key, minvalue, maxvalue) = impl_error("sample_percentile", as) 159 | sample_max_ngroups(as::Any, key) = impl_error("sample_max_ngroups", as) 160 | sample_min(as::Any, key) = impl_error("sample_min", as) 161 | sample_max(as::Any, key) = impl_error("sample_max", as) -------------------------------------------------------------------------------- /Banyan/src/partitions.jl: -------------------------------------------------------------------------------- 1 | ############################# 2 | # Partition type references # 3 | ############################# 4 | 5 | const PartitionTypeReference = Tuple{ValueId,Integer} 6 | 7 | ############################ 8 | # Partitioning constraints # 9 | ############################ 10 | 11 | pt_ref_to_jl(pt_ref) = 12 | if pt_ref isa Tuple 13 | (convert(Future, pt_ref[1]).value_id, pt_ref[2] - 1) 14 | else 15 | (convert(Future, pt_ref).value_id, 0) 16 | end 17 | 18 | pt_refs_to_jl(refs) = 19 | [pt_ref_to_jl(ref) for ref in refs] 20 | 21 | struct PartitioningConstraintOverGroup 22 | type::String 23 | args::Vector{PartitionTypeReference} 24 | end 25 | 26 | struct PartitioningConstraintOverGroups 27 | type::String 28 | args::Vector{Vector{PartitionTypeReference}} 29 | end 30 | 31 | const PartitioningConstraint = Union{PartitioningConstraintOverGroup, PartitioningConstraintOverGroups} 32 | 33 | to_jl(pc::PartitioningConstraint) = Dict("type" => pc.type, "args" => pc.args) 34 | 35 | arg_to_jl_for_co(arg) = 36 | if arg isa Vector 37 | pt_refs_to_jl(arg) 38 | else 39 | [pt_ref_to_jl(arg)] 40 | end 41 | 42 | function constraint_for_co(args)::PartitioningConstraintOverGroups 43 | if any(arg isa Vector for arg in args) 44 | args = [arg_to_jl_for_co(arg) for arg in args] 45 | PartitioningConstraintOverGroups("CO_GROUP", args) 46 | else 47 | PartitioningConstraintOverGroups("CO", pt_refs_to_jl(args)) 48 | end 49 | end 50 | 51 | # TODO: Support Ordered 52 | Co(args...) = constraint_for_co(args) 53 | Cross(args...) 
= PartitioningConstraintOverGroup("CROSS", pt_refs_to_jl(args))
54 | Equal(args...) = PartitioningConstraintOverGroup("EQUAL", pt_refs_to_jl(args))
55 | Sequential(args...) =
56 | PartitioningConstraintOverGroup("SEQUENTIAL", pt_refs_to_jl(args))
57 | Match(args...) = PartitioningConstraintOverGroup("MATCH", pt_refs_to_jl(args))
58 | MatchOn(on, args...) =
59 | PartitioningConstraintOverGroup(
60 | "MATCH_ON=" * string(on),
61 | pt_refs_to_jl(args),
62 | )
63 | AtMost(npartitions, args...) =
64 | PartitioningConstraintOverGroup(
65 | "AT_MOST=$npartitions",
66 | pt_refs_to_jl(args)
67 | )
68 | ScaleBy(arg, factor::Real = 1.0, relative_to...) =
69 | PartitioningConstraintOverGroup(
70 | "SCALE_BY=$factor",
71 | pt_refs_to_jl([arg; relative_to...])
72 | )
73 |
74 | # TODO: Make the above constraint constructors produce dictionaries that have
75 | # fields that make sense and are specialized for each one. This will reduce
76 | # a significant amount of messy and hard-to-read code here (e.g., what in the
77 | # world is a PartitioningConstraintOverGroup vs. a
78 | # PartitioningConstraintOverGroups).
79 |
80 | # NOTE: ScaleBy constraints accept PT references but only the values of PT
81 | # references where the index is 1 are taken because we only scale relative
82 | # to the memory usage that is split by the first PT in the PT compositions
83 | # referenced
84 |
85 | # NOTE: If you require a constraint for a particular PT, the onus is on you to
86 | # ensure that wherever you use a value with that PT assigned, you always
87 | # have the PTs that are referenced by the constraint. For example, if you use
88 | # an AtMost constraint which references both PTs from a PT composition for a
89 | # value where the first PT splits across workers and the second across batches,
90 | # you need to ensure that anywhere you use the value, you actually do have a
91 | # PT composition of length 2.
92 |
93 | # NOTE: Currently, only AtMost and ScaleBy are supported as PT-level
94 | # constraints (meaning they are included as part of a PT so that the PT
95 | # cannot be applied to a variable unless the constraints are also enforced)
96 | # while ScaleBy may not be used as PA-level constraints (constraints that are
97 | # applicable only for a single code region annotated with a PA)
98 |
99 | mutable struct PartitioningConstraints
100 | constraints::Vector{Union{PartitioningConstraint, Function}}
101 | end
102 |
103 | PartitioningConstraints() = PartitioningConstraints([])
104 |
105 | function to_jl(constraints::PartitioningConstraints)
106 | return Dict(
107 | "constraints" =>
108 | [to_jl(constraint) for constraint in constraints.constraints],
109 | )
110 | end
111 |
112 | ###################
113 | # Partition types #
114 | ###################
115 |
116 | const PartitionTypeParameters = Dict{String, Any}
117 |
118 | mutable struct PartitionType
119 | parameters::PartitionTypeParameters
120 | constraints::PartitioningConstraints
121 |
122 | PartitionType(
123 | parameters::Dict{String, <:Any} = PartitionTypeParameters(),
124 | constraints::PartitioningConstraints = PartitioningConstraints(),
125 | ) = new(parameters, constraints)
126 | PartitionType(s::String) = new(Dict("name" => s), PartitioningConstraints())
127 | PartitionType(parameters::PartitionTypeParameters) = new(parameters, PartitioningConstraints())
128 |
129 | function PartitionType(args::Union{String, Pair{String,<:Any}, PartitioningConstraint, Function}...)
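# Each argument either sets the PT's name (a String), adds a parameter
# (a name => value Pair), or attaches a PT-level constraint (a
# PartitioningConstraint or a Function producing one).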
130 | parameters = Dict() 131 | constraints = PartitioningConstraints() 132 | 133 | # Construct parameters and constraints from arguments 134 | for arg in args 135 | if arg isa String 136 | parameters["name"] = arg 137 | elseif arg isa Pair 138 | parameters[first(arg)] = last(arg) 139 | elseif arg isa PartitioningConstraint || arg isa Function 140 | push!(constraints.constraints, arg) 141 | else 142 | throw(ArgumentError("Expected either a partition type parameter or constraint")) 143 | end 144 | end 145 | 146 | new(parameters, constraints) 147 | end 148 | end 149 | 150 | # We probably need this so we can iterate over PTs produced by Grouped and then 151 | # check the key property 152 | function Base.getproperty(pt::PartitionType, name::Symbol) 153 | if hasfield(PartitionType, name) 154 | return getfield(pt, name) 155 | end 156 | 157 | n = string(name) 158 | if haskey(pt.parameters, n) 159 | return pt.parameters[n] 160 | end 161 | error("$name not found in partition type parameters") 162 | end 163 | 164 | function to_jl(pt::PartitionType) 165 | # Interpret bangs as random IDs 166 | # TODO: Use this in the backend to interpret pt_lib_info.json 167 | for (k, v) in pt.parameters 168 | if v == "!" 169 | pt.parameters[k] = randstring(8) 170 | end 171 | end 172 | 173 | # Construct dictionary 174 | Dict("parameters" => pt.parameters, "constraints" => to_jl(pt.constraints)) 175 | end 176 | 177 | ############################## 178 | # Partition type composition # 179 | ############################## 180 | 181 | # This is mutable so that we can append PTs 182 | mutable struct PartitionTypeComposition 183 | pts::Vector{PartitionType} 184 | end 185 | 186 | to_jl(ptc::PartitionTypeComposition) = [to_jl(pt) for pt in ptc.pts] 187 | 188 | ############################## 189 | # Partition type combinators # 190 | ############################## 191 | 192 | const PTOrPTUnion = Union{PartitionType,Vector{PartitionType}} 193 | 194 | Base.:&(a::PartitionType, b::PartitionType) = 195 | if all( 196 | a.parameters[param_name] == b.parameters[param_name] for 197 | param_name in keys(a.parameters) if param_name in keys(b.parameters) 198 | ) 199 | PartitionType( 200 | merge(a.parameters, b.parameters), 201 | PartitioningConstraints( 202 | [a.constraints.constraints; b.constraints.constraints] 203 | ) 204 | ) 205 | else 206 | nothing 207 | end 208 | 209 | Base.:&(a::Vector{PartitionType}, b::PartitionType) = 210 | filter(pt->!isnothing(pt), [pt & b for pt in a]) 211 | Base.:&(a::PartitionType, b::Vector{PartitionType}) = b & a 212 | Base.:&(a::Vector{PartitionType}, b::Vector{PartitionType}) = 213 | filter(pt->!isnothing(pt), [aa & bb for aa in a for bb in b]) 214 | Base.:|(a::PTOrPTUnion, b::PTOrPTUnion) = [a; b] 215 | 216 | ######################### 217 | # Partition annotations # 218 | ######################### 219 | 220 | # TODO: Rename Partitions to PartitionTypeBinding and keep Partitioning as is 221 | mutable struct Partitions 222 | # TODO: Only use either PT stack or PT composition to be consistent in 223 | # terminology 224 | pt_stacks::Dict{ValueId,PartitionTypeComposition} 225 | end 226 | 227 | Partitions() = Partitions(Dict()) 228 | 229 | function to_jl(p::Partitions) 230 | # NOTE: This assumes that the PT compositions in `p.pt_stacks` are _not_ 231 | # delayed 232 | return Dict( 233 | "pt_stacks" => 234 | Dict(v => ptc |> to_jl for (v, ptc) in p.pt_stacks), 235 | ) 236 | end 237 | 238 | mutable struct PartitionAnnotation 239 | partitions::Partitions 240 | constraints::PartitioningConstraints 241 | end 242 
| 243 | PartitionAnnotation() = PartitionAnnotation(Partitions(), PartitioningConstraints())
244 |
245 | function to_jl(pa::PartitionAnnotation)
246 | return Dict(
247 | "partitions" => to_jl(pa.partitions),
248 | "constraints" => to_jl(pa.constraints),
249 | )
250 | end
251 | -------------------------------------------------------------------------------- /BanyanArrays/test/test_l1_l2.jl: --------------------------------------------------------------------------------
1 | # TODO: Make FutureArray a subtype of AbstractArray
2 | struct FutureArray{T,N}
3 | data::Future
4 | size::Future
5 | end
6 |
7 | # Defining a method like the following is helpful if the Future in a data type
8 | # isn't just the first field of the type that is a Future. In such a case,
9 | # a custom future function may be defined but users of this data type must
10 | # be sure to first call future on their data type before calling
11 | # annotations functions like `mem`/`val`/`pt`/`pc`/`mut`/`loc`/`src`/`dst`.
12 | Banyan.future(ba::T) where {T<:FutureArray} = ba.data
13 |
14 | function partitioned_vector(ba::FutureArray{T,1}) where {T}
15 | res_data = future()
16 | res_size = future()
17 | target_size = ba.size
18 |
19 | mem(ba, res_data)
20 |
21 | pt(ba, Block())
22 | pt(res_data, Block())
23 | pt(res_size, Replicate())
24 | pt(target_size, Replicate())
25 |
26 | pc(Match(ba, res_data))
27 | pc(Match(res_size, target_size))
28 | mutated(res_data)
29 | mutated(res_size)
30 |
31 | return ba, res_data, res_size, target_size
32 | end
33 |
34 | function partitioned_vector_by_vector(
35 | ba::FutureArray{T,1},
36 | other::FutureArray{T,1},
37 | ) where {T}
38 | res_data = future()
39 | res_size = future()
40 | target_size = ba.size
41 | mem(ba, other, res_data)
42 |
43 | pt(ba, BlockBalanced())
44 | pt(other, BlockBalanced())
45 | pt(res_data, Block())
46 | pt(res_size, Replicate())
47 | pt(target_size, Replicate())
48 |
49 | pc(Match(ba, other, res_data))
50 | pc(Match(res_size, target_size))
51 | mutated(res_data)
52 | mutated(res_size)
53 |
54 | return ba, other, res_data, res_size, target_size
55 | end
56 |
57 | function partitioned_vector_by_scalar(ba::FutureArray{T,1}, other::T) where {T}
58 | other = future(other)
59 | res_data = future()
60 | res_size = future()
61 | target_size = ba.size
62 | # TODO: See whether ba.size is keeping arrays around for longer
63 |
64 | mem(ba, res_data)
65 | val(other)
66 |
67 | pt(ba, Block())
68 | pt(res_data, Block())
69 | pt(other, Replicate())
70 | pt(res_size, Replicate())
71 | pt(target_size, Replicate())
72 |
73 | pc(Match(ba, res_data))
74 | pc(Match(res_size, target_size))
75 | mutated(res_data)
76 | mutated(res_size)
77 |
78 | return ba, other, res_data, res_size, target_size
79 | end
80 |
81 | function partitioned_replicated_value(x)
82 | x = future(x)
83 | val(x)
84 | pt(x, Replicate())
85 | return x
86 | end
87 |
88 | function ones(::Type{T}, len::Integer)::FutureArray{T,1} where {T<:Number}
89 | data = future()
90 | data_size = future((len,))
91 | created_size = future(len) # TODO: Support n-dimensional arrays with Match
92 | ty = future(T)
93 |
94 | mem(data, len, Float64)
95 | val(data_size)
96 | val(created_size)
97 | val(ty)
98 |
99 | pt(data, Block())
100 | pt(created_size, Div())
101 | pt(ty, Replicate())
102 | mutated(data)
103 |
104 | @partitioned data created_size ty begin
105 | data = ones(ty, created_size)
106 |
107 |
108 | end
109 |
110 | FutureArray{T,1}(data, data_size)
111 | end
112 |
113 | # Binary vector-vector operations
114 | for op in (:+, :-)
115 | @eval begin
116 | function Base.$op(
117 | ba::FutureArray{T,1},
118 | other::FutureArray{T,1},
119 | )::FutureArray{T,1} where {T}
120 | ba, other, res_data, res_size, target_size =
121 | partitioned_vector_by_vector(ba, other)
122 | op = partitioned_replicated_value($op)
123 | @partitioned op ba other res_data res_size target_size begin
124 | res_data = op(ba, other)
125 | res_size = target_size
126 | end
127 |
128 | FutureArray{T,1}(res_data, res_size)
129 | end
130 | end
131 | end
132 |
133 | # Binary vector-scalar operations
134 |
135 | for op in (:*, :/)
136 | @eval begin
137 | function Base.$op(
138 | ba::FutureArray{T,1},
139 | other::T,
140 | )::FutureArray{T,1} where {T}
141 | ba, other, res_data, res_size, target_size =
142 | partitioned_vector_by_scalar(ba, other)
143 | op = partitioned_replicated_value($op)
144 | @partitioned op ba other res_data res_size target_size begin
145 | res_data = op(ba, other)
146 | res_size = target_size
147 | end
148 |
149 | FutureArray{T,1}(res_data, res_size)
150 | end
151 | end
152 | end
153 |
154 | for op in (:*,)
155 | @eval begin
156 | function Base.$op(
157 | other::T,
158 | ba::FutureArray{T,1},
159 | )::FutureArray{T,1} where {T}
160 | ba, other, res_data, res_size, target_size =
161 | partitioned_vector_by_scalar(ba, other)
162 | op = partitioned_replicated_value($op)
163 | @partitioned op ba other res_data res_size target_size begin
164 | res_data = op(other, ba)
165 | res_size = target_size
166 | end
167 |
168 | FutureArray{T,1}(res_data, res_size)
169 | end
170 | end
171 | end
172 |
173 | # Broadcasting vector-vector operations
174 | function Base.broadcasted(
175 | op,
176 | ba::FutureArray{T,1},
177 | other::FutureArray{T,1},
178 | )::FutureArray{T,1} where {T}
179 | ba, other, res_data, res_size, target_size =
180 | partitioned_vector_by_vector(ba, other)
181 | op = partitioned_replicated_value(op)
182 | @partitioned op ba other res_data res_size target_size begin
183 | res_data = Base.broadcast(op, ba, other)
184 | res_size = target_size
185 | end
186 |
187 | FutureArray{T,1}(res_data, res_size)
188 | end
189 |
190 | # Broadcasting vector-scalar operations
191 |
192 | function broadcasted(
193 | op,
194 | ba::FutureArray{T,1},
195 | other::T,
196 | )::FutureArray{T,1} where {T}
197 | ba, other, res_data, res_size, target_size =
198 | partitioned_vector_by_scalar(ba, other)
199 | op = partitioned_replicated_value(op)
200 | @partitioned op ba other res_data res_size target_size begin
201 | res_data = Base.broadcast(op, ba, other)
202 | res_size = target_size
203 | end
204 |
205 | FutureArray{T,1}(res_data, res_size)
206 | end
207 |
208 | broadcasted(op, ba::FutureArray{T,1}, ::Val{V}) where {T,V} =
209 | broadcasted(op, ba, T(V))
210 |
211 | Base.broadcasted(op, ba::FutureArray, other) =
212 | broadcasted(op, ba, other)
213 |
214 | # specialized_wrapper might be Base.literal_pow
215 | Base.broadcasted(specialized_wrapper, op, ba::FutureArray, other) =
216 | broadcasted(op, ba, other)
217 |
218 | function Base.broadcasted(
219 | op,
220 | other::T,
221 | ba::FutureArray{T,1},
222 | )::FutureArray{T,1} where {T}
223 | ba, other, res_data, res_size, target_size =
224 | partitioned_vector_by_scalar(ba, other)
225 | op = partitioned_replicated_value(op)
226 | @partitioned op ba other res_data res_size target_size begin
227 | res_data = Base.broadcast(op, other, ba)
228 | res_size = target_size
229 | end
230 |
231 | FutureArray{T,1}(res_data, res_size)
232 | end
233 |
234 | function
Base.broadcasted(op, ba::FutureArray{T,1})::FutureArray{T,1} where {T} 235 | ba, res_data, res_size, target_size = partitioned_vector(ba) 236 | op = partitioned_replicated_value(op) 237 | @partitioned op ba res_data res_size target_size begin 238 | res_data = Base.broadcast(op, ba) 239 | res_size = target_size 240 | end 241 | 242 | FutureArray{T,1}(res_data, res_size) 243 | end 244 | 245 | # Unary operators 246 | for op in (:-, :+) 247 | @eval begin 248 | function Base.$op(ba::FutureArray{T,1})::FutureArray{T,1} where {T} 249 | ba, res_data, res_size, target_size = partitioned_vector(ba) 250 | op = partitioned_replicated_value($op) 251 | @partitioned op ba res_data res_size target_size begin 252 | res_data = op(ba) 253 | res_size = target_size 254 | end 255 | 256 | FutureArray{T,1}(res_data, res_size) 257 | end 258 | end 259 | end 260 | 261 | function run_bs(size::Integer) 262 | price = ones(Float64, size) * 4.0 263 | strike = ones(Float64, size) * 4.0 264 | t = ones(Float64, size) * 4.0 265 | rate = ones(Float64, size) * 4.0 266 | vol = ones(Float64, size) * 4.0 267 | 268 | evaluate(price) 269 | 270 | # c05 = Float64(3.0) 271 | # c10 = Float64(1.5) 272 | # invsqrt2 = 1.0 / sqrt(2.0) 273 | 274 | # # rsig = rate + (vol.^2) * c05 275 | # rsig = vol .^ 2 # TODO: Fix issue in FutureArray where operands are same 276 | # rsig = rsig .* c05 277 | # rsig = rsig .+ rate 278 | 279 | # # rsig 280 | 281 | # # vol_sqrt = vol .* sqrt.(t) 282 | # vol_sqrt = sqrt.(t) 283 | # vol_sqrt = vol_sqrt .* vol 284 | 285 | # # d1 = (log.(price ./ strike) + rsig .* t) ./ vol_sqrt 286 | # d1 = price ./ strike 287 | # d1 = log.(d1) 288 | # tmp = rsig .* t 289 | # d1 = d1 .+ tmp 290 | # d1 = d1 ./ vol_sqrt 291 | 292 | # # d1 293 | 294 | # d2 = d1 .- vol_sqrt 295 | 296 | # # d2 297 | 298 | # # d1 = c05 .+ c05 .* exp.(d1 .* invsqrt2) 299 | # # d2 = c05 .+ c05 .* exp.(d2 .* invsqrt2) 300 | # d1 = d1 * invsqrt2 301 | # d1 = exp.(d1) 302 | # d1 = d1 .* c05 303 | # d1 = d1 .+ c05 304 | # d2 = d2 .* invsqrt2 305 | # d2 = exp.(d2) 306 | # d2 = d2 .* c05 307 | # d2 = d2 .+ c05 308 | 309 | # # e_rt = exp.((-rate) .* t) 310 | # e_rt = -rate 311 | # e_rt = e_rt .* t 312 | # e_rt = exp.(e_rt) 313 | 314 | # # call = price .* d1 - e_rt .* strike .* d2 315 | # call = price .* d1 316 | # tmp = e_rt .* strike 317 | # tmp = tmp .* d2 318 | # call = call .- tmp 319 | 320 | # put = e_rt .* strike .* (c10 .- d2) - price .* (c10 .- d1) 321 | 322 | # evaluate(call) 323 | end 324 | 325 | @testset "Black Scholes" begin 326 | run_with_job("Black Scholes", j -> begin 327 | size = Integer(1e9) 328 | run_bs(size) 329 | # evaluate(put) 330 | end) 331 | end 332 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | --------------------------------------------------------------------------------
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /BanyanDataFrames/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Banyan/src/pt_lib_constructors.jl: -------------------------------------------------------------------------------- 1 | # NOTE: Do not construct a PT such that PTs can be fused together or used as-is 2 | # in a way such that there aren't functions for splitting and merging them in 3 | # pt_lib.jl. 
# Note that each splitting and merging function in pt_lib.jl targets specific
# locations; for example, a Div should not be used on a value with a CSV
# location unless there is a splitting function for it.

# Block() = PartitionType(Dict("name" => "Block"))
# Block(dim) = PartitionType(Dict("name" => "Block", "dim" => dim))
# BlockBalanced() = PartitionType(Dict("name" => "Block", "balanced" => true))
# BlockBalanced(dim) =
#     PartitionType(Dict("name" => "Block", "dim" => dim, "balanced" => true))
# BlockUnbalanced() = PartitionType(Dict("name" => "Block", "balanced" => false))
# BlockUnbalanced(dim) =
#     PartitionType(Dict("name" => "Block", "dim" => dim, "balanced" => false))

# Div() = PartitionType(Dict("name" => "Replicate", "dividing" => true))
# Replicated() = PartitionType(Dict("name" => "Replicate", "replicated" => true))
# Reducing(op) = PartitionType(Dict("name" => "Replicate", "replicated" => false, "reducer" => to_jl_value(op)))

# TODO: Generate AtMost and ScaledBy constraints in handling filters and joins
# that introduce data skew and in other operations that explicitly don't

Replicating() = PartitionType("name" => "Replicating", f -> ScaleBy(f, 1.0))
Replicated() = Replicating() & PartitionType("replication" => "all", "reducer" => nothing)
# TODO: Add Replicating(f) to the below if needed for reducing operations on
# large objects such as unique(df::DataFrame)

# TODO: Determine whether the `"reducer" => nothing` should be there
Divided() = Replicating() & PartitionType("divided" => true)
Syncing() = Replicating() & PartitionType("replication" => "one", "reducer" => nothing) # TODO: Determine whether this is really needed
Reducing(op) = Replicating() & PartitionType("replication" => nothing, "reducer" => to_jl_value(op), "with_key" => false)
ReducingWithKey(op) = Replicating() & PartitionType("replication" => nothing, "reducer" => to_jl_value(op), "with_key" => true)
# TODO: Maybe replace banyan_reduce_size_by_key with an anonymous function since that actually _can_ be ser/de-ed
# or instead make there be a reducing type that passes in the key to the reducing functions so it can reduce by that key
# ReducingSize() = PartitionType("replication" => "one", "reducer" => "banyan_reduce_size_by_key")

Distributing() = PartitionType("name" => "Distributing")
Blocked(; along = nothing) =
    if isnothing(along)
        PartitionType("name" => "Distributing", "distribution" => "blocked")
    else
        PartitionType(
            "name" => "Distributing",
            "distribution" => "blocked",
            "key" => along,
        )
    end
Grouped() = PartitionType("name" => "Distributing", "distribution" => "grouped")
# Blocked(;balanced) = PartitionType("name" => "Distributing", "distribution" => "blocked", "balanced" => balanced)
# Grouped(;balanced) = PartitionType("name" => "Distributing", "distribution" => "grouped", "balanced" => balanced)

ScaledBySame(; as) = PartitionType(f -> ScaleBy(f, 1.0, as))
Drifted() = Distributing() & PartitionType("id" => "!")
Balanced() =
    Distributing() & PartitionType("balanced" => true, f -> ScaleBy(f, 1.0))
Unbalanced(; scaled_by_same_as = nothing) =
    if isnothing(scaled_by_same_as)
        Distributing() & PartitionType("balanced" => false)
    else
        Unbalanced() & ScaledBySame(as = scaled_by_same_as)
    end
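
# A minimal usage sketch (hedged): `&` merges parameters and constraints into
# a single PT, while `|` builds a PT union for the scheduler to choose from.
# `x` and `res` below are hypothetical futures in an annotated code region:
#
#     pt(x, Blocked(along = 1) | Replicated())
#     pt(res, Reducing(+))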

# These functions (along with `keep_sample_rate`) allow for managing memory
# usage in annotated code. `keep_sample_rate` allows for setting the sample
# rate as it changes from value to value. Some operations, such as joins,
# actually require a change in sample rate, so propagating this information is
# important and must be done before partition annotations are applied (in
# `partitioned_using`). In the partition annotation itself, we sometimes want
# to set constraints on how we scale memory usage based on how much skew is
# introduced by an operation. Some operations not only change the sample rate
# but also introduce skew, so applying these constraints is important.
# FilteredTo and FilteredFrom help with constraining skew when it is
# introduced through data filtering operations, while MutatedTo and
# MutatedFrom allow for propagating skew for operations where the skew is
# unchanged. Balanced data doesn't have any skew; Balanced and balanced=true
# help to make this clear.
# TODO: Remove this if we don't need
# MutatedRelativeTo(f, mutated_relative_to) = PartitionType(ScaleBy(1.0, f, mutated_relative_to))
# MutatedTo(f, mutated_to) = MutatedRelativeTo(f, mutated_to)
# MutatedFrom(f, mutated_from) = MutatedRelativeTo(f, mutated_from)

Distributed(args...; kwargs...) = Blocked(args...; kwargs...) | Grouped(args...; kwargs...)
Partitioned(args...; kwargs...) = Distributed(args...; kwargs...) | Replicated()

function Blocked(
    f::AbstractFuture;
    along = :,
    balanced = nothing,
    filtered_from = nothing,
    filtered_to = nothing,
    scaled_by_same_as = nothing,
)
    parameters = Dict()
    constraints = PartitioningConstraints()

    # Prepare `along`
    if along isa Colon
        along = sample(f, :axes)
        # TODO: Ensure that axes returns [1] for DataFrame and axes for Array
        # while keys returns keys for DataFrame and axes for Array
    end
    along = to_vector(along)
    # TODO: Maybe assert that along isa Vector{String} or Vector{Symbol}

    # Create PTs for each axis that can be used to block along
    pts::Vector{PartitionType} = []
    for axis in first(along, 4)
        # Handle combinations of `balanced` and `filtered_from`/`filtered_to`
        for b in (isnothing(balanced) ? [true, false] : [balanced])
            # Initialize parameters
            parameters = Dict("key" => axis, "balanced" => b)
            constraints = PartitioningConstraints()

            # Create `ScaleBy` constraints
            if b
                push!(constraints.constraints, ScaleBy(f, 1.0))
                # TODO: Add an AtMost constraint in the case that input elements are very large
            else
                if !isnothing(filtered_from)
                    filtered_from = to_vector(filtered_from)
                    factor, from = maximum(filtered_from) do ff
                        (sample(ff, :memory_usage) / sample(f, :memory_usage), filtered_from)
                    end
                    push!(constraints.constraints, ScaleBy(f, factor, from))
                elseif !isnothing(filtered_to)
                    filtered_to = to_vector(filtered_to)
                    factor, to = maximum(filtered_to) do ft
                        (sample(ft, :memory_usage) / sample(f, :memory_usage), filtered_to)
                    end
                    push!(constraints.constraints, ScaleBy(f, factor, to))
                elseif !isnothing(scaled_by_same_as)
                    push!(constraints.constraints, ScaleBy(f, 1.0, scaled_by_same_as))
                end
            end

            # Append new PT to PT union being produced
            push!(pts, PartitionType(parameters, constraints))
        end
    end

    # Return the resulting PT union that can then be passed into a call to `pt`
    # which would in turn result in a PA union
    Blocked() & pts
end
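
# A hedged usage sketch for `Blocked`; `inp` and `out` are hypothetical
# futures where `out` was produced by filtering `inp`:
#
#     pt(out, Blocked(out, balanced = false, filtered_from = inp))
#
# This yields unbalanced blocked PTs whose `ScaleBy` constraints bound the
# memory usage of `out` relative to `inp`.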

# NOTE: A reason to use Grouped for element-wise computation (with no
# filtering) is to allow for the input to be re-balanced. If you just use
# Any then there wouldn't be any way to re-balance right before the
# computation. Grouped allows the input to have either balanced=true or
# balanced=false and if balanced=true is chosen then a cast may be applied.

function Grouped(
    f::AbstractFuture;
    # Parameters for splitting into groups
    by = nothing,
    balanced = nothing,
    rev = nothing,
    # Options to deal with skew
    filtered_from = nothing,
    filtered_to = nothing,
    scaled_by_same_as = nothing,
)
    # Prepare `by`
    by = if isnothing(by)
        sample(f, :groupingkeys)
    elseif by isa Colon
        sample(f, :keys)
    else
        by
    end
    by = Symbol.(by)
    by = to_vector(by)

    # Create PTs for each key that can be used to group by
    pts::Vector{PartitionType} = []
    for key in first(by, 8)
        # Handle combinations of `balanced` and `filtered_from`/`filtered_to`
        for b in (isnothing(balanced) ? [true, false] : [balanced])
            parameters = Dict("key" => key, "balanced" => b)
            constraints = PartitioningConstraints()

            # Create `ScaleBy` constraint and also compute `divisions` and
            # `AtMost` constraint if balanced
            if b
                # Set divisions
                # TODO: Change this if `divisions` is not a `Vector{Tuple{Any,Any}}`
                parameters["divisions"] = to_jl_value(sample(f, :statistics, key, :divisions))
                max_ngroups = sample(f, :statistics, key, :max_ngroups)

                # Set flag for reversing the order of the groups
                if !isnothing(rev)
                    parameters["rev"] = rev
                end

                # Add constraints
                push!(constraints.constraints, AtMost(max_ngroups, f))
                push!(constraints.constraints, ScaleBy(f, 1.0))

                # TODO: Make AtMost only accept a value (we can support PT references in the future if needed)
                # TODO: Make scheduler check that the values in AtMost or ScaledBy are actually present to ensure
                # that the constraint can be satisfied for this PT to be used
            else
                # TODO: Support joins
                if !isnothing(filtered_from)
                    filtered_from = to_vector(filtered_from)
                    factor, from = maximum(filtered_from) do ff
                        min_filtered_from = sample(f, :statistics, key, :min)
                        max_filtered_from = sample(f, :statistics, key, :max)
                        # divisions_filtered_from = sample(ff, :statistics, key, :divisions)
                        f_percentile = sample(f, :statistics, key, :percentile, min_filtered_from, max_filtered_from)
                        (f_percentile, filtered_from)
                    end
                    push!(constraints.constraints, ScaleBy(f, factor, from))
                elseif !isnothing(filtered_to)
                    filtered_to = to_vector(filtered_to)
                    factor, to = maximum(filtered_to) do ft
                        min_filtered_to = sample(ft, :statistics, key, :min)
                        max_filtered_to = sample(ft, :statistics, key, :max)
                        # f_divisions = sample(f, :statistics, key, :divisions)
                        f_percentile = sample(f, :statistics, key, :percentile, min_filtered_to, max_filtered_to)
                        (1 / f_percentile, filtered_to)
                    end
                    push!(constraints.constraints, ScaleBy(f, factor, to))
                elseif !isnothing(scaled_by_same_as)
                    push!(constraints.constraints, ScaleBy(f, 1.0, scaled_by_same_as))
                end
            end

            push!(pts, PartitionType(parameters, constraints))
        end
    end
    Grouped() & pts
end
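
# A hedged sketch of assigning a grouped PT union in annotated code; `df` is a
# hypothetical future whose sample carries statistics for the `:species` key:
#
#     pt(df, Grouped(df, by = :species) | Replicated())
#
# The scheduler then picks one PT from the resulting PA union, subject to the
# attached `AtMost`/`ScaleBy` constraints.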
--------------------------------------------------------------------------------
/Banyan/src/requests.jl:
--------------------------------------------------------------------------------
#################
# Magic Methods #
#################

# TODO: Implement magic methods

# Assume that this is mutating
# function Base.getproperty(fut::Future, sym::Symbol)
# end

# Mutating
# TODO: put this back in some way
# function Base.setproperty!(fut::Future, sym::Symbol, new_value)
# end

#############################
# Basic methods for futures #
#############################

function compute(fut::AbstractFuture)
    # TODO: Refactor `current_job_status` out into the `Job`s stored in
    # `global jobs`
    global current_job_status

    fut = convert(Future, fut)
    job_id = get_job_id()
    job = get_job()

    if fut.mutated
        # Get all tasks to be recorded in this call to `compute`
        tasks = [req.task for req in job.pending_requests if req isa RecordTaskRequest]

        # Call `partitioned_using_func`s in 2 passes - forwards and backwards.
        # This allows sample properties to propagate in both directions. We
        # must also make sure to apply mutations in each task appropriately.
        for t in tasks
            @show t.mutation
            @show t.effects
        end
        for t in Iterators.reverse(tasks)
            apply_mutation(invert(t.mutation))
        end
        for t in tasks
            if !isnothing(t.partitioned_using_func)
                t.partitioned_using_func()
            end
            apply_mutation(t.mutation)
        end
        for t in Iterators.reverse(tasks)
            apply_mutation(invert(t.mutation))
            if !isnothing(t.partitioned_using_func)
                t.partitioned_using_func()
            end
        end

        # Do further processing on tasks now that all samples have been
        # computed and sample properties have been set up to share references
        # as needed to prevent expensive redundant computation of sample
        # properties like divisions
        for (i, t) in enumerate(tasks)
            apply_mutation(t.mutation)

            # Call `partitioned_with_func` to create additional PAs for each task
            set_task(t)
            if !isnothing(t.partitioned_with_func)
                t.partitioned_with_func()
            end

            # Cascade PAs backwards. In other words, if as we go from first to
            # last PA we come across one that's annotating a value not
            # annotated in a previous PA, we copy over the annotation (the
            # assigned PT stack) to the previous PA.
            for (j, pa) in enumerate(t.pa_union)
                for previous_pa in Iterators.reverse(t.pa_union[1:j-1])
                    for value_id in keys(pa.partitions.pt_stacks)
                        if !(value_id in keys(previous_pa.partitions.pt_stacks))
                            # Cascade the PT composition backwards
                            previous_pa.partitions.pt_stacks[value_id] =
                                deepcopy(pa.partitions.pt_stacks[value_id])

                            # Cascade backwards all constraints that mention the
                            # value. NOTE: If this is not desired, users should
                            # be explicit and assign different PT compositions for
                            # different values.
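                            # For example (hypothetical): if PA 2 assigns a
                            # PT stack to a value that PA 1 never mentions,
                            # PA 1 receives a deep copy of that stack here,
                            # and any constraint mentioning that value is
                            # copied along with it in the loop below.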
                            for constraint in pa.constraints.constraints
                                # Determine whether we should copy over this constraint
                                copy_constraint = false
                                if constraint isa PartitioningConstraintOverGroup
                                    for arg in constraint.args
                                        if arg isa PartitionTypeReference && first(arg) == value_id
                                            copy_constraint = true
                                        end
                                    end
                                elseif constraint isa PartitioningConstraintOverGroups
                                    for arg in constraint.args
                                        for subarg in arg
                                            if subarg isa PartitionTypeReference && first(subarg) == value_id
                                                copy_constraint = true
                                            end
                                        end
                                    end
                                end

                                # Copy over constraint
                                if copy_constraint
                                    push!(previous_pa.constraints.constraints, deepcopy(constraint))
                                end
                            end
                        end
                    end
                end
            end
        end

        # Switch back to a new task for next code region
        finish_task()

        # for t in tasks
        #     # Apply defaults to PAs
        #     for pa in t.pa_union
        #         @show pa
        #     end
        # end

        # Iterate through tasks for further processing before recording them
        for t in tasks
            @show t.code
            @show t.value_names
            @show t.mutation
            @show t.effects
            # Apply defaults to PAs
            for pa in t.pa_union
                apply_default_constraints!(pa)
                duplicate_for_batching!(pa)
                @show pa
            end

            # Destroy all closures so that all references to `Future`s are dropped
            t.partitioned_using_func = nothing
            t.partitioned_with_func = nothing

            # Drop references to `Future`s in recorded mutations as well
            empty!(t.mutation)
        end

        # Finalize (destroy) all `Future`s that can be destroyed
        GC.gc()

        # Destroy everything that is to be destroyed in this task
        for req in job.pending_requests
            # Don't destroy stuff where a `DestroyRequest` was produced just
            # because of a `mutated(old, new)`
            if req isa DestroyRequest && !any(req.value_id in values(t.mutation) for t in tasks)
                # If this value was to be downloaded to or uploaded from the
                # client side, delete the reference to its data
                if req.value_id in keys(job.futures_on_client)
                    delete!(job.futures_on_client, req.value_id)
                end

                # Remove information about the value's location including the
                # sample taken from it
                delete!(job.locations, req.value_id)
            end
        end

        # Send evaluation request
        try
            response = send_evaluation(fut.value_id, job_id)
        catch
            current_job_status = "failed"
            rethrow()
        end

        # Get queues for moving data between client and cluster
        scatter_queue = get_scatter_queue(job_id)
        gather_queue = get_gather_queue(job_id)

        # Read instructions from gather queue
        # println("job id: ", job_id)
        # print("LISTENING ON: ", gather_queue)
        @debug "Waiting on running job $job_id"
        while true
            # TODO: Use to_jl_value and from_jl_value to support Client
            message = receive_next_message(gather_queue)
            @debug message
            message_type = message["kind"]
            message_end = message["end"]
            if message_type == "SCATTER_REQUEST"
                @debug "Received scatter request"
                # Send scatter
                value_id = message["value_id"]
                f = job.futures_on_client[value_id]
                send_message(
                    scatter_queue,
                    JSON.json(
                        Dict{String,Any}(
                            "value_id" => value_id,
                            "contents" => to_jl_value_contents(f.value)
                        ),
                    ),
                )
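                # The value now lives on the cluster after the scatter, so the
                # client drops its role as the source below (see
                # `sourced(f, None())`).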
                sourced(f, None())
                # TODO: Update stale/mutated here to avoid costly
                # call to `send_evaluation`
            elseif message_type == "GATHER"
                @debug "Received gather request"
                # Receive gather
                value_id = message["value_id"]
                if value_id in keys(job.futures_on_client)
                    value = from_jl_value_contents(message["contents"])
                    f::Future = job.futures_on_client[value_id]
                    f.value = value
                    # TODO: Update stale/mutated here to avoid costly
                    # call to `send_evaluation`
                    @debug value
                end
            elseif message_type == "EVALUATION_END"
                @debug "Received evaluation"
                if message["end"] == true
                    break
                end
            end
        end

        # Update `mutated` and `stale` for the future that is being evaluated
        fut.mutated = false
        # TODO: See if there are more cases where a `compute` call on a future
        # makes it no longer stale
        if get_dst_name(fut) == "Client"
            fut.stale = false
        end
    end

    fut
end

function send_evaluation(value_id::ValueId, job_id::JobId)
    @debug "Sending evaluation request"

    # Submit evaluation request
    response = send_request_get_response(
        :evaluate,
        Dict{String,Any}(
            "value_id" => value_id,
            "job_id" => job_id,
            "requests" => [to_jl(req) for req in get_job().pending_requests]
        ),
    )

    # Clear global state and return response
    empty!(get_job().pending_requests)
    response
end

function Base.collect(fut::AbstractFuture)
    fut = convert(Future, fut)

    # Fast case for where the future has not been mutated and isn't stale
    if !fut.mutated && !fut.stale
        return fut.value
    end

    # This function collects the given future on the client side

    # Set the future's destination location to Client
    destined(fut, Client())
    mutated(fut)

    pt(fut, Replicated())
    @partitioned fut begin
        # This code region is empty but it ensures that something is run
        # and so the data is partitioned and then re-merged back up to its new
        # destination location, the client
    end

    # Evaluate the future so that its value is downloaded to the client
    compute(fut)
    destined(fut, None())
    fut.value
end

###############################################################
# Other requests to be sent with request to evaluate a Future #
###############################################################

struct RecordTaskRequest
    task::DelayedTask
end

struct RecordLocationRequest
    value_id::ValueId
    location::Location
end

struct DestroyRequest
    value_id::ValueId
end

const Request = Union{RecordTaskRequest,RecordLocationRequest,DestroyRequest}

to_jl(req::RecordTaskRequest) = Dict("type" => "RECORD_TASK", "task" => to_jl(req.task))

to_jl(req::RecordLocationRequest) =
    Dict(
        "type" => "RECORD_LOCATION",
        "value_id" => req.value_id,
        "location" => to_jl(req.location),
    )

to_jl(req::DestroyRequest) = Dict("type" => "DESTROY", "value_id" => req.value_id)

function record_request(request::Request)
    push!(get_job().pending_requests, request)
end
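
# A minimal sketch of the request lifecycle from the client's perspective;
# `f` is a hypothetical future created elsewhere in the client library:
#
#     pt(f, Replicated())
#     @partitioned f begin
#         # ... annotated code region ...
#     end
#     compute(f)   # records tasks, sends the evaluate request, and services
#                  # scatter/gather messages until EVALUATION_END
#     collect(f)   # downloads the value to the client if it is stale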
--------------------------------------------------------------------------------
/Banyan/src/utils.jl:
--------------------------------------------------------------------------------
using Base: AbstractVecOrTuple

##############
# CONVERSION #
##############

# NOTE: `jl` refers to a subset of Julia that can be serialized to or
# deserialized from JSON with ease

jl_to_json(j) = JSON.json(j)

json_to_jl(j) = JSON.parse(j)

key_to_jl(key) = reinterpret(UInt8, [hash(string(key))]) |> String
axis_to_jl(axis) = reinterpret(UInt8, [hash(string(axis))]) |> String

total_memory_usage(val) =
    begin
        size = Base.summarysize(val)
        # TODO: Maybe make this larger
        if size ≤ 128
            0
        else
            size
        end
    end

to_vector(v::Vector) = v
to_vector(v) = [v]

# NOTE: This function is shared between the client library and the PT library
function indexapply(op, objs...; index = 1)
    lists = [obj for obj in objs if obj isa AbstractVecOrTuple]
    length(lists) > 0 || throw(ArgumentError("Expected at least one tuple as input"))
    index = index isa Colon ? length(first(lists)) : index
    operands = [(obj isa AbstractVecOrTuple ? obj[index] : obj) for obj in objs]
    indexres = op(operands...)
    res = first(lists)
    if first(lists) isa Tuple
        res = [res...]
        res[index] = indexres
        Tuple(res)
    else
        res = copy(res)
        res[index] = indexres
        res
    end
end
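
# A hedged worked example of `indexapply` (values chosen for illustration):
#
#     indexapply(+, (10, 20), 5, index = 2)  # == (10, 25)
#
# Scalar arguments are applied against each tuple/vector argument's entry at
# `index`, while the other entries of the first tuple pass through unchanged.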

##################
# AUTHENTICATION #
##################

# Process-local configuration for the account being used. It wouldn't be hard
# to make this thread-local, but there shouldn't be any reason to (since only
# one account should be in use per workstation or per server where Banyan.jl
# may be in use). However, wrapping this in a mutex to ensure synchronized
# mutation in this module would be a good TODO.
global banyan_config = nothing
global aws_config_in_usage = nothing

function load_config()
    global banyan_config

    banyanconfig_path = joinpath(homedir(), ".banyan", "banyanconfig.toml")
    if isfile(banyanconfig_path)
        banyan_config = TOML.parsefile(banyanconfig_path)
    end
    banyan_config
end

function write_config()
    global banyan_config

    # Write to banyanconfig.toml
    banyanconfig_path = joinpath(homedir(), ".banyan", "banyanconfig.toml")
    mkpath(joinpath(homedir(), ".banyan"))
    f = open(banyanconfig_path, "w")
    TOML.print(f, banyan_config)
    close(f)
end

if_in_or(key, obj, el = nothing) =
    if key in keys(obj)
        obj[key]
    else
        el
    end

function configure(; kwargs...)
    # This function allows for users to configure their authentication.
    # Authentication details are then saved in
    # `$HOME/.banyan/banyanconfig.toml` so they don't have to be entered again
    # each time a program using the Banyan client library is run

    # Load arguments
    kwargs = Dict(kwargs)
    username = if_in_or(:username, kwargs)
    user_id = if_in_or(:user_id, kwargs)
    api_key = if_in_or(:api_key, kwargs)
    ec2_key_pair_name = if_in_or(:ec2_key_pair_name, kwargs)
    require_ec2_key_pair_name =
        if_in_or(:require_ec2_key_pair_name, kwargs, false)

    # Initialize
    global banyan_config
    is_modified = false
    is_valid = true

    # Ensure a configuration has been created or can be created. Otherwise,
    # return nothing
    if isnothing(banyan_config)
        if !isnothing(user_id) && !isnothing(api_key)
            banyan_config = Dict(
                "banyan" =>
                    Dict("username" => username, "user_id" => user_id, "api_key" => api_key),
                "aws" => Dict(),
            )
            is_modified = true
        else
            error("User ID and API key not provided")
        end
    end

    # Check for changes in required fields
    if !isnothing(username) &&
       (username != banyan_config["banyan"]["username"])
        banyan_config["banyan"]["username"] = username
        is_modified = true
    end
    if !isnothing(user_id) &&
       (user_id != banyan_config["banyan"]["user_id"])
        banyan_config["banyan"]["user_id"] = user_id
        is_modified = true
    end
    if !isnothing(api_key) && (api_key != banyan_config["banyan"]["api_key"])
        banyan_config["banyan"]["api_key"] = api_key
        is_modified = true
    end

    # Check for changes in potentially required fields

    # aws.ec2_key_pair_name
    if !isnothing(ec2_key_pair_name) && (
        !(haskey(banyan_config["aws"], "ec2_key_pair_name")) ||
        ec2_key_pair_name != banyan_config["aws"]["ec2_key_pair_name"]
    )
        banyan_config["aws"]["ec2_key_pair_name"] = ec2_key_pair_name
        is_modified = true
    end
    if require_ec2_key_pair_name &&
       !haskey(banyan_config["aws"], "ec2_key_pair_name")
        error("Name of an EC2 key pair required but not provided; visit here to create a key pair: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html#having-ec2-create-your-key-pair")
    end

    # # aws.region
    # if !isnothing(region) && (
    #     !(haskey(banyan_config["aws"], "region")) ||
    #     region != banyan_config["aws"]["region"]
    # )
    #     banyan_config["aws"]["region"] = region
    #     is_modified = true
    # end

    # Update config file if it was modified
    if is_modified
        write_config() #update_config()
    end

    return banyan_config
end
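
# A hedged usage sketch (the credential values are placeholders, not real):
#
#     configure(
#         username = "me",
#         user_id = "...",
#         api_key = "...",
#         ec2_key_pair_name = "my-key-pair",
#     )
#
# On first use, `user_id` and `api_key` are required; afterwards they are read
# back from banyanconfig.toml.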

function get_aws_config()
    global aws_config_in_usage

    # Get AWS configuration
    if isnothing(aws_config_in_usage)
        # Get region according to ENV, then credentials, then config files
        profile = get(ENV, "AWS_PROFILE", get(ENV, "AWS_DEFAULT_PROFILE", "banyan_nothing"))
        env_region = get(ENV, "AWS_DEFAULT_REGION", "")
        credentialsfile = read(Inifile(), joinpath(homedir(), ".aws", "credentials"))
        configfile = read(Inifile(), joinpath(homedir(), ".aws", "config"))
        credentials_region = _get_ini_value(credentialsfile, profile, "region", default_value="")
        config_region = _get_ini_value(configfile, profile, "region", default_value="")

        # Choose the first region that is set
        region = env_region
        region = isempty(region) ? credentials_region : region
        region = isempty(region) ? config_region : region

        @debug region

        if isempty(region)
            throw(ErrorException("Could not discover AWS region to use from looking at AWS_PROFILE, AWS_DEFAULT_PROFILE, AWS_DEFAULT_REGION, HOME/.aws/credentials, and HOME/.aws/config"))
        end

        aws_config_in_usage = Dict(
            :creds => AWSCredentials(),
            :region => region
        )
    end

    # # Use default location if needed
    # if !haskey(aws_config_in_usage, :region)
    #     @warn "Using default AWS region of us-west-2 in \$HOME/.banyan/banyanconfig.toml"
    #     aws_config_in_usage[:region] = "us-west-2"
    # end

    # Convert to dictionary and return

    aws_config_in_usage
end

get_aws_config_region() = get_aws_config()[:region]

#########################
# ENVIRONMENT VARIABLES #
#########################

is_debug_on() = "JULIA_DEBUG" in keys(ENV) && ENV["JULIA_DEBUG"] == "all"

macro in_env(key)
    return :(string("BANYAN_", getpid(), "_", $key) in keys(ENV))
end

macro env(key)
    return :(ENV[string("BANYAN_", getpid(), "_", $key)])
end

macro delete_in_env(key)
    return :(delete!(ENV, string("BANYAN_", getpid(), "_", $key)))
end

################
# API REQUESTS #
################

method_to_string(method) = begin
    if method == :create_cluster
        "create-cluster"
    elseif method == :destroy_cluster
        "destroy-cluster"
    elseif method == :describe_clusters
        "describe-clusters"
    elseif method == :create_job
        "create-job"
    elseif method == :destroy_job
        "destroy-job"
    elseif method == :describe_jobs
        "describe-jobs"
    elseif method == :evaluate
        "evaluate"
    elseif method == :update_cluster
        "update-cluster"
    elseif method == :set_cluster_ready
        "set-cluster-ready"
    end
end
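
# For example, `method_to_string(:create_cluster)` returns "create-cluster";
# `send_request_get_response` below appends this to `BANYAN_API_ENDPOINT` to
# form the URL it posts to.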
Please check dashboard to view status.", 302 | # ) 303 | elseif method == :evaluate 304 | # println( 305 | # "Evaluation is in progress. Please check dashboard to view status.", 306 | # ) 307 | end 308 | rethrow() 309 | else 310 | rethrow() 311 | end 312 | end 313 | end 314 | 315 | ########################################## 316 | # Ordering hash for computing divisions # 317 | ########################################## 318 | 319 | # NOTE: `orderinghash` must either return a number or a vector of 320 | # equally-sized numbers 321 | 322 | # NOTE: This is duplicated between pt_lib.jl and the client library 323 | orderinghash(x::Any) = x # This lets us handle numbers and dates 324 | orderinghash(s::String) = Integer.(codepoint.(collect(rpad(first(s, 32), 32)))) # pad/truncate to 32 characters 325 | orderinghash(A::AbstractArray) = orderinghash(first(A)) 326 | 327 | ######################### 328 | # MOUNTED S3 FILESYSTEM # 329 | ######################### 330 | 331 | function get_s3fs_path(path) 332 | # Get information about requested object 333 | s3path = S3Path(path) 334 | bucket = s3path.bucket 335 | key = s3path.key 336 | # bucket = "banyan-cluster-data-myfirstcluster" 337 | mount = joinpath(homedir(), ".banyan", "mnt", "s3", bucket) 338 | 339 | # Ensure path to mount exists 340 | if !isdir(mount) 341 | mkpath(mount) 342 | end 343 | 344 | # Ensure something is mounted 345 | if !ismount(mount) 346 | # TODO: Store buckets from different accounts/IAMs/etc. separately 347 | try 348 | ACCESS_KEY_ID = get_aws_config()[:creds].access_key_id 349 | SECRET_ACCESS_KEY = get_aws_config()[:creds].secret_key 350 | passwd_s3fs_contents = ACCESS_KEY_ID * ":" * SECRET_ACCESS_KEY 351 | HOME = homedir() 352 | region = get_aws_config_region() 353 | run(pipeline(`echo $passwd_s3fs_contents`, "$HOME/.passwd-s3fs")) 354 | run(`chmod 600 $HOME/.passwd-s3fs`) 355 | run(`s3fs $bucket $mount -o url=https://s3.$region.amazonaws.com -o endpoint=$region -o passwd_file=$HOME/.passwd-s3fs`) 356 | catch e 357 | @error """Failed to mount S3 bucket \"$bucket\" at $mount using s3fs with error: $e. 
Please ensure s3fs is in PATH or mount manually.""" 358 | end 359 | end 360 | 361 | # Return local path to object 362 | joinpath(mount, key) 363 | end 364 | -------------------------------------------------------------------------------- /Banyan/src/clusters.jl: -------------------------------------------------------------------------------- 1 | 2 | function load_json(path::String) 3 | if startswith(path, "file://") 4 | JSON.parsefile(path[8:end]) 5 | elseif startswith(path, "s3://") 6 | error("S3 path not currently supported") 7 | # JSON.parsefile(S3Path(path, config=get_aws_config())) 8 | elseif startswith(path, "http://") || startswith(path, "https://") 9 | JSON.parse(HTTP.get(path).body) 10 | else 11 | error( 12 | "Path $path must start with \"file://\", \"s3://\", or \"http(s)://\"", 13 | ) 14 | end 15 | end 16 | 17 | # Loads file into String and returns 18 | function load_file(path::String) 19 | if startswith(path, "file://") 20 | String(read(open(path[8:end]))) 21 | elseif startswith(path, "s3://") 22 | String(read(S3Path(path))) 23 | elseif startswith(path, "http://") || startswith(path, "https://") 24 | String(HTTP.get(path).body) 25 | else 26 | error( 27 | "Path $path must start with \"file://\", \"s3://\", or \"http(s)://\"", 28 | ) 29 | end 30 | end 31 | 32 | function merge_with( 33 | banyanfile_so_far::Dict, 34 | banyanfile::Dict, 35 | selector::Function, 36 | ) 37 | # Merge where we combine arrays by taking unions of their unique elements 38 | so_far = selector(banyanfile_so_far) 39 | curr = selector(banyanfile) 40 | Base.collect(union(Set(so_far), Set(curr))) 41 | end 42 | 43 | function merge_paths_with( 44 | banyanfile_so_far::Dict, 45 | banyanfile_path::String, 46 | banyanfile::Dict, 47 | selector::Function, 48 | ) 49 | # Merge where we combine arrays by taking unions of their unique elements 50 | so_far = selector(banyanfile_so_far) 51 | curr = [getnormpath(banyanfile_path, p) for p in selector(banyanfile)] 52 | deduplicated_absolute_locations = collect(union(Set(so_far), Set(curr))) 53 | deduplicated_relative_locations = 54 | unique(loc -> basename(loc), vcat(so_far, curr)) 55 | if length(deduplicated_relative_locations) < length(deduplicated_absolute_locations) 56 | error( 57 | "Files and scripts must have unique base names: $so_far and $curr contain entries with the same base name", 58 | ) 59 | else 60 | deduplicated_absolute_locations 61 | end 62 | end 63 | 64 | function keep_same( 65 | banyanfile_so_far::Dict, 66 | banyanfile::Dict, 67 | selector::Function 68 | ) 69 | so_far = selector(banyanfile_so_far) 70 | curr = selector(banyanfile) 71 | if !isnothing(so_far) && !isnothing(curr) && so_far != curr 72 | @warn "$so_far does not match $curr in included Banyanfiles" 73 | end 74 | isnothing(so_far) ? 
curr : so_far 90 | end 91 | 92 | function getnormpath(banyanfile_path, p) 93 | if startswith(p, "file://") 94 | prefix, suffix = split(banyanfile_path, "://") 95 | banyanfile_location_path = dirname(suffix) 96 | prefix * "://" * normpath(banyanfile_location_path, last(split(p, "://"))) 97 | else 98 | p 99 | end 100 | end 101 | 102 | function merge_banyanfile_with_defaults!(banyanfile) 103 | # Populate with defaults 104 | mergewith!( 105 | (a,b)->a, 106 | banyanfile, 107 | Dict( 108 | "include" => [], 109 | "require" => Dict() 110 | ) 111 | ) 112 | mergewith!( 113 | (a,b)->a, 114 | banyanfile["require"], 115 | Dict( 116 | "language" => "jl", 117 | "cluster" => Dict(), 118 | "job" => Dict() 119 | ) 120 | ) 121 | mergewith!( 122 | (a, b) -> a, 123 | banyanfile["require"]["cluster"], 124 | Dict( 125 | "files" => [], 126 | "scripts" => [], 127 | "packages" => [], 128 | "pt_lib" => nothing, 129 | "pt_lib_info" => nothing, 130 | ), 131 | ) 132 | mergewith!( 133 | (a,b)->a, 134 | banyanfile["require"]["job"], 135 | Dict("code" => []) 136 | ) 137 | end 138 | 139 | function merge_banyanfile_with!( 140 | banyanfile_so_far::Dict, 141 | banyanfile_path::String, 142 | for_cluster_or_job::Symbol, 143 | for_creation_or_update::Symbol, 144 | ) 145 | # Load Banyanfile to merge with 146 | banyanfile = load_json(banyanfile_path) 147 | 148 | # Merge Banyanfile with defaults 149 | merge_banyanfile_with_defaults!(banyanfile) 150 | 151 | # Merge with all included 152 | for included in banyanfile["include"] 153 | merge_banyanfile_with!(banyanfile_so_far, getnormpath(banyanfile_path, included), for_cluster_or_job, for_creation_or_update) 154 | end 155 | banyanfile_so_far["include"] = [] 156 | 157 | # Merge with rest of what is in this banyanfile 158 | 159 | if for_cluster_or_job == :cluster 160 | if for_creation_or_update == :creation 161 | # Merge language 162 | banyanfile_so_far["require"]["language"] = keep_same(banyanfile_so_far, banyanfile, b -> b["require"]["language"]) 163 | else 164 | @warn "Ignoring language" 165 | end 166 | 167 | # Merge files, scripts, packages 168 | banyanfile_so_far["require"]["cluster"]["files"] = merge_paths_with( 169 | banyanfile_so_far, 170 | banyanfile_path, 171 | banyanfile, 172 | b -> b["require"]["cluster"]["files"], 173 | ) 174 | banyanfile_so_far["require"]["cluster"]["scripts"] = merge_paths_with( 175 | banyanfile_so_far, 176 | banyanfile_path, 177 | banyanfile, 178 | b -> b["require"]["cluster"]["scripts"], 179 | ) 180 | banyanfile_so_far["require"]["cluster"]["packages"] = merge_with( 181 | banyanfile_so_far, 182 | banyanfile, 183 | b -> b["require"]["cluster"]["packages"], 184 | ) 185 | 186 | # Merge pt_lib_info and pt_lib 187 | banyanfile_so_far["require"]["cluster"]["pt_lib_info"] = keep_same_path( 188 | banyanfile_so_far, 189 | banyanfile_path, 190 | banyanfile, 191 | b -> b["require"]["cluster"]["pt_lib_info"], 192 | ) 193 | banyanfile_so_far["require"]["cluster"]["pt_lib"] = keep_same_path( 194 | banyanfile_so_far, 195 | banyanfile_path, 196 | banyanfile, 197 | b -> b["require"]["cluster"]["pt_lib"], 198 | ) 199 | elseif for_cluster_or_job == :job 200 | # Merge code 201 | banyanfile_so_far["require"]["job"]["code"] = merge_with( 202 | banyanfile_so_far, 203 | banyanfile, 204 | b -> b["require"]["job"]["code"], 205 | ) 206 | # TODO: If code is too large, upload to S3 bucket and replace code with 207 | # an include statement 208 | else 209 | error("Expected for_cluster_or_job to be either :cluster or :job") 210 | end 211 | end 212 | 213 | function upload_banyanfile(banyanfile_path::String, 
s3_bucket_arn::String, cluster_name::String, for_creation_or_update::Symbol; reinstall_julia::Bool = false) 214 | # TODO: Implement this to load Banyanfile, referenced pt_lib_info, pt_lib, 215 | # code files 216 | 217 | # TODO: Validate that s3_bucket_arn exists 218 | 219 | # Load Banyanfile and merge with all included 220 | banyanfile = load_json(banyanfile_path) 221 | merge_banyanfile_with_defaults!(banyanfile) 222 | for included in banyanfile["include"] 223 | merge_banyanfile_with!(banyanfile, getnormpath(banyanfile_path, included), :cluster, for_creation_or_update) 224 | end 225 | 226 | # Load pt_lib_info if path provided 227 | pt_lib_info = banyanfile["require"]["cluster"]["pt_lib_info"] 228 | @debug pt_lib_info 229 | pt_lib_info = if pt_lib_info isa String 230 | load_json(pt_lib_info) 231 | else 232 | pt_lib_info 233 | end 234 | 235 | files = banyanfile["require"]["cluster"]["files"] 236 | scripts = banyanfile["require"]["cluster"]["scripts"] 237 | packages = banyanfile["require"]["cluster"]["packages"] 238 | pt_lib = banyanfile["require"]["cluster"]["pt_lib"] 239 | if isnothing(pt_lib) 240 | error("No pt_lib.jl provided") 241 | end 242 | if isnothing(pt_lib_info) 243 | error("No pt_lib_info.json provided") 244 | end 245 | 246 | pt_lib = [pt_lib] 247 | 248 | # Upload all files, scripts, and pt_lib to s3 bucket 249 | s3_bucket_name = last(split(s3_bucket_arn, ":")) 250 | if endswith(s3_bucket_name, "/") 251 | s3_bucket_name = s3_bucket_name[1:end-1] 252 | elseif endswith(s3_bucket_name, "/*") 253 | s3_bucket_name = s3_bucket_name[1:end-2] 254 | elseif endswith(s3_bucket_name, "*") 255 | s3_bucket_name = s3_bucket_name[1:end-1] 256 | end 257 | for f in vcat(files, scripts, pt_lib) 258 | s3_put(get_aws_config(), s3_bucket_name, basename(f), load_file(f)) 259 | end 260 | 261 | bucket = s3_bucket_name 262 | region = get_aws_config_region() 263 | 264 | # Create post-install script with base commands 265 | code = "#!/bin/bash\n" 266 | code *= "mv setup_log.txt /tmp\n" 267 | code *= "cd /home/ec2-user\n" 268 | code *= "sudo yum update -y &>> setup_log.txt\n" 269 | code *= "sudo chmod 777 setup_log.txt\n" 270 | if reinstall_julia || for_creation_or_update == :creation 271 | code *= "sudo su - ec2-user -c \"wget https://julialang-s3.julialang.org/bin/linux/x64/1.6/julia-1.6.1-linux-x86_64.tar.gz -O julia.tar.gz &>> setup_log.txt\"\n" 272 | code *= "mkdir julia &>> setup_log.txt\n" 273 | code *= "sudo su - ec2-user -c \"tar zxvf julia.tar.gz -C julia --strip-components 1 &>> setup_log.txt\"\n" 274 | code *= "rm julia.tar.gz &>> setup_log.txt\n" 275 | code *= "sudo su - ec2-user -c \"julia/bin/julia --project -e 'using Pkg; Pkg.add(name=\\\"AWSS3\\\", version=\\\"0.7\\\"); Pkg.add([\\\"AWSCore\\\", \\\"AWSSQS\\\", \\\"JSON\\\", \\\"MPI\\\", \\\"BenchmarkTools\\\"]); ENV[\\\"JULIA_MPIEXEC\\\"]=\\\"srun\\\"; ENV[\\\"JULIA_MPI_LIBRARY\\\"]=\\\"/opt/amazon/openmpi/lib64/libmpi\\\"; Pkg.build(\\\"MPI\\\"; verbose=true)' &>> setup_log.txt\"\n" 276 | end 277 | code *= "sudo amazon-linux-extras install epel\n" 278 | code *= "aws s3 cp s3://banyan-executor /home/ec2-user --recursive\n" 279 | code *= "sudo yum -y install s3fs-fuse\n" 280 | code *= "sudo su - ec2-user -c \"mkdir -p /home/ec2-user/mnt/$bucket\"\n" 281 | code *= "sudo su - ec2-user -c \"/usr/bin/s3fs $bucket /home/ec2-user/mnt/$bucket -o iam_role=auto -o url=https://s3.$region.amazonaws.com -o endpoint=$region\"\n" 282 | code *= "sudo su - ec2-user -c \"aws configure set region $region\"\n" 283 | 284 | # Append to 
post-install script downloading files, scripts, and pt_lib onto the cluster 285 | for f in vcat(files, scripts, pt_lib) 286 | code *= 287 | "sudo su - ec2-user -c \"aws s3 cp s3://" * s3_bucket_name * "/" * 288 | basename(f) * 289 | " /home/ec2-user/\"\n" 290 | end 291 | 292 | # Append to post-install script running scripts on the cluster 293 | for script in scripts 294 | fname = basename(script) 295 | code *= "sudo su - ec2-user -c \"bash /home/ec2-user/$fname\"\n" 296 | end 297 | 298 | # Append to post-install script installing Julia dependencies 299 | for pkg in packages 300 | pkg_spec = split(pkg, "@") 301 | if length(pkg_spec) == 1 302 | code *= "sudo su - ec2-user -c \"julia/bin/julia --project -e 'using Pkg; Pkg.add(name=\\\"$pkg\\\")' &>> setup_log.txt \"\n" 303 | elseif length(pkg_spec) == 2 304 | name, version = pkg_spec 305 | code *= "sudo su - ec2-user -c \"julia/bin/julia --project -e 'using Pkg; Pkg.add(name=\\\"$name\\\", version=\\\"$version\\\")' &>> setup_log.txt \"\n" 306 | end 307 | end 308 | 309 | # Upload post_install script to s3 bucket 310 | post_install_script = "banyan_" * cluster_name * "_script.sh" 311 | code *= 312 | "touch /home/ec2-user/update_finished\n" * 313 | "aws s3 cp /home/ec2-user/update_finished " * 314 | "s3://" * s3_bucket_name * "/\n" 315 | @debug s3_bucket_name 316 | s3_put(get_aws_config(), s3_bucket_name, post_install_script, code) 317 | @debug code 318 | @debug pt_lib_info 319 | return pt_lib_info 320 | end 321 | 322 | # Creates a cluster; a random `name` is generated if none is provided 323 | function create_cluster(; 324 | name::Union{String,Nothing} = nothing, 325 | instance_type::String = "m4.4xlarge", 326 | max_num_nodes::Int = 8, 327 | banyanfile_path::Union{String,Nothing} = nothing, 328 | iam_policy_arn::Union{String,Nothing} = nothing, 329 | s3_bucket_arn::Union{String,Nothing} = nothing, 330 | s3_bucket_name::Union{String,Nothing} = nothing, 331 | vpc_id = nothing, 332 | subnet_id = nothing, 333 | kwargs..., 334 | ) 335 | @debug "Creating cluster" 336 | 337 | # Construct arguments 338 | if isnothing(s3_bucket_arn) && !isnothing(s3_bucket_name) 339 | s3_bucket_arn = "arn:aws:s3:::$s3_bucket_name*" 340 | end 341 | 342 | # Configure using parameters 343 | c = configure(; require_ec2_key_pair_name = true, kwargs...) 
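    # NOTE: `configure` merges any credentials and AWS settings passed through
    # `kwargs` into the local Banyan configuration and errors out early if no
    # EC2 key pair is on record, since cluster creation cannot proceed without
    # one. A hypothetical invocation that satisfies this requirement:
    #
    #     create_cluster(
    #         name = "my-cluster",
    #         instance_type = "m4.4xlarge",
    #         max_num_nodes = 8,
    #         ec2_key_pair_name = "my-key-pair",
    #         banyanfile_path = "file://res/Banyanfile.json",
    #     )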
344 | name = if !isnothing(name) 345 | name 346 | else 347 | "banyan-cluster-" * randstring(6) 348 | end 349 | if isnothing(s3_bucket_arn) 350 | s3_bucket_arn = "arn:aws:s3:::banyan-cluster-data-" * name * bytes2hex(rand(UInt8, 16)) 351 | s3_bucket_name = last(split(s3_bucket_arn, ":")) 352 | s3_create_bucket(get_aws_config(), s3_bucket_name) 353 | elseif !(rstrip(last(split(s3_bucket_arn, ":")), ['/', '*']) in s3_list_buckets(get_aws_config())) 354 | error("Bucket $s3_bucket_arn does not exist in connected AWS account") 355 | end 356 | 357 | # Construct cluster creation request 358 | cluster_config = Dict( 359 | "cluster_name" => name, 360 | "instance_type" => instance_type, #"t3.large", "c5.2xlarge" 361 | "num_nodes" => max_num_nodes, 362 | "ec2_key_pair" => c["aws"]["ec2_key_pair_name"], 363 | "aws_region" => get_aws_config_region(), 364 | "s3_read_write_resource" => s3_bucket_arn, 365 | ) 366 | if !isnothing(banyanfile_path) 367 | pt_lib_info = upload_banyanfile(banyanfile_path, s3_bucket_arn, name, :creation) 368 | cluster_config["pt_lib_info"] = pt_lib_info 369 | end 370 | if !isnothing(iam_policy_arn) 371 | cluster_config["additional_policy"] = iam_policy_arn # "arn:aws:s3:::banyanexecutor*" 372 | end 373 | if !isnothing(vpc_id) 374 | cluster_config["vpc_id"] = vpc_id 375 | end 376 | if !isnothing(subnet_id) 377 | cluster_config["subnet_id"] = subnet_id 378 | end 379 | 380 | # Send request to create cluster 381 | send_request_get_response(:create_cluster, cluster_config) 382 | end 383 | 384 | function destroy_cluster(name::String; kwargs...) 385 | @debug "Destroying cluster" 386 | configure(; kwargs...) 387 | send_request_get_response(:destroy_cluster, Dict("cluster_name" => name)) 388 | end 389 | 390 | # TODO: Update website display 391 | # TODO: Implement load_banyanfile 392 | function update_cluster(; 393 | name::Union{String,Nothing} = nothing, 394 | banyanfile_path::Union{String,Nothing} = nothing, 395 | force = false, 396 | kwargs..., 397 | ) 398 | @info "Updating cluster" 399 | 400 | # Configure 401 | configure(; kwargs...) 
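    # Resolve which cluster to update: if no `name` is given, fall back to the
    # first cluster on the account (see below). A hypothetical call relying on
    # this default:
    #
    #     update_cluster(banyanfile_path = "file://res/Banyanfile.json")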
402 | cluster_name = if isnothing(name) 403 | clusters = get_clusters() 404 | if length(clusters) == 0 405 | error("Failed to update cluster: you don't have any clusters created") 406 | end 407 | first(keys(clusters)) 408 | else 409 | name 410 | end 411 | 412 | # Force by setting cluster to running 413 | if force 414 | assert_cluster_is_ready(name=cluster_name) 415 | end 416 | 417 | # Require restart: pcluster_additional_policy, s3_read_write_resource, num_nodes 418 | # No restart: Banyanfile 419 | 420 | if !isnothing(banyanfile_path) 421 | # Retrieve the location of the current post_install script in S3 and upload 422 | # the updated version to the same location 423 | s3_bucket_arn = get_cluster(cluster_name).s3_bucket_arn 424 | if endswith(s3_bucket_arn, "/") 425 | s3_bucket_arn = s3_bucket_arn[1:end-1] 426 | elseif endswith(s3_bucket_arn, "/*") 427 | s3_bucket_arn = s3_bucket_arn[1:end-2] 428 | elseif endswith(s3_bucket_arn, "*") 429 | s3_bucket_arn = s3_bucket_arn[1:end-1] 430 | end 431 | 432 | # Upload to S3 433 | pt_lib_info = upload_banyanfile(banyanfile_path, s3_bucket_arn, cluster_name, :update, reinstall_julia=get(kwargs, :reinstall_julia, false)) 434 | 435 | # Upload pt_lib_info 436 | send_request_get_response( 437 | :update_cluster, 438 | Dict( 439 | "cluster_name" => cluster_name, 440 | "pt_lib_info" => pt_lib_info 441 | # TODO: Send banyanfile here 442 | ), 443 | ) 444 | end 445 | end 446 | 447 | function assert_cluster_is_ready(; 448 | name::String, 449 | kwargs..., 450 | ) 451 | @info "Setting cluster status to running" 452 | 453 | # Configure 454 | configure(; kwargs...) 455 | 456 | send_request_get_response( 457 | :set_cluster_ready, 458 | Dict( 459 | "cluster_name" => name, 460 | ), 461 | ) 462 | end 463 | 464 | struct Cluster 465 | name::String 466 | status::Symbol 467 | num_jobs_running::Int32 468 | s3_bucket_arn::String 469 | end 470 | 471 | parsestatus(status) = 472 | if status == "creating" 473 | :creating 474 | elseif status == "notready" 475 | :notready 476 | elseif status == "destroying" 477 | :destroying 478 | elseif status == "updating" 479 | :updating 480 | elseif status == "failed" 481 | :failed 482 | elseif status == "starting" 483 | :starting 484 | elseif status == "stopped" 485 | :stopped 486 | elseif status == "running" 487 | :running 488 | elseif status == "terminated" 489 | :terminated 490 | else 491 | error("Unexpected status ", status) 492 | end 493 | 494 | function get_clusters(; kwargs...) 495 | @debug "Downloading description of clusters" 496 | configure(; kwargs...) 497 | response = 498 | send_request_get_response(:describe_clusters, Dict{String,Any}()) 499 | @debug response 500 | Dict( 501 | name => Cluster( 502 | name, 503 | parsestatus(c["status"]), 504 | c["num_jobs"], 505 | c["s3_read_write_resource"], 506 | ) for (name, c) in response["clusters"] 507 | ) 508 | end 509 | 510 | get_cluster(name::String; kwargs...) 
= get_clusters(; kwargs...)[name] 511 | get_cluster() = get_cluster(get_cluster_name()) 512 | -------------------------------------------------------------------------------- /Banyan/res/utils.jl: -------------------------------------------------------------------------------- 1 | using Base: Integer, AbstractVecOrTuple 2 | 3 | #################### 4 | # Helper functions # 5 | #################### 6 | 7 | isa_df(obj) = @isdefined(AbstractDataFrame) && obj isa AbstractDataFrame 8 | isa_array(obj) = obj isa AbstractArray 9 | 10 | get_worker_idx(comm::MPI.Comm) = MPI.Comm_rank(comm) + 1 11 | get_nworkers(comm::MPI.Comm) = MPI.Comm_size(comm) 12 | 13 | get_partition_idx(batch_idx, nbatches, comm::MPI.Comm) = 14 | (get_worker_idx(comm) - 1) * nbatches + batch_idx 15 | 16 | get_npartitions(nbatches, comm::MPI.Comm) = 17 | nbatches * get_nworkers(comm) 18 | 19 | split_len(src_len::Integer, idx::Integer, npartitions::Integer) = 20 | if npartitions > 1 21 | # dst_len = Int64(cld(src_len, npartitions)) 22 | dst_len = cld(src_len, npartitions) 23 | dst_start = min((idx - 1) * dst_len + 1, src_len + 1) 24 | dst_end = min(idx * dst_len, src_len) 25 | dst_start:dst_end 26 | else 27 | 1:src_len 28 | end 29 | 30 | split_len(src_len, batch_idx::Integer, nbatches::Integer, comm::MPI.Comm) = 31 | split_len( 32 | src_len, 33 | get_partition_idx(batch_idx, nbatches, comm), 34 | get_npartitions(nbatches, comm) 35 | ) 36 | 37 | split_on_executor(src, d::Integer, i) = 38 | if isa_df(src) 39 | @view src[i, :] 40 | elseif isa_array(src) 41 | selectdim(src, d, i) 42 | else 43 | error("Expected split across either dimension of an AbstractArray or rows of an AbstractDataFrame") 44 | end 45 | 46 | split_on_executor(src, dim::Integer, batch_idx::Integer, nbatches::Integer, comm::MPI.Comm) = 47 | begin 48 | npartitions = get_npartitions(nbatches, comm) 49 | if npartitions > 1 50 | split_on_executor( 51 | src, 52 | dim, 53 | split_len( 54 | size(src, dim), 55 | get_partition_idx(batch_idx, nbatches, comm), 56 | npartitions 57 | ) 58 | ) 59 | else 60 | src 61 | end 62 | end 63 | 64 | function merge_on_executor(obj...; key=nothing) 65 | # @show obj 66 | # @show length(obj) 67 | # @show typeof(obj) 68 | first_obj = first(obj) 69 | # @show first_obj 70 | # @show typeof(first_obj) 71 | # @show length(first_obj) 72 | if isa_df(first_obj) 73 | # If this is a dataframe then we ignore the grouping key 74 | vcat(obj...) 
75 | elseif isa_array(first_obj) 76 | # @show obj 77 | cat(obj...; dims=key) 78 | else 79 | # error("Expected either AbstractDataFrame or AbstractArray for concatenation") 80 | first_obj 81 | end 82 | end 83 | 84 | function merge_on_executor(kind::Symbol, vbuf::MPI.VBuffer, nchunks::Integer; key) 85 | chunks = [ 86 | begin 87 | chunk = view( 88 | vbuf.data, 89 | (vbuf.displs[i]+1): 90 | (vbuf.displs[i] + vbuf.counts[i]) 91 | ) 92 | if kind == :df 93 | DataFrame(Arrow.Table(IOBuffer(chunk))) 94 | elseif kind == :bits 95 | chunk 96 | else 97 | deserialize(IOBuffer(chunk)) 98 | end 99 | end 100 | for i in 1:nchunks 101 | ] 102 | merge_on_executor(chunks...; key=key) 103 | end 104 | 105 | function get_partition_idx_from_divisions(val, divisions; boundedlower=false, boundedupper=false) 106 | # The given divisions may be returned from `get_divisions` 107 | oh = orderinghash(val) 108 | for (i, div) in enumerate(divisions) 109 | isfirstdivision = i == 1 110 | islastdivision = i == length(divisions) 111 | if ((!boundedlower && isfirstdivision) || oh >= first(div)[1]) && 112 | ((!boundedupper && islastdivision) || oh < last(div)[2]) 113 | return i 114 | end 115 | end 116 | -1 117 | end 118 | 119 | isoverlapping(a::AbstractRange, b::AbstractRange) = 120 | a.start ≤ b.stop && b.start ≤ a.stop 121 | 122 | # NOTE: This function is shared between the client library and the PT library 123 | to_jl_value_contents(jl) = begin 124 | # Handle functions defined in a module 125 | # TODO: Document this special case 126 | # if jl isa Function && !(isdefined(Base, jl) || isdefined(Core, jl) || isdefined(Main, jl)) 127 | if jl isa Expr && eval(jl) isa Function 128 | jl = Dict("is_banyan_udf" => true, "code" => jl) 129 | end 130 | 131 | # Convert Julia object to string 132 | io = IOBuffer() 133 | iob64_encode = Base64EncodePipe(io) 134 | serialize(iob64_encode, jl) 135 | close(iob64_encode) 136 | String(take!(io)) 137 | end 138 | 139 | # NOTE: This function is shared between the client library and the PT library 140 | from_jl_value_contents(jl_value_contents) = begin 141 | # Convert string to Julia object 142 | io = IOBuffer() 143 | iob64_decode = Base64DecodePipe(io) 144 | write(io, jl_value_contents) 145 | seekstart(io) 146 | res = deserialize(iob64_decode) 147 | 148 | # Handle functions defined in a module 149 | if res isa Dict && haskey(res, "is_banyan_udf") && res["is_banyan_udf"] 150 | eval(res["code"]) 151 | else 152 | res 153 | end 154 | end 155 | 156 | # NOTE: This is duplicated between pt_lib.jl and the client library 157 | orderinghash(x::Any) = x # This lets us handle numbers and dates 158 | orderinghash(s::String) = Integer.(codepoint.(collect(rpad(first(s, 32), 32)))) # pad/truncate to 32 characters 159 | orderinghash(A::Array) = orderinghash(first(A)) 160 | 161 | to_vector(v::Vector) = v 162 | to_vector(v) = [v] 163 | 164 | function get_divisions(divisions, npartitions) 165 | # This function accepts a list of divisions where each division is a tuple 166 | # of ordering hashes (values returned by `orderinghash` which are either 167 | # numbers or vectors of numbers). It also accepts a number of partitions to 168 | # produce divisions for. The result is a list of length `npartitions` 169 | # containing lists of divisions for each partition. A partition may contain 170 | # multiple divisions. 171 | 172 | ndivisions = length(divisions) 173 | if ndivisions >= npartitions 174 | # If there are more divisions than partitions, we can distribute them 175 | # easily. Each partition gets 0 or more divisions. 
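        # For example (hypothetical values): with `divisions =
        # [(1, 10), (10, 20), (20, 30), (30, 40)]` and `npartitions = 2`,
        # `split_len(4, 1, 2)` yields 1:2 and `split_len(4, 2, 2)` yields 3:4,
        # so partition 1 is assigned [(1, 10), (10, 20)] and partition 2 is
        # assigned [(20, 30), (30, 40)].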
176 | # TODO: Ensure usage of div here and in sampling (in PT 177 | # library (here), annotation, and in locations) doesn't result in 0 or 178 | # instead we use ceiling division 179 | # ndivisions_per_partition = div(ndivisions, npartitions) 180 | [ 181 | begin 182 | # islastpartition = partition_idx == npartitions 183 | # firstdivisioni = ((partition_idx-1) * ndivisions_per_partition) + 1 184 | # lastdivisioni = islastpartition ? ndivisions : partition_idx * ndivisions_per_partition 185 | # divisions[firstdivisioni:lastdivisioni] 186 | divisions[split_len(ndivisions, partition_idx, npartitions)] 187 | end 188 | for partition_idx in 1:npartitions 189 | ] 190 | else 191 | # Otherwise, each division must be shared among 1 or more partitions 192 | allsplitdivisions = [] 193 | # npartitions_per_division = div(npartitions, ndivisions) 194 | 195 | # Iterate through the divisions and split each of them and find the 196 | # one that contains a split that this partition must own and use as 197 | # its `partition_divisions` 198 | for (division_idx, division) in enumerate(divisions) 199 | # Determine the range (from `firstpartitioni` to `lastpartitioni`) of 200 | # partitions that own this division 201 | # islastdivision = division_idx == ndivisions 202 | # firstpartitioni = ((division_idx-1) * npartitions_per_division) + 1 203 | # lastpartitioni = islastdivision ? npartitions : division_idx * npartitions_per_division 204 | # partitionsrange = firstpartitioni:lastpartitioni 205 | partitionsrange = split_len(npartitions, division_idx, ndivisions) 206 | 207 | # # If the current partition is in that division, compute the 208 | # # subdivision it should use for its partition 209 | # if partition_idx in partitionsrange 210 | 211 | # We need to split the division among all the partitions in 212 | # its range 213 | ndivisionsplits = length(partitionsrange) 214 | 215 | # Get the `Vector{Number}`s to interpolate between 216 | divisionbegin = to_vector(first(division)) 217 | divisionend = to_vector(last(division)) 218 | 219 | # @show divisionbegin 220 | # @show divisionend 221 | 222 | # Initialize divisions for each split 223 | splitdivisions = [[copy(divisionbegin), copy(divisionend)] for _ in 1:ndivisionsplits] 224 | 225 | # Adjust the divisions for each split to interpolate. The result 226 | # of an `orderinghash` call can be an array (in the case of 227 | # strings), so we must iterate through that array in order to 228 | # interpolate at the first element in that array where there is a 229 | # difference. 230 | for (i, (dbegin, dend)) in enumerate(zip(divisionbegin, divisionend)) 231 | # Find the first index in the `Vector{Number}` where 232 | # there is a difference that we can interpolate between 233 | if dbegin != dend 234 | # dpersplit = div(dend-dbegin, ndivisionsplits) 235 | # Iterate through each split 236 | # @show dpersplit 237 | # @show dbegin 238 | # @show dend 239 | start = copy(dbegin) 240 | for j in 1:ndivisionsplits 241 | # Update the start and end of the division 242 | # islastsplit = j == ndivisionsplits 243 | splitdivisions[j][1][i] = j == 1 ? dbegin : start 244 | start += cld(dend-dbegin, ndivisionsplits) 245 | start = min(start, dend) 246 | splitdivisions[j][2][i] = j == ndivisionsplits ? dend : start 247 | # splitdivisions[j][1][i] = dbegin + (dpersplit * (j-1)) 248 | # splitdivisions[j][2][i] = islastsplit ? 
dend : dbegin + dpersplit * j 249 | end 250 | 251 | # Stop if we have found a difference we can 252 | # interpolate between 253 | # TODO: If the difference is not that much, 254 | # interpolate between multiple consecutive 255 | # differing characters together 256 | break 257 | end 258 | end 259 | 260 | # Convert back to `Number` if the divisions were originally 261 | # `Number`s. We support either numbers or lists of numbers for the 262 | # ordering hashes that we use for the min-max bounds. 263 | if !(first(division) isa Vector) 264 | splitdivisions = [ 265 | # NOTE: When porting this stuff to Python, be sure 266 | # to take into account the fact that Julia treats 267 | # many values as arrays 268 | (first(splitdivisionbegin), first(splitdivisionend)) 269 | for (splitdivisionbegin, splitdivisionend) in splitdivisions 270 | ] 271 | end 272 | 273 | # # Get the split of the division that this partition should own 274 | # splitdivision = splitdivisions[1+partition_idx-first(partitionsrange)] 275 | 276 | # # Stop because we have found a division that this partition 277 | # # is supposed to own a split from 278 | # break 279 | 280 | # Each partition must have a _list_ of divisions so we must have a list 281 | # for each partition 282 | for splitdivision in splitdivisions 283 | push!(allsplitdivisions, [splitdivision]) 284 | end 285 | 286 | # end 287 | end 288 | allsplitdivisions 289 | end 290 | end 291 | 292 | ######################## 293 | # Helper MPI functions # 294 | ######################## 295 | 296 | # TODO: Fix below function for reducing values of non-equal sizes 297 | # TODO: Make Allreducev version of below function 298 | 299 | # function Reducev(value, op, comm::MPI.Comm) 300 | # # Reduces values on all processes to a single value on rank 0. 301 | # # 302 | # # This function does the same thing as the function MPI_Reduce using 303 | # # only MPI_Send and MPI_Recv. As shown, it operates with additions on 304 | # # integers, so you could trivially use MPI_Reduce, but for operations 305 | # # on variable size structs for which you cannot define an MPI_Datatype, 306 | # # you can still use this method, by modifying it to use your op 307 | # # and your data structure. 308 | 309 | # # TODO: Actually determine buffer 310 | # tag = 0 311 | # size = get_nworkers(comm) 312 | # rank = get_worker_idx(comm)-1 313 | # lastpower = 1 << log2(size) 314 | 315 | # # each of the ranks greater than the last power of 2 less than size 316 | # # need to downshift their data, since the binary tree reduction below 317 | # # only works when N is a power of two. 
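    # # (Hypothetical illustration of the intended scheme: with size = 6 the
    # # largest power of two that fits is lastpower = 4, so ranks 4 and 5
    # # first send their values down to ranks 0 and 1, and the remaining 4
    # # ranks then reduce pairwise in log2(4) = 2 rounds.)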
318 | # for i in lastpower:(size-1) 319 | # if rank == i 320 | # MPI.send(value, i-lastpower, tag, comm) 321 | # end 322 | # for i in 0:(size-lastpower-1) 323 | # if rank == i 324 | # MPI.Recv!(recvbuffer, i+lastpower, tag, comm) 325 | # value = op(value, recvbuffer) 326 | # end 327 | # end 328 | 329 | # for d in 0:(fastlog2(lastpower)-1) 330 | # k = 0 331 | # while k < lastpower 332 | # k += 1 << (d + 1) 333 | # end 334 | # receiver = k 335 | # sender = k + (1 << d) 336 | # if rank == receiver 337 | # MPI.Recv!(recvbuffer, 1, sender, tag) 338 | # value = op(value, recvbuffer) 339 | # elseif rank == sender 340 | # MPI.Send(value, 1, receiver, tag) 341 | # end 342 | # end 343 | # value 344 | # end 345 | 346 | # function fastlog2(v::UInt32) 347 | # multiply_de_bruijn_bit_position::Vector{Int32} = [ 348 | # 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 349 | # 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 350 | # ] 351 | 352 | # v |= v >> 1 # first round down to one less than a power of 2 353 | # v |= v >> 2 354 | # v |= v >> 4 355 | # v |= v >> 8 356 | # v |= v >> 16 357 | 358 | # # TODO: Fix this 359 | # multiply_de_bruijn_bit_position[(UInt32(v * 0x07C4ACDDU) >> 27) + 1] 360 | # end 361 | 362 | # function tobuf(obj)::Tuple{Symbol, MPI.Buffer} 363 | function tobuf(obj) 364 | # We pass around Julia objects between MPI processes in different ways 365 | # depending on the data type. For simple isbitstype data we keep it as-is 366 | # and use the simple C-like data layout for fast transfer. For dataframes, 367 | # we use Arrow data layout for zero-copy deserialization. For everything 368 | # else including variably-sized arrays and arbitrary Julia objects, we 369 | # simply serialize and deserialize using the Serialization module in the 370 | # Julia standard library. 371 | 372 | if isbits(obj) 373 | (:bits, MPI.Buffer(Ref(obj))) 374 | # (:bits, MPI.Buffer(obj)) 375 | # (:bits, MPI.Buffer(Ref(obj))) 376 | elseif isa_array(obj) && isbitstype(first(typeof(obj).parameters)) && ndims(obj) == 1 377 | # (:bits, MPI.Buffer(obj)) 378 | (:bits, MPI.Buffer(obj)) 379 | elseif isa_df(obj) 380 | io = IOBuffer() 381 | Arrow.write(io, obj) 382 | # (:df, MPI.Buffer(view(io.data, 1:position(io)))) 383 | (:df, MPI.Buffer(view(io.data, 1:io.size))) 384 | else 385 | io = IOBuffer() 386 | serialize(io, obj) 387 | (:unknown, MPI.Buffer(view(io.data, 1:io.size))) 388 | # (:unknown, io) 389 | end 390 | end 391 | 392 | function buftovbuf(buf::MPI.Buffer, comm::MPI.Comm)::MPI.VBuffer 393 | # This function expects that the given buf has buf.data being an array. 394 | # Basically what it does is it takes the result of a call to tobuf above 395 | # on each process and constructs a VBuffer with the sum of the sizes of the 396 | # buffers on different processes. 397 | sizes = MPI.Allgather(buf.count, comm) 398 | # NOTE: This function should only be used for variably-sized buffers for 399 | # receiving data because the returned buffer contains zeroed-out memory. 400 | VBuffer(similar(buf.data, sum(sizes)), sizes) 401 | end 402 | 403 | function bufstosendvbuf(bufs::Vector{MPI.Buffer}, comm::MPI.Comm)::MPI.VBuffer 404 | sizes = [length(buf.data) for buf in bufs] 405 | VBuffer(vcat(map(buf -> buf.data, bufs)...), sizes) 406 | end 407 | 408 | function bufstorecvvbuf(bufs::Vector{MPI.Buffer}, comm::MPI.Comm)::MPI.VBuffer 409 | # This function expects that each given buf has buf.data being an array and 410 | # that the number of bufs in bufs is equal to the size of the communicator. 
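    # For example (hypothetical sizes): if this rank will receive chunks of 8,
    # 0, and 16 bytes from ranks 0, 1, and 2, the Alltoall below fills `sizes`
    # with [8, 0, 16] and the returned VBuffer wraps a single 24-byte array
    # with displacements [0, 8, 8].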
411 | # sizes = MPI.Allgather(length(buf.data), comm) 412 | sizes = MPI.Alltoall(MPI.UBuffer([length(buf.data) for buf in bufs], 1), comm) 413 | # NOTE: Ensure that the data fields of the bufs are initialized to have the 414 | # right data type (e.g., Vector{UInt8} or Vector{Int64}) 415 | # We use `similar` here because we want zeroed out memory to receive data. 416 | VBuffer(similar(first(bufs).data, sum(sizes)), sizes) 417 | end 418 | 419 | function frombuf(kind, obj) 420 | if kind == :bits && obj isa Ref 421 | # TODO: Ensure that the "dereference" here is necessary 422 | obj[] 423 | elseif kind == :bits 424 | obj 425 | elseif kind == :df 426 | DataFrame(Arrow.Table(obj), copycols=false) 427 | else 428 | deserialize(obj) 429 | end 430 | end 431 | 432 | function getpath(path) 433 | if startswith(path, "http://") || startswith(path, "https://") 434 | # TODO: First check for size of file and only download to 435 | # disk if it doesn't fit in free memory 436 | hashed_path = string(hash(path)) 437 | joined_path = joinpath(tempdir(), hashed_path) 438 | if !isfile(joined_path) 439 | # NOTE: Even though we are storing in /tmp, this is 440 | # effectively caching the download. If this is undesirable 441 | # to a user, a short-term solution is to use a different 442 | # URL each time (e.g., add a dummy query to the end of the 443 | # URL) 444 | download(path, joined_path) 445 | end 446 | joined_path 447 | elseif startswith(path, "s3://") 448 | replace(path, "s3://" => "/home/ec2-user/mnt/") 449 | # NOTE: We expect that the ParallelCluster instance was set up 450 | # to have the S3 filesystem mounted at /mnt/ 451 | else 452 | path 453 | end 454 | end --------------------------------------------------------------------------------