├── Banyan ├── deps │ ├── build.log │ └── build.jl ├── .swp ├── test │ ├── res │ │ ├── Banyanfile.json │ │ └── Banyanfile_badcluster.json │ ├── data │ │ ├── multiple_rowgroups.parquet │ │ └── iris.csv │ ├── runtests.jl │ └── test_cluster.jl ├── src │ ├── utils_abstract_types.jl │ ├── id.jl │ ├── job.jl │ ├── future.jl │ ├── tasks.jl │ ├── queues.jl │ ├── futures.jl │ ├── Banyan.jl │ ├── jobs.jl │ ├── samples.jl │ ├── partitions.jl │ ├── pt_lib_constructors.jl │ ├── requests.jl │ ├── utils.jl │ └── clusters.jl ├── res │ ├── Banyanfile.json │ ├── pt_lib_info.json │ └── utils.jl ├── scripts │ └── deploy_pt_lib.py ├── Project.toml └── LICENSE ├── .vscode └── settings.json ├── BanyanArrays ├── test │ ├── res │ │ └── Banyanfile.json │ ├── blas.jl │ ├── haversine.jl │ ├── test_simple.jl │ ├── shallow_water.jl │ ├── test_l3.jl │ ├── runtests.jl │ └── test_l1_l2.jl ├── res │ ├── Banyanfile.json │ └── utils_ba.jl ├── src │ └── BanyanArrays.jl ├── Project.toml └── LICENSE ├── BanyanDataFrames ├── res │ └── Banyanfile.json ├── test │ ├── res │ │ └── Banyanfile.json │ ├── test_simple.jl │ └── runtests.jl ├── Project.toml ├── src │ └── BanyanDataFrames.jl └── LICENSE ├── benchmarking ├── matmul_runtime.pdf ├── matmul_runtime.png ├── blackscholes_runtime.pdf ├── blackscholes_runtime.png ├── benchmark.py └── benchmark_results.md ├── docs ├── 1-Create_an_account_with_Banyan.md ├── 5-Cluster_management.md └── 3-Cluster_creation.md ├── .gitignore ├── README.md └── LICENSE /Banyan/deps/build.log: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "julia.enableTelemetry": false 3 | } -------------------------------------------------------------------------------- /Banyan/.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/banyan-julia/v0.1.0/Banyan/.swp -------------------------------------------------------------------------------- /Banyan/test/res/Banyanfile.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": ["file://../../res/Banyanfile.json"] 3 | } 4 | -------------------------------------------------------------------------------- /BanyanArrays/test/res/Banyanfile.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": ["file://../../res/Banyanfile.json"] 3 | } 4 | -------------------------------------------------------------------------------- /BanyanDataFrames/res/Banyanfile.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": ["file://../../BanyanArrays/res/Banyanfile.json"] 3 | } 4 | -------------------------------------------------------------------------------- /Banyan/deps/build.jl: -------------------------------------------------------------------------------- 1 | # using JuliaFormatter: format 2 | 3 | # format("..", margin = 79, remove_extra_newlines = true) 4 | -------------------------------------------------------------------------------- /benchmarking/matmul_runtime.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/banyan-julia/v0.1.0/benchmarking/matmul_runtime.pdf -------------------------------------------------------------------------------- 
/benchmarking/matmul_runtime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/banyan-julia/v0.1.0/benchmarking/matmul_runtime.png -------------------------------------------------------------------------------- /benchmarking/blackscholes_runtime.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/banyan-julia/v0.1.0/benchmarking/blackscholes_runtime.pdf -------------------------------------------------------------------------------- /benchmarking/blackscholes_runtime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/banyan-julia/v0.1.0/benchmarking/blackscholes_runtime.png -------------------------------------------------------------------------------- /Banyan/test/data/multiple_rowgroups.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/banyan-julia/v0.1.0/Banyan/test/data/multiple_rowgroups.parquet -------------------------------------------------------------------------------- /Banyan/src/utils_abstract_types.jl: -------------------------------------------------------------------------------- 1 | abstract type AbstractFuture end 2 | 3 | abstract type AbstractSample end 4 | abstract type AbstractSampleWithKeys <: AbstractSample end -------------------------------------------------------------------------------- /Banyan/src/id.jl: -------------------------------------------------------------------------------- 1 | const JobId = String 2 | const ValueId = String 3 | const MessageId = String 4 | 5 | generate_value_id() = randstring(8) 6 | generate_message_id() = randstring(8) 7 | -------------------------------------------------------------------------------- /BanyanArrays/test/blas.jl: -------------------------------------------------------------------------------- 1 | @testset "Level 1: Element-wise Vector Multiplication" begin end 2 | 3 | @testset "Level 3: Matrix Multiplication" begin end 4 | 5 | @testset "Level 3: Transpose" begin end 6 | -------------------------------------------------------------------------------- /BanyanArrays/res/Banyanfile.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": ["file://../../Banyan/res/Banyanfile.json"], 3 | "require": { 4 | "cluster": {"files": ["file://utils_ba.jl"]}, 5 | "job": {"code": ["include(\"utils_ba.jl\")"]} 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /BanyanArrays/src/BanyanArrays.jl: -------------------------------------------------------------------------------- 1 | module BanyanArrays 2 | 3 | using Banyan 4 | 5 | export Array, Vector, Matrix 6 | export read_hdf5, write_hdf5 7 | export ndims, size, length, eltype 8 | export fill 9 | export map, mapslices, reduce, sort, sortslices 10 | 11 | include("array.jl") 12 | 13 | end # module -------------------------------------------------------------------------------- /BanyanDataFrames/test/res/Banyanfile.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": ["file://../../res/Banyanfile.json"], 3 | "require": { 4 | "cluster": { 5 | "packages": ["Statistics"] 6 | }, 7 | "job": { 8 | "code": [ 9 | "using Statistics" 10 | ] 11 | } 12 | } 13 | } 14 |
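The `BanyanArrays` module above only declares its exports, so here is a minimal usage sketch. It mirrors `BanyanArrays/test/test_simple.jl` below; the cluster name is a placeholder, and it assumes your Banyan account, AWS credentials, and cluster are already configured (see `docs/`):

```julia
using Banyan
using BanyanArrays

# `with_job` (exported by Banyan) creates a job on the named cluster and
# guarantees the job is destroyed afterwards, even if an error is thrown.
with_job(cluster_name = "mycluster", nworkers = 2) do job
    x = BanyanArrays.fill(10.0, 2048)  # lazy, distributed analogue of Base.fill
    x = map(e -> e / 10, x)            # element-wise map; still lazy
    res = collect(sum(x))              # collect forces computation on the cluster
    println(res)                       # 2048.0
end
```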
-------------------------------------------------------------------------------- /BanyanArrays/test/haversine.jl: -------------------------------------------------------------------------------- 1 | # Expected output according to the reference Python implementation: an array filled with 4841.08061686 2 | lat_ref = 0.7098 3 | lon_ref = 1.2390 4 | 5 | lat_test = ones(10) * 0.069 6 | lon_test = ones(10) * 0.069 7 | 8 | dlat = sin.((lat_test .- lat_ref) / 2) .^ 2 9 | dlon = sin.((lon_test .- lon_ref) / 2) .^ 2 10 | 11 | a = cos.(lat_ref) * cos.(lat_test) .* dlon + dlat 12 | 13 | c = asin.(sqrt.(a)) * 2 * 3959.0 # 3959.0 is Earth's radius in miles 14 | print(c) -------------------------------------------------------------------------------- /BanyanArrays/test/test_simple.jl: -------------------------------------------------------------------------------- 1 | @testset "Simple usage of BanyanArrays" begin 2 | run_with_job("Filling") do job 3 | println(typeof(Base.fill(1.0, 2048))) 4 | x = BanyanArrays.fill(10.0, 2048) 5 | println(typeof(x)) 6 | x = map(e -> e / 10, x) 7 | println(typeof(x)) 8 | res = sum(x) 9 | 10 | res = collect(res) 11 | @test typeof(res) == Float64 12 | @test res == 2048.0 # sum of 2048 ones (was `@test res = 1024`, an assignment) 13 | end 14 | end 15 | -------------------------------------------------------------------------------- /BanyanArrays/Project.toml: -------------------------------------------------------------------------------- 1 | name = "BanyanArrays" 2 | uuid = "369465de-032e-4609-9dcf-82b89c370a7b" 3 | authors = ["Caleb Winston "] 4 | version = "0.1.0" 5 | 6 | [deps] 7 | Banyan = "706d138b-e922-45b9-a636-baf8ae0d5317" 8 | 9 | [compat] 10 | Banyan = "0.1.0" 11 | julia = "1.6.0" 12 | 13 | [extras] 14 | Banyan = "706d138b-e922-45b9-a636-baf8ae0d5317" 15 | Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" 16 | 17 | [targets] 18 | test = ["Banyan", "Test"] 19 | -------------------------------------------------------------------------------- /Banyan/test/res/Banyanfile_badcluster.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [], 3 | "require": { 4 | "language": "jl", 5 | "cluster": { 6 | "files": [], 7 | "scripts": [], 8 | "packages": ["DataFrames"], 9 | "pt_lib_info": "file://../../res/pt_lib_info.json", 10 | "pt_lib": "file://../../res/pt_lib.jl" 11 | }, 12 | "job": { 13 | "code": ["using DataFrames; println(\"using DataFrames\")"] 14 | } 15 | } 16 | } 17 | 18 | 19 | -------------------------------------------------------------------------------- /Banyan/res/Banyanfile.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [], 3 | "require": { 4 | "language": "jl", 5 | "cluster": { 6 | "files": ["file://utils.jl"], 7 | "scripts": [], 8 | "packages": ["DataFrames", "CSV", "Parquet", "Arrow"], 9 | "pt_lib_info": "file://pt_lib_info.json", 10 | "pt_lib": "file://pt_lib.jl" 11 | }, 12 | "job": { 13 | "code": [ 14 | "using DataFrames, CSV, Parquet, Arrow" 15 | ] 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /BanyanDataFrames/test/test_simple.jl: -------------------------------------------------------------------------------- 1 | @testset "Simple usage of BanyanDataFrames" begin 2 | run_with_job("Grouping") do job 3 | # TODO: Use `get_cluster()` and `wget` to automatically load in iris.csv 4 | iris = read_csv("s3://banyan-cluster-data-pumpkincluster0-3e15290827c0c584/iris.csv") 5 | gdf = groupby(iris, :species) 6 | lengths = collect(combine(gdf, :petal_length => mean)) 7 | counts =
collect(combine(gdf, nrow)) 8 | 9 | @test first(lengths)[:petal_length_mean] == 1.464 10 | @test first(counts)[:nrow] == 50 11 | end 12 | end 13 | -------------------------------------------------------------------------------- /Banyan/src/job.jl: -------------------------------------------------------------------------------- 1 | struct Job 2 | id::JobId 3 | nworkers::Int32 4 | sample_rate::Int32 5 | locations::Dict{ValueId,Location} 6 | pending_requests::Vector{Request} 7 | futures_on_client::WeakKeyDict{ValueId,Future} 8 | cluster_name::String 9 | 10 | # TODO: Ensure that this struct and constructor (which are just for storing 11 | # information about the job) does not conflict with the `Job` function that 12 | # calls `create_job` 13 | function Job(cluster_name::String, job_id::JobId, nworkers::Integer, sample_rate::Integer)::Job 14 | new(job_id, nworkers, sample_rate, Dict(), [], Dict(), cluster_name) 15 | end 16 | end 17 | -------------------------------------------------------------------------------- /Banyan/src/future.jl: -------------------------------------------------------------------------------- 1 | mutable struct Future <: AbstractFuture 2 | value::Any 3 | value_id::ValueId 4 | mutated::Bool 5 | stale::Bool 6 | 7 | function Future(value::Any, value_id::ValueId, mutated::Bool, stale::Bool) 8 | new_future = new(value, value_id, mutated, stale) 9 | 10 | # Create finalizer and register 11 | finalizer(new_future) do fut 12 | try 13 | record_request(DestroyRequest(fut.value_id)) 14 | catch e 15 | @warn "Failed to destroy value $(fut.value_id) because job has stopped: $e" 16 | end 17 | end 18 | 19 | new_future 20 | end 21 | end -------------------------------------------------------------------------------- /BanyanArrays/res/utils_ba.jl: -------------------------------------------------------------------------------- 1 | # NOTE: This function is shared between the client library and the PT library 2 | function indexapply(op, objs...; index::Union{Integer,Colon}=1) # `Colon` selects the last index (see below); the former `::Integer` annotation made that branch unreachable 3 | lists = [obj for obj in objs if obj isa AbstractVecOrTuple] 4 | length(lists) > 0 || throw(ArgumentError("Expected at least one tuple as input")) 5 | index = index isa Colon ? length(first(lists)) : index 6 | operands = [(obj isa AbstractVecOrTuple ? obj[index] : obj) for obj in objs] 7 | indexres = op(operands...) 8 | res = first(lists) 9 | if first(lists) isa Tuple 10 | res = [res...]
11 | res[index] = indexres 12 | Tuple(res) 13 | else 14 | res = copy(res) 15 | res[index] = indexres 16 | res # return the mutated copy (previously this branch returned `indexres` itself) 17 | end 18 | end -------------------------------------------------------------------------------- /BanyanDataFrames/Project.toml: -------------------------------------------------------------------------------- 1 | name = "BanyanDataFrames" 2 | uuid = "dfdd4555-3ad7-41cc-a503-a251c9c652f9" 3 | authors = ["Caleb Winston "] 4 | version = "0.1.0" 5 | 6 | [deps] 7 | Banyan = "706d138b-e922-45b9-a636-baf8ae0d5317" 8 | BanyanArrays = "369465de-032e-4609-9dcf-82b89c370a7b" 9 | DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" 10 | 11 | [compat] 12 | Banyan = "0.1.0" 13 | BanyanArrays = "0.1.0" 14 | DataFrames = "1.1.1" 15 | julia = "1.6.0" 16 | 17 | [extras] 18 | Banyan = "706d138b-e922-45b9-a636-baf8ae0d5317" 19 | BanyanArrays = "369465de-032e-4609-9dcf-82b89c370a7b" 20 | Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" 21 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 22 | 23 | [targets] 24 | test = ["Banyan", "BanyanArrays", "Test", "Statistics"] 25 | -------------------------------------------------------------------------------- /docs/1-Create_an_account_with_Banyan.md: -------------------------------------------------------------------------------- 1 | # Create an account with Banyan 2 | 3 | ## Step 1: Sign up on AWS Marketplace 4 | 1. Create an AWS account or sign into an existing AWS account. 5 | 2. Visit banyancomputing.com and click ... You will be redirected to the registration page in AWS Marketplace. Be sure to sign into AWS. Alternatively, you can directly visit the registration page in AWS Marketplace. 6 | 3. Subscribe. You will be redirected to the Banyan sign-up page. 7 | 8 | ## Step 2: Sign up on Banyan website 9 | 4. Fill out the fields with your information (username, email address) and check the box to accept the terms. Click sign up. 10 | 5. Important: Do not close your browser or the web page. 11 | 6. You will receive an activation link in your email. Open that email and click on the activation link. Reset your password and sign in.... -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Files generated by invoking Julia with --code-coverage 2 | *.jl.cov 3 | *.jl.*.cov 4 | 5 | # Files generated by invoking Julia with --track-allocation 6 | *.jl.mem 7 | 8 | # System-specific files and directories generated by the BinaryProvider and BinDeps packages 9 | # They contain absolute paths specific to the host computer, and so should not be committed 10 | deps/deps.jl 11 | deps/build.log 12 | deps/downloads/ 13 | deps/usr/ 14 | deps/src/ 15 | 16 | # Build artifacts for creating documentation generated by the Documenter package 17 | docs/build/ 18 | docs/site/ 19 | 20 | # File generated by Pkg, the package manager, based on a corresponding Project.toml 21 | # It records a fixed state of all packages used by the project. As such, it should not be 22 | # committed for packages, but should be committed for applications that require a static 23 | # environment.
24 | # Manifest.toml 25 | -------------------------------------------------------------------------------- /BanyanDataFrames/src/BanyanDataFrames.jl: -------------------------------------------------------------------------------- 1 | module BanyanDataFrames 2 | 3 | using Banyan 4 | using BanyanArrays 5 | 6 | using DataFrames 7 | 8 | include("df.jl") 9 | include("gdf.jl") 10 | 11 | # Types 12 | export DataFrame, GroupedDataFrame 13 | 14 | # I/O 15 | export read_csv, write_csv, read_parquet, write_parquet, read_arrow, write_arrow 16 | 17 | # Dataframe properties 18 | export nrow, ncol, size, names, propertynames 19 | 20 | # Dataframe filtering 21 | export dropmissing, filter, unique, nonunique 22 | 23 | # Dataframe selection and column manipulation 24 | export getindex, setindex!, rename 25 | 26 | # Dataframe sorting 27 | export sort 28 | 29 | # Dataframe joining 30 | export innerjoin 31 | 32 | # Grouped dataframe properties 33 | export length, groupcols, valuecols 34 | 35 | # Grouped dataframe methods 36 | export groupby, select, transform, combine, subset 37 | 38 | end # module 39 | -------------------------------------------------------------------------------- /Banyan/src/tasks.jl: -------------------------------------------------------------------------------- 1 | ######### 2 | # Tasks # 3 | ######### 4 | 5 | mutable struct DelayedTask 6 | # Fields for use in processed task ready to be recorded 7 | code::String 8 | value_names::Dict{ValueId,String} 9 | effects::Dict{ValueId,String} 10 | pa_union::Vector{PartitionAnnotation} # Enumeration of applicable PAs 11 | # Fields for use in task yet to be processed in a call to `compute` 12 | partitioned_using_func::Union{Function,Nothing} 13 | partitioned_with_func::Union{Function,Nothing} 14 | mutation::Dict{Future,Future} # This gets converted to `effects` 15 | end 16 | 17 | DelayedTask() = DelayedTask("", Dict(), Dict(), [PartitionAnnotation()], nothing, nothing, Dict()) 18 | 19 | function to_jl(task::DelayedTask) 20 | return Dict( 21 | "code" => task.code, 22 | "value_names" => task.value_names, 23 | "effects" => task.effects, 24 | "pa_union" => [to_jl(pa) for pa in task.pa_union], 25 | ) 26 | end 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Banyan Julia 2 | 3 | Banyan Julia is an extension to the Julia programming language that seamlessly scales existing libraries and code to massive data and compute. Banyan allows _partition types_ to be assigned to variables in existing code. Annotated code is automatically offloaded to run in a distributed fashion on managed clusters running in your AWS Virtual Private Cloud. Banyan optimizes code on the fly to take advantage of CPU caches and multicore parallelism in the clusters where the offloaded code runs. 4 | 5 | Software libraries can be annotated with partition types, and subsequent use of the annotated functions automatically runs at scale. Currently, we are developing two annotated libraries: 6 | 7 | - BanyanArrays.jl 8 | - BanyanDataFrames.jl 9 | 10 | Eventually, you will be able to use these libraries as drop-in replacements for Julia's standard arrays and the DataFrames.jl library. By changing an import statement, you can run your code as is, with Banyan scaling to arbitrary data or compute needs.
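For example, here is a sketch of the BanyanDataFrames workflow, adapted from this repository's test suite (the bucket and cluster names are placeholders, and a configured account and cluster are assumed; see the `docs/` directory):

```julia
using Banyan
using BanyanDataFrames  # drop-in for `using DataFrames`
using Statistics

with_job(cluster_name = "mycluster", nworkers = 2) do job
    # The same DataFrames.jl-style code, now executed on the cluster
    iris = read_csv("s3://your-bucket/iris.csv")
    gdf = groupby(iris, :species)
    lengths = collect(combine(gdf, :petal_length => mean))  # collect() brings the result back
end
```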
11 | -------------------------------------------------------------------------------- /docs/5-Cluster_management.md: -------------------------------------------------------------------------------- 1 | # Manage a cluster 2 | 3 | ## Updating a Cluster 4 | 5 | To update a cluster, you must provide the following information. 6 | 7 | Required Parameters 8 | * `cluster_name` - name of cluster to update 9 | * `username` - username under which this cluster was created 10 | 11 | Optional Parameters 12 | * `additional_policy` - ARN of AWS IAM policy to give additional permissions to the cluster 13 | * `s3_read_write_resource` - ARN of AWS S3 bucket to give cluster read/write permission to 14 | * `num_nodes` - maximum number of nodes for cluster 15 | * `banyanfile` - Banyanfile 16 | 17 | Updating a cluster will fail under the following scenarios. 18 | * Cluster is not currently in the `running` state 19 | * Cluster has jobs currently running 20 | 21 | 22 | ## Destroying a Cluster 23 | 24 | To destroy a cluster, you must provide the following information: 25 | * `cluster_name` - name of cluster to delete 26 | * `username` - username under which this cluster was created 27 | 28 | -------------------------------------------------------------------------------- /Banyan/scripts/deploy_pt_lib.py: -------------------------------------------------------------------------------- 1 | # This script is retired. 2 | # 3 | # import sys 4 | # import json 5 | # import subprocess 6 | 7 | # import boto3 8 | # from boto3.dynamodb.types import TypeSerializer 9 | 10 | # # Upload pt_lib.jl to S3 11 | # s3 = boto3.resource('s3') 12 | # s3_client = boto3.client('s3') 13 | # s3_client.upload_file("res/pt_lib.jl", "banyan-executor", "pt_lib.jl") 14 | 15 | # # Upload pt_lib_info.json to DynamoDB (to be downloaded by `evaluate`) 16 | # dynamodb = boto3.resource('dynamodb') 17 | # clusters = dynamodb.Table('Clusters') 18 | # serializer = TypeSerializer() 19 | # with open('res/pt_lib_info.json') as pt_lib_info_file: 20 | # pt_lib_info_json = json.load(pt_lib_info_file) 21 | # clusters.update_item( 22 | # Key={ 23 | # 'cluster_id': sys.argv[1], 24 | # }, 25 | # UpdateExpression='SET pt_lib_info = :pt_lib_info', 26 | # ExpressionAttributeValues={ 27 | # ':pt_lib_info': { 28 | # k: serializer.serialize(v) for k, v in pt_lib_info_json.items() 29 | # } 30 | # } 31 | # ) 32 | 33 | # # Download pt_lib.jl from S3 to the cluster 34 | # #commands = ["\"cd /home/ec2-user\"", "\"sudo yum update -y\"", "\"aws s3 cp s3://banyan-executor /home/ec2-user --recursive\""] 35 | # #for cmd in commands: 36 | # # subprocess.Popen("pcluster ssh {n} -i {f} {cmd}".format(n=sys.argv[1], f=sys.argv[2], cmd=cmd), shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() 37 | -------------------------------------------------------------------------------- /benchmarking/benchmark.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | num_workers = [1, 2, 4, 8, 16] 4 | 5 | ################# 6 | # BLACK SCHOLES # 7 | ################# 8 | 9 | wo_cache_optim = [237.947, 163.376, 85.370, 42.892, 22.544] 10 | 11 | w_l2cache_optim = [103.517, 71.561, 36.956, 18.404, 9.756] 12 | w_l1cache_optim = [93.652, 81.079, 40.485, 19.570, 10.368] 13 | 14 | plt.plot(num_workers, wo_cache_optim, marker='o', label="Without Cache Optimization") #, label="w/o cache optimization") 15 | plt.plot(num_workers, w_l2cache_optim, marker='o', label="L2 Cache Optimized") 16 | plt.plot(num_workers,
w_l1cache_optim, marker='o', label="L1 Cache Optimized") 17 | plt.title("Runtime for Black Scholes") 18 | plt.xlabel("Number of Workers") 19 | plt.ylabel("Execution Runtime (s)") 20 | plt.yscale("log") 21 | plt.legend() 22 | plt.grid(alpha=0.5) 23 | plt.savefig("blackscholes_runtime.pdf", dpi=200) 24 | plt.close() 25 | 26 | 27 | 28 | ########## 29 | # MATMUL # 30 | ########## 31 | 32 | wo_cache_optim = [31.543, 26.172, 13.388, 7.380, 3.964] 33 | w_cache_optim = [96.275, 68.332, 37.093, 18.993, 10.289] 34 | 35 | 36 | plt.plot(num_workers, wo_cache_optim, marker='o', label="Without Cache Optimization") #, label="w/o cache optimization") 37 | plt.plot(num_workers, w_cache_optim, marker='o', label="L2 Cache Optimized") 38 | plt.title("Runtime for Matrix Multiplication") 39 | plt.xlabel("Number of Workers") 40 | plt.ylabel("Execution Runtime (s)") 41 | plt.yscale("log") 42 | plt.legend() 43 | plt.grid(alpha=0.5) 44 | plt.savefig("matmul_runtime.pdf", dpi=200) 45 | plt.close() 46 | -------------------------------------------------------------------------------- /Banyan/Project.toml: -------------------------------------------------------------------------------- 1 | name = "Banyan" 2 | uuid = "706d138b-e922-45b9-a636-baf8ae0d5317" 3 | authors = ["Banyan Team "] 4 | version = "0.1.0" 5 | 6 | [deps] 7 | AWS = "fbe9abb3-538b-5e4e-ba9e-bc94f4f92ebc" 8 | AWSCore = "4f1ea46c-232b-54a6-9b17-cc2d0f3e6598" 9 | AWSS3 = "1c724243-ef5b-51ab-93f4-b0a88ac62a95" 10 | AWSSQS = "6e80b5ca-5733-51f9-999e-c18680912812" 11 | Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" 12 | Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" 13 | CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" 14 | DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" 15 | FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" 16 | FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f" 17 | HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" 18 | HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" 19 | IniFile = "83e8ac13-25f8-5344-8a64-a9f2b223428f" 20 | IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e" 21 | JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" 22 | Parquet = "626c502c-15b0-58ad-a749-f091afb673ae" 23 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" 24 | Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" 25 | TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76" 26 | 27 | [compat] 28 | AWS = "1.36.0" 29 | AWSCore = "0.6.17" 30 | AWSS3 = "0.8.3" 31 | AWSSQS = "0.6.4" 32 | Arrow = "1.5.0" 33 | CSV = "0.8.5" 34 | DataFrames = "1.1.1" 35 | FileIO = "1.9.1" 36 | FilePathsBase = "0.9.10" 37 | HDF5 = "0.15.5" 38 | HTTP = "0.8.19" 39 | IniFile = "0.5.0" 40 | IterTools = "1.3.0" 41 | JSON = "0.21.1" 42 | Parquet = "0.8.3" 43 | julia = "1.6.0" 44 | 45 | [extras] 46 | Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" 47 | 48 | [targets] 49 | test = ["Test"] 50 | -------------------------------------------------------------------------------- /BanyanArrays/test/shallow_water.jl: -------------------------------------------------------------------------------- 1 | G = 6.67384e-11 # m/(kg*s^2) 2 | dt = 60*60*24*365.25 # Years in seconds 3 | r_ly = 9.4607e15 # Lightyear in m 4 | m_sol = 1.9891e30 # Solar mass in kg 5 | b = 0.0; g = 9.81 # b: drag coefficient; g (gravitational acceleration) was used below but never defined, so 9.81 m/s^2 is assumed 6 | 7 | n = 10 8 | 9 | box_size = 1 10 | grid_spacing = 1.0 * box_size / n 11 | 12 | iterations = 10 13 | 14 | u = zeros((n,n)) # x-velocity (renamed from v_x; the update loop below uses u) 15 | v = zeros((n,n)) # y-velocity (renamed from v_y; the update loop below uses v) 16 | eta = ones((n,n)) # pressure 17 | for i=1:n 18 | eta[i] = eta[i] * 0.1 * i 19 | end 20 | 21 | #temps = [ones((n,n)), 22 | # ones((n,n)), 23 | # ones((n,n)), 24 | # ones((n,n))] 25 | 26 | tmp = ones((n,n)) 27
| du_dt = ones((n,n)) 28 | dv_dt = ones((n,n)) 29 | tmp1 = ones((n,n)) 30 | 31 | for i in 1:iterations 32 | # Compute derivatives with respect to x and y by using difference in z (by shifting) / difference in x or y (going both sides) 33 | # Derivative of eta with respect to x (the second dimension) 34 | roll1 = circshift(eta, (0,-1)) 35 | roll2 = circshift(eta, (0,1)) 36 | tmp1 = (roll1 .- roll2) ./ (grid_spacing * 2.0) 37 | 38 | # Derivative of eta with respect to y (the first dimension) 39 | roll1 = circshift(eta, (-1,0)) 40 | roll2 = circshift(eta, (1,0)) 41 | tmp = (roll1 .- roll2) ./ (grid_spacing * 2.0) 42 | 43 | du_dt = -g .* tmp1 - b * u 44 | dv_dt = -g .* tmp - b .* v 45 | 46 | H = 0 47 | tmp1 = ((eta .+ H) .* u) 48 | tmp2 = tmp1 .* v 49 | 50 | roll1 = circshift(tmp1, (0,-1)) 51 | roll2 = circshift(tmp1, (0,1)) 52 | tmp1 = -1 * (roll1 .- roll2) ./ (grid_spacing * 2.0) 53 | 54 | roll1 = circshift(tmp, (0,-1)) 55 | roll2 = circshift(tmp, (0,1)) 56 | tmp = tmp1 .- ((roll1 .- roll2) ./ (grid_spacing * 2.0)) 57 | 58 | # On to line 106 59 | end -------------------------------------------------------------------------------- /Banyan/src/queues.jl: -------------------------------------------------------------------------------- 1 | #using AWSCore 2 | #using AWSSQS 3 | 4 | function get_scatter_queue(job_id::JobId) 5 | return sqs_get_queue( 6 | get_aws_config(), 7 | string("banyan_", job_id, "_scatter.fifo"), 8 | ) 9 | end 10 | 11 | function get_gather_queue(job_id::JobId) 12 | return sqs_get_queue( 13 | get_aws_config(), 14 | string("banyan_", job_id, "_gather.fifo"), 15 | ) 16 | end 17 | 18 | function receive_next_message(queue_name) 19 | m = sqs_receive_message(queue_name) 20 | while isnothing(m) 21 | m = sqs_receive_message(queue_name) 22 | end 23 | content = m[:message] 24 | sqs_delete_message(queue_name, m) 25 | if startswith(content, "EVALUATION_END") 26 | @debug "Received evaluation end" 27 | println(content[15:end]) # index 15 skips the 14-character "EVALUATION_END" prefix 28 | response = Dict{String,Any}("kind" => "EVALUATION_END") 29 | response["end"] = (endswith(content, "MESSAGE_END")) 30 | # TODO: Maybe truncate by chopping off the MESSAGE_END 31 | response 32 | elseif startswith(content, "JOB_FAILURE") 33 | @debug "Job failed" 34 | global current_job_status 35 | current_job_status = "failed" 36 | # Index 12 skips the 11-character "JOB_FAILURE" prefix 37 | println(content[12:end]) 38 | error("Job failed; see preceding output") 39 | else 40 | @debug "Received scatter or gather request" 41 | JSON.parse(content) 42 | end 43 | end 44 | 45 | function send_message(queue_name, message) 46 | sqs_send_message( 47 | queue_name, 48 | message, 49 | (:MessageGroupId, "1"), 50 | (:MessageDeduplicationId, generate_message_id()), 51 | ) 52 | end 53 | -------------------------------------------------------------------------------- /BanyanArrays/test/test_l3.jl: -------------------------------------------------------------------------------- 1 | function matmul() 2 | 3 | # Create data 4 | n = future(Int32(15e2)) 5 | m = future(Int32(10e3)) 6 | p = future(Int32(5e3)) 7 | A = future() # n x m 8 | B = future() # m x p 9 | 10 | # Where the data is located 11 | val(n) 12 | val(m) 13 | val(p) 14 | mem(A, Int64(evaluate(n) * evaluate(m)), Float64) 15 | mem(B, Int64(evaluate(m) * evaluate(p)), Float64) 16 | 17 | # How the data is partitioned 18 | pt(n, Div()) 19 | pt(m, Replicate()) 20 | pt(p, Div()) 21 | pt(A, Block(1)) 22 | pt(B, Block(2)) 23 | pc(Cross(A, B)) 24 | pc(Co(A, n)) 25 | pc(Co(B, p)) 26 | 27 | mutated(A) 28 | mutated(B) 29 | 30 |
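# A note on the annotation API used in this test (names exported from Banyan/src/Banyan.jl;
# this summary is inferred from how they are used in this repository):
# - `pt(fut, ptype)` assigns a partition type such as `Block`, `Div`, or `Replicate` to a future,
# - `pc(constraint)` records a partitioning constraint such as `Cross` or `Co` between futures,
# - `mutated(fut)` marks a future that the next code region will assign to, and
# - `@partitioned ... begin ... end` records that code region as a task to be offloaded
#   and run with the partitioning declared above.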
@partitioned A B n m p begin 31 | A = fill(1, (Int64(n), Int64(m))) 32 | B = fill(2, (Int64(m), Int64(p))) 33 | end 34 | 35 | C = future() 36 | mem(C, Int64(evaluate(n) * evaluate(p)), Float64) 37 | 38 | pt(A, Block(1)) 39 | pt(B, Block(2)) 40 | pt(C, [Block(1), Block(2)]) 41 | pc(Cross(A, B)) 42 | pc(Cross((C, 1), (C, 2))) # Redundant but currently required to assert order of splitting 43 | pc(Equal((C, 1), (C, 2))) 44 | pc(Co((C, 1), A)) 45 | pc(Co((C, 2), B)) 46 | 47 | mutated(C) 48 | 49 | @partitioned A B C begin 50 | C = A * B 51 | end 52 | 53 | # C_new = future() 54 | # located(C_new, C.location) 55 | # mutated(C_new) 56 | 57 | # pt(C, [Block(1), Block(2)]) 58 | # pc(Cross((C, 1), (C, 2))) 59 | # pt(C_new, BlockMulti([1, 2])) 60 | 61 | # @partitioned C C_new begin 62 | # C_new = C 63 | # end 64 | # C = C_new 65 | 66 | evaluate(C) 67 | end 68 | 69 | @testset "Matrix multiplication" begin 70 | runtest("Matrix Multiplication", j -> begin 71 | matmul() 72 | end) 73 | end 74 | -------------------------------------------------------------------------------- /docs/3-Cluster_creation.md: -------------------------------------------------------------------------------- 1 | # Create a cluster 2 | 3 | ## Parameters for Cluster Creation 4 | 5 | To create a cluster, you must provide the following information: 6 | 7 | Required Parameters 8 | * `cluster_name` - name of cluster to create 9 | * `username` - username already registered with Banyan 10 | * `ec2_key_pair` - name of AWS EC2 Key Pair to SSH into the head node of the cluster 11 | * `num_nodes` - maximum number of nodes in cluster 12 | * `instance_type` - AWS EC2 instance type (one of `t3.large`, `t3.xlarge`, `t3.2xlarge`, `m4.4xlarge`, `m4.10xlarge`, `c5.2xlarge`, `c5.4xlarge`) 13 | 14 | Optional Parameters 15 | * `additional_policy` - name of AWS IAM Policy that user created for the cluster 16 | * `banyanfile` - Banyanfile describing how to set up cluster and jobs (format described below) 17 | * `s3_read_write_resource` - ARN of AWS S3 bucket in user account that cluster can access (e.g., to pull source code from or write logs/results back to) 18 | 19 | 20 | ## Format of Banyanfile 21 | 22 | The format of a Banyanfile is as follows: 23 | ```json 24 | { 25 | "include": [], 26 | "require": { 27 | "language": "jl"|"py", 28 | "cluster": { 29 | "commands": ["string"], 30 | "packages": ["string"], 31 | "pt_lib_info": "string", 32 | "pt_lib": "string" 33 | }, 34 | "job": { 35 | "code": ["string"] 36 | } 37 | } 38 | } 39 | ``` 40 | * **include** (list) 41 | List of paths to other Banyanfiles to include, or the actual Banyanfile dictionaries 42 | * **require** (dict) 43 | * **language** (string) 44 | Language used.
Currently supporting Julia ("jl") 45 | * **cluster** (dict) 46 | * **commands** (list) 47 | List of commands to execute on creation of cluster 48 | * **packages** (list) 49 | List of language-dependent packages to install 50 | * **pt_lib_info** (dict or string) 51 | Path to pt_lib_info json file or actual pt_lib_info dict 52 | * **pt_lib** (string) 53 | Optional path to pt_lib 54 | * **job** (dict) 55 | * **code** (list) 56 | List of lines of code to be executed on creation of a job in this cluster 57 | -------------------------------------------------------------------------------- /Banyan/test/runtests.jl: -------------------------------------------------------------------------------- 1 | # NOTE: This file is to be copied across the `runtests.jl` of all Banyan Julia 2 | # projects 3 | 4 | using Test 5 | 6 | function include_tests_to_run(args...) 7 | clear_jobs() 8 | for arg in args 9 | include(arg) 10 | end 11 | end 12 | 13 | get_enabled_tests() = lowercase.(ARGS) 14 | 15 | # NOTE: For testing, please provide the following: 16 | # - AWS_DEFAULT_PROFILE (if you don't already have the desired default AWS account) 17 | # - BANYAN_USERNAME 18 | # - BANYAN_USER_ID 19 | # - BANYAN_API_KEY 20 | # - BANYAN_CLUSTER_NAME 21 | # - BANYAN_NWORKERS 22 | # - BANYAN_NWORKERS_ALL 23 | # 24 | # If these are not specified, we will only run tests that don't require a 25 | # configured job to be created first. 26 | 27 | function run_with_job(test_fn, name) 28 | # This function should be used for tests that need a job to be already 29 | # created to run. We look at environment variables for a specification for 30 | # how to authenticate and what cluster to run on 31 | 32 | username = get(ENV, "BANYAN_USERNAME", nothing) 33 | user_id = get(ENV, "BANYAN_USER_ID", nothing) 34 | api_key = get(ENV, "BANYAN_API_KEY", nothing) 35 | cluster_name = get(ENV, "BANYAN_CLUSTER_NAME", nothing) 36 | nworkers = get(ENV, "BANYAN_NWORKERS", nothing) 37 | 38 | if isempty(get_enabled_tests()) || 39 | any([occursin(t, lowercase(name)) for t in get_enabled_tests()]) 40 | if get(ENV, "BANYAN_NWORKERS_ALL", "false") == "true" 41 | for nworkers in [16, 8, 4, 2, 1] 42 | with_job( 43 | username = username, 44 | user_id = user_id, 45 | api_key = api_key, 46 | cluster_name = cluster_name, 47 | nworkers = Int32(nworkers), # already an Int in this loop; `parse` would fail 48 | banyanfile_path = "file://res/Banyanfile.json", 49 | ) do j 50 | test_fn(j) 51 | end 52 | end 53 | elseif !isnothing(nworkers) 54 | with_job( 55 | username = username, 56 | api_key = api_key, 57 | cluster_name = cluster_name, 58 | nworkers = parse(Int32, nworkers), 59 | banyanfile_path = "file://res/Banyanfile.json", 60 | user_id = user_id, 61 | ) do j 62 | test_fn(j) 63 | end 64 | end 65 | end 66 | end 67 | 68 | function run(test_fn, name) 69 | # This function should be used for tests that test cluster/job management 70 | # and so they only need environment variables to dictate how to 71 | # authenticate. These can be read in from ENV on a per-test basis.
72 | 73 | if isempty(get_enabled_tests()) || 74 | any([occursin(t, lowercase(name)) for t in get_enabled_tests()]) 75 | test_fn() 76 | end 77 | end 78 | 79 | include_tests_to_run("test_cluster.jl") -------------------------------------------------------------------------------- /BanyanArrays/test/runtests.jl: -------------------------------------------------------------------------------- 1 | # NOTE: This file is to be copied across the `runtests.jl` of all Banyan Julia 2 | # projects 3 | 4 | using Test 5 | using Banyan 6 | using BanyanArrays 7 | 8 | function include_tests_to_run(args...) 9 | clear_jobs() 10 | for arg in args 11 | include(arg) 12 | end 13 | end 14 | 15 | get_enabled_tests() = lowercase.(ARGS) 16 | 17 | # NOTE: For testing, please provide the following: 18 | # - AWS_DEFAULT_PROFILE (if you don't already have the desired default AWS account) 19 | # - BANYAN_USERNAME 20 | # - BANYAN_USER_ID 21 | # - BANYAN_API_KEY 22 | # - BANYAN_CLUSTER_NAME 23 | # - BANYAN_NWORKERS 24 | # - BANYAN_NWORKERS_ALL 25 | # 26 | # If these are not specified, we will only run tests that don't require a 27 | # configured job to be created first. 28 | 29 | function run_with_job(test_fn, name) 30 | # This function should be used for tests that need a job to be already 31 | # created to run. We look at environment variables for a specification for 32 | # how to authenticate and what cluster to run on 33 | 34 | username = get(ENV, "BANYAN_USERNAME", nothing) 35 | user_id = get(ENV, "BANYAN_USER_ID", nothing) 36 | api_key = get(ENV, "BANYAN_API_KEY", nothing) 37 | cluster_name = get(ENV, "BANYAN_CLUSTER_NAME", nothing) 38 | nworkers = get(ENV, "BANYAN_NWORKERS", nothing) 39 | 40 | if isempty(get_enabled_tests()) || 41 | any([occursin(t, lowercase(name)) for t in get_enabled_tests()]) 42 | if get(ENV, "BANYAN_NWORKERS_ALL", "false") == "true" 43 | for nworkers in [16, 8, 4, 2, 1] 44 | with_job( 45 | username = username, 46 | user_id = user_id, 47 | api_key = api_key, 48 | cluster_name = cluster_name, 49 | nworkers = Int32(nworkers), # already an Int in this loop; `parse` would fail 50 | banyanfile_path = "file://res/Banyanfile.json", 51 | ) do j 52 | test_fn(j) 53 | end 54 | end 55 | elseif !isnothing(nworkers) 56 | with_job( 57 | username = username, 58 | api_key = api_key, 59 | cluster_name = cluster_name, 60 | nworkers = parse(Int32, nworkers), 61 | banyanfile_path = "file://res/Banyanfile.json", 62 | user_id = user_id, 63 | ) do j 64 | test_fn(j) 65 | end 66 | end 67 | end 68 | end 69 | 70 | function run(test_fn, name) 71 | # This function should be used for tests that test cluster/job management 72 | # and so they only need environment variables to dictate how to 73 | # authenticate. These can be read in from ENV on a per-test basis. 74 | 75 | if isempty(get_enabled_tests()) || 76 | any([occursin(t, lowercase(name)) for t in get_enabled_tests()]) 77 | test_fn() 78 | end 79 | end 80 | 81 | include_tests_to_run("test_simple.jl") -------------------------------------------------------------------------------- /BanyanDataFrames/test/runtests.jl: -------------------------------------------------------------------------------- 1 | # NOTE: This file is to be copied across the `runtests.jl` of all Banyan Julia 2 | # projects 3 | 4 | using Test 5 | using Banyan 6 | using BanyanArrays 7 | using BanyanDataFrames 8 | 9 | using Statistics 10 | 11 | function include_tests_to_run(args...)
12 | clear_jobs() 13 | for arg in args 14 | include(arg) 15 | end 16 | end 17 | 18 | get_enabled_tests() = lowercase.(ARGS) 19 | 20 | # NOTE: For testing, please provide the following: 21 | # - AWS_DEFAULT_PROFILE (if you don't already have the desired default AWS account) 22 | # - BANYAN_USERNAME 23 | # - BANYAN_USER_ID 24 | # - BANYAN_API_KEY 25 | # - BANYAN_CLUSTER_NAME 26 | # - BANYAN_NWORKERS 27 | # - BANYAN_NWORKERS_ALL 28 | # 29 | # If these are not specified, we will only run tests that don't require a 30 | # configured job to be created first. 31 | 32 | function run_with_job(test_fn, name) 33 | # This function should be used for tests that need a job to be already 34 | # created to run. We look at environment variables for a specification for 35 | # how to authenticate and what cluster to run on 36 | 37 | username = get(ENV, "BANYAN_USERNAME", nothing) 38 | user_id = get(ENV, "BANYAN_USER_ID", nothing) 39 | api_key = get(ENV, "BANYAN_API_KEY", nothing) 40 | cluster_name = get(ENV, "BANYAN_CLUSTER_NAME", nothing) 41 | nworkers = get(ENV, "BANYAN_NWORKERS", nothing) 42 | 43 | if isempty(get_enabled_tests()) || 44 | any([occursin(t, lowercase(name)) for t in get_enabled_tests()]) 45 | if get(ENV, "BANYAN_NWORKERS_ALL", "false") == "true" 46 | for nworkers in [16, 8, 4, 2, 1] 47 | with_job( 48 | username = username, 49 | user_id = user_id, 50 | api_key = api_key, 51 | cluster_name = cluster_name, 52 | nworkers = Int32(nworkers), # already an Int in this loop; `parse` would fail 53 | banyanfile_path = "file://res/Banyanfile.json", 54 | ) do j 55 | test_fn(j) 56 | end 57 | end 58 | elseif !isnothing(nworkers) 59 | with_job( 60 | username = username, 61 | api_key = api_key, 62 | cluster_name = cluster_name, 63 | nworkers = parse(Int32, nworkers), 64 | banyanfile_path = "file://res/Banyanfile.json", 65 | user_id = user_id, 66 | ) do j 67 | test_fn(j) 68 | end 69 | end 70 | end 71 | end 72 | 73 | function run(test_fn, name) 74 | # This function should be used for tests that test cluster/job management 75 | # and so they only need environment variables to dictate how to 76 | # authenticate. These can be read in from ENV on a per-test basis. 77 | 78 | if isempty(get_enabled_tests()) || 79 | any([occursin(t, lowercase(name)) for t in get_enabled_tests()]) 80 | test_fn() 81 | end 82 | end 83 | 84 | include_tests_to_run("test_simple.jl") -------------------------------------------------------------------------------- /benchmarking/benchmark_results.md: -------------------------------------------------------------------------------- 1 | # Benchmarking Results 2 | 3 | ## Black Scholes 4 | 5 | The following table shows the runtime (s) for Black Scholes, averaged over 5 trials, using an input data size of 1e9.
6 | 7 | | Num Workers | Sequential | Parallelized Without Cache Optimization | Parallelized With L2 Cache Optimization | Parallelized With L1 Cache Optimization | 8 | | :---: | :---: | :---: | :---: | :---: | 9 | | 1 | 232.878 | 237.947 | 103.517 | 93.652 | 10 | | 2 | __ | 163.376 | 71.561 | 81.079 | 11 | | 4 | __ | 85.370 | 36.956 | 40.485 | 12 | | 8 | __ | 42.892 | 18.404 | 19.570 | 13 | | 16 | __ | 22.544 | 9.756 | 10.368 | 14 | 15 | 16 | ![Black Scholes runtime](blackscholes_runtime.png) 17 | 18 | 59 | ## Matrix Multiplication 60 | 61 | The following table shows the runtime (s) for matrix multiplication A x B, averaged over 5 trials, where the dimensions of A and B are (1500, 10000) and (10000, 5000) respectively. 62 | 63 | 64 | 65 | | Num Workers | Sequential | Parallelized Without Cache Optimization | Parallelized With L2 Cache Optimization | 66 | | :---: | :---: | :---: | :---: | 67 | | 1 | 40.920 | 31.543 | 96.275 | 68 | | 2 | __ | 26.172 | 68.332 | 69 | | 4 | __ | 13.388 | 37.093 | 70 | | 8 | __ | 7.380 | 18.993 | 71 | | 16 | __ | 3.964 | 10.289 | 72 | 73 | ![Matrix multiplication runtime](matmul_runtime.png) 74 | 75 | -------------------------------------------------------------------------------- /Banyan/src/futures.jl: -------------------------------------------------------------------------------- 1 | ########### 2 | # Futures # 3 | ########### 4 | 5 | """ 6 | Future() 7 | Future(value::Any) 8 | Future(location::Location) 9 | Future(; kwargs...) 10 | 11 | Constructs a new future, representing a value that has not yet been evaluated. 12 | """ 13 | function Future(location::Location = None(); mutate_from::Union{<:AbstractFuture,Nothing}=nothing) 14 | # Generate new value id 15 | value_id = generate_value_id() 16 | 17 | # Create new Future and assign a location to it 18 | new_future = Future(nothing, value_id, false, false) 19 | sourced(new_future, location) 20 | destined(new_future, None()) 21 | 22 | # TODO: Add Size location here if needed 23 | # Handle locations that have an associated value 24 | if location.src_name in ["None", "Client", "Value"] 25 | new_future.value = location.sample.value 26 | new_future.stale = false 27 | end 28 | 29 | if !isnothing(mutate_from) 30 | # Indicate that this future is the result of an in-place mutation of 31 | # some other value 32 | mutated(mutate_from, new_future) 33 | elseif location.src_name == "None" 34 | # For convenience, if a future is constructed with no location to 35 | # split from, we assume it will be mutated in the next code region 36 | # and mark it as mutated. This is pretty common since often when 37 | # we are creating new futures with None location it is as an 38 | # intermediate variable to store the result of some code region. 39 | # 40 | # Mutation can also be specified manually with mutate=true|false in 41 | # `partition` or implicitly through `Future` constructors 42 | mutated(new_future) 43 | end 44 | 45 | new_future 46 | end 47 | 48 | function Future(value::Any) 49 | location = if Base.summarysize(value) ≤ 4 * 1024 50 | Value(value) 51 | else 52 | Client(value) 53 | end 54 | 55 | # Create future, store value, and return 56 | Future(location) 57 | end 58 | 59 | """ 60 | Future(future::AbstractFuture) 61 | 62 | Constructs a future from a future that was already created.
63 | 64 | If the given future has not had its value mutated (meaning that the value 65 | stored with it here on the client is the most up-to-date version of it), we use 66 | its value to construct a new future from a copy of the value. 67 | 68 | However, if the future has been mutated by some code region that has already 69 | been recorded, we construct a new future with location `None` and mark it as 70 | mutated. This is because presumably in the case that we _can't_ copy over the 71 | given future, we would want to assign to it in the upcoming code region where 72 | it's going to be used. 73 | """ 74 | function Future(fut::AbstractFuture, mutation::Function=identity) 75 | fut = convert(Future, fut) 76 | if !fut.stale 77 | # Copy over value 78 | new_future = Future( 79 | deepcopy(mutation(fut.value)), 80 | generate_value_id(), 81 | # If the future is not stale, it is not mutated in a way where 82 | # a further `compute` is needed. So we can just copy its value. 83 | false, 84 | false 85 | ) 86 | 87 | # Copy over location 88 | located(new_future, deepcopy(get_location(fut))) 89 | 90 | new_future 91 | else 92 | Future() 93 | end 94 | end 95 | 96 | # convert(::Type{Future}, value::Any) = Future(value) 97 | convert(::Type{Future}, fut::Future) = fut 98 | 99 | get_location(fut::AbstractFuture) = get(get_job().locations, convert(Future, fut).value_id, nothing) 100 | get_location(value_id::ValueId) = get(get_job().locations, value_id, nothing) 101 | get_future(value_id::ValueId) = get_job().futures_on_client[value_id] 102 | -------------------------------------------------------------------------------- /Banyan/src/Banyan.jl: -------------------------------------------------------------------------------- 1 | # The Banyan client for Julia has 5 key parts: 2 | # - Job 3 | # - Future 4 | # - Location, src, dst, loc 5 | # - pt, pc 6 | # - @partitioned 7 | 8 | module Banyan 9 | 10 | using FilePathsBase: joinpath, isempty 11 | using Base: notnothing, env_project_file 12 | global BANYAN_API_ENDPOINT 13 | 14 | # TODO: Remove this 15 | # export create_job, 16 | # destroy_job, 17 | # JobRequest, 18 | # set_cluster_id, 19 | # set_job_request, 20 | # get_job_id, 21 | # evaluate, 22 | # record_request, 23 | # send_request_get_response 24 | # export Future 25 | # export PartitionAnnotation, 26 | # PartitionType, 27 | # PartitioningConstraint, 28 | # PartitioningConstraints, 29 | # Partitions 30 | # export LocationType 31 | # export DelayedTask 32 | 33 | # export @pa, @pp, @lt, @src, @dst 34 | # export pa_noconstraints 35 | # export Div, Block, Stencil 36 | # export HDF5, Value, Client 37 | # export Cross 38 | # # export Const, Mut 39 | 40 | # include("id.jl") 41 | # include("utils.jl") 42 | # include("jobs.jl") 43 | # include("locations.jl") 44 | # include("futures.jl") 45 | # include("partitions.jl") 46 | # include("queues.jl") 47 | # include("tasks.jl") 48 | # include("pa_constructors.jl") 49 | # include("pt_constructors.jl") 50 | # include("lt_constructors.jl") 51 | # include("constraint_constructors.jl") 52 | # include("macros.jl") 53 | # include("evaluation.jl") 54 | 55 | # Account management 56 | export configure 57 | 58 | # Cluster management 59 | export Cluster, 60 | create_cluster, update_cluster, destroy_cluster, get_clusters, get_cluster, assert_cluster_is_ready 61 | 62 | # Job management 63 | export Job, with_job, create_job, destroy_job, destroy_all_jobs, clear_jobs, get_jobs 64 | 65 | # Futures 66 | export AbstractFuture, Future, compute, collect 67 | 68 | # Samples 69 | export Sample, 
ExactSample, sample, setsample! 70 | export sample_memory_usage, 71 | sample_axes, 72 | sample_keys, 73 | sample_divisions, 74 | sample_percentile, 75 | sample_max_ngroups, 76 | sample_min, 77 | sample_max 78 | 79 | # Locations 80 | export Location, LocationSource, LocationDestination, located, sourced, destined 81 | export Value, Size, Client, None, Remote 82 | 83 | # Partition types 84 | export PartitionType, pt, pc, mutated, @partitioned 85 | export Any, 86 | Replicating, 87 | Replicated, 88 | Divided, 89 | Syncing, 90 | Reducing, 91 | ReducingWithKey, 92 | Distributing, 93 | Blocked, 94 | Grouped, 95 | ScaledBySame, 96 | Drifted, 97 | Balanced, 98 | Unbalanced, 99 | Blocked, 100 | Grouped 101 | 102 | # Partitioning constraints 103 | export Co, Cross, Equal, Sequential, Match, MatchOn, AtMost, ScaleBy 104 | 105 | # Annotations 106 | export partitioned_using, 107 | partitioned_with, 108 | keep_all_sample_keys, 109 | keep_all_sample_keys_renamed, 110 | keep_sample_keys_named, 111 | keep_sample_keys, 112 | keep_sample_rate 113 | 114 | using AWS: _get_ini_value 115 | using AWSCore 116 | using AWSS3 117 | using AWSSQS 118 | using Base64 119 | using HTTP 120 | using JSON 121 | using Random 122 | using Serialization 123 | using TOML 124 | 125 | using FileIO 126 | using FilePathsBase 127 | using IniFile 128 | 129 | using IterTools 130 | 131 | # TODO: Move locations, samples, and parts of pt_lib.jl and pt_lib_info.json 132 | # into their respective libraries where they can be specialized 133 | using HDF5, CSV, Parquet, Arrow, DataFrames 134 | 135 | # Jobs 136 | include("id.jl") 137 | include("utils.jl") 138 | include("utils_abstract_types.jl") 139 | include("queues.jl") 140 | include("jobs.jl") 141 | include("clusters.jl") 142 | 143 | # Futures 144 | include("future.jl") 145 | include("samples.jl") 146 | include("locations.jl") 147 | include("futures.jl") 148 | 149 | # Annotation 150 | include("partitions.jl") 151 | include("pt_lib_constructors.jl") 152 | include("tasks.jl") 153 | include("annotation.jl") 154 | 155 | # Utilities 156 | include("requests.jl") 157 | 158 | # Job (using locations and futures) 159 | include("job.jl") 160 | 161 | function __init__() 162 | # The user must provide the following for authentication: 163 | # - Username 164 | # - API key 165 | # - AWS credentials 166 | # - SSH key pair (used in cluster creation) 167 | 168 | global BANYAN_API_ENDPOINT 169 | BANYAN_API_ENDPOINT = "https://hcohsbhhzf.execute-api.us-west-2.amazonaws.com/dev/" 170 | 171 | load_config() 172 | end 173 | 174 | end # module 175 | -------------------------------------------------------------------------------- /Banyan/test/data/iris.csv: -------------------------------------------------------------------------------- 1 | "sepal.length","sepal.width","petal.length","petal.width","variety" 2 | 5.1,3.5,1.4,.2,"Setosa" 3 | 4.9,3,1.4,.2,"Setosa" 4 | 4.7,3.2,1.3,.2,"Setosa" 5 | 4.6,3.1,1.5,.2,"Setosa" 6 | 5,3.6,1.4,.2,"Setosa" 7 | 5.4,3.9,1.7,.4,"Setosa" 8 | 4.6,3.4,1.4,.3,"Setosa" 9 | 5,3.4,1.5,.2,"Setosa" 10 | 4.4,2.9,1.4,.2,"Setosa" 11 | 4.9,3.1,1.5,.1,"Setosa" 12 | 5.4,3.7,1.5,.2,"Setosa" 13 | 4.8,3.4,1.6,.2,"Setosa" 14 | 4.8,3,1.4,.1,"Setosa" 15 | 4.3,3,1.1,.1,"Setosa" 16 | 5.8,4,1.2,.2,"Setosa" 17 | 5.7,4.4,1.5,.4,"Setosa" 18 | 5.4,3.9,1.3,.4,"Setosa" 19 | 5.1,3.5,1.4,.3,"Setosa" 20 | 5.7,3.8,1.7,.3,"Setosa" 21 | 5.1,3.8,1.5,.3,"Setosa" 22 | 5.4,3.4,1.7,.2,"Setosa" 23 | 5.1,3.7,1.5,.4,"Setosa" 24 | 4.6,3.6,1,.2,"Setosa" 25 | 5.1,3.3,1.7,.5,"Setosa" 26 | 4.8,3.4,1.9,.2,"Setosa" 27 | 5,3,1.6,.2,"Setosa" 
28 | 5,3.4,1.6,.4,"Setosa" 29 | 5.2,3.5,1.5,.2,"Setosa" 30 | 5.2,3.4,1.4,.2,"Setosa" 31 | 4.7,3.2,1.6,.2,"Setosa" 32 | 4.8,3.1,1.6,.2,"Setosa" 33 | 5.4,3.4,1.5,.4,"Setosa" 34 | 5.2,4.1,1.5,.1,"Setosa" 35 | 5.5,4.2,1.4,.2,"Setosa" 36 | 4.9,3.1,1.5,.2,"Setosa" 37 | 5,3.2,1.2,.2,"Setosa" 38 | 5.5,3.5,1.3,.2,"Setosa" 39 | 4.9,3.6,1.4,.1,"Setosa" 40 | 4.4,3,1.3,.2,"Setosa" 41 | 5.1,3.4,1.5,.2,"Setosa" 42 | 5,3.5,1.3,.3,"Setosa" 43 | 4.5,2.3,1.3,.3,"Setosa" 44 | 4.4,3.2,1.3,.2,"Setosa" 45 | 5,3.5,1.6,.6,"Setosa" 46 | 5.1,3.8,1.9,.4,"Setosa" 47 | 4.8,3,1.4,.3,"Setosa" 48 | 5.1,3.8,1.6,.2,"Setosa" 49 | 4.6,3.2,1.4,.2,"Setosa" 50 | 5.3,3.7,1.5,.2,"Setosa" 51 | 5,3.3,1.4,.2,"Setosa" 52 | 7,3.2,4.7,1.4,"Versicolor" 53 | 6.4,3.2,4.5,1.5,"Versicolor" 54 | 6.9,3.1,4.9,1.5,"Versicolor" 55 | 5.5,2.3,4,1.3,"Versicolor" 56 | 6.5,2.8,4.6,1.5,"Versicolor" 57 | 5.7,2.8,4.5,1.3,"Versicolor" 58 | 6.3,3.3,4.7,1.6,"Versicolor" 59 | 4.9,2.4,3.3,1,"Versicolor" 60 | 6.6,2.9,4.6,1.3,"Versicolor" 61 | 5.2,2.7,3.9,1.4,"Versicolor" 62 | 5,2,3.5,1,"Versicolor" 63 | 5.9,3,4.2,1.5,"Versicolor" 64 | 6,2.2,4,1,"Versicolor" 65 | 6.1,2.9,4.7,1.4,"Versicolor" 66 | 5.6,2.9,3.6,1.3,"Versicolor" 67 | 6.7,3.1,4.4,1.4,"Versicolor" 68 | 5.6,3,4.5,1.5,"Versicolor" 69 | 5.8,2.7,4.1,1,"Versicolor" 70 | 6.2,2.2,4.5,1.5,"Versicolor" 71 | 5.6,2.5,3.9,1.1,"Versicolor" 72 | 5.9,3.2,4.8,1.8,"Versicolor" 73 | 6.1,2.8,4,1.3,"Versicolor" 74 | 6.3,2.5,4.9,1.5,"Versicolor" 75 | 6.1,2.8,4.7,1.2,"Versicolor" 76 | 6.4,2.9,4.3,1.3,"Versicolor" 77 | 6.6,3,4.4,1.4,"Versicolor" 78 | 6.8,2.8,4.8,1.4,"Versicolor" 79 | 6.7,3,5,1.7,"Versicolor" 80 | 6,2.9,4.5,1.5,"Versicolor" 81 | 5.7,2.6,3.5,1,"Versicolor" 82 | 5.5,2.4,3.8,1.1,"Versicolor" 83 | 5.5,2.4,3.7,1,"Versicolor" 84 | 5.8,2.7,3.9,1.2,"Versicolor" 85 | 6,2.7,5.1,1.6,"Versicolor" 86 | 5.4,3,4.5,1.5,"Versicolor" 87 | 6,3.4,4.5,1.6,"Versicolor" 88 | 6.7,3.1,4.7,1.5,"Versicolor" 89 | 6.3,2.3,4.4,1.3,"Versicolor" 90 | 5.6,3,4.1,1.3,"Versicolor" 91 | 5.5,2.5,4,1.3,"Versicolor" 92 | 5.5,2.6,4.4,1.2,"Versicolor" 93 | 6.1,3,4.6,1.4,"Versicolor" 94 | 5.8,2.6,4,1.2,"Versicolor" 95 | 5,2.3,3.3,1,"Versicolor" 96 | 5.6,2.7,4.2,1.3,"Versicolor" 97 | 5.7,3,4.2,1.2,"Versicolor" 98 | 5.7,2.9,4.2,1.3,"Versicolor" 99 | 6.2,2.9,4.3,1.3,"Versicolor" 100 | 5.1,2.5,3,1.1,"Versicolor" 101 | 5.7,2.8,4.1,1.3,"Versicolor" 102 | 6.3,3.3,6,2.5,"Virginica" 103 | 5.8,2.7,5.1,1.9,"Virginica" 104 | 7.1,3,5.9,2.1,"Virginica" 105 | 6.3,2.9,5.6,1.8,"Virginica" 106 | 6.5,3,5.8,2.2,"Virginica" 107 | 7.6,3,6.6,2.1,"Virginica" 108 | 4.9,2.5,4.5,1.7,"Virginica" 109 | 7.3,2.9,6.3,1.8,"Virginica" 110 | 6.7,2.5,5.8,1.8,"Virginica" 111 | 7.2,3.6,6.1,2.5,"Virginica" 112 | 6.5,3.2,5.1,2,"Virginica" 113 | 6.4,2.7,5.3,1.9,"Virginica" 114 | 6.8,3,5.5,2.1,"Virginica" 115 | 5.7,2.5,5,2,"Virginica" 116 | 5.8,2.8,5.1,2.4,"Virginica" 117 | 6.4,3.2,5.3,2.3,"Virginica" 118 | 6.5,3,5.5,1.8,"Virginica" 119 | 7.7,3.8,6.7,2.2,"Virginica" 120 | 7.7,2.6,6.9,2.3,"Virginica" 121 | 6,2.2,5,1.5,"Virginica" 122 | 6.9,3.2,5.7,2.3,"Virginica" 123 | 5.6,2.8,4.9,2,"Virginica" 124 | 7.7,2.8,6.7,2,"Virginica" 125 | 6.3,2.7,4.9,1.8,"Virginica" 126 | 6.7,3.3,5.7,2.1,"Virginica" 127 | 7.2,3.2,6,1.8,"Virginica" 128 | 6.2,2.8,4.8,1.8,"Virginica" 129 | 6.1,3,4.9,1.8,"Virginica" 130 | 6.4,2.8,5.6,2.1,"Virginica" 131 | 7.2,3,5.8,1.6,"Virginica" 132 | 7.4,2.8,6.1,1.9,"Virginica" 133 | 7.9,3.8,6.4,2,"Virginica" 134 | 6.4,2.8,5.6,2.2,"Virginica" 135 | 6.3,2.8,5.1,1.5,"Virginica" 136 | 6.1,2.6,5.6,1.4,"Virginica" 137 | 7.7,3,6.1,2.3,"Virginica" 138 | 6.3,3.4,5.6,2.4,"Virginica" 139 | 
6.4,3.1,5.5,1.8,"Virginica" 140 | 6,3,4.8,1.8,"Virginica" 141 | 6.9,3.1,5.4,2.1,"Virginica" 142 | 6.7,3.1,5.6,2.4,"Virginica" 143 | 6.9,3.1,5.1,2.3,"Virginica" 144 | 5.8,2.7,5.1,1.9,"Virginica" 145 | 6.8,3.2,5.9,2.3,"Virginica" 146 | 6.7,3.3,5.7,2.5,"Virginica" 147 | 6.7,3,5.2,2.3,"Virginica" 148 | 6.3,2.5,5,1.9,"Virginica" 149 | 6.5,3,5.2,2,"Virginica" 150 | 6.2,3.4,5.4,2.3,"Virginica" 151 | 5.9,3,5.1,1.8,"Virginica" -------------------------------------------------------------------------------- /Banyan/res/pt_lib_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "splits": { 3 | "ReturnNull": { 4 | "location_names": ["None"], 5 | "partition_memory_usage": "all" 6 | }, 7 | "ReadBlock": { 8 | "expected": ["key"], 9 | "required": {"name": "Distributing", "distribution": "blocked", "id": "!"}, 10 | "default": {}, 11 | "location_names": ["Remote", "Disk"], 12 | "partition_memory_usage": "div" 13 | }, 14 | "ReadGroup": { 15 | "expected": ["key", "divisions"], 16 | "required": {"name": "Distributing", "distribution": "grouped", "id": "!"}, 17 | "default": {"rev": false}, 18 | "location_names": ["Remote", "Disk"], 19 | "partition_memory_usage": "div" 20 | }, 21 | "SplitBlock": { 22 | "expected": ["key"], 23 | "required": {"name": "Distributing", "distribution": "blocked", "id": "!"}, 24 | "default": {}, 25 | "location_names": ["Memory"], 26 | "partition_memory_usage": "div" 27 | }, 28 | "SplitGroup": { 29 | "expected": ["key"], 30 | "required": {"name": "Distributing", "distribution": "grouped", "id": "!"}, 31 | "default": {"rev": false}, 32 | "location_names": ["Memory"], 33 | "partition_memory_usage": "div" 34 | }, 35 | "CopyFrom": { 36 | "required": {"name": "Replicating", "dividing": false}, 37 | "default": {}, 38 | "location_names": ["Client", "Remote", "Value", "Disk", "Memory"], 39 | "partition_memory_usage": "all" 40 | }, 41 | "Divide": { 42 | "expected": ["key"], 43 | "required": {"name": "Replicating", "dividing": true}, 44 | "default": {}, 45 | "location_names": ["Value", "Disk", "Memory"], 46 | "partition_memory_usage": "all" 47 | } 48 | }, 49 | "merges": { 50 | "Write": { 51 | "required": {"name": "Distributing"}, 52 | "default": {}, 53 | "location_names": ["Remote", "Disk"] 54 | }, 55 | "Merge": { 56 | "expected": ["key"], 57 | "required": {"name": "Distributing"}, 58 | "default": {}, 59 | "location_names": ["Memory"] 60 | }, 61 | "CopyTo": { 62 | "required": {"name": "Replicating", "replication": "all"}, 63 | "default": {}, 64 | "location_names": ["Memory", "Disk", "Remote", "Client"] 65 | }, 66 | "ReduceAndCopyTo": { 67 | "expected": ["reducer"], 68 | "required": {"name": "Replicating", "replication": null, "with_key": false}, 69 | "default": {}, 70 | "location_names": ["Memory", "Disk", "Remote"] 71 | }, 72 | "ReduceWithKeyAndCopyTo": { 73 | "expected": ["key", "reducer"], 74 | "required": {"name": "Replicating", "replication": null, "with_key": true}, 75 | "default": {}, 76 | "location_names": ["Memory", "Disk", "Remote"] 77 | } 78 | }, 79 | "casts": { 80 | "Reduce": { 81 | "src_expected": ["reducer"], 82 | "src_required": {"name": "Replicating", "replication": null, "with_key": false}, 83 | "dst_required": {"name": "Replicating", "replication": "all"}, 84 | "dst_default": {}, 85 | "matching": [] 86 | }, 87 | "ReduceWithKey": { 88 | "src_expected": ["reducer", "key"], 89 | "src_required": {"name": "Replicating", "replication": null, "with_key": true}, 90 | "dst_required": {"name": "Replicating", "replication": "all"}, 91 | 
"dst_default": {}, 92 | "matching": [] 93 | }, 94 | "Rebalance": { 95 | "src_required": {"name": "Distributing"}, 96 | "dst_required": {"name": "Distributing", "distribution": "blocked", "balanced": true, "id": "!"}, 97 | "dst_default": {}, 98 | "matching": ["dim"] 99 | }, 100 | "Distribute": { 101 | "src_required": {"name": "Replicating", "replication": "all"}, 102 | "dst_required": {"name": "Distributing", "distribution": "blocked", "id": "!"}, 103 | "dst_expected": ["key"], 104 | "dst_default": {}, 105 | "matching": [] 106 | }, 107 | "Consolidate": { 108 | "src_expected": ["key"], 109 | "src_required": {"name": "Distributing"}, 110 | "dst_required": {"name": "Replicating", "replication": "all"}, 111 | "dst_default": {}, 112 | "matching": [] 113 | }, 114 | "Shuffle": { 115 | "src_required": {"name": "Distributing"}, 116 | "dst_required": {"name": "Distributing", "distribution": "grouped", "id": "!"}, 117 | "dst_default": {"rev": false}, 118 | "dst_expected": ["key", "divisions"], 119 | "matching": [] 120 | }, 121 | "DistributeAndShuffle": { 122 | "src_required": {"name": "Replicating", "replication": "all"}, 123 | "dst_required": {"name": "Distributing", "distribution": "grouped", "id": "!"}, 124 | "dst_default": {"rev": false}, 125 | "dst_expected": ["key", "divisions"], 126 | "matching": [] 127 | } 128 | } 129 | } -------------------------------------------------------------------------------- /Banyan/src/jobs.jl: -------------------------------------------------------------------------------- 1 | ######## 2 | # Jobs # 3 | ######## 4 | 5 | # Process-local dictionary mapping from job IDs to instances of `Job` 6 | global jobs = Dict() 7 | 8 | # TODO: Allow for different threads to use different jobs by making this 9 | # thread-local. For now, we only allow a single `Job` for each process 10 | # and no sharing between threads; i.e., the burden is on the user to make 11 | # sure they are synchronizing access to the `Job` if using the same one from 12 | # different threads. 13 | # TODO: Allow for different threads to use the same job by wrapping each 14 | # `Job` in `jobs` in a mutex to allow only one to use it at a time. Further 15 | # modifications would be required to make sharing a job between threads 16 | # ergonomic. 17 | global current_job_id = nothing 18 | global current_job_status = nothing 19 | 20 | function set_job_id(job_id::Union{JobId, Nothing}) 21 | global current_job_id 22 | current_job_id = job_id 23 | end 24 | 25 | function get_job_id()::JobId 26 | global current_job_id 27 | if isnothing(current_job_id) 28 | error("No job selected using `with_job` or `create_job` or `set_job_id`") 29 | end 30 | current_job_id 31 | end 32 | 33 | function get_job() 34 | global jobs 35 | jobs[get_job_id()] 36 | end 37 | 38 | get_cluster_name() = get_job().cluster_name 39 | 40 | function create_job(; 41 | cluster_name::String = nothing, 42 | nworkers::Integer = 2, 43 | banyanfile_path::String = "", 44 | logs_location::String = "", 45 | sample_rate::Integer = nworkers, 46 | kwargs..., 47 | ) 48 | global jobs 49 | global current_job_id 50 | global current_job_status 51 | 52 | @debug "Creating job" 53 | if cluster_name == "" 54 | cluster_name = nothing 55 | end 56 | if banyanfile_path == "" 57 | banyanfile_path = nothing 58 | end 59 | if logs_location == "" 60 | logs_location = "client" 61 | end 62 | 63 | # Configure 64 | configure(; kwargs...) 
65 | cluster_name = if isnothing(cluster_name)
66 | clusters = list_clusters()
67 | if length(clusters) == 0
68 | error("Failed to create job: you don't have any clusters created")
69 | end
70 | first(keys(clusters))
71 | else
72 | cluster_name
73 | end
74 |
75 | # Merge Banyanfile if provided
76 | job_configuration = Dict{String,Any}(
77 | "cluster_name" => cluster_name,
78 | "num_workers" => nworkers,
79 | "logs_location" => "s3", #logs_location,
80 | )
81 | if !isnothing(banyanfile_path)
82 | banyanfile = load_json(banyanfile_path)
83 | merge_banyanfile_with_defaults!(banyanfile)
84 | for included in banyanfile["include"]
85 | merge_banyanfile_with!(banyanfile, getnormpath(banyanfile_path, included), :job, :creation)
86 | end
87 | job_configuration["banyanfile"] = banyanfile
88 | end
89 |
90 | # Create the job
91 | @debug "Sending request for job creation"
92 | job_id = send_request_get_response(:create_job, job_configuration)
93 | job_id = job_id["job_id"]
94 | @debug "Creating job $job_id"
95 |
96 | # Store in global state
97 | current_job_id = job_id
98 | current_job_status = "running"
99 | jobs[current_job_id] = Job(cluster_name, current_job_id, nworkers, sample_rate)
100 |
101 | @debug "Finished creating job $job_id"
102 | return job_id
103 | end
104 |
105 | function destroy_job(job_id::JobId; failed = false, kwargs...)
106 | global current_job_id
107 | global current_job_status
108 |
109 | # Only override the keyword argument if the job is known to have failed
110 | if current_job_status == "failed"
111 | failed = true
112 | end
113 |
114 |
115 | # configure(; kwargs...)
116 |
117 | @debug "Destroying job $job_id"
118 | send_request_get_response(
119 | :destroy_job,
120 | Dict{String,Any}("job_id" => job_id, "failed" => failed),
121 | )
122 |
123 | # Remove from global state
124 | if !isnothing(current_job_id) && get_job_id() == job_id
125 | set_job_id(nothing)
126 | end
127 | delete!(jobs, job_id)
128 | end
129 |
130 | function get_jobs(cluster_name=nothing; kwargs...)
131 | @debug "Downloading description of jobs in each cluster"
132 | configure(; kwargs...)
133 | filters = Dict()
134 | if !isnothing(cluster_name)
135 | filters["cluster_name"] = cluster_name
136 | end
137 | response =
138 | send_request_get_response(:describe_jobs, Dict{String,Any}("filters"=>filters))
139 | response["jobs"]
140 | end
141 |
142 | function destroy_all_jobs(cluster_name::String; kwargs...)
143 | @debug "Destroying all jobs for cluster"
144 | configure(; kwargs...)
145 | jobs = get_jobs(cluster_name)
146 | for (job_id, job) in jobs
147 | if job["status"] == "running"
148 | destroy_job(job_id; kwargs...)
149 | end
150 | end
151 | end
152 |
153 | # destroy_job() = destroy_job(get_job_id())
154 |
155 | # mutable struct Job
156 | # job_id::JobId
157 | # failed::Bool
158 |
159 | # # function Job(; kwargs...)
160 | # # new_job_id = create_job(; kwargs...)
161 | # # #new_job_id = create_job(;cluster_name="banyancluster", nworkers=2)
162 | # # new_job = new(new_job_id)
163 | # # finalizer(new_job) do j
164 | # # destroy_job(j.job_id)
165 | # # end
166 |
167 | # # new_job
168 | # # end
169 | # end
170 |
171 | function with_job(f::Function; kwargs...)
172 | # This is not a constructor; this is just a function that ensures that
173 | # every job is always destroyed even in the case of an error
174 | j = create_job(;kwargs...)
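# Track whether the catch block has already destroyed the job so that the
# finally block doesn't destroy it a second time.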
175 | j_destroyed = false
176 | try
177 | f(j)
178 | catch err
179 | destroy_job(j)
180 | j_destroyed = true
181 | rethrow(err)
182 | finally
183 | if !j_destroyed
184 | destroy_job(j)
185 | end
186 | end
187 | end
188 |
189 | function clear_jobs()
190 | global jobs
191 | global current_job_id
192 | if !isnothing(current_job_id)
193 | empty!(jobs[current_job_id].pending_requests)
194 | end
195 | end
196 |
197 | # TODO: Fix bug causing nbatches to be 2 when it should be 25
198 | # TODO: Fix finalizer of Job
199 | -------------------------------------------------------------------------------- /Banyan/test/test_cluster.jl: --------------------------------------------------------------------------------
1 | @testset "Cluster Management" begin
2 | run("Configuration") do
3 | configure(
4 | username = "adminuser",
5 | api_key = "a41ef8a693682dd93189e71676b2cdc9",
6 | ec2_key_pair_name = "EC2ConnectKeyPairTest",
7 | region = "us-west-2",
8 | )
9 | end
10 |
11 | # TODO: Add tests for creating and destroying clusters and for updating with
12 | # more complex Banyanfiles. The point of additional tests for updating is to
13 | # ensure that we parse, merge, and load Banyanfiles correctly, and such
14 | # tests should cover all the different fields in a Banyanfile including
15 | # `includes` for example.
16 |
17 | run("Updating Cluster") do
18 | update_cluster(
19 | name = "c0416",
20 | banyanfile_path = "file://res/Banyanfile.json",
21 | )
22 | end
23 | end
24 |
25 |
26 | # Tests create_cluster and calls get_clusters to ensure correct behavior
27 | # expected_presence (bool): indicates whether the cluster should be listed
28 | # expected_status (string or symbol): the status the cluster should have if it is listed
29 | # kwargs : arguments for create_cluster
30 | function test_create_cluster(expected_presence, expected_status; kwargs...)
31 | create_cluster(; kwargs...)
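# Fetch the cluster list and check that the cluster's presence matches
# expected_presence and, if it is listed, that its status matches
# expected_status.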
32 | clusters = get_clusters()
33 | @test haskey(clusters, kwargs[:name]) == expected_presence
34 | if (haskey(clusters, kwargs[:name]))
35 | @test clusters[kwargs[:name]].status == expected_status
36 | end
37 | end
38 |
39 |
40 | @testset "Cluster Creation - Should Fail Immediately" begin
41 | run("createcluster_bad_username") do
42 | configure(
43 | username = "BadUser",
44 | api_key = "7FBKWAv3ld0eOfghSwhX_g",
45 | ec2_key_pair_name = "EC2ConnectKeyPairTest",
46 | region = "us-west-2",
47 | )
48 | test_create_cluster(
49 | false,
50 | "";
51 | name = "badcluster",
52 | instance_type = "t3.large",
53 | banyanfile_path = "file://res/Banyanfile.json",
54 | )
55 | end
56 | run("createcluster_bad_api_key") do
57 | configure(
58 | username = "BanyanTest",
59 | api_key = "invalidapikey",
60 | ec2_key_pair_name = "EC2ConnectKeyPairTest",
61 | region = "us-west-2",
62 | )
63 | test_create_cluster(
64 | false,
65 | "";
66 | name = "badcluster",
67 | instance_type = "t3.large",
68 | banyanfile_path = "file://res/Banyanfile.json",
69 | )
70 | end
71 | run("createcluster_bad_ec2_key_pair_name") do
72 | configure(
73 | username = "BanyanTest",
74 | api_key = "7FBKWAv3ld0eOfghSwhX_g",
75 | ec2_key_pair_name = "NoEC2KeyPair",
76 | region = "us-west-2",
77 | )
78 | test_create_cluster(
79 | false,
80 | "";
81 | name = "badcluster",
82 | instance_type = "t3.large",
83 | banyanfile_path = "file://res/Banyanfile.json",
84 | )
85 | end
86 | run("createcluster_bad_region") do
87 | configure(
88 | username = "BanyanTest",
89 | api_key = "7FBKWAv3ld0eOfghSwhX_g",
90 | ec2_key_pair_name = "EC2ConnectKeyPairTest",
91 | region = "noregion",
92 | )
93 | test_create_cluster(
94 | false,
95 | "";
96 | name = "badcluster",
97 | instance_type = "t3.large",
98 | banyanfile_path = "file://res/Banyanfile.json",
99 | )
100 | end
101 | run("createcluster_bad_instance_type") do
102 | configure(
103 | username = "BanyanTest",
104 | api_key = "7FBKWAv3ld0eOfghSwhX_g",
105 | ec2_key_pair_name = "EC2ConnectKeyPairTest",
106 | region = "us-west-2",
107 | )
108 | test_create_cluster(
109 | false,
110 | "";
111 | name = "badcluster",
112 | instance_type = "a1.metal",
113 | banyanfile_path = "file://res/Banyanfile.json",
114 | )
115 | end
116 | run("createcluster_bad_banyanfile") do
117 | configure(
118 | username = "BanyanTest",
119 | api_key = "7FBKWAv3ld0eOfghSwhX_g",
120 | ec2_key_pair_name = "EC2ConnectKeyPairTest",
121 | region = "us-west-2",
122 | )
123 | test_create_cluster(
124 | false,
125 | "";
126 | name = "badcluster",
127 | instance_type = "t3.large",
128 | banyanfile_path = "file://res/Banyanfile_badcluster.json",
129 | )
130 | end
131 | end
132 |
133 |
134 | @testset "Cluster Creation" begin
135 | run("createcluster_region") do
136 | configure(
137 | username = "BanyanTest",
138 | api_key = "7FBKWAv3ld0eOfghSwhX_g",
139 | ec2_key_pair_name = "EC2ConnectKeyPairTest",
140 | region = "us-east-1",
141 | )
142 | test_create_cluster(
143 | true,
144 | :creating;
145 | name = "cluster_useast1",
146 | instance_type = "t3.large",
147 | banyanfile_path = "file://res/Banyanfile.json",
148 | )
149 | end
150 | # run(
151 | # "createcluster_s3bucket",
152 | # () -> begin
153 | # configure(;
154 | # username = "BanyanTest",
155 | # api_key = "7FBKWAv3ld0eOfghSwhX_g",
156 | # ec2_key_pair_name = "EC2ConnectKeyPairTest",
157 | # region = "us-east-1",
158 | # )
159 | # test_create_cluster(
160 | # true,
161 | # :creating,
162 | # name = "cluster_useast1",
163 | # instance_type = "t3.large",
164 | # banyanfile_path =
"file://res/Banyanfile.json", 165 | # s3_bucket_arn = "TODOTODOTODTODOTO", 166 | # ) 167 | # end, 168 | # ) 169 | run("createcluster_iam") do 170 | configure( 171 | username = "BanyanTest", 172 | api_key = "7FBKWAv3ld0eOfghSwhX_g", 173 | ec2_key_pair_name = "EC2ConnectKeyPairTest", 174 | region = "us-east-1", 175 | ) 176 | test_create_cluster( 177 | true, 178 | :creating; 179 | name = "cluster_useast1", 180 | instance_type = "t3.large", 181 | banyanfile_path = "file://res/Banyanfile.json", 182 | s3_bucket_arn = "TODOTODOTODTODOTO", 183 | ) 184 | end 185 | end 186 | -------------------------------------------------------------------------------- /Banyan/src/samples.jl: -------------------------------------------------------------------------------- 1 | ############################################################### 2 | # Sample that caches properties returned by an AbstractSample # 3 | ############################################################### 4 | 5 | mutable struct Sample 6 | # The sample itself 7 | value::Any 8 | # Properties of the sample 9 | properties::Dict{Symbol,Any} 10 | 11 | function Sample( 12 | value::Any = nothing; 13 | properties::Dict{Symbol,Any} = Dict{Symbol,Any}(), 14 | sample_rate=get_job().sample_rate, 15 | total_memory_usage=nothing 16 | ) 17 | newsample = new(value, properties) 18 | 19 | # Fill in properties if possible 20 | if !isnothing(total_memory_usage) 21 | setsample!(newsample, :memory_usage, round(total_memory_usage / sample_rate)) 22 | end 23 | setsample!(newsample, :rate, sample_rate) 24 | 25 | newsample 26 | end 27 | # TODO: Un-comment if needed 28 | # Sample(sample::Sample, properties::Vector{String}) = 29 | # new(sample.value, sample.stale, Dict( 30 | # sample.sample_properties[prop] 31 | # for prop in properties 32 | # )) 33 | end 34 | 35 | ExactSample(value::Any = nothing; kwargs...) = Sample(value; sample_rate=1, kwargs...) 36 | 37 | # TODO: Lazily compute samples by storing sample computation in a DAG if its 38 | # getting too expensive 39 | sample(fut::AbstractFuture) = sample(get_location(fut).sample) 40 | sample(sample::Sample) = sample.value 41 | 42 | sample(fut::AbstractFuture, propertykeys...) = sample(get_location(fut).sample, propertykeys...) 43 | function sample(s::Sample, propertykeys...) 44 | properties = s.properties 45 | for (i, propertykey) in enumerate(propertykeys) 46 | properties = get!( 47 | properties, 48 | propertykey, 49 | if i < length(propertykeys) 50 | Dict() 51 | else 52 | sample(s.value, propertykeys...) 53 | end 54 | ) 55 | end 56 | properties 57 | end 58 | 59 | setsample!(fut::AbstractFuture, value) = setsample!(get_location(fut).sample, value) 60 | function setsample!(sample::Sample, value) 61 | sample.value = value 62 | end 63 | 64 | setsample!(fut::AbstractFuture, propertykeys...) = setsample!(get_location(fut).sample, propertykeys...) 65 | function setsample!(sample::Sample, propertykeys...) 
66 | if length(propertykeys) == 1 67 | setsample!(sample, first(propertykeys)) 68 | else 69 | properties = sample.properties 70 | propertyvalue = last(propertykeys) 71 | propertykeys = propertykeys[1:end-1] 72 | for (i, propertykey) in enumerate(propertykeys) 73 | if i < length(propertykeys) 74 | properties = get!(properties, propertykey, Dict()) 75 | end 76 | end 77 | properties[last(propertykeys)] = propertyvalue 78 | end 79 | end 80 | 81 | # TODO: For futures with locations like Size, "scale up" the computed sample 82 | # for a useful approximation of things like length of an array 83 | 84 | #################################################################### 85 | # AbstractSample to be implemented by anything that can be sampled # 86 | #################################################################### 87 | 88 | # NOTE: We use strings for things that will be serialized to JSON and symbols 89 | # for everything else 90 | 91 | # NOTE: We use upper-camel-case for user-facing names (like names of PTs) and 92 | # all-caps-snake-case for anything internal (like names of constraints) 93 | 94 | # The purpose of the `sample` function is to allow for computing various 95 | # properties of the sample by property key name instead of an explicit 96 | # function call. This makes it easier for the `sample` and `setsample!` 97 | # functions for `Future`s to compute and cache samples. 98 | 99 | sample(as::Any, properties...) = 100 | if length(properties) == 1 101 | if first(properties) == :memory_usage 102 | sample_memory_usage(as) 103 | elseif first(properties) == :rate 104 | # This is the default but the `Sample` constructor overrides this 105 | # before-hand to allow some samples to be "exact" with a sample 106 | # rate of 1 107 | get_job().sample_rate 108 | elseif first(properties) == :keys 109 | sample_keys(as) 110 | elseif first(properties) == :axes 111 | sample_axes(as) 112 | elseif first(properties) == :groupingkeys 113 | # This is just the initial value for grouping keys. Calls to 114 | # `keep_*` functions will expand it. 
115 | [] 116 | else 117 | # println(typeof(as)) 118 | # println(typeof(as) <: Any) 119 | throw(ArgumentError("Invalid sample properties: $properties")) 120 | end 121 | elseif length(properties) <= 2 && first(properties) == :statistics 122 | Dict() 123 | elseif length(properties) == 3 && first(properties) == :statistics 124 | key = properties[2] 125 | query = properties[3] 126 | if query == :max_ngroups 127 | sample_max_ngroups(as, key) 128 | elseif query == :divisions 129 | sample_divisions(as, key) 130 | elseif query == :min 131 | sample_min(as, key) 132 | elseif query == :max 133 | sample_max(as, key) 134 | else 135 | throw(ArgumentError("Invalid sample properties: $properties")) 136 | end 137 | elseif length(properties) == 5 && first(properties) == :statistics 138 | key, query, minvalue, maxvalue = properties[2:end] 139 | if query == :percentile 140 | sample_percentile(as, key, minvalue, maxvalue) 141 | else 142 | throw(ArgumentError("Invalid sample properties: $properties")) 143 | end 144 | else 145 | throw(ArgumentError("Invalid sample properties: $properties")) 146 | end 147 | 148 | sample_memory_usage(as::Any) = total_memory_usage(as) 149 | 150 | # Implementation error 151 | impl_error(fn_name, as) = error("$fn_name not implemented for $(typeof(as))") 152 | 153 | # Functions to implement for Any (e.g., for DataFrame or 154 | # Array) 155 | sample_axes(as::Any) = impl_error("sample_axes", as) 156 | sample_keys(as::Any) = impl_error("sample_keys", as) 157 | sample_divisions(as::Any, key) = impl_error("sample_divisions", as) 158 | sample_percentile(as::Any, key, minvalue, maxvalue) = impl_error("sample_percentile", as) 159 | sample_max_ngroups(as::Any, key) = impl_error("sample_max_ngroups", as) 160 | sample_min(as::Any, key) = impl_error("sample_min", as) 161 | sample_max(as::Any, key) = impl_error("sample_max", as) -------------------------------------------------------------------------------- /Banyan/src/partitions.jl: -------------------------------------------------------------------------------- 1 | ############################# 2 | # Partition type references # 3 | ############################# 4 | 5 | const PartitionTypeReference = Tuple{ValueId,Integer} 6 | 7 | ############################ 8 | # Partitioning constraints # 9 | ############################ 10 | 11 | pt_ref_to_jl(pt_ref) = 12 | if pt_ref isa Tuple 13 | (convert(Future, pt_ref[1]).value_id, pt_ref[2] - 1) 14 | else 15 | (convert(Future, pt_ref).value_id, 0) 16 | end 17 | 18 | pt_refs_to_jl(refs) = 19 | [pt_ref_to_jl(ref) for ref in refs] 20 | 21 | struct PartitioningConstraintOverGroup 22 | type::String 23 | args::Vector{PartitionTypeReference} 24 | end 25 | 26 | struct PartitioningConstraintOverGroups 27 | type::String 28 | args::Vector{Vector{PartitionTypeReference}} 29 | end 30 | 31 | const PartitioningConstraint = Union{PartitioningConstraintOverGroup, PartitioningConstraintOverGroups} 32 | 33 | to_jl(pc::PartitioningConstraint) = Dict("type" => pc.type, "args" => pc.args) 34 | 35 | arg_to_jl_for_co(arg) = 36 | if arg isa Vector 37 | pt_refs_to_jl(arg) 38 | else 39 | [pt_ref_to_jl(arg)] 40 | end 41 | 42 | function constraint_for_co(args)::PartitioningConstraintOverGroups 43 | if any(arg isa Vector for arg in args) 44 | args = [arg_to_jl_for_co(arg) for arg in args] 45 | PartitioningConstraintOverGroups("CO_GROUP", args) 46 | else 47 | PartitioningConstraintOverGroups("CO", pt_refs_to_jl(args)) 48 | end 49 | end 50 | 51 | # TODO: Support Ordered 52 | Co(args...) = constraint_for_co(args) 53 | Cross(args...) 
= PartitioningConstraintOverGroup("CROSS", pt_refs_to_jl(args))
54 | Equal(args...) = PartitioningConstraintOverGroup("EQUAL", pt_refs_to_jl(args))
55 | Sequential(args...) =
56 | PartitioningConstraintOverGroup("SEQUENTIAL", pt_refs_to_jl(args))
57 | Match(args...) = PartitioningConstraintOverGroup("MATCH", pt_refs_to_jl(args))
58 | MatchOn(on, args...) =
59 | PartitioningConstraintOverGroup(
60 | "MATCH_ON=" * string(on),
61 | pt_refs_to_jl(args),
62 | )
63 | AtMost(npartitions, args...) =
64 | PartitioningConstraintOverGroup(
65 | "AT_MOST=$npartitions",
66 | pt_refs_to_jl(args)
67 | )
68 | ScaleBy(arg, factor::Real = 1.0, relative_to...) =
69 | PartitioningConstraintOverGroup(
70 | "SCALE_BY=$factor",
71 | pt_refs_to_jl([arg; relative_to...])
72 | )
73 |
74 | # TODO: Make the above constraint constructors produce dictionaries that have
75 | # fields that make sense and are specialized for each one. This will reduce
76 | # a significant amount of messy and hard-to-read code here (e.g., what in the
77 | # world is a PartitioningConstraintOverGroup vs. a
78 | # PartitioningConstraintOverGroups).
79 |
80 | # NOTE: ScaleBy constraints accept PT references but only the values of PT
81 | # references where the index is 1 are taken because we only scale relative
82 | # to the memory usage that is split by the first PT in the PT compositions
83 | # referenced
84 |
85 | # NOTE: If you require a constraint for a particular PT, the onus is on you to
86 | # ensure that wherever you use a value with that PT assigned, you always
87 | # have the PTs that are referenced by the constraint. For example, if you use
88 | # an AtMost constraint which references both PTs from a PT composition for a
89 | # value where the first PT splits across workers and the second across batches,
90 | # you need to ensure that anywhere you use the value, you actually do have a
91 | # PT composition of length 2.
92 |
93 | # NOTE: Currently, only AtMost and ScaleBy are supported as PT-level
94 | # constraints (meaning they are included as part of a PT so that the PT
95 | # cannot be applied to a variable unless the constraints are also enforced)
96 | # while ScaleBy may not be used as PA-level constraints (constraints that are
97 | # applicable only for a single code region annotated with a PA)
98 |
99 | mutable struct PartitioningConstraints
100 | constraints::Vector{Union{PartitioningConstraint, Function}}
101 | end
102 |
103 | PartitioningConstraints() = PartitioningConstraints([])
104 |
105 | function to_jl(constraints::PartitioningConstraints)
106 | return Dict(
107 | "constraints" =>
108 | [to_jl(constraint) for constraint in constraints.constraints],
109 | )
110 | end
111 |
112 | ###################
113 | # Partition types #
114 | ###################
115 |
116 | const PartitionTypeParameters = Dict{String, Any}
117 |
118 | mutable struct PartitionType
119 | parameters::PartitionTypeParameters
120 | constraints::PartitioningConstraints
121 |
122 | PartitionType(
123 | parameters::Dict{String, <:Any} = PartitionTypeParameters(),
124 | constraints::PartitioningConstraints = PartitioningConstraints(),
125 | ) = new(parameters, constraints)
126 | PartitionType(s::String) = new(Dict("name" => s), PartitioningConstraints())
127 | PartitionType(parameters::PartitionTypeParameters) = new(parameters, PartitioningConstraints())
128 |
129 | function PartitionType(args::Union{String, Pair{String,<:Any}, PartitioningConstraint, Function}...)
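# Each argument either sets the PT's name (a String), adds a parameter
# (a name => value Pair), or attaches a PT-level constraint (a
# PartitioningConstraint or a Function producing one).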
130 | parameters = Dict() 131 | constraints = PartitioningConstraints() 132 | 133 | # Construct parameters and constraints from arguments 134 | for arg in args 135 | if arg isa String 136 | parameters["name"] = arg 137 | elseif arg isa Pair 138 | parameters[first(arg)] = last(arg) 139 | elseif arg isa PartitioningConstraint || arg isa Function 140 | push!(constraints.constraints, arg) 141 | else 142 | throw(ArgumentError("Expected either a partition type parameter or constraint")) 143 | end 144 | end 145 | 146 | new(parameters, constraints) 147 | end 148 | end 149 | 150 | # We probably need this so we can iterate over PTs produced by Grouped and then 151 | # check the key property 152 | function Base.getproperty(pt::PartitionType, name::Symbol) 153 | if hasfield(PartitionType, name) 154 | return getfield(pt, name) 155 | end 156 | 157 | n = string(name) 158 | if haskey(pt.parameters, n) 159 | return pt.parameters[n] 160 | end 161 | error("$name not found in partition type parameters") 162 | end 163 | 164 | function to_jl(pt::PartitionType) 165 | # Interpret bangs as random IDs 166 | # TODO: Use this in the backend to interpret pt_lib_info.json 167 | for (k, v) in pt.parameters 168 | if v == "!" 169 | pt.parameters[k] = randstring(8) 170 | end 171 | end 172 | 173 | # Construct dictionary 174 | Dict("parameters" => pt.parameters, "constraints" => to_jl(pt.constraints)) 175 | end 176 | 177 | ############################## 178 | # Partition type composition # 179 | ############################## 180 | 181 | # This is mutable so that we can append PTs 182 | mutable struct PartitionTypeComposition 183 | pts::Vector{PartitionType} 184 | end 185 | 186 | to_jl(ptc::PartitionTypeComposition) = [to_jl(pt) for pt in ptc.pts] 187 | 188 | ############################## 189 | # Partition type combinators # 190 | ############################## 191 | 192 | const PTOrPTUnion = Union{PartitionType,Vector{PartitionType}} 193 | 194 | Base.:&(a::PartitionType, b::PartitionType) = 195 | if all( 196 | a.parameters[param_name] == b.parameters[param_name] for 197 | param_name in keys(a.parameters) if param_name in keys(b.parameters) 198 | ) 199 | PartitionType( 200 | merge(a.parameters, b.parameters), 201 | PartitioningConstraints( 202 | [a.constraints.constraints; b.constraints.constraints] 203 | ) 204 | ) 205 | else 206 | nothing 207 | end 208 | 209 | Base.:&(a::Vector{PartitionType}, b::PartitionType) = 210 | filter(pt->!isnothing(pt), [pt & b for pt in a]) 211 | Base.:&(a::PartitionType, b::Vector{PartitionType}) = b & a 212 | Base.:&(a::Vector{PartitionType}, b::Vector{PartitionType}) = 213 | filter(pt->!isnothing(pt), [aa & bb for aa in a for bb in b]) 214 | Base.:|(a::PTOrPTUnion, b::PTOrPTUnion) = [a; b] 215 | 216 | ######################### 217 | # Partition annotations # 218 | ######################### 219 | 220 | # TODO: Rename Partitions to PartitionTypeBinding and keep Partitioning as is 221 | mutable struct Partitions 222 | # TODO: Only use either PT stack or PT composition to be consistent in 223 | # terminology 224 | pt_stacks::Dict{ValueId,PartitionTypeComposition} 225 | end 226 | 227 | Partitions() = Partitions(Dict()) 228 | 229 | function to_jl(p::Partitions) 230 | # NOTE: This assumes that the PT compositions in `p.pt_stacks` are _not_ 231 | # delayed 232 | return Dict( 233 | "pt_stacks" => 234 | Dict(v => ptc |> to_jl for (v, ptc) in p.pt_stacks), 235 | ) 236 | end 237 | 238 | mutable struct PartitionAnnotation 239 | partitions::Partitions 240 | constraints::PartitioningConstraints 241 | end 242 
| 243 | PartitionAnnotation() = PartitionAnnotation(Partitions(), PartitioningConstraints())
244 |
245 | function to_jl(pa::PartitionAnnotation)
246 | return Dict(
247 | "partitions" => to_jl(pa.partitions),
248 | "constraints" => to_jl(pa.constraints),
249 | )
250 | end
251 | -------------------------------------------------------------------------------- /BanyanArrays/test/test_l1_l2.jl: --------------------------------------------------------------------------------
1 | # TODO: Make FutureArray a subtype of AbstractArray
2 | struct FutureArray{T,N}
3 | data::Future
4 | size::Future
5 | end
6 |
7 | # Defining a method like the following is helpful if the Future in a data type
8 | # isn't just the first field of the type that is a Future. In such a case,
9 | # a custom future function may be defined but users of this data type must
10 | # be sure to first call future on their data type before calling
11 | # annotations functions like `mem`/`val`/`pt`/`pc`/`mut`/`loc`/`src`/`dst`.
12 | Banyan.future(ba::T) where {T<:FutureArray} = ba.data
13 |
14 | function partitioned_vector(ba::FutureArray{T,1}) where {T}
15 | res_data = future()
16 | res_size = future()
17 | target_size = ba.size
18 |
19 | mem(ba, res_data)
20 |
21 | pt(ba, Block())
22 | pt(res_data, Block())
23 | pt(res_size, Replicate())
24 | pt(target_size, Replicate())
25 |
26 | pc(Match(ba, res_data))
27 | pc(Match(res_size, target_size))
28 | mutated(res_data)
29 | mutated(res_size)
30 |
31 | return ba, res_data, res_size, target_size
32 | end
33 |
34 | function partitioned_vector_by_vector(
35 | ba::FutureArray{T,1},
36 | other::FutureArray{T,1},
37 | ) where {T}
38 | res_data = future()
39 | res_size = future()
40 | target_size = ba.size
41 | mem(ba, other, res_data)
42 |
43 | pt(ba, BlockBalanced())
44 | pt(other, BlockBalanced())
45 | pt(res_data, Block())
46 | pt(res_size, Replicate())
47 | pt(target_size, Replicate())
48 |
49 | pc(Match(ba, other, res_data))
50 | pc(Match(res_size, target_size))
51 | mutated(res_data)
52 | mutated(res_size)
53 |
54 | return ba, other, res_data, res_size, target_size
55 | end
56 |
57 | function partitioned_vector_by_scalar(ba::FutureArray{T,1}, other::T) where {T}
58 | other = future(other)
59 | res_data = future()
60 | res_size = future()
61 | target_size = ba.size
62 | # TODO: See whether ba.size is keeping arrays around for longer
63 |
64 | mem(ba, res_data)
65 | val(other)
66 |
67 | pt(ba, Block())
68 | pt(res_data, Block())
69 | pt(other, Replicate())
70 | pt(res_size, Replicate())
71 | pt(target_size, Replicate())
72 |
73 | pc(Match(ba, res_data))
74 | pc(Match(res_size, target_size))
75 | mutated(res_data)
76 | mutated(res_size)
77 |
78 | return ba, other, res_data, res_size, target_size
79 | end
80 |
81 | function partitioned_replicated_value(x)
82 | x = future(x)
83 | val(x)
84 | pt(x, Replicate())
85 | return x
86 | end
87 |
88 | function ones(::Type{T}, len::Integer)::FutureArray{T,1} where {T<:Number}
89 | data = future()
90 | data_size = future((len,))
91 | created_size = future(len) # TODO: Support n-dimensional arrays with Match
92 | ty = future(T)
93 |
94 | mem(data, len, Float64)
95 | val(data_size)
96 | val(created_size)
97 | val(ty)
98 |
99 | pt(data, Block())
100 | pt(created_size, Div())
101 | pt(ty, Replicate())
102 | mutated(data)
103 |
104 | @partitioned data created_size ty begin
105 | data = ones(ty, created_size)
106 |
107 |
108 | end
109 |
110 | FutureArray{T,1}(data, data_size)
111 | end
112 |
113 | # Binary vector-vector operations
114 | for op in (:+, :-)
115 | @eval begin
116 | function Base.$op(
117 | ba::FutureArray{T,1},
118 | other::FutureArray{T,1},
119 | )::FutureArray{T,1} where {T}
120 | ba, other, res_data, res_size, target_size =
121 | partitioned_vector_by_vector(ba, other)
122 | op = partitioned_replicated_value($op)
123 | @partitioned op ba other res_data res_size target_size begin
124 | res_data = op(ba, other)
125 | res_size = target_size
126 | end
127 |
128 | FutureArray{T,1}(res_data, res_size)
129 | end
130 | end
131 | end
132 |
133 | # Binary vector-scalar operations
134 |
135 | for op in (:*, :/)
136 | @eval begin
137 | function Base.$op(
138 | ba::FutureArray{T,1},
139 | other::T,
140 | )::FutureArray{T,1} where {T}
141 | ba, other, res_data, res_size, target_size =
142 | partitioned_vector_by_scalar(ba, other)
143 | op = partitioned_replicated_value($op)
144 | @partitioned op ba other res_data res_size target_size begin
145 | res_data = op(ba, other)
146 | res_size = target_size
147 | end
148 |
149 | FutureArray{T,1}(res_data, res_size)
150 | end
151 | end
152 | end
153 |
154 | for op in (:*,)
155 | @eval begin
156 | function Base.$op(
157 | other::T,
158 | ba::FutureArray{T,1},
159 | )::FutureArray{T,1} where {T}
160 | ba, other, res_data, res_size, target_size =
161 | partitioned_vector_by_scalar(ba, other)
162 | op = partitioned_replicated_value($op)
163 | @partitioned op ba other res_data res_size target_size begin
164 | res_data = op(other, ba)
165 | res_size = target_size
166 | end
167 |
168 | FutureArray{T,1}(res_data, res_size)
169 | end
170 | end
171 | end
172 |
173 | # Broadcasting vector-vector operations
174 | function Base.broadcasted(
175 | op,
176 | ba::FutureArray{T,1},
177 | other::FutureArray{T,1},
178 | )::FutureArray{T,1} where {T}
179 | ba, other, res_data, res_size, target_size =
180 | partitioned_vector_by_vector(ba, other)
181 | op = partitioned_replicated_value(op)
182 | @partitioned op ba other res_data res_size target_size begin
183 | res_data = Base.broadcast(op, ba, other)
184 | res_size = target_size
185 | end
186 |
187 | FutureArray{T,1}(res_data, res_size)
188 | end
189 |
190 | # Broadcasting vector-scalar operations
191 |
192 | function broadcasted(
193 | op,
194 | ba::FutureArray{T,1},
195 | other::T,
196 | )::FutureArray{T,1} where {T}
197 | ba, other, res_data, res_size, target_size =
198 | partitioned_vector_by_scalar(ba, other)
199 | op = partitioned_replicated_value(op)
200 | @partitioned op ba other res_data res_size target_size begin
201 | res_data = Base.broadcast(op, ba, other)
202 | res_size = target_size
203 | end
204 |
205 | FutureArray{T,1}(res_data, res_size)
206 | end
207 |
208 | broadcasted(op, ba::FutureArray{T,1}, ::Val{V}) where {T,V} =
209 | broadcasted(op, ba, T(V))
210 |
211 | Base.broadcasted(op, ba::FutureArray, other) =
212 | broadcasted(op, ba, other)
213 |
214 | # specialized_wrapper might be Base.literal_pow
215 | Base.broadcasted(specialized_wrapper, op, ba::FutureArray, other) =
216 | broadcasted(op, ba, other)
217 |
218 | function Base.broadcasted(
219 | op,
220 | other::T,
221 | ba::FutureArray{T,1},
222 | )::FutureArray{T,1} where {T}
223 | ba, other, res_data, res_size, target_size =
224 | partitioned_vector_by_scalar(ba, other)
225 | op = partitioned_replicated_value(op)
226 | @partitioned op ba other res_data res_size target_size begin
227 | res_data = Base.broadcast(op, other, ba)
228 | res_size = target_size
229 | end
230 |
231 | FutureArray{T,1}(res_data, res_size)
232 | end
233 |
234 | function
Base.broadcasted(op, ba::FutureArray{T,1})::FutureArray{T,1} where {T} 235 | ba, res_data, res_size, target_size = partitioned_vector(ba) 236 | op = partitioned_replicated_value(op) 237 | @partitioned op ba res_data res_size target_size begin 238 | res_data = Base.broadcast(op, ba) 239 | res_size = target_size 240 | end 241 | 242 | FutureArray{T,1}(res_data, res_size) 243 | end 244 | 245 | # Unary operators 246 | for op in (:-, :+) 247 | @eval begin 248 | function Base.$op(ba::FutureArray{T,1})::FutureArray{T,1} where {T} 249 | ba, res_data, res_size, target_size = partitioned_vector(ba) 250 | op = partitioned_replicated_value($op) 251 | @partitioned op ba res_data res_size target_size begin 252 | res_data = op(ba) 253 | res_size = target_size 254 | end 255 | 256 | FutureArray{T,1}(res_data, res_size) 257 | end 258 | end 259 | end 260 | 261 | function run_bs(size::Integer) 262 | price = ones(Float64, size) * 4.0 263 | strike = ones(Float64, size) * 4.0 264 | t = ones(Float64, size) * 4.0 265 | rate = ones(Float64, size) * 4.0 266 | vol = ones(Float64, size) * 4.0 267 | 268 | evaluate(price) 269 | 270 | # c05 = Float64(3.0) 271 | # c10 = Float64(1.5) 272 | # invsqrt2 = 1.0 / sqrt(2.0) 273 | 274 | # # rsig = rate + (vol.^2) * c05 275 | # rsig = vol .^ 2 # TODO: Fix issue in FutureArray where operands are same 276 | # rsig = rsig .* c05 277 | # rsig = rsig .+ rate 278 | 279 | # # rsig 280 | 281 | # # vol_sqrt = vol .* sqrt.(t) 282 | # vol_sqrt = sqrt.(t) 283 | # vol_sqrt = vol_sqrt .* vol 284 | 285 | # # d1 = (log.(price ./ strike) + rsig .* t) ./ vol_sqrt 286 | # d1 = price ./ strike 287 | # d1 = log.(d1) 288 | # tmp = rsig .* t 289 | # d1 = d1 .+ tmp 290 | # d1 = d1 ./ vol_sqrt 291 | 292 | # # d1 293 | 294 | # d2 = d1 .- vol_sqrt 295 | 296 | # # d2 297 | 298 | # # d1 = c05 .+ c05 .* exp.(d1 .* invsqrt2) 299 | # # d2 = c05 .+ c05 .* exp.(d2 .* invsqrt2) 300 | # d1 = d1 * invsqrt2 301 | # d1 = exp.(d1) 302 | # d1 = d1 .* c05 303 | # d1 = d1 .+ c05 304 | # d2 = d2 .* invsqrt2 305 | # d2 = exp.(d2) 306 | # d2 = d2 .* c05 307 | # d2 = d2 .+ c05 308 | 309 | # # e_rt = exp.((-rate) .* t) 310 | # e_rt = -rate 311 | # e_rt = e_rt .* t 312 | # e_rt = exp.(e_rt) 313 | 314 | # # call = price .* d1 - e_rt .* strike .* d2 315 | # call = price .* d1 316 | # tmp = e_rt .* strike 317 | # tmp = tmp .* d2 318 | # call = call .- tmp 319 | 320 | # put = e_rt .* strike .* (c10 .- d2) - price .* (c10 .- d1) 321 | 322 | # evaluate(call) 323 | end 324 | 325 | @testset "Black Scholes" begin 326 | run_with_job("Black Scholes", j -> begin 327 | size = Integer(1e9) 328 | run_bs(size) 329 | # evaluate(put) 330 | end) 331 | end 332 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | --------------------------------------------------------------------------------
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /BanyanDataFrames/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Banyan/src/pt_lib_constructors.jl: -------------------------------------------------------------------------------- 1 | # NOTE: Do not construct a PT such that PTs can be fused together or used as-is 2 | # in a way such that there aren't functions for splitting and merging them in 3 | # pt_lib.jl. 
# Note that each splitting and merging function in pt_lib.jl targets specific
# locations; for example, a Div should not be used on a value with a CSV
# location unless there is a splitting function for it.

# Block() = PartitionType(Dict("name" => "Block"))
# Block(dim) = PartitionType(Dict("name" => "Block", "dim" => dim))
# BlockBalanced() = PartitionType(Dict("name" => "Block", "balanced" => true))
# BlockBalanced(dim) =
#     PartitionType(Dict("name" => "Block", "dim" => dim, "balanced" => true))
# BlockUnbalanced() = PartitionType(Dict("name" => "Block", "balanced" => false))
# BlockUnbalanced(dim) =
#     PartitionType(Dict("name" => "Block", "dim" => dim, "balanced" => false))

# Div() = PartitionType(Dict("name" => "Replicate", "dividing" => true))
# Replicated() = PartitionType(Dict("name" => "Replicate", "replicated" => true))
# Reducing(op) = PartitionType(Dict("name" => "Replicate", "replicated" => false, "reducer" => to_jl_value(op)))

# TODO: Generate AtMost and ScaledBy constraints in handling filters and joins
# that introduce data skew and in other operations that explicitly don't

Replicating() = PartitionType("name" => "Replicating", f -> ScaleBy(f, 1.0))
Replicated() = Replicating() & PartitionType("replication" => "all", "reducer" => nothing)
# TODO: Add Replicating(f) to the below if needed for reducing operations on
# large objects such as unique(df::DataFrame)

# TODO: Determine whether the `"reducer" => nothing` should be there
Divided() = Replicating() & PartitionType("divided" => true)
Syncing() = Replicating() & PartitionType("replication" => "one", "reducer" => nothing) # TODO: Determine whether this is really needed
Reducing(op) = Replicating() & PartitionType("replication" => nothing, "reducer" => to_jl_value(op), "with_key" => false)
ReducingWithKey(op) = Replicating() & PartitionType("replication" => nothing, "reducer" => to_jl_value(op), "with_key" => true)
# TODO: Maybe replace banyan_reduce_size_by_key with an anonymous function since that actually _can_ be ser/de-ed
# or instead make there be a reducing type that passes in the key to the reducing functions so it can reduce by that key
# ReducingSize() = PartitionType("replication" => "one", "reducer" => "banyan_reduce_size_by_key")

Distributing() = PartitionType("name" => "Distributing")
Blocked(; along = nothing) =
    if isnothing(along)
        PartitionType("name" => "Distributing", "distribution" => "blocked")
    else
        PartitionType(
            "name" => "Distributing",
            "distribution" => "blocked",
            "key" => along,
        )
    end
Grouped() = PartitionType("name" => "Distributing", "distribution" => "grouped")
# Blocked(;balanced) = PartitionType("name" => "Distributing", "distribution" => "blocked", "balanced" => balanced)
# Grouped(;balanced) = PartitionType("name" => "Distributing", "distribution" => "grouped", "balanced" => balanced)

ScaledBySame(; as) = PartitionType(f -> ScaleBy(f, 1.0, as))
Drifted() = Distributing() & PartitionType("id" => "!")
Balanced() =
    Distributing() & PartitionType("balanced" => true, f -> ScaleBy(f, 1.0))
Unbalanced(; scaled_by_same_as = nothing) =
    if isnothing(scaled_by_same_as)
        Distributing() & PartitionType("balanced" => false)
    else
        Unbalanced() & ScaledBySame(as = scaled_by_same_as)
    end
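
# A minimal usage sketch (hedged): `&` merges parameters and constraints into
# a single PT, while `|` builds a PT union for the scheduler to choose from.
# `x` and `res` below are hypothetical futures in an annotated code region:
#
#     pt(x, Blocked(along = 1) | Replicated())
#     pt(res, Reducing(+))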

# These functions (along with `keep_sample_rate`) allow for managing memory
# usage in annotated code. `keep_sample_rate` allows for setting the sample
# rate as it changes from value to value. Some operations, such as joins,
# actually require a change in sample rate, so propagating this information is
# important and must be done before partition annotations are applied (in
# `partitioned_using`). In the partition annotation itself, we sometimes want
# to set constraints on how we scale memory usage based on how much skew is
# introduced by an operation. Some operations not only change the sample rate
# but also introduce skew, so applying these constraints is important.
# FilteredTo and FilteredFrom help with constraining skew when it is
# introduced through data filtering operations, while MutatedTo and
# MutatedFrom allow for propagating skew for operations where the skew is
# unchanged. Balanced data doesn't have any skew; Balanced and balanced=true
# help to make this clear.
# TODO: Remove this if we don't need
# MutatedRelativeTo(f, mutated_relative_to) = PartitionType(ScaleBy(1.0, f, mutated_relative_to))
# MutatedTo(f, mutated_to) = MutatedRelativeTo(f, mutated_to)
# MutatedFrom(f, mutated_from) = MutatedRelativeTo(f, mutated_from)

Distributed(args...; kwargs...) = Blocked(args...; kwargs...) | Grouped(args...; kwargs...)
Partitioned(args...; kwargs...) = Distributed(args...; kwargs...) | Replicated()

function Blocked(
    f::AbstractFuture;
    along = :,
    balanced = nothing,
    filtered_from = nothing,
    filtered_to = nothing,
    scaled_by_same_as = nothing,
)
    parameters = Dict()
    constraints = PartitioningConstraints()

    # Prepare `along`
    if along isa Colon
        along = sample(f, :axes)
        # TODO: Ensure that axes returns [1] for DataFrame and axes for Array
        # while keys returns keys for DataFrame and axes for Array
    end
    along = to_vector(along)
    # TODO: Maybe assert that along isa Vector{String} or Vector{Symbol}

    # Create PTs for each axis that can be used to block along
    pts::Vector{PartitionType} = []
    for axis in first(along, 4)
        # Handle combinations of `balanced` and `filtered_from`/`filtered_to`
        for b in (isnothing(balanced) ? [true, false] : [balanced])
            # Initialize parameters
            parameters = Dict("key" => axis, "balanced" => b)
            constraints = PartitioningConstraints()

            # Create `ScaleBy` constraints
            if b
                push!(constraints.constraints, ScaleBy(f, 1.0))
                # TODO: Add an AtMost constraint in the case that input elements are very large
            else
                if !isnothing(filtered_from)
                    filtered_from = to_vector(filtered_from)
                    factor, from = maximum(filtered_from) do ff
                        (sample(ff, :memory_usage) / sample(f, :memory_usage), filtered_from)
                    end
                    push!(constraints.constraints, ScaleBy(f, factor, from))
                elseif !isnothing(filtered_to)
                    filtered_to = to_vector(filtered_to)
                    factor, to = maximum(filtered_to) do ft
                        (sample(ft, :memory_usage) / sample(f, :memory_usage), filtered_to)
                    end
                    push!(constraints.constraints, ScaleBy(f, factor, to))
                elseif !isnothing(scaled_by_same_as)
                    push!(constraints.constraints, ScaleBy(f, 1.0, scaled_by_same_as))
                end
            end

            # Append new PT to PT union being produced
            push!(pts, PartitionType(parameters, constraints))
        end
    end

    # Return the resulting PT union that can then be passed into a call to `pt`
    # which would in turn result in a PA union
    Blocked() & pts
end
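
# A hedged usage sketch for `Blocked`; `inp` and `out` are hypothetical
# futures where `out` was produced by filtering `inp`:
#
#     pt(out, Blocked(out, balanced = false, filtered_from = inp))
#
# This yields unbalanced blocked PTs whose `ScaleBy` constraints bound the
# memory usage of `out` relative to `inp`.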

# NOTE: A reason to use Grouped for element-wise computation (with no
# filtering) is to allow for the input to be re-balanced. If you just use
# Any then there wouldn't be any way to re-balance right before the
# computation. Grouped allows the input to have either balanced=true or
# balanced=false and if balanced=true is chosen then a cast may be applied.

function Grouped(
    f::AbstractFuture;
    # Parameters for splitting into groups
    by = nothing,
    balanced = nothing,
    rev = nothing,
    # Options to deal with skew
    filtered_from = nothing,
    filtered_to = nothing,
    scaled_by_same_as = nothing,
)
    # Prepare `by`
    by = if isnothing(by)
        sample(f, :groupingkeys)
    elseif by isa Colon
        sample(f, :keys)
    else
        by
    end
    by = Symbol.(by)
    by = to_vector(by)

    # Create PTs for each key that can be used to group by
    pts::Vector{PartitionType} = []
    for key in first(by, 8)
        # Handle combinations of `balanced` and `filtered_from`/`filtered_to`
        for b in (isnothing(balanced) ? [true, false] : [balanced])
            parameters = Dict("key" => key, "balanced" => b)
            constraints = PartitioningConstraints()

            # Create `ScaleBy` constraint and also compute `divisions` and
            # `AtMost` constraint if balanced
            if b
                # Set divisions
                # TODO: Change this if `divisions` is not a `Vector{Tuple{Any,Any}}`
                parameters["divisions"] = to_jl_value(sample(f, :statistics, key, :divisions))
                max_ngroups = sample(f, :statistics, key, :max_ngroups)

                # Set flag for reversing the order of the groups
                if !isnothing(rev)
                    parameters["rev"] = rev
                end

                # Add constraints
                push!(constraints.constraints, AtMost(max_ngroups, f))
                push!(constraints.constraints, ScaleBy(f, 1.0))

                # TODO: Make AtMost only accept a value (we can support PT references in the future if needed)
                # TODO: Make scheduler check that the values in AtMost or ScaledBy are actually present to ensure
                # that the constraint can be satisfied for this PT to be used
            else
                # TODO: Support joins
                if !isnothing(filtered_from)
                    filtered_from = to_vector(filtered_from)
                    factor, from = maximum(filtered_from) do ff
                        min_filtered_from = sample(f, :statistics, key, :min)
                        max_filtered_from = sample(f, :statistics, key, :max)
                        # divisions_filtered_from = sample(ff, :statistics, key, :divisions)
                        f_percentile = sample(f, :statistics, key, :percentile, min_filtered_from, max_filtered_from)
                        (f_percentile, filtered_from)
                    end
                    push!(constraints.constraints, ScaleBy(f, factor, from))
                elseif !isnothing(filtered_to)
                    filtered_to = to_vector(filtered_to)
                    factor, to = maximum(filtered_to) do ft
                        min_filtered_to = sample(ft, :statistics, key, :min)
                        max_filtered_to = sample(ft, :statistics, key, :max)
                        # f_divisions = sample(f, :statistics, key, :divisions)
                        f_percentile = sample(f, :statistics, key, :percentile, min_filtered_to, max_filtered_to)
                        (1 / f_percentile, filtered_to)
                    end
                    push!(constraints.constraints, ScaleBy(f, factor, to))
                elseif !isnothing(scaled_by_same_as)
                    push!(constraints.constraints, ScaleBy(f, 1.0, scaled_by_same_as))
                end
            end

            push!(pts, PartitionType(parameters, constraints))
        end
    end
    Grouped() & pts
end
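
# A hedged sketch of assigning a grouped PT union in annotated code; `df` is a
# hypothetical future whose sample carries statistics for the `:species` key:
#
#     pt(df, Grouped(df, by = :species) | Replicated())
#
# The scheduler then picks one PT from the resulting PA union, subject to the
# attached `AtMost`/`ScaleBy` constraints.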
--------------------------------------------------------------------------------
/Banyan/src/requests.jl:
--------------------------------------------------------------------------------
#################
# Magic Methods #
#################

# TODO: Implement magic methods

# Assume that this is mutating
# function Base.getproperty(fut::Future, sym::Symbol)
# end

# Mutating
# TODO: put this back in some way
# function Base.setproperty!(fut::Future, sym::Symbol, new_value)
# end

#############################
# Basic methods for futures #
#############################

function compute(fut::AbstractFuture)
    # TODO: Refactor `current_job_status` out into the `Job`s stored in
    # `global jobs`
    global current_job_status

    fut = convert(Future, fut)
    job_id = get_job_id()
    job = get_job()

    if fut.mutated
        # Get all tasks to be recorded in this call to `compute`
        tasks = [req.task for req in job.pending_requests if req isa RecordTaskRequest]

        # Call `partitioned_using_func`s in 2 passes - forwards and backwards.
        # This allows sample properties to propagate in both directions. We
        # must also make sure to apply mutations in each task appropriately.
        for t in tasks
            @show t.mutation
            @show t.effects
        end
        for t in Iterators.reverse(tasks)
            apply_mutation(invert(t.mutation))
        end
        for t in tasks
            if !isnothing(t.partitioned_using_func)
                t.partitioned_using_func()
            end
            apply_mutation(t.mutation)
        end
        for t in Iterators.reverse(tasks)
            apply_mutation(invert(t.mutation))
            if !isnothing(t.partitioned_using_func)
                t.partitioned_using_func()
            end
        end

        # Do further processing on tasks now that all samples have been
        # computed and sample properties have been set up to share references
        # as needed to prevent expensive redundant computation of sample
        # properties like divisions
        for (i, t) in enumerate(tasks)
            apply_mutation(t.mutation)

            # Call `partitioned_with_func` to create additional PAs for each task
            set_task(t)
            if !isnothing(t.partitioned_with_func)
                t.partitioned_with_func()
            end

            # Cascade PAs backwards. In other words, if as we go from first to
            # last PA we come across one that's annotating a value not
            # annotated in a previous PA, we copy over the annotation (the
            # assigned PT stack) to the previous PA.
            for (j, pa) in enumerate(t.pa_union)
                for previous_pa in Iterators.reverse(t.pa_union[1:j-1])
                    for value_id in keys(pa.partitions.pt_stacks)
                        if !(value_id in keys(previous_pa.partitions.pt_stacks))
                            # Cascade the PT composition backwards
                            previous_pa.partitions.pt_stacks[value_id] =
                                deepcopy(pa.partitions.pt_stacks[value_id])

                            # Cascade backwards all constraints that mention the
                            # value. NOTE: If this is not desired, users should
                            # be explicit and assign different PT compositions for
                            # different values.
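                            # For example (hypothetical): if PA 2 assigns a
                            # PT stack to a value that PA 1 never mentions,
                            # PA 1 receives a deep copy of that stack here,
                            # and any constraint mentioning that value is
                            # copied along with it in the loop below.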
                            for constraint in pa.constraints.constraints
                                # Determine whether we should copy over this constraint
                                copy_constraint = false
                                if constraint isa PartitioningConstraintOverGroup
                                    for arg in constraint.args
                                        if arg isa PartitionTypeReference && first(arg) == value_id
                                            copy_constraint = true
                                        end
                                    end
                                elseif constraint isa PartitioningConstraintOverGroups
                                    for arg in constraint.args
                                        for subarg in arg
                                            if subarg isa PartitionTypeReference && first(subarg) == value_id
                                                copy_constraint = true
                                            end
                                        end
                                    end
                                end

                                # Copy over constraint
                                if copy_constraint
                                    push!(previous_pa.constraints.constraints, deepcopy(constraint))
                                end
                            end
                        end
                    end
                end
            end
        end

        # Switch back to a new task for next code region
        finish_task()

        # for t in tasks
        #     # Apply defaults to PAs
        #     for pa in t.pa_union
        #         @show pa
        #     end
        # end

        # Iterate through tasks for further processing before recording them
        for t in tasks
            @show t.code
            @show t.value_names
            @show t.mutation
            @show t.effects
            # Apply defaults to PAs
            for pa in t.pa_union
                apply_default_constraints!(pa)
                duplicate_for_batching!(pa)
                @show pa
            end

            # Destroy all closures so that all references to `Future`s are dropped
            t.partitioned_using_func = nothing
            t.partitioned_with_func = nothing

            # Drop references to `Future`s in recorded mutations as well
            empty!(t.mutation)
        end

        # Finalize (destroy) all `Future`s that can be destroyed
        GC.gc()

        # Destroy everything that is to be destroyed in this task
        for req in job.pending_requests
            # Don't destroy stuff where a `DestroyRequest` was produced just
            # because of a `mutated(old, new)`
            if req isa DestroyRequest && !any(req.value_id in values(t.mutation) for t in tasks)
                # If this value was to be downloaded to or uploaded from the
                # client side, delete the reference to its data
                if req.value_id in keys(job.futures_on_client)
                    delete!(job.futures_on_client, req.value_id)
                end

                # Remove information about the value's location including the
                # sample taken from it
                delete!(job.locations, req.value_id)
            end
        end

        # Send evaluation request
        try
            response = send_evaluation(fut.value_id, job_id)
        catch
            current_job_status = "failed"
            rethrow()
        end

        # Get queues for moving data between client and cluster
        scatter_queue = get_scatter_queue(job_id)
        gather_queue = get_gather_queue(job_id)

        # Read instructions from gather queue
        # println("job id: ", job_id)
        # print("LISTENING ON: ", gather_queue)
        @debug "Waiting on running job $job_id"
        while true
            # TODO: Use to_jl_value and from_jl_value to support Client
            message = receive_next_message(gather_queue)
            @debug message
            message_type = message["kind"]
            message_end = message["end"]
            if message_type == "SCATTER_REQUEST"
                @debug "Received scatter request"
                # Send scatter
                value_id = message["value_id"]
                f = job.futures_on_client[value_id]
                send_message(
                    scatter_queue,
                    JSON.json(
                        Dict{String,Any}(
                            "value_id" => value_id,
                            "contents" => to_jl_value_contents(f.value)
                        ),
                    ),
                )
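                # The value now lives on the cluster after the scatter, so the
                # client drops its role as the source below (see
                # `sourced(f, None())`).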
                sourced(f, None())
                # TODO: Update stale/mutated here to avoid costly
                # call to `send_evaluation`
            elseif message_type == "GATHER"
                @debug "Received gather request"
                # Receive gather
                value_id = message["value_id"]
                if value_id in keys(job.futures_on_client)
                    value = from_jl_value_contents(message["contents"])
                    f::Future = job.futures_on_client[value_id]
                    f.value = value
                    # TODO: Update stale/mutated here to avoid costly
                    # call to `send_evaluation`
                    @debug value
                end
            elseif message_type == "EVALUATION_END"
                @debug "Received evaluation"
                if message["end"] == true
                    break
                end
            end
        end

        # Update `mutated` and `stale` for the future that is being evaluated
        fut.mutated = false
        # TODO: See if there are more cases where a `compute` call on a future
        # makes it no longer stale
        if get_dst_name(fut) == "Client"
            fut.stale = false
        end
    end

    fut
end

function send_evaluation(value_id::ValueId, job_id::JobId)
    @debug "Sending evaluation request"

    # Submit evaluation request
    response = send_request_get_response(
        :evaluate,
        Dict{String,Any}(
            "value_id" => value_id,
            "job_id" => job_id,
            "requests" => [to_jl(req) for req in get_job().pending_requests]
        ),
    )

    # Clear global state and return response
    empty!(get_job().pending_requests)
    response
end

function Base.collect(fut::AbstractFuture)
    fut = convert(Future, fut)

    # Fast case for where the future has not been mutated and isn't stale
    if !fut.mutated && !fut.stale
        return fut.value
    end

    # This function collects the given future on the client side

    # Set the future's destination location to Client
    destined(fut, Client())
    mutated(fut)

    pt(fut, Replicated())
    @partitioned fut begin
        # This code region is empty but it ensures that something is run
        # and so the data is partitioned and then re-merged back up to its new
        # destination location, the client
    end

    # Evaluate the future so that its value is downloaded to the client
    compute(fut)
    destined(fut, None())
    fut.value
end

###############################################################
# Other requests to be sent with request to evaluate a Future #
###############################################################

struct RecordTaskRequest
    task::DelayedTask
end

struct RecordLocationRequest
    value_id::ValueId
    location::Location
end

struct DestroyRequest
    value_id::ValueId
end

const Request = Union{RecordTaskRequest,RecordLocationRequest,DestroyRequest}

to_jl(req::RecordTaskRequest) = Dict("type" => "RECORD_TASK", "task" => to_jl(req.task))

to_jl(req::RecordLocationRequest) =
    Dict(
        "type" => "RECORD_LOCATION",
        "value_id" => req.value_id,
        "location" => to_jl(req.location),
    )

to_jl(req::DestroyRequest) = Dict("type" => "DESTROY", "value_id" => req.value_id)

function record_request(request::Request)
    push!(get_job().pending_requests, request)
end
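
# A minimal sketch of the request lifecycle from the client's perspective;
# `f` is a hypothetical future created elsewhere in the client library:
#
#     pt(f, Replicated())
#     @partitioned f begin
#         # ... annotated code region ...
#     end
#     compute(f)   # records tasks, sends the evaluate request, and services
#                  # scatter/gather messages until EVALUATION_END
#     collect(f)   # downloads the value to the client if it is stale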
--------------------------------------------------------------------------------
/Banyan/src/utils.jl:
--------------------------------------------------------------------------------
using Base: AbstractVecOrTuple

##############
# CONVERSION #
##############

# NOTE: `jl` refers to a subset of Julia that can be serialized to or
# deserialized from JSON with ease

jl_to_json(j) = JSON.json(j)

json_to_jl(j) = JSON.parse(j)

key_to_jl(key) = reinterpret(UInt8, [hash(string(key))]) |> String
axis_to_jl(axis) = reinterpret(UInt8, [hash(string(axis))]) |> String

total_memory_usage(val) =
    begin
        size = Base.summarysize(val)
        # TODO: Maybe make this larger
        if size ≤ 128
            0
        else
            size
        end
    end

to_vector(v::Vector) = v
to_vector(v) = [v]

# NOTE: This function is shared between the client library and the PT library
function indexapply(op, objs...; index = 1)
    lists = [obj for obj in objs if obj isa AbstractVecOrTuple]
    length(lists) > 0 || throw(ArgumentError("Expected at least one tuple as input"))
    index = index isa Colon ? length(first(lists)) : index
    operands = [(obj isa AbstractVecOrTuple ? obj[index] : obj) for obj in objs]
    indexres = op(operands...)
    res = first(lists)
    if first(lists) isa Tuple
        res = [res...]
        res[index] = indexres
        Tuple(res)
    else
        res = copy(res)
        res[index] = indexres
        res
    end
end
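
# A hedged worked example of `indexapply` (values chosen for illustration):
#
#     indexapply(+, (10, 20), 5, index = 2)  # == (10, 25)
#
# Scalar arguments are applied against each tuple/vector argument's entry at
# `index`, while the other entries of the first tuple pass through unchanged.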

##################
# AUTHENTICATION #
##################

# Process-local configuration for the account being used. It wouldn't be hard
# to make this thread-local, but there shouldn't be any reason to (since only
# one account should be in use per workstation or per server where Banyan.jl
# may be in use). However, wrapping this in a mutex to ensure synchronized
# mutation in this module would be a good TODO.
global banyan_config = nothing
global aws_config_in_usage = nothing

function load_config()
    global banyan_config

    banyanconfig_path = joinpath(homedir(), ".banyan", "banyanconfig.toml")
    if isfile(banyanconfig_path)
        banyan_config = TOML.parsefile(banyanconfig_path)
    end
    banyan_config
end

function write_config()
    global banyan_config

    # Write to banyanconfig.toml
    banyanconfig_path = joinpath(homedir(), ".banyan", "banyanconfig.toml")
    mkpath(joinpath(homedir(), ".banyan"))
    f = open(banyanconfig_path, "w")
    TOML.print(f, banyan_config)
    close(f)
end

if_in_or(key, obj, el = nothing) =
    if key in keys(obj)
        obj[key]
    else
        el
    end

function configure(; kwargs...)
    # This function allows for users to configure their authentication.
    # Authentication details are then saved in
    # `$HOME/.banyan/banyanconfig.toml` so they don't have to be entered again
    # each time a program using the Banyan client library is run

    # Load arguments
    kwargs = Dict(kwargs)
    username = if_in_or(:username, kwargs)
    user_id = if_in_or(:user_id, kwargs)
    api_key = if_in_or(:api_key, kwargs)
    ec2_key_pair_name = if_in_or(:ec2_key_pair_name, kwargs)
    require_ec2_key_pair_name =
        if_in_or(:require_ec2_key_pair_name, kwargs, false)

    # Initialize
    global banyan_config
    is_modified = false
    is_valid = true

    # Ensure a configuration has been created or can be created. Otherwise,
    # return nothing
    if isnothing(banyan_config)
        if !isnothing(user_id) && !isnothing(api_key)
            banyan_config = Dict(
                "banyan" =>
                    Dict("username" => username, "user_id" => user_id, "api_key" => api_key),
                "aws" => Dict(),
            )
            is_modified = true
        else
            error("User ID and API key not provided")
        end
    end

    # Check for changes in required fields
    if !isnothing(username) &&
       (username != banyan_config["banyan"]["username"])
        banyan_config["banyan"]["username"] = username
        is_modified = true
    end
    if !isnothing(user_id) &&
       (user_id != banyan_config["banyan"]["user_id"])
        banyan_config["banyan"]["user_id"] = user_id
        is_modified = true
    end
    if !isnothing(api_key) && (api_key != banyan_config["banyan"]["api_key"])
        banyan_config["banyan"]["api_key"] = api_key
        is_modified = true
    end

    # Check for changes in potentially required fields

    # aws.ec2_key_pair_name
    if !isnothing(ec2_key_pair_name) && (
        !(haskey(banyan_config["aws"], "ec2_key_pair_name")) ||
        ec2_key_pair_name != banyan_config["aws"]["ec2_key_pair_name"]
    )
        banyan_config["aws"]["ec2_key_pair_name"] = ec2_key_pair_name
        is_modified = true
    end
    if require_ec2_key_pair_name &&
       !haskey(banyan_config["aws"], "ec2_key_pair_name")
        error("Name of an EC2 key pair required but not provided; visit here to create a key pair: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html#having-ec2-create-your-key-pair")
    end

    # # aws.region
    # if !isnothing(region) && (
    #     !(haskey(banyan_config["aws"], "region")) ||
    #     region != banyan_config["aws"]["region"]
    # )
    #     banyan_config["aws"]["region"] = region
    #     is_modified = true
    # end

    # Update config file if it was modified
    if is_modified
        write_config() #update_config()
    end

    return banyan_config
end
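
# A hedged usage sketch (the credential values are placeholders, not real):
#
#     configure(
#         username = "me",
#         user_id = "...",
#         api_key = "...",
#         ec2_key_pair_name = "my-key-pair",
#     )
#
# On first use, `user_id` and `api_key` are required; afterwards they are read
# back from banyanconfig.toml.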

function get_aws_config()
    global aws_config_in_usage

    # Get AWS configuration
    if isnothing(aws_config_in_usage)
        # Get region according to ENV, then credentials, then config files
        profile = get(ENV, "AWS_PROFILE", get(ENV, "AWS_DEFAULT_PROFILE", "banyan_nothing"))
        env_region = get(ENV, "AWS_DEFAULT_REGION", "")
        credentialsfile = read(Inifile(), joinpath(homedir(), ".aws", "credentials"))
        configfile = read(Inifile(), joinpath(homedir(), ".aws", "config"))
        credentials_region = _get_ini_value(credentialsfile, profile, "region", default_value="")
        config_region = _get_ini_value(configfile, profile, "region", default_value="")

        # Choose the first region that is set
        region = env_region
        region = isempty(region) ? credentials_region : region
        region = isempty(region) ? config_region : region

        @debug region

        if isempty(region)
            throw(ErrorException("Could not discover AWS region to use from looking at AWS_PROFILE, AWS_DEFAULT_PROFILE, AWS_DEFAULT_REGION, HOME/.aws/credentials, and HOME/.aws/config"))
        end

        aws_config_in_usage = Dict(
            :creds => AWSCredentials(),
            :region => region
        )
    end

    # # Use default location if needed
    # if !haskey(aws_config_in_usage, :region)
    #     @warn "Using default AWS region of us-west-2 in \$HOME/.banyan/banyanconfig.toml"
    #     aws_config_in_usage[:region] = "us-west-2"
    # end

    # Convert to dictionary and return

    aws_config_in_usage
end

get_aws_config_region() = get_aws_config()[:region]

#########################
# ENVIRONMENT VARIABLES #
#########################

is_debug_on() = "JULIA_DEBUG" in keys(ENV) && ENV["JULIA_DEBUG"] == "all"

macro in_env(key)
    return :(string("BANYAN_", getpid(), "_", $key) in keys(ENV))
end

macro env(key)
    return :(ENV[string("BANYAN_", getpid(), "_", $key)])
end

macro delete_in_env(key)
    return :(delete!(ENV, string("BANYAN_", getpid(), "_", $key)))
end

################
# API REQUESTS #
################

method_to_string(method) = begin
    if method == :create_cluster
        "create-cluster"
    elseif method == :destroy_cluster
        "destroy-cluster"
    elseif method == :describe_clusters
        "describe-clusters"
    elseif method == :create_job
        "create-job"
    elseif method == :destroy_job
        "destroy-job"
    elseif method == :describe_jobs
        "describe-jobs"
    elseif method == :evaluate
        "evaluate"
    elseif method == :update_cluster
        "update-cluster"
    elseif method == :set_cluster_ready
        "set-cluster-ready"
    end
end
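
# For example, `method_to_string(:create_cluster)` returns "create-cluster";
# `send_request_get_response` below appends this to `BANYAN_API_ENDPOINT` to
# form the URL it posts to.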
Please check dashboard to view status.", 302 | # ) 303 | elseif method == :evaluate 304 | # println( 305 | # "Evaluation is in progress. Please check dashboard to view status.", 306 | # ) 307 | end 308 | rethrow() 309 | else 310 | rethrow() 311 | end 312 | end 313 | end 314 | 315 | ########################################## 316 | # Ordering hash for computing divisions # 317 | ########################################## 318 | 319 | # NOTE: `orderinghash` must either return a number or a vector of 320 | # equally-sized numbers 321 | 322 | # NOTE: This is duplicated between pt_lib.jl and the client library 323 | orderinghash(x::Any) = x # This lets us handle numbers and dates 324 | orderinghash(s::String) = Integer.(codepoint.(collect(rpad(first(s, 32), 32)))) # pad/truncate to 32 characters 325 | orderinghash(A::AbstractArray) = orderinghash(first(A)) 326 | 327 | ######################### 328 | # MOUNTED S3 FILESYSTEM # 329 | ######################### 330 | 331 | function get_s3fs_path(path) 332 | # Get information about requested object 333 | s3path = S3Path(path) 334 | bucket = s3path.bucket 335 | key = s3path.key 336 | # bucket = "banyan-cluster-data-myfirstcluster" 337 | mount = joinpath(homedir(), ".banyan", "mnt", "s3", bucket) 338 | 339 | # Ensure path to mount exists 340 | if !isdir(mount) 341 | mkpath(mount) 342 | end 343 | 344 | # Ensure something is mounted 345 | if !ismount(mount) 346 | # TODO: Store buckets from different accounts/IAMs/etc. separately 347 | try 348 | ACCESS_KEY_ID = get_aws_config()[:creds].access_key_id 349 | SECRET_ACCESS_KEY = get_aws_config()[:creds].secret_key 350 | passwd_s3fs_contents = ACCESS_KEY_ID * ":" * SECRET_ACCESS_KEY 351 | HOME = homedir() 352 | region = get_aws_config_region() 353 | run(pipeline(`echo $passwd_s3fs_contents`, "$HOME/.passwd-s3fs")) 354 | run(`chmod 600 $HOME/.passwd-s3fs`) 355 | run(`s3fs $bucket $mount -o url=https://s3.$region.amazonaws.com -o endpoint=$region -o passwd_file=$HOME/.passwd-s3fs`) 356 | catch e 357 | @error """Failed to mount S3 bucket \"$bucket\" at $mount using s3fs with error: $e. 
Please ensure s3fs is in PATH or mount manually.""" 358 | end 359 | end 360 | 361 | # Return local path to object 362 | joinpath(mount, key) 363 | end 364 | -------------------------------------------------------------------------------- /Banyan/src/clusters.jl: -------------------------------------------------------------------------------- 1 | 2 | function load_json(path::String) 3 | if startswith(path, "file://") 4 | JSON.parsefile(path[8:end]) 5 | elseif startswith(path, "s3://") 6 | error("S3 path not currently supported") 7 | # JSON.parsefile(S3Path(path, config=get_aws_config())) 8 | elseif startswith(path, "http://") || startswith(path, "https://") 9 | JSON.parse(HTTP.get(path).body) 10 | else 11 | error( 12 | "Path $path must start with \"file://\", \"s3://\", or \"http(s)://\"", 13 | ) 14 | end 15 | end 16 | 17 | # Loads file into String and returns 18 | function load_file(path::String) 19 | if startswith(path, "file://") 20 | String(read(open(path[8:end]))) 21 | elseif startswith(path, "s3://") 22 | String(read(S3Path(path))) 23 | elseif startswith(path, "http://") || startswith(path, "https://") 24 | String(HTTP.get(path).body) 25 | else 26 | error( 27 | "Path $path must start with \"file://\", \"s3://\", or \"http(s)://\"", 28 | ) 29 | end 30 | end 31 | 32 | function merge_with( 33 | banyanfile_so_far::Dict, 34 | banyanfile::Dict, 35 | selector::Function, 36 | ) 37 | # Merge where we combine arrays by taking unions of their unique elements 38 | so_far = selector(banyanfile_so_far) 39 | curr = selector(banyanfile) 40 | Base.collect(union(Set(so_far), Set(curr))) 41 | end 42 | 43 | function merge_paths_with( 44 | banyanfile_so_far::Dict, 45 | banyanfile_path::String, 46 | banyanfile::Dict, 47 | selector::Function, 48 | ) 49 | # Merge where we combine arrays by taking unions of their unique elements 50 | so_far = selector(banyanfile_so_far) 51 | curr = [getnormpath(banyanfile_path, p) for p in selector(banyanfile)] 52 | deduplicated_absolute_locations = collect(union(Set(so_far), Set(curr))) 53 | deduplicated_relative_locations = 54 | unique(loc -> basename(loc), vcat(so_far, curr)) 55 | if length(deduplicated_relative_locations) < length(deduplicated_absolute_locations) 56 | error( 57 | "Files and scripts must have unique base names: $so_far and $curr contain entries with the same base name", 58 | ) 59 | else 60 | deduplicated_absolute_locations 61 | end 62 | end 63 | 64 | function keep_same( 65 | banyanfile_so_far::Dict, 66 | banyanfile::Dict, 67 | selector::Function 68 | ) 69 | so_far = selector(banyanfile_so_far) 70 | curr = selector(banyanfile) 71 | if !isnothing(so_far) && !isnothing(curr) && so_far != curr 72 | @warn "$so_far does not match $curr in included Banyanfiles" 73 | end 74 | isnothing(so_far) ? 
curr : so_far 90 | end 91 | 92 | function getnormpath(banyanfile_path, p) 93 | if startswith(p, "file://") 94 | prefix, suffix = split(banyanfile_path, "://") 95 | banyanfile_location_path = dirname(suffix) 96 | prefix * "://" * normpath(banyanfile_location_path, last(split(p, "://"))) 97 | else 98 | p 99 | end 100 | end 101 | 102 | function merge_banyanfile_with_defaults!(banyanfile) 103 | # Populate with defaults 104 | mergewith!( 105 | (a,b)->a, 106 | banyanfile, 107 | Dict( 108 | "include" => [], 109 | "require" => Dict() 110 | ) 111 | ) 112 | mergewith!( 113 | (a,b)->a, 114 | banyanfile["require"], 115 | Dict( 116 | "language" => "jl", 117 | "cluster" => Dict(), 118 | "job" => Dict() 119 | ) 120 | ) 121 | mergewith!( 122 | (a, b) -> a, 123 | banyanfile["require"]["cluster"], 124 | Dict( 125 | "files" => [], 126 | "scripts" => [], 127 | "packages" => [], 128 | "pt_lib" => nothing, 129 | "pt_lib_info" => nothing, 130 | ), 131 | ) 132 | mergewith!( 133 | (a,b)->a, 134 | banyanfile["require"]["job"], 135 | Dict("code" => []) 136 | ) 137 | end 138 | 139 | function merge_banyanfile_with!( 140 | banyanfile_so_far::Dict, 141 | banyanfile_path::String, 142 | for_cluster_or_job::Symbol, 143 | for_creation_or_update::Symbol, 144 | ) 145 | # Load Banyanfile to merge with 146 | banyanfile = load_json(banyanfile_path) 147 | 148 | # Merge Banyanfile with defaults 149 | merge_banyanfile_with_defaults!(banyanfile) 150 | 151 | # Merge with all included 152 | for included in banyanfile["include"] 153 | merge_banyanfile_with!(banyanfile_so_far, getnormpath(banyanfile_path, included), for_cluster_or_job, for_creation_or_update) 154 | end 155 | banyanfile_so_far["include"] = [] 156 | 157 | # Merge with rest of what is in this banyanfile 158 | 159 | if for_cluster_or_job == :cluster 160 | if for_creation_or_update == :creation 161 | # Merge language 162 | banyanfile_so_far["require"]["language"] = keep_same(banyanfile_so_far, banyanfile, b -> b["require"]["language"]) 163 | else 164 | @warn "Ignoring language" 165 | end 166 | 167 | # Merge files, scripts, packages 168 | banyanfile_so_far["require"]["cluster"]["files"] = merge_paths_with( 169 | banyanfile_so_far, 170 | banyanfile_path, 171 | banyanfile, 172 | b -> b["require"]["cluster"]["files"], 173 | ) 174 | banyanfile_so_far["require"]["cluster"]["scripts"] = merge_paths_with( 175 | banyanfile_so_far, 176 | banyanfile_path, 177 | banyanfile, 178 | b -> b["require"]["cluster"]["scripts"], 179 | ) 180 | banyanfile_so_far["require"]["cluster"]["packages"] = merge_with( 181 | banyanfile_so_far, 182 | banyanfile, 183 | b -> b["require"]["cluster"]["packages"], 184 | ) 185 | 186 | # Merge pt_lib_info and pt_lib 187 | banyanfile_so_far["require"]["cluster"]["pt_lib_info"] = keep_same_path( 188 | banyanfile_so_far, 189 | banyanfile_path, 190 | banyanfile, 191 | b -> b["require"]["cluster"]["pt_lib_info"], 192 | ) 193 | banyanfile_so_far["require"]["cluster"]["pt_lib"] = keep_same_path( 194 | banyanfile_so_far, 195 | banyanfile_path, 196 | banyanfile, 197 | b -> b["require"]["cluster"]["pt_lib"], 198 | ) 199 | elseif for_cluster_or_job == :job 200 | # Merge code 201 | banyanfile_so_far["require"]["job"]["code"] = merge_with( 202 | banyanfile_so_far, 203 | banyanfile, 204 | b -> b["require"]["job"]["code"], 205 | ) 206 | # TODO: If code is too large, upload to S3 bucket and replace code with 207 | # an include statement 208 | else 209 | error("Expected for_cluster_or_job to be either :cluster or :job") 210 | end 211 | end 212 | 213 | function upload_banyanfile(banyanfile_path::String, 
s3_bucket_arn::String, cluster_name::String, for_creation_or_update::Symbol; reinstall_julia::Bool = false) 214 | # TODO: Implement this to load Banyanfile, referenced pt_lib_info, pt_lib, 215 | # code files 216 | 217 | # TODO: Validate that s3_bucket_arn exists 218 | 219 | # Load Banyanfile and merge with all included 220 | banyanfile = load_json(banyanfile_path) 221 | merge_banyanfile_with_defaults!(banyanfile) 222 | for included in banyanfile["include"] 223 | merge_banyanfile_with!(banyanfile, getnormpath(banyanfile_path, included), :cluster, for_creation_or_update) 224 | end 225 | 226 | # Load pt_lib_info if path provided 227 | pt_lib_info = banyanfile["require"]["cluster"]["pt_lib_info"] 228 | @debug pt_lib_info 229 | pt_lib_info = if pt_lib_info isa String 230 | load_json(pt_lib_info) 231 | else 232 | pt_lib_info 233 | end 234 | 235 | files = banyanfile["require"]["cluster"]["files"] 236 | scripts = banyanfile["require"]["cluster"]["scripts"] 237 | packages = banyanfile["require"]["cluster"]["packages"] 238 | pt_lib = banyanfile["require"]["cluster"]["pt_lib"] 239 | if isnothing(pt_lib) 240 | error("No pt_lib.jl provided") 241 | end 242 | if isnothing(pt_lib_info) 243 | error("No pt_lib_info.json provided") 244 | end 245 | 246 | pt_lib = [pt_lib] 247 | 248 | # Upload all files, scripts, and pt_lib to s3 bucket 249 | s3_bucket_name = last(split(s3_bucket_arn, ":")) 250 | if endswith(s3_bucket_name, "/") 251 | s3_bucket_name = s3_bucket_name[1:end-1] 252 | elseif endswith(s3_bucket_name, "/*") 253 | s3_bucket_name = s3_bucket_name[1:end-2] 254 | elseif endswith(s3_bucket_name, "*") 255 | s3_bucket_name = s3_bucket_name[1:end-1] 256 | end 257 | for f in vcat(files, scripts, pt_lib) 258 | s3_put(get_aws_config(), s3_bucket_name, basename(f), load_file(f)) 259 | end 260 | 261 | bucket = s3_bucket_name 262 | region = get_aws_config_region() 263 | 264 | # Create post-install script with base commands 265 | code = "#!/bin/bash\n" 266 | code *= "mv setup_log.txt /tmp\n" 267 | code *= "cd /home/ec2-user\n" 268 | code *= "sudo yum update -y &>> setup_log.txt\n" 269 | code *= "sudo chmod 777 setup_log.txt\n" 270 | if reinstall_julia || for_creation_or_update == :creation 271 | code *= "sudo su - ec2-user -c \"wget https://julialang-s3.julialang.org/bin/linux/x64/1.6/julia-1.6.1-linux-x86_64.tar.gz -O julia.tar.gz &>> setup_log.txt\"\n" 272 | code *= "mkdir julia &>> setup_log.txt\n" 273 | code *= "sudo su - ec2-user -c \"tar zxvf julia.tar.gz -C julia --strip-components 1 &>> setup_log.txt\"\n" 274 | code *= "rm julia.tar.gz &>> setup_log.txt\n" 275 | code *= "sudo su - ec2-user -c \"julia/bin/julia --project -e 'using Pkg; Pkg.add(name=\\\"AWSS3\\\", version=\\\"0.7\\\"); Pkg.add([\\\"AWSCore\\\", \\\"AWSSQS\\\", \\\"JSON\\\", \\\"MPI\\\", \\\"BenchmarkTools\\\"]); ENV[\\\"JULIA_MPIEXEC\\\"]=\\\"srun\\\"; ENV[\\\"JULIA_MPI_LIBRARY\\\"]=\\\"/opt/amazon/openmpi/lib64/libmpi\\\"; Pkg.build(\\\"MPI\\\"; verbose=true)' &>> setup_log.txt\"\n" 276 | end 277 | code *= "sudo amazon-linux-extras install epel\n" 278 | code *= "aws s3 cp s3://banyan-executor /home/ec2-user --recursive\n" 279 | code *= "sudo yum -y install s3fs-fuse\n" 280 | code *= "sudo su - ec2-user -c \"mkdir -p /home/ec2-user/mnt/$bucket\"\n" 281 | code *= "sudo su - ec2-user -c \"/usr/bin/s3fs $bucket /home/ec2-user/mnt/$bucket -o iam_role=auto -o url=https://s3.$region.amazonaws.com -o endpoint=$region\"\n" 282 | code *= "sudo su - ec2-user -c \"aws configure set region $region\"\n" 283 | 284 | # Append to 
post-install script downloading files, scripts, and pt_lib onto the cluster 285 | for f in vcat(files, scripts, pt_lib) 286 | code *= 287 | "sudo su - ec2-user -c \"aws s3 cp s3://" * s3_bucket_name * "/" * 288 | basename(f) * 289 | " /home/ec2-user/\"\n" 290 | end 291 | 292 | # Append to post-install script running scripts on the cluster 293 | for script in scripts 294 | fname = basename(script) 295 | code *= "sudo su - ec2-user -c \"bash /home/ec2-user/$fname\"\n" 296 | end 297 | 298 | # Append to post-install script installing Julia dependencies 299 | for pkg in packages 300 | pkg_spec = split(pkg, "@") 301 | if length(pkg_spec) == 1 302 | code *= "sudo su - ec2-user -c \"julia/bin/julia --project -e 'using Pkg; Pkg.add(name=\\\"$pkg\\\")' &>> setup_log.txt \"\n" 303 | elseif length(pkg_spec) == 2 304 | name, version = pkg_spec 305 | code *= "sudo su - ec2-user -c \"julia/bin/julia --project -e 'using Pkg; Pkg.add(name=\\\"$name\\\", version=\\\"$version\\\")' &>> setup_log.txt \"\n" 306 | end 307 | end 308 | 309 | # Upload post_install script to s3 bucket 310 | post_install_script = "banyan_" * cluster_name * "_script.sh" 311 | code *= 312 | "touch /home/ec2-user/update_finished\n" * 313 | "aws s3 cp /home/ec2-user/update_finished " * 314 | "s3://" * s3_bucket_name * "/\n" 315 | @debug s3_bucket_name 316 | s3_put(get_aws_config(), s3_bucket_name, post_install_script, code) 317 | @debug code 318 | @debug pt_lib_info 319 | return pt_lib_info 320 | end 321 | 322 | # Creates a cluster; a random `name` is generated if none is provided 323 | function create_cluster(; 324 | name::Union{String,Nothing} = nothing, 325 | instance_type::String = "m4.4xlarge", 326 | max_num_nodes::Int = 8, 327 | banyanfile_path::Union{String,Nothing} = nothing, 328 | iam_policy_arn::Union{String,Nothing} = nothing, 329 | s3_bucket_arn::Union{String,Nothing} = nothing, 330 | s3_bucket_name::Union{String,Nothing} = nothing, 331 | vpc_id = nothing, 332 | subnet_id = nothing, 333 | kwargs..., 334 | ) 335 | @debug "Creating cluster" 336 | 337 | # Construct arguments 338 | if isnothing(s3_bucket_arn) && !isnothing(s3_bucket_name) 339 | s3_bucket_arn = "arn:aws:s3:::$s3_bucket_name*" 340 | end 341 | 342 | # Configure using parameters 343 | c = configure(; require_ec2_key_pair_name = true, kwargs...) 
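    # NOTE: `configure` merges any credentials and AWS settings passed through
    # `kwargs` into the local Banyan configuration and errors out early if no
    # EC2 key pair is on record, since cluster creation cannot proceed without
    # one. A hypothetical invocation that satisfies this requirement:
    #
    #     create_cluster(
    #         name = "my-cluster",
    #         instance_type = "m4.4xlarge",
    #         max_num_nodes = 8,
    #         ec2_key_pair_name = "my-key-pair",
    #         banyanfile_path = "file://res/Banyanfile.json",
    #     )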
344 | name = if !isnothing(name) 345 | name 346 | else 347 | "banyan-cluster-" * randstring(6) 348 | end 349 | if isnothing(s3_bucket_arn) 350 | s3_bucket_arn = "arn:aws:s3:::banyan-cluster-data-" * name * bytes2hex(rand(UInt8, 16)) 351 | s3_bucket_name = last(split(s3_bucket_arn, ":")) 352 | s3_create_bucket(get_aws_config(), s3_bucket_name) 353 | elseif !(rstrip(last(split(s3_bucket_arn, ":")), ['/', '*']) in s3_list_buckets(get_aws_config())) 354 | error("Bucket $s3_bucket_arn does not exist in connected AWS account") 355 | end 356 | 357 | # Construct cluster creation request 358 | cluster_config = Dict( 359 | "cluster_name" => name, 360 | "instance_type" => instance_type, #"t3.large", "c5.2xlarge" 361 | "num_nodes" => max_num_nodes, 362 | "ec2_key_pair" => c["aws"]["ec2_key_pair_name"], 363 | "aws_region" => get_aws_config_region(), 364 | "s3_read_write_resource" => s3_bucket_arn, 365 | ) 366 | if !isnothing(banyanfile_path) 367 | pt_lib_info = upload_banyanfile(banyanfile_path, s3_bucket_arn, name, :creation) 368 | cluster_config["pt_lib_info"] = pt_lib_info 369 | end 370 | if !isnothing(iam_policy_arn) 371 | cluster_config["additional_policy"] = iam_policy_arn # "arn:aws:s3:::banyanexecutor*" 372 | end 373 | if !isnothing(vpc_id) 374 | cluster_config["vpc_id"] = vpc_id 375 | end 376 | if !isnothing(subnet_id) 377 | cluster_config["subnet_id"] = subnet_id 378 | end 379 | 380 | # Send request to create cluster 381 | send_request_get_response(:create_cluster, cluster_config) 382 | end 383 | 384 | function destroy_cluster(name::String; kwargs...) 385 | @debug "Destroying cluster" 386 | configure(; kwargs...) 387 | send_request_get_response(:destroy_cluster, Dict("cluster_name" => name)) 388 | end 389 | 390 | # TODO: Update website display 391 | # TODO: Implement load_banyanfile 392 | function update_cluster(; 393 | name::Union{String,Nothing} = nothing, 394 | banyanfile_path::Union{String,Nothing} = nothing, 395 | force = false, 396 | kwargs..., 397 | ) 398 | @info "Updating cluster" 399 | 400 | # Configure 401 | configure(; kwargs...) 
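    # Resolve which cluster to update: if no `name` is given, fall back to the
    # first cluster on the account (see below). A hypothetical call relying on
    # this default:
    #
    #     update_cluster(banyanfile_path = "file://res/Banyanfile.json")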
402 | cluster_name = if isnothing(name) 403 | clusters = get_clusters() 404 | if length(clusters) == 0 405 | error("Failed to update cluster: you don't have any clusters created") 406 | end 407 | first(keys(clusters)) 408 | else 409 | name 410 | end 411 | 412 | # Force by setting cluster to running 413 | if force 414 | assert_cluster_is_ready(name=cluster_name) 415 | end 416 | 417 | # Require restart: pcluster_additional_policy, s3_read_write_resource, num_nodes 418 | # No restart: Banyanfile 419 | 420 | if !isnothing(banyanfile_path) 421 | # Retrieve the location of the current post_install script in S3 and upload 422 | # the updated version to the same location 423 | s3_bucket_arn = get_cluster(cluster_name).s3_bucket_arn 424 | if endswith(s3_bucket_arn, "/") 425 | s3_bucket_arn = s3_bucket_arn[1:end-1] 426 | elseif endswith(s3_bucket_arn, "/*") 427 | s3_bucket_arn = s3_bucket_arn[1:end-2] 428 | elseif endswith(s3_bucket_arn, "*") 429 | s3_bucket_arn = s3_bucket_arn[1:end-1] 430 | end 431 | 432 | # Upload to S3 433 | pt_lib_info = upload_banyanfile(banyanfile_path, s3_bucket_arn, cluster_name, :update, reinstall_julia=get(kwargs, :reinstall_julia, false)) 434 | 435 | # Upload pt_lib_info 436 | send_request_get_response( 437 | :update_cluster, 438 | Dict( 439 | "cluster_name" => cluster_name, 440 | "pt_lib_info" => pt_lib_info 441 | # TODO: Send banyanfile here 442 | ), 443 | ) 444 | end 445 | end 446 | 447 | function assert_cluster_is_ready(; 448 | name::String, 449 | kwargs..., 450 | ) 451 | @info "Setting cluster status to running" 452 | 453 | # Configure 454 | configure(; kwargs...) 455 | 456 | send_request_get_response( 457 | :set_cluster_ready, 458 | Dict( 459 | "cluster_name" => name, 460 | ), 461 | ) 462 | end 463 | 464 | struct Cluster 465 | name::String 466 | status::Symbol 467 | num_jobs_running::Int32 468 | s3_bucket_arn::String 469 | end 470 | 471 | parsestatus(status) = 472 | if status == "creating" 473 | :creating 474 | elseif status == "notready" 475 | :notready 476 | elseif status == "destroying" 477 | :destroying 478 | elseif status == "updating" 479 | :updating 480 | elseif status == "failed" 481 | :failed 482 | elseif status == "starting" 483 | :starting 484 | elseif status == "stopped" 485 | :stopped 486 | elseif status == "running" 487 | :running 488 | elseif status == "terminated" 489 | :terminated 490 | else 491 | error("Unexpected status ", status) 492 | end 493 | 494 | function get_clusters(; kwargs...) 495 | @debug "Downloading description of clusters" 496 | configure(; kwargs...) 497 | response = 498 | send_request_get_response(:describe_clusters, Dict{String,Any}()) 499 | @debug response 500 | Dict( 501 | name => Cluster( 502 | name, 503 | parsestatus(c["status"]), 504 | c["num_jobs"], 505 | c["s3_read_write_resource"], 506 | ) for (name, c) in response["clusters"] 507 | ) 508 | end 509 | 510 | get_cluster(name::String; kwargs...) 
= get_clusters(; kwargs...)[name] 511 | get_cluster() = get_cluster(get_cluster_name()) 512 | -------------------------------------------------------------------------------- /Banyan/res/utils.jl: -------------------------------------------------------------------------------- 1 | using Base: Integer, AbstractVecOrTuple 2 | 3 | #################### 4 | # Helper functions # 5 | #################### 6 | 7 | isa_df(obj) = @isdefined(AbstractDataFrame) && obj isa AbstractDataFrame 8 | isa_array(obj) = obj isa AbstractArray 9 | 10 | get_worker_idx(comm::MPI.Comm) = MPI.Comm_rank(comm) + 1 11 | get_nworkers(comm::MPI.Comm) = MPI.Comm_size(comm) 12 | 13 | get_partition_idx(batch_idx, nbatches, comm::MPI.Comm) = 14 | (get_worker_idx(comm) - 1) * nbatches + batch_idx 15 | 16 | get_npartitions(nbatches, comm::MPI.Comm) = 17 | nbatches * get_nworkers(comm) 18 | 19 | split_len(src_len::Integer, idx::Integer, npartitions::Integer) = 20 | if npartitions > 1 21 | # dst_len = Int64(cld(src_len, npartitions)) 22 | dst_len = cld(src_len, npartitions) 23 | dst_start = min((idx - 1) * dst_len + 1, src_len + 1) 24 | dst_end = min(idx * dst_len, src_len) 25 | dst_start:dst_end 26 | else 27 | 1:src_len 28 | end 29 | 30 | split_len(src_len, batch_idx::Integer, nbatches::Integer, comm::MPI.Comm) = 31 | split_len( 32 | src_len, 33 | get_partition_idx(batch_idx, nbatches, comm), 34 | get_npartitions(nbatches, comm) 35 | ) 36 | 37 | split_on_executor(src, d::Integer, i) = 38 | if isa_df(src) 39 | @view src[i, :] 40 | elseif isa_array(src) 41 | selectdim(src, d, i) 42 | else 43 | error("Expected split across either dimension of an AbstractArray or rows of an AbstractDataFrame") 44 | end 45 | 46 | split_on_executor(src, dim::Integer, batch_idx::Integer, nbatches::Integer, comm::MPI.Comm) = 47 | begin 48 | npartitions = get_npartitions(nbatches, comm) 49 | if npartitions > 1 50 | split_on_executor( 51 | src, 52 | dim, 53 | split_len( 54 | size(src, dim), 55 | get_partition_idx(batch_idx, nbatches, comm), 56 | npartitions 57 | ) 58 | ) 59 | else 60 | src 61 | end 62 | end 63 | 64 | function merge_on_executor(obj...; key=nothing) 65 | # @show obj 66 | # @show length(obj) 67 | # @show typeof(obj) 68 | first_obj = first(obj) 69 | # @show first_obj 70 | # @show typeof(first_obj) 71 | # @show length(first_obj) 72 | if isa_df(first_obj) 73 | # If this is a dataframe then we ignore the grouping key 74 | vcat(obj...) 
75 | elseif isa_array(first_obj) 76 | # @show obj 77 | cat(obj...; dims=key) 78 | else 79 | # error("Expected either AbstractDataFrame or AbstractArray for concatenation") 80 | first_obj 81 | end 82 | end 83 | 84 | function merge_on_executor(kind::Symbol, vbuf::MPI.VBuffer, nchunks::Integer; key) 85 | chunks = [ 86 | begin 87 | chunk = view( 88 | vbuf.data, 89 | (vbuf.displs[i]+1): 90 | (vbuf.displs[i] + vbuf.counts[i]) 91 | ) 92 | if kind == :df 93 | DataFrame(Arrow.Table(IOBuffer(chunk))) 94 | elseif kind == :bits 95 | chunk 96 | else 97 | deserialize(IOBuffer(chunk)) 98 | end 99 | end 100 | for i in 1:nchunks 101 | ] 102 | merge_on_executor(chunks...; key=key) 103 | end 104 | 105 | function get_partition_idx_from_divisions(val, divisions; boundedlower=false, boundedupper=false) 106 | # The given divisions may be returned from `get_divisions` 107 | oh = orderinghash(val) 108 | for (i, div) in enumerate(divisions) 109 | isfirstdivision = i == 1 110 | islastdivision = i == length(divisions) 111 | if ((!boundedlower && isfirstdivision) || oh >= first(div)[1]) && 112 | ((!boundedupper && islastdivision) || oh < last(div)[2]) 113 | return i 114 | end 115 | end 116 | -1 117 | end 118 | 119 | isoverlapping(a::AbstractRange, b::AbstractRange) = 120 | a.start ≤ b.stop && b.start ≤ a.stop 121 | 122 | # NOTE: This function is shared between the client library and the PT library 123 | to_jl_value_contents(jl) = begin 124 | # Handle functions defined in a module 125 | # TODO: Document this special case 126 | # if jl isa Function && !(isdefined(Base, jl) || isdefined(Core, jl) || isdefined(Main, jl)) 127 | if jl isa Expr && eval(jl) isa Function 128 | jl = Dict("is_banyan_udf" => true, "code" => jl) 129 | end 130 | 131 | # Convert Julia object to string 132 | io = IOBuffer() 133 | iob64_encode = Base64EncodePipe(io) 134 | serialize(iob64_encode, jl) 135 | close(iob64_encode) 136 | String(take!(io)) 137 | end 138 | 139 | # NOTE: This function is shared between the client library and the PT library 140 | from_jl_value_contents(jl_value_contents) = begin 141 | # Convert string to Julia object 142 | io = IOBuffer() 143 | iob64_decode = Base64DecodePipe(io) 144 | write(io, jl_value_contents) 145 | seekstart(io) 146 | res = deserialize(iob64_decode) 147 | 148 | # Handle functions defined in a module 149 | if res isa Dict && haskey(res, "is_banyan_udf") && res["is_banyan_udf"] 150 | eval(res["code"]) 151 | else 152 | res 153 | end 154 | end 155 | 156 | # NOTE: This is duplicated between pt_lib.jl and the client library 157 | orderinghash(x::Any) = x # This lets us handle numbers and dates 158 | orderinghash(s::String) = Integer.(codepoint.(collect(rpad(first(s, 32), 32)))) # pad/truncate to 32 characters 159 | orderinghash(A::Array) = orderinghash(first(A)) 160 | 161 | to_vector(v::Vector) = v 162 | to_vector(v) = [v] 163 | 164 | function get_divisions(divisions, npartitions) 165 | # This function accepts a list of divisions where each division is a tuple 166 | # of ordering hashes (values returned by `orderinghash` which are either 167 | # numbers or vectors of numbers). It also accepts a number of partitions to 168 | # produce divisions for. The result is a list of length `npartitions` 169 | # containing lists of divisions for each partition. A partition may contain 170 | # multiple divisions. 171 | 172 | ndivisions = length(divisions) 173 | if ndivisions >= npartitions 174 | # If there are more divisions than partitions, we can distribute them 175 | # easily. Each partition gets 0 or more divisions. 
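        # For example (hypothetical values): with `divisions =
        # [(1, 10), (10, 20), (20, 30), (30, 40)]` and `npartitions = 2`,
        # `split_len(4, 1, 2)` yields 1:2 and `split_len(4, 2, 2)` yields 3:4,
        # so partition 1 is assigned [(1, 10), (10, 20)] and partition 2 is
        # assigned [(20, 30), (30, 40)].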
176 | # TODO: Ensure usage of div here and in sampling (in PT 177 | # library (here), annotation, and in locations) doesn't result in 0 or 178 | # instead we use ceiling division 179 | # ndivisions_per_partition = div(ndivisions, npartitions) 180 | [ 181 | begin 182 | # islastpartition = partition_idx == npartitions 183 | # firstdivisioni = ((partition_idx-1) * ndivisions_per_partition) + 1 184 | # lastdivisioni = islastpartition ? ndivisions : partition_idx * ndivisions_per_partition 185 | # divisions[firstdivisioni:lastdivisioni] 186 | divisions[split_len(ndivisions, partition_idx, npartitions)] 187 | end 188 | for partition_idx in 1:npartitions 189 | ] 190 | else 191 | # Otherwise, each division must be shared among 1 or more partitions 192 | allsplitdivisions = [] 193 | # npartitions_per_division = div(npartitions, ndivisions) 194 | 195 | # Iterate through the divisions and split each of them and find the 196 | # one that contains a split that this partition must own and use as 197 | # its `partition_divisions` 198 | for (division_idx, division) in enumerate(divisions) 199 | # Determine the range (from `firstpartitioni` to `lastpartitioni`) of 200 | # partitions that own this division 201 | # islastdivision = division_idx == ndivisions 202 | # firstpartitioni = ((division_idx-1) * npartitions_per_division) + 1 203 | # lastpartitioni = islastdivision ? npartitions : division_idx * npartitions_per_division 204 | # partitionsrange = firstpartitioni:lastpartitioni 205 | partitionsrange = split_len(npartitions, division_idx, ndivisions) 206 | 207 | # # If the current partition is in that division, compute the 208 | # # subdivision it should use for its partition 209 | # if partition_idx in partitionsrange 210 | 211 | # We need to split the division among all the partitions in 212 | # its range 213 | ndivisionsplits = length(partitionsrange) 214 | 215 | # Get the `Vector{Number}`s to interpolate between 216 | divisionbegin = to_vector(first(division)) 217 | divisionend = to_vector(last(division)) 218 | 219 | # @show divisionbegin 220 | # @show divisionend 221 | 222 | # Initialize divisions for each split 223 | splitdivisions = [[copy(divisionbegin), copy(divisionend)] for _ in 1:ndivisionsplits] 224 | 225 | # Adjust the divisions for each split to interpolate. The result 226 | # of an `orderinghash` call can be an array (in the case of 227 | # strings), so we must iterate through that array in order to 228 | # interpolate at the first element in that array where there is a 229 | # difference. 230 | for (i, (dbegin, dend)) in enumerate(zip(divisionbegin, divisionend)) 231 | # Find the first index in the `Vector{Number}` where 232 | # there is a difference that we can interpolate between 233 | if dbegin != dend 234 | # dpersplit = div(dend-dbegin, ndivisionsplits) 235 | # Iterate through each split 236 | # @show dpersplit 237 | # @show dbegin 238 | # @show dend 239 | start = copy(dbegin) 240 | for j in 1:ndivisionsplits 241 | # Update the start and end of the division 242 | # islastsplit = j == ndivisionsplits 243 | splitdivisions[j][1][i] = j == 1 ? dbegin : start 244 | start += cld(dend-dbegin, ndivisionsplits) 245 | start = min(start, dend) 246 | splitdivisions[j][2][i] = j == ndivisionsplits ? dend : start 247 | # splitdivisions[j][1][i] = dbegin + (dpersplit * (j-1)) 248 | # splitdivisions[j][2][i] = islastsplit ? 
dend : dbegin + dpersplit * j 249 | end 250 | 251 | # Stop if we have found a difference we can 252 | # interpolate between 253 | # TODO: If the difference is not that much, 254 | # interpolate between multiple consecutive 255 | # differing characters together 256 | break 257 | end 258 | end 259 | 260 | # Convert back to `Number` if the divisions were originally 261 | # `Number`s. We support either numbers or lists of numbers for the 262 | # ordering hashes that we use for the min-max bounds. 263 | if !(first(division) isa Vector) 264 | splitdivisions = [ 265 | # NOTE: When porting this stuff to Python, be sure 266 | # to take into account the fact that Julia treats 267 | # many values as arrays 268 | (first(splitdivisionbegin), first(splitdivisionend)) 269 | for (splitdivisionbegin, splitdivisionend) in splitdivisions 270 | ] 271 | end 272 | 273 | # # Get the split of the division that this partition should own 274 | # splitdivision = splitdivisions[1+partition_idx-first(partitionsrange)] 275 | 276 | # # Stop because we have found a division that this partition 277 | # # is supposed to own a split from 278 | # break 279 | 280 | # Each partition must have a _list_ of divisions so we must have a list 281 | # for each partition 282 | for splitdivision in splitdivisions 283 | push!(allsplitdivisions, [splitdivision]) 284 | end 285 | 286 | # end 287 | end 288 | allsplitdivisions 289 | end 290 | end 291 | 292 | ######################## 293 | # Helper MPI functions # 294 | ######################## 295 | 296 | # TODO: Fix below function for reducing values of non-equal sizes 297 | # TODO: Make Allreducev version of below function 298 | 299 | # function Reducev(value, op, comm::MPI.Comm) 300 | # # Reduces values on all processes to a single value on rank 0. 301 | # # 302 | # # This function does the same thing as the function MPI_Reduce using 303 | # # only MPI_Send and MPI_Recv. As shown, it operates with additions on 304 | # # integers, so you could trivially use MPI_Reduce, but for operations 305 | # # on variable size structs for which you cannot define an MPI_Datatype, 306 | # # you can still use this method, by modifying it to use your op 307 | # # and your data structure. 308 | 309 | # # TODO: Actually determine buffer 310 | # tag = 0 311 | # size = get_nworkers(comm) 312 | # rank = get_worker_idx(comm)-1 313 | # lastpower = 1 << log2(size) 314 | 315 | # # each of the ranks greater than the last power of 2 less than size 316 | # # need to downshift their data, since the binary tree reduction below 317 | # # only works when N is a power of two. 
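    # # (Hypothetical illustration of the intended scheme: with size = 6 the
    # # largest power of two that fits is lastpower = 4, so ranks 4 and 5
    # # first send their values down to ranks 0 and 1, and the remaining 4
    # # ranks then reduce pairwise in log2(4) = 2 rounds.)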
318 | # for i in lastpower:(size-1) 319 | # if rank == i 320 | # MPI.send(value, i-lastpower, tag, comm) 321 | # end 322 | # for i in 0:(size-lastpower-1) 323 | # if rank == i 324 | # MPI.Recv!(recvbuffer, i+lastpower, tag, comm) 325 | # value = op(value, recvbuffer) 326 | # end 327 | # end 328 | 329 | # for d in 0:(fastlog2(lastpower)-1) 330 | # k = 0 331 | # while k < lastpower 332 | # k += 1 << (d + 1) 333 | # end 334 | # receiver = k 335 | # sender = k + (1 << d) 336 | # if rank == receiver 337 | # MPI.Recv!(recvbuffer, 1, sender, tag) 338 | # value = op(value, recvbuffer) 339 | # elseif rank == sender 340 | # MPI.Send(value, 1, receiver, tag) 341 | # end 342 | # end 343 | # value 344 | # end 345 | 346 | # function fastlog2(v::UInt32) 347 | # multiply_de_bruijn_bit_position::Vector{Int32} = [ 348 | # 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 349 | # 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 350 | # ] 351 | 352 | # v |= v >> 1 # first round down to one less than a power of 2 353 | # v |= v >> 2 354 | # v |= v >> 4 355 | # v |= v >> 8 356 | # v |= v >> 16 357 | 358 | # # TODO: Fix this 359 | # multiply_de_bruijn_bit_position[(UInt32(v * 0x07C4ACDDU) >> 27) + 1] 360 | # end 361 | 362 | # function tobuf(obj)::Tuple{Symbol, MPI.Buffer} 363 | function tobuf(obj) 364 | # We pass around Julia objects between MPI processes in different ways 365 | # depending on the data type. For simple isbitstype data we keep it as-is 366 | # and use the simple C-like data layout for fast transfer. For dataframes, 367 | # we use Arrow data layout for zero-copy deserialization. For everything 368 | # else including variably-sized arrays and arbitrary Julia objects, we 369 | # simply serialize and deserialize using the Serialization module in the 370 | # Julia standard library. 371 | 372 | if isbits(obj) 373 | (:bits, MPI.Buffer(Ref(obj))) 374 | # (:bits, MPI.Buffer(obj)) 375 | # (:bits, MPI.Buffer(Ref(obj))) 376 | elseif isa_array(obj) && isbitstype(first(typeof(obj).parameters)) && ndims(obj) == 1 377 | # (:bits, MPI.Buffer(obj)) 378 | (:bits, MPI.Buffer(obj)) 379 | elseif isa_df(obj) 380 | io = IOBuffer() 381 | Arrow.write(io, obj) 382 | # (:df, MPI.Buffer(view(io.data, 1:position(io)))) 383 | (:df, MPI.Buffer(view(io.data, 1:io.size))) 384 | else 385 | io = IOBuffer() 386 | serialize(io, obj) 387 | (:unknown, MPI.Buffer(view(io.data, 1:io.size))) 388 | # (:unknown, io) 389 | end 390 | end 391 | 392 | function buftovbuf(buf::MPI.Buffer, comm::MPI.Comm)::MPI.VBuffer 393 | # This function expects that the given buf has buf.data being an array. 394 | # Basically what it does is it takes the result of a call to tobuf above 395 | # on each process and constructs a VBuffer with the sum of the sizes of the 396 | # buffers on different processes. 397 | sizes = MPI.Allgather(buf.count, comm) 398 | # NOTE: This function should only be used for variably-sized buffers for 399 | # receiving data because the returned buffer contains zeroed-out memory. 400 | VBuffer(similar(buf.data, sum(sizes)), sizes) 401 | end 402 | 403 | function bufstosendvbuf(bufs::Vector{MPI.Buffer}, comm::MPI.Comm)::MPI.VBuffer 404 | sizes = [length(buf.data) for buf in bufs] 405 | VBuffer(vcat(map(buf -> buf.data, bufs)...), sizes) 406 | end 407 | 408 | function bufstorecvvbuf(bufs::Vector{MPI.Buffer}, comm::MPI.Comm)::MPI.VBuffer 409 | # This function expects that each given buf has buf.data being an array and 410 | # that the number of bufs in bufs is equal to the size of the communicator. 
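    # For example (hypothetical sizes): if this rank will receive chunks of 8,
    # 0, and 16 bytes from ranks 0, 1, and 2, the Alltoall below fills `sizes`
    # with [8, 0, 16] and the returned VBuffer wraps a single 24-byte array
    # with displacements [0, 8, 8].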
411 | # sizes = MPI.Allgather(length(buf.data), comm) 412 | sizes = MPI.Alltoall(MPI.UBuffer([length(buf.data) for buf in bufs], 1), comm) 413 | # NOTE: Ensure that the data fields of the bufs are initialized to have the 414 | # right data type (e.g., Vector{UInt8} or Vector{Int64}) 415 | # We use `similar` here because we want zeroed out memory to receive data. 416 | VBuffer(similar(first(bufs).data, sum(sizes)), sizes) 417 | end 418 | 419 | function frombuf(kind, obj) 420 | if kind == :bits && obj isa Ref 421 | # TODO: Ensure that the "dereference" here is necessary 422 | obj[] 423 | elseif kind == :bits 424 | obj 425 | elseif kind == :df 426 | DataFrame(Arrow.Table(obj), copycols=false) 427 | else 428 | deserialize(obj) 429 | end 430 | end 431 | 432 | function getpath(path) 433 | if startswith(path, "http://") || startswith(path, "https://") 434 | # TODO: First check for size of file and only download to 435 | # disk if it doesn't fit in free memory 436 | hashed_path = string(hash(path)) 437 | joined_path = joinpath(tempdir(), hashed_path) 438 | if !isfile(joined_path) 439 | # NOTE: Even though we are storing in /tmp, this is 440 | # effectively caching the download. If this is undesirable 441 | # to a user, a short-term solution is to use a different 442 | # URL each time (e.g., add a dummy query to the end of the 443 | # URL) 444 | download(path, joined_path) 445 | end 446 | joined_path 447 | elseif startswith(path, "s3://") 448 | replace(path, "s3://" => "/home/ec2-user/mnt/") 449 | # NOTE: We expect that the ParallelCluster instance was set up 450 | # to have the S3 filesystem mounted at /mnt/ 451 | else 452 | path 453 | end 454 | end --------------------------------------------------------------------------------