├── .Rbuildignore
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test_AWSSnowParam-class.R
    │   └── test_AWSBatchJobsParam-class.R
├── vignettes
    ├── add-storage.png
    ├── inbound_rules.png
    ├── outbound_rules.png
    ├── review-and-launch.png
    ├── configure-instance.png
    ├── choose-instance-type.png
    ├── AWSParallel-AWSSnowParam-tutorial.Rmd
    └── AWSParallel-AWSBatchJobsParam-tutorial.Rmd
├── .gitignore
├── inst
    ├── script
    │   ├── config_files
    │   │   ├── BatchJobs.R
    │   │   └── sgetemplate
    │   │   │   └── simple.tmpl
    │   ├── snowparam-demo.R
    │   └── batchjobs-demo.R
    └── extdata
    │   └── config.ini
├── man
    ├── getAwsAmiId.Rd
    ├── awsProfile.Rd
    ├── awsInstance.Rd
    ├── awsLaunchMasterOnEc2.Rd
    ├── awsParallelListClusters.Rd
    ├── awsLaunchMaster.Rd
    ├── getAwsRequirements.Rd
    ├── awsSecurityGroup.Rd
    ├── awsCluster.Rd
    ├── awsDetectVpc.Rd
    ├── awsWorkers.Rd
    ├── awsDetectSubnetOnMaster.Rd
    ├── awsDetectMaster.Rd
    ├── awsCredentialsPath.Rd
    ├── awsSubnet.Rd
    ├── awsSshKeyPair.Rd
    ├── awsAmiId.Rd
    ├── bpsuspend.Rd
    ├── awsDetectOrCreateSubnet.Rd
    ├── awsDetectSecurityGroup.Rd
    ├── transferFromCluster.Rd
    ├── transferToCluster.Rd
    ├── bpteardown.Rd
    ├── bpsetup.Rd
    ├── awsInstanceType.Rd
    ├── config_starcluster.Rd
    ├── AWSBatchJobsParam-class.Rd
    ├── AWSSnowParam-class.Rd
    ├── AWSBatchJobsParam.Rd
    └── AWSSnowParam.Rd
├── R
    ├── zzz.R
    ├── helper_functions.R
    ├── AWSSnowParam-class.R
    └── AWSBatchJobsParam-class.R
├── DESCRIPTION
└── NAMESPACE


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(AWSParallel)
3 | ## Entry point for the package test suite (test files live in tests/testthat/).
4 | test_check("AWSParallel")
5 | 


--------------------------------------------------------------------------------
/vignettes/add-storage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/AWSParallel/devel/vignettes/add-storage.png


--------------------------------------------------------------------------------
/vignettes/inbound_rules.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/AWSParallel/devel/vignettes/inbound_rules.png


--------------------------------------------------------------------------------
/vignettes/outbound_rules.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/AWSParallel/devel/vignettes/outbound_rules.png


--------------------------------------------------------------------------------
/vignettes/review-and-launch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/AWSParallel/devel/vignettes/review-and-launch.png


--------------------------------------------------------------------------------
/vignettes/configure-instance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/AWSParallel/devel/vignettes/configure-instance.png


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | *.Rproj
6 | *.Rcheck
7 | inst/doc
8 | inst/script/extra_files
9 | 


--------------------------------------------------------------------------------
/vignettes/choose-instance-type.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/AWSParallel/devel/vignettes/choose-instance-type.png


--------------------------------------------------------------------------------
/inst/script/config_files/BatchJobs.R:
--------------------------------------------------------------------------------
1 | cluster.functions = makeClusterFunctionsSGE('/home/ubuntu/.sgetemplate/simple.tmpl')
2 | mail.start = "none"
3 | mail.done = "none"
4 | mail.error = "none"
5 | db.driver = "SQLite"
6 | db.options = list()
7 | debug = FALSE
8 | 


--------------------------------------------------------------------------------
/man/getAwsAmiId.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helper_functions.R
 3 | \name{getAwsAmiId}
 4 | \alias{getAwsAmiId}
 5 | \title{Get name of bioconductor release version AMI}
 6 | \usage{
 7 | getAwsAmiId()
 8 | }
 9 | \value{
10 | Bioconductor release version
11 | }
12 | 


--------------------------------------------------------------------------------
/man/awsProfile.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AWSBatchJobsParam-class.R
 3 | \name{awsProfile}
 4 | \alias{awsProfile}
 5 | \title{Get the awsProfile being used}
 6 | \usage{
 7 | awsProfile(x)
 8 | }
 9 | \arguments{
10 | \item{AWSBatchJobsParam}{}
11 | }
12 | \description{
13 | Get the awsProfile being used
14 | }
15 | 


--------------------------------------------------------------------------------
/man/awsInstance.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AWSSnowParam-class.R
 3 | \name{awsInstance}
 4 | \alias{awsInstance}
 5 | \title{Get AWS instance attributes in a list}
 6 | \usage{
 7 | awsInstance(x)
 8 | }
 9 | \arguments{
10 | \item{AWSSnowParam}{object}
11 | }
12 | \description{
13 | Get AWS instance attributes in a list
14 | }
15 | 


--------------------------------------------------------------------------------
/man/awsLaunchMasterOnEc2.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AWSSnowParam-class.R
 3 | \name{awsLaunchMasterOnEc2}
 4 | \alias{awsLaunchMasterOnEc2}
 5 | \title{Launch master node on AWS EC2 if credentials are valid}
 6 | \usage{
 7 | awsLaunchMasterOnEc2(x)
 8 | }
 9 | \description{
10 | Launch master node on AWS EC2 if credentials are valid
11 | }
12 | 


--------------------------------------------------------------------------------
/man/awsParallelListClusters.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helper_functions.R
 3 | \name{awsParallelListClusters}
 4 | \alias{awsParallelListClusters}
 5 | \title{Function to return the names of Clusters launched.}
 6 | \usage{
 7 | awsParallelListClusters()
 8 | }
 9 | \description{
10 | Function to return the names of Clusters launched.
11 | }
12 | 


--------------------------------------------------------------------------------
/man/awsLaunchMaster.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helper_functions.R
 3 | \name{.awsLaunchMaster}
 4 | \alias{.awsLaunchMaster}
 5 | \title{Launch AWS master node}
 6 | \usage{
 7 | .awsLaunchMaster(x)
 8 | }
 9 | \value{
10 | list
11 | }
12 | \description{
13 | Launch AWS master node
14 | }
15 | \section{Functions}{
16 | \itemize{
17 | \item \code{.awsLaunchMaster}: Internal function to launch master node.
18 | }}
19 | 
20 | 


--------------------------------------------------------------------------------
/man/getAwsRequirements.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helper_functions.R
 3 | \name{getAwsRequirements}
 4 | \alias{getAwsRequirements}
 5 | \title{Get AWS security requirements}
 6 | \usage{
 7 | getAwsRequirements()
 8 | }
 9 | \value{
10 | list, containing VPC, subnet, security group information
11 | }
12 | \description{
13 | Security requirements to launch the EC2 instances into a VPC,
14 | subnet, and security group
15 | }
16 | 


--------------------------------------------------------------------------------
/man/awsSecurityGroup.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AWSSnowParam-class.R
 3 | \name{awsSecurityGroup}
 4 | \alias{awsSecurityGroup}
 5 | \title{Get AWS Security group for the EC2 instance, which defines inbound
 6 | and outbound traffic.}
 7 | \usage{
 8 | awsSecurityGroup(x)
 9 | }
10 | \arguments{
11 | \item{AWSSnowParam}{}
12 | }
13 | \description{
14 | Get AWS Security group for the EC2 instance, which defines inbound
15 | and outbound traffic.
16 | }
17 | 


--------------------------------------------------------------------------------
/man/awsCluster.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AWSSnowParam-class.R
 3 | \name{awsCluster}
 4 | \alias{awsCluster}
 5 | \title{Get the AWSSnowParam object currently launched. Only one
 6 | AWSSnowParam object can be started within one session.}
 7 | \usage{
 8 | awsCluster()
 9 | }
10 | \value{
11 | AWSSnowParam object
12 | }
13 | \description{
14 | Get the AWSSnowParam object currently launched. Only one
15 | AWSSnowParam object can be started within one session.
16 | }
17 | 


--------------------------------------------------------------------------------
/man/awsDetectVpc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helper_functions.R
 3 | \name{.awsDetectVpc}
 4 | \alias{.awsDetectVpc}
 5 | \title{Describe the VPC that needs to be used}
 6 | \usage{
 7 | .awsDetectVpc(cidr = "10.0.0.0/16")
 8 | }
 9 | \arguments{
10 | \item{cidr}{character, CIDR block for the VPC}
11 | }
12 | \value{
13 | vpc information
14 | }
15 | \section{Functions}{
16 | \itemize{
17 | \item \code{.awsDetectVpc}: the value for VPC needed to be used
18 | }}
19 | 
20 | 


--------------------------------------------------------------------------------
/man/awsWorkers.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AWSBatchJobsParam-class.R,
 3 | %   R/AWSSnowParam-class.R
 4 | \name{awsWorkers}
 5 | \alias{awsWorkers}
 6 | \alias{awsWorkers}
 7 | \title{Get number of workers in the cluster}
 8 | \usage{
 9 | awsWorkers(x)
10 | 
11 | awsWorkers(x)
12 | }
13 | \arguments{
14 | \item{AWSBatchJobsParam}{object}
15 | 
16 | \item{AWSSnowParam}{object}
17 | }
18 | \description{
19 | Get number of workers in the cluster
20 | 
21 | Get number of workers in the cluster
22 | }
23 | 


--------------------------------------------------------------------------------
/man/awsDetectSubnetOnMaster.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helper_functions.R
 3 | \name{.awsDetectSubnetOnMaster}
 4 | \alias{.awsDetectSubnetOnMaster}
 5 | \title{Detect the subnet on the AWS master instance of the cluster}
 6 | \usage{
 7 | .awsDetectSubnetOnMaster()
 8 | }
 9 | \description{
10 | Detect the subnet on the AWS master instance of the cluster
11 | }
12 | \section{Functions}{
13 | \itemize{
14 | \item \code{.awsDetectSubnetOnMaster}: get the value of subnet on the master node
15 | }}
16 | 
17 | 


--------------------------------------------------------------------------------
/man/awsDetectMaster.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helper_functions.R
 3 | \name{.awsDetectMaster}
 4 | \alias{.awsDetectMaster}
 5 | \title{Function to detect if code is being run on EC2 master node}
 6 | \usage{
 7 | .awsDetectMaster()
 8 | }
 9 | \value{
10 | logical
11 | }
12 | \description{
13 | Function to detect if code is being run on EC2 master node
14 | }
15 | \section{Functions}{
16 | \itemize{
17 | \item \code{.awsDetectMaster}: Function to detect if current machine is on
18 | an AWS machine
19 | }}
20 | 
21 | 


--------------------------------------------------------------------------------
/man/awsCredentialsPath.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AWSBatchJobsParam-class.R,
 3 | %   R/AWSSnowParam-class.R
 4 | \name{awsCredentialsPath}
 5 | \alias{awsCredentialsPath}
 6 | \alias{awsCredentialsPath}
 7 | \title{Get path to AWS credentials}
 8 | \usage{
 9 | awsCredentialsPath(x)
10 | 
11 | awsCredentialsPath(x)
12 | }
13 | \arguments{
14 | \item{AWSBatchJobsParam}{object}
15 | 
16 | \item{AWSSnowParam}{object}
17 | }
18 | \description{
19 | Get path to AWS credentials
20 | 
21 | Get path to AWS credentials
22 | }
23 | 


--------------------------------------------------------------------------------
/man/awsSubnet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AWSBatchJobsParam-class.R,
 3 | %   R/AWSSnowParam-class.R
 4 | \name{awsSubnet}
 5 | \alias{awsSubnet}
 6 | \alias{awsSubnet}
 7 | \title{Get AWS Subnet within which the AWS EC2 instance was launched}
 8 | \usage{
 9 | awsSubnet(x)
10 | 
11 | awsSubnet(x)
12 | }
13 | \arguments{
14 | \item{AWSBatchJobsParam}{}
15 | 
16 | \item{AWSSnowParam}{}
17 | }
18 | \description{
19 | Get AWS Subnet within which the AWS EC2 instance was launched
20 | 
21 | Get AWS Subnet within which the AWS EC2 instance was launched
22 | }
23 | 


--------------------------------------------------------------------------------
/man/awsSshKeyPair.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AWSBatchJobsParam-class.R,
 3 | %   R/AWSSnowParam-class.R
 4 | \name{awsSshKeyPair}
 5 | \alias{awsSshKeyPair}
 6 | \alias{awsSshKeyPair}
 7 | \title{Get the SSH public key path associated with the AWS EC2 instance.}
 8 | \usage{
 9 | awsSshKeyPair(x)
10 | 
11 | awsSshKeyPair(x)
12 | }
13 | \arguments{
14 | \item{AWSBatchJobsParam}{}
15 | 
16 | \item{AWSSnowParam}{}
17 | }
18 | \description{
19 | Get the SSH public key path associated with the AWS EC2 instance.
20 | 
21 | Get the SSH public key path associated with the AWS EC2 instance.
22 | }
23 | 


--------------------------------------------------------------------------------
/man/awsAmiId.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AWSBatchJobsParam-class.R,
 3 | %   R/AWSSnowParam-class.R
 4 | \name{awsAmiId}
 5 | \alias{awsAmiId}
 6 | \alias{awsAmiId}
 7 | \title{Get AWS AMI-ID of the launched instance. These need to be
 8 | Bioconductor configured AMI's.}
 9 | \usage{
10 | awsAmiId(x)
11 | 
12 | awsAmiId(x)
13 | }
14 | \arguments{
15 | \item{AWSBatchJobsParam}{}
16 | 
17 | \item{AWSSnowParam}{}
18 | }
19 | \description{
20 | Get AWS AMI-ID of the launched instance. These need to be
21 | Bioconductor configured AMI's.
22 | 
23 | Get AWS AMI-ID of the launched instance
24 | }
25 | 


--------------------------------------------------------------------------------
/man/bpsuspend.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AWSBatchJobsParam-class.R
 3 | \name{bpsuspend}
 4 | \alias{bpsuspend}
 5 | \title{Suspend an AWS EC2 cluster started using bpsetup}
 6 | \usage{
 7 | bpsuspend(x, clustername = "awsparallel")
 8 | }
 9 | \arguments{
10 | \item{x}{AWSBatchJobsParam object}
11 | 
12 | \item{clustername}{character value given to the cluster.}
13 | }
14 | \description{
15 | bpsuspend is required to 'stop' an AWS Cluster, if the user
16 | has an intention of re-using it at a later time. It does NOT
17 | terminate the cluster. The clustername should match the argument
18 | used in bpstart.
19 | }
20 | 


--------------------------------------------------------------------------------
/man/awsDetectOrCreateSubnet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helper_functions.R
 3 | \name{.awsDetectOrCreateSubnet}
 4 | \alias{.awsDetectOrCreateSubnet}
 5 | \title{Describe the Subnet that needs to be used}
 6 | \usage{
 7 | .awsDetectOrCreateSubnet(vpc)
 8 | }
 9 | \arguments{
10 | \item{vpc}{character subnet is created within the given VPC-ID}
11 | }
12 | \value{
13 | list subnet information
14 | }
15 | \description{
16 | Describe the Subnet that needs to be used
17 | }
18 | \section{Functions}{
19 | \itemize{
20 | \item \code{.awsDetectOrCreateSubnet}: AWS subnet information is detected or created
21 | from given VPC
22 | }}
23 | 
24 | 


--------------------------------------------------------------------------------
/man/awsDetectSecurityGroup.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helper_functions.R
 3 | \name{.awsDetectSecurityGroup}
 4 | \alias{.awsDetectSecurityGroup}
 5 | \title{Detect the security group which needs to be used}
 6 | \usage{
 7 | .awsDetectSecurityGroup(vpc)
 8 | }
 9 | \arguments{
10 | \item{vpc}{character Security Group is created within given VPC-ID}
11 | }
12 | \value{
13 | security group information
14 | }
15 | \description{
16 | Detect the security group which needs to be used
17 | }
18 | \section{Functions}{
19 | \itemize{
20 | \item \code{.awsDetectSecurityGroup}: Detect the security group of the AWS instance
21 | }}
22 | 
23 | 


--------------------------------------------------------------------------------
/man/transferFromCluster.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helper_functions.R
 3 | \name{transferFromCluster}
 4 | \alias{transferFromCluster}
 5 | \title{Allows transfer of files from master node to the host machine.}
 6 | \usage{
 7 | transferFromCluster(clustername, remotePath, localPath)
 8 | }
 9 | \arguments{
10 | \item{clustername}{character vector of the clustername}
11 | 
12 | \item{starcluster_config}{character vector of path to starcluster config}
13 | }
14 | \description{
15 | Follows this command
16 |     #  starcluster get mycluster --node mycluster-master
17 |     #                            --user myuser /remote/path /local/path
18 | }
19 | 


--------------------------------------------------------------------------------
/man/transferToCluster.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helper_functions.R
 3 | \name{transferToCluster}
 4 | \alias{transferToCluster}
 5 | \title{Allows transfer of files from Host machine, to master node on cluster.}
 6 | \usage{
 7 | transferToCluster(clustername, localPath, remotePath)
 8 | }
 9 | \arguments{
10 | \item{clustername}{character vector of the clustername}
11 | 
12 | \item{starcluster_config}{character vector of path to starcluster config}
13 | }
14 | \description{
15 | Follows this command
16 |     #  starcluster put mycluster --node mycluster-master
17 |     #                            --user myuser /local/path /remote/path
18 | }
19 | 


--------------------------------------------------------------------------------
/man/bpteardown.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AWSBatchJobsParam-class.R
 3 | \name{bpteardown}
 4 | \alias{bpteardown}
 5 | \title{Teardown permanently (TERMINATE) the AWS cluster.}
 6 | \usage{
 7 | bpteardown(x, clustername = "awsparallel")
 8 | }
 9 | \arguments{
10 | \item{x}{AWSBatchJobsParam object}
11 | 
12 | \item{clustername}{character value given to the cluster.}
13 | }
14 | \description{
15 | bpteardown is used to completely remove the AWS cluster from
16 | the user's AWS account. The user cannot retrieve any data or
17 | reuse the cluster once bpteardown is started.
18 | }
19 | \details{
20 | We recommend using bpteardown, once the data analysis is done.
21 | This will regulate AWS account costs, unless the user intends to
22 | reuse the cluster. If there is a need to reuse the cluster, see
23 | '?bpsuspend'.
24 | }
25 | 


--------------------------------------------------------------------------------
/inst/script/config_files/sgetemplate/simple.tmpl:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # The name of the job, can be anything, simply used when displaying the list of running jobs
 4 | #$ -N <%= job.name %>
 5 | # Combining output/error messages into one file
 6 | #$ -j y
 7 | # Giving the name of the output log file
 8 | #$ -o <%= log.file %>
 9 | # One needs to tell the queue system to use the current directory as the working directory
10 | # Or else the script may fail as it will execute in your top level home directory /home/username
11 | #$ -cwd
12 | # use environment variables
13 | #$ -V
14 | # use correct queue (This variable is not being used)
15 | # -q <%= resources$queue %>
16 | # use job arrays
17 | #$ -t 1-<%= arrayjobs %>
18 | 
19 | # we merge R output with stdout from SGE, which gets then logged via -o option
20 | R CMD BATCH --no-save --no-restore "<%= rscript %>" /dev/stdout
21 | exit 0
22 | 


--------------------------------------------------------------------------------
/man/bpsetup.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AWSBatchJobsParam-class.R
 3 | \name{bpsetup}
 4 | \alias{bpsetup}
 5 | \title{Setup a new AWS EC2 cluster}
 6 | \usage{
 7 | bpsetup(x, clustername = "awsparallel")
 8 | }
 9 | \arguments{
10 | \item{x}{AWSBatchJobsParam object}
11 | 
12 | \item{clustername}{character value given to the cluster.}
13 | }
14 | \description{
15 | The step bpsetup is required before using any of the conventional
16 | BiocParallel functions like bpstart, bpstop. It is used to setup
17 | or start a new or existing cluster on the user's AWS account. Once
18 | a cluster is up and running, it should be safely suspended or terminated
19 | using functionality like 'bpsuspend' and 'bpteardown'. NOTE: This function
20 | takes a while to process; depending on the number of workers needed,
21 | it may take up to 4-5 minutes.
22 | }
23 | 


--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
 1 | .STARCLUSTER_CONFIG_PATH <- "~/.starcluster/config"
 2 | .AWS_CREDENTIALS_PATH <- "~/.aws/credentials"
 3 | 
 4 | ## Register an AWSBatchJobsParam() as the BiocParallel back-end at load time.
 5 | ## Problems are reported as warnings; returns the tryCatch() value invisibly.
 6 | .registerOnStartup <-
 7 |     function()
 8 | {
 9 |     ## Missing starcluster config: warn, but still attempt registration below.
10 |     if (!file.exists(.STARCLUSTER_CONFIG_PATH))
11 |         warning(
12 |             "'AWSBatchJobsParam()' registered without starcluster configuration; see ?AWSBatchJobsParam",
13 |             call.=FALSE
14 |         )
15 |     ## Runs unconditionally; a construction/registration error becomes a warning.
16 |     invisible(tryCatch({
17 |         register(AWSBatchJobsParam())
18 |     }, error = function(e) {
19 |         warning(
20 |             "'.onLoad()' failed to register 'AWSBatchJobsParam()':",
21 |             "\n  ", conditionMessage(e),
22 |             call.=FALSE
23 |         )
24 |     }))
25 | }
26 | 
27 | ## Namespace load hook: delegates to .registerOnStartup(); arguments unused.
28 | .onLoad <- function(libname, pkgname)
29 | {
30 |     .registerOnStartup()
31 | }
32 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: AWSParallel
 2 | Type: Package
 3 | Title: AWS facilities for parallel evaluation
 4 | Version: 0.1.1
 5 | Authors@R: c(
 6 |     person(
 7 |         "Bioconductor", "Package Maintainer",
 8 |         email="maintainer@bioconductor.org",
 9 |         role=c("aut", "cre")
10 |     ),
11 |     person(
12 |         "Nitesh", "Turaga",
13 |         email="nitesh.turaga@roswellpark.org",
14 |         role="aut"
15 |     ),
16 |     person(
17 |         "Martin", "Morgan", role="aut"
18 |     ))
19 | Description: This package provides facilities to perform parallel
20 |     evaluation with AWS infrastructure. It connects with BiocParallel,
21 |     and works with Bioconductor objects.
22 | License: GPL-2 | GPL-3
23 | Depends: methods, utils, BiocParallel
24 | Imports:
25 |     aws.ec2,
26 |     aws.signature,
27 |     httr,
28 |     yaml,
29 |     ini
30 | Encoding: UTF-8
31 | LazyData: true
32 | RoxygenNote: 6.0.1
33 | Suggests: knitr,
34 |           rmarkdown,
35 |           BiocStyle,
36 |           testthat
37 | VignetteBuilder: knitr
38 | 


--------------------------------------------------------------------------------
/man/awsInstanceType.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AWSBatchJobsParam-class.R,
 3 | %   R/AWSSnowParam-class.R
 4 | \name{awsInstanceType}
 5 | \alias{awsInstanceType}
 6 | \alias{awsInstanceType}
 7 | \title{Get AWS Instance type.}
 8 | \usage{
 9 | awsInstanceType(x)
10 | 
11 | awsInstanceType(x)
12 | }
13 | \arguments{
14 | \item{AWSBatchJobsParam}{object}
15 | 
16 | \item{AWSSnowParam}{object}
17 | }
18 | \value{
19 | character
20 | 
21 | character
22 | }
23 | \description{
24 | The possible instance types are listed in the
25 | document: https://aws.amazon.com/ec2/instance-types/.  The
26 | Bioconductor AMI's have been built using an m4.xlarge instance
27 | type.  Large computations are best supported on this type of
28 | instance.
29 | 
30 | The possible instance types are listed in the
31 | document: https://aws.amazon.com/ec2/instance-types/.  The
32 | Bioconductor AMI's have been built using an m4.xlarge instance
33 | type.  Large computations are best supported on this type of
34 | instance.
35 | }
36 | 


--------------------------------------------------------------------------------
/man/config_starcluster.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helper_functions.R
 3 | \name{.config_starcluster}
 4 | \alias{.config_starcluster}
 5 | \title{Function to fill the starcluster configuration}
 6 | \usage{
 7 | .config_starcluster(workers, awsCredentialsPath = "~/.aws/credentials",
 8 |   awsInstanceType, awsSubnet = NA_character_, awsAmiId, awsSshKeyPair,
 9 |   awsProfile = "default", user = "ubuntu", cidr_ip = NA_character_)
10 | }
11 | \arguments{
12 | \item{workers}{integer vector}
13 | 
14 | \item{awsCredentialsPath}{character vector}
15 | 
16 | \item{awsInstanceType}{character vector}
17 | 
18 | \item{awsSubnet}{character vector}
19 | 
20 | \item{awsAmiId}{character vector}
21 | 
22 | \item{awsSshKeyPair}{character vector}
23 | 
24 | \item{user}{character vector}
25 | 
26 | \item{cidr_ip}{character vector}
27 | }
28 | \description{
29 | Function to fill the starcluster configuration
30 | }
31 | \section{Functions}{
32 | \itemize{
33 | \item \code{.config_starcluster}: starcluster with the values given in the function
34 | }}
35 | 
36 | 


--------------------------------------------------------------------------------
/tests/testthat/test_AWSSnowParam-class.R:
--------------------------------------------------------------------------------
 1 | context("AWSSnowParam")
 2 | 
 3 | ## Test to check if AWS credentials exist
 4 | test_that("AWSSnowParam constructor fails without awsCredentials", {
 5 |     expect_true(file.exists("~/.aws/credentials"))
 6 | })
 7 | 
 8 | test_that("AWSSnowParam works with minimum arguments", {
 9 |     aws <- AWSSnowParam(
10 |         workers=2,
11 |         awsInstanceType="t2.micro",
12 |         awsSubnet = "subnet-d66a05ec",
13 |         awsSecurityGroup = "sg-748dcd07",
14 |         awsAmiId="ami-2951fa53",
15 |         awsSshKeyPair = "mykey",
16 |         bplib="/home/ubuntu/R/x86_64-pc-linux-gnu-library/3.4/BiocParallel",
17 |         awsCredentialsPath= .AWS_CREDENTIALS_PATH
18 |     )
19 |     ## Test accessors and see if identical
20 |     expect_identical(bpworkers(aws), 2L)
21 |     expect_identical(awsSubnet(aws), "subnet-d66a05ec")
22 |     expect_identical(awsSecurityGroup(aws), "sg-748dcd07")
23 |     expect_identical(awsAmiId(aws), "ami-2951fa53")
24 | })
25 | 
26 | ## Test case to check if setting `bpworkers<-` works
27 | 
28 | 
29 | ## Test case to check if there is always only 1 cluster available through snow
30 | 
31 | 
32 | ## Test case to check the AWSSnowParam null constructor
33 | 


--------------------------------------------------------------------------------
/man/AWSBatchJobsParam-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AWSBatchJobsParam-class.R
 3 | \docType{class}
 4 | \name{AWSBatchJobsParam-class}
 5 | \alias{AWSBatchJobsParam-class}
 6 | \alias{.AWSBatchJobsParam}
 7 | \title{Reference class .AWSBatchJobsParam that allows use of AWS EC2 as clusters
 8 | 
 9 | The .AWSBatchJobsParam class extends the BatchJobsParam class to
10 | allow usage of AWS EC2-instances for parallel computation. The
11 | methods follow a style similar to that of BiocParallelParams, with
12 | bpstart, bpstop, bpisup, bplapply being the important ones. The
13 | behaviour of these functions is described in the man pages.}
14 | \section{Fields}{
15 | 
16 | \describe{
17 | \item{\code{awsCredentialsPath}}{Path to AWS credentials, default value is
18 | `~/.aws/credentials`}
19 | 
20 | \item{\code{awsInstanceType}}{Type of AWS EC2-instance, eg. t2.micro}
21 | 
22 | \item{\code{awsSubnet}}{AWS EC2-instance subnet, within a certain VPC}
23 | 
24 | \item{\code{awsAmiId}}{AMI(amazon machine image) ID for the
25 | Bioconductor-starcluster image. Correct ID is needed.}
26 | 
27 | \item{\code{awsSshKeyPair}}{SSH key pair, to associate with your AWS
28 | EC2-instance}
29 | }}
30 | 
31 | 
32 | 


--------------------------------------------------------------------------------
/man/AWSSnowParam-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AWSSnowParam-class.R
 3 | \docType{class}
 4 | \name{AWSSnowParam-class}
 5 | \alias{AWSSnowParam-class}
 6 | \alias{.AWSSnowParam}
 7 | \title{Reference class .AWSSnowParam that allows usage of AWS EC2-instances}
 8 | \description{
 9 | The .AWSSnowParam class extends the SnowParam class to allow usage
10 | of AWS EC2-instances for parallel computation.  The methods follow
11 | a style similar to that of BiocParallelParams, with bpstart,
12 | bpstop, bpisup, bplapply being the important ones.
13 | }
14 | \section{Fields}{
15 | 
16 | \describe{
17 | \item{\code{awsCredentialsPath}}{Path to AWS credentials, default value is
18 | `~/.aws/credentials`}
19 | 
20 | \item{\code{awsInstanceType}}{Type of AWS EC2-instance, eg. t2.micro}
21 | 
22 | \item{\code{awsSubnet}}{AWS EC2-instance subnet, within a certain VPC}
23 | 
24 | \item{\code{awsSecurityGroup}}{Security group which assigns inbound and
25 | outbound traffic at the instance level}
26 | 
27 | \item{\code{awsInstance}}{A list, created holding all the information of
28 | the AWS instance}
29 | 
30 | \item{\code{awsAmiId}}{AMI(amazon machine image) ID for the
31 | Bioconductor-release version}
32 | 
33 | \item{\code{awsSshKeyPair}}{SSH key pair, to associate with your AWS
34 | EC2-instance}
35 | }}
36 | 
37 | 
38 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(AWSBatchJobsParam)
 4 | export(AWSSnowParam)
 5 | export(awsAmiId)
 6 | export(awsCluster)
 7 | export(awsCredentialsPath)
 8 | export(awsInstance)
 9 | export(awsInstanceStatus)
10 | export(awsInstanceType)
11 | export(awsParallelListClusters)
12 | export(awsProfile)
13 | export(awsSecurityGroup)
14 | export(awsSshKeyPair)
15 | export(awsSubnet)
16 | export(awsWorkers)
17 | export(bpsetup)
18 | export(bpsuspend)
19 | export(bpteardown)
20 | export(getAwsAmiId)
21 | export(getAwsRequirements)
22 | export(transferFromCluster)
23 | export(transferToCluster)
24 | exportClasses(AWSBatchJobsParam)
25 | exportClasses(AWSSnowParam)
26 | exportMethods(bpstart)
27 | exportMethods(bpstop)
28 | importClassesFrom(BiocParallel,BatchJobsParam)
29 | importClassesFrom(BiocParallel,BiocParallelParam)
30 | importClassesFrom(BiocParallel,SnowParam)
31 | importFrom(BiocParallel,"bpworkers<-")
32 | importFrom(BiocParallel,BatchJobsParam)
33 | importFrom(BiocParallel,bpstart)
34 | importFrom(BiocParallel,bpstop)
35 | importFrom(aws.ec2,authorize_ingress)
36 | importFrom(aws.ec2,create_sgroup)
37 | importFrom(aws.ec2,create_subnet)
38 | importFrom(aws.ec2,create_vpc)
39 | importFrom(aws.ec2,describe_instances)
40 | importFrom(aws.ec2,describe_sgroups)
41 | importFrom(aws.ec2,describe_subnets)
42 | importFrom(aws.ec2,describe_vpcs)
43 | importFrom(aws.ec2,instance_status)
44 | importFrom(aws.ec2,my_ip)
45 | importFrom(aws.ec2,run_instances)
46 | importFrom(aws.ec2,terminate_instances)
47 | importFrom(aws.signature,use_credentials)
48 | importFrom(httr,GET)
49 | importFrom(httr,content)
50 | importFrom(httr,stop_for_status)
51 | importFrom(ini,read.ini)
52 | importFrom(ini,write.ini)
53 | importFrom(methods,callNextMethod)
54 | importFrom(methods,new)
55 | importFrom(methods,validObject)
56 | importFrom(yaml,yaml.load)
57 | 


--------------------------------------------------------------------------------
/man/AWSBatchJobsParam.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AWSBatchJobsParam-class.R
 3 | \name{AWSBatchJobsParam}
 4 | \alias{AWSBatchJobsParam}
 5 | \title{AWSBatchJobsParam function to start an AWS EC2-instance cluster}
 6 | \usage{
 7 | AWSBatchJobsParam(workers = NULL,
 8 |   starclusterConfigPath = .STARCLUSTER_CONFIG_PATH,
 9 |   startclusterClusterId = "smallcluster", awsInstanceType = NA_character_,
10 |   awsSubnet = NA_character_, awsAmiId = NA_character_,
11 |   awsSshKeyPair = NA_character_, awsCredentialsPath = "~/.aws/credentials",
12 |   awsProfile = "default", ...)
13 | }
14 | \arguments{
15 | \item{workers}{Numeric, number of workers to launch in the cluster}
16 | 
17 | \item{awsInstanceType}{character, Type of AWS EC2-instance,
18 | eg. t2.micro}
19 | 
20 | \item{awsSubnet}{character, AWS EC2-instance subnet, within a
21 | certain VPC}
22 | 
23 | \item{awsAmiId}{character, AMI(amazon machine image) ID for the
24 | Bioconductor-release version}
25 | 
26 | \item{awsSshKeyPair}{character, SSH key pair, to associate with
27 | your AWS EC2-instance}
28 | 
29 | \item{awsCredentialsPath}{character, Path to AWS credentials,
30 | default value is `~/.aws/credentials`}
31 | 
32 | \item{awsProfile}{character, indicates what profile to use while
33 | using AWS credentials}
34 | 
35 | \item{...}{Additional arguments, used to initialize BatchJobsParam.}
36 | 
37 | \item{verbose}{logical, gives a verbose output of SSH connection
38 | attempt, default is FALSE.}
39 | }
40 | \value{
41 | AWSBatchJobsParam object
42 | }
43 | \description{
44 | This function starts a cluster of AWS EC2-instances to allow
45 | parallel computation of R objects using BatchJobs on SGE, and works
46 | with BiocParallel, to allow computation with R/Bioconductor objects.
47 | }
48 | \examples{
49 | \dontrun{
50 |         ## Minimal example
51 |         aws <- AWSBatchJobsParam(
52 |                    workers = 2,
53 |                    awsCredentialsPath = "~/.aws/credentials",
54 |                    awsInstanceType = "t2.micro",
55 |                    awsSubnet = "subnet-d66a05ec",
56 |                    awsAmiId = "ami-0454187e",
57 |                    awsSshKeyPair = "mykey"
58 |                )
59 | }
60 | }
61 | 


--------------------------------------------------------------------------------
/inst/script/snowparam-demo.R:
--------------------------------------------------------------------------------
 1 | ###########################################
 2 | ## Trial 1: with Security Group and Subnet
 3 | ###########################################
 4 | library(AWSParallel)
 5 | 
 6 | ## Bioc-devel 
 7 | image <-  "ami-2951fa53"
 8 | 
 9 | ## Include Number of workers as 2
10 | workers = 2
11 | ## Set the AWS SSH key pair for your machine
12 | awsSshKeyPair = getOption("aws_ssh_key_pair")
13 | 
14 | sg <- "sg-748dcd07"
15 | subnet <- "subnet-d66a05ec"
16 | 
17 | creds_path = "~/.aws/credentials"
18 | 
19 | ## Create AWS instance
20 | aws <- AWSSnowParam(
21 |     workers=workers,
22 |     awsInstanceType="t2.micro",
23 |     awsSubnet = subnet,
24 |     awsSecurityGroup = sg,
25 |     awsAmiId= image,
26 |     awsSshKeyPair = awsSshKeyPair,
27 |     bplib="/home/ubuntu/R/x86_64-pc-linux-gnu-library/3.4/BiocParallel",
28 |     awsCredentialsPath=creds_path
29 |     )
30 | 
31 | aws
32 | ## Check if instance is up,
33 | awsInstanceStatus(aws)
34 | 
35 | ## Start instance
36 | bpstart(aws)
37 | 
38 | ## Return cluster which was started
39 | awsCluster()
40 | 
41 | ## Check if instance is up
42 | awsInstanceStatus(aws)
43 | 
44 | ## start an AWSParam job
45 | bplapply(1:4, function(i) system("hostname", intern=TRUE), BPPARAM=aws)
46 | 
47 | ## Stop aws instance
48 | bpstop(aws)
49 | 
50 | ##############################################
51 | ## Trial 2: Without security group and subnet,
52 | ##          this allows AWSParallel to create
53 | ##          as needed.
54 | #############################################
55 | 
56 | library(AWSParallel)
57 | workers = 2
58 | image <-  "ami-9fe2fee4"
59 | awsSshKeyPair = getOption("aws_ssh_key_pair")
60 | 
61 | aws <- AWSSnowParam(
62 |     workers=workers,
63 |     awsInstanceType="t2.micro",
64 |     awsAmiId= image,
65 |     awsSshKeyPair = awsSshKeyPair,
66 |     bplib="/home/ubuntu/R/x86_64-pc-linux-gnu-library/3.4/BiocParallel"
67 | )
68 | 
69 | 
70 | aws
71 | ## Check if instance is up,
72 | awsInstanceStatus(aws)
73 | 
74 | ## Start instance
75 | bpstart(aws)
76 | 
77 | ## Return cluster which was started
78 | awsCluster()
79 | 
80 | ## Check if instance is up
81 | awsInstanceStatus(aws)
82 | 
83 | ## start an AWSParam job
84 | bplapply(1:4, function(i) system("hostname", intern=TRUE), BPPARAM=aws)
85 | 
86 | ## Stop aws instance
87 | bpstop(aws)
88 | 


--------------------------------------------------------------------------------
/inst/script/batchjobs-demo.R:
--------------------------------------------------------------------------------
 1 | ############################################
 2 | ### STEP 1
 3 | ### HOST MACHINE
 4 | ###########################################
 5 | library(AWSParallel)
 6 | devtools::load_all()
 7 | ## NOTE: Make sure ~/.aws/credentials are available
 8 | 
 9 | ## Include Number of workers as 2
10 | workers = 2
11 | credentialsPath = "~/.aws/credentials"
12 | instanceType = "t2.micro"
13 | subnet <- "subnet-d66a05ec"
14 | ## Bioc-devel with starcluster
15 | image <- "ami-18c0f562"
16 | ## If you don't have a key you use, just create a new one
17 | ## for AWSParallel, and use that throughout.
18 | keyPair <- "mykey"
19 | 
20 | ## Construct AWSBatchJobsParam class
21 | aws <- AWSBatchJobsParam(workers = workers,
22 |                   awsCredentialsPath = credentialsPath,
23 |                   awsInstanceType = instanceType,
24 |                   awsSubnet = subnet,
25 |                   awsAmiId = image,
26 |                   awsSshKeyPair = keyPair,
27 |                   awsProfile="default")
28 | 
29 | 
30 | ## Show object aws
31 | aws
32 | 
33 | ## Setup AWS cluster (takes a few mins)
34 | bpsetup(aws)
35 | 
36 | ## SUSPEND WORKS BUT STARCLUSTER HAS AN ISSUE WITH RESTART, 
37 | ## Suspend AWS cluster 
38 | bpsuspend(aws)
39 | 
40 | ## Manually have to start AWS Nodes using CloudyR and finding out the ClusterID
41 | ## bpresume(aws)
42 | 
43 | ## Terminate or teardown AWS cluster
44 | bpteardown(aws)
45 | 
46 | #########################################
47 | ## STEP 2
48 | ## LOG INTO MASTER NODE (run in a shell, not in R)
49 | #########################################
50 | 
51 | ## $ starcluster sshmaster -u ubuntu awsparallel
52 | 
53 | #########################################
54 | ## STEP 3
55 | ## USE CLUSTER on MASTER NODE
56 | #########################################
57 | ## Shell commands, to be run before starting R:
58 | ## $ git pull
59 | ## $ git checkout AWSBatchParam-class
60 | ## Start R
61 | 
62 | ## Load AWSParallel
63 | library(AWSParallel)
64 | devtools::load_all()
65 | 
66 | aws  <- registered()[[1]]
67 | FUN <- function(i) system("hostname", intern=TRUE)
68 | xx <- bplapply(1:100, FUN, BPPARAM=aws)
69 | 
70 | table(unlist(xx))
71 | 
72 | # bpstart(aws)
73 | 
74 | ## Return the names of the clusters which were started
75 | awsParallelListClusters()
76 | #
77 | # ## Check is instance is up
78 | # awsClusterStatus(aws)
79 | #
80 | # ## start an AWSParam job
81 | # bplapply(1:4, function(i) system("hostname", intern=TRUE), BPPARAM=aws)
82 | #
83 | # ## Stop aws instance
84 | # bpstop(aws)
85 | 
86 | 


--------------------------------------------------------------------------------
/man/AWSSnowParam.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AWSSnowParam-class.R
 3 | \name{AWSSnowParam}
 4 | \alias{AWSSnowParam}
 5 | \title{AWSSnowParam function to start an AWS EC2-instance cluster}
 6 | \usage{
 7 | AWSSnowParam(workers = 2, awsCredentialsPath = NA_character_,
 8 |   awsInstanceType = NA_character_, awsSubnet = NA, awsSecurityGroup = NA,
 9 |   awsAmiId = NA_character_, awsSshKeyPair = NA_character_,
10 |   awsProfile = "default", user = "ubuntu", rhome = "/usr/local/lib/R",
11 |   bplib = "/home/ubuntu/R/x86_64-pc-linux-gnu-library/3.4/BiocParallel",
12 |   rscript = "/usr/local/bin/Rscript", outfile = "/home/ubuntu/snow.log",
13 |   verbose = FALSE)
14 | }
15 | \arguments{
16 | \item{workers}{Numeric, number of workers to launch in the cluster}
17 | 
18 | \item{awsCredentialsPath}{character, Path to AWS credentials,
19 | default value is `~/.aws/credentials`}
20 | 
21 | \item{awsInstanceType}{character, Type of AWS EC2-instance,
22 | eg. t2.micro}
23 | 
24 | \item{awsSubnet}{character, AWS EC2-instance subnet, within a
25 | certain VPC}
26 | 
27 | \item{awsSecurityGroup}{character, Security group which assigns
28 |     inbound and outbound traffic at the instance level. The
29 |     security group needs to be
30 | *Inbound rules*
31 | Protocol type   Port number   Source IP
32 |           TCP      22 (SSH)   0.0.0.0/0
33 |           TCP   11000-11999   CIDR-Block same as VPC
34 | *Outbound rules*
35 | Protocol type   Port number   Destination IP
36 |           All           All   0.0.0.0/0}
37 | 
38 | \item{awsAmiId}{character, AMI(amazon machine image) ID for the
39 | Bioconductor-release version}
40 | 
41 | \item{awsSshKeyPair}{character, SSH key pair, to associate with
42 | your AWS EC2-instance}
43 | 
44 | \item{awsProfile}{character, indicates what profile to use while
45 | using AWS credentials}
46 | 
47 | \item{verbose}{logical, gives a verbose output of SSH
48 | connection attempt, default is FALSE.}
49 | }
50 | \value{
51 | AWSSnowParam object
52 | }
53 | \description{
54 | This function starts a cluster of AWS EC2-instances to allow
55 | parallel computation of R objects, and works with BiocParallel, to
56 | allow computation with R/Bioconductor objects
57 | }
58 | \examples{
59 | \dontrun{
60 |         ## Minimal example
61 |         aws <- AWSSnowParam(workers = 1,
62 |                awsInstanceType="t2.micro",
63 |                awsAmiId= image,
64 |                awsSshKeyPair = "~/.ssh/<my_aws_key_pair>.pub")
65 | }
66 | }
67 | 


--------------------------------------------------------------------------------
/tests/testthat/test_AWSBatchJobsParam-class.R:
--------------------------------------------------------------------------------
  1 | context("AWSBatchJobsParam")
  2 | 
  3 | starcluster_template <- "[global]
  4 | DEFAULT_TEMPLATE = smallcluster
  5 | 
  6 | [aws info]
  7 | AWS_ACCESS_KEY_ID = # your ACCESS KEY
  8 | AWS_SECRET_ACCESS_KEY = # your SECRET ACCESS KEY
  9 | AWS_USER_ID = # your USER ID
 10 | 
 11 | [key mykey]
 12 | KEY_LOCATION = ~/.ssh/mykey.rsa
 13 | 
 14 | [cluster smallcluster]
 15 | KEYNAME = mykey
 16 | CLUSTER_SIZE = 2
 17 | CLUSTER_USER = ubuntu
 18 | CLUSTER_SHELL = bash
 19 | DNS_PREFIX = True
 20 | NODE_IMAGE_ID = ami-0454187e
 21 | NODE_INSTANCE_TYPE = t2.micro
 22 | permissions = http
 23 | SUBNET_IDS = subnet-d66a05ec
 24 | 
 25 | [permission http]
 26 | IP_PROTOCOL = tcp
 27 | FROM_PORT = 80
 28 | TO_PORT = 80
 29 | CIDR_IP = 172.30.0.0/16"
 30 | 
 31 | starclusterConfigPath <- tempfile()
 32 | writeLines(starcluster_template, starclusterConfigPath)
 33 | 
 34 | ## AWSBatchJobsParam constructor fails
 35 | test_that("AWSBatchJobsParam constructor fails without starclusterConfig", {
 36 |     expect_error(
 37 |         AWSBatchJobsParam(
 38 |             starclusterConfigPath = starclusterConfigPath,
 39 |             awsInstanceType = "t2.micro"
 40 |         ),
 41 |         "'AWSBatchJobsParam\\(\\)' requires either.*"
 42 |     )
 43 | })
 44 | 
 45 | test_that("AWSBatchJobsParam null constructor works", {
 46 |     aws <- AWSBatchJobsParam()
 47 |     expect_true(validObject(aws))
 48 | 
 49 |     work.dir <- aws$reg.pars$work.dir
 50 |     expect_identical(work.dir, getwd())
 51 | })
 52 | 
 53 | test_that("AWSBatchJobsParam initializes BatchJobsParams", {
 54 |     dir <- tempfile()
 55 |     aws <- AWSBatchJobsParam(work.dir=dir)
 56 |     work.dir <- aws$reg.pars$work.dir
 57 |     expect_identical(work.dir, dir)
 58 | })
 59 | 
 60 | test_that("AWSBatchJobsParam overrides BatchJobsParams", {
 61 |     aws <- AWSBatchJobsParam(workers = 6)
 62 |     expect_identical(bpnworkers(aws), 6L)
 63 | })
 64 | 
 65 | test_that("AWSBatchJobsParam accepts starclusterConfigPath", {
 66 |     aws <- AWSBatchJobsParam(starclusterConfigPath = starclusterConfigPath)
 67 |     expect_identical(bpnworkers(aws), 2L)
 68 |     expect_identical(awsInstanceType(aws), "t2.micro")
 69 | })
 70 | 
 71 | ## This test case works only if .starcluster/config is missing from the machine
 72 | ## Expect it to fail if the tests are run on a machine that has one.
 73 | test_that("AWSBatchJobsParam .onLoad() works", {
 74 |     .registerOnStartup <- AWSParallel:::.registerOnStartup
 75 | 
 76 |     expect_warning(
 77 |         .registerOnStartup(),
 78 |         "'AWSBatchJobsParam\\(\\)' registered without starcluster conf"
 79 |     )
 80 | })
 81 | 
 82 | 
 83 | ## Check if the values instantiated with .onLoad are empty values
 84 | test_that("AWSBatchJobsParam .onLoad works, aws values nchar==0", {
 85 |     .registerOnStartup <- AWSParallel:::.registerOnStartup
 86 | 
 87 |     expect_warning(
 88 |         .registerOnStartup(),
 89 |         "'AWSBatchJobsParam\\(\\)' registered without starcluster conf"
 90 |     )
 91 |     aws <- registered()[[1]]
 92 |     expect_identical(awsInstanceType(aws), "")
 93 |     expect_identical(awsAmiId(aws), "")
 94 |     expect_identical(awsSshKeyPair(aws), "")
 95 |     expect_identical(awsSubnet(aws), "")
 96 |     expect_identical(awsProfile(aws), "default")
 97 |     ## awsCredentialsPath goes to default
 98 |     expect_identical(awsCredentialsPath(aws), .AWS_CREDENTIALS_PATH)
 99 | })
100 | 


--------------------------------------------------------------------------------
/R/helper_functions.R:
--------------------------------------------------------------------------------
  1 | #' Look up the AMI ID of the current Bioconductor release
  2 | #'
  3 | #' @return entry of `ami_ids` in the Bioconductor config for the release
  4 | #' @importFrom httr GET
  5 | #' @importFrom httr content
  6 | #' @importFrom yaml yaml.load
  7 | #' @importFrom httr stop_for_status
  8 | #' @export
  9 | getAwsAmiId <-
 10 |     function()
 11 |     {
 12 |         response <- GET("https://www.bioconductor.org/config.yaml")
 13 |         stop_for_status(response)
 14 |         config <- yaml.load(content(response, type="text", encoding="UTF-8"))
 15 |         ## The lookup key is "bioc" plus the version with "." turned into "_"
 16 |         version_tag <- sub(".", "_", config$release_version, fixed=TRUE)
 17 |         config$ami_ids[[paste0("bioc", version_tag)]]
 18 |     }
 19 | 
 20 | 
 21 | #' Select the VPC that AWS resources are launched into
 22 | #'
 23 | #' @param cidr character, CIDR block for the VPC (currently unused)
 24 | #' @return the first VPC returned by `describe_vpcs()`
 25 | #' @importFrom aws.ec2 create_vpc
 26 | #' @importFrom aws.ec2 describe_vpcs
 27 | .awsDetectVpc <-
 28 |     function(cidr = "10.0.0.0/16")
 29 |     {
 30 |         ## TODO: Fix this
 31 |         ## `cidr` is not consulted yet, and no VPC is created on demand;
 32 |         ## the first VPC that is listed gets used as-is.
 33 |         available <- describe_vpcs()
 34 | 
 35 |         ## Guard clause: without any VPC there is nothing to return
 36 |         if (length(available) < 1) {
 37 |             stop("Please create a VPC on your AWS account")
 38 |         }
 39 |         available[[1]]
 40 |     }
 41 | 
 42 | 
 43 | #' Detect the subnet on the AWS master instance of the cluster
 44 | #'
 45 | #' @return subnetId of the instance matching the local host, else NA
 46 | #' @importFrom aws.ec2 describe_instances
 47 | .awsDetectSubnetOnMaster <-
 48 |     function()
 49 |     {
 50 |         ## Get list of all instances on AWS account
 51 |         instances <- describe_instances()
 52 |         ## Local hostname, rewritten from "ip-a-b-c-d" to dotted "a.b.c.d"
 53 |         hostname <- system2("hostname", stdout=TRUE)
 54 |         hostname <- gsub("-",".", sub("ip-","", hostname))
 55 |         subnet <- NA_character_
 56 |         for (i in seq_along(instances)) {
 57 |             instancesSet = instances[[i]][["instancesSet"]]
 58 |             for (j in seq_along(instancesSet)) {
 59 |                 privateIpAddress <- instancesSet[[j]][["privateIpAddress"]]
 60 |                 ## isTRUE(): a missing private IP is skipped, not an error
 61 |                 if (isTRUE(privateIpAddress == hostname))
 62 |                     subnet <- instancesSet[[j]][["subnetId"]]
 63 |             }
 64 |         }
 65 |         subnet
 66 |     }
 67 | 
 68 | #' Return the subnet to launch into, creating one when none is detected
 69 | #'
 70 | #' @param vpc VPC object; its `cidrBlock` is used for a newly created subnet
 71 | #' @return subnet ID, either detected on the master or freshly created
 72 | #' @importFrom aws.ec2 create_subnet
 73 | #' @importFrom aws.ec2 describe_subnets
 74 | .awsDetectOrCreateSubnet <-
 75 |     function(vpc)
 76 |     {
 77 |         detected <- .awsDetectSubnetOnMaster()
 78 |         if (!is.na(detected)) {
 79 |             return(detected)
 80 |         }
 81 |         ## Nothing was detected, so create a subnet that spans the
 82 |         ## CIDR block of the VPC and hand back its ID
 83 |         created <- create_subnet(vpc, cidr=vpc$cidrBlock)
 84 |         created$subnet$subnetId
 85 |     }
 86 | 
 87 | #' Detect the security group which needs to be used
 88 | #'
 89 | #' Reuses the first security group whose name matches
 90 | #' "AWSParallel_sgroup"; otherwise creates it and opens its ports.
 91 | #'
 92 | #' @param vpc VPC object the group is created in (uses its `cidrBlock`)
 93 | #' @return security group information
 94 | #' @importFrom aws.ec2 create_sgroup
 95 | #' @importFrom aws.ec2 authorize_ingress
 96 | #' @importFrom aws.ec2 describe_sgroups
 97 | .awsDetectSecurityGroup <-
 98 |     function(vpc)
 99 |     {
100 |         ## TODO: add error checking to see if sg exists
101 |         sgroups <- describe_sgroups()
102 |         group_names <- vapply(sgroups, `[[`, character(1), "groupName")
103 |         idx <- grep("AWSParallel_sgroup", group_names)
104 |         if (length(idx) != 0) {
105 |             ## grep() may return several matches; `[[` needs a single
106 |             ## index, so always take the first one
107 |             sg <- sgroups[[idx[1]]]
108 |         } else {
109 |             ## create sgroup
110 |             sg <- create_sgroup(name="AWSParallel_sgroup",
111 |                                 description="Security group for AWSParallel",
112 |                                 vpc = vpc)
113 |             ## Add TCP port range between 11000 to 11999
114 |             authorize_ingress(sg, port=c(11000,11999), protocol="tcp",
115 |                               cidr=vpc$cidrBlock)
116 |             ## Add SSH 22 port
117 |             authorize_ingress(sg, port=22, protocol="tcp", cidr="0.0.0.0/0")
118 |         }
119 |         sg
120 |     }
121 | 
122 | #' Collect the AWS networking requirements
123 | #'
124 | #' Resolves the VPC, subnet and security group that EC2 instances
125 | #' are launched into
126 | #' @return named list with elements `vpc`, `subnet` and `sgroup`
127 | #' @export
128 | getAwsRequirements <-
129 |     function()
130 |     {
131 |         ## .awsDetectVpc() runs with its default CIDR block; a
132 |         ## user-supplied CIDR is not forwarded from here
133 |         vpc <- .awsDetectVpc()
134 |         list(vpc=vpc,
135 |              subnet=.awsDetectOrCreateSubnet(vpc),
136 |              sgroup=.awsDetectSecurityGroup(vpc))
137 |     }
138 | 
139 | 
140 | #' Function to detect if code is being run on EC2 master node
141 | #'
142 | #' @return TRUE when an instance's private IP matches the local hostname
143 | .awsDetectMaster <-
144 |     function()
145 |     {
146 |         ## Get list of all instances on AWS account
147 |         instances <- describe_instances()
148 |         ## Local hostname, rewritten from "ip-a-b-c-d" to dotted "a.b.c.d"
149 |         hostname <- system2("hostname", stdout=TRUE)
150 |         hostname <- gsub("-",".", sub("ip-","", hostname))
151 |         for (i in seq_along(instances)) {
152 |             instancesSet = instances[[i]][["instancesSet"]]
153 |             for (j in seq_along(instancesSet)) {
154 |                 privateIpAddress <- instancesSet[[j]][["privateIpAddress"]]
155 |                 ## isTRUE(): a missing private IP is skipped, not an error
156 |                 if (isTRUE(privateIpAddress == hostname))
157 |                     return(TRUE)
158 |             }
159 |         }
160 |         FALSE
161 |     }
162 | 
163 | #' @importFrom aws.ec2 run_instances
164 | .awsLaunchMaster <-
165 |     function(x)
166 |     {
167 |         ## Launch EC2 instance(s) configured from the accessors of `x`,
168 |         ## inside the subnet and security group from getAwsRequirements()
169 |         network <- getAwsRequirements()
170 |         launched <- run_instances(
171 |             image=awsAmiId(x), type=awsInstanceType(x), min=awsWorkers(x),
172 |             subnet=network$subnet, sgroup=network$sgroup$groupId
173 |         )
174 |         launched
175 |     }
176 | 
177 | 
178 | #' Write the starcluster configuration file
179 | #'
180 | #' Fills the packaged `extdata/config.ini` template with the AWS keys of
181 | #' `awsProfile` and the cluster settings; writes `~/.starcluster/config`.
182 | #' @importFrom ini read.ini
183 | #' @importFrom ini write.ini
184 | .config_starcluster <-
185 |     function(workers,
186 |              awsCredentialsPath="~/.aws/credentials",
187 |              awsInstanceType,
188 |              awsSubnet=NA_character_,
189 |              awsAmiId,
190 |              awsSshKeyPair,
191 |              awsProfile="default",
192 |              user="ubuntu",
193 |              cidr_ip=NA_character_
194 |              )
195 | {
196 |     ## Read the starcluster template shipped with the package
197 |     template <- system.file("extdata", "config.ini", package = "AWSParallel")
198 |     config <- read.ini(template)
199 |     ## Fill starcluster config, Process AWS credentials
200 |     aws_credentials <- read.ini(awsCredentialsPath)
201 |     config[["aws info"]][["AWS_ACCESS_KEY_ID"]] <-
202 |         aws_credentials[[awsProfile]][["aws_access_key_id"]]
203 |     config[["aws info"]][["AWS_SECRET_ACCESS_KEY"]] <-
204 |         aws_credentials[[awsProfile]][["aws_secret_access_key"]]
205 |     ## Process AWS instance configuration
206 |     config[["cluster smallcluster"]][["SUBNET_IDS"]] <- awsSubnet
207 |     config[["cluster smallcluster"]][["CLUSTER_SIZE"]] <- workers
208 |     config[["cluster smallcluster"]][["CLUSTER_USER"]] <- user
209 |     config[["cluster smallcluster"]][["KEYNAME"]] <- awsSshKeyPair
210 |     config[["cluster smallcluster"]][["NODE_INSTANCE_TYPE"]] <- awsInstanceType
211 |     config[["cluster smallcluster"]][["NODE_IMAGE_ID"]] <- awsAmiId
212 |     ## Write CIDR block
213 |     config[["permission http"]][["CIDR_IP"]] <- cidr_ip
214 | 
215 |     ## The target directory may not exist yet (e.g. starcluster was never
216 |     ## configured on this machine); create it so the write cannot fail on it
217 |     dir.create("~/.starcluster", showWarnings=FALSE, recursive=TRUE)
218 |     write.ini(config, "~/.starcluster/config")
219 | }
220 | 
221 | ## FIXME: AMI should be listed on bioconductor.org/config.yaml
222 | #' Return the AMI ID used for starcluster. The value is hard-coded here
223 | #' and is not read from https://www.bioconductor.org/config.yaml
224 | getStarclusterAmiId <-
225 |     function()
226 | {
227 |     "ami-0454187e"
228 | }
229 | 
230 | 
231 | #' Allows transfer of files from Host machine, to master node on cluster.
232 | #'
233 | #' Follows this command
234 | #'     #  starcluster put mycluster --node mycluster-master
235 | #'     #                            --user myuser /local/path /remote/path
236 | #'
237 | #' @param clustername character vector of the clustername
238 | #' @param localPath character, path on the host machine to copy from
239 | #' @param remotePath character, path on the master node to copy to
240 | #' @export
241 | transferToCluster <-
242 |     function(clustername,
243 |              localPath,
244 |              remotePath)
245 | {
246 |     ## The node is "<clustername>-master" and the user is always "ubuntu"
247 |     args <- c("put", clustername,
248 |               "--node", paste0(clustername,"-master"),
249 |               "--user", "ubuntu",
250 |               localPath, remotePath)
251 |     res  <- system2("starcluster", args = args)
252 |     if (res !=0) {
253 |         stop("There was an error transferring your file")
254 |     }
255 | }
256 | 
257 | 
258 | #' Allows transfer of files from master node to the host machine.
259 | #'
260 | #' Follows this command
261 | #'     #  starcluster get mycluster --node mycluster-master
262 | #'     #                            --user myuser /remote/path /local/path
263 | #'
264 | #' @param clustername character vector of the clustername
265 | #' @param remotePath character, path on the master node to copy from
266 | #' @param localPath character, path on the host machine to copy to
267 | #' @export
268 | transferFromCluster <-
269 |     function(clustername,
270 |              remotePath,
271 |              localPath)
272 | {
273 |     ## The node is "<clustername>-master" and the user is always "ubuntu"
274 |     args <- c("get", clustername,
275 |               "--node", paste0(clustername,"-master"),
276 |               "--user", "ubuntu",
277 |               remotePath, localPath)
278 |     res  <- system2("starcluster", args = args)
279 |     if (res !=0) {
280 |         stop("There was an error transferring your file")
281 |     }
282 | }
283 | 
284 | 
285 | #' Function to return the names of Clusters launched.
286 | #'
287 | #' @return character vector of the `starcluster listclusters` output lines
288 | #'     taken as cluster names; `character(0)` when no cluster is listed
289 | #' @export
290 | awsParallelListClusters <-
291 |     function()
292 |     {
293 |         res <- system2("starcluster", "listclusters", stdout=TRUE, stderr=FALSE)
294 |         ## A name line is the midpoint of each consecutive pair of rulers
295 |         rulers <- grep("-------", res)
296 |         if (length(rulers) < 2L) return(character(0))
297 |         opening <- seq(1, length(rulers), by=2)
298 |         res[colMeans(rbind(rulers[opening], rulers[opening + 1]))]
299 |     }


--------------------------------------------------------------------------------
/vignettes/AWSParallel-AWSSnowParam-tutorial.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "AWSParallel AWSSnowParam Tutorial"
  3 | author: "Nitesh Turaga"
  4 | date: "`r Sys.Date()`"
  5 | output: BiocStyle::html_document
  6 | vignette: >
  7 |   %\VignetteIndexEntry{AWSParallel AWSSnowParam Tutorial}
  8 |   %\VignetteEngine{knitr::rmarkdown}
  9 |   %\VignetteEncoding{UTF-8}
 10 | ---
 11 | 
 12 | # Introduction
 13 | 
 14 | The AWSParallel package provides functionality to perform parallel
 15 | evaluation using AWS infrastructure, most importantly EC2. It extends
 16 | the `SnowParam` class in BiocParallel, and works with the same range 
 17 | of R and Bioconductor objects as `SnowParam`.
 18 | 
 19 | The goal of the AWSParallel package is to allow the user to create a
 20 | cluster of EC2 machines on AWS, and run `bplapply` from one "master"
 21 | node (EC2 instance) to send a task to a set of "worker" nodes
 22 | (multiple EC2 instances).  It is important to note, that both master
 23 | and worker nodes are on EC2. The user will have to start an instance,
 24 | manually, and use this machine as master.
 25 | 
 26 | This package requires that the user have an Amazon AWS account that
 27 | **costs money**, and requires a credit card to access. The AWS
 28 | credentials provided by the user also need access to the following
 29 | AWS services:
 30 | 
 31 |     1. IAM
 32 |     2. VPC
 33 |     3. EC2
 34 | 
 35 | # Quick Start
 36 | 
 37 | The quick start guide assumes you have your AWS access key, and secret
 38 | key in `~/.aws/credentials`. Please refer to the detailed section of
 39 | the vignette if these settings are not present.
 40 | 
 41 | Load the **AWSParallel** library and create an `AWSSnowParam`
 42 | 
 43 | 
 44 | ```{r, eval=FALSE}
 45 | library(AWSParallel)
 46 | 
 47 | # Number of workers
 48 | workers = 2
 49 | 
 50 | ## bioc-release 3.6 on US-EAST-2 (Ohio region), this is not default.
 51 | ## Look up AMI's you want to use, at 
 52 | ## http://bioconductor.org/help/bioconductor-cloud-ami/#ami_ids
 53 | image <- "ami-a90c23cc"
 54 | 
 55 | ## SSH key pair
 56 | awsSshKeyPair = getOption("aws_ssh_key_pair")
 57 | 
 58 | ## Launch a small instance for demonstration `t2.micro`,
 59 | ## larger instances cost more money.
 60 | cluster <- AWSSnowParam(
 61 |     workers=workers,
 62 |     awsInstanceType="t2.micro",
 63 |     awsAmiId= image,
 64 |     awsSshKeyPair = awsSshKeyPair
 65 | )
 66 | ```
 67 | 
 68 | Start, use, and stop a cluster.
 69 | 
 70 | 
 71 | ```{r awsDetectMaster}
 72 | ## This is a conditional param used to evaluate the vignette
 73 | awsDetectMaster <- function()
 74 | {
 75 |     ## Get list of all instances on AWS account
 76 |     instances <- aws.ec2::describe_instances()
 77 |     ## Get hostname of local machine code is being run on
 78 |     hostname <- system2("hostname", stdout=TRUE)
 79 |     hostname <- gsub("-",".", sub("ip-","", hostname))
 80 |     for (i in seq_along(instances)) {
 81 |         instancesSet = instances[[i]][["instancesSet"]]
 82 |         for (j in seq_along(instancesSet)) {
 83 |             privateIpAddress <- instancesSet[[j]][["privateIpAddress"]]
 84 |             ## isTRUE(): a missing private IP is skipped, not an error
 85 |             if (isTRUE(privateIpAddress == hostname)) {
 86 |                 return(TRUE)
 87 |             }
 88 |         }
 89 |     }
 90 |     FALSE
 91 | }
 92 | 
 93 | onMaster <- FALSE
 94 | ```
 95 | 
 96 | Eval only on master
 97 | 
 98 | ```{r conditional, eval = onMaster}
 99 | ## Start instance
100 | bpstart(cluster)
101 | 
102 | ## Start an AWSParam job-
103 | ## Notice that the hostnames are different. We can run a computation now, and 
104 | ## the job gets spread across the different machines, and collected back into
105 | ## the variable xx.
106 | xx <- bplapply(1:4, function(i) system("hostname", intern=TRUE), BPPARAM=cluster)
107 | 
108 | ## Stop aws instance(s)
109 | bpstop(cluster) 
110 | ```
111 | 
112 | # AWS settings
113 | 
114 | Settings required to get the package working on AWS.
115 | 
116 | ## Get AWS Credentials
117 | 
118 | To use AWSParallel, AWS credentials are a requirement. The credentials
119 | are given through [AWS Identity and Access management - IAM][]. The
120 | user needs an AWS access key and an AWS secret key.
121 | 
122 | These AWS credentials should be stored on your machine accessible at
123 | `~/.aws/credentials`, as given in the documentation for
124 | [configuring AWS credentials][].
125 | 
126 | Example AWS credentials, which need to be in the file,
127 | "~/.aws/credentials".
128 | 
129 | 
130 |     [default]
131 |     aws_access_key_id=AKIAIOSFODNN7EXAMPLE
132 |     aws_secret_access_key=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
133 | 
134 | 
135 | The AWS credentials provided to the package need access to a few
136 | components of the AWS account,
137 | 
138 | 1. IAM - This is used to get the credentials, change credentials,
139 |    activate/deactivate credentials for security reasons.
140 | 1. VPC - This is used to detect VPC's in the region, so that all the
141 |    instances launched are within the same VPC, and same subnet.
142 | 1. EC2 - This is used to launch, run, and terminate instances as
143 |    needed.
144 | 
145 | ## AWS Key Pair
146 | 
147 | Users also need to create a KeyPair which needs to be accessible on
148 | the machine being used. The Key Pair can be created using the
149 | [AWS-EC2 interface][].
150 | 
151 | It can also be done programmatically using the `AWSParallel` package
152 | but the functionality is imported from `aws.ec2`.
153 | 
154 | ```{r eval=FALSE}
155 | library(AWSParallel)
156 | ## Use credentials in your ~/.aws/credentials file
157 | aws.signature::use_credentials()
158 | ## This saves the `.pem` file in your path ~/AWSParallel-keypair.pem
159 | aws.ec2::create_keypair(
160 |     keypair = "AWSParallel-key", path="~/AWSParallel-keypair.pem"
161 | )
162 | 
163 | ## Key pair to be passed in to your AWSSnowParam class
164 | awsSshKeyPair = "~/AWSParallel-keypair.pem"
165 | ```
166 | 
167 | ## VPC and Subnet (Optional)
168 | 
169 | Every AWS account has a default *VPC* created when the account is
170 | started. This VPC is usually contained to one AWS Region. Most of the
171 | Bioconductor AMI's are located on the *AWS-Region US-EAST*, so
172 | starting your account with a VPC located in that region makes most
173 | sense.
174 | 
175 | If the VPC is created (by the user or by Amazon's default), the account
176 | gets *Subnets* as well by default. For the `AWSSnowParam` class to be
177 | created, the user has to specify the Subnet. If the subnet is not
178 | given, we use the first one on the AWS account.
179 | 
180 | *NOTE:* The *master* instance and the *worker* instances need to be on
181 | the same VPC and subnet with permissible security groups. Without this
182 | the socket connection established with other machines launched on AWS
183 | does not work well.
184 | 
185 | ## Security Group settings (Optional)
186 | 
187 | Security groups are probably the most important AWS setting required
188 | in AWSParallel. It's easier if you use the rules shown below.
189 | 
190 | ![Security Group Inbound Rules](inbound_rules.png)
191 | 
192 | ![Security Group Outbound Rules](outbound_rules.png)
193 | 
194 | 
195 | Once you have the above AWS components set up, you are ready to
196 | use the package.
197 | 
198 | # Working with the package AWSParallel
199 | 
200 | We need to launch the Bioconductor AMI for the *release* version. Do
201 | this by creating an `AWSSnowParam` object
202 | 
203 | FIXME: this code chunk should be written to be evaluated
204 | 
205 | ```{r, eval = FALSE}
206 | ## Load the package
207 | library(AWSParallel)
208 | 
209 | ## bioc-release 3.6 as found in https://www.bioconductor.org/config.yaml
210 | image <- "ami-a90c23cc"
211 | 
212 | ## Number of workers to be started in the AWS cluster
213 | workers = 4
214 | 
215 | ## Set the AWS SSH key pair for your machine, or
216 | ## If you already have a keypair, just set the path_to_key, to the pem file.
217 | path_to_key = "~/aws-parallel.pem"
218 | 
219 | ## TODO: This can also be automated
220 | awsSshKeyPair = aws.ec2::create_keypair("AWSParallelKeyPair", path_to_key)
221 | 
222 | ## 
223 | sg <- "sg-748dcd07"
224 | subnet <- "subnet-d66a05ec"
225 | 
226 | ## Create AWS instance
227 | aws <- AWSSnowParam(
228 |     workers=workers,
229 |     awsInstanceType="t2.micro",
230 |     awsSubnet = subnet,
231 |     awsSecurityGroup = sg,
232 |     awsAmiId= image,
233 |     awsSshKeyPair = path_to_key,
234 |     awsCredentialsPath="/home/ubuntu/credentials"
235 |     )
236 | 
237 | aws
238 | ## Check if instance is up,
239 | awsInstanceStatus(aws)
240 | 
241 | ```
242 | 
243 | Then perform diagnostics, start, use, and stop the cluster
244 | 
245 | ```{r, eval=onMaster}
246 | ## Start instance
247 | bpstart(aws)
248 | 
249 | ## Return cluster which was started
250 | awsCluster()
251 | 
252 | ## Check if instance is up
253 | awsInstanceStatus(aws)
254 | 
255 | ## start an AWSParam job
256 | bplapply(1:4, function(i) system("hostname", intern=TRUE), BPPARAM=aws)
257 | 
258 | ## Stop aws instance
259 | bpstop(aws)
260 | ```
261 | 
262 | # Choosing AWS EC2 Instance Size
263 | 
264 | The size of an AWS-EC2 Instance gives you access to the required
265 | amount of compute power. Larger instances usually have a higher
266 | capacity for computing, but also cost more money. The [AWS Pricing][]
267 | is given in the documentation, and we recommend you take a look at it.
268 | 
269 | The Bioconductor AMI's have been built using the *m4.xlarge*
270 | machine. So ideally to run a large computation, and use every package
271 | available in Bioconductor you would use your worker of size
272 | *m4.xlarge*. If you are using a limited set of packages, or you just
273 | need to run a job in parallel, it would be easier to take a look at
274 | the [Instance types][] and decide the appropriate size for your needs.
275 | 
276 | 
277 | # Steps to prepare master instance.
278 | 
279 | This process can be done ONE time, and the instance can be stopped
280 | without being terminated. This *master* instance can be reused.
281 | 
282 | 1. Create a new amazon EC2 instance which is going to be the *master*
283 |    node, by choosing the AMI-ID from this page,
284 |    http://bioconductor.org/help/bioconductor-cloud-ami/#ami_ids. Follow
285 |    the steps in the ec2 management console to launch the image.
286 | 
287 |     ![Choose EC2 Instance Type](choose-instance-type.png)
288 | 
289 |     ![Configure EC2 instance with security settings](configure-instance.png)
290 | 
291 |     ![Add storage as per usage requirements](add-storage.png)
292 | 
293 |     You are required to create a Keypair if you don't have one
294 |     already. This can be done using this [AWS-EC2 interface][]
295 |     console.
296 | 
297 |     ![Review and launch your master instance](review-and-launch.png)
298 | 
299 | 1. Name your master instance. This is important for getting your
300 |    instance's settings. Call it "AWSParallelMaster".
301 | 
302 | 1. SSH into the instance, which will be 
303 | 
304 |         ssh -i ~/.ssh/AWSparallel-test-keypair.pem ubuntu@34.239.248.175
305 |     
306 | 1. Once you are logged in, there are a few things you need to set
307 |    up. Install the `AWSParallel` package in your R prompt.
308 | 
309 |         biocLite(`AWSParallel`)
310 |     
311 |     If you have a dependency error installing AWSParallel, because of
312 |     missing dependencies, i.e `aws.ec2` and `aws.signature`, try
313 | 
314 |         install.packages(
315 |             "aws.ec2", repos = c(getOption("repos"), 
316 |             "http://cloudyr.github.io/drat")
317 |         )
318 | 
319 | 1. Copy your AWS credentials to this machine, by writing your
320 |    credentials in the `~/.aws/credentials` file.
321 | 
322 | 1. Copy your AWS SSH keypair (.pem) file to the machine as
323 |    well. The `.pem` file needs to have permissions to read only, i.e
324 |    run `chmod 400 AWSparallel-test-keypair.pem` if you get a
325 |    permissions error.
326 |    
327 |    
328 | # Instructions for Windows machines.
329 | 
330 | For Windows machines, we recommend that you manually follow the steps to 
331 | prepare a master instance instead of using the `awsLaunchMasterOnEc2` function.
332 | 
333 | Once you prepare the master instance, and are able to establish an SSH 
334 | connection to the master you may use the rest of the package as described 
335 | without any difference because of the platform.
336 | 
337 | 
338 | # Advanced Tips
339 | 
340 | 1. If you choose to keep your cluster isolated from any other work you have
341 | going on in your VPC, please create a new VPC and use the subnets in that VPC
342 | to start your AWSParallel param.
343 | 
344 | 1. Give the AWSParallel param, the new subnet, and a security group as described
345 | in the security group section.
346 | 
347 | 1. If you want to get the verbose output of the SSH connections being
348 |    attempted to your worker nodes, please use the option
349 |    `verbose=TRUE` when making your AWSSnowParam object.
350 | 
351 | 1. It is important that this cluster configuration is used only to launch parallel
352 | jobs. If you launch a large job on the "master" node, there is no increment in speed.
353 | It is vital that large jobs are not launched on the master node.
354 | 
355 | 
356 | # Session Info
357 | 
358 | ```{r}
359 | sessionInfo()
360 | ```
361 | 
362 | [AWS Identity and Access management - IAM]:http://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys
363 | 
364 | [configuring AWS credentials]:http://docs.aws.amazon.com/cli/latest/userguide/cli-config-files.html
365 | 
366 | [AWS-EC2 interface]:http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html#having-ec2-create-your-key-pair
367 | 
368 | [AWS Pricing]:https://aws.amazon.com/ec2/pricing/
369 | 
370 | [Instance types]: https://aws.amazon.com/ec2/instance-types/
371 | 


--------------------------------------------------------------------------------
/R/AWSSnowParam-class.R:
--------------------------------------------------------------------------------
  1 | #' Reference class .AWSSnowParam that allows usage of AWS EC2-instances
  2 | #'
  3 | #' The .AWSSnowParam class extends the SnowParam class to allow usage
  4 | #' of AWS EC2-instances for parallel computation.  The methods follow
  5 | #' a style similar to that of BiocParallelParams, with bpstart,
  6 | #' bpstop, bpisup, bplapply being the important one.
  7 | #'
  8 | #' @field awsCredentialsPath Path to AWS credentials, default value is
  9 | #'     `~/.aws/credentials`
 10 | #' @field awsInstanceType Type of AWS EC2-instance, eg. t2.micro
 11 | #' @field awsSubnet AWS EC2-instance subnet, within a certain VPC
 12 | #' @field awsSecurityGroup Secutiry group which assigns inbound and
 13 | #'     outbound traffic at the instance level
 14 | #' @field awsInstance A list, created holding all the information of
 15 | #'     the AWS instance
 16 | #' @field awsAmiId AMI(amazon machine image) ID for the
 17 | #'     Bioconductor-release version
 18 | #' @field awsSshKeyPair SSH key pair, to associate with your AWS
 19 | #'     EC2-instance
 20 | #' @importFrom methods new validObject callNextMethod
 21 | #' @importClassesFrom BiocParallel SnowParam BiocParallelParam
 22 | .AWSSnowParam <- setRefClass(
 23 |     "AWSSnowParam",
 24 |     contains = "SnowParam",
 25 |     fields = list(
 26 |         awsCredentialsPath = "character",
 27 |         awsInstanceType = "character",
 28 |         awsSubnet = "character",
 29 |         awsSecurityGroup = "character",
 30 |         awsInstance = "list",
 31 |         awsAmiId = "character",
 32 |         awsSshKeyPair = "character"
 33 |     ),
 34 |     methods = list(
 35 |         show = function() {
 36 |             callSuper()
 37 |             cat("  awsCredentialsPath: ", awsCredentialsPath(.self),
 38 |                 "\n  awsInstanceType: ", awsInstanceType(.self),
 39 |                 "\n  awsSubnet: ", awsSubnet(.self),
 40 |                 "\n  awsSecurityGroup(s): ",
 41 |                      paste(awsSecurityGroup(.self), sep=" ", collapse=" "),
 42 |                 "\n  awsAmiId: ", awsAmiId(.self),
 43 |                 "\n  awsSshKeyPair: ", awsSshKeyPair(.self),
 44 |                 "\n",
 45 |                 sep = "")
 46 |         }
 47 |     ),
 48 |     inheritPackage = TRUE
 49 | )
 50 | 
 51 | 
 52 | #' AWSSnowParam function to start an AWS EC2-instance cluster
 53 | #'
 54 | #' This function starts a cluster of AWS EC2-instances to allow
 55 | #' parallel computation of R objects, and works with BiocParallel, to
 56 | #' allow computation with R/Bioconductor objects
 57 | #'
 58 | #' @param workers Numeric, number of workers to launch in the cluster
 59 | #' @param awsCredentialsPath character, Path to AWS credentials,
 60 | #'     default value is `~/.aws/credentials`
 61 | #' @param awsInstanceType character, Type of AWS EC2-instance,
 62 | #'     eg. t2.micro
 63 | #' @param awsSubnet character, AWS EC2-instance subnet, within a
 64 | #'     certain VPC
 65 | #' @param awsSecurityGroup character, Security group which assigns
 66 | #'     inbound and outbound traffic at the instance level. The
 67 | #'     security group needs to be
 68 | #' *Inbound rules*
 69 | #' Protocol type   Port number   Source IP
 70 | #'           TCP      22 (SSH)   0.0.0.0/0
 71 | #'           TCP   11000-11999   CIDR-Block same as VPC
 72 | #' *Outbound rules*
 73 | #' Protocol type   Port number   Destination IP
 74 | #'           All           All   0.0.0.0/0
 75 | #' @param awsAmiId character, AMI(amazon machine image) ID for the
 76 | #'     Bioconductor-release version
 77 | #' @param awsSshKeyPair character, SSH key pair, to associate with
 78 | #'     your AWS EC2-instance
 79 | #' @param awsProfile character, indicates what profile to use while
 80 | #'     using AWS credentials
 81 | #' @param verbose logical, gives a verbose output of SSH
 82 | #'     connection attempt, default is FALSE.
 83 | #' @return AWSSnowParam object
 84 | #' @examples
 85 | #' \dontrun{
 86 | #'         ## Minimal example
 87 | #'         aws <- AWSSnowParam(workers = 1,
 88 | #'                awsInstanceType="t2.micro",
 89 | #'                awsAmiId= image,
 90 | #'                awsSshKeyPair = "~/.ssh/<my_aws_key_pair>.pub")
 91 | #' }
 92 | #' @importFrom aws.ec2 my_ip
 93 | #' @importFrom aws.signature use_credentials
 94 | #' @exportClass AWSSnowParam
 95 | #' @export
 96 | AWSSnowParam <- function(workers = 2,
 97 |              awsCredentialsPath = NA_character_,
 98 |              awsInstanceType = NA_character_,
 99 |              awsSubnet = NA,
100 |              awsSecurityGroup = NA,
101 |              awsAmiId = NA_character_,
102 |              awsSshKeyPair = NA_character_,
103 |              awsProfile = "default",
104 |              user="ubuntu",
105 |              rhome="/usr/local/lib/R",
106 |              ## TODO: change this default
107 |              bplib="/home/ubuntu/R/x86_64-pc-linux-gnu-library/3.4/BiocParallel",
108 |              rscript = "/usr/local/bin/Rscript",
109 |              outfile = "/home/ubuntu/snow.log",
110 |              verbose = FALSE
111 |              )
112 | {
113 | 
114 |     ## Validate AWS profile for IAM management
115 |     stopifnot(length(awsProfile) == 1L, is.character(awsProfile))
116 | 
117 |     ## Validate AWS Credentials Path
118 |     if (is.na(awsCredentialsPath)) {
119 |         if (.Platform$OS.type == "unix") {
120 |             awsCredentialsPath = "~/.aws/credentials"
121 |             ## Use credentials
122 |             use_credentials(profile=awsProfile, file=awsCredentialsPath)
123 |         } else {
124 |             ## if (.Platform$OS.type == "windows") {
125 |             message("Please launch EC2 master instance following the vignette")
126 |         }
127 |     }
128 |     stopifnot(
129 |         file.exists(awsCredentialsPath),
130 |         !missing(awsInstanceType),
131 |         !missing(awsSshKeyPair),
132 |         length(user) == 1L, is.character(user),
133 |         length(rhome) == 1L, is.character(rhome),
134 |         length(bplib) == 1L, is.character(bplib),
135 |         length(rscript) == 1L, is.character(rscript),
136 |         length(outfile) == 1L, is.character(outfile)
137 |     )
138 | 
139 |     ## If missing, default to release version of AMI
140 |     if (missing(awsAmiId)) {
141 |         awsAmiId <- getAwsAmiId()
142 |     }
143 | 
144 |     ## If both security group and subnet are missing, assign
145 |     if (missing(awsSubnet) || missing(awsSecurityGroup)) {
146 |         ## If on a master node
147 |         reqs <- getAwsRequirements()
148 |         ## Allocate subnet and securityGroup as need
149 |         awsSubnet <- reqs$subnet
150 |         awsSecurityGroup <- reqs$sgroup$groupId
151 |     }
152 | 
153 |     .clusterargs <- list(
154 |         spec = workers, type = "SOCK",
155 |         ## Allow 'yes' to `Are you sure you want to continue connecting (yes/no)?`
156 |         ## using ssh -oStrictHostKeyChecking=no
157 |         ## ref: http://xmodulo.com/how-to-accept-ssh-host-keys-automatically-on-linux.html
158 |         rshcmd = paste("ssh -oStrictHostKeyChecking=n -i", awsSshKeyPair,
159 |                        ifelse(verbose,yes="-v",no=""),
160 |                        sep=" "),
161 |         user=user,
162 |         rhome=rhome,
163 |         snowlib=bplib,
164 |         rscript=rscript,
165 |         outfile=outfile
166 |     )
167 | 
168 |     ## Initiate .AWSSnowParam class
169 |     x <- .AWSSnowParam(
170 |         ## base class (SnowParam) fields
171 |         workers = workers,
172 |         ## TODO: There is no `-i` in OS-X
173 |         hostname = system2("hostname", stdout=TRUE),
174 |         .clusterargs = .clusterargs,
175 |         ## AWSSnowParam fields
176 |         awsCredentialsPath = awsCredentialsPath,
177 |         awsInstanceType = awsInstanceType,
178 |         awsSubnet = awsSubnet,
179 |         awsSecurityGroup = awsSecurityGroup,
180 |         awsAmiId = awsAmiId,
181 |         awsSshKeyPair = awsSshKeyPair
182 |     )
183 |     validObject(x)
184 |     x
185 | }
186 | 
187 | 
188 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
189 | ### Accessors
190 | ###
191 | 
#' Path to the AWS credentials used by a param object
#'
#' @param x AWSSnowParam object
#'
#' @return the `awsCredentialsPath` field of `x`
#' @export
awsCredentialsPath <- function(x) x$awsCredentialsPath
202 | 
#' Workers of the cluster described by a param object
#'
#' @param x AWSSnowParam object
#'
#' @return the `workers` field of `x`
#' @export
awsWorkers <- function(x) x$workers
213 | 
#' AWS instance attributes held by a param object, as a list
#'
#' @param x AWSSnowParam object
#'
#' @return the `awsInstance` field of `x`
#' @export
awsInstance <- function(x) x$awsInstance
224 | 
225 | 
#' AWS instance type of a param object
#'
#' Possible instance types are listed at
#' https://aws.amazon.com/ec2/instance-types/.  The Bioconductor AMI's
#' have been built using an m4.xlarge instance type, and large
#' computations are best supported on that type of instance.
#'
#' @param x AWSSnowParam object
#'
#' @return character, the `awsInstanceType` field of `x`
#' @export
awsInstanceType <- function(x) x$awsInstanceType
243 | 
#' AWS AMI-ID recorded in a param object
#'
#' @param x AWSSnowParam object
#'
#' @return the `awsAmiId` field of `x`
#' @export
awsAmiId <- function(x) x$awsAmiId
254 | 
#' AWS subnet recorded in a param object
#'
#' @param x AWSSnowParam object
#'
#' @return the `awsSubnet` field of `x`
#' @export
awsSubnet <- function(x) x$awsSubnet
265 | 
266 | 
#' SSH key pair path associated with a param object
#'
#' @param x AWSSnowParam object
#'
#' @return the `awsSshKeyPair` field of `x`
#' @export
awsSshKeyPair <- function(x) x$awsSshKeyPair
277 | 
#' AWS security group recorded in a param object
#'
#' The security group defines inbound and outbound traffic for the
#' EC2 instance.
#'
#' @param x AWSSnowParam object
#'
#' @return the `awsSecurityGroup` field of `x`
#' @export
awsSecurityGroup <- function(x) x$awsSecurityGroup
289 | 
290 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
291 | ### Methods - control
292 | ###
293 | 
## Registry for the cluster that has been started. The state lives in
## a local environment shared by four closures, so only a single
## AWSSnowParam object can be registered at a time.
##   isup()       TRUE when a cluster is registered
##   get()        the registered cluster, or NULL
##   set(cluster) register `cluster`; it must be an AWSSnowParam
##   reset()      clear the registration
.awsCluster <- local({
    registered <- NULL

    isRegistered <- function() {
        !is.null(registered)
    }
    currentCluster <- function() {
        registered
    }
    register <- function(cluster) {
        stopifnot(is(cluster, "AWSSnowParam"))
        registered <<- cluster
    }
    clear <- function() {
        registered <<- NULL
    }

    list(
        isup = isRegistered,
        get = currentCluster,
        set = register,
        reset = clear
    )
})
314 | 
#' Get the AWSSnowParam object currently launched
#'
#' Only one AWSSnowParam object can be started within one session;
#' an error is raised when none has been started.
#'
#' @return AWSSnowParam object
#' @export
awsCluster <- function()
{
    if (.awsCluster$isup())
        return(.awsCluster$get())
    stop("no existing cluster")
}
327 | 
328 | 
## Private IP addresses of the instances recorded in `x`: one
## character value per entry of the first reservation's instancesSet.
#' @importFrom aws.ec2 describe_instances
.awsClusterIps <- function(x)
{
    reservations <- describe_instances(awsInstance(x))
    members <- reservations[[1]][["instancesSet"]]
    vapply(
        members,
        function(member) member[["privateIpAddress"]],
        character(1)
    )
}
339 | 
340 | 
#' Launch master node on AWS EC2 if credentials are valid
#'
#' When the session is already running on an EC2 instance (as reported
#' by `.awsDetectMaster()`), only a message is emitted; otherwise
#' `.awsLaunchMaster(x)` is called.
#'
#' @param x object passed on to `.awsLaunchMaster()`
#'
awsLaunchMasterOnEc2 <-
    function(x)
{
    onMaster <- .awsDetectMaster()
    if (onMaster) {
        ## TODO: return instance details
        ## message() concatenates its arguments without a separator,
        ## so the space between the two pieces must be explicit.
        message("You are on an EC2 instance now, you may choose ",
                "to use this instance as your master node")
    } else {
        .awsLaunchMaster(x)
    }
}
355 | 
356 | 
## bpstart for AWSSnowParam: launch the EC2 instances described by
## `x`, register `x` as the session's cluster, wait until the status
## is "running", use the private IPs as workers and hand over to the
## next bpstart method.
#' @importFrom aws.ec2 run_instances
#' @importFrom aws.signature use_credentials
#' @importFrom BiocParallel bpstart bpworkers<-
#' @exportMethod bpstart
setMethod("bpstart", "AWSSnowParam",
    function(x)
{
    ## Only one cluster may be registered per session
    if (.awsCluster$isup()) {
        stop(
            "use 'bpstop(awsCluster())' to shut down existing AWS cluster",
            call. = FALSE
        )
    }
    use_credentials(file=awsCredentialsPath(x))
    ## Set awsBiocVersion, devel vs release

    launched <- run_instances(
        image=awsAmiId(x),
        type=awsInstanceType(x),
        min=awsWorkers(x),
        subnet=awsSubnet(x),
        sgroup=awsSecurityGroup(x)
    )
    ## Record the launch result and register the cluster
    x$awsInstance <- launched
    .awsCluster$set(x)
    ## Poll once per second until the instance reports "running"
    message("starting...", appendLF = FALSE)
    while (!.awsisup(x)) {
        message(".", appendLF = FALSE)
        Sys.sleep(1)
    }
    message(awsInstanceStatus(x))
    ## The workers are addressed by their private IP addresses
    bpworkers(x) <- .awsClusterIps(x)
    ## Sleep for 10 seconds to make sure there is no race condition
    ## TODO: make this better
    Sys.sleep(10)
    ## Call bpstart in SnowParam
    callNextMethod(x)
})
400 | 
401 | 
#' Check status of aws ec2 instance
#'
#' @param x AWSSnowParam object
#'
#' @return character: "stopped" when `x` holds no instance
#'     information, "starting" when `instance_status()` returns
#'     nothing for it, otherwise the first instance-state name found
#'     in the returned status (e.g. "running").
#' @importFrom aws.ec2 instance_status
#' @export
awsInstanceStatus <- function(x)
{
    instance <- awsInstance(x)
    ## Nothing launched (or already cleared by bpstop)
    if (length(instance) == 0L)
        return("stopped")
    status <- instance_status(instance)
    ## No status returned for the instance yet
    if (length(status) == 0L)
        return("starting")
    status$item$instanceState$name[[1]]
}
419 | 
## TRUE when the status reported for `x` by awsInstanceStatus() is
## "running"
.awsisup <- function(x)
{
    state <- awsInstanceStatus(x)
    state == "running"
}
425 | 
426 | 
## bpstop for AWSSnowParam: terminate the EC2 instances recorded in
## `x` when they are running, release the session's cluster
## registration, clear the instance information and return `x`
## invisibly.
#' @importFrom aws.ec2 terminate_instances
#' @importFrom BiocParallel bpstop
#' @exportMethod bpstop
setMethod("bpstop", "AWSSnowParam",
    function(x)
{
    wasUp <- .awsisup(x)
    if (wasUp) {
        terminate_instances(x$awsInstance)
        message("stopping...", appendLF = FALSE)
        while (.awsisup(x)) {
            message(".", appendLF = FALSE)
            ## Poll once per second, as bpstart does, instead of
            ## issuing status requests in a tight loop
            Sys.sleep(1)
        }
        message("terminated")
    }
    ## Release the registration whenever this object holds it, not
    ## only when the instance was running. Otherwise a cluster stopped
    ## in any other state keeps bpstart() blocked for the session.
    ## NOTE(review): instances that are not yet "running" are still
    ## not terminated here -- confirm whether they should be.
    if (wasUp || (.awsCluster$isup() && identical(.awsCluster$get(), x))) {
        .awsCluster$reset()
    }
    ## Return terminated instance state to screen
    x$awsInstance <- list()
    invisible(x)
})
449 | 


--------------------------------------------------------------------------------
/inst/extdata/config.ini:
--------------------------------------------------------------------------------
  1 | ####################################
  2 | ## StarCluster Configuration File ##
  3 | ####################################
  4 | [global]
  5 | # Configure the default cluster template to use when starting a cluster
  6 | # defaults to 'smallcluster' defined below. This template should be usable
  7 | # out-of-the-box provided you've configured your keypair correctly
  8 | DEFAULT_TEMPLATE=smallcluster
  9 | # enable experimental features for this release
 10 | #ENABLE_EXPERIMENTAL=True
 11 | # number of seconds to wait when polling instances (default: 30s)
 12 | #REFRESH_INTERVAL=15
 13 | # specify a web browser to launch when viewing spot history plots
 14 | #WEB_BROWSER=chromium
 15 | # split the config into multiple files
 16 | #INCLUDE=~/.starcluster/aws, ~/.starcluster/keys, ~/.starcluster/vols
 17 | 
 18 | #############################################
 19 | ## AWS Credentials and Connection Settings ##
 20 | #############################################
 21 | [aws info]
 22 | # This is the AWS credentials section (required).
 23 | # These settings apply to all clusters
 24 | # replace these with your AWS keys
 25 | AWS_ACCESS_KEY_ID = #your_aws_access_key_id
 26 | AWS_SECRET_ACCESS_KEY = #your_secret_access_key
 27 | # replace this with your account number
 28 | AWS_USER_ID= #your userid
 29 | # Uncomment to specify a different Amazon AWS region  (OPTIONAL)
 30 | # (defaults to us-east-1 if not specified)
 31 | # NOTE: AMIs have to be migrated!
 32 | #AWS_REGION_NAME = eu-west-1
 33 | #AWS_REGION_HOST = ec2.eu-west-1.amazonaws.com
 34 | # Uncomment these settings when creating an instance-store (S3) AMI (OPTIONAL)
 35 | #EC2_CERT = /path/to/your/cert-asdf0as9df092039asdfi02089.pem
 36 | #EC2_PRIVATE_KEY = /path/to/your/pk-asdfasd890f200909.pem
 37 | # Uncomment these settings to use a proxy host when connecting to AWS
 38 | #AWS_PROXY = your.proxyhost.com
 39 | #AWS_PROXY_PORT = 8080
 40 | #AWS_PROXY_USER = yourproxyuser
 41 | #AWS_PROXY_PASS = yourproxypass
 42 | 
 43 | ###########################
 44 | ## Defining EC2 Keypairs ##
 45 | ###########################
 46 | # Sections starting with "key" define your keypairs. See "starcluster createkey
 47 | # --help" for instructions on how to create a new keypair. Section name should
 48 | # match your key name e.g.:
 49 | [key mykey]
 50 | KEY_LOCATION=~/.ssh/mykey.rsa
 51 | 
 52 | # You can of course have multiple keypair sections
 53 | # [key myotherkey]
 54 | # KEY_LOCATION=~/.ssh/myotherkey.rsa
 55 | 
 56 | ################################
 57 | ## Defining Cluster Templates ##
 58 | ################################
 59 | # Sections starting with "cluster" represent a cluster template. These
 60 | # "templates" are a collection of settings that define a single cluster
 61 | # configuration and are used when creating and configuring a cluster. You can
 62 | # change which template to use when creating your cluster using the -c option
 63 | # to the start command:
 64 | #
 65 | #     $ starcluster start -c mediumcluster mycluster
 66 | #
 67 | # If a template is not specified then the template defined by DEFAULT_TEMPLATE
 68 | # in the [global] section above is used. Below is the "default" template named
 69 | # "smallcluster". You can rename it but don't forget to update the
 70 | # DEFAULT_TEMPLATE setting in the [global] section above. See the next section
 71 | # on defining multiple templates.
 72 | 
 73 | [cluster smallcluster]
 74 | # change this to the name of one of the keypair sections defined above
 75 | KEYNAME = mykey
 76 | # number of ec2 instances to launch
 77 | CLUSTER_SIZE = 2
 78 | # create the following user on the cluster
 79 | CLUSTER_USER = ubuntu
 80 | # optionally specify shell (defaults to bash)
 81 | # (options: tcsh, zsh, csh, bash, ksh)
 82 | CLUSTER_SHELL = bash
 83 | # Uncomment to prepend the cluster tag to the dns name of all nodes created
 84 | # using this cluster config.  ie: mycluster-master and mycluster-node001
 85 | # If you choose to enable this option, it's recommended that you enable it in
 86 | # the DEFAULT_TEMPLATE so all nodes will automatically have the prefix
 87 | DNS_PREFIX = True
 88 | # AMI to use for cluster nodes. These AMIs are for the us-east-1 region.
 89 | # Use the 'listpublic' command to list StarCluster AMIs in other regions
 90 | # The base i386 StarCluster AMI is ami-9bf9c9f2
 91 | # The base x86_64 StarCluster AMI is ami-3393a45a
 92 | # The base HVM StarCluster AMI is ami-6b211202
 93 | NODE_IMAGE_ID = ami-0454187e
 94 | # instance type for all cluster nodes
 95 | # (options: m3.large, c3.8xlarge, i2.8xlarge, t2.micro, hs1.8xlarge, c1.xlarge, r3.4xlarge, g2.2xlarge, m1.small, c1.medium, m3.2xlarge, c3.2xlarge, m2.xlarge, m2.2xlarge, t2.small, r3.2xlarge, t1.micro, cr1.8xlarge, r3.8xlarge, cc1.4xlarge, m1.medium, r3.large, c3.xlarge, i2.xlarge, m3.medium, cc2.8xlarge, m1.large, cg1.4xlarge, i2.2xlarge, c3.large, i2.4xlarge, c3.4xlarge, r3.xlarge, t2.medium, hi1.4xlarge, m2.4xlarge, m1.xlarge, m3.xlarge)
 96 | NODE_INSTANCE_TYPE = t2.micro
 97 | # Launch cluster in a VPC subnet (OPTIONAL)
 98 | SUBNET_IDS=
 99 | # Uncomment to assign public IPs to cluster nodes (VPC-ONLY) (OPTIONAL)
100 | # WARNING: Using public IPs with a VPC requires:
101 | # 1. An internet gateway attached to the VPC
102 | # 2. A route table entry linked to the VPC's internet gateway and associated
103 | #    with the VPC subnet with a destination CIDR block of 0.0.0.0/0
104 | # WARNING: Public IPs allow direct access to your VPC nodes from the internet
105 | #PUBLIC_IPS=True
106 | # Uncomment to disable installing/configuring a queueing system on the
107 | # cluster (SGE)
108 | #DISABLE_QUEUE=True
109 | # Uncomment to specify a different instance type for the master node (OPTIONAL)
110 | # (defaults to NODE_INSTANCE_TYPE if not specified)
111 | #MASTER_INSTANCE_TYPE = m1.small
112 | # Uncomment to specify a separate AMI to use for the master node. (OPTIONAL)
113 | # (defaults to NODE_IMAGE_ID if not specified)
114 | #MASTER_IMAGE_ID = ami-3393a45a
115 | # availability zone to launch the cluster in (OPTIONAL)
116 | # (automatically determined based on volumes (if any) or
117 | # selected by Amazon if not specified)
118 | #AVAILABILITY_ZONE = us-east-1c
119 | # list of volumes to attach to the master node (OPTIONAL)
120 | # these volumes, if any, will be NFS shared to the worker nodes
121 | # see "Configuring EBS Volumes" below on how to define volume sections
122 | #VOLUMES = oceandata, biodata
123 | # list of plugins to load after StarCluster's default setup routines (OPTIONAL)
124 | # see "Configuring StarCluster Plugins" below on how to define plugin sections
125 | #PLUGINS = myplugin, myplugin2
126 | # list of permissions (or firewall rules) to apply to the cluster's security
127 | # group (OPTIONAL).
128 | #PERMISSIONS = ssh, http
129 | permissions = http
130 | # Uncomment to always create a spot cluster when creating a new cluster from
131 | # this template. The following example will place a $0.50 bid for each spot
132 | # request.
133 | #SPOT_BID = 0.50
134 | # Uncomment to specify one or more userdata scripts to use when launching
135 | # cluster instances. Supports cloudinit. All scripts combined must be less than
136 | # 16KB
137 | #USERDATA_SCRIPTS = /path/to/script1, /path/to/script2
138 | 
139 | ###########################################
140 | ## Defining Additional Cluster Templates ##
141 | ###########################################
142 | # You can also define multiple cluster templates. You can either supply all
143 | # configuration options as with smallcluster above, or create an
144 | # EXTENDS=<cluster_name> variable in the new cluster section to use all
145 | # settings from <cluster_name> as defaults. Below are example templates that
146 | # use the EXTENDS feature:
147 | 
148 | # [cluster mediumcluster]
149 | # Declares that this cluster uses smallcluster as defaults
150 | # EXTENDS=smallcluster
151 | # This section is the same as smallcluster except for the following settings:
152 | # KEYNAME=myotherkey
153 | # NODE_INSTANCE_TYPE = c1.xlarge
154 | # CLUSTER_SIZE=8
155 | # VOLUMES = biodata2
156 | 
157 | # [cluster largecluster]
158 | # Declares that this cluster uses mediumcluster as defaults
159 | # EXTENDS=mediumcluster
160 | # This section is the same as mediumcluster except for the following variables:
161 | # CLUSTER_SIZE=16
162 | 
163 | #############################
164 | ## Configuring EBS Volumes ##
165 | #############################
166 | # StarCluster can attach one or more EBS volumes to the master and then
167 | # NFS_share these volumes to all of the worker nodes. A new [volume] section
168 | # must be created for each EBS volume you wish to use with StarCluster. The
169 | # section name is a tag for your volume. This tag is used in the VOLUMES
170 | # setting of a cluster template to declare that an EBS volume is to be mounted
171 | # and nfs shared on the cluster. (see the commented VOLUMES setting in the
172 | # example 'smallcluster' template above) Below are some examples of defining
173 | # and configuring EBS volumes to be used with StarCluster:
174 | 
175 | # Sections starting with "volume" define your EBS volumes
176 | # [volume biodata]
177 | # attach vol-c999999 to /home on master node and NFS-share to worker nodes
178 | # VOLUME_ID = vol-c999999
179 | # MOUNT_PATH = /home
180 | 
181 | # Same volume as above, but mounts to different location
182 | # [volume biodata2]
183 | # VOLUME_ID = vol-c999999
184 | # MOUNT_PATH = /opt/
185 | 
186 | # Another volume example
187 | # [volume oceandata]
188 | # VOLUME_ID = vol-d7777777
189 | # MOUNT_PATH = /mydata
190 | 
191 | # By default StarCluster will attempt first to mount the entire volume device,
192 | # failing that it will try the first partition. If you have more than one
193 | # partition you will need to set the PARTITION number, e.g.:
194 | # [volume oceandata]
195 | # VOLUME_ID = vol-d7777777
196 | # MOUNT_PATH = /mydata
197 | # PARTITION = 2
198 | 
199 | ############################################
200 | ## Configuring Security Group Permissions ##
201 | ############################################
202 | # Sections starting with "permission" define security group rules to
203 | # automatically apply to newly created clusters. IP_PROTOCOL in the following
204 | # examples can be: tcp, udp, or icmp. CIDR_IP defaults to 0.0.0.0/0, i.e.
205 | # "open to the world"
206 | 
207 | # open port 80 on the cluster to the world
208 | # [permission http]
209 | # IP_PROTOCOL = tcp
210 | # FROM_PORT = 80
211 | # TO_PORT = 80
212 | 
213 | # open https on the cluster to the world
214 | # [permission https]
215 | # IP_PROTOCOL = tcp
216 | # FROM_PORT = 443
217 | # TO_PORT = 443
218 | 
219 | # open port 80 on the cluster to an ip range using CIDR_IP
220 | [permission http]
221 | IP_PROTOCOL = tcp
222 | FROM_PORT = 80
223 | TO_PORT = 80
224 | CIDR_IP = #insert_cidr
225 | 
226 | # restrict ssh access to a single ip address (<your_ip>)
227 | # [permission ssh]
228 | # IP_PROTOCOL = tcp
229 | # FROM_PORT = 22
230 | # TO_PORT = 22
231 | # CIDR_IP = <your_ip>/32
232 | 
233 | 
234 | #####################################
235 | ## Configuring StarCluster Plugins ##
236 | #####################################
237 | # Sections starting with "plugin" define a custom python class which perform
238 | # additional configurations to StarCluster's default routines. These plugins
239 | # can be assigned to a cluster template to customize the setup procedure when
240 | # starting a cluster from this template (see the commented PLUGINS setting in
241 | # the 'smallcluster' template above). Below is an example of defining a user
242 | # plugin called 'myplugin':
243 | 
244 | # [plugin myplugin]
245 | # NOTE: myplugin module must either live in ~/.starcluster/plugins or be
246 | # on your PYTHONPATH
247 | # SETUP_CLASS = myplugin.SetupClass
248 | # extra settings are passed as __init__ arguments to your plugin:
249 | # SOME_PARAM_FOR_MY_PLUGIN = 1
250 | # SOME_OTHER_PARAM = 2
251 | 
252 | ######################
253 | ## Built-in Plugins ##
254 | ######################
255 | # The following plugins ship with StarCluster and should work out-of-the-box.
256 | # Uncomment as needed. Don't forget to update your PLUGINS list!
257 | # See http://star.mit.edu/cluster/docs/latest/plugins for plugin details.
258 | #
259 | # Use this plugin to install one or more packages on all nodes
260 | # [plugin pkginstaller]
261 | # SETUP_CLASS = starcluster.plugins.pkginstaller.PackageInstaller
262 | # # list of apt-get installable packages
263 | # PACKAGES = mongodb, python-pymongo
264 | #
265 | # Use this plugin to create one or more cluster users and download all user ssh
266 | # keys to $HOME/.starcluster/user_keys/<cluster>-<region>.tar.gz
267 | # [plugin createusers]
268 | # SETUP_CLASS = starcluster.plugins.users.CreateUsers
269 | # NUM_USERS = 30
270 | # # you can also comment out NUM_USERS and specify exact usernames, e.g.
271 | # # usernames = linus, tux, larry
272 | # DOWNLOAD_KEYS = True
273 | #
274 | # Use this plugin to configure the Condor queueing system
275 | # [plugin condor]
276 | # SETUP_CLASS = starcluster.plugins.condor.CondorPlugin
277 | #
278 | # The SGE plugin is enabled by default and not strictly required. Only use this
279 | # if you want to tweak advanced settings in which case you should also set
280 | # DISABLE_QUEUE=TRUE in your cluster template. See the plugin doc for more
281 | # details.
282 | # [plugin sge]
283 | # SETUP_CLASS = starcluster.plugins.sge.SGEPlugin
284 | # MASTER_IS_EXEC_HOST = False
285 | #
286 | # The IPCluster plugin configures a parallel IPython cluster with optional
287 | # web notebook support. This allows you to run Python code in parallel with low
288 | # latency message passing via ZeroMQ.
289 | # [plugin ipcluster]
290 | # SETUP_CLASS = starcluster.plugins.ipcluster.IPCluster
291 | # # Enable the IPython notebook server (optional)
292 | # ENABLE_NOTEBOOK = True
293 | # # Set a password for the notebook for increased security
294 | # # This is optional but *highly* recommended
295 | # NOTEBOOK_PASSWD = a-secret-password
296 | # # Set a custom directory for storing/loading notebooks (optional)
297 | # NOTEBOOK_DIRECTORY = /path/to/notebook/dir
298 | # # Set a custom packer. Must be one of 'json', 'pickle', or 'msgpack'
299 | # # This is optional.
300 | # PACKER = pickle
301 | #
302 | # Use this plugin to create a cluster SSH "dashboard" using tmux. The plugin
303 | # creates a tmux session on the master node that automatically connects to all
304 | # the worker nodes over SSH. Attaching to the session shows a separate window
305 | # for each node and each window is logged into the node via SSH.
306 | # [plugin tmux]
307 | # SETUP_CLASS = starcluster.plugins.tmux.TmuxControlCenter
308 | #
309 | # Use this plugin to change the default MPI implementation on the
310 | # cluster from OpenMPI to MPICH2.
311 | # [plugin mpich2]
312 | # SETUP_CLASS = starcluster.plugins.mpich2.MPICH2Setup
313 | #
314 | # Configure a hadoop cluster. (includes dumbo setup)
315 | # [plugin hadoop]
316 | # SETUP_CLASS = starcluster.plugins.hadoop.Hadoop
317 | #
318 | # Configure a distributed MySQL Cluster
319 | # [plugin mysqlcluster]
320 | # SETUP_CLASS = starcluster.plugins.mysql.MysqlCluster
321 | # NUM_REPLICAS = 2
322 | # DATA_MEMORY = 80M
323 | # INDEX_MEMORY = 18M
324 | # DUMP_FILE = test.sql
325 | # DUMP_INTERVAL = 60
326 | # DEDICATED_QUERY = True
327 | # NUM_DATA_NODES = 2
328 | #
329 | # Install and setup an Xvfb server on each cluster node
330 | # [plugin xvfb]
331 | # SETUP_CLASS = starcluster.plugins.xvfb.XvfbSetup
332 | 


--------------------------------------------------------------------------------
/R/AWSBatchJobsParam-class.R:
--------------------------------------------------------------------------------
  1 | 
  2 | #' Reference class .AWSBatchJobsParam: use AWS EC2 instances as a cluster
  3 | #'
  4 | #' The .AWSBatchJobsParam class extends the BatchJobsParam class to allow
  5 | #' usage of AWS EC2-instances for parallel computation. The methods follow a
  6 | #' style similar to that of BiocParallelParams, with bpstart, bpstop, bpisup
  7 | #' and bplapply being the important ones. The behaviour of these functions
  8 | #' is described in the man pages.
  9 | #'
 10 | #' @field awsCredentialsPath Path to AWS credentials, default value is
 11 | #'     `~/.aws/credentials`
 12 | #' @field awsInstanceType Type of AWS EC2-instance, eg. t2.micro
 13 | #' @field awsSubnet AWS EC2-instance subnet, within a certain VPC
 14 | #' @field awsAmiId AMI (Amazon Machine Image) ID for the
 15 | #'     Bioconductor-starcluster image. Correct ID is needed.
 16 | #' @field awsSshKeyPair SSH key pair to associate with your AWS EC2-instance
 17 | #' @field awsProfile Profile to use from the AWS credentials file
 18 | #' @importFrom methods new validObject callNextMethod
 19 | #' @importFrom BiocParallel BatchJobsParam
 20 | #' @importClassesFrom BiocParallel BatchJobsParam BiocParallelParam
 21 | .AWSBatchJobsParam <- setRefClass(
 22 |     "AWSBatchJobsParam",
 23 |     contains = "BatchJobsParam",
 24 |     fields = list(
 25 |         awsCredentialsPath = "character",
 26 |         awsInstanceType = "character",
 27 |         awsSubnet = "character",
 28 |         awsAmiId = "character",
 29 |         awsSshKeyPair = "character",
 30 |         awsProfile = "character"
 31 |     ),
 32 |     methods = list(
 33 |         show = function() {
 34 |             callSuper()  ## run the parent class's show() method first
 35 |             cat("  awsCredentialsPath: ", awsCredentialsPath(.self),
 36 |                 "\n  awsInstanceType: ", awsInstanceType(.self),
 37 |                 "\n  awsSubnet: ", awsSubnet(.self),
 38 |                 "\n  awsAmiId: ", awsAmiId(.self),
 39 |                 "\n  awsSshKeyPair: ", awsSshKeyPair(.self),
 40 |                 "\n  awsProfile: ", awsProfile(.self),
 41 |                 "\n",
 42 |                 sep="")  ## sep="": labels and values are joined verbatim
 43 |         }
 44 |     ),
 45 |     inheritPackage=TRUE
 46 | )
 47 | 
 48 | 
 49 | #' AWSBatchJobsParam function to start an AWS EC2-instance cluster
 50 | #'
 51 | #' This function starts a cluster of AWS EC2-instances to allow
 52 | #' parallel computation of R objects using BatchJobs on SGE, and works
 53 | #' with BiocParallel, to allow computation with R/Bioconductor objects.
 54 | #'
 55 | #' @param workers Numeric, number of workers to launch in the cluster
 56 | #' @param awsCredentialsPath character, Path to AWS credentials,
 57 | #'     default value is `~/.aws/credentials`
 58 | #' @param awsInstanceType character, Type of AWS EC2-instance,
 59 | #'     eg. t2.micro
 60 | #' @param awsSubnet character, AWS EC2-instance subnet, within a
 61 | #'     certain VPC
 62 | #' @param awsAmiId character, AMI(amazon machine image) ID for the
 63 | #'     Bioconductor-release version
 64 | #' @param awsSshKeyPair character, SSH key pair, to associate with
 65 | #'     your AWS EC2-instance
 66 | #' @param awsProfile character, indicates what profile to use while
 67 | #'     using AWS credentials
 68 | #' @param verbose logical, gives a verbose output of SSH connection
 69 | #'     attempt, default is FALSE.
 70 | #' @param ... Additional arguments, used to initialize BatchJobsParam.
 71 | #' @return AWSBatchJobsParam object
 72 | #' @examples
 73 | #' \dontrun{
 74 | #'         ## Minimal example
 75 | #'         aws <- AWSBatchJobsParam(
 76 | #'                    workers = 2,
 77 | #'                    awsCredentialsPath = "~/.aws/credentials",
 78 | #'                    awsInstanceType = "t2.micro",
 79 | #'                    awsSubnet = "subnet-d66a05ec",
 80 | #'                    awsAmiId = "ami-0454187e",
 81 | #'                    awsSshKeyPair = "mykey"
 82 | #'                )
 83 | #' }
 84 | #' @importFrom aws.ec2 my_ip
 85 | #' @importFrom aws.signature use_credentials
 86 | #' @importFrom ini read.ini
 87 | #' @exportClass AWSBatchJobsParam
 88 | ##
 89 | ## aws, starclusterConfigPaths exist: AWSBatchJobsParam() constructs a
 90 | ## valid object
 91 | ##
 92 | ## starclusterCredentialsPaths does not exist: create from arguments
 93 | 
 94 | ## Resolve one setting: a non-NULL `default_value` supplied by the caller
 95 | ## wins; otherwise read `option_value` from section `cluster_id` of `config`.
 96 | .starcluster_option <- function(config, cluster_id, option_value,
 97 |                                 default_value) {
 98 |     if (is.null(default_value))
 99 |         return(config[[cluster_id]][[option_value]])
100 |     default_value
101 | }
102 | 
103 | #' @export
104 | AWSBatchJobsParam <-
105 |     function(workers = NULL,
106 |              starclusterConfigPath = .STARCLUSTER_CONFIG_PATH,
107 |              startclusterClusterId = "smallcluster",
108 |              ## for bpsetup() only
109 |              awsInstanceType = NA_character_,
110 |              awsSubnet = NA_character_,
111 |              awsAmiId = NA_character_,
112 |              awsSshKeyPair = NA_character_,
113 |              awsCredentialsPath = "~/.aws/credentials",
114 |              awsProfile = "default",
115 |              ## for BatchJobsParam()
116 |              ...
117 |              )
118 | {
119 |     ## Zero check: Windows is not supported.
120 |     if (.Platform$OS.type == "windows") {
121 |         stop("'AWSBatchJobsParam' not supported on Windows")
122 |     }
123 | 
124 |     ## Record which aws* arguments the caller supplied. This has to happen
125 |     ## before any of them is reassigned below: missing() is only reliable
126 |     ## until the argument is modified.
127 |     given <- c(awsInstanceType = !missing(awsInstanceType),
128 |                awsSubnet = !missing(awsSubnet),
129 |                awsAmiId = !missing(awsAmiId),
130 |                awsSshKeyPair = !missing(awsSshKeyPair))
131 | 
132 |     if (file.exists(starclusterConfigPath)) {
133 |         ## A starcluster config exists: it fills in every setting that was
134 |         ## not passed explicitly (explicit arguments win, as for 'workers').
135 |         config <- read.ini(starclusterConfigPath)
136 |         clusterId <- paste("cluster", startclusterClusterId)
137 |         section <- config[[clusterId]]
138 |         if (is.null(section))
139 |             stop("no section '[", clusterId, "]' found in '",
140 |                  starclusterConfigPath, "'", call.=FALSE)
141 |         if (!given[["awsInstanceType"]])
142 |             awsInstanceType <- section[["NODE_INSTANCE_TYPE"]]
143 |         if (!given[["awsSubnet"]])
144 |             awsSubnet <- section[["SUBNET_IDS"]]
145 |         if (!given[["awsAmiId"]])
146 |             awsAmiId <- section[["NODE_IMAGE_ID"]]
147 |         if (!given[["awsSshKeyPair"]])
148 |             awsSshKeyPair <- section[["KEYNAME"]]
149 |         ## A cluster size taken from the config may arrive as a character
150 |         ## string; store it as an integer.
151 |         workers <- .starcluster_option(
152 |             config, clusterId, "CLUSTER_SIZE", workers)
153 |         if (is.character(workers))
154 |             workers <- as.integer(workers)
155 |     } else {
156 |         ## FIXME: on the master node valid AWS credentials should not be
157 |         ## required to launch jobs.
158 |         ## fixed=TRUE: match the profile name literally, not as a regex.
159 |         credentialCheck <- file.exists(awsCredentialsPath) &&
160 |             any(grepl(awsProfile, readLines(awsCredentialsPath), fixed=TRUE))
161 |         if (!credentialCheck) {
162 |             stop("'AWSBatchJobsParam()' requires either 'startclusterConfig*' _or_ 'aws*' arguments",
163 |                  call.=FALSE)
164 |         }
165 |         if (!any(given)) {
166 |             ## No aws* settings at all: initialize with empty values.
167 |             if (is.null(workers))
168 |                 workers <- 0L
169 |             awsInstanceType <- awsSubnet <- awsAmiId <- awsSshKeyPair <- ""
170 |         }
171 |     }
172 |     x <- .AWSBatchJobsParam(
173 |         BatchJobsParam(...),
174 |         workers = workers,
175 |         awsCredentialsPath = awsCredentialsPath, awsProfile = awsProfile,
176 |         awsInstanceType = awsInstanceType, awsSubnet = awsSubnet,
177 |         awsAmiId = awsAmiId, awsSshKeyPair = awsSshKeyPair
178 |     )
179 |     validObject(x)
180 |     x
181 | }
182 | 
183 | 
184 | 
185 | #' AWS EC2 instance type stored in the param object.
186 | #'
187 | #' The possible instance types are listed in the
188 | #' document: https://aws.amazon.com/ec2/instance-types/.  The
189 | #' Bioconductor AMI's have been built using an m4.xlarge instance
190 | #' type.  Large computations are best supported on this type of
191 | #' instance.
192 | #'
193 | #' @param x AWSBatchJobsParam object
194 | #'
195 | #' @return character, the value of the `awsInstanceType` field
196 | #' @export
197 | awsInstanceType <- function(x)
198 | {
199 |     instanceType <- x$awsInstanceType
200 |     instanceType
201 | }
202 | 
203 | 
204 | #' Get path to the AWS credentials file
205 | #'
206 | #' @param x AWSBatchJobsParam object
207 | #' @return character, the value of the `awsCredentialsPath` field
208 | #' @export
209 | awsCredentialsPath <- function(x)
210 | {
211 |     credentialsPath <- x$awsCredentialsPath
212 |     credentialsPath
213 | }
214 | 
215 | 
216 | #' Get number of workers in the cluster
217 | #'
218 | #' @param x AWSBatchJobsParam object
219 | #' @return the value of the `workers` field of `x`
220 | #' @export
221 | awsWorkers <- function(x)
222 | {
223 |     nWorkers <- x$workers
224 |     nWorkers
225 | }
226 | 
227 | 
228 | #' Get the AWS AMI-ID stored in the param object. These need to be
229 | #' Bioconductor configured AMI's.
230 | #'
231 | #' @param x AWSBatchJobsParam object
232 | #' @return character, the value of the `awsAmiId` field
233 | #' @export
234 | awsAmiId <- function(x)
235 | {
236 |     amiId <- x$awsAmiId
237 |     amiId
238 | }
239 | 
240 | 
241 | #' Get the AWS subnet stored in the param object
242 | #'
243 | #' @param x AWSBatchJobsParam object
244 | #' @return character, the value of the `awsSubnet` field
245 | #' @export
246 | awsSubnet <- function(x)
247 | {
248 |     subnet <- x$awsSubnet
249 |     subnet
250 | }
251 | 
252 | 
253 | #' Get the SSH key pair associated with the AWS EC2 instance.
254 | #'
255 | #' @param x AWSBatchJobsParam object
256 | #' @return character, the value of the `awsSshKeyPair` field
257 | #' @export
258 | awsSshKeyPair <- function(x)
259 | {
260 |     keyPair <- x$awsSshKeyPair
261 |     keyPair
262 | }
263 | 
264 | #' Get the awsProfile being used
265 | #'
266 | #' @param x AWSBatchJobsParam object
267 | #' @return character, the value of the `awsProfile` field
268 | #' @export
269 | awsProfile <- function(x)
270 | {
271 |     profile <- x$awsProfile
272 |     profile
273 | }
274 | 
275 | #' Setup a new AWS EC2 cluster
276 | #'
277 | #' The step bpsetup is required before using any of the conventional
278 | #' BiocParallel functions like bpstart, bpstop. It is used to setup
279 | #' or start a new or existing cluster on the user's AWS account. Once
280 | #' a cluster is up and running, it should be safely suspended or
281 | #' terminated using functionality like 'bpsuspend' and 'bpteardown'.
282 | #' NOTE: This function takes a while to process; depending on the number
283 | #' of workers needed it may take up to 4-5 minutes.
284 | #'
285 | #' @param x AWSBatchJobsParam object
286 | #' @param clustername character value given to the cluster.
287 | #' @param cidr_ip character, CIDR block handed to the starcluster
288 | #'     configuration step; the default is the value used so far.
289 | #' @export
290 | bpsetup <-
291 |     function(x, clustername="awsparallel", cidr_ip="172.30.0.0/16")
292 | {
293 |     .config_starcluster(workers = awsWorkers(x),
294 |                         awsCredentialsPath = awsCredentialsPath(x),
295 |                         awsInstanceType = awsInstanceType(x),
296 |                         awsSubnet = awsSubnet(x),
297 |                         awsAmiId = awsAmiId(x),
298 |                         awsSshKeyPair = awsSshKeyPair(x),
299 |                         awsProfile = awsProfile(x),
300 |                         cidr_ip = cidr_ip
301 |                         )
302 |     args <- c("start", clustername)
303 |     res <- system2("starcluster", args=args)
304 |     ## A non-zero exit status from starcluster means the launch failed.
305 |     if (res != 0) {
306 |         stop("Cluster failed to launch, please check the settings")
307 |     }
308 |     ## Once cluster is started transfer config file to master node
309 |     transferToCluster(clustername, "~/.starcluster/config",
310 |                       "~/.starcluster/config")
311 | }
312 | 
313 | #' Suspend an AWS EC2 cluster started using bpsetup
314 | #'
315 | #' bpsuspend is required to 'stop' an AWS Cluster, if the user
316 | #' has an intention of re-using it at a later time. It does NOT
317 | #' terminate the cluster. The clustername should match the argument
318 | #' used in bpsetup.
319 | #'
320 | #' NOTE: bpsuspend stops the cluster, but it loses connection to resume it.
321 | #' Please refrain from using bpsuspend.
322 | #'
323 | #' @param x AWSBatchJobsParam object
324 | #' @param clustername character value given to the cluster.
325 | #' @export
326 | bpsuspend <-
327 |     function(x, clustername="awsparallel")
328 | {
329 |     warning("If you use 'bpsuspend()', you cannot restart the cluster using 'bpresume()', please check ?bpsuspend")
330 |     args <- c("stop", "--confirm", clustername)
331 |     res <- system2("starcluster", args=args)
332 |     ## Non-zero exit status: fail. stop() joins its pieces with no separator.
333 |     if (res != 0) {
334 |         stop("Error suspending cluster. Please check your AWS ",
335 |              "account for these instances.")
336 |     }
337 | }
338 | 
339 | ## FIXME: If awsparallel cluster already exists, when bpsetup is called,
340 | ## trigger bpresume
341 | ## FIXME: This is not working because of a starcluster bug,
342 | ## "ERROR: No master node found!"
343 | bpresume <-
344 |     function(x, clustername="awsparallel")
345 |     {
346 |         ## Disabled on purpose: the code below is unreachable for now.
347 |         stop("bpresume, does NOT work at this time")
348 |         args <- c("restart", clustername)
349 |         res <- system2("starcluster", args=args)
350 |         if (res != 0) {
351 |             stop("Error resuming cluster. Please check your AWS ",
352 |                  "account to see if they have been terminated instead.")
353 |         }
354 |     }
355 | 
356 | ## Function to resume cluster
357 | ## bpresume2 <- 
358 | ##     function(x, clustername="awsparallel")
359 | ##     {
360 | ##         ### use credentials
361 | ##         aws.ec2::use_credentials(awsCredentialsPath(aws), profile=awsProfile(awS))
362 | ##         ### Discover instances
363 | ##         instances <- aws.ec2::describe_instances()
364 | ##         ### Find instances with "name" containing clustername
365 | ##         awsList <- ## List of nodes with paste0(clustername, "-master")
366 | ##         ### start instances
367 | ##         run_instances(awsList)
368 | ##     }
369 | 
370 | #' Teardown permanently (TERMINATE) the AWS cluster.
371 | #'
372 | #' bpteardown is used to completely remove the AWS cluster from
373 | #' the user's AWS account. The user cannot retrieve any data or
374 | #' reuse the cluster once bpteardown is started.
375 | #'
376 | #' We recommend using bpteardown, once the data analysis is done.
377 | #' This will regulate AWS account costs, unless the user intends
378 | #' to reuse the cluster. If there is a need to reuse the cluster see,
379 | #' '?bpsuspend'.
380 | #' @param x AWSBatchJobsParam object
381 | #' @param clustername character value given to the cluster.
382 | #' @export
383 | bpteardown <-
384 |     function(x, clustername="awsparallel")
385 | {
386 |     args <- c("terminate", "-f", "--confirm", clustername)
387 |     res <- system2("starcluster", args=args)
388 |     ## Non-zero exit status: fail. stop() joins its pieces with no separator.
389 |     if (res != 0) {
390 |         stop("Error terminating cluster. Please check your AWS ",
391 |              "account for these instances or run bpteardown again.")
392 |     }
393 | }
394 | 
395 | ## TODO: Provide information for the config file to be SSH-ed into the master.
396 | ## 1. Save config file.
397 | ## 2. re-construct the AWSBatchJobsParam on the master from the config file.
398 | ## 3. Look at register.R from BiocParallel to see how to register latest
399 | ##    AWSBatchJobs param.
400 | #'
401 | setMethod("bpstart", "AWSBatchJobsParam",
402 |     function(x)
403 | {
404 |     ## NOTE(review): this errors when .awsDetectMaster() is TRUE; confirm the
405 |     ## condition is not meant to be negated.
406 |     if (.awsDetectMaster()) {
407 |         ## stop() joins its pieces with no separator, hence the space
408 |         stop("SSH to Master node of batch jobs machine using ",
409 |              "awsConnectToMaster()", call. = FALSE)
410 |     }
411 | })
412 | 
413 | 
414 | ## bpstop method for AWSBatchJobsParam: only prints a trace message.
415 | setMethod("bpstop", "AWSBatchJobsParam", function(x) {
416 |     traceMessage <- "bpstop is being called"
417 |     cat(traceMessage)
418 | })
419 | 


--------------------------------------------------------------------------------
/vignettes/AWSParallel-AWSBatchJobsParam-tutorial.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "AWSParallel AWSBatchJobsParam Tutorial"
  3 | author: "Nitesh Turaga"
  4 | date: "`r Sys.Date()`"
  5 | output:
  6 |   BiocStyle::html_document:
  7 |     number_sections: yes
  8 |     theme: united
  9 |     toc_float: yes
 10 | vignette: >
 11 |   %\VignetteIndexEntry{AWSParallel AWSBatchJobsParam Tutorial}
 12 |   %\VignetteEngine{knitr::rmarkdown}
 13 |   %\VignetteEncoding{UTF-8}
 14 | ---
 15 | 
 16 | # Introduction
 17 | 
 18 | The AWSParallel package provides functionality to perform parallel
 19 | evaluation using AWS infrastructure, most importantly EC2. It also
 20 | internally uses `StarCluster` to deploy jobs on SGE. It extends
 21 | `BatchJobsParam` class in BiocParallel, and works with the same range of R
 22 | and Bioconductor objects as `BatchJobsParam`.
 23 | 
 24 | The goal of the AWSParallel package is to allow the user to create a cluster
 25 | of EC2 machines on AWS, and run `bplapply` from one "master" node (EC2
 26 | instance) to submit jobs to a set of "worker" nodes (multiple EC2
 27 | instances). It is important to note that, both master and worker nodes are
 28 | AWS EC2 machines. A side-effect of the way we configure the required
 29 | software to enable batch job submission is, the "master" and "workers"
 30 | which act as the cluster, need to be spawned (started) from a Bioconductor
 31 | AMI.  The user will have to start an instance, manually, and use this
 32 | machine as cluster starter (primary machine where AWSParallel is being
 33 | run).
 34 | 
 35 | This package requires that the user have an Amazon AWS account that **costs
 36 | money** and requires a credit card to access. The AWS credentials provided
 37 | by the user also need access to other AWS services, namely,
 38 | 
 39 |     1. IAM
 40 |     2. VPC
 41 |     3. EC2
 42 | 
 43 | We leave the responsibility to the user to figure out AWS, although many
 44 | helpful tutorials are pointed out in the *References* section.
 45 | 
 46 | # Quick Start
 47 | 
 48 | The quick start guide assumes you have your AWS access key, and secret
 49 | key in `~/.aws/credentials`. Please refer to the detailed section of
 50 | the vignette if these settings are not present. We are expecting at
 51 | this point, that you have launched the AMI (ami-18c0f562) provided by Bioconductor.
 52 | 
 53 | You have to use the AMI created by Bioconductor, which includes
 54 | starcluster and Bioc-devel to use this package. This will be your
 55 | **HOST** instance.
 56 | 
 57 | Load the **AWSParallel** library and create an `AWSBatchJobsParam`
 58 | object. This step is needed to **setup** your cluster on AWS.
 59 | 
 60 | The AMI required for correct configuration (Bioc-devel with
 61 | starcluster) is "ami-18c0f562".  The AWS credentials are needed on the
 62 | launched instance as well. They need to be in the default location, or
 63 | the path needs to be specified. Specify the instance type of your AWS
 64 | Cluster, the same instance type will apply to your master and
 65 | workers. Specify the subnet you want to use from your AWS account,
 66 | note that the master and workers need to be on the same
 67 | subnet. Specify the SSH key pair, if you don't have a key you use,
 68 | just create a new one for AWSParallel, and use that throughout. The
 69 | quickstart will launch a small instance for demonstration `t2.micro`
 70 | as larger instances cost more money.
 71 | 
 72 | ```{r, eval=FALSE}
 73 | ## Load the library
 74 | library(AWSParallel)
 75 | 
 76 | ## Construct AWSBatchJobsParam class
 77 | aws <- AWSBatchJobsParam(workers = 2,
 78 |                   awsCredentialsPath = "~/.aws/credentials",
 79 |                   awsInstanceType = "t2.micro",
 80 |                   awsSubnet = "subnet-d66a05ec",
 81 |                   awsAmiId = "ami-18c0f562",
 82 |                   awsSshKeyPair = "mykey",
 83 |                   awsProfile="default")
 84 | 
 85 | ## Print object to show structure
 86 | aws
 87 | ```
 88 | 
 89 | Start, use, and stop a cluster.
 90 | 
 91 | ```{r awsDetectMaster, eval=FALSE, echo=FALSE}
 92 | ## This is a conditional param used to evaluate the vignette
 93 | awsDetectMaster <- function()
 94 | {
 95 |     ## Get list of all instances on AWS account
 96 |     instances <- aws.ec2::describe_instances()
 97 |     ## Local hostname, rewritten from "ip-a-b-c-d" to dotted "a.b.c.d"
 98 |     hostname <- system2("hostname", stdout=TRUE)
 99 |     hostname <- gsub("-",".", sub("ip-","", hostname))
100 |     for (i in seq_along(instances)) {
101 |         instancesSet <- instances[[i]][["instancesSet"]]
102 |         for (j in seq_along(instancesSet)) {
103 |             privateIpAddress <- instancesSet[[j]][["privateIpAddress"]]
104 |             ## An instance may carry no private address (NULL); treat
105 |             ## that as "no match" instead of failing inside if().
106 |             if (isTRUE(any(unlist(privateIpAddress) == hostname)))
107 |                 return(TRUE)
108 |         }
109 |     }
110 |     FALSE
111 | }
112 | 
113 | onMaster <- awsDetectMaster()
114 | ```
115 | 
116 | We now should start an AWS Cluster from our HOST instance. The setup
117 | step usually takes a few mins to start the cluster. The code chunk
118 | below shows you the functionality of controlling the AWS Cluster from
119 | the HOST node.
120 | 
121 | You can **setup** an AWS cluster with a master and a few workers using
122 | `bpsetup()`. Given that our current configuration has `workers=2`, the
123 | cluster contains **1 master** and **1 worker** node. This step
124 | produces quite a bit of verbose output about the launch of your
125 | cluster (don't be alarmed).
126 | 
127 | You can **suspend** the AWS cluster using `bpsuspend()`, this stops
128 | your AWS cluster but does not terminate the instances in the cluster.
129 | 
130 | You can **teardown** or terminate the AWS cluster using
131 | `bpteardown()`, this will terminate your AWS cluster and remove any
132 | data on the master or worker nodes that are not saved. You should use
133 | this option with caution.
134 | 
135 | ## Setup AWS Cluster
136 | 
137 | Setup the AWS cluster using `bpsetup`
138 | 
139 | ```{r conditional, eval=FALSE}
140 | 
141 | ## Setup AWS cluster (takes a few mins)
142 | bpsetup(aws)
143 | ```
144 | 
145 | ## Use AWS Cluster
146 | 
147 | Once the cluster is set up, you should log into the master node of your
148 | AWS cluster. This can be done using the following step on your host's
149 | command line.
150 | 
151 | _NOTE_: This is not an R command, you need to exit your R session and
152 | use this command once your AWS Cluster has successfully launched. If
153 | this command fails, then your key has not been setup properly or the
154 | previous commands haven't been used correctly.
155 | 
156 | ```
157 | starcluster sshmaster -u ubuntu awsparallel
158 | ```
159 | 
160 | Once you have logged into your **master** node, you may launch your
161 | jobs on the cluster. Start a new R session,
162 | 
163 | ```{r, eval=FALSE}
164 | ## Load the AWSParallel library
165 | library(AWSParallel)
166 | 
167 | ## Get the AWSBatchJobsParam which is already registered on your master node
168 | aws <- registered()[[1]]
169 | 
170 | ## Test the bplapply command with some function, this function 
171 | ## just prints out the hostname of the machine.
172 | FUN <- function(i) system("hostname", intern=TRUE)
173 | 
174 | ## Run a bplapply command with FUN, set the BPPARAM to aws
175 | ## This will submit jobs to your AWS cluster
176 | xx <- bplapply(1:100, FUN, BPPARAM = aws)
177 | 
178 | ## See the hostname of your cluster and how the jobs have been divided.
179 | table(unlist(xx))
180 | ```
181 | 
182 | Once you are done with submitting jobs on the AWS cluster, you need to
183 | suspend, or teardown, to avoid being charged by amazon when it is not
184 | in use. This is done on the HOST machine again.
185 | 
186 | ## Teardown AWS Cluster
187 | 
188 | Teardown or terminate the cluster once you have finished using it.
189 | 
190 | ```{r, eval=FALSE}
191 | bpteardown(aws)
192 | ```
193 | 
194 | # AWS settings
195 | 
196 | Settings required to get the package working on AWS.
197 | 
198 | ## Get AWS Credentials
199 | 
200 | To use AWSParallel, AWS credentials are a requirement. The credentials
201 | are given through [AWS Identity and Access management - IAM][]. The
202 | user needs an AWS access key and an AWS secret key.
203 | 
204 | These AWS credentials should be stored on your HOST machine accessible
205 | at `~/.aws/credentials`, as given in the documentation for
206 | [configuring AWS credentials][].
207 | 
208 | Example AWS credentials, which need to be in the file,
209 | "~/.aws/credentials".
210 | 
211 | 
212 |     [default]
213 |     aws_access_key_id=AKIAIOSFODNN7EXAMPLE
214 |     aws_secret_access_key=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
215 | 
216 | 
217 | The AWS credentials provided to the package need access to a few
218 | components of the AWS account,
219 | 
220 | 1. IAM - This is used to get the credentials, change credentials,
221 |    activate/deactivate credentials for security reasons.
222 | 1. VPC - This is used to detect VPC's in the region, so that all the
223 |    instances launched are within the same VPC, and same subnet.
224 | 1. EC2 - This is used to launch, run, stop, and terminate instances as
225 |    needed.
226 | 
227 | ## AWS Profile
228 | 
229 | The setting `[default]` which you see on the AWS credentials,
230 | determines the profile of the AWS credentials. If you have multiple
231 | profiles, be sure to specify the correct profile in the argument to
232 | instantiate the AWSBatchJobsParam.
233 | 
234 | ## AWS Key Pair
235 | 
236 | Users also need to create a KeyPair which needs to be accessible on
237 | the machine being used. The Key Pair can be created using the
238 | [AWS-EC2 interface][].
239 | 
240 | It can also be done programmatically using the `AWSParallel` package
241 | but the functionality is imported from `aws.ec2`.
242 | 
243 | ```
244 | starcluster createkey mykey
245 | ```
246 | 
247 | The same key pair needs to be passed to your AWSBatchJobsParam. Notice
248 | that I pass in the same key "mykey" to my AWSBatchJobsParam as well.
249 | 
250 | ## Subnet
251 | 
252 | Every AWS account has a default *VPC* created when the account is
253 | started. This VPC is usually contained to one AWS Region. Most of the
254 | Bioconductor AMI's are located on the *AWS-Region US-EAST*, so
255 | starting your account with a VPC located in that region makes most
256 | sense.
257 | 
258 | If the VPC is created (by the user or by Amazon's default), the account
259 | gets *Subnets* as well by default. For the `AWSBatchJobsParam` class to be
260 | created, the user has to specify the Subnet. If the subnet is not
261 | given, we use the first one on the AWS account.
262 | 
263 | *NOTE:* The HOST, the *master* instance, and the *worker* instances
264 | need to be on the same VPC and subnet with permissible security
265 | groups.  Without this the connection established between the machines
266 | launched on AWS does not work.
267 | 
268 | # Working with the AWSParallel - AWSBatchJobsParam
269 | 
270 | This section describes the usage of AWSBatchJobsParam in a more
271 | detailed way. The steps below highlight the ideal way to use this
272 | package.
273 | 
274 | ## Starting a HOST instance on AWS
275 | 
276 | The user needs to start a HOST instance on AWS directly from his AWS
277 | account using the AWS UI to set up the host. This host instance will
278 | be used as an interface to control the AWS Cluster.
279 | 
280 | The HOST instance is just a machine to interact with your AWSCluster.
281 | It also allows windows users to be able to use the AWSParallel package
282 | without any issues.
283 | 
284 | ### Steps to prepare HOST instance.
285 | 
286 | This process can be done ONE time, and the instance can be stopped
287 | without being terminated. This **HOST** instance can be reused.
288 | 
289 | 1. Create a new amazon EC2 instance which is going to be the **HOST**
290 |    node, by choosing the AMI-ID with starcluster and bioc-devel, as
291 |    given on this page,
292 |    http://bioconductor.org/help/bioconductor-cloud-ami/#ami_ids.
293 |    
294 |     The size of the HOST can be a t2.micro, which is a free tier
295 |     of the AWS instance sizes. 
296 | 
297 |     Follow the steps in the ec2 management console to launch the image. 
298 | 
299 |     ![Choose EC2 Instance Type](choose-instance-type.png)
300 | 
301 |     ![Configure EC2 instance with security settings](configure-instance.png)
302 | 
303 |     ![Add storage as per usage requirements](add-storage.png)
304 | 
305 |     You are required to create a Keypair if you don't have one
306 |     already. This can be done using this [AWS-EC2 interface][]
307 |     console.
308 | 
309 |     ![Review and launch your HOST instance](review-and-launch.png)
310 | 
311 | 1. Name your HOST instance. This is important for getting your
312 |    instances settings. Call it "AWSParallelHOST", to make it easier
313 |    to recognize.
314 | 
315 | 1. SSH into the instance, which will be 
316 | 
317 |         ssh -i ~/.ssh/mykey.pem ubuntu@34.239.248.175
318 |     
319 | 1. Once you are logged in, there are a few things you need to set
320 |    up. Install or update the `AWSParallel` package in your R prompt.
321 | 
322 |         biocLite("AWSParallel")
323 |     
324 |     If you have a dependency error installing AWSParallel, because of
325 |     missing dependencies, i.e `aws.ec2` and `aws.signature`, try
326 | 
327 |         install.packages(
328 |             "aws.ec2", repos = c(getOption("repos"), 
329 |             "http://cloudyr.github.io/drat")
330 |         )
331 | 
332 | 1. Copy your AWS credentials to this machine, by writing your
333 |    credentials in the `~/.aws/credentials` file.
334 | 
335 | 1. Copy your AWS SSH keypair (`.pem`) file to the machine as
336 |    well. The `.pem` file needs to have read-only permissions, i.e.,
337 |    run `chmod 400 AWSparallel-test-keypair.pem` if you get a
338 |    permissions error.
339 |  
340 | 
341 | ### HOST instance size
342 | 
343 | Choosing a **t2.micro** is enough for this instance, as it is only
344 | required to switch on and off your AWS Cluster.
345 | 
346 | If you have data which needs to be used on the AWS Cluster, you'd
347 | transfer it on to the HOST before you teardown(terminate) your AWS
348 | Cluster. Remember the storage size has no relation with instance
349 | compute size. If you add a storage volume to your t2.micro, it will
350 | cost money, and does not remain in the free tier anymore.
351 | 
352 | ## Functionality of HOST instance
353 | 
354 | ### bpstart()
355 | 
356 | It is used to start an AWS Cluster and takes as an argument the
357 | AWSBatchJobsParam object, which needs to be initialized with the
358 | correct credentials.
359 | 
360 | The output of a successful `bpstart()` is quite verbose as it acts as
361 | the interface to starcluster and the user's AWS account to start the
362 | master and worker nodes, and set them up with the correct
363 | configuration.
364 | 
365 | ```{r, eval=FALSE}
366 | bpstart(aws)
367 | ```
368 | 
369 | ### bpsuspend()
370 | 
371 | If the user intends to reuse the AWS Cluster after finishing the
372 | compute jobs, the cluster can be suspended and restarted at a later
373 | time. The **bpsuspend** functionality is similar to the **stop** in
374 | the AWS account.
375 | 
376 | ```{r, eval=FALSE}
377 | bpsuspend(aws)
378 | ```
379 | 
380 | ### bpresume()
381 | 
382 | Resume your AWS Cluster from the HOST machine using `bpresume` with
383 | the registered AWS param.
384 | 
385 | When you log into your HOST node, your object for your
386 | AWSBatchJobsParam should register automatically after loading the
387 | `library(AWSParallel)`. It should be the first object in your
388 | registry.
389 | 
390 | `bpresume` will fail if the cluster was previously torn down. You
391 | might have to `bpsetup` again.
392 | 
393 | ```{r, eval=FALSE}
394 | library(AWSParallel)
395 | aws <- registered()[[1]]
396 | bpresume(aws)
397 | ```
398 | 
399 | NOTE: `bpresume` is not currently functional. It will be operational
400 | in other iterations of the package.
401 | 
402 | ### bpteardown()
403 | 
404 | Purge your AWS cluster from the HOST, using `bpteardown`. This removes
405 | the master and the worker nodes. Remember that there is no coming back
406 | after this step, your cluster is permanently lost.
407 | 
408 | ```{r, eval=FALSE}
409 | bpteardown(aws)
410 | ```
411 | 
412 | ## Using AWS Cluster from HOST instance
413 | 
414 | Start by SSH-ing into the master node of your AWS Cluster
415 | 
416 | ```
417 | starcluster sshmaster -u ubuntu awsparallel
418 | ```
419 | 
420 | ## Using AWS Cluster from MASTER node
421 | 
422 | Once you are logged into the master node of your AWS cluster, you can
423 | start an R session and start sending jobs through the
424 | AWSBatchJobsParam available on your registry.
425 | 
426 | ```{r, eval=FALSE}
427 | library(AWSParallel)
428 | 
429 | aws <- registered()[[1]]
430 | 
431 | FUN  <- function(i) system("hostname", intern=TRUE)
432 | 
433 | xx  <- bplapply(1:100, FUN, BPPARAM=aws)
434 | 
435 | table(unlist(xx))
436 | ```
437 | 
438 | ### Choosing AWS EC2 Instance Size -- MASTER
439 | 
440 | The size of an AWS-EC2 Instance gives you access to the required
441 | amount of compute power. Larger instances usually have a higher
442 | capacity for computing, but also cost more money. The [AWS Pricing][]
443 | is given in the documentation, and we recommend you take a look at it.
444 | 
445 | The Bioconductor AMI's have been built using the *m4.xlarge*
446 | machine. So ideally to run a large computation, and use every package
447 | available in Bioconductor you would use your worker of size
448 | *m4.xlarge*. If you are using a limited set of packages, or you just
449 | need to run a job in parallel, it would be easier to take a look at
450 | the [Instance types][] and decide the appropriate size for your needs.
451 | 
452 | 
453 | ### Choosing AWS EC2 Instance size -- WORKER
454 | 
455 | The size of the master and worker nodes are going to be the same in
456 | this version of the package. In the following releases we will allow
457 | for master and workers to be of different size.
458 |    
459 | # Instructions for Windows machines.
460 | 
461 | There should be no difference for a windows user vs any other
462 | operating system as the entire process takes place in the AWS
463 | ecosystem.
464 | 
465 | # Advanced Tips
466 | 
467 | 1. If you choose to keep your cluster isolated from any other work you
468 | have going on in your VPC, please create a new VPC and use the subnets in
469 | that VPC to start your AWSParallel param.
470 | 
471 | 1. Give the AWSParallel param, the new subnet, and a security group as
472 | described in the security group section.
473 | 
474 | 1. It is important that this cluster configuration is used only to
475 | launch parallel jobs. If you launch a large job only on the "master"
476 | node, there is no increment in speed if the job is not parallelized.
477 | It is vital that large jobs are not launched on the master node.
478 | 
479 | 
480 | # Session Info
481 | 
482 | ```{r}
483 | sessionInfo()
484 | ```
485 | 
486 | [AWS Identity and Access management - IAM]:http://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys
487 | 
488 | [configuring AWS credentials]:http://docs.aws.amazon.com/cli/latest/userguide/cli-config-files.html
489 | 
490 | [AWS-EC2 interface]:http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html#having-ec2-create-your-key-pair
491 | 
492 | [AWS Pricing]:https://aws.amazon.com/ec2/pricing/
493 | 
494 | [Instance types]: https://aws.amazon.com/ec2/instance-types/
495 | 


--------------------------------------------------------------------------------