├── samples
│   ├── compress
│   │   ├── bzip2_compress_file_list.txt
│   │   ├── gzip_compress_file_list.txt
│   │   ├── gzip_compress_config.sh
│   │   ├── bzip2_compress_config.sh
│   │   ├── gzip_decompress_config.sh
│   │   ├── bzip2_decompress_config.sh
│   │   ├── bzip2_decompress_file_list.txt
│   │   └── gzip_decompress_file_list.txt
│   └── samtools
│       ├── samtools_index_config.sh
│       └── samtools_index_file_list.txt
├── src
│   ├── common
│   │   ├── logging.sh
│   │   └── gcs_util.sh
│   ├── samtools
│   │   ├── do_samtools.sh
│   │   ├── task_samtools.sh
│   │   └── launch_samtools.sh
│   └── compress
│       ├── do_compress.sh
│       ├── task_compress.sh
│       └── launch_compress.sh
├── bin
│   ├── list_all_nodes.py
│   ├── list_all_instances.py
│   ├── attach_disk_to_nodes.sh
│   ├── install_crcmod_on_nodes.sh
│   ├── qconf.mod.sh
│   ├── mount_disk_on_nodes.sh
│   ├── sanitize_known_hosts.py
│   ├── ensure_cluster_size.py
│   ├── remove_terminated_nodes.py
│   ├── cluster_util.py
│   └── cluster_monitor.sh
├── README.md
├── tools
│   └── array_job_monitor.sh
└── LICENSE

/samples/compress/bzip2_compress_file_list.txt:
--------------------------------------------------------------------------------
gs://genomics-public-data/platinum-genomes/vcf/NA12877_S1.genome.vcf
gs://genomics-public-data/platinum-genomes/vcf/NA12878_S1.genome.vcf
gs://genomics-public-data/platinum-genomes/vcf/NA12879_S1.genome.vcf
gs://genomics-public-data/platinum-genomes/vcf/NA12880_S1.genome.vcf
gs://genomics-public-data/platinum-genomes/vcf/NA12881_S1.genome.vcf
gs://genomics-public-data/platinum-genomes/vcf/NA12882_S1.genome.vcf

--------------------------------------------------------------------------------
/samples/compress/gzip_compress_file_list.txt:
--------------------------------------------------------------------------------
gs://genomics-public-data/platinum-genomes/vcf/NA12877_S1.genome.vcf
gs://genomics-public-data/platinum-genomes/vcf/NA12878_S1.genome.vcf
gs://genomics-public-data/platinum-genomes/vcf/NA12879_S1.genome.vcf
gs://genomics-public-data/platinum-genomes/vcf/NA12880_S1.genome.vcf
gs://genomics-public-data/platinum-genomes/vcf/NA12881_S1.genome.vcf
gs://genomics-public-data/platinum-genomes/vcf/NA12882_S1.genome.vcf

--------------------------------------------------------------------------------
/samples/samtools/samtools_index_config.sh:
--------------------------------------------------------------------------------
# samtools_index_config.sh
#
# Configuration for a job which takes in a list of BAM files
# in Google Cloud Storage, uses "samtools index" to create a
# BAM index file, and pushes the index to Google Cloud Storage.

export SAMTOOLS_OPERATION="index"

export INPUT_LIST_FILE=./samples/samtools/samtools_index_file_list.txt
export OUTPUT_PATH=gs://MY_BUCKET/output_path/samtools_index
export OUTPUT_LOG_PATH=gs://MY_BUCKET/log_path/samtools_index

--------------------------------------------------------------------------------
/samples/compress/gzip_compress_config.sh:
--------------------------------------------------------------------------------
# gzip_compress_config.sh
#
# Configuration for a job which takes in a list of uncompressed
# files in Google Cloud Storage, compresses them using gzip, and uploads
# the compressed versions to Google Cloud Storage.
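#
# A sketch of how these settings are consumed downstream (see
# src/compress/do_compress.sh): for each input file the worker runs,
# in effect,
#
#   gzip --stdout NA12877_S1.genome.vcf > NA12877_S1.genome.vcf.gz
#
# and then uploads the results to OUTPUT_PATH with "gsutil -m cp".
# The file name above is just the first entry in the sample file list.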

export COMPRESS_OPERATION="compress" # compress | decompress
export COMPRESS_TYPE="gzip" # gzip | bzip2
export COMPRESS_EXTENSION=".gz" # .gz | .bz2

export INPUT_LIST_FILE=./samples/compress/gzip_compress_file_list.txt
export OUTPUT_PATH=gs://MY_BUCKET/output_path/compress_gzip
export OUTPUT_LOG_PATH=gs://MY_BUCKET/log_path/compress_gzip

--------------------------------------------------------------------------------
/samples/compress/bzip2_compress_config.sh:
--------------------------------------------------------------------------------
# bzip2_compress_config.sh
#
# Configuration for a job which takes in a list of uncompressed
# files in Google Cloud Storage, compresses them using bzip2, and uploads
# the compressed versions to Google Cloud Storage.

export COMPRESS_OPERATION="compress" # compress | decompress
export COMPRESS_TYPE="bzip2" # gzip | bzip2
export COMPRESS_EXTENSION=".bz2" # .gz | .bz2

export INPUT_LIST_FILE=./samples/compress/bzip2_compress_file_list.txt
export OUTPUT_PATH=gs://MY_BUCKET/output_path/compress_bzip2
export OUTPUT_LOG_PATH=gs://MY_BUCKET/log_path/compress_bzip2

--------------------------------------------------------------------------------
/samples/compress/gzip_decompress_config.sh:
--------------------------------------------------------------------------------
# gzip_decompress_config.sh
#
# Configuration for a job which takes in a list of gzip compressed
# files in Google Cloud Storage, decompresses them, and uploads
# the decompressed versions to Google Cloud Storage.

export COMPRESS_OPERATION="decompress" # compress | decompress
export COMPRESS_TYPE="gzip" # gzip | bzip2
export COMPRESS_EXTENSION=".gz" # .gz | .bz2

export INPUT_LIST_FILE=./samples/compress/gzip_decompress_file_list.txt
export OUTPUT_PATH=gs://MY_BUCKET/output_path/compress_gzipd
export OUTPUT_LOG_PATH=gs://MY_BUCKET/log_path/compress_gzipd

--------------------------------------------------------------------------------
/samples/compress/bzip2_decompress_config.sh:
--------------------------------------------------------------------------------
# bzip2_decompress_config.sh
#
# Configuration for a job which takes in a list of bzip2 compressed
# files in Google Cloud Storage, decompresses them, and uploads
# the decompressed versions to Google Cloud Storage.
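#
# For "decompress" jobs like this one, do_compress.sh trims
# COMPRESS_EXTENSION from each output name with the shell expansion
# ${FILE%${COMPRESS_EXTENSION}} and runs, in effect:
#
#   bzip2 --decompress --stdout vcfBeta-GS000017105-ASM.vcf.bz2 > vcfBeta-GS000017105-ASM.vcf
#
# (The file name is the first entry of the sample file list below.)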

export COMPRESS_OPERATION="decompress" # compress | decompress
export COMPRESS_TYPE="bzip2" # gzip | bzip2
export COMPRESS_EXTENSION=".bz2" # .gz | .bz2

export INPUT_LIST_FILE=./samples/compress/bzip2_decompress_file_list.txt
export OUTPUT_PATH=gs://MY_BUCKET/output_path/compress_bzip2d
export OUTPUT_LOG_PATH=gs://MY_BUCKET/log_path/compress_bzip2d

--------------------------------------------------------------------------------
/samples/compress/bzip2_decompress_file_list.txt:
--------------------------------------------------------------------------------
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/phase3/data/HG02291/cg_data/ASM_blood/vcfBeta-GS000017105-ASM.vcf.bz2
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/phase3/data/HG01974/cg_data/ASM_blood/vcfBeta-GS000017158-ASM.vcf.bz2
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/phase3/data/HG00621/cg_data/ASM_lcl/vcfBeta-GS000017116-ASM.vcf.bz2
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/phase3/data/HG00625/cg_data/ASM_lcl/vcfBeta-GS000017120-ASM.vcf.bz2
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/phase3/data/NA12801/cg_data/ASM_lcl/vcfBeta-GS000016407-ASM.vcf.bz2
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/phase3/data/NA12752/cg_data/ASM_lcl/vcfBeta-GS000016413-ASM.vcf.bz2

--------------------------------------------------------------------------------
/samples/samtools/samtools_index_file_list.txt:
--------------------------------------------------------------------------------
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/pilot2_high_cov_GRCh37_bams/data/NA12878/alignment/NA12878.chrom9.SOLID.bfast.CEU.high_coverage.20100125.bam
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/pilot2_high_cov_GRCh37_bams/data/NA12878/alignment/NA12878.chrom1.LS454.ssaha2.CEU.high_coverage.20100311.bam
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12878/alignment/NA12878.chrom11.SOLID.corona.SRP000032.2009_08.bam
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12878/alignment/NA12878.chrom12.SOLID.corona.SRP000032.2009_08.bam
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12878/alignment/NA12878.chrom10.SOLID.corona.SRP000032.2009_08.bam
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12878/alignment/NA12878.chromX.SOLID.corona.SRP000032.2009_08.bam

--------------------------------------------------------------------------------
/samples/compress/gzip_decompress_file_list.txt:
--------------------------------------------------------------------------------
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/working/20130723_phase3_wg/cornell/ALL.ChrY.Cornell.20130502.SNPs.Genotypes.vcf.gz
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/working/20140708_previous_phase3/v2_vcfs/ALL.chr21.phase3_shapeit2_mvncall_integrated_v2.20130502.genotypes.vcf.gz
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/working/20140708_previous_phase3/v1_vcfs/ALL.chr21.phase3_shapeit2_mvncall_integrated.20130502.genotype.vcf.gz
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/release/20130502/supporting/input_callsets/um/ALL.chr22.got_cloud.20130502.indels.integrated.sites.vcf.gz
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/working/20110721_exome_call_sets/bcm/ALL.BCM_Illumina_Mosaik_ontarget_plus50bp_822.20110521.snp.exome.genotypes.vcf.gz
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/release/20130502/supporting/input_callsets/bi/ALL.chr16.broad.mapping.20130502.snps_indels.low_coverage.sites.vcf.gz

--------------------------------------------------------------------------------
/src/common/logging.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# logging.sh
#
# Provides basic logging services.
# Clients of this utility script should:
# * Set LOGGING_LOG_FILE
# * Call logging::log to write messages to the log
# * Call logging::emit to write to stdout and to the log

# logging::log
#
# The log function will echo the input parameters to the LOGGING_LOG_FILE
function logging::log() {
  if [[ -n ${LOGGING_LOG_FILE:-} ]]; then
    echo "${@}" >> ${LOGGING_LOG_FILE}
  fi
}
readonly -f logging::log

# logging::emit
#
# The emit function will echo the input parameters to stdout
# and will also emit the input to the LOGGING_LOG_FILE
function logging::emit() {
  echo "${@}"
  logging::log "${@}"
}
readonly -f logging::emit

--------------------------------------------------------------------------------
/bin/list_all_nodes.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# list_all_nodes.py
#
# Utility script that returns a list of elasticluster node names
# for a cluster. The "node type" can optionally be specified.
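#
# Example invocations (cluster and node-type names are illustrative):
#
#   ./bin/list_all_nodes.py mycluster            # all nodes
#   ./bin/list_all_nodes.py mycluster compute    # only "compute" nodes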

import elasticluster
import elasticluster.conf
from elasticluster.__main__ import ElastiCluster


import sys

# Check usage
if len(sys.argv) < 2 or len(sys.argv) > 3:
  print "Usage: {} [cluster] [node_type]".format(sys.argv[0])
  sys.exit(1)

cluster_name=sys.argv[1]
node_type=sys.argv[2] if len(sys.argv) > 2 else None

# Create the elasticluster configuration endpoint
creator = elasticluster.conf.make_creator(ElastiCluster.default_configuration_file)

# Lookup the cluster
cluster = creator.load_cluster(cluster_name)

# Emit the node names
for node in cluster.get_all_nodes():
  if not node_type or node['kind'] == node_type:
    print node['name']

--------------------------------------------------------------------------------
/bin/list_all_instances.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# list_all_instances.py
#
# Utility script that returns a list of elasticluster instance names
# for a cluster. The "node type" can optionally be specified.

import elasticluster
import elasticluster.conf
from elasticluster.__main__ import ElastiCluster


import sys

# Check usage
if len(sys.argv) < 2 or len(sys.argv) > 3:
  print "Usage: {} [cluster] [node_type]".format(sys.argv[0])
  sys.exit(1)

cluster_name=sys.argv[1]
node_type=sys.argv[2] if len(sys.argv) > 2 else None

# Create the elasticluster configuration endpoint
creator = elasticluster.conf.make_creator(ElastiCluster.default_configuration_file)

# Lookup the cluster
cluster = creator.load_cluster(cluster_name)

# Emit the node names
for node in cluster.get_all_nodes():
  if not node_type or node['kind'] == node_type:
    print node['instance_id']

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
grid-computing-tools
====================

The grid-computing-tools repo is intended to be a place for scripts and
recipes for solving some very common issues, which typically fall under
the category of "simple for a few files, hard for many files."
Examples include:

* I have many VCFs in Cloud Storage that I need to (de)compress
* I have many VCFs in Cloud Storage that have something wrong with the header
* I have many BAMs in Cloud Storage for which I need to compute index files

grid-computing-tools components
-------------------------------

The primary components of the grid-computing-tools examples are:

* [Google Cloud Storage](https://cloud.google.com/storage/) - location of source input files and destination for output files
* [Google Compute Engine](https://cloud.google.com/compute/) - virtual machines in the cloud
* [Grid Engine](http://gridengine.info/) - job scheduling software to distribute commands across a cluster of virtual machines

The approach here is intended to provide a familiar environment to
computational scientists who are accustomed to using Grid Engine to
submit jobs to fixed-size clusters available at their research institution.

Available Tools
---------------
Documentation for the tools in this repo can be found at
http://googlegenomics.readthedocs.org/

The following tools are available:

* [Compress/Decompress files in Google Cloud Storage](http://googlegenomics.readthedocs.org/en/latest/use_cases/compress_or_decompress_many_files/index.html)
* [Index BAM files in Google Cloud Storage with SAMtools](http://googlegenomics.readthedocs.org/en/latest/use_cases/run_samtools_over_many_files/index.html)

--------------------------------------------------------------------------------
/bin/attach_disk_to_nodes.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# attach_disk_to_nodes.sh
#
# Utility script that attaches a disk read-only to each node of a cluster.
# The "node type" can optionally be specified such that, for example,
# the operation can be restricted to all "compute" nodes in the cluster.
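#
# Example invocation (all names below are placeholders):
#
#   ./bin/attach_disk_to_nodes.sh mycluster my-reference-disk us-central1-a compute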

set -o errexit
set -o nounset

if [[ $# -lt 3 ]]; then
  >&2 echo "Usage: ${0} [cluster] [disk_name] [zone] [node_type]"
  exit 1
fi

readonly CLUSTER=${1}
readonly DISK_NAME=${2}
readonly ZONE=${3}
readonly NODE_TYPE=${4:-}

# Use the list_all_instances.py python script to get the list of instances
readonly SCRIPT_DIR=$(dirname ${0})
readonly INSTANCES=$(
  python ${SCRIPT_DIR}/list_all_instances.py ${CLUSTER} ${NODE_TYPE})

# Sequentially connect to the nodes and run the command
for INSTANCE_NAME in ${INSTANCES}; do
  echo "Attaching disk ${DISK_NAME} to instance ${INSTANCE_NAME}"
  gcloud compute instances attach-disk ${INSTANCE_NAME} \
    --disk=${DISK_NAME} --device-name=${DISK_NAME} --zone=${ZONE} \
    --mode ro
done

--------------------------------------------------------------------------------
/bin/install_crcmod_on_nodes.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# install_crcmod_on_nodes.sh
#
# Utility script that connects to each node of a cluster
# and installs the python crcmod module required by gsutil
# to download multi-component objects.
# The "node type" can optionally be specified such that, for example,
# the operation can be restricted to all "compute" nodes in the cluster.

set -o errexit
set -o nounset

if [[ $# -lt 1 ]]; then
  >&2 echo "Usage: ${0} [cluster]"
  exit 1
fi

readonly CLUSTER=${1}

# Set of commands for Debian and Ubuntu as per "gsutil help crcmod"
readonly COMMANDS='
sudo apt-get update --yes
sudo apt-get install --yes gcc python-dev python-setuptools
sudo easy_install -U pip
sudo pip uninstall --yes crcmod
sudo pip install -U crcmod
'

# Use the list_all_nodes.py python script to get the list of instances
readonly SCRIPT_DIR=$(dirname ${0})
readonly NODES=$(python ${SCRIPT_DIR}/list_all_nodes.py ${CLUSTER})

# Sequentially connect to the nodes and run the commands
for NODE in ${NODES}; do
  elasticluster ssh ${CLUSTER} "${COMMANDS}" -n ${NODE}
done

--------------------------------------------------------------------------------
/bin/qconf.mod.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# qconf.mod.sh
#
# Variant of the script suggested here:
#   http://gridscheduler.sourceforge.net/howto/scripting.html
#
# to allow for setting configuration options programmatically.
#
# Example:
#   qconf.mod.sh -mconf global reschedule_unknown 00:05:00

set -o errexit
set -o nounset

if [[ $# -eq 0 ]]; then
  echo "Usage: ${0} [qconf_command] [host|global] [qconf_param] [qconf_value]"
  exit 1
fi

# This script gets invoked directly by the user with the command-line
# noted above.
#
# The script then sets itself as the EDITOR and executes "qconf".
# qconf will then call this script with one command-line parameter
# (a temporary file name).

if [[ -z ${QCONF_PARAMETER:-} ]]; then
  readonly COMMAND=${1}
  readonly HOST=${2}
  export QCONF_PARAMETER=${3}
  export QCONF_VALUE=${4}

  EDITOR=${0} \
    qconf ${COMMAND} ${HOST}
else
  # Sleep 1 second to ensure that the file modification time changes
  sleep 1

  # Update the temp file passed on the command-line by qconf
  readonly QCONF_TEMP_FILE=${1}
  sed -i \
    -e "/^${QCONF_PARAMETER} /d;\$a${QCONF_PARAMETER} ${QCONF_VALUE}" \
    ${QCONF_TEMP_FILE}
fi

--------------------------------------------------------------------------------
/bin/mount_disk_on_nodes.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# mount_disk_on_nodes.sh
#
# Utility script that connects to each node of a cluster
# and mounts the specified disk read-only.
# The "node type" can optionally be specified such that, for example,
# the operation can be restricted to all "compute" nodes in the cluster.

set -o errexit
set -o nounset

if [[ $# -lt 3 ]]; then
  >&2 echo "Usage: ${0} [cluster] [disk_name] [mount_point] [node_type]"
  exit 1
fi

readonly CLUSTER=${1}
readonly DISK_NAME=${2}
readonly MOUNT_POINT=${3}
readonly NODE_TYPE=${4:-}

# Set of commands to create the mount point and mount the disk read-only
readonly COMMANDS='
if ! mount -l | grep "'${MOUNT_POINT}'"; then
  sudo mkdir -p "'${MOUNT_POINT}'"
  sudo chmod 777 "'${MOUNT_POINT}'"
  sudo mount -o ro /dev/disk/by-id/google-'"${DISK_NAME}"' '"${MOUNT_POINT}"'
fi
'

# Use the list_all_nodes.py python script to get the list of instances
readonly SCRIPT_DIR=$(dirname ${0})
readonly NODES=$(
  python ${SCRIPT_DIR}/list_all_nodes.py ${CLUSTER} ${NODE_TYPE})

# Sequentially connect to the nodes and run the commands
for NODE in ${NODES}; do
  echo "Mount ${DISK_NAME} on ${NODE}:${MOUNT_POINT}"
  elasticluster ssh ${CLUSTER} "${COMMANDS}" -n ${NODE}
done

--------------------------------------------------------------------------------
/bin/sanitize_known_hosts.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# sanitize_known_hosts.py
#
# Lists all instances of an Elasticluster cluster and rewrites
# the known_hosts file with only those members.

import elasticluster
import elasticluster.conf
from elasticluster.__main__ import ElastiCluster

import paramiko

import sys

# Check usage
if len(sys.argv) != 2:
  print "Usage: {} [cluster]".format(sys.argv[0])
  sys.exit(1)

cluster_name=sys.argv[1]

# Create the elasticluster configuration endpoint
creator = elasticluster.conf.make_creator(ElastiCluster.default_configuration_file)

# Lookup the cluster
cluster = creator.load_cluster(cluster_name)

# Get the list of IP addresses
ip_addrs = [node.preferred_ip for node in cluster.get_all_nodes()]
print "Known ip addresses for cluster %s" % cluster_name
print ip_addrs

try:
  keys = paramiko.hostkeys.HostKeys(cluster.known_hosts_file)
except IOError as e:
  print e
  sys.exit(1)

print "Keyfile %s loaded" % cluster.known_hosts_file

new_keys = paramiko.hostkeys.HostKeys()

for ip_addr in ip_addrs:
  node_host_keys = keys.lookup(ip_addr)
  if node_host_keys:
    for key_type in node_host_keys.keys():
      new_keys.add(node_host_keys._hostname, key_type, node_host_keys[key_type])

print "Saving sanitized keyfile %s" % cluster.known_hosts_file
new_keys.save(cluster.known_hosts_file)

--------------------------------------------------------------------------------
/bin/ensure_cluster_size.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ensure_cluster_size.py
#
# Makes a pass over the specified cluster and adds nodes such that the
# number of nodes is at least as high as (ideally equal to) the value in
# the cluster configuration.

import elasticluster
import elasticluster.conf
from elasticluster.__main__ import ElastiCluster

import cluster_util

import os
import sys

# Check usage
if len(sys.argv) != 2:
  print "Usage: {} [cluster]".format(sys.argv[0])
  sys.exit(1)

cluster_name=sys.argv[1]

# Testing modes
#
# DRYRUN=1: do not add any nodes, just display a log of the operations
# that would occur

dryrun=os.environ['DRYRUN'] if 'DRYRUN' in os.environ else None

# BEGIN MAIN

# Create the elasticluster configuration endpoint
creator = elasticluster.conf.make_creator(ElastiCluster.default_configuration_file)

# Lookup the cluster
cluster = creator.load_cluster(cluster_name)
cluster.update()

print "*********************"
print "Checking cluster size"
print "*********************"

target_nodes = cluster_util.get_desired_cluster_nodes(cluster_name)

for kind in target_nodes:
  has_count = len(cluster.nodes[kind]) if kind in cluster.nodes else 0
  print "Node type (%s): Has: %d, Should have: %d" % (
      kind, has_count, target_nodes[kind])

  diff = target_nodes[kind] - has_count
  if diff > 0:
    print "Adding new nodes of type %s" % kind
    print
    if not dryrun:
      cluster_util.run_elasticluster(
          ['resize', cluster_name,
           '-a', '%d:%s' % (diff, kind),
           '-t', cluster_name])
  elif diff < 0:
    print "WARNING: There are more nodes of type %s than configured" % kind

--------------------------------------------------------------------------------
/src/samtools/do_samtools.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# do_samtools.sh
#
# Copies one or more files from GCS to disk,
# runs a samtools command
# and pushes the results into GCS.
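#
# This script is normally invoked by task_samtools.sh, which supplies the
# three positional parameters below (workspace dir, GCS input path, GCS
# output path). A hypothetical standalone run, with placeholder paths:
#
#   SRC_ROOT=./src SAMTOOLS_OPERATION=index \
#     ./src/samtools/do_samtools.sh /tmp/ws gs://my-bucket/sample.bam gs://my-bucket/out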

set -o errexit
set -o nounset

# Required input parameters:
readonly WORKSPACE_DIR=${1}
readonly INPUT_PATH=${2}
readonly OUTPUT_PATH=${3}

readonly WS_IN_DIR=${WORKSPACE_DIR}/in
readonly WS_OUT_DIR=${WORKSPACE_DIR}/out

source ${SRC_ROOT}/common/logging.sh
source ${SRC_ROOT}/common/gcs_util.sh

# Make sure our workspace directories are clean and ready
for DIR in ${WS_IN_DIR} ${WS_OUT_DIR}; do
  sudo rm -rf ${DIR}/*
  sudo mkdir -p ${DIR} --mode 777
done
unset DIR

# Download the file(s) to be processed
gcs_util::download "${INPUT_PATH}" "${WS_IN_DIR}/"

# Get an array of input files
declare -a FILE_LIST
if [[ ${DRYRUN:-} -eq 1 ]]; then
  # The FILE_LIST will be empty for a DRYRUN; try to fake it
  DRYRUN_LIST=$(gcs_util::get_file_list "${INPUT_PATH}")
  FILE_LIST=($(echo "${DRYRUN_LIST}" | sed -e 's#.*/##'))
else
  FILE_LIST=($(/bin/ls -1 ${WS_IN_DIR}))
fi
readonly FILE_LIST

# Process the input files
START=$(date +%s)
for FILE in "${FILE_LIST[@]}"; do
  logging::emit "Processing file ${FILE}"

  case "${SAMTOOLS_OPERATION}" in
    index)
      # The output file name cannot be changed for "samtools index"
      INFILE=${WS_IN_DIR}/${FILE}
      OUTFILE=${WS_IN_DIR}/${FILE}.bai

      CMD="samtools index ${INFILE}"
      ;;
    *)
      logging::emit "Unknown operation: ${SAMTOOLS_OPERATION}"
      exit 1
      ;;
  esac

  logging::emit "Command: ${CMD}"

  if [[ ${DRYRUN:-} -eq 1 ]]; then
    continue
  fi

  eval ${CMD}
done
END=$(date +%s)

logging::emit "Update: ${#FILE_LIST[@]} files in $((END-START)) seconds"

# Upload the output file(s)
if [[ ${OUTPUT_PATH} == "source" ]]; then
  OUTPUT_PATH=$(dirname ${INPUT_PATH})
fi
gcs_util::upload "${OUTFILE}" "${OUTPUT_PATH}/"

--------------------------------------------------------------------------------
/src/compress/do_compress.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# do_compress.sh
#
# Copies one or more files from GCS to disk,
# compresses or decompresses the file(s),
# and pushes the results into GCS.
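#
# As with do_samtools.sh, the wrapper (task_compress.sh) normally supplies
# the arguments. A hypothetical standalone run, with placeholder paths:
#
#   SRC_ROOT=./src COMPRESS_OPERATION=compress \
#   COMPRESS_TYPE=gzip COMPRESS_EXTENSION=.gz \
#     ./src/compress/do_compress.sh /tmp/ws "gs://my-bucket/*.vcf" gs://my-bucket/out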

set -o errexit
set -o nounset

# Required input parameters:
readonly WORKSPACE_DIR=${1}
readonly INPUT_PATH=${2}
readonly OUTPUT_PATH=${3}

readonly WS_IN_DIR=${WORKSPACE_DIR}/in
readonly WS_OUT_DIR=${WORKSPACE_DIR}/out

source ${SRC_ROOT}/common/logging.sh
source ${SRC_ROOT}/common/gcs_util.sh

# Make sure our workspace directories are clean and ready
for DIR in ${WS_IN_DIR} ${WS_OUT_DIR}; do
  sudo rm -rf ${DIR}/*
  sudo mkdir -p ${DIR} --mode 777
done
unset DIR

# Download the file(s) to (de)compress
gcs_util::download "${INPUT_PATH}" "${WS_IN_DIR}/"

# Get an array of input files
declare -a FILE_LIST
if [[ ${DRYRUN:-} -eq 1 ]]; then
  # The FILE_LIST will be empty for a DRYRUN; try to fake it
  DRYRUN_LIST=$(gcs_util::get_file_list "${INPUT_PATH}")
  FILE_LIST=($(echo "${DRYRUN_LIST}" | sed -e 's#.*/##'))
else
  FILE_LIST=($(/bin/ls -1 ${WS_IN_DIR}))
fi
readonly FILE_LIST

# Process the input files
START=$(date +%s)
for FILE in "${FILE_LIST[@]}"; do
  logging::emit "Processing file ${FILE}"

  case "${COMPRESS_OPERATION}" in
    compress)
      # Add the extension to the output file
      INFILE=${WS_IN_DIR}/${FILE}
      OUTFILE=${WS_OUT_DIR}/${FILE}${COMPRESS_EXTENSION}

      CMD="${COMPRESS_TYPE} --stdout ${INFILE} > ${OUTFILE}"
      ;;
    decompress)
      # Trim the extension from the output file
      INFILE=${WS_IN_DIR}/${FILE}
      OUTFILE=${WS_OUT_DIR}/${FILE%${COMPRESS_EXTENSION}}

      CMD="${COMPRESS_TYPE} --decompress --stdout ${INFILE} > ${OUTFILE}"
      ;;
    *)
      logging::emit "Unknown compression operation: ${COMPRESS_OPERATION}"
      exit 1
      ;;
  esac

  logging::emit "Command: ${CMD}"

  if [[ ${DRYRUN:-} -eq 1 ]]; then
    continue
  fi

  eval ${CMD}
done
END=$(date +%s)

logging::emit "Update: ${#FILE_LIST[@]} files in $((END-START)) seconds"

# Upload the output file(s)
if [[ ${DRYRUN:-} -eq 1 ]]; then
  exit 0
fi
gcs_util::upload "${WS_OUT_DIR}/*" "${OUTPUT_PATH}/"

--------------------------------------------------------------------------------
/src/compress/task_compress.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# task_compress.sh
#
# Wrapper script that sets up the call to the actual worker:
#   do_compress.sh
#
# This script hides from do_compress.sh the fact that Grid Engine is
# managing the operation, so do_compress.sh can be dedicated to its
# specific task.
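#
# (launch_compress.sh submits this script with "qsub -t <start>-<end>",
# creating one array-job task per line of INPUT_LIST_FILE.)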
#
# When a single command in the array job is sent to a compute node,
# its task number is stored in the variable SGE_TASK_ID,
# so we can use the value of that variable to determine the inputs

set -o errexit
set -o nounset

source ${SRC_ROOT}/common/logging.sh
source ${SRC_ROOT}/common/gcs_util.sh

# Set up an EXIT trap to be sure to clean up
trap exit_clean EXIT

# Set the workspace dir
readonly WORKSPACE_DIR=${TASK_SCRATCH_DIR}/${JOB_NAME}.${JOB_ID}.${SGE_TASK_ID}
sudo mkdir -p ${WORKSPACE_DIR} -m 777

# Set the log file
export LOGGING_LOG_FILE=${WORKSPACE_DIR}/${JOB_NAME}.${JOB_ID}.${SGE_TASK_ID}.log
readonly TASK_START_TIME=$(date '+%s')

# For debugging, emit the hostname and inputs
logging::emit "Task host: $(hostname)"
logging::emit "Task start: ${SGE_TASK_ID}"
logging::emit "Input list file: ${INPUT_LIST_FILE}"
logging::emit "Output path: ${OUTPUT_PATH}"
logging::emit "Output log path: ${OUTPUT_LOG_PATH:-}"
logging::emit "Scratch dir: ${TASK_SCRATCH_DIR}"

# Set up an EXIT trap to be sure to clean up
function exit_clean() {
  # If the WORKSPACE_DIR variable has been set, then be sure to clean up
  if [[ -n ${WORKSPACE_DIR:-} ]]; then
    sudo rm -rf ${WORKSPACE_DIR}
  fi
}
readonly -f exit_clean

function finish() {
  # Upload the log file
  if [[ -n ${OUTPUT_LOG_PATH:-} ]]; then
    local start=${TASK_START_TIME}
    local end=$(date '+%s')

    logging::emit "Task time ${SGE_TASK_ID}: $((end - start)) seconds"
    gcs_util::upload_log "${LOGGING_LOG_FILE}" "${OUTPUT_LOG_PATH}/"
  fi
}
readonly -f finish

# Make sure that the crcmod library is installed
gcs_util::install_crcmod

# Grab the record to process
readonly INPUT_PATTERN=$(sed -n "${SGE_TASK_ID}p" ${INPUT_LIST_FILE})
logging::emit "Processing ${INPUT_PATTERN}"

# Launch the job
if ${SRC_ROOT}/compress/do_compress.sh \
    ${WORKSPACE_DIR} \
    ${INPUT_PATTERN} \
    ${OUTPUT_PATH}; then
  logging::emit "Task end SUCCESS: ${SGE_TASK_ID}"
else
  logging::emit "Task end FAILURE: ${SGE_TASK_ID}"
fi

finish

--------------------------------------------------------------------------------
/bin/remove_terminated_nodes.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# remove_terminated_nodes.py
#
# Makes a pass over the specified cluster and removes any nodes that are
# in a TERMINATED, STOPPING, or unknown state.
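#
# Example usage (cluster name is illustrative); with DRYRUN=1 the script
# only logs the removals that would occur:
#
#   DRYRUN=1 ./bin/remove_terminated_nodes.py mycluster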

import elasticluster
import elasticluster.conf
from elasticluster.__main__ import ElastiCluster

import cluster_util

import errno
import os
import sys

# Check usage
if len(sys.argv) < 2 or len(sys.argv) > 3:
  print "Usage: {} [cluster] [node_type]".format(sys.argv[0])
  sys.exit(1)

cluster_name=sys.argv[1]
node_type=sys.argv[2] if len(sys.argv) > 2 else None

# Testing modes
#
# DRYRUN=1: do not remove any nodes, just display a log of the operations
# that would occur
# REMOVENODES=<node,node,...>: remove the requested node(s)

dryrun=os.environ['DRYRUN'] if 'DRYRUN' in os.environ else None
removenodes=os.environ['REMOVENODES'].split(',') \
    if 'REMOVENODES' in os.environ else []

# BEGIN MAIN

known_hosts_file = '%s/%s' % (
    os.environ['HOME'], '.elasticluster/storage/%s.known_hosts' % cluster_name)

# Create the elasticluster configuration endpoint
creator = elasticluster.conf.make_creator(ElastiCluster.default_configuration_file)

# Lookup the cluster
cluster = creator.load_cluster(cluster_name)
cluster.update()

# Build a list of nodes to remove
if removenodes:
  print "Testing with node list: %s" % ",".join(removenodes)
  to_remove = cluster_util.get_nodes_by_name(cluster, removenodes)
else:
  print "************************************"
  print "Determining status of existing nodes"
  print "************************************"
  to_remove = \
      cluster_util.get_stopping_or_terminated_nodes(cluster, node_type)
  print

if not to_remove:
  print "******************"
  print "No nodes to remove"
  print "******************"
  print

  sys.exit(0)

print "***************"
print "Removing nodes:"
print "***************"
print

for node in to_remove:
  print "Removing node %s (%s)" % (node.name, node.preferred_ip)
  if not dryrun:
    cluster_util.run_elasticluster(
        ['remove-node', '--no-setup', '--yes', cluster_name, node.name])

    if not cluster_util.remove_known_hosts_entry(node, known_hosts_file):
      print "No preferred ip for node; removing file %s" % known_hosts_file
      try:
        os.remove(known_hosts_file)
      except OSError as e:
        if e.errno != errno.ENOENT:
          raise

cluster_util.run_elasticluster(['setup', cluster_name])

--------------------------------------------------------------------------------
/src/samtools/task_samtools.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# task_samtools.sh
#
# Wrapper script that sets up the call to the actual worker:
#   do_samtools.sh
#
# This script hides from do_samtools.sh the fact that Grid Engine is
# managing the operation, so do_samtools.sh can be dedicated to its
# specific task.
#
# When a single command in the array job is sent to a compute node,
# its task number is stored in the variable SGE_TASK_ID,
# so we can use the value of that variable to determine the inputs

set -o errexit
set -o nounset

source ${SRC_ROOT}/common/logging.sh
source ${SRC_ROOT}/common/gcs_util.sh

# Set up an EXIT trap to be sure to clean up
trap exit_clean EXIT

# Set the workspace dir
readonly WORKSPACE_DIR=${TASK_SCRATCH_DIR}/${JOB_NAME}.${JOB_ID}.${SGE_TASK_ID}
sudo mkdir -p ${WORKSPACE_DIR} -m 777

# Set the log file
export LOGGING_LOG_FILE=${WORKSPACE_DIR}/${JOB_NAME}.${JOB_ID}.${SGE_TASK_ID}.log
readonly TASK_START_TIME=$(date '+%s')

# For debugging, emit the hostname and inputs
logging::emit "Task host: $(hostname)"
logging::emit "Task start: ${SGE_TASK_ID}"
logging::emit "Input list file: ${INPUT_LIST_FILE}"
logging::emit "Output path: ${OUTPUT_PATH}"
logging::emit "Output log path: ${OUTPUT_LOG_PATH:-}"
logging::emit "Scratch dir: ${TASK_SCRATCH_DIR}"

# Set up an EXIT trap to be sure to clean up
function exit_clean() {
  # If the WORKSPACE_DIR variable has been set, then be sure to clean up
  if [[ -n ${WORKSPACE_DIR:-} ]]; then
    sudo rm -rf ${WORKSPACE_DIR}
  fi
}
readonly -f exit_clean

function finish() {
  # Upload the log file
  if [[ -n ${OUTPUT_LOG_PATH:-} ]]; then
    local start=${TASK_START_TIME}
    local end=$(date '+%s')

    logging::emit "Task time ${SGE_TASK_ID}: $((end - start)) seconds"
    gcs_util::upload_log "${LOGGING_LOG_FILE}" "${OUTPUT_LOG_PATH}/"
  fi
}
readonly -f finish

# Make sure that the crcmod library is installed
gcs_util::install_crcmod

# Make sure that samtools is installed
if which samtools &> /dev/null; then
  echo "samtools is installed"
else
  sudo apt-get install --yes samtools
fi

# Grab the record to process
readonly INPUT_PATH=$(sed -n "${SGE_TASK_ID}p" ${INPUT_LIST_FILE})
logging::emit "Processing ${INPUT_PATH}"

# Special-case the output path
if [[ ${OUTPUT_PATH} == "source" ]]; then
  OUTPUT_PATH=$(dirname ${INPUT_PATH})
  logging::emit "Output path set to: ${OUTPUT_PATH}"
fi

# Launch the job
if ${SRC_ROOT}/samtools/do_samtools.sh \
    ${WORKSPACE_DIR} \
    ${INPUT_PATH} \
    ${OUTPUT_PATH}; then
  logging::emit "Task end SUCCESS: ${SGE_TASK_ID}"
else
  logging::emit "Task end FAILURE: ${SGE_TASK_ID}"
fi

finish

--------------------------------------------------------------------------------
/src/samtools/launch_samtools.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# launch_samtools.sh
#
# Launches a Grid Engine job to run a samtools command over files
# in Google Cloud Storage. Overall flow of operation:
# * Create Grid Engine "array" job
# * Each task will
#   * Download one or more files from GCS
#   * Process the file(s)
#   * Upload the file(s) to GCS
#
# The launch script needs source and destination information,
# along with an optional destination for logging.
#
# The list of files to act on is assumed to be provided in a pre-generated
# file. This file must contain one GCS path per line.
# The paths may be individual files or a GCS pattern, such as:
#
#   gs://my_bucket/my_path/dir1/by_chrom.*.bam
#   gs://my_bucket/my_path/dir2/sample.bam
#
# Each line gets processed as an individual task. If you want files to
# be processed as separate tasks on separate nodes, then list the files
# explicitly in the list file.
#
# All scripts here respect the DRYRUN environment variable.
# If set to 1, then the operations that *would* be performed will be
# emitted to stdout. This is useful for verifying input and output paths.
#
# Example DRYRUN usage:
#   DRYRUN=1 ./src/samtools/launch_samtools.sh samples/samtools/samtools_index_config.sh
#
# Example real usage:
#   ./src/samtools/launch_samtools.sh samples/samtools/samtools_index_config.sh
#
# The launch script also accepts the environment variables LAUNCH_MIN and
# LAUNCH_MAX, which can be used to specify the minimum and maximum record
# to process. This is useful for small scale testing.
#
# Example DRYRUN processing only the first record:
#   DRYRUN=1 LAUNCH_MIN=1 LAUNCH_MAX=1 ./src/samtools/launch_samtools.sh samples/samtools/samtools_index_config.sh
#
# Example real usage processing only the first 5 records:
#   LAUNCH_MIN=1 LAUNCH_MAX=5 ./src/samtools/launch_samtools.sh samples/samtools/samtools_index_config.sh
#

# The first parameter is a path to a "job configuration" shell script.
# This script must export paths:
#
#   export INPUT_LIST_FILE=<path to file list>
#   export OUTPUT_PATH=<GCS output path>
#   export OUTPUT_LOG_PATH=<GCS log path>
#
# This script must export information about what operation to perform:
#
#   export SAMTOOLS_OPERATION="index" # Only index currently supported

set -o errexit
set -o nounset

if [[ $# -lt 1 ]]; then
  >&2 echo "Usage: ${0} [job_config_file]"
  exit 1
fi

# Task-specific parameters which can be overridden in the job
# config file.
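#
# For example, a job config file could relocate the per-task scratch
# space; the path below is illustrative:
#
#   export TASK_SCRATCH_DIR=/mnt/scratch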
export TASK_SCRATCH_DIR=/scratch

readonly CONFIG_FILE=${1}

source ${CONFIG_FILE}

#
# Input validation
#

readonly REQUIRED_VARS='
INPUT_LIST_FILE
OUTPUT_PATH
OUTPUT_LOG_PATH
SAMTOOLS_OPERATION
'

for VAR in ${REQUIRED_VARS}; do
  if [[ -z "${!VAR:-}" ]]; then
    >&2 echo "Error: ${VAR} must be set"
    exit 1
  fi
done

if [[ ! -e ${INPUT_LIST_FILE} ]]; then
  >&2 echo "Error: ${INPUT_LIST_FILE} not found"
  exit 1
fi

# If LAUNCH_MIN or LAUNCH_MAX are set in the environment, use them.
# Otherwise, launch tasks for all lines in the INPUT_LIST_FILE.
readonly TASK_START=${LAUNCH_MIN:-1}
readonly TASK_END=${LAUNCH_MAX:-$(cat ${INPUT_LIST_FILE} | wc -l)}

#
# Submit the job
#

# Parameters
#   -t: Task range
#   -S: Force the task shell to be bash
#   -V: Pass the current environment through to each task
#   -N: Job name
readonly SAMTOOLS_SRC_ROOT=$(readlink -f $(dirname ${0}))

export SRC_ROOT=$(dirname ${SAMTOOLS_SRC_ROOT})

qsub \
  -t ${TASK_START}-${TASK_END} \
  -S /bin/bash \
  -V \
  -N samtools \
  -r y \
  ${SAMTOOLS_SRC_ROOT}/task_samtools.sh

--------------------------------------------------------------------------------
/src/compress/launch_compress.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# launch_compress.sh
#
# Launches a Grid Engine job to compress or decompress files
# in Google Cloud Storage. Overall flow of operation:
# * Create Grid Engine "array" job
# * Each task will
#   * Download one or more files from GCS
#   * (De)compress the file(s)
#   * Upload the file(s) to GCS
#
# The launch script needs source and destination information,
# along with an optional destination for logging.
#
# The list of files to act on is assumed to be provided in a pre-generated
# file. This file must contain one GCS path per line.
# The paths may be individual files or a GCS pattern, such as:
#
#   gs://my_bucket/my_path/dir1/just_one_file.gz
#   gs://my_bucket/my_path/dir2/*.gz
#
# Each line gets processed as an individual task. If you want files to
# be processed as separate tasks on separate nodes, then list the files
# explicitly in the list file.
#
# All scripts here respect the DRYRUN environment variable.
# If set to 1, then the operations that *would* be performed will be
# emitted to stdout. This is useful for verifying input and output paths.
#
# Example DRYRUN usage:
#   DRYRUN=1 ./src/compress/launch_compress.sh samples/compress/gzip_compress_config.sh
#
# Example real usage:
#   ./src/compress/launch_compress.sh samples/compress/gzip_compress_config.sh
#
# The launch script also accepts the environment variables LAUNCH_MIN and
# LAUNCH_MAX, which can be used to specify the minimum and maximum record
# to process. This is useful for small scale testing.
#
# Example DRYRUN processing only the first record:
#   DRYRUN=1 LAUNCH_MIN=1 LAUNCH_MAX=1 ./src/compress/launch_compress.sh samples/compress/gzip_compress_config.sh
#
# Example real usage processing only the first 5 records:
#   LAUNCH_MIN=1 LAUNCH_MAX=5 ./src/compress/launch_compress.sh samples/compress/gzip_compress_config.sh
#

# The first parameter is a path to a "job configuration" shell script.
# This script must export paths:
#
#   export INPUT_LIST_FILE=<path to file list>
#   export OUTPUT_PATH=<GCS output path>
#   export OUTPUT_LOG_PATH=<GCS log path>
#
# This script must export information about what operation to perform:
#
#   export COMPRESS_OPERATION="compress" # compress | decompress
#   export COMPRESS_TYPE="gzip" # gzip | bzip2
#   export COMPRESS_EXTENSION=".gz" # .gz | .bz2
#
# The do_compress.sh script has built-in support for gzip and bzip2

set -o errexit
set -o nounset

if [[ $# -lt 1 ]]; then
  >&2 echo "Usage: ${0} [job_config_file]"
  exit 1
fi

# Task-specific parameters which can be overridden in the job
# config file.
export TASK_SCRATCH_DIR=/scratch

readonly CONFIG_FILE=${1}

source ${CONFIG_FILE}

#
# Input validation
#

readonly REQUIRED_VARS='
INPUT_LIST_FILE
OUTPUT_PATH
OUTPUT_LOG_PATH
COMPRESS_OPERATION
COMPRESS_TYPE
COMPRESS_EXTENSION
'

for VAR in ${REQUIRED_VARS}; do
  if [[ -z "${!VAR:-}" ]]; then
    >&2 echo "Error: ${VAR} must be set"
    exit 1
  fi
done

if [[ ! -e ${INPUT_LIST_FILE} ]]; then
  >&2 echo "Error: ${INPUT_LIST_FILE} not found"
  exit 1
fi

# If LAUNCH_MIN or LAUNCH_MAX are set in the environment, use them.
# Otherwise, launch tasks for all lines in the INPUT_LIST_FILE.
readonly TASK_START=${LAUNCH_MIN:-1}
readonly TASK_END=${LAUNCH_MAX:-$(cat ${INPUT_LIST_FILE} | wc -l)}

#
# Submit the job
#

# Parameters
#   -t: Task range
#   -S: Force the task shell to be bash
#   -V: Pass the current environment through to each task
#   -N: Job name
readonly COMPRESS_SRC_ROOT=$(readlink -f $(dirname ${0}))

export SRC_ROOT=$(dirname ${COMPRESS_SRC_ROOT})

qsub \
  -t ${TASK_START}-${TASK_END} \
  -S /bin/bash \
  -V \
  -N compress \
  -r y \
  ${COMPRESS_SRC_ROOT}/task_compress.sh

--------------------------------------------------------------------------------
/src/common/gcs_util.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
--------------------------------------------------------------------------------
/src/common/gcs_util.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Copyright 2015 Google Inc. All Rights Reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # gcs_util::install_crcmod
18 | #
19 | # Installs the compiled crcmod library if it is not installed.
20 | # See:
21 | # https://cloud.google.com/storage/docs/gsutil/addlhelp/CRC32CandInstallingcrcmod
22 | function gcs_util::install_crcmod() {
23 |   local crcmod_installed=$(\
24 |     gsutil version -l | sed -n -e 's/^compiled crcmod: *//p')
25 | 
26 |   logging::emit "Compiled crcmod installed: ${crcmod_installed}"
27 |   if [[ ${crcmod_installed} != "True" ]]; then
28 |     logging::emit "Installing compiled crcmod"
29 |     sudo apt-get update --yes
30 |     sudo apt-get install --yes gcc python-dev python-setuptools
31 |     sudo easy_install -U pip
32 |     sudo pip uninstall --yes crcmod || true
33 |     sudo pip install -U crcmod
34 |   fi
35 | }
36 | readonly -f gcs_util::install_crcmod
37 | 
38 | # gcs_util::download
39 | #
40 | # Copies the matching objects at the specified remote path
41 | # to the specified target.
42 | #
43 | # Logs the number of bytes downloaded, the number of seconds,
44 | # and the overall throughput.
45 | #
46 | # Respects the DRYRUN environment variable; if set to 1, then
47 | # logs the operation (with to and from path) and returns.
48 | function gcs_util::download() {
49 |   local remote_path=${1}
50 |   local local_path=${2}
51 | 
52 |   logging::emit "Will download: ${remote_path} to ${local_path}"
53 |   if [[ ${DRYRUN:-} -eq 1 ]]; then
54 |     return
55 |   fi
56 | 
57 |   # Track the number of bytes we download.
58 |   # Get the number of bytes already in the destination directory
59 |   # (and assume no one else is writing to the directory).
60 |   local bytes_start=$(du -s -c --bytes ${local_path} | tail -n 1 | cut -f 1 -d $'\t')
61 | 
62 |   # Download the file(s); retry until gsutil succeeds
63 |   local time_start=$(date +%s)
64 |   while ! gsutil -m cp ${remote_path} ${local_path}; do
65 |     echo "Restarting download"
66 |   done
67 |   local time_end=$(date +%s)
68 | 
69 |   local bytes_end=$(du -s -c --bytes ${local_path} | tail -n 1 | cut -f 1 -d $'\t')
70 | 
71 |   local bytes=$((bytes_end - bytes_start))
72 |   local time=$((time_end - time_start))
73 | 
74 |   logging::emit "Download: ${bytes} bytes in ${time} seconds"
75 |   logging::emit "Download rate: $(( (bytes/1000/1000) / (time > 0 ? time : 1) )) MB/s"
76 | }
77 | readonly -f gcs_util::download
78 | 
79 | # gcs_util::upload
80 | #
81 | # Copies the matching objects at the specified local path
82 | # to the specified target.
83 | #
84 | # Logs the number of bytes uploaded, the number of seconds,
85 | # and the overall throughput.
86 | #
87 | # Respects the DRYRUN environment variable; if set to 1, then
88 | # logs the operation (with to and from path) and returns.
89 | function gcs_util::upload() {
90 |   local local_path=${1}
91 |   local remote_path=${2}
92 | 
93 |   logging::emit "Will upload: ${local_path} to ${remote_path}"
94 |   if [[ ${DRYRUN:-} -eq 1 ]]; then
95 |     return
96 |   fi
97 | 
98 |   # Track the number of bytes we upload.
99 |   local bytes=$(du -s -c --bytes ${local_path} | tail -n 1 | cut -f 1 -d $'\t')
100 | 
101 |   # Do the upload; retry until gsutil succeeds
102 |   local time_start=$(date +%s)
103 |   while ! gsutil -m cp ${local_path} ${remote_path}; do
104 |     echo "Restarting upload"
105 |   done
106 |   local time_end=$(date +%s)
107 | 
108 |   local time=$((time_end - time_start))
109 | 
110 |   logging::emit "Upload: ${bytes} bytes in ${time} seconds"
111 |   logging::emit "Upload rate: $(( (bytes/1000/1000) / (time > 0 ? time : 1) )) MB/s"
112 | }
113 | readonly -f gcs_util::upload
114 | 
115 | # gcs_util::upload_log
116 | #
117 | # Copies the log file at the specified local path into Cloud Storage.
118 | # This is largely syntactic sugar around "gsutil cp", but it does
119 | # respect the DRYRUN environment variable; if set to 1, then
120 | # logs the intended operation (with to and from path) and returns.
121 | function gcs_util::upload_log() {
122 |   local local_path=${1}
123 |   local remote_path=${2}
124 | 
125 |   logging::emit "Upload log: ${local_path} to ${remote_path}"
126 |   if [[ ${DRYRUN:-} -eq 1 ]]; then
127 |     return
128 |   fi
129 | 
130 |   gsutil cp ${local_path} ${remote_path}
131 | }
132 | readonly -f gcs_util::upload_log
133 | 
134 | # gcs_util::get_file_list
135 | #
136 | # Returns a list of matching objects at the specified remote path.
137 | # This is strictly syntactic sugar around "gsutil ls".
138 | # It does NOT respect the DRYRUN environment variable, as the intent of
139 | # this function is to be used specifically when DRYRUN is enabled (1).
140 | function gcs_util::get_file_list() {
141 |   local remote_path=${1}
142 | 
143 |   gsutil ls ${remote_path}
144 | }
145 | readonly -f gcs_util::get_file_list
146 | 
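147 | # Example usage (a sketch with hypothetical paths; MY_BUCKET follows the
148 | # convention used in the sample configs). Inside a task script, these
149 | # helpers compose as follows, assuming logging.sh is sourced first so
150 | # that logging::emit is available:
151 | #
152 | #   source ${SRC_ROOT}/common/logging.sh
153 | #   source ${SRC_ROOT}/common/gcs_util.sh
154 | #   gcs_util::download "gs://MY_BUCKET/input/*.vcf" "${TASK_SCRATCH_DIR}/"
155 | #   gcs_util::upload "${TASK_SCRATCH_DIR}/out.vcf.gz" "gs://MY_BUCKET/output/"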
--------------------------------------------------------------------------------
/bin/cluster_util.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | 
3 | # Copyright 2015 Google Inc. All Rights Reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # cluster_util.py
18 | #
19 | # Utility routines for managing an Elasticluster cluster.
20 | 
21 | import elasticluster
22 | import elasticluster.conf
23 | from elasticluster.__main__ import ElastiCluster
24 | 
25 | import json
26 | import subprocess
27 | 
28 | def remove_known_hosts_entry(node, known_hosts_file):
29 |   """For a given node, remove any host key entries in the known_hosts file."""
30 | 
31 |   if not node.preferred_ip:
32 |     return False
33 | 
34 |   ip = node.preferred_ip
35 | 
36 |   # Assume concurrency on the known_hosts file is not an issue.
37 |   # Read all the lines and then rewrite the file, omitting any
38 |   # that match the "preferred IP" (the public IP).
39 | 
40 |   lines = open(known_hosts_file, "r").readlines()
41 | 
42 |   with open(known_hosts_file, "w") as f:
43 |     for line in lines:
44 |       if not line.startswith(ip + " "):
45 |         f.write(line)
46 | 
47 |   return True
48 | 
49 | def get_zone_for_cluster(cluster_name):
50 |   """Returns the GCE zone associated with the cluster.
51 | 
52 |   There appears to be an elasticluster bug where the zone is not saved
53 |   with the cluster. So we will pull it from the existing configuration
54 |   (we assume the cluster configuration has not been changed)."""
55 | 
56 |   creator = elasticluster.conf.make_creator(ElastiCluster.default_configuration_file)
57 | 
58 |   # FIXME: should not assume the template name is the same as the cluster_name
59 |   conf = creator.cluster_conf[cluster_name]
60 |   return conf['cloud']['zone']
61 | 
62 | 
63 | def get_nodes_by_name(cluster, node_name_list):
64 |   """Returns a list of node objects for the input list of node names."""
65 |   node_list = []
66 | 
67 |   for node in cluster.get_all_nodes():
68 |     if node.name in node_name_list:
69 |       print "Adding node %s (%s)" % (node.name, node.instance_id)
70 |       node_list.append(node)
71 | 
72 |   return node_list
73 | 
74 | 
75 | def get_node_status(project_id, node, zone):
76 |   """Returns the GCE instance status for the specified node."""
77 |   if not node.instance_id:
78 |     print "node %s has no instance_id" % node.name
79 |     return "UNKNOWN"
80 | 
81 |   try:
82 |     print "Get status for %s (%s)" % (node.name, node.instance_id)
83 |     out = subprocess.check_output(["gcloud",
84 |                                    "--project", project_id,
85 |                                    "compute", "instances",
86 |                                    "describe", node.instance_id,
87 |                                    "--zone", zone,
88 |                                    "--format", "json"],
89 |                                   stderr=subprocess.STDOUT)
90 |     details = json.loads(out)
91 |     print "Node %s: %s" % (node.name, details['status'])
92 |     return details['status']
93 |   except subprocess.CalledProcessError, e:
94 |     print e.output
95 |     return 'UNKNOWN'
96 | 
97 | 
98 | def get_nodes_with_status(cluster, node_type, status_list):
99 |   """Returns a list of nodes with the specified instance status."""
100 |   node_list = []
101 | 
102 |   zone = get_zone_for_cluster(cluster.name)
103 |   project_id = cluster.cloud_provider._project_id
104 | 
105 |   for node in cluster.get_all_nodes():
106 |     if not node_type or node.kind == node_type:
107 |       status = get_node_status(project_id, node, zone)
108 | 
109 |       if status in status_list:
110 |         node_list.append(node)
111 | 
112 |   return node_list
113 | 
114 | 
115 | def get_stopping_or_terminated_nodes(cluster, node_type):
116 |   """Returns a list of nodes with STOPPING, TERMINATED, or UNKNOWN status."""
117 | 
118 |   # Including nodes with "UNKNOWN" status may be an incorrect assumption;
119 |   # such a node could be starting. The only sane approach is to assume
120 |   # that no one else is updating the cluster.
122 |   return get_nodes_with_status(cluster, node_type,
123 |                                 ['STOPPING', 'TERMINATED', 'UNKNOWN'])
124 | 
125 | 
126 | def get_desired_cluster_nodes(cluster_name):
127 |   """Returns a dictionary object with a mapping of the node types
128 |   to their desired count (based on cluster configuration)."""
129 | 
130 |   nodes = {}
131 | 
132 |   creator = elasticluster.conf.make_creator(ElastiCluster.default_configuration_file)
133 | 
134 |   # FIXME: should not assume the template name is the same as the cluster_name
135 |   conf = creator.cluster_conf[cluster_name]
136 |   for key in conf:
137 |     if key.endswith('_nodes'):
138 |       kind = key[:-len('_nodes')]
139 |       nodes[kind] = int(conf[key])
140 | 
141 |   return nodes
142 | 
143 | 
144 | def run_elasticluster(argv):
145 |   """Execute the specified elasticluster command."""
146 | 
147 |   # Currently highly verbose: make the "-v" level optional
148 |   return subprocess.call(["elasticluster", "-v", "-v", "-v"] + argv)
149 | 
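150 | # Example usage (a sketch; assumes "cluster" was already loaded through the
151 | # elasticluster API and that "compute" is the worker node type defined in
152 | # the cluster configuration; the known_hosts path is illustrative):
153 | #
154 | #   dead = get_stopping_or_terminated_nodes(cluster, "compute")
155 | #   for node in dead:
156 | #     remove_known_hosts_entry(node, "/home/me/.ssh/known_hosts")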
--------------------------------------------------------------------------------
/bin/cluster_monitor.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Copyright 2015 Google Inc. All Rights Reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # cluster_monitor.sh
18 | #
19 | # Runs continuously to ensure that the specified cluster contains
20 | # the number of configured instances. If members of the cluster are
21 | # found to be TERMINATED, they are removed from the cluster and
22 | # replacement instances are created.
23 | #
24 | # Usage:
25 | #   cluster_monitor.sh cluster-name [sleep_minutes]
26 | # Where:
27 | #   cluster-name is the Elasticluster cluster name
28 | #   sleep_minutes is how long to sleep between checks (default 10)
29 | 
30 | set -o errexit
31 | set -o nounset
32 | 
33 | if [[ $# -lt 1 ]]; then
34 |   >&2 echo "Usage: ${0} cluster-name [sleep_minutes]"
35 |   exit 1
36 | fi
37 | 
38 | readonly CLUSTER=${1}
39 | readonly SLEEP_MINUTES=${2:-10}
40 | 
41 | readonly SCRIPT_DIR=$(dirname $0)
42 | 
43 | # Sometimes when adding or removing nodes, elasticluster configuration
44 | # of the cluster fails. When it does, it emits a message indicating
45 | # "please re-run elasticluster setup" (though it exits with a success (0)
46 | # status code).
47 | #
48 | # We capture each add/remove node operation to a logfile and then just grep
49 | # for the error message. If we find it, then we re-run elasticluster setup.
50 | readonly TMPFILE=/tmp/$(basename $0)-${CLUSTER}.log
51 | 
52 | # remove_terminated_nodes
53 | #
54 | # Remove from the cluster any nodes marked as TERMINATED.
55 | # Capture output to a logfile to inspect for errors.
56 | function remove_terminated_nodes() {
57 |   date
58 |   python -u ${SCRIPT_DIR}/remove_terminated_nodes.py ${CLUSTER} 2>&1 \
59 |     | tee ${TMPFILE}
60 | }
61 | readonly -f remove_terminated_nodes
62 | 
63 | # ensure_cluster_size
64 | #
65 | # Add nodes to the cluster if it does not contain at least as many
66 | # nodes as specified in the cluster configuration.
67 | # Capture output to a logfile to inspect for errors.
68 | function ensure_cluster_size() {
69 |   date
70 |   python -u ${SCRIPT_DIR}/ensure_cluster_size.py ${CLUSTER} 2>&1 \
71 |     | tee ${TMPFILE}
72 | }
73 | readonly -f ensure_cluster_size
74 | 
75 | # check_elasticluster_error
76 | #
77 | # Check the logfile for instructions from Elasticluster to re-run
78 | # "elasticluster setup".
79 | function check_elasticluster_error() {
80 |   grep --quiet --ignore-case \
81 |     "please re-run elasticluster setup" ${TMPFILE}
82 | }
83 | readonly -f check_elasticluster_error
84 | 
85 | # check_elasticluster_ready
86 | #
87 | # Check the logfile for notice from Elasticluster that the
88 | # cluster is ready. When remove_terminated_nodes and ensure_cluster_size
89 | # run, they may not end up running elasticluster setup, so the absence
90 | # of this message does not necessarily indicate a failure. It may be
91 | # that no cluster changes occurred at all.
92 | function check_elasticluster_ready() {
93 |   grep --quiet \
94 |     "Your cluster is ready!" ${TMPFILE}
95 | }
96 | readonly -f check_elasticluster_ready
97 | 
98 | # check_cleanup_cluster
99 | #
100 | # We don't currently have a great way to get a coded error response from
101 | # Elasticluster operations. This can make it hard to decide here whether
102 | # to actually re-run "elasticluster setup" as recommended.
103 | #
104 | # One case where you would *not* want to continue to re-run "setup"
105 | # is if a node were terminated (and not yet removed from the cluster).
106 | # Thus each time we have an operational failure, we try re-running
107 | # "setup" once, and if problems persist, then try removing TERMINATED
108 | # nodes before re-running setup.
109 | function check_cleanup_cluster() {
110 |   local error_detected=0
111 | 
112 |   while [[ ${error_detected} -eq 1 ]] || check_elasticluster_error; do
113 | 
114 |     echo "*****************************************************************"
115 |     echo "Setup errors detected. Running: elasticluster setup -v ${CLUSTER}"
116 |     echo "*****************************************************************"
117 | 
118 |     date
119 |     elasticluster setup -v ${CLUSTER} 2>&1 | tee ${TMPFILE}
120 | 
121 |     echo "***************************************************"
122 |     echo "Finished running: elasticluster setup -v ${CLUSTER}"
123 |     echo "***************************************************"
124 | 
125 |     if ! check_elasticluster_error; then
126 |       break
127 |     fi
128 | 
129 |     error_detected=1
130 | 
131 |     remove_terminated_nodes
132 | 
133 |     if check_elasticluster_ready; then
134 |       break
135 |     fi
136 |   done
137 | }
138 | readonly -f check_cleanup_cluster
139 | 
140 | # MAIN loop
141 | 
142 | while :; do
143 |   # Remove any terminated nodes
144 |   remove_terminated_nodes
145 |   check_cleanup_cluster
146 | 
147 |   # Remove server keys from the known_hosts file for removed nodes
148 |   if ! python -u ${SCRIPT_DIR}/sanitize_known_hosts.py ${CLUSTER}; then
149 |     echo "Continuing..."
150 |   fi
151 | 
152 |   # Add new nodes so that the cluster is at full strength
153 |   ensure_cluster_size
154 |   check_cleanup_cluster
155 | 
156 |   echo "Sleeping for ${SLEEP_MINUTES} minutes"
157 |   sleep ${SLEEP_MINUTES}m
158 | done
159 | 
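160 | # Example usage (hypothetical cluster name): check the cluster every
161 | # 5 minutes, keeping the monitor running after logout:
162 | #
163 | #   nohup ./bin/cluster_monitor.sh gridengine 5 > cluster_monitor.log 2>&1 &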
--------------------------------------------------------------------------------
/tools/array_job_monitor.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Copyright 2015 Google Inc. All Rights Reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # array_job_monitor.sh
18 | #
19 | # This script is intended to run on the master node of a gridengine cluster.
20 | # It will monitor an array job (specified by job id on the command-line)
21 | # and detect when a task has stopped running due to a node failure.
22 | #
23 | # The specific problem this addresses is when a worker node of the cluster
24 | # has been removed (perhaps it was a preemptible VM that was TERMINATED).
25 | # gridengine will continue to report the task as in a "r"unning state.
26 | #
27 | # The function of this script *should* be taken care of by grid engine
28 | # itself, namely the configuration values:
29 | #   * reschedule_unknown
30 | #   * max_unheard
31 | # However, I was never able to get them to work *reliably* and frequently
32 | # ended up with tasks stuck in a "r"unning state on machines that had
33 | # been terminated.
34 | 
35 | # usage:
36 | #   array_job_monitor.sh job_id [monitor_interval] [task_timeout] [queue_name]
37 | #
38 | # parameters:
39 | #   job_id:
40 | #     Grid Engine job ID to monitor
41 | #   monitor_interval:
42 | #     Minutes to sleep between checks of running tasks
43 | #     Default: 15 minutes
44 | #   task_timeout:
45 | #     Number of minutes a task may run before it is considered stalled,
46 | #     and is eligible to be resubmitted.
47 | #     Default: None
48 | #   queue_name:
49 | #     Grid Engine job queue the job_id is associated with
50 | #     Default: all.q
51 | 
52 | set -o errexit
53 | set -o nounset
54 | 
55 | readonly JOB_ID=${1}
56 | readonly MONITOR_INTERVAL=${2:-15}
57 | readonly TASK_TIMEOUT=${3:-}
58 | readonly QUEUE_NAME=${4:-all.q}
59 | 
60 | # To detect a failed (and possibly restarted) worker node, the script will
61 | # SSH to the node and check the uptime.
62 | # For a given pass, the script will try at most CONNECT_RETRIES attempts to
63 | # connect to a node. Each attempt will time out after CONNECT_TIMEOUT seconds.
64 | readonly CONNECT_TIMEOUT=15
65 | readonly CONNECT_RETRIES=5
66 | 
67 | readonly JOB_NAME=$(
68 |   qstat -j ${JOB_ID} | sed --quiet -e 's#job_name: *\(.*\)#\1#p')
69 | 
70 | echo "Begin: monitoring ${JOB_NAME}.${JOB_ID} every ${MONITOR_INTERVAL} minutes"
71 | 
72 | while :; do
73 |   # qstat will return a list of all running tasks where the interesting
74 |   # lines look like:
75 |   #   3 0.50000 samtools mbookman r 08/06/2015 18:22:19
76 |   #     all.q@compute002 1 376
77 | 
78 |   # Grab all of the lines for this job.
79 |   # For each line, check the status of the associated node.
80 |   TASK_LIST=$(qstat | \
81 |     awk -v job=${JOB_ID} -v queue=${QUEUE_NAME} \
82 |       '$1 == job && $8 ~ queue"@" {
83 |          printf "%s,%s,%s,%s\n", $10, $8, $6, $7 }')
84 | 
85 |   for TASK in ${TASK_LIST}; do
86 |     TASK_ID=$(echo "${TASK}" | cut -d , -f 1)
87 |     QUEUE=$(echo "${TASK}" | cut -d , -f 2)
88 |     TASK_START_DATE="$(echo "${TASK}" | cut -d , -f 3)"
89 |     TASK_START_TIME="$(echo "${TASK}" | cut -d , -f 4)"
90 |     TASK_START="${TASK_START_DATE} ${TASK_START_TIME}"
91 | 
92 |     # Trim the "all.q@" from the front of the queue
93 |     NODE=${QUEUE##${QUEUE_NAME}@}
94 | 
95 |     # To get the uptime of the system, grab the first value from /proc/uptime.
96 |     # If we fail to connect to the target host, the output will be empty.
97 |     UPTIME_SEC=
98 |     for ((i = 0; i < ${CONNECT_RETRIES}; i++)); do
99 |       UPTIME_SEC=$(ssh -o ConnectTimeout=${CONNECT_TIMEOUT} ${NODE} \
100 |         cat /proc/uptime | awk '{ print $1 }')
101 |       if [[ -n ${UPTIME_SEC} ]]; then
102 |         break
103 |       fi
104 |     done
105 | 
106 |     RESTART_TASK=0
107 |     if [[ -z ${UPTIME_SEC} ]]; then
108 |       echo "Node ${NODE} unreachable"
109 |       RESTART_TASK=1
110 |     else
111 |       # Convert the uptime (float) to an integer
112 |       UPTIME_SEC=$(printf '%.0f' ${UPTIME_SEC})
113 | 
114 |       # Convert the start time string to seconds since the epoch
115 |       TASK_START_SEC=$(date -d "${TASK_START}" '+%s')
116 | 
117 |       # Get the current time as seconds since the epoch
118 |       NOW=$(date '+%s')
119 | 
120 |       if [[ ${TASK_START_SEC} -lt $((NOW - UPTIME_SEC)) ]]; then
121 |         echo "Node ${NODE} appears to have been restarted"
122 |         echo "  Node uptime: ${UPTIME_SEC} sec"
123 |         echo "  Task start: ${TASK_START_SEC} sec, (${TASK_START})"
124 |         echo "  Now: ${NOW}, $(date '+%D %T')"
125 | 
126 |         RESTART_TASK=1
127 |       elif [[ -n ${TASK_TIMEOUT} ]] && \
128 |            [[ $((NOW - TASK_START_SEC)) -gt $((TASK_TIMEOUT * 60)) ]]; then
129 |         echo "Task ${JOB_ID}.${TASK_ID} has exceeded the task timeout"
130 |         echo "  Task start: ${TASK_START_SEC} sec, (${TASK_START})"
131 |         echo "  Now: ${NOW}, $(date '+%D %T')"
132 |         echo "  $(((NOW - TASK_START_SEC) / 60)) minutes >= ${TASK_TIMEOUT} minutes"
133 | 
134 |         RESTART_TASK=1
135 |       fi
136 |     fi
137 | 
138 |     if [[ ${RESTART_TASK} -eq 1 ]]; then
139 |       echo "Requesting restart of ${JOB_ID}.${TASK_ID}"
140 |       if ! qmod -rj ${JOB_ID}.${TASK_ID}; then
141 |         # Sometimes qmod fails with "invalid queue or job", and the failure
142 |         # is persistent. Re-exec (strangely) seems to resolve it, where
143 |         # simply retrying does not.
144 |         exec $0 "$@"
145 |       fi
146 |     fi
147 |   done
148 | 
149 |   echo "Sleeping ${MONITOR_INTERVAL} minute(s)"
150 |   sleep ${MONITOR_INTERVAL}m
151 | done
152 | 
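153 | # Example usage: monitor job 3 (the job id in the qstat sample above) in
154 | # the default queue, checking every 10 minutes and restarting any task
155 | # that has been running for more than 240 minutes:
156 | #
157 | #   nohup ./tools/array_job_monitor.sh 3 10 240 > array_job_monitor.log 2>&1 &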
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 | 
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 | 1. Definitions.
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------