├── samples
│   ├── compress
│   │   ├── bzip2_compress_file_list.txt
│   │   ├── gzip_compress_file_list.txt
│   │   ├── gzip_compress_config.sh
│   │   ├── bzip2_compress_config.sh
│   │   ├── gzip_decompress_config.sh
│   │   ├── bzip2_decompress_config.sh
│   │   ├── bzip2_decompress_file_list.txt
│   │   └── gzip_decompress_file_list.txt
│   └── samtools
│       ├── samtools_index_config.sh
│       └── samtools_index_file_list.txt
├── src
│   ├── common
│   │   ├── logging.sh
│   │   └── gcs_util.sh
│   ├── samtools
│   │   ├── do_samtools.sh
│   │   ├── task_samtools.sh
│   │   └── launch_samtools.sh
│   └── compress
│       ├── do_compress.sh
│       ├── task_compress.sh
│       └── launch_compress.sh
├── bin
│   ├── list_all_nodes.py
│   ├── list_all_instances.py
│   ├── attach_disk_to_nodes.sh
│   ├── install_crcmod_on_nodes.sh
│   ├── qconf.mod.sh
│   ├── mount_disk_on_nodes.sh
│   ├── sanitize_known_hosts.py
│   ├── ensure_cluster_size.py
│   ├── remove_terminated_nodes.py
│   ├── cluster_util.py
│   └── cluster_monitor.sh
├── README.md
├── tools
│   └── array_job_monitor.sh
└── LICENSE

/samples/compress/bzip2_compress_file_list.txt:
--------------------------------------------------------------------------------
gs://genomics-public-data/platinum-genomes/vcf/NA12877_S1.genome.vcf
gs://genomics-public-data/platinum-genomes/vcf/NA12878_S1.genome.vcf
gs://genomics-public-data/platinum-genomes/vcf/NA12879_S1.genome.vcf
gs://genomics-public-data/platinum-genomes/vcf/NA12880_S1.genome.vcf
gs://genomics-public-data/platinum-genomes/vcf/NA12881_S1.genome.vcf
gs://genomics-public-data/platinum-genomes/vcf/NA12882_S1.genome.vcf

--------------------------------------------------------------------------------
/samples/compress/gzip_compress_file_list.txt:
--------------------------------------------------------------------------------
gs://genomics-public-data/platinum-genomes/vcf/NA12877_S1.genome.vcf
gs://genomics-public-data/platinum-genomes/vcf/NA12878_S1.genome.vcf
gs://genomics-public-data/platinum-genomes/vcf/NA12879_S1.genome.vcf
gs://genomics-public-data/platinum-genomes/vcf/NA12880_S1.genome.vcf
gs://genomics-public-data/platinum-genomes/vcf/NA12881_S1.genome.vcf
gs://genomics-public-data/platinum-genomes/vcf/NA12882_S1.genome.vcf

--------------------------------------------------------------------------------
/samples/samtools/samtools_index_config.sh:
--------------------------------------------------------------------------------
# samtools_index_config.sh
#
# Configuration for a job which takes in a list of BAM files
# in Google Cloud Storage, uses "samtools index" to create a
# BAM index file, and pushes the index to Google Cloud Storage.

export SAMTOOLS_OPERATION="index"

export INPUT_LIST_FILE=./samples/samtools/samtools_index_file_list.txt
export OUTPUT_PATH=gs://MY_BUCKET/output_path/samtools_index
export OUTPUT_LOG_PATH=gs://MY_BUCKET/log_path/samtools_index

--------------------------------------------------------------------------------
/samples/compress/gzip_compress_config.sh:
--------------------------------------------------------------------------------
# gzip_compress_config.sh
#
# Configuration for a job which takes in a list of uncompressed
# files in Google Cloud Storage, compresses them using gzip, and uploads
# the compressed versions to Google Cloud Storage.
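#
# A sketch of how these settings are consumed downstream (see
# src/compress/do_compress.sh): for each input file the worker runs,
# in effect,
#
#   gzip --stdout NA12877_S1.genome.vcf > NA12877_S1.genome.vcf.gz
#
# and then uploads the results to OUTPUT_PATH with "gsutil -m cp".
# The file name above is just the first entry in the sample file list.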

export COMPRESS_OPERATION="compress" # compress | decompress
export COMPRESS_TYPE="gzip" # gzip | bzip2
export COMPRESS_EXTENSION=".gz" # .gz | .bz2

export INPUT_LIST_FILE=./samples/compress/gzip_compress_file_list.txt
export OUTPUT_PATH=gs://MY_BUCKET/output_path/compress_gzip
export OUTPUT_LOG_PATH=gs://MY_BUCKET/log_path/compress_gzip

--------------------------------------------------------------------------------
/samples/compress/bzip2_compress_config.sh:
--------------------------------------------------------------------------------
# bzip2_compress_config.sh
#
# Configuration for a job which takes in a list of uncompressed
# files in Google Cloud Storage, compresses them using bzip2, and uploads
# the compressed versions to Google Cloud Storage.

export COMPRESS_OPERATION="compress" # compress | decompress
export COMPRESS_TYPE="bzip2" # gzip | bzip2
export COMPRESS_EXTENSION=".bz2" # .gz | .bz2

export INPUT_LIST_FILE=./samples/compress/bzip2_compress_file_list.txt
export OUTPUT_PATH=gs://MY_BUCKET/output_path/compress_bzip2
export OUTPUT_LOG_PATH=gs://MY_BUCKET/log_path/compress_bzip2

--------------------------------------------------------------------------------
/samples/compress/gzip_decompress_config.sh:
--------------------------------------------------------------------------------
# gzip_decompress_config.sh
#
# Configuration for a job which takes in a list of gzip compressed
# files in Google Cloud Storage, decompresses them, and uploads
# the decompressed versions to Google Cloud Storage.

export COMPRESS_OPERATION="decompress" # compress | decompress
export COMPRESS_TYPE="gzip" # gzip | bzip2
export COMPRESS_EXTENSION=".gz" # .gz | .bz2

export INPUT_LIST_FILE=./samples/compress/gzip_decompress_file_list.txt
export OUTPUT_PATH=gs://MY_BUCKET/output_path/compress_gzipd
export OUTPUT_LOG_PATH=gs://MY_BUCKET/log_path/compress_gzipd

--------------------------------------------------------------------------------
/samples/compress/bzip2_decompress_config.sh:
--------------------------------------------------------------------------------
# bzip2_decompress_config.sh
#
# Configuration for a job which takes in a list of bzip2 compressed
# files in Google Cloud Storage, decompresses them, and uploads
# the decompressed versions to Google Cloud Storage.
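#
# For "decompress" jobs like this one, do_compress.sh trims
# COMPRESS_EXTENSION from each output name with the shell expansion
# ${FILE%${COMPRESS_EXTENSION}} and runs, in effect:
#
#   bzip2 --decompress --stdout vcfBeta-GS000017105-ASM.vcf.bz2 > vcfBeta-GS000017105-ASM.vcf
#
# (The file name is the first entry of the sample file list below.)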

export COMPRESS_OPERATION="decompress" # compress | decompress
export COMPRESS_TYPE="bzip2" # gzip | bzip2
export COMPRESS_EXTENSION=".bz2" # .gz | .bz2

export INPUT_LIST_FILE=./samples/compress/bzip2_decompress_file_list.txt
export OUTPUT_PATH=gs://MY_BUCKET/output_path/compress_bzip2d
export OUTPUT_LOG_PATH=gs://MY_BUCKET/log_path/compress_bzip2d

--------------------------------------------------------------------------------
/samples/compress/bzip2_decompress_file_list.txt:
--------------------------------------------------------------------------------
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/phase3/data/HG02291/cg_data/ASM_blood/vcfBeta-GS000017105-ASM.vcf.bz2
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/phase3/data/HG01974/cg_data/ASM_blood/vcfBeta-GS000017158-ASM.vcf.bz2
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/phase3/data/HG00621/cg_data/ASM_lcl/vcfBeta-GS000017116-ASM.vcf.bz2
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/phase3/data/HG00625/cg_data/ASM_lcl/vcfBeta-GS000017120-ASM.vcf.bz2
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/phase3/data/NA12801/cg_data/ASM_lcl/vcfBeta-GS000016407-ASM.vcf.bz2
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/phase3/data/NA12752/cg_data/ASM_lcl/vcfBeta-GS000016413-ASM.vcf.bz2

--------------------------------------------------------------------------------
/samples/samtools/samtools_index_file_list.txt:
--------------------------------------------------------------------------------
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/pilot2_high_cov_GRCh37_bams/data/NA12878/alignment/NA12878.chrom9.SOLID.bfast.CEU.high_coverage.20100125.bam
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/pilot2_high_cov_GRCh37_bams/data/NA12878/alignment/NA12878.chrom1.LS454.ssaha2.CEU.high_coverage.20100311.bam
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12878/alignment/NA12878.chrom11.SOLID.corona.SRP000032.2009_08.bam
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12878/alignment/NA12878.chrom12.SOLID.corona.SRP000032.2009_08.bam
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12878/alignment/NA12878.chrom10.SOLID.corona.SRP000032.2009_08.bam
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12878/alignment/NA12878.chromX.SOLID.corona.SRP000032.2009_08.bam

--------------------------------------------------------------------------------
/samples/compress/gzip_decompress_file_list.txt:
--------------------------------------------------------------------------------
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/working/20130723_phase3_wg/cornell/ALL.ChrY.Cornell.20130502.SNPs.Genotypes.vcf.gz
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/working/20140708_previous_phase3/v2_vcfs/ALL.chr21.phase3_shapeit2_mvncall_integrated_v2.20130502.genotypes.vcf.gz
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/working/20140708_previous_phase3/v1_vcfs/ALL.chr21.phase3_shapeit2_mvncall_integrated.20130502.genotype.vcf.gz
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/release/20130502/supporting/input_callsets/um/ALL.chr22.got_cloud.20130502.indels.integrated.sites.vcf.gz
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/working/20110721_exome_call_sets/bcm/ALL.BCM_Illumina_Mosaik_ontarget_plus50bp_822.20110521.snp.exome.genotypes.vcf.gz
gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/release/20130502/supporting/input_callsets/bi/ALL.chr16.broad.mapping.20130502.snps_indels.low_coverage.sites.vcf.gz

--------------------------------------------------------------------------------
/src/common/logging.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# logging.sh
#
# Provides basic logging services.
# Clients of this utility script should:
# * Set LOGGING_LOG_FILE
# * Call logging::log to write messages to the log
# * Call logging::emit to write to stdout and to the log

# logging::log
#
# The log function will echo the input parameters to the LOGGING_LOG_FILE
function logging::log() {
  if [[ -n ${LOGGING_LOG_FILE:-} ]]; then
    echo "${@}" >> ${LOGGING_LOG_FILE}
  fi
}
readonly -f logging::log

# logging::emit
#
# The emit function will echo the input parameters to stdout
# and will also emit the input to the LOGGING_LOG_FILE
function logging::emit() {
  echo "${@}"
  logging::log "${@}"
}
readonly -f logging::emit

--------------------------------------------------------------------------------
/bin/list_all_nodes.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# list_all_nodes.py
#
# Utility script that returns a list of elasticluster node names
# for a cluster. The "node type" can optionally be specified.
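#
# Example invocations (cluster and node-type names are illustrative):
#
#   ./bin/list_all_nodes.py mycluster            # all nodes
#   ./bin/list_all_nodes.py mycluster compute    # only "compute" nodes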

import elasticluster
import elasticluster.conf
from elasticluster.__main__ import ElastiCluster


import sys

# Check usage
if len(sys.argv) < 2 or len(sys.argv) > 3:
  print "Usage: {} [cluster] [node_type]".format(sys.argv[0])
  sys.exit(1)

cluster_name=sys.argv[1]
node_type=sys.argv[2] if len(sys.argv) > 2 else None

# Create the elasticluster configuration endpoint
creator = elasticluster.conf.make_creator(ElastiCluster.default_configuration_file)

# Lookup the cluster
cluster = creator.load_cluster(cluster_name)

# Emit the node names
for node in cluster.get_all_nodes():
  if not node_type or node['kind'] == node_type:
    print node['name']

--------------------------------------------------------------------------------
/bin/list_all_instances.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# list_all_instances.py
#
# Utility script that returns a list of elasticluster instance names
# for a cluster. The "node type" can optionally be specified.

import elasticluster
import elasticluster.conf
from elasticluster.__main__ import ElastiCluster


import sys

# Check usage
if len(sys.argv) < 2 or len(sys.argv) > 3:
  print "Usage: {} [cluster] [node_type]".format(sys.argv[0])
  sys.exit(1)

cluster_name=sys.argv[1]
node_type=sys.argv[2] if len(sys.argv) > 2 else None

# Create the elasticluster configuration endpoint
creator = elasticluster.conf.make_creator(ElastiCluster.default_configuration_file)

# Lookup the cluster
cluster = creator.load_cluster(cluster_name)

# Emit the node names
for node in cluster.get_all_nodes():
  if not node_type or node['kind'] == node_type:
    print node['instance_id']

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
grid-computing-tools
====================

The grid-computing-tools repo is intended to be a place for scripts and
recipes for solving some very common issues, which typically fall under
the category of "simple for a few files, hard for many files."
Examples include:

* I have many VCFs in Cloud Storage that I need to (de)compress
* I have many VCFs in Cloud Storage that have something wrong with the header
* I have many BAMs in Cloud Storage for which I need to compute index files

grid-computing-tools components
-------------------------------

The primary components of the grid-computing-tools examples are:

* [Google Cloud Storage](https://cloud.google.com/storage/) - location of source input files and destination for output files
* [Google Compute Engine](https://cloud.google.com/compute/) - virtual machines in the cloud
* [Grid Engine](http://gridengine.info/) - job scheduling software to distribute commands across a cluster of virtual machines

The approach here is intended to provide a familiar environment to
computational scientists who are accustomed to using Grid Engine to
submit jobs to fixed-size clusters available at their research institution.

Available Tools
---------------
Documentation for the tools in this repo can be found at
http://googlegenomics.readthedocs.org/

The following tools are available:

* [Compress/Decompress files in Google Cloud Storage](http://googlegenomics.readthedocs.org/en/latest/use_cases/compress_or_decompress_many_files/index.html)
* [Index BAM files in Google Cloud Storage with SAMtools](http://googlegenomics.readthedocs.org/en/latest/use_cases/run_samtools_over_many_files/index.html)

--------------------------------------------------------------------------------
/bin/attach_disk_to_nodes.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# attach_disk_to_nodes.sh
#
# Utility script that attaches a disk read-only to each node of a cluster.
# The "node type" can optionally be specified such that, for example,
# the operation can be restricted to all "compute" nodes in the cluster.
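#
# Example invocation (all names below are placeholders):
#
#   ./bin/attach_disk_to_nodes.sh mycluster my-reference-disk us-central1-a compute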

set -o errexit
set -o nounset

if [[ $# -lt 3 ]]; then
  >&2 echo "Usage: ${0} [cluster] [disk_name] [zone] [node_type]"
  exit 1
fi

readonly CLUSTER=${1}
readonly DISK_NAME=${2}
readonly ZONE=${3}
readonly NODE_TYPE=${4:-}

# Use the list_all_instances.py python script to get the list of instances
readonly SCRIPT_DIR=$(dirname ${0})
readonly INSTANCES=$(
  python ${SCRIPT_DIR}/list_all_instances.py ${CLUSTER} ${NODE_TYPE})

# Sequentially connect to the nodes and run the command
for INSTANCE_NAME in ${INSTANCES}; do
  echo "Attaching disk ${DISK_NAME} to instance ${INSTANCE_NAME}"
  gcloud compute instances attach-disk ${INSTANCE_NAME} \
    --disk=${DISK_NAME} --device-name=${DISK_NAME} --zone=${ZONE} \
    --mode ro
done

--------------------------------------------------------------------------------
/bin/install_crcmod_on_nodes.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# install_crcmod_on_nodes.sh
#
# Utility script that connects to each node of a cluster
# and installs the python crcmod module required by gsutil
# to download multi-component objects.
# The "node type" can optionally be specified such that, for example,
# the operation can be restricted to all "compute" nodes in the cluster.

set -o errexit
set -o nounset

if [[ $# -lt 1 ]]; then
  >&2 echo "Usage: ${0} [cluster]"
  exit 1
fi

readonly CLUSTER=${1}

# Set of commands for Debian and Ubuntu as per "gsutil help crcmod"
readonly COMMANDS='
sudo apt-get update --yes
sudo apt-get install --yes gcc python-dev python-setuptools
sudo easy_install -U pip
sudo pip uninstall --yes crcmod
sudo pip install -U crcmod
'

# Use the list_all_nodes.py python script to get the list of instances
readonly SCRIPT_DIR=$(dirname ${0})
readonly NODES=$(python ${SCRIPT_DIR}/list_all_nodes.py ${CLUSTER})

# Sequentially connect to the nodes and run the commands
for NODE in ${NODES}; do
  elasticluster ssh ${CLUSTER} "${COMMANDS}" -n ${NODE}
done

--------------------------------------------------------------------------------
/bin/qconf.mod.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# qconf.mod.sh
#
# Variant of the script suggested here:
#   http://gridscheduler.sourceforge.net/howto/scripting.html
#
# to allow for setting configuration options programmatically.
#
# Example:
#   qconf.mod.sh -mconf global reschedule_unknown 00:05:00

set -o errexit
set -o nounset

if [[ $# -eq 0 ]]; then
  echo "Usage: ${0} [qconf_command] [host|global] [qconf_param] [qconf_value]"
  exit 1
fi

# This script gets invoked directly by the user with the command-line
# noted above.
#
# The script then sets itself as the EDITOR and executes "qconf".
# qconf will then call this script with one command-line parameter
# (a temporary file name).

if [[ -z ${QCONF_PARAMETER:-} ]]; then
  readonly COMMAND=${1}
  readonly HOST=${2}
  export QCONF_PARAMETER=${3}
  export QCONF_VALUE=${4}

  EDITOR=${0} \
    qconf ${COMMAND} ${HOST}
else
  # Sleep 1 second to ensure that the file modification time changes
  sleep 1

  # Update the temp file passed on the command-line by qconf
  readonly QCONF_TEMP_FILE=${1}
  sed -i \
    -e "/^${QCONF_PARAMETER} /d;\$a${QCONF_PARAMETER} ${QCONF_VALUE}" \
    ${QCONF_TEMP_FILE}
fi

--------------------------------------------------------------------------------
/bin/mount_disk_on_nodes.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# mount_disk_on_nodes.sh
#
# Utility script that connects to each node of a cluster
# and mounts the specified disk read-only.
# The "node type" can optionally be specified such that, for example,
# the operation can be restricted to all "compute" nodes in the cluster.

set -o errexit
set -o nounset

if [[ $# -lt 3 ]]; then
  >&2 echo "Usage: ${0} [cluster] [disk_name] [mount_point] [node_type]"
  exit 1
fi

readonly CLUSTER=${1}
readonly DISK_NAME=${2}
readonly MOUNT_POINT=${3}
readonly NODE_TYPE=${4:-}

# Set of commands to create the mount point and mount the disk read-only
readonly COMMANDS='
if ! mount -l | grep "'${MOUNT_POINT}'"; then
  sudo mkdir -p "'${MOUNT_POINT}'"
  sudo chmod 777 "'${MOUNT_POINT}'"
  sudo mount -o ro /dev/disk/by-id/google-'"${DISK_NAME}"' '"${MOUNT_POINT}"'
fi
'

# Use the list_all_nodes.py python script to get the list of instances
readonly SCRIPT_DIR=$(dirname ${0})
readonly NODES=$(
  python ${SCRIPT_DIR}/list_all_nodes.py ${CLUSTER} ${NODE_TYPE})

# Sequentially connect to the nodes and run the commands
for NODE in ${NODES}; do
  echo "Mount ${DISK_NAME} on ${NODE}:${MOUNT_POINT}"
  elasticluster ssh ${CLUSTER} "${COMMANDS}" -n ${NODE}
done

--------------------------------------------------------------------------------
/bin/sanitize_known_hosts.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# sanitize_known_hosts.py
#
# Lists all instances of an Elasticluster cluster and rewrites
# the known_hosts file with only those members.

import elasticluster
import elasticluster.conf
from elasticluster.__main__ import ElastiCluster

import paramiko

import sys

# Check usage
if len(sys.argv) != 2:
  print "Usage: {} [cluster]".format(sys.argv[0])
  sys.exit(1)

cluster_name=sys.argv[1]

# Create the elasticluster configuration endpoint
creator = elasticluster.conf.make_creator(ElastiCluster.default_configuration_file)

# Lookup the cluster
cluster = creator.load_cluster(cluster_name)

# Get the list of IP addresses
ip_addrs = [node.preferred_ip for node in cluster.get_all_nodes()]
print "Known ip addresses for cluster %s" % cluster_name
print ip_addrs

try:
  keys = paramiko.hostkeys.HostKeys(cluster.known_hosts_file)
except IOError as e:
  print e
  sys.exit(1)

print "Keyfile %s loaded" % cluster.known_hosts_file

new_keys = paramiko.hostkeys.HostKeys()

for ip_addr in ip_addrs:
  node_host_keys = keys.lookup(ip_addr)
  if node_host_keys:
    for key_type in node_host_keys.keys():
      new_keys.add(node_host_keys._hostname, key_type, node_host_keys[key_type])

print "Saving sanitized keyfile %s" % cluster.known_hosts_file
new_keys.save(cluster.known_hosts_file)

--------------------------------------------------------------------------------
/bin/ensure_cluster_size.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ensure_cluster_size.py
#
# Makes a pass over the specified cluster and adds nodes such that the
# number of nodes is at least as high as (ideally equal to) the value in
# the cluster configuration.

import elasticluster
import elasticluster.conf
from elasticluster.__main__ import ElastiCluster

import cluster_util

import os
import sys

# Check usage
if len(sys.argv) != 2:
  print "Usage: {} [cluster]".format(sys.argv[0])
  sys.exit(1)

cluster_name=sys.argv[1]

# Testing modes
#
# DRYRUN=1: do not add any nodes, just display a log of the operations
# that would occur

dryrun=os.environ['DRYRUN'] if 'DRYRUN' in os.environ else None

# BEGIN MAIN

# Create the elasticluster configuration endpoint
creator = elasticluster.conf.make_creator(ElastiCluster.default_configuration_file)

# Lookup the cluster
cluster = creator.load_cluster(cluster_name)
cluster.update()

print "*********************"
print "Checking cluster size"
print "*********************"

target_nodes = cluster_util.get_desired_cluster_nodes(cluster_name)

for kind in target_nodes:
  has_count = len(cluster.nodes[kind]) if kind in cluster.nodes else 0
  print "Node type (%s): Has: %d, Should have: %d" % (
      kind, has_count, target_nodes[kind])

  diff = target_nodes[kind] - has_count
  if diff > 0:
    print "Adding new nodes of type %s" % kind
    print
    if not dryrun:
      cluster_util.run_elasticluster(
          ['resize', cluster_name,
           '-a', '%d:%s' % (diff, kind),
           '-t', cluster_name])
  elif diff < 0:
    print "WARNING: There are more nodes of type %s than configured" % kind

--------------------------------------------------------------------------------
/src/samtools/do_samtools.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# do_samtools.sh
#
# Copies one or more files from GCS to disk,
# runs a samtools command
# and pushes the results into GCS.
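#
# This script is normally invoked by task_samtools.sh, which supplies the
# three positional parameters below (workspace dir, GCS input path, GCS
# output path). A hypothetical standalone run, with placeholder paths:
#
#   SRC_ROOT=./src SAMTOOLS_OPERATION=index \
#     ./src/samtools/do_samtools.sh /tmp/ws gs://my-bucket/sample.bam gs://my-bucket/out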

set -o errexit
set -o nounset

# Required input parameters:
readonly WORKSPACE_DIR=${1}
readonly INPUT_PATH=${2}
readonly OUTPUT_PATH=${3}

readonly WS_IN_DIR=${WORKSPACE_DIR}/in
readonly WS_OUT_DIR=${WORKSPACE_DIR}/out

source ${SRC_ROOT}/common/logging.sh
source ${SRC_ROOT}/common/gcs_util.sh

# Make sure our workspace directories are clean and ready
for DIR in ${WS_IN_DIR} ${WS_OUT_DIR}; do
  sudo rm -rf ${DIR}/*
  sudo mkdir -p ${DIR} --mode 777
done
unset DIR

# Download the file(s) to be processed
gcs_util::download "${INPUT_PATH}" "${WS_IN_DIR}/"

# Get an array of input files
declare -a FILE_LIST
if [[ ${DRYRUN:-} -eq 1 ]]; then
  # The FILE_LIST will be empty for a DRYRUN; try to fake it
  DRYRUN_LIST=$(gcs_util::get_file_list "${INPUT_PATH}")
  FILE_LIST=($(echo "${DRYRUN_LIST}" | sed -e 's#.*/##'))
else
  FILE_LIST=($(/bin/ls -1 ${WS_IN_DIR}))
fi
readonly FILE_LIST

# Process the input files
START=$(date +%s)
for FILE in "${FILE_LIST[@]}"; do
  logging::emit "Processing file ${FILE}"

  case "${SAMTOOLS_OPERATION}" in
    index)
      # The output file name cannot be changed for "samtools index"
      INFILE=${WS_IN_DIR}/${FILE}
      OUTFILE=${WS_IN_DIR}/${FILE}.bai

      CMD="samtools index ${INFILE}"
      ;;
    *)
      logging::emit "Unknown operation: ${SAMTOOLS_OPERATION}"
      exit 1
      ;;
  esac

  logging::emit "Command: ${CMD}"

  if [[ ${DRYRUN:-} -eq 1 ]]; then
    continue
  fi

  eval ${CMD}
done
END=$(date +%s)

logging::emit "Update: ${#FILE_LIST[@]} files in $((END-START)) seconds"

# Upload the output file(s)
if [[ ${OUTPUT_PATH} == "source" ]]; then
  OUTPUT_PATH=$(dirname ${INPUT_PATH})
fi
gcs_util::upload "${OUTFILE}" "${OUTPUT_PATH}/"

--------------------------------------------------------------------------------
/src/compress/do_compress.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# do_compress.sh
#
# Copies one or more files from GCS to disk,
# compresses or decompresses the file(s),
# and pushes the results into GCS.
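#
# As with do_samtools.sh, the wrapper (task_compress.sh) normally supplies
# the arguments. A hypothetical standalone run, with placeholder paths:
#
#   SRC_ROOT=./src COMPRESS_OPERATION=compress \
#   COMPRESS_TYPE=gzip COMPRESS_EXTENSION=.gz \
#     ./src/compress/do_compress.sh /tmp/ws "gs://my-bucket/*.vcf" gs://my-bucket/out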

set -o errexit
set -o nounset

# Required input parameters:
readonly WORKSPACE_DIR=${1}
readonly INPUT_PATH=${2}
readonly OUTPUT_PATH=${3}

readonly WS_IN_DIR=${WORKSPACE_DIR}/in
readonly WS_OUT_DIR=${WORKSPACE_DIR}/out

source ${SRC_ROOT}/common/logging.sh
source ${SRC_ROOT}/common/gcs_util.sh

# Make sure our workspace directories are clean and ready
for DIR in ${WS_IN_DIR} ${WS_OUT_DIR}; do
  sudo rm -rf ${DIR}/*
  sudo mkdir -p ${DIR} --mode 777
done
unset DIR

# Download the file(s) to (de)compress
gcs_util::download "${INPUT_PATH}" "${WS_IN_DIR}/"

# Get an array of input files
declare -a FILE_LIST
if [[ ${DRYRUN:-} -eq 1 ]]; then
  # The FILE_LIST will be empty for a DRYRUN; try to fake it
  DRYRUN_LIST=$(gcs_util::get_file_list "${INPUT_PATH}")
  FILE_LIST=($(echo "${DRYRUN_LIST}" | sed -e 's#.*/##'))
else
  FILE_LIST=($(/bin/ls -1 ${WS_IN_DIR}))
fi
readonly FILE_LIST

# Process the input files
START=$(date +%s)
for FILE in "${FILE_LIST[@]}"; do
  logging::emit "Processing file ${FILE}"

  case "${COMPRESS_OPERATION}" in
    compress)
      # Add the extension to the output file
      INFILE=${WS_IN_DIR}/${FILE}
      OUTFILE=${WS_OUT_DIR}/${FILE}${COMPRESS_EXTENSION}

      CMD="${COMPRESS_TYPE} --stdout ${INFILE} > ${OUTFILE}"
      ;;
    decompress)
      # Trim the extension from the output file
      INFILE=${WS_IN_DIR}/${FILE}
      OUTFILE=${WS_OUT_DIR}/${FILE%${COMPRESS_EXTENSION}}

      CMD="${COMPRESS_TYPE} --decompress --stdout ${INFILE} > ${OUTFILE}"
      ;;
    *)
      logging::emit "Unknown compression operation: ${COMPRESS_OPERATION}"
      exit 1
      ;;
  esac

  logging::emit "Command: ${CMD}"

  if [[ ${DRYRUN:-} -eq 1 ]]; then
    continue
  fi

  eval ${CMD}
done
END=$(date +%s)

logging::emit "Update: ${#FILE_LIST[@]} files in $((END-START)) seconds"

# Upload the output file(s)
if [[ ${DRYRUN:-} -eq 1 ]]; then
  exit 0
fi
gcs_util::upload "${WS_OUT_DIR}/*" "${OUTPUT_PATH}/"

--------------------------------------------------------------------------------
/src/compress/task_compress.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# task_compress.sh
#
# Wrapper script that sets up the call to the actual worker:
#   do_compress.sh
#
# This script hides from do_compress.sh the fact that Grid Engine is
# managing the operation, so do_compress.sh can be dedicated to its
# specific task.
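#
# (launch_compress.sh submits this script with "qsub -t <start>-<end>",
# creating one array-job task per line of INPUT_LIST_FILE.)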
#
# When a single command in the array job is sent to a compute node,
# its task number is stored in the variable SGE_TASK_ID,
# so we can use the value of that variable to determine the inputs

set -o errexit
set -o nounset

source ${SRC_ROOT}/common/logging.sh
source ${SRC_ROOT}/common/gcs_util.sh

# Set up an EXIT trap to be sure to clean up
trap exit_clean EXIT

# Set the workspace dir
readonly WORKSPACE_DIR=${TASK_SCRATCH_DIR}/${JOB_NAME}.${JOB_ID}.${SGE_TASK_ID}
sudo mkdir -p ${WORKSPACE_DIR} -m 777

# Set the log file
export LOGGING_LOG_FILE=${WORKSPACE_DIR}/${JOB_NAME}.${JOB_ID}.${SGE_TASK_ID}.log
readonly TASK_START_TIME=$(date '+%s')

# For debugging, emit the hostname and inputs
logging::emit "Task host: $(hostname)"
logging::emit "Task start: ${SGE_TASK_ID}"
logging::emit "Input list file: ${INPUT_LIST_FILE}"
logging::emit "Output path: ${OUTPUT_PATH}"
logging::emit "Output log path: ${OUTPUT_LOG_PATH:-}"
logging::emit "Scratch dir: ${TASK_SCRATCH_DIR}"

# Set up an EXIT trap to be sure to clean up
function exit_clean() {
  # If the WORKSPACE_DIR variable has been set, then be sure to clean up
  if [[ -n ${WORKSPACE_DIR:-} ]]; then
    sudo rm -rf ${WORKSPACE_DIR}
  fi
}
readonly -f exit_clean

function finish() {
  # Upload the log file
  if [[ -n ${OUTPUT_LOG_PATH:-} ]]; then
    local start=${TASK_START_TIME}
    local end=$(date '+%s')

    logging::emit "Task time ${SGE_TASK_ID}: $((end - start)) seconds"
    gcs_util::upload_log "${LOGGING_LOG_FILE}" "${OUTPUT_LOG_PATH}/"
  fi
}
readonly -f finish

# Make sure that the crcmod library is installed
gcs_util::install_crcmod

# Grab the record to process
readonly INPUT_PATTERN=$(sed -n "${SGE_TASK_ID}p" ${INPUT_LIST_FILE})
logging::emit "Processing ${INPUT_PATTERN}"

# Launch the job
if ${SRC_ROOT}/compress/do_compress.sh \
    ${WORKSPACE_DIR} \
    ${INPUT_PATTERN} \
    ${OUTPUT_PATH}; then
  logging::emit "Task end SUCCESS: ${SGE_TASK_ID}"
else
  logging::emit "Task end FAILURE: ${SGE_TASK_ID}"
fi

finish

--------------------------------------------------------------------------------
/bin/remove_terminated_nodes.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# remove_terminated_nodes.py
#
# Makes a pass over the specified cluster and removes any nodes that are
# in a TERMINATED, STOPPING, or unknown state.
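#
# Example usage (cluster name is illustrative); with DRYRUN=1 the script
# only logs the removals that would occur:
#
#   DRYRUN=1 ./bin/remove_terminated_nodes.py mycluster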

import elasticluster
import elasticluster.conf
from elasticluster.__main__ import ElastiCluster

import cluster_util

import errno
import os
import sys

# Check usage
if len(sys.argv) < 2 or len(sys.argv) > 3:
  print "Usage: {} [cluster] [node_type]".format(sys.argv[0])
  sys.exit(1)

cluster_name=sys.argv[1]
node_type=sys.argv[2] if len(sys.argv) > 2 else None

# Testing modes
#
# DRYRUN=1: do not remove any nodes, just display a log of the operations
# that would occur
# REMOVENODES=<node,node,...>: remove the requested node(s)

dryrun=os.environ['DRYRUN'] if 'DRYRUN' in os.environ else None
removenodes=os.environ['REMOVENODES'].split(',') \
    if 'REMOVENODES' in os.environ else []

# BEGIN MAIN

known_hosts_file = '%s/%s' % (
    os.environ['HOME'], '.elasticluster/storage/%s.known_hosts' % cluster_name)

# Create the elasticluster configuration endpoint
creator = elasticluster.conf.make_creator(ElastiCluster.default_configuration_file)

# Lookup the cluster
cluster = creator.load_cluster(cluster_name)
cluster.update()

# Build a list of nodes to remove
if removenodes:
  print "Testing with node list: %s" % ",".join(removenodes)
  to_remove = cluster_util.get_nodes_by_name(cluster, removenodes)
else:
  print "************************************"
  print "Determining status of existing nodes"
  print "************************************"
  to_remove = \
      cluster_util.get_stopping_or_terminated_nodes(cluster, node_type)
  print

if not to_remove:
  print "******************"
  print "No nodes to remove"
  print "******************"
  print

  sys.exit(0)

print "***************"
print "Removing nodes:"
print "***************"
print

for node in to_remove:
  print "Removing node %s (%s)" % (node.name, node.preferred_ip)
  if not dryrun:
    cluster_util.run_elasticluster(
        ['remove-node', '--no-setup', '--yes', cluster_name, node.name])

    if not cluster_util.remove_known_hosts_entry(node, known_hosts_file):
      print "No preferred ip for node; removing file %s" % known_hosts_file
      try:
        os.remove(known_hosts_file)
      except OSError as e:
        if e.errno != errno.ENOENT:
          raise

cluster_util.run_elasticluster(['setup', cluster_name])

--------------------------------------------------------------------------------
/src/samtools/task_samtools.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# task_samtools.sh
#
# Wrapper script that sets up the call to the actual worker:
#   do_samtools.sh
#
# This script hides from do_samtools.sh the fact that Grid Engine is
# managing the operation, so do_samtools.sh can be dedicated to its
# specific task.
#
# When a single command in the array job is sent to a compute node,
# its task number is stored in the variable SGE_TASK_ID,
# so we can use the value of that variable to determine the inputs

set -o errexit
set -o nounset

source ${SRC_ROOT}/common/logging.sh
source ${SRC_ROOT}/common/gcs_util.sh

# Set up an EXIT trap to be sure to clean up
trap exit_clean EXIT

# Set the workspace dir
readonly WORKSPACE_DIR=${TASK_SCRATCH_DIR}/${JOB_NAME}.${JOB_ID}.${SGE_TASK_ID}
sudo mkdir -p ${WORKSPACE_DIR} -m 777

# Set the log file
export LOGGING_LOG_FILE=${WORKSPACE_DIR}/${JOB_NAME}.${JOB_ID}.${SGE_TASK_ID}.log
readonly TASK_START_TIME=$(date '+%s')

# For debugging, emit the hostname and inputs
logging::emit "Task host: $(hostname)"
logging::emit "Task start: ${SGE_TASK_ID}"
logging::emit "Input list file: ${INPUT_LIST_FILE}"
logging::emit "Output path: ${OUTPUT_PATH}"
logging::emit "Output log path: ${OUTPUT_LOG_PATH:-}"
logging::emit "Scratch dir: ${TASK_SCRATCH_DIR}"

# Set up an EXIT trap to be sure to clean up
function exit_clean() {
  # If the WORKSPACE_DIR variable has been set, then be sure to clean up
  if [[ -n ${WORKSPACE_DIR:-} ]]; then
    sudo rm -rf ${WORKSPACE_DIR}
  fi
}
readonly -f exit_clean

function finish() {
  # Upload the log file
  if [[ -n ${OUTPUT_LOG_PATH:-} ]]; then
    local start=${TASK_START_TIME}
    local end=$(date '+%s')

    logging::emit "Task time ${SGE_TASK_ID}: $((end - start)) seconds"
    gcs_util::upload_log "${LOGGING_LOG_FILE}" "${OUTPUT_LOG_PATH}/"
  fi
}
readonly -f finish

# Make sure that the crcmod library is installed
gcs_util::install_crcmod

# Make sure that samtools is installed
if which samtools &> /dev/null; then
  echo "samtools is installed"
else
  sudo apt-get install --yes samtools
fi

# Grab the record to process
readonly INPUT_PATH=$(sed -n "${SGE_TASK_ID}p" ${INPUT_LIST_FILE})
logging::emit "Processing ${INPUT_PATH}"

# Special-case the output path
if [[ ${OUTPUT_PATH} == "source" ]]; then
  OUTPUT_PATH=$(dirname ${INPUT_PATH})
  logging::emit "Output path set to: ${OUTPUT_PATH}"
fi

# Launch the job
if ${SRC_ROOT}/samtools/do_samtools.sh \
    ${WORKSPACE_DIR} \
    ${INPUT_PATH} \
    ${OUTPUT_PATH}; then
  logging::emit "Task end SUCCESS: ${SGE_TASK_ID}"
else
  logging::emit "Task end FAILURE: ${SGE_TASK_ID}"
fi

finish

--------------------------------------------------------------------------------
/src/samtools/launch_samtools.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# launch_samtools.sh
#
# Launches a Grid Engine job to run a samtools command over files
# in Google Cloud Storage. Overall flow of operation:
# * Create Grid Engine "array" job
# * Each task will
#   * Download one or more files from GCS
#   * Process the file(s)
#   * Upload the file(s) to GCS
#
# The launch script needs source and destination information,
# along with an optional destination for logging.
#
# The list of files to act on is assumed to be provided in a pre-generated
# file. This file must contain one GCS path per line.
# The paths may be individual files or a GCS pattern, such as:
#
#   gs://my_bucket/my_path/dir1/by_chrom.*.bam
#   gs://my_bucket/my_path/dir2/sample.bam
#
# Each line gets processed as an individual task. If you want files to
# be processed as separate tasks on separate nodes, then list the files
# explicitly in the list file.
#
# All scripts here respect the DRYRUN environment variable.
# If set to 1, then the operations that *would* be performed will be
# emitted to stdout. This is useful for verifying input and output paths.
#
# Example DRYRUN usage:
#   DRYRUN=1 ./src/samtools/launch_samtools.sh samples/samtools/samtools_index_config.sh
#
# Example real usage:
#   ./src/samtools/launch_samtools.sh samples/samtools/samtools_index_config.sh
#
# The launch script also accepts the environment variables LAUNCH_MIN and
# LAUNCH_MAX, which can be used to specify the minimum and maximum record
# to process. This is useful for small scale testing.
#
# Example DRYRUN processing only the first record:
#   DRYRUN=1 LAUNCH_MIN=1 LAUNCH_MAX=1 ./src/samtools/launch_samtools.sh samples/samtools/samtools_index_config.sh
#
# Example real usage processing only the first 5 records:
#   LAUNCH_MIN=1 LAUNCH_MAX=5 ./src/samtools/launch_samtools.sh samples/samtools/samtools_index_config.sh
#

# The first parameter is a path to a "job configuration" shell script.
# This script must export paths:
#
#   export INPUT_LIST_FILE=<path to file list>
#   export OUTPUT_PATH=<GCS output path>
#   export OUTPUT_LOG_PATH=<GCS log path>
#
# This script must export information about what operation to perform:
#
#   export SAMTOOLS_OPERATION="index" # Only index currently supported

set -o errexit
set -o nounset

if [[ $# -lt 1 ]]; then
  >&2 echo "Usage: ${0} [job_config_file]"
  exit 1
fi

# Task-specific parameters which can be overridden in the job
# config file.
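#
# For example, a job config file could relocate the per-task scratch
# space; the path below is illustrative:
#
#   export TASK_SCRATCH_DIR=/mnt/scratch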
export TASK_SCRATCH_DIR=/scratch

readonly CONFIG_FILE=${1}

source ${CONFIG_FILE}

#
# Input validation
#

readonly REQUIRED_VARS='
INPUT_LIST_FILE
OUTPUT_PATH
OUTPUT_LOG_PATH
SAMTOOLS_OPERATION
'

for VAR in ${REQUIRED_VARS}; do
  if [[ -z "${!VAR:-}" ]]; then
    >&2 echo "Error: ${VAR} must be set"
    exit 1
  fi
done

if [[ ! -e ${INPUT_LIST_FILE} ]]; then
  >&2 echo "Error: ${INPUT_LIST_FILE} not found"
  exit 1
fi

# If LAUNCH_MIN or LAUNCH_MAX are set in the environment, use them.
# Otherwise, launch tasks for all lines in the INPUT_LIST_FILE.
readonly TASK_START=${LAUNCH_MIN:-1}
readonly TASK_END=${LAUNCH_MAX:-$(cat ${INPUT_LIST_FILE} | wc -l)}

#
# Submit the job
#

# Parameters
#   -t: Task range
#   -S: Force the task shell to be bash
#   -V: Pass the current environment through to each task
#   -N: Job name
readonly SAMTOOLS_SRC_ROOT=$(readlink -f $(dirname ${0}))

export SRC_ROOT=$(dirname ${SAMTOOLS_SRC_ROOT})

qsub \
  -t ${TASK_START}-${TASK_END} \
  -S /bin/bash \
  -V \
  -N samtools \
  -r y \
  ${SAMTOOLS_SRC_ROOT}/task_samtools.sh

--------------------------------------------------------------------------------
/src/compress/launch_compress.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# launch_compress.sh
#
# Launches a Grid Engine job to compress or decompress files
# in Google Cloud Storage. Overall flow of operation:
# * Create Grid Engine "array" job
# * Each task will
#   * Download one or more files from GCS
#   * (De)compress the file(s)
#   * Upload the file(s) to GCS
#
# The launch script needs source and destination information,
# along with an optional destination for logging.
#
# The list of files to act on is assumed to be provided in a pre-generated
# file. This file must contain one GCS path per line.
# The paths may be individual files or a GCS pattern, such as:
#
#   gs://my_bucket/my_path/dir1/just_one_file.gz
#   gs://my_bucket/my_path/dir2/*.gz
#
# Each line gets processed as an individual task. If you want files to
# be processed as separate tasks on separate nodes, then list the files
# explicitly in the list file.
#
# All scripts here respect the DRYRUN environment variable.
# If set to 1, then the operations that *would* be performed will be
# emitted to stdout. This is useful for verifying input and output paths.
#
# Example DRYRUN usage:
#   DRYRUN=1 ./src/compress/launch_compress.sh samples/compress/gzip_compress_config.sh
#
# Example real usage:
#   ./src/compress/launch_compress.sh samples/compress/gzip_compress_config.sh
#
# The launch script also accepts the environment variables LAUNCH_MIN and
# LAUNCH_MAX, which can be used to specify the minimum and maximum record
# to process. This is useful for small scale testing.
#
# Example DRYRUN processing only the first record:
#   DRYRUN=1 LAUNCH_MIN=1 LAUNCH_MAX=1 ./src/compress/launch_compress.sh samples/compress/gzip_compress_config.sh
#
# Example real usage processing only the first 5 records:
#   LAUNCH_MIN=1 LAUNCH_MAX=5 ./src/compress/launch_compress.sh samples/compress/gzip_compress_config.sh
#

# The first parameter is a path to a "job configuration" shell script.
# This script must export paths:
#
#   export INPUT_LIST_FILE=<path to file list>
#   export OUTPUT_PATH=<GCS output path>
#   export OUTPUT_LOG_PATH=<GCS log path>
#
# This script must export information about what operation to perform:
#
#   export COMPRESS_OPERATION="compress" # compress | decompress
#   export COMPRESS_TYPE="gzip" # gzip | bzip2
#   export COMPRESS_EXTENSION=".gz" # .gz | .bz2
#
# The do_compress.sh script has built-in support for gzip and bzip2

set -o errexit
set -o nounset

if [[ $# -lt 1 ]]; then
  >&2 echo "Usage: ${0} [job_config_file]"
  exit 1
fi

# Task-specific parameters which can be overridden in the job
# config file.
export TASK_SCRATCH_DIR=/scratch

readonly CONFIG_FILE=${1}

source ${CONFIG_FILE}

#
# Input validation
#

readonly REQUIRED_VARS='
INPUT_LIST_FILE
OUTPUT_PATH
OUTPUT_LOG_PATH
COMPRESS_OPERATION
COMPRESS_TYPE
COMPRESS_EXTENSION
'

for VAR in ${REQUIRED_VARS}; do
  if [[ -z "${!VAR:-}" ]]; then
    >&2 echo "Error: ${VAR} must be set"
    exit 1
  fi
done

if [[ ! -e ${INPUT_LIST_FILE} ]]; then
  >&2 echo "Error: ${INPUT_LIST_FILE} not found"
  exit 1
fi

# If LAUNCH_MIN or LAUNCH_MAX are set in the environment, use them.
# Otherwise, launch tasks for all lines in the INPUT_LIST_FILE.
readonly TASK_START=${LAUNCH_MIN:-1}
readonly TASK_END=${LAUNCH_MAX:-$(cat ${INPUT_LIST_FILE} | wc -l)}

#
# Submit the job
#

# Parameters
#   -t: Task range
#   -S: Force the task shell to be bash
#   -V: Pass the current environment through to each task
#   -N: Job name
readonly COMPRESS_SRC_ROOT=$(readlink -f $(dirname ${0}))

export SRC_ROOT=$(dirname ${COMPRESS_SRC_ROOT})

qsub \
  -t ${TASK_START}-${TASK_END} \
  -S /bin/bash \
  -V \
  -N compress \
  -r y \
  ${COMPRESS_SRC_ROOT}/task_compress.sh

--------------------------------------------------------------------------------
/src/common/gcs_util.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
--------------------------------------------------------------------------------
/src/common/gcs_util.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Copyright 2015 Google Inc. All Rights Reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # gcs_util::install_crcmod
18 | #
19 | # Installs the compiled crcmod library if it is not installed.
20 | # See:
21 | # https://cloud.google.com/storage/docs/gsutil/addlhelp/CRC32CandInstallingcrcmod
22 | function gcs_util::install_crcmod() {
23 |   local crcmod_installed=$(\
24 |     gsutil version -l | sed -n -e 's/^compiled crcmod: *//p')
25 | 
26 |   logging::emit "Compiled crcmod installed: ${crcmod_installed}"
27 |   if [[ ${crcmod_installed} != "True" ]]; then
28 |     logging::emit "Installing compiled crcmod"
29 |     sudo apt-get update --yes
30 |     sudo apt-get install --yes gcc python-dev python-setuptools
31 |     sudo easy_install -U pip
32 |     sudo pip uninstall --yes crcmod || true
33 |     sudo pip install -U crcmod
34 |   fi
35 | }
36 | readonly -f gcs_util::install_crcmod
37 | 
38 | # gcs_util::download
39 | #
40 | # Copies the matching objects at the specified remote path
41 | # to the specified target.
42 | #
43 | # Logs the number of bytes downloaded, the number of seconds,
44 | # and the overall throughput.
45 | #
46 | # Respects the DRYRUN environment variable; if set to 1, then
47 | # logs the operation (with to and from path) and returns.
48 | function gcs_util::download() {
49 |   local remote_path=${1}
50 |   local local_path=${2}
51 | 
52 |   logging::emit "Will download: ${remote_path} to ${local_path}"
53 |   if [[ ${DRYRUN:-} -eq 1 ]]; then
54 |     return
55 |   fi
56 | 
57 |   # Track the number of bytes we download.
58 |   # Get the number of bytes already in the destination directory
59 |   # (and assume no one else is writing to the directory).
60 |   local bytes_start=$(du -s -c --bytes ${local_path} | tail -n 1 | cut -f 1 -d $'\t')
61 | 
62 |   # Download the file(s); retry until gsutil succeeds
63 |   local time_start=$(date +%s)
64 |   while ! gsutil -m cp ${remote_path} ${local_path}; do
65 |     echo "Restarting download"
66 |   done
67 |   local time_end=$(date +%s)
68 | 
69 |   local bytes_end=$(du -s -c --bytes ${local_path} | tail -n 1 | cut -f 1 -d $'\t')
70 | 
71 |   local bytes=$((bytes_end - bytes_start))
72 |   local time=$((time_end - time_start))
73 | 
74 |   logging::emit "Download: ${bytes} bytes in ${time} seconds"
75 |   logging::emit "Download rate: $(( (bytes/1000/1000) / (time > 0 ? time : 1) )) MB/s"
76 | }
77 | readonly -f gcs_util::download
78 | 
79 | # gcs_util::upload
80 | #
81 | # Copies the matching objects at the specified local path
82 | # to the specified target.
83 | #
84 | # Logs the number of bytes uploaded, the number of seconds,
85 | # and the overall throughput.
86 | #
87 | # Respects the DRYRUN environment variable; if set to 1, then
88 | # logs the operation (with to and from path) and returns.
89 | function gcs_util::upload() {
90 |   local local_path=${1}
91 |   local remote_path=${2}
92 | 
93 |   logging::emit "Will upload: ${local_path} to ${remote_path}"
94 |   if [[ ${DRYRUN:-} -eq 1 ]]; then
95 |     return
96 |   fi
97 | 
98 |   # Track the number of bytes we upload.
99 |   local bytes=$(du -s -c --bytes ${local_path} | tail -n 1 | cut -f 1 -d $'\t')
100 | 
101 |   # Do the upload; retry until gsutil succeeds
102 |   local time_start=$(date +%s)
103 |   while ! gsutil -m cp ${local_path} ${remote_path}; do
104 |     echo "Restarting upload"
105 |   done
106 |   local time_end=$(date +%s)
107 | 
108 |   local time=$((time_end - time_start))
109 | 
110 |   logging::emit "Upload: ${bytes} bytes in ${time} seconds"
111 |   logging::emit "Upload rate: $(( (bytes/1000/1000) / (time > 0 ? time : 1) )) MB/s"
112 | }
113 | readonly -f gcs_util::upload
114 | 
115 | # gcs_util::upload_log
116 | #
117 | # Copies the log file at the specified local path into Cloud Storage.
118 | # This is largely syntactic sugar around "gsutil cp", but it does
119 | # respect the DRYRUN environment variable; if set to 1, then
120 | # logs the intended operation (with to and from path) and returns.
121 | function gcs_util::upload_log() {
122 |   local local_path=${1}
123 |   local remote_path=${2}
124 | 
125 |   logging::emit "Upload log: ${local_path} to ${remote_path}"
126 |   if [[ ${DRYRUN:-} -eq 1 ]]; then
127 |     return
128 |   fi
129 | 
130 |   gsutil cp ${local_path} ${remote_path}
131 | }
132 | readonly -f gcs_util::upload_log
133 | 
134 | # gcs_util::get_file_list
135 | #
136 | # Returns a list of matching objects at the specified remote path.
137 | # This is strictly syntactic sugar around "gsutil ls".
138 | # It does NOT respect the DRYRUN environment variable, as the intent of
139 | # this function is to be used specifically when DRYRUN is enabled (1).
140 | function gcs_util::get_file_list() {
141 |   local remote_path=${1}
142 | 
143 |   gsutil ls ${remote_path}
144 | }
145 | readonly -f gcs_util::get_file_list
146 | 
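147 | # Example usage (a sketch with hypothetical paths; MY_BUCKET follows the
148 | # convention used in the sample configs). Inside a task script, these
149 | # helpers compose as follows, assuming logging.sh is sourced first so
150 | # that logging::emit is available:
151 | #
152 | #   source ${SRC_ROOT}/common/logging.sh
153 | #   source ${SRC_ROOT}/common/gcs_util.sh
154 | #   gcs_util::download "gs://MY_BUCKET/input/*.vcf" "${TASK_SCRATCH_DIR}/"
155 | #   gcs_util::upload "${TASK_SCRATCH_DIR}/out.vcf.gz" "gs://MY_BUCKET/output/"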
--------------------------------------------------------------------------------
/bin/cluster_util.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | 
3 | # Copyright 2015 Google Inc. All Rights Reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # cluster_util.py
18 | #
19 | # Utility routines for managing an Elasticluster cluster.
20 | 
21 | import elasticluster
22 | import elasticluster.conf
23 | from elasticluster.__main__ import ElastiCluster
24 | 
25 | import json
26 | import subprocess
27 | 
28 | def remove_known_hosts_entry(node, known_hosts_file):
29 |   """For a given node, remove any host key entries in the known_hosts file."""
30 | 
31 |   if not node.preferred_ip:
32 |     return False
33 | 
34 |   ip = node.preferred_ip
35 | 
36 |   # Assume concurrency on the known_hosts file is not an issue.
37 |   # Read all the lines and then rewrite the file, omitting any
38 |   # that match the "preferred IP" (the public IP).
39 | 
40 |   lines = open(known_hosts_file, "r").readlines()
41 | 
42 |   with open(known_hosts_file, "w") as f:
43 |     for line in lines:
44 |       if not line.startswith(ip + " "):
45 |         f.write(line)
46 | 
47 |   return True
48 | 
49 | def get_zone_for_cluster(cluster_name):
50 |   """Returns the GCE zone associated with the cluster.
51 | 
52 |   There appears to be an elasticluster bug where the zone is not saved
53 |   with the cluster. So we will pull it from the existing configuration
54 |   (we assume the cluster configuration has not been changed)."""
55 | 
56 |   creator = elasticluster.conf.make_creator(ElastiCluster.default_configuration_file)
57 | 
58 |   # FIXME: should not assume the template name is the same as the cluster_name
59 |   conf = creator.cluster_conf[cluster_name]
60 |   return conf['cloud']['zone']
61 | 
62 | 
63 | def get_nodes_by_name(cluster, node_name_list):
64 |   """Returns a list of node objects for the input list of node names."""
65 |   node_list = []
66 | 
67 |   for node in cluster.get_all_nodes():
68 |     if node.name in node_name_list:
69 |       print "Adding node %s (%s)" % (node.name, node.instance_id)
70 |       node_list.append(node)
71 | 
72 |   return node_list
73 | 
74 | 
75 | def get_node_status(project_id, node, zone):
76 |   """Returns the GCE instance status for the specified node."""
77 |   if not node.instance_id:
78 |     print "node %s has no instance_id" % node.name
79 |     return "UNKNOWN"
80 | 
81 |   try:
82 |     print "Get status for %s (%s)" % (node.name, node.instance_id)
83 |     out = subprocess.check_output(["gcloud",
84 |                                    "--project", project_id,
85 |                                    "compute", "instances",
86 |                                    "describe", node.instance_id,
87 |                                    "--zone", zone,
88 |                                    "--format", "json"],
89 |                                   stderr=subprocess.STDOUT)
90 |     details = json.loads(out)
91 |     print "Node %s: %s" % (node.name, details['status'])
92 |     return details['status']
93 |   except subprocess.CalledProcessError, e:
94 |     print e.output
95 |     return 'UNKNOWN'
96 | 
97 | 
98 | def get_nodes_with_status(cluster, node_type, status_list):
99 |   """Returns a list of nodes with the specified instance status."""
100 |   node_list = []
101 | 
102 |   zone = get_zone_for_cluster(cluster.name)
103 |   project_id = cluster.cloud_provider._project_id
104 | 
105 |   for node in cluster.get_all_nodes():
106 |     if not node_type or node.kind == node_type:
107 |       status = get_node_status(project_id, node, zone)
108 | 
109 |       if status in status_list:
110 |         node_list.append(node)
111 | 
112 |   return node_list
113 | 
114 | 
115 | def get_stopping_or_terminated_nodes(cluster, node_type):
116 |   """Returns a list of nodes with STOPPING, TERMINATED, or UNKNOWN status."""
117 | 
118 |   # Including nodes with "UNKNOWN" status may be an incorrect assumption;
119 |   # such a node could be starting. The only sane approach is to assume
120 |   # that no one else is updating the cluster.
122 |   return get_nodes_with_status(cluster, node_type,
123 |                                 ['STOPPING', 'TERMINATED', 'UNKNOWN'])
124 | 
125 | 
126 | def get_desired_cluster_nodes(cluster_name):
127 |   """Returns a dictionary object with a mapping of the node types
128 |   to their desired count (based on cluster configuration)."""
129 | 
130 |   nodes = {}
131 | 
132 |   creator = elasticluster.conf.make_creator(ElastiCluster.default_configuration_file)
133 | 
134 |   # FIXME: should not assume the template name is the same as the cluster_name
135 |   conf = creator.cluster_conf[cluster_name]
136 |   for key in conf:
137 |     if key.endswith('_nodes'):
138 |       kind = key[:-len('_nodes')]
139 |       nodes[kind] = int(conf[key])
140 | 
141 |   return nodes
142 | 
143 | 
144 | def run_elasticluster(argv):
145 |   """Execute the specified elasticluster command."""
146 | 
147 |   # Currently highly verbose: make the "-v" level optional
148 |   return subprocess.call(["elasticluster", "-v", "-v", "-v"] + argv)
149 | 
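150 | # Example usage (a sketch; assumes "cluster" was already loaded through the
151 | # elasticluster API and that "compute" is the worker node type defined in
152 | # the cluster configuration; the known_hosts path is illustrative):
153 | #
154 | #   dead = get_stopping_or_terminated_nodes(cluster, "compute")
155 | #   for node in dead:
156 | #     remove_known_hosts_entry(node, "/home/me/.ssh/known_hosts")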
--------------------------------------------------------------------------------
/bin/cluster_monitor.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Copyright 2015 Google Inc. All Rights Reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # cluster_monitor.sh
18 | #
19 | # Runs continuously to ensure that the specified cluster contains
20 | # the number of configured instances. If members of the cluster are
21 | # found to be TERMINATED, they are removed from the cluster and
22 | # replacement instances are created.
23 | #
24 | # Usage:
25 | #   cluster_monitor.sh cluster-name [sleep_minutes]
26 | # Where:
27 | #   cluster-name is the Elasticluster cluster name
28 | #   sleep_minutes is how long to sleep between checks (default 10)
29 | 
30 | set -o errexit
31 | set -o nounset
32 | 
33 | if [[ $# -lt 1 ]]; then
34 |   >&2 echo "Usage: ${0} cluster-name [sleep_minutes]"
35 |   exit 1
36 | fi
37 | 
38 | readonly CLUSTER=${1}
39 | readonly SLEEP_MINUTES=${2:-10}
40 | 
41 | readonly SCRIPT_DIR=$(dirname $0)
42 | 
43 | # Sometimes when adding or removing nodes, elasticluster configuration
44 | # of the cluster fails. When it does, it emits a message indicating
45 | # "please re-run elasticluster setup" (though it exits with a success (0)
46 | # status code).
47 | #
48 | # We capture each add/remove node operation to a logfile and then just grep
49 | # for the error message. If we find it, then we re-run elasticluster setup.
50 | readonly TMPFILE=/tmp/$(basename $0)-${CLUSTER}.log
51 | 
52 | # remove_terminated_nodes
53 | #
54 | # Remove from the cluster any nodes marked as TERMINATED.
55 | # Capture output to a logfile to inspect for errors.
56 | function remove_terminated_nodes() {
57 |   date
58 |   python -u ${SCRIPT_DIR}/remove_terminated_nodes.py ${CLUSTER} 2>&1 \
59 |     | tee ${TMPFILE}
60 | }
61 | readonly -f remove_terminated_nodes
62 | 
63 | # ensure_cluster_size
64 | #
65 | # Add nodes to the cluster if it does not contain at least as many
66 | # nodes as specified in the cluster configuration.
67 | # Capture output to a logfile to inspect for errors.
68 | function ensure_cluster_size() {
69 |   date
70 |   python -u ${SCRIPT_DIR}/ensure_cluster_size.py ${CLUSTER} 2>&1 \
71 |     | tee ${TMPFILE}
72 | }
73 | readonly -f ensure_cluster_size
74 | 
75 | # check_elasticluster_error
76 | #
77 | # Check the logfile for instructions from Elasticluster to re-run
78 | # "elasticluster setup".
79 | function check_elasticluster_error() {
80 |   grep --quiet --ignore-case \
81 |     "please re-run elasticluster setup" ${TMPFILE}
82 | }
83 | readonly -f check_elasticluster_error
84 | 
85 | # check_elasticluster_ready
86 | #
87 | # Check the logfile for notice from Elasticluster that the
88 | # cluster is ready. When remove_terminated_nodes and ensure_cluster_size
89 | # run, they may not end up running elasticluster setup, so the absence
90 | # of this message does not necessarily indicate a failure. It may be
91 | # that no cluster changes occurred at all.
92 | function check_elasticluster_ready() {
93 |   grep --quiet \
94 |     "Your cluster is ready!" ${TMPFILE}
95 | }
96 | readonly -f check_elasticluster_ready
97 | 
98 | # check_cleanup_cluster
99 | #
100 | # We don't currently have a great way to get a coded error response from
101 | # Elasticluster operations. This can make it hard to decide here whether
102 | # to actually re-run "elasticluster setup" as recommended.
103 | #
104 | # One case where you would *not* want to continue to re-run "setup"
105 | # is if a node were terminated (and not yet removed from the cluster).
106 | # Thus each time we have an operational failure, we try re-running
107 | # "setup" once, and if problems persist, then try removing TERMINATED
108 | # nodes before re-running setup.
109 | function check_cleanup_cluster() {
110 |   local error_detected=0
111 | 
112 |   while [[ ${error_detected} -eq 1 ]] || check_elasticluster_error; do
113 | 
114 |     echo "*****************************************************************"
115 |     echo "Setup errors detected. Running: elasticluster setup -v ${CLUSTER}"
116 |     echo "*****************************************************************"
117 | 
118 |     date
119 |     elasticluster setup -v ${CLUSTER} 2>&1 | tee ${TMPFILE}
120 | 
121 |     echo "***************************************************"
122 |     echo "Finished running: elasticluster setup -v ${CLUSTER}"
123 |     echo "***************************************************"
124 | 
125 |     if ! check_elasticluster_error; then
126 |       break
127 |     fi
128 | 
129 |     error_detected=1
130 | 
131 |     remove_terminated_nodes
132 | 
133 |     if check_elasticluster_ready; then
134 |       break
135 |     fi
136 |   done
137 | }
138 | readonly -f check_cleanup_cluster
139 | 
140 | # MAIN loop
141 | 
142 | while :; do
143 |   # Remove any terminated nodes
144 |   remove_terminated_nodes
145 |   check_cleanup_cluster
146 | 
147 |   # Remove server keys from the known_hosts file for removed nodes
148 |   if ! python -u ${SCRIPT_DIR}/sanitize_known_hosts.py ${CLUSTER}; then
149 |     echo "Continuing..."
150 |   fi
151 | 
152 |   # Add new nodes so that the cluster is at full strength
153 |   ensure_cluster_size
154 |   check_cleanup_cluster
155 | 
156 |   echo "Sleeping for ${SLEEP_MINUTES} minutes"
157 |   sleep ${SLEEP_MINUTES}m
158 | done
159 | 
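160 | # Example usage (hypothetical cluster name): check the cluster every
161 | # 5 minutes, keeping the monitor running after logout:
162 | #
163 | #   nohup ./bin/cluster_monitor.sh gridengine 5 > cluster_monitor.log 2>&1 &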
--------------------------------------------------------------------------------
/tools/array_job_monitor.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Copyright 2015 Google Inc. All Rights Reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # array_job_monitor.sh
18 | #
19 | # This script is intended to run on the master node of a gridengine cluster.
20 | # It will monitor an array job (specified by job id on the command-line)
21 | # and detect when a task has stopped running due to a node failure.
22 | #
23 | # The specific problem this addresses is when a worker node of the cluster
24 | # has been removed (perhaps it was a preemptible VM that was TERMINATED).
25 | # gridengine will continue to report the task as in a "r"unning state.
26 | #
27 | # The function of this script *should* be taken care of by grid engine
28 | # itself, namely the configuration values:
29 | #   * reschedule_unknown
30 | #   * max_unheard
31 | # However, I was never able to get them to work *reliably* and frequently
32 | # ended up with tasks stuck in a "r"unning state on machines that had
33 | # been terminated.
34 | 
35 | # usage:
36 | #   array_job_monitor.sh job_id [monitor_interval] [task_timeout] [queue_name]
37 | #
38 | # parameters:
39 | #   job_id:
40 | #     Grid Engine job ID to monitor
41 | #   monitor_interval:
42 | #     Minutes to sleep between checks of running tasks
43 | #     Default: 15 minutes
44 | #   task_timeout:
45 | #     Number of minutes a task may run before it is considered stalled,
46 | #     and is eligible to be resubmitted.
47 | #     Default: None
48 | #   queue_name:
49 | #     Grid Engine job queue the job_id is associated with
50 | #     Default: all.q
51 | 
52 | set -o errexit
53 | set -o nounset
54 | 
55 | readonly JOB_ID=${1}
56 | readonly MONITOR_INTERVAL=${2:-15}
57 | readonly TASK_TIMEOUT=${3:-}
58 | readonly QUEUE_NAME=${4:-all.q}
59 | 
60 | # To detect a failed (and possibly restarted) worker node, the script will
61 | # SSH to the node and check the uptime.
62 | # For a given pass, the script will try at most CONNECT_RETRIES attempts to
63 | # connect to a node. Each attempt will time out after CONNECT_TIMEOUT seconds.
64 | readonly CONNECT_TIMEOUT=15
65 | readonly CONNECT_RETRIES=5
66 | 
67 | readonly JOB_NAME=$(
68 |   qstat -j ${JOB_ID} | sed --quiet -e 's#job_name: *\(.*\)#\1#p')
69 | 
70 | echo "Begin: monitoring ${JOB_NAME}.${JOB_ID} every ${MONITOR_INTERVAL} minutes"
71 | 
72 | while :; do
73 |   # qstat will return a list of all running tasks where the interesting
74 |   # lines look like:
75 |   #   3 0.50000 samtools mbookman r 08/06/2015 18:22:19
76 |   #     all.q@compute002 1 376
77 | 
78 |   # Grab all of the lines for this job.
79 |   # For each line, check the status of the associated node.
80 |   TASK_LIST=$(qstat | \
81 |     awk -v job=${JOB_ID} -v queue=${QUEUE_NAME} \
82 |       '$1 == job && $8 ~ queue"@" {
83 |          printf "%s,%s,%s,%s\n", $10, $8, $6, $7 }')
84 | 
85 |   for TASK in ${TASK_LIST}; do
86 |     TASK_ID=$(echo "${TASK}" | cut -d , -f 1)
87 |     QUEUE=$(echo "${TASK}" | cut -d , -f 2)
88 |     TASK_START_DATE="$(echo "${TASK}" | cut -d , -f 3)"
89 |     TASK_START_TIME="$(echo "${TASK}" | cut -d , -f 4)"
90 |     TASK_START="${TASK_START_DATE} ${TASK_START_TIME}"
91 | 
92 |     # Trim the "all.q@" from the front of the queue
93 |     NODE=${QUEUE##${QUEUE_NAME}@}
94 | 
95 |     # To get the uptime of the system, grab the first value from /proc/uptime.
96 |     # If we fail to connect to the target host, the output will be empty.
97 |     UPTIME_SEC=
98 |     for ((i = 0; i < ${CONNECT_RETRIES}; i++)); do
99 |       UPTIME_SEC=$(ssh -o ConnectTimeout=${CONNECT_TIMEOUT} ${NODE} \
100 |         cat /proc/uptime | awk '{ print $1 }')
101 |       if [[ -n ${UPTIME_SEC} ]]; then
102 |         break
103 |       fi
104 |     done
105 | 
106 |     RESTART_TASK=0
107 |     if [[ -z ${UPTIME_SEC} ]]; then
108 |       echo "Node ${NODE} unreachable"
109 |       RESTART_TASK=1
110 |     else
111 |       # Convert the uptime (float) to an integer
112 |       UPTIME_SEC=$(printf '%.0f' ${UPTIME_SEC})
113 | 
114 |       # Convert the start time string to seconds since the epoch
115 |       TASK_START_SEC=$(date -d "${TASK_START}" '+%s')
116 | 
117 |       # Get the current time as seconds since the epoch
118 |       NOW=$(date '+%s')
119 | 
120 |       if [[ ${TASK_START_SEC} -lt $((NOW - UPTIME_SEC)) ]]; then
121 |         echo "Node ${NODE} appears to have been restarted"
122 |         echo "  Node uptime: ${UPTIME_SEC} sec"
123 |         echo "  Task start: ${TASK_START_SEC} sec, (${TASK_START})"
124 |         echo "  Now: ${NOW}, $(date '+%D %T')"
125 | 
126 |         RESTART_TASK=1
127 |       elif [[ -n ${TASK_TIMEOUT} ]] && \
128 |            [[ $((NOW - TASK_START_SEC)) -gt $((TASK_TIMEOUT * 60)) ]]; then
129 |         echo "Task ${JOB_ID}.${TASK_ID} has exceeded the task timeout"
130 |         echo "  Task start: ${TASK_START_SEC} sec, (${TASK_START})"
131 |         echo "  Now: ${NOW}, $(date '+%D %T')"
132 |         echo "  $(((NOW - TASK_START_SEC) / 60)) minutes >= ${TASK_TIMEOUT} minutes"
133 | 
134 |         RESTART_TASK=1
135 |       fi
136 |     fi
137 | 
138 |     if [[ ${RESTART_TASK} -eq 1 ]]; then
139 |       echo "Requesting restart of ${JOB_ID}.${TASK_ID}"
140 |       if ! qmod -rj ${JOB_ID}.${TASK_ID}; then
141 |         # Sometimes qmod fails with "invalid queue or job", and the failure
142 |         # is persistent. Re-exec (strangely) seems to resolve it, where
143 |         # simply retrying does not.
144 |         exec $0 "$@"
145 |       fi
146 |     fi
147 |   done
148 | 
149 |   echo "Sleeping ${MONITOR_INTERVAL} minute(s)"
150 |   sleep ${MONITOR_INTERVAL}m
151 | done
152 | 
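153 | # Example usage: monitor job 3 (the job id in the qstat sample above) in
154 | # the default queue, checking every 10 minutes and restarting any task
155 | # that has been running for more than 240 minutes:
156 | #
157 | #   nohup ./tools/array_job_monitor.sh 3 10 240 > array_job_monitor.log 2>&1 &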
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 | 
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 | 1. Definitions.
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------