├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── examples ├── ConvertPairedFastQToUnmappedBamWf_170107 │ ├── ConvertPairedFastQToUnmappedBamWf_170107.inputs.json │ ├── ConvertPairedFastQToUnmappedBamWf_170107.wdl │ └── PairedFastQsToUnmappedBAM.cwl ├── PublicPairedSingleSampleWf_160927 │ ├── ApplyBQSR.cwl │ ├── BaseRecalibrator.cwl │ ├── ConvertToCram.cwl │ ├── CreateSequenceGroupingTSV.cwl │ ├── GatherBamFiles.cwl │ ├── GatherBqsrReports.cwl │ ├── GatherVCFs.cwl │ ├── GetBwaVersion.cwl │ ├── HaplotypeCaller.cwl │ ├── MarkDuplicates.cwl │ ├── MergeBamAlignment.cwl │ ├── PairedEndSingleSampleWorkflow.cwl │ ├── SamToFastqAndBwaMem.cwl │ ├── SortAndFixTags.cwl │ ├── broad_pipelines │ │ ├── PublicPairedSingleSampleWf_160927.inputs.json │ │ ├── PublicPairedSingleSampleWf_160927.md │ │ ├── PublicPairedSingleSampleWf_160927.options.json │ │ ├── PublicPairedSingleSampleWf_160927.wdl │ │ ├── README.md │ │ └── archive │ │ │ ├── PublicPairedSingleSampleWf_160624.wdl │ │ │ ├── PublicPairedSingleSampleWf_160714.json │ │ │ ├── PublicPairedSingleSampleWf_160714.wdl │ │ │ ├── PublicPairedSingleSampleWf_160720.inputs.json │ │ │ ├── PublicPairedSingleSampleWf_160720.options.json │ │ │ └── PublicPairedSingleSampleWf_160720.wdl │ └── new.input.json ├── RevertBamToUnmappedRGBamsWf_170107 │ ├── RevertBamToUnmappedRGBams.cwl │ ├── RevertBamToUnmappedRGBamsWf.cwl │ ├── RevertBamToUnmappedRGBamsWf_170107.inputs.json │ └── RevertBamToUnmappedRGBamsWf_170107.wdl ├── RevertRGBamsToPairedFastQsWf_170107 │ ├── RevertBAMToPairedFASTQ.cwl │ ├── RevertRGBamsToPairedFastQsWf.cwl │ ├── RevertRGBamsToPairedFastQsWf_170107.inputs.json │ └── RevertRGBamsToPairedFastQsWf_170107.wdl ├── ValidateBamsWf_170107 │ ├── ValidateBAM.cwl │ ├── ValidateBamsWf.cwl │ ├── ValidateBamsWf_170107.inputs.json │ └── ValidateBamsWf_170107.wdl ├── gatk_wrappers │ ├── ASEReadCounter.cwl │ ├── AnalyzeCovariates.cwl │ ├── ApplyRecalibration.cwl │ ├── BaseRecalibrator.cwl │ ├── CalculateGenotypePosteriors.cwl │ ├── 
CallableLoci.cwl │ ├── CatVariants.cwl │ ├── CheckPileup.cwl │ ├── ClipReads.cwl │ ├── CombineGVCFs.cwl │ ├── CombineVariants.cwl │ ├── CompareCallableLoci.cwl │ ├── ContEst.cwl │ ├── CountBases.cwl │ ├── CountIntervals.cwl │ ├── CountLoci.cwl │ ├── CountMales.cwl │ ├── CountRODs.cwl │ ├── CountRODsByRef.cwl │ ├── CountReadEvents.cwl │ ├── CountReads.cwl │ ├── CountTerminusEvent.cwl │ ├── DepthOfCoverage.cwl │ ├── DiagnoseTargets.cwl │ ├── DiffObjects.cwl │ ├── ErrorRatePerCycle.cwl │ ├── FastaStats.cwl │ ├── FindCoveredIntervals.cwl │ ├── FlagStat.cwl │ ├── GATKPaperGenotyper.cwl │ ├── GCContentByInterval.cwl │ ├── GenotypeConcordance.cwl │ ├── GenotypeGVCFs.cwl │ ├── HaplotypeCaller.cwl │ ├── HaplotypeResolver.cwl │ ├── IndelRealigner.cwl │ ├── LeftAlignAndTrimVariants.cwl │ ├── LeftAlignIndels.cwl │ ├── MuTect2.cwl │ ├── PhaseByTransmission.cwl │ ├── Pileup.cwl │ ├── PrintRODs.cwl │ ├── PrintReads.cwl │ ├── QualifyMissingIntervals.cwl │ ├── RandomlySplitVariants.cwl │ ├── ReadBackedPhasing.cwl │ ├── ReadClippingStats.cwl │ ├── ReadGroupProperties.cwl │ ├── ReadLengthDistribution.cwl │ ├── RealignerTargetCreator.cwl │ ├── RegenotypeVariants.cwl │ ├── SelectHeaders.cwl │ ├── SelectVariants.cwl │ ├── SimulateReadsForVariants.cwl │ ├── SplitNCigarReads.cwl │ ├── SplitSamFile.cwl │ ├── UnifiedGenotyper.cwl │ ├── ValidateVariants.cwl │ ├── ValidationSiteSelector.cwl │ ├── VariantAnnotator.cwl │ ├── VariantEval.cwl │ ├── VariantFiltration.cwl │ ├── VariantRecalibrator.cwl │ ├── VariantsToAllelicPrimitives.cwl │ ├── VariantsToBinaryPed.cwl │ ├── VariantsToTable.cwl │ ├── VariantsToVCF.cwl │ └── WDLTasks_3.6 │ │ ├── ASEReadCounter_3.6.wdl │ │ ├── AnalyzeCovariates_3.6.wdl │ │ ├── ApplyRecalibration_3.6.wdl │ │ ├── BaseRecalibrator_3.6.wdl │ │ ├── CalculateGenotypePosteriors_3.6.wdl │ │ ├── CallableLoci_3.6.wdl │ │ ├── CatVariants_3.6.wdl │ │ ├── CheckPileup_3.6.wdl │ │ ├── ClipReads_3.6.wdl │ │ ├── CombineGVCFs_3.6.wdl │ │ ├── CombineVariants_3.6.wdl │ │ ├── 
CompareCallableLoci_3.6.wdl │ │ ├── ContEst_3.6.wdl │ │ ├── CountBases_3.6.wdl │ │ ├── CountIntervals_3.6.wdl │ │ ├── CountLoci_3.6.wdl │ │ ├── CountMales_3.6.wdl │ │ ├── CountRODsByRef_3.6.wdl │ │ ├── CountRODs_3.6.wdl │ │ ├── CountReadEvents_3.6.wdl │ │ ├── CountReads_3.6.wdl │ │ ├── CountTerminusEvent_3.6.wdl │ │ ├── DepthOfCoverage_3.6.wdl │ │ ├── DiagnoseTargets_3.6.wdl │ │ ├── DiffObjects_3.6.wdl │ │ ├── ErrorRatePerCycle_3.6.wdl │ │ ├── FastaStats_3.6.wdl │ │ ├── FindCoveredIntervals_3.6.wdl │ │ ├── FlagStat_3.6.wdl │ │ ├── GATKPaperGenotyper_3.6.wdl │ │ ├── GCContentByInterval_3.6.wdl │ │ ├── GenotypeConcordance_3.6.wdl │ │ ├── GenotypeGVCFs_3.6.wdl │ │ ├── HaplotypeCaller_3.6.wdl │ │ ├── HaplotypeResolver_3.6.wdl │ │ ├── IndelRealigner_3.6.wdl │ │ ├── LeftAlignAndTrimVariants_3.6.wdl │ │ ├── LeftAlignIndels_3.6.wdl │ │ ├── MuTect2_3.6.wdl │ │ ├── PhaseByTransmission_3.6.wdl │ │ ├── Pileup_3.6.wdl │ │ ├── PrintRODs_3.6.wdl │ │ ├── PrintReads_3.6.wdl │ │ ├── QualifyMissingIntervals_3.6.wdl │ │ ├── RandomlySplitVariants_3.6.wdl │ │ ├── ReadBackedPhasing_3.6.wdl │ │ ├── ReadClippingStats_3.6.wdl │ │ ├── ReadGroupProperties_3.6.wdl │ │ ├── ReadLengthDistribution_3.6.wdl │ │ ├── RealignerTargetCreator_3.6.wdl │ │ ├── RegenotypeVariants_3.6.wdl │ │ ├── SelectHeaders_3.6.wdl │ │ ├── SelectVariants_3.6.wdl │ │ ├── SimulateReadsForVariants_3.6.wdl │ │ ├── SplitNCigarReads_3.6.wdl │ │ ├── SplitSamFile_3.6.wdl │ │ ├── UnifiedGenotyper_3.6.wdl │ │ ├── ValidateVariants_3.6.wdl │ │ ├── ValidationSiteSelector_3.6.wdl │ │ ├── VariantAnnotator_3.6.wdl │ │ ├── VariantEval_3.6.wdl │ │ ├── VariantFiltration_3.6.wdl │ │ ├── VariantRecalibrator_3.6.wdl │ │ ├── VariantsToAllelicPrimitives_3.6.wdl │ │ ├── VariantsToBinaryPed_3.6.wdl │ │ ├── VariantsToTable_3.6.wdl │ │ └── VariantsToVCF_3.6.wdl └── jointCallingGenotypes │ ├── automatedCWL │ ├── GenotypeGVCFs.cwl │ ├── HaplotypeCallerERC.cwl │ ├── jointCallingGenotypes.cwl │ └── read_tsv.cwl │ ├── cwlJointCallingGenotypesJob.json │ 
├── instructions.md │ ├── instructions.txt │ ├── jointCallingGenotypes.wdl │ └── manualCWL │ ├── GenotypeGVCFs.cwl │ ├── HaplotypeCallerERC.cwl │ ├── manualJointCallingGenotypes.cwl │ └── read_tsv.cwl ├── setup.cfg ├── setup.py ├── tests ├── primitive_workflows_wld.py ├── test-data │ ├── ctask.wdl │ ├── scatter.wdl │ ├── task.cwl │ ├── task.wdl │ ├── wc.cwl │ └── wftask.json └── test.py ├── version.py └── wdl2cwl ├── __init__.py ├── expression-tools └── read_tsv.cwl ├── main.py └── templates └── cwltool.j2 /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include wdl2cwl/expression-tools/* 2 | include wdl2cwl/templates/* -------------------------------------------------------------------------------- /examples/ConvertPairedFastQToUnmappedBamWf_170107/ConvertPairedFastQToUnmappedBamWf_170107.inputs.json: -------------------------------------------------------------------------------- 1 | { 2 | "ConvertPairedFastQsToUnmappedBamWf.readgroup_list": [ 3 | "NA12878_A", "NA12878_B", "NA12878_C" 4 | ], 5 | "ConvertPairedFastQsToUnmappedBamWf.metadata": { 6 | "NA12878_A": [ 7 | "NA12878", "Solexa-NA12878", "H06HDADXX130110.2.ATCACGAT", "2016-09-01T02:00:00+0200", "illumina", "BI" 8 | ], 9 | "NA12878_B": [ 10 | "NA12878", "Solexa-NA12878", "H06HDADXX130110.1.ATCACGAT", "2016-09-01T02:00:00+0200", "illumina", "BI" 11 | ], 12 | "NA12878_C": [ 13 | "NA12878", "Solexa-NA12878", "H06JUADXX130110.1.ATCACGAT", "2016-09-01T02:00:00+0200", "illumina", "BI" 14 | ] 15 | }, 16 | "ConvertPairedFastQsToUnmappedBamWf.fastq_pairs": { 17 | "NA12878_A": [ 18 | "gs://gatk-test-data/wgs_fastq/NA12878_20k/H06HDADXX130110.1.ATCACGAT.20k_reads_1.fastq", 19 | "gs://gatk-test-data/wgs_fastq/NA12878_20k/H06HDADXX130110.1.ATCACGAT.20k_reads_2.fastq" 20 | ], 21 | "NA12878_B": [ 22 | "gs://gatk-test-data/wgs_fastq/NA12878_20k/H06HDADXX130110.2.ATCACGAT.20k_reads_1.fastq", 23 | 
"gs://gatk-test-data/wgs_fastq/NA12878_20k/H06HDADXX130110.2.ATCACGAT.20k_reads_2.fastq" 24 | ], 25 | "NA12878_C": [ 26 | "gs://gatk-test-data/wgs_fastq/NA12878_20k/H06JUADXX130110.1.ATCACGAT.20k_reads_1.fastq", 27 | "gs://gatk-test-data/wgs_fastq/NA12878_20k/H06JUADXX130110.1.ATCACGAT.20k_reads_2.fastq" 28 | ] 29 | }, 30 | "ConvertPairedFastQsToUnmappedBamWf.PairedFastQsToUnmappedBAM.mem_size": "1 GB", 31 | "ConvertPairedFastQsToUnmappedBamWf.PairedFastQsToUnmappedBAM.disk_size": 200 32 | } 33 | -------------------------------------------------------------------------------- /examples/ConvertPairedFastQToUnmappedBamWf_170107/PairedFastQsToUnmappedBAM.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 0.2 3 | 4 | { 5 | "baseCommand": [], 6 | "cwlVersion": "v1.0", 7 | "class": "CommandLineTool", 8 | "requirements": [ 9 | { 10 | "class": "ShellCommandRequirement" 11 | }, 12 | { 13 | "class": "InlineJavascriptRequirement" 14 | }, 15 | { 16 | "class": "DockerRequirement", 17 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.4-1469632282" 18 | }, 19 | { 20 | "class": "ResourceRequirement", 21 | "ramMin": "mem_size" 22 | } 23 | ], 24 | "id": "PairedFastQsToUnmappedBAM", 25 | "arguments": [ 26 | { 27 | "valueFrom": " java -Xmx3000m -jar /usr/gitc/picard.jar \\ FastqToSam \\ FASTQ=$(inputs.fastq_1.path) \\ FASTQ2=$(inputs.fastq_2.path) \\ OUTPUT=$(inputs.readgroup_name).bam \\ READ_GROUP_NAME=$(inputs.readgroup_name) \\ SAMPLE_NAME=$(inputs.sample_name) \\ LIBRARY_NAME=$(inputs.library_name) \\ PLATFORM_UNIT=$(inputs.platform_unit) \\ RUN_DATE=$(inputs.run_date) \\ PLATFORM=$(inputs.platform_name) \\ SEQUENCING_CENTER=$(inputs.sequencing_center) ", 28 | "shellQuote": false 29 | } 30 | ], 31 | "outputs": [ 32 | { 33 | "type": "File", 34 | "id": "output_bam", 35 | "outputBinding": { 36 | "glob": "$(inputs.readgroup_name).bam" 37 | } 
38 | } 39 | ], 40 | "inputs": [ 41 | { 42 | "type": "File", 43 | "id": "fastq_1" 44 | }, 45 | { 46 | "type": "File", 47 | "id": "fastq_2" 48 | }, 49 | { 50 | "type": "string", 51 | "id": "readgroup_name" 52 | }, 53 | { 54 | "type": "string", 55 | "id": "sample_name" 56 | }, 57 | { 58 | "type": "string", 59 | "id": "library_name" 60 | }, 61 | { 62 | "type": "string", 63 | "id": "platform_unit" 64 | }, 65 | { 66 | "type": "string", 67 | "id": "run_date" 68 | }, 69 | { 70 | "type": "string", 71 | "id": "platform_name" 72 | }, 73 | { 74 | "type": "string", 75 | "id": "sequencing_center" 76 | }, 77 | { 78 | "type": "int", 79 | "id": "disk_size" 80 | }, 81 | { 82 | "type": "string", 83 | "id": "mem_size" 84 | } 85 | ] 86 | } -------------------------------------------------------------------------------- /examples/PublicPairedSingleSampleWf_160927/ConvertToCram.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 0.2 3 | 4 | { 5 | "arguments": [ 6 | { 7 | "valueFrom": " samtools view -C -T $(inputs.ref_fasta.path) $(inputs.input_bam.path) | \\ tee $(inputs.output_basename).cram | \\ md5sum > $(inputs.output_basename).cram.md5 # Create REF_CACHE. 
Used when indexing a CRAM seq_cache_populate.pl -root ./ref/cache $(inputs.ref_fasta.path) export REF_PATH=: export REF_CACHE=./ref/cache/%2s/%2s/%s samtools index $(inputs.output_basename).cram mv $(inputs.output_basename).cram.crai $(inputs.output_basename).crai ", 8 | "shellQuote": false 9 | } 10 | ], 11 | "baseCommand": [], 12 | "id": "ConvertToCram", 13 | "outputs": [ 14 | { 15 | "outputBinding": { 16 | "glob": "$(inputs.output_basename).cram" 17 | }, 18 | "id": "output_cram", 19 | "type": "File" 20 | }, 21 | { 22 | "outputBinding": { 23 | "glob": "$(inputs.output_basename).crai" 24 | }, 25 | "id": "output_cram_index", 26 | "type": "File" 27 | }, 28 | { 29 | "outputBinding": { 30 | "glob": "$(inputs.output_basename).cram.md5" 31 | }, 32 | "id": "output_cram_md5", 33 | "type": "File" 34 | } 35 | ], 36 | "requirements": [ 37 | { 38 | "class": "ShellCommandRequirement" 39 | }, 40 | { 41 | "class": "InlineJavascriptRequirement" 42 | }, 43 | { 44 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.4-1469632282", 45 | "class": "DockerRequirement" 46 | }, 47 | { 48 | "ramMin": "3 GB", 49 | "class": "ResourceRequirement" 50 | } 51 | ], 52 | "inputs": [ 53 | { 54 | "id": "input_bam", 55 | "type": "File" 56 | }, 57 | { 58 | "id": "ref_fasta", 59 | "type": "File" 60 | }, 61 | { 62 | "id": "ref_fasta_index", 63 | "type": "File" 64 | }, 65 | { 66 | "id": "output_basename", 67 | "type": "string" 68 | }, 69 | { 70 | "id": "disk_size", 71 | "type": "int" 72 | }, 73 | { 74 | "id": "preemptible_tries", 75 | "type": "int" 76 | } 77 | ], 78 | "class": "CommandLineTool", 79 | "cwlVersion": "v1.0" 80 | } -------------------------------------------------------------------------------- /examples/PublicPairedSingleSampleWf_160927/CreateSequenceGroupingTSV.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 
0.2 3 | 4 | { 5 | "arguments": [ 6 | { 7 | "valueFrom": " python <&1 | \\ grep -e '^Version' | \\ sed 's/Version: //' ", 8 | "shellQuote": false 9 | } 10 | ], 11 | "baseCommand": [], 12 | "id": "GetBwaVersion", 13 | "outputs": [ 14 | { 15 | "outputBinding": { 16 | "glob": "self[0].contents", 17 | "loadContents": true 18 | }, 19 | "id": "version", 20 | "type": "string" 21 | } 22 | ], 23 | "requirements": [ 24 | { 25 | "class": "ShellCommandRequirement" 26 | }, 27 | { 28 | "class": "InlineJavascriptRequirement" 29 | }, 30 | { 31 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.4-1469632282", 32 | "class": "DockerRequirement" 33 | }, 34 | { 35 | "ramMin": "1 GB", 36 | "class": "ResourceRequirement" 37 | } 38 | ], 39 | "stdout": "__stdout", 40 | "inputs": [], 41 | "class": "CommandLineTool", 42 | "cwlVersion": "v1.0" 43 | } -------------------------------------------------------------------------------- /examples/PublicPairedSingleSampleWf_160927/HaplotypeCaller.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 
0.2 3 | 4 | { 5 | "arguments": [ 6 | { 7 | "valueFrom": " java -XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xmx8000m \\ -jar /usr/gitc/GATK35.jar \\ -T HaplotypeCaller \\ -R $(inputs.ref_fasta.path) \\ -o $(inputs.gvcf_basename).vcf.gz \\ -I $(inputs.input_bam.path) \\ -L $(inputs.interval_list.path) \\ -ERC GVCF \\ --max_alternate_alleles 3 \\ -variant_index_parameter 128000 \\ -variant_index_type LINEAR \\ -contamination $(inputs.contamination) \\ --read_filter OverclippedRead ", 8 | "shellQuote": false 9 | } 10 | ], 11 | "baseCommand": [], 12 | "id": "HaplotypeCaller", 13 | "outputs": [ 14 | { 15 | "outputBinding": { 16 | "glob": "$(inputs.gvcf_basename).vcf.gz" 17 | }, 18 | "id": "output_gvcf", 19 | "type": "File" 20 | }, 21 | { 22 | "outputBinding": { 23 | "glob": "$(inputs.gvcf_basename).vcf.gz.tbi" 24 | }, 25 | "id": "output_gvcf_index", 26 | "type": "File" 27 | } 28 | ], 29 | "requirements": [ 30 | { 31 | "class": "ShellCommandRequirement" 32 | }, 33 | { 34 | "class": "InlineJavascriptRequirement" 35 | }, 36 | { 37 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.4-1469632282", 38 | "class": "DockerRequirement" 39 | }, 40 | { 41 | "ramMin": "10 GB", 42 | "class": "ResourceRequirement" 43 | } 44 | ], 45 | "inputs": [ 46 | { 47 | "id": "input_bam", 48 | "type": "File" 49 | }, 50 | { 51 | "id": "input_bam_index", 52 | "type": "File" 53 | }, 54 | { 55 | "id": "interval_list", 56 | "type": "File" 57 | }, 58 | { 59 | "id": "gvcf_basename", 60 | "type": "string" 61 | }, 62 | { 63 | "id": "ref_dict", 64 | "type": "File" 65 | }, 66 | { 67 | "id": "ref_fasta", 68 | "type": "File" 69 | }, 70 | { 71 | "id": "ref_fasta_index", 72 | "type": "File" 73 | }, 74 | { 75 | "id": "contamination", 76 | "type": "float?" 
77 | }, 78 | { 79 | "id": "disk_size", 80 | "type": "int" 81 | }, 82 | { 83 | "id": "preemptible_tries", 84 | "type": "int" 85 | } 86 | ], 87 | "class": "CommandLineTool", 88 | "cwlVersion": "v1.0" 89 | } -------------------------------------------------------------------------------- /examples/PublicPairedSingleSampleWf_160927/MarkDuplicates.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 0.2 3 | 4 | { 5 | "arguments": [ 6 | { 7 | "valueFrom": "${ var input_bams_separated = ''; for (var i=0; i.wdl` 9 | This WDL pipeline implements data pre-processing and initial variant 10 | calling (GVCF generation) according to the GATK Best Practices for 11 | germline SNP and Indel discovery in human whole-genome sequencing (WGS) 12 | data. 13 | 14 | -------------------------------------------------------------------------------- /examples/PublicPairedSingleSampleWf_160927/broad_pipelines/archive/PublicPairedSingleSampleWf_160720.options.json: -------------------------------------------------------------------------------- 1 | { 2 | "read_from_cache":false, 3 | "defaultRuntimeOptions": { 4 | "zones": "us-central1-b us-central1-c" 5 | } 6 | } -------------------------------------------------------------------------------- /examples/RevertBamToUnmappedRGBamsWf_170107/RevertBamToUnmappedRGBams.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 
0.2 3 | 4 | { 5 | "requirements": [ 6 | { 7 | "class": "ShellCommandRequirement" 8 | }, 9 | { 10 | "class": "InlineJavascriptRequirement" 11 | } 12 | ], 13 | "inputs": [ 14 | { 15 | "type": "File", 16 | "id": "input_bam" 17 | }, 18 | { 19 | "type": "string", 20 | "id": "output_dir" 21 | }, 22 | { 23 | "type": "float?", 24 | "id": "max_discard_pct" 25 | }, 26 | { 27 | "type": "int", 28 | "id": "disk_size" 29 | }, 30 | { 31 | "type": "string", 32 | "id": "mem_size" 33 | } 34 | ], 35 | "id": "RevertBamToUnmappedRGBams", 36 | "baseCommand": [], 37 | "arguments": [ 38 | { 39 | "valueFrom": " java -Xmx1000m -jar /usr/gitc/picard.jar \\ RevertSam \\ INPUT=$(inputs.input_bam.path) \\ O=$(inputs.output_dir) \\ OUTPUT_BY_READGROUP=true \\ VALIDATION_STRINGENCY=LENIENT \\ SANITIZE=TRUE \\ MAX_DISCARD_FRACTION=$(inputs.max_discard_pct) \\ ATTRIBUTE_TO_CLEAR=FT \\ SORT_ORDER=queryname ", 40 | "shellQuote": false 41 | } 42 | ], 43 | "cwlVersion": "v1.0", 44 | "class": "CommandLineTool", 45 | "outputs": [ 46 | { 47 | "type": { 48 | "type": "array", 49 | "items": "File" 50 | }, 51 | "id": "unmapped_bams", 52 | "outputBinding": { 53 | "glob": [ 54 | "*.bam" 55 | ] 56 | } 57 | } 58 | ] 59 | } -------------------------------------------------------------------------------- /examples/RevertBamToUnmappedRGBamsWf_170107/RevertBamToUnmappedRGBamsWf.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 
0.2 3 | 4 | { 5 | "requirements": [ 6 | { 7 | "class": "InlineJavascriptRequirement" 8 | } 9 | ], 10 | "inputs": [ 11 | { 12 | "type": "File", 13 | "id": "input_bam" 14 | }, 15 | { 16 | "type": "File", 17 | "id": "ref_fasta" 18 | }, 19 | { 20 | "type": "File", 21 | "id": "ref_fasta_index" 22 | }, 23 | { 24 | "type": "string", 25 | "id": "output_dir" 26 | }, 27 | { 28 | "type": "float?", 29 | "id": "RevertBamToUnmappedRGBams_max_discard_pct" 30 | }, 31 | { 32 | "type": "int", 33 | "id": "RevertBamToUnmappedRGBams_disk_size" 34 | }, 35 | { 36 | "type": "string", 37 | "id": "RevertBamToUnmappedRGBams_mem_size" 38 | } 39 | ], 40 | "id": "RevertBamToUnmappedRGBamsWf", 41 | "steps": [ 42 | { 43 | "run": "RevertBamToUnmappedRGBams.cwl", 44 | "in": [ 45 | { 46 | "source": "input_bam", 47 | "id": "input_bam" 48 | }, 49 | { 50 | "source": "output_dir", 51 | "id": "output_dir" 52 | }, 53 | { 54 | "source": "RevertBamToUnmappedRGBams_max_discard_pct", 55 | "id": "max_discard_pct" 56 | }, 57 | { 58 | "source": "RevertBamToUnmappedRGBams_disk_size", 59 | "id": "disk_size" 60 | }, 61 | { 62 | "source": "RevertBamToUnmappedRGBams_mem_size", 63 | "id": "mem_size" 64 | } 65 | ], 66 | "id": "RevertBamToUnmappedRGBams", 67 | "out": [ 68 | { 69 | "id": "unmapped_bams" 70 | } 71 | ] 72 | } 73 | ], 74 | "cwlVersion": "v1.0", 75 | "class": "Workflow", 76 | "outputs": [ 77 | { 78 | "type": { 79 | "type": "array", 80 | "items": "File" 81 | }, 82 | "outputSource": "RevertBamToUnmappedRGBams/unmapped_bams", 83 | "id": "unmapped_bams_output" 84 | } 85 | ] 86 | } -------------------------------------------------------------------------------- /examples/RevertBamToUnmappedRGBamsWf_170107/RevertBamToUnmappedRGBamsWf_170107.inputs.json: -------------------------------------------------------------------------------- 1 | { 2 | 3 | "RevertBamToUnmappedRGBamsWf.ref_fasta": "gs://gatk-legacy-bundles/b37/human_g1k_v37_decoy.fasta", 4 | "RevertBamToUnmappedRGBamsWf.ref_fasta_index": 
"gs://gatk-legacy-bundles/b37/human_g1k_v37_decoy.fasta.fai", 5 | 6 | "RevertBamToUnmappedRGBamsWf.input_bam": "gs://gatk-test-data/wgs_bam/NA12878_20k_b37/NA12878.bam", 7 | 8 | "RevertBamToUnmappedRGBamsWf.output_dir": ".", 9 | 10 | "RevertBamToUnmappedRGBamsWf.RevertBamToUnmappedRGBams.max_discard_pct": 0.01, 11 | 12 | "RevertBamToUnmappedRGBamsWf.RevertBamToUnmappedRGBams.disk_size": 10, 13 | "RevertBamToUnmappedRGBamsWf.RevertBamToUnmappedRGBams.mem_size": "1 GB", 14 | "RevertBamToUnmappedRGBamsWf.SortBamByQueryname.disk_size": 10, 15 | "RevertBamToUnmappedRGBamsWf.SortBamByQueryname.mem_size": "3500 MB" 16 | } 17 | -------------------------------------------------------------------------------- /examples/RevertBamToUnmappedRGBamsWf_170107/RevertBamToUnmappedRGBamsWf_170107.wdl: -------------------------------------------------------------------------------- 1 | ## Copyright Broad Institute, 2017 2 | ## 3 | ## This WDL reverts a SAM or BAM file to uBAMs, one per readgroup 4 | ## 5 | ## Requirements/expectations : 6 | ## - Paired-end sequencing data in SAM or BAM format 7 | ## - One or more read groups 8 | ## 9 | ## Outputs : 10 | ## - Set of unmapped BAMs, one per read group, with reads sorted by queryname 11 | ## 12 | ## Cromwell version support 13 | ## - Successfully tested on v24 14 | ## - Does not work on versions < v23 due to output syntax 15 | ## 16 | ## Runtime parameters are optimized for Broad's Google Cloud Platform implementation. 17 | ## For program versions, see docker containers. 18 | ## 19 | ## LICENSING : 20 | ## This script is released under the WDL source code license (BSD-3) (see LICENSE in 21 | ## https://github.com/broadinstitute/wdl). Note however that the programs it calls may 22 | ## be subject to different licenses. Users are responsible for checking that they are 23 | ## authorized to run all programs before running this script. 
Please see the docker 24 | ## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed 25 | ## licensing information pertaining to the included programs. 26 | 27 | # TASK DEFINITIONS 28 | 29 | # Revert a BAM to uBAMs, one per readgroup 30 | task RevertBamToUnmappedRGBams { 31 | File input_bam 32 | String output_dir 33 | Float? max_discard_pct 34 | Int disk_size 35 | String mem_size 36 | 37 | command { 38 | java -Xmx1000m -jar /usr/gitc/picard.jar \ 39 | RevertSam \ 40 | INPUT=${input_bam} \ 41 | O=${output_dir} \ 42 | OUTPUT_BY_READGROUP=true \ 43 | VALIDATION_STRINGENCY=LENIENT \ 44 | SANITIZE=TRUE \ 45 | MAX_DISCARD_FRACTION=${max_discard_pct} \ 46 | ATTRIBUTE_TO_CLEAR=FT \ 47 | SORT_ORDER=queryname 48 | } 49 | output { 50 | Array[File] unmapped_bams = glob("*.bam") 51 | } 52 | } 53 | 54 | # WORKFLOW DEFINITION 55 | workflow RevertBamToUnmappedRGBamsWf { 56 | File input_bam 57 | File ref_fasta 58 | File ref_fasta_index 59 | String output_dir 60 | 61 | # Revert inputs to unmapped 62 | call RevertBamToUnmappedRGBams { 63 | input: 64 | input_bam = input_bam, 65 | output_dir = output_dir 66 | } 67 | 68 | # Outputs that will be retained when execution is complete 69 | output { 70 | Array[File] unmapped_bams_output=RevertBamToUnmappedRGBams.unmapped_bams 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /examples/RevertRGBamsToPairedFastQsWf_170107/RevertBAMToPairedFASTQ.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 
0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "id": "bam_file", 8 | "type": "File" 9 | }, 10 | { 11 | "id": "output_basename", 12 | "type": "string" 13 | }, 14 | { 15 | "id": "disk_size", 16 | "type": "int" 17 | }, 18 | { 19 | "id": "mem_size", 20 | "type": "string" 21 | } 22 | ], 23 | "baseCommand": [], 24 | "requirements": [ 25 | { 26 | "class": "ShellCommandRequirement" 27 | }, 28 | { 29 | "class": "InlineJavascriptRequirement" 30 | }, 31 | { 32 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.3-1469027018", 33 | "class": "DockerRequirement" 34 | }, 35 | { 36 | "ramMin": "mem_size", 37 | "class": "ResourceRequirement" 38 | } 39 | ], 40 | "id": "RevertBAMToPairedFASTQ", 41 | "arguments": [ 42 | { 43 | "valueFrom": " java -Xmx3000m -jar /usr/gitc/picard.jar \\ SamToFastq \\ I=$(inputs.bam_file.path) \\ FASTQ=$(inputs.output_basename)_1.fastq \\ SECOND_END_FASTQ=$(inputs.output_basename)_2.fastq \\ UNPAIRED_FASTQ=$(inputs.output_basename)_unp.fastq \\ INCLUDE_NON_PRIMARY_ALIGNMENTS=true \\ INCLUDE_NON_PF_READS=true ", 44 | "shellQuote": false 45 | } 46 | ], 47 | "class": "CommandLineTool", 48 | "outputs": [ 49 | { 50 | "id": "output_fastqs", 51 | "outputBinding": { 52 | "glob": [ 53 | "*.fastq" 54 | ] 55 | }, 56 | "type": "File[]" 57 | } 58 | ], 59 | "cwlVersion": "v1.0" 60 | } -------------------------------------------------------------------------------- /examples/RevertRGBamsToPairedFastQsWf_170107/RevertRGBamsToPairedFastQsWf.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 
0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "id": "bam_list", 8 | "type": "File[]" 9 | }, 10 | { 11 | "id": "sub_strip_path", 12 | "default": "$(\"gs://.*/\")", 13 | "type": "string" 14 | }, 15 | { 16 | "id": "sub_strip_suffix", 17 | "default": "$(\".bam$\")", 18 | "type": "string" 19 | } 20 | ], 21 | "steps": [ 22 | { 23 | "scatterMethod": "dotproduct", 24 | "id": "RevertBAMToPairedFASTQ", 25 | "in": [ 26 | { 27 | "id": "bam_file", 28 | "valueFrom": "$(self)", 29 | "source": "bam_list" 30 | }, 31 | { 32 | "id": "output_basename", 33 | "valueFrom": "$(inputs.self.replace(sub_strip_path, \"\").replace(sub_strip_suffix, \"\"))", 34 | "source": "bam_list" 35 | } 36 | ], 37 | "out": [ 38 | { 39 | "id": "output_fastqs" 40 | } 41 | ], 42 | "run": "RevertBAMToPairedFASTQ.cwl", 43 | "scatter": [ 44 | "bam_file", 45 | "output_basename" 46 | ] 47 | } 48 | ], 49 | "requirements": [ 50 | { 51 | "class": "InlineJavascriptRequirement" 52 | }, 53 | { 54 | "class": "ScatterFeatureRequirement" 55 | }, 56 | { 57 | "class": "StepInputExpressionRequirement" 58 | } 59 | ], 60 | "id": "RevertRGBamsToPairedFastQsWf", 61 | "class": "Workflow", 62 | "outputs": [ 63 | { 64 | "id": "output_fastqs_globs", 65 | "outputSource": "RevertBAMToPairedFASTQ/output_fastqs", 66 | "type": { 67 | "type": "array", 68 | "items": { 69 | "type": "array", 70 | "items": "File" 71 | } 72 | } 73 | } 74 | ], 75 | "cwlVersion": "v1.0" 76 | } -------------------------------------------------------------------------------- /examples/RevertRGBamsToPairedFastQsWf_170107/RevertRGBamsToPairedFastQsWf_170107.inputs.json: -------------------------------------------------------------------------------- 1 | { 2 | "RevertRGBamsToPairedFastQsWf.bam_list": [ 3 | "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06HDADXX130110.1.ATCACGAT.20k_reads.bam", 4 | "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06HDADXX130110.2.ATCACGAT.20k_reads.bam", 5 | 
"gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06JUADXX130110.1.ATCACGAT.20k_reads.bam" 6 | ], 7 | 8 | "RevertRGBamsToPairedFastQsWf.RevertBAMToPairedFASTQ.mem_size": "1 GB", 9 | "RevertRGBamsToPairedFastQsWf.RevertBAMToPairedFASTQ.disk_size": 200 10 | 11 | } 12 | -------------------------------------------------------------------------------- /examples/RevertRGBamsToPairedFastQsWf_170107/RevertRGBamsToPairedFastQsWf_170107.wdl: -------------------------------------------------------------------------------- 1 | ## Copyright Broad Institute, 2017 2 | ## 3 | ## This WDL reverts a set of single-readgroup BAMs to paired FASTQs 4 | ## 5 | ## Requirements/expectations: 6 | ## - List of valid BAM files 7 | ## - Max one readgroup per BAM file. If there are more, the distinctions will be lost! 8 | ## 9 | ## Outputs: 10 | ## - Sets of two FASTQ files of paired reads (*_1 and *_2) plus one FASTQ file of 11 | ## unpaired reads (*_unp) per input file. 12 | ## 13 | ## Cromwell version support 14 | ## - Successfully tested on v24 15 | ## - Does not work on versions < v23 due to output syntax 16 | ## 17 | ## Runtime parameters are optimized for Broad's Google Cloud Platform implementation. 18 | ## For program versions, see docker containers. 19 | ## 20 | ## LICENSING : 21 | ## This script is released under the WDL source code license (BSD-3) (see LICENSE in 22 | ## https://github.com/broadinstitute/wdl). Note however that the programs it calls may 23 | ## be subject to different licenses. Users are responsible for checking that they are 24 | ## authorized to run all programs before running this script. Please see the docker 25 | ## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed 26 | ## licensing information pertaining to the included programs. 
27 | 28 | # TASK DEFINITIONS 29 | 30 | # Run SamToFASTQ to revert the bam 31 | task RevertBAMToPairedFASTQ { 32 | File bam_file 33 | String output_basename 34 | Int disk_size 35 | String mem_size 36 | 37 | command { 38 | java -Xmx3000m -jar /usr/gitc/picard.jar \ 39 | SamToFastq \ 40 | I=${bam_file} \ 41 | FASTQ=${output_basename}_1.fastq \ 42 | SECOND_END_FASTQ=${output_basename}_2.fastq \ 43 | UNPAIRED_FASTQ=${output_basename}_unp.fastq \ 44 | INCLUDE_NON_PRIMARY_ALIGNMENTS=true \ 45 | INCLUDE_NON_PF_READS=true 46 | } 47 | runtime { 48 | docker: "broadinstitute/genomes-in-the-cloud:2.2.3-1469027018" 49 | memory: mem_size 50 | cpu: "1" 51 | disks: "local-disk " + disk_size + " HDD" 52 | } 53 | output { 54 | Array[File] output_fastqs = glob("*.fastq") 55 | } 56 | } 57 | 58 | # WORKFLOW DEFINITION 59 | workflow RevertRGBamsToPairedFastQsWf { 60 | Array[File] bam_list 61 | 62 | # Process input files in parallel 63 | scatter (input_bam in bam_list) { 64 | 65 | String sub_strip_path = "gs://.*/" 66 | String sub_strip_suffix = ".bam$" 67 | 68 | # Revert inputs to paired FASTQ 69 | call RevertBAMToPairedFASTQ { 70 | input: 71 | bam_file = input_bam, 72 | output_basename = sub(sub(input_bam, sub_strip_path, ""), sub_strip_suffix, ""), 73 | } 74 | } 75 | 76 | # Outputs that will be retained when execution is complete 77 | output { 78 | Array[Array[File]] output_fastqs_globs=RevertBAMToPairedFASTQ.output_fastqs 79 | } 80 | } 81 | 82 | -------------------------------------------------------------------------------- /examples/ValidateBamsWf_170107/ValidateBAM.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 
0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "type": "File", 8 | "id": "bam_file" 9 | }, 10 | { 11 | "type": "string", 12 | "id": "output_basename" 13 | }, 14 | { 15 | "type": "string", 16 | "id": "validation_mode" 17 | }, 18 | { 19 | "type": "int", 20 | "id": "disk_size" 21 | }, 22 | { 23 | "type": "string", 24 | "id": "mem_size" 25 | }, 26 | { 27 | "type": "string", 28 | "id": "output_name", 29 | "default": "$(\"${output_basename}_${validation_mode}.txt\")" 30 | } 31 | ], 32 | "cwlVersion": "v1.0", 33 | "outputs": [ 34 | { 35 | "outputBinding": { 36 | "glob": "$(inputs.output_name)" 37 | }, 38 | "type": "File", 39 | "id": "validation_report" 40 | } 41 | ], 42 | "arguments": [ 43 | { 44 | "shellQuote": false, 45 | "valueFrom": " java -Xmx3000m -jar /usr/gitc/picard.jar \\ ValidateSamFile \\ I=$(inputs.bam_file.path) \\ OUTPUT=$(inputs.output_name) \\ MODE=$(inputs.validation_mode) " 46 | } 47 | ], 48 | "baseCommand": [], 49 | "class": "CommandLineTool", 50 | "requirements": [ 51 | { 52 | "class": "ShellCommandRequirement" 53 | }, 54 | { 55 | "class": "InlineJavascriptRequirement" 56 | }, 57 | { 58 | "class": "DockerRequirement", 59 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.3-1469027018" 60 | }, 61 | { 62 | "class": "ResourceRequirement", 63 | "ramMin": "mem_size" 64 | } 65 | ], 66 | "id": "ValidateBAM" 67 | } -------------------------------------------------------------------------------- /examples/ValidateBamsWf_170107/ValidateBamsWf.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 
0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "type": "File[]", 8 | "id": "bam_list" 9 | }, 10 | { 11 | "type": "string", 12 | "id": "sub_strip_path", 13 | "default": "$(\"gs://.*/\")" 14 | }, 15 | { 16 | "type": "string", 17 | "id": "sub_strip_suffix", 18 | "default": "$(\".bam$\")" 19 | } 20 | ], 21 | "cwlVersion": "v1.0", 22 | "outputs": [ 23 | { 24 | "outputSource": "ValidateBAM/validation_report", 25 | "id": "validation_reports", 26 | "type": "File[]" 27 | } 28 | ], 29 | "steps": [ 30 | { 31 | "in": [ 32 | { 33 | "source": "bam_list", 34 | "id": "bam_file", 35 | "valueFrom": "$(self)" 36 | }, 37 | { 38 | "source": "bam_list", 39 | "id": "output_basename", 40 | "valueFrom": "$(inputs.self.replace(sub_strip_path, \"\").replace(sub_strip_suffix, \"\") + \".validation\")" 41 | } 42 | ], 43 | "run": "ValidateBAM.cwl", 44 | "scatter": [ 45 | "bam_file", 46 | "output_basename" 47 | ], 48 | "id": "ValidateBAM", 49 | "scatterMethod": "dotproduct", 50 | "out": [ 51 | { 52 | "id": "validation_report" 53 | } 54 | ] 55 | } 56 | ], 57 | "class": "Workflow", 58 | "requirements": [ 59 | { 60 | "class": "InlineJavascriptRequirement" 61 | }, 62 | { 63 | "class": "ScatterFeatureRequirement" 64 | }, 65 | { 66 | "class": "StepInputExpressionRequirement" 67 | } 68 | ], 69 | "id": "ValidateBamsWf" 70 | } -------------------------------------------------------------------------------- /examples/ValidateBamsWf_170107/ValidateBamsWf_170107.inputs.json: -------------------------------------------------------------------------------- 1 | { 2 | "ValidateBamsWf.bam_list": [ 3 | "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06HDADXX130110.1.ATCACGAT.20k_reads.bam", 4 | "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06HDADXX130110.2.ATCACGAT.20k_reads.bam", 5 | "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06JUADXX130110.1.ATCACGAT.20k_reads.bam" 6 | ], 7 | 8 | "ValidateBamsWf.ValidateBAM.validation_mode": "SUMMARY", 9 | 10 | 
"ValidateBamsWf.ValidateBAM.mem_size": "1 GB", 11 | "ValidateBamsWf.ValidateBAM.disk_size": 200 12 | } 13 | -------------------------------------------------------------------------------- /examples/ValidateBamsWf_170107/ValidateBamsWf_170107.wdl: -------------------------------------------------------------------------------- 1 | ## Copyright Broad Institute, 2017 2 | ## 3 | ## This WDL validates a list of SAM/BAMs 4 | ## 5 | ## Requirements/expectations : 6 | ## - List of SAM or BAM files to validate 7 | ## - Explicit request of either SUMMARY or VERBOSE mode in inputs.json 8 | ## 9 | ## Outputs: 10 | ## - Set of .txt files containing the validation report, one per input file 11 | ## 12 | ## Cromwell version support 13 | ## - Successfully tested on v24 14 | ## - Does not work on versions < v23 due to output syntax 15 | ## 16 | ## Runtime parameters are optimized for Broad's Google Cloud Platform implementation. 17 | ## For program versions, see docker containers. 18 | ## 19 | ## LICENSING : 20 | ## This script is released under the WDL source code license (BSD-3) (see LICENSE in 21 | ## https://github.com/broadinstitute/wdl). Note however that the programs it calls may 22 | ## be subject to different licenses. Users are responsible for checking that they are 23 | ## authorized to run all programs before running this script. Please see the docker 24 | ## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed 25 | ## licensing information pertaining to the included programs. 
26 | 27 | # TASK DEFINITIONS 28 | 29 | # Validate a SAM or BAM using Picard ValidateSamFile; the report is written to <output_basename>_<validation_mode>.txt 30 | task ValidateBAM { 31 | File bam_file 32 | String output_basename 33 | String validation_mode 34 | Int disk_size 35 | String mem_size 36 | 37 | String output_name = "${output_basename}_${validation_mode}.txt" 38 | 39 | command { 40 | java -Xmx3000m -jar /usr/gitc/picard.jar \ 41 | ValidateSamFile \ 42 | I=${bam_file} \ 43 | OUTPUT=${output_name} \ 44 | MODE=${validation_mode} 45 | } 46 | runtime { 47 | docker: "broadinstitute/genomes-in-the-cloud:2.2.3-1469027018" 48 | memory: mem_size 49 | cpu: "1" 50 | disks: "local-disk " + disk_size + " HDD" 51 | } 52 | output { 53 | File validation_report = "${output_name}" 54 | } 55 | } 56 | 57 | # WORKFLOW DEFINITION 58 | workflow ValidateBamsWf { 59 | Array[File] bam_list 60 | 61 | # Process the input files in parallel 62 | scatter (input_bam in bam_list) { 63 | # Regexes that reduce a gs:// path to a bare basename; the dot is escaped so only a literal ".bam" extension is stripped 64 | String sub_strip_path = "gs://.*/" 65 | String sub_strip_suffix = "\\.bam$" 66 | 67 | # Run the validation 68 | call ValidateBAM { 69 | input: 70 | bam_file = input_bam, 71 | output_basename = sub(sub(input_bam, sub_strip_path, ""), sub_strip_suffix, "") + ".validation" 72 | } 73 | } 74 | 75 | # Outputs that will be retained when execution is complete 76 | output { 77 | Array[File] validation_reports = ValidateBAM.validation_report 78 | } 79 | } 80 | 81 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/CountBases.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 
0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "id": "gatk", 8 | "type": "File", 9 | "doc": "Executable jar for the GenomeAnalysisTK" 10 | }, 11 | { 12 | "id": "ref", 13 | "type": "File", 14 | "doc": "fasta file of reference genome" 15 | }, 16 | { 17 | "id": "refIndex", 18 | "type": "File", 19 | "doc": "Index file of reference genome" 20 | }, 21 | { 22 | "id": "refDict", 23 | "type": "File", 24 | "doc": "dict file of reference genome" 25 | }, 26 | { 27 | "id": "userString", 28 | "type": "string?", 29 | "doc": "An optional parameter which allows the user to specify additions to the command line at run time" 30 | }, 31 | { 32 | "id": "input_file", 33 | "type": "string[]", 34 | "doc": "Input file containing sequence data (BAM or CRAM)" 35 | }, 36 | { 37 | "id": "intervals", 38 | "type": "string[]?", 39 | "doc": "One or more genomic intervals over which to operate" 40 | } 41 | ], 42 | "id": "CountBases", 43 | "baseCommand": [], 44 | "class": "CommandLineTool", 45 | "requirements": [ 46 | { 47 | "class": "ShellCommandRequirement" 48 | }, 49 | { 50 | "class": "InlineJavascriptRequirement" 51 | }, 52 | { 53 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830", 54 | "class": "DockerRequirement" 55 | } 56 | ], 57 | "outputs": [ 58 | { 59 | "id": "taskOut", 60 | "outputBinding": { 61 | "glob": "$(inputs.out)" 62 | }, 63 | "type": "string" 64 | } 65 | ], 66 | "arguments": [ 67 | { 68 | "valueFrom": "\t\tjava -jar $(inputs.gatk.path) \\\t\t\t-T CountBases \\\t\t\t-R $(inputs.ref.path) \\\t\t\t--input_file $(inputs.input_file) \\\t\t\t$(\"--intervals \" + inputs.intervals) \\\t\t\t$(inputs.userString) \t", 69 | "shellQuote": false 70 | } 71 | ], 72 | "cwlVersion": "v1.0" 73 | } -------------------------------------------------------------------------------- /examples/gatk_wrappers/CountIntervals.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by 
wdl2cwl ver. 0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "id": "gatk", 8 | "type": "File", 9 | "doc": "Executable jar for the GenomeAnalysisTK" 10 | }, 11 | { 12 | "id": "ref", 13 | "type": "File", 14 | "doc": "fasta file of reference genome" 15 | }, 16 | { 17 | "id": "refIndex", 18 | "type": "File", 19 | "doc": "Index file of reference genome" 20 | }, 21 | { 22 | "id": "refDict", 23 | "type": "File", 24 | "doc": "dict file of reference genome" 25 | }, 26 | { 27 | "id": "userString", 28 | "type": "string?", 29 | "doc": "An optional parameter which allows the user to specify additions to the command line at run time" 30 | }, 31 | { 32 | "id": "intervals", 33 | "type": "string[]?", 34 | "doc": "One or more genomic intervals over which to operate" 35 | }, 36 | { 37 | "id": "check", 38 | "type": "string[]?", 39 | "doc": "Any number of RODs" 40 | }, 41 | { 42 | "id": "numOverlaps", 43 | "type": "int?", 44 | "doc": "Count all occurrences of X or more overlapping intervals; defaults to 2" 45 | }, 46 | { 47 | "id": "out", 48 | "type": "string?", 49 | "doc": "An output file created by the walker. 
Will overwrite contents if file exists" 50 | } 51 | ], 52 | "id": "CountIntervals", 53 | "baseCommand": [], 54 | "class": "CommandLineTool", 55 | "requirements": [ 56 | { 57 | "class": "ShellCommandRequirement" 58 | }, 59 | { 60 | "class": "InlineJavascriptRequirement" 61 | }, 62 | { 63 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830", 64 | "class": "DockerRequirement" 65 | } 66 | ], 67 | "outputs": [ 68 | { 69 | "id": "taskOut", 70 | "outputBinding": { 71 | "glob": "$(inputs.out)" 72 | }, 73 | "type": "string" 74 | } 75 | ], 76 | "arguments": [ 77 | { 78 | "valueFrom": "\t\tjava -jar $(inputs.gatk.path) \\\t\t\t-T CountIntervals \\\t\t\t-R $(inputs.ref.path) \\\t\t\t$(\"--intervals \" + inputs.intervals) \\\t\t\t-check $(inputs.check) \\\t\t\t-no $(inputs.numOverlaps) \\\t\t\t-o $(inputs.out) \\\t\t\t$(inputs.userString) \t", 79 | "shellQuote": false 80 | } 81 | ], 82 | "cwlVersion": "v1.0" 83 | } -------------------------------------------------------------------------------- /examples/gatk_wrappers/CountLoci.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 
0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "id": "gatk", 8 | "type": "File", 9 | "doc": "Executable jar for the GenomeAnalysisTK" 10 | }, 11 | { 12 | "id": "ref", 13 | "type": "File", 14 | "doc": "fasta file of reference genome" 15 | }, 16 | { 17 | "id": "refIndex", 18 | "type": "File", 19 | "doc": "Index file of reference genome" 20 | }, 21 | { 22 | "id": "refDict", 23 | "type": "File", 24 | "doc": "dict file of reference genome" 25 | }, 26 | { 27 | "id": "userString", 28 | "type": "string?", 29 | "doc": "An optional parameter which allows the user to specify additions to the command line at run time" 30 | }, 31 | { 32 | "id": "input_file", 33 | "type": "string[]", 34 | "doc": "Input file containing sequence data (BAM or CRAM)" 35 | }, 36 | { 37 | "id": "intervals", 38 | "type": "string[]?", 39 | "doc": "One or more genomic intervals over which to operate" 40 | }, 41 | { 42 | "id": "nctVal", 43 | "type": "int?" 44 | }, 45 | { 46 | "id": "ntVal", 47 | "type": "int?" 48 | }, 49 | { 50 | "id": "out", 51 | "type": "string?", 52 | "doc": "An output file created by the walker. 
Will overwrite contents if file exists" 53 | } 54 | ], 55 | "id": "CountLoci", 56 | "baseCommand": [], 57 | "class": "CommandLineTool", 58 | "requirements": [ 59 | { 60 | "class": "ShellCommandRequirement" 61 | }, 62 | { 63 | "class": "InlineJavascriptRequirement" 64 | }, 65 | { 66 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830", 67 | "class": "DockerRequirement" 68 | } 69 | ], 70 | "outputs": [ 71 | { 72 | "id": "taskOut", 73 | "outputBinding": { 74 | "glob": "$(inputs.out)" 75 | }, 76 | "type": "string" 77 | } 78 | ], 79 | "arguments": [ 80 | { 81 | "valueFrom": "\t\tjava -jar $(inputs.gatk.path) \\\t\t\t-T CountLoci \\\t\t\t-R $(inputs.ref.path) \\\t\t\t--input_file $(inputs.input_file) \\\t\t\t$(\"--intervals \" + inputs.intervals) \\\t\t\t$(\"-nct\" + inputs.nctVal) \\\t\t\t$(\"-nt\" + inputs.ntVal) \\\t\t\t-o $(inputs.out) \\\t\t\t$(inputs.userString) \t", 82 | "shellQuote": false 83 | } 84 | ], 85 | "cwlVersion": "v1.0" 86 | } -------------------------------------------------------------------------------- /examples/gatk_wrappers/CountMales.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 
0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "id": "gatk", 8 | "type": "File", 9 | "doc": "Executable jar for the GenomeAnalysisTK" 10 | }, 11 | { 12 | "id": "ref", 13 | "type": "File", 14 | "doc": "fasta file of reference genome" 15 | }, 16 | { 17 | "id": "refIndex", 18 | "type": "File", 19 | "doc": "Index file of reference genome" 20 | }, 21 | { 22 | "id": "refDict", 23 | "type": "File", 24 | "doc": "dict file of reference genome" 25 | }, 26 | { 27 | "id": "userString", 28 | "type": "string?", 29 | "doc": "An optional parameter which allows the user to specify additions to the command line at run time" 30 | }, 31 | { 32 | "id": "input_file", 33 | "type": "string[]", 34 | "doc": "Input file containing sequence data (BAM or CRAM)" 35 | }, 36 | { 37 | "id": "intervals", 38 | "type": "string[]?", 39 | "doc": "One or more genomic intervals over which to operate" 40 | }, 41 | { 42 | "id": "out", 43 | "type": "string?", 44 | "doc": "An output file created by the walker. Will overwrite contents if file exists" 45 | } 46 | ], 47 | "id": "CountMales", 48 | "baseCommand": [], 49 | "class": "CommandLineTool", 50 | "requirements": [ 51 | { 52 | "class": "ShellCommandRequirement" 53 | }, 54 | { 55 | "class": "InlineJavascriptRequirement" 56 | }, 57 | { 58 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830", 59 | "class": "DockerRequirement" 60 | } 61 | ], 62 | "outputs": [ 63 | { 64 | "id": "taskOut", 65 | "outputBinding": { 66 | "glob": "$(inputs.out)" 67 | }, 68 | "type": "string" 69 | } 70 | ], 71 | "arguments": [ 72 | { 73 | "valueFrom": "\t\tjava -jar $(inputs.gatk.path) \\\t\t\t-T CountMales \\\t\t\t-R $(inputs.ref.path) \\\t\t\t--input_file $(inputs.input_file) \\\t\t\t$(\"--intervals \" + inputs.intervals) \\\t\t\t-o $(inputs.out) \\\t\t\t$(inputs.userString) \t", 74 | "shellQuote": false 75 | } 76 | ], 77 | "cwlVersion": "v1.0" 78 | } -------------------------------------------------------------------------------- 
/examples/gatk_wrappers/CountRODsByRef.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "id": "gatk", 8 | "type": "File", 9 | "doc": "Executable jar for the GenomeAnalysisTK" 10 | }, 11 | { 12 | "id": "ref", 13 | "type": "File", 14 | "doc": "fasta file of reference genome" 15 | }, 16 | { 17 | "id": "refIndex", 18 | "type": "File", 19 | "doc": "Index file of reference genome" 20 | }, 21 | { 22 | "id": "refDict", 23 | "type": "File", 24 | "doc": "dict file of reference genome" 25 | }, 26 | { 27 | "id": "userString", 28 | "type": "string?", 29 | "doc": "An optional parameter which allows the user to specify additions to the command line at run time" 30 | }, 31 | { 32 | "id": "intervals", 33 | "type": "string[]?", 34 | "doc": "One or more genomic intervals over which to operate" 35 | }, 36 | { 37 | "id": "rod", 38 | "type": "string[]?", 39 | "doc": "Input VCF file(s)" 40 | }, 41 | { 42 | "id": "showSkipped", 43 | "type": "boolean?", 44 | "doc": "If true, this tool will print out the skipped locations" 45 | }, 46 | { 47 | "id": "verbose", 48 | "type": "boolean?", 49 | "doc": "If true, this tool will print out detailed information about the rods it finds and locations" 50 | } 51 | ], 52 | "id": "CountRODsByRef", 53 | "baseCommand": [], 54 | "class": "CommandLineTool", 55 | "requirements": [ 56 | { 57 | "class": "ShellCommandRequirement" 58 | }, 59 | { 60 | "class": "InlineJavascriptRequirement" 61 | }, 62 | { 63 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830", 64 | "class": "DockerRequirement" 65 | } 66 | ], 67 | "outputs": [ 68 | { 69 | "id": "taskOut", 70 | "outputBinding": { 71 | "glob": "$(inputs.out)" 72 | }, 73 | "type": "string" 74 | } 75 | ], 76 | "arguments": [ 77 | { 78 | "valueFrom": "\t\tjava -jar $(inputs.gatk.path) \\\t\t\t-T CountRODsByRef \\\t\t\t-R 
$(inputs.ref.path) \\\t\t\t$(\"--intervals \" + inputs.intervals) \\\t\t\t-rod $(inputs.rod) \\\t\t\t-s $(inputs.showSkipped) \\\t\t\t-v $(inputs.verbose) \\\t\t\t$(inputs.userString) \t", 79 | "shellQuote": false 80 | } 81 | ], 82 | "cwlVersion": "v1.0" 83 | } -------------------------------------------------------------------------------- /examples/gatk_wrappers/CountReadEvents.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "id": "gatk", 8 | "type": "File", 9 | "doc": "Executable jar for the GenomeAnalysisTK" 10 | }, 11 | { 12 | "id": "ref", 13 | "type": "File", 14 | "doc": "fasta file of reference genome" 15 | }, 16 | { 17 | "id": "refIndex", 18 | "type": "File", 19 | "doc": "Index file of reference genome" 20 | }, 21 | { 22 | "id": "refDict", 23 | "type": "File", 24 | "doc": "dict file of reference genome" 25 | }, 26 | { 27 | "id": "userString", 28 | "type": "string?", 29 | "doc": "An optional parameter which allows the user to specify additions to the command line at run time" 30 | }, 31 | { 32 | "id": "input_file", 33 | "type": "string[]", 34 | "doc": "Input file containing sequence data (BAM or CRAM)" 35 | }, 36 | { 37 | "id": "intervals", 38 | "type": "string[]?", 39 | "doc": "One or more genomic intervals over which to operate" 40 | }, 41 | { 42 | "id": "out", 43 | "type": "string?", 44 | "doc": "An output file created by the walker. 
Will overwrite contents if file exists" 45 | } 46 | ], 47 | "id": "CountReadEvents", 48 | "baseCommand": [], 49 | "class": "CommandLineTool", 50 | "requirements": [ 51 | { 52 | "class": "ShellCommandRequirement" 53 | }, 54 | { 55 | "class": "InlineJavascriptRequirement" 56 | }, 57 | { 58 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830", 59 | "class": "DockerRequirement" 60 | } 61 | ], 62 | "outputs": [ 63 | { 64 | "id": "taskOut", 65 | "outputBinding": { 66 | "glob": "$(inputs.out)" 67 | }, 68 | "type": "string" 69 | } 70 | ], 71 | "arguments": [ 72 | { 73 | "valueFrom": "\t\tjava -jar $(inputs.gatk.path) \\\t\t\t-T CountReadEvents \\\t\t\t-R $(inputs.ref.path) \\\t\t\t--input_file $(inputs.input_file) \\\t\t\t$(\"--intervals \" + inputs.intervals) \\\t\t\t-o $(inputs.out) \\\t\t\t$(inputs.userString) \t", 74 | "shellQuote": false 75 | } 76 | ], 77 | "cwlVersion": "v1.0" 78 | } -------------------------------------------------------------------------------- /examples/gatk_wrappers/CountReads.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 
0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "id": "gatk", 8 | "type": "File", 9 | "doc": "Executable jar for the GenomeAnalysisTK" 10 | }, 11 | { 12 | "id": "ref", 13 | "type": "File", 14 | "doc": "fasta file of reference genome" 15 | }, 16 | { 17 | "id": "refIndex", 18 | "type": "File", 19 | "doc": "Index file of reference genome" 20 | }, 21 | { 22 | "id": "refDict", 23 | "type": "File", 24 | "doc": "dict file of reference genome" 25 | }, 26 | { 27 | "id": "userString", 28 | "type": "string?", 29 | "doc": "An optional parameter which allows the user to specify additions to the command line at run time" 30 | }, 31 | { 32 | "id": "input_file", 33 | "type": "string[]", 34 | "doc": "Input file containing sequence data (BAM or CRAM)" 35 | }, 36 | { 37 | "id": "intervals", 38 | "type": "string[]?", 39 | "doc": "One or more genomic intervals over which to operate" 40 | }, 41 | { 42 | "id": "nctVal", 43 | "type": "int?" 44 | } 45 | ], 46 | "id": "CountReads", 47 | "baseCommand": [], 48 | "class": "CommandLineTool", 49 | "requirements": [ 50 | { 51 | "class": "ShellCommandRequirement" 52 | }, 53 | { 54 | "class": "InlineJavascriptRequirement" 55 | }, 56 | { 57 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830", 58 | "class": "DockerRequirement" 59 | } 60 | ], 61 | "outputs": [ 62 | { 63 | "id": "taskOut", 64 | "outputBinding": { 65 | "glob": "$(inputs.out)" 66 | }, 67 | "type": "string" 68 | } 69 | ], 70 | "arguments": [ 71 | { 72 | "valueFrom": "\t\tjava -jar $(inputs.gatk.path) \\\t\t\t-T CountReads \\\t\t\t-R $(inputs.ref.path) \\\t\t\t--input_file $(inputs.input_file) \\\t\t\t$(\"--intervals \" + inputs.intervals) \\\t\t\t$(\"-nct\" + inputs.nctVal) \\\t\t\t$(inputs.userString) \t", 73 | "shellQuote": false 74 | } 75 | ], 76 | "cwlVersion": "v1.0" 77 | } -------------------------------------------------------------------------------- /examples/gatk_wrappers/CountTerminusEvent.cwl: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "id": "gatk", 8 | "type": "File", 9 | "doc": "Executable jar for the GenomeAnalysisTK" 10 | }, 11 | { 12 | "id": "ref", 13 | "type": "File", 14 | "doc": "fasta file of reference genome" 15 | }, 16 | { 17 | "id": "refIndex", 18 | "type": "File", 19 | "doc": "Index file of reference genome" 20 | }, 21 | { 22 | "id": "refDict", 23 | "type": "File", 24 | "doc": "dict file of reference genome" 25 | }, 26 | { 27 | "id": "userString", 28 | "type": "string?", 29 | "doc": "An optional parameter which allows the user to specify additions to the command line at run time" 30 | }, 31 | { 32 | "id": "input_file", 33 | "type": "string[]", 34 | "doc": "Input file containing sequence data (BAM or CRAM)" 35 | }, 36 | { 37 | "id": "intervals", 38 | "type": "string[]?", 39 | "doc": "One or more genomic intervals over which to operate" 40 | }, 41 | { 42 | "id": "out", 43 | "type": "string?", 44 | "doc": "An output file created by the walker. 
Will overwrite contents if file exists" 45 | } 46 | ], 47 | "id": "CountTerminusEvent", 48 | "baseCommand": [], 49 | "class": "CommandLineTool", 50 | "requirements": [ 51 | { 52 | "class": "ShellCommandRequirement" 53 | }, 54 | { 55 | "class": "InlineJavascriptRequirement" 56 | }, 57 | { 58 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830", 59 | "class": "DockerRequirement" 60 | } 61 | ], 62 | "outputs": [ 63 | { 64 | "id": "taskOut", 65 | "outputBinding": { 66 | "glob": "$(inputs.out)" 67 | }, 68 | "type": "string" 69 | } 70 | ], 71 | "arguments": [ 72 | { 73 | "valueFrom": "\t\tjava -jar $(inputs.gatk.path) \\\t\t\t-T CountTerminusEvent \\\t\t\t-R $(inputs.ref.path) \\\t\t\t--input_file $(inputs.input_file) \\\t\t\t$(\"--intervals \" + inputs.intervals) \\\t\t\t-o $(inputs.out) \\\t\t\t$(inputs.userString) \t", 74 | "shellQuote": false 75 | } 76 | ], 77 | "cwlVersion": "v1.0" 78 | } -------------------------------------------------------------------------------- /examples/gatk_wrappers/FastaStats.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 
0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "id": "gatk", 8 | "type": "File", 9 | "doc": "Executable jar for the GenomeAnalysisTK" 10 | }, 11 | { 12 | "id": "ref", 13 | "type": "File", 14 | "doc": "fasta file of reference genome" 15 | }, 16 | { 17 | "id": "refIndex", 18 | "type": "File", 19 | "doc": "Index file of reference genome" 20 | }, 21 | { 22 | "id": "refDict", 23 | "type": "File", 24 | "doc": "dict file of reference genome" 25 | }, 26 | { 27 | "id": "userString", 28 | "type": "string?", 29 | "doc": "An optional parameter which allows the user to specify additions to the command line at run time" 30 | }, 31 | { 32 | "id": "intervals", 33 | "type": "string[]?", 34 | "doc": "One or more genomic intervals over which to operate" 35 | }, 36 | { 37 | "id": "out", 38 | "type": "string?", 39 | "doc": "An output file created by the walker. Will overwrite contents if file exists" 40 | } 41 | ], 42 | "id": "FastaStats", 43 | "baseCommand": [], 44 | "class": "CommandLineTool", 45 | "requirements": [ 46 | { 47 | "class": "ShellCommandRequirement" 48 | }, 49 | { 50 | "class": "InlineJavascriptRequirement" 51 | }, 52 | { 53 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830", 54 | "class": "DockerRequirement" 55 | } 56 | ], 57 | "outputs": [ 58 | { 59 | "id": "taskOut", 60 | "outputBinding": { 61 | "glob": "$(inputs.out)" 62 | }, 63 | "type": "string" 64 | } 65 | ], 66 | "arguments": [ 67 | { 68 | "valueFrom": "\t\tjava -jar $(inputs.gatk.path) \\\t\t\t-T FastaStats \\\t\t\t-R $(inputs.ref.path) \\\t\t\t$(\"--intervals \" + inputs.intervals) \\\t\t\t-o $(inputs.out) \\\t\t\t$(inputs.userString) \t", 69 | "shellQuote": false 70 | } 71 | ], 72 | "cwlVersion": "v1.0" 73 | } -------------------------------------------------------------------------------- /examples/gatk_wrappers/FlagStat.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by 
wdl2cwl ver. 0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "id": "gatk", 8 | "type": "File", 9 | "doc": "Executable jar for the GenomeAnalysisTK" 10 | }, 11 | { 12 | "id": "ref", 13 | "type": "File", 14 | "doc": "fasta file of reference genome" 15 | }, 16 | { 17 | "id": "refIndex", 18 | "type": "File", 19 | "doc": "Index file of reference genome" 20 | }, 21 | { 22 | "id": "refDict", 23 | "type": "File", 24 | "doc": "dict file of reference genome" 25 | }, 26 | { 27 | "id": "userString", 28 | "type": "string?", 29 | "doc": "An optional parameter which allows the user to specify additions to the command line at run time" 30 | }, 31 | { 32 | "id": "input_file", 33 | "type": "string[]", 34 | "doc": "Input file containing sequence data (BAM or CRAM)" 35 | }, 36 | { 37 | "id": "intervals", 38 | "type": "string[]?", 39 | "doc": "One or more genomic intervals over which to operate" 40 | }, 41 | { 42 | "id": "nctVal", 43 | "type": "int?" 44 | }, 45 | { 46 | "id": "out", 47 | "type": "string?", 48 | "doc": "An output file created by the walker. 
Will overwrite contents if file exists" 49 | } 50 | ], 51 | "id": "FlagStat", 52 | "baseCommand": [], 53 | "class": "CommandLineTool", 54 | "requirements": [ 55 | { 56 | "class": "ShellCommandRequirement" 57 | }, 58 | { 59 | "class": "InlineJavascriptRequirement" 60 | }, 61 | { 62 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830", 63 | "class": "DockerRequirement" 64 | } 65 | ], 66 | "outputs": [ 67 | { 68 | "id": "taskOut", 69 | "outputBinding": { 70 | "glob": "$(inputs.out)" 71 | }, 72 | "type": "string" 73 | } 74 | ], 75 | "arguments": [ 76 | { 77 | "valueFrom": "\t\tjava -jar $(inputs.gatk.path) \\\t\t\t-T FlagStat \\\t\t\t-R $(inputs.ref.path) \\\t\t\t--input_file $(inputs.input_file) \\\t\t\t$(\"--intervals \" + inputs.intervals) \\\t\t\t$(\"-nct\" + inputs.nctVal) \\\t\t\t-o $(inputs.out) \\\t\t\t$(inputs.userString) \t", 78 | "shellQuote": false 79 | } 80 | ], 81 | "cwlVersion": "v1.0" 82 | } -------------------------------------------------------------------------------- /examples/gatk_wrappers/GCContentByInterval.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 
0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "id": "gatk", 8 | "type": "File", 9 | "doc": "Executable jar for the GenomeAnalysisTK" 10 | }, 11 | { 12 | "id": "ref", 13 | "type": "File", 14 | "doc": "fasta file of reference genome" 15 | }, 16 | { 17 | "id": "refIndex", 18 | "type": "File", 19 | "doc": "Index file of reference genome" 20 | }, 21 | { 22 | "id": "refDict", 23 | "type": "File", 24 | "doc": "dict file of reference genome" 25 | }, 26 | { 27 | "id": "userString", 28 | "type": "string?", 29 | "doc": "An optional parameter which allows the user to specify additions to the command line at run time" 30 | }, 31 | { 32 | "id": "intervals", 33 | "type": "string[]?", 34 | "doc": "One or more genomic intervals over which to operate" 35 | }, 36 | { 37 | "id": "out", 38 | "type": "string?", 39 | "doc": "An output file created by the walker. Will overwrite contents if file exists" 40 | } 41 | ], 42 | "id": "GCContentByInterval", 43 | "baseCommand": [], 44 | "class": "CommandLineTool", 45 | "requirements": [ 46 | { 47 | "class": "ShellCommandRequirement" 48 | }, 49 | { 50 | "class": "InlineJavascriptRequirement" 51 | }, 52 | { 53 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830", 54 | "class": "DockerRequirement" 55 | } 56 | ], 57 | "outputs": [ 58 | { 59 | "id": "taskOut", 60 | "outputBinding": { 61 | "glob": "$(inputs.out)" 62 | }, 63 | "type": "string" 64 | } 65 | ], 66 | "arguments": [ 67 | { 68 | "valueFrom": "\t\tjava -jar $(inputs.gatk.path) \\\t\t\t-T GCContentByInterval \\\t\t\t-R $(inputs.ref.path) \\\t\t\t$(\"--intervals \" + inputs.intervals) \\\t\t\t-o $(inputs.out) \\\t\t\t$(inputs.userString) \t", 69 | "shellQuote": false 70 | } 71 | ], 72 | "cwlVersion": "v1.0" 73 | } -------------------------------------------------------------------------------- /examples/gatk_wrappers/LeftAlignIndels.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was 
generated automatically by wdl2cwl ver. 0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "id": "gatk", 8 | "type": "File", 9 | "doc": "Executable jar for the GenomeAnalysisTK" 10 | }, 11 | { 12 | "id": "ref", 13 | "type": "File", 14 | "doc": "fasta file of reference genome" 15 | }, 16 | { 17 | "id": "refIndex", 18 | "type": "File", 19 | "doc": "Index file of reference genome" 20 | }, 21 | { 22 | "id": "refDict", 23 | "type": "File", 24 | "doc": "dict file of reference genome" 25 | }, 26 | { 27 | "id": "userString", 28 | "type": "string?", 29 | "doc": "An optional parameter which allows the user to specify additions to the command line at run time" 30 | }, 31 | { 32 | "id": "input_file", 33 | "type": "string[]", 34 | "doc": "Input file containing sequence data (BAM or CRAM)" 35 | }, 36 | { 37 | "id": "intervals", 38 | "type": "string[]?", 39 | "doc": "One or more genomic intervals over which to operate" 40 | }, 41 | { 42 | "id": "out", 43 | "type": "string?", 44 | "doc": "Output bam" 45 | } 46 | ], 47 | "id": "LeftAlignIndels", 48 | "baseCommand": [], 49 | "class": "CommandLineTool", 50 | "requirements": [ 51 | { 52 | "class": "ShellCommandRequirement" 53 | }, 54 | { 55 | "class": "InlineJavascriptRequirement" 56 | }, 57 | { 58 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830", 59 | "class": "DockerRequirement" 60 | } 61 | ], 62 | "outputs": [ 63 | { 64 | "id": "taskOut", 65 | "outputBinding": { 66 | "glob": "$(inputs.out)" 67 | }, 68 | "type": "string" 69 | } 70 | ], 71 | "arguments": [ 72 | { 73 | "valueFrom": "\t\tjava -jar $(inputs.gatk.path) \\\t\t\t-T LeftAlignIndels \\\t\t\t-R $(inputs.ref.path) \\\t\t\t--input_file $(inputs.input_file) \\\t\t\t$(\"--intervals \" + inputs.intervals) \\\t\t\t-o $(inputs.out) \\\t\t\t$(inputs.userString) \t", 74 | "shellQuote": false 75 | } 76 | ], 77 | "cwlVersion": "v1.0" 78 | } -------------------------------------------------------------------------------- /examples/gatk_wrappers/PrintRODs.cwl: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "id": "gatk", 8 | "type": "File", 9 | "doc": "Executable jar for the GenomeAnalysisTK" 10 | }, 11 | { 12 | "id": "ref", 13 | "type": "File", 14 | "doc": "fasta file of reference genome" 15 | }, 16 | { 17 | "id": "refIndex", 18 | "type": "File", 19 | "doc": "Index file of reference genome" 20 | }, 21 | { 22 | "id": "refDict", 23 | "type": "File", 24 | "doc": "dict file of reference genome" 25 | }, 26 | { 27 | "id": "userString", 28 | "type": "string?", 29 | "doc": "An optional parameter which allows the user to specify additions to the command line at run time" 30 | }, 31 | { 32 | "id": "task_input", 33 | "type": "string", 34 | "doc": "The input ROD which should be printed out." 35 | }, 36 | { 37 | "id": "out", 38 | "type": "string?", 39 | "doc": "An output file created by the walker. 
Will overwrite contents if file exists" 40 | } 41 | ], 42 | "id": "PrintRODs", 43 | "baseCommand": [], 44 | "class": "CommandLineTool", 45 | "requirements": [ 46 | { 47 | "class": "ShellCommandRequirement" 48 | }, 49 | { 50 | "class": "InlineJavascriptRequirement" 51 | }, 52 | { 53 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830", 54 | "class": "DockerRequirement" 55 | } 56 | ], 57 | "outputs": [ 58 | { 59 | "id": "taskOut", 60 | "outputBinding": { 61 | "glob": "$(inputs.out)" 62 | }, 63 | "type": "string" 64 | } 65 | ], 66 | "arguments": [ 67 | { 68 | "valueFrom": "\t\tjava -jar $(inputs.gatk.path) \\\t\t\t-T PrintRODs \\\t\t\t-R $(inputs.ref.path) \\\t\t\t-input $(inputs.task_input) \\\t\t\t-o $(inputs.out) \\\t\t\t$(inputs.userString) \t", 69 | "shellQuote": false 70 | } 71 | ], 72 | "cwlVersion": "v1.0" 73 | } -------------------------------------------------------------------------------- /examples/gatk_wrappers/ReadGroupProperties.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 
0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "id": "gatk", 8 | "type": "File", 9 | "doc": "Executable jar for the GenomeAnalysisTK" 10 | }, 11 | { 12 | "id": "ref", 13 | "type": "File", 14 | "doc": "fasta file of reference genome" 15 | }, 16 | { 17 | "id": "refIndex", 18 | "type": "File", 19 | "doc": "Index file of reference genome" 20 | }, 21 | { 22 | "id": "refDict", 23 | "type": "File", 24 | "doc": "dict file of reference genome" 25 | }, 26 | { 27 | "id": "userString", 28 | "type": "string?", 29 | "doc": "An optional parameter which allows the user to specify additions to the command line at run time" 30 | }, 31 | { 32 | "id": "input_file", 33 | "type": "string[]", 34 | "doc": "Input file containing sequence data (BAM or CRAM)" 35 | }, 36 | { 37 | "id": "max_values_for_median", 38 | "type": "int?", 39 | "doc": "Calculate median from the first maxElementsForMedian values observed" 40 | }, 41 | { 42 | "id": "out", 43 | "type": "string?", 44 | "doc": "An output file created by the walker. Will overwrite contents if file exists" 45 | } 46 | ], 47 | "id": "ReadGroupProperties", 48 | "baseCommand": [], 49 | "class": "CommandLineTool", 50 | "requirements": [ 51 | { 52 | "class": "ShellCommandRequirement" 53 | }, 54 | { 55 | "class": "InlineJavascriptRequirement" 56 | }, 57 | { 58 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830", 59 | "class": "DockerRequirement" 60 | } 61 | ], 62 | "outputs": [ 63 | { 64 | "id": "taskOut", 65 | "outputBinding": { 66 | "glob": "$(inputs.out)" 67 | }, 68 | "type": "string" 69 | } 70 | ], 71 | "arguments": [ 72 | { 73 | "valueFrom": "\t\tjava -jar $(inputs.gatk.path) \\\t\t\t-T ReadGroupProperties \\\t\t\t-R $(inputs.ref.path) \\\t\t\t--input_file $(inputs.input_file) \\\t\t\t-maxElementsForMedian $(inputs.max_values_for_median) \\\t\t\t-o $(inputs.out) \\\t\t\t$(inputs.userString) \t", 74 | "shellQuote": false 75 | } 76 | ], 77 | "cwlVersion": "v1.0" 78 | } 
-------------------------------------------------------------------------------- /examples/gatk_wrappers/ReadLengthDistribution.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "id": "gatk", 8 | "type": "File", 9 | "doc": "Executable jar for the GenomeAnalysisTK" 10 | }, 11 | { 12 | "id": "ref", 13 | "type": "File", 14 | "doc": "fasta file of reference genome" 15 | }, 16 | { 17 | "id": "refIndex", 18 | "type": "File", 19 | "doc": "Index file of reference genome" 20 | }, 21 | { 22 | "id": "refDict", 23 | "type": "File", 24 | "doc": "dict file of reference genome" 25 | }, 26 | { 27 | "id": "userString", 28 | "type": "string?", 29 | "doc": "An optional parameter which allows the user to specify additions to the command line at run time" 30 | }, 31 | { 32 | "id": "input_file", 33 | "type": "string[]", 34 | "doc": "Input file containing sequence data (BAM or CRAM)" 35 | }, 36 | { 37 | "id": "intervals", 38 | "type": "string[]?", 39 | "doc": "One or more genomic intervals over which to operate" 40 | }, 41 | { 42 | "id": "out", 43 | "type": "string?", 44 | "doc": "An output file created by the walker. 
Will overwrite contents if file exists" 45 | } 46 | ], 47 | "id": "ReadLengthDistribution", 48 | "baseCommand": [], 49 | "class": "CommandLineTool", 50 | "requirements": [ 51 | { 52 | "class": "ShellCommandRequirement" 53 | }, 54 | { 55 | "class": "InlineJavascriptRequirement" 56 | }, 57 | { 58 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830", 59 | "class": "DockerRequirement" 60 | } 61 | ], 62 | "outputs": [ 63 | { 64 | "id": "taskOut", 65 | "outputBinding": { 66 | "glob": "$(inputs.out)" 67 | }, 68 | "type": "string" 69 | } 70 | ], 71 | "arguments": [ 72 | { 73 | "valueFrom": "\t\tjava -jar $(inputs.gatk.path) \\\t\t\t-T ReadLengthDistribution \\\t\t\t-R $(inputs.ref.path) \\\t\t\t--input_file $(inputs.input_file) \\\t\t\t$(\"--intervals \" + inputs.intervals) \\\t\t\t-o $(inputs.out) \\\t\t\t$(inputs.userString) \t", 74 | "shellQuote": false 75 | } 76 | ], 77 | "cwlVersion": "v1.0" 78 | } -------------------------------------------------------------------------------- /examples/gatk_wrappers/RegenotypeVariants.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 
0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "id": "gatk", 8 | "type": "File", 9 | "doc": "Executable jar for the GenomeAnalysisTK" 10 | }, 11 | { 12 | "id": "ref", 13 | "type": "File", 14 | "doc": "fasta file of reference genome" 15 | }, 16 | { 17 | "id": "refIndex", 18 | "type": "File", 19 | "doc": "Index file of reference genome" 20 | }, 21 | { 22 | "id": "refDict", 23 | "type": "File", 24 | "doc": "dict file of reference genome" 25 | }, 26 | { 27 | "id": "userString", 28 | "type": "string?", 29 | "doc": "An optional parameter which allows the user to specify additions to the command line at run time" 30 | }, 31 | { 32 | "id": "intervals", 33 | "type": "string[]?", 34 | "doc": "One or more genomic intervals over which to operate" 35 | }, 36 | { 37 | "id": "ntVal", 38 | "type": "int?" 39 | }, 40 | { 41 | "id": "out", 42 | "type": "string?", 43 | "doc": "File to which variants should be written" 44 | }, 45 | { 46 | "id": "variant", 47 | "type": "string", 48 | "doc": "Input VCF file" 49 | } 50 | ], 51 | "id": "RegenotypeVariants", 52 | "baseCommand": [], 53 | "class": "CommandLineTool", 54 | "requirements": [ 55 | { 56 | "class": "ShellCommandRequirement" 57 | }, 58 | { 59 | "class": "InlineJavascriptRequirement" 60 | }, 61 | { 62 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830", 63 | "class": "DockerRequirement" 64 | } 65 | ], 66 | "outputs": [ 67 | { 68 | "id": "taskOut", 69 | "outputBinding": { 70 | "glob": "$(inputs.out)" 71 | }, 72 | "type": "string" 73 | } 74 | ], 75 | "arguments": [ 76 | { 77 | "valueFrom": "\t\tjava -jar $(inputs.gatk.path) \\\t\t\t-T RegenotypeVariants \\\t\t\t-R $(inputs.ref.path) \\\t\t\t$(\"--intervals \" + inputs.intervals) \\\t\t\t$(\"-nt\" + inputs.ntVal) \\\t\t\t-o $(inputs.out) \\\t\t\t-V $(inputs.variant) \\\t\t\t$(inputs.userString) \t", 78 | "shellQuote": false 79 | } 80 | ], 81 | "cwlVersion": "v1.0" 82 | } -------------------------------------------------------------------------------- 
/examples/gatk_wrappers/SplitSamFile.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "id": "gatk", 8 | "type": "File", 9 | "doc": "Executable jar for the GenomeAnalysisTK" 10 | }, 11 | { 12 | "id": "ref", 13 | "type": "File", 14 | "doc": "fasta file of reference genome" 15 | }, 16 | { 17 | "id": "refIndex", 18 | "type": "File", 19 | "doc": "Index file of reference genome" 20 | }, 21 | { 22 | "id": "refDict", 23 | "type": "File", 24 | "doc": "dict file of reference genome" 25 | }, 26 | { 27 | "id": "userString", 28 | "type": "string?", 29 | "doc": "An optional parameter which allows the user to specify additions to the command line at run time" 30 | }, 31 | { 32 | "id": "input_file", 33 | "type": "string[]", 34 | "doc": "Input file containing sequence data (BAM or CRAM)" 35 | }, 36 | { 37 | "id": "intervals", 38 | "type": "string[]?", 39 | "doc": "One or more genomic intervals over which to operate" 40 | }, 41 | { 42 | "id": "outputRoot", 43 | "type": "string?", 44 | "doc": "output BAM file" 45 | } 46 | ], 47 | "id": "SplitSamFile", 48 | "baseCommand": [], 49 | "class": "CommandLineTool", 50 | "requirements": [ 51 | { 52 | "class": "ShellCommandRequirement" 53 | }, 54 | { 55 | "class": "InlineJavascriptRequirement" 56 | }, 57 | { 58 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830", 59 | "class": "DockerRequirement" 60 | } 61 | ], 62 | "outputs": [ 63 | { 64 | "id": "taskOut", 65 | "outputBinding": { 66 | "glob": "$(inputs.out)" 67 | }, 68 | "type": "string" 69 | } 70 | ], 71 | "arguments": [ 72 | { 73 | "valueFrom": "\t\tjava -jar $(inputs.gatk.path) \\\t\t\t-T SplitSamFile \\\t\t\t-R $(inputs.ref.path) \\\t\t\t--input_file $(inputs.input_file) \\\t\t\t$(\"--intervals \" + inputs.intervals) \\\t\t\toutputRoot $(inputs.outputRoot) \\\t\t\t$(inputs.userString) \t", 74 | 
"shellQuote": false 75 | } 76 | ], 77 | "cwlVersion": "v1.0" 78 | } -------------------------------------------------------------------------------- /examples/gatk_wrappers/VariantsToAllelicPrimitives.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "id": "gatk", 8 | "type": "File", 9 | "doc": "Executable jar for the GenomeAnalysisTK" 10 | }, 11 | { 12 | "id": "ref", 13 | "type": "File", 14 | "doc": "fasta file of reference genome" 15 | }, 16 | { 17 | "id": "refIndex", 18 | "type": "File", 19 | "doc": "Index file of reference genome" 20 | }, 21 | { 22 | "id": "refDict", 23 | "type": "File", 24 | "doc": "dict file of reference genome" 25 | }, 26 | { 27 | "id": "userString", 28 | "type": "string?", 29 | "doc": "An optional parameter which allows the user to specify additions to the command line at run time" 30 | }, 31 | { 32 | "id": "intervals", 33 | "type": "string[]?", 34 | "doc": "One or more genomic intervals over which to operate" 35 | }, 36 | { 37 | "id": "out", 38 | "type": "string?", 39 | "doc": "File to which variants should be written" 40 | }, 41 | { 42 | "id": "variant", 43 | "type": "string", 44 | "doc": "Input VCF file" 45 | } 46 | ], 47 | "id": "VariantsToAllelicPrimitives", 48 | "baseCommand": [], 49 | "class": "CommandLineTool", 50 | "requirements": [ 51 | { 52 | "class": "ShellCommandRequirement" 53 | }, 54 | { 55 | "class": "InlineJavascriptRequirement" 56 | }, 57 | { 58 | "dockerPull": "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830", 59 | "class": "DockerRequirement" 60 | } 61 | ], 62 | "outputs": [ 63 | { 64 | "id": "taskOut", 65 | "outputBinding": { 66 | "glob": "$(inputs.out)" 67 | }, 68 | "type": "string" 69 | } 70 | ], 71 | "arguments": [ 72 | { 73 | "valueFrom": "\t\tjava -jar $(inputs.gatk.path) \\\t\t\t-T VariantsToAllelicPrimitives \\\t\t\t-R 
$(inputs.ref.path) \\\t\t\t$(\"--intervals \" + inputs.intervals) \\\t\t\t-o $(inputs.out) \\\t\t\t-V $(inputs.variant) \\\t\t\t$(inputs.userString) \t", 74 | "shellQuote": false 75 | } 76 | ], 77 | "cwlVersion": "v1.0" 78 | } -------------------------------------------------------------------------------- /examples/gatk_wrappers/VariantsToVCF.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 0.2 3 | 4 | { 5 | "inputs": [ 6 | { 7 | "id": "gatk", 8 | "type": "File", 9 | "doc": "Executable jar for the GenomeAnalysisTK" 10 | }, 11 | { 12 | "id": "ref", 13 | "type": "File", 14 | "doc": "fasta file of reference genome" 15 | }, 16 | { 17 | "id": "refIndex", 18 | "type": "File", 19 | "doc": "Index file of reference genome" 20 | }, 21 | { 22 | "id": "refDict", 23 | "type": "File", 24 | "doc": "dict file of reference genome" 25 | }, 26 | { 27 | "id": "userString", 28 | "type": "string?", 29 | "doc": "An optional parameter which allows the user to specify additions to the command line at run time" 30 | }, 31 | { 32 | "id": "intervals", 33 | "type": "string[]?", 34 | "doc": "One or more genomic intervals over which to operate" 35 | }, 36 | { 37 | "id": "dbsnp", 38 | "type": "string?", 39 | "doc": "dbSNP file" 40 | }, 41 | { 42 | "id": "out", 43 | "type": "string?", 44 | "doc": "File to which variants should be written" 45 | }, 46 | { 47 | "id": "sample", 48 | "type": "string?", 49 | "doc": "The sample name represented by the variant rod" 50 | }, 51 | { 52 | "id": "variant", 53 | "type": "string", 54 | "doc": "Input variant file" 55 | } 56 | ], 57 | "id": "VariantsToVCF", 58 | "baseCommand": [], 59 | "class": "CommandLineTool", 60 | "requirements": [ 61 | { 62 | "class": "ShellCommandRequirement" 63 | }, 64 | { 65 | "class": "InlineJavascriptRequirement" 66 | }, 67 | { 68 | "dockerPull": 
"broadinstitute/genomes-in-the-cloud:2.2.2-1466113830", 69 | "class": "DockerRequirement" 70 | } 71 | ], 72 | "outputs": [ 73 | { 74 | "id": "taskOut", 75 | "outputBinding": { 76 | "glob": "$(inputs.out)" 77 | }, 78 | "type": "string" 79 | } 80 | ], 81 | "arguments": [ 82 | { 83 | "valueFrom": "\t\tjava -jar $(inputs.gatk.path) \\\t\t\t-T VariantsToVCF \\\t\t\t-R $(inputs.ref.path) \\\t\t\t$(\"--intervals \" + inputs.intervals) \\\t\t\t$(\"-D \" + inputs.dbsnp) \\\t\t\t-o $(inputs.out) \\\t\t\t$(\"-sample \" + inputs.sample) \\\t\t\t-V $(inputs.variant) \\\t\t\t$(inputs.userString) \t", 84 | "shellQuote": false 85 | } 86 | ], 87 | "cwlVersion": "v1.0" 88 | } -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/ASEReadCounter_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This ASEReadCounter WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Calculate read counts per allele for allele-specific expression analysis 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task ASEReadCounter { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file 15 | Array[String] ? intervals 16 | String unsafe 17 | String ? countOverlapReadsType 18 | String ? minBaseQuality 19 | Int ? minDepthOfNonFilteredBase 20 | Int ? minMappingQuality 21 | String ? out 22 | String ? 
outputFormat 23 | String sitesVCFFile 24 | 25 | command { 26 | java -jar ${gatk} \ 27 | -T ASEReadCounter \ 28 | -R ${ref} \ 29 | --input_file ${input_file} \ 30 | ${default="" "--intervals " + intervals} \ 31 | --unsafe ${unsafe} \ 32 | -overlap ${default="COUNT_FRAGMENTS_REQUIRE_SAME_BASE" countOverlapReadsType} \ 33 | -mbq ${default="0" minBaseQuality} \ 34 | -minDepth ${default="-1" minDepthOfNonFilteredBase} \ 35 | -mmq ${default="0" minMappingQuality} \ 36 | -o ${default="stdout" out} \ 37 | outputFormat ${default="RTABLE" outputFormat} \ 38 | -sites ${sitesVCFFile} \ 39 | ${default="\n" userString} 40 | } 41 | 42 | output { 43 | #To track additional outputs from your task, please manually add them below 44 | String taskOut = "${out}" 45 | } 46 | 47 | runtime { 48 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 49 | } 50 | 51 | parameter_meta { 52 | gatk: "Executable jar for the GenomeAnalysisTK" 53 | ref: "fasta file of reference genome" 54 | refIndex: "Index file of reference genome" 55 | refDict: "dict file of reference genome" 56 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 57 | countOverlapReadsType: "Handling of overlapping reads from the same fragment" 58 | minBaseQuality: "Minimum base quality" 59 | minDepthOfNonFilteredBase: "Minimum number of bases that pass filters" 60 | minMappingQuality: "Minimum read mapping quality" 61 | out: "An output file created by the walker. 
Will overwrite contents if file exists" 62 | outputFormat: "Format of the output file, can be CSV, TABLE, RTABLE" 63 | sitesVCFFile: "Undocumented option" 64 | input_file: "Input file containing sequence data (BAM or CRAM)" 65 | intervals: "One or more genomic intervals over which to operate" 66 | unsafe: "Enable unsafe operations: nothing will be checked at runtime" 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/AnalyzeCovariates_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This AnalyzeCovariates WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Create plots to visualize base recalibration results 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task AnalyzeCovariates { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | File ? BQSR 15 | File ? afterReportFile 16 | File ? beforeReportFile 17 | Boolean ? ignoreLastModificationTimes 18 | File ? intermediateCsvFile 19 | File ? 
plotsReportFile 20 | 21 | command { 22 | java -jar ${gatk} \ 23 | -T AnalyzeCovariates \ 24 | -R ${ref} \ 25 | ${default="" "--BQSR " + BQSR} \ 26 | ${default="" "-after " + afterReportFile} \ 27 | ${default="" "-before " + beforeReportFile} \ 28 | -ignoreLMT ${default="false" ignoreLastModificationTimes} \ 29 | ${default="" "-csv " + intermediateCsvFile} \ 30 | ${default="" "-plots " + plotsReportFile} \ 31 | ${default="\n" userString} 32 | } 33 | 34 | output { 35 | #To track additional outputs from your task, please manually add them below 36 | String taskOut = "${out}" 37 | } 38 | 39 | runtime { 40 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 41 | } 42 | 43 | parameter_meta { 44 | gatk: "Executable jar for the GenomeAnalysisTK" 45 | ref: "fasta file of reference genome" 46 | refIndex: "Index file of reference genome" 47 | refDict: "dict file of reference genome" 48 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 49 | afterReportFile: "file containing the BQSR second-pass report file" 50 | beforeReportFile: "file containing the BQSR first-pass report file" 51 | ignoreLastModificationTimes: "do not emit warning messages related to suspicious last modification time order of inputs" 52 | intermediateCsvFile: "location of the csv intermediate file" 53 | plotsReportFile: "location of the output report" 54 | BQSR: "Input covariates table file for on-the-fly base quality score recalibration" 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/CatVariants_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This CatVariants WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL 
repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Concatenate VCF files of non-overlapping genome intervals, all with the same set of samples 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task CatVariants { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Boolean ? assumeSorted 15 | Boolean ? help 16 | String ? log_to_file 17 | String ? logging_level 18 | File outputFile 19 | File reference 20 | Array[File] variant 21 | Int ? variant_index_parameter 22 | String ? variant_index_type 23 | Boolean ? version 24 | 25 | command { 26 | java -jar ${gatk} \ 27 | -T CatVariants \ 28 | -R ${ref} \ 29 | -assumeSorted ${default="false" assumeSorted} \ 30 | -h ${default="false" help} \ 31 | ${default="" "-log " + log_to_file} \ 32 | -l ${default="INFO" logging_level} \ 33 | -out ${outputFile} \ 34 | -R ${reference} \ 35 | -V ${variant} \ 36 | variant_index_parameter ${default="-1" variant_index_parameter} \ 37 | variant_index_type ${default="DYNAMIC_SEEK" variant_index_type} \ 38 | -version ${default="false" version} \ 39 | ${default="\n" userString} 40 | } 41 | 42 | output { 43 | #To track additional outputs from your task, please manually add them below 44 | String taskOut = "${out}" 45 | } 46 | 47 | runtime { 48 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 49 | } 50 | 51 | parameter_meta { 52 | gatk: "Executable jar for the GenomeAnalysisTK" 53 | ref: "fasta file of reference genome" 54 | refIndex: "Index file of reference genome" 55 | refDict: "dict file of reference genome" 56 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 57 | assumeSorted: "assumeSorted should be true if the input files are already sorted (based on 
the position of the variants)" 58 | help: "Generate the help message" 59 | log_to_file: "Set the logging location" 60 | logging_level: "Set the minimum level of logging" 61 | outputFile: "output file" 62 | reference: "genome reference file .fasta" 63 | variant: "Input VCF file/s" 64 | variant_index_parameter: "the parameter (bin width or features per bin) to pass to the VCF/BCF IndexCreator" 65 | variant_index_type: "which type of IndexCreator to use for VCF/BCF indices" 66 | version: "Output version information" 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/CheckPileup_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This CheckPileup WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Compare GATK's internal pileup to a reference Samtools pileup 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task CheckPileup { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file 15 | Array[String] ? intervals 16 | Int ? ntVal 17 | Boolean ? continue_after_error 18 | String ? 
out 19 | String pileup 20 | 21 | command { 22 | java -jar ${gatk} \ 23 | -T CheckPileup \ 24 | -R ${ref} \ 25 | --input_file ${input_file} \ 26 | ${default="" "--intervals " + intervals} \ 27 | ${default="" "-nt" + ntVal} \ 28 | continue_after_error ${default="false" continue_after_error} \ 29 | -o ${default="stdout" out} \ 30 | -pileup ${pileup} \ 31 | ${default="\n" userString} 32 | } 33 | 34 | output { 35 | #To track additional outputs from your task, please manually add them below 36 | String taskOut = "${out}" 37 | } 38 | 39 | runtime { 40 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 41 | } 42 | 43 | parameter_meta { 44 | gatk: "Executable jar for the GenomeAnalysisTK" 45 | ref: "fasta file of reference genome" 46 | refIndex: "Index file of reference genome" 47 | refDict: "dict file of reference genome" 48 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 49 | continue_after_error: "Continue after encountering an error" 50 | out: "An output file created by the walker. 
Will overwrite contents if file exists" 51 | pileup: "Pileup generated by Samtools" 52 | input_file: "Input file containing sequence data (BAM or CRAM)" 53 | intervals: "One or more genomic intervals over which to operate" 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/ClipReads_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This ClipReads WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Read clipping based on quality, position or sequence matching 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task ClipReads { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file 15 | Array[String] ? intervals 16 | String ? clipRepresentation 17 | String ? clipSequence 18 | String ? clipSequencesFile 19 | String ? cyclesToTrim 20 | String ? out 21 | String ? outputStatistics 22 | Int ? 
qTrimmingThreshold 23 | 24 | command { 25 | java -jar ${gatk} \ 26 | -T ClipReads \ 27 | -R ${ref} \ 28 | --input_file ${input_file} \ 29 | ${default="" "--intervals " + intervals} \ 30 | -CR ${default="WRITE_NS" clipRepresentation} \ 31 | ${default="" "-X " + clipSequence} \ 32 | ${default="" "-XF " + clipSequencesFile} \ 33 | ${default="" "-CT " + cyclesToTrim} \ 34 | -o ${default="stdout" out} \ 35 | ${default="" "-os " + outputStatistics} \ 36 | -QT ${default="-1" qTrimmingThreshold} \ 37 | ${default="\n" userString} 38 | } 39 | 40 | output { 41 | #To track additional outputs from your task, please manually add them below 42 | String taskOut = "${out}" 43 | } 44 | 45 | runtime { 46 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 47 | } 48 | 49 | parameter_meta { 50 | gatk: "Executable jar for the GenomeAnalysisTK" 51 | ref: "fasta file of reference genome" 52 | refIndex: "Index file of reference genome" 53 | refDict: "dict file of reference genome" 54 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 55 | clipRepresentation: "How should we actually clip the bases?" 
56 | clipSequence: "Remove sequences within reads matching this sequence" 57 | clipSequencesFile: "Remove sequences within reads matching the sequences in this FASTA file" 58 | cyclesToTrim: "String indicating machine cycles to clip from the reads" 59 | out: "Write BAM output here" 60 | outputStatistics: "File to output statistics" 61 | qTrimmingThreshold: "If provided, the Q-score clipper will be applied" 62 | input_file: "Input file containing sequence data (BAM or CRAM)" 63 | intervals: "One or more genomic intervals over which to operate" 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/CombineGVCFs_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This CombineGVCFs WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Combine per-sample gVCF files produced by HaplotypeCaller into a multi-sample gVCF file 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task CombineGVCFs { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] ? intervals 15 | Array[String] ? annotation 16 | Int ? breakBandsAtMultiplesOf 17 | Boolean ? convertToBasePairResolution 18 | String ? dbsnp 19 | String ? group 20 | String ? 
out 21 | Array[String] variant 22 | 23 | command { 24 | java -jar ${gatk} \ 25 | -T CombineGVCFs \ 26 | -R ${ref} \ 27 | ${default="" "--intervals " + intervals} \ 28 | -A ${default="[AS_RMSMappingQuality]" annotation} \ 29 | -breakBandsAtMultiplesOf ${default="0" breakBandsAtMultiplesOf} \ 30 | -bpResolution ${default="false" convertToBasePairResolution} \ 31 | ${default="" "-D " + dbsnp} \ 32 | -G ${default="[StandardAnnotation]" group} \ 33 | -o ${default="stdout" out} \ 34 | -V ${variant} \ 35 | ${default="\n" userString} 36 | } 37 | 38 | output { 39 | #To track additional outputs from your task, please manually add them below 40 | String taskOut = "${out}" 41 | } 42 | 43 | runtime { 44 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 45 | } 46 | 47 | parameter_meta { 48 | gatk: "Executable jar for the GenomeAnalysisTK" 49 | ref: "fasta file of reference genome" 50 | refIndex: "Index file of reference genome" 51 | refDict: "dict file of reference genome" 52 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 53 | annotation: "One or more specific annotations to recompute. 
The single value 'none' removes the default annotations" 54 | breakBandsAtMultiplesOf: "If > 0, reference bands will be broken up at genomic positions that are multiples of this number" 55 | convertToBasePairResolution: "If specified, convert banded gVCFs to all-sites gVCFs" 56 | dbsnp: "dbSNP file" 57 | group: "One or more classes/groups of annotations to apply to variant calls" 58 | out: "File to which the combined gVCF should be written" 59 | variant: "One or more input gVCF files" 60 | intervals: "One or more genomic intervals over which to operate" 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/CompareCallableLoci_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This CompareCallableLoci WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Compare callability statistics 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task CompareCallableLoci { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] ? intervals 15 | String comp1 16 | String comp2 17 | String ? out 18 | String ? 
printstate 19 | 20 | command { 21 | java -jar ${gatk} \ 22 | -T CompareCallableLoci \ 23 | -R ${ref} \ 24 | ${default="" "--intervals " + intervals} \ 25 | -comp1 ${comp1} \ 26 | -comp2 ${comp2} \ 27 | -o ${default="stdout" out} \ 28 | ${default="" "-printState " + printstate} \ 29 | ${default="\n" userString} 30 | } 31 | 32 | output { 33 | #To track additional outputs from your task, please manually add them below 34 | String taskOut = "${out}" 35 | } 36 | 37 | runtime { 38 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 39 | } 40 | 41 | parameter_meta { 42 | gatk: "Executable jar for the GenomeAnalysisTK" 43 | ref: "fasta file of reference genome" 44 | refIndex: "Index file of reference genome" 45 | refDict: "dict file of reference genome" 46 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 47 | comp1: "First comparison track name" 48 | comp2: "Second comparison track name" 49 | out: "An output file created by the walker. 
Will overwrite contents if file exists" 50 | printstate: "If provided, prints sites satisfying this state pair" 51 | intervals: "One or more genomic intervals over which to operate" 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/CountBases_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This CountBases WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Count the number of bases in a set of reads 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task CountBases { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file 15 | Array[String] ? 
intervals 16 | 17 | command { 18 | java -jar ${gatk} \ 19 | -T CountBases \ 20 | -R ${ref} \ 21 | --input_file ${sep=" --input_file " input_file} \ 22 | ${default="" "--intervals " + intervals} \ 23 | ${default="\n" userString} 24 | } 25 | 26 | output { 27 | #To track additional outputs from your task, please manually add them below 28 | File taskOut = stdout() #This task declares no "out" input, so "${out}" could not be resolved; the captured stdout is exposed instead 29 | } 30 | 31 | runtime { 32 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 33 | } 34 | 35 | parameter_meta { 36 | gatk: "Executable jar for the GenomeAnalysisTK" 37 | ref: "fasta file of reference genome" 38 | refIndex: "Index file of reference genome" 39 | refDict: "dict file of reference genome" 40 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 41 | input_file: "Input file containing sequence data (BAM or CRAM)" 42 | intervals: "One or more genomic intervals over which to operate" 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/CountIntervals_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This CountIntervals WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Count contiguous regions in an interval list 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task CountIntervals { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] ? intervals 15 | Array[String] ? check 16 | Int ? numOverlaps 17 | String ? 
out 18 | 19 | command { 20 | java -jar ${gatk} \ 21 | -T CountIntervals \ 22 | -R ${ref} \ 23 | ${default="" "--intervals " + intervals} \ 24 | -check ${default="[]" check} \ 25 | -no ${default="2" numOverlaps} \ 26 | -o ${default="stdout" out} \ 27 | ${default="\n" userString} 28 | } 29 | 30 | output { 31 | #To track additional outputs from your task, please manually add them below 32 | String taskOut = "${out}" 33 | } 34 | 35 | runtime { 36 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 37 | } 38 | 39 | parameter_meta { 40 | gatk: "Executable jar for the GenomeAnalysisTK" 41 | ref: "fasta file of reference genome" 42 | refIndex: "Index file of reference genome" 43 | refDict: "dict file of reference genome" 44 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 45 | check: "Any number of RODs" 46 | numOverlaps: "Count all occurrences of X or more overlapping intervals; defaults to 2" 47 | out: "An output file created by the walker. Will overwrite contents if file exists" 48 | intervals: "One or more genomic intervals over which to operate" 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/CountLoci_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This CountLoci WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Count the total number of covered loci 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task CountLoci { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? 
userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file #Each element is passed as its own --input_file argument 15 | Array[String] ? intervals 16 | Int ? nctVal #Passed as "-nct <value>" when set 17 | Int ? ntVal #Passed as "-nt <value>" when set 18 | String ? out 19 | 20 | command { 21 | java -jar ${gatk} \ 22 | -T CountLoci \ 23 | -R ${ref} \ 24 | --input_file ${sep=" --input_file " input_file} \ 25 | ${default="" "--intervals " + intervals} \ 26 | ${default="" "-nct " + nctVal} \ 27 | ${default="" "-nt " + ntVal} \ 28 | -o ${default="stdout" out} \ 29 | ${default="\n" userString} 30 | } 31 | 32 | output { 33 | #To track additional outputs from your task, please manually add them below 34 | String taskOut = "${out}" 35 | } 36 | 37 | runtime { 38 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 39 | } 40 | 41 | parameter_meta { 42 | gatk: "Executable jar for the GenomeAnalysisTK" 43 | ref: "fasta file of reference genome" 44 | refIndex: "Index file of reference genome" 45 | refDict: "dict file of reference genome" 46 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 47 | out: "An output file created by the walker. 
Will overwrite contents if file exists" 48 | input_file: "Input file containing sequence data (BAM or CRAM)" 49 | intervals: "One or more genomic intervals over which to operate" 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/CountMales_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This CountMales WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Count the number of reads seen from male samples 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task CountMales { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file 15 | Array[String] ? intervals 16 | String ? 
out 17 | 18 | command { 19 | java -jar ${gatk} \ 20 | -T CountMales \ 21 | -R ${ref} \ 22 | --input_file ${sep=" --input_file " input_file} \ 23 | ${default="" "--intervals " + intervals} \ 24 | -o ${default="stdout" out} \ 25 | ${default="\n" userString} 26 | } 27 | 28 | output { 29 | #To track additional outputs from your task, please manually add them below 30 | String taskOut = "${out}" 31 | } 32 | 33 | runtime { 34 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 35 | } 36 | 37 | parameter_meta { 38 | gatk: "Executable jar for the GenomeAnalysisTK" 39 | ref: "fasta file of reference genome" 40 | refIndex: "Index file of reference genome" 41 | refDict: "dict file of reference genome" 42 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 43 | out: "An output file created by the walker. Will overwrite contents if file exists" 44 | input_file: "Input file containing sequence data (BAM or CRAM)" 45 | intervals: "One or more genomic intervals over which to operate" 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/CountRODsByRef_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This CountRODsByRef WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Count the number of ROD objects encountered along the reference 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task CountRODsByRef { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? 
userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] ? intervals 15 | Array[String] ? rod 16 | Boolean ? showSkipped 17 | Boolean ? verbose 18 | 19 | command { 20 | java -jar ${gatk} \ 21 | -T CountRODsByRef \ 22 | -R ${ref} \ 23 | ${default="" "--intervals " + intervals} \ 24 | -rod ${default="[]" rod} \ 25 | -s ${default="false" showSkipped} \ 26 | -v ${default="false" verbose} \ 27 | ${default="\n" userString} 28 | } 29 | 30 | output { 31 | #To track additional outputs from your task, please manually add them below 32 | File taskOut = stdout() #This task declares no "out" input, so "${out}" could not be resolved; the captured stdout is exposed instead 33 | } 34 | 35 | runtime { 36 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 37 | } 38 | 39 | parameter_meta { 40 | gatk: "Executable jar for the GenomeAnalysisTK" 41 | ref: "fasta file of reference genome" 42 | refIndex: "Index file of reference genome" 43 | refDict: "dict file of reference genome" 44 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 45 | rod: "Input VCF file(s)" 46 | showSkipped: "If true, this tool will print out the skipped locations" 47 | verbose: "If true, this tool will print out detailed information about the rods it finds and locations" 48 | intervals: "One or more genomic intervals over which to operate" 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/CountRODs_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This CountRODs WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Count the number of ROD objects 
encountered 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task CountRODs { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] ? intervals 15 | Int ? nctVal #Passed as "-nct <value>" when set 16 | Int ? ntVal #Passed as "-nt <value>" when set 17 | String ? out 18 | Array[String] rod #Each element is passed as its own -rod argument 19 | Boolean ? showSkipped 20 | Boolean ? verbose 21 | 22 | command { 23 | java -jar ${gatk} \ 24 | -T CountRODs \ 25 | -R ${ref} \ 26 | ${default="" "--intervals " + intervals} \ 27 | ${default="" "-nct " + nctVal} \ 28 | ${default="" "-nt " + ntVal} \ 29 | -o ${default="stdout" out} \ 30 | -rod ${sep=" -rod " rod} \ 31 | -s ${default="false" showSkipped} \ 32 | -v ${default="false" verbose} \ 33 | ${default="\n" userString} 34 | } 35 | 36 | output { 37 | #To track additional outputs from your task, please manually add them below 38 | String taskOut = "${out}" 39 | } 40 | 41 | runtime { 42 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 43 | } 44 | 45 | parameter_meta { 46 | gatk: "Executable jar for the GenomeAnalysisTK" 47 | ref: "fasta file of reference genome" 48 | refIndex: "Index file of reference genome" 49 | refDict: "dict file of reference genome" 50 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 51 | out: "An output file created by the walker. 
Will overwrite contents if file exists" 52 | rod: "Input VCF file(s)" 53 | showSkipped: "If true, this tool will print out the skipped locations" 54 | verbose: "If true, this tool will print out detailed information about the rods it finds and locations" 55 | intervals: "One or more genomic intervals over which to operate" 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/CountReadEvents_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This CountReadEvents WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Count the number of read events 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task CountReadEvents { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file 15 | Array[String] ? intervals 16 | String ? 
out 17 | 18 | command { 19 | java -jar ${gatk} \ 20 | -T CountReadEvents \ 21 | -R ${ref} \ 22 | --input_file ${sep=" --input_file " input_file} \ 23 | ${default="" "--intervals " + intervals} \ 24 | -o ${default="stdout" out} \ 25 | ${default="\n" userString} 26 | } 27 | 28 | output { 29 | #To track additional outputs from your task, please manually add them below 30 | String taskOut = "${out}" 31 | } 32 | 33 | runtime { 34 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 35 | } 36 | 37 | parameter_meta { 38 | gatk: "Executable jar for the GenomeAnalysisTK" 39 | ref: "fasta file of reference genome" 40 | refIndex: "Index file of reference genome" 41 | refDict: "dict file of reference genome" 42 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 43 | out: "An output file created by the walker. Will overwrite contents if file exists" 44 | input_file: "Input file containing sequence data (BAM or CRAM)" 45 | intervals: "One or more genomic intervals over which to operate" 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/CountReads_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This CountReads WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Count the number of reads 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task CountReads { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? 
userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file #Each element is passed as its own --input_file argument 15 | Array[String] ? intervals 16 | Int ? nctVal #Passed as "-nct <value>" when set 17 | 18 | command { 19 | java -jar ${gatk} \ 20 | -T CountReads \ 21 | -R ${ref} \ 22 | --input_file ${sep=" --input_file " input_file} \ 23 | ${default="" "--intervals " + intervals} \ 24 | ${default="" "-nct " + nctVal} \ 25 | ${default="\n" userString} 26 | } 27 | 28 | output { 29 | #To track additional outputs from your task, please manually add them below 30 | File taskOut = stdout() #This task declares no "out" input, so "${out}" could not be resolved; the captured stdout is exposed instead 31 | } 32 | 33 | runtime { 34 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 35 | } 36 | 37 | parameter_meta { 38 | gatk: "Executable jar for the GenomeAnalysisTK" 39 | ref: "fasta file of reference genome" 40 | refIndex: "Index file of reference genome" 41 | refDict: "dict file of reference genome" 42 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 43 | input_file: "Input file containing sequence data (BAM or CRAM)" 44 | intervals: "One or more genomic intervals over which to operate" 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/CountTerminusEvent_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This CountTerminusEvent WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Count the number of reads ending in insertions, deletions or soft-clips 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task CountTerminusEvent { 9 | File gatk 10 | File ref 
11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file #Each element is passed as its own --input_file argument 15 | Array[String] ? intervals 16 | String ? out 17 | 18 | command { 19 | java -jar ${gatk} \ 20 | -T CountTerminusEvent \ 21 | -R ${ref} \ 22 | --input_file ${sep=" --input_file " input_file} \ 23 | ${default="" "--intervals " + intervals} \ 24 | -o ${default="stdout" out} \ 25 | ${default="\n" userString} 26 | } 27 | 28 | output { 29 | #To track additional outputs from your task, please manually add them below 30 | String taskOut = "${out}" 31 | } 32 | 33 | runtime { 34 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 35 | } 36 | 37 | parameter_meta { 38 | gatk: "Executable jar for the GenomeAnalysisTK" 39 | ref: "fasta file of reference genome" 40 | refIndex: "Index file of reference genome" 41 | refDict: "dict file of reference genome" 42 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 43 | out: "An output file created by the walker. 
Will overwrite contents if file exists" 44 | input_file: "Input file containing sequence data (BAM or CRAM)" 45 | intervals: "One or more genomic intervals over which to operate" 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/ErrorRatePerCycle_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This ErrorRatePerCycle WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Compute the read error rate per position 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task ErrorRatePerCycle { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file 15 | Array[String] ? intervals 16 | Int ? min_base_quality_score 17 | Int ? min_mapping_quality_score 18 | String ? 
out 19 | 20 | command { 21 | java -jar ${gatk} \ 22 | -T ErrorRatePerCycle \ 23 | -R ${ref} \ 24 | --input_file ${sep=" --input_file " input_file} \ 25 | ${default="" "--intervals " + intervals} \ 26 | -mbq ${default="0" min_base_quality_score} \ 27 | -mmq ${default="20" min_mapping_quality_score} \ 28 | -o ${default="stdout" out} \ 29 | ${default="\n" userString} 30 | } 31 | 32 | output { 33 | #To track additional outputs from your task, please manually add them below 34 | String taskOut = "${out}" 35 | } 36 | 37 | runtime { 38 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 39 | } 40 | 41 | parameter_meta { 42 | gatk: "Executable jar for the GenomeAnalysisTK" 43 | ref: "fasta file of reference genome" 44 | refIndex: "Index file of reference genome" 45 | refDict: "dict file of reference genome" 46 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 47 | min_base_quality_score: "Minimum base quality required to consider a base for calling" 48 | min_mapping_quality_score: "Minimum read mapping quality required to consider a read for calling" 49 | out: "An output file created by the walker. 
Will overwrite contents if file exists" 50 | input_file: "Input file containing sequence data (BAM or CRAM)" 51 | intervals: "One or more genomic intervals over which to operate" 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/FastaStats_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This FastaStats WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Calculate basic statistics about the reference sequence itself 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task FastaStats { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] ? intervals 15 | String ? 
out 16 | 17 | command { 18 | java -jar ${gatk} \ 19 | -T FastaStats \ 20 | -R ${ref} \ 21 | ${default="" "--intervals " + intervals} \ 22 | -o ${default="stdout" out} \ 23 | ${default="\n" userString} 24 | } 25 | 26 | output { 27 | #To track additional outputs from your task, please manually add them below 28 | String taskOut = "${out}" 29 | } 30 | 31 | runtime { 32 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 33 | } 34 | 35 | parameter_meta { 36 | gatk: "Executable jar for the GenomeAnalysisTK" 37 | ref: "fasta file of reference genome" 38 | refIndex: "Index file of reference genome" 39 | refDict: "dict file of reference genome" 40 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 41 | out: "An output file created by the walker. Will overwrite contents if file exists" 42 | intervals: "One or more genomic intervals over which to operate" 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/FlagStat_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This FlagStat WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Collect statistics about sequence reads based on their SAM flags 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task FlagStat { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file 15 | Array[String] ? intervals 16 | Int ? 
nctVal #Passed as "-nct <value>" when set 17 | String ? out 18 | 19 | command { 20 | java -jar ${gatk} \ 21 | -T FlagStat \ 22 | -R ${ref} \ 23 | --input_file ${sep=" --input_file " input_file} \ 24 | ${default="" "--intervals " + intervals} \ 25 | ${default="" "-nct " + nctVal} \ 26 | -o ${default="stdout" out} \ 27 | ${default="\n" userString} 28 | } 29 | 30 | output { 31 | #To track additional outputs from your task, please manually add them below 32 | String taskOut = "${out}" 33 | } 34 | 35 | runtime { 36 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 37 | } 38 | 39 | parameter_meta { 40 | gatk: "Executable jar for the GenomeAnalysisTK" 41 | ref: "fasta file of reference genome" 42 | refIndex: "Index file of reference genome" 43 | refDict: "dict file of reference genome" 44 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 45 | out: "An output file created by the walker. Will overwrite contents if file exists" 46 | input_file: "Input file containing sequence data (BAM or CRAM)" 47 | intervals: "One or more genomic intervals over which to operate" 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/GATKPaperGenotyper_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This GATKPaperGenotyper WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Simple Bayesian genotyper used in the original GATK paper 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task GATKPaperGenotyper { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? 
userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file #Each element is passed as its own --input_file argument 15 | Array[String] ? intervals 16 | Int ? ntVal #Passed as "-nt <value>" when set 17 | Float ? log_odds_score 18 | String ? out 19 | 20 | command { 21 | java -jar ${gatk} \ 22 | -T GATKPaperGenotyper \ 23 | -R ${ref} \ 24 | --input_file ${sep=" --input_file " input_file} \ 25 | ${default="" "--intervals " + intervals} \ 26 | ${default="" "-nt " + ntVal} \ 27 | -LOD ${default="3.0" log_odds_score} \ 28 | -o ${default="stdout" out} \ 29 | ${default="\n" userString} 30 | } 31 | 32 | output { 33 | #To track additional outputs from your task, please manually add them below 34 | String taskOut = "${out}" 35 | } 36 | 37 | runtime { 38 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 39 | } 40 | 41 | parameter_meta { 42 | gatk: "Executable jar for the GenomeAnalysisTK" 43 | ref: "fasta file of reference genome" 44 | refIndex: "Index file of reference genome" 45 | refDict: "dict file of reference genome" 46 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 47 | log_odds_score: "The LOD threshold for us to call confidently a genotype" 48 | out: "An output file created by the walker. 
Will overwrite contents if file exists" 49 | input_file: "Input file containing sequence data (BAM or CRAM)" 50 | intervals: "One or more genomic intervals over which to operate" 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/GCContentByInterval_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This GCContentByInterval WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Calculates the GC content of the reference sequence for each interval 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task GCContentByInterval { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] ? intervals 15 | String ? 
out 16 | 17 | command { 18 | java -jar ${gatk} \ 19 | -T GCContentByInterval \ 20 | -R ${ref} \ 21 | ${default="" "--intervals " + intervals} \ 22 | -o ${default="stdout" out} \ 23 | ${default="\n" userString} 24 | } 25 | 26 | output { 27 | #To track additional outputs from your task, please manually add them below 28 | String taskOut = "${out}" 29 | } 30 | 31 | runtime { 32 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 33 | } 34 | 35 | parameter_meta { 36 | gatk: "Executable jar for the GenomeAnalysisTK" 37 | ref: "fasta file of reference genome" 38 | refIndex: "Index file of reference genome" 39 | refDict: "dict file of reference genome" 40 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 41 | out: "An output file created by the walker. Will overwrite contents if file exists" 42 | intervals: "One or more genomic intervals over which to operate" 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/GenotypeConcordance_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This GenotypeConcordance WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Genotype concordance between two callsets 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task GenotypeConcordance { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] ? 
intervals 15 | String comp 16 | String eval 17 | Array[String] ? genotypeFilterExpressionComp 18 | Array[String] ? genotypeFilterExpressionEval 19 | Boolean ? ignoreFilters 20 | Boolean ? moltenize 21 | String ? out 22 | String ? printInterestingSites 23 | 24 | command { 25 | java -jar ${gatk} \ 26 | -T GenotypeConcordance \ 27 | -R ${ref} \ 28 | ${default="" "--intervals " + intervals} \ 29 | -comp ${comp} \ 30 | -eval ${eval} \ 31 | -gfc ${default="[]" genotypeFilterExpressionComp} \ 32 | -gfe ${default="[]" genotypeFilterExpressionEval} \ 33 | -ignoreFilters ${default="false" ignoreFilters} \ 34 | -moltenize ${default="false" moltenize} \ 35 | -o ${default="stdout" out} \ 36 | ${default="" "-sites " + printInterestingSites} \ 37 | ${default="\n" userString} 38 | } 39 | 40 | output { 41 | #To track additional outputs from your task, please manually add them below 42 | String taskOut = "${out}" 43 | } 44 | 45 | runtime { 46 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 47 | } 48 | 49 | parameter_meta { 50 | gatk: "Executable jar for the GenomeAnalysisTK" 51 | ref: "fasta file of reference genome" 52 | refIndex: "Index file of reference genome" 53 | refDict: "dict file of reference genome" 54 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 55 | comp: "The variants and genotypes to compare against" 56 | eval: "The variants and genotypes to evaluate" 57 | genotypeFilterExpressionComp: "One or more criteria to use to set COMP genotypes to no-call. These genotype-level filters are only applied to the COMP rod." 58 | genotypeFilterExpressionEval: "One or more criteria to use to set EVAL genotypes to no-call. These genotype-level filters are only applied to the EVAL rod." 59 | ignoreFilters: "Filters will be ignored" 60 | moltenize: "Molten rather than tabular output" 61 | out: "An output file created by the walker. 
Will overwrite contents if file exists" 62 | printInterestingSites: "File to output the discordant sites and genotypes." 63 | intervals: "One or more genomic intervals over which to operate" 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/HaplotypeResolver_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This HaplotypeResolver WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Haplotype-based resolution of variants in separate callsets. 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task HaplotypeResolver { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] ? intervals 15 | String ? out 16 | String ? setKey 17 | String ? 
statusKey 18 | Array[String] variant 19 | 20 | command { 21 | java -jar ${gatk} \ 22 | -T HaplotypeResolver \ 23 | -R ${ref} \ 24 | ${default="" "--intervals " + intervals} \ 25 | -o ${default="stdout" out} \ 26 | -setKey ${default="set" setKey} \ 27 | -statusKey ${default="status" statusKey} \ 28 | -V ${sep=" -V " variant} \ 29 | ${default="\n" userString} 30 | } 31 | 32 | output { 33 | #To track additional outputs from your task, please manually add them below 34 | String taskOut = "${out}" 35 | } 36 | 37 | runtime { 38 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 39 | } 40 | 41 | parameter_meta { 42 | gatk: "Executable jar for the GenomeAnalysisTK" 43 | ref: "fasta file of reference genome" 44 | refIndex: "Index file of reference genome" 45 | refDict: "dict file of reference genome" 46 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 47 | out: "File to which variants should be written" 48 | setKey: "Key used in the INFO key=value tag emitted describing which set the combined VCF record came from" 49 | statusKey: "Key used in the INFO key=value tag emitted describing the extent to which records match" 50 | variant: "Input VCF file" 51 | intervals: "One or more genomic intervals over which to operate" 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/LeftAlignAndTrimVariants_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This LeftAlignAndTrimVariants WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Left-align indels in a variant callset 6 | # 
-------------------------------------------------------------------------------------------- 7 | 8 | task LeftAlignAndTrimVariants { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] ? intervals 15 | Boolean ? dontTrimAlleles 16 | Boolean ? keepOriginalAC 17 | String ? out 18 | Boolean ? splitMultiallelics 19 | String variant 20 | 21 | command { 22 | java -jar ${gatk} \ 23 | -T LeftAlignAndTrimVariants \ 24 | -R ${ref} \ 25 | ${default="" "--intervals " + intervals} \ 26 | -notrim ${default="false" dontTrimAlleles} \ 27 | -keepOriginalAC ${default="false" keepOriginalAC} \ 28 | -o ${default="stdout" out} \ 29 | -split ${default="false" splitMultiallelics} \ 30 | -V ${variant} \ 31 | ${default="\n" userString} 32 | } 33 | 34 | output { 35 | #To track additional outputs from your task, please manually add them below 36 | String taskOut = "${out}" 37 | } 38 | 39 | runtime { 40 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 41 | } 42 | 43 | parameter_meta { 44 | gatk: "Executable jar for the GenomeAnalysisTK" 45 | ref: "fasta file of reference genome" 46 | refIndex: "Index file of reference genome" 47 | refDict: "dict file of reference genome" 48 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 49 | dontTrimAlleles: "Do not Trim alleles to remove bases common to all of them" 50 | keepOriginalAC: "Store the original AC, AF, and AN values after subsetting" 51 | out: "File to which variants should be written" 52 | splitMultiallelics: "Split multiallelic records and left-align individual alleles" 53 | variant: "Input VCF file" 54 | intervals: "One or more genomic intervals over which to operate" 55 | } 56 | } 57 | -------------------------------------------------------------------------------- 
/examples/gatk_wrappers/WDLTasks_3.6/LeftAlignIndels_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This LeftAlignIndels WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Left-align indels within reads in a bam file 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task LeftAlignIndels { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file #Joined in the command so that every element gets its own --input_file flag 15 | Array[String] ? intervals 16 | String ? out 17 | 18 | command { 19 | java -jar ${gatk} \ 20 | -T LeftAlignIndels \ 21 | -R ${ref} \ 22 | --input_file ${sep=" --input_file " input_file} \ 23 | ${default="" "--intervals " + intervals} \ 24 | -o ${default="stdout" out} \ 25 | ${default="\n" userString} 26 | } 27 | 28 | output { 29 | #To track additional outputs from your task, please manually add them below 30 | String taskOut = "${out}" 31 | } 32 | 33 | runtime { 34 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 35 | } 36 | 37 | parameter_meta { 38 | gatk: "Executable jar for the GenomeAnalysisTK" 39 | ref: "fasta file of reference genome" 40 | refIndex: "Index file of reference genome" 41 | refDict: "dict file of reference genome" 42 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 43 | out: "Output bam" 44 | input_file: "Input file containing sequence data (BAM or CRAM)" 45 | intervals: "One or more genomic intervals over which to operate" 46 | } 47 | } 48 | 
-------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/PhaseByTransmission_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This PhaseByTransmission WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Compute the most likely genotype combination and phasing for trios and parent/child pairs 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task PhaseByTransmission { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file 15 | Array[String] ? intervals 16 | Float ? DeNovoPrior 17 | Boolean ? FatherAlleleFirst 18 | String ? MendelianViolationsFile 19 | String ? 
out 20 | String variant 21 | 22 | command { 23 | java -jar ${gatk} \ 24 | -T PhaseByTransmission \ 25 | -R ${ref} \ 26 | --input_file ${sep=" --input_file " input_file} \ 27 | ${default="" "--intervals " + intervals} \ 28 | -prior ${default="1.0E-8" DeNovoPrior} \ 29 | -fatherAlleleFirst ${default="false" FatherAlleleFirst} \ 30 | ${default="" "-mvf " + MendelianViolationsFile} \ 31 | -o ${default="stdout" out} \ 32 | -V ${variant} \ 33 | ${default="\n" userString} 34 | } 35 | 36 | output { 37 | #To track additional outputs from your task, please manually add them below 38 | String taskOut = "${out}" 39 | } 40 | 41 | runtime { 42 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 43 | } 44 | 45 | parameter_meta { 46 | gatk: "Executable jar for the GenomeAnalysisTK" 47 | ref: "fasta file of reference genome" 48 | refIndex: "Index file of reference genome" 49 | refDict: "dict file of reference genome" 50 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 51 | DeNovoPrior: "Prior for de novo mutations. Default: 1e-8" 52 | FatherAlleleFirst: "Outputs the father allele as the first allele in phased child genotype. i.e. father|mother rather than mother|father." 53 | MendelianViolationsFile: "File to output the mendelian violation details." 54 | out: "An output file created by the walker. 
Will overwrite contents if file exists" 55 | variant: "Input VCF file" 56 | input_file: "Input file containing sequence data (BAM or CRAM)" 57 | intervals: "One or more genomic intervals over which to operate" 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/Pileup_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This Pileup WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Print read alignments in Pileup-style format 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task Pileup { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file 15 | Array[String] ? intervals 16 | Int ? nctVal 17 | Int ? ntVal 18 | Array[String] ? metadata 19 | String ? out 20 | Boolean ? 
showVerbose 21 | 22 | command { 23 | java -jar ${gatk} \ 24 | -T Pileup \ 25 | -R ${ref} \ 26 | --input_file ${sep=" --input_file " input_file} \ 27 | ${default="" "--intervals " + intervals} \ 28 | ${default="" "-nct " + nctVal} \ 29 | ${default="" "-nt " + ntVal} \ 30 | -metadata ${default="[]" metadata} \ 31 | -o ${default="stdout" out} \ 32 | -verbose ${default="false" showVerbose} \ 33 | ${default="\n" userString} 34 | } 35 | 36 | output { 37 | #To track additional outputs from your task, please manually add them below 38 | String taskOut = "${out}" 39 | } 40 | 41 | runtime { 42 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 43 | } 44 | 45 | parameter_meta { 46 | gatk: "Executable jar for the GenomeAnalysisTK" 47 | ref: "fasta file of reference genome" 48 | refIndex: "Index file of reference genome" 49 | refDict: "dict file of reference genome" 50 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 51 | metadata: "ROD file containing metadata" 52 | out: "An output file created by the walker. 
Will overwrite contents if file exists" 53 | showVerbose: "Add an extra verbose section to the pileup output" 54 | input_file: "Input file containing sequence data (BAM or CRAM)" 55 | intervals: "One or more genomic intervals over which to operate" 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/PrintRODs_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This PrintRODs WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Print out all of the RODs in the input data set 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task PrintRODs { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | String task_input 15 | String ? 
out 16 | 17 | command { 18 | java -jar ${gatk} \ 19 | -T PrintRODs \ 20 | -R ${ref} \ 21 | -input ${task_input} \ 22 | -o ${default="stdout" out} \ 23 | ${default="\n" userString} 24 | } 25 | 26 | output { 27 | #To track additional outputs from your task, please manually add them below 28 | String taskOut = "${out}" 29 | } 30 | 31 | runtime { 32 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 33 | } 34 | 35 | parameter_meta { 36 | gatk: "Executable jar for the GenomeAnalysisTK" 37 | ref: "fasta file of reference genome" 38 | refIndex: "Index file of reference genome" 39 | refDict: "dict file of reference genome" 40 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 41 | task_input: "The input ROD which should be printed out." 42 | out: "An output file created by the walker. Will overwrite contents if file exists" 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/PrintReads_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This PrintReads WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Write out sequence read data (for filtering, merging, subsetting etc) 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task PrintReads { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file 15 | Array[String] ? intervals 16 | File ? BQSR 17 | Int ? 
nctVal 18 | Int ? number 19 | String ? out 20 | String ? platform 21 | String ? readGroup 22 | String ? sample_file 23 | String ? sample_name 24 | Boolean ? simplify 25 | 26 | command { 27 | java -jar ${gatk} \ 28 | -T PrintReads \ 29 | -R ${ref} \ 30 | --input_file ${sep=" --input_file " input_file} \ 31 | ${default="" "--intervals " + intervals} \ 32 | ${default="" "--BQSR " + BQSR} \ 33 | ${default="" "-nct " + nctVal} \ 34 | -n ${default="-1" number} \ 35 | -o ${default="stdout" out} \ 36 | ${default="" "-platform " + platform} \ 37 | ${default="" "-readGroup " + readGroup} \ 38 | -sf ${default="[]" sample_file} \ 39 | -sn ${default="[]" sample_name} \ 40 | -s ${default="false" simplify} \ 41 | ${default="\n" userString} 42 | } 43 | 44 | output { 45 | #To track additional outputs from your task, please manually add them below 46 | String taskOut = "${out}" 47 | } 48 | 49 | runtime { 50 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 51 | } 52 | 53 | parameter_meta { 54 | gatk: "Executable jar for the GenomeAnalysisTK" 55 | ref: "fasta file of reference genome" 56 | refIndex: "Index file of reference genome" 57 | refDict: "dict file of reference genome" 58 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 59 | number: "Print the first n reads from the file, discarding the rest" 60 | out: "Write output to this BAM filename instead of STDOUT" 61 | platform: "Exclude all reads with this platform from the output" 62 | readGroup: "Exclude all reads with this read group from the output" 63 | sample_file: "File containing a list of samples (one per line). Can be specified multiple times" 64 | sample_name: "Sample name to be included in the analysis. Can be specified multiple times." 
65 | simplify: "Simplify all reads" 66 | input_file: "Input file containing sequence data (BAM or CRAM)" 67 | intervals: "One or more genomic intervals over which to operate" 68 | BQSR: "Input covariates table file for on-the-fly base quality score recalibration" 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/RandomlySplitVariants_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This RandomlySplitVariants WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Randomly split variants into different sets 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task RandomlySplitVariants { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] ? intervals 15 | Float ? fractionToOut1 16 | Int ? numOfOutputVCFFiles 17 | String ? out1 18 | File ? out2 19 | String ? prefixForAllOutputFileNames 20 | Boolean ? 
splitToManyFiles 21 | String variant 22 | 23 | command { 24 | java -jar ${gatk} \ 25 | -T RandomlySplitVariants \ 26 | -R ${ref} \ 27 | ${default="" "--intervals " + intervals} \ 28 | -fraction ${default="0.5" fractionToOut1} \ 29 | -N ${default="-1" numOfOutputVCFFiles} \ 30 | -o1 ${default="stdout" out1} \ 31 | ${default="" "-o2 " + out2} \ 32 | ${default="" "-baseOutputName " + prefixForAllOutputFileNames} \ 33 | -splitToMany ${default="false" splitToManyFiles} \ 34 | -V ${variant} \ 35 | ${default="\n" userString} 36 | } 37 | 38 | output { 39 | #To track additional outputs from your task, please manually add them below 40 | String taskOut = "${out1}" 41 | } 42 | 43 | runtime { 44 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 45 | } 46 | 47 | parameter_meta { 48 | gatk: "Executable jar for the GenomeAnalysisTK" 49 | ref: "fasta file of reference genome" 50 | refIndex: "Index file of reference genome" 51 | refDict: "dict file of reference genome" 52 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 53 | fractionToOut1: "Fraction of records to be placed in out1 (must be 0 <= fraction <= 1); all other records are placed in out2" 54 | numOfOutputVCFFiles: "number of output VCF files. Only works with SplitToMany = true" 55 | out1: "File #1 to which variants should be written" 56 | out2: "File #2 to which variants should be written" 57 | prefixForAllOutputFileNames: "the name of the output VCF file will be: .split..vcf. Required with SplitToMany option" 58 | splitToManyFiles: "split (with uniform distribution) to more than 2 files. 
numOfFiles and baseOutputName parameters are required" 59 | variant: "Input VCF file" 60 | intervals: "One or more genomic intervals over which to operate" 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/ReadClippingStats_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This ReadClippingStats WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Collect read clipping statistics 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task ReadClippingStats { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file 15 | Array[String] ? intervals 16 | Boolean ? include_unmapped 17 | String ? out 18 | Int ? 
skip 19 | 20 | command { 21 | java -jar ${gatk} \ 22 | -T ReadClippingStats \ 23 | -R ${ref} \ 24 | --input_file ${sep=" --input_file " input_file} \ 25 | ${default="" "--intervals " + intervals} \ 26 | -u ${default="false" include_unmapped} \ 27 | -o ${default="stdout" out} \ 28 | -skip ${default="1" skip} \ 29 | ${default="\n" userString} 30 | } 31 | 32 | output { 33 | #To track additional outputs from your task, please manually add them below 34 | String taskOut = "${out}" 35 | } 36 | 37 | runtime { 38 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 39 | } 40 | 41 | parameter_meta { 42 | gatk: "Executable jar for the GenomeAnalysisTK" 43 | ref: "fasta file of reference genome" 44 | refIndex: "Index file of reference genome" 45 | refDict: "dict file of reference genome" 46 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 47 | include_unmapped: "Include unmapped reads in the analysis" 48 | out: "An output file created by the walker. Will overwrite contents if file exists" 49 | skip: "Do not print all reads, skip some." 
50 | input_file: "Input file containing sequence data (BAM or CRAM)" 51 | intervals: "One or more genomic intervals over which to operate" 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/ReadGroupProperties_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This ReadGroupProperties WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Collect statistics about read groups and their properties 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task ReadGroupProperties { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file 15 | Int ? max_values_for_median 16 | String ? 
out 17 | 18 | command { 19 | java -jar ${gatk} \ 20 | -T ReadGroupProperties \ 21 | -R ${ref} \ 22 | --input_file ${sep=" --input_file " input_file} \ 23 | -maxElementsForMedian ${default="10000" max_values_for_median} \ 24 | -o ${default="stdout" out} \ 25 | ${default="\n" userString} 26 | } 27 | 28 | output { 29 | #To track additional outputs from your task, please manually add them below 30 | String taskOut = "${out}" 31 | } 32 | 33 | runtime { 34 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 35 | } 36 | 37 | parameter_meta { 38 | gatk: "Executable jar for the GenomeAnalysisTK" 39 | ref: "fasta file of reference genome" 40 | refIndex: "Index file of reference genome" 41 | refDict: "dict file of reference genome" 42 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 43 | max_values_for_median: "Calculate median from the first maxElementsForMedian values observed" 44 | out: "An output file created by the walker. Will overwrite contents if file exists" 45 | input_file: "Input file containing sequence data (BAM or CRAM)" 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/ReadLengthDistribution_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This ReadLengthDistribution WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Collect read length statistics 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task ReadLengthDistribution { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? 
userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file 15 | Array[String] ? intervals 16 | String ? out 17 | 18 | command { 19 | java -jar ${gatk} \ 20 | -T ReadLengthDistribution \ 21 | -R ${ref} \ 22 | --input_file ${sep=" --input_file " input_file} \ 23 | ${default="" "--intervals " + intervals} \ 24 | -o ${default="stdout" out} \ 25 | ${default="\n" userString} 26 | } 27 | 28 | output { 29 | #To track additional outputs from your task, please manually add them below 30 | String taskOut = "${out}" 31 | } 32 | 33 | runtime { 34 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 35 | } 36 | 37 | parameter_meta { 38 | gatk: "Executable jar for the GenomeAnalysisTK" 39 | ref: "fasta file of reference genome" 40 | refIndex: "Index file of reference genome" 41 | refDict: "dict file of reference genome" 42 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 43 | out: "An output file created by the walker. 
Will overwrite contents if file exists" 44 | input_file: "Input file containing sequence data (BAM or CRAM)" 45 | intervals: "One or more genomic intervals over which to operate" 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/RealignerTargetCreator_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This RealignerTargetCreator WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Define intervals to target for local realignment 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task RealignerTargetCreator { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file 15 | Array[String] ? intervals 16 | Int ? ntVal 17 | Array[String] ? known 18 | Int ? maxIntervalSize 19 | Int ? minReadsAtLocus 20 | Float ? mismatchFraction 21 | File ? out 22 | Int ? 
windowSize 23 | 24 | command { 25 | java -jar ${gatk} \ 26 | -T RealignerTargetCreator \ 27 | -R ${ref} \ 28 | --input_file ${sep=" --input_file " input_file} \ 29 | ${default="" "--intervals " + intervals} \ 30 | ${default="" "-nt " + ntVal} \ 31 | -known ${default="[]" known} \ 32 | -maxInterval ${default="500" maxIntervalSize} \ 33 | -minReads ${default="4" minReadsAtLocus} \ 34 | -mismatch ${default="0.0" mismatchFraction} \ 35 | ${default="" "-o " + out} \ 36 | -window ${default="10" windowSize} \ 37 | ${default="\n" userString} 38 | } 39 | 40 | output { 41 | #To track additional outputs from your task, please manually add them below 42 | String taskOut = "${out}" 43 | } 44 | 45 | runtime { 46 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 47 | } 48 | 49 | parameter_meta { 50 | gatk: "Executable jar for the GenomeAnalysisTK" 51 | ref: "fasta file of reference genome" 52 | refIndex: "Index file of reference genome" 53 | refDict: "dict file of reference genome" 54 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 55 | known: "Input VCF file with known indels" 56 | maxIntervalSize: "maximum interval size; any intervals larger than this value will be dropped" 57 | minReadsAtLocus: "minimum reads at a locus to enable using the entropy calculation" 58 | mismatchFraction: "fraction of base qualities needing to mismatch for a position to have high entropy" 59 | out: "An output file created by the walker. 
Will overwrite contents if file exists" 60 | windowSize: "window size for calculating entropy or SNP clusters" 61 | input_file: "Input file containing sequence data (BAM or CRAM)" 62 | intervals: "One or more genomic intervals over which to operate" 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/RegenotypeVariants_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This RegenotypeVariants WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Regenotypes the variants from a VCF containing PLs or GLs. 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task RegenotypeVariants { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] ? intervals 15 | Int ? ntVal 16 | String ? 
out 17 | String variant 18 | 19 | command { 20 | java -jar ${gatk} \ 21 | -T RegenotypeVariants \ 22 | -R ${ref} \ 23 | ${default="" "--intervals " + intervals} \ 24 | ${default="" "-nt" + ntVal} \ 25 | -o ${default="stdout" out} \ 26 | -V ${variant} \ 27 | ${default="\n" userString} 28 | } 29 | 30 | output { 31 | #To track additional outputs from your task, please manually add them below 32 | String taskOut = "${out}" 33 | } 34 | 35 | runtime { 36 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 37 | } 38 | 39 | parameter_meta { 40 | gatk: "Executable jar for the GenomeAnalysisTK" 41 | ref: "fasta file of reference genome" 42 | refIndex: "Index file of reference genome" 43 | refDict: "dict file of reference genome" 44 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 45 | out: "File to which variants should be written" 46 | variant: "Input VCF file" 47 | intervals: "One or more genomic intervals over which to operate" 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/SelectHeaders_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This SelectHeaders WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Selects headers from a VCF source 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task SelectHeaders { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? 
userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] ? intervals 15 | Int ? ntVal 16 | String ? exclude_header_name 17 | String ? header_expression 18 | String ? header_name 19 | Boolean ? include_interval_names 20 | String ? out 21 | String variant 22 | 23 | command { 24 | java -jar ${gatk} \ 25 | -T SelectHeaders \ 26 | -R ${ref} \ 27 | ${default="" "--intervals " + intervals} \ 28 | ${default="" "-nt" + ntVal} \ 29 | ${default="" "-xl_hn " + exclude_header_name} \ 30 | ${default="" "-he " + header_expression} \ 31 | ${default="" "-hn " + header_name} \ 32 | -iln ${default="false" include_interval_names} \ 33 | -o ${default="stdout" out} \ 34 | -V ${variant} \ 35 | ${default="\n" userString} 36 | } 37 | 38 | output { 39 | #To track additional outputs from your task, please manually add them below 40 | String taskOut = "${out}" 41 | } 42 | 43 | runtime { 44 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 45 | } 46 | 47 | parameter_meta { 48 | gatk: "Executable jar for the GenomeAnalysisTK" 49 | ref: "fasta file of reference genome" 50 | refIndex: "Index file of reference genome" 51 | refDict: "dict file of reference genome" 52 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 53 | exclude_header_name: "Exclude header. Can be specified multiple times" 54 | header_expression: "Regular expression to select many headers from the tracks provided. Can be specified multiple times" 55 | header_name: "Include header. 
Can be specified multiple times" 56 | include_interval_names: "If set the interval file name minus the file extension, or the command line intervals, will be added to the headers" 57 | out: "File to which variants should be written" 58 | variant: "Input VCF file" 59 | intervals: "One or more genomic intervals over which to operate" 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/SimulateReadsForVariants_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This SimulateReadsForVariants WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Generate simulated reads for variants 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task SimulateReadsForVariants { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] ? intervals 15 | Int ? errorRate 16 | String out 17 | Int ? readDepth 18 | Int ? readLength 19 | String ? readSamplingMode 20 | String ? 
rgPlatform 21 | String variant 22 | 23 | command { 24 | java -jar ${gatk} \ 25 | -T SimulateReadsForVariants \ 26 | -R ${ref} \ 27 | ${default="" "--intervals " + intervals} \ 28 | -ER ${default="20" errorRate} \ 29 | -o ${out} \ 30 | -DP ${default="20" readDepth} \ 31 | -RL ${default="101" readLength} \ 32 | -RSM ${default="CONSTANT" readSamplingMode} \ 33 | -RGPL ${default="ILLUMINA" rgPlatform} \ 34 | -V ${variant} \ 35 | ${default="\n" userString} 36 | } 37 | 38 | output { 39 | #To track additional outputs from your task, please manually add them below 40 | String taskOut = "${out}" 41 | } 42 | 43 | runtime { 44 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 45 | } 46 | 47 | parameter_meta { 48 | gatk: "Executable jar for the GenomeAnalysisTK" 49 | ref: "fasta file of reference genome" 50 | refIndex: "Index file of reference genome" 51 | refDict: "dict file of reference genome" 52 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 53 | errorRate: "Base error rate (Phred-scaled)" 54 | out: "Reads corresponding to variants" 55 | readDepth: "Read depth to generate" 56 | readLength: "Read lengths (bp)" 57 | readSamplingMode: "Sampling mode" 58 | rgPlatform: "Sequencing platform" 59 | variant: "Input VCF file" 60 | intervals: "One or more genomic intervals over which to operate" 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/SplitNCigarReads_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This SplitNCigarReads WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task 
Summary: Splits reads that contain Ns in their CIGAR string 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task SplitNCigarReads { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file 15 | Array[String] intervals 16 | String unsafe 17 | Boolean ? doNotFixOverhangs 18 | Int ? maxBasesInOverhang 19 | Int ? maxMismatchesInOverhang 20 | Int ? maxReadsInMemory 21 | String ? out 22 | 23 | command { 24 | java -jar ${gatk} \ 25 | -T SplitNCigarReads \ 26 | -R ${ref} \ 27 | --input_file ${input_file} \ 28 | --intervals ${intervals} \ 29 | --unsafe ${unsafe} \ 30 | -doNotFixOverhangs ${default="false" doNotFixOverhangs} \ 31 | -maxOverhang ${default="40" maxBasesInOverhang} \ 32 | -maxMismatches ${default="1" maxMismatchesInOverhang} \ 33 | -maxInMemory ${default="150000" maxReadsInMemory} \ 34 | -o ${default="stdout" out} \ 35 | ${default="\n" userString} 36 | } 37 | 38 | output { 39 | #To track additional outputs from your task, please manually add them below 40 | String taskOut = "${out}" 41 | } 42 | 43 | runtime { 44 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 45 | } 46 | 47 | parameter_meta { 48 | gatk: "Executable jar for the GenomeAnalysisTK" 49 | ref: "fasta file of reference genome" 50 | refIndex: "Index file of reference genome" 51 | refDict: "dict file of reference genome" 52 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 53 | doNotFixOverhangs: "do not have the walker hard-clip overhanging sections of the reads" 54 | maxBasesInOverhang: "max number of bases allowed in the overhang" 55 | maxMismatchesInOverhang: "max number of mismatches allowed in the overhang" 56 | maxReadsInMemory: "max reads allowed to be kept in memory at a time by the BAM writer" 57 | 
out: "Write output to this BAM filename instead of STDOUT" 58 | input_file: "Input file containing sequence data (BAM or CRAM)" 59 | intervals: "One or more genomic intervals over which to operate" 60 | unsafe: "Enable unsafe operations: nothing will be checked at runtime" 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/SplitSamFile_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This SplitSamFile WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Split a BAM file by sample 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task SplitSamFile { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] input_file 15 | Array[String] ? intervals 16 | String ? 
outputRoot 17 | 18 | command { 19 | java -jar ${gatk} \ 20 | -T SplitSamFile \ 21 | -R ${ref} \ 22 | --input_file ${input_file} \ 23 | ${default="" "--intervals " + intervals} \ 24 | ${default="" "--outputRoot " + outputRoot} \ 25 | ${default="\n" userString} 26 | } 27 | 28 | output { 29 | #To track additional outputs from your task, please manually add them below 30 | String taskOut = "${outputRoot}" #this task declares no 'out' input; outputRoot is its only output-related parameter 31 | } 32 | 33 | runtime { 34 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 35 | } 36 | 37 | parameter_meta { 38 | gatk: "Executable jar for the GenomeAnalysisTK" 39 | ref: "fasta file of reference genome" 40 | refIndex: "Index file of reference genome" 41 | refDict: "dict file of reference genome" 42 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 43 | outputRoot: "output BAM file" 44 | input_file: "Input file containing sequence data (BAM or CRAM)" 45 | intervals: "One or more genomic intervals over which to operate" 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/ValidateVariants_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This ValidateVariants WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Validate a VCF file with an extra strict set of criteria 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task ValidateVariants { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? 
userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] ? intervals 15 | String ? dbsnp 16 | Boolean ? doNotValidateFilteredRecords 17 | Boolean ? validateGVCF 18 | Array[String] ? validationTypeToExclude 19 | String variant 20 | Boolean ? warnOnErrors 21 | 22 | command { 23 | java -jar ${gatk} \ 24 | -T ValidateVariants \ 25 | -R ${ref} \ 26 | ${default="" "--intervals " + intervals} \ 27 | ${default="" "-D " + dbsnp} \ 28 | -doNotValidateFilteredRecords ${default="false" doNotValidateFilteredRecords} \ 29 | -gvcf ${default="false" validateGVCF} \ 30 | -Xtype ${default="[]" validationTypeToExclude} \ 31 | -V ${variant} \ 32 | -warnOnErrors ${default="false" warnOnErrors} \ 33 | ${default="\n" userString} 34 | } 35 | 36 | output { 37 | #To track additional outputs from your task, please manually add them below 38 | String taskOut = "${out}" 39 | } 40 | 41 | runtime { 42 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 43 | } 44 | 45 | parameter_meta { 46 | gatk: "Executable jar for the GenomeAnalysisTK" 47 | ref: "fasta file of reference genome" 48 | refIndex: "Index file of reference genome" 49 | refDict: "dict file of reference genome" 50 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 51 | dbsnp: "dbSNP file" 52 | doNotValidateFilteredRecords: "skip validation on filtered records" 53 | validateGVCF: "Validate this file as a GVCF" 54 | validationTypeToExclude: "which validation type to exclude from a full strict validation" 55 | variant: "Input VCF file" 56 | warnOnErrors: "just emit warnings on errors instead of terminating the run at the first instance" 57 | intervals: "One or more genomic intervals over which to operate" 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/VariantsToAllelicPrimitives_3.6.wdl: 
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This VariantsToAllelicPrimitives WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Simplify multi-nucleotide variants (MNPs) into more basic/primitive alleles. 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task VariantsToAllelicPrimitives { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] ? intervals 15 | String ? out 16 | String variant 17 | 18 | command { 19 | java -jar ${gatk} \ 20 | -T VariantsToAllelicPrimitives \ 21 | -R ${ref} \ 22 | ${default="" "--intervals " + intervals} \ 23 | -o ${default="stdout" out} \ 24 | -V ${variant} \ 25 | ${default="\n" userString} 26 | } 27 | 28 | output { 29 | #To track additional outputs from your task, please manually add them below 30 | String taskOut = "${out}" 31 | } 32 | 33 | runtime { 34 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 35 | } 36 | 37 | parameter_meta { 38 | gatk: "Executable jar for the GenomeAnalysisTK" 39 | ref: "fasta file of reference genome" 40 | refIndex: "Index file of reference genome" 41 | refDict: "dict file of reference genome" 42 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 43 | out: "File to which variants should be written" 44 | variant: "Input VCF file" 45 | intervals: "One or more genomic intervals over which to operate" 46 | } 47 | } 48 | 
-------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/VariantsToBinaryPed_3.6.wdl: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This VariantsToBinaryPed WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Convert VCF to binary pedigree file 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task VariantsToBinaryPed { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] ? intervals 15 | String bed 16 | String bim 17 | Boolean ? checkAlternateAlleles 18 | String ? dbsnp 19 | String fam 20 | Boolean ? majorAlleleFirst 21 | File metaData 22 | Int minGenotypeQuality 23 | String ? 
outputMode 24 | String variant 25 | 26 | command { 27 | java -jar ${gatk} \ 28 | -T VariantsToBinaryPed \ 29 | -R ${ref} \ 30 | ${default="" "--intervals " + intervals} \ 31 | -bed ${bed} \ 32 | -bim ${bim} \ 33 | --checkAlternateAlleles ${default="false" checkAlternateAlleles} \ 34 | ${default="" "-D " + dbsnp} \ 35 | -fam ${fam} \ 36 | --majorAlleleFirst ${default="false" majorAlleleFirst} \ 37 | -m ${metaData} \ 38 | -mgq ${minGenotypeQuality} \ 39 | -mode ${default="INDIVIDUAL_MAJOR" outputMode} \ 40 | -V ${variant} \ 41 | ${default="\n" userString} 42 | } 43 | 44 | output { 45 | #To track additional outputs from your task, please manually add them below 46 | String taskOut = "${bed}" #this task declares no 'out' input; bed is one of its three declared output paths (bed, bim, fam) 47 | } 48 | 49 | runtime { 50 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 51 | } 52 | 53 | parameter_meta { 54 | gatk: "Executable jar for the GenomeAnalysisTK" 55 | ref: "fasta file of reference genome" 56 | refIndex: "Index file of reference genome" 57 | refDict: "dict file of reference genome" 58 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 59 | bed: "output bed file" 60 | bim: "output map file" 61 | checkAlternateAlleles: "Checks that alternate alleles actually appear in samples, erroring out if they do not" 62 | dbsnp: "dbSNP file" 63 | fam: "output fam file" 64 | majorAlleleFirst: "Sets the major allele to be 'reference' for the bim file, rather than the ref allele" 65 | metaData: "Sample metadata file" 66 | minGenotypeQuality: "If genotype quality is lower than this value, output NO_CALL" 67 | outputMode: "The output file mode (SNP major or individual major)" 68 | variant: "Input VCF file" 69 | intervals: "One or more genomic intervals over which to operate" 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /examples/gatk_wrappers/WDLTasks_3.6/VariantsToVCF_3.6.wdl: 
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------- 2 | # This VariantsToVCF WDL task was generated on 09/09/16 for use with GATK version 3.6 3 | # For more information on using this wrapper, please see the WDL repository at 4 | # https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md 5 | # Task Summary: Convert variants from other file formats to VCF format 6 | # -------------------------------------------------------------------------------------------- 7 | 8 | task VariantsToVCF { 9 | File gatk 10 | File ref 11 | File refIndex 12 | File refDict 13 | String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string 14 | Array[String] ? intervals 15 | String ? dbsnp 16 | String ? out 17 | String ? sample 18 | String variant 19 | 20 | command { 21 | java -jar ${gatk} \ 22 | -T VariantsToVCF \ 23 | -R ${ref} \ 24 | ${default="" "--intervals " + intervals} \ 25 | ${default="" "-D " + dbsnp} \ 26 | -o ${default="stdout" out} \ 27 | ${default="" "-sample " + sample} \ 28 | -V ${variant} \ 29 | ${default="\n" userString} 30 | } 31 | 32 | output { 33 | #To track additional outputs from your task, please manually add them below 34 | String taskOut = "${out}" 35 | } 36 | 37 | runtime { 38 | docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" 39 | } 40 | 41 | parameter_meta { 42 | gatk: "Executable jar for the GenomeAnalysisTK" 43 | ref: "fasta file of reference genome" 44 | refIndex: "Index file of reference genome" 45 | refDict: "dict file of reference genome" 46 | userString: "An optional parameter which allows the user to specify additions to the command line at run time" 47 | dbsnp: "dbSNP file" 48 | out: "File to which variants should be written" 49 | sample: "The sample name represented by the variant rod" 50 | variant: "Input variant file" 51 | 
intervals: "One or more genomic intervals over which to operate" 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /examples/jointCallingGenotypes/automatedCWL/GenotypeGVCFs.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. 0.2 3 | 4 | { 5 | "cwlVersion": "v1.0", 6 | "inputs": [ 7 | { 8 | "type": "File", 9 | "id": "GATK" 10 | }, 11 | { 12 | "type": "File", 13 | "id": "RefFasta" 14 | }, 15 | { 16 | "type": "File", 17 | "id": "RefIndex" 18 | }, 19 | { 20 | "type": "File", 21 | "id": "RefDict" 22 | }, 23 | { 24 | "type": "string", 25 | "id": "sampleName" 26 | }, 27 | { 28 | "type": "File[]", 29 | "id": "GVCFs" 30 | } 31 | ], 32 | "outputs": [ 33 | { 34 | "outputBinding": { 35 | "glob": "$(inputs.sampleName)_rawVariants.vcf" 36 | }, 37 | "type": "File", 38 | "id": "rawVCF" 39 | } 40 | ], 41 | "id": "GenotypeGVCFs", 42 | "requirements": [ 43 | { 44 | "class": "ShellCommandRequirement" 45 | }, 46 | { 47 | "class": "InlineJavascriptRequirement" 48 | } 49 | ], 50 | "arguments": [ 51 | { 52 | "shellQuote": false, 53 | "valueFrom": "${ var GVCFs_separated = ''; for (var i=0; i=1.1.0'], 11 | packages=find_packages(), 12 | package_data={'wdl2cwl': ['templates/*', 'expression-tools/*']}, 13 | include_package_data=True, 14 | dependency_links=['http://github.com/anton-khodak/pywdl/tarball/master#egg=wdl-1.1.0'], 15 | entry_points={ 16 | 'console_scripts': [ 17 | 'wdl2cwl=wdl2cwl.main:main' 18 | ] 19 | }, 20 | classifiers=[ 21 | 'Development Status :: 3 - Alpha', 22 | 'Operating System :: POSIX', 23 | 'Intended Audience :: Developers', 24 | 'Environment :: Console', 25 | 'License :: OSI Approved :: Apache Software License', 26 | ], 27 | ) 28 | -------------------------------------------------------------------------------- /tests/primitive_workflows_wld.py: 
-------------------------------------------------------------------------------- 1 | 2 | wdl_code = """ 3 | task my_task { 4 | File file 5 | command { 6 | ./my_binary --input=${file} > results 7 | } 8 | output { 9 | File results = "results" 10 | } 11 | } 12 | 13 | workflow my_wf { 14 | call my_task 15 | } 16 | """ 17 | 18 | wdl_code2 = """ 19 | task my_task { 20 | command { 21 | true 22 | } 23 | } 24 | workflow test { 25 | Int a = (1 + 2) * 3 26 | call my_task { 27 | input: var=a*2, var2="file"+".txt" 28 | } 29 | } 30 | """ 31 | 32 | wdl_code3 = """ 33 | task ps { 34 | command { 35 | ps 36 | } 37 | output { 38 | File procs = stdout() 39 | } 40 | } 41 | 42 | task cgrep { 43 | String pattern 44 | File in_file 45 | command { 46 | grep '${pattern}' ${in_file} | wc -l 47 | } 48 | output { 49 | Int count = read_int(stdout()) 50 | } 51 | } 52 | 53 | task wc { 54 | File in_file 55 | command { 56 | cat ${in_file} | wc -l 57 | } 58 | output { 59 | Int count = read_int(stdout()) 60 | } 61 | } 62 | 63 | workflow three_step { 64 | call ps 65 | call cgrep { 66 | input: in_file=ps.procs 67 | } 68 | call wc { 69 | input: in_file=ps.procs 70 | } 71 | } 72 | """ -------------------------------------------------------------------------------- /tests/test-data/ctask.wdl: -------------------------------------------------------------------------------- 1 | task wc2_tool { 2 | File file1 3 | command { 4 | cat ${file1} | wc -w 5 | } 6 | output { 7 | Int count = read_int(stdout()) 8 | } 9 | } 10 | 11 | workflow count_lines4_wf { 12 | File inputSamplesFile 13 | Array[Array[File]] inputSamples = read_tsv(inputSamplesFile) 14 | Array[File] files = inputSamples[0] 15 | scatter(f in files) { 16 | call wc2_tool { 17 | input: file1=f, 18 | RefFasta=f 19 | } 20 | } 21 | output { 22 | wc2_tool.count 23 | } 24 | } -------------------------------------------------------------------------------- /tests/test-data/scatter.wdl: 
-------------------------------------------------------------------------------- 1 | task inc { 2 | Int i 3 | 4 | command <<< 5 | python -c "print(${i} + 1)" 6 | >>> 7 | 8 | output { 9 | Int incremented = read_int(stdout()) 10 | } 11 | } 12 | 13 | task sum { 14 | Array[Int] ints 15 | 16 | command <<< 17 | python -c "print(${sep="+" ints})" 18 | >>> 19 | 20 | output { 21 | Int sum = read_int(stdout()) 22 | } 23 | } 24 | 25 | workflow wf { 26 | Array[Int] integers = [1,2,3,4,5] 27 | scatter(i in integers) { 28 | call inc {input: i=i} 29 | call inc as inc2 {input: i=inc.incremented} 30 | } 31 | call sum {input: ints = inc2.incremented} 32 | } 33 | -------------------------------------------------------------------------------- /tests/test-data/task.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.0 2 | class: Workflow 3 | requirements: 4 | - class: ScatterFeatureRequirement 5 | inputs: 6 | inputSamplesFiles: 7 | type: 8 | type: array 9 | items: File 10 | 11 | outputs: 12 | count: 13 | outputSource: wc/count 14 | type: 15 | type: array 16 | items: int 17 | 18 | 19 | steps: 20 | wc: 21 | run: wc.cwl 22 | scatter: file1 23 | in: 24 | file1: inputSamplesFiles 25 | out: [count] -------------------------------------------------------------------------------- /tests/test-data/task.wdl: -------------------------------------------------------------------------------- 1 | task wc2_tool { 2 | File file1 3 | command { 4 | wc ${file1} 5 | } 6 | output { 7 | Int count = read_int(stdout()) 8 | } 9 | } 10 | 11 | workflow count_lines4_wf { 12 | Array[File] files 13 | scatter(f in files) { 14 | call wc2_tool { 15 | input: file1=f, 16 | RefFasta=f 17 | } 18 | } 19 | output { 20 | wc2_tool.count 21 | } 22 | } -------------------------------------------------------------------------------- /tests/test-data/wc.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.0 2 | class: 
CommandLineTool 3 | arguments: 4 | - shellQuote: false 5 | valueFrom: "cat $(inputs.file1.path) | wc -w" 6 | requirements: 7 | - class: ShellCommandRequirement 8 | - class: InlineJavascriptRequirement 9 | 10 | baseCommand: [] 11 | stdout: "count" 12 | 13 | inputs: 14 | file1: 15 | type: File 16 | 17 | outputs: 18 | count: 19 | type: int 20 | outputBinding: 21 | glob: "count" 22 | loadContents: true 23 | outputEval: "$(parseInt(self[0].contents))" -------------------------------------------------------------------------------- /tests/test-data/wftask.json: -------------------------------------------------------------------------------- 1 | { 2 | "inputSamplesFiles": [ 3 | { 4 | "class": "File", 5 | "path": "/media/anton/ECFA959BFA95631E/Programming/experiments/inputSamples.txt" 6 | }, 7 | { 8 | "class": "File", 9 | "path": "/media/anton/ECFA959BFA95631E/Programming/experiments/example.txt" 10 | }, 11 | { 12 | "class": "File", 13 | "path": "/media/anton/ECFA959BFA95631E/Programming/experiments/example.txt" 14 | } 15 | ] 16 | } -------------------------------------------------------------------------------- /tests/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import unittest 4 | 5 | 6 | class ValidateTestCase(unittest.TestCase): 7 | """Validate every CWL document found under the results directory.""" 8 | 9 | test_dir = "results/" 10 | 11 | def test_tools_general(self): 12 | """Run ``cwltool --validate`` on each <test_dir>/<toolkit>/<tool> file.""" 13 | # Resolve test_dir against this file so that listing and validation use the same location regardless of the working directory. 14 | base_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), self.test_dir) 15 | for toolkit in sorted(os.listdir(base_dir)): 16 | toolkit_dir = os.path.join(base_dir, toolkit) 17 | for tool in sorted(os.listdir(toolkit_dir)): 18 | path = os.path.join(toolkit_dir, tool) 19 | try: 20 | # cwltool is resolved through PATH; stderr is merged into the captured output so the failure message carries the diagnostics. 21 | subprocess.check_output(['cwltool', '--validate', path], stderr=subprocess.STDOUT) 22 | except subprocess.CalledProcessError as e: 23 | self.fail("Validation failed for {0}: {1}".format(path, e.output)) 24 | -------------------------------------------------------------------------------- /version.py: 
-------------------------------------------------------------------------------- 1 | __version__ = '0.2.1' -------------------------------------------------------------------------------- /wdl2cwl/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.2' -------------------------------------------------------------------------------- /wdl2cwl/expression-tools/read_tsv.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # Expression tool that splits the input file's contents into rows on newlines and into cells on tabs; cells starting with '/' become File objects with a file:// location. 3 | # NOTE: a trailing newline in the input produces a final row holding a single empty string. 4 | cwlVersion: v1.0 5 | class: ExpressionTool 6 | 7 | requirements: 8 | - class: InlineJavascriptRequirement 9 | 10 | inputs: 11 | infile: 12 | type: File 13 | inputBinding: 14 | loadContents: true 15 | 16 | outputs: 17 | outputArray: 18 | type: Any 19 | 20 | expression: "${var lines = inputs.infile.contents.split('\\n'); 21 | var nblines = lines.length; 22 | var arrayofarrays = []; 23 | for (var i = 0; i < nblines; i++) { 24 | var line = lines[i].split('\t'); 25 | 26 | for (var j=0; j < line.length; j++){ 27 | if (line[j].startsWith('/')){ 28 | line[j] = 29 | { 30 | 'class': 'File', 31 | 'location': 'file://'+ line[j] 32 | }; 33 | } 34 | } 35 | arrayofarrays.push(line); 36 | } 37 | return {'outputArray': arrayofarrays } ; 38 | }" -------------------------------------------------------------------------------- /wdl2cwl/templates/cwltool.j2: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | # This tool description was generated automatically by wdl2cwl ver. {{ version }} 3 | 4 | {{ code }} 5 | --------------------------------------------------------------------------------