├── .gitignore ├── HCC2218-sv ├── HCC2218-sv-workflow │ ├── main-HCC2218-sv-samples.json │ ├── main-HCC2218-sv.cwl │ ├── steps │ │ ├── alignment_to_rec.cwl │ │ ├── batch_for_sv.cwl │ │ ├── batch_for_variantcall.cwl │ │ ├── calculate_sv_bins.cwl │ │ ├── calculate_sv_coverage.cwl │ │ ├── combine_sample_regions.cwl │ │ ├── compare_to_rm.cwl │ │ ├── concat_batch_variantcalls.cwl │ │ ├── detect_sv.cwl │ │ ├── get_parallel_regions.cwl │ │ ├── merge_split_alignments.cwl │ │ ├── multiqc_summary.cwl │ │ ├── normalize_sv_coverage.cwl │ │ ├── pipeline_summary.cwl │ │ ├── postprocess_alignment.cwl │ │ ├── postprocess_alignment_to_rec.cwl │ │ ├── postprocess_variants.cwl │ │ ├── prep_align_inputs.cwl │ │ ├── prep_samples.cwl │ │ ├── prep_samples_to_rec.cwl │ │ ├── process_alignment.cwl │ │ ├── qc_to_rec.cwl │ │ ├── summarize_sv.cwl │ │ ├── summarize_vc.cwl │ │ └── variantcall_batch_region.cwl │ ├── wf-alignment.cwl │ ├── wf-svcall.cwl │ └── wf-variantcall.cwl ├── HCC2218-sv.yaml ├── bcbio_system.yaml ├── download_data.sh ├── run_cromwell.sh └── run_generate_cwl.sh ├── LICENSE.txt ├── NA12878-chr20 ├── NA12878-platinum-chr20-wdl │ ├── alignment.wdl │ ├── main_NA12878_platinum_chr20.wdl │ └── variantcall.wdl ├── NA12878-platinum-chr20-workflow-arvados │ ├── main-NA12878-platinum-chr20-samples.json │ ├── main-NA12878-platinum-chr20.cwl │ ├── steps │ │ ├── alignment_to_rec.cwl │ │ ├── batch_for_variantcall.cwl │ │ ├── combine_sample_regions.cwl │ │ ├── compare_to_rm.cwl │ │ ├── concat_batch_variantcalls.cwl │ │ ├── get_parallel_regions.cwl │ │ ├── merge_split_alignments.cwl │ │ ├── multiqc_summary.cwl │ │ ├── pipeline_summary.cwl │ │ ├── postprocess_alignment.cwl │ │ ├── postprocess_alignment_to_rec.cwl │ │ ├── postprocess_variants.cwl │ │ ├── prep_align_inputs.cwl │ │ ├── prep_samples.cwl │ │ ├── prep_samples_to_rec.cwl │ │ ├── process_alignment.cwl │ │ ├── qc_to_rec.cwl │ │ ├── summarize_grading_vc.cwl │ │ └── variantcall_batch_region.cwl │ ├── wf-alignment.cwl │ └── 
wf-variantcall.cwl ├── NA12878-platinum-chr20-workflow-s3 │ └── main-NA12878-platinum-chr20-samples.json ├── NA12878-platinum-chr20-workflow │ ├── main-NA12878-platinum-chr20-samples.json │ ├── main-NA12878-platinum-chr20.cwl │ ├── steps │ │ ├── alignment_to_rec.cwl │ │ ├── batch_for_variantcall.cwl │ │ ├── combine_sample_regions.cwl │ │ ├── compare_to_rm.cwl │ │ ├── concat_batch_variantcalls.cwl │ │ ├── get_parallel_regions.cwl │ │ ├── merge_split_alignments.cwl │ │ ├── multiqc_summary.cwl │ │ ├── pipeline_summary.cwl │ │ ├── postprocess_alignment.cwl │ │ ├── postprocess_alignment_to_rec.cwl │ │ ├── postprocess_variants.cwl │ │ ├── prep_align_inputs.cwl │ │ ├── prep_samples.cwl │ │ ├── prep_samples_to_rec.cwl │ │ ├── process_alignment.cwl │ │ ├── qc_to_rec.cwl │ │ ├── summarize_vc.cwl │ │ └── variantcall_batch_region.cwl │ ├── wf-alignment.cwl │ └── wf-variantcall.cwl ├── NA12878-platinum-chr20.csv ├── bcbio_system.yaml ├── bcbio_system_arvados.yaml ├── bcbio_system_s3.yaml ├── download_data.sh ├── ga4gh_execution_challenge │ ├── bcbio_NA12878-chr20_checker.json │ ├── bcbio_NA12878-chr20_submit.json │ ├── grading-summary-NA12878-chr20-baseline.csv │ ├── link_cwl.sh │ ├── link_to_synapse.py │ └── upload_biodata.sh ├── germline_template.yaml ├── run_bunny.sh ├── run_cwltool.sh ├── run_generate_cwl.sh ├── run_generate_cwl_arvados.sh ├── run_generate_cwl_s3.sh ├── run_generate_wdl.sh ├── run_toil.sh └── run_toil_aws.sh ├── NA24385-sv ├── NA24385-sv-workflow │ ├── main-NA24385-sv-samples.json │ ├── main-NA24385-sv.cwl │ ├── steps │ │ ├── alignment_to_rec.cwl │ │ ├── batch_for_sv.cwl │ │ ├── calculate_sv_bins.cwl │ │ ├── calculate_sv_coverage.cwl │ │ ├── combine_sample_regions.cwl │ │ ├── detect_sv.cwl │ │ ├── multiqc_summary.cwl │ │ ├── normalize_sv_coverage.cwl │ │ ├── pipeline_summary.cwl │ │ ├── postprocess_alignment.cwl │ │ ├── postprocess_alignment_to_rec.cwl │ │ ├── prep_align_inputs.cwl │ │ ├── prep_samples.cwl │ │ ├── prep_samples_to_rec.cwl │ │ ├── 
process_alignment.cwl │ │ ├── qc_to_rec.cwl │ │ └── summarize_sv.cwl │ ├── wf-alignment.cwl │ └── wf-svcall.cwl ├── NA24385-sv.yaml ├── bcbio_system.yaml ├── download_data.sh ├── run_bunny.sh ├── run_cromwell.sh ├── run_generate_cwl.sh └── run_toil.sh ├── README.md ├── SGDP-recall-CGC ├── SGDP-recall-cgc │ ├── main-SGDP-recall-samples.json │ ├── main-SGDP-recall.cwl │ ├── steps │ │ ├── alignment_to_rec.cwl │ │ ├── batch_for_variantcall.cwl │ │ ├── combine_sample_regions.cwl │ │ ├── compare_to_rm.cwl │ │ ├── concat_batch_variantcalls.cwl │ │ ├── get_parallel_regions.cwl │ │ ├── merge_split_alignments.cwl │ │ ├── multiqc_summary.cwl │ │ ├── pipeline_summary.cwl │ │ ├── postprocess_alignment.cwl │ │ ├── postprocess_alignment_to_rec.cwl │ │ ├── postprocess_variants.cwl │ │ ├── prep_align_inputs.cwl │ │ ├── prep_samples.cwl │ │ ├── prep_samples_to_rec.cwl │ │ ├── process_alignment.cwl │ │ ├── qc_to_rec.cwl │ │ ├── summarize_grading_vc.cwl │ │ └── variantcall_batch_region.cwl │ ├── wf-alignment.cwl │ └── wf-variantcall.cwl ├── SGDP-recall-standalone.cwl ├── SGDP-recall.csv ├── SGDP-recall │ └── config │ │ ├── SGDP-recall-template.yaml │ │ ├── SGDP-recall.csv │ │ └── SGDP-recall.yaml ├── bcbio_system_cgc.yaml ├── germline_template.yaml ├── run_generate_cwl_cgc.sh ├── run_upload_cgc.sh └── upload_to_cgc.py ├── giab-chm ├── README.md ├── bcbio_system-local.yaml ├── download_data.sh ├── germline-template.yaml ├── giab-chm-workflow │ ├── main-giab-chm-samples.json │ ├── main-giab-chm.cwl │ ├── steps │ │ ├── alignment_to_rec.cwl │ │ ├── batch_for_variantcall.cwl │ │ ├── combine_sample_regions.cwl │ │ ├── compare_to_rm.cwl │ │ ├── concat_batch_variantcalls.cwl │ │ ├── get_parallel_regions.cwl │ │ ├── multiqc_summary.cwl │ │ ├── pipeline_summary.cwl │ │ ├── postprocess_alignment.cwl │ │ ├── postprocess_alignment_to_rec.cwl │ │ ├── postprocess_variants.cwl │ │ ├── prep_align_inputs.cwl │ │ ├── prep_samples.cwl │ │ ├── prep_samples_to_rec.cwl │ │ ├── process_alignment.cwl │ │ ├── 
qc_to_rec.cwl │ │ ├── summarize_vc.cwl │ │ └── variantcall_batch_region.cwl │ ├── wf-alignment.cwl │ └── wf-variantcall.cwl ├── giab-chm.csv ├── prep_chm_truth.py ├── prep_chm_truth.sh ├── run_bunny.sh └── run_generate_cwl.sh ├── giab-exome ├── bcbio_system-local.yaml ├── download_data.sh ├── germline-template.yaml ├── giab-exome-workflow │ ├── main-giab-exome-samples.json │ ├── main-giab-exome.cwl │ ├── steps │ │ ├── alignment_to_rec.cwl │ │ ├── batch_for_variantcall.cwl │ │ ├── combine_sample_regions.cwl │ │ ├── compare_to_rm.cwl │ │ ├── concat_batch_variantcalls.cwl │ │ ├── get_parallel_regions.cwl │ │ ├── merge_split_alignments.cwl │ │ ├── multiqc_summary.cwl │ │ ├── pipeline_summary.cwl │ │ ├── postprocess_alignment.cwl │ │ ├── postprocess_alignment_to_rec.cwl │ │ ├── postprocess_variants.cwl │ │ ├── prep_align_inputs.cwl │ │ ├── prep_samples.cwl │ │ ├── prep_samples_to_rec.cwl │ │ ├── process_alignment.cwl │ │ ├── qc_to_rec.cwl │ │ ├── summarize_vc.cwl │ │ └── variantcall_batch_region.cwl │ ├── wf-alignment.cwl │ └── wf-variantcall.cwl ├── giab-exome.csv ├── input │ └── get_data.sh ├── run_cromwell.sh └── run_generate_cwl.sh ├── giab-joint ├── arvados │ ├── bcbio_system-arvados.yaml │ ├── run_arvados.sh │ └── run_generate_cwl.sh ├── bcbio_system.yaml ├── dnanexus │ ├── bcbio_system-dnanexus.yaml │ ├── run_compile.sh │ ├── run_dnanexus.sh │ └── run_generate_cwl.sh ├── dnanexus_single │ ├── README.txt │ ├── bcbio_system-dnanexus.yaml │ ├── germline-template.yaml │ ├── giab-single-bwa.csv │ ├── giab-single-dragen.csv │ └── run_generate_cwl.sh ├── download_data.sh ├── ga4gh_execution_challenge │ ├── bcbio-giab-joint_checker.json │ ├── bcbio-giab-joint_submit.json │ ├── grading-summary-gj1-baseline.csv │ ├── link_cwl.sh │ ├── link_to_synapse.py │ └── upload_biodata.sh ├── giab-joint-workflow │ ├── main-giab-joint-samples.json │ ├── main-giab-joint.cwl │ ├── steps │ │ ├── alignment_to_rec.cwl │ │ ├── batch_for_jointvc.cwl │ │ ├── batch_for_variantcall.cwl │ │ ├── 
combine_sample_regions.cwl │ │ ├── compare_to_rm.cwl │ │ ├── concat_batch_variantcalls.cwl │ │ ├── concat_batch_variantcalls_jointvc.cwl │ │ ├── finalize_jointvc.cwl │ │ ├── get_parallel_regions.cwl │ │ ├── get_parallel_regions_jointvc.cwl │ │ ├── merge_split_alignments.cwl │ │ ├── multiqc_summary.cwl │ │ ├── pipeline_summary.cwl │ │ ├── postprocess_alignment.cwl │ │ ├── postprocess_alignment_to_rec.cwl │ │ ├── postprocess_variants.cwl │ │ ├── prep_align_inputs.cwl │ │ ├── prep_samples.cwl │ │ ├── prep_samples_to_rec.cwl │ │ ├── process_alignment.cwl │ │ ├── qc_to_rec.cwl │ │ ├── run_jointvc.cwl │ │ ├── summarize_vc.cwl │ │ └── variantcall_batch_region.cwl │ ├── wf-alignment.cwl │ ├── wf-jointcall.cwl │ └── wf-variantcall.cwl ├── giab-joint.csv ├── joint-template.yaml ├── run_bunny.sh ├── run_generate_cwl.sh └── run_toil.sh ├── pgp ├── README.md ├── bcbio_system-arvados.yaml ├── pgp_sv_hla.yaml ├── run_arvados.sh ├── run_generate_cwl.sh └── scripts │ └── extract_veritas_pgp.py ├── somatic-giab-mix ├── bcbio_system.yaml ├── download_data.sh ├── run_bunny.sh ├── run_generate_cwl.sh ├── run_toil.sh ├── somatic-giab-mix-workflow │ ├── main-somatic-giab-mix-samples.json │ ├── main-somatic-giab-mix.cwl │ ├── steps │ │ ├── alignment_to_rec.cwl │ │ ├── batch_for_variantcall.cwl │ │ ├── combine_sample_regions.cwl │ │ ├── compare_to_rm.cwl │ │ ├── concat_batch_variantcalls.cwl │ │ ├── get_parallel_regions.cwl │ │ ├── multiqc_summary.cwl │ │ ├── pipeline_summary.cwl │ │ ├── postprocess_alignment.cwl │ │ ├── postprocess_alignment_to_rec.cwl │ │ ├── postprocess_variants.cwl │ │ ├── prep_align_inputs.cwl │ │ ├── prep_samples.cwl │ │ ├── prep_samples_to_rec.cwl │ │ ├── process_alignment.cwl │ │ ├── qc_to_rec.cwl │ │ ├── summarize_vc.cwl │ │ └── variantcall_batch_region.cwl │ ├── wf-alignment.cwl │ └── wf-variantcall.cwl ├── somatic-giab-mix.csv └── somatic-template.yaml ├── somatic-lowfreq ├── bcbio_system.yaml ├── download_data.sh ├── pisces-ras-workflow │ ├── 
main-pisces-ras-samples.json │ ├── main-pisces-ras.cwl │ ├── steps │ │ ├── alignment_to_rec.cwl │ │ ├── batch_for_variantcall.cwl │ │ ├── combine_sample_regions.cwl │ │ ├── compare_to_rm.cwl │ │ ├── concat_batch_variantcalls.cwl │ │ ├── get_parallel_regions.cwl │ │ ├── merge_split_alignments.cwl │ │ ├── multiqc_summary.cwl │ │ ├── pipeline_summary.cwl │ │ ├── postprocess_alignment.cwl │ │ ├── postprocess_alignment_to_rec.cwl │ │ ├── postprocess_variants.cwl │ │ ├── prep_align_inputs.cwl │ │ ├── prep_samples.cwl │ │ ├── prep_samples_to_rec.cwl │ │ ├── process_alignment.cwl │ │ ├── qc_to_rec.cwl │ │ ├── summarize_vc.cwl │ │ └── variantcall_batch_region.cwl │ ├── wf-alignment.cwl │ └── wf-variantcall.cwl ├── pisces-ras.yaml ├── pisces-titr-workflow │ ├── main-pisces-titr-samples.json │ ├── main-pisces-titr.cwl │ ├── steps │ │ ├── alignment_to_rec.cwl │ │ ├── batch_for_variantcall.cwl │ │ ├── combine_sample_regions.cwl │ │ ├── compare_to_rm.cwl │ │ ├── concat_batch_variantcalls.cwl │ │ ├── get_parallel_regions.cwl │ │ ├── merge_split_alignments.cwl │ │ ├── multiqc_summary.cwl │ │ ├── pipeline_summary.cwl │ │ ├── postprocess_alignment.cwl │ │ ├── postprocess_alignment_to_rec.cwl │ │ ├── postprocess_variants.cwl │ │ ├── prep_align_inputs.cwl │ │ ├── prep_samples.cwl │ │ ├── prep_samples_to_rec.cwl │ │ ├── process_alignment.cwl │ │ ├── qc_to_rec.cwl │ │ ├── summarize_vc.cwl │ │ └── variantcall_batch_region.cwl │ ├── wf-alignment.cwl │ └── wf-variantcall.cwl ├── pisces-titr.yaml ├── prepare_inputs │ ├── clean_truth_sets.py │ ├── prepare_bcbio_inputs.py │ ├── prepare_data.sh │ └── smcounter2 │ │ ├── convert_truth_vcf.py │ │ ├── fix_bam_umis.py │ │ └── prepare_data.sh ├── run_bunny.sh ├── run_cromwell.sh ├── run_generate_cwl.sh ├── smcounter2-umi-workflow │ ├── main-smcounter2-umi-samples.json │ ├── main-smcounter2-umi.cwl │ ├── steps │ │ ├── alignment_to_rec.cwl │ │ ├── batch_for_variantcall.cwl │ │ ├── combine_sample_regions.cwl │ │ ├── compare_to_rm.cwl │ │ ├── 
concat_batch_variantcalls.cwl │ │ ├── get_parallel_regions.cwl │ │ ├── merge_split_alignments.cwl │ │ ├── multiqc_summary.cwl │ │ ├── pipeline_summary.cwl │ │ ├── postprocess_alignment.cwl │ │ ├── postprocess_alignment_to_rec.cwl │ │ ├── postprocess_variants.cwl │ │ ├── prep_align_inputs.cwl │ │ ├── prep_samples.cwl │ │ ├── prep_samples_to_rec.cwl │ │ ├── process_alignment.cwl │ │ ├── qc_to_rec.cwl │ │ ├── summarize_vc.cwl │ │ └── variantcall_batch_region.cwl │ ├── wf-alignment.cwl │ └── wf-variantcall.cwl └── smcounter2-umi.yaml └── wes-agha-test ├── README.md ├── bcbio-validation-checker ├── Dockstore.cwl ├── checker-workflow-wrapper.json └── grading-summary-combined-expected.csv ├── bcbio_system-arvados.yaml ├── bcbio_system-gcp.yaml ├── checker-workflow-wrapping-tool.cwl ├── run_arvados.sh ├── run_cromwell.sh ├── run_generate_cwl_arvados.sh ├── run_generate_cwl_gcp.sh ├── wes_chr21_test-template.yaml ├── wes_chr21_test-workflow-arvados ├── main-wes_chr21_test-samples.json ├── main-wes_chr21_test.cwl ├── steps │ ├── alignment_to_rec.cwl │ ├── batch_for_ensemble.cwl │ ├── batch_for_sv.cwl │ ├── batch_for_variantcall.cwl │ ├── calculate_sv_bins.cwl │ ├── calculate_sv_coverage.cwl │ ├── combine_calls.cwl │ ├── combine_sample_regions.cwl │ ├── compare_to_rm.cwl │ ├── concat_batch_variantcalls.cwl │ ├── detect_sv.cwl │ ├── get_parallel_regions.cwl │ ├── merge_split_alignments.cwl │ ├── multiqc_summary.cwl │ ├── normalize_sv_coverage.cwl │ ├── pipeline_summary.cwl │ ├── postprocess_alignment.cwl │ ├── postprocess_alignment_to_rec.cwl │ ├── postprocess_variants.cwl │ ├── prep_align_inputs.cwl │ ├── prep_samples.cwl │ ├── prep_samples_to_rec.cwl │ ├── process_alignment.cwl │ ├── qc_to_rec.cwl │ ├── summarize_sv.cwl │ ├── summarize_vc.cwl │ └── variantcall_batch_region.cwl ├── wf-alignment.cwl ├── wf-svcall.cwl └── wf-variantcall.cwl ├── wes_chr21_test-workflow-gcp ├── main-wes_chr21_test-samples.json ├── main-wes_chr21_test.cwl ├── steps │ ├── alignment_to_rec.cwl │ ├── 
batch_for_ensemble.cwl │ ├── batch_for_sv.cwl │ ├── batch_for_variantcall.cwl │ ├── calculate_sv_bins.cwl │ ├── calculate_sv_coverage.cwl │ ├── combine_calls.cwl │ ├── combine_sample_regions.cwl │ ├── compare_to_rm.cwl │ ├── concat_batch_variantcalls.cwl │ ├── detect_sv.cwl │ ├── get_parallel_regions.cwl │ ├── merge_split_alignments.cwl │ ├── multiqc_summary.cwl │ ├── normalize_sv_coverage.cwl │ ├── pipeline_summary.cwl │ ├── postprocess_alignment.cwl │ ├── postprocess_alignment_to_rec.cwl │ ├── postprocess_variants.cwl │ ├── prep_align_inputs.cwl │ ├── prep_samples.cwl │ ├── prep_samples_to_rec.cwl │ ├── process_alignment.cwl │ ├── qc_to_rec.cwl │ ├── summarize_sv.cwl │ ├── summarize_vc.cwl │ └── variantcall_batch_region.cwl ├── wf-alignment.cwl ├── wf-svcall.cwl └── wf-variantcall.cwl └── wes_chr21_test.csv /.gitignore: -------------------------------------------------------------------------------- 1 | */bcbiotx 2 | NA12878-chr20/NA12878-platinum-chr20/ 3 | SGDP-recall-CGC/SGDP-recall/config/SGDP-recall.yaml.bak* 4 | giab-joint/giab-joint/ 5 | giab-chm/giab-chm/ 6 | somatic-giab-mix/somatic-giab-mix/ 7 | */bunny_work* 8 | */cwltoil_work 9 | */cromwell_work* 10 | */biodata 11 | -------------------------------------------------------------------------------- /HCC2218-sv/HCC2218-sv-workflow/steps/combine_sample_regions.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | arv: http://arvados.org/cwl# 3 | dx: https://www.dnanexus.com/cwl# 4 | arguments: 5 | - position: 0 6 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 7 | - sentinel_parallel=multi-combined 8 | - sentinel_outputs=config__algorithm__callable_regions,config__algorithm__non_callable_regions,config__algorithm__callable_count 9 | - 
sentinel_inputs=regions__callable:var,regions__nblock:var,metadata__batch:var,config__algorithm__nomap_split_size:var,config__algorithm__nomap_split_targets:var,reference__fasta__base:var,resources:var,description:var 10 | - run_number=0 11 | baseCommand: 12 | - bcbio_nextgen.py 13 | - runfn 14 | - combine_sample_regions 15 | - cwl 16 | class: CommandLineTool 17 | cwlVersion: v1.0 18 | hints: 19 | - class: DockerRequirement 20 | dockerImageId: quay.io/bcbio/bcbio-vc 21 | dockerPull: quay.io/bcbio/bcbio-vc 22 | - class: ResourceRequirement 23 | coresMin: 1 24 | outdirMin: 6403 25 | ramMin: 3584 26 | tmpdirMin: 2690 27 | - class: dx:InputResourceRequirement 28 | indirMin: 3008 29 | - class: SoftwareRequirement 30 | packages: 31 | - package: bedtools 32 | specs: 33 | - https://anaconda.org/bioconda/bedtools 34 | - package: htslib 35 | specs: 36 | - https://anaconda.org/bioconda/htslib 37 | - package: gatk4 38 | specs: 39 | - https://anaconda.org/bioconda/gatk4 40 | - class: arv:APIRequirement 41 | inputs: 42 | - id: regions__callable 43 | type: 44 | items: 45 | - File 46 | - 'null' 47 | type: array 48 | - id: regions__nblock 49 | type: 50 | items: 51 | - File 52 | - 'null' 53 | type: array 54 | - id: metadata__batch 55 | type: 56 | items: string 57 | type: array 58 | - id: config__algorithm__nomap_split_size 59 | type: 60 | items: long 61 | type: array 62 | - id: config__algorithm__nomap_split_targets 63 | type: 64 | items: long 65 | type: array 66 | - id: reference__fasta__base 67 | secondaryFiles: 68 | - .fai 69 | - ^.dict 70 | type: 71 | items: File 72 | type: array 73 | - id: resources 74 | type: 75 | items: string 76 | type: array 77 | - id: description 78 | type: 79 | items: string 80 | type: array 81 | outputs: 82 | - id: config__algorithm__callable_regions 83 | type: 84 | items: File 85 | type: array 86 | - id: config__algorithm__non_callable_regions 87 | type: 88 | items: File 89 | type: array 90 | - id: config__algorithm__callable_count 91 | type: 92 | 
items: int 93 | type: array 94 | requirements: 95 | - class: InlineJavascriptRequirement 96 | - class: InitialWorkDirRequirement 97 | listing: 98 | - entry: $(JSON.stringify(inputs)) 99 | entryname: cwl.inputs.json 100 | -------------------------------------------------------------------------------- /HCC2218-sv/HCC2218-sv-workflow/steps/multiqc_summary.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-combined 7 | - sentinel_outputs=summary__multiqc 8 | - sentinel_inputs=qcout_rec:record 9 | - run_number=0 10 | baseCommand: 11 | - bcbio_nextgen.py 12 | - runfn 13 | - multiqc_summary 14 | - cwl 15 | class: CommandLineTool 16 | cwlVersion: v1.0 17 | hints: 18 | - class: DockerRequirement 19 | dockerImageId: quay.io/bcbio/bcbio-vc 20 | dockerPull: quay.io/bcbio/bcbio-vc 21 | - class: ResourceRequirement 22 | coresMin: 1 23 | outdirMin: 22540 24 | ramMin: 3584 25 | tmpdirMin: 10758 26 | - class: dx:InputResourceRequirement 27 | indirMin: 1 28 | - class: SoftwareRequirement 29 | packages: 30 | - package: multiqc 31 | specs: 32 | - https://anaconda.org/bioconda/multiqc 33 | - package: multiqc-bcbio 34 | specs: 35 | - https://anaconda.org/bioconda/multiqc-bcbio 36 | inputs: 37 | - id: qcout_rec 38 | type: 39 | items: 40 | fields: 41 | - name: summary__qc 42 | type: 43 | - File 44 | - 'null' 45 | - name: summary__metrics 46 | type: 47 | - string 48 | - 'null' 49 | - name: description 50 | type: string 51 | - name: genome_build 52 | type: string 53 | - name: config__algorithm__tools_off 54 | type: 55 | - 'null' 56 | - string 57 | - items: 58 | - 'null' 59 | - string 60 | type: array 61 | - name: config__algorithm__qc 62 | type: 63 | items: string 64 | type: array 65 | - name: config__algorithm__tools_on 66 | type: 67 | - 'null' 68 | - string 
69 | - items: 70 | - 'null' 71 | - string 72 | type: array 73 | name: qcout_rec 74 | type: record 75 | type: array 76 | outputs: 77 | - id: summary__multiqc 78 | type: 79 | items: 80 | - File 81 | - 'null' 82 | type: array 83 | requirements: 84 | - class: InlineJavascriptRequirement 85 | - class: InitialWorkDirRequirement 86 | listing: 87 | - entry: $(JSON.stringify(inputs)) 88 | entryname: cwl.inputs.json 89 | -------------------------------------------------------------------------------- /HCC2218-sv/HCC2218-sv-workflow/steps/prep_samples.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-parallel 7 | - sentinel_outputs=rgnames__sample,config__algorithm__variant_regions,config__algorithm__variant_regions_merged,config__algorithm__variant_regions_orig,config__algorithm__coverage,config__algorithm__coverage_merged,config__algorithm__coverage_orig,config__algorithm__seq2c_bed_ready 8 | - sentinel_inputs=prep_samples_rec:record 9 | - run_number=0 10 | baseCommand: 11 | - bcbio_nextgen.py 12 | - runfn 13 | - prep_samples 14 | - cwl 15 | class: CommandLineTool 16 | cwlVersion: v1.0 17 | hints: 18 | - class: DockerRequirement 19 | dockerImageId: quay.io/bcbio/bcbio-vc 20 | dockerPull: quay.io/bcbio/bcbio-vc 21 | - class: ResourceRequirement 22 | coresMin: 1 23 | outdirMin: 6403 24 | ramMin: 3584 25 | tmpdirMin: 2690 26 | - class: dx:InputResourceRequirement 27 | indirMin: 3012 28 | - class: SoftwareRequirement 29 | packages: 30 | - package: htslib 31 | specs: 32 | - https://anaconda.org/bioconda/htslib 33 | - package: bedtools 34 | specs: 35 | - https://anaconda.org/bioconda/bedtools 36 | - package: pythonpy 37 | specs: 38 | - https://anaconda.org/bioconda/pythonpy 39 | inputs: 40 | - id: prep_samples_rec 41 | type: 42 | fields: 43 | - name: 
resources 44 | type: string 45 | - name: description 46 | type: string 47 | - name: reference__fasta__base 48 | type: File 49 | - name: rgnames__sample 50 | type: string 51 | - name: config__algorithm__variant_regions 52 | type: File 53 | name: prep_samples_rec 54 | type: record 55 | outputs: 56 | - id: rgnames__sample 57 | type: string 58 | - id: config__algorithm__variant_regions 59 | type: 60 | - File 61 | - 'null' 62 | - id: config__algorithm__variant_regions_merged 63 | type: 64 | - File 65 | - 'null' 66 | - id: config__algorithm__variant_regions_orig 67 | type: 68 | - File 69 | - 'null' 70 | - id: config__algorithm__coverage 71 | type: 72 | - File 73 | - 'null' 74 | - id: config__algorithm__coverage_merged 75 | type: 76 | - File 77 | - 'null' 78 | - id: config__algorithm__coverage_orig 79 | type: 80 | - File 81 | - 'null' 82 | - id: config__algorithm__seq2c_bed_ready 83 | type: 84 | - File 85 | - 'null' 86 | requirements: 87 | - class: InlineJavascriptRequirement 88 | - class: InitialWorkDirRequirement 89 | listing: 90 | - entry: $(JSON.stringify(inputs)) 91 | entryname: cwl.inputs.json 92 | -------------------------------------------------------------------------------- /HCC2218-sv/HCC2218-sv-workflow/steps/prep_samples_to_rec.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-combined 7 | - sentinel_outputs=prep_samples_rec:resources;description;reference__fasta__base;rgnames__sample;config__algorithm__variant_regions 8 | - sentinel_inputs=rgnames__sample:var,config__algorithm__variant_regions:var,reference__fasta__base:var,resources:var,description:var 9 | - run_number=0 10 | baseCommand: 11 | - bcbio_nextgen.py 12 | - runfn 13 | - prep_samples_to_rec 14 | - cwl 15 | class: CommandLineTool 16 | cwlVersion: v1.0 17 | hints: 18 | 
- class: DockerRequirement 19 | dockerImageId: quay.io/bcbio/bcbio-vc 20 | dockerPull: quay.io/bcbio/bcbio-vc 21 | - class: ResourceRequirement 22 | coresMin: 1 23 | outdirMin: 6403 24 | ramMin: 3584 25 | tmpdirMin: 2690 26 | - class: dx:InputResourceRequirement 27 | indirMin: 0 28 | inputs: 29 | - id: rgnames__sample 30 | type: 31 | items: string 32 | type: array 33 | - id: config__algorithm__variant_regions 34 | type: 35 | items: File 36 | type: array 37 | - id: reference__fasta__base 38 | secondaryFiles: 39 | - .fai 40 | - ^.dict 41 | type: 42 | items: File 43 | type: array 44 | - id: resources 45 | type: 46 | items: string 47 | type: array 48 | - id: description 49 | type: 50 | items: string 51 | type: array 52 | outputs: 53 | - id: prep_samples_rec 54 | type: 55 | items: 56 | fields: 57 | - name: resources 58 | type: string 59 | - name: description 60 | type: string 61 | - name: reference__fasta__base 62 | type: File 63 | - name: rgnames__sample 64 | type: string 65 | - name: config__algorithm__variant_regions 66 | type: File 67 | name: prep_samples_rec 68 | type: record 69 | type: array 70 | requirements: 71 | - class: InlineJavascriptRequirement 72 | - class: InitialWorkDirRequirement 73 | listing: 74 | - entry: $(JSON.stringify(inputs)) 75 | entryname: cwl.inputs.json 76 | -------------------------------------------------------------------------------- /HCC2218-sv/HCC2218-sv.yaml: -------------------------------------------------------------------------------- 1 | # Somatic deletion/duplication for HCC2218 breast cancer cell lines 2 | # https://github.com/Illumina/Canvas#demo-tumor-normal-enrichment-workflow 3 | --- 4 | details: 5 | - files: HCC2218BL_S1.bam 6 | description: HCC2218BL 7 | metadata: 8 | batch: HCC2218 9 | phenotype: normal 10 | analysis: variant2 11 | genome_build: GRCh37 12 | algorithm: 13 | aligner: bwa 14 | variantcaller: 15 | germline: [gatk-haplotype] 16 | somatic: [vardict] 17 | svcaller: [cnvkit, manta, delly, lumpy, wham, titancna] 
18 | variant_regions: NexteraRapidCapture_Exome_TargetedRegions_v1.2Used.bed 19 | - files: HCC2218C_S1.bam 20 | description: HCC2218C 21 | metadata: 22 | batch: HCC2218 23 | phenotype: tumor 24 | analysis: variant2 25 | genome_build: GRCh37 26 | algorithm: 27 | aligner: bwa 28 | variantcaller: 29 | germline: [gatk-haplotype] 30 | somatic: [vardict] 31 | svcaller: [cnvkit, manta, delly, lumpy, wham, titancna] 32 | svvalidate: HCC2218Truth-clean-prep.vcf.gz 33 | variant_regions: NexteraRapidCapture_Exome_TargetedRegions_v1.2Used.bed 34 | -------------------------------------------------------------------------------- /HCC2218-sv/bcbio_system.yaml: -------------------------------------------------------------------------------- 1 | local: 2 | ref: biodata/collections 3 | inputs: 4 | - biodata/HCC2218 5 | resources: 6 | default: {cores: 8, memory: 3500M, jvm_opts: [-Xms750m, -Xmx3500m]} 7 | -------------------------------------------------------------------------------- /HCC2218-sv/download_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | ORIG_DIR=`pwd` 5 | # GRCh37 6 | mkdir -p biodata/collections/GRCh37 7 | cd biodata/collections/GRCh37 8 | synapse get -r syn12027897 9 | cd $ORIG_DIR 10 | # HCC2218 samples 11 | mkdir -p biodata/HCC2218 12 | cd biodata/HCC2218 13 | synapse get -r syn12977324 14 | cd $ORIG_DIR 15 | # cleanup synapse files 16 | cd biodata 17 | find . 
-name "SYNAPSE_METADATA_MANIFEST.tsv" -exec rm -f {} \; 18 | cd $ORIG_DIR 19 | -------------------------------------------------------------------------------- /HCC2218-sv/run_cromwell.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=HCC2218-sv 5 | 6 | # local bcbio install 7 | bcbio_vm.py cwlrun cromwell -s htcondor --no-container $PNAME-workflow 8 | -------------------------------------------------------------------------------- /HCC2218-sv/run_generate_cwl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=HCC2218-sv 5 | 6 | CWD=`pwd` 7 | rm -rf $PNAME-workflow 8 | bcbio_vm.py cwl --systemconfig bcbio_system.yaml $PNAME.yaml 9 | sed -i "s#$CWD/biodata/#../biodata/#" $PNAME-workflow/main-$PNAME-samples.json 10 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 bcbio-nextgen contributors 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /NA12878-chr20/NA12878-platinum-chr20-workflow-arvados/steps/combine_sample_regions.cwl: -------------------------------------------------------------------------------- 1 | arguments: 2 | - position: 0 3 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 4 | - sentinel_parallel=multi-combined 5 | - sentinel_outputs=config__algorithm__callable_regions,config__algorithm__non_callable_regions,config__algorithm__callable_count 6 | - sentinel_inputs=regions__callable:var,regions__nblock:var,config__algorithm__nomap_split_size:var,config__algorithm__nomap_split_targets:var,reference__fasta__base:var,description:var 7 | baseCommand: 8 | - bcbio_nextgen.py 9 | - runfn 10 | - combine_sample_regions 11 | - cwl 12 | class: CommandLineTool 13 | cwlVersion: v1.0 14 | hints: 15 | - class: DockerRequirement 16 | dockerImageId: quay.io/bcbio/bcbio-align 17 | dockerPull: quay.io/bcbio/bcbio-align 18 | - class: ResourceRequirement 19 | coresMin: 1 20 | outdirMin: 1024 21 | ramMin: 3072 22 | - class: SoftwareRequirement 23 | packages: 24 | - package: bedtools 25 | specs: 26 | - https://anaconda.org/bioconda/bedtools 27 | - package: htslib 28 | specs: 29 | - https://anaconda.org/bioconda/htslib 30 | inputs: 31 | - id: regions__callable 32 | type: 33 | items: File 34 | type: array 35 | - id: regions__nblock 36 | type: 37 | items: File 38 | type: array 39 | - id: config__algorithm__nomap_split_size 40 | type: 41 | items: long 42 | type: array 43 | - id: config__algorithm__nomap_split_targets 44 | type: 45 | items: long 46 | type: array 47 | - id: reference__fasta__base 48 | type: 49 | items: File 50 | 
type: array 51 | - id: description 52 | type: 53 | items: string 54 | type: array 55 | outputs: 56 | - id: config__algorithm__callable_regions 57 | type: 58 | items: File 59 | type: array 60 | - id: config__algorithm__non_callable_regions 61 | type: 62 | items: File 63 | type: array 64 | - id: config__algorithm__callable_count 65 | type: 66 | items: int 67 | type: array 68 | requirements: 69 | - class: InlineJavascriptRequirement 70 | - class: InitialWorkDirRequirement 71 | listing: 72 | - entry: $(JSON.stringify(inputs)) 73 | entryname: cwl.inputs.json 74 | -------------------------------------------------------------------------------- /NA12878-chr20/NA12878-platinum-chr20-workflow-arvados/steps/get_parallel_regions.cwl: -------------------------------------------------------------------------------- 1 | arguments: 2 | - position: 0 3 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 4 | - sentinel_parallel=batch-split 5 | - sentinel_outputs=region 6 | - sentinel_inputs=batch_rec:record 7 | baseCommand: 8 | - bcbio_nextgen.py 9 | - runfn 10 | - get_parallel_regions 11 | - cwl 12 | class: CommandLineTool 13 | cwlVersion: v1.0 14 | hints: 15 | - class: DockerRequirement 16 | dockerImageId: quay.io/bcbio/bcbio-base 17 | dockerPull: quay.io/bcbio/bcbio-base 18 | - class: ResourceRequirement 19 | coresMin: 1 20 | outdirMin: 1024 21 | ramMin: 3072 22 | inputs: 23 | - id: batch_rec 24 | type: 25 | items: 26 | fields: 27 | - name: description 28 | type: string 29 | - name: config__algorithm__validate 30 | type: File 31 | - name: reference__fasta__base 32 | type: File 33 | - name: config__algorithm__variantcaller 34 | type: string 35 | - name: config__algorithm__coverage_interval 36 | type: string 37 | - name: metadata__batch 38 | type: 'null' 39 | - name: config__algorithm__validate_regions 40 | type: File 41 | - name: genome_build 42 | type: string 43 | - name: metadata__phenotype 44 | type: string 45 | - name: config__algorithm__tools_off 46 
| type: 47 | items: string 48 | type: array 49 | - name: genome_resources__variation__dbsnp 50 | type: File 51 | - name: genome_resources__variation__cosmic 52 | type: 'null' 53 | - name: reference__genome_context 54 | type: 55 | items: File 56 | type: array 57 | - name: analysis 58 | type: string 59 | - name: config__algorithm__tools_on 60 | type: 61 | items: string 62 | type: array 63 | - name: config__algorithm__variant_regions 64 | type: File 65 | - name: reference__rtg 66 | type: File 67 | - name: align_bam 68 | type: File 69 | - name: regions__sample_callable 70 | type: File 71 | - name: config__algorithm__callable_regions 72 | type: File 73 | name: batch_rec 74 | type: record 75 | type: array 76 | outputs: 77 | - id: region 78 | type: 79 | items: string 80 | type: array 81 | requirements: 82 | - class: InlineJavascriptRequirement 83 | - class: InitialWorkDirRequirement 84 | listing: 85 | - entry: $(JSON.stringify(inputs)) 86 | entryname: cwl.inputs.json 87 | -------------------------------------------------------------------------------- /NA12878-chr20/NA12878-platinum-chr20-workflow-arvados/steps/multiqc_summary.cwl: -------------------------------------------------------------------------------- 1 | arguments: 2 | - position: 0 3 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 4 | - sentinel_parallel=multi-combined 5 | - sentinel_outputs=summary__multiqc 6 | - sentinel_inputs=qcout_rec:record 7 | baseCommand: 8 | - bcbio_nextgen.py 9 | - runfn 10 | - multiqc_summary 11 | - cwl 12 | class: CommandLineTool 13 | cwlVersion: v1.0 14 | hints: 15 | - class: DockerRequirement 16 | dockerImageId: quay.io/bcbio/bcbio-qc 17 | dockerPull: quay.io/bcbio/bcbio-qc 18 | - class: ResourceRequirement 19 | coresMin: 1 20 | outdirMin: 1024 21 | ramMin: 3072 22 | - class: SoftwareRequirement 23 | packages: 24 | - package: multiqc 25 | specs: 26 | - https://anaconda.org/bioconda/multiqc 27 | - package: multiqc-bcbio 28 | specs: 29 | - 
https://anaconda.org/bioconda/multiqc-bcbio 30 | inputs: 31 | - id: qcout_rec 32 | type: 33 | items: 34 | fields: 35 | - name: summary__qc 36 | type: 37 | - File 38 | - 'null' 39 | - name: summary__metrics 40 | type: string 41 | - name: description 42 | type: string 43 | - name: reference__fasta__base 44 | type: File 45 | - name: config__algorithm__coverage_interval 46 | type: string 47 | - name: genome_build 48 | type: string 49 | - name: config__algorithm__tools_off 50 | type: 51 | items: string 52 | type: array 53 | - name: config__algorithm__qc 54 | type: 55 | items: string 56 | type: array 57 | - name: analysis 58 | type: string 59 | - name: config__algorithm__tools_on 60 | type: 61 | items: string 62 | type: array 63 | - name: config__algorithm__variant_regions 64 | type: File 65 | - name: align_bam 66 | type: File 67 | - name: config__algorithm__variant_regions_merged 68 | type: File 69 | - name: config__algorithm__coverage 70 | type: 71 | - File 72 | - 'null' 73 | - name: config__algorithm__coverage_merged 74 | type: 75 | - File 76 | - 'null' 77 | name: qcout_rec 78 | type: record 79 | type: array 80 | outputs: 81 | - id: summary__multiqc 82 | type: 83 | items: 84 | - File 85 | - 'null' 86 | type: array 87 | requirements: 88 | - class: InlineJavascriptRequirement 89 | - class: InitialWorkDirRequirement 90 | listing: 91 | - entry: $(JSON.stringify(inputs)) 92 | entryname: cwl.inputs.json 93 | -------------------------------------------------------------------------------- /NA12878-chr20/NA12878-platinum-chr20-workflow-arvados/steps/postprocess_variants.cwl: -------------------------------------------------------------------------------- 1 | arguments: 2 | - position: 0 3 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 4 | - sentinel_parallel=batch-single 5 | - sentinel_outputs=vrn_file 6 | - sentinel_inputs=batch_rec:record,vrn_file:var 7 | baseCommand: 8 | - bcbio_nextgen.py 9 | - runfn 10 | - postprocess_variants 11 | - cwl 12 
| class: CommandLineTool 13 | cwlVersion: v1.0 14 | hints: 15 | - class: DockerRequirement 16 | dockerImageId: quay.io/bcbio/bcbio-vc 17 | dockerPull: quay.io/bcbio/bcbio-vc 18 | - class: ResourceRequirement 19 | coresMin: 8 20 | outdirMin: 1024 21 | ramMin: 24576 22 | - class: SoftwareRequirement 23 | packages: 24 | - package: snpeff 25 | specs: 26 | - https://anaconda.org/bioconda/snpeff 27 | version: 28 | - 4.3i 29 | inputs: 30 | - id: batch_rec 31 | type: 32 | items: 33 | fields: 34 | - name: description 35 | type: string 36 | - name: config__algorithm__validate 37 | type: File 38 | - name: reference__fasta__base 39 | type: File 40 | - name: config__algorithm__variantcaller 41 | type: string 42 | - name: config__algorithm__coverage_interval 43 | type: string 44 | - name: metadata__batch 45 | type: 'null' 46 | - name: config__algorithm__validate_regions 47 | type: File 48 | - name: genome_build 49 | type: string 50 | - name: metadata__phenotype 51 | type: string 52 | - name: config__algorithm__tools_off 53 | type: 54 | items: string 55 | type: array 56 | - name: genome_resources__variation__dbsnp 57 | type: File 58 | - name: genome_resources__variation__cosmic 59 | type: 'null' 60 | - name: reference__genome_context 61 | type: 62 | items: File 63 | type: array 64 | - name: analysis 65 | type: string 66 | - name: config__algorithm__tools_on 67 | type: 68 | items: string 69 | type: array 70 | - name: config__algorithm__variant_regions 71 | type: File 72 | - name: reference__rtg 73 | type: File 74 | - name: align_bam 75 | type: File 76 | - name: regions__sample_callable 77 | type: File 78 | - name: config__algorithm__callable_regions 79 | type: File 80 | name: batch_rec 81 | type: record 82 | type: array 83 | - id: vrn_file_toolinput 84 | secondaryFiles: 85 | - .tbi 86 | type: File 87 | outputs: 88 | - id: vrn_file 89 | secondaryFiles: 90 | - .tbi 91 | type: File 92 | requirements: 93 | - class: InlineJavascriptRequirement 94 | - class: InitialWorkDirRequirement 95 
| listing: 96 | - entry: $(JSON.stringify(inputs)) 97 | entryname: cwl.inputs.json 98 | -------------------------------------------------------------------------------- /NA12878-chr20/NA12878-platinum-chr20-workflow-arvados/steps/prep_align_inputs.cwl: -------------------------------------------------------------------------------- 1 | arguments: 2 | - position: 0 3 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 4 | - sentinel_parallel=single-split 5 | - sentinel_outputs=process_alignment_rec:files;config__algorithm__quality_format;align_split 6 | - sentinel_inputs=alignment_rec:record 7 | baseCommand: 8 | - bcbio_nextgen.py 9 | - runfn 10 | - prep_align_inputs 11 | - cwl 12 | class: CommandLineTool 13 | cwlVersion: v1.0 14 | hints: 15 | - class: DockerRequirement 16 | dockerImageId: quay.io/bcbio/bcbio-align 17 | dockerPull: quay.io/bcbio/bcbio-align 18 | - class: ResourceRequirement 19 | coresMin: 1 20 | outdirMin: 5244 21 | ramMin: 3072 22 | tmpdirMin: 4220 23 | - class: SoftwareRequirement 24 | packages: 25 | - package: grabix 26 | specs: 27 | - https://anaconda.org/bioconda/grabix 28 | - package: htslib 29 | specs: 30 | - https://anaconda.org/bioconda/htslib 31 | - package: biobambam 32 | specs: 33 | - https://anaconda.org/bioconda/biobambam 34 | inputs: 35 | - id: alignment_rec 36 | type: 37 | fields: 38 | - name: description 39 | type: string 40 | - name: config__algorithm__align_split_size 41 | type: 'null' 42 | - name: reference__fasta__base 43 | type: File 44 | - name: rgnames__lb 45 | type: 'null' 46 | - name: rgnames__rg 47 | type: string 48 | - name: rgnames__lane 49 | type: string 50 | - name: reference__bwa__indexes 51 | type: File 52 | - name: files 53 | type: 54 | items: File 55 | type: array 56 | - name: config__algorithm__aligner 57 | type: string 58 | - name: rgnames__pl 59 | type: string 60 | - name: config__algorithm__mark_duplicates 61 | type: string 62 | - name: rgnames__pu 63 | type: string 64 | - name: 
rgnames__sample 65 | type: string 66 | name: alignment_rec 67 | type: record 68 | outputs: 69 | - id: process_alignment_rec 70 | type: 71 | items: 72 | fields: 73 | - name: files 74 | type: 75 | items: File 76 | type: array 77 | - name: config__algorithm__quality_format 78 | type: string 79 | - name: align_split 80 | type: 81 | - string 82 | - 'null' 83 | name: process_alignment_rec 84 | type: record 85 | type: array 86 | requirements: 87 | - class: InlineJavascriptRequirement 88 | - class: InitialWorkDirRequirement 89 | listing: 90 | - entry: $(JSON.stringify(inputs)) 91 | entryname: cwl.inputs.json 92 | -------------------------------------------------------------------------------- /NA12878-chr20/NA12878-platinum-chr20-workflow-arvados/steps/prep_samples.cwl: -------------------------------------------------------------------------------- 1 | arguments: 2 | - position: 0 3 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 4 | - sentinel_parallel=multi-parallel 5 | - sentinel_outputs=config__algorithm__variant_regions,config__algorithm__variant_regions_merged,config__algorithm__variant_regions_orig,config__algorithm__coverage,config__algorithm__coverage_merged,config__algorithm__coverage_orig,config__algorithm__seq2c_bed_ready 6 | - sentinel_inputs=prep_samples_rec:record 7 | baseCommand: 8 | - bcbio_nextgen.py 9 | - runfn 10 | - prep_samples 11 | - cwl 12 | class: CommandLineTool 13 | cwlVersion: v1.0 14 | hints: 15 | - class: DockerRequirement 16 | dockerImageId: quay.io/bcbio/bcbio-align 17 | dockerPull: quay.io/bcbio/bcbio-align 18 | - class: ResourceRequirement 19 | coresMin: 1 20 | outdirMin: 1024 21 | ramMin: 3072 22 | - class: SoftwareRequirement 23 | packages: 24 | - package: htslib 25 | specs: 26 | - https://anaconda.org/bioconda/htslib 27 | - package: bedtools 28 | specs: 29 | - https://anaconda.org/bioconda/bedtools 30 | - package: pythonpy 31 | specs: 32 | - https://anaconda.org/bioconda/pythonpy 33 | inputs: 34 | - id: 
prep_samples_rec 35 | type: 36 | fields: 37 | - name: description 38 | type: string 39 | - name: reference__fasta__base 40 | type: File 41 | - name: config__algorithm__variant_regions 42 | type: 'null' 43 | name: prep_samples_rec 44 | type: record 45 | outputs: 46 | - id: config__algorithm__variant_regions 47 | type: 48 | - File 49 | - 'null' 50 | - id: config__algorithm__variant_regions_merged 51 | type: 52 | - File 53 | - 'null' 54 | - id: config__algorithm__variant_regions_orig 55 | type: 56 | - File 57 | - 'null' 58 | - id: config__algorithm__coverage 59 | type: 60 | - File 61 | - 'null' 62 | - id: config__algorithm__coverage_merged 63 | type: 64 | - File 65 | - 'null' 66 | - id: config__algorithm__coverage_orig 67 | type: 68 | - File 69 | - 'null' 70 | - id: config__algorithm__seq2c_bed_ready 71 | type: 72 | - File 73 | - 'null' 74 | requirements: 75 | - class: InlineJavascriptRequirement 76 | - class: InitialWorkDirRequirement 77 | listing: 78 | - entry: $(JSON.stringify(inputs)) 79 | entryname: cwl.inputs.json 80 | -------------------------------------------------------------------------------- /NA12878-chr20/NA12878-platinum-chr20-workflow-arvados/steps/prep_samples_to_rec.cwl: -------------------------------------------------------------------------------- 1 | arguments: 2 | - position: 0 3 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 4 | - sentinel_parallel=multi-combined 5 | - sentinel_outputs=prep_samples_rec:description;reference__fasta__base;config__algorithm__variant_regions 6 | - sentinel_inputs=config__algorithm__variant_regions:var,reference__fasta__base:var,description:var 7 | baseCommand: 8 | - bcbio_nextgen.py 9 | - runfn 10 | - prep_samples_to_rec 11 | - cwl 12 | class: CommandLineTool 13 | cwlVersion: v1.0 14 | hints: 15 | - class: DockerRequirement 16 | dockerImageId: quay.io/bcbio/bcbio-base 17 | dockerPull: quay.io/bcbio/bcbio-base 18 | - class: ResourceRequirement 19 | coresMin: 1 20 | outdirMin: 1024 21 | 
ramMin: 3072 22 | inputs: 23 | - id: config__algorithm__variant_regions 24 | type: 25 | items: 'null' 26 | type: array 27 | - id: reference__fasta__base 28 | type: 29 | items: File 30 | type: array 31 | - id: description 32 | type: 33 | items: string 34 | type: array 35 | outputs: 36 | - id: prep_samples_rec 37 | type: 38 | items: 39 | fields: 40 | - name: description 41 | type: string 42 | - name: reference__fasta__base 43 | type: File 44 | - name: config__algorithm__variant_regions 45 | type: 'null' 46 | name: prep_samples_rec 47 | type: record 48 | type: array 49 | requirements: 50 | - class: InlineJavascriptRequirement 51 | - class: InitialWorkDirRequirement 52 | listing: 53 | - entry: $(JSON.stringify(inputs)) 54 | entryname: cwl.inputs.json 55 | -------------------------------------------------------------------------------- /NA12878-chr20/NA12878-platinum-chr20-workflow/steps/combine_sample_regions.cwl: -------------------------------------------------------------------------------- 1 | arguments: 2 | - position: 0 3 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 4 | - sentinel_parallel=multi-combined 5 | - sentinel_outputs=config__algorithm__callable_regions,config__algorithm__non_callable_regions,config__algorithm__callable_count 6 | - sentinel_inputs=regions__callable:var,regions__nblock:var,config__algorithm__nomap_split_size:var,config__algorithm__nomap_split_targets:var,reference__fasta__base:var,description:var,resources:var 7 | baseCommand: 8 | - bcbio_nextgen.py 9 | - runfn 10 | - combine_sample_regions 11 | - cwl 12 | class: CommandLineTool 13 | cwlVersion: v1.0 14 | hints: 15 | - class: DockerRequirement 16 | dockerImageId: quay.io/bcbio/bcbio-vc 17 | dockerPull: quay.io/bcbio/bcbio-vc 18 | - class: ResourceRequirement 19 | coresMin: 1 20 | outdirMin: 7802 21 | ramMin: 3072 22 | tmpdirMin: 6778 23 | - class: SoftwareRequirement 24 | packages: 25 | - package: bedtools 26 | specs: 27 | - 
https://anaconda.org/bioconda/bedtools 28 | - package: htslib 29 | specs: 30 | - https://anaconda.org/bioconda/htslib 31 | - package: gatk4 32 | specs: 33 | - https://anaconda.org/bioconda/gatk4 34 | - package: gatk 35 | specs: 36 | - https://anaconda.org/bioconda/gatk 37 | inputs: 38 | - id: regions__callable 39 | type: 40 | items: File 41 | type: array 42 | - id: regions__nblock 43 | type: 44 | items: File 45 | type: array 46 | - id: config__algorithm__nomap_split_size 47 | type: 48 | items: long 49 | type: array 50 | - id: config__algorithm__nomap_split_targets 51 | type: 52 | items: long 53 | type: array 54 | - id: reference__fasta__base 55 | secondaryFiles: 56 | - .fai 57 | - ^.dict 58 | type: 59 | items: File 60 | type: array 61 | - id: description 62 | type: 63 | items: string 64 | type: array 65 | - id: resources 66 | type: 67 | items: string 68 | type: array 69 | outputs: 70 | - id: config__algorithm__callable_regions 71 | type: 72 | items: File 73 | type: array 74 | - id: config__algorithm__non_callable_regions 75 | type: 76 | items: File 77 | type: array 78 | - id: config__algorithm__callable_count 79 | type: 80 | items: int 81 | type: array 82 | requirements: 83 | - class: InlineJavascriptRequirement 84 | - class: InitialWorkDirRequirement 85 | listing: 86 | - entry: $(JSON.stringify(inputs)) 87 | entryname: cwl.inputs.json 88 | -------------------------------------------------------------------------------- /NA12878-chr20/NA12878-platinum-chr20-workflow/steps/prep_align_inputs.cwl: -------------------------------------------------------------------------------- 1 | arguments: 2 | - position: 0 3 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 4 | - sentinel_parallel=single-split 5 | - sentinel_outputs=process_alignment_rec:files;config__algorithm__quality_format;align_split 6 | - sentinel_inputs=alignment_rec:record 7 | baseCommand: 8 | - bcbio_nextgen.py 9 | - runfn 10 | - prep_align_inputs 11 | - cwl 12 | class: 
CommandLineTool 13 | cwlVersion: v1.0 14 | hints: 15 | - class: DockerRequirement 16 | dockerImageId: quay.io/bcbio/bcbio-vc 17 | dockerPull: quay.io/bcbio/bcbio-vc 18 | - class: ResourceRequirement 19 | coresMin: 1 20 | outdirMin: 21342 21 | ramMin: 3072 22 | tmpdirMin: 20318 23 | - class: SoftwareRequirement 24 | packages: 25 | - package: grabix 26 | specs: 27 | - https://anaconda.org/bioconda/grabix 28 | - package: htslib 29 | specs: 30 | - https://anaconda.org/bioconda/htslib 31 | - package: biobambam 32 | specs: 33 | - https://anaconda.org/bioconda/biobambam 34 | inputs: 35 | - id: alignment_rec 36 | type: 37 | fields: 38 | - name: description 39 | type: string 40 | - name: resources 41 | type: string 42 | - name: config__algorithm__align_split_size 43 | type: 44 | - 'null' 45 | - string 46 | - name: reference__fasta__base 47 | type: File 48 | - name: rgnames__lb 49 | type: 50 | - 'null' 51 | - string 52 | - name: rgnames__rg 53 | type: string 54 | - name: rgnames__lane 55 | type: string 56 | - name: reference__bwa__indexes 57 | type: File 58 | - name: files 59 | type: 60 | items: File 61 | type: array 62 | - name: config__algorithm__aligner 63 | type: string 64 | - name: rgnames__pl 65 | type: string 66 | - name: config__algorithm__mark_duplicates 67 | type: 68 | - string 69 | - 'null' 70 | - boolean 71 | - name: rgnames__pu 72 | type: string 73 | - name: rgnames__sample 74 | type: string 75 | name: alignment_rec 76 | type: record 77 | outputs: 78 | - id: process_alignment_rec 79 | type: 80 | items: 81 | fields: 82 | - name: files 83 | type: 84 | items: File 85 | type: array 86 | - name: config__algorithm__quality_format 87 | type: string 88 | - name: align_split 89 | type: 90 | - string 91 | - 'null' 92 | name: process_alignment_rec 93 | type: record 94 | type: array 95 | requirements: 96 | - class: InlineJavascriptRequirement 97 | - class: InitialWorkDirRequirement 98 | listing: 99 | - entry: $(JSON.stringify(inputs)) 100 | entryname: cwl.inputs.json 101 | 
-------------------------------------------------------------------------------- /NA12878-chr20/NA12878-platinum-chr20-workflow/steps/prep_samples.cwl: -------------------------------------------------------------------------------- 1 | arguments: 2 | - position: 0 3 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 4 | - sentinel_parallel=multi-parallel 5 | - sentinel_outputs=config__algorithm__variant_regions,config__algorithm__variant_regions_merged,config__algorithm__variant_regions_orig,config__algorithm__coverage,config__algorithm__coverage_merged,config__algorithm__coverage_orig,config__algorithm__seq2c_bed_ready 6 | - sentinel_inputs=prep_samples_rec:record 7 | baseCommand: 8 | - bcbio_nextgen.py 9 | - runfn 10 | - prep_samples 11 | - cwl 12 | class: CommandLineTool 13 | cwlVersion: v1.0 14 | hints: 15 | - class: DockerRequirement 16 | dockerImageId: quay.io/bcbio/bcbio-vc 17 | dockerPull: quay.io/bcbio/bcbio-vc 18 | - class: ResourceRequirement 19 | coresMin: 1 20 | outdirMin: 7802 21 | ramMin: 3072 22 | tmpdirMin: 6778 23 | - class: SoftwareRequirement 24 | packages: 25 | - package: htslib 26 | specs: 27 | - https://anaconda.org/bioconda/htslib 28 | - package: bedtools 29 | specs: 30 | - https://anaconda.org/bioconda/bedtools 31 | - package: pythonpy 32 | specs: 33 | - https://anaconda.org/bioconda/pythonpy 34 | inputs: 35 | - id: prep_samples_rec 36 | type: 37 | fields: 38 | - name: description 39 | type: string 40 | - name: resources 41 | type: string 42 | - name: reference__fasta__base 43 | type: File 44 | - name: config__algorithm__variant_regions 45 | type: 46 | - 'null' 47 | - string 48 | name: prep_samples_rec 49 | type: record 50 | outputs: 51 | - id: config__algorithm__variant_regions 52 | type: 53 | - File 54 | - 'null' 55 | - id: config__algorithm__variant_regions_merged 56 | type: 57 | - File 58 | - 'null' 59 | - id: config__algorithm__variant_regions_orig 60 | type: 61 | - File 62 | - 'null' 63 | - id: 
config__algorithm__coverage 64 | type: 65 | - File 66 | - 'null' 67 | - id: config__algorithm__coverage_merged 68 | type: 69 | - File 70 | - 'null' 71 | - id: config__algorithm__coverage_orig 72 | type: 73 | - File 74 | - 'null' 75 | - id: config__algorithm__seq2c_bed_ready 76 | type: 77 | - File 78 | - 'null' 79 | requirements: 80 | - class: InlineJavascriptRequirement 81 | - class: InitialWorkDirRequirement 82 | listing: 83 | - entry: $(JSON.stringify(inputs)) 84 | entryname: cwl.inputs.json 85 | -------------------------------------------------------------------------------- /NA12878-chr20/NA12878-platinum-chr20-workflow/steps/prep_samples_to_rec.cwl: -------------------------------------------------------------------------------- 1 | arguments: 2 | - position: 0 3 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 4 | - sentinel_parallel=multi-combined 5 | - sentinel_outputs=prep_samples_rec:description;resources;reference__fasta__base;config__algorithm__variant_regions 6 | - sentinel_inputs=config__algorithm__variant_regions:var,reference__fasta__base:var,description:var,resources:var 7 | baseCommand: 8 | - bcbio_nextgen.py 9 | - runfn 10 | - prep_samples_to_rec 11 | - cwl 12 | class: CommandLineTool 13 | cwlVersion: v1.0 14 | hints: 15 | - class: DockerRequirement 16 | dockerImageId: quay.io/bcbio/bcbio-vc 17 | dockerPull: quay.io/bcbio/bcbio-vc 18 | - class: ResourceRequirement 19 | coresMin: 1 20 | outdirMin: 7802 21 | ramMin: 3072 22 | tmpdirMin: 6778 23 | inputs: 24 | - id: config__algorithm__variant_regions 25 | type: 26 | items: 27 | - 'null' 28 | - string 29 | type: array 30 | - id: reference__fasta__base 31 | secondaryFiles: 32 | - .fai 33 | - ^.dict 34 | type: 35 | items: File 36 | type: array 37 | - id: description 38 | type: 39 | items: string 40 | type: array 41 | - id: resources 42 | type: 43 | items: string 44 | type: array 45 | outputs: 46 | - id: prep_samples_rec 47 | type: 48 | items: 49 | fields: 50 | - name: 
description 51 | type: string 52 | - name: resources 53 | type: string 54 | - name: reference__fasta__base 55 | type: File 56 | - name: config__algorithm__variant_regions 57 | type: 58 | - 'null' 59 | - string 60 | name: prep_samples_rec 61 | type: record 62 | type: array 63 | requirements: 64 | - class: InlineJavascriptRequirement 65 | - class: InitialWorkDirRequirement 66 | listing: 67 | - entry: $(JSON.stringify(inputs)) 68 | entryname: cwl.inputs.json 69 | -------------------------------------------------------------------------------- /NA12878-chr20/NA12878-platinum-chr20.csv: -------------------------------------------------------------------------------- 1 | samplename,description,validate,validate_regions 2 | NA12878-platinum-chr20.bam,NA12878-chr20,hg38/validation/giab-NA12878/truth_small_variants.vcf.gz,hg38/validation/giab-NA12878/truth_regions.bed 3 | -------------------------------------------------------------------------------- /NA12878-chr20/bcbio_system.yaml: -------------------------------------------------------------------------------- 1 | local: 2 | ref: biodata/collections 3 | inputs: 4 | - biodata/giab/na12878 5 | resources: 6 | default: {cores: 8, memory: 3G, jvm_opts: [-Xms750m, -Xmx3000m]} 7 | -------------------------------------------------------------------------------- /NA12878-chr20/bcbio_system_arvados.yaml: -------------------------------------------------------------------------------- 1 | arvados: 2 | reference: 5657a3361158afad0969ac25a2529ac1+78613 3 | input: [aff0b18d056d9bac4db9b87d4d600714+1979] 4 | resources: 5 | default: {cores: 8, memory: 3G, jvm_opts: [-Xms750m, -Xmx3000m]} 6 | -------------------------------------------------------------------------------- /NA12878-chr20/bcbio_system_s3.yaml: -------------------------------------------------------------------------------- 1 | s3: 2 | region: us-east-1 3 | folders: 4 | - biodata/giab/na12878 5 | - biodata/collections 6 | resources: 7 | default: {cores: 8, memory: 3G, 
jvm_opts: [-Xms750m, -Xmx3000m]} 8 | -------------------------------------------------------------------------------- /NA12878-chr20/download_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | mkdir -p biodata 5 | cd biodata 6 | synapse get -r syn9725771 7 | cd .. 8 | # cleanup synapse files 9 | cd biodata 10 | find . -name "SYNAPSE_METADATA_MANIFEST.tsv" -exec rm -f {} \; 11 | -------------------------------------------------------------------------------- /NA12878-chr20/ga4gh_execution_challenge/bcbio_NA12878-chr20_checker.json: -------------------------------------------------------------------------------- 1 | { 2 | "baseline": { 3 | "class": "File", 4 | "path": "grading-summary-NA12878-chr20-baseline.csv" 5 | }, 6 | "comparison": { 7 | "class": "File", 8 | "path": "grading-summary-NA12878-chr20.csv" 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /NA12878-chr20/ga4gh_execution_challenge/bcbio_NA12878-chr20_submit.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_file": { 3 | "class": "File", 4 | "path": ".synapseConfig" 5 | }, 6 | "team_name": "", 7 | "eval_id": "", 8 | "file": [ 9 | { 10 | "class": "File", 11 | "path": "grading-summary-NA12878-chr20.csv" 12 | }, 13 | { 14 | "class": "File", 15 | "path": "multiqc_report.html" 16 | }, 17 | { 18 | "class": "File", 19 | "path": "NA12878-chr20-sort.bam" 20 | } 21 | ], 22 | "parent_id": "" 23 | } 24 | -------------------------------------------------------------------------------- /NA12878-chr20/ga4gh_execution_challenge/grading-summary-NA12878-chr20-baseline.csv: -------------------------------------------------------------------------------- 1 | sample,caller,vtype,metric,value 2 | NA12878-chr20,platypus,SNPs,tp,65096 3 | NA12878-chr20,platypus,Indels,tp,6758 4 | NA12878-chr20,platypus,SNPs,fp,184 5 | 
NA12878-chr20,platypus,Indels,fp,79 6 | NA12878-chr20,platypus,SNPs,fn,1882 7 | NA12878-chr20,platypus,Indels,fn,4182 8 | NA12878-chr20,gatk-haplotype,SNPs,tp,66834 9 | NA12878-chr20,gatk-haplotype,Indels,tp,10227 10 | NA12878-chr20,gatk-haplotype,SNPs,fp,95 11 | NA12878-chr20,gatk-haplotype,Indels,fp,254 12 | NA12878-chr20,gatk-haplotype,SNPs,fn,144 13 | NA12878-chr20,gatk-haplotype,Indels,fn,713 14 | NA12878-chr20,freebayes,SNPs,tp,66831 15 | NA12878-chr20,freebayes,Indels,tp,9206 16 | NA12878-chr20,freebayes,SNPs,fp,335 17 | NA12878-chr20,freebayes,Indels,fp,499 18 | NA12878-chr20,freebayes,SNPs,fn,147 19 | NA12878-chr20,freebayes,Indels,fn,1734 20 | NA12878-chr20,samtools,SNPs,tp,66726 21 | NA12878-chr20,samtools,Indels,tp,8035 22 | NA12878-chr20,samtools,SNPs,fp,381 23 | NA12878-chr20,samtools,Indels,fp,1537 24 | NA12878-chr20,samtools,SNPs,fn,252 25 | NA12878-chr20,samtools,Indels,fn,2905 26 | -------------------------------------------------------------------------------- /NA12878-chr20/ga4gh_execution_challenge/link_cwl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | ORIG=`pwd` 5 | cd ../ 6 | bcbio_python $ORIG/link_to_synapse.py bcbio_NA12878-chr20 NA12878-platinum-chr20-workflow syn9725771 7 | cd $ORIG 8 | -------------------------------------------------------------------------------- /NA12878-chr20/ga4gh_execution_challenge/upload_biodata.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | ORIG=`pwd` 5 | cd ../synapse 6 | bcbio_python /mnt/work/bcbio/code/bcbio-nextgen/scripts/utils/upload_to_synapse.py bcbio_NA12878-chr20 biodata syn9725771 7 | cd $ORIG 8 | -------------------------------------------------------------------------------- /NA12878-chr20/germline_template.yaml: -------------------------------------------------------------------------------- 1 | # Small variant calling for 
germline samples 2 | --- 3 | details: 4 | - algorithm: 5 | aligner: bwa 6 | variantcaller: [gatk-haplotype, freebayes, platypus, samtools] 7 | tools_on: [gatk4] 8 | tools_off: [gemini] 9 | effects: false 10 | analysis: variant2 11 | genome_build: hg38 12 | -------------------------------------------------------------------------------- /NA12878-chr20/run_bunny.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=NA12878-platinum-chr20 5 | ORIG_DIR=`pwd` 6 | WORK_DIR=$ORIG_DIR/bunny_work 7 | mkdir -p $WORK_DIR 8 | cd $WORK_DIR 9 | 10 | # local bcbio install 11 | rabix --no-container -b $WORK_DIR $ORIG_DIR/$PNAME-workflow/main-$PNAME.cwl $ORIG_DIR/$PNAME-workflow/main-$PNAME-samples.json 12 | 13 | # with Docker 14 | # rabix -b $WORK_DIR $ORIG_DIR/$PNAME-workflow/main-$PNAME.cwl $ORIG_DIR/$PNAME-workflow/main-$PNAME-samples.json 15 | 16 | # Re-start from a previous run 17 | #CACHE_DIR=$WORK_DIR/112ebd74-6c4a-458f-800b-c2c1a8f89e1a 18 | #rabix --no-container -b $WORK_DIR --cache-dir $CACHE_DIR $ORIG_DIR/$PNAME-workflow/main-$PNAME.cwl $ORIG_DIR/$PNAME-workflow/main-$PNAME-samples.json 19 | -------------------------------------------------------------------------------- /NA12878-chr20/run_cwltool.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | #synapse get -r syn9725771 4 | PNAME=NA12878-platinum-chr20 5 | 6 | WORKDIR=`pwd`/cwltool_work 7 | CWL_TMPDIR=$WORKDIR/tmpcwl 8 | mkdir -p $CWL_TMPDIR 9 | 10 | export TMPDIR=$CWL_TMPDIR 11 | cwltool --tmpdir-prefix $CWL_TMPDIR --tmp-outdir-prefix $CWL_TMPDIR $PNAME-workflow/main-$PNAME.cwl $PNAME-workflow/main-$PNAME-samples.json 12 | -------------------------------------------------------------------------------- /NA12878-chr20/run_generate_cwl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o 
pipefail 3 | 4 | CWD=`pwd` 5 | rm -rf NA12878-platinum-chr20-workflow 6 | bcbio_vm.py template --systemconfig bcbio_system.yaml germline_template.yaml NA12878-platinum-chr20.csv 7 | bcbio_vm.py cwl --systemconfig bcbio_system.yaml NA12878-platinum-chr20/config/NA12878-platinum-chr20.yaml 8 | sed -i "s#$CWD/biodata/#../biodata/#" NA12878-platinum-chr20-workflow/main-NA12878-platinum-chr20-samples.json 9 | -------------------------------------------------------------------------------- /NA12878-chr20/run_generate_cwl_arvados.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | # Generate CWL using the Arvados system config, then rename the output directory to NA12878-platinum-chr20-workflow-arvados (replacing any previous copy). NOTE: the mv leaves no NA12878-platinum-chr20-workflow directory behind. 4 | SYSTEM_YAML=bcbio_system_arvados.yaml 5 | bcbio_vm.py template --systemconfig $SYSTEM_YAML germline_template.yaml NA12878-platinum-chr20.csv 6 | bcbio_vm.py cwl --systemconfig $SYSTEM_YAML NA12878-platinum-chr20/config/NA12878-platinum-chr20.yaml 7 | rm -rf NA12878-platinum-chr20-workflow-arvados 8 | mv NA12878-platinum-chr20-workflow NA12878-platinum-chr20-workflow-arvados 9 | -------------------------------------------------------------------------------- /NA12878-chr20/run_generate_cwl_s3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | # Generate CWL using the S3 system config. Only the samples JSON (with s3://biodata/ paths) is moved into NA12878-platinum-chr20-workflow-s3; the CWL files stay in NA12878-platinum-chr20-workflow, which receives a copy of the JSON rewritten to ../biodata/ paths. 4 | rm -rf NA12878-platinum-chr20-workflow-local 5 | rm -rf NA12878-platinum-chr20-workflow-s3 6 | rm -rf NA12878-platinum-chr20-workflow 7 | bcbio_vm.py template --systemconfig bcbio_system_s3.yaml germline_template.yaml NA12878-platinum-chr20.csv 8 | bcbio_vm.py cwl --systemconfig bcbio_system_s3.yaml NA12878-platinum-chr20/config/NA12878-platinum-chr20.yaml 9 | mkdir -p NA12878-platinum-chr20-workflow-s3 10 | mv NA12878-platinum-chr20-workflow/*-samples.json NA12878-platinum-chr20-workflow-s3 11 | sed 's#s3://biodata/#../biodata/#' NA12878-platinum-chr20-workflow-s3/main-NA12878-platinum-chr20-samples.json > NA12878-platinum-chr20-workflow/main-NA12878-platinum-chr20-samples.json 12 |
-------------------------------------------------------------------------------- /NA12878-chr20/run_generate_wdl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | # Convert the generated CWL workflow to WDL; the CWL and its samples JSON both live in $WFNAME-workflow. 4 | CODE=~/bio/bcbio-nextgen 5 | WFNAME=NA12878-platinum-chr20 6 | cwltool2wdl.py $WFNAME-workflow/main-$WFNAME.cwl $WFNAME-workflow/main-$WFNAME-samples.json 7 | -------------------------------------------------------------------------------- /NA12878-chr20/run_toil.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | #synapse get -r syn9725771 4 | VALIDATION_PNAME=NA12878-platinum-chr20 5 | 6 | # local bcbio install 7 | bcbio_vm.py cwlrun toil --no-container $VALIDATION_PNAME-workflow 8 | 9 | # with Docker 10 | #bcbio_vm.py cwlrun toil $VALIDATION_PNAME-workflow 11 | -------------------------------------------------------------------------------- /NA12878-chr20/run_toil_aws.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Run Toil on AWS with autoscaling 3 | set -eu -o pipefail 4 | 5 | PNAME=NA12878-platinum-chr20 6 | LEADER_PRIVATE_IP=10.0.0.XX 7 | JOB_STORE=aws:us-east-1:bcbio-na12878-chr20-jobstore 8 | NODE_TYPE=m4.2xlarge 9 | SPOT_BID=0.4 10 | MAX_NODES=2 11 | # The CWL files are in $PNAME-workflow; run_generate_cwl_s3.sh places only the S3-path samples JSON in $PNAME-workflow-s3. 12 | cwltoil \ 13 | --batchSystem=mesos --mesosMaster=$LEADER_PRIVATE_IP:5050 \ 14 | --provisioner aws \ 15 | --defaultPreemptable --preemptableNodeType $NODE_TYPE:$SPOT_BID --maxPreemptableNodes $MAX_NODES \ 16 | --nodeType $NODE_TYPE --maxNodes $MAX_NODES \ 17 | --jobStore $JOB_STORE --logFile $PNAME-awstoil.log \ 18 | $PNAME-workflow/main-$PNAME.cwl \ 19 | $PNAME-workflow-s3/main-$PNAME-samples.json 20 | -------------------------------------------------------------------------------- /NA24385-sv/NA24385-sv-workflow/steps/combine_sample_regions.cwl: -------------------------------------------------------------------------------- 1 |
$namespaces: 2 | arv: http://arvados.org/cwl# 3 | dx: https://www.dnanexus.com/cwl# 4 | arguments: 5 | - position: 0 6 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 7 | - sentinel_parallel=multi-combined 8 | - sentinel_outputs=config__algorithm__callable_regions,config__algorithm__non_callable_regions,config__algorithm__callable_count 9 | - sentinel_inputs=regions__callable:var,regions__nblock:var,metadata__batch:var,config__algorithm__nomap_split_size:var,config__algorithm__nomap_split_targets:var,reference__fasta__base:var,resources:var,description:var 10 | - run_number=0 11 | baseCommand: 12 | - bcbio_nextgen.py 13 | - runfn 14 | - combine_sample_regions 15 | - cwl 16 | class: CommandLineTool 17 | cwlVersion: v1.0 18 | hints: 19 | - class: DockerRequirement 20 | dockerImageId: quay.io/bcbio/bcbio-vc 21 | dockerPull: quay.io/bcbio/bcbio-vc 22 | - class: ResourceRequirement 23 | coresMin: 1 24 | outdirMin: 7668 25 | ramMin: 3584 26 | tmpdirMin: 3322 27 | - class: dx:InputResourceRequirement 28 | indirMin: 3008 29 | - class: SoftwareRequirement 30 | packages: 31 | - package: bedtools 32 | specs: 33 | - https://anaconda.org/bioconda/bedtools 34 | - package: htslib 35 | specs: 36 | - https://anaconda.org/bioconda/htslib 37 | - package: gatk4 38 | specs: 39 | - https://anaconda.org/bioconda/gatk4 40 | - package: gatk 41 | specs: 42 | - https://anaconda.org/bioconda/gatk 43 | - class: arv:APIRequirement 44 | inputs: 45 | - id: regions__callable 46 | type: 47 | items: 48 | - File 49 | - 'null' 50 | type: array 51 | - id: regions__nblock 52 | type: 53 | items: 54 | - File 55 | - 'null' 56 | type: array 57 | - id: metadata__batch 58 | type: 59 | items: 60 | - 'null' 61 | - string 62 | type: array 63 | - id: config__algorithm__nomap_split_size 64 | type: 65 | items: long 66 | type: array 67 | - id: config__algorithm__nomap_split_targets 68 | type: 69 | items: long 70 | type: array 71 | - id: reference__fasta__base 72 | secondaryFiles: 73 | - 
.fai 74 | - ^.dict 75 | type: 76 | items: File 77 | type: array 78 | - id: resources 79 | type: 80 | items: string 81 | type: array 82 | - id: description 83 | type: 84 | items: string 85 | type: array 86 | outputs: 87 | - id: config__algorithm__callable_regions 88 | type: 89 | items: File 90 | type: array 91 | - id: config__algorithm__non_callable_regions 92 | type: 93 | items: File 94 | type: array 95 | - id: config__algorithm__callable_count 96 | type: 97 | items: int 98 | type: array 99 | requirements: 100 | - class: InlineJavascriptRequirement 101 | - class: InitialWorkDirRequirement 102 | listing: 103 | - entry: $(JSON.stringify(inputs)) 104 | entryname: cwl.inputs.json 105 | -------------------------------------------------------------------------------- /NA24385-sv/NA24385-sv-workflow/steps/multiqc_summary.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-combined 7 | - sentinel_outputs=summary__multiqc 8 | - sentinel_inputs=qcout_rec:record 9 | - run_number=0 10 | baseCommand: 11 | - bcbio_nextgen.py 12 | - runfn 13 | - multiqc_summary 14 | - cwl 15 | class: CommandLineTool 16 | cwlVersion: v1.0 17 | hints: 18 | - class: DockerRequirement 19 | dockerImageId: quay.io/bcbio/bcbio-vc 20 | dockerPull: quay.io/bcbio/bcbio-vc 21 | - class: ResourceRequirement 22 | coresMin: 1 23 | outdirMin: 27601 24 | ramMin: 3584 25 | tmpdirMin: 13289 26 | - class: dx:InputResourceRequirement 27 | indirMin: 1 28 | - class: SoftwareRequirement 29 | packages: 30 | - package: multiqc 31 | specs: 32 | - https://anaconda.org/bioconda/multiqc 33 | - package: multiqc-bcbio 34 | specs: 35 | - https://anaconda.org/bioconda/multiqc-bcbio 36 | inputs: 37 | - id: qcout_rec 38 | type: 39 | items: 40 | fields: 41 | - name: summary__qc 42 | type: 43 | - File 44 
| - 'null' 45 | - name: summary__metrics 46 | type: 47 | - string 48 | - 'null' 49 | - name: description 50 | type: string 51 | - name: genome_build 52 | type: string 53 | - name: config__algorithm__tools_off 54 | type: 55 | - 'null' 56 | - string 57 | - items: 58 | - 'null' 59 | - string 60 | type: array 61 | - name: config__algorithm__qc 62 | type: 63 | items: string 64 | type: array 65 | - name: config__algorithm__tools_on 66 | type: 67 | - 'null' 68 | - string 69 | - items: 70 | - 'null' 71 | - string 72 | type: array 73 | name: qcout_rec 74 | type: record 75 | type: array 76 | outputs: 77 | - id: summary__multiqc 78 | type: 79 | items: 80 | - File 81 | - 'null' 82 | type: array 83 | requirements: 84 | - class: InlineJavascriptRequirement 85 | - class: InitialWorkDirRequirement 86 | listing: 87 | - entry: $(JSON.stringify(inputs)) 88 | entryname: cwl.inputs.json 89 | -------------------------------------------------------------------------------- /NA24385-sv/NA24385-sv-workflow/steps/prep_samples.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-parallel 7 | - sentinel_outputs=rgnames__sample,config__algorithm__variant_regions,config__algorithm__variant_regions_merged,config__algorithm__variant_regions_orig,config__algorithm__coverage,config__algorithm__coverage_merged,config__algorithm__coverage_orig,config__algorithm__seq2c_bed_ready 8 | - sentinel_inputs=prep_samples_rec:record 9 | - run_number=0 10 | baseCommand: 11 | - bcbio_nextgen.py 12 | - runfn 13 | - prep_samples 14 | - cwl 15 | class: CommandLineTool 16 | cwlVersion: v1.0 17 | hints: 18 | - class: DockerRequirement 19 | dockerImageId: quay.io/bcbio/bcbio-vc 20 | dockerPull: quay.io/bcbio/bcbio-vc 21 | - class: ResourceRequirement 22 | coresMin: 1 23 | outdirMin: 7668 24 
| ramMin: 3584 25 | tmpdirMin: 3322 26 | - class: dx:InputResourceRequirement 27 | indirMin: 3021 28 | - class: SoftwareRequirement 29 | packages: 30 | - package: htslib 31 | specs: 32 | - https://anaconda.org/bioconda/htslib 33 | - package: bedtools 34 | specs: 35 | - https://anaconda.org/bioconda/bedtools 36 | - package: pythonpy 37 | specs: 38 | - https://anaconda.org/bioconda/pythonpy 39 | inputs: 40 | - id: prep_samples_rec 41 | type: 42 | fields: 43 | - name: resources 44 | type: string 45 | - name: description 46 | type: string 47 | - name: reference__fasta__base 48 | type: File 49 | - name: config__algorithm__coverage 50 | type: File 51 | - name: rgnames__sample 52 | type: string 53 | - name: config__algorithm__variant_regions 54 | type: File 55 | name: prep_samples_rec 56 | type: record 57 | outputs: 58 | - id: rgnames__sample 59 | type: string 60 | - id: config__algorithm__variant_regions 61 | type: 62 | - File 63 | - 'null' 64 | - id: config__algorithm__variant_regions_merged 65 | type: 66 | - File 67 | - 'null' 68 | - id: config__algorithm__variant_regions_orig 69 | type: 70 | - File 71 | - 'null' 72 | - id: config__algorithm__coverage 73 | type: 74 | - File 75 | - 'null' 76 | - id: config__algorithm__coverage_merged 77 | type: 78 | - File 79 | - 'null' 80 | - id: config__algorithm__coverage_orig 81 | type: 82 | - File 83 | - 'null' 84 | - id: config__algorithm__seq2c_bed_ready 85 | type: 86 | - File 87 | - 'null' 88 | requirements: 89 | - class: InlineJavascriptRequirement 90 | - class: InitialWorkDirRequirement 91 | listing: 92 | - entry: $(JSON.stringify(inputs)) 93 | entryname: cwl.inputs.json 94 | -------------------------------------------------------------------------------- /NA24385-sv/NA24385-sv-workflow/steps/prep_samples_to_rec.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: 
sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-combined 7 | - sentinel_outputs=prep_samples_rec:resources;description;reference__fasta__base;config__algorithm__coverage;rgnames__sample;config__algorithm__variant_regions 8 | - sentinel_inputs=config__algorithm__coverage:var,rgnames__sample:var,config__algorithm__variant_regions:var,reference__fasta__base:var,resources:var,description:var 9 | - run_number=0 10 | baseCommand: 11 | - bcbio_nextgen.py 12 | - runfn 13 | - prep_samples_to_rec 14 | - cwl 15 | class: CommandLineTool 16 | cwlVersion: v1.0 17 | hints: 18 | - class: DockerRequirement 19 | dockerImageId: quay.io/bcbio/bcbio-vc 20 | dockerPull: quay.io/bcbio/bcbio-vc 21 | - class: ResourceRequirement 22 | coresMin: 1 23 | outdirMin: 7668 24 | ramMin: 3584 25 | tmpdirMin: 3322 26 | - class: dx:InputResourceRequirement 27 | indirMin: 0 28 | inputs: 29 | - id: config__algorithm__coverage 30 | type: 31 | items: File 32 | type: array 33 | - id: rgnames__sample 34 | type: 35 | items: string 36 | type: array 37 | - id: config__algorithm__variant_regions 38 | type: 39 | items: File 40 | type: array 41 | - id: reference__fasta__base 42 | secondaryFiles: 43 | - .fai 44 | - ^.dict 45 | type: 46 | items: File 47 | type: array 48 | - id: resources 49 | type: 50 | items: string 51 | type: array 52 | - id: description 53 | type: 54 | items: string 55 | type: array 56 | outputs: 57 | - id: prep_samples_rec 58 | type: 59 | items: 60 | fields: 61 | - name: resources 62 | type: string 63 | - name: description 64 | type: string 65 | - name: reference__fasta__base 66 | type: File 67 | - name: config__algorithm__coverage 68 | type: File 69 | - name: rgnames__sample 70 | type: string 71 | - name: config__algorithm__variant_regions 72 | type: File 73 | name: prep_samples_rec 74 | type: record 75 | type: array 76 | requirements: 77 | - class: InlineJavascriptRequirement 78 | - class: InitialWorkDirRequirement 79 | listing: 80 | - entry: 
$(JSON.stringify(inputs)) 81 | entryname: cwl.inputs.json 82 | -------------------------------------------------------------------------------- /NA24385-sv/NA24385-sv.yaml: -------------------------------------------------------------------------------- 1 | details: 2 | - algorithm: 3 | align_split_size: false 4 | aligner: minimap2 5 | coverage: Exome-AZ_V2_pluschr20-GRCh37.bed 6 | variantcaller: false 7 | svcaller: [manta, lumpy, cnvkit] 8 | svvalidate: giab-NA24385/truth_sv.vcf.gz 9 | variant_regions: Exome-AZ_V2_pluschr20-GRCh37.bed 10 | analysis: variant2 11 | description: NA24385 12 | files: 13 | - NA24385_R1.fq.gz 14 | - NA24385_R2.fq.gz 15 | genome_build: GRCh37 16 | metadata: {} 17 | -------------------------------------------------------------------------------- /NA24385-sv/bcbio_system.yaml: -------------------------------------------------------------------------------- 1 | local: 2 | ref: biodata/collections 3 | inputs: 4 | - biodata/regions 5 | - biodata/giab/na24385 6 | resources: 7 | default: {cores: 8, memory: 3500M, jvm_opts: [-Xms750m, -Xmx3500m]} 8 | -------------------------------------------------------------------------------- /NA24385-sv/download_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | ORIG_DIR=`pwd` 5 | # regions 6 | mkdir -p biodata/regions 7 | cd biodata/regions 8 | synapse get -r syn10468188 9 | cd $ORIG_DIR 10 | # GRCh37 11 | mkdir -p biodata/collections/GRCh37 12 | cd biodata/collections/GRCh37 13 | synapse get -r syn12027897 14 | cd $ORIG_DIR 15 | # giab samples 16 | mkdir -p biodata/giab/na24385 17 | cd biodata/giab/na24385 18 | synapse get -r syn11831515 19 | cd $ORIG_DIR 20 | # cleanup synapse files 21 | cd biodata 22 | find . 
-name "SYNAPSE_METADATA_MANIFEST.tsv" -exec rm -f {} \; 23 | cd $ORIG_DIR 24 | -------------------------------------------------------------------------------- /NA24385-sv/run_bunny.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=NA24385-sv 5 | 6 | # local bcbio install 7 | bcbio_vm.py cwlrun bunny --no-container $PNAME-workflow 8 | 9 | # with Docker 10 | #bcbio_vm.py cwlrun bunny $PNAME-workflow 11 | -------------------------------------------------------------------------------- /NA24385-sv/run_cromwell.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=NA24385-sv 5 | 6 | # local bcbio install 7 | bcbio_vm.py cwlrun cromwell -s htcondor --no-container $PNAME-workflow 8 | -------------------------------------------------------------------------------- /NA24385-sv/run_generate_cwl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=NA24385-sv 5 | 6 | CWD=`pwd` 7 | rm -rf $PNAME-workflow 8 | bcbio_vm.py cwl --systemconfig bcbio_system.yaml $PNAME.yaml 9 | sed -i "s#$CWD/biodata/#../biodata/#" $PNAME-workflow/main-$PNAME-samples.json 10 | -------------------------------------------------------------------------------- /NA24385-sv/run_toil.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | PNAME=NA24385-sv # must match the $PNAME-workflow directory written by run_generate_cwl.sh 4 | 5 | # local bcbio install 6 | bcbio_vm.py cwlrun toil --no-container $PNAME-workflow 7 | 8 | # with Docker 9 | #bcbio_vm.py cwlrun toil $PNAME-workflow 10 | -------------------------------------------------------------------------------- /SGDP-recall-CGC/SGDP-recall-cgc/steps/combine_sample_regions.cwl: -------------------------------------------------------------------------------- 1 | arguments: 2 | - position: 0 3 | 
valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 4 | baseCommand: 5 | - bcbio_nextgen.py 6 | - runfn 7 | - combine_sample_regions 8 | - cwl 9 | class: CommandLineTool 10 | cwlVersion: v1.0 11 | hints: 12 | - class: DockerRequirement 13 | dockerImageId: quay.io/bcbio/bcbio-align 14 | dockerPull: quay.io/bcbio/bcbio-align 15 | - class: ResourceRequirement 16 | coresMin: 1 17 | outdirMin: 1024 18 | ramMin: 3072 19 | - class: SoftwareRequirement 20 | packages: 21 | - package: bedtools 22 | specs: 23 | - https://anaconda.org/bioconda/bedtools 24 | - package: htslib 25 | specs: 26 | - https://anaconda.org/bioconda/htslib 27 | inputs: 28 | - default: multi-combined 29 | id: sentinel_parallel 30 | inputBinding: 31 | itemSeparator: ;; 32 | position: 0 33 | prefix: sentinel_parallel= 34 | separate: false 35 | type: string 36 | - default: config__algorithm__callable_regions,config__algorithm__non_callable_regions,config__algorithm__callable_count 37 | id: sentinel_outputs 38 | inputBinding: 39 | itemSeparator: ;; 40 | position: 1 41 | prefix: sentinel_outputs= 42 | separate: false 43 | type: string 44 | - default: regions__callable:var,regions__nblock:var,config__algorithm__nomap_split_size:var,config__algorithm__nomap_split_targets:var,reference__fasta__base:var,description:var 45 | id: sentinel_inputs 46 | inputBinding: 47 | itemSeparator: ;; 48 | position: 2 49 | prefix: sentinel_inputs= 50 | separate: false 51 | type: string 52 | - id: regions__callable 53 | type: 54 | items: File 55 | type: array 56 | - id: regions__nblock 57 | type: 58 | items: File 59 | type: array 60 | - id: config__algorithm__nomap_split_size 61 | type: 62 | items: long 63 | type: array 64 | - id: config__algorithm__nomap_split_targets 65 | type: 66 | items: long 67 | type: array 68 | - id: reference__fasta__base 69 | type: 70 | items: File 71 | type: array 72 | - id: description 73 | type: 74 | items: string 75 | type: array 76 | outputs: 77 | - id: 
config__algorithm__callable_regions 78 | type: 79 | items: File 80 | type: array 81 | - id: config__algorithm__non_callable_regions 82 | type: 83 | items: File 84 | type: array 85 | - id: config__algorithm__callable_count 86 | type: 87 | items: int 88 | type: array 89 | requirements: 90 | - class: InlineJavascriptRequirement 91 | - class: InitialWorkDirRequirement 92 | listing: 93 | - entry: $(JSON.stringify(inputs)) 94 | entryname: cwl.inputs.json 95 | -------------------------------------------------------------------------------- /SGDP-recall-CGC/SGDP-recall-cgc/steps/prep_samples.cwl: -------------------------------------------------------------------------------- 1 | arguments: 2 | - position: 0 3 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 4 | baseCommand: 5 | - bcbio_nextgen.py 6 | - runfn 7 | - prep_samples 8 | - cwl 9 | class: CommandLineTool 10 | cwlVersion: v1.0 11 | hints: 12 | - class: DockerRequirement 13 | dockerImageId: quay.io/bcbio/bcbio-align 14 | dockerPull: quay.io/bcbio/bcbio-align 15 | - class: ResourceRequirement 16 | coresMin: 1 17 | outdirMin: 1024 18 | ramMin: 3072 19 | - class: SoftwareRequirement 20 | packages: 21 | - package: htslib 22 | specs: 23 | - https://anaconda.org/bioconda/htslib 24 | - package: bedtools 25 | specs: 26 | - https://anaconda.org/bioconda/bedtools 27 | - package: pythonpy 28 | specs: 29 | - https://anaconda.org/bioconda/pythonpy 30 | inputs: 31 | - default: multi-parallel 32 | id: sentinel_parallel 33 | inputBinding: 34 | itemSeparator: ;; 35 | position: 0 36 | prefix: sentinel_parallel= 37 | separate: false 38 | type: string 39 | - default: config__algorithm__variant_regions,config__algorithm__variant_regions_merged,config__algorithm__variant_regions_orig,config__algorithm__coverage,config__algorithm__coverage_merged,config__algorithm__coverage_orig,config__algorithm__seq2c_bed_ready 40 | id: sentinel_outputs 41 | inputBinding: 42 | itemSeparator: ;; 43 | position: 1 44 | prefix: 
sentinel_outputs= 45 | separate: false 46 | type: string 47 | - default: prep_samples_rec:record 48 | id: sentinel_inputs 49 | inputBinding: 50 | itemSeparator: ;; 51 | position: 2 52 | prefix: sentinel_inputs= 53 | separate: false 54 | type: string 55 | - id: prep_samples_rec 56 | type: 57 | fields: 58 | - name: description 59 | type: string 60 | - name: reference__fasta__base 61 | type: File 62 | - name: config__algorithm__variant_regions 63 | type: 'null' 64 | name: prep_samples_rec 65 | type: record 66 | outputs: 67 | - id: config__algorithm__variant_regions 68 | type: 69 | - File 70 | - 'null' 71 | - id: config__algorithm__variant_regions_merged 72 | type: 73 | - File 74 | - 'null' 75 | - id: config__algorithm__variant_regions_orig 76 | type: 77 | - File 78 | - 'null' 79 | - id: config__algorithm__coverage 80 | type: 81 | - File 82 | - 'null' 83 | - id: config__algorithm__coverage_merged 84 | type: 85 | - File 86 | - 'null' 87 | - id: config__algorithm__coverage_orig 88 | type: 89 | - File 90 | - 'null' 91 | - id: config__algorithm__seq2c_bed_ready 92 | type: 93 | - File 94 | - 'null' 95 | requirements: 96 | - class: InlineJavascriptRequirement 97 | - class: InitialWorkDirRequirement 98 | listing: 99 | - entry: $(JSON.stringify(inputs)) 100 | entryname: cwl.inputs.json 101 | -------------------------------------------------------------------------------- /SGDP-recall-CGC/SGDP-recall-cgc/steps/prep_samples_to_rec.cwl: -------------------------------------------------------------------------------- 1 | arguments: 2 | - position: 0 3 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 4 | baseCommand: 5 | - bcbio_nextgen.py 6 | - runfn 7 | - prep_samples_to_rec 8 | - cwl 9 | class: CommandLineTool 10 | cwlVersion: v1.0 11 | hints: 12 | - class: DockerRequirement 13 | dockerImageId: quay.io/bcbio/bcbio-base 14 | dockerPull: quay.io/bcbio/bcbio-base 15 | - class: ResourceRequirement 16 | coresMin: 1 17 | outdirMin: 1024 18 | ramMin: 3072 
19 | inputs: 20 | - default: multi-combined 21 | id: sentinel_parallel 22 | inputBinding: 23 | itemSeparator: ;; 24 | position: 0 25 | prefix: sentinel_parallel= 26 | separate: false 27 | type: string 28 | - default: prep_samples_rec:description;reference__fasta__base;config__algorithm__variant_regions 29 | id: sentinel_outputs 30 | inputBinding: 31 | itemSeparator: ;; 32 | position: 1 33 | prefix: sentinel_outputs= 34 | separate: false 35 | type: string 36 | - default: config__algorithm__variant_regions:var,reference__fasta__base:var,description:var 37 | id: sentinel_inputs 38 | inputBinding: 39 | itemSeparator: ;; 40 | position: 2 41 | prefix: sentinel_inputs= 42 | separate: false 43 | type: string 44 | - id: config__algorithm__variant_regions 45 | type: 46 | items: 'null' 47 | type: array 48 | - id: reference__fasta__base 49 | type: 50 | items: File 51 | type: array 52 | - id: description 53 | type: 54 | items: string 55 | type: array 56 | outputs: 57 | - id: prep_samples_rec 58 | type: 59 | items: 60 | fields: 61 | - name: description 62 | type: string 63 | - name: reference__fasta__base 64 | type: File 65 | - name: config__algorithm__variant_regions 66 | type: 'null' 67 | name: prep_samples_rec 68 | type: record 69 | type: array 70 | requirements: 71 | - class: InlineJavascriptRequirement 72 | - class: InitialWorkDirRequirement 73 | listing: 74 | - entry: $(JSON.stringify(inputs)) 75 | entryname: cwl.inputs.json 76 | -------------------------------------------------------------------------------- /SGDP-recall-CGC/SGDP-recall.csv: -------------------------------------------------------------------------------- 1 | samplename,description,sex 2 | LP6005443-DNA_H08.srt.aln.bam,S_Dinka-1,male 3 | -------------------------------------------------------------------------------- /SGDP-recall-CGC/SGDP-recall/config/SGDP-recall-template.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | details: 3 | - analysis: variant2 4 
| genome_build: hg38 5 | algorithm: 6 | aligner: bwa 7 | variantcaller: [gatk-haplotype, freebayes, platypus] 8 | ensemble: 9 | numpass: 2 10 | svcaller: [lumpy, manta, cnvkit] 11 | tools_off: [gemini] 12 | effects: false 13 | -------------------------------------------------------------------------------- /SGDP-recall-CGC/SGDP-recall/config/SGDP-recall.csv: -------------------------------------------------------------------------------- 1 | samplename,description,sex 2 | LP6005443-DNA_H08.srt.aln.bam,S_Dinka-1,male 3 | -------------------------------------------------------------------------------- /SGDP-recall-CGC/SGDP-recall/config/SGDP-recall.yaml: -------------------------------------------------------------------------------- 1 | details: 2 | - algorithm: 3 | aligner: bwa 4 | effects: false 5 | ensemble: 6 | numpass: 2 7 | svcaller: 8 | - lumpy 9 | - manta 10 | - cnvkit 11 | tools_off: 12 | - gemini 13 | variantcaller: 14 | - gatk-haplotype 15 | - freebayes 16 | - platypus 17 | analysis: variant2 18 | description: S_Dinka-1 19 | files: 20 | - sbg:58ee2a76e4b0edbfd71e9fc4/LP6005443-DNA_H08.srt.aln.bam 21 | genome_build: hg38 22 | metadata: 23 | sex: male 24 | fc_date: '2017-05-13' 25 | fc_name: SGDP-recall 26 | upload: 27 | dir: ../final 28 | -------------------------------------------------------------------------------- /SGDP-recall-CGC/bcbio_system_cgc.yaml: -------------------------------------------------------------------------------- 1 | sbgenomics: 2 | project: bchapman/sgdp-recalling 3 | reference: bchapman/biodata-hg38 4 | resources: 5 | default: {cores: 8, memory: 3G, jvm_opts: [-Xms750m, -Xmx3000m]} 6 | -------------------------------------------------------------------------------- /SGDP-recall-CGC/germline_template.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | details: 3 | - analysis: variant2 4 | genome_build: hg38 5 | algorithm: 6 | aligner: bwa 7 | variantcaller: [gatk-haplotype, freebayes, 
platypus] 8 | ensemble: 9 | numpass: 2 10 | svcaller: [lumpy, manta, cnvkit] 11 | tools_off: [gemini] 12 | effects: false 13 | -------------------------------------------------------------------------------- /SGDP-recall-CGC/run_generate_cwl_cgc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | SYSTEM_YAML=bcbio_system_cgc.yaml 5 | PROJECT=SGDP-recall 6 | bcbio_vm.py template --systemconfig $SYSTEM_YAML germline_template.yaml $PROJECT.csv 7 | bcbio_vm.py cwl --systemconfig $SYSTEM_YAML $PROJECT/config/$PROJECT.yaml 8 | rm -rf $PROJECT-cgc 9 | mv $PROJECT-workflow $PROJECT-cgc 10 | rabix --resolve-app SGDP-recall-cgc/main-SGDP-recall.cwl > SGDP-recall-standalone.cwl 11 | -------------------------------------------------------------------------------- /SGDP-recall-CGC/run_upload_cgc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | bcbio_python upload_to_cgc.py bchapman/sgdp-recalling SGDP-recall-standalone.cwl SGDP-recall-cgc/main-SGDP-recall-samples.json 5 | -------------------------------------------------------------------------------- /SGDP-recall-CGC/upload_to_cgc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Upload combined CWL + JSON reference to the Cancer Genomics Cloud. 
3 | 4 | upload_to_cgc.py 5 | """ 6 | import datetime 7 | import os 8 | import json 9 | import sys 10 | 11 | import sevenbridges as sbg 12 | from sevenbridges.http.advance_access import advance_access 13 | 14 | def main(project_name, cwl_file, sample_file): 15 | api = sbg.Api(os.environ["CGC_API_URL"], os.environ["CGC_AUTH_TOKEN"]) 16 | project = [p for p in api.projects.query(limit=None).all() if p.id == project_name][0] 17 | version = 0 18 | app_id = "%s/%s/%s" % (project_name, os.path.splitext(os.path.basename(cwl_file))[0].replace("-standalone", ""), 19 | version) 20 | with open(cwl_file) as in_handle: 21 | cwl = json.load(in_handle) 22 | # _debug() 23 | with advance_access(api): 24 | app = api.apps.install_app(id=app_id, raw=cwl) 25 | 26 | with open(sample_file) as in_handle: 27 | inputs = json.load(in_handle) 28 | current_time = datetime.datetime.now().strftime("%m-%d-%Y_%H-%M-%S") 29 | task_name = "%s_%s" % (os.path.basename(app_id), current_time) 30 | task = api.tasks.create(name=task_name, project=project.id, app=app.id, inputs=inputs, run=False) 31 | 32 | def _debug(): 33 | import logging 34 | from httplib import HTTPConnection 35 | HTTPConnection.debuglevel = 2 36 | logging.basicConfig() 37 | logging.getLogger().setLevel(logging.DEBUG) 38 | requests_log = logging.getLogger("requests.packages.urllib3") 39 | requests_log.setLevel(logging.DEBUG) 40 | requests_log.propagate = True 41 | 42 | def _raw_post(app_id, cwl): 43 | import requests 44 | post_request = requests.post("https://cgc-api.sbgenomics.com/v2/%s/raw" % app_id, 45 | headers={'X-SBG-Auth-Token': os.environ["CGC_AUTH_TOKEN"], 46 | 'Accept': 'application/json', 47 | 'Content-type': 'application/json', 48 | 'X-SBG-advance-access': 'advance'}, 49 | data=json.dumps(cwl)) 50 | print(post_request) 51 | 52 | 53 | if __name__ == "__main__": 54 | main(*sys.argv[1:]) 55 | -------------------------------------------------------------------------------- /giab-chm/bcbio_system-local.yaml: 
-------------------------------------------------------------------------------- 1 | local: 2 | ref: biodata/collections 3 | inputs: 4 | - biodata/chm 5 | - biodata/regions 6 | - biodata/giab/na12878 7 | - biodata/giab/na24385 8 | - biodata/giab/na24631 9 | resources: 10 | default: {cores: 16, memory: 3750M, jvm_opts: [-Xms750m, -Xmx3750m]} 11 | -------------------------------------------------------------------------------- /giab-chm/download_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | ORIG_DIR=`pwd` 5 | # regions 6 | mkdir -p biodata/regions 7 | cd biodata/regions 8 | synapse get -r syn10468188 9 | cd $ORIG_DIR 10 | # chm 11 | mkdir -p biodata/chm 12 | cd biodata/chm 13 | synapse get -r syn11644594 14 | cd $ORIG_DIR 15 | # hg38 16 | mkdir -p biodata/collections/hg38 17 | cd biodata/collections/hg38 18 | synapse get -r syn10468301 19 | cd $ORIG_DIR 20 | # giab samples 21 | mkdir -p biodata/giab/na12878 22 | cd biodata/giab/na12878 23 | synapse get -r syn11831606 24 | cd $ORIG_DIR 25 | # cleanup synapse files 26 | cd biodata 27 | find . 
-name "SYNAPSE_METADATA_MANIFEST.tsv" -exec rm -f {} \; 28 | cd $ORIG_DIR 29 | -------------------------------------------------------------------------------- /giab-chm/germline-template.yaml: -------------------------------------------------------------------------------- 1 | details: 2 | - algorithm: 3 | aligner: minimap2 4 | align_split_size: false 5 | nomap_split_targets: 20 6 | variantcaller: [gatk-haplotype, strelka2, freebayes, deepvariant] 7 | tools_off: [gemini] 8 | analysis: variant2 9 | genome_build: hg38 10 | -------------------------------------------------------------------------------- /giab-chm/giab-chm-workflow/steps/combine_sample_regions.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | arv: http://arvados.org/cwl# 3 | dx: https://www.dnanexus.com/cwl# 4 | arguments: 5 | - position: 0 6 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 7 | - sentinel_parallel=multi-combined 8 | - sentinel_outputs=config__algorithm__callable_regions,config__algorithm__non_callable_regions,config__algorithm__callable_count 9 | - sentinel_inputs=regions__callable:var,regions__nblock:var,metadata__batch:var,config__algorithm__nomap_split_size:var,config__algorithm__nomap_split_targets:var,reference__fasta__base:var,description:var,resources:var 10 | baseCommand: 11 | - bcbio_nextgen.py 12 | - runfn 13 | - combine_sample_regions 14 | - cwl 15 | class: CommandLineTool 16 | cwlVersion: v1.0 17 | hints: 18 | - class: DockerRequirement 19 | dockerImageId: quay.io/bcbio/bcbio-vc 20 | dockerPull: quay.io/bcbio/bcbio-vc 21 | - class: ResourceRequirement 22 | coresMin: 1 23 | outdirMin: 10621 24 | ramMin: 3840 25 | tmpdirMin: 4799 26 | - class: dx:InputResourceRequirement 27 | indirMin: 3113 28 | - class: SoftwareRequirement 29 | packages: 30 | - package: bedtools 31 | specs: 32 | - https://anaconda.org/bioconda/bedtools 33 | - package: htslib 34 | specs: 35 | - 
https://anaconda.org/bioconda/htslib 36 | - package: gatk4 37 | specs: 38 | - https://anaconda.org/bioconda/gatk4 39 | - package: gatk 40 | specs: 41 | - https://anaconda.org/bioconda/gatk 42 | - class: arv:APIRequirement 43 | inputs: 44 | - id: regions__callable 45 | type: 46 | items: 47 | - File 48 | - 'null' 49 | type: array 50 | - id: regions__nblock 51 | type: 52 | items: 53 | - File 54 | - 'null' 55 | type: array 56 | - id: metadata__batch 57 | type: 58 | items: 59 | - 'null' 60 | - string 61 | type: array 62 | - id: config__algorithm__nomap_split_size 63 | type: 64 | items: long 65 | type: array 66 | - id: config__algorithm__nomap_split_targets 67 | type: 68 | items: long 69 | type: array 70 | - id: reference__fasta__base 71 | secondaryFiles: 72 | - .fai 73 | - ^.dict 74 | type: 75 | items: File 76 | type: array 77 | - id: description 78 | type: 79 | items: string 80 | type: array 81 | - id: resources 82 | type: 83 | items: string 84 | type: array 85 | outputs: 86 | - id: config__algorithm__callable_regions 87 | type: 88 | items: File 89 | type: array 90 | - id: config__algorithm__non_callable_regions 91 | type: 92 | items: File 93 | type: array 94 | - id: config__algorithm__callable_count 95 | type: 96 | items: int 97 | type: array 98 | requirements: 99 | - class: InlineJavascriptRequirement 100 | - class: InitialWorkDirRequirement 101 | listing: 102 | - entry: $(JSON.stringify(inputs)) 103 | entryname: cwl.inputs.json 104 | -------------------------------------------------------------------------------- /giab-chm/giab-chm-workflow/steps/multiqc_summary.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-combined 7 | - sentinel_outputs=summary__multiqc 8 | - sentinel_inputs=qcout_rec:record 9 | baseCommand: 10 | - 
bcbio_nextgen.py 11 | - runfn 12 | - multiqc_summary 13 | - cwl 14 | class: CommandLineTool 15 | cwlVersion: v1.0 16 | hints: 17 | - class: DockerRequirement 18 | dockerImageId: quay.io/bcbio/bcbio-vc 19 | dockerPull: quay.io/bcbio/bcbio-vc 20 | - class: ResourceRequirement 21 | coresMin: 1 22 | outdirMin: 39412 23 | ramMin: 3840 24 | tmpdirMin: 19194 25 | - class: dx:InputResourceRequirement 26 | indirMin: 1 27 | - class: SoftwareRequirement 28 | packages: 29 | - package: multiqc 30 | specs: 31 | - https://anaconda.org/bioconda/multiqc 32 | - package: multiqc-bcbio 33 | specs: 34 | - https://anaconda.org/bioconda/multiqc-bcbio 35 | inputs: 36 | - id: qcout_rec 37 | type: 38 | items: 39 | fields: 40 | - name: summary__qc 41 | type: 42 | - File 43 | - 'null' 44 | - name: summary__metrics 45 | type: 46 | - string 47 | - 'null' 48 | - name: description 49 | type: string 50 | - name: genome_build 51 | type: string 52 | - name: config__algorithm__tools_off 53 | type: 54 | items: string 55 | type: array 56 | - name: config__algorithm__qc 57 | type: 58 | items: string 59 | type: array 60 | - name: config__algorithm__tools_on 61 | type: 62 | - 'null' 63 | - string 64 | - items: 65 | - 'null' 66 | - string 67 | type: array 68 | name: qcout_rec 69 | type: record 70 | type: array 71 | outputs: 72 | - id: summary__multiqc 73 | type: 74 | items: 75 | - File 76 | - 'null' 77 | type: array 78 | requirements: 79 | - class: InlineJavascriptRequirement 80 | - class: InitialWorkDirRequirement 81 | listing: 82 | - entry: $(JSON.stringify(inputs)) 83 | entryname: cwl.inputs.json 84 | -------------------------------------------------------------------------------- /giab-chm/giab-chm-workflow/steps/prep_samples.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - 
sentinel_parallel=multi-parallel 7 | - sentinel_outputs=config__algorithm__variant_regions,config__algorithm__variant_regions_merged,config__algorithm__variant_regions_orig,config__algorithm__coverage,config__algorithm__coverage_merged,config__algorithm__coverage_orig,config__algorithm__seq2c_bed_ready 8 | - sentinel_inputs=prep_samples_rec:record 9 | baseCommand: 10 | - bcbio_nextgen.py 11 | - runfn 12 | - prep_samples 13 | - cwl 14 | class: CommandLineTool 15 | cwlVersion: v1.0 16 | hints: 17 | - class: DockerRequirement 18 | dockerImageId: quay.io/bcbio/bcbio-vc 19 | dockerPull: quay.io/bcbio/bcbio-vc 20 | - class: ResourceRequirement 21 | coresMin: 1 22 | outdirMin: 10621 23 | ramMin: 3840 24 | tmpdirMin: 4799 25 | - class: dx:InputResourceRequirement 26 | indirMin: 3127 27 | - class: SoftwareRequirement 28 | packages: 29 | - package: htslib 30 | specs: 31 | - https://anaconda.org/bioconda/htslib 32 | - package: bedtools 33 | specs: 34 | - https://anaconda.org/bioconda/bedtools 35 | - package: pythonpy 36 | specs: 37 | - https://anaconda.org/bioconda/pythonpy 38 | inputs: 39 | - id: prep_samples_rec 40 | type: 41 | fields: 42 | - name: description 43 | type: string 44 | - name: resources 45 | type: string 46 | - name: reference__fasta__base 47 | type: File 48 | - name: config__algorithm__coverage 49 | type: File 50 | - name: config__algorithm__variant_regions 51 | type: File 52 | name: prep_samples_rec 53 | type: record 54 | outputs: 55 | - id: config__algorithm__variant_regions 56 | type: 57 | - File 58 | - 'null' 59 | - id: config__algorithm__variant_regions_merged 60 | type: 61 | - File 62 | - 'null' 63 | - id: config__algorithm__variant_regions_orig 64 | type: 65 | - File 66 | - 'null' 67 | - id: config__algorithm__coverage 68 | type: 69 | - File 70 | - 'null' 71 | - id: config__algorithm__coverage_merged 72 | type: 73 | - File 74 | - 'null' 75 | - id: config__algorithm__coverage_orig 76 | type: 77 | - File 78 | - 'null' 79 | - id: 
config__algorithm__seq2c_bed_ready 80 | type: 81 | - File 82 | - 'null' 83 | requirements: 84 | - class: InlineJavascriptRequirement 85 | - class: InitialWorkDirRequirement 86 | listing: 87 | - entry: $(JSON.stringify(inputs)) 88 | entryname: cwl.inputs.json 89 | -------------------------------------------------------------------------------- /giab-chm/giab-chm-workflow/steps/prep_samples_to_rec.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-combined 7 | - sentinel_outputs=prep_samples_rec:description;resources;reference__fasta__base;config__algorithm__coverage;config__algorithm__variant_regions 8 | - sentinel_inputs=config__algorithm__coverage:var,config__algorithm__variant_regions:var,reference__fasta__base:var,description:var,resources:var 9 | baseCommand: 10 | - bcbio_nextgen.py 11 | - runfn 12 | - prep_samples_to_rec 13 | - cwl 14 | class: CommandLineTool 15 | cwlVersion: v1.0 16 | hints: 17 | - class: DockerRequirement 18 | dockerImageId: quay.io/bcbio/bcbio-vc 19 | dockerPull: quay.io/bcbio/bcbio-vc 20 | - class: ResourceRequirement 21 | coresMin: 1 22 | outdirMin: 10621 23 | ramMin: 3840 24 | tmpdirMin: 4799 25 | - class: dx:InputResourceRequirement 26 | indirMin: 0 27 | inputs: 28 | - id: config__algorithm__coverage 29 | type: 30 | items: File 31 | type: array 32 | - id: config__algorithm__variant_regions 33 | type: 34 | items: File 35 | type: array 36 | - id: reference__fasta__base 37 | secondaryFiles: 38 | - .fai 39 | - ^.dict 40 | type: 41 | items: File 42 | type: array 43 | - id: description 44 | type: 45 | items: string 46 | type: array 47 | - id: resources 48 | type: 49 | items: string 50 | type: array 51 | outputs: 52 | - id: prep_samples_rec 53 | type: 54 | items: 55 | fields: 56 | - name: description 57 | type: 
string 58 | - name: resources 59 | type: string 60 | - name: reference__fasta__base 61 | type: File 62 | - name: config__algorithm__coverage 63 | type: File 64 | - name: config__algorithm__variant_regions 65 | type: File 66 | name: prep_samples_rec 67 | type: record 68 | type: array 69 | requirements: 70 | - class: InlineJavascriptRequirement 71 | - class: InitialWorkDirRequirement 72 | listing: 73 | - entry: $(JSON.stringify(inputs)) 74 | entryname: cwl.inputs.json 75 | -------------------------------------------------------------------------------- /giab-chm/giab-chm.csv: -------------------------------------------------------------------------------- 1 | samplename,description,variant_regions,coverage,validate,validate_regions 2 | NA12878_R1.fq.gz;NA12878_R2.fq.gz,NA12878,Exome-AZ_V2_pluschr20-hg38.bed,Exome-AZ_V2_pluschr20-hg38.bed,hg38/validation/giab-NA12878/truth_small_variants.vcf.gz,hg38/validation/giab-NA12878/truth_regions.bed 3 | CHM1_CHM13_R1.fq.gz;CHM1_CHM13_R2.fq.gz,CHM1_CHM13,Exome-AZ_V2_pluschr20-hg38.bed,Exome-AZ_V2_pluschr20-hg38.bed,chm-hg38.vcf.gz,chm-hg38-regions.bed.gz 4 | -------------------------------------------------------------------------------- /giab-chm/prep_chm_truth.py: -------------------------------------------------------------------------------- 1 | #!/bin/env python 2 | """Prepare standard truth set + confident regions for CHM benchmark. 3 | 4 | Extracts input from CHM-eval.kit download, prepping the BED file as: 5 | 6 | - Start with hybrid.genome.bed.gz as baseline confident regions 7 | - Remove polyA10 regions 8 | - Remove regions within +/- 10bp of problematic PacBio indels (1bp and >50bp). 9 | This is imperfect but is roughly the approach taken in the CHM paper, 10 | which is to ignore TP/FP/FNs in these regions. 
11 | """ 12 | import os 13 | import shutil 14 | import subprocess 15 | import sys 16 | 17 | import cyvcf2 18 | 19 | def main(genome, ref_fai): 20 | pad = 10 21 | if genome == 38: 22 | polya_file = os.path.join("CHM-eval.kit", "ployA10-38DH.bed.gz") 23 | vcf_file = os.path.join("CHM-eval.kit", "hybrid.m38DH.vcf.gz") 24 | confident_file = os.path.join("CHM-eval.kit", "hybrid.m38DH.bed.gz") 25 | 26 | work_dir = os.path.join(os.getcwd(), "tmpwork") 27 | if not os.path.exists(work_dir): 28 | os.makedirs(work_dir) 29 | 30 | badindel_file = os.path.join(work_dir, "chm-hg%s-badindel.bed" % genome) 31 | if not os.path.exists(badindel_file): 32 | with open(badindel_file, "w") as out_handle: 33 | for rec in cyvcf2.VCF(vcf_file): 34 | has_problem = False 35 | for a in rec.ALT: 36 | sizediff = abs(len(rec.REF) - len(a)) 37 | if a == 1 or a > 50: 38 | has_problem = True 39 | break 40 | if has_problem: 41 | out_handle.write("%s\t%s\t%s\n" % (rec.CHROM, rec.start - pad, rec.end + pad)) 42 | combinedbad_file = os.path.join(work_dir, "chm-hg%s-combinedbad.bed.gz" % genome) 43 | if not os.path.exists(combinedbad_file): 44 | cmd = ("zcat {polya_file} | cat - {combinedbad_file} | bedtools merge | " 45 | "sort -k1,1 -k2,2n | bgzip -c > {combinedbad_file}") 46 | subprocess.check_call(cmd.format(**locals()), shell=True) 47 | 48 | truth_vcf = "chm-hg%s.vcf.gz" % genome 49 | truth_bed = "chm-hg%s-regions.bed.gz" % genome 50 | cmd = ("bedtools subtract -a {confident_file} -b {combinedbad_file} | " 51 | "gsort - {ref_fai} | bgzip -c > {truth_bed}") 52 | subprocess.check_call(cmd.format(**locals()), shell=True) 53 | shutil.copy(vcf_file, truth_vcf) 54 | subprocess.check_call(["tabix", "-p", "bed", truth_bed]) 55 | subprocess.check_call(["tabix", "-p", "vcf", truth_vcf]) 56 | 57 | if __name__ == "__main__": 58 | main(38, sys.argv[1]) 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /giab-chm/prep_chm_truth.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Simplified truth set preparation from rtg ready v0.4+ versions of 3 | # the CHM truth set 4 | set -eu -o pipefail 5 | 6 | bcftools view -f 'PASS,.' -T CHM-eval.kit/full.38.bed.gz -O z -o biodata/chm/chm-hg38.vcf.gz CHM-eval.kit/full.38.vcf.gz 7 | tabix -f -p vcf biodata/chm/chm-hg38.vcf.gz 8 | 9 | cp CHM-eval.kit/full.38.bed.gz biodata/chm/chm-hg38-regions.bed.gz 10 | cp CHM-eval.kit/full.38.bed.gz.tbi biodata/chm/chm-hg38-regions.bed.gz.tbi 11 | -------------------------------------------------------------------------------- /giab-chm/run_bunny.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=giab-chm 5 | 6 | # local bcbio install 7 | bcbio_vm.py cwlrun bunny --no-container $PNAME-workflow 8 | 9 | # with Docker 10 | #bcbio_vm.py cwlrun bunny $PNAME-workflow 11 | -------------------------------------------------------------------------------- /giab-chm/run_generate_cwl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=giab-chm 5 | TEMPLATE=germline 6 | PLATFORM=local 7 | 8 | CWD=`pwd` 9 | rm -rf $PNAME-workflow 10 | bcbio_vm.py template --systemconfig bcbio_system-$PLATFORM.yaml $TEMPLATE-template.yaml $PNAME.csv 11 | bcbio_vm.py cwl --systemconfig bcbio_system-$PLATFORM.yaml $PNAME/config/$PNAME.yaml 12 | sed -i "s#$CWD/biodata/#../biodata/#" $PNAME-workflow/main-$PNAME-samples.json 13 | -------------------------------------------------------------------------------- /giab-exome/bcbio_system-local.yaml: -------------------------------------------------------------------------------- 1 | local: 2 | ref: biodata/collections 3 | inputs: 4 | - biodata/regions 5 | - biodata/giab/exome 6 | resources: 7 | default: {cores: 16, memory: 3750M, jvm_opts: [-Xms750m, -Xmx3750m]} 8 | 
-------------------------------------------------------------------------------- /giab-exome/download_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | ORIG_DIR=`pwd` 5 | # regions 6 | mkdir -p biodata/regions 7 | cd biodata/regions 8 | synapse get -r syn10468188 9 | cd $ORIG_DIR 10 | # hg38 11 | mkdir -p biodata/collections/hg38 12 | cd biodata/collections/hg38 13 | synapse get -r syn10468301 14 | cd $ORIG_DIR 15 | # giab samples 16 | mkdir -p biodata/giab/exome 17 | cd biodata/giab/exome 18 | synapse get -r syn15666200 19 | cd $ORIG_DIR 20 | # cleanup synapse files 21 | cd biodata 22 | find . -name "SYNAPSE_METADATA_MANIFEST.tsv" -exec rm -f {} \; 23 | cd $ORIG_DIR 24 | -------------------------------------------------------------------------------- /giab-exome/germline-template.yaml: -------------------------------------------------------------------------------- 1 | details: 2 | - analysis: variant2 3 | genome_build: hg38 4 | algorithm: 5 | aligner: bwa 6 | variantcaller: gatk-haplotype 7 | tools_on: [gvcf] 8 | metadata: 9 | validate_batch: giab 10 | -------------------------------------------------------------------------------- /giab-exome/giab-exome-workflow/steps/combine_sample_regions.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | arv: http://arvados.org/cwl# 3 | dx: https://www.dnanexus.com/cwl# 4 | arguments: 5 | - position: 0 6 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 7 | - sentinel_parallel=multi-combined 8 | - sentinel_outputs=config__algorithm__callable_regions,config__algorithm__non_callable_regions,config__algorithm__callable_count 9 | - sentinel_inputs=regions__callable:var,regions__nblock:var,metadata__batch:var,config__algorithm__nomap_split_size:var,config__algorithm__nomap_split_targets:var,reference__fasta__base:var,resources:var,description:var 10 | - 
run_number=0 11 | baseCommand: 12 | - bcbio_nextgen.py 13 | - runfn 14 | - combine_sample_regions 15 | - cwl 16 | class: CommandLineTool 17 | cwlVersion: v1.0 18 | hints: 19 | - class: DockerRequirement 20 | dockerImageId: quay.io/bcbio/bcbio-vc 21 | dockerPull: quay.io/bcbio/bcbio-vc 22 | - class: ResourceRequirement 23 | coresMin: 1 24 | outdirMin: 5868 25 | ramMin: 3840 26 | tmpdirMin: 2422 27 | - class: dx:InputResourceRequirement 28 | indirMin: 3113 29 | - class: SoftwareRequirement 30 | packages: 31 | - package: bedtools 32 | specs: 33 | - https://anaconda.org/bioconda/bedtools 34 | - package: htslib 35 | specs: 36 | - https://anaconda.org/bioconda/htslib 37 | - package: gatk4 38 | specs: 39 | - https://anaconda.org/bioconda/gatk4 40 | version: 41 | - 4.0.3.0 42 | - class: arv:APIRequirement 43 | inputs: 44 | - id: regions__callable 45 | type: 46 | items: 47 | - File 48 | - 'null' 49 | type: array 50 | - id: regions__nblock 51 | type: 52 | items: 53 | - File 54 | - 'null' 55 | type: array 56 | - id: metadata__batch 57 | type: 58 | items: 59 | - 'null' 60 | - string 61 | type: array 62 | - id: config__algorithm__nomap_split_size 63 | type: 64 | items: long 65 | type: array 66 | - id: config__algorithm__nomap_split_targets 67 | type: 68 | items: long 69 | type: array 70 | - id: reference__fasta__base 71 | secondaryFiles: 72 | - .fai 73 | - ^.dict 74 | type: 75 | items: File 76 | type: array 77 | - id: resources 78 | type: 79 | items: string 80 | type: array 81 | - id: description 82 | type: 83 | items: string 84 | type: array 85 | outputs: 86 | - id: config__algorithm__callable_regions 87 | type: 88 | items: File 89 | type: array 90 | - id: config__algorithm__non_callable_regions 91 | type: 92 | items: File 93 | type: array 94 | - id: config__algorithm__callable_count 95 | type: 96 | items: int 97 | type: array 98 | requirements: 99 | - class: InlineJavascriptRequirement 100 | - class: InitialWorkDirRequirement 101 | listing: 102 | - entry: 
$(JSON.stringify(inputs)) 103 | entryname: cwl.inputs.json 104 | -------------------------------------------------------------------------------- /giab-exome/giab-exome-workflow/steps/multiqc_summary.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-combined 7 | - sentinel_outputs=summary__multiqc 8 | - sentinel_inputs=qcout_rec:record 9 | - run_number=0 10 | baseCommand: 11 | - bcbio_nextgen.py 12 | - runfn 13 | - multiqc_summary 14 | - cwl 15 | class: CommandLineTool 16 | cwlVersion: v1.0 17 | hints: 18 | - class: DockerRequirement 19 | dockerImageId: quay.io/bcbio/bcbio-vc 20 | dockerPull: quay.io/bcbio/bcbio-vc 21 | - class: ResourceRequirement 22 | coresMin: 1 23 | outdirMin: 20402 24 | ramMin: 3840 25 | tmpdirMin: 9689 26 | - class: dx:InputResourceRequirement 27 | indirMin: 1 28 | - class: SoftwareRequirement 29 | packages: 30 | - package: multiqc 31 | specs: 32 | - https://anaconda.org/bioconda/multiqc 33 | - package: multiqc-bcbio 34 | specs: 35 | - https://anaconda.org/bioconda/multiqc-bcbio 36 | inputs: 37 | - id: qcout_rec 38 | type: 39 | items: 40 | fields: 41 | - name: summary__qc 42 | type: 43 | - File 44 | - 'null' 45 | - name: summary__metrics 46 | type: 47 | - string 48 | - 'null' 49 | - name: description 50 | type: string 51 | - name: genome_build 52 | type: string 53 | - name: config__algorithm__tools_off 54 | type: 55 | - 'null' 56 | - string 57 | - items: 58 | - 'null' 59 | - string 60 | type: array 61 | - name: config__algorithm__qc 62 | type: 63 | items: string 64 | type: array 65 | - name: config__algorithm__tools_on 66 | type: 67 | items: string 68 | type: array 69 | name: qcout_rec 70 | type: record 71 | type: array 72 | outputs: 73 | - id: summary__multiqc 74 | type: 75 | items: 76 | - File 77 | - 'null' 78 
| type: array 79 | requirements: 80 | - class: InlineJavascriptRequirement 81 | - class: InitialWorkDirRequirement 82 | listing: 83 | - entry: $(JSON.stringify(inputs)) 84 | entryname: cwl.inputs.json 85 | -------------------------------------------------------------------------------- /giab-exome/giab-exome-workflow/steps/prep_samples.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-parallel 7 | - sentinel_outputs=rgnames__sample,config__algorithm__variant_regions,config__algorithm__variant_regions_merged,config__algorithm__variant_regions_orig,config__algorithm__coverage,config__algorithm__coverage_merged,config__algorithm__coverage_orig,config__algorithm__seq2c_bed_ready 8 | - sentinel_inputs=prep_samples_rec:record 9 | - run_number=0 10 | baseCommand: 11 | - bcbio_nextgen.py 12 | - runfn 13 | - prep_samples 14 | - cwl 15 | class: CommandLineTool 16 | cwlVersion: v1.0 17 | hints: 18 | - class: DockerRequirement 19 | dockerImageId: quay.io/bcbio/bcbio-vc 20 | dockerPull: quay.io/bcbio/bcbio-vc 21 | - class: ResourceRequirement 22 | coresMin: 1 23 | outdirMin: 5868 24 | ramMin: 3840 25 | tmpdirMin: 2422 26 | - class: dx:InputResourceRequirement 27 | indirMin: 3198 28 | - class: SoftwareRequirement 29 | packages: 30 | - package: htslib 31 | specs: 32 | - https://anaconda.org/bioconda/htslib 33 | - package: bedtools 34 | specs: 35 | - https://anaconda.org/bioconda/bedtools 36 | - package: pythonpy 37 | specs: 38 | - https://anaconda.org/bioconda/pythonpy 39 | inputs: 40 | - id: prep_samples_rec 41 | type: 42 | fields: 43 | - name: resources 44 | type: string 45 | - name: description 46 | type: string 47 | - name: reference__fasta__base 48 | type: File 49 | - name: rgnames__sample 50 | type: string 51 | - name: 
config__algorithm__variant_regions 52 | type: File 53 | name: prep_samples_rec 54 | type: record 55 | outputs: 56 | - id: rgnames__sample 57 | type: string 58 | - id: config__algorithm__variant_regions 59 | type: 60 | - File 61 | - 'null' 62 | - id: config__algorithm__variant_regions_merged 63 | type: 64 | - File 65 | - 'null' 66 | - id: config__algorithm__variant_regions_orig 67 | type: 68 | - File 69 | - 'null' 70 | - id: config__algorithm__coverage 71 | type: 72 | - File 73 | - 'null' 74 | - id: config__algorithm__coverage_merged 75 | type: 76 | - File 77 | - 'null' 78 | - id: config__algorithm__coverage_orig 79 | type: 80 | - File 81 | - 'null' 82 | - id: config__algorithm__seq2c_bed_ready 83 | type: 84 | - File 85 | - 'null' 86 | requirements: 87 | - class: InlineJavascriptRequirement 88 | - class: InitialWorkDirRequirement 89 | listing: 90 | - entry: $(JSON.stringify(inputs)) 91 | entryname: cwl.inputs.json 92 | -------------------------------------------------------------------------------- /giab-exome/giab-exome-workflow/steps/prep_samples_to_rec.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-combined 7 | - sentinel_outputs=prep_samples_rec:resources;description;reference__fasta__base;rgnames__sample;config__algorithm__variant_regions 8 | - sentinel_inputs=rgnames__sample:var,config__algorithm__variant_regions:var,reference__fasta__base:var,resources:var,description:var 9 | - run_number=0 10 | baseCommand: 11 | - bcbio_nextgen.py 12 | - runfn 13 | - prep_samples_to_rec 14 | - cwl 15 | class: CommandLineTool 16 | cwlVersion: v1.0 17 | hints: 18 | - class: DockerRequirement 19 | dockerImageId: quay.io/bcbio/bcbio-vc 20 | dockerPull: quay.io/bcbio/bcbio-vc 21 | - class: ResourceRequirement 22 | coresMin: 1 23 | outdirMin: 5868 
24 | ramMin: 3840 25 | tmpdirMin: 2422 26 | - class: dx:InputResourceRequirement 27 | indirMin: 0 28 | inputs: 29 | - id: rgnames__sample 30 | type: 31 | items: string 32 | type: array 33 | - id: config__algorithm__variant_regions 34 | type: 35 | items: File 36 | type: array 37 | - id: reference__fasta__base 38 | secondaryFiles: 39 | - .fai 40 | - ^.dict 41 | type: 42 | items: File 43 | type: array 44 | - id: resources 45 | type: 46 | items: string 47 | type: array 48 | - id: description 49 | type: 50 | items: string 51 | type: array 52 | outputs: 53 | - id: prep_samples_rec 54 | type: 55 | items: 56 | fields: 57 | - name: resources 58 | type: string 59 | - name: description 60 | type: string 61 | - name: reference__fasta__base 62 | type: File 63 | - name: rgnames__sample 64 | type: string 65 | - name: config__algorithm__variant_regions 66 | type: File 67 | name: prep_samples_rec 68 | type: record 69 | type: array 70 | requirements: 71 | - class: InlineJavascriptRequirement 72 | - class: InitialWorkDirRequirement 73 | listing: 74 | - entry: $(JSON.stringify(inputs)) 75 | entryname: cwl.inputs.json 76 | -------------------------------------------------------------------------------- /giab-exome/giab-exome.csv: -------------------------------------------------------------------------------- 1 | filename,description,validate,validate_regions,variant_regions 2 | NA12878-xGen-exome_R1.fq.gz;NA12878-xGen-exome_R2.fq.gz,NA12878,hg38/validation/giab-NA12878/truth_small_variants.vcf.gz,hg38/validation/giab-NA12878/truth_regions.bed,Exome-IDT-xGen-hg38.bed 3 | NA24385-Oslo-exome_R1.fq.gz;NA24385-Oslo-exome_R2.fq.gz,NA24385,hg38/validation/giab-NA24385/truth_small_variants.vcf.gz,hg38/validation/giab-NA24385/truth_regions.bed,Exome-Agilent-SureSelect-v05-hg38.bed 4 | NA24631-Oslo-exome_R1.fq.gz;NA24631-Oslo-exome_R2.fq.gz,NA24631,hg38/validation/giab-NA24631/truth_small_variants.vcf.gz,hg38/validation/giab-NA24631/truth_regions.bed,Exome-Agilent-SureSelect-v05-hg38.bed 5 | 
-------------------------------------------------------------------------------- /giab-exome/input/get_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | # BED files 5 | 6 | wget 'http://sfvideo.blob.core.windows.net/sitefinity/docs/default-source/supplementary-product-info/xgen-exome-research-panel-targetsae255a1532796e2eaa53ff00001c1b3c.bed?sfvrsn=435c3407_7&download=true' 7 | wget ftp://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/AshkenazimTrio/analysis/OsloUniversityHospital_Exome_GATK_jointVC_11242015/wex_Agilent_SureSelect_v05_b37.baits.slop50.merged.list 8 | 9 | wget -c http://hgdownload.cse.ucsc.edu/goldenPath/hg19/liftOver/hg19ToHg38.over.chain.gz 10 | 11 | grep -v ^@ wex_Agilent_SureSelect_v05_b37.baits.slop50.merged.list | sed "s/^\([0-9]\+\)\t/chr\1\t/g" | 12 | sed "s/^MT/chrM/g" | sed "s/^X/chrX/g" | sed "s/^Y/chrY/g" | CrossMap.py bed hg19ToHg38.over.chain.gz /dev/stdin Exome-Agilent-raw.bed 13 | gsort Exome-Agilent-raw.bed /mnt/work/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa.fai | grep -v '_alt' > Exome-Agilent-SureSelect-v05-hg38.bed 14 | CrossMap.py bed hg19ToHg38.over.chain.gz xgen-exome-research-panel-targets*bed* Exome-IDT-raw.bed 15 | gsort Exome-IDT-raw.bed /mnt/work/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa.fai | grep -v '_alt' > Exome-IDT-xGen-hg38.bed 16 | 17 | # Fastq files 18 | 19 | cp bs/Projects/NovaSeq\ S2\:\ TruSeq\ DNA\ Enrichment\ \(IDT\ xGen\ Exome\ Research\ Panel\)/Samples/NA12878-Rep*2x101/Files/*-Rep01-*.fastq.gz* . 20 | cp bs/Projects/NovaSeq\ S2\:\ TruSeq\ DNA\ Enrichment\ \(IDT\ xGen\ Exome\ Research\ Panel\)/Samples/NA12878-Rep*2x101/Files/*-Rep02-*.fastq.gz* . 21 | 22 | aws s3 cp s3://giab/data/AshkenazimTrio/HG002_NA24385_son/OsloUniversityHospital_Exome/151002_7001448_0359_AC7F6GANXX_Sample_HG002-EEogPU_v02-KIT-Av5_AGATGTAC_L008.posiSrt.markDup.bam . 
23 | aws s3 cp s3://giab/data/AshkenazimTrio/HG002_NA24385_son/OsloUniversityHospital_Exome/151002_7001448_0359_AC7F6GANXX_Sample_HG002-EEogPU_v02-KIT-Av5_AGATGTAC_L008.posiSrt.markDup.bai . 24 | 25 | aws s3 cp s3://giab/data/ChineseTrio/HG005_NA24631_son/OsloUniversityHospital_Exome/151002_7001448_0359_AC7F6GANXX_Sample_HG005-EEogPU_v02-KIT-Av5_CGCATACA_L008.posiSrt.markDup.bam . 26 | aws s3 cp s3://giab/data/ChineseTrio/HG005_NA24631_son/OsloUniversityHospital_Exome/151002_7001448_0359_AC7F6GANXX_Sample_HG005-EEogPU_v02-KIT-Av5_CGCATACA_L008.posiSrt.markDup.bai . 27 | 28 | zcat NA12878-Rep*_R1_*.fastq.gz | bgzip -c > NA12878-xGen-exome_R1.fq.gz 29 | zcat NA12878-Rep*_R2_*.fastq.gz | bgzip -c > NA12878-xGen-exome_R2.fq.gz 30 | bamtofastq filename=151002_7001448_0359_AC7F6GANXX_Sample_HG002-EEogPU_v02-KIT-Av5_AGATGTAC_L008.posiSrt.markDup.bam F=>(bgzip -c > NA24385-Oslo-exome_R1.fq.gz) F2=>(bgzip -c > NA24385-Oslo-exome_R2.fq.gz) 31 | bamtofastq filename=151002_7001448_0359_AC7F6GANXX_Sample_HG005-EEogPU_v02-KIT-Av5_CGCATACA_L008.posiSrt.markDup.bam F=>(bgzip -c > NA24631-Oslo-exome_R1.fq.gz) F2=>(bgzip -c > NA24631-Oslo-exome_R2.fq.gz) 32 | -------------------------------------------------------------------------------- /giab-exome/run_cromwell.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=giab-exome 5 | 6 | # local bcbio install 7 | bcbio_vm.py cwlrun cromwell -s htcondor --no-container $PNAME-workflow 8 | -------------------------------------------------------------------------------- /giab-exome/run_generate_cwl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=giab-exome 5 | TEMPLATE=germline 6 | LOCATION=local 7 | rm -rf $PNAME $PNAME-workflow 8 | bcbio_vm.py template --systemconfig bcbio_system-$LOCATION.yaml $TEMPLATE-template.yaml $PNAME.csv 9 | bcbio_vm.py cwl --systemconfig 
bcbio_system-$LOCATION.yaml $PNAME/config/$PNAME.yaml 10 | -------------------------------------------------------------------------------- /giab-joint/arvados/bcbio_system-arvados.yaml: -------------------------------------------------------------------------------- 1 | arvados: 2 | reference: 38a3166acddf30ff581c249ece68e7f5+47411 3 | input: [38a3166acddf30ff581c249ece68e7f5+47411] 4 | resources: 5 | default: {cores: 16, memory: 3500M, jvm_opts: [-Xms750m, -Xmx3500m]} 6 | -------------------------------------------------------------------------------- /giab-joint/arvados/run_arvados.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | unset ARVADOS_API_HOST_INSECURE 5 | 6 | PNAME=giab-joint 7 | PROJECT_ID=qr1hi-j7d0g-h691y6104tlg8b4 8 | 9 | bcbio_vm.py cwlrun arvados ${PNAME}-workflow -- --project-uuid $PROJECT_ID --ignore-docker-for-reuse 10 | -------------------------------------------------------------------------------- /giab-joint/arvados/run_generate_cwl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=giab-joint 5 | 6 | CWD=`pwd` 7 | rm -rf $PNAME-workflow 8 | bcbio_vm.py template --systemconfig bcbio_system-arvados.yaml ../joint-template.yaml ../$PNAME.csv 9 | bcbio_vm.py cwl --systemconfig bcbio_system-arvados.yaml $PNAME/config/$PNAME.yaml 10 | -------------------------------------------------------------------------------- /giab-joint/bcbio_system.yaml: -------------------------------------------------------------------------------- 1 | local: 2 | ref: biodata/collections 3 | inputs: 4 | - biodata/regions 5 | - biodata/giab/na12878 6 | - biodata/giab/na24385 7 | - biodata/giab/na24631 8 | resources: 9 | default: {cores: 16, memory: 3500M, jvm_opts: [-Xms750m, -Xmx3500m]} 10 | -------------------------------------------------------------------------------- 
/giab-joint/dnanexus/bcbio_system-dnanexus.yaml: -------------------------------------------------------------------------------- 1 | dnanexus: 2 | project: CGR_Dragen_Benchmark 3 | ref: 4 | project: DREAM-challenge 5 | folder: /DREAM-data/bcbio-giab-joint/biodata/collections 6 | inputs: 7 | - /Inputs/biodata/giab 8 | - /Inputs/biodata/regions 9 | resources: 10 | default: {cores: 8, memory: 3500M, jvm_opts: [-Xms1g, -Xmx3500m]} 11 | -------------------------------------------------------------------------------- /giab-joint/dnanexus/run_compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=giab-joint 5 | DX_PROJECT_ID=project-F8Q7fJj0XFJJ3XbBPQYXP4B9 6 | 7 | # Clean up previous runs, useful for testing 8 | dx mkdir -p $DX_PROJECT_ID:/dx-cwl-run 9 | dx mkdir -p $DX_PROJECT_ID:/.cwl_workflow_archive 10 | dx mkdir -p $DX_PROJECT_ID:/$PNAME-workflow 11 | dx rm -r $DX_PROJECT_ID:/dx-cwl-run 12 | dx rm -r $DX_PROJECT_ID:/.cwl_workflow_archive 13 | dx rm -r $DX_PROJECT_ID:/$PNAME-workflow 14 | rm -rf dx-cwl-run 15 | 16 | # Compile and upload run file 17 | dx-cwl compile-workflow $PNAME-workflow/main-$PNAME.cwl --project $DX_PROJECT_ID --token $DX_AUTH_TOKEN 18 | dx upload -p --path $DX_PROJECT_ID:/$PNAME-workflow $PNAME-workflow/main-$PNAME-samples.json 19 | 20 | -------------------------------------------------------------------------------- /giab-joint/dnanexus/run_dnanexus.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | PNAME=giab-joint 4 | DX_PROJECT_ID=project-F8Q7fJj0XFJJ3XbBPQYXP4B9 5 | # Run the compiled workflow in the project defined above; DX_AUTH_TOKEN must be set in the environment 6 | dx-cwl run-workflow /dx-cwl-run/main-$PNAME/main-$PNAME /$PNAME-workflow/main-$PNAME-samples.json \ 7 | --project $DX_PROJECT_ID --token $DX_AUTH_TOKEN 8 | -------------------------------------------------------------------------------- /giab-joint/dnanexus/run_generate_cwl.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=giab-joint 5 | 6 | CWD=`pwd` 7 | rm -rf $PNAME 8 | rm -rf $PNAME-workflow 9 | bcbio_vm.py template --systemconfig bcbio_system-dnanexus.yaml ../joint-template.yaml ../$PNAME.csv 10 | bcbio_vm.py cwl --systemconfig bcbio_system-dnanexus.yaml $PNAME/config/$PNAME.yaml 11 | -------------------------------------------------------------------------------- /giab-joint/dnanexus_single/README.txt: -------------------------------------------------------------------------------- 1 | ### DNAnexus single GATK4 variant calling 2 | 3 | Genome in a Bottle NA12878 single sample GATK4 HaplotypeCaller germline validation: 4 | 5 | - 65x NA12878 NovaSeq input data subset to exome regions + chr20 6 | - hg38 reference genome 7 | - csv inputs for both bwa-mem alignment and taking pre-aligned DRAGEN BAM files 8 | -------------------------------------------------------------------------------- /giab-joint/dnanexus_single/bcbio_system-dnanexus.yaml: -------------------------------------------------------------------------------- 1 | dnanexus: 2 | project: CGR_Dragen_Benchmark 3 | ref: 4 | project: bcbio_resources 5 | folder: /reference_genomes 6 | inputs: 7 | - /Inputs/biodata/giab 8 | - /Inputs/biodata/regions 9 | - /outputs/giab/dragen-hg38 10 | resources: 11 | default: {cores: 16, memory: 3500M, jvm_opts: [-Xms1g, -Xmx3500m]} 12 | -------------------------------------------------------------------------------- /giab-joint/dnanexus_single/germline-template.yaml: -------------------------------------------------------------------------------- 1 | details: 2 | - algorithm: 3 | aligner: bwa 4 | align_split_size: false 5 | nomap_split_targets: 40 6 | variantcaller: gatk-haplotype 7 | tools_on: [gatk4] 8 | tools_off: [gemini] 9 | analysis: variant2 10 | genome_build: hg38 11 | -------------------------------------------------------------------------------- 
/giab-joint/dnanexus_single/giab-single-bwa.csv: -------------------------------------------------------------------------------- 1 | samplename,description,batch,variant_regions,coverage,validate,validate_regions 2 | NA12878_R1.fq.gz;NA12878_R2.fq.gz,NA12878,gj1,Exome-AZ_V2_pluschr20-hg38.bed,Exome-AZ_V2_pluschr20-hg38.bed,hg38/validation/giab-NA12878/truth_small_variants.vcf.gz,hg38/validation/giab-NA12878/truth_regions.bed 3 | -------------------------------------------------------------------------------- /giab-joint/dnanexus_single/giab-single-dragen.csv: -------------------------------------------------------------------------------- 1 | samplename,description,batch,variant_regions,coverage,validate,validate_regions,aligner,bam_clean 2 | NA12878.bam,NA12878-DRAGEN,gj2,Exome-AZ_V2_pluschr20-hg38.bed,Exome-AZ_V2_pluschr20-hg38.bed,hg38/validation/giab-NA12878/truth_small_variants.vcf.gz,hg38/validation/giab-NA12878/truth_regions.bed,false,remove_extracontigs 3 | -------------------------------------------------------------------------------- /giab-joint/dnanexus_single/run_generate_cwl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | TEMPLATE=germline 5 | 6 | PNAME=giab-single-bwa 7 | rm -rf $PNAME 8 | rm -rf $PNAME-workflow 9 | bcbio_vm.py template --systemconfig bcbio_system-dnanexus.yaml $TEMPLATE-template.yaml $PNAME.csv 10 | bcbio_vm.py cwl --systemconfig bcbio_system-dnanexus.yaml $PNAME/config/$PNAME.yaml 11 | 12 | PNAME=giab-single-dragen 13 | rm -rf $PNAME 14 | rm -rf $PNAME-workflow 15 | bcbio_vm.py template --systemconfig bcbio_system-dnanexus.yaml $TEMPLATE-template.yaml $PNAME.csv 16 | bcbio_vm.py cwl --systemconfig bcbio_system-dnanexus.yaml $PNAME/config/$PNAME.yaml 17 | -------------------------------------------------------------------------------- /giab-joint/download_data.sh: -------------------------------------------------------------------------------- 1 | 
#!/bin/bash 2 | set -eu -o pipefail 3 | 4 | ORIG_DIR=`pwd` 5 | # regions 6 | mkdir -p biodata/regions 7 | cd biodata/regions 8 | synapse get -r syn10468188 9 | cd $ORIG_DIR 10 | # hg38 11 | mkdir -p biodata/collections/hg38 12 | cd biodata/collections/hg38 13 | synapse get -r syn10468301 14 | cd $ORIG_DIR 15 | # giab samples 16 | mkdir -p biodata/giab/na12878 17 | cd biodata/giab/na12878 18 | synapse get -r syn11831606 19 | cd $ORIG_DIR 20 | mkdir -p biodata/giab/na24385 21 | cd biodata/giab/na24385 22 | synapse get -r syn11831515 23 | cd $ORIG_DIR 24 | mkdir -p biodata/giab/na24631 25 | cd biodata/giab/na24631 26 | synapse get -r syn11831625 27 | cd $ORIG_DIR 28 | # cleanup synapse files 29 | cd biodata 30 | find . -name "SYNAPSE_METADATA_MANIFEST.tsv" -exec rm -f {} \; 31 | cd $ORIG_DIR 32 | -------------------------------------------------------------------------------- /giab-joint/ga4gh_execution_challenge/bcbio-giab-joint_checker.json: -------------------------------------------------------------------------------- 1 | { 2 | "baseline": { 3 | "class": "File", 4 | "path": "grading-summary-gj1-baseline.csv" 5 | }, 6 | "comparison": { 7 | "class": "File", 8 | "path": "grading-summary-gj1.csv" 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /giab-joint/ga4gh_execution_challenge/bcbio-giab-joint_submit.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_file": { 3 | "class": "File", 4 | "path": ".synapseConfig" 5 | }, 6 | "team_name": "", 7 | "eval_id": "", 8 | "file": [ 9 | { 10 | "class": "File", 11 | "path": "grading-summary-gj1.csv" 12 | }, 13 | { 14 | "class": "File", 15 | "path": "multiqc_report.html" 16 | }, 17 | { 18 | "class": "File", 19 | "path": "NA12878-gatk-haplotype.vcf.gz" 20 | }, 21 | { 22 | "class": "File", 23 | "path": "NA24385-gatk-haplotype.vcf.gz" 24 | }, 25 | { 26 | "class": "File", 27 | "path": "NA24631-gatk-haplotype.vcf.gz" 28 | }, 29 | { 
30 | "class": "File", 31 | "path": "gj1-gatk-haplotype.vcf.gz" 32 | } 33 | ], 34 | "parent_id": "" 35 | } 36 | -------------------------------------------------------------------------------- /giab-joint/ga4gh_execution_challenge/grading-summary-gj1-baseline.csv: -------------------------------------------------------------------------------- 1 | sample,caller,vtype,metric,value 2 | NA12878,gatk-haplotype,SNPs,tp,197856 3 | NA12878,gatk-haplotype,Indels,tp,26882 4 | NA12878,gatk-haplotype,SNPs,fp,6407 5 | NA12878,gatk-haplotype,Indels,fp,6670 6 | NA12878,gatk-haplotype,SNPs,fn,1052 7 | NA12878,gatk-haplotype,Indels,fn,1727 8 | NA24385,gatk-haplotype,SNPs,tp,200231 9 | NA24385,gatk-haplotype,Indels,tp,24737 10 | NA24385,gatk-haplotype,SNPs,fp,31535 11 | NA24385,gatk-haplotype,Indels,fp,50931 12 | NA24385,gatk-haplotype,SNPs,fn,722 13 | NA24385,gatk-haplotype,Indels,fn,2777 14 | NA24631,gatk-haplotype,SNPs,tp,196017 15 | NA24631,gatk-haplotype,Indels,tp,24239 16 | NA24631,gatk-haplotype,SNPs,fp,1992 17 | NA24631,gatk-haplotype,Indels,fp,384 18 | NA24631,gatk-haplotype,SNPs,fn,121 19 | NA24631,gatk-haplotype,Indels,fn,89 20 | -------------------------------------------------------------------------------- /giab-joint/ga4gh_execution_challenge/link_cwl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | ORIG=`pwd` 5 | cd ../ 6 | bcbio_python $ORIG/link_to_synapse.py bcbio-giab-joint giab-joint-workflow syn10466755 7 | cd $ORIG 8 | -------------------------------------------------------------------------------- /giab-joint/ga4gh_execution_challenge/link_to_synapse.py: -------------------------------------------------------------------------------- 1 | ../../NA12878-chr20/ga4gh_execution_challenge/link_to_synapse.py -------------------------------------------------------------------------------- /giab-joint/ga4gh_execution_challenge/upload_biodata.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | ORIG=`pwd` 5 | cd ../ 6 | bcbio_python /mnt/work/bcbio/code/bcbio-nextgen/scripts/utils/upload_to_synapse.py bcbio-giab-joint biodata syn10466755 7 | cd $ORIG 8 | -------------------------------------------------------------------------------- /giab-joint/giab-joint-workflow/steps/combine_sample_regions.cwl: -------------------------------------------------------------------------------- 1 | arguments: 2 | - position: 0 3 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 4 | - sentinel_parallel=multi-combined 5 | - sentinel_outputs=config__algorithm__callable_regions,config__algorithm__non_callable_regions,config__algorithm__callable_count 6 | - sentinel_inputs=regions__callable:var,regions__nblock:var,config__algorithm__nomap_split_size:var,config__algorithm__nomap_split_targets:var,reference__fasta__base:var,description:var,resources:var 7 | baseCommand: 8 | - bcbio_nextgen.py 9 | - runfn 10 | - combine_sample_regions 11 | - cwl 12 | class: CommandLineTool 13 | cwlVersion: v1.0 14 | hints: 15 | - class: DockerRequirement 16 | dockerImageId: quay.io/bcbio/bcbio-vc 17 | dockerPull: quay.io/bcbio/bcbio-vc 18 | - class: ResourceRequirement 19 | coresMin: 1 20 | outdirMin: 17245 21 | ramMin: 3584 22 | tmpdirMin: 16221 23 | - class: SoftwareRequirement 24 | packages: 25 | - package: bedtools 26 | specs: 27 | - https://anaconda.org/bioconda/bedtools 28 | - package: htslib 29 | specs: 30 | - https://anaconda.org/bioconda/htslib 31 | - package: gatk4 32 | specs: 33 | - https://anaconda.org/bioconda/gatk4 34 | - package: gatk 35 | specs: 36 | - https://anaconda.org/bioconda/gatk 37 | inputs: 38 | - id: regions__callable 39 | type: 40 | items: File 41 | type: array 42 | - id: regions__nblock 43 | type: 44 | items: File 45 | type: array 46 | - id: config__algorithm__nomap_split_size 47 | type: 48 | items: long 49 | type: 
array 50 | - id: config__algorithm__nomap_split_targets 51 | type: 52 | items: long 53 | type: array 54 | - id: reference__fasta__base 55 | secondaryFiles: 56 | - .fai 57 | - ^.dict 58 | type: 59 | items: File 60 | type: array 61 | - id: description 62 | type: 63 | items: string 64 | type: array 65 | - id: resources 66 | type: 67 | items: string 68 | type: array 69 | outputs: 70 | - id: config__algorithm__callable_regions 71 | type: 72 | items: File 73 | type: array 74 | - id: config__algorithm__non_callable_regions 75 | type: 76 | items: File 77 | type: array 78 | - id: config__algorithm__callable_count 79 | type: 80 | items: int 81 | type: array 82 | requirements: 83 | - class: InlineJavascriptRequirement 84 | - class: InitialWorkDirRequirement 85 | listing: 86 | - entry: $(JSON.stringify(inputs)) 87 | entryname: cwl.inputs.json 88 | -------------------------------------------------------------------------------- /giab-joint/giab-joint-workflow/steps/prep_align_inputs.cwl: -------------------------------------------------------------------------------- 1 | arguments: 2 | - position: 0 3 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 4 | - sentinel_parallel=single-split 5 | - sentinel_outputs=process_alignment_rec:files;config__algorithm__quality_format;align_split 6 | - sentinel_inputs=alignment_rec:record 7 | baseCommand: 8 | - bcbio_nextgen.py 9 | - runfn 10 | - prep_align_inputs 11 | - cwl 12 | class: CommandLineTool 13 | cwlVersion: v1.0 14 | hints: 15 | - class: DockerRequirement 16 | dockerImageId: quay.io/bcbio/bcbio-vc 17 | dockerPull: quay.io/bcbio/bcbio-vc 18 | - class: ResourceRequirement 19 | coresMin: 1 20 | outdirMin: 68549 21 | ramMin: 3584 22 | tmpdirMin: 67525 23 | - class: SoftwareRequirement 24 | packages: 25 | - package: grabix 26 | specs: 27 | - https://anaconda.org/bioconda/grabix 28 | - package: htslib 29 | specs: 30 | - https://anaconda.org/bioconda/htslib 31 | - package: biobambam 32 | specs: 33 | - 
https://anaconda.org/bioconda/biobambam 34 | inputs: 35 | - id: alignment_rec 36 | type: 37 | fields: 38 | - name: description 39 | type: string 40 | - name: resources 41 | type: string 42 | - name: config__algorithm__align_split_size 43 | type: 44 | - 'null' 45 | - string 46 | - name: reference__fasta__base 47 | type: File 48 | - name: rgnames__lb 49 | type: 50 | - 'null' 51 | - string 52 | - name: rgnames__rg 53 | type: string 54 | - name: rgnames__lane 55 | type: string 56 | - name: reference__bwa__indexes 57 | type: File 58 | - name: files 59 | type: 60 | items: File 61 | type: array 62 | - name: config__algorithm__aligner 63 | type: string 64 | - name: rgnames__pl 65 | type: string 66 | - name: config__algorithm__mark_duplicates 67 | type: 68 | - string 69 | - 'null' 70 | - boolean 71 | - name: rgnames__pu 72 | type: string 73 | - name: rgnames__sample 74 | type: string 75 | name: alignment_rec 76 | type: record 77 | outputs: 78 | - id: process_alignment_rec 79 | type: 80 | items: 81 | fields: 82 | - name: files 83 | type: 84 | items: File 85 | type: array 86 | - name: config__algorithm__quality_format 87 | type: string 88 | - name: align_split 89 | type: 90 | - string 91 | - 'null' 92 | name: process_alignment_rec 93 | type: record 94 | type: array 95 | requirements: 96 | - class: InlineJavascriptRequirement 97 | - class: InitialWorkDirRequirement 98 | listing: 99 | - entry: $(JSON.stringify(inputs)) 100 | entryname: cwl.inputs.json 101 | -------------------------------------------------------------------------------- /giab-joint/giab-joint-workflow/steps/prep_samples.cwl: -------------------------------------------------------------------------------- 1 | arguments: 2 | - position: 0 3 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 4 | - sentinel_parallel=multi-parallel 5 | - 
sentinel_outputs=config__algorithm__variant_regions,config__algorithm__variant_regions_merged,config__algorithm__variant_regions_orig,config__algorithm__coverage,config__algorithm__coverage_merged,config__algorithm__coverage_orig,config__algorithm__seq2c_bed_ready 6 | - sentinel_inputs=prep_samples_rec:record 7 | baseCommand: 8 | - bcbio_nextgen.py 9 | - runfn 10 | - prep_samples 11 | - cwl 12 | class: CommandLineTool 13 | cwlVersion: v1.0 14 | hints: 15 | - class: DockerRequirement 16 | dockerImageId: quay.io/bcbio/bcbio-vc 17 | dockerPull: quay.io/bcbio/bcbio-vc 18 | - class: ResourceRequirement 19 | coresMin: 1 20 | outdirMin: 17266 21 | ramMin: 3584 22 | tmpdirMin: 16242 23 | - class: SoftwareRequirement 24 | packages: 25 | - package: htslib 26 | specs: 27 | - https://anaconda.org/bioconda/htslib 28 | - package: bedtools 29 | specs: 30 | - https://anaconda.org/bioconda/bedtools 31 | - package: pythonpy 32 | specs: 33 | - https://anaconda.org/bioconda/pythonpy 34 | inputs: 35 | - id: prep_samples_rec 36 | type: 37 | fields: 38 | - name: description 39 | type: string 40 | - name: resources 41 | type: string 42 | - name: reference__fasta__base 43 | type: File 44 | - name: config__algorithm__coverage 45 | type: File 46 | - name: config__algorithm__variant_regions 47 | type: File 48 | name: prep_samples_rec 49 | type: record 50 | outputs: 51 | - id: config__algorithm__variant_regions 52 | type: 53 | - File 54 | - 'null' 55 | - id: config__algorithm__variant_regions_merged 56 | type: 57 | - File 58 | - 'null' 59 | - id: config__algorithm__variant_regions_orig 60 | type: 61 | - File 62 | - 'null' 63 | - id: config__algorithm__coverage 64 | type: 65 | - File 66 | - 'null' 67 | - id: config__algorithm__coverage_merged 68 | type: 69 | - File 70 | - 'null' 71 | - id: config__algorithm__coverage_orig 72 | type: 73 | - File 74 | - 'null' 75 | - id: config__algorithm__seq2c_bed_ready 76 | type: 77 | - File 78 | - 'null' 79 | requirements: 80 | - class: 
InlineJavascriptRequirement 81 | - class: InitialWorkDirRequirement 82 | listing: 83 | - entry: $(JSON.stringify(inputs)) 84 | entryname: cwl.inputs.json 85 | -------------------------------------------------------------------------------- /giab-joint/giab-joint-workflow/steps/prep_samples_to_rec.cwl: -------------------------------------------------------------------------------- 1 | arguments: 2 | - position: 0 3 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 4 | - sentinel_parallel=multi-combined 5 | - sentinel_outputs=prep_samples_rec:description;resources;reference__fasta__base;config__algorithm__coverage;config__algorithm__variant_regions 6 | - sentinel_inputs=config__algorithm__coverage:var,config__algorithm__variant_regions:var,reference__fasta__base:var,description:var,resources:var 7 | baseCommand: 8 | - bcbio_nextgen.py 9 | - runfn 10 | - prep_samples_to_rec 11 | - cwl 12 | class: CommandLineTool 13 | cwlVersion: v1.0 14 | hints: 15 | - class: DockerRequirement 16 | dockerImageId: quay.io/bcbio/bcbio-vc 17 | dockerPull: quay.io/bcbio/bcbio-vc 18 | - class: ResourceRequirement 19 | coresMin: 1 20 | outdirMin: 17266 21 | ramMin: 3584 22 | tmpdirMin: 16242 23 | inputs: 24 | - id: config__algorithm__coverage 25 | type: 26 | items: File 27 | type: array 28 | - id: config__algorithm__variant_regions 29 | type: 30 | items: File 31 | type: array 32 | - id: reference__fasta__base 33 | secondaryFiles: 34 | - .fai 35 | - ^.dict 36 | type: 37 | items: File 38 | type: array 39 | - id: description 40 | type: 41 | items: string 42 | type: array 43 | - id: resources 44 | type: 45 | items: string 46 | type: array 47 | outputs: 48 | - id: prep_samples_rec 49 | type: 50 | items: 51 | fields: 52 | - name: description 53 | type: string 54 | - name: resources 55 | type: string 56 | - name: reference__fasta__base 57 | type: File 58 | - name: config__algorithm__coverage 59 | type: File 60 | - name: config__algorithm__variant_regions 61 | type: 
File 62 | name: prep_samples_rec 63 | type: record 64 | type: array 65 | requirements: 66 | - class: InlineJavascriptRequirement 67 | - class: InitialWorkDirRequirement 68 | listing: 69 | - entry: $(JSON.stringify(inputs)) 70 | entryname: cwl.inputs.json 71 | -------------------------------------------------------------------------------- /giab-joint/giab-joint.csv: -------------------------------------------------------------------------------- 1 | samplename,description,batch,variant_regions,coverage,validate,validate_regions 2 | NA12878_R1.fq.gz;NA12878_R2.fq.gz,NA12878,gj1,Exome-AZ_V2_pluschr20-hg38.bed,Exome-AZ_V2_pluschr20-hg38.bed,hg38/validation/giab-NA12878/truth_small_variants.vcf.gz,hg38/validation/giab-NA12878/truth_regions.bed 3 | NA24385_R1.fq.gz;NA24385_R2.fq.gz,NA24385,gj1,Exome-AZ_V2_pluschr20-hg38.bed,Exome-AZ_V2_pluschr20-hg38.bed,hg38/validation/giab-NA24385/truth_small_variants.vcf.gz,hg38/validation/giab-NA24385/truth_regions.bed 4 | NA24631_R1.fq.gz;NA24631_R2.fq.gz,NA24631,gj1,Exome-AZ_V2_pluschr20-hg38.bed,Exome-AZ_V2_pluschr20-hg38.bed,hg38/validation/giab-NA24631/truth_small_variants.vcf.gz,hg38/validation/giab-NA24631/truth_regions.bed 5 | -------------------------------------------------------------------------------- /giab-joint/joint-template.yaml: -------------------------------------------------------------------------------- 1 | #resources: 2 | # sentieon: 3 | # keyfile: /mnt/work/bcbio/license/Harvard_Chapman_eval.lic 4 | details: 5 | - algorithm: 6 | aligner: bwa 7 | recalibrate: true 8 | #variantcaller: [gatk-haplotype, strelka2, haplotyper] 9 | variantcaller: [gatk-haplotype, strelka2] 10 | tools_on: [gatk4, gvcf] 11 | tools_off: [gemini] 12 | analysis: variant2 13 | genome_build: hg38 14 | -------------------------------------------------------------------------------- /giab-joint/run_bunny.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | 
PNAME=giab-joint 5 | 6 | # local bcbio install 7 | bcbio_vm.py cwlrun bunny --no-container $PNAME-workflow 8 | 9 | # with Docker 10 | #bcbio_vm.py cwlrun bunny $PNAME-workflow 11 | -------------------------------------------------------------------------------- /giab-joint/run_generate_cwl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=giab-joint 5 | 6 | CWD=`pwd` 7 | rm -rf $PNAME-workflow 8 | bcbio_vm.py template --systemconfig bcbio_system.yaml joint-template.yaml $PNAME.csv 9 | bcbio_vm.py cwl --systemconfig bcbio_system.yaml $PNAME/config/$PNAME.yaml 10 | sed -i "s#$CWD/biodata/#../biodata/#" $PNAME-workflow/main-$PNAME-samples.json 11 | -------------------------------------------------------------------------------- /giab-joint/run_toil.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | #synapse get -r synXXX 4 | PNAME=giab-joint 5 | 6 | # local bcbio install 7 | bcbio_vm.py cwlrun toil --no-container $PNAME-workflow 8 | 9 | # with Docker 10 | #bcbio_vm.py cwlrun toil $PNAME-workflow 11 | -------------------------------------------------------------------------------- /pgp/bcbio_system-arvados.yaml: -------------------------------------------------------------------------------- 1 | # Reference genomes: 2 | # https://workbench.su92l.arvadosapi.com/collections/su92l-4zz18-3p00f79y4p535ia 3 | # Portable input BAMs: 4 | # https://workbench.su92l.arvadosapi.com/collections/su92l-4zz18-ihm3wrgyuwcmsx1 5 | arvados: 6 | reference: su92l-4zz18-3p00f79y4p535ia 7 | input: [su92l-4zz18-ihm3wrgyuwcmsx1] 8 | resources: 9 | default: {cores: 16, memory: 3500M, jvm_opts: [-Xms1g, -Xmx3500m]} 10 | -------------------------------------------------------------------------------- /pgp/pgp_sv_hla.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | details: 3 | - 
files: huD57BBF.bam 4 | description: huD57BBF 5 | analysis: variant 6 | genome_build: hg38 7 | algorithm: 8 | aligner: bwa 9 | variantcaller: gatk-haplotype 10 | svcaller: [manta, lumpy, cnvkit] 11 | hlacaller: optitype 12 | align_split_size: false 13 | nomap_split_targets: 20 14 | exclude_regions: [altcontigs, polyx] 15 | -------------------------------------------------------------------------------- /pgp/run_arvados.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=pgp_sv_hla 5 | PID=su92l-j7d0g-eoibug3nrwg8ysj 6 | 7 | bcbio_vm.py cwlrun arvados ${PNAME}-workflow -- --project-uuid $PID 8 | #bcbio_vm.py cwlrun arvados ${PNAME}-workflow -- --project-uuid $PID --ignore-docker-for-reuse 9 | -------------------------------------------------------------------------------- /pgp/run_generate_cwl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | PNAME=pgp_sv_hla 4 | bcbio_vm.py cwl --systemconfig bcbio_system-arvados.yaml $PNAME.yaml 5 | -------------------------------------------------------------------------------- /pgp/scripts/extract_veritas_pgp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Explore PGP participant data from untap sqlite database. 3 | 4 | Extracts recently sequenced Illumina data from a diverse set of participants 5 | with other associated data. 
6 | """ 7 | from __future__ import print_function 8 | import sys 9 | 10 | import pandas as pd 11 | import sqlite3 12 | 13 | try: 14 | import arvados 15 | except ImportError: 16 | arvados = None 17 | 18 | def main(sqlite_db): 19 | # Query for recent Veritas sequenced samples 20 | query = ("SELECT uploaded_data.human_id, date, name " 21 | "FROM uploaded_data WHERE " 22 | "data_type == 'Veritas Genetics' AND " 23 | "uploaded_data.name GLOB '*VCF'") 24 | conn = sqlite3.connect(sys.argv[1]) 25 | df = pd.read_sql_query(query, conn) 26 | 27 | # Uniquify by sample 28 | df.sort_values("date", inplace=True) 29 | df.drop_duplicates(inplace=True) 30 | print(df.describe()) 31 | 32 | # Load files in Arvados BAM collection, if arvados client installed 33 | if arvados: 34 | api = arvados.api(host="su92l.arvadosapi.com", token="42yz0fp9s19djsgkae33khevpzq4or1ile5o7khofzw388lvfl") 35 | cr = arvados.CollectionReader("su92l-4zz18-1rqqi0kpkfmfite", api) 36 | bam_coll = [(x.name, x.size()) for x in cr.all_files()] 37 | else: 38 | bam_coll = None 39 | 40 | # Find recent samples with more than 1 data type, emphasizing diverse samples 41 | # Require higher depth coverage > 50Gb 42 | for sample in df["human_id"]: 43 | query = ("SELECT data_type, date FROM uploaded_data WHERE human_id='%s'" % sample) 44 | df = pd.read_sql_query(query, conn) 45 | if len(df.data_type.unique()) > 1: 46 | bam_size = find_bam_size(sample, bam_coll) if bam_coll else 100 47 | if bam_size > 50: 48 | query = ("SELECT * from demographics WHERE human_id='%s'"% sample) 49 | dfd = pd.read_sql_query(query, conn) 50 | if (len(dfd) and ((dfd["gender"][0] not in ["", "Male"]) 51 | or (dfd["race"][0] not in ["", "White", "Caucasian (White)"]))): 52 | demo = "%s %s" % (dfd["gender"][0], dfd["race"][0]) 53 | elif len(dfd) == 0: 54 | demo = "No demographics" 55 | print(sample, "%sGb" % bam_size, demo, list(set(df["data_type"])), list(set(df["date"]))) 56 | 57 | def find_bam_size(huid, coll): 58 | """Find PGP BAM size, in Gb from 
the Arvados collection. 59 | """ 60 | return int(sum([s for (n, s) in coll if n.startswith(huid)]) / (1024.0 * 1024.0 * 1024.0)) 61 | 62 | if __name__ == "__main__": 63 | main(sys.argv[1:]) 64 | -------------------------------------------------------------------------------- /somatic-giab-mix/bcbio_system.yaml: -------------------------------------------------------------------------------- 1 | local: 2 | ref: biodata/collections 3 | inputs: 4 | - biodata/regions 5 | - biodata/giab/na12878-na24385-somatic 6 | resources: 7 | default: {cores: 16, memory: 3750M, jvm_opts: [-Xms750m, -Xmx3750m]} 8 | -------------------------------------------------------------------------------- /somatic-giab-mix/download_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | ORIG_DIR=`pwd` 5 | # regions 6 | mkdir -p biodata/regions 7 | cd biodata/regions 8 | synapse get -r syn10468188 9 | cd $ORIG_DIR 10 | # GRCh37 11 | mkdir -p biodata/collections/GRCh37 12 | cd biodata/collections/GRCh37 13 | synapse get -r syn12027897 14 | cd $ORIG_DIR 15 | # giab mix sample 16 | mkdir -p biodata/giab/na12878-na24385-somatic 17 | cd biodata/giab/na12878-na24385-somatic 18 | synapse get -r syn12028788 19 | cd $ORIG_DIR 20 | # cleanup synapse files 21 | cd biodata 22 | find . 
-name "SYNAPSE_METADATA_MANIFEST.tsv" -exec rm -f {} \; 23 | cd $ORIG_DIR 24 | -------------------------------------------------------------------------------- /somatic-giab-mix/run_bunny.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=somatic-giab-mix 5 | 6 | # local bcbio install 7 | #bcbio_vm.py cwlrun bunny --no-container $PNAME-workflow 8 | 9 | # with Docker 10 | bcbio_vm.py cwlrun bunny $PNAME-workflow 11 | -------------------------------------------------------------------------------- /somatic-giab-mix/run_generate_cwl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=somatic-giab-mix 5 | TEMPLATE=somatic 6 | 7 | CWD=`pwd` 8 | rm -rf $PNAME/config 9 | rm -rf $PNAME-workflow 10 | bcbio_vm.py template --systemconfig bcbio_system.yaml ${TEMPLATE}-template.yaml $PNAME.csv 11 | bcbio_vm.py cwl --systemconfig bcbio_system.yaml $PNAME/config/$PNAME.yaml 12 | sed -i "s#$CWD/biodata/#../biodata/#" $PNAME-workflow/main-$PNAME-samples.json 13 | -------------------------------------------------------------------------------- /somatic-giab-mix/run_toil.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | PNAME=somatic-giab-mix 4 | 5 | # local bcbio install 6 | #bcbio_vm.py cwlrun toil --no-container $PNAME-workflow 7 | 8 | # with Docker 9 | bcbio_vm.py cwlrun toil $PNAME-workflow 10 | -------------------------------------------------------------------------------- /somatic-giab-mix/somatic-giab-mix-workflow/steps/combine_sample_regions.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | arv: http://arvados.org/cwl# 3 | dx: https://www.dnanexus.com/cwl# 4 | arguments: 5 | - position: 0 6 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 7 
| - sentinel_parallel=multi-combined 8 | - sentinel_outputs=config__algorithm__callable_regions,config__algorithm__non_callable_regions,config__algorithm__callable_count 9 | - sentinel_inputs=regions__callable:var,regions__nblock:var,metadata__batch:var,config__algorithm__nomap_split_size:var,config__algorithm__nomap_split_targets:var,reference__fasta__base:var,description:var,resources:var 10 | baseCommand: 11 | - bcbio_nextgen.py 12 | - runfn 13 | - combine_sample_regions 14 | - cwl 15 | class: CommandLineTool 16 | cwlVersion: v1.0 17 | hints: 18 | - class: DockerRequirement 19 | dockerImageId: quay.io/bcbio/bcbio-vc 20 | dockerPull: quay.io/bcbio/bcbio-vc 21 | - class: ResourceRequirement 22 | coresMin: 1 23 | outdirMin: 14875 24 | ramMin: 3840 25 | tmpdirMin: 6926 26 | - class: dx:InputResourceRequirement 27 | indirMin: 3008 28 | - class: SoftwareRequirement 29 | packages: 30 | - package: bedtools 31 | specs: 32 | - https://anaconda.org/bioconda/bedtools 33 | - package: htslib 34 | specs: 35 | - https://anaconda.org/bioconda/htslib 36 | - package: gatk4 37 | specs: 38 | - https://anaconda.org/bioconda/gatk4 39 | - package: gatk 40 | specs: 41 | - https://anaconda.org/bioconda/gatk 42 | - class: arv:APIRequirement 43 | inputs: 44 | - id: regions__callable 45 | type: 46 | items: 47 | - File 48 | - 'null' 49 | type: array 50 | - id: regions__nblock 51 | type: 52 | items: 53 | - File 54 | - 'null' 55 | type: array 56 | - id: metadata__batch 57 | type: 58 | items: string 59 | type: array 60 | - id: config__algorithm__nomap_split_size 61 | type: 62 | items: long 63 | type: array 64 | - id: config__algorithm__nomap_split_targets 65 | type: 66 | items: long 67 | type: array 68 | - id: reference__fasta__base 69 | secondaryFiles: 70 | - .fai 71 | - ^.dict 72 | type: 73 | items: File 74 | type: array 75 | - id: description 76 | type: 77 | items: string 78 | type: array 79 | - id: resources 80 | type: 81 | items: string 82 | type: array 83 | outputs: 84 | - id: 
config__algorithm__callable_regions 85 | type: 86 | items: File 87 | type: array 88 | - id: config__algorithm__non_callable_regions 89 | type: 90 | items: File 91 | type: array 92 | - id: config__algorithm__callable_count 93 | type: 94 | items: int 95 | type: array 96 | requirements: 97 | - class: InlineJavascriptRequirement 98 | - class: InitialWorkDirRequirement 99 | listing: 100 | - entry: $(JSON.stringify(inputs)) 101 | entryname: cwl.inputs.json 102 | -------------------------------------------------------------------------------- /somatic-giab-mix/somatic-giab-mix-workflow/steps/multiqc_summary.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-combined 7 | - sentinel_outputs=summary__multiqc 8 | - sentinel_inputs=qcout_rec:record 9 | baseCommand: 10 | - bcbio_nextgen.py 11 | - runfn 12 | - multiqc_summary 13 | - cwl 14 | class: CommandLineTool 15 | cwlVersion: v1.0 16 | hints: 17 | - class: DockerRequirement 18 | dockerImageId: quay.io/bcbio/bcbio-vc 19 | dockerPull: quay.io/bcbio/bcbio-vc 20 | - class: ResourceRequirement 21 | coresMin: 1 22 | outdirMin: 56429 23 | ramMin: 3840 24 | tmpdirMin: 27703 25 | - class: dx:InputResourceRequirement 26 | indirMin: 1 27 | - class: SoftwareRequirement 28 | packages: 29 | - package: multiqc 30 | specs: 31 | - https://anaconda.org/bioconda/multiqc 32 | - package: multiqc-bcbio 33 | specs: 34 | - https://anaconda.org/bioconda/multiqc-bcbio 35 | inputs: 36 | - id: qcout_rec 37 | type: 38 | items: 39 | fields: 40 | - name: summary__qc 41 | type: 42 | - File 43 | - 'null' 44 | - name: summary__metrics 45 | type: 46 | - string 47 | - 'null' 48 | - name: description 49 | type: string 50 | - name: genome_build 51 | type: string 52 | - name: config__algorithm__tools_off 53 | type: 54 | - 'null' 55 | 
- string 56 | - items: 57 | - 'null' 58 | - string 59 | type: array 60 | - name: config__algorithm__qc 61 | type: 62 | items: string 63 | type: array 64 | - name: config__algorithm__tools_on 65 | type: 66 | - 'null' 67 | - string 68 | - items: 69 | - 'null' 70 | - string 71 | type: array 72 | name: qcout_rec 73 | type: record 74 | type: array 75 | outputs: 76 | - id: summary__multiqc 77 | type: 78 | items: 79 | - File 80 | - 'null' 81 | type: array 82 | requirements: 83 | - class: InlineJavascriptRequirement 84 | - class: InitialWorkDirRequirement 85 | listing: 86 | - entry: $(JSON.stringify(inputs)) 87 | entryname: cwl.inputs.json 88 | -------------------------------------------------------------------------------- /somatic-giab-mix/somatic-giab-mix-workflow/steps/prep_samples.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-parallel 7 | - sentinel_outputs=config__algorithm__variant_regions,config__algorithm__variant_regions_merged,config__algorithm__variant_regions_orig,config__algorithm__coverage,config__algorithm__coverage_merged,config__algorithm__coverage_orig,config__algorithm__seq2c_bed_ready 8 | - sentinel_inputs=prep_samples_rec:record 9 | baseCommand: 10 | - bcbio_nextgen.py 11 | - runfn 12 | - prep_samples 13 | - cwl 14 | class: CommandLineTool 15 | cwlVersion: v1.0 16 | hints: 17 | - class: DockerRequirement 18 | dockerImageId: quay.io/bcbio/bcbio-vc 19 | dockerPull: quay.io/bcbio/bcbio-vc 20 | - class: ResourceRequirement 21 | coresMin: 1 22 | outdirMin: 14875 23 | ramMin: 3840 24 | tmpdirMin: 6926 25 | - class: dx:InputResourceRequirement 26 | indirMin: 3021 27 | - class: SoftwareRequirement 28 | packages: 29 | - package: htslib 30 | specs: 31 | - https://anaconda.org/bioconda/htslib 32 | - package: bedtools 33 | specs: 
34 | - https://anaconda.org/bioconda/bedtools 35 | - package: pythonpy 36 | specs: 37 | - https://anaconda.org/bioconda/pythonpy 38 | inputs: 39 | - id: prep_samples_rec 40 | type: 41 | fields: 42 | - name: description 43 | type: string 44 | - name: resources 45 | type: string 46 | - name: reference__fasta__base 47 | type: File 48 | - name: config__algorithm__coverage 49 | type: File 50 | - name: config__algorithm__variant_regions 51 | type: File 52 | name: prep_samples_rec 53 | type: record 54 | outputs: 55 | - id: config__algorithm__variant_regions 56 | type: 57 | - File 58 | - 'null' 59 | - id: config__algorithm__variant_regions_merged 60 | type: 61 | - File 62 | - 'null' 63 | - id: config__algorithm__variant_regions_orig 64 | type: 65 | - File 66 | - 'null' 67 | - id: config__algorithm__coverage 68 | type: 69 | - File 70 | - 'null' 71 | - id: config__algorithm__coverage_merged 72 | type: 73 | - File 74 | - 'null' 75 | - id: config__algorithm__coverage_orig 76 | type: 77 | - File 78 | - 'null' 79 | - id: config__algorithm__seq2c_bed_ready 80 | type: 81 | - File 82 | - 'null' 83 | requirements: 84 | - class: InlineJavascriptRequirement 85 | - class: InitialWorkDirRequirement 86 | listing: 87 | - entry: $(JSON.stringify(inputs)) 88 | entryname: cwl.inputs.json 89 | -------------------------------------------------------------------------------- /somatic-giab-mix/somatic-giab-mix-workflow/steps/prep_samples_to_rec.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-combined 7 | - sentinel_outputs=prep_samples_rec:description;resources;reference__fasta__base;config__algorithm__coverage;config__algorithm__variant_regions 8 | - 
sentinel_inputs=config__algorithm__coverage:var,config__algorithm__variant_regions:var,reference__fasta__base:var,description:var,resources:var 9 | baseCommand: 10 | - bcbio_nextgen.py 11 | - runfn 12 | - prep_samples_to_rec 13 | - cwl 14 | class: CommandLineTool 15 | cwlVersion: v1.0 16 | hints: 17 | - class: DockerRequirement 18 | dockerImageId: quay.io/bcbio/bcbio-vc 19 | dockerPull: quay.io/bcbio/bcbio-vc 20 | - class: ResourceRequirement 21 | coresMin: 1 22 | outdirMin: 14875 23 | ramMin: 3840 24 | tmpdirMin: 6926 25 | - class: dx:InputResourceRequirement 26 | indirMin: 0 27 | inputs: 28 | - id: config__algorithm__coverage 29 | type: 30 | items: File 31 | type: array 32 | - id: config__algorithm__variant_regions 33 | type: 34 | items: File 35 | type: array 36 | - id: reference__fasta__base 37 | secondaryFiles: 38 | - .fai 39 | - ^.dict 40 | type: 41 | items: File 42 | type: array 43 | - id: description 44 | type: 45 | items: string 46 | type: array 47 | - id: resources 48 | type: 49 | items: string 50 | type: array 51 | outputs: 52 | - id: prep_samples_rec 53 | type: 54 | items: 55 | fields: 56 | - name: description 57 | type: string 58 | - name: resources 59 | type: string 60 | - name: reference__fasta__base 61 | type: File 62 | - name: config__algorithm__coverage 63 | type: File 64 | - name: config__algorithm__variant_regions 65 | type: File 66 | name: prep_samples_rec 67 | type: record 68 | type: array 69 | requirements: 70 | - class: InlineJavascriptRequirement 71 | - class: InitialWorkDirRequirement 72 | listing: 73 | - entry: $(JSON.stringify(inputs)) 74 | entryname: cwl.inputs.json 75 | -------------------------------------------------------------------------------- /somatic-giab-mix/somatic-giab-mix.csv: -------------------------------------------------------------------------------- 1 | samplename,description,batch,phenotype,variant_regions,coverage,validate,validate_regions 2 | 
#!/bin/bash
# Download the somatic-lowfreq input data from Synapse into ./biodata:
#   biodata/pisces              <- syn12255556
#   biodata/smcounter2          <- syn12333650
#   biodata/collections/GRCh37  <- syn12027897
# and then remove the manifest files the Synapse client leaves behind.
set -eu -o pipefail

ORIG_DIR=$(pwd)

# Recursively fetch Synapse entity $2 into directory $1 (relative to ORIG_DIR).
# The download runs in a subshell so the caller's working directory is never
# changed; previously a missing `cd $ORIG_DIR` after the smcounter2 download
# caused the GRCh37 collection to be created under biodata/smcounter2/.
fetch_synapse() {
  local target="$ORIG_DIR/$1"
  local synapse_id="$2"
  mkdir -p "$target"
  (cd "$target" && synapse get -r "$synapse_id")
}

# pisces
fetch_synapse biodata/pisces syn12255556
# smcounter2
fetch_synapse biodata/smcounter2 syn12333650
# GRCh37
fetch_synapse biodata/collections/GRCh37 syn12027897

# cleanup synapse files
find "$ORIG_DIR/biodata" -name "SYNAPSE_METADATA_MANIFEST.tsv" -exec rm -f {} \;
items: 59 | - 'null' 60 | - string 61 | type: array 62 | - id: config__algorithm__nomap_split_size 63 | type: 64 | items: long 65 | type: array 66 | - id: config__algorithm__nomap_split_targets 67 | type: 68 | items: long 69 | type: array 70 | - id: reference__fasta__base 71 | secondaryFiles: 72 | - .fai 73 | - ^.dict 74 | type: 75 | items: File 76 | type: array 77 | - id: resources 78 | type: 79 | items: string 80 | type: array 81 | - id: description 82 | type: 83 | items: string 84 | type: array 85 | outputs: 86 | - id: config__algorithm__callable_regions 87 | type: 88 | items: File 89 | type: array 90 | - id: config__algorithm__non_callable_regions 91 | type: 92 | items: File 93 | type: array 94 | - id: config__algorithm__callable_count 95 | type: 96 | items: int 97 | type: array 98 | requirements: 99 | - class: InlineJavascriptRequirement 100 | - class: InitialWorkDirRequirement 101 | listing: 102 | - entry: $(JSON.stringify(inputs)) 103 | entryname: cwl.inputs.json 104 | -------------------------------------------------------------------------------- /somatic-lowfreq/pisces-ras-workflow/steps/multiqc_summary.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-combined 7 | - sentinel_outputs=summary__multiqc 8 | - sentinel_inputs=qcout_rec:record 9 | baseCommand: 10 | - bcbio_nextgen.py 11 | - runfn 12 | - multiqc_summary 13 | - cwl 14 | class: CommandLineTool 15 | cwlVersion: v1.0 16 | hints: 17 | - class: DockerRequirement 18 | dockerImageId: quay.io/bcbio/bcbio-vc 19 | dockerPull: quay.io/bcbio/bcbio-vc 20 | - class: ResourceRequirement 21 | coresMin: 1 22 | outdirMin: 1507 23 | ramMin: 3584 24 | tmpdirMin: 242 25 | - class: dx:InputResourceRequirement 26 | indirMin: 1 27 | - class: SoftwareRequirement 28 | packages: 29 | - 
package: multiqc 30 | specs: 31 | - https://anaconda.org/bioconda/multiqc 32 | - package: multiqc-bcbio 33 | specs: 34 | - https://anaconda.org/bioconda/multiqc-bcbio 35 | inputs: 36 | - id: qcout_rec 37 | type: 38 | items: 39 | fields: 40 | - name: summary__qc 41 | type: 42 | - File 43 | - 'null' 44 | - name: summary__metrics 45 | type: 46 | - string 47 | - 'null' 48 | - name: description 49 | type: string 50 | - name: genome_build 51 | type: string 52 | - name: config__algorithm__tools_off 53 | type: 54 | items: string 55 | type: array 56 | - name: config__algorithm__qc 57 | type: 58 | items: string 59 | type: array 60 | - name: config__algorithm__tools_on 61 | type: 62 | - 'null' 63 | - string 64 | - items: 65 | - 'null' 66 | - string 67 | type: array 68 | name: qcout_rec 69 | type: record 70 | type: array 71 | outputs: 72 | - id: summary__multiqc 73 | type: 74 | items: 75 | - File 76 | - 'null' 77 | type: array 78 | requirements: 79 | - class: InlineJavascriptRequirement 80 | - class: InitialWorkDirRequirement 81 | listing: 82 | - entry: $(JSON.stringify(inputs)) 83 | entryname: cwl.inputs.json 84 | -------------------------------------------------------------------------------- /somatic-lowfreq/pisces-ras-workflow/steps/prep_samples.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-parallel 7 | - sentinel_outputs=config__algorithm__variant_regions,config__algorithm__variant_regions_merged,config__algorithm__variant_regions_orig,config__algorithm__coverage,config__algorithm__coverage_merged,config__algorithm__coverage_orig,config__algorithm__seq2c_bed_ready 8 | - sentinel_inputs=prep_samples_rec:record 9 | baseCommand: 10 | - bcbio_nextgen.py 11 | - runfn 12 | - prep_samples 13 | - cwl 14 | class: CommandLineTool 15 | cwlVersion: 
v1.0 16 | hints: 17 | - class: DockerRequirement 18 | dockerImageId: quay.io/bcbio/bcbio-vc 19 | dockerPull: quay.io/bcbio/bcbio-vc 20 | - class: ResourceRequirement 21 | coresMin: 1 22 | outdirMin: 1144 23 | ramMin: 3584 24 | tmpdirMin: 60 25 | - class: dx:InputResourceRequirement 26 | indirMin: 3008 27 | - class: SoftwareRequirement 28 | packages: 29 | - package: htslib 30 | specs: 31 | - https://anaconda.org/bioconda/htslib 32 | - package: bedtools 33 | specs: 34 | - https://anaconda.org/bioconda/bedtools 35 | - package: pythonpy 36 | specs: 37 | - https://anaconda.org/bioconda/pythonpy 38 | inputs: 39 | - id: prep_samples_rec 40 | type: 41 | fields: 42 | - name: resources 43 | type: string 44 | - name: description 45 | type: string 46 | - name: reference__fasta__base 47 | type: File 48 | - name: config__algorithm__variant_regions 49 | type: File 50 | name: prep_samples_rec 51 | type: record 52 | outputs: 53 | - id: config__algorithm__variant_regions 54 | type: 55 | - File 56 | - 'null' 57 | - id: config__algorithm__variant_regions_merged 58 | type: 59 | - File 60 | - 'null' 61 | - id: config__algorithm__variant_regions_orig 62 | type: 63 | - File 64 | - 'null' 65 | - id: config__algorithm__coverage 66 | type: 67 | - File 68 | - 'null' 69 | - id: config__algorithm__coverage_merged 70 | type: 71 | - File 72 | - 'null' 73 | - id: config__algorithm__coverage_orig 74 | type: 75 | - File 76 | - 'null' 77 | - id: config__algorithm__seq2c_bed_ready 78 | type: 79 | - File 80 | - 'null' 81 | requirements: 82 | - class: InlineJavascriptRequirement 83 | - class: InitialWorkDirRequirement 84 | listing: 85 | - entry: $(JSON.stringify(inputs)) 86 | entryname: cwl.inputs.json 87 | -------------------------------------------------------------------------------- /somatic-lowfreq/pisces-ras-workflow/steps/prep_samples_to_rec.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 
| - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-combined 7 | - sentinel_outputs=prep_samples_rec:resources;description;reference__fasta__base;config__algorithm__variant_regions 8 | - sentinel_inputs=config__algorithm__variant_regions:var,reference__fasta__base:var,resources:var,description:var 9 | baseCommand: 10 | - bcbio_nextgen.py 11 | - runfn 12 | - prep_samples_to_rec 13 | - cwl 14 | class: CommandLineTool 15 | cwlVersion: v1.0 16 | hints: 17 | - class: DockerRequirement 18 | dockerImageId: quay.io/bcbio/bcbio-vc 19 | dockerPull: quay.io/bcbio/bcbio-vc 20 | - class: ResourceRequirement 21 | coresMin: 1 22 | outdirMin: 1144 23 | ramMin: 3584 24 | tmpdirMin: 60 25 | - class: dx:InputResourceRequirement 26 | indirMin: 0 27 | inputs: 28 | - id: config__algorithm__variant_regions 29 | type: 30 | items: File 31 | type: array 32 | - id: reference__fasta__base 33 | secondaryFiles: 34 | - .fai 35 | - ^.dict 36 | type: 37 | items: File 38 | type: array 39 | - id: resources 40 | type: 41 | items: string 42 | type: array 43 | - id: description 44 | type: 45 | items: string 46 | type: array 47 | outputs: 48 | - id: prep_samples_rec 49 | type: 50 | items: 51 | fields: 52 | - name: resources 53 | type: string 54 | - name: description 55 | type: string 56 | - name: reference__fasta__base 57 | type: File 58 | - name: config__algorithm__variant_regions 59 | type: File 60 | name: prep_samples_rec 61 | type: record 62 | type: array 63 | requirements: 64 | - class: InlineJavascriptRequirement 65 | - class: InitialWorkDirRequirement 66 | listing: 67 | - entry: $(JSON.stringify(inputs)) 68 | entryname: cwl.inputs.json 69 | -------------------------------------------------------------------------------- /somatic-lowfreq/pisces-titr-workflow/steps/combine_sample_regions.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | arv: http://arvados.org/cwl# 3 | 
dx: https://www.dnanexus.com/cwl# 4 | arguments: 5 | - position: 0 6 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 7 | - sentinel_parallel=multi-combined 8 | - sentinel_outputs=config__algorithm__callable_regions,config__algorithm__non_callable_regions,config__algorithm__callable_count 9 | - sentinel_inputs=regions__callable:var,regions__nblock:var,metadata__batch:var,config__algorithm__nomap_split_size:var,config__algorithm__nomap_split_targets:var,reference__fasta__base:var,resources:var,description:var 10 | baseCommand: 11 | - bcbio_nextgen.py 12 | - runfn 13 | - combine_sample_regions 14 | - cwl 15 | class: CommandLineTool 16 | cwlVersion: v1.0 17 | hints: 18 | - class: DockerRequirement 19 | dockerImageId: quay.io/bcbio/bcbio-vc 20 | dockerPull: quay.io/bcbio/bcbio-vc 21 | - class: ResourceRequirement 22 | coresMin: 1 23 | outdirMin: 1099 24 | ramMin: 3584 25 | tmpdirMin: 38 26 | - class: dx:InputResourceRequirement 27 | indirMin: 3008 28 | - class: SoftwareRequirement 29 | packages: 30 | - package: bedtools 31 | specs: 32 | - https://anaconda.org/bioconda/bedtools 33 | - package: htslib 34 | specs: 35 | - https://anaconda.org/bioconda/htslib 36 | - package: gatk4 37 | specs: 38 | - https://anaconda.org/bioconda/gatk4 39 | - package: gatk 40 | specs: 41 | - https://anaconda.org/bioconda/gatk 42 | - class: arv:APIRequirement 43 | inputs: 44 | - id: regions__callable 45 | type: 46 | items: 47 | - File 48 | - 'null' 49 | type: array 50 | - id: regions__nblock 51 | type: 52 | items: 53 | - File 54 | - 'null' 55 | type: array 56 | - id: metadata__batch 57 | type: 58 | items: 59 | - 'null' 60 | - string 61 | type: array 62 | - id: config__algorithm__nomap_split_size 63 | type: 64 | items: long 65 | type: array 66 | - id: config__algorithm__nomap_split_targets 67 | type: 68 | items: long 69 | type: array 70 | - id: reference__fasta__base 71 | secondaryFiles: 72 | - .fai 73 | - ^.dict 74 | type: 75 | items: File 76 | type: array 77 | - 
id: resources 78 | type: 79 | items: string 80 | type: array 81 | - id: description 82 | type: 83 | items: string 84 | type: array 85 | outputs: 86 | - id: config__algorithm__callable_regions 87 | type: 88 | items: File 89 | type: array 90 | - id: config__algorithm__non_callable_regions 91 | type: 92 | items: File 93 | type: array 94 | - id: config__algorithm__callable_count 95 | type: 96 | items: int 97 | type: array 98 | requirements: 99 | - class: InlineJavascriptRequirement 100 | - class: InitialWorkDirRequirement 101 | listing: 102 | - entry: $(JSON.stringify(inputs)) 103 | entryname: cwl.inputs.json 104 | -------------------------------------------------------------------------------- /somatic-lowfreq/pisces-titr-workflow/steps/multiqc_summary.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-combined 7 | - sentinel_outputs=summary__multiqc 8 | - sentinel_inputs=qcout_rec:record 9 | baseCommand: 10 | - bcbio_nextgen.py 11 | - runfn 12 | - multiqc_summary 13 | - cwl 14 | class: CommandLineTool 15 | cwlVersion: v1.0 16 | hints: 17 | - class: DockerRequirement 18 | dockerImageId: quay.io/bcbio/bcbio-vc 19 | dockerPull: quay.io/bcbio/bcbio-vc 20 | - class: ResourceRequirement 21 | coresMin: 1 22 | outdirMin: 1325 23 | ramMin: 3584 24 | tmpdirMin: 151 25 | - class: dx:InputResourceRequirement 26 | indirMin: 1 27 | - class: SoftwareRequirement 28 | packages: 29 | - package: multiqc 30 | specs: 31 | - https://anaconda.org/bioconda/multiqc 32 | - package: multiqc-bcbio 33 | specs: 34 | - https://anaconda.org/bioconda/multiqc-bcbio 35 | inputs: 36 | - id: qcout_rec 37 | type: 38 | items: 39 | fields: 40 | - name: summary__qc 41 | type: 42 | - File 43 | - 'null' 44 | - name: summary__metrics 45 | type: 46 | - string 47 | - 'null' 48 | - 
name: description 49 | type: string 50 | - name: genome_build 51 | type: string 52 | - name: config__algorithm__tools_off 53 | type: 54 | items: string 55 | type: array 56 | - name: config__algorithm__qc 57 | type: 58 | items: string 59 | type: array 60 | - name: config__algorithm__tools_on 61 | type: 62 | - 'null' 63 | - string 64 | - items: 65 | - 'null' 66 | - string 67 | type: array 68 | name: qcout_rec 69 | type: record 70 | type: array 71 | outputs: 72 | - id: summary__multiqc 73 | type: 74 | items: 75 | - File 76 | - 'null' 77 | type: array 78 | requirements: 79 | - class: InlineJavascriptRequirement 80 | - class: InitialWorkDirRequirement 81 | listing: 82 | - entry: $(JSON.stringify(inputs)) 83 | entryname: cwl.inputs.json 84 | -------------------------------------------------------------------------------- /somatic-lowfreq/pisces-titr-workflow/steps/prep_samples.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-parallel 7 | - sentinel_outputs=config__algorithm__variant_regions,config__algorithm__variant_regions_merged,config__algorithm__variant_regions_orig,config__algorithm__coverage,config__algorithm__coverage_merged,config__algorithm__coverage_orig,config__algorithm__seq2c_bed_ready 8 | - sentinel_inputs=prep_samples_rec:record 9 | baseCommand: 10 | - bcbio_nextgen.py 11 | - runfn 12 | - prep_samples 13 | - cwl 14 | class: CommandLineTool 15 | cwlVersion: v1.0 16 | hints: 17 | - class: DockerRequirement 18 | dockerImageId: quay.io/bcbio/bcbio-vc 19 | dockerPull: quay.io/bcbio/bcbio-vc 20 | - class: ResourceRequirement 21 | coresMin: 1 22 | outdirMin: 1099 23 | ramMin: 3584 24 | tmpdirMin: 38 25 | - class: dx:InputResourceRequirement 26 | indirMin: 3008 27 | - class: SoftwareRequirement 28 | packages: 29 | - package: htslib 
30 | specs: 31 | - https://anaconda.org/bioconda/htslib 32 | - package: bedtools 33 | specs: 34 | - https://anaconda.org/bioconda/bedtools 35 | - package: pythonpy 36 | specs: 37 | - https://anaconda.org/bioconda/pythonpy 38 | inputs: 39 | - id: prep_samples_rec 40 | type: 41 | fields: 42 | - name: resources 43 | type: string 44 | - name: description 45 | type: string 46 | - name: reference__fasta__base 47 | type: File 48 | - name: config__algorithm__variant_regions 49 | type: File 50 | name: prep_samples_rec 51 | type: record 52 | outputs: 53 | - id: config__algorithm__variant_regions 54 | type: 55 | - File 56 | - 'null' 57 | - id: config__algorithm__variant_regions_merged 58 | type: 59 | - File 60 | - 'null' 61 | - id: config__algorithm__variant_regions_orig 62 | type: 63 | - File 64 | - 'null' 65 | - id: config__algorithm__coverage 66 | type: 67 | - File 68 | - 'null' 69 | - id: config__algorithm__coverage_merged 70 | type: 71 | - File 72 | - 'null' 73 | - id: config__algorithm__coverage_orig 74 | type: 75 | - File 76 | - 'null' 77 | - id: config__algorithm__seq2c_bed_ready 78 | type: 79 | - File 80 | - 'null' 81 | requirements: 82 | - class: InlineJavascriptRequirement 83 | - class: InitialWorkDirRequirement 84 | listing: 85 | - entry: $(JSON.stringify(inputs)) 86 | entryname: cwl.inputs.json 87 | -------------------------------------------------------------------------------- /somatic-lowfreq/pisces-titr-workflow/steps/prep_samples_to_rec.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-combined 7 | - sentinel_outputs=prep_samples_rec:resources;description;reference__fasta__base;config__algorithm__variant_regions 8 | - sentinel_inputs=config__algorithm__variant_regions:var,reference__fasta__base:var,resources:var,description:var 
9 | baseCommand: 10 | - bcbio_nextgen.py 11 | - runfn 12 | - prep_samples_to_rec 13 | - cwl 14 | class: CommandLineTool 15 | cwlVersion: v1.0 16 | hints: 17 | - class: DockerRequirement 18 | dockerImageId: quay.io/bcbio/bcbio-vc 19 | dockerPull: quay.io/bcbio/bcbio-vc 20 | - class: ResourceRequirement 21 | coresMin: 1 22 | outdirMin: 1099 23 | ramMin: 3584 24 | tmpdirMin: 38 25 | - class: dx:InputResourceRequirement 26 | indirMin: 0 27 | inputs: 28 | - id: config__algorithm__variant_regions 29 | type: 30 | items: File 31 | type: array 32 | - id: reference__fasta__base 33 | secondaryFiles: 34 | - .fai 35 | - ^.dict 36 | type: 37 | items: File 38 | type: array 39 | - id: resources 40 | type: 41 | items: string 42 | type: array 43 | - id: description 44 | type: 45 | items: string 46 | type: array 47 | outputs: 48 | - id: prep_samples_rec 49 | type: 50 | items: 51 | fields: 52 | - name: resources 53 | type: string 54 | - name: description 55 | type: string 56 | - name: reference__fasta__base 57 | type: File 58 | - name: config__algorithm__variant_regions 59 | type: File 60 | name: prep_samples_rec 61 | type: record 62 | type: array 63 | requirements: 64 | - class: InlineJavascriptRequirement 65 | - class: InitialWorkDirRequirement 66 | listing: 67 | - entry: $(JSON.stringify(inputs)) 68 | entryname: cwl.inputs.json 69 | -------------------------------------------------------------------------------- /somatic-lowfreq/prepare_inputs/clean_truth_sets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Clean Pisces truth sets, creating ready to compare VCFs. 
def _strip_chr(line):
    """Return `line` without a leading "chr", leaving other lines untouched."""
    return line[3:] if line.startswith("chr") else line

def clean_ras():
    """Add VCF headers to RAS VCFs and bgzip and tabix.

    Processes every `*.vcf` in `in_vcf_dir` whose path does not contain
    "NA1287" (those are the titration truth sets). For each input the header
    is produced with `vcfutils.write_empty_vcf`, the input lines are appended
    with any leading "chr" removed, and the result is passed to
    `vcfutils.bgzip_and_index`.

    The plain VCF is assembled under a temporary name and renamed into place,
    so an interrupted run cannot leave a truncated file that later runs would
    mistake for finished output. A complete but not yet compressed VCF is
    picked up and compressed on the next run.
    """
    for in_vcf in glob.glob(os.path.join(in_vcf_dir, "*.vcf")):
        if in_vcf.find("NA1287") != -1:
            continue
        in_name = os.path.basename(in_vcf)
        out_name = in_name.replace(".txt", "")
        out_vcf = os.path.join(out_vcf_dir, out_name)
        if os.path.exists(out_vcf + ".gz"):
            continue
        if not os.path.exists(out_vcf):
            # Temporary name keeps the .vcf suffix the header writer was given originally.
            tmp_vcf = os.path.join(out_vcf_dir, "tmp-%s" % out_name)
            vcfutils.write_empty_vcf(tmp_vcf, samples=[in_name.replace(".txt.vcf", "")])
            with open(tmp_vcf, "a") as out_handle:
                with open(in_vcf) as in_handle:
                    for line in in_handle:
                        out_handle.write(_strip_chr(line))
            os.rename(tmp_vcf, out_vcf)
        vcfutils.bgzip_and_index(out_vcf)

def clean_titration():
    """Subset to interval regions and bgzip/tabix.

    Each `NA1287*.vcf` in `in_vcf_dir` is restricted to the titration interval
    BED with bcftools, has `##contig` header lines and leading "chr" prefixes
    removed, and is written bgzipped to `out_vcf_dir` before indexing.

    The pipeline runs under bash with `pipefail` so a failure in any stage
    (not only the final bgzip) raises `subprocess.CalledProcessError`, and it
    writes to a temporary file that is renamed only on success, so a failed or
    interrupted run never leaves a truncated truth set that would be skipped
    on the next invocation.
    """
    region_bed = os.path.join(in_region_dir, "Intervals_TSAVP_Titr.bed")
    for in_vcf in glob.glob(os.path.join(in_vcf_dir, "NA1287*.vcf")):
        out_name = "%s.gz" % os.path.basename(in_vcf)
        out_vcf = os.path.join(out_vcf_dir, out_name)
        if not os.path.exists(out_vcf):
            tmp_vcf = os.path.join(out_vcf_dir, "tmp-%s" % out_name)
            cmd = ("set -o pipefail; "
                   "bcftools view {in_vcf} -T {region_bed} | grep -v '##contig' | "
                   "sed 's/^chr//g' | bgzip -c > {tmp_vcf}")
            subprocess.check_call(cmd.format(in_vcf=in_vcf, region_bed=region_bed, tmp_vcf=tmp_vcf),
                                  shell=True, executable="/bin/bash")
            os.rename(tmp_vcf, out_vcf)
            vcfutils.bgzip_and_index(out_vcf)

def fix_regions():
    """Copy region BED files to `out_region_dir` with leading "chr" removed.

    Existing outputs are left untouched. New outputs are written under a
    temporary name and renamed into place so a partial copy is never treated
    as complete.
    """
    for in_bed in glob.glob(os.path.join(in_region_dir, "*.bed")):
        out_bed = os.path.join(out_region_dir, os.path.basename(in_bed))
        if not os.path.exists(out_bed):
            tmp_bed = out_bed + ".tmp"
            with open(in_bed) as in_handle:
                with open(tmp_bed, "w") as out_handle:
                    for line in in_handle:
                        out_handle.write(_strip_chr(line))
            os.rename(tmp_bed, out_bed)


clean_titration()
clean_ras()
fix_regions()
def write_sample_yaml(out, fname):
    """Write the list of sample configurations `out` to `fname` as block-style
    YAML under a top-level `details` key.
    """
    document = {"details": out}
    with open(fname, "w") as out_handle:
        yaml.safe_dump(document, out_handle, default_flow_style=False, allow_unicode=False)

def _sample_config(in_bam, name, regions, truth_vcf, meta_key, meta_value):
    """Build one sample entry: a deep copy of `base` with the description,
    input file, region/validation settings and one extra metadata key set.
    """
    sample = copy.deepcopy(base)
    sample["description"] = name
    sample["files"] = in_bam
    algorithm = sample["algorithm"]
    algorithm["variant_regions"] = regions
    algorithm["validate_regions"] = regions
    algorithm["validate"] = truth_vcf
    sample["metadata"][meta_key] = meta_value
    return sample

def prepare_titration():
    """Write pisces-titr.yaml with one entry per `*pct-NA1287*bam` input.

    The description is the first two dash-separated pieces of the BAM file name.
    """
    regions = "pisces/regions/Intervals_TSAVP_Titr.bed"
    truth_vcf = "pisces/truth/NA1287_78Titr.vcf.gz"
    bam_files = sorted(glob.glob(os.path.join("pisces", "inputs", "*pct-NA1287*bam")))
    samples = []
    for in_bam in bam_files:
        name = "-".join(os.path.basename(in_bam).split("-")[:2])
        samples.append(_sample_config(in_bam, name, regions, truth_vcf,
                                      "validate_batch", "titration"))
    return write_sample_yaml(samples, "pisces-titr.yaml")

def prepare_ras():
    """Write pisces-ras.yaml with one entry per non-titration BAM input.

    The description is the part of the BAM file name before the first
    underscore; the matching `<name>_truth.vcf.gz` truth file must exist.
    """
    regions = "pisces/regions/KRASandNRASinterval2.bed"
    samples = []
    for in_bam in sorted(glob.glob(os.path.join("pisces", "inputs", "*bam"))):
        if in_bam.find("pct-NA1287") >= 0:
            # titration inputs belong to prepare_titration
            continue
        name = os.path.basename(in_bam).split("_")[0]
        truth_vcf = "pisces/truth/%s_truth.vcf.gz" % name
        sample = _sample_config(in_bam, name, regions, truth_vcf, "validate_combine", "ras")
        assert os.path.exists(sample["algorithm"]["validate"]), sample["algorithm"]["validate"]
        samples.append(sample)
    return write_sample_yaml(samples, "pisces-ras.yaml")

prepare_titration()
prepare_ras()
#!/usr/bin/env python
"""Convert smcounter TSV files into valid sorted VCF inputs.

Handles M cases (columns with variation info)
and N cases (VCF like columns)

Usage:
    convert_truth_vcf.py <sample_name> < truth.tsv > truth.vcf

The first non-blank input row is treated as the column header and replaced by
a minimal VCF header; every later non-blank row becomes one VCF record.
"""
import sys

def write_header(parts, sample_name, out_handle=None):
    """Write a minimal VCF header and report which layout the input uses.

    parts -- columns of the first input row; a first column of "Gene"
             identifies the M layout, anything else the N layout.
    sample_name -- name used for the single sample column.
    out_handle -- writable text handle, defaults to sys.stdout.

    Returns "M" or "N".
    """
    if out_handle is None:
        out_handle = sys.stdout
    if parts[0] == "Gene":
        cur_type = "M"
    else:
        cur_type = "N"
    out_handle.write("##fileformat=VCFv4.1\n")
    out_handle.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t%s\n" % sample_name)
    return cur_type

def write_m_vcf(parts, out_handle=None):
    """Write one VCF record from an M layout row.

    Only the first six columns (gene, variant, chrom, position, ref, alt) and
    the eighth (allele fraction) are used, so rows whose trailing cells are
    empty or missing still convert instead of failing a fixed 12-way unpack.

    Raises ValueError when the row has fewer than eight columns.
    """
    if out_handle is None:
        out_handle = sys.stdout
    if len(parts) < 8:
        raise ValueError("Expected at least 8 tab-separated columns, found %s: %r"
                         % (len(parts), parts))
    gene, variant, chrom, position, ref, alt = parts[:6]
    af = parts[7]
    # Only a leading "chr" is a naming prefix; leave the rest of the name alone.
    if chrom.startswith("chr"):
        chrom = chrom[3:]
    out = [chrom, position, "%s_%s" % (gene, variant), ref, alt, ".", "PASS", ".", "GT:AF",
           "0/1:%s" % af]
    out_handle.write("\t".join(out) + "\n")

def write_n_vcf(parts, out_handle=None):
    """Write an N layout row unchanged; its columns are already VCF-like."""
    if out_handle is None:
        out_handle = sys.stdout
    out_handle.write("\t".join(parts) + "\n")

def convert(in_handle, out_handle, sample_name):
    """Convert all rows from `in_handle` into VCF lines on `out_handle`.

    Trailing whitespace (including empty trailing cells) is removed from each
    row, leading empty cells are kept so columns do not shift, and rows that
    are blank after trimming are skipped.
    """
    cur_type = None
    for line in in_handle:
        line = line.rstrip()
        if not line:
            continue
        parts = line.split("\t")
        if not cur_type:  # first line
            cur_type = write_header(parts, sample_name, out_handle)
        elif cur_type == "M":
            write_m_vcf(parts, out_handle)
        else:
            write_n_vcf(parts, out_handle)

def main(args):
    """Command line entry point: `args` holds the sample name."""
    if len(args) < 1:
        sys.stderr.write("Usage: convert_truth_vcf.py <sample_name> < in.tsv > out.vcf\n")
        return 1
    convert(sys.stdin, sys.stdout, args[0])
    return 0

if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
-------------------------------------------------------------------------------- /somatic-lowfreq/prepare_inputs/smcounter2/prepare_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | REF=/mnt/work/bcbio/genomes/Hsapiens/GRCh37/seq/GRCh37.fa.fai 5 | 6 | # Region BEDs 7 | wget -O - https://storage.googleapis.com/smcounterv2-paper/N0261/CDHS-13907Z-562.roi.bed | grep -v ^track | sed "s/^chr//g" | gsort /dev/stdin $REF > N0261.bed 8 | wget -O - https://storage.googleapis.com/smcounterv2-paper/N13532/CDHS-13532Z-10181.roi.bed | grep -v ^track | sed "s/^chr//g" | gsort /dev/stdin $REF > N13532.bed 9 | wget -O - https://storage.googleapis.com/smcounterv2-paper/M0253/DHS-101Z.roi.bed | grep -v ^track | sed "s/^chr//g" | gsort /dev/stdin $REF > M0253.bed 10 | 11 | # Truth sets, converted to VCF 12 | wget -c https://www.biorxiv.org/highwire/filestream/86698/field_highwire_adjunct_files/1/281659-2.xlsx 13 | 14 | I=1 15 | for SAMPLE in N13532 N0261 M0253 16 | do 17 | xlsx2csv -d tab -s $I 281659-2.xlsx | python convert_truth_vcf.py $SAMPLE | gsort /dev/stdin $REF | bgzip -c > smcounter2-$SAMPLE-truth.vcf.gz 18 | tabix -p vcf smcounter2-$SAMPLE-truth.vcf.gz 19 | let I=${I}+1 20 | done 21 | 22 | # Prepare fastq input files 23 | wget -c https://www.biorxiv.org/highwire/filestream/86698/field_highwire_adjunct_files/0/281659-1.xlsx 24 | 25 | 26 | for SAMPLE in N13532 N0261 M0253 27 | do 28 | wget -c https://storage.googleapis.com/smcounterv2-paper/$SAMPLE/$SAMPLE.bam 29 | bamtofastq filename=$SAMPLE.bam tags=mi collate=1 S=/dev/null O=/dev/null O2=/dev/null F=>(python fix_bam_umis.py | bgzip -c > smcounter2-${SAMPLE}_R1.fq.gz) F2=>(python fix_bam_umis.py | bgzip -c > smcounter2-${SAMPLE}_R2.fq.gz) 30 | done 31 | -------------------------------------------------------------------------------- /somatic-lowfreq/run_bunny.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=pisces-ras 5 | 6 | # local bcbio install 7 | bcbio_vm.py cwlrun bunny --no-container $PNAME-workflow 8 | -------------------------------------------------------------------------------- /somatic-lowfreq/run_cromwell.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=pisces-ras 5 | #PNAME=pisces-titr 6 | 7 | # local bcbio install 8 | bcbio_vm.py cwlrun cromwell -s htcondor --no-container $PNAME-workflow 9 | -------------------------------------------------------------------------------- /somatic-lowfreq/run_generate_cwl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | # Regenerate the CWL workflow for each project, then rewrite absolute biodata paths as relative ones 5 | CWD=$(pwd) 6 | for PNAME in pisces-titr pisces-ras smcounter2-umi 7 | do 8 | rm -rf "$PNAME-workflow" 9 | bcbio_vm.py cwl --systemconfig bcbio_system.yaml "$PNAME.yaml" 10 | sed -i "s#$CWD/biodata/#../biodata/#g" "$PNAME-workflow/main-$PNAME-samples.json" 11 | done 12 | -------------------------------------------------------------------------------- /somatic-lowfreq/smcounter2-umi-workflow/steps/combine_sample_regions.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | arv: http://arvados.org/cwl# 3 | dx: https://www.dnanexus.com/cwl# 4 | arguments: 5 | - position: 0 6 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 7 | - sentinel_parallel=multi-combined 8 | - sentinel_outputs=config__algorithm__callable_regions,config__algorithm__non_callable_regions,config__algorithm__callable_count 9 | -
sentinel_inputs=regions__callable:var,regions__nblock:var,metadata__batch:var,config__algorithm__nomap_split_size:var,config__algorithm__nomap_split_targets:var,reference__fasta__base:var,resources:var,description:var 10 | baseCommand: 11 | - bcbio_nextgen.py 12 | - runfn 13 | - combine_sample_regions 14 | - cwl 15 | class: CommandLineTool 16 | cwlVersion: v1.0 17 | hints: 18 | - class: DockerRequirement 19 | dockerImageId: quay.io/bcbio/bcbio-vc 20 | dockerPull: quay.io/bcbio/bcbio-vc 21 | - class: ResourceRequirement 22 | coresMin: 1 23 | outdirMin: 11430 24 | ramMin: 3584 25 | tmpdirMin: 5203 26 | - class: dx:InputResourceRequirement 27 | indirMin: 3008 28 | - class: SoftwareRequirement 29 | packages: 30 | - package: bedtools 31 | specs: 32 | - https://anaconda.org/bioconda/bedtools 33 | - package: htslib 34 | specs: 35 | - https://anaconda.org/bioconda/htslib 36 | - package: gatk4 37 | specs: 38 | - https://anaconda.org/bioconda/gatk4 39 | - package: gatk 40 | specs: 41 | - https://anaconda.org/bioconda/gatk 42 | - class: arv:APIRequirement 43 | inputs: 44 | - id: regions__callable 45 | type: 46 | items: 47 | - File 48 | - 'null' 49 | type: array 50 | - id: regions__nblock 51 | type: 52 | items: 53 | - File 54 | - 'null' 55 | type: array 56 | - id: metadata__batch 57 | type: 58 | items: 59 | - 'null' 60 | - string 61 | type: array 62 | - id: config__algorithm__nomap_split_size 63 | type: 64 | items: long 65 | type: array 66 | - id: config__algorithm__nomap_split_targets 67 | type: 68 | items: long 69 | type: array 70 | - id: reference__fasta__base 71 | secondaryFiles: 72 | - .fai 73 | - ^.dict 74 | type: 75 | items: File 76 | type: array 77 | - id: resources 78 | type: 79 | items: string 80 | type: array 81 | - id: description 82 | type: 83 | items: string 84 | type: array 85 | outputs: 86 | - id: config__algorithm__callable_regions 87 | type: 88 | items: File 89 | type: array 90 | - id: config__algorithm__non_callable_regions 91 | type: 92 | items: File 93 | 
type: array 94 | - id: config__algorithm__callable_count 95 | type: 96 | items: int 97 | type: array 98 | requirements: 99 | - class: InlineJavascriptRequirement 100 | - class: InitialWorkDirRequirement 101 | listing: 102 | - entry: $(JSON.stringify(inputs)) 103 | entryname: cwl.inputs.json 104 | -------------------------------------------------------------------------------- /somatic-lowfreq/smcounter2-umi-workflow/steps/multiqc_summary.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-combined 7 | - sentinel_outputs=summary__multiqc 8 | - sentinel_inputs=qcout_rec:record 9 | baseCommand: 10 | - bcbio_nextgen.py 11 | - runfn 12 | - multiqc_summary 13 | - cwl 14 | class: CommandLineTool 15 | cwlVersion: v1.0 16 | hints: 17 | - class: DockerRequirement 18 | dockerImageId: quay.io/bcbio/bcbio-vc 19 | dockerPull: quay.io/bcbio/bcbio-vc 20 | - class: ResourceRequirement 21 | coresMin: 1 22 | outdirMin: 42648 23 | ramMin: 3584 24 | tmpdirMin: 20812 25 | - class: dx:InputResourceRequirement 26 | indirMin: 1 27 | - class: SoftwareRequirement 28 | packages: 29 | - package: multiqc 30 | specs: 31 | - https://anaconda.org/bioconda/multiqc 32 | - package: multiqc-bcbio 33 | specs: 34 | - https://anaconda.org/bioconda/multiqc-bcbio 35 | inputs: 36 | - id: qcout_rec 37 | type: 38 | items: 39 | fields: 40 | - name: summary__qc 41 | type: 42 | - File 43 | - 'null' 44 | - name: summary__metrics 45 | type: 46 | - string 47 | - 'null' 48 | - name: description 49 | type: string 50 | - name: genome_build 51 | type: string 52 | - name: config__algorithm__tools_off 53 | type: 54 | items: string 55 | type: array 56 | - name: config__algorithm__qc 57 | type: 58 | items: string 59 | type: array 60 | - name: config__algorithm__tools_on 61 | type: 62 | - 'null' 
63 | - string 64 | - items: 65 | - 'null' 66 | - string 67 | type: array 68 | name: qcout_rec 69 | type: record 70 | type: array 71 | outputs: 72 | - id: summary__multiqc 73 | type: 74 | items: 75 | - File 76 | - 'null' 77 | type: array 78 | requirements: 79 | - class: InlineJavascriptRequirement 80 | - class: InitialWorkDirRequirement 81 | listing: 82 | - entry: $(JSON.stringify(inputs)) 83 | entryname: cwl.inputs.json 84 | -------------------------------------------------------------------------------- /somatic-lowfreq/smcounter2-umi-workflow/steps/prep_samples.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-parallel 7 | - sentinel_outputs=config__algorithm__variant_regions,config__algorithm__variant_regions_merged,config__algorithm__variant_regions_orig,config__algorithm__coverage,config__algorithm__coverage_merged,config__algorithm__coverage_orig,config__algorithm__seq2c_bed_ready 8 | - sentinel_inputs=prep_samples_rec:record 9 | baseCommand: 10 | - bcbio_nextgen.py 11 | - runfn 12 | - prep_samples 13 | - cwl 14 | class: CommandLineTool 15 | cwlVersion: v1.0 16 | hints: 17 | - class: DockerRequirement 18 | dockerImageId: quay.io/bcbio/bcbio-vc 19 | dockerPull: quay.io/bcbio/bcbio-vc 20 | - class: ResourceRequirement 21 | coresMin: 1 22 | outdirMin: 11430 23 | ramMin: 3584 24 | tmpdirMin: 5203 25 | - class: dx:InputResourceRequirement 26 | indirMin: 3008 27 | - class: SoftwareRequirement 28 | packages: 29 | - package: htslib 30 | specs: 31 | - https://anaconda.org/bioconda/htslib 32 | - package: bedtools 33 | specs: 34 | - https://anaconda.org/bioconda/bedtools 35 | - package: pythonpy 36 | specs: 37 | - https://anaconda.org/bioconda/pythonpy 38 | inputs: 39 | - id: prep_samples_rec 40 | type: 41 | fields: 42 | - name: resources 
43 | type: string 44 | - name: description 45 | type: string 46 | - name: reference__fasta__base 47 | type: File 48 | - name: config__algorithm__variant_regions 49 | type: File 50 | name: prep_samples_rec 51 | type: record 52 | outputs: 53 | - id: config__algorithm__variant_regions 54 | type: 55 | - File 56 | - 'null' 57 | - id: config__algorithm__variant_regions_merged 58 | type: 59 | - File 60 | - 'null' 61 | - id: config__algorithm__variant_regions_orig 62 | type: 63 | - File 64 | - 'null' 65 | - id: config__algorithm__coverage 66 | type: 67 | - File 68 | - 'null' 69 | - id: config__algorithm__coverage_merged 70 | type: 71 | - File 72 | - 'null' 73 | - id: config__algorithm__coverage_orig 74 | type: 75 | - File 76 | - 'null' 77 | - id: config__algorithm__seq2c_bed_ready 78 | type: 79 | - File 80 | - 'null' 81 | requirements: 82 | - class: InlineJavascriptRequirement 83 | - class: InitialWorkDirRequirement 84 | listing: 85 | - entry: $(JSON.stringify(inputs)) 86 | entryname: cwl.inputs.json 87 | -------------------------------------------------------------------------------- /somatic-lowfreq/smcounter2-umi-workflow/steps/prep_samples_to_rec.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-combined 7 | - sentinel_outputs=prep_samples_rec:resources;description;reference__fasta__base;config__algorithm__variant_regions 8 | - sentinel_inputs=config__algorithm__variant_regions:var,reference__fasta__base:var,resources:var,description:var 9 | baseCommand: 10 | - bcbio_nextgen.py 11 | - runfn 12 | - prep_samples_to_rec 13 | - cwl 14 | class: CommandLineTool 15 | cwlVersion: v1.0 16 | hints: 17 | - class: DockerRequirement 18 | dockerImageId: quay.io/bcbio/bcbio-vc 19 | dockerPull: quay.io/bcbio/bcbio-vc 20 | - class: ResourceRequirement 21 | 
coresMin: 1 22 | outdirMin: 11430 23 | ramMin: 3584 24 | tmpdirMin: 5203 25 | - class: dx:InputResourceRequirement 26 | indirMin: 0 27 | inputs: 28 | - id: config__algorithm__variant_regions 29 | type: 30 | items: File 31 | type: array 32 | - id: reference__fasta__base 33 | secondaryFiles: 34 | - .fai 35 | - ^.dict 36 | type: 37 | items: File 38 | type: array 39 | - id: resources 40 | type: 41 | items: string 42 | type: array 43 | - id: description 44 | type: 45 | items: string 46 | type: array 47 | outputs: 48 | - id: prep_samples_rec 49 | type: 50 | items: 51 | fields: 52 | - name: resources 53 | type: string 54 | - name: description 55 | type: string 56 | - name: reference__fasta__base 57 | type: File 58 | - name: config__algorithm__variant_regions 59 | type: File 60 | name: prep_samples_rec 61 | type: record 62 | type: array 63 | requirements: 64 | - class: InlineJavascriptRequirement 65 | - class: InitialWorkDirRequirement 66 | listing: 67 | - entry: $(JSON.stringify(inputs)) 68 | entryname: cwl.inputs.json 69 | -------------------------------------------------------------------------------- /somatic-lowfreq/smcounter2-umi.yaml: -------------------------------------------------------------------------------- 1 | details: 2 | - algorithm: 3 | aligner: minimap2 4 | min_allele_fraction: 0.25 5 | tools_off: 6 | - gemini 7 | umi_type: fastq_name 8 | validate: biodata/smcounter2/truth/smcounter2-M0253-truth.vcf.gz 9 | variant_regions: biodata/smcounter2/regions/M0253.bed 10 | variantcaller: 11 | - vardict 12 | - freebayes 13 | - pisces 14 | analysis: variant 15 | description: M0253 16 | files: [biodata/smcounter2/inputs/smcounter2-M0253_R1.fq.gz, biodata/smcounter2/inputs/smcounter2-M0253_R2.fq.gz] 17 | genome_build: GRCh37 18 | metadata: 19 | phenotype: tumor 20 | validate_batch: sm2 21 | - algorithm: 22 | aligner: minimap2 23 | min_allele_fraction: 0.25 24 | tools_off: 25 | - gemini 26 | umi_type: fastq_name 27 | validate: 
biodata/smcounter2/truth/smcounter2-N0261-truth.vcf.gz 28 | variant_regions: biodata/smcounter2/regions/N0261.bed 29 | variantcaller: 30 | - vardict 31 | - freebayes 32 | - pisces 33 | analysis: variant 34 | description: N0261 35 | files: [biodata/smcounter2/inputs/smcounter2-N0261_R1.fq.gz, biodata/smcounter2/inputs/smcounter2-N0261_R2.fq.gz] 36 | genome_build: GRCh37 37 | metadata: 38 | phenotype: tumor 39 | validate_batch: sm2 40 | - algorithm: 41 | aligner: minimap2 42 | min_allele_fraction: 0.25 43 | tools_off: 44 | - gemini 45 | umi_type: fastq_name 46 | validate: biodata/smcounter2/truth/smcounter2-N13532-truth.vcf.gz 47 | variant_regions: biodata/smcounter2/regions/N13532.bed 48 | variantcaller: 49 | - vardict 50 | - freebayes 51 | - pisces 52 | analysis: variant 53 | description: N13532 54 | files: [biodata/smcounter2/inputs/smcounter2-N13532_R1.fq.gz, biodata/smcounter2/inputs/smcounter2-N13532_R2.fq.gz] 55 | genome_build: GRCh37 56 | metadata: 57 | phenotype: tumor 58 | validate_batch: sm2 59 | -------------------------------------------------------------------------------- /wes-agha-test/README.md: -------------------------------------------------------------------------------- 1 | # GA4GH WES workflow 2 | 3 | A [bcbio](https://bcbio-nextgen.readthedocs.io/) somatic variant calling analysis 4 | as a GA4GH test example for demonstrating Workflow Execution Service (WES) interoperability. 5 | This is contributed by the 6 | [Australian Genomics Health Alliance (AGHA)](https://www.australiangenomics.org.au/) 7 | and contains Common Workflow Language descriptions for running on: 8 | 9 | - [Veritas Genetics'](https://www.veritasgenetics.com/) [Arvados](https://arvados.org/) platform. 10 | The data is externally hosted on the [Arvados public playground](https://playground.arvados.org/). 
11 | 12 | - The [Cromwell Workflow Management System](https://cromwell.readthedocs.io/en/stable/) developed 13 | by the Broad Institute and tested on Google Cloud Platform by Illumina. The data is hosted 14 | in a publicly accessible Google Storage bucket and referenced by http URLs. 15 | -------------------------------------------------------------------------------- /wes-agha-test/bcbio-validation-checker/Dockstore.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | class: CommandLineTool 4 | id: "bcbio-validation-checker" 5 | label: "Compare bcbio validation results against expected baseline" 6 | 7 | cwlVersion: v1.0 8 | 9 | requirements: 10 | - class: DockerRequirement 11 | dockerPull: quay.io/bcbio/bcbio-validation-checker 12 | 13 | inputs: 14 | baseline: 15 | type: File 16 | inputBinding: 17 | position: 1 18 | 19 | comparison: 20 | type: 21 | items: 22 | - File 23 | - 'null' 24 | type: array 25 | inputBinding: 26 | position: 2 27 | 28 | outputs: 29 | output: 30 | type: File 31 | outputBinding: 32 | glob: "results.json" 33 | 34 | log: 35 | type: File 36 | outputBinding: 37 | glob: "log.txt" 38 | 39 | baseCommand: ["python", "/usr/local/bin/bcbio_check_validation.py"] 40 | -------------------------------------------------------------------------------- /wes-agha-test/bcbio-validation-checker/grading-summary-combined-expected.csv: -------------------------------------------------------------------------------- 1 | sample,caller,variant.type,category,value 2 | NA12878_chr21,ensemble,SNPs,tp,195 3 | NA12878_chr21,ensemble,Indels,tp,11 4 | NA12878_chr21,ensemble,SNPs,fp,5 5 | NA12878_chr21,ensemble,Indels,fp,0 6 | NA12878_chr21,ensemble,SNPs,fn,1 7 | NA12878_chr21,ensemble,Indels,fn,1 8 | NA12878_chr21,mutect2,SNPs,tp,191 9 | NA12878_chr21,mutect2,Indels,tp,10 10 | NA12878_chr21,mutect2,SNPs,fp,10 11 | NA12878_chr21,mutect2,Indels,fp,1 12 | NA12878_chr21,mutect2,SNPs,fn,5 13 | 
NA12878_chr21,mutect2,Indels,fn,2 14 | NA12878_chr21,vardict,SNPs,tp,191 15 | NA12878_chr21,vardict,Indels,tp,11 16 | NA12878_chr21,vardict,SNPs,fp,9 17 | NA12878_chr21,vardict,Indels,fp,5 18 | NA12878_chr21,vardict,SNPs,fn,5 19 | NA12878_chr21,vardict,Indels,fn,1 20 | NA12878_chr21,strelka2,SNPs,tp,192 21 | NA12878_chr21,strelka2,Indels,tp,10 22 | NA12878_chr21,strelka2,SNPs,fp,2 23 | NA12878_chr21,strelka2,Indels,fp,2 24 | NA12878_chr21,strelka2,SNPs,fn,4 25 | NA12878_chr21,strelka2,Indels,fn,2 26 | -------------------------------------------------------------------------------- /wes-agha-test/bcbio_system-arvados.yaml: -------------------------------------------------------------------------------- 1 | arvados: 2 | ref: qr1hi-4zz18-k3cpvj60bvqbr2a 3 | inputs: [qr1hi-4zz18-685wngk7zrynqad] 4 | resources: 5 | default: {cores: 16, memory: 3000M, jvm_opts: [-Xms750m, -Xmx3000m]} 6 | -------------------------------------------------------------------------------- /wes-agha-test/bcbio_system-gcp.yaml: -------------------------------------------------------------------------------- 1 | gs: 2 | #input_type: http 3 | ref: gs://bcbiodata/collections 4 | inputs: 5 | - gs://bcbiodata/wes_agha_test 6 | resources: 7 | default: {cores: 16, memory: 3000M, jvm_opts: [-Xms750m, -Xmx3000m]} 8 | -------------------------------------------------------------------------------- /wes-agha-test/run_arvados.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PID=qr1hi-j7d0g-a68mg4e5oh37oek 5 | PNAME=wes_chr21_test 6 | PLATFORM=arvados 7 | # WES endpoint and token: values exported in the environment take precedence over these defaults 8 | WES_API_HOST=${WES_API_HOST:-wes.qr1hi.arvadosapi.com} 9 | WES_API_AUTH=${WES_API_AUTH:-5vipdwj62f3wz6avnih8p27krsdpngphgymyukw79nslnxk29x} 10 | 11 | bcbio_vm.py cwlrun wes --host "$WES_API_HOST" --auth "$WES_API_AUTH" "$PNAME-workflow-$PLATFORM" 12 | #bcbio_vm.py cwlrun arvados $PNAME-workflow-$PLATFORM -- --project-uuid $PID 13 |
-------------------------------------------------------------------------------- /wes-agha-test/run_cromwell.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=wes_chr21_test 5 | PLATFORM=arvados 6 | 7 | bcbio_vm.py cwlrun cromwell --no-container $PNAME-workflow-$PLATFORM 8 | -------------------------------------------------------------------------------- /wes-agha-test/run_generate_cwl_arvados.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=wes_chr21_test 5 | TEMPLATE=wes_chr21_test 6 | PLATFORM=arvados 7 | rm -rf $PNAME 8 | rm -rf $PNAME-workflow 9 | rm -rf $PNAME-workflow-$PLATFORM 10 | bcbio_vm.py template --systemconfig bcbio_system-${PLATFORM}.yaml ${TEMPLATE}-template.yaml $PNAME.csv 11 | bcbio_vm.py cwl --systemconfig bcbio_system-${PLATFORM}.yaml $PNAME/config/$PNAME.yaml 12 | mv $PNAME-workflow $PNAME-workflow-$PLATFORM 13 | -------------------------------------------------------------------------------- /wes-agha-test/run_generate_cwl_gcp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | PNAME=wes_chr21_test 5 | TEMPLATE=wes_chr21_test 6 | PLATFORM=gcp 7 | rm -rf $PNAME 8 | rm -rf $PNAME-workflow 9 | rm -rf $PNAME-workflow-$PLATFORM 10 | bcbio_vm.py template --systemconfig bcbio_system-${PLATFORM}.yaml ${TEMPLATE}-template.yaml $PNAME.csv 11 | bcbio_vm.py cwl --systemconfig bcbio_system-${PLATFORM}.yaml $PNAME/config/$PNAME.yaml 12 | mv $PNAME-workflow $PNAME-workflow-$PLATFORM 13 | -------------------------------------------------------------------------------- /wes-agha-test/wes_chr21_test-template.yaml: -------------------------------------------------------------------------------- 1 | details: 2 | - analysis: variant2 3 | genome_build: GRCh37 4 | algorithm: 5 | # Alignment parameters 6 | 
aligner: bwa 7 | recalibrate: false 8 | realign: false 9 | mark_duplicates: true 10 | remove_lcr: false 11 | 12 | # Poly-G filtering 13 | trim_reads: atropos 14 | adapters: polyx 15 | 16 | # ad hoc downsampling. Set to 100 for challenging samples 17 | maxcov_downsample: false 18 | 19 | # Variant calling, 2-out-of-3. All callers handle InDels 20 | variantcaller: 21 | germline: [vardict, strelka2, gatk-haplotype] 22 | somatic: [vardict, strelka2, mutect2] 23 | ensemble: 24 | numpass: 2 25 | 26 | # Limit to validation regions to speed things up 27 | variant_regions: Exome-Agilent_V6_chr21.bed 28 | 29 | # Structural variation. No point in using Manta for this test data 30 | svcaller: [cnvkit] 31 | 32 | # Extras 33 | tools_on: [gatk4, break-point-inspector, noalt_calling] 34 | tools_off: [gemini] 35 | vcfanno: [gemini] 36 | upload: 37 | dir: ../final 38 | -------------------------------------------------------------------------------- /wes-agha-test/wes_chr21_test-workflow-arvados/steps/combine_sample_regions.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | arv: http://arvados.org/cwl# 3 | dx: https://www.dnanexus.com/cwl# 4 | arguments: 5 | - position: 0 6 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 7 | - sentinel_parallel=multi-combined 8 | - sentinel_outputs=config__algorithm__callable_regions,config__algorithm__non_callable_regions,config__algorithm__callable_count 9 | - sentinel_inputs=regions__callable:var,regions__nblock:var,metadata__batch:var,config__algorithm__nomap_split_size:var,config__algorithm__nomap_split_targets:var,reference__fasta__base:var,resources:var,description:var 10 | - run_number=0 11 | baseCommand: 12 | - bcbio_nextgen.py 13 | - runfn 14 | - combine_sample_regions 15 | - cwl 16 | class: CommandLineTool 17 | cwlVersion: v1.0 18 | hints: 19 | - class: DockerRequirement 20 | dockerImageId: quay.io/bcbio/bcbio-vc 21 | dockerPull: quay.io/bcbio/bcbio-vc 22 
| - class: ResourceRequirement 23 | coresMin: 1 24 | outdirMin: 1083 25 | ramMin: 3072 26 | tmpdirMin: 30 27 | - class: dx:InputResourceRequirement 28 | indirMin: 3008 29 | - class: SoftwareRequirement 30 | packages: 31 | - package: bedtools 32 | specs: 33 | - https://anaconda.org/bioconda/bedtools 34 | - package: htslib 35 | specs: 36 | - https://anaconda.org/bioconda/htslib 37 | - package: gatk4 38 | specs: 39 | - https://anaconda.org/bioconda/gatk4 40 | - class: arv:APIRequirement 41 | inputs: 42 | - id: regions__callable 43 | type: 44 | items: 45 | - File 46 | - 'null' 47 | type: array 48 | - id: regions__nblock 49 | type: 50 | items: 51 | - File 52 | - 'null' 53 | type: array 54 | - id: metadata__batch 55 | type: 56 | items: string 57 | type: array 58 | - id: config__algorithm__nomap_split_size 59 | type: 60 | items: long 61 | type: array 62 | - id: config__algorithm__nomap_split_targets 63 | type: 64 | items: long 65 | type: array 66 | - id: reference__fasta__base 67 | secondaryFiles: 68 | - .fai 69 | - ^.dict 70 | type: 71 | items: File 72 | type: array 73 | - id: resources 74 | type: 75 | items: string 76 | type: array 77 | - id: description 78 | type: 79 | items: string 80 | type: array 81 | outputs: 82 | - id: config__algorithm__callable_regions 83 | type: 84 | items: File 85 | type: array 86 | - id: config__algorithm__non_callable_regions 87 | type: 88 | items: File 89 | type: array 90 | - id: config__algorithm__callable_count 91 | type: 92 | items: int 93 | type: array 94 | requirements: 95 | - class: InlineJavascriptRequirement 96 | - class: InitialWorkDirRequirement 97 | listing: 98 | - entry: $(JSON.stringify(inputs)) 99 | entryname: cwl.inputs.json 100 | -------------------------------------------------------------------------------- /wes-agha-test/wes_chr21_test-workflow-arvados/steps/multiqc_summary.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | 
arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-combined 7 | - sentinel_outputs=summary__multiqc,versions__tools,versions__data 8 | - sentinel_inputs=qcout_rec:record 9 | - run_number=0 10 | baseCommand: 11 | - bcbio_nextgen.py 12 | - runfn 13 | - multiqc_summary 14 | - cwl 15 | class: CommandLineTool 16 | cwlVersion: v1.0 17 | hints: 18 | - class: DockerRequirement 19 | dockerImageId: quay.io/bcbio/bcbio-vc 20 | dockerPull: quay.io/bcbio/bcbio-vc 21 | - class: ResourceRequirement 22 | coresMin: 1 23 | outdirMin: 1261 24 | ramMin: 3072 25 | tmpdirMin: 119 26 | - class: dx:InputResourceRequirement 27 | indirMin: 1 28 | - class: SoftwareRequirement 29 | packages: 30 | - package: multiqc 31 | specs: 32 | - https://anaconda.org/bioconda/multiqc 33 | - package: multiqc-bcbio 34 | specs: 35 | - https://anaconda.org/bioconda/multiqc-bcbio 36 | inputs: 37 | - id: qcout_rec 38 | type: 39 | items: 40 | fields: 41 | - name: summary__qc 42 | type: 43 | - File 44 | - 'null' 45 | - name: summary__metrics 46 | type: 47 | - string 48 | - 'null' 49 | - name: description 50 | type: string 51 | - name: genome_build 52 | type: string 53 | - name: config__algorithm__tools_off 54 | type: 55 | items: string 56 | type: array 57 | - name: reference__versions 58 | type: File 59 | - name: config__algorithm__qc 60 | type: 61 | items: string 62 | type: array 63 | - name: config__algorithm__tools_on 64 | type: 65 | items: string 66 | type: array 67 | name: qcout_rec 68 | type: record 69 | type: array 70 | outputs: 71 | - id: summary__multiqc 72 | type: 73 | items: 74 | - File 75 | - 'null' 76 | type: array 77 | - id: versions__tools 78 | type: 79 | items: 80 | - File 81 | - 'null' 82 | type: array 83 | - id: versions__data 84 | type: 85 | items: 86 | - File 87 | - 'null' 88 | type: array 89 | requirements: 90 | - class: InlineJavascriptRequirement 91 | - class: InitialWorkDirRequirement 92 | listing: 93 | 
- entry: $(JSON.stringify(inputs)) 94 | entryname: cwl.inputs.json 95 | -------------------------------------------------------------------------------- /wes-agha-test/wes_chr21_test-workflow-arvados/steps/prep_samples.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-parallel 7 | - sentinel_outputs=rgnames__sample,config__algorithm__variant_regions,config__algorithm__variant_regions_merged,config__algorithm__variant_regions_orig,config__algorithm__coverage,config__algorithm__coverage_merged,config__algorithm__coverage_orig,config__algorithm__seq2c_bed_ready 8 | - sentinel_inputs=prep_samples_rec:record 9 | - run_number=0 10 | baseCommand: 11 | - bcbio_nextgen.py 12 | - runfn 13 | - prep_samples 14 | - cwl 15 | class: CommandLineTool 16 | cwlVersion: v1.0 17 | hints: 18 | - class: DockerRequirement 19 | dockerImageId: quay.io/bcbio/bcbio-vc 20 | dockerPull: quay.io/bcbio/bcbio-vc 21 | - class: ResourceRequirement 22 | coresMin: 1 23 | outdirMin: 1083 24 | ramMin: 3072 25 | tmpdirMin: 30 26 | - class: dx:InputResourceRequirement 27 | indirMin: 3008 28 | - class: SoftwareRequirement 29 | packages: 30 | - package: htslib 31 | specs: 32 | - https://anaconda.org/bioconda/htslib 33 | - package: bedtools 34 | specs: 35 | - https://anaconda.org/bioconda/bedtools 36 | - package: pythonpy 37 | specs: 38 | - https://anaconda.org/bioconda/pythonpy 39 | inputs: 40 | - id: prep_samples_rec 41 | type: 42 | fields: 43 | - name: resources 44 | type: string 45 | - name: description 46 | type: string 47 | - name: reference__fasta__base 48 | type: File 49 | - name: rgnames__sample 50 | type: string 51 | - name: config__algorithm__variant_regions 52 | type: File 53 | name: prep_samples_rec 54 | type: record 55 | outputs: 56 | - id: rgnames__sample 57 | 
type: string 58 | - id: config__algorithm__variant_regions 59 | type: 60 | - File 61 | - 'null' 62 | - id: config__algorithm__variant_regions_merged 63 | type: 64 | - File 65 | - 'null' 66 | - id: config__algorithm__variant_regions_orig 67 | type: 68 | - File 69 | - 'null' 70 | - id: config__algorithm__coverage 71 | type: 72 | - File 73 | - 'null' 74 | - id: config__algorithm__coverage_merged 75 | type: 76 | - File 77 | - 'null' 78 | - id: config__algorithm__coverage_orig 79 | type: 80 | - File 81 | - 'null' 82 | - id: config__algorithm__seq2c_bed_ready 83 | type: 84 | - File 85 | - 'null' 86 | requirements: 87 | - class: InlineJavascriptRequirement 88 | - class: InitialWorkDirRequirement 89 | listing: 90 | - entry: $(JSON.stringify(inputs)) 91 | entryname: cwl.inputs.json 92 | -------------------------------------------------------------------------------- /wes-agha-test/wes_chr21_test-workflow-arvados/steps/prep_samples_to_rec.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-combined 7 | - sentinel_outputs=prep_samples_rec:resources;description;reference__fasta__base;rgnames__sample;config__algorithm__variant_regions 8 | - sentinel_inputs=rgnames__sample:var,config__algorithm__variant_regions:var,reference__fasta__base:var,resources:var,description:var 9 | - run_number=0 10 | baseCommand: 11 | - bcbio_nextgen.py 12 | - runfn 13 | - prep_samples_to_rec 14 | - cwl 15 | class: CommandLineTool 16 | cwlVersion: v1.0 17 | hints: 18 | - class: DockerRequirement 19 | dockerImageId: quay.io/bcbio/bcbio-vc 20 | dockerPull: quay.io/bcbio/bcbio-vc 21 | - class: ResourceRequirement 22 | coresMin: 1 23 | outdirMin: 1083 24 | ramMin: 3072 25 | tmpdirMin: 30 26 | - class: dx:InputResourceRequirement 27 | indirMin: 0 28 | inputs: 29 | - id: 
rgnames__sample 30 | type: 31 | items: string 32 | type: array 33 | - id: config__algorithm__variant_regions 34 | type: 35 | items: File 36 | type: array 37 | - id: reference__fasta__base 38 | secondaryFiles: 39 | - .fai 40 | - ^.dict 41 | type: 42 | items: File 43 | type: array 44 | - id: resources 45 | type: 46 | items: string 47 | type: array 48 | - id: description 49 | type: 50 | items: string 51 | type: array 52 | outputs: 53 | - id: prep_samples_rec 54 | type: 55 | items: 56 | fields: 57 | - name: resources 58 | type: string 59 | - name: description 60 | type: string 61 | - name: reference__fasta__base 62 | type: File 63 | - name: rgnames__sample 64 | type: string 65 | - name: config__algorithm__variant_regions 66 | type: File 67 | name: prep_samples_rec 68 | type: record 69 | type: array 70 | requirements: 71 | - class: InlineJavascriptRequirement 72 | - class: InitialWorkDirRequirement 73 | listing: 74 | - entry: $(JSON.stringify(inputs)) 75 | entryname: cwl.inputs.json 76 | -------------------------------------------------------------------------------- /wes-agha-test/wes_chr21_test-workflow-gcp/steps/combine_sample_regions.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | arv: http://arvados.org/cwl# 3 | dx: https://www.dnanexus.com/cwl# 4 | arguments: 5 | - position: 0 6 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 7 | - sentinel_parallel=multi-combined 8 | - sentinel_outputs=config__algorithm__callable_regions,config__algorithm__non_callable_regions,config__algorithm__callable_count 9 | - sentinel_inputs=regions__callable:var,regions__nblock:var,metadata__batch:var,config__algorithm__nomap_split_size:var,config__algorithm__nomap_split_targets:var,reference__fasta__base:var,resources:var,description:var 10 | - run_number=0 11 | baseCommand: 12 | - bcbio_nextgen.py 13 | - runfn 14 | - combine_sample_regions 15 | - cwl 16 | class: CommandLineTool 17 | cwlVersion: v1.0 
18 | hints: 19 | - class: DockerRequirement 20 | dockerImageId: quay.io/bcbio/bcbio-vc 21 | dockerPull: quay.io/bcbio/bcbio-vc 22 | - class: ResourceRequirement 23 | coresMin: 1 24 | outdirMin: 10299 25 | ramMin: 3072 26 | tmpdirMin: 30 27 | - class: dx:InputResourceRequirement 28 | indirMin: 3008 29 | - class: SoftwareRequirement 30 | packages: 31 | - package: bedtools 32 | specs: 33 | - https://anaconda.org/bioconda/bedtools 34 | - package: htslib 35 | specs: 36 | - https://anaconda.org/bioconda/htslib 37 | - package: gatk4 38 | specs: 39 | - https://anaconda.org/bioconda/gatk4 40 | - class: arv:APIRequirement 41 | inputs: 42 | - id: regions__callable 43 | type: 44 | items: 45 | - File 46 | - 'null' 47 | type: array 48 | - id: regions__nblock 49 | type: 50 | items: 51 | - File 52 | - 'null' 53 | type: array 54 | - id: metadata__batch 55 | type: 56 | items: string 57 | type: array 58 | - id: config__algorithm__nomap_split_size 59 | type: 60 | items: long 61 | type: array 62 | - id: config__algorithm__nomap_split_targets 63 | type: 64 | items: long 65 | type: array 66 | - id: reference__fasta__base 67 | secondaryFiles: 68 | - ^.dict 69 | - .fai 70 | type: 71 | items: File 72 | type: array 73 | - id: resources 74 | type: 75 | items: string 76 | type: array 77 | - id: description 78 | type: 79 | items: string 80 | type: array 81 | outputs: 82 | - id: config__algorithm__callable_regions 83 | type: 84 | items: File 85 | type: array 86 | - id: config__algorithm__non_callable_regions 87 | type: 88 | items: File 89 | type: array 90 | - id: config__algorithm__callable_count 91 | type: 92 | items: int 93 | type: array 94 | requirements: 95 | - class: InlineJavascriptRequirement 96 | - class: InitialWorkDirRequirement 97 | listing: 98 | - entry: $(JSON.stringify(inputs)) 99 | entryname: cwl.inputs.json 100 | -------------------------------------------------------------------------------- /wes-agha-test/wes_chr21_test-workflow-gcp/steps/multiqc_summary.cwl: 
-------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-combined 7 | - sentinel_outputs=summary__multiqc,versions__tools,versions__data 8 | - sentinel_inputs=qcout_rec:record 9 | - run_number=0 10 | baseCommand: 11 | - bcbio_nextgen.py 12 | - runfn 13 | - multiqc_summary 14 | - cwl 15 | class: CommandLineTool 16 | cwlVersion: v1.0 17 | hints: 18 | - class: DockerRequirement 19 | dockerImageId: quay.io/bcbio/bcbio-vc 20 | dockerPull: quay.io/bcbio/bcbio-vc 21 | - class: ResourceRequirement 22 | coresMin: 1 23 | outdirMin: 10477 24 | ramMin: 3072 25 | tmpdirMin: 119 26 | - class: dx:InputResourceRequirement 27 | indirMin: 1 28 | - class: SoftwareRequirement 29 | packages: 30 | - package: multiqc 31 | specs: 32 | - https://anaconda.org/bioconda/multiqc 33 | - package: multiqc-bcbio 34 | specs: 35 | - https://anaconda.org/bioconda/multiqc-bcbio 36 | inputs: 37 | - id: qcout_rec 38 | type: 39 | items: 40 | fields: 41 | - name: summary__qc 42 | type: 43 | - File 44 | - 'null' 45 | - name: summary__metrics 46 | type: 47 | - string 48 | - 'null' 49 | - name: description 50 | type: string 51 | - name: genome_build 52 | type: string 53 | - name: config__algorithm__tools_off 54 | type: 55 | items: string 56 | type: array 57 | - name: reference__versions 58 | type: File 59 | - name: config__algorithm__qc 60 | type: 61 | items: string 62 | type: array 63 | - name: config__algorithm__tools_on 64 | type: 65 | items: string 66 | type: array 67 | name: qcout_rec 68 | type: record 69 | type: array 70 | outputs: 71 | - id: summary__multiqc 72 | type: 73 | items: 74 | - File 75 | - 'null' 76 | type: array 77 | - id: versions__tools 78 | type: 79 | items: 80 | - File 81 | - 'null' 82 | type: array 83 | - id: versions__data 84 | type: 85 | items: 86 | - File 87 | - 
'null' 88 | type: array 89 | requirements: 90 | - class: InlineJavascriptRequirement 91 | - class: InitialWorkDirRequirement 92 | listing: 93 | - entry: $(JSON.stringify(inputs)) 94 | entryname: cwl.inputs.json 95 | -------------------------------------------------------------------------------- /wes-agha-test/wes_chr21_test-workflow-gcp/steps/prep_samples.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-parallel 7 | - sentinel_outputs=rgnames__sample,config__algorithm__variant_regions,config__algorithm__variant_regions_merged,config__algorithm__variant_regions_orig,config__algorithm__coverage,config__algorithm__coverage_merged,config__algorithm__coverage_orig,config__algorithm__seq2c_bed_ready 8 | - sentinel_inputs=prep_samples_rec:record 9 | - run_number=0 10 | baseCommand: 11 | - bcbio_nextgen.py 12 | - runfn 13 | - prep_samples 14 | - cwl 15 | class: CommandLineTool 16 | cwlVersion: v1.0 17 | hints: 18 | - class: DockerRequirement 19 | dockerImageId: quay.io/bcbio/bcbio-vc 20 | dockerPull: quay.io/bcbio/bcbio-vc 21 | - class: ResourceRequirement 22 | coresMin: 1 23 | outdirMin: 10299 24 | ramMin: 3072 25 | tmpdirMin: 30 26 | - class: dx:InputResourceRequirement 27 | indirMin: 3008 28 | - class: SoftwareRequirement 29 | packages: 30 | - package: htslib 31 | specs: 32 | - https://anaconda.org/bioconda/htslib 33 | - package: bedtools 34 | specs: 35 | - https://anaconda.org/bioconda/bedtools 36 | - package: pythonpy 37 | specs: 38 | - https://anaconda.org/bioconda/pythonpy 39 | inputs: 40 | - id: prep_samples_rec 41 | type: 42 | fields: 43 | - name: resources 44 | type: string 45 | - name: description 46 | type: string 47 | - name: reference__fasta__base 48 | type: File 49 | - name: config__algorithm__svcaller 50 | type: 51 | items: 
string 52 | type: array 53 | - name: rgnames__sample 54 | type: string 55 | - name: config__algorithm__variant_regions 56 | type: File 57 | name: prep_samples_rec 58 | type: record 59 | outputs: 60 | - id: rgnames__sample 61 | type: string 62 | - id: config__algorithm__variant_regions 63 | type: 64 | - File 65 | - 'null' 66 | - id: config__algorithm__variant_regions_merged 67 | type: 68 | - File 69 | - 'null' 70 | - id: config__algorithm__variant_regions_orig 71 | type: 72 | - File 73 | - 'null' 74 | - id: config__algorithm__coverage 75 | type: 76 | - File 77 | - 'null' 78 | - id: config__algorithm__coverage_merged 79 | type: 80 | - File 81 | - 'null' 82 | - id: config__algorithm__coverage_orig 83 | type: 84 | - File 85 | - 'null' 86 | - id: config__algorithm__seq2c_bed_ready 87 | type: 88 | - File 89 | - 'null' 90 | requirements: 91 | - class: InlineJavascriptRequirement 92 | - class: InitialWorkDirRequirement 93 | listing: 94 | - entry: $(JSON.stringify(inputs)) 95 | entryname: cwl.inputs.json 96 | -------------------------------------------------------------------------------- /wes-agha-test/wes_chr21_test-workflow-gcp/steps/prep_samples_to_rec.cwl: -------------------------------------------------------------------------------- 1 | $namespaces: 2 | dx: https://www.dnanexus.com/cwl# 3 | arguments: 4 | - position: 0 5 | valueFrom: sentinel_runtime=cores,$(runtime['cores']),ram,$(runtime['ram']) 6 | - sentinel_parallel=multi-combined 7 | - sentinel_outputs=prep_samples_rec:resources;description;reference__fasta__base;config__algorithm__svcaller;rgnames__sample;config__algorithm__variant_regions 8 | - sentinel_inputs=rgnames__sample:var,config__algorithm__svcaller:var,config__algorithm__variant_regions:var,reference__fasta__base:var,resources:var,description:var 9 | - run_number=0 10 | baseCommand: 11 | - bcbio_nextgen.py 12 | - runfn 13 | - prep_samples_to_rec 14 | - cwl 15 | class: CommandLineTool 16 | cwlVersion: v1.0 17 | hints: 18 | - class: DockerRequirement 
19 | dockerImageId: quay.io/bcbio/bcbio-vc 20 | dockerPull: quay.io/bcbio/bcbio-vc 21 | - class: ResourceRequirement 22 | coresMin: 1 23 | outdirMin: 10299 24 | ramMin: 3072 25 | tmpdirMin: 30 26 | - class: dx:InputResourceRequirement 27 | indirMin: 0 28 | inputs: 29 | - id: rgnames__sample 30 | type: 31 | items: string 32 | type: array 33 | - id: config__algorithm__svcaller 34 | type: 35 | items: 36 | items: string 37 | type: array 38 | type: array 39 | - id: config__algorithm__variant_regions 40 | type: 41 | items: File 42 | type: array 43 | - id: reference__fasta__base 44 | secondaryFiles: 45 | - ^.dict 46 | - .fai 47 | type: 48 | items: File 49 | type: array 50 | - id: resources 51 | type: 52 | items: string 53 | type: array 54 | - id: description 55 | type: 56 | items: string 57 | type: array 58 | outputs: 59 | - id: prep_samples_rec 60 | type: 61 | items: 62 | fields: 63 | - name: resources 64 | type: string 65 | - name: description 66 | type: string 67 | - name: reference__fasta__base 68 | type: File 69 | - name: config__algorithm__svcaller 70 | type: 71 | items: string 72 | type: array 73 | - name: rgnames__sample 74 | type: string 75 | - name: config__algorithm__variant_regions 76 | type: File 77 | name: prep_samples_rec 78 | type: record 79 | type: array 80 | requirements: 81 | - class: InlineJavascriptRequirement 82 | - class: InitialWorkDirRequirement 83 | listing: 84 | - entry: $(JSON.stringify(inputs)) 85 | entryname: cwl.inputs.json 86 | -------------------------------------------------------------------------------- /wes-agha-test/wes_chr21_test.csv: -------------------------------------------------------------------------------- 1 | samplename,description,batch,phenotype,family_id,individual_id,validate,validate_regions 2 | NA12878-ready_exome21.bam,NA12878_chr21,wes,tumor,silico_mix,silico_mix,wes-ensemble-annotated_exome_chr21_noAnnot_somatic.vcf.gz,Exome-Agilent_V6_chr21.bed 3 | 
NA24385-ready_exome21.bam,NA24385_chr21,wes,normal,silico_mix,silico_mix,, 4 | 5 | 6 | --------------------------------------------------------------------------------