├── .circleci └── config.yml ├── .dockerignore ├── .editorconfig ├── .github └── ISSUE_TEMPLATE │ └── bug_report.md ├── .gitignore ├── LICENSE ├── README.md ├── chip.croo.v5.json ├── chip.wdl ├── dev ├── build_on_dx_dockerhub.sh ├── docker_image │ ├── Dockerfile │ └── mysql │ │ └── init_user.sql ├── example_input_json │ ├── caper │ │ ├── ENCSR000DYI_subsampled_chr19_only_caper.json │ │ └── ENCSR936XTK_subsampled_chr19_only_caper.json │ ├── gcp │ │ └── ENCSR936XTK_subsampled_chr19_only_gcp.json │ └── klab │ │ ├── ENCSR000DYI_subsampled_chr19_only_klab.json │ │ ├── ENCSR000DYI_subsampled_ctl_sub_chr19_only_klab.json │ │ ├── ENCSR936XTK_klab.json │ │ ├── ENCSR936XTK_subsampled_chr19_only_klab.json │ │ └── ENCSR936XTK_subsampled_ctl_sub_chr19_only_klab.json └── test │ ├── README.md │ ├── run_cromwell_server_on_gc.sh │ ├── test_py │ ├── __init__.py │ └── test_encode_lib_genomic.py │ ├── test_task │ ├── .gitignore │ ├── compare_md5sum.wdl │ ├── test_bam2ta.json │ ├── test_bam2ta.wdl │ ├── test_bam_to_pbam.json │ ├── test_bam_to_pbam.wdl │ ├── test_bowtie2.json │ ├── test_bowtie2.wdl │ ├── test_bwa.json │ ├── test_bwa.wdl │ ├── test_choose_ctl.aux.xlsx │ ├── test_choose_ctl.json │ ├── test_choose_ctl.wdl │ ├── test_count_signal_track.json │ ├── test_count_signal_track.wdl │ ├── test_filter.json │ ├── test_filter.wdl │ ├── test_gc_bias.json │ ├── test_gc_bias.wdl │ ├── test_idr.json │ ├── test_idr.wdl │ ├── test_jsd.json │ ├── test_jsd.wdl │ ├── test_macs2.json │ ├── test_macs2.wdl │ ├── test_macs2_signal_track.json │ ├── test_macs2_signal_track.wdl │ ├── test_overlap.json │ ├── test_overlap.wdl │ ├── test_pool_ta.json │ ├── test_pool_ta.wdl │ ├── test_reproducibility.json │ ├── test_reproducibility.wdl │ ├── test_spp.json │ ├── test_spp.wdl │ ├── test_spr.json │ ├── test_spr.wdl │ ├── test_subsample_ctl.json │ ├── test_subsample_ctl.wdl │ ├── test_trimmomatic.json │ ├── test_trimmomatic.wdl │ ├── test_xcor.json │ └── test_xcor.wdl │ └── test_workflow │ ├── .gitignore │ 
├── ENCSR000DYI.json │ ├── ENCSR000DYI_subsampled.json │ ├── ENCSR000DYI_subsampled_chr19_only.json │ ├── ENCSR000DYI_subsampled_chr19_only_hist.json │ ├── ENCSR000DYI_subsampled_chr19_only_hist_unrep.json │ ├── ENCSR000DYI_subsampled_chr19_only_true_rep_only_pbam.json │ ├── ENCSR000DYI_subsampled_chr19_only_unrep.json │ ├── ENCSR000DYI_subsampled_ctl_sub_chr19_only.json │ ├── ENCSR106GXJ.json │ ├── ENCSR106GXJ_pbam.json │ ├── ENCSR203KEU.json │ ├── ENCSR400WEK.json │ ├── ENCSR878KIY.json │ ├── ENCSR936XTK.json │ ├── ENCSR936XTK_subsampled.json │ ├── ENCSR936XTK_subsampled_chr19_only.json │ ├── ENCSR936XTK_subsampled_chr19_only_control_mode.json │ ├── ENCSR936XTK_subsampled_chr19_only_hist.json │ ├── ENCSR936XTK_subsampled_chr19_only_true_rep_only_pbam.json │ ├── ENCSR936XTK_subsampled_ctl_sub_1ctl_chr19_only.json │ ├── ENCSR936XTK_subsampled_ctl_sub_chr19_only.json │ ├── ENCSR970FPM.json │ ├── benchmark │ ├── ENCSR000DYI_bwa.json │ ├── ENCSR000DYI_m0_q255.json │ ├── ENCSR000DYI_m0_q30.json │ ├── ENCSR000DYI_m0_q40.json │ ├── ENCSR000DYI_m4_q255.json │ ├── ENCSR000DYI_m4_q30.json │ ├── ENCSR000DYI_m4_q40.json │ ├── ENCSR936XTK_bwa.json │ ├── ENCSR936XTK_m0_q255.json │ ├── ENCSR936XTK_m0_q30.json │ ├── ENCSR936XTK_m0_q40.json │ ├── ENCSR936XTK_m4_q255.json │ ├── ENCSR936XTK_m4_q30.json │ └── ENCSR936XTK_m4_q40.json │ └── ref_output │ ├── sync.sh │ ├── v1.1.4 │ ├── ENCSR000DYI_qc.json │ ├── ENCSR000DYI_subsampled_chr19_only_qc.json │ ├── ENCSR000DYI_subsampled_qc.json │ ├── ENCSR936XTK_qc.json │ ├── ENCSR936XTK_subsampled_chr19_only_qc.json │ └── ENCSR936XTK_subsampled_qc.json │ ├── v1.1.5 │ ├── ENCSR000DYI │ │ └── qc.json │ ├── ENCSR000DYI_subsampled │ │ └── qc.json │ ├── ENCSR000DYI_subsampled_chr19_only │ │ └── qc.json │ ├── ENCSR936XTK │ │ └── qc.json │ ├── ENCSR936XTK_subsampled │ │ └── qc.json │ └── ENCSR936XTK_subsampled_chr19_only │ │ └── qc.json │ ├── v1.2.2 │ ├── ENCSR000DYI_subsampled_chr19_only │ │ └── qc.json │ └── ENCSR936XTK_subsampled_chr19_only │ │ 
└── qc.json │ ├── v1.3.0 │ ├── ENCSR000DYI_subsampled_chr19_only │ │ └── qc.json │ ├── ENCSR000DYI_subsampled_chr19_only_hist │ │ └── qc.json │ ├── ENCSR000DYI_subsampled_chr19_only_hist_unrep │ │ └── qc.json │ ├── ENCSR000DYI_subsampled_chr19_only_unrep │ │ └── qc.json │ ├── ENCSR936XTK_subsampled_chr19_only │ │ └── qc.json │ └── ENCSR936XTK_subsampled_chr19_only_hist │ │ └── qc.json │ ├── v1.4.0 │ ├── ENCSR000DYI_subsampled_ctl_sub_chr19_only │ │ └── qc.json │ ├── ENCSR936XTK_subsampled_ctl_sub_1ctl_chr19_only │ │ └── qc.json │ └── ENCSR936XTK_subsampled_ctl_sub_chr19_only │ │ └── qc.json │ ├── v1.4.1 │ └── ENCSR936XTK_subsampled_chr19_only_control_mode │ │ └── qc.json │ ├── v1.5.0 │ ├── ENCSR000DYI_subsampled_chr19_only │ │ └── qc.json │ ├── ENCSR000DYI_subsampled_chr19_only_hist │ │ └── qc.json │ ├── ENCSR000DYI_subsampled_chr19_only_hist_unrep │ │ └── qc.json │ ├── ENCSR000DYI_subsampled_chr19_only_unrep │ │ └── qc.json │ ├── ENCSR000DYI_subsampled_ctl_sub_chr19_only │ │ └── qc.json │ ├── ENCSR936XTK_subsampled_chr19_only │ │ └── qc.json │ ├── ENCSR936XTK_subsampled_chr19_only_control_mode │ │ └── qc.json │ ├── ENCSR936XTK_subsampled_chr19_only_hist │ │ └── qc.json │ ├── ENCSR936XTK_subsampled_ctl_sub_1ctl_chr19_only │ │ └── qc.json │ └── ENCSR936XTK_subsampled_ctl_sub_chr19_only │ │ └── qc.json │ └── v1.7.0 │ ├── ENCSR000DYI_subsampled_chr19_only_true_rep_only_pbam │ └── qc.json │ ├── ENCSR936XTK_subsampled_chr19_only_true_rep_only_pbam │ └── qc.json │ └── qc.json ├── docs ├── build_genome_database.md ├── how_to_config_sge.md ├── input.md ├── input_short.md ├── troubleshooting.md ├── tutorial_dx_cli.md └── tutorial_dx_web.md ├── example_input_json ├── ENCSR000DYI_subsampled_chr19_only.json ├── ENCSR936XTK_subsampled_chr19_only.json ├── dx │ ├── ENCSR000DYI_dx.json │ ├── ENCSR000DYI_subsampled_chr19_only_dx.json │ ├── ENCSR000DYI_subsampled_chr19_only_rep1_dx.json │ ├── ENCSR936XTK_dx.json │ ├── template_general.json │ ├── template_hg19.json │ ├── 
template_hg38.json │ ├── template_mm10.json │ └── template_mm9.json ├── dx_azure │ ├── ENCSR000DYI_dx_azure.json │ ├── ENCSR000DYI_subsampled_chr19_only_dx_azure.json │ ├── ENCSR936XTK_dx_azure.json │ ├── template_general.json │ ├── template_hg19.json │ ├── template_hg38.json │ ├── template_mm10.json │ └── template_mm9.json ├── template.full.json ├── template.json └── terra │ ├── ENCSR000DYI_subsampled_chr19_only.terra.json │ └── ENCSR936XTK_subsampled_chr19_only.terra.json ├── scripts ├── build_genome_data.sh ├── download_genome_data.sh ├── install_conda_env.sh ├── requirements.macs2.txt ├── requirements.spp.txt ├── requirements.txt ├── uninstall_conda_env.sh └── update_conda_env.sh └── src ├── assign_multimappers.py ├── detect_adapter.py ├── dev_check_sync_atac.sh ├── encode_lib_blacklist_filter.py ├── encode_lib_common.py ├── encode_lib_frip.py ├── encode_lib_genomic.py ├── encode_lib_log_parser.py ├── encode_lib_qc_category.py ├── encode_task_annot_enrich.py ├── encode_task_bam2ta.py ├── encode_task_bam_to_pbam.py ├── encode_task_bowtie2.py ├── encode_task_bwa.py ├── encode_task_choose_ctl.py ├── encode_task_compare_signal_to_roadmap.py ├── encode_task_count_signal_track.py ├── encode_task_filter.py ├── encode_task_frac_mito.py ├── encode_task_fraglen_stat_pe.py ├── encode_task_gc_bias.py ├── encode_task_idr.py ├── encode_task_jsd.py ├── encode_task_macs2_atac.py ├── encode_task_macs2_chip.py ├── encode_task_macs2_signal_track_atac.py ├── encode_task_macs2_signal_track_chip.py ├── encode_task_merge_fastq.py ├── encode_task_overlap.py ├── encode_task_pool_ta.py ├── encode_task_post_align.py ├── encode_task_post_call_peak_atac.py ├── encode_task_post_call_peak_chip.py ├── encode_task_preseq.py ├── encode_task_qc_report.py ├── encode_task_reproducibility.py ├── encode_task_spp.py ├── encode_task_spr.py ├── encode_task_subsample_ctl.py ├── encode_task_trim_adapter.py ├── encode_task_trim_fastq.py ├── encode_task_trimmomatic.py ├── encode_task_tss_enrich.py ├── 
encode_task_xcor.py └── trimfastq.py /.dockerignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .editorconfig 3 | .git 4 | .gitignore 5 | cromwell-executions 6 | cromwell-workflow-logs 7 | 8 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | [*.{wdl,json,conf}] 2 | indent_style = tab 3 | indent_size = 4 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## **Describe the bug** 11 | A clear and concise description of what the problem is. 12 | 13 | ## **OS/Platform** 14 | - OS/Platform: [e.g. Ubuntu 18.04, Google Cloud, Stanford Sherlock/SCG cluster, ...] 15 | - Conda version: If you used Conda (`$ conda --version`). 16 | - Pipeline version: [e.g. v1.6.0] 17 | - Caper version: [e.g. v1.2.0] 18 | 19 | ## **Caper configuration file** 20 | Paste contents of `~/.caper/default.conf`. 21 | ```ini 22 | PASTE CAPER CONF CONTENTS HERE 23 | ``` 24 | 25 | ## **Input JSON file** 26 | Paste contents of your input JSON file. 27 | ```json 28 | PASTE INPUT JSON CONTENTS HERE 29 | ``` 30 | 31 | ## **Troubleshooting result** 32 | 33 | If you ran `caper run` without a Caper server, Caper automatically runs a troubleshooter for failed workflows. Find the troubleshooting result at the bottom of Caper's screen log. 34 | 35 | If you ran `caper submit` with a running Caper server, first find your workflow ID (1st column) with `caper list`, then run `caper debug [WORKFLOW_ID]`. 36 | 37 | Paste the troubleshooting result. 
38 | ``` 39 | PASTE TROUBLESHOOTING RESULT HERE 40 | ``` 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # temp fastqs 104 | fastqs/ 105 | 106 | # cromwell temp dirs 107 | cromwell-executions/ 108 | cromwell-workflow-logs/ 
109 | cromwell*.jar 110 | 111 | #test.wdl 112 | #test_google.wdl 113 | output_*.json 114 | test_genome* 115 | test_sample* 116 | tmp 117 | hg38 118 | *.fastq.gz 119 | 120 | metadata.json 121 | resume.*.json 122 | 123 | tmp_db* 124 | *.local.json 125 | temp_db* 126 | 127 | cromwell.out 128 | cromwell.out.* 129 | 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 ENCODE DCC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /dev/docker_image/mysql/init_user.sql: -------------------------------------------------------------------------------- 1 | CREATE USER 'cromwell'@'localhost' IDENTIFIED BY 'cromwell'; 2 | GRANT ALL PRIVILEGES ON cromwell_db.* TO 'cromwell'@'localhost' WITH GRANT OPTION; 3 | CREATE USER 'cromwell'@'%' IDENTIFIED BY 'cromwell'; 4 | GRANT ALL PRIVILEGES ON cromwell_db.* TO 'cromwell'@'%' WITH GRANT OPTION; -------------------------------------------------------------------------------- /dev/example_input_json/caper/ENCSR000DYI_subsampled_chr19_only_caper.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/hg38_chr19_chrM_caper.tsv", 4 | "chip.fastqs_rep1_R1" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep1.subsampled.25.fastq.gz" 5 | ], 6 | "chip.fastqs_rep2_R1" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep2.subsampled.20.fastq.gz" 7 | ], 8 | "chip.ctl_fastqs_rep1_R1" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl1.subsampled.25.fastq.gz" 9 | ], 10 | "chip.ctl_fastqs_rep2_R1" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl2.subsampled.25.fastq.gz" 11 | ], 12 | "chip.paired_end" : false, 13 | "chip.title" : "ENCSR000DYI (subsampled 1/25, chr19_chrM only)", 14 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab" 15 | } 16 | -------------------------------------------------------------------------------- /dev/example_input_json/caper/ENCSR936XTK_subsampled_chr19_only_caper.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/hg38_chr19_chrM_caper.tsv", 4 | "chip.fastqs_rep1_R1" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R1.subsampled.50.fastq.gz" 5 | ], 6 | "chip.fastqs_rep1_R2" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R2.subsampled.50.fastq.gz" 7 | ], 8 | "chip.fastqs_rep2_R1" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R1.subsampled.50.fastq.gz" 9 | ], 10 | "chip.fastqs_rep2_R2" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R2.subsampled.50.fastq.gz" 11 | ], 12 | "chip.ctl_fastqs_rep1_R1" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R1.subsampled.80.fastq.gz" 13 | ], 14 | "chip.ctl_fastqs_rep1_R2" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R2.subsampled.80.fastq.gz" 15 | ], 16 | "chip.ctl_fastqs_rep2_R1" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R1.subsampled.80.fastq.gz" 17 | ], 18 | "chip.ctl_fastqs_rep2_R2" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R2.subsampled.80.fastq.gz" 19 | ], 20 | "chip.paired_end" : true, 21 | "chip.title" : "ENCSR936XTK (subsampled 1/50, chr19 and chrM Only)", 22 | "chip.description" : "ZNF143 ChIP-seq on human GM12878" 23 | } 24 | -------------------------------------------------------------------------------- 
/dev/example_input_json/gcp/ENCSR936XTK_subsampled_chr19_only_gcp.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "gs://encode-pipeline-genome-data/genome_tsv/v1/hg38_chr19_chrM_gcp.tsv", 4 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R1.subsampled.50.fastq.gz" 5 | ], 6 | "chip.fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R2.subsampled.50.fastq.gz" 7 | ], 8 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R1.subsampled.50.fastq.gz" 9 | ], 10 | "chip.fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R2.subsampled.50.fastq.gz" 11 | ], 12 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R1.subsampled.80.fastq.gz" 13 | ], 14 | "chip.ctl_fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R2.subsampled.80.fastq.gz" 15 | ], 16 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R1.subsampled.80.fastq.gz" 17 | ], 18 | "chip.ctl_fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R2.subsampled.80.fastq.gz" 19 | ], 20 | "chip.paired_end" : true, 21 | "chip.title" : "ENCSR936XTK (subsampled 1/50, chr19 and chrM Only)", 22 | "chip.description" : "ZNF143 ChIP-seq on human GM12878" 23 | } 24 | -------------------------------------------------------------------------------- /dev/example_input_json/klab/ENCSR000DYI_subsampled_chr19_only_klab.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "/mnt/data/pipeline_genome_data/genome_tsv/v1/hg38_chr19_chrM_klab.tsv", 4 | "chip.fastqs_rep1_R1" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep1.subsampled.25.fastq.gz" 5 | ], 6 | "chip.fastqs_rep2_R1" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep2.subsampled.20.fastq.gz" 7 | ], 8 | "chip.ctl_fastqs_rep1_R1" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl1.subsampled.25.fastq.gz" 9 | ], 10 | "chip.ctl_fastqs_rep2_R1" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl2.subsampled.25.fastq.gz" 11 | ], 12 | "chip.paired_end" : false, 13 | "chip.title" : "ENCSR000DYI (subsampled 1/25, chr19_chrM only)", 14 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab" 15 | } 16 | -------------------------------------------------------------------------------- /dev/example_input_json/klab/ENCSR000DYI_subsampled_ctl_sub_chr19_only_klab.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "/mnt/data/pipeline_genome_data/genome_tsv/v1/hg38_chr19_chrM_klab.tsv", 4 | "chip.fastqs_rep1_R1" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep1.subsampled.25.fastq.gz" 5 | ], 6 | "chip.fastqs_rep2_R1" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep2.subsampled.20.fastq.gz" 7 | ], 8 | "chip.ctl_fastqs_rep1_R1" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl1.subsampled.25.fastq.gz" 9 | ], 10 | "chip.ctl_fastqs_rep2_R1" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl2.subsampled.25.fastq.gz" 11 | ], 12 | "chip.paired_end" : false, 13 | 
"chip.ctl_depth_limit": 46000, 14 | "chip.exp_ctl_depth_ratio_limit": 0.9, 15 | "chip.title" : "ENCSR000DYI (subsampled 1/25, ctl subsampled 0.9 46000, chr19_chrM only)", 16 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab" 17 | } 18 | -------------------------------------------------------------------------------- /dev/example_input_json/klab/ENCSR936XTK_klab.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "/mnt/data/pipeline_genome_data/genome_tsv/v1/hg38_klab.tsv", 4 | "chip.fastqs_rep1_R1" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R1.fastq.gz" 5 | ], 6 | "chip.fastqs_rep1_R2" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R2.fastq.gz" 7 | ], 8 | "chip.fastqs_rep2_R1" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R1.fastq.gz" 9 | ], 10 | "chip.fastqs_rep2_R2" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R2.fastq.gz" 11 | ], 12 | "chip.ctl_fastqs_rep1_R1" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R1.fastq.gz" 13 | ], 14 | "chip.ctl_fastqs_rep1_R2" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R2.fastq.gz" 15 | ], 16 | "chip.ctl_fastqs_rep2_R1" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R1.fastq.gz" 17 | ], 18 | "chip.ctl_fastqs_rep2_R2" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R2.fastq.gz" 19 | ], 20 | "chip.paired_end" : true, 21 | "chip.title" : "ENCSR936XTK", 22 | "chip.description" : "ZNF143 ChIP-seq on human GM12878" 23 | } 24 | -------------------------------------------------------------------------------- /dev/example_input_json/klab/ENCSR936XTK_subsampled_chr19_only_klab.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "/mnt/data/pipeline_genome_data/genome_tsv/v1/hg38_chr19_chrM_klab.tsv", 4 | "chip.fastqs_rep1_R1" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R1.subsampled.50.fastq.gz" 5 | ], 6 | "chip.fastqs_rep1_R2" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R2.subsampled.50.fastq.gz" 7 | ], 8 | "chip.fastqs_rep2_R1" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R1.subsampled.50.fastq.gz" 9 | ], 10 | "chip.fastqs_rep2_R2" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R2.subsampled.50.fastq.gz" 11 | ], 12 | "chip.ctl_fastqs_rep1_R1" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R1.subsampled.80.fastq.gz" 13 | ], 14 | "chip.ctl_fastqs_rep1_R2" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R2.subsampled.80.fastq.gz" 15 | ], 16 | "chip.ctl_fastqs_rep2_R1" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R1.subsampled.80.fastq.gz" 17 | ], 18 | "chip.ctl_fastqs_rep2_R2" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R2.subsampled.80.fastq.gz" 19 | ], 20 | "chip.paired_end" : true, 21 | "chip.title" : "ENCSR936XTK (subsampled 1/50, chr19 and chrM Only)", 22 | "chip.description" : "ZNF143 ChIP-seq on human GM12878" 23 | } 24 | -------------------------------------------------------------------------------- /dev/example_input_json/klab/ENCSR936XTK_subsampled_ctl_sub_chr19_only_klab.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : 
"/mnt/data/pipeline_genome_data/genome_tsv/v1/hg38_chr19_chrM_klab.tsv", 4 | "chip.fastqs_rep1_R1" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R1.subsampled.50.fastq.gz" 5 | ], 6 | "chip.fastqs_rep1_R2" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R2.subsampled.50.fastq.gz" 7 | ], 8 | "chip.fastqs_rep2_R1" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R1.subsampled.50.fastq.gz" 9 | ], 10 | "chip.fastqs_rep2_R2" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R2.subsampled.50.fastq.gz" 11 | ], 12 | "chip.ctl_fastqs_rep1_R1" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R1.subsampled.80.fastq.gz" 13 | ], 14 | "chip.ctl_fastqs_rep1_R2" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R2.subsampled.80.fastq.gz" 15 | ], 16 | "chip.ctl_fastqs_rep2_R1" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R1.subsampled.80.fastq.gz" 17 | ], 18 | "chip.ctl_fastqs_rep2_R2" : ["/mnt/data/pipeline_test_samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R2.subsampled.80.fastq.gz" 19 | ], 20 | "chip.paired_end" : true, 21 | "chip.ctl_depth_limit": 46000, 22 | "chip.exp_ctl_depth_ratio_limit": 0.9, 23 | "chip.title" : "ENCSR936XTK (subsampled 1/50, ctl subsampled 0.9 46000, chr19 and chrM Only)", 24 | "chip.description" : "ZNF143 ChIP-seq on human GM12878" 25 | } 26 | -------------------------------------------------------------------------------- /dev/test/run_cromwell_server_on_gc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -f "cromwell-32.jar" ]; then 4 | echo "Skip downloading cromwell." 
5 | else 6 | wget -N -c https://github.com/broadinstitute/cromwell/releases/download/32/cromwell-32.jar 7 | fi 8 | CROMWELL_JAR=cromwell-32.jar 9 | BACKEND_CONF=../backends/backend_with_db.conf 10 | BACKEND=google 11 | GC_PROJ=encode-dcc-1016 12 | GC_ROOT=gs://encode-pipeline-test-runs 13 | 14 | java -Dconfig.file=${BACKEND_CONF} -Dbackend.default=${BACKEND} -Dbackend.providers.google.config.project=${GC_PROJ} \ 15 | -Dbackend.providers.google.config.root=${GC_ROOT} -jar ${CROMWELL_JAR} server 16 | -------------------------------------------------------------------------------- /dev/test/test_py/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCODE-DCC/chip-seq-pipeline2/26eeda81a0540dc793fc69b0c390d232ca7ca50a/dev/test/test_py/__init__.py -------------------------------------------------------------------------------- /dev/test/test_task/.gitignore: -------------------------------------------------------------------------------- 1 | chip-seq-pipeline-test-data 2 | *.result.json 3 | *.metadata.json 4 | *wf_opt.json 5 | cromwell*.jar 6 | -------------------------------------------------------------------------------- /dev/test/test_task/compare_md5sum.wdl: -------------------------------------------------------------------------------- 1 | version 1.0 2 | task compare_md5sum { 3 | input { 4 | Array[String] labels 5 | Array[File] files 6 | Array[File] ref_files 7 | } 8 | 9 | command <<< 10 | python <>> 83 | output { 84 | Map[String,String] match = read_map('match.tsv') # key:label, val:match 85 | Boolean match_overall = read_boolean('match_overall.txt') 86 | File json = glob('result.json')[0] # details (json file) 87 | String json_str = read_string('result.json') # details (string) 88 | } 89 | runtime { 90 | cpu : 1 91 | memory : '4000 MB' 92 | time : 1 93 | disks : 'local-disk 50 HDD' 94 | } 95 | } 96 | -------------------------------------------------------------------------------- 
/dev/test/test_task/test_bam2ta.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_bam2ta.pe_nodup_bam" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/pe/nodup_bams/rep1/rep1-R1.subsampled.67.merged.nodup.bam", 3 | "test_bam2ta.se_nodup_bam" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/nodup_bams/rep1/rep1.subsampled.25.merged.nodup.bam", 4 | 5 | "test_bam2ta.ref_pe_ta" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_bam2ta/pe/rep1-R1.subsampled.67.merged.nodup.tagAlign.gz", 6 | "test_bam2ta.ref_pe_ta_subsample" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_bam2ta/pe/subsample/fix_PIP-917/rep1-R1.subsampled.67.merged.nodup.17K.tagAlign.gz", 7 | 8 | "test_bam2ta.ref_se_ta" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_bam2ta/se/rep1.subsampled.25.merged.nodup.tagAlign.gz", 9 | "test_bam2ta.ref_se_ta_subsample" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_bam2ta/se/subsample/rep1.subsampled.25.merged.nodup.17K.tagAlign.gz", 10 | 11 | "test_bam2ta.bam2ta_subsample" : 17000 12 | } 13 | -------------------------------------------------------------------------------- /dev/test/test_task/test_bam2ta.wdl: -------------------------------------------------------------------------------- 1 | version 1.0 2 | import '../../../chip.wdl' as chip 3 | import 'compare_md5sum.wdl' as compare_md5sum 4 | 5 | workflow test_bam2ta { 6 | input { 7 | Int bam2ta_subsample 8 | 9 | String pe_nodup_bam 10 | String se_nodup_bam 11 | 12 | String ref_pe_ta 13 | String ref_pe_ta_subsample 14 | String ref_se_ta 15 | String ref_se_ta_subsample 16 | String docker 17 | } 18 | RuntimeEnvironment runtime_environment 
= { 19 | "docker": docker, 20 | "singularity": "", 21 | "conda": "" 22 | } 23 | String mito_chr_name = 'chrM' 24 | 25 | Int bam2ta_cpu = 1 26 | Float bam2ta_mem_factor = 0.0 27 | Int bam2ta_time_hr = 6 28 | Float bam2ta_disk_factor = 4.0 29 | 30 | call chip.bam2ta as pe_bam2ta { input : 31 | bam = pe_nodup_bam, 32 | subsample = 0, 33 | paired_end = true, 34 | mito_chr_name = mito_chr_name, 35 | 36 | cpu = bam2ta_cpu, 37 | mem_factor = bam2ta_mem_factor, 38 | time_hr = bam2ta_time_hr, 39 | disk_factor = bam2ta_disk_factor, 40 | runtime_environment = runtime_environment, 41 | } 42 | call chip.bam2ta as pe_bam2ta_subsample { input : 43 | bam = pe_nodup_bam, 44 | subsample = bam2ta_subsample, 45 | paired_end = true, 46 | mito_chr_name = mito_chr_name, 47 | 48 | cpu = bam2ta_cpu, 49 | mem_factor = bam2ta_mem_factor, 50 | time_hr = bam2ta_time_hr, 51 | disk_factor = bam2ta_disk_factor, 52 | runtime_environment = runtime_environment, 53 | } 54 | call chip.bam2ta as se_bam2ta { input : 55 | bam = se_nodup_bam, 56 | subsample = 0, 57 | paired_end = false, 58 | mito_chr_name = mito_chr_name, 59 | 60 | cpu = bam2ta_cpu, 61 | mem_factor = bam2ta_mem_factor, 62 | time_hr = bam2ta_time_hr, 63 | disk_factor = bam2ta_disk_factor, 64 | runtime_environment = runtime_environment, 65 | } 66 | call chip.bam2ta as se_bam2ta_subsample { input : 67 | bam = se_nodup_bam, 68 | subsample = bam2ta_subsample, 69 | paired_end = false, 70 | mito_chr_name = mito_chr_name, 71 | 72 | cpu = bam2ta_cpu, 73 | mem_factor = bam2ta_mem_factor, 74 | time_hr = bam2ta_time_hr, 75 | disk_factor = bam2ta_disk_factor, 76 | runtime_environment = runtime_environment, 77 | } 78 | 79 | call compare_md5sum.compare_md5sum { input : 80 | labels = [ 81 | 'pe_bam2ta', 82 | 'pe_bam2ta_subsample', 83 | 'se_bam2ta', 84 | 'se_bam2ta_subsample', 85 | ], 86 | files = [ 87 | pe_bam2ta.ta, 88 | pe_bam2ta_subsample.ta, 89 | se_bam2ta.ta, 90 | se_bam2ta_subsample.ta, 91 | ], 92 | ref_files = [ 93 | ref_pe_ta, 94 | 
ref_pe_ta_subsample, 95 | ref_se_ta, 96 | ref_se_ta_subsample, 97 | ], 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /dev/test/test_task/test_bam_to_pbam.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_bam_to_pbam.ref_fa" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/GRCh38_no_alt_analysis_set_GCA_000001405.15.chr19_chrM.fasta.gz", 3 | "test_bam_to_pbam.chrsz" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38_chr19_chrM.chrom.sizes", 4 | "test_bam_to_pbam.pe_bam" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/pe/bams/rep1/rep1-R1.subsampled.67.merged.bam", 5 | "test_bam_to_pbam.se_bam" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/bams/rep1/rep1.subsampled.25.merged.bam", 6 | 7 | "test_bam_to_pbam.ref_pe_samtools_flagstat_qc" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_bam_to_pbam/pe/flagstat.qc", 8 | "test_bam_to_pbam.ref_se_samtools_flagstat_qc" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_bam_to_pbam/se/flagstat.qc" 9 | } 10 | -------------------------------------------------------------------------------- /dev/test/test_task/test_bam_to_pbam.wdl: -------------------------------------------------------------------------------- 1 | version 1.0 2 | import '../../../chip.wdl' as chip 3 | import 'compare_md5sum.wdl' as compare_md5sum 4 | 5 | workflow test_bam_to_pbam { 6 | input { 7 | String dup_marker = 'picard' 8 | Int mapq_thresh = 30 9 | Boolean no_dup_removal = false 10 | File ref_fa 11 | File pe_bam 12 | File se_bam 13 | 14 | File chrsz 15 | 16 | File ref_pe_samtools_flagstat_qc 17 | File 
ref_se_samtools_flagstat_qc 18 | String docker 19 | } 20 | RuntimeEnvironment runtime_environment = { 21 | "docker": docker, 22 | "singularity": "", 23 | "conda": "" 24 | } 25 | String mito_chr_name = 'chrM' 26 | 27 | Int filter_cpu = 1 28 | Float filter_mem_factor = 0.0 29 | Int filter_time_hr = 24 30 | Float filter_disk_factor = 6.0 31 | 32 | call chip.filter as pe_filter { input : 33 | bam = pe_bam, 34 | no_dup_removal = false, 35 | paired_end = true, 36 | ref_fa = ref_fa, 37 | redact_nodup_bam = false, 38 | dup_marker = dup_marker, 39 | mapq_thresh = mapq_thresh, 40 | mito_chr_name = mito_chr_name, 41 | filter_chrs = [], 42 | chrsz = chrsz, 43 | 44 | cpu = filter_cpu, 45 | mem_factor = filter_mem_factor, 46 | picard_java_heap = '4G', 47 | time_hr = filter_time_hr, 48 | disk_factor = filter_disk_factor, 49 | runtime_environment = runtime_environment, 50 | } 51 | call chip.filter as se_filter { input : 52 | bam = se_bam, 53 | no_dup_removal = false, 54 | paired_end = false, 55 | ref_fa = ref_fa, 56 | redact_nodup_bam = false, 57 | dup_marker = dup_marker, 58 | mapq_thresh = mapq_thresh, 59 | mito_chr_name = mito_chr_name, 60 | filter_chrs = [], 61 | chrsz = chrsz, 62 | 63 | cpu = filter_cpu, 64 | mem_factor = filter_mem_factor, 65 | picard_java_heap = '4G', 66 | time_hr = filter_time_hr, 67 | disk_factor = filter_disk_factor, 68 | runtime_environment = runtime_environment, 69 | } 70 | call samtools_flagstat as pe_samtools_flagstat { input : 71 | bam = pe_filter.nodup_bam, 72 | runtime_environment = runtime_environment, 73 | } 74 | call samtools_flagstat as se_samtools_flagstat { input : 75 | bam = se_filter.nodup_bam, 76 | runtime_environment = runtime_environment, 77 | } 78 | 79 | call compare_md5sum.compare_md5sum { input : 80 | labels = [ 81 | 'pe_bam_to_pbam', 82 | 'se_bam_to_pbam', 83 | ], 84 | files = [ 85 | pe_samtools_flagstat.flagstat_qc, 86 | se_samtools_flagstat.flagstat_qc, 87 | ], 88 | ref_files = [ 89 | ref_pe_samtools_flagstat_qc, 90 | 
ref_se_samtools_flagstat_qc, 91 | ], 92 | } 93 | } 94 | 95 | task samtools_flagstat { 96 | input { 97 | File bam 98 | RuntimeEnvironment runtime_environment 99 | } 100 | command { 101 | samtools flagstat ~{bam} > flagstat.qc 102 | } 103 | output { 104 | File flagstat_qc = 'flagstat.qc' 105 | } 106 | runtime { 107 | docker: runtime_environment.docker 108 | singularity: runtime_environment.singularity 109 | conda: runtime_environment.conda 110 | } 111 | } -------------------------------------------------------------------------------- /dev/test/test_task/test_bowtie2.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_bowtie2.pe_bowtie2_idx_tar" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/bowtie2_index/GRCh38_no_alt_analysis_set_GCA_000001405.15.chr19_chrM.fasta.tar", 3 | "test_bowtie2.se_bowtie2_idx_tar" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/bowtie2_index/GRCh38_no_alt_analysis_set_GCA_000001405.15.chr19_chrM.fasta.tar", 4 | 5 | "test_bowtie2.pe_fastqs_R1" : [ 6 | "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/pe/fastqs/rep1/pair1/rep1-R1.subsampled.67.fastq.gz" 7 | ], 8 | "test_bowtie2.pe_fastqs_R2" : [ 9 | "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/pe/fastqs/rep1/pair2/rep1-R2.subsampled.67.fastq.gz" 10 | ], 11 | "test_bowtie2.se_fastqs" : [ 12 | "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/fastqs/rep1/rep1.subsampled.25.fastq.gz" 13 | ], 14 | "test_bowtie2.ref_pe_flagstat" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_bowtie2/pe/rep1-R1.subsampled.67.samstats.qc", 15 | "test_bowtie2.ref_se_flagstat" : 
"gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_bowtie2/se/rep1.subsampled.25.samstats.qc", 16 | 17 | "test_bowtie2.ref_pe_local_flagstat" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_bowtie2/pe/local/rep1-R1.subsampled.67.srt.samstats.qc", 18 | "test_bowtie2.ref_se_local_flagstat" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_bowtie2/se/local/rep1.subsampled.25.srt.samstats.qc" 19 | 20 | } 21 | -------------------------------------------------------------------------------- /dev/test/test_task/test_bwa.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_bwa.pe_bwa_idx_tar" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/bwa_index/GRCh38_no_alt_analysis_set_GCA_000001405.15.chr19_chrM.fasta.tar", 3 | "test_bwa.se_bwa_idx_tar" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/bwa_index/GRCh38_no_alt_analysis_set_GCA_000001405.15.chr19_chrM.fasta.tar", 4 | 5 | "test_bwa.pe_fastqs_R1" : [ 6 | "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/pe/fastqs/rep1/pair1/rep1-R1.subsampled.67.fastq.gz" 7 | ], 8 | "test_bwa.pe_fastqs_R2" : [ 9 | "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/pe/fastqs/rep1/pair2/rep1-R2.subsampled.67.fastq.gz" 10 | ], 11 | "test_bwa.se_fastqs" : [ 12 | "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/fastqs/rep1/rep1.subsampled.25.fastq.gz" 13 | ], 14 | 15 | "test_bwa.ref_pe_flagstat" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_bwa/pe/rep1-R1.subsampled.67.samstats.qc", 16 | 
"test_bwa.ref_se_flagstat" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_bwa/se/rep1.subsampled.25.samstats.qc" 17 | } 18 | -------------------------------------------------------------------------------- /dev/test/test_task/test_bwa.wdl: -------------------------------------------------------------------------------- 1 | version 1.0 2 | import '../../../chip.wdl' as chip 3 | import 'compare_md5sum.wdl' as compare_md5sum 4 | 5 | workflow test_bwa { 6 | input { 7 | Array[File] pe_fastqs_R1 8 | Array[File] pe_fastqs_R2 9 | Array[File] se_fastqs 10 | 11 | # we don't compare BAM because BAM's header includes date 12 | # hence md5sums don't match all the time 13 | String ref_pe_flagstat 14 | String ref_se_flagstat 15 | 16 | String pe_bwa_idx_tar 17 | String se_bwa_idx_tar 18 | String docker 19 | } 20 | RuntimeEnvironment runtime_environment = { 21 | "docker": docker, 22 | "singularity": "", 23 | "conda": "" 24 | } 25 | 26 | Int bwa_cpu = 1 27 | Float bwa_mem_factor = 0.0 28 | Int bwa_time_hr = 48 29 | Float bwa_disk_factor = 8.0 30 | 31 | call chip.align as pe_bwa { input : 32 | aligner = 'bwa', 33 | idx_tar = pe_bwa_idx_tar, 34 | mito_chr_name = 'chrM', 35 | fastqs_R1 = pe_fastqs_R1, 36 | fastqs_R2 = pe_fastqs_R2, 37 | paired_end = true, 38 | use_bwa_mem_for_pe = false, 39 | bwa_mem_read_len_limit = 70, 40 | use_bowtie2_local_mode = false, 41 | crop_length = 0, 42 | crop_length_tol = 0, 43 | 44 | cpu = bwa_cpu, 45 | mem_factor = bwa_mem_factor, 46 | time_hr = bwa_time_hr, 47 | disk_factor = bwa_disk_factor, 48 | runtime_environment = runtime_environment, 49 | } 50 | call chip.align as se_bwa { input : 51 | aligner = 'bwa', 52 | idx_tar = se_bwa_idx_tar, 53 | mito_chr_name = 'chrM', 54 | fastqs_R1 = se_fastqs, 55 | fastqs_R2 = [], 56 | paired_end = false, 57 | use_bwa_mem_for_pe = false, 58 | bwa_mem_read_len_limit = 70, 59 | use_bowtie2_local_mode = false, 60 | crop_length = 0, 61 | crop_length_tol = 0, 62 | 
63 | cpu = bwa_cpu, 64 | mem_factor = bwa_mem_factor, 65 | time_hr = bwa_time_hr, 66 | disk_factor = bwa_disk_factor, 67 | runtime_environment = runtime_environment, 68 | } 69 | 70 | call compare_md5sum.compare_md5sum { input : 71 | labels = [ 72 | 'pe_bwa', 73 | 'se_bwa', 74 | ], 75 | files = [ 76 | pe_bwa.samstat_qc, 77 | se_bwa.samstat_qc, 78 | ], 79 | ref_files = [ 80 | ref_pe_flagstat, 81 | ref_se_flagstat, 82 | ], 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /dev/test/test_task/test_choose_ctl.aux.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCODE-DCC/chip-seq-pipeline2/26eeda81a0540dc793fc69b0c390d232ca7ca50a/dev/test/test_task/test_choose_ctl.aux.xlsx -------------------------------------------------------------------------------- /dev/test/test_task/test_choose_ctl.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_choose_ctl.se_ta_rep1" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/tas/rep1/rep1.subsampled.25.merged.nodup.tagAlign.gz", 3 | "test_choose_ctl.se_ta_rep2" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/tas/rep2/rep2.subsampled.20.merged.nodup.tagAlign.gz", 4 | "test_choose_ctl.se_ta_pooled" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/tas/pooled_rep/rep1.subsampled.25.merged.nodup.pooled.tagAlign.gz", 5 | "test_choose_ctl.se_ctl_ta_rep1" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/tas/ctl1_fake_doubled/ctl1.double.tagAlign.gz", 6 | "test_choose_ctl.se_ctl_ta_rep2" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/tas/ctl2_fake_doubled/ctl2.double.tagAlign.gz", 7 | "test_choose_ctl.se_ctl_ta_pooled" : 
"gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/tas/pooled_ctl_fake_doubled/ctl_pooled.double.tagnAlign.gz", 8 | 9 | "test_choose_ctl.ref_se_choose_ctl_idx1" : 0, 10 | "test_choose_ctl.ref_se_choose_ctl_idx2" : 1, 11 | "test_choose_ctl.ref_se_choose_ctl_sub1" : 0, 12 | "test_choose_ctl.ref_se_choose_ctl_sub2" : 46249, 13 | "test_choose_ctl.ref_se_choose_ctl_sub_pooled" : 93085, 14 | 15 | "test_choose_ctl.ref_se_choose_ctl_always_use_pooled_ctl_idx1" : -1, 16 | "test_choose_ctl.ref_se_choose_ctl_always_use_pooled_ctl_idx2" : -1, 17 | "test_choose_ctl.ref_se_choose_ctl_always_use_pooled_ctl_sub1" : 46836, 18 | "test_choose_ctl.ref_se_choose_ctl_always_use_pooled_ctl_sub2" : 46249, 19 | "test_choose_ctl.ref_se_choose_ctl_always_use_pooled_ctl_sub_pooled" : 93085, 20 | 21 | "test_choose_ctl.ref_se_choose_ctl_single_rep_idx1" : 0, 22 | "test_choose_ctl.ref_se_choose_ctl_single_rep_sub1" : 46836, 23 | "test_choose_ctl.ref_se_choose_ctl_single_rep_sub_pooled" : 46836, 24 | 25 | "test_choose_ctl.ref_se_choose_ctl_single_ctl_idx1" : 0, 26 | "test_choose_ctl.ref_se_choose_ctl_single_ctl_idx2" : 0, 27 | "test_choose_ctl.ref_se_choose_ctl_single_ctl_sub1" : 0, 28 | "test_choose_ctl.ref_se_choose_ctl_single_ctl_sub2" : 46249, 29 | "test_choose_ctl.ref_se_choose_ctl_single_ctl_sub_pooled" : 0, 30 | 31 | "test_choose_ctl.ref_se_choose_ctl_disabled_idx1" : 0, 32 | "test_choose_ctl.ref_se_choose_ctl_disabled_idx2" : 1, 33 | "test_choose_ctl.ref_se_choose_ctl_disabled_sub1" : 0, 34 | "test_choose_ctl.ref_se_choose_ctl_disabled_sub2" : 0, 35 | "test_choose_ctl.ref_se_choose_ctl_disabled_sub_pooled" : 0, 36 | 37 | "test_choose_ctl.ref_se_choose_ctl_ctl_depth_limit_only_idx1" : 0, 38 | "test_choose_ctl.ref_se_choose_ctl_ctl_depth_limit_only_idx2" : 1, 39 | "test_choose_ctl.ref_se_choose_ctl_ctl_depth_limit_only_sub1" : 45000, 40 | "test_choose_ctl.ref_se_choose_ctl_ctl_depth_limit_only_sub2" : 45000, 41 | 
"test_choose_ctl.ref_se_choose_ctl_ctl_depth_limit_only_sub_pooled" : 45000, 42 | 43 | "test_choose_ctl.ref_se_choose_ctl_exp_ctl_depth_ratio_limit_only_idx1" : 0, 44 | "test_choose_ctl.ref_se_choose_ctl_exp_ctl_depth_ratio_limit_only_idx2" : 1, 45 | "test_choose_ctl.ref_se_choose_ctl_exp_ctl_depth_ratio_limit_only_sub1" : 0, 46 | "test_choose_ctl.ref_se_choose_ctl_exp_ctl_depth_ratio_limit_only_sub2" : 46249, 47 | "test_choose_ctl.ref_se_choose_ctl_exp_ctl_depth_ratio_limit_only_sub_pooled" : 93085, 48 | 49 | "test_choose_ctl.ctl_depth_ratio" : 1.5, 50 | "test_choose_ctl.ctl_depth_limit" : 45000, 51 | "test_choose_ctl.exp_ctl_depth_ratio_limit" : 1.5 52 | } 53 | -------------------------------------------------------------------------------- /dev/test/test_task/test_count_signal_track.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_count_signal_track.se_chrsz" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38_chr19_chrM.chrom.sizes", 3 | 4 | "test_count_signal_track.se_ta" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/tas/rep1/rep1.subsampled.25.merged.nodup.tagAlign.gz", 5 | 6 | "test_count_signal_track.ref_se_count_signal_track_pos_bw" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_count_signal_track/rep1.subsampled.25.merged.nodup.positive.bigwig", 7 | "test_count_signal_track.ref_se_count_signal_track_neg_bw" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_count_signal_track/rep1.subsampled.25.merged.nodup.negative.bigwig" 8 | } 9 | -------------------------------------------------------------------------------- /dev/test/test_task/test_count_signal_track.wdl: -------------------------------------------------------------------------------- 1 | version 1.0 2 | import 
'../../../chip.wdl' as chip 3 | import 'compare_md5sum.wdl' as compare_md5sum 4 | 5 | workflow test_count_signal_track { 6 | input { 7 | String se_ta 8 | 9 | String ref_se_count_signal_track_pos_bw 10 | String ref_se_count_signal_track_neg_bw 11 | 12 | String se_chrsz 13 | String docker 14 | } 15 | RuntimeEnvironment runtime_environment = { 16 | "docker": docker, 17 | "singularity": "", 18 | "conda": "" 19 | } 20 | 21 | call chip.count_signal_track as se_count_signal_track { input : 22 | ta = se_ta, 23 | chrsz = se_chrsz, 24 | runtime_environment = runtime_environment, 25 | } 26 | 27 | call compare_md5sum.compare_md5sum { input : 28 | labels = [ 29 | 'se_count_signal_track_pos_bw', 30 | 'se_count_signal_track_neg_bw', 31 | ], 32 | files = [ 33 | se_count_signal_track.pos_bw, 34 | se_count_signal_track.neg_bw, 35 | ], 36 | ref_files = [ 37 | ref_se_count_signal_track_pos_bw, 38 | ref_se_count_signal_track_neg_bw, 39 | ], 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /dev/test/test_task/test_filter.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_filter.chrsz" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38_chr19_chrM.chrom.sizes", 3 | "test_filter.pe_bam" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/pe/bams/rep1/rep1-R1.subsampled.67.merged.bam", 4 | "test_filter.se_bam" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/bams/rep1/rep1.subsampled.25.merged.bam", 5 | 6 | "test_filter.ref_pe_nodup_samstat_qc" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_filter/pe/rep1-R1.subsampled.67.merged.nodup.samstats.qc", 7 | "test_filter.ref_pe_filt_samstat_qc" : 
"gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_filter/pe/rep1-R1.subsampled.67.merged.filt.samstats.qc", 8 | 9 | "test_filter.ref_se_nodup_samstat_qc" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_filter/se/rep1.subsampled.25.merged.nodup.samstats.qc", 10 | "test_filter.ref_se_filt_samstat_qc" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_filter/se/rep1.subsampled.25.merged.filt.samstats.qc" 11 | } 12 | -------------------------------------------------------------------------------- /dev/test/test_task/test_gc_bias.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_gc_bias.ref_fa" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/GRCh38_no_alt_analysis_set_GCA_000001405.15.chr19_chrM.fasta.gz", 3 | "test_gc_bias.nodup_bam" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/pe/gc_bias/rep1-R1.subsampled.67.merged.nodup.bam", 4 | 5 | "test_gc_bias.ref_gc_log" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_gc_bias/rep1-R1.subsampled.67.merged.nodup.gc.txt" 6 | } 7 | -------------------------------------------------------------------------------- /dev/test/test_task/test_gc_bias.wdl: -------------------------------------------------------------------------------- 1 | version 1.0 2 | import '../../../chip.wdl' as chip 3 | import 'compare_md5sum.wdl' as compare_md5sum 4 | 5 | workflow test_gc_bias { 6 | input { 7 | File nodup_bam 8 | File ref_fa 9 | File ref_gc_log 10 | String docker 11 | } 12 | RuntimeEnvironment runtime_environment = { 13 | "docker": docker, 14 | "singularity": "", 15 | "conda": "" 16 | } 17 | 18 | call chip.gc_bias { input : 19 | nodup_bam = nodup_bam, 20 | 
ref_fa = ref_fa, 21 | picard_java_heap = '4G', 22 | runtime_environment = runtime_environment, 23 | } 24 | 25 | call remove_comments_from_gc_log { input : 26 | gc_log = gc_bias.gc_log 27 | } 28 | 29 | call remove_comments_from_gc_log as remove_comments_from_gc_log_ref { input : 30 | gc_log = ref_gc_log 31 | } 32 | 33 | call compare_md5sum.compare_md5sum { input : 34 | labels = [ 35 | 'test_gc_log', 36 | ], 37 | files = [ 38 | remove_comments_from_gc_log.filt_gc_log, 39 | ], 40 | ref_files = [ 41 | remove_comments_from_gc_log_ref.filt_gc_log, 42 | ], 43 | } 44 | } 45 | 46 | task remove_comments_from_gc_log { 47 | input { 48 | File gc_log 49 | } 50 | command { 51 | zcat -f ${gc_log} | grep -v '# ' \ 52 | > ${basename(gc_log) + '.date_filt_out'} 53 | } 54 | output { 55 | File filt_gc_log = glob('*.date_filt_out')[0] 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /dev/test/test_task/test_idr.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_idr.se_blacklist" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38.blacklist.bed.gz", 3 | "test_idr.se_chrsz" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38_chr19_chrM.chrom.sizes", 4 | 5 | "test_idr.se_peak_rep1" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/rpeaks/rep1/rep1.subsampled.25.merged.nodup_x_ctl1.subsampled.25.merged.nodup.pooled.300K.regionPeak.gz", 6 | "test_idr.se_peak_rep2" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/rpeaks/rep2/rep2.subsampled.20.merged.nodup_x_ctl1.subsampled.25.merged.nodup.pooled.300K.regionPeak.gz", 7 | "test_idr.se_peak_pooled" : 
"gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/rpeaks/pooled_rep/rep1.subsampled.25.merged.nodup.pooled_x_ctl1.subsampled.25.merged.nodup.pooled.300K.regionPeak.gz", 8 | "test_idr.se_ta_pooled" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/tas/pooled_rep/rep1.subsampled.25.merged.nodup.pooled.tagAlign.gz", 9 | 10 | "test_idr.ref_se_idr_peak" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_idr/rep1-rep2.idr0.1.regionPeak.gz", 11 | "test_idr.ref_se_idr_bfilt_peak" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_idr/rep1-rep2.idr0.1.bfilt.regionPeak.gz", 12 | "test_idr.ref_se_idr_frip_qc" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_idr/rep1-rep2.idr0.1.bfilt.frip.qc", 13 | 14 | "test_idr.fraglen" : 100, 15 | "test_idr.idr_thresh" : 0.1 16 | } 17 | -------------------------------------------------------------------------------- /dev/test/test_task/test_idr.wdl: -------------------------------------------------------------------------------- 1 | version 1.0 2 | import '../../../chip.wdl' as chip 3 | import 'compare_md5sum.wdl' as compare_md5sum 4 | 5 | workflow test_idr { 6 | input { 7 | Float idr_thresh 8 | 9 | String se_peak_rep1 10 | String se_peak_rep2 11 | String se_peak_pooled 12 | String se_ta_pooled 13 | 14 | String ref_se_idr_peak 15 | String ref_se_idr_bfilt_peak 16 | String ref_se_idr_frip_qc 17 | 18 | String se_blacklist 19 | String se_chrsz 20 | Int fraglen 21 | String docker 22 | } 23 | RuntimeEnvironment runtime_environment = { 24 | "docker": docker, 25 | "singularity": "", 26 | "conda": "" 27 | } 28 | 29 | String regex_bfilt_peak_chr_name = 'chr[\\dXY]+' 30 | 31 | call chip.idr as se_idr { input : 32 | prefix = 'rep1-rep2', 33 | peak1 = se_peak_rep1, 34 | peak2 = se_peak_rep2, 
35 | peak_pooled = se_peak_pooled, 36 | idr_thresh = idr_thresh, 37 | peak_type = 'regionPeak', # using SPP regionPeaks 38 | rank = 'signal.value', # need to use signal.value for regionPeaks instead of p.value 39 | chrsz = se_chrsz, 40 | fraglen = fraglen, 41 | blacklist = se_blacklist, 42 | regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name, 43 | 44 | ta = se_ta_pooled, 45 | runtime_environment = runtime_environment, 46 | } 47 | 48 | call compare_md5sum.compare_md5sum { input : 49 | labels = [ 50 | 'se_idr_peak', 51 | 'se_idr_bfilt_peak', 52 | 'se_idr_frip_qc', 53 | ], 54 | files = [ 55 | se_idr.idr_peak, 56 | se_idr.bfilt_idr_peak, 57 | se_idr.frip_qc, 58 | ], 59 | ref_files = [ 60 | ref_se_idr_peak, 61 | ref_se_idr_bfilt_peak, 62 | ref_se_idr_frip_qc, 63 | ], 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /dev/test/test_task/test_jsd.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_jsd.se_nodup_bams" : [ 3 | "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/nodup_bams/rep1/rep1.subsampled.25.merged.nodup.bam", 4 | "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/nodup_bams/rep2/rep2.subsampled.20.merged.nodup.bam" 5 | ], 6 | "test_jsd.se_ctl_nodup_bam" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/nodup_bams/ctl1/ctl1.subsampled.25.merged.nodup.bam", 7 | "test_jsd.se_blacklist" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38.blacklist.bed.gz", 8 | 9 | "test_jsd.ref_se_jsd_logs" : [ 10 | "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_jsd/se/rep1.rep1.subsampled.25.merged.nodup.jsd.qc", 11 |
"gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_jsd/se/rep2.rep2.subsampled.20.merged.nodup.jsd.qc" 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /dev/test/test_task/test_jsd.wdl: -------------------------------------------------------------------------------- 1 | version 1.0 2 | import '../../../chip.wdl' as chip 3 | import 'compare_md5sum.wdl' as compare_md5sum 4 | 5 | workflow test_jsd { 6 | input { 7 | Array[File] se_nodup_bams 8 | File se_ctl_nodup_bam 9 | File se_blacklist 10 | Array[File] ref_se_jsd_logs 11 | String docker 12 | } 13 | RuntimeEnvironment runtime_environment = { 14 | "docker": docker, 15 | "singularity": "", 16 | "conda": "" 17 | } 18 | # task level test data (BAM) is generated from BWA 19 | # so we keep using 30 here, this should be 255 for bowtie2 BAMs 20 | Int mapq_thresh = 30 21 | 22 | Int jsd_cpu = 1 23 | Float jsd_mem_factor = 0.0 24 | Int jsd_time_hr = 6 25 | Float jsd_disk_factor = 2.0 26 | 27 | call chip.jsd as se_jsd { input : 28 | nodup_bams = se_nodup_bams, 29 | ctl_bams = [se_ctl_nodup_bam], # use first control only 30 | blacklist = se_blacklist, 31 | mapq_thresh = mapq_thresh, 32 | 33 | cpu = jsd_cpu, 34 | mem_factor = jsd_mem_factor, 35 | time_hr = jsd_time_hr, 36 | disk_factor = jsd_disk_factor, 37 | runtime_environment = runtime_environment, 38 | } 39 | 40 | # take first 8 columns (values in other columns are random) 41 | scatter(i in range(2)){ 42 | call take_8_cols { input : 43 | f = se_jsd.jsd_qcs[i], 44 | } 45 | call take_8_cols as ref_take_8_cols { input : 46 | f = ref_se_jsd_logs[i], 47 | } 48 | } 49 | 50 | call compare_md5sum.compare_md5sum { input : 51 | labels = [ 52 | 'se_jsd_rep1', 53 | 'se_jsd_rep2', 54 | ], 55 | files = [ 56 | take_8_cols.out[0], 57 | take_8_cols.out[1], 58 | #se_jsd.jsd_qcs[0], 59 | #se_jsd.jsd_qcs[1], 60 | ], 61 | ref_files = [ 62 | ref_take_8_cols.out[0], 63 | ref_take_8_cols.out[1], 64
| #ref_se_jsd_logs[0], 65 | #ref_se_jsd_logs[1], 66 | ], 67 | } 68 | } 69 | 70 | task take_8_cols { 71 | input { 72 | File f 73 | } 74 | command { 75 | cut -f 1-8 ${f} > out.txt 76 | } 77 | output { 78 | File out = 'out.txt' 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /dev/test/test_task/test_macs2.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_macs2.se_blacklist" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38.blacklist.bed.gz", 3 | "test_macs2.se_chrsz" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38_chr19_chrM.chrom.sizes", 4 | "test_macs2.se_gensz" : "hs", 5 | 6 | "test_macs2.se_ta" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/tas/rep1/rep1.subsampled.25.merged.nodup.tagAlign.gz", 7 | "test_macs2.se_ctl_ta" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/tas/pooled_ctl/ctl1.subsampled.25.merged.nodup.pooled.tagAlign.gz", 8 | 9 | "test_macs2.fraglen" : 95, 10 | 11 | "test_macs2.ref_se_macs2_npeak" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_macs2/rep1.subsampled.25.merged.nodup_x_ctl1.subsampled.25.merged.nodup.pooled.pval0.01.500K.narrowPeak.gz", 12 | "test_macs2.ref_se_macs2_bfilt_npeak" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_macs2/rep1.subsampled.25.merged.nodup_x_ctl1.subsampled.25.merged.nodup.pooled.pval0.01.500K.bfilt.narrowPeak.gz", 13 | "test_macs2.ref_se_macs2_frip_qc" : 
"gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_macs2/rep1.subsampled.25.merged.nodup_x_ctl1.subsampled.25.merged.nodup.pooled.pval0.01.500K.bfilt.frip.qc", 14 | 15 | "test_macs2.cap_num_peak" : 500000, 16 | "test_macs2.pval_thresh" : 0.01 17 | } 18 | -------------------------------------------------------------------------------- /dev/test/test_task/test_macs2.wdl: -------------------------------------------------------------------------------- 1 | version 1.0 2 | import '../../../chip.wdl' as chip 3 | import 'compare_md5sum.wdl' as compare_md5sum 4 | 5 | workflow test_macs2 { 6 | input { 7 | Int cap_num_peak 8 | Float pval_thresh 9 | 10 | Int fraglen 11 | # test macs2 for SE set only 12 | String se_ta 13 | String se_ctl_ta 14 | 15 | String ref_se_macs2_npeak # raw narrow-peak 16 | String ref_se_macs2_bfilt_npeak # blacklist filtered narrow-peak 17 | String ref_se_macs2_frip_qc 18 | 19 | String se_blacklist 20 | String se_chrsz 21 | String se_gensz 22 | String docker 23 | } 24 | RuntimeEnvironment runtime_environment = { 25 | "docker": docker, 26 | "singularity": "", 27 | "conda": "" 28 | } 29 | 30 | String regex_bfilt_peak_chr_name = 'chr[\\dXY]+' 31 | 32 | Float macs2_mem_factor = 0.0 33 | Int macs2_time_hr = 24 34 | Float macs2_disk_factor = 15.0 35 | 36 | call chip.call_peak as se_macs2 { input : 37 | peak_caller = 'macs2', 38 | peak_type = 'narrowPeak', 39 | tas = [se_ta, se_ctl_ta], 40 | gensz = se_gensz, 41 | chrsz = se_chrsz, 42 | fraglen = fraglen, 43 | cap_num_peak = cap_num_peak, 44 | pval_thresh = pval_thresh, 45 | blacklist = se_blacklist, 46 | regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name, 47 | 48 | cpu = 1, 49 | mem_factor = macs2_mem_factor, 50 | time_hr = macs2_time_hr, 51 | disk_factor = macs2_disk_factor, 52 | runtime_environment = runtime_environment, 53 | } 54 | 55 | call compare_md5sum.compare_md5sum { input : 56 | labels = [ 57 | 'se_macs2_npeak', 58 | 'se_macs2_bfilt_npeak', 59 
| 'se_macs2_frip_qc', 60 | ], 61 | files = [ 62 | se_macs2.peak, 63 | se_macs2.bfilt_peak, 64 | se_macs2.frip_qc, 65 | ], 66 | ref_files = [ 67 | ref_se_macs2_npeak, 68 | ref_se_macs2_bfilt_npeak, 69 | ref_se_macs2_frip_qc, 70 | ], 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /dev/test/test_task/test_macs2_signal_track.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_macs2_signal_track.se_chrsz" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38_chr19_chrM.chrom.sizes", 3 | "test_macs2_signal_track.se_gensz" : "hs", 4 | 5 | "test_macs2_signal_track.se_ta" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/tas/rep1/rep1.subsampled.25.merged.nodup.tagAlign.gz", 6 | "test_macs2_signal_track.se_ctl_ta" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/tas/pooled_ctl/ctl1.subsampled.25.merged.nodup.pooled.tagAlign.gz", 7 | 8 | "test_macs2_signal_track.fraglen" : 95, 9 | 10 | "test_macs2_signal_track.ref_se_macs2_pval_bw" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_macs2/rep1.subsampled.25.merged.nodup_x_ctl1.subsampled.25.merged.nodup.pooled.pval.signal.bigwig", 11 | 12 | "test_macs2_signal_track.pval_thresh" : 0.01 13 | } 14 | -------------------------------------------------------------------------------- /dev/test/test_task/test_macs2_signal_track.wdl: -------------------------------------------------------------------------------- 1 | version 1.0 2 | import '../../../chip.wdl' as chip 3 | import 'compare_md5sum.wdl' as compare_md5sum 4 | 5 | workflow test_macs2_signal_track { 6 | input { 7 | Float pval_thresh 8 | 9 | Int fraglen 10 | # test macs2 for SE set only 11 | String se_ta 12 | String se_ctl_ta 13 | 14 | String 
ref_se_macs2_pval_bw # p-val signal 15 | String se_chrsz 16 | String se_gensz 17 | String docker 18 | } 19 | RuntimeEnvironment runtime_environment = { 20 | "docker": docker, 21 | "singularity": "", 22 | "conda": "" 23 | } 24 | 25 | Float macs2_mem_factor = 0.0 26 | Int macs2_time_hr = 24 27 | Float macs2_disk_factor = 40.0 28 | 29 | call chip.macs2_signal_track as se_macs2_signal_track { input : 30 | tas = [se_ta, se_ctl_ta], 31 | gensz = se_gensz, 32 | chrsz = se_chrsz, 33 | fraglen = fraglen, 34 | pval_thresh = pval_thresh, 35 | 36 | mem_factor = macs2_mem_factor, 37 | time_hr = macs2_time_hr, 38 | disk_factor = macs2_disk_factor, 39 | runtime_environment = runtime_environment, 40 | } 41 | 42 | call compare_md5sum.compare_md5sum { input : 43 | labels = [ 44 | 'se_macs2_pval_bw', 45 | ], 46 | files = [ 47 | se_macs2_signal_track.pval_bw, 48 | ], 49 | ref_files = [ 50 | ref_se_macs2_pval_bw, 51 | ], 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /dev/test/test_task/test_overlap.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_overlap.se_blacklist" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38.blacklist.bed.gz", 3 | "test_overlap.se_chrsz" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38_chr19_chrM.chrom.sizes", 4 | 5 | "test_overlap.se_peak_rep1" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/rpeaks/rep1/rep1.subsampled.25.merged.nodup_x_ctl1.subsampled.25.merged.nodup.pooled.300K.regionPeak.gz", 6 | "test_overlap.se_peak_rep2" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/rpeaks/rep2/rep2.subsampled.20.merged.nodup_x_ctl1.subsampled.25.merged.nodup.pooled.300K.regionPeak.gz", 7 | 
"test_overlap.se_peak_pooled" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/rpeaks/pooled_rep/rep1.subsampled.25.merged.nodup.pooled_x_ctl1.subsampled.25.merged.nodup.pooled.300K.regionPeak.gz", 8 | "test_overlap.se_ta_pooled" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/tas/pooled_rep/rep1.subsampled.25.merged.nodup.pooled.tagAlign.gz", 9 | 10 | "test_overlap.ref_se_overlap_peak" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_overlap/rep1-rep2.overlap.regionPeak.gz", 11 | "test_overlap.ref_se_overlap_bfilt_peak" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_overlap/rep1-rep2.overlap.bfilt.regionPeak.gz", 12 | "test_overlap.ref_se_overlap_frip_qc" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_overlap/rep1-rep2.overlap.bfilt.frip.qc", 13 | 14 | "test_overlap.fraglen" : 100 15 | } 16 | -------------------------------------------------------------------------------- /dev/test/test_task/test_overlap.wdl: -------------------------------------------------------------------------------- 1 | version 1.0 2 | import '../../../chip.wdl' as chip 3 | import 'compare_md5sum.wdl' as compare_md5sum 4 | 5 | workflow test_overlap { 6 | input { 7 | String se_peak_rep1 # test overlap,idr for SE set only 8 | String se_peak_rep2 9 | String se_peak_pooled 10 | String se_ta_pooled 11 | 12 | String ref_se_overlap_peak 13 | String ref_se_overlap_bfilt_peak 14 | String ref_se_overlap_frip_qc 15 | 16 | String se_blacklist 17 | String se_chrsz 18 | Int fraglen 19 | String docker 20 | } 21 | RuntimeEnvironment runtime_environment = { 22 | "docker": docker, 23 | "singularity": "", 24 | "conda": "" 25 | } 26 | 27 | String regex_bfilt_peak_chr_name = 'chr[\\dXY]+' 28 | 29 | 30 | call chip.overlap as se_overlap { 
input : 31 | prefix = 'rep1-rep2', 32 | peak1 = se_peak_rep1, 33 | peak2 = se_peak_rep2, 34 | peak_pooled = se_peak_pooled, 35 | peak_type = 'regionPeak', 36 | blacklist = se_blacklist, 37 | regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name, 38 | 39 | chrsz = se_chrsz, 40 | fraglen = fraglen, 41 | ta = se_ta_pooled, 42 | runtime_environment = runtime_environment, 43 | } 44 | 45 | call compare_md5sum.compare_md5sum { input : 46 | labels = [ 47 | 'se_overlap_peak', 48 | 'se_overlap_bfilt_peak', 49 | 'se_overlap_frip_qc', 50 | ], 51 | files = [ 52 | se_overlap.overlap_peak, 53 | se_overlap.bfilt_overlap_peak, 54 | se_overlap.frip_qc, 55 | ], 56 | ref_files = [ 57 | ref_se_overlap_peak, 58 | ref_se_overlap_bfilt_peak, 59 | ref_se_overlap_frip_qc, 60 | ], 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /dev/test/test_task/test_pool_ta.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_pool_ta.se_ta_rep1" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/tas/rep1/rep1.subsampled.25.merged.nodup.tagAlign.gz", 3 | "test_pool_ta.se_ta_rep2" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/tas/rep2/rep2.subsampled.20.merged.nodup.tagAlign.gz", 4 | 5 | "test_pool_ta.ref_se_pooled_ta" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_pool_ta/rep1.subsampled.25.merged.nodup.pooled.tagAlign.gz" 6 | } 7 | -------------------------------------------------------------------------------- /dev/test/test_task/test_pool_ta.wdl: -------------------------------------------------------------------------------- 1 | version 1.0 2 | import '../../../chip.wdl' as chip 3 | import 'compare_md5sum.wdl' as compare_md5sum 4 | 5 | workflow test_pool_ta { 6 | input { 7 | String se_ta_rep1 8 | String se_ta_rep2 9 | 10 | String 
ref_se_pooled_ta 11 | String docker 12 | } 13 | RuntimeEnvironment runtime_environment = { 14 | "docker": docker, 15 | "singularity": "", 16 | "conda": "" 17 | } 18 | 19 | call chip.pool_ta as se_pool_ta { input : 20 | tas = [se_ta_rep1, se_ta_rep2], 21 | runtime_environment = runtime_environment, 22 | } 23 | 24 | call compare_md5sum.compare_md5sum { input : 25 | labels = [ 26 | 'se_pool_ta', 27 | ], 28 | files = [ 29 | se_pool_ta.ta_pooled, 30 | ], 31 | ref_files = [ 32 | ref_se_pooled_ta, 33 | ], 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /dev/test/test_task/test_reproducibility.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_reproducibility.se_overlap_peak_rep1_vs_rep2" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/overlap_peaks/rep1-rep2.overlap.bfilt.regionPeak.gz", 3 | "test_reproducibility.se_overlap_peak_rep1_pr" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/overlap_peaks/rep1-pr.overlap.bfilt.regionPeak.gz", 4 | "test_reproducibility.se_overlap_peak_rep2_pr" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/overlap_peaks/rep2-pr.overlap.bfilt.regionPeak.gz", 5 | "test_reproducibility.se_overlap_peak_ppr" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/overlap_peaks/ppr.overlap.bfilt.regionPeak.gz", 6 | "test_reproducibility.se_chrsz" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38_chr19_chrM.chrom.sizes", 7 | 8 | "test_reproducibility.ref_se_reproducibility_qc" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_reproducibility/overlap.reproducibility.qc" 9 | } 10 | 
-------------------------------------------------------------------------------- /dev/test/test_task/test_reproducibility.wdl: -------------------------------------------------------------------------------- 1 | version 1.0 2 | import '../../../chip.wdl' as chip 3 | import 'compare_md5sum.wdl' as compare_md5sum 4 | 5 | workflow test_reproducibility { 6 | input { 7 | String se_overlap_peak_rep1_vs_rep2 8 | String se_overlap_peak_rep1_pr 9 | String se_overlap_peak_rep2_pr 10 | String se_overlap_peak_ppr 11 | String se_chrsz 12 | 13 | String ref_se_reproducibility_qc 14 | String docker 15 | } 16 | RuntimeEnvironment runtime_environment = { 17 | "docker": docker, 18 | "singularity": "", 19 | "conda": "" 20 | } 21 | 22 | call chip.reproducibility as se_reproducibility { input : 23 | prefix = 'overlap', 24 | peaks = [se_overlap_peak_rep1_vs_rep2], 25 | peaks_pr = [se_overlap_peak_rep1_pr, se_overlap_peak_rep2_pr], 26 | peak_ppr = se_overlap_peak_ppr, 27 | peak_type = 'regionPeak', 28 | chrsz = se_chrsz, 29 | runtime_environment = runtime_environment, 30 | } 31 | 32 | call compare_md5sum.compare_md5sum { input : 33 | labels = [ 34 | 'se_reproducibility', 35 | ], 36 | files = [ 37 | se_reproducibility.reproducibility_qc, 38 | ], 39 | ref_files = [ 40 | ref_se_reproducibility_qc, 41 | ], 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /dev/test/test_task/test_spp.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_spp.se_blacklist" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38.blacklist.bed.gz", 3 | "test_spp.se_chrsz" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38_chr19_chrM.chrom.sizes", 4 | 5 | "test_spp.se_ta" : 
"gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/tas/rep1/rep1.subsampled.25.merged.nodup.tagAlign.gz", 6 | "test_spp.se_ctl_ta" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/tas/pooled_ctl/ctl1.subsampled.25.merged.nodup.pooled.tagAlign.gz", 7 | 8 | "test_spp.fraglen" : 95, 9 | 10 | "test_spp.ref_se_spp_rpeak" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_spp/rep1.subsampled.25.merged.nodup_x_ctl1.subsampled.25.merged.nodup.pooled.300K.regionPeak.gz", 11 | "test_spp.ref_se_spp_bfilt_rpeak" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_spp/rep1.subsampled.25.merged.nodup_x_ctl1.subsampled.25.merged.nodup.pooled.300K.bfilt.regionPeak.gz", 12 | "test_spp.ref_se_spp_frip_qc" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_spp/rep1.subsampled.25.merged.nodup_x_ctl1.subsampled.25.merged.nodup.pooled.300K.bfilt.frip.qc", 13 | 14 | "test_spp.cap_num_peak" : 300000 15 | } 16 | -------------------------------------------------------------------------------- /dev/test/test_task/test_spp.wdl: -------------------------------------------------------------------------------- 1 | version 1.0 2 | import '../../../chip.wdl' as chip 3 | import 'compare_md5sum.wdl' as compare_md5sum 4 | 5 | workflow test_spp { 6 | input { 7 | Int cap_num_peak 8 | 9 | Int fraglen 10 | # test spp for SE set only 11 | String se_ta 12 | String se_ctl_ta 13 | 14 | String ref_se_spp_rpeak # raw region-peak 15 | String ref_se_spp_bfilt_rpeak # blacklist filtered region-peak 16 | String ref_se_spp_frip_qc 17 | 18 | String se_blacklist 19 | String se_chrsz 20 | String docker 21 | } 22 | RuntimeEnvironment runtime_environment = { 23 | "docker": docker, 24 | "singularity": "", 25 | "conda": "" 26 | } 27 | 28 | String 
regex_bfilt_peak_chr_name = 'chr[\\dXY]+' 29 | 30 | Int spp_cpu = 1 31 | Float spp_mem_factor = 0.0 32 | Int spp_time_hr = 72 33 | Float spp_disk_factor = 5.0 34 | 35 | call chip.call_peak as se_spp { input : 36 | peak_caller = 'spp', 37 | peak_type = 'regionPeak', 38 | gensz = se_chrsz, 39 | pval_thresh = 0.0, 40 | tas = [se_ta, se_ctl_ta], 41 | chrsz = se_chrsz, 42 | fraglen = fraglen, 43 | cap_num_peak = cap_num_peak, 44 | blacklist = se_blacklist, 45 | regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name, 46 | 47 | cpu = spp_cpu, 48 | mem_factor = spp_mem_factor, 49 | time_hr = spp_time_hr, 50 | disk_factor = spp_disk_factor, 51 | runtime_environment = runtime_environment, 52 | } 53 | 54 | call compare_md5sum.compare_md5sum { input : 55 | labels = [ 56 | 'se_spp_rpeak', 57 | 'se_spp_bfilt_rpeak', 58 | 'se_spp_frip_qc', 59 | ], 60 | files = [ 61 | se_spp.peak, 62 | se_spp.bfilt_peak, 63 | se_spp.frip_qc, 64 | ], 65 | ref_files = [ 66 | ref_se_spp_rpeak, 67 | ref_se_spp_bfilt_rpeak, 68 | ref_se_spp_frip_qc, 69 | ], 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /dev/test/test_task/test_spr.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_spr.pe_ta" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/pe/tas/rep1/rep1-R1.subsampled.67.merged.nodup.tagAlign.gz", 3 | "test_spr.se_ta" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/tas/rep1/rep1.subsampled.25.merged.nodup.tagAlign.gz", 4 | 5 | "test_spr.ref_pe_ta_pr1" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_spr/pe/rep1-R1.subsampled.67.merged.nodup.pr1.tagAlign.gz", 6 | "test_spr.ref_pe_ta_pr2" : 
"gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_spr/pe/rep1-R1.subsampled.67.merged.nodup.pr2.tagAlign.gz", 7 | "test_spr.ref_se_ta_pr1" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_spr/se/rep1.subsampled.25.merged.nodup.pr1.tagAlign.gz", 8 | "test_spr.ref_se_ta_pr2" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_spr/se/rep1.subsampled.25.merged.nodup.pr2.tagAlign.gz", 9 | 10 | "test_spr.ref_pe_seed_10_ta_pr1" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_spr/pe/pseudoreplication_random_seed_10/rep1-R1.subsampled.67.merged.nodup.pr1.tagAlign.gz", 11 | "test_spr.ref_pe_seed_10_ta_pr2" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_spr/pe/pseudoreplication_random_seed_10/rep1-R1.subsampled.67.merged.nodup.pr2.tagAlign.gz", 12 | "test_spr.ref_se_seed_10_ta_pr1" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_spr/se/pseudoreplication_random_seed_10/rep1.subsampled.25.merged.nodup.pr1.tagAlign.gz", 13 | "test_spr.ref_se_seed_10_ta_pr2" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_spr/se/pseudoreplication_random_seed_10/rep1.subsampled.25.merged.nodup.pr2.tagAlign.gz" 14 | } 15 | -------------------------------------------------------------------------------- /dev/test/test_task/test_spr.wdl: -------------------------------------------------------------------------------- 1 | version 1.0 2 | import '../../../chip.wdl' as chip 3 | import 'compare_md5sum.wdl' as compare_md5sum 4 | 5 | workflow test_spr { 6 | input { 7 | File pe_ta 8 | File se_ta 9 | 10 | File ref_pe_ta_pr1 11 | File ref_pe_ta_pr2 12 | File ref_se_ta_pr1 13 | File ref_se_ta_pr2 14 | File 
ref_pe_seed_10_ta_pr1 15 | File ref_pe_seed_10_ta_pr2 16 | File ref_se_seed_10_ta_pr1 17 | File ref_se_seed_10_ta_pr2 18 | String docker 19 | } 20 | RuntimeEnvironment runtime_environment = { 21 | "docker": docker, 22 | "singularity": "", 23 | "conda": "" 24 | } 25 | Float spr_mem_factor = 0.0 26 | Float spr_disk_factor = 6.0 27 | 28 | call chip.spr as pe_spr { input : 29 | ta = pe_ta, 30 | paired_end = true, 31 | pseudoreplication_random_seed = 0, 32 | mem_factor = spr_mem_factor, 33 | disk_factor = spr_disk_factor, 34 | runtime_environment = runtime_environment, 35 | } 36 | call chip.spr as se_spr { input : 37 | ta = se_ta, 38 | paired_end = false, 39 | pseudoreplication_random_seed = 0, 40 | mem_factor = spr_mem_factor, 41 | disk_factor = spr_disk_factor, 42 | runtime_environment = runtime_environment, 43 | } 44 | call chip.spr as pe_spr_seed_10 { input : 45 | ta = pe_ta, 46 | paired_end = true, 47 | pseudoreplication_random_seed = 10, 48 | mem_factor = spr_mem_factor, 49 | disk_factor = spr_disk_factor, 50 | runtime_environment = runtime_environment, 51 | } 52 | call chip.spr as se_spr_seed_10 { input : 53 | ta = se_ta, 54 | paired_end = false, 55 | pseudoreplication_random_seed = 10, 56 | mem_factor = spr_mem_factor, 57 | disk_factor = spr_disk_factor, 58 | runtime_environment = runtime_environment, 59 | } 60 | 61 | call compare_md5sum.compare_md5sum { input : 62 | labels = [ 63 | 'pe_spr_pr1', 64 | 'pe_spr_pr2', 65 | 'se_spr_pr1', 66 | 'se_spr_pr2', 67 | 'pe_spr_seed_10_pr1', 68 | 'pe_spr_seed_10_pr2', 69 | 'se_spr_seed_10_pr1', 70 | 'se_spr_seed_10_pr2', 71 | ], 72 | files = [ 73 | pe_spr.ta_pr1, 74 | pe_spr.ta_pr2, 75 | se_spr.ta_pr1, 76 | se_spr.ta_pr2, 77 | pe_spr_seed_10.ta_pr1, 78 | pe_spr_seed_10.ta_pr2, 79 | se_spr_seed_10.ta_pr1, 80 | se_spr_seed_10.ta_pr2, 81 | ], 82 | ref_files = [ 83 | ref_pe_ta_pr1, 84 | ref_pe_ta_pr2, 85 | ref_se_ta_pr1, 86 | ref_se_ta_pr2, 87 | ref_pe_seed_10_ta_pr1, 88 | ref_pe_seed_10_ta_pr2, 89 | ref_se_seed_10_ta_pr1, 90 | 
ref_se_seed_10_ta_pr2, 91 | ], 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /dev/test/test_task/test_subsample_ctl.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_subsample_ctl.pe_ta" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/pe/tas/rep1/rep1-R1.subsampled.67.merged.nodup.tagAlign.gz", 3 | "test_subsample_ctl.se_ta" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/tas/pooled_ctl/ctl1.subsampled.25.merged.nodup.pooled.tagAlign.gz", 4 | "test_subsample_ctl.pe_subsample": 17000, 5 | "test_subsample_ctl.se_subsample": 48000, 6 | 7 | "test_subsample_ctl.ref_pe_ta_subsampled" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_subsample_ctl/pe/rep1-R1.subsampled.67.merged.nodup.17K.tagAlign.gz", 8 | "test_subsample_ctl.ref_se_ta_subsampled" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_subsample_ctl/se/ctl1.subsampled.25.merged.nodup.pooled.48K.tagAlign.gz", 9 | "test_subsample_ctl.ref_pe_ta_subsampled_trivial" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_subsample_ctl/pe/rep1-R1.subsampled.67.merged.nodup.100M.tagAlign.gz", 10 | "test_subsample_ctl.ref_se_ta_subsampled_trivial" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_subsample_ctl/se/ctl1.subsampled.25.merged.nodup.pooled.100M.tagAlign.gz" 11 | } 12 | -------------------------------------------------------------------------------- /dev/test/test_task/test_subsample_ctl.wdl: -------------------------------------------------------------------------------- 1 | version 1.0 2 | import '../../../chip.wdl' as chip 3 | import 'compare_md5sum.wdl' as compare_md5sum 4 | 5 | workflow 
test_subsample_ctl { 6 | input { 7 | File pe_ta 8 | File se_ta 9 | Int pe_subsample 10 | Int se_subsample 11 | File ref_pe_ta_subsampled 12 | File ref_se_ta_subsampled 13 | File ref_pe_ta_subsampled_trivial 14 | File ref_se_ta_subsampled_trivial 15 | String docker 16 | } 17 | RuntimeEnvironment runtime_environment = { 18 | "docker": docker, 19 | "singularity": "", 20 | "conda": "" 21 | } 22 | Float subsample_ctl_mem_factor = 0.0 23 | Float subsample_ctl_disk_factor = 7.5 24 | 25 | call chip.subsample_ctl as pe_subsample_ctl { input : 26 | ta = pe_ta, 27 | paired_end = true, 28 | subsample = pe_subsample, 29 | 30 | mem_factor = subsample_ctl_mem_factor, 31 | disk_factor = subsample_ctl_disk_factor, 32 | runtime_environment = runtime_environment, 33 | } 34 | call chip.subsample_ctl as se_subsample_ctl { input : 35 | ta = se_ta, 36 | paired_end = false, 37 | subsample = se_subsample, 38 | 39 | mem_factor = subsample_ctl_mem_factor, 40 | disk_factor = subsample_ctl_disk_factor, 41 | runtime_environment = runtime_environment, 42 | } 43 | # subsample > number of reads in TA 44 | # output will be just a shuffled TA. 
45 | call chip.subsample_ctl as pe_subsample_ctl_trivial { input : 46 | ta = pe_ta, 47 | paired_end = true, 48 | subsample = 100000000, 49 | 50 | mem_factor = subsample_ctl_mem_factor, 51 | disk_factor = subsample_ctl_disk_factor, 52 | runtime_environment = runtime_environment, 53 | } 54 | call chip.subsample_ctl as se_subsample_ctl_trivial { input : 55 | ta = se_ta, 56 | paired_end = false, 57 | subsample = 100000000, 58 | 59 | mem_factor = subsample_ctl_mem_factor, 60 | disk_factor = subsample_ctl_disk_factor, 61 | runtime_environment = runtime_environment, 62 | } 63 | 64 | call compare_md5sum.compare_md5sum { input : 65 | labels = [ 66 | 'pe_subsample_ctl', 67 | 'se_subsample_ctl', 68 | 'pe_subsample_ctl_trivial', 69 | 'se_subsample_ctl_trivial', 70 | ], 71 | files = [ 72 | pe_subsample_ctl.ta_subsampled, 73 | se_subsample_ctl.ta_subsampled, 74 | pe_subsample_ctl_trivial.ta_subsampled, 75 | se_subsample_ctl_trivial.ta_subsampled, 76 | ], 77 | ref_files = [ 78 | ref_pe_ta_subsampled, 79 | ref_se_ta_subsampled, 80 | ref_pe_ta_subsampled_trivial, 81 | ref_se_ta_subsampled_trivial, 82 | ], 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /dev/test/test_task/test_trimmomatic.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_trimmomatic.pe_bowtie2_idx_tar" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/bowtie2_index/GRCh38_no_alt_analysis_set_GCA_000001405.15.chr19_chrM.fasta.tar", 3 | "test_trimmomatic.se_bowtie2_idx_tar" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/bowtie2_index/GRCh38_no_alt_analysis_set_GCA_000001405.15.chr19_chrM.fasta.tar", 4 | 5 | "test_trimmomatic.pe_fastqs_R1" : [ 6 | 
"gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/pe/fastqs/rep1/pair1/rep1-R1.subsampled.67.fastq.gz" 7 | ], 8 | "test_trimmomatic.pe_fastqs_R2" : [ 9 | "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/pe/fastqs/rep1/pair2/rep1-R2.subsampled.67.fastq.gz" 10 | ], 11 | "test_trimmomatic.se_fastqs" : [ 12 | "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/fastqs/rep1/rep1.subsampled.25.fastq.gz" 13 | ], 14 | "test_trimmomatic.pe_crop_length" : 50, 15 | "test_trimmomatic.se_crop_length" : 30, 16 | 17 | "test_trimmomatic.ref_pe_cropped_flagstat" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_trimmomatic/pe/rep1-R1.subsampled.67.merged.crop_50bp.samstats.qc", 18 | "test_trimmomatic.ref_se_cropped_flagstat" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_trimmomatic/se/rep1.subsampled.25.merged.crop_30bp.samstats.qc", 19 | "test_trimmomatic.ref_pe_cropped_phred33_flagstat" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_trimmomatic/pe/rep1-R1.subsampled.67.merged.crop_50bp.samstats.qc", 20 | "test_trimmomatic.ref_se_cropped_phred64_flagstat" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_trimmomatic/se/rep1.subsampled.25.merged.crop_30bp.samstats.qc" 21 | } 22 | -------------------------------------------------------------------------------- /dev/test/test_task/test_xcor.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_xcor.pe_ta" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/pe/tas/rep1/rep1-R1.subsampled.67.merged.nodup.tagAlign.gz", 3 | "test_xcor.se_ta" : 
"gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/input/se/tas/rep1/rep1.subsampled.25.merged.nodup.tagAlign.gz", 4 | 5 | "test_xcor.ref_pe_xcor_log" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_xcor/pe/rep1-R1.subsampled.67.merged.nodup.no_chrM.R1.15M.cc.qc", 6 | "test_xcor.ref_pe_xcor_log_subsample" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_xcor/pe/subsample/fix_PIP-917/rep1-R1.subsampled.67.merged.nodup.no_chrM.R1.17K.cc.qc", 7 | "test_xcor.ref_se_xcor_log" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_xcor/se/rep1.subsampled.25.merged.nodup.no_chrM.15M.cc.qc", 8 | "test_xcor.ref_se_xcor_log_subsample" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/chip-seq-pipeline-test-data/ref_output/test_xcor/se/subsample/rep1.subsampled.25.merged.nodup.no_chrM.17K.cc.qc", 9 | 10 | "test_xcor.xcor_subsample" : 17000 11 | } 12 | -------------------------------------------------------------------------------- /dev/test/test_task/test_xcor.wdl: -------------------------------------------------------------------------------- 1 | version 1.0 2 | import '../../../chip.wdl' as chip 3 | import 'compare_md5sum.wdl' as compare_md5sum 4 | 5 | workflow test_xcor { 6 | input { 7 | Int xcor_subsample 8 | Int xcor_subsample_default = 15000000 9 | 10 | String pe_ta 11 | String se_ta 12 | 13 | String ref_pe_xcor_log 14 | String ref_pe_xcor_log_subsample 15 | String ref_se_xcor_log 16 | String ref_se_xcor_log_subsample 17 | String mito_chr_name = 'chrM' 18 | String docker 19 | } 20 | RuntimeEnvironment runtime_environment = { 21 | "docker": docker, 22 | "singularity": "", 23 | "conda": "" 24 | } 25 | Int xcor_cpu = 1 26 | Float xcor_mem_factor = 0.0 27 | Int xcor_time_hr = 24 28 | Float xcor_disk_factor = 4.5 29 | 30 | call chip.xcor as pe_xcor { input : 31 | 
ta = pe_ta, 32 | subsample = xcor_subsample_default, 33 | paired_end = true, 34 | mito_chr_name = mito_chr_name, 35 | 36 | cpu = xcor_cpu, 37 | mem_factor = xcor_mem_factor, 38 | time_hr = xcor_time_hr, 39 | disk_factor = xcor_disk_factor, 40 | runtime_environment = runtime_environment, 41 | } 42 | call chip.xcor as pe_xcor_subsample { input : 43 | ta = pe_ta, 44 | subsample = xcor_subsample, 45 | paired_end = true, 46 | mito_chr_name = mito_chr_name, 47 | 48 | cpu = xcor_cpu, 49 | mem_factor = xcor_mem_factor, 50 | time_hr = xcor_time_hr, 51 | disk_factor = xcor_disk_factor, 52 | runtime_environment = runtime_environment, 53 | } 54 | call chip.xcor as se_xcor { input : 55 | ta = se_ta, 56 | subsample = xcor_subsample_default, 57 | paired_end = false, 58 | mito_chr_name = mito_chr_name, 59 | 60 | cpu = xcor_cpu, 61 | mem_factor = xcor_mem_factor, 62 | time_hr = xcor_time_hr, 63 | disk_factor = xcor_disk_factor, 64 | runtime_environment = runtime_environment, 65 | } 66 | call chip.xcor as se_xcor_subsample { input : 67 | ta = se_ta, 68 | subsample = xcor_subsample, 69 | paired_end = false, 70 | mito_chr_name = mito_chr_name, 71 | 72 | cpu = xcor_cpu, 73 | mem_factor = xcor_mem_factor, 74 | time_hr = xcor_time_hr, 75 | disk_factor = xcor_disk_factor, 76 | runtime_environment = runtime_environment, 77 | } 78 | 79 | call compare_md5sum.compare_md5sum { input : 80 | labels = [ 81 | 'pe_xcor', 82 | 'pe_xcor_subsample', 83 | 'se_xcor', 84 | 'se_xcor_subsample', 85 | ], 86 | files = [ 87 | pe_xcor.score, 88 | pe_xcor_subsample.score, 89 | se_xcor.score, 90 | se_xcor_subsample.score, 91 | ], 92 | ref_files = [ 93 | ref_pe_xcor_log, 94 | ref_pe_xcor_log_subsample, 95 | ref_se_xcor_log, 96 | ref_se_xcor_log_subsample, 97 | ], 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /dev/test/test_workflow/.gitignore: -------------------------------------------------------------------------------- 1 | *qc_json_diff.txt 2 | 
*qc_json_match.txt 3 | *.result.json 4 | *.result.qc.json 5 | *.status.json 6 | *.metadata.json 7 | *.submit.json 8 | *.test_chip_wf_opt.json 9 | cromwell*.jar 10 | tmp_secret_key.json 11 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR000DYI.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR000DYI/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/rep1.fastq.gz" 6 | ], 7 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/rep2.fastq.gz" 8 | ], 9 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/ctl1.fastq.gz" 10 | ], 11 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/ctl2.fastq.gz" 12 | ], 13 | "chip.paired_end" : false, 14 | "chip.always_use_pooled_ctl" : true, 15 | "chip.title" : "ENCSR000DYI", 16 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab" 17 | } 18 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR000DYI_subsampled.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR000DYI_subsampled/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38.tsv", 5 | "chip.fastqs_rep1_R1" : 
["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep1.subsampled.25.fastq.gz" 6 | ], 7 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep2.subsampled.15.fastq.gz" 8 | ], 9 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl1.subsampled.25.fastq.gz" 10 | ], 11 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl2.subsampled.25.fastq.gz" 12 | ], 13 | "chip.paired_end" : false, 14 | "chip.always_use_pooled_ctl" : true, 15 | "chip.title" : "ENCSR000DYI (subsampled 1/25)", 16 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab" 17 | } 18 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR000DYI_subsampled_chr19_only.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.5.0/ENCSR000DYI_subsampled_chr19_only/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38_chr19_chrM.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep1.subsampled.25.fastq.gz" 6 | ], 7 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep2.subsampled.20.fastq.gz" 8 | ], 9 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl1.subsampled.25.fastq.gz" 10 | ], 11 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl2.subsampled.25.fastq.gz" 12 | ], 13 | 
"chip.paired_end" : false, 14 | "chip.always_use_pooled_ctl" : true, 15 | "chip.title" : "ENCSR000DYI (subsampled 1/25, chr19_chrM only)", 16 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab" 17 | } 18 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR000DYI_subsampled_chr19_only_hist.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.5.0/ENCSR000DYI_subsampled_chr19_only_hist/qc.json", 3 | "chip.pipeline_type" : "histone", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38_chr19_chrM.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep1.subsampled.25.fastq.gz" 6 | ], 7 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep2.subsampled.20.fastq.gz" 8 | ], 9 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl1.subsampled.25.fastq.gz" 10 | ], 11 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl2.subsampled.25.fastq.gz" 12 | ], 13 | "chip.paired_end" : false, 14 | "chip.always_use_pooled_ctl" : true, 15 | "chip.title" : "ENCSR000DYI (subsampled 1/25, chr19_chrM only, fake histone)", 16 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab" 17 | } 18 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR000DYI_subsampled_chr19_only_hist_unrep.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : 
"gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.5.0/ENCSR000DYI_subsampled_chr19_only_hist_unrep/qc.json", 3 | "chip.pipeline_type" : "histone", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38_chr19_chrM.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep1.subsampled.25.fastq.gz" 6 | ], 7 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl1.subsampled.25.fastq.gz" 8 | ], 9 | "chip.paired_end" : false, 10 | "chip.always_use_pooled_ctl" : true, 11 | "chip.title" : "ENCSR000DYI (subsampled 1/25, chr19_chrM only, fake histone, unrep)", 12 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab" 13 | } 14 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR000DYI_subsampled_chr19_only_true_rep_only_pbam.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.7.0/ENCSR000DYI_subsampled_chr19_only_true_rep_only_pbam/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38_chr19_chrM.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep1.subsampled.25.fastq.gz" 6 | ], 7 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep2.subsampled.20.fastq.gz" 8 | ], 9 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl1.subsampled.25.fastq.gz" 10 | ], 11 | "chip.ctl_fastqs_rep2_R1" : 
["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl2.subsampled.25.fastq.gz" 12 | ], 13 | "chip.paired_end" : false, 14 | "chip.always_use_pooled_ctl" : true, 15 | "chip.redact_nodup_bam" : true, 16 | "chip.true_rep_only" : true, 17 | "chip.title" : "ENCSR000DYI (subsampled 1/25, chr19_chrM only, redacted BAM)", 18 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab (redacted BAM)" 19 | } 20 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR000DYI_subsampled_chr19_only_unrep.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.5.0/ENCSR000DYI_subsampled_chr19_only_unrep/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38_chr19_chrM.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep1.subsampled.25.fastq.gz" 6 | ], 7 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl1.subsampled.25.fastq.gz" 8 | ], 9 | "chip.paired_end" : false, 10 | "chip.always_use_pooled_ctl" : true, 11 | "chip.title" : "ENCSR000DYI (subsampled 1/25, chr19_chrM only, unrep)", 12 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab" 13 | } 14 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR000DYI_subsampled_ctl_sub_chr19_only.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.5.0/ENCSR000DYI_subsampled_ctl_sub_chr19_only/qc.json", 3 | 
"chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38_chr19_chrM.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep1.subsampled.25.fastq.gz" 6 | ], 7 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep2.subsampled.20.fastq.gz" 8 | ], 9 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl1.subsampled.25.fastq.gz" 10 | ], 11 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl2.subsampled.25.fastq.gz" 12 | ], 13 | "chip.paired_end" : false, 14 | "chip.ctl_depth_limit": 46000, 15 | "chip.exp_ctl_depth_ratio_limit": 0.9, 16 | "chip.title" : "ENCSR000DYI (subsampled 1/25, ctl sub 46000 0.9 chr19_chrM only)", 17 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab" 18 | } 19 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR106GXJ.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR000DYI/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38.tsv", 5 | "chip.fastqs_rep1_R1" : [ 6 | "gs://input-data-mirrored/input_fastqs/parathyroid_fastqs/ENCFF002AVN.fastq.gz", 7 | "gs://input-data-mirrored/input_fastqs/parathyroid_fastqs/ENCFF361EQE.fastq.gz" 8 | ], 9 | "chip.ctl_fastqs_rep1_R1" : [ 10 | "gs://input-data-mirrored/input_fastqs/parathyroid_fastqs/ENCFF407DUW.fastq.gz" 11 | ], 12 | "chip.paired_end" : false, 13 | "chip.redact_nodup_bam" : false, 14 | "chip.always_use_pooled_ctl" : 
true, 15 | "chip.title" : "ENCSR106GXJ", 16 | "chip.description" : "ENCSR106GXJ" 17 | } 18 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR106GXJ_pbam.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR000DYI/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38.tsv", 5 | "chip.fastqs_rep1_R1" : [ 6 | "gs://input-data-mirrored/input_fastqs/parathyroid_fastqs/ENCFF002AVN.fastq.gz", 7 | "gs://input-data-mirrored/input_fastqs/parathyroid_fastqs/ENCFF361EQE.fastq.gz" 8 | ], 9 | "chip.ctl_fastqs_rep1_R1" : [ 10 | "gs://input-data-mirrored/input_fastqs/parathyroid_fastqs/ENCFF407DUW.fastq.gz" 11 | ], 12 | "chip.paired_end" : false, 13 | "chip.redact_nodup_bam" : true, 14 | "chip.always_use_pooled_ctl" : true, 15 | "chip.title" : "ENCSR106GXJ (with redacted BAM)", 16 | "chip.description" : "ENCSR106GXJ (with redacted BAM)" 17 | } 18 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR203KEU.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR936XTK/qc.json", 3 | "chip.pipeline_type" : "histone", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR203KEU/fastq/rep1/pair1/ENCFF950OXW.fastq.gz" 6 | ], 7 | "chip.fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR203KEU/fastq/rep1/pair2/ENCFF054HVM.fastq.gz" 8 | ], 9 | "chip.fastqs_rep2_R1" : 
["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR203KEU/fastq/rep2/pair1/ENCFF666GAH.fastq.gz" 10 | ], 11 | "chip.fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR203KEU/fastq/rep2/pair2/ENCFF925HTP.fastq.gz" 12 | ], 13 | "chip.paired_end" : true, 14 | "chip.always_use_pooled_ctl" : true, 15 | "chip.title" : "ENCSR203KEU", 16 | "chip.description" : "ENCSR203KEU - ENCODE Histone ChIP-seq PE test sample" 17 | } 18 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR400WEK.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR936XTK/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR400WEK/fastq/rep1/pair1/ENCFF251IZE.fastq.gz" 6 | ], 7 | "chip.fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR400WEK/fastq/rep1/pair2/ENCFF871SZT.fastq.gz" 8 | ], 9 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR400WEK/fastq/ctl1/pair1/ENCFF094LCI.fastq.gz" 10 | ], 11 | "chip.ctl_fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR400WEK/fastq/ctl1/pair2/ENCFF818HXE.fastq.gz" 12 | ], 13 | "chip.paired_end" : true, 14 | "chip.always_use_pooled_ctl" : true, 15 | "chip.title" : "ENCSR400WEK", 16 | "chip.description" : "ENCSR400WEK - ENCODE TF ChIP-seq PE unreplicated test sample" 17 | } 18 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR878KIY.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR936XTK/qc.json", 3 | "chip.pipeline_type" : "histone", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38.tsv", 5 | "chip.fastqs_rep1_R1" : [ 6 | "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR878KIY/fastq/rep1/ENCFF846MLZ.fastq.gz", 7 | "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR878KIY/fastq/rep1/ENCFF153OYB.fastq.gz" 8 | ], 9 | "chip.fastqs_rep2_R1" : [ 10 | "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR878KIY/fastq/rep2/ENCFF536KEU.fastq.gz", 11 | "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR878KIY/fastq/rep2/ENCFF481VUI.fastq.gz", 12 | "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR878KIY/fastq/rep2/ENCFF270WDO.fastq.gz", 13 | "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR878KIY/fastq/rep2/ENCFF753JZK.fastq.gz" 14 | ], 15 | "chip.paired_end" : false, 16 | "chip.always_use_pooled_ctl" : true, 17 | "chip.title" : "ENCSR878KIY", 18 | "chip.description" : "ENCSR878KIY - Histone ChIP-seq SE test sample" 19 | } 20 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR936XTK.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR936XTK/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R1.fastq.gz" 6 | ], 7 | "chip.fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R2.fastq.gz" 8 | ], 9 | "chip.fastqs_rep2_R1" : 
["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R1.fastq.gz" 10 | ], 11 | "chip.fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R2.fastq.gz" 12 | ], 13 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R1.fastq.gz" 14 | ], 15 | "chip.ctl_fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R2.fastq.gz" 16 | ], 17 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R1.fastq.gz" 18 | ], 19 | "chip.ctl_fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R2.fastq.gz" 20 | ], 21 | "chip.paired_end" : true, 22 | "chip.always_use_pooled_ctl" : true, 23 | "chip.title" : "ENCSR936XTK", 24 | "chip.description" : "ZNF143 ChIP-seq on human GM12878", 25 | "chip.align_cpu" : 8 26 | } 27 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR936XTK_subsampled.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR936XTK_subsampled/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R1.subsampled.50.fastq.gz" 6 | ], 7 | "chip.fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R2.subsampled.50.fastq.gz" 8 | ], 9 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R1.subsampled.50.fastq.gz" 10 | ], 11 | 
"chip.fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R2.subsampled.50.fastq.gz" 12 | ], 13 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R1.subsampled.80.fastq.gz" 14 | ], 15 | "chip.ctl_fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R2.subsampled.80.fastq.gz" 16 | ], 17 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R1.subsampled.80.fastq.gz" 18 | ], 19 | "chip.ctl_fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R2.subsampled.80.fastq.gz" 20 | ], 21 | "chip.paired_end" : true, 22 | "chip.always_use_pooled_ctl" : true, 23 | "chip.title" : "ENCSR936XTK (subsampled 1/50)", 24 | "chip.description" : "ZNF143 ChIP-seq on human GM12878" 25 | } 26 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR936XTK_subsampled_chr19_only.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.5.0/ENCSR936XTK_subsampled_chr19_only/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38_chr19_chrM.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R1.subsampled.50.fastq.gz" 6 | ], 7 | "chip.fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R2.subsampled.50.fastq.gz" 8 | ], 9 | "chip.fastqs_rep2_R1" : 
["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R1.subsampled.50.fastq.gz" 10 | ], 11 | "chip.fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R2.subsampled.50.fastq.gz" 12 | ], 13 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R1.subsampled.80.fastq.gz" 14 | ], 15 | "chip.ctl_fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R2.subsampled.80.fastq.gz" 16 | ], 17 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R1.subsampled.80.fastq.gz" 18 | ], 19 | "chip.ctl_fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R2.subsampled.80.fastq.gz" 20 | ], 21 | "chip.paired_end" : true, 22 | "chip.always_use_pooled_ctl" : true, 23 | "chip.title" : "ENCSR936XTK (subsampled 1/50, chr19_chrM Only)", 24 | "chip.description" : "ZNF143 ChIP-seq on human GM12878" 25 | } 26 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR936XTK_subsampled_chr19_only_control_mode.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.5.0/ENCSR936XTK_subsampled_chr19_only_control_mode/qc.json", 3 | "chip.pipeline_type" : "control", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38_chr19_chrM.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R1.subsampled.80.fastq.gz" 6 | ], 7 | "chip.fastqs_rep1_R2" : 
["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R2.subsampled.80.fastq.gz" 8 | ], 9 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R1.subsampled.80.fastq.gz" 10 | ], 11 | "chip.fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R2.subsampled.80.fastq.gz" 12 | ], 13 | "chip.paired_end" : true, 14 | "chip.title" : "ENCSR936XTK (subsampled 1/50, chr19_chrM Only, control only)", 15 | "chip.description" : "ZNF143 ChIP-seq on human GM12878" 16 | } 17 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR936XTK_subsampled_chr19_only_hist.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.5.0/ENCSR936XTK_subsampled_chr19_only_hist/qc.json", 3 | "chip.pipeline_type" : "histone", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38_chr19_chrM.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R1.subsampled.50.fastq.gz" 6 | ], 7 | "chip.fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R2.subsampled.50.fastq.gz" 8 | ], 9 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R1.subsampled.50.fastq.gz" 10 | ], 11 | "chip.fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R2.subsampled.50.fastq.gz" 12 | ], 13 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R1.subsampled.80.fastq.gz" 14 | ], 15 | 
"chip.ctl_fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R2.subsampled.80.fastq.gz" 16 | ], 17 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R1.subsampled.80.fastq.gz" 18 | ], 19 | "chip.ctl_fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R2.subsampled.80.fastq.gz" 20 | ], 21 | "chip.paired_end" : true, 22 | "chip.always_use_pooled_ctl" : true, 23 | "chip.title" : "ENCSR936XTK (subsampled 1/50, chr19_chrM Only, fake histone)", 24 | "chip.description" : "ZNF143 ChIP-seq on human GM12878" 25 | } 26 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR936XTK_subsampled_chr19_only_true_rep_only_pbam.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.7.0/ENCSR936XTK_subsampled_chr19_only_true_rep_only_pbam/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38_chr19_chrM.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R1.subsampled.50.fastq.gz" 6 | ], 7 | "chip.fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R2.subsampled.50.fastq.gz" 8 | ], 9 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R1.subsampled.50.fastq.gz" 10 | ], 11 | "chip.fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R2.subsampled.50.fastq.gz" 12 | ], 13 | "chip.ctl_fastqs_rep1_R1" : 
["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R1.subsampled.80.fastq.gz" 14 | ], 15 | "chip.ctl_fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R2.subsampled.80.fastq.gz" 16 | ], 17 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R1.subsampled.80.fastq.gz" 18 | ], 19 | "chip.ctl_fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R2.subsampled.80.fastq.gz" 20 | ], 21 | "chip.paired_end" : true, 22 | "chip.always_use_pooled_ctl" : true, 23 | "chip.redact_nodup_bam" : true, 24 | "chip.true_rep_only" : true, 25 | "chip.title" : "ENCSR936XTK (subsampled 1/50, chr19_chrM Only, true rep only, redacted BAM)", 26 | "chip.description" : "ZNF143 ChIP-seq on human GM12878 (redacted BAM)" 27 | } 28 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR936XTK_subsampled_ctl_sub_1ctl_chr19_only.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.5.0/ENCSR936XTK_subsampled_ctl_sub_1ctl_chr19_only/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38_chr19_chrM.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R1.subsampled.50.fastq.gz" 6 | ], 7 | "chip.fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R2.subsampled.50.fastq.gz" 8 | ], 9 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R1.subsampled.50.fastq.gz" 10 | ], 11 | 
"chip.fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R2.subsampled.50.fastq.gz" 12 | ], 13 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R1.subsampled.80.fastq.gz" 14 | ], 15 | "chip.ctl_fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R2.subsampled.80.fastq.gz" 16 | ], 17 | "chip.paired_end" : true, 18 | "chip.ctl_depth_limit": 25000, 19 | "chip.exp_ctl_depth_ratio_limit": 0.53, 20 | "chip.title" : "ENCSR936XTK (subsampled 1/50, ctl sub 25000 0.53, chr19_chrM Only)", 21 | "chip.description" : "ZNF143 ChIP-seq on human GM12878" 22 | } 23 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR936XTK_subsampled_ctl_sub_chr19_only.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.5.0/ENCSR936XTK_subsampled_ctl_sub_chr19_only/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38_chr19_chrM.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R1.subsampled.50.fastq.gz" 6 | ], 7 | "chip.fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R2.subsampled.50.fastq.gz" 8 | ], 9 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R1.subsampled.50.fastq.gz" 10 | ], 11 | "chip.fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R2.subsampled.50.fastq.gz" 12 | ], 13 | "chip.ctl_fastqs_rep1_R1" : 
["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R1.subsampled.80.fastq.gz" 14 | ], 15 | "chip.ctl_fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R2.subsampled.80.fastq.gz" 16 | ], 17 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R1.subsampled.80.fastq.gz" 18 | ], 19 | "chip.ctl_fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R2.subsampled.80.fastq.gz" 20 | ], 21 | "chip.paired_end" : true, 22 | "chip.ctl_depth_limit": 46000, 23 | "chip.exp_ctl_depth_ratio_limit": 0.9, 24 | "chip.title" : "ENCSR936XTK (subsampled 1/50, ctl sub 46000 0.9, chr19_chrM Only)", 25 | "chip.description" : "ZNF143 ChIP-seq on human GM12878" 26 | } 27 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ENCSR970FPM.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR936XTK/qc.json", 3 | "chip.pipeline_type" : "histone", 4 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR970FPM/fastq/rep1/ENCFF774JBP.fastq.gz" 6 | ], 7 | "chip.paired_end" : false, 8 | "chip.always_use_pooled_ctl" : true, 9 | "chip.title" : "ENCSR970FPM", 10 | "chip.description" : "ENCSR970FPM - Histone ChIP-seq SE unreplicated test sample. Only SE rep1 is taken. There is actually a rep2 (PE)." 
11 | } 12 | -------------------------------------------------------------------------------- /dev/test/test_workflow/benchmark/ENCSR000DYI_bwa.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR000DYI/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "gs://encode-pipeline-genome-data/genome_tsv/v1/hg38_gcp.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/rep1.fastq.gz" 6 | ], 7 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/rep2.fastq.gz" 8 | ], 9 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/ctl1.fastq.gz" 10 | ], 11 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/ctl2.fastq.gz" 12 | ], 13 | "chip.paired_end" : false, 14 | "chip.always_use_pooled_ctl" : true, 15 | "chip.title" : "ENCSR000DYI (bwa)", 16 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab", 17 | 18 | "chip.aligner": "bwa", 19 | "chip.align_only": true 20 | } 21 | -------------------------------------------------------------------------------- /dev/test/test_workflow/benchmark/ENCSR000DYI_m0_q255.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR000DYI/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "gs://encode-pipeline-genome-data/genome_tsv/v1/hg38_gcp.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/rep1.fastq.gz" 6 | ], 7 | "chip.fastqs_rep2_R1" : 
["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/rep2.fastq.gz" 8 | ], 9 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/ctl1.fastq.gz" 10 | ], 11 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/ctl2.fastq.gz" 12 | ], 13 | "chip.paired_end" : false, 14 | "chip.always_use_pooled_ctl" : true, 15 | "chip.title" : "ENCSR000DYI (m0, q255)", 16 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab", 17 | 18 | "chip.align.multimapping": 0, 19 | "chip.align_R1.multimapping": 0, 20 | "chip.align_ctl.multimapping": 0, 21 | "chip.mapq_thresh": 255, 22 | "chip.align_only": true 23 | } 24 | -------------------------------------------------------------------------------- /dev/test/test_workflow/benchmark/ENCSR000DYI_m0_q30.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR000DYI/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "gs://encode-pipeline-genome-data/genome_tsv/v1/hg38_gcp.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/rep1.fastq.gz" 6 | ], 7 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/rep2.fastq.gz" 8 | ], 9 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/ctl1.fastq.gz" 10 | ], 11 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/ctl2.fastq.gz" 12 | ], 13 | "chip.paired_end" : false, 14 | "chip.always_use_pooled_ctl" : true, 15 | "chip.title" : "ENCSR000DYI (m0, q30)", 16 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab", 17 | 18 | 
"chip.align.multimapping": 0, 19 | "chip.align_R1.multimapping": 0, 20 | "chip.align_ctl.multimapping": 0, 21 | "chip.mapq_thresh": 30, 22 | "chip.align_only": false, 23 | "chip.spp_cpu": 8 24 | } 25 | -------------------------------------------------------------------------------- /dev/test/test_workflow/benchmark/ENCSR000DYI_m0_q40.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR000DYI/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "gs://encode-pipeline-genome-data/genome_tsv/v1/hg38_gcp.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/rep1.fastq.gz" 6 | ], 7 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/rep2.fastq.gz" 8 | ], 9 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/ctl1.fastq.gz" 10 | ], 11 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/ctl2.fastq.gz" 12 | ], 13 | "chip.paired_end" : false, 14 | "chip.always_use_pooled_ctl" : true, 15 | "chip.title" : "ENCSR000DYI (m0, q40)", 16 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab", 17 | 18 | "chip.align.multimapping": 0, 19 | "chip.align_R1.multimapping": 0, 20 | "chip.align_ctl.multimapping": 0, 21 | "chip.mapq_thresh": 40, 22 | "chip.align_only": true 23 | } 24 | -------------------------------------------------------------------------------- /dev/test/test_workflow/benchmark/ENCSR000DYI_m4_q255.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR000DYI/qc.json", 3 | "chip.pipeline_type" : "tf", 4 
| "chip.genome_tsv" : "gs://encode-pipeline-genome-data/genome_tsv/v1/hg38_gcp.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/rep1.fastq.gz" 6 | ], 7 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/rep2.fastq.gz" 8 | ], 9 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/ctl1.fastq.gz" 10 | ], 11 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/ctl2.fastq.gz" 12 | ], 13 | "chip.paired_end" : false, 14 | "chip.always_use_pooled_ctl" : true, 15 | "chip.title" : "ENCSR000DYI (m4, q255)", 16 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab", 17 | 18 | "chip.align.multimapping": 4, 19 | "chip.align_R1.multimapping": 4, 20 | "chip.align_ctl.multimapping": 4, 21 | "chip.mapq_thresh": 255, 22 | "chip.align_only": true 23 | } 24 | -------------------------------------------------------------------------------- /dev/test/test_workflow/benchmark/ENCSR000DYI_m4_q30.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR000DYI/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "gs://encode-pipeline-genome-data/genome_tsv/v1/hg38_gcp.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/rep1.fastq.gz" 6 | ], 7 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/rep2.fastq.gz" 8 | ], 9 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/ctl1.fastq.gz" 10 | ], 11 | "chip.ctl_fastqs_rep2_R1" : 
["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/ctl2.fastq.gz" 12 | ], 13 | "chip.paired_end" : false, 14 | "chip.always_use_pooled_ctl" : true, 15 | "chip.title" : "ENCSR000DYI (m4, q30)", 16 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab", 17 | 18 | "chip.align.multimapping": 4, 19 | "chip.align_R1.multimapping": 4, 20 | "chip.align_ctl.multimapping": 4, 21 | "chip.mapq_thresh": 30, 22 | "chip.align_only": true 23 | } 24 | -------------------------------------------------------------------------------- /dev/test/test_workflow/benchmark/ENCSR000DYI_m4_q40.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR000DYI/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "gs://encode-pipeline-genome-data/genome_tsv/v1/hg38_gcp.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/rep1.fastq.gz" 6 | ], 7 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/rep2.fastq.gz" 8 | ], 9 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/ctl1.fastq.gz" 10 | ], 11 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/ctl2.fastq.gz" 12 | ], 13 | "chip.paired_end" : false, 14 | "chip.always_use_pooled_ctl" : true, 15 | "chip.title" : "ENCSR000DYI (m4, q40)", 16 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab", 17 | 18 | "chip.align.multimapping": 4, 19 | "chip.align_R1.multimapping": 4, 20 | "chip.align_ctl.multimapping": 4, 21 | "chip.mapq_thresh": 40, 22 | "chip.align_only": true 23 | } 24 | -------------------------------------------------------------------------------- 
/dev/test/test_workflow/benchmark/ENCSR936XTK_bwa.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR936XTK/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "gs://encode-pipeline-genome-data/genome_tsv/v1/hg38_gcp.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R1.fastq.gz" 6 | ], 7 | "chip.fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R2.fastq.gz" 8 | ], 9 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R1.fastq.gz" 10 | ], 11 | "chip.fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R2.fastq.gz" 12 | ], 13 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R1.fastq.gz" 14 | ], 15 | "chip.ctl_fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R2.fastq.gz" 16 | ], 17 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R1.fastq.gz" 18 | ], 19 | "chip.ctl_fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R2.fastq.gz" 20 | ], 21 | "chip.paired_end" : true, 22 | "chip.always_use_pooled_ctl" : true, 23 | "chip.title" : "ENCSR936XTK (bwa)", 24 | "chip.description" : "ZNF143 ChIP-seq on human GM12878", 25 | 26 | "chip.aligner": "bwa", 27 | "chip.align_only": true 28 | } 29 | -------------------------------------------------------------------------------- /dev/test/test_workflow/benchmark/ENCSR936XTK_m0_q255.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : 
"gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR936XTK/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "gs://encode-pipeline-genome-data/genome_tsv/v1/hg38_gcp.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R1.fastq.gz" 6 | ], 7 | "chip.fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R2.fastq.gz" 8 | ], 9 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R1.fastq.gz" 10 | ], 11 | "chip.fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R2.fastq.gz" 12 | ], 13 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R1.fastq.gz" 14 | ], 15 | "chip.ctl_fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R2.fastq.gz" 16 | ], 17 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R1.fastq.gz" 18 | ], 19 | "chip.ctl_fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R2.fastq.gz" 20 | ], 21 | "chip.paired_end" : true, 22 | "chip.always_use_pooled_ctl" : true, 23 | "chip.title" : "ENCSR936XTK (m0, q255)", 24 | "chip.description" : "ZNF143 ChIP-seq on human GM12878", 25 | 26 | "chip.align.multimapping": 0, 27 | "chip.align_R1.multimapping": 0, 28 | "chip.align_ctl.multimapping": 0, 29 | "chip.mapq_thresh": 255, 30 | "chip.align_only": true 31 | } 32 | -------------------------------------------------------------------------------- /dev/test/test_workflow/benchmark/ENCSR936XTK_m0_q30.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : 
"gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR936XTK/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "gs://encode-pipeline-genome-data/genome_tsv/v1/hg38_gcp.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R1.fastq.gz" 6 | ], 7 | "chip.fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R2.fastq.gz" 8 | ], 9 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R1.fastq.gz" 10 | ], 11 | "chip.fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R2.fastq.gz" 12 | ], 13 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R1.fastq.gz" 14 | ], 15 | "chip.ctl_fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R2.fastq.gz" 16 | ], 17 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R1.fastq.gz" 18 | ], 19 | "chip.ctl_fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R2.fastq.gz" 20 | ], 21 | "chip.paired_end" : true, 22 | "chip.always_use_pooled_ctl" : true, 23 | "chip.title" : "ENCSR936XTK (m0, q30)", 24 | "chip.description" : "ZNF143 ChIP-seq on human GM12878", 25 | 26 | "chip.align.multimapping": 0, 27 | "chip.align_R1.multimapping": 0, 28 | "chip.align_ctl.multimapping": 0, 29 | "chip.mapq_thresh": 30, 30 | "chip.align_only": false, 31 | "chip.spp_cpu": 8 32 | } 33 | -------------------------------------------------------------------------------- /dev/test/test_workflow/benchmark/ENCSR936XTK_m0_q40.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : 
"gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR936XTK/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "gs://encode-pipeline-genome-data/genome_tsv/v1/hg38_gcp.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R1.fastq.gz" 6 | ], 7 | "chip.fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R2.fastq.gz" 8 | ], 9 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R1.fastq.gz" 10 | ], 11 | "chip.fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R2.fastq.gz" 12 | ], 13 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R1.fastq.gz" 14 | ], 15 | "chip.ctl_fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R2.fastq.gz" 16 | ], 17 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R1.fastq.gz" 18 | ], 19 | "chip.ctl_fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R2.fastq.gz" 20 | ], 21 | "chip.paired_end" : true, 22 | "chip.always_use_pooled_ctl" : true, 23 | "chip.title" : "ENCSR936XTK (m0, q40)", 24 | "chip.description" : "ZNF143 ChIP-seq on human GM12878", 25 | 26 | "chip.align.multimapping": 0, 27 | "chip.align_R1.multimapping": 0, 28 | "chip.align_ctl.multimapping": 0, 29 | "chip.mapq_thresh": 40, 30 | "chip.align_only": true 31 | } 32 | -------------------------------------------------------------------------------- /dev/test/test_workflow/benchmark/ENCSR936XTK_m4_q255.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : 
"gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR936XTK/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "gs://encode-pipeline-genome-data/genome_tsv/v1/hg38_gcp.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R1.fastq.gz" 6 | ], 7 | "chip.fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R2.fastq.gz" 8 | ], 9 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R1.fastq.gz" 10 | ], 11 | "chip.fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R2.fastq.gz" 12 | ], 13 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R1.fastq.gz" 14 | ], 15 | "chip.ctl_fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R2.fastq.gz" 16 | ], 17 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R1.fastq.gz" 18 | ], 19 | "chip.ctl_fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R2.fastq.gz" 20 | ], 21 | "chip.paired_end" : true, 22 | "chip.always_use_pooled_ctl" : true, 23 | "chip.title" : "ENCSR936XTK (m4, q255)", 24 | "chip.description" : "ZNF143 ChIP-seq on human GM12878", 25 | 26 | "chip.align.multimapping": 4, 27 | "chip.align_R1.multimapping": 4, 28 | "chip.align_ctl.multimapping": 4, 29 | "chip.mapq_thresh": 255, 30 | "chip.align_only": true 31 | } 32 | -------------------------------------------------------------------------------- /dev/test/test_workflow/benchmark/ENCSR936XTK_m4_q30.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : 
"gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR936XTK/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "gs://encode-pipeline-genome-data/genome_tsv/v1/hg38_gcp.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R1.fastq.gz" 6 | ], 7 | "chip.fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R2.fastq.gz" 8 | ], 9 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R1.fastq.gz" 10 | ], 11 | "chip.fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R2.fastq.gz" 12 | ], 13 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R1.fastq.gz" 14 | ], 15 | "chip.ctl_fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R2.fastq.gz" 16 | ], 17 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R1.fastq.gz" 18 | ], 19 | "chip.ctl_fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R2.fastq.gz" 20 | ], 21 | "chip.paired_end" : true, 22 | "chip.always_use_pooled_ctl" : true, 23 | "chip.title" : "ENCSR936XTK (m4, q30)", 24 | "chip.description" : "ZNF143 ChIP-seq on human GM12878", 25 | 26 | "chip.align.multimapping": 4, 27 | "chip.align_R1.multimapping": 4, 28 | "chip.align_ctl.multimapping": 4, 29 | "chip.mapq_thresh": 30, 30 | "chip.align_only": true 31 | } 32 | -------------------------------------------------------------------------------- /dev/test/test_workflow/benchmark/ENCSR936XTK_m4_q40.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.qc_report.qc_json_ref" : 
"gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output/v1.1.5/ENCSR936XTK/qc.json", 3 | "chip.pipeline_type" : "tf", 4 | "chip.genome_tsv" : "gs://encode-pipeline-genome-data/genome_tsv/v1/hg38_gcp.tsv", 5 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R1.fastq.gz" 6 | ], 7 | "chip.fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R2.fastq.gz" 8 | ], 9 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R1.fastq.gz" 10 | ], 11 | "chip.fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R2.fastq.gz" 12 | ], 13 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R1.fastq.gz" 14 | ], 15 | "chip.ctl_fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R2.fastq.gz" 16 | ], 17 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R1.fastq.gz" 18 | ], 19 | "chip.ctl_fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R2.fastq.gz" 20 | ], 21 | "chip.paired_end" : true, 22 | "chip.always_use_pooled_ctl" : true, 23 | "chip.title" : "ENCSR936XTK (m4, q40)", 24 | "chip.description" : "ZNF143 ChIP-seq on human GM12878", 25 | 26 | "chip.align.multimapping": 4, 27 | "chip.align_R1.multimapping": 4, 28 | "chip.align_ctl.multimapping": 4, 29 | "chip.mapq_thresh": 40, 30 | "chip.align_only": true 31 | } 32 | -------------------------------------------------------------------------------- /dev/test/test_workflow/ref_output/sync.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | gsutil -m rsync -r -d . 
gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ref_output 4 | -------------------------------------------------------------------------------- /docs/build_genome_database.md: -------------------------------------------------------------------------------- 1 | ## How to download genome database 2 | 3 | 1. Choose `GENOME` from `hg19`, `hg38`, `mm9` and `mm10` and specify a destination directory. 4 | ```bash 5 | $ bash scripts/download_genome_data.sh [GENOME] [DESTINATION_DIR] 6 | ``` 7 | 2. Find a TSV file in the destination directory and use it for `"chip.genome_tsv"` in your input JSON. 8 | 9 | # How to build genome database 10 | 11 | 1. [Install Conda](https://conda.io/miniconda.html). 12 | 13 | 2. Install pipeline's Conda environment. 14 | ```bash 15 | $ bash scripts/uninstall_conda_env.sh # to remove any existing pipeline env 16 | $ bash scripts/install_conda_env.sh 17 | ``` 18 | 19 | 3. Choose `GENOME` from `hg19`, `hg38`, `mm9` and `mm10` and specify a destination directory. This will take several hours. We recommend not running this installer on a login node of your cluster. It will take >8GB memory and >2h time. 20 | ```bash 21 | $ conda activate encd-chip 22 | $ bash scripts/build_genome_data.sh [GENOME] [DESTINATION_DIR] 23 | ``` 24 | 25 | 4. Find a TSV file in the destination directory and use it for `"chip.genome_tsv"` in your input JSON. 26 | 27 | 28 | ## How to build genome database for your own genome 29 | 30 | 1. You can build your own genome database if your reference genome has one of the following file types. 31 | * `.fasta.gz` 32 | * `.fa.gz` 33 | * `.fasta.bz2` 34 | * `.fa.bz2` 35 | * `.2bit` 36 | 37 | 2. Get a URL for your reference genome. You may need to upload it somewhere on the internet. 38 | 39 | 3. Get a URL for a gzipped blacklist BED file for your genome. If you don't have one then skip this step. An example blacklist for hg38 is [here](https://www.encodeproject.org/files/ENCFF356LFX/@@download/ENCFF356LFX.bed.gz).
40 | 41 | 4. Find the following lines in `scripts/build_genome_data.sh` and modify them as follows. Give a good name `[YOUR_OWN_GENOME]` for your genome. For `MITO_CHR_NAME`, use the correct mitochondrial chromosome name of your genome (e.g. `chrM` or `MT`). For `REGEX_BFILT_PEAK_CHR_NAME`, a Perl-style regular expression must be used to keep only regular chromosome names in the blacklist-filtered (`.bfilt.`) peak files. These `.bfilt.` peak files are considered the final peak output of the pipeline, and the peak BED files for genome browser tracks (`.bigBed` and `.hammock.gz`) are converted from these `.bfilt.` peak files. Chromosome name filtering with `REGEX_BFILT_PEAK_CHR_NAME` will be done even without a blacklist. 42 | ```bash 43 | ... 44 | 45 | elif [[ $GENOME == "YOUR_OWN_GENOME" ]]; then 46 | # Perl style regular expression to keep regular chromosomes only. 47 | # this reg-ex will be applied to peaks after blacklist filtering (b-filt) with "grep -P". 48 | # so that b-filt peak file (.bfilt.*Peak.gz) will only have chromosomes matching with this pattern 49 | # this reg-ex will work even without a blacklist. 50 | # you will still be able to find a .bfilt. peak file 51 | REGEX_BFILT_PEAK_CHR_NAME="chr[\dXY]+" 52 | # mitochondrial chromosome name (e.g. chrM, MT) 53 | MITO_CHR_NAME="chrM" 54 | # URL for your reference FASTA (fasta, fasta.gz, fa, fa.gz, 2bit) 55 | REF_FA="https://some.where.com/your.genome.fa.gz" 56 | # 3-col blacklist BED file to filter out overlapping peaks from b-filt peak file (.bfilt.*Peak.gz file). 57 | # leave it empty if you don't have one 58 | BLACKLIST= 59 | ... 60 | ``` 61 | 62 | 5. Specify a destination directory for your genome database and run the installer. This will take several hours. 63 | ```bash 64 | $ bash scripts/build_genome_data.sh [YOUR_OWN_GENOME] [DESTINATION_DIR] 65 | ``` 66 | 67 | 6. Find a TSV file in the destination directory and use it for `"chip.genome_tsv"` in your input JSON.
68 | -------------------------------------------------------------------------------- /docs/how_to_config_sge.md: -------------------------------------------------------------------------------- 1 | # How to configure SGE for pipeline 2 | 3 | 1. List all parallel environments (PE) on your SGE. 4 | ```bash 5 | $ qconf -spl 6 | ``` 7 | 8 | 2. If you don't have one then ask your system admin to add a new one with name `shm`. 9 | ```bash 10 | $ sudo qconf -ap shm 11 | ``` 12 | 13 | 3. Give a large number to `slots` for your PE. 14 | ```bash 15 | $ sudo qconf -mp shm 16 | pe_name shm 17 | slots 999 18 | ... 19 | ``` 20 | 21 | 4. List all queues on your SGE. 22 | ```bash 23 | $ qconf -sql 24 | ``` 25 | 26 | 5. Ask your system admin to connect PE to your queue. 27 | ```bash 28 | $ sudo qconf -mq [QUEUE_NAME] 29 | ... 30 | pe_list make shm 31 | ... 32 | ``` 33 | -------------------------------------------------------------------------------- /docs/troubleshooting.md: -------------------------------------------------------------------------------- 1 | 2 | ## Trimmomatic errors 3 | 4 | ### `Error: Unable to detect quality encoding` 5 | 6 | Take a look at your FASTQs first. If there are multiple technical replicates for one biological replicate, then the pipeline merges all technical replicates' FASTQs first and then crops the merged one with Trimmomatic. If you mix up FASTQs with different base quality encodings then you will see this error. 7 | 8 | Add `phred33` or `phred64` to your input JSON. It's `auto` by default. 9 | 10 | ```javascript 11 | { 12 | "chip.trimmomatic_phred_score_format": "phred33" 13 | } 14 | ``` 15 | 16 | ## Conda environment 17 | 18 | It takes a long time (>30 minutes) to resolve the pipeline's Conda environment since the pipeline uses lots of dependencies including `bowtie2`, `samtools`, `bedtools` and math libraries like `numpy`. 19 | 20 | If it takes too long (>hours) then try a method other than Conda.
Install docker and try with `--docker` for `caper run` or `caper submit`. You don't need to define docker/singularity images and their versions (image's tag) since they are already defined in the pipeline's WDL and Caper picks them up automatically. 21 | -------------------------------------------------------------------------------- /docs/tutorial_dx_cli.md: -------------------------------------------------------------------------------- 1 | # Tutorial for DNAnexus Platform (CLI) 2 | 3 | All test samples and genome data are shared on our public DNAnexus project. You don't have to download any data for testing our pipeline on the DNAnexus platform. 4 | 5 | There are two methods to run our pipeline on DNAnexus. 6 | 7 | 1) Building your own DX workflow from `chip.wdl` with dxWDL (CLI) 8 | 2) [Using a pre-built DX workflow on our public DX project (Web UI)](tutorial_dx_web.md) 9 | 10 | This document describes the instructions for item 1). 11 | 12 | 1. Sign up for a [DNAnexus account](https://platform.DNAnexus.com/register). 13 | 14 | 2. Create a new [DX project](https://platform.DNAnexus.com/projects) with name `[YOUR_PROJECT_NAME]` by clicking on "+New Project" on the top left. 15 | 16 | 3. Download dxWDL. 17 | ```bash 18 | $ cd 19 | $ wget https://github.com/dnanexus/dxWDL/releases/download/v1.46.4/dxWDL-v1.46.4.jar 20 | $ chmod +rx dxWDL-v1.46.4.jar 21 | ``` 22 | 23 | 4. Git clone this pipeline. 24 | ```bash 25 | $ cd 26 | $ git clone https://github.com/ENCODE-DCC/chip-seq-pipeline2 27 | ``` 28 | 29 | 5. Move to the pipeline's directory. 30 | ```bash 31 | $ cd chip-seq-pipeline2 32 | ``` 33 | 34 | 6. Choose an appropriate input for your project (AWS or Azure): 35 | * AWS 36 | ```bash 37 | $ INPUT=example_input_json/dx/ENCSR936XTK_subsampled_chr19_only_dx.json 38 | ``` 39 | * Azure 40 | ```bash 41 | $ INPUT=example_input_json/dx_azure/ENCSR936XTK_subsampled_chr19_only_dx_azure.json 42 | ``` 43 | 44 | 7. Make a WDL for DNAnexus use only. The original WDL will not work with inputs (e.g.
BAMs, TAs) other than FASTQs. Then compile `chip.dx.wdl` with an input JSON for the SUBSAMPLED paired-end sample of [ENCSR936XTK](https://www.encodeproject.org/experiments/ENCSR936XTK/). 45 | ```bash 46 | $ cp chip.wdl chip.dx.wdl 47 | $ sed -i 's/Array\[File?\] bams = \[\]/Array\[File\] bams = \[\]/g' chip.dx.wdl 48 | $ sed -i 's/Array\[File?\] nodup_bams = \[\]/Array\[File\] nodup_bams = \[\]/g' chip.dx.wdl 49 | $ sed -i 's/Array\[File?\] tas = \[\]/Array\[File\] tas = \[\]/g' chip.dx.wdl 50 | $ sed -i 's/Array\[File?\] ctl_bams = \[\]/Array\[File\] ctl_bams = \[\]/g' chip.dx.wdl 51 | $ sed -i 's/Array\[File?\] ctl_nodup_bams = \[\]/Array\[File\] ctl_nodup_bams = \[\]/g' chip.dx.wdl 52 | $ sed -i 's/Array\[File?\] ctl_tas = \[\]/Array\[File\] ctl_tas = \[\]/g' chip.dx.wdl 53 | ``` 54 | 55 | ```bash 56 | $ WDL=chip.dx.wdl 57 | $ DXWDL=dxWDL-v1.46.4.jar 58 | $ PROJECT=[YOUR_PROJECT_NAME] 59 | $ OUT_FOLDER=/test_sample_chip_ENCSR936XTK_subsampled_chr19_only 60 | $ DOCKER=$(cat ${WDL} | grep caper_docker | awk 'BEGIN{FS="'\''"} {print $2}') 61 | 62 | $ java -jar ${DXWDL} compile ${WDL} -project ${PROJECT} -f -folder ${OUT_FOLDER} -defaults ${INPUT} -extras <(echo "{\"default_runtime_attributes\":{\"docker\":\"${DOCKER}\"}}") 63 | ``` 64 | 65 | 8. Go to DNAnexus [project page](https://platform.DNAnexus.com/projects) and click on your project. 66 | 67 | 9. Move to the directory `/test_sample_chip_ENCSR936XTK_subsampled_chr19_only`. 68 | 69 | 10. You will find a DX workflow `chip` with all parameters pre-defined. Click on it. 70 | 71 | 11. Specify an output directory by clicking "Workflow Actions" on the top right. Click on "Set output folder" and choose an output folder. 72 | 73 | 12. Click on "Run as Analysis..." and you will be automatically redirected to the "Monitor" tab. 74 | 75 | 13. It will take about 6 hours. You will be able to find all outputs on your output folder. Final QC report (`qc.html`)/JSON (`qc.json`) will be found on it. 76 | 77 | 14. 
See full specification for [input JSON file](input.md). 78 | -------------------------------------------------------------------------------- /docs/tutorial_dx_web.md: -------------------------------------------------------------------------------- 1 | # Tutorial for DNAnexus Platform (web) 2 | 3 | All test samples and genome data are shared on our public DNAnexus project. You don't have to download any data for testing our pipeline on the DNAnexus platform. 4 | 5 | There are two methods to run our pipeline on DNAnexus. 6 | 7 | 1) [Building your own DX workflow from `chip.wdl` with dxWDL (CLI)](tutorial_dx_cli.md) 8 | 2) Using a pre-built DX workflow on our public DX project (Web UI) 9 | 10 | This document describes the instructions for item 2). 11 | 12 | 1. Sign up for a [DNAnexus account](https://platform.DNAnexus.com/register). 13 | 14 | 2. Create a new [DX project](https://platform.DNAnexus.com/projects) by clicking on "+New Project" on the top left. 15 | 16 | 3. Move to one of the following workflow directories according to the platform you have chosen for your project (AWS or Azure). These DX workflows are pre-built with all parameters defined. 17 | 18 | * [AWS test workflow](https://platform.DNAnexus.com/projects/BKpvFg00VBPV975PgJ6Q03v6/data/ChIP-seq2/workflows): Use `[LATEST_VER]/test_ENCSR936XTK_subsampled_chr19_only`. 19 | * [Azure test workflow](https://platform.DNAnexus.com/projects/F6K911Q9xyfgJ36JFzv03Z5J/data/ChIP-seq2/workflows): Use `[LATEST_VER]/test_ENCSR936XTK_subsampled_chr19_only`. 20 | 21 | 4. Copy it to your project by right-clicking on the DX workflow `chip` and choosing "Copy". 22 | 23 | 5. Choose your project and create a folder for the test run by clicking on the "Folder+" icon. 24 | 25 | 6. Click on "Copy into this folder" on the bottom left. 26 | 27 | 7. Move to the target folder and click on the DX workflow `chip`. 28 | 29 | 9. Specify an output directory by clicking "Workflow Actions" on the top right.
Click on "Set output folder" and choose an output folder. 30 | 31 | 10. Click on "Run as Analysis..." and you will be automatically redirected to the "Monitor" tab. 32 | 33 | 11. It will take about 6 hours. You will be able to find all outputs in your output folder. The final QC report (`qc.html`)/JSON (`qc.json`) will be found there. 34 | 35 | 12. See full specification for [input JSON file](input.md). 36 | 37 | 38 | ## Extras for advanced users 39 | 40 | 1. DNAnexus allows only one copy of a workflow per project. The example workflow in the previous section is pre-built for the subsampled test sample [ENCSR936XTK](https://www.encodeproject.org/experiments/ENCSR936XTK/) with all parameters defined already. 41 | 42 | 2. Choose your main platform (AWS or Azure). Move to [ENCODE ChIP-seq pipeline repository for AWS](https://platform.DNAnexus.com/projects/BKpvFg00VBPV975PgJ6Q03v6/data/ChIP-seq2/workflows) or [ENCODE ChIP-seq pipeline repository for Azure](https://platform.DNAnexus.com/projects/F6K911Q9xyfgJ36JFzv03Z5J/data/ChIP-seq2/workflows). 43 | 44 | 3. Choose a folder with the latest available version. 45 | 46 | 4. Copy one of the following workflows according to the platform you have chosen for your project. 47 | > **IMPORTANT**: Make sure that you have chosen the correct platform (AWS or Azure) for your project. 48 | 49 | * general: General workflow without pre-defined reference genome. 50 | * hg38: Workflow with pre-defined hg38 reference genome. 51 | * hg19: Workflow with pre-defined hg19 reference genome. 52 | * mm10: Workflow with pre-defined mm10 reference genome. 53 | * mm9: Workflow with pre-defined mm9 reference genome. 54 | 55 | 5. Click on the DX workflow `chip`. 56 | 57 | 6. Specify your input files (FASTQs, BAMs, TAG-ALIGNs, ...) on the top left. For example, click on the item "fastqs_rep1_R1" and choose your R1 FASTQ file for replicate 1. See details [here](input.md) for other input types. 58 | 59 | 7. Choose a reference genome.
See details [here](input.md). 60 | 61 | 8. Click on "Run as Analysis..." and you will be automatically redirected to the "Monitor" tab. 62 | -------------------------------------------------------------------------------- /example_input_json/ENCSR000DYI_subsampled_chr19_only.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v4/hg38_chr19_chrM.tsv", 4 | "chip.fastqs_rep1_R1" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep1.subsampled.25.fastq.gz" 5 | ], 6 | "chip.fastqs_rep2_R1" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep2.subsampled.20.fastq.gz" 7 | ], 8 | "chip.ctl_fastqs_rep1_R1" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl1.subsampled.25.fastq.gz" 9 | ], 10 | "chip.ctl_fastqs_rep2_R1" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl2.subsampled.25.fastq.gz" 11 | ], 12 | "chip.paired_end" : false, 13 | "chip.title" : "ENCSR000DYI (subsampled 1/25, chr19_chrM only)", 14 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab" 15 | } 16 | -------------------------------------------------------------------------------- /example_input_json/ENCSR936XTK_subsampled_chr19_only.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v4/hg38_chr19_chrM.tsv", 4 | "chip.fastqs_rep1_R1" : 
["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R1.subsampled.50.fastq.gz" 5 | ], 6 | "chip.fastqs_rep1_R2" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R2.subsampled.50.fastq.gz" 7 | ], 8 | "chip.fastqs_rep2_R1" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R1.subsampled.50.fastq.gz" 9 | ], 10 | "chip.fastqs_rep2_R2" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R2.subsampled.50.fastq.gz" 11 | ], 12 | "chip.ctl_fastqs_rep1_R1" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R1.subsampled.80.fastq.gz" 13 | ], 14 | "chip.ctl_fastqs_rep1_R2" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R2.subsampled.80.fastq.gz" 15 | ], 16 | "chip.ctl_fastqs_rep2_R1" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R1.subsampled.80.fastq.gz" 17 | ], 18 | "chip.ctl_fastqs_rep2_R2" : ["https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R2.subsampled.80.fastq.gz" 19 | ], 20 | "chip.paired_end" : true, 21 | "chip.title" : "ENCSR936XTK (subsampled 1/50, chr19 and chrM Only)", 22 | "chip.description" : "ZNF143 ChIP-seq on human GM12878" 23 | } 24 | -------------------------------------------------------------------------------- /example_input_json/dx/ENCSR000DYI_dx.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-genome-data/genome_tsv/v4/hg38.dx.tsv", 4 
| "chip.fastqs_rep1_R1" : ["dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/rep1.fastq.gz" 5 | ], 6 | "chip.fastqs_rep2_R1" : ["dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/rep2.fastq.gz" 7 | ], 8 | "chip.ctl_fastqs_rep1_R1" : ["dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/ctl1.fastq.gz" 9 | ], 10 | "chip.ctl_fastqs_rep2_R1" : ["dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/ctl2.fastq.gz" 11 | ], 12 | 13 | "chip.paired_end" : false, 14 | 15 | "chip.title" : "ENCSR000DYI", 16 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab" 17 | } 18 | -------------------------------------------------------------------------------- /example_input_json/dx/ENCSR000DYI_subsampled_chr19_only_dx.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-genome-data/genome_tsv/v4/hg38_chr19_chrM.dx.tsv", 4 | "chip.fastqs_rep1_R1" : ["dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep1.subsampled.25.fastq.gz" 5 | ], 6 | "chip.fastqs_rep2_R1" : ["dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep2.subsampled.20.fastq.gz" 7 | ], 8 | "chip.ctl_fastqs_rep1_R1" : ["dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl1.subsampled.25.fastq.gz" 9 | ], 10 | "chip.ctl_fastqs_rep2_R1" : ["dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl2.subsampled.25.fastq.gz" 11 | ], 12 | 13 | "chip.paired_end" : false, 14 | 15 | "chip.title" : 
"ENCSR000DYI (subsampled 1/25, chr19 and chrM only)", 16 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab" 17 | } 18 | -------------------------------------------------------------------------------- /example_input_json/dx/ENCSR000DYI_subsampled_chr19_only_rep1_dx.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-genome-data/genome_tsv/v4/hg38_chr19_chrM.dx.tsv", 4 | "chip.fastqs_rep1_R1" : ["dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep1.subsampled.25.fastq.gz" 5 | ], 6 | "chip.ctl_fastqs_rep1_R1" : ["dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl1.subsampled.25.fastq.gz" 7 | ], 8 | 9 | "chip.paired_end" : false, 10 | 11 | "chip.title" : "ENCSR000DYI (subsampled 1/25, chr19 and chrM only)", 12 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab" 13 | } 14 | -------------------------------------------------------------------------------- /example_input_json/dx/ENCSR936XTK_dx.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-genome-data/genome_tsv/v4/hg38.dx.tsv", 4 | 5 | "chip.fastqs_rep1_R1" : [ 6 | "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R1.fastq.gz" 7 | ], 8 | "chip.fastqs_rep1_R2" : [ 9 | "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R2.fastq.gz" 10 | ], 11 | "chip.fastqs_rep2_R1" : [ 12 | "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R1.fastq.gz" 13 | ], 14 | 
"chip.fastqs_rep2_R2" : [ 15 | "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R2.fastq.gz" 16 | ], 17 | "chip.ctl_fastqs_rep1_R1" : [ 18 | "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R1.fastq.gz" 19 | ], 20 | "chip.ctl_fastqs_rep1_R2" : [ 21 | "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R2.fastq.gz" 22 | ], 23 | "chip.ctl_fastqs_rep2_R1" : [ 24 | "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R1.fastq.gz" 25 | ], 26 | "chip.ctl_fastqs_rep2_R2" : [ 27 | "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R2.fastq.gz" 28 | ], 29 | 30 | "chip.paired_end" : true, 31 | 32 | "chip.title" : "ENCSR936XTK", 33 | "chip.description" : "ZNF143 ChIP-seq on human GM12878" 34 | } 35 | -------------------------------------------------------------------------------- /example_input_json/dx/template_general.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf" 3 | } 4 | -------------------------------------------------------------------------------- /example_input_json/dx/template_hg19.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-genome-data/genome_tsv/v1/hg19_dx.tsv" 4 | } 5 | -------------------------------------------------------------------------------- /example_input_json/dx/template_hg38.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-genome-data/genome_tsv/v4/hg38.dx.tsv" 4 | } 5 | 
-------------------------------------------------------------------------------- /example_input_json/dx/template_mm10.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-genome-data/genome_tsv/v4/mm10.dx.tsv" 4 | } 5 | -------------------------------------------------------------------------------- /example_input_json/dx/template_mm9.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-genome-data/genome_tsv/v1/mm9_dx.tsv" 4 | } 5 | -------------------------------------------------------------------------------- /example_input_json/dx_azure/ENCSR000DYI_dx_azure.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-genome-data/genome_tsv/v4/hg38.dx_azure.tsv", 4 | "chip.fastqs_rep1_R1" : ["dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/rep1.fastq.gz" 5 | ], 6 | "chip.fastqs_rep2_R1" : ["dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/rep2.fastq.gz" 7 | ], 8 | "chip.ctl_fastqs_rep1_R1" : ["dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/ctl1.fastq.gz" 9 | ], 10 | "chip.ctl_fastqs_rep2_R1" : ["dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq/ctl2.fastq.gz" 11 | ], 12 | 13 | "chip.paired_end" : false, 14 | 15 | "chip.title" : "ENCSR000DYI", 16 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab" 17 | } 18 | -------------------------------------------------------------------------------- 
/example_input_json/dx_azure/ENCSR000DYI_subsampled_chr19_only_dx_azure.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-genome-data/genome_tsv/v4/hg38_chr19_chrM.dx_azure.tsv", 4 | "chip.fastqs_rep1_R1" : ["dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep1.subsampled.25.fastq.gz" 5 | ], 6 | "chip.fastqs_rep2_R1" : ["dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep2.subsampled.15.fastq.gz" 7 | ], 8 | "chip.ctl_fastqs_rep1_R1" : ["dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl1.subsampled.25.fastq.gz" 9 | ], 10 | "chip.ctl_fastqs_rep2_R1" : ["dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl2.subsampled.25.fastq.gz" 11 | ], 12 | 13 | "chip.paired_end" : false, 14 | 15 | "chip.title" : "ENCSR000DYI (subsampled 1/25, chr19/chrM only)", 16 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab" 17 | } 18 | -------------------------------------------------------------------------------- /example_input_json/dx_azure/ENCSR936XTK_dx_azure.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-genome-data/genome_tsv/v4/hg38.dx_azure.tsv", 4 | 5 | "chip.fastqs_rep1_R1" : [ 6 | "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R1.fastq.gz" 7 | ], 8 | "chip.fastqs_rep1_R2" : [ 9 | "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep1-R2.fastq.gz" 10 | ], 11 | 
"chip.fastqs_rep2_R1" : [ 12 | "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R1.fastq.gz" 13 | ], 14 | "chip.fastqs_rep2_R2" : [ 15 | "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/rep2-R2.fastq.gz" 16 | ], 17 | "chip.ctl_fastqs_rep1_R1" : [ 18 | "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R1.fastq.gz" 19 | ], 20 | "chip.ctl_fastqs_rep1_R2" : [ 21 | "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl1-R2.fastq.gz" 22 | ], 23 | "chip.ctl_fastqs_rep2_R1" : [ 24 | "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R1.fastq.gz" 25 | ], 26 | "chip.ctl_fastqs_rep2_R2" : [ 27 | "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq/ctl2-R2.fastq.gz" 28 | ], 29 | 30 | "chip.paired_end" : true, 31 | 32 | "chip.title" : "ENCSR936XTK", 33 | "chip.description" : "ZNF143 ChIP-seq on human GM12878" 34 | } 35 | -------------------------------------------------------------------------------- /example_input_json/dx_azure/template_general.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf" 3 | } 4 | -------------------------------------------------------------------------------- /example_input_json/dx_azure/template_hg19.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-genome-data/genome_tsv/v1/hg19_dx_azure.tsv" 4 | } 5 | -------------------------------------------------------------------------------- /example_input_json/dx_azure/template_hg38.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-genome-data/genome_tsv/v4/hg38.dx_azure.tsv" 4 | } 5 | -------------------------------------------------------------------------------- /example_input_json/dx_azure/template_mm10.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-genome-data/genome_tsv/v4/mm10.dx_azure.tsv" 4 | } 5 | -------------------------------------------------------------------------------- /example_input_json/dx_azure/template_mm9.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-genome-data/genome_tsv/v1/mm9_dx_azure.tsv" 4 | } 5 | -------------------------------------------------------------------------------- /example_input_json/template.full.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.title" : "Example (paired-end)", 3 | "chip.description" : "This is an template input JSON for paired-end sample.", 4 | 5 | "chip.pipeline_type" : "tf", 6 | "chip.aligner" : "bowtie2", 7 | "chip.align_only" : false, 8 | "chip.true_rep_only" : false, 9 | 10 | "chip.genome_tsv" : "/path_to_genome_data/hg38/hg38.tsv", 11 | 12 | "chip.paired_end" : true, 13 | "chip.ctl_paired_end" : true, 14 | 15 | "chip.fastqs_rep1_R1" : [ "rep1_R1_L1.fastq.gz", "rep1_R1_L2.fastq.gz", "rep1_R1_L3.fastq.gz" ], 16 | "chip.fastqs_rep1_R2" : [ "rep1_R2_L1.fastq.gz", "rep1_R2_L2.fastq.gz", "rep1_R2_L3.fastq.gz" ], 17 | "chip.fastqs_rep2_R1" : [ "rep2_R1_L1.fastq.gz", "rep2_R1_L2.fastq.gz" ], 18 | "chip.fastqs_rep2_R2" : [ "rep2_R2_L1.fastq.gz", "rep2_R2_L2.fastq.gz" ], 19 | 20 | 
"chip.ctl_fastqs_rep1_R1" : [ "ctl1_R1.fastq.gz" ], 21 | "chip.ctl_fastqs_rep1_R2" : [ "ctl1_R2.fastq.gz" ], 22 | "chip.ctl_fastqs_rep2_R1" : [ "ctl2_R1.fastq.gz" ], 23 | "chip.ctl_fastqs_rep2_R2" : [ "ctl2_R2.fastq.gz" ], 24 | 25 | "chip.use_bwa_mem_for_pe" : false, 26 | "chip.bwa_mem_read_len_limit" : 70, 27 | "chip.use_bowtie2_local_mode" : false, 28 | 29 | "chip.crop_length" : 0, 30 | 31 | "chip.mapq_thresh" : 30, 32 | "chip.dup_marker" : "picard", 33 | "chip.no_dup_removal" : false, 34 | 35 | "chip.subsample_reads" : 0, 36 | "chip.ctl_subsample_reads" : 0, 37 | "chip.xcor_subsample_reads" : 15000000, 38 | "chip.pseudoreplication_random_seed" : 0, 39 | 40 | "chip.xcor_trim_bp" : 50, 41 | "chip.use_filt_pe_ta_for_xcor" : false, 42 | 43 | "chip.always_use_pooled_ctl" : true, 44 | "chip.ctl_depth_ratio" : 1.2, 45 | 46 | "chip.peak_caller" : null, 47 | "chip.cap_num_peak_macs2" : 500000, 48 | "chip.pval_thresh" : 0.01, 49 | "chip.fdr_thresh" : 0.01, 50 | "chip.idr_thresh" : 0.05, 51 | "chip.cap_num_peak_spp" : 300000, 52 | 53 | "chip.enable_jsd" : true, 54 | "chip.enable_gc_bias" : true, 55 | "chip.enable_count_signal_track" : false, 56 | 57 | "chip.filter_chrs" : [], 58 | 59 | "chip.align_cpu" : 6, 60 | "chip.align_bowtie2_mem_factor" : 0.15, 61 | "chip.align_bwa_mem_factor" : 1.0, 62 | "chip.align_time_hr" : 48, 63 | "chip.align_bowtie2_disk_factor" : 8.0, 64 | "chip.align_bwa_disk_factor" : 8.0, 65 | 66 | "chip.filter_cpu" : 4, 67 | "chip.filter_mem_factor" : 0.4, 68 | "chip.filter_time_hr" : 24, 69 | "chip.filter_disk_factor" : 8.0, 70 | 71 | "chip.bam2ta_cpu" : 2, 72 | "chip.bam2ta_mem_factor" : 0.35, 73 | "chip.bam2ta_time_hr" : 6, 74 | "chip.bam2ta_disk_factor" : 4.0, 75 | 76 | "chip.spr_mem_factor" : 13.5, 77 | "chip.spr_disk_factor" : 18.0, 78 | 79 | "chip.jsd_cpu" : 4, 80 | "chip.jsd_mem_factor" : 0.1, 81 | "chip.jsd_time_hr" : 6, 82 | "chip.jsd_disk_factor" : 2.0, 83 | 84 | "chip.xcor_cpu" : 2, 85 | "chip.xcor_mem_factor" : 1.0, 86 | "chip.xcor_time_hr" 
: 24, 87 | "chip.xcor_disk_factor" : 4.5, 88 | 89 | "chip.subsample_ctl_mem_factor" : 14.0, 90 | "chip.subsample_ctl_disk_factor" : 15.0, 91 | 92 | "chip.call_peak_cpu" : 6, 93 | "chip.call_peak_spp_mem_factor" : 5.0, 94 | "chip.call_peak_macs2_mem_factor" : 5.0, 95 | "chip.call_peak_time_hr" : 72, 96 | "chip.call_peak_spp_disk_factor" : 5.0, 97 | "chip.call_peak_macs2_disk_factor" : 30.0, 98 | 99 | "chip.macs2_signal_track_mem_factor" : 12.0, 100 | "chip.macs2_signal_track_time_hr" : 24, 101 | "chip.macs2_signal_track_disk_factor" : 80.0 102 | } 103 | -------------------------------------------------------------------------------- /example_input_json/template.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.title" : "Example (paired-end)", 3 | "chip.description" : "This is an template input JSON for paired-end sample.", 4 | 5 | "chip.pipeline_type" : "tf", 6 | "chip.aligner" : "bowtie2", 7 | "chip.align_only" : false, 8 | "chip.true_rep_only" : false, 9 | 10 | "chip.genome_tsv" : "/path_to_genome_data/hg38/hg38.tsv", 11 | 12 | "chip.paired_end" : true, 13 | "chip.ctl_paired_end" : true, 14 | 15 | "chip.always_use_pooled_ctl" : true, 16 | 17 | "chip.fastqs_rep1_R1" : [ "rep1_R1_L1.fastq.gz", "rep1_R1_L2.fastq.gz", "rep1_R1_L3.fastq.gz" ], 18 | "chip.fastqs_rep1_R2" : [ "rep1_R2_L1.fastq.gz", "rep1_R2_L2.fastq.gz", "rep1_R2_L3.fastq.gz" ], 19 | "chip.fastqs_rep2_R1" : [ "rep2_R1_L1.fastq.gz", "rep2_R1_L2.fastq.gz" ], 20 | "chip.fastqs_rep2_R2" : [ "rep2_R2_L1.fastq.gz", "rep2_R2_L2.fastq.gz" ], 21 | 22 | "chip.ctl_fastqs_rep1_R1" : [ "ctl1_R1.fastq.gz" ], 23 | "chip.ctl_fastqs_rep1_R2" : [ "ctl1_R2.fastq.gz" ], 24 | "chip.ctl_fastqs_rep2_R1" : [ "ctl2_R1.fastq.gz" ], 25 | "chip.ctl_fastqs_rep2_R2" : [ "ctl2_R2.fastq.gz" ] 26 | } 27 | -------------------------------------------------------------------------------- /example_input_json/terra/ENCSR000DYI_subsampled_chr19_only.terra.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "gs://encode-pipeline-genome-data/genome_tsv/v4/hg38_chr19_chrM.terra.tsv", 4 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep1.subsampled.25.fastq.gz" 5 | ], 6 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep2.subsampled.20.fastq.gz" 7 | ], 8 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl1.subsampled.25.fastq.gz" 9 | ], 10 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl2.subsampled.25.fastq.gz" 11 | ], 12 | "chip.paired_end" : false, 13 | "chip.title" : "ENCSR000DYI (subsampled 1/25, chr19_chrM only)", 14 | "chip.description" : "CEBPB ChIP-seq on human A549 produced by the Snyder lab" 15 | } 16 | -------------------------------------------------------------------------------- /example_input_json/terra/ENCSR936XTK_subsampled_chr19_only.terra.json: -------------------------------------------------------------------------------- 1 | { 2 | "chip.pipeline_type" : "tf", 3 | "chip.genome_tsv" : "gs://encode-pipeline-genome-data/genome_tsv/v4/hg38_chr19_chrM.terra.tsv", 4 | "chip.fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R1.subsampled.50.fastq.gz" 5 | ], 6 | "chip.fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R2.subsampled.50.fastq.gz" 7 | ], 8 | "chip.fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R1.subsampled.50.fastq.gz" 9 | ], 10 | "chip.fastqs_rep2_R2" : 
["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R2.subsampled.50.fastq.gz" 11 | ], 12 | "chip.ctl_fastqs_rep1_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R1.subsampled.80.fastq.gz" 13 | ], 14 | "chip.ctl_fastqs_rep1_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R2.subsampled.80.fastq.gz" 15 | ], 16 | "chip.ctl_fastqs_rep2_R1" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R1.subsampled.80.fastq.gz" 17 | ], 18 | "chip.ctl_fastqs_rep2_R2" : ["gs://encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R2.subsampled.80.fastq.gz" 19 | ], 20 | "chip.paired_end" : true, 21 | "chip.title" : "ENCSR936XTK (subsampled 1/50, chr19 and chrM Only)", 22 | "chip.description" : "ZNF143 ChIP-seq on human GM12878" 23 | } 24 | -------------------------------------------------------------------------------- /scripts/requirements.macs2.txt: -------------------------------------------------------------------------------- 1 | # Conda environment for tasks (macs2, macs2_signal_track) in atac/chip 2 | 3 | nomkl # using MKL can change MACS2 output randomly on different platforms 4 | python >=3 5 | 6 | macs2 ==2.2.4 7 | bedtools ==2.29.0 8 | bedops ==2.4.39 9 | pybedtools ==0.8.0 10 | pybigwig ==0.3.13 11 | tabix 12 | 13 | matplotlib 14 | ghostscript 15 | 16 | -------------------------------------------------------------------------------- /scripts/requirements.spp.txt: -------------------------------------------------------------------------------- 1 | # Conda environment for tasks (spp, xcor) in atac/chip 2 | # some packages (phantompeakquals, r-spp) will be installed separately 3 | # couldn't resolve all conda conflicts 4 | 5 | python >=3 6 | bedtools ==2.29.0 7 | bedops ==2.4.39 8 | 9 | r-base ==3.6.1 10 | 11 | tabix 12 | 13 | matplotlib 14 | pandas 
15 | numpy 16 | ghostscript 17 | 18 | -------------------------------------------------------------------------------- /scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | # default Conda environment for atac/chip 2 | 3 | python >=3 4 | bwa ==0.7.17 5 | bowtie2 ==2.3.4.3 6 | tbb ==2020.2 # use old version to fix libtbb.so.2 error for bowtie2 7 | samtools ==1.9 8 | htslib ==1.9 9 | bedtools ==2.29.0 10 | sambamba ==0.6.6 11 | 12 | pysam ==0.15.3 13 | pybedtools ==0.8.0 14 | pybigwig ==0.3.13 15 | 16 | deeptools ==3.3.1 17 | cutadapt ==2.5 18 | preseq ==2.0.3 19 | pyfaidx ==0.5.5.2 20 | bedops ==2.4.39 21 | 22 | ptools_bin 23 | 24 | jsondiff ==1.1.1 25 | ghostscript 26 | tabix 27 | matplotlib 28 | numpy 29 | scikit-learn 30 | scipy 31 | pandas 32 | jinja2 33 | gsl 34 | 35 | samstats ==0.2.1 36 | idr ==2.0.4.2 37 | 38 | java-jdk 39 | 40 | picard ==2.20.7 41 | trimmomatic ==0.39 42 | 43 | -------------------------------------------------------------------------------- /scripts/uninstall_conda_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PIPELINE_CONDA_ENVS=( 4 | encd-chip 5 | encd-chip-macs2 6 | encd-chip-spp 7 | ) 8 | for PIPELINE_CONDA_ENV in "${PIPELINE_CONDA_ENVS[@]}" 9 | do 10 | conda env remove -n ${PIPELINE_CONDA_ENV} -y 11 | done 12 | -------------------------------------------------------------------------------- /scripts/update_conda_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e # Stop on error 3 | 4 | SH_SCRIPT_DIR=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd) 5 | SRC_DIR=${SH_SCRIPT_DIR}/../src 6 | 7 | PIPELINE_CONDA_ENVS=( 8 | encd-chip 9 | encd-chip-macs2 10 | encd-chip-spp 11 | ) 12 | chmod u+rx ${SRC_DIR}/*.py 13 | 14 | echo "$(date): Updating WDL task wrappers on each Conda environment..." 
15 | for PIPELINE_CONDA_ENV in "${PIPELINE_CONDA_ENVS[@]}" 16 | do 17 | CONDA_BIN=$(dirname $(conda run -n ${PIPELINE_CONDA_ENV} which python)) 18 | echo -e "$(date): Transferring WDL task wrappers to ${CONDA_BIN}..." 19 | cp -f ${SRC_DIR}/*.py ${CONDA_BIN}/ 20 | done 21 | -------------------------------------------------------------------------------- /src/assign_multimappers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # piped script to take multimappers and randomly assign 4 | # requires a qname sorted file!! 5 | 6 | import sys 7 | import random 8 | import argparse 9 | 10 | 11 | def parse_args(): 12 | ''' 13 | Gives options 14 | ''' 15 | parser = argparse.ArgumentParser( 16 | description='Saves reads below a alignment threshold and discards all others') 17 | parser.add_argument('-k', help='Alignment number cutoff') 18 | parser.add_argument('--paired-end', dest='paired_ended', 19 | action='store_true', help='Data is paired-end') 20 | args = parser.parse_args() 21 | alignment_cutoff = int(args.k) 22 | paired_ended = args.paired_ended 23 | 24 | return alignment_cutoff, paired_ended 25 | 26 | 27 | if __name__ == "__main__": 28 | ''' 29 | Runs the filtering step of choosing multimapped reads 30 | ''' 31 | 32 | [alignment_cutoff, paired_ended] = parse_args() 33 | 34 | if paired_ended: 35 | alignment_cutoff = int(alignment_cutoff) * 2 36 | 37 | # Store each line in sam file as a list of reads, 38 | # where each read is a list of elements to easily 39 | # modify or grab things 40 | current_reads = [] 41 | current_qname = '' 42 | 43 | for line in sys.stdin: 44 | 45 | read_elems = line.strip().split('\t') 46 | 47 | if read_elems[0].startswith('@'): 48 | sys.stdout.write(line) 49 | continue 50 | 51 | # Keep taking lines that have the same qname 52 | if read_elems[0] == current_qname: 53 | # Add line to current reads 54 | current_reads.append(line) 55 | pass 56 | else: 57 | # Discard if there are 
more than the alignment cutoff 58 | if len(current_reads) > alignment_cutoff: 59 | current_reads = [line] 60 | current_qname = read_elems[0] 61 | elif len(current_reads) > 0: 62 | # Just output all reads, which are then filtered with 63 | # samtools 64 | for read in current_reads: 65 | sys.stdout.write(str(read)) 66 | 67 | # And then discard 68 | current_reads = [line] 69 | current_qname = read_elems[0] 70 | else: 71 | # First read in file 72 | current_reads.append(line) 73 | current_qname = read_elems[0] 74 | -------------------------------------------------------------------------------- /src/detect_adapter.py: -------------------------------------------------------------------------------- 1 | # written by Nathan Boley, from https://github.com/nboley/GGR_code 2 | 3 | import sys 4 | import gzip 5 | 6 | VERBOSE = False 7 | 8 | adapters = { 9 | 'Illumina': b'AGATCGGAAGAGC', 10 | 'Nextera ': b'CTGTCTCTTATA', 11 | 'smallRNA': b'TGGAATTCTCGG' 12 | } 13 | 14 | 15 | def open_gz(fname): 16 | return gzip.open(fname) if fname.endswith('.gz') else open(fname, 'rb') 17 | 18 | 19 | def detect_adapters_and_cnts(fname, max_n_lines=1000000): 20 | adapter_cnts = { 21 | 'Illumina': 0, 22 | 'Nextera ': 0, 23 | 'smallRNA': 0 24 | } 25 | 26 | with open_gz(fname) as fp: 27 | # read the first million sequences or to the end of the while -- whichever 28 | # comes first, and then use the adapter for trimming which was found to 29 | # occur most often 30 | for seq_index, line in enumerate(fp): 31 | if seq_index >= max_n_lines: 32 | break 33 | if seq_index % 4 != 1: 34 | continue 35 | for key in adapters: 36 | if line.find(adapters[key]) > -1: 37 | adapter_cnts[key] += 1 38 | 39 | observed_adapters = [ 40 | adapter for adapter, cnt in sorted( 41 | adapter_cnts.items(), key=lambda x: -x[1]) 42 | if cnt > 0 43 | ] 44 | return observed_adapters, adapter_cnts, seq_index//4 45 | 46 | 47 | def detect_most_likely_adapter(fname): 48 | observed_adapters, adapter_cnts, n_obs_adapters = 
detect_adapters_and_cnts( 49 | fname) 50 | if observed_adapters: 51 | best_adapter = observed_adapters[0] 52 | else: 53 | best_adapter = "" 54 | 55 | if VERBOSE: 56 | print("\n\nAUTO-DETECTING ADAPTER TYPE\n===========================") 57 | print("Attempting to auto-detect adapter type from the first 1 million sequences of the first file (>> {} <<)\n".format( 58 | fname) 59 | ) 60 | print("Found perfect matches for the following adapter sequences:") 61 | print("Adapter type\tCount\tSequence\tSequences analysed\tPercentage") 62 | for adapter in observed_adapters: 63 | print("{}\t{}\t{}\t{}\t\t\t{:.2%}".format( 64 | adapter, 65 | adapter_cnts[adapter], 66 | adapters[adapter].decode(), 67 | n_obs_adapters, 68 | adapter_cnts[adapter]/n_obs_adapters) 69 | ) 70 | if best_adapter: 71 | return adapters[best_adapter].decode() 72 | else: 73 | return "" 74 | 75 | 76 | def main(): 77 | global VERBOSE 78 | VERBOSE = False 79 | best_adapter = detect_most_likely_adapter(sys.argv[1]) 80 | print(best_adapter) 81 | 82 | 83 | if __name__ == '__main__': 84 | main() 85 | -------------------------------------------------------------------------------- /src/dev_check_sync_atac.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | diff ../../atac-seq-pipeline/src/encode_task_bam2ta.py encode_task_bam2ta.py 4 | diff ../../atac-seq-pipeline/src/encode_lib_blacklist_filter.py encode_lib_blacklist_filter.py 5 | diff ../../atac-seq-pipeline/src/encode_lib_genomic.py encode_lib_genomic.py 6 | diff ../../atac-seq-pipeline/src/encode_lib_log_parser.py encode_lib_log_parser.py 7 | diff ../../atac-seq-pipeline/src/encode_lib_common.py encode_lib_common.py 8 | diff ../../atac-seq-pipeline/src/encode_task_bowtie2.py encode_task_bowtie2.py 9 | diff ../../atac-seq-pipeline/src/encode_task_filter.py encode_task_filter.py 10 | diff ../../atac-seq-pipeline/src/encode_task_post_align.py encode_task_post_align.py 11 | diff 
../../atac-seq-pipeline/src/encode_lib_frip.py encode_lib_frip.py 12 | diff ../../atac-seq-pipeline/src/encode_task_idr.py encode_task_idr.py 13 | diff ../../atac-seq-pipeline/src/encode_task_overlap.py encode_task_overlap.py 14 | diff ../../atac-seq-pipeline/src/encode_task_pool_ta.py encode_task_pool_ta.py 15 | diff ../../atac-seq-pipeline/src/encode_task_qc_report.py encode_task_qc_report.py 16 | diff ../../atac-seq-pipeline/src/encode_task_reproducibility.py encode_task_reproducibility.py 17 | diff ../../atac-seq-pipeline/src/encode_task_spr.py encode_task_spr.py 18 | diff ../../atac-seq-pipeline/src/encode_task_xcor.py encode_task_xcor.py 19 | diff ../../atac-seq-pipeline/src/encode_task_jsd.py encode_task_jsd.py 20 | diff ../../atac-seq-pipeline/src/encode_task_gc_bias.py encode_task_gc_bias.py 21 | 22 | -------------------------------------------------------------------------------- /src/encode_lib_frip.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # ENCODE DCC FRiP wrapper 4 | # Author: Jin Lee (leepc12@gmail.com) 5 | 6 | import sys 7 | import os 8 | import argparse 9 | from encode_lib_common import ( 10 | get_num_lines, gunzip, log, ls_l, mkdir_p, rm_f, 11 | run_shell_cmd, strip_ext, write_txt) 12 | 13 | 14 | def parse_arguments(): 15 | parser = argparse.ArgumentParser(prog='ENCODE DCC FRiP.', 16 | description='') 17 | parser.add_argument('peak', type=str, 18 | help='Peak file.') 19 | parser.add_argument('ta', type=str, 20 | help='TAGALIGN file.') 21 | parser.add_argument('--chrsz', type=str, 22 | help='2-col chromosome sizes file. \ 23 | If given, do shifted FRiP (for ChIP-Seq).') 24 | parser.add_argument('--fraglen', type=int, default=0, 25 | help='Fragment length for TAGALIGN file. 
def frip(ta, peak, out_dir):
    """Compute the fraction of reads in peaks (FRiP) and write it to a file.

    The QC file is ``<out_dir>/<peak basename without extension>.frip.qc``
    and holds a single number: (TAGALIGN records reported by
    ``bedtools intersect -wa -u`` against the peak file) divided by
    (number of lines in ``ta``).

    Args:
        ta: Path to the TAGALIGN file.
        peak: Path to the peak file.
        out_dir: Directory for the QC file and temporary files.

    Returns:
        Path to the written ``.frip.qc`` file.
    """
    prefix = os.path.join(out_dir,
                          os.path.basename(strip_ext(peak)))
    frip_qc = '{}.frip.qc'.format(prefix)

    if get_num_lines(peak) == 0:
        # no peaks: nothing can overlap, skip bedtools entirely
        val1 = 0.0
        tmp_files = []
    else:
        # due to bedtools bug when .gz is given for -a and -b
        tmp1 = gunzip(ta, 'tmp1', out_dir)
        tmp2 = gunzip(peak, 'tmp2', out_dir)

        cmd = 'bedtools intersect -nonamecheck -a {} -b {} -wa -u | wc -l'
        cmd = cmd.format(
            tmp1,  # ta
            tmp2)  # peak
        val1 = run_shell_cmd(cmd)
        tmp_files = [tmp1, tmp2]
    val2 = get_num_lines(ta)
    write_txt(frip_qc, str(float(val1)/float(val2)))
    rm_f(tmp_files)
    return frip_qc


def frip_shifted(ta, peak, chrsz, fraglen, out_dir):
    """Compute FRiP after a strand-aware shift of reads by half ``fraglen``.

    Reads are passed through ``bedtools slop -s`` with ``-l -half`` and
    ``-r +half`` before being intersected with the peak file. Records with
    negative or inverted coordinates after the shift are dropped by awk.

    Args:
        ta: Path to the TAGALIGN file.
        peak: Path to the peak file.
        chrsz: Path to the 2-column chromosome sizes file (``-g``).
        fraglen: Fragment length in base pairs.
        out_dir: Directory for the QC file and the temporary peak file.

    Returns:
        Path to the written ``.frip.qc`` file.
    """
    prefix = os.path.join(out_dir,
                          os.path.basename(strip_ext(peak)))
    frip_qc = '{}.frip.qc'.format(prefix)
    # Integer division: slop offsets are whole base pairs. True division
    # would put a fractional value (e.g. 100.5) on the command line for
    # odd (fraglen + 1).
    half_fraglen = (fraglen + 1) // 2

    if get_num_lines(peak) == 0:
        val1 = 0.0
    else:
        # due to bedtools bug when .gz is given for -a and -b
        tmp2 = gunzip(peak, 'tmp2', out_dir)

        cmd = 'bedtools slop -i {} -g {} '
        cmd += '-s -l {} -r {} | '
        cmd += 'awk \'{{if ($2>=0 && $3>=0 && $2<=$3) print $0}}\' | '
        cmd += 'bedtools intersect -nonamecheck -a stdin -b {} '
        cmd += '-wa -u | wc -l'
        cmd = cmd.format(
            ta,
            chrsz,
            -half_fraglen,
            half_fraglen,
            tmp2)  # peak
        val1 = run_shell_cmd(cmd)
        rm_f(tmp2)
    val2 = get_num_lines(ta)
    write_txt(frip_qc, str(float(val1)/float(val2)))
    return frip_qc
log.info('Converting BAM into pBAM...') 51 | bam_to_pbam(args.bam, args.ref_fa, args.out_dir) 52 | 53 | if args.delete_original_bam: 54 | log.info('Deleting original BAM...') 55 | rm_f(args.bam) 56 | 57 | log.info('List all files in output directory...') 58 | ls_l(args.out_dir) 59 | 60 | log.info('All done.') 61 | 62 | 63 | if __name__ == '__main__': 64 | main() 65 | -------------------------------------------------------------------------------- /src/encode_task_count_signal_track.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # ENCODE DCC Count signal track generation 4 | # Author: Jin Lee (leepc12@gmail.com) 5 | 6 | import sys 7 | import os 8 | import argparse 9 | from encode_lib_common import ( 10 | log, ls_l, mkdir_p, rm_f, run_shell_cmd, strip_ext_ta, 11 | get_gnu_sort_param, 12 | ) 13 | 14 | 15 | def parse_arguments(): 16 | parser = argparse.ArgumentParser( 17 | prog='ENCODE DCC Count signal track generation') 18 | parser.add_argument('ta', type=str, 19 | help='Path for TAGALIGN file.') 20 | parser.add_argument('--chrsz', type=str, 21 | help='2-col chromosome sizes file.') 22 | parser.add_argument('--mem-gb', type=float, default=4.0, 23 | help='Max. memory for this job in GB. ' 24 | 'This will be used to determine GNU sort -S (defaulting to 0.5 of this value). 
def count_signal_track(ta, chrsz, mem_gb, out_dir):
    """Build per-strand 5'-end count bigWig tracks from a TAGALIGN file.

    For each strand the TAGALIGN is sorted and fed to
    ``bedtools genomecov -5 -bg`` to make a bedGraph, which is then
    converted with ``bedGraphToBigWig``. The intermediate bedGraphs are
    removed at the end.

    Args:
        ta: Path to the TAGALIGN file.
        chrsz: Path to the 2-column chromosome sizes file.
        mem_gb: Total memory for the task in GB; half of it is offered to
            GNU sort via ``get_gnu_sort_param``.
        out_dir: Output directory.

    Returns:
        Tuple ``(positive_bigwig_path, negative_bigwig_path)``.
    """
    prefix = os.path.join(out_dir, os.path.basename(strip_ext_ta(ta)))
    # (strand symbol for bedtools, label used in output file names)
    strands = (('+', 'positive'), ('-', 'negative'))
    bigwigs = ['{}.{}.bigwig'.format(prefix, label) for _, label in strands]
    # temporary files
    bedgraphs = ['{}.{}.bedgraph'.format(prefix, label)
                 for _, label in strands]

    genomecov_cmd = (
        'zcat -f {ta} | sort -k1,1 -k2,2n {sort_param} | '
        'bedtools genomecov -5 -bg -strand {strand} -g {chrsz} '
        '-i stdin > {bedgraph}'
    )
    for (strand, _), bedgraph in zip(strands, bedgraphs):
        run_shell_cmd(genomecov_cmd.format(
            ta=ta,
            sort_param=get_gnu_sort_param(mem_gb * 1024 ** 3, ratio=0.5),
            strand=strand,
            chrsz=chrsz,
            bedgraph=bedgraph,
        ))

    for bedgraph, bigwig in zip(bedgraphs, bigwigs):
        run_shell_cmd('bedGraphToBigWig {} {} {}'.format(
            bedgraph, chrsz, bigwig))

    # remove temporary files
    rm_f(list(bedgraphs))

    return bigwigs[0], bigwigs[1]
def _get_mapped_read_count(samstat_dict, label):
    """Return the mapped-read count from a parsed SAMstats dict.

    Accepts either the 'mapped' key (kept for backward compatibility) or
    the 'mapped_reads' key, in that order of preference.

    Raises:
        KeyError: if neither key is present in ``samstat_dict``.
    """
    for key in ('mapped', 'mapped_reads'):
        if key in samstat_dict:
            return samstat_dict[key]
    raise KeyError(
        'Neither "mapped" nor "mapped_reads" found in {} SAMstats '
        'QC.'.format(label))


def frac_mito(non_mito_samstat, mito_samstat, out_dir):
    """Write a QC file with the fraction of mitochondrial reads.

    Both SAMstats logs are parsed with ``parse_flagstat_qc``. The output
    file ``<prefix>.frac_mito.qc`` holds three tab-separated rows:
    ``non_mito_reads``, ``mito_reads`` and ``frac_mito_reads``, where the
    fraction is ``Rm / (Rn + Rm)``.

    Args:
        non_mito_samstat: Path to the SAMstats log of the non-mito BAM.
        mito_samstat: Path to the SAMstats log of the mito-only BAM.
        out_dir: Output directory.

    Returns:
        Path to the written ``.frac_mito.qc`` file.

    Raises:
        KeyError: if a parsed log has no mapped-read count.
        ZeroDivisionError: if both mapped-read counts are zero.
    """
    prefix = os.path.join(
        out_dir,
        os.path.basename(strip_ext(non_mito_samstat,
                                   'non_mito.samstats.qc')))
    frac_mito_qc = '{}.frac_mito.qc'.format(prefix)

    non_mito_samstat_dict = parse_flagstat_qc(non_mito_samstat)
    mito_samstat_dict = parse_flagstat_qc(mito_samstat)

    # Each dict is looked up independently, so a key found in one log is
    # never reused for the other, and a missing key fails with a clear error.
    Rn = _get_mapped_read_count(non_mito_samstat_dict, 'non-mito')
    Rm = _get_mapped_read_count(mito_samstat_dict, 'mito')

    frac = float(Rm)/float(Rn + Rm)
    with open(frac_mito_qc, 'w') as fp:
        fp.write('non_mito_reads\t{}\n'.format(Rn))
        fp.write('mito_reads\t{}\n'.format(Rm))
        fp.write('frac_mito_reads\t{}\n'.format(frac))

    return frac_mito_qc
def merge_fastqs(fastqs, end, out_dir):
    """Concatenate FASTQs into one gzipped file under ``<out_dir>/<end>``.

    The output is named after the first input: ``<name>.merged.fastq.gz``
    when several files are given, ``<name>.fastq.gz`` for a single file.
    Inputs are streamed through ``zcat -f`` and recompressed with
    ``gzip -nc``.

    Args:
        fastqs: List of FASTQ paths to merge, in order.
        end: Sub-directory name for this read end (e.g. 'R1', 'R2').
        out_dir: Parent output directory.

    Returns:
        Path to the merged FASTQ.
    """
    end_dir = os.path.join(out_dir, end)
    mkdir_p(end_dir)

    basename = os.path.basename(strip_ext_fastq(fastqs[0]))
    suffix = '.merged.fastq.gz' if len(fastqs) > 1 else '.fastq.gz'
    merged = os.path.join(end_dir, basename) + suffix

    inputs = ' '.join(fastqs)
    run_shell_cmd('zcat -f {} | gzip -nc > {}'.format(inputs, merged))
    return merged
log.info('Initializing and making output directory...') 81 | mkdir_p(args.out_dir) 82 | 83 | # update array with trimmed fastqs 84 | fastqs_R1 = [] 85 | fastqs_R2 = [] 86 | for fastqs in args.fastqs: 87 | fastqs_R1.append(fastqs[0]) 88 | if args.paired_end: 89 | fastqs_R2.append(fastqs[1]) 90 | 91 | log.info('Merging fastqs...') 92 | log.info('R1 to be merged: {}'.format(fastqs_R1)) 93 | merged_R1 = merge_fastqs(fastqs_R1, 'R1', args.out_dir) 94 | if args.paired_end: 95 | log.info('R2 to be merged: {}'.format(fastqs_R2)) 96 | merged_R2 = merge_fastqs(fastqs_R2, 'R2', args.out_dir) 97 | 98 | log.info('List all files in output directory...') 99 | ls_l(args.out_dir) 100 | 101 | log.info('All done.') 102 | 103 | 104 | if __name__ == '__main__': 105 | main() 106 | -------------------------------------------------------------------------------- /src/encode_task_pool_ta.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # ENCODE DCC TAGALIGN pooler wrapper 4 | # Author: Jin Lee (leepc12@gmail.com) 5 | 6 | import sys 7 | import os 8 | import argparse 9 | from encode_lib_common import ( 10 | log, ls_l, make_hard_link, mkdir_p, run_shell_cmd, strip_ext_ta) 11 | 12 | 13 | def parse_arguments(): 14 | parser = argparse.ArgumentParser(prog='ENCODE DCC TAGALIGN pooler.', 15 | description='') 16 | parser.add_argument('tas', nargs='+', type=str, 17 | help='List of TAGALIGNs to be pooled.') 18 | parser.add_argument('--prefix', type=str, 19 | help='Basename prefix.') 20 | parser.add_argument('--out-dir', default='', type=str, 21 | help='Output directory.') 22 | parser.add_argument('--col', 23 | help='Number of columns to keep in a pooled TAGALIGN. 
def pool_ta(tas, col, basename_prefix, out_dir):
    """Pool several TAGALIGN files into one gzipped TAGALIGN.

    Args:
        tas: List of TAGALIGN (or BED) paths; at least two are required.
        col: If not None, keep only columns ``1-col`` (via ``cut``).
        basename_prefix: Basename for the output file. When None, the
            basename of the first input (without TAGALIGN extension) is
            used.
        out_dir: Output directory.

    Returns:
        Path to ``<prefix>.pooled.tagAlign.gz``.

    Raises:
        ValueError: if fewer than two inputs are given.
    """
    if len(tas) <= 1:
        raise ValueError('Needs at least two TAs (or BEDs) to be pooled.')

    if basename_prefix is None:
        basename = os.path.basename(strip_ext_ta(tas[0]))
    else:
        basename = basename_prefix
    pooled_ta = '{}.pooled.tagAlign.gz'.format(
        os.path.join(out_dir, basename))

    # Assemble the shell pipeline stage by stage.
    stages = ['zcat -f {}'.format(' '.join(tas))]
    if col is not None:
        stages.append('cut -f 1-{}'.format(col))
    stages.append('gzip -nc > {}'.format(pooled_ta))

    run_shell_cmd(' | '.join(stages))
    return pooled_ta
def make_read_length_file(fastq, out_dir):
    """Write the read length of ``fastq`` to a text file in ``out_dir``.

    The file is named ``<fastq basename without extension>.read_length.txt``
    and contains the value returned by ``get_read_length``.

    Returns:
        Path to the written text file.
    """
    stem = os.path.basename(strip_ext_fastq(fastq))
    txt = os.path.join(out_dir, stem) + '.read_length.txt'
    length_text = str(get_read_length(fastq))
    with open(txt, 'w') as fp:
        fp.write(length_text)
    return txt
def parse_arguments():
    """Parse command-line arguments for the ATAC post_call_peak task.

    A missing ``--blacklist`` value, or one ending in 'null', is
    normalized to an empty string. Also sets the log level and logs the
    raw command line.

    Returns:
        The populated ``argparse.Namespace``.
    """
    # (flags, keyword arguments) for every argument, in declaration order
    arg_specs = [
        (('peak',), dict(
            type=str,
            help='Path for PEAK file. Peak filename should be "*.*Peak.gz". '
                 'e.g. rep1.narrowPeak.gz')),
        (('--ta',), dict(
            type=str,
            help='TAG-ALIGN file.')),
        (('--peak-type',), dict(
            type=str, required=True,
            choices=['narrowPeak', 'regionPeak',
                     'broadPeak', 'gappedPeak'],
            help='Peak file type.')),
        (('--chrsz',), dict(
            type=str,
            help='2-col chromosome sizes file.')),
        (('--blacklist',), dict(
            type=str,
            help='Blacklist BED file.')),
        (('--regex-bfilt-peak-chr-name',), dict(
            help='Keep chromosomes matching this pattern only '
                 'in .bfilt. peak files.')),
        (('--mem-gb',), dict(
            type=float, default=4.0,
            help='Max. memory for this job in GB. '
                 'This will be used to determine GNU sort -S (defaulting to 0.5 of this value). '
                 'It should be total memory for this task (not memory per thread).')),
        (('--out-dir',), dict(
            default='', type=str,
            help='Output directory.')),
        (('--log-level',), dict(
            default='INFO',
            choices=['NOTSET', 'DEBUG', 'INFO',
                     'WARNING', 'CRITICAL', 'ERROR',
                     'CRITICAL'],
            help='Log level')),
    ]

    parser = argparse.ArgumentParser(prog='ENCODE post_call_peak (atac)',
                                     description='')
    for flags, options in arg_specs:
        parser.add_argument(*flags, **options)

    args = parser.parse_args()
    blacklist = args.blacklist
    if blacklist is None or blacklist.endswith('null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the control TAGALIGN subsampler.

    Returns:
        The populated ``argparse.Namespace``.

    Raises:
        ValueError: if ``--subsample`` is missing, zero or negative.
    """
    parser = argparse.ArgumentParser(
        # The explanatory text belongs in description=, not prog=;
        # otherwise it is printed as the program name in the usage line.
        prog='ENCODE DCC control TAG-ALIGN subsampler.',
        description='This script does not check if number of reads in TA '
                    'is higher than subsampling number (--subsample). '
                    'If number of reads in TA is lower than subsampling '
                    'number then TA will be just shuffled.')
    parser.add_argument('ta', type=str,
                        help='Path for control TAGALIGN file.')
    parser.add_argument('--paired-end', action="store_true",
                        help='Paired-end TAGALIGN.')
    parser.add_argument('--subsample', default=0, type=int,
                        help='Number of reads to subsample.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO',
                                 'WARNING', 'CRITICAL', 'ERROR',
                                 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()
    # Reject negatives as well as the default 0: the message promises a
    # positive integer, and a truthiness test lets e.g. -5 through.
    if args.subsample <= 0:
        raise ValueError('--subsample should be a positive integer.')

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def trim_fastq(fastq, trim_bp, out_dir):
    """Trim reads in ``fastq`` to ``trim_bp`` base pairs with trimfastq.py.

    The output is ``<out_dir>/<fastq basename>.trim_<trim_bp>bp.fastq.gz``.
    If the trimmed output contains the message
    'sequences shorter than desired length', the original FASTQ is copied
    over the output instead.

    Args:
        fastq: Path to the FASTQ to trim.
        trim_bp: Target read length in base pairs.
        out_dir: Output directory.

    Returns:
        Path to the trimmed (or copied) FASTQ.
    """
    basename = os.path.basename(strip_ext_fastq(fastq))
    trimmed = '{}.trim_{}bp.fastq.gz'.format(
        os.path.join(out_dir, basename), trim_bp)

    run_shell_cmd(
        'python $(which trimfastq.py) {} {} | gzip -nc > {}'.format(
            fastq, trim_bp, trimmed))

    # if shorter than trim_bp
    # ('|| true' keeps the pipeline from failing when grep finds nothing)
    count_cmd = (
        'zcat -f {} | (grep \'sequences shorter than desired length\' '
        '|| true) | wc -l'
    ).format(trimmed)
    n_too_short = int(run_shell_cmd(count_cmd))
    if n_too_short > 0:
        copy_f_to_f(fastq, trimmed)

    return trimmed
--------------------------------------------------------------------------------