├── LICENSE ├── README.md ├── README.processing_description.md ├── TODO ├── docker ├── 1_make_docker.sh ├── 2_docker_push.sh ├── Dockerfile ├── Dockerfile.mgi ├── README.md └── vimrc ├── src ├── README.logs ├── evaluate_cases.sh ├── evaluate_tumorsamples.sh ├── execute_workflow.sh ├── gene_segment_overlap.py ├── get_unique.sh ├── make_analysis_description.sh ├── make_analysis_description_bytumorsample.sh ├── make_case_list.sh ├── make_case_list_bytumorsample.sh ├── make_dockermap.sh ├── make_dockermap_bytumorsample.sh ├── prep_gene_annotation.sh ├── prep_mappability.sh ├── process_cases.sh ├── process_tumorsamples.sh ├── push_git.sh ├── run_annotation.sh ├── run_norm.sh ├── run_norm_20210407.sh ├── run_segmentation.sh ├── start_docker.sh └── test_bc.sh └── testing ├── README.md ├── README.project_config ├── direct_call ├── README.md ├── execute_pipeline.C3L-chr.MGI │ ├── 0_start_docker.sh │ ├── 1_execute_workflow.sh │ ├── README.md │ └── project_config.execute_workflow.C3L-chr.MGI.sh ├── execute_pipeline.C3L-chr.katmai │ ├── 0_start_docker.sh │ ├── 1_execute_workflow.sh │ ├── README.md │ └── project_config.execute_workflow.C3L-chr.katmai.sh ├── prep_mappability.demo.katmai │ ├── 1_test_mappability.sh │ └── project_config.demo.sh ├── run_sample.C3L-chr.MGI │ ├── 0_start_docker.sh │ ├── 1_get_unique_reads.sh │ ├── 2_run_norm.sh │ ├── 3_run_segmentation.sh │ ├── 4_run_gene_annotation.sh │ ├── README.md │ ├── a_prep_gene_annotation.sh │ ├── project_config.run_sample.C3L-chr.MGI.sh │ └── run_all.sh └── run_sample.C3L-chr.katmai │ ├── 0_start_docker.sh │ ├── 1_get_unique_reads.sh │ ├── 2_run_norm.sh │ ├── 3_run_segmentation.sh │ ├── 4_run_gene_annotation.sh │ ├── README.md │ ├── a_prep_gene_annotation.sh │ ├── project_config.run_sample.C3L-chr.katmai.sh │ └── run_all.sh ├── docker_call ├── execute_pipeline.C3L-chr.MGI │ ├── 1_execute_workflow.sh │ ├── README.md │ └── project_config.execute_workflow.C3L-chr.MGI.sh ├── execute_pipeline.C3L-chr.katmai │ ├── 
1_execute_workflow.sh │ ├── README.md │ └── project_config.execute_workflow.C3L-chr.katmai.sh ├── execute_pipeline.LUAD-test.MGI │ ├── 1_execute_workflow.sh │ ├── README.md │ └── project_config.execute_workflow.C3L-chr.MGI.sh ├── run_cases.CCRCC-select.MGI │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_project_cases.sh │ ├── B.evaluate_project_cases.sh │ ├── README.md │ ├── dat │ │ ├── CaseList.dat │ │ ├── Dockermap.dat │ │ └── case_names.dat │ └── project_config.run_cases.LUAD.MGI.sh ├── run_cases.GBM-subset.katmai │ ├── .gitignore │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_project_cases.sh │ ├── B.evaluate_project_cases.sh │ ├── README.md │ ├── project_config-host.sh │ └── project_config.sh ├── run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1 │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_project_cases.sh │ ├── B.evaluate_project_cases.sh │ ├── C.make_analysis_description.sh │ ├── C3L-00987 │ ├── C3L-01237 │ ├── C3L-03378 │ ├── C3N-01752 │ ├── C3N-01754 │ ├── C3N-01756 │ ├── C3N-01758 │ ├── C3N-01943 │ ├── C3N-03042 │ ├── C3N-03180 │ ├── README.md │ ├── TODO │ ├── dat │ │ ├── 0408.evaluate.v1.txt │ │ ├── 0412.evaluate.v1.txt │ │ ├── CaseList.dat │ │ ├── Dockermap.dat │ │ ├── case_names.dat │ │ ├── start_docker_to_get_filesize.compute1.sh │ │ └── worklog │ ├── project_config-host.sh │ ├── project_config.sh │ └── start_docker_to_launch_jobs.compute1.sh ├── run_cases.LSCC-subset.katmai │ ├── .gitignore │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_project_cases.sh │ ├── B.evaluate_project_cases.sh │ ├── C.make_analysis_description.sh │ ├── README.md │ ├── project_config-host.sh │ └── project_config.sh ├── run_cases.LUAD.MGI │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_project_cases.sh │ ├── B.evaluate_project_cases.sh │ ├── 
README.md │ ├── dat │ │ ├── CaseList.dat │ │ ├── Dockermap.dat │ │ └── case_names.dat │ └── project_config.run_cases.LUAD.MGI.sh ├── run_cases.PDA.Y2.b2-noWXS │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_project_cases.sh │ ├── B.evaluate_project_cases.sh │ ├── C.make_analysis_description.sh │ ├── README.md │ ├── dat │ │ ├── CaseList.dat │ │ ├── Dockermap.dat │ │ ├── Dockermap_backup.dat │ │ ├── Y2.b1.analysis_description.dat │ │ └── case_names.dat │ ├── project_config-host.sh │ ├── project_config.sh │ └── src │ │ ├── make_analysis_description.sh │ │ ├── make_analysis_description2.sh │ │ └── make_case_list.sh ├── run_cases.PanCan58.compute1 │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_project_cases.sh │ ├── B.evaluate_project_cases.sh │ ├── C.make_analysis_description.sh │ ├── README.md │ ├── TODO │ ├── dat │ │ ├── CaseList.dat │ │ ├── Dockermap.dat │ │ ├── PanCan58.analysis_description.dat │ │ ├── case_names.dat │ │ └── worklog │ ├── project_config-host.sh │ ├── project_config.sh │ └── start_docker_to_launch_jobs.compute1.sh ├── run_cases.UCEC-test.katmai │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_project_cases.sh │ ├── B.evaluate_project_cases.sh │ ├── README.md │ ├── project_config-host.sh │ └── project_config.sh ├── run_cases.UCEC.rerun.compute1 │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_project_cases.sh │ ├── B.evaluate_project_cases.sh │ ├── C.make_analysis_description.sh │ ├── README.md │ ├── TODO │ ├── dat │ │ ├── 0319.evaluate.v1.txt │ │ ├── 0319.evaluate.v2.txt │ │ ├── 0319.evaluate.v3.txt │ │ ├── 0322.evaluate.v1.txt │ │ ├── 0322.evaluate.v2.txt │ │ ├── 0322.evaluate.v3.txt │ │ ├── 0322.evaluate.v4.txt │ │ ├── 0323.evaluate.v1.txt │ │ ├── 0323.evaluate.v2.txt │ │ ├── 0324.evaluate.v1.txt │ │ ├── 0324.evaluate.v2.txt │ │ ├── 0325.evaluate.v1.txt │ │ ├── 
0325.evaluate.v2.txt │ │ ├── 0325.evaluate.v3.txt │ │ ├── 0325.evaluate.v4.txt │ │ ├── 0326.evaluate.v1.txt │ │ ├── 0326.evaluate.v2.txt │ │ ├── 0326.evaluate.v3.txt │ │ ├── 0327.evaluate.v1.txt │ │ ├── 0328.evaluate.v1.txt │ │ ├── 0329.evaluate.v1.txt │ │ ├── 0329.evaluate.v2.txt │ │ ├── 0329.evaluate.v3.txt │ │ ├── 0330.evaluate.v1.txt │ │ ├── 0330.evaluate.v2.txt │ │ ├── 0330.evaluate.v3.txt │ │ ├── 0331.evaluate.v1.txt │ │ ├── 0331.evaluate.v2.txt │ │ ├── 0331.evaluate.v3.txt │ │ ├── 0331.evaluate.v4.txt │ │ ├── 0401.evaluate.v1.txt │ │ ├── 0401.evaluate.v2.txt │ │ ├── 0402.evaluate.v1.txt │ │ ├── 0402.evaluate.v2.txt │ │ ├── 0404.evaluate.v1.txt │ │ ├── 0405.evaluate.v1.txt │ │ ├── 0406.evaluate.v1.txt │ │ ├── 0407.evaluate.v1.txt │ │ ├── CaseList.dat │ │ ├── Dockermap.dat │ │ ├── UCEC.rerun.analysis_description.dat │ │ ├── UCEC_3.analysis_description.dat │ │ ├── backup │ │ │ ├── CaseList.dat │ │ │ ├── Dockermap.dat │ │ │ └── case_names.dat │ │ ├── case_names.dat │ │ ├── start_docker_to_get_filesize.compute1.sh │ │ └── worklog │ ├── project_config-host.sh │ ├── project_config.sh │ └── start_docker_to_launch_jobs.compute1.sh ├── run_cases.UCEC_3.compute1 │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_project_cases.sh │ ├── B.evaluate_project_cases.sh │ ├── C.make_analysis_description.sh │ ├── README.md │ ├── TODO │ ├── dat │ │ ├── CaseList.dat │ │ ├── Dockermap.dat │ │ └── case_names.dat │ ├── project_config-host.sh │ ├── project_config.sh │ └── start_docker_to_launch_jobs.compute1.sh ├── run_cases.Y2.b1.katmai │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_project_cases.sh │ ├── B.evaluate_project_cases.sh │ ├── C.make_analysis_description.sh │ ├── README.md │ ├── dat │ │ ├── CaseList.dat │ │ ├── Dockermap.dat │ │ ├── Y2.b1.analysis_description.dat │ │ └── case_names.dat │ ├── project_config-host.sh │ ├── project_config.sh │ └── src │ │ ├── 
make_analysis_description.sh │ │ ├── make_analysis_description2.sh │ │ └── make_case_list.sh ├── run_cases.Y2.b2.katmai │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_project_cases.sh │ ├── B.evaluate_project_cases.sh │ ├── C.make_analysis_description.sh │ ├── README.md │ ├── dat │ │ ├── 0324.evaluate.txt │ │ ├── CaseList.dat │ │ ├── Dockermap.dat │ │ ├── Y2.b1.analysis_description.dat │ │ ├── Y2.b2.analysis_description.20190530.dat │ │ ├── Y2.b2.analysis_description.20191021.v1.dat │ │ ├── Y2.b2.analysis_description.dat │ │ └── case_names.dat │ ├── project_config-host.sh │ ├── project_config.sh │ └── src │ │ ├── make_analysis_description.20191021.v1.sh │ │ ├── make_analysis_description.sh │ │ ├── make_analysis_description2.sh │ │ └── make_case_list.sh ├── run_cases.Y3.620.compute1 │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_project_cases.sh │ ├── B.evaluate_project_cases.sh │ ├── C.make_analysis_description.sh │ ├── README.md │ ├── TODO │ ├── dat │ │ ├── CaseList.dat │ │ ├── Dockermap.dat │ │ ├── WGS_CNV_Somatic.Y3.620.analysis_description.dat │ │ ├── case_names.dat │ │ └── worklog │ ├── project_config-host.sh │ ├── project_config.sh │ └── start_docker_to_launch_jobs.compute1.sh ├── run_cases.Y3.620.rerun1.compute1 │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_project_cases.sh │ ├── B.evaluate_project_cases.sh │ ├── C.make_analysis_description.sh │ ├── README.md │ ├── TODO │ ├── dat │ │ ├── CaseList.dat │ │ ├── Dockermap.dat │ │ ├── case_names.dat │ │ └── worklog │ ├── project_config-host.sh │ ├── project_config.sh │ └── start_docker_to_launch_jobs.compute1.sh ├── run_cases.Y3.UCEC.178.compute1 │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_project_cases.sh │ ├── B.evaluate_project_cases.sh │ ├── C.make_analysis_description.sh │ ├── README.md │ ├── TODO │ ├── dat │ │ ├── 
CaseList.dat │ │ ├── Dockermap.dat │ │ ├── WGS_CNV_Somatic.Y3.UCEC.178.analysis_description.dat │ │ ├── case_names.dat │ │ └── worklog │ ├── project_config-host.sh │ ├── project_config.sh │ └── start_docker_to_launch_jobs.compute1.sh ├── run_cases.Y3.UCEC.Discovery.22.compute1 │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_project_cases.sh │ ├── B.evaluate_project_cases.sh │ ├── C.make_analysis_description.sh │ ├── README.md │ ├── TODO │ ├── dat │ │ ├── CaseList.dat │ │ ├── Dockermap.dat │ │ ├── UCEC.Discovery.22.analysis_description.dat │ │ ├── case_names.dat │ │ └── worklog │ ├── project_config-host.sh │ ├── project_config.sh │ └── start_docker_to_launch_jobs.compute1.sh ├── run_cases.Y3.b1.PDA.rerun.compute1 │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_project_cases.sh │ ├── B.evaluate_project_cases.sh │ ├── C.make_analysis_description.sh │ ├── README.md │ ├── TODO │ ├── dat │ │ ├── CaseList.dat │ │ ├── Dockermap.dat │ │ ├── Weirdp1.txt │ │ ├── case_names.dat │ │ └── worklog │ ├── project_config-host.sh │ ├── project_config.sh │ └── start_docker_interactive.sh ├── run_cases.Y3.b1.PDA.rerun2.compute1 │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_project_cases.sh │ ├── B.evaluate_project_cases.sh │ ├── C.make_analysis_description.sh │ ├── README.md │ ├── TODO │ ├── dat │ │ ├── CaseList.dat │ │ ├── Dockermap.dat │ │ ├── Weirdp1.txt │ │ ├── case_names.dat │ │ └── worklog │ ├── project_config-host.sh │ ├── project_config.sh │ └── start_docker_interactive.sh ├── run_cases.Y3.b1.PDA.rerun3.compute1 │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_project_cases.sh │ ├── B.evaluate_project_cases.sh │ ├── C.make_analysis_description.sh │ ├── README.md │ ├── TODO │ ├── dat │ │ ├── CaseList.dat │ │ ├── Dockermap.dat │ │ ├── Weirdp1.txt │ │ ├── case_names.dat │ │ └── worklog │ ├── 
project_config-host.sh │ ├── project_config.sh │ └── start_docker_interactive.sh ├── run_cases.Y3.b1.PDA.rerun4.compute1 │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_project_cases.sh │ ├── B.evaluate_project_cases.sh │ ├── C.make_analysis_description.sh │ ├── README.md │ ├── TODO │ ├── dat │ │ ├── CaseList.dat │ │ ├── Dockermap.dat │ │ ├── Weirdp1.txt │ │ ├── case_names.dat │ │ └── worklog │ ├── project_config-host.sh │ ├── project_config.sh │ └── start_docker_interactive.sh ├── run_cases.Y3.b1.compute1 │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_project_cases.sh │ ├── B.evaluate_project_cases.sh │ ├── C.make_analysis_description.sh │ ├── README.md │ ├── TODO │ ├── dat │ │ ├── 1.WGS_CNV_Somatic.to_process.dat │ │ ├── CaseList.dat │ │ ├── Dockermap.dat │ │ ├── Y3.b1.analysis_description.dat │ │ ├── case_names.dat │ │ └── case_names_not_run.dat │ ├── project_config-host.sh │ ├── project_config.sh │ └── start_docker_interactive.sh ├── run_samples.CCRCC.ITH.compute1 │ ├── 1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_tumorsamples.sh │ ├── B.evaluate_tumorsamples.sh │ ├── C.make_analysis_description.sh │ ├── README.md │ ├── TODO │ ├── dat │ │ ├── CCRCC.ITH.analysis_description.20210219.dat │ │ ├── CCRCC.ITH.analysis_description.dat │ │ ├── CaseList.dat │ │ ├── Dockermap.dat │ │ ├── WGS_SV.CCRCC_ITH.dat │ │ ├── tumor_sample_names.dat │ │ └── worklog │ ├── project_config-host.sh │ ├── project_config.sh │ └── start_docker_to_launch_jobs.compute1.sh └── run_samples.CCRCC.ITH.rerun.compute1 │ ├── 0319.evaluate.v1.txt │ ├── 0322.evaluate.v1.txt │ ├── 0322.evaluate.v2.txt │ ├── 0322.evaluate.v3.txt │ ├── 0323.evaluate.v1.txt │ ├── 0323.evaluate.v2.txt │ ├── 0324.evaluate.v1.txt │ ├── 0324.evaluate.v2.txt │ ├── 0325.evaluate.v1.txt │ ├── 0325.evaluate.v2.txt │ ├── 0405.evaluate.v1.txt │ ├── 0406.evaluate.v1.txt │ ├── 
1_make_case_names.sh │ ├── 2_make_dockermap.sh │ ├── 3_make_case_list.sh │ ├── A.process_tumorsamples.sh │ ├── B.evaluate_tumorsamples.sh │ ├── C.make_analysis_description.sh │ ├── README.md │ ├── TODO │ ├── dat │ ├── CCRCC.ITH.analysis_description.20210219.dat │ ├── CCRCC.ITH.analysis_description.dat │ ├── CaseList.dat │ ├── Dockermap.dat │ ├── ITH_genomic_Identifier.tsv │ ├── WGS_SV.CCRCC_ITH.dat │ ├── tumor_sample_names.ITH.all.dat │ ├── tumor_sample_names.dat │ └── worklog │ ├── project_config-host.sh │ ├── project_config.sh │ └── start_docker_to_launch_jobs.compute1.sh └── test_data ├── chromosomes.18-20.dat ├── chromosomes.20.dat ├── chromosomes.8.11.dat └── chromosomes.dat /README.processing_description.md: -------------------------------------------------------------------------------- 1 | # BIC-Seq2 2 | 3 | BICSEQ2 pipeline, version 2.0 4 | 5 | ## Processing description 6 | We used BIC-seq2 (Xi et al., 2016), a read-depth-based CNV calling algorithm to 7 | detect somatic copy number variation (CNVs) from the WGS data of tumors. 8 | Briefly, BIC-seq2 divides genomic regions into disjoint bins and counts 9 | uniquely aligned reads in each bin. Then, it combines neighboring bins into 10 | genomic segments with similar copy numbers iteratively based on Bayesian 11 | Information Criteria (BIC), a statistical criterion measuring both the fitness 12 | and complexity of a statistical model. 13 | 14 | We used paired-sample CNV calling that takes a pair of samples as input and 15 | detects genomic regions with different copy numbers between the two samples. We 16 | used a bin size of ∼100 bp and a lambda of 3 (a smoothing parameter for CNV 17 | segmentation). We recommend to call segments as copy gain or loss when their 18 | log2 copy ratios were larger than 0.2 or smaller than −0.2, respectively 19 | (according to the BIC-seq publication). 20 | 21 | ## Processing pipeline 22 | This is a docker implementation. 
23 | Github: https://github.com/mwyczalkowski/BICSEQ2.git 24 | 25 | ## Input data 26 | 27 | WGS tumor and normal 28 | 29 | ## Output 30 | 31 | * CNV file. Output of Segmentation step. Per-segment log2(copy ratio) 32 | * SEG file. Per-gene log2(copy ratio) 33 | 34 | # Contact: 35 | 36 | * Yige Wu 37 | * Matt Wyczalkowski 38 | 39 | -------------------------------------------------------------------------------- /docker/1_make_docker.sh: -------------------------------------------------------------------------------- 1 | IMAGE="mwyczalkowski/bicseq2" 2 | 3 | # Build needs to take place in root directory of project 4 | cd .. 5 | docker build -f docker/Dockerfile -t $IMAGE . 6 | -------------------------------------------------------------------------------- /docker/2_docker_push.sh: -------------------------------------------------------------------------------- 1 | IMAGE="mwyczalkowski/bicseq2" 2 | docker push $IMAGE 3 | 4 | -------------------------------------------------------------------------------- /docker/Dockerfile.mgi: -------------------------------------------------------------------------------- 1 | # Dockerfile for MGI-specific modifications 2 | FROM mwyczalkowski/somatic-wrapper:latest 3 | 4 | #docker build . 
5 | #docker tag registry.gsc.wustl.edu// 6 | #docker push registry.gsc.wustl.edu// 7 | 8 | # Also, mysql is now installed by default, so get rid of the code below 9 | 10 | USER root 11 | 12 | # This is required to play well at MGI 13 | # MGI also does not respect USER directive, so /usr/local/somoaticwrapper is immutable 14 | RUN apt-get update \ 15 | && apt-get install -y libnss-sss\ 16 | && apt-get clean 17 | 18 | COPY mgi-init/mgi-sw.bashrc /home/sw/.bashrc 19 | #COPY mgi-init/mgi-sw_start.sh /home/sw/ 20 | 21 | USER sw 22 | 23 | CMD ["/bin/bash", "/home/sw/mgi-sw_start.sh"] 24 | 25 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | Dockerfile from shiso:/Users/mwyczalk/Projects/Docker/MGI-basic 2 | 3 | Note that docker build changed significantly in v2, and uses an apt-based 4 | installation rather than conda-based. This switch was motivated by a successful attempt 5 | to compile samtools, which is not in fact necessary; details can be found in revision 6 | history. 7 | -------------------------------------------------------------------------------- /docker/vimrc: -------------------------------------------------------------------------------- 1 | syntax enable 2 | set tabstop=4 3 | set shiftwidth=4 4 | set expandtab 5 | set autowrite 6 | set ruler 7 | set nohlsearch 8 | 9 | set textwidth=0 10 | set nocindent 11 | 12 | " colorscheme apprentice 13 | -------------------------------------------------------------------------------- /src/README.logs: -------------------------------------------------------------------------------- 1 | Notes on generating log files during workflow. 
2 | 3 | * All BICSEQ2 logs are saved to stderr 4 | * Principal scripts will indicate status of analysis by writing lines of this format to STDERR 5 | BS2:status [timestamp] script message 6 | (tab delimited), where status is one of 7 | * START 8 | * SUCCESS 9 | * ERROR 10 | * COMPLETE 11 | 12 | COMPLETE indicates a partial completion, e.g. an end of an iteration of a loop, 13 | but not the successful end of the complete workflow. 14 | 15 | START tag will be written upon start of script, but it can be delayed until 16 | after argument parsing complete, to allow for e.g. CASE to be added. In the 17 | case of an argument parsing error START will not be written. 18 | 19 | One script can have multiple START tags, indicating, e.g., looping over cases. 20 | However, a script must write exactly one SUCCESS tag, indicating successful 21 | completion of loop Generally expect one (or zero) ERROR tags, but OK if 22 | multiple occur 23 | -------------------------------------------------------------------------------- /src/push_git.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | batchName=$1 4 | version=$2 5 | 6 | git add . 7 | git commit -m ${version} 8 | git checkout -b ${batchName} 9 | git push origin ${batchName} 10 | -------------------------------------------------------------------------------- /src/test_bc.sh: -------------------------------------------------------------------------------- 1 | # Bash and GNU bc 2 | a=1.4 3 | b=2.5 4 | #if (( $(bc <<<'$a < $b') )); then 5 | if (( $(echo "$a < $b" | bc -l) )); then 6 | echo '1.4 is less than 2.5.' 7 | fi 8 | -------------------------------------------------------------------------------- /testing/README.project_config: -------------------------------------------------------------------------------- 1 | Project_config files may evolve over time. Having common elements would be helpful. 
2 | 3 | For now, main difference between them, aside from project names, is that GENE_BED may be obtained either 4 | from workflow directory or from mapped directory (e.g. data4, as is done in execute_workflow) 5 | 6 | 7 | * early Jan 2019 8 | Starting with project_config.execute_workflow.C3L-chr.MGI.sh, OUTD is changed from /data1/PROJECT to /data1, 9 | and it no longer has `mkdir`. It is the responsibility of the calling container to create the appropriate 10 | directories. This is done so that host-based log files can be associated with the run output 11 | Project is not defined in project_config right now 12 | 13 | * 1/11/19 14 | Output base directory is set in script via argument rather than defined in project_config; by default it is /data1 15 | Renamed to OUTD_BASE. Reference implementation: 16 | -> /docker_call/run_cases.LUAD.MGI/project_config.run_cases.LUAD.MGI.sh 17 | 18 | 19 | * 1/17/19 20 | Change "PDF" -> "PNG" 21 | 22 | * 1/19/19 23 | Working in run_cases.UCEC-test.katmai, significant changes to project_config.sh structure. Includes moving 24 | host-specific details to project_config-host.sh 25 | -------------------------------------------------------------------------------- /testing/direct_call/execute_pipeline.C3L-chr.MGI/0_start_docker.sh: -------------------------------------------------------------------------------- 1 | BICSEQ2="/gscuser/mwyczalk/projects/BICSEQ2" 2 | 3 | CONFIG="project_config.execute_pipeline.C3L-chr.MGI.sh" 4 | source $CONFIG 5 | 6 | OUTD="/gscmnt/gc2508/dinglab/mwyczalk/BICSEQ2-dev.tmp/$PROJECT " 7 | >&2 echo Output directory: $OUTD 8 | mkdir -p $OUTD 9 | 10 | # See README.md for details. 
Paths specific to MGI 11 | bash $BICSEQ2/src/start_docker.sh $@ -M \ 12 | $OUTD \ 13 | /gscmnt/gc2521/dinglab/yigewu/Projects/CPTAC3CNV/BICSEQ2/inputs \ 14 | /gscmnt/gc2619/dinglab_cptac3/GDC_import/data \ 15 | /gscmnt/gc2508/dinglab/mwyczalk/BICSEQ2-dev.tmp/cached.annotation 16 | 17 | # Tip: run this command within a tmux session for long runs 18 | -------------------------------------------------------------------------------- /testing/direct_call/execute_pipeline.C3L-chr.MGI/1_execute_workflow.sh: -------------------------------------------------------------------------------- 1 | BICSEQ2="/gscuser/mwyczalk/projects/BICSEQ2" 2 | 3 | # bash execute_pipeline [options] PROJECT_CONFIG CASE_NAME SN_TUMOR TUMOR_BAM SN_NORMAL NORMAL_BAM 4 | PROJECT_CONFIG="project_config.execute_workflow.C3L-chr.MGI.sh" 5 | source $PROJECT_CONFIG 6 | 7 | CASE_NAME="C3L-00006" 8 | SN_NORMAL="C3L-00006.WGS.N.hg38" 9 | SN_TUMOR="C3L-00006.WGS.T.hg38" 10 | 11 | # From MGI.BamMap.dat 12 | # C3L-00001.WGS.N.hg38 C3L-00001 LUAD WGS blood_normal /gscmnt/gc2619/dinglab_cptac3/GDC_import/data/1d301dc5-ebb2-47e0-9a9f-e31ed41b4542/2595f8ca-ef17-4bf0-984d-27caaa8ee608_gdc_realn.bam 202924825766 BAM hg38 1d301dc5-ebb2-47e0-9a9f-e31ed41b4542 MGI 13 | # C3L-00001.WGS.T.hg38 C3L-00001 LUAD WGS tumor /gscmnt/gc2619/dinglab_cptac3/GDC_import/data/b919a0f4-c85d-4fe0-9947-2b8cb9b9a2b4/1cc7a20f-b05e-4661-95ec-399b3080a02b_gdc_realn.bam 200258660209 BAM hg38 b919a0f4-c85d-4fe0-9947-2b8cb9b9a2b4 MGI 14 | 15 | # Assume /data3 maps to /gscmnt/gc2619/dinglab_cptac3/GDC_import/data 16 | NORMAL_BAM="/data3/1d301dc5-ebb2-47e0-9a9f-e31ed41b4542/2595f8ca-ef17-4bf0-984d-27caaa8ee608_gdc_realn.bam" 17 | TUMOR_BAM="/data3/b919a0f4-c85d-4fe0-9947-2b8cb9b9a2b4/1cc7a20f-b05e-4661-95ec-399b3080a02b_gdc_realn.bam" 18 | 19 | OUTD="/data1/$PROJECT" 20 | 21 | LANG="C" 22 | # On MGI, convenient to develop on non-image version 23 | bash $BICSEQ2/src/execute_workflow.sh $@ -o $OUTD $PROJECT_CONFIG $CASE_NAME $SN_TUMOR $TUMOR_BAM 
$SN_NORMAL $NORMAL_BAM 24 | -------------------------------------------------------------------------------- /testing/direct_call/execute_pipeline.C3L-chr.MGI/README.md: -------------------------------------------------------------------------------- 1 | # `execute_pipeline.C3L-chr.katmai` 2 | 3 | Test complete workflow on C3L-chr dataset. 4 | 5 | Based on "../run_sample.C3L-chr.katmai" 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /testing/direct_call/execute_pipeline.C3L-chr.katmai/0_start_docker.sh: -------------------------------------------------------------------------------- 1 | BICSEQ2="/home/mwyczalk_test/Projects/BICSEQ2" 2 | 3 | # gene annotation file: 4 | # /diskmnt/Projects/CPTAC3CNV/gatk4wxscnv/inputs/gencode.v29.annotation.hg38.p12.protein_coding.bed 5 | 6 | CONFIG="project_config.execute_pipeline.C3L-chr.katmai.sh" 7 | source $CONFIG 8 | 9 | OUTD="/diskmnt/Datasets/BICSEQ2-dev.tmp/run_sample.C3L-chr.katmai" 10 | >&2 echo Output directory: $OUTD 11 | mkdir -p $OUTD 12 | # See README.md for details. 
Paths specific to katmai 13 | bash $BICSEQ2/src/start_docker.sh $@ \ 14 | $OUTD \ 15 | /diskmnt/Projects/CPTAC3CNV/BICSEQ2/inputs \ 16 | /diskmnt/Projects/cptac_downloads_5/GDC_import/data \ 17 | /diskmnt/Projects/CPTAC3CNV/gatk4wxscnv/inputs 18 | 19 | # Tip: run this command within a tmux session for long runs 20 | -------------------------------------------------------------------------------- /testing/direct_call/execute_pipeline.C3L-chr.katmai/1_execute_workflow.sh: -------------------------------------------------------------------------------- 1 | 2 | # From katmai.BamMap.dat 3 | # C3L-00006.WGS.N.hg38 C3L-00006 UCEC WGS blood_normal /diskmnt/Projects/cptac_downloads_5/GDC_import/data/9f29ebe1-de5d-47a8-a54d-d1e8441409c6/92b5e534-6cb0-43eb-8147-ce7d18526f5e_gdc_realn.bam 220869345161 BAM hg38 9f29ebe1-de5d-47a8-a54d-d1e8441409c6 katmai 4 | # C3L-00006.WGS.T.hg38 C3L-00006 UCEC WGS tumor /diskmnt/Projects/cptac_downloads_5/GDC_import/data/457f2c4d-ddf3-416e-bb50-b112eede02d5/d9975c5f-288d-417d-bdb3-f490d9a36401_gdc_realn.bam 252294227835 BAM hg38 457f2c4d-ddf3-416e-bb50-b112eede02d5 katmai 5 | 6 | # bash execute_pipeline [options] PROJECT_CONFIG CASE_NAME SN_TUMOR TUMOR_BAM SN_NORMAL NORMAL_BAM 7 | PROJECT_CONFIG="project_config.execute_workflow.C3L-chr.katmai.sh" 8 | CASE_NAME="C3L-00006" 9 | SN_NORMAL="C3L-00006.WGS.N.hg38" 10 | SN_TUMOR="C3L-00006.WGS.T.hg38" 11 | 12 | # Assume /data3 maps to /diskmnt/Projects/cptac_downloads_5/GDC_import/data 13 | NORMAL_BAM="/data3/9f29ebe1-de5d-47a8-a54d-d1e8441409c6/92b5e534-6cb0-43eb-8147-ce7d18526f5e_gdc_realn.bam" 14 | TUMOR_BAM="/data3/457f2c4d-ddf3-416e-bb50-b112eede02d5/d9975c5f-288d-417d-bdb3-f490d9a36401_gdc_realn.bam" 15 | 16 | bash /BICSEQ2/src/execute_workflow.sh $@ $PROJECT_CONFIG $CASE_NAME $SN_TUMOR $TUMOR_BAM $SN_NORMAL $NORMAL_BAM 17 | -------------------------------------------------------------------------------- /testing/direct_call/execute_pipeline.C3L-chr.katmai/README.md: 
-------------------------------------------------------------------------------- 1 | # `execute_pipeline.C3L-chr.katmai` 2 | 3 | Test complete workflow on C3L-chr dataset. 4 | 5 | Based on "../run_sample.C3L-chr.katmai" 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /testing/direct_call/prep_mappability.demo.katmai/1_test_mappability.sh: -------------------------------------------------------------------------------- 1 | # Run make_mappability.sh from within docker. Typically, start docker first with 0_start_docker.sh 2 | 3 | #Before running Demo data, be sure to uncompress reference: 4 | #``` 5 | #cd demo_data 6 | #tar -xvjf Homo_sapiens_assembly19.COST16011_region.fa.tar.bz2 7 | #``` 8 | 9 | source project_config.demo.sh 10 | 11 | # process test data 12 | bash /BICSEQ2/src/make_mappability.sh $REF $MAPD $CHRLIST 13 | 14 | # *TODO* be able to test GRCh38 with project_config.sh 15 | # Note that it failed to run, see README.md for output 16 | # GRCh38 17 | # bash make_mappability.sh /data/Reference/GRCh38.d1.vd1.fa /Reference/GRCh38.d1.vd1.fa/mappability 18 | -------------------------------------------------------------------------------- /testing/direct_call/run_sample.C3L-chr.MGI/0_start_docker.sh: -------------------------------------------------------------------------------- 1 | BICSEQ2="/gscuser/mwyczalk/projects/BICSEQ2" 2 | 3 | CONFIG="project_config.run_sample.C3L-chr.MGI.sh" 4 | source $CONFIG 5 | 6 | OUTD="/gscmnt/gc2508/dinglab/mwyczalk/BICSEQ2-dev.tmp/$PROJECT " 7 | >&2 echo Output directory: $OUTD 8 | mkdir -p $OUTD 9 | 10 | # See README.md for details. 
Paths specific to MGI 11 | bash $BICSEQ2/src/start_docker.sh $@ -M \ 12 | $OUTD \ 13 | /gscmnt/gc2521/dinglab/yigewu/Projects/CPTAC3CNV/BICSEQ2/inputs \ 14 | /gscmnt/gc2521/dinglab/yigewu/Projects/CPTAC3CNV/BICSEQ2/inputs \ 15 | /gscmnt/gc2619/dinglab_cptac3/GDC_import/data 16 | 17 | # Tip: run this command within a tmux session for long runs 18 | -------------------------------------------------------------------------------- /testing/direct_call/run_sample.C3L-chr.MGI/1_get_unique_reads.sh: -------------------------------------------------------------------------------- 1 | # Run get_unique step on MantaDemo test data 2 | # Direct (not parallel) evaluation 3 | 4 | # Because user directories are mapped on MGI, CONFIG points to the host (rather than container) path to project config file 5 | CONFIG="/gscuser/mwyczalk/projects/BICSEQ2/testing/direct_call/run_sample.C3L-chr.MGI/project_config.run_sample.C3L-chr.MGI.sh" 6 | 7 | # MGI-specific setup 8 | export LANG=C 9 | 10 | # From MGI.BamMap.dat 11 | # C3L-00001.WGS.N.hg38 C3L-00001 LUAD WGS blood_normal /gscmnt/gc2619/dinglab_cptac3/GDC_import/data/1d301dc5-ebb2-47e0-9a9f-e31ed41b4542/2595f8ca-ef17-4bf0-984d-27caaa8ee608_gdc_realn.bam 202924825766 BAM hg38 1d301dc5-ebb2-47e0-9a9f-e31ed41b4542 MGI 12 | # C3L-00001.WGS.T.hg38 C3L-00001 LUAD WGS tumor /gscmnt/gc2619/dinglab_cptac3/GDC_import/data/b919a0f4-c85d-4fe0-9947-2b8cb9b9a2b4/1cc7a20f-b05e-4661-95ec-399b3080a02b_gdc_realn.bam 200258660209 BAM hg38 b919a0f4-c85d-4fe0-9947-2b8cb9b9a2b4 MGI 13 | 14 | # Assume /data3 maps to /gscmnt/gc2619/dinglab_cptac3/GDC_import/data 15 | NORMAL="/data4/1d301dc5-ebb2-47e0-9a9f-e31ed41b4542/2595f8ca-ef17-4bf0-984d-27caaa8ee608_gdc_realn.bam" 16 | TUMOR="/data4/b919a0f4-c85d-4fe0-9947-2b8cb9b9a2b4/1cc7a20f-b05e-4661-95ec-399b3080a02b_gdc_realn.bam" 17 | 18 | # get_unique.sh [options] SAMPLE_NAME PROJECT_CONFIG BAM 19 | 20 | bash /BICSEQ2/src/get_unique.sh $@ C3L-00001.WGS.T.hg38 $CONFIG $TUMOR 21 | bash /BICSEQ2/src/get_unique.sh 
$@ C3L-00001.WGS.N.hg38 $CONFIG $NORMAL 22 | 23 | -------------------------------------------------------------------------------- /testing/direct_call/run_sample.C3L-chr.MGI/2_run_norm.sh: -------------------------------------------------------------------------------- 1 | # Execute normalization step on two samples on katmai 2 | 3 | # Because user directories are mapped on MGI, CONFIG points to the host (rather than container) path to project config file 4 | CONFIG="/gscuser/mwyczalk/projects/BICSEQ2/testing/direct_call/run_sample.C3L-chr.MGI/project_config.run_sample.C3L-chr.MGI.sh" 5 | 6 | BICSEQ2="/gscuser/mwyczalk/projects/BICSEQ2" 7 | 8 | # MGI-specific setup 9 | export LANG=C 10 | 11 | # Tip: to debug norm-config file before processing, run with flags -dw, 12 | # check / edit config file as necessary, and run with -C config.txt flag to pass config explicitly 13 | 14 | # bash run_norm.sh [options] SAMPLE_NAME PROJECT_CONFIG 15 | 16 | bash $BICSEQ2/src/run_norm.sh $@ C3L-00001.WGS.T.hg38 $CONFIG 17 | bash $BICSEQ2/src/run_norm.sh $@ C3L-00001.WGS.N.hg38 $CONFIG 18 | 19 | -------------------------------------------------------------------------------- /testing/direct_call/run_sample.C3L-chr.MGI/3_run_segmentation.sh: -------------------------------------------------------------------------------- 1 | # Execute segmentation step using tumor/normal as case/control 2 | 3 | # Because user directories are mapped on MGI, CONFIG points to the host (rather than container) path to project config file 4 | CONFIG="/gscuser/mwyczalk/projects/BICSEQ2/testing/direct_call/run_sample.C3L-chr.MGI/project_config.run_sample.C3L-chr.MGI.sh" 5 | 6 | # MGI-specific setup 7 | export LANG=C 8 | 9 | 10 | # bash run_segmentation.sh [options] SAMPLE_NAME.CASE SAMPLE_NAME.CONTROL PROJECT_CONFIG 11 | 12 | # Tip: to debug seg-config file before processing, run with flags -dw, 13 | # check / edit config file as necessary, and run with -C config.txt flag to pass config explicitly 14 | 15 | 
CASE_NAME="C3L-00001" 16 | 17 | bash /BICSEQ2/src/run_segmentation.sh $@ -s $CASE_NAME C3L-00001.WGS.T.hg38 C3L-00001.WGS.N.hg38 $CONFIG 18 | 19 | -------------------------------------------------------------------------------- /testing/direct_call/run_sample.C3L-chr.MGI/4_run_gene_annotation.sh: -------------------------------------------------------------------------------- 1 | # execute run_annotation step on katmai 2 | 3 | # Because user directories are mapped on MGI, CONFIG points to the host (rather than container) path to project config file 4 | CONFIG="/gscuser/mwyczalk/projects/BICSEQ2/testing/direct_call/run_sample.C3L-chr.MGI/project_config.run_sample.C3L-chr.MGI.sh" 5 | 6 | # MGI-specific setup 7 | LANG="" 8 | PYTHONPATH="" 9 | 10 | CASE_NAME="C3L-00001" 11 | 12 | bash /BICSEQ2/src/run_annotation.sh $@ $CASE_NAME $CONFIG 13 | 14 | -------------------------------------------------------------------------------- /testing/direct_call/run_sample.C3L-chr.MGI/a_prep_gene_annotation.sh: -------------------------------------------------------------------------------- 1 | # Run prep_gene_annotation.sh from within docker. 
Typically, start docker first with 0_start_docker.sh 2 | 3 | # Because user directories are mapped on MGI, CONFIG points to the host (rather than container) path to project config file 4 | CONFIG="/gscuser/mwyczalk/projects/BICSEQ2/testing/direct_call/run_sample.C3L-chr.MGI/project_config.run_sample.C3L-chr.MGI.sh" 5 | source $CONFIG 6 | 7 | # MGI-specific setup 8 | export LANG=C 9 | 10 | GFF_URL="ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_29/gencode.v29.annotation.gff3.gz" 11 | 12 | # Creates $GENE_BED defined in project_config 13 | 14 | # prep_gene_annotation.sh [options] GFF_URL BED_OUT 15 | bash /BICSEQ2/src/prep_gene_annotation.sh $@ $GFF_URL $GENE_BED 16 | 17 | -------------------------------------------------------------------------------- /testing/direct_call/run_sample.C3L-chr.MGI/run_all.sh: -------------------------------------------------------------------------------- 1 | function launch { 2 | STEP=$1 3 | F=$2 4 | NOW=$(date) 5 | >&2 echo [ $NOW ] Launching $STEP 6 | bash $STEP $F 7 | rc=$? 8 | if [[ $rc != 0 ]]; then 9 | >&2 echo Fatal ERROR $rc: $!. Exiting. 10 | exit $rc; 11 | fi 12 | } 13 | 14 | # Not clear how to propagate -d etc. 
15 | 16 | #FLAG="-d" 17 | launch 1_get_unique_reads.sh $FLAG 18 | launch 2_run_norm.sh $FLAG 19 | launch 3_run_segmentation.sh $FLAG 20 | launch 4_run_gene_annotation.sh $FLAG 21 | 22 | -------------------------------------------------------------------------------- /testing/direct_call/run_sample.C3L-chr.katmai/0_start_docker.sh: -------------------------------------------------------------------------------- 1 | BICSEQ2="/home/mwyczalk_test/Projects/BICSEQ2" 2 | 3 | CONFIG="project_config.run_sample.C3L-chr.katmai.sh" 4 | source $CONFIG 5 | 6 | PROJECT="run_cases.UCEC-test" 7 | OUTBASE_H="/diskmnt/Datasets/BICSEQ2-dev.tmp" 8 | OUTD="$OUTBASE_H/$PROJECT" 9 | 10 | # for testing , define outd as for run_cases.UCEC-test 11 | #OUTD="/diskmnt/Datasets/BICSEQ2-dev.tmp/run_sample.C3L-chr.katmai" 12 | >&2 echo Output directory: $OUTD 13 | mkdir -p $OUTD 14 | 15 | # data2: chrom reference (./hg38) and mappability 16 | DATA2="/diskmnt/Projects/CPTAC3CNV/BICSEQ2/inputs" 17 | # data3: gene annotation file 18 | DATA3="/diskmnt/Projects/CPTAC3CNV/gatk4wxscnv/inputs" 19 | 20 | # See README.md for details. 
Paths specific to katmai 21 | bash $BICSEQ2/src/start_docker.sh $@ \ 22 | $OUTD \ 23 | $DATA2 $DATA3 24 | # /diskmnt/Projects/CPTAC3CNV/BICSEQ2/inputs \ 25 | # /diskmnt/Projects/cptac_downloads_3/GDC_import/data \ 26 | # /diskmnt/Projects/CPTAC3CNV/BICSEQ2/outputs/UCEC.hg38.test/run_uniq 27 | 28 | # Tip: run this command within a tmux session for long runs 29 | -------------------------------------------------------------------------------- /testing/direct_call/run_sample.C3L-chr.katmai/1_get_unique_reads.sh: -------------------------------------------------------------------------------- 1 | # Run get_unique step on MantaDemo test data 2 | # Direct (not parallel) evaluation 3 | 4 | CONFIG="project_config.run_sample.C3L-chr.katmai.sh" 5 | 6 | #NORMAL="/data/TestData/MantaDemo/HCC1954.NORMAL.30x.compare.COST16011_region.bam" 7 | #TUMOR="/data/TestData/MantaDemo/G15512.HCC1954.1.COST16011_region.bam" 8 | 9 | # From katmai.BamMap.dat 10 | # C3L-00008.WGS.N.hg38 C3L-00008 UCEC WGS blood_normal /diskmnt/Projects/cptac_downloads_3/GDC_import/data/846bf455-89b4-4840-b113-e529ffa13277/243bfb3c-d06b-4de5-a6c3-7fa7e2c5fb74_gdc_realn.bam 204714582211 BAM hg38 846bf455-89b4-4840-b113-e529ffa13277 katmai 11 | # C3L-00008.WGS.T.hg38 C3L-00008 UCEC WGS tumor /diskmnt/Projects/cptac_downloads_3/GDC_import/data/1c0e0f84-4caf-4493-9b2f-8f5f9ef9231b/f6924a26-a14f-45a3-b4bd-7a4592d34065_gdc_realn.bam 200107040765 BAM hg38 1c0e0f84-4caf-4493-9b2f-8f5f9ef9231b katmai 12 | 13 | # Assume /data3 maps to /diskmnt/Projects/cptac_downloads_3/GDC_import/data 14 | NORMAL="/data3/846bf455-89b4-4840-b113-e529ffa13277/243bfb3c-d06b-4de5-a6c3-7fa7e2c5fb74_gdc_realn.bam" 15 | TUMOR="/data3/1c0e0f84-4caf-4493-9b2f-8f5f9ef9231b/f6924a26-a14f-45a3-b4bd-7a4592d34065_gdc_realn.bam" 16 | 17 | # get_unique.sh [options] SAMPLE_NAME PROJECT_CONFIG BAM 18 | 19 | bash /BICSEQ2/src/get_unique.sh $@ C3L-00008_tumor $CONFIG $TUMOR 20 | bash /BICSEQ2/src/get_unique.sh $@ C3L-00008_blood_normal $CONFIG $NORMAL 21 | 
22 | -------------------------------------------------------------------------------- /testing/direct_call/run_sample.C3L-chr.katmai/2_run_norm.sh: -------------------------------------------------------------------------------- 1 | # Execute normalization step on two samples on katmai 2 | 3 | CONFIG="project_config.run_sample.C3L-chr.katmai.sh" 4 | 5 | # Tip: to debug norm-config file before processing, run with flags -dw, 6 | # check / edit config file as necessary, and run with -C config.txt flag to pass config explicitly 7 | 8 | # bash run_norm.sh [options] SAMPLE_NAME PROJECT_CONFIG 9 | 10 | bash /BICSEQ2/src/run_norm.sh $@ C3L-00008_tumor $CONFIG 11 | bash /BICSEQ2/src/run_norm.sh $@ C3L-00008_blood_normal $CONFIG 12 | 13 | -------------------------------------------------------------------------------- /testing/direct_call/run_sample.C3L-chr.katmai/3_run_segmentation.sh: -------------------------------------------------------------------------------- 1 | # Execute segmentation step using tumor/normal as case/control 2 | 3 | CONFIG="project_config.run_sample.C3L-chr.katmai.sh" 4 | 5 | # bash run_segmentation.sh [options] SAMPLE_NAME.CASE SAMPLE_NAME.CONTROL PROJECT_CONFIG 6 | 7 | # Tip: to debug seg-config file before processing, run with flags -dw, 8 | # check / edit config file as necessary, and run with -C config.txt flag to pass config explicitly 9 | 10 | CASE_NAME="C3L-00008" 11 | 12 | bash /BICSEQ2/src/run_segmentation.sh $@ -s $CASE_NAME C3L-00008_tumor C3L-00008_blood_normal $CONFIG 13 | 14 | -------------------------------------------------------------------------------- /testing/direct_call/run_sample.C3L-chr.katmai/4_run_gene_annotation.sh: -------------------------------------------------------------------------------- 1 | # execute run_annotation step on katmai 2 | 3 | CONFIG="project_config.run_sample.C3L-chr.katmai.sh" 4 | 5 | CASE_NAME="C3L-00008" 6 | 7 | bash /BICSEQ2/src/run_annotation.sh $@ $CASE_NAME $CONFIG 8 | 9 | 
-------------------------------------------------------------------------------- /testing/direct_call/run_sample.C3L-chr.katmai/README.md: -------------------------------------------------------------------------------- 1 | # `run_sample.C3L-chr.katmai` 2 | 3 | Test the following sample workflow steps on C3L-chr dataset: 4 | * Preparation 5 | * Create gene annotation file 6 | * implemented in `a_prep_gene_annotation` 7 | * this is typically done only once per system installation 8 | * Requires download of GFF file from ensembl 9 | * Is relatively fast 10 | * Per-sample 11 | * Unique Reads 12 | * Normalization 13 | * Segmentation 14 | * Gene annotation 15 | 16 | Performance 17 | * get_unique ~ 17 min for chrom 18,19,20 18 | * because of this, implementing option in project config to use either pipeline or preprocessed 19 | .seq files 20 | * /data4 is mapped to preprocessed .seq data 21 | 22 | Starting now, doing `get_unique` step. Previously, used `.seq` files provided by other runs. 23 | 24 | Project configuration in `project_config.run_sample.C3L-chr.katmai.sh` 25 | 26 | C3L-chr test dataset consists of chrom 18,19,20 from C3L-00004 CPTAC3 dataset. 
It is defined 27 | in `/BICSEQ2/testing/test_data/chromosomes.18-20.dat` 28 | 29 | Reference and mapping data on katmai at `/diskmnt/Projects/CPTAC3CNV/BICSEQ2/inputs/hg38` 30 | 31 | Directory mapping, when launching docker: 32 | * data1:/diskmnt/Datasets/BICSEQ2-dev.tmp 33 | * Output directory of this project 34 | * gene annotation file, created in prep step, also here 35 | * data2:/diskmnt/Projects/CPTAC3CNV/BICSEQ2/inputs 36 | * Mappability files 37 | * Per-chrom reference in ./hg38 38 | * data3:/diskmnt/Projects/cptac_downloads_3/GDC_import/data 39 | * common path to CPTAC3 WGS BAM files of interest 40 | 41 | Paths are incorporated into `0_launch_docker.sh` 42 | 43 | -------------------------------------------------------------------------------- /testing/direct_call/run_sample.C3L-chr.katmai/a_prep_gene_annotation.sh: -------------------------------------------------------------------------------- 1 | # Run prep_gene_annotation.sh from within docker. Typically, start docker first with 0_start_docker.sh 2 | 3 | # Using same project_config as other katmai steps. 4 | source project_config.run_sample.C3L-chr.katmai.sh 5 | 6 | # previously, ../run_sample.C3L-chr.katmai/project_config.run_sample.C3L-chr.katmai.sh 7 | 8 | GFF_URL="ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_29/gencode.v29.annotation.gff3.gz" 9 | 10 | # Creates $GENE_BED defined in project_config 11 | 12 | # prep_gene_annotation.sh [options] GFF_URL BED_OUT 13 | bash /BICSEQ2/src/prep_gene_annotation.sh $@ $GFF_URL $GENE_BED 14 | 15 | -------------------------------------------------------------------------------- /testing/direct_call/run_sample.C3L-chr.katmai/run_all.sh: -------------------------------------------------------------------------------- 1 | function launch { 2 | STEP=$1 3 | F=$2 4 | NOW=$(date) 5 | >&2 echo [ $NOW ] Launching $STEP 6 | bash $STEP $F 7 | rc=$? 8 | if [[ $rc != 0 ]]; then 9 | >&2 echo Fatal ERROR $rc: $!. Exiting. 
10 | exit $rc; 11 | fi 12 | } 13 | 14 | # Not clear how to propagate -d etc. 15 | 16 | #FLAG="-d" 17 | launch 1_get_unique_reads.sh $FLAG 18 | launch 2_run_norm.sh $FLAG 19 | launch 3_run_segmentation.sh $FLAG 20 | launch 4_run_gene_annotation.sh $FLAG 21 | 22 | -------------------------------------------------------------------------------- /testing/docker_call/execute_pipeline.C3L-chr.MGI/README.md: -------------------------------------------------------------------------------- 1 | Testing docker workflow on MGI 2 | 3 | 4 | -------------------------------------------------------------------------------- /testing/docker_call/execute_pipeline.C3L-chr.katmai/README.md: -------------------------------------------------------------------------------- 1 | # `execute_pipeline.C3L-chr.katmai` 2 | 3 | Test complete workflow on C3L-chr dataset. This is based on direct_call/execute_workflow.3L-chr.katmai 4 | 5 | Here, calling `execute_workflow.sh` from host. 6 | 7 | Testing now (1/8/19): all-chrom run on C3L-00006, tmux session execute_pipeline.C3L-chr.katmai. 
8 | * Note that direct runs died because $ANND directory not created 9 | * Also, investigate why parallel seems to be running only one job at a time 10 | 11 | 12 | -------------------------------------------------------------------------------- /testing/docker_call/execute_pipeline.LUAD-test.MGI/README.md: -------------------------------------------------------------------------------- 1 | Testing docker workflow on MGI 2 | 3 | 4 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.CCRCC-select.MGI/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | 3 | BAMMAP="/gscuser/mwyczalk/projects/CPTAC3/CPTAC3.catalog/MGI.BamMap.dat" 4 | OUT="dat/case_names.dat" 5 | 6 | grep CCRCC $BAMMAP | grep WGS | grep hg38 | cut -f 2 | sort -u > $OUT 7 | >&2 echo Written to $OUT 8 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.CCRCC-select.MGI/2_make_dockermap.sh: -------------------------------------------------------------------------------- 1 | CASES="dat/case_names.dat" 2 | BAMMAP="/gscuser/mwyczalk/projects/CPTAC3/CPTAC3.catalog/MGI.BamMap.dat" 3 | 4 | OUT="dat/Dockermap.dat" 5 | 6 | BICSEQ_H="/gscuser/mwyczalk/projects/BICSEQ2" 7 | 8 | bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 9 | 10 | >&2 echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.CCRCC-select.MGI/3_make_case_list.sh: -------------------------------------------------------------------------------- 1 | CASES="dat/case_names.dat" 2 | BAMMAP="/gscuser/mwyczalk/projects/CPTAC3/CPTAC3.catalog/MGI.BamMap.dat" 3 | 4 | DOCKERMAP="dat/Dockermap.dat" 5 | OUT="dat/CaseList.dat" 6 | 7 | BICSEQ_H="/gscuser/mwyczalk/projects/BICSEQ2" 8 | 9 | bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP -D CCRCC - < 
$CASES > $OUT 10 | 11 | echo Written to $OUT 12 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.CCRCC-select.MGI/A.process_project_cases.sh: -------------------------------------------------------------------------------- 1 | # start processing of list of cases 2 | 3 | # Usage: 4 | # A.process_project_cases.sh [options] CASE1 CASE2 ... 5 | # or 6 | # cat CASES | A.process_project_cases.sh [options] - 7 | # 8 | # with CASES a list of case names. All options passed to src/process_cases.sh 9 | 10 | # Project config path is on host, and may be relative. Will be mounted as a file /project_config.sh 11 | PROJECT_CONFIG="./project_config.run_cases.LUAD.MGI.sh" 12 | source $PROJECT_CONFIG 13 | 14 | # installation location of this BICSEQ2 project 15 | BICSEQ_H="/gscuser/mwyczalk/projects/BICSEQ2" 16 | 17 | # Principal workflow output directory 18 | OUTBASE_H="/gscmnt/gc2508/dinglab/mwyczalk/BICSEQ2-dev.tmp" 19 | OUTD_H="$OUTBASE_H/$PROJECT" 20 | >&2 echo Creating output directory $OUTD_H 21 | mkdir -p $OUTD_H 22 | 23 | CASELIST="dat/CaseList.dat" 24 | DOCKERMAP="dat/Dockermap.dat" 25 | 26 | # this is new, specific to MGI. Might be moved to process_cases.sh 27 | # https://github.com/ding-lab/importGDC.CPTAC3 28 | 29 | NJOBS=3 30 | export LSF_GROUP="/mwyczalk/BICSEQ2.2" 31 | ##bgadd -L $NJOBS $LSF_GROUP # this is needed only the first time, when group created 32 | #bgmod -L $NJOBS $LSF_GROUP # this is to modify number of jobs running 33 | #bjgroup -s $LSF_GROUP # to see job group stats. This is relatively slow 34 | 35 | 36 | # DATAMAP lists directories mapped to /data1, /data2, etc. 
37 | DATAMAP=" $OUTD_H \ 38 | /gscmnt/gc2521/dinglab/yigewu/Projects/CPTAC3CNV/BICSEQ2/inputs \ 39 | /gscmnt/gc2619/dinglab_cptac3/GDC_import/data \ 40 | /gscmnt/gc2508/dinglab/mwyczalk/BICSEQ2-dev.tmp/cached.annotation" 41 | 42 | # -M for MGI 43 | bash $BICSEQ_H/src/process_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST -m $DOCKERMAP -P "$DATAMAP" -g $LSF_GROUP $@ 44 | 45 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.CCRCC-select.MGI/B.evaluate_project_cases.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case. Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.run_cases.LUAD.MGI.sh" 14 | source $PROJECT_CONFIG 15 | 16 | # installation location of this BICSEQ2 project 17 | BICSEQ_H="/gscuser/mwyczalk/projects/BICSEQ2" 18 | 19 | # Principal workflow output directory. 
this should be defined in project_config-host 20 | OUTBASE_H="/gscmnt/gc2508/dinglab/mwyczalk/BICSEQ2-dev.tmp" 21 | OUTD_H="$OUTBASE_H/$PROJECT" 22 | 23 | CASELIST="dat/CaseList.dat" 24 | 25 | # -M for MGI 26 | bash $BICSEQ_H/src/evaluate_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 27 | 28 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.CCRCC-select.MGI/README.md: -------------------------------------------------------------------------------- 1 | Develop and test running of multiple cases on MGI 2 | 3 | Goal here is to process 3 CCRCC cases to compare against Yige's analysis. 4 | Case C3L-00796 apparently has problems: "tumor samples for this one cannot generate .seq file for chrX" 5 | 6 | ## 1. make list of case names 7 | First, create list of cases which we will process. 8 | 9 | ## 2. Make Dockermap file 10 | Dockermap file which provides mapping from host to container paths. 11 | 12 | ## 3. Make CaseList file 13 | CaseList file contains BAM paths (mapped to container) and other details to process a given case. 14 | Reads in BamMap and a list of case names. 
15 | 16 | 17 | # Testing 18 | 19 | Launched first 5 cases 1/12/19 with, 20 | head -n 5 dat/case_names.dat | bash A.process_project_cases.sh - 21 | 22 | Follow first one along here: 23 | /gscmnt/gc2508/dinglab/mwyczalk/BICSEQ2-dev.tmp/run_cases.LUAD.MGI/11LU013/bsub/1547328929.err 24 | 25 | 26 | # Running 27 | 28 | ``` 29 | bash B.evaluate_project_cases.sh -f not_started -u | bash A.process_project_cases.sh - 30 | ``` 31 | 32 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.CCRCC-select.MGI/dat/CaseList.dat: -------------------------------------------------------------------------------- 1 | # case sample_name_A data_path_A UUID_A sample_name_B data_path_B UUID_B 2 | C3L-00004 C3L-00004.WGS.T.hg38 /import3/d3c54309-50b1-4257-8e90-4536dd45efe1/82ccdf4e-4527-47ca-8151-7e1248f1da09_gdc_realn.bam d3c54309-50b1-4257-8e90-4536dd45efe1 C3L-00004.WGS.N.hg38 /import3/22a34772-76b0-4cab-af6f-43472bb74199/1561b97d-8c8f-4fe6-a244-06452760074d_gdc_realn.bam 22a34772-76b0-4cab-af6f-43472bb74199 3 | C3L-00010 C3L-00010.WGS.T.hg38 /import3/defd1a75-47c4-48d2-ace0-fced371c4933/7a2e2088-9a18-4694-a4ca-f688ebd5331f_gdc_realn.bam defd1a75-47c4-48d2-ace0-fced371c4933 C3L-00010.WGS.N.hg38 /import3/2c1805ba-8194-46a9-8862-15685b11a712/5fc376df-4539-47ef-9749-44d36230a941_gdc_realn.bam 2c1805ba-8194-46a9-8862-15685b11a712 4 | C3L-00796 C3L-00796.WGS.T.hg38 /import3/7a1b9d0e-3801-4de2-b9a9-c33bfc996948/b4d752c8-7996-433f-8e5b-4b9d748eefa5_gdc_realn.bam 7a1b9d0e-3801-4de2-b9a9-c33bfc996948 C3L-00796.WGS.N.hg38 /import3/09da6812-c956-4b0a-8197-22750ad5fe22/84092027-a164-403e-9910-428f99097697_gdc_realn.bam 09da6812-c956-4b0a-8197-22750ad5fe22 5 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.CCRCC-select.MGI/dat/Dockermap.dat: -------------------------------------------------------------------------------- 1 | 
/gscmnt/gc2521/dinglab/mwyczalk/somatic-wrapper-data/GDC_import/data:/import1 2 | /gscmnt/gc2619/dinglab_cptac3/GDC_import/data:/import2 3 | /gscmnt/gc2741/ding/CPTAC3-data/GDC_import/data:/import3 4 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.CCRCC-select.MGI/dat/case_names.dat: -------------------------------------------------------------------------------- 1 | C3L-00004 2 | C3L-00010 3 | C3L-00796 4 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM-subset.katmai/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore the dat folder 2 | dat/ 3 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM-subset.katmai/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, looking for all GBM cases with WGS hg38 data 3 | # However, for GBM project, we explicitly pass the case list 4 | 5 | source project_config-host.sh 6 | 7 | OUT="dat/case_names.dat" 8 | 9 | # grep GBM $BAMMAP \ 10 | # | grep WGS \ 11 | # | grep hg38 \ 12 | # | cut -f 2 \ 13 | # | sort -u > $OUT 14 | # >&2 echo Written to $OUT 15 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM-subset.katmai/2_make_dockermap.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | OUT="dat/Dockermap.dat" 5 | 6 | bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 7 | 8 | >&2 echo Written to $OUT 9 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM-subset.katmai/3_make_case_list.sh: 
-------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/CaseList.dat" 7 | 8 | bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM-subset.katmai/A.process_project_cases.sh: -------------------------------------------------------------------------------- 1 | # start processing of list of cases 2 | 3 | # Usage: 4 | # A.process_project_cases.sh [options] CASE1 CASE2 ... 5 | # or 6 | # cat CASES | A.process_project_cases.sh [options] - 7 | # 8 | # with CASES a list of case names. All options passed to src/process_cases.sh 9 | 10 | # Project config path is on host, and may be relative. Will be mounted as a file /project_config.sh 11 | PROJECT_CONFIG="./project_config.sh" 12 | source project_config-host.sh 13 | 14 | CASELIST="dat/CaseList.dat" 15 | DOCKERMAP="dat/Dockermap.dat" 16 | 17 | # DATAMAP lists directories mapped to /data1, /data2, etc. 18 | DATAMAP=" $OUTD_H $DATA2 $DATA3 $DATA4 " 19 | 20 | if [ $IS_MGI == 1 ]; then 21 | # -M for MGI 22 | MGI_ARGS="-M -g $MGI_LSF_GROUP" 23 | fi 24 | 25 | # If PARALLEL_CASES is not defined, on non-MGI run jobs sequentially 26 | # PARALLEL_CASES=1; PARGS="-J $PARALLEL_CASES" 27 | 28 | bash $BICSEQ_H/src/process_cases.sh $MGI_ARGS $PARGS -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST -m $DOCKERMAP -P "$DATAMAP" $@ 29 | 30 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM-subset.katmai/B.evaluate_project_cases.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. 
Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case. Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.sh" 14 | source project_config-host.sh 15 | 16 | CASELIST="dat/CaseList.dat" 17 | 18 | # -M for MGI 19 | bash $BICSEQ_H/src/evaluate_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 20 | 21 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM-subset.katmai/project_config-host.sh: -------------------------------------------------------------------------------- 1 | # Define host-specific project paths and configuration 2 | # This is for katmai 3 | 4 | PROJECT="run_cases.GBM-subset" 5 | 6 | # All paths here are relative to host 7 | BAMMAP="/home/mwyczalk_test/Projects/CPTAC3/CPTAC3.catalog/katmai.BamMap.dat" 8 | 9 | # Path to the git repo root 10 | BICSEQ_H="/diskmnt/Projects/cptac_downloads_4/CPTAC3_GBM/201902_somatic_wgs_cnv/BICSEQ2" 11 | 12 | # Principal workflow output directory. /data1 will map to $OUTD_H 13 | # Change to folder to store output 14 | OUTBASE_H="/diskmnt/Projects/cptac_downloads_4/CPTAC3_GBM/201902_somatic_wgs_cnv" 15 | OUTD_H="$OUTBASE_H/$PROJECT" 16 | 17 | 18 | # Define directories to be mapped to /data2, etc. If more than DATA4, adjust call to process_cases.sh accordingly 19 | # data2: chrom reference (./hg38) and mappability 20 | DATA2="/diskmnt/Projects/CPTAC3CNV/BICSEQ2/inputs" 21 | # data3: gene annotation file. 
using updated one (19940 lines) copied from MGI 22 | DATA3="/diskmnt/Projects/CPTAC3CNV/gatk4wxscnv/inputs" # /gencode.v29.annotation.hg38.p12.protein_coding.bed 23 | 24 | # set this to 1 if running on MGI 25 | IS_MGI=0 26 | MGI_LSF_GROUP="/mwyczalk/BICSEQ2" 27 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, looking for all UCEC cases with WGS hg38 data 3 | 4 | source project_config-host.sh 5 | 6 | mkdir -p dat 7 | OUT="dat/case_names.dat" 8 | 9 | grep Y2.b2 $CASEMAP | cut -f 1 | sort -u > $OUT 10 | >&2 echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/2_make_dockermap.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | OUT="dat/Dockermap.dat" 5 | 6 | bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 7 | 8 | >&2 echo Written to $OUT 9 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/3_make_case_list.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/CaseList.dat" 7 | 8 | bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/B.evaluate_project_cases.sh: 
-------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case. Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.sh" 14 | source project_config-host.sh 15 | 16 | CASELIST="dat/CaseList.dat" 17 | 18 | # -M for MGI 19 | bash $BICSEQ_H/src/evaluate_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 20 | 21 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/C.make_analysis_description.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/"$BATCH_NAME".analysis_description.dat" 7 | 8 | bash $BICSEQ_H/src/make_analysis_description.sh -b $BAMMAP -O $OUTD_H -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/C3L-00987: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ding-lab/BICSEQ2/b6570332496a80867f400fca95859c22991a7ae5/testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/C3L-00987 -------------------------------------------------------------------------------- 
/testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/C3L-01237: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ding-lab/BICSEQ2/b6570332496a80867f400fca95859c22991a7ae5/testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/C3L-01237 -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/C3L-03378: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ding-lab/BICSEQ2/b6570332496a80867f400fca95859c22991a7ae5/testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/C3L-03378 -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/C3N-01752: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ding-lab/BICSEQ2/b6570332496a80867f400fca95859c22991a7ae5/testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/C3N-01752 -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/C3N-01754: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ding-lab/BICSEQ2/b6570332496a80867f400fca95859c22991a7ae5/testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/C3N-01754 -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/C3N-01756: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ding-lab/BICSEQ2/b6570332496a80867f400fca95859c22991a7ae5/testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/C3N-01756 
-------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/C3N-01758: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ding-lab/BICSEQ2/b6570332496a80867f400fca95859c22991a7ae5/testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/C3N-01758 -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/C3N-01943: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ding-lab/BICSEQ2/b6570332496a80867f400fca95859c22991a7ae5/testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/C3N-01943 -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/C3N-03042: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ding-lab/BICSEQ2/b6570332496a80867f400fca95859c22991a7ae5/testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/C3N-03042 -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/C3N-03180: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ding-lab/BICSEQ2/b6570332496a80867f400fca95859c22991a7ae5/testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/C3N-03180 -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/TODO: -------------------------------------------------------------------------------- 1 | # Error? It shows error in the B script but the get unique outputs look ok 2 | 3 | [ Thu Jul 23 23:45:40 UTC 2020 ] All jobs launched. 
Waiting for them to complete 4 | Traceback (most recent call last): 5 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 385, i 6 | n 7 | main(sys.argv[1:]) 8 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 378, i 9 | n main 10 | if driverMonitor.start_monitor() < 0 : 11 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 175, i 12 | n start_monitor 13 | if self.__collect_rusage() < 0 : 14 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 225, i 15 | n __collect_rusage 16 | self.dbHandler.update_db(resources) 17 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 131, i 18 | n update_db 19 | conn.rollback() 20 | sqlite3.OperationalError: cannot rollback - no transaction is active 21 | [ Fri Jul 24 05:08:35 UTC 2020 ] All jobs have completed, written to /data1/C3N-01719/unique_reads 22 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/dat/Dockermap.dat: -------------------------------------------------------------------------------- 1 | /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/import1 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/dat/case_names.dat: -------------------------------------------------------------------------------- 1 | C3L-03968 2 | C3N-02785 3 | C3L-03266 4 | C3L-00677 5 | C3L-03728 6 | C3L-03727 7 | C3L-01142 8 | C3L-03748 9 | C3L-02642 10 | C3L-01146 11 | C3L-01049 12 | C3L-03744 13 | C3L-03681 14 | C3L-03407 15 | C3L-01061 16 | C3L-03260 17 | C3L-02900 18 | C3L-01155 19 | C3L-01149 20 | C3L-03387 21 | C3N-03180 22 | C3N-01754 23 | C3L-03378 24 | C3N-03042 25 | C3N-01758 26 | C3N-01943 27 | C3N-01756 28 | 
C3L-01237 29 | C3L-00987 30 | C3N-01752 31 | C3L-01138 32 | C3N-01755 33 | C3L-00997 34 | C3L-00994 35 | C3N-01944 36 | C3L-02651 37 | C3L-00999 38 | C3L-02621 39 | C3N-01858 40 | C3N-01340 41 | C3N-01757 42 | C3N-01643 43 | C3L-00995 44 | C3L-01455 45 | C3L-00510 46 | C3L-00083 47 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/dat/start_docker_to_get_filesize.compute1.sh: -------------------------------------------------------------------------------- 1 | bash /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.WGS_CNV_Somatic.Y3.UCEC.178/scripts/WUDocker/start_docker.sh -I mwyczalkowski/bicseq2 -M compute1 /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/ /storage1/fs1/home1/Active/home/yigewu:/home/yigewu/ /storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/ /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/ /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/import1 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/dat/worklog: -------------------------------------------------------------------------------- 1 | cp /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/Case_Lists/4E.cases_work_list.dat case_names.dat 2 | head -7 UCEC.rerun.analysis_description.dat > UCEC_3.analysis_description.dat 3 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/project_config-host.sh: -------------------------------------------------------------------------------- 1 | # Define host-specific project paths and configuration 2 | # This is for katmai 3 | 4 | BATCH_NAME="GBM_HNSCC_LSCC_LUAD.rerun.compute1" 5 | 
PROJECT="run_cases."$BATCH_NAME 6 | 7 | # All paths here are relative to host 8 | BAMMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/BamMap/storage1.BamMap.dat" 9 | 10 | # The list of case list 11 | CASEMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/CPTAC3.cases.dat" 12 | 13 | # Installation directory of BICSEQ2.DL 14 | BICSEQ_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.UCEC.rerun/scripts/BICSEQ2" 15 | 16 | # Principal workflow output directory. /data1 will map to $OUTD_H 17 | OUTBASE_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.UCEC.rerun/outputs" 18 | OUTD_H="$OUTBASE_H/$PROJECT" 19 | 20 | # Define directories to be mapped to /data2, etc. If more than DATA4, adjust call to process_cases.sh accordingly 21 | # data2: chrom reference (./hg38) and mappability 22 | DATA2="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/inputs/" 23 | # data3: gene annotation file. 
using updated one (19940 lines) copied from MGI 24 | DATA3="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/cached.annotation" 25 | # data4: directory to the scripts for running the current batch, espcially the project config 26 | DATA4=${BICSEQ_H}"/testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1" 27 | ## data5: directory with all the scripts, including codes under src 28 | DATA5=${BICSEQ_H} 29 | 30 | # set this to 1 if running on MGI 31 | IS_MGI=0 32 | MGI_LSF_GROUP="/yigewu/bicseq2" 33 | 34 | # set this to 1 if running on Compute1 35 | IS_COMPUTE1=1 36 | COMPUTE1_LSF_GROUP="/yigewu/bicseq2" 37 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.GBM_HNSCC_LSCC_LUAD.rerun.compute1/start_docker_to_launch_jobs.compute1.sh: -------------------------------------------------------------------------------- 1 | bash /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.WGS_CNV_Somatic.Y3.UCEC.178/scripts/WUDocker/start_docker.sh -I mwyczalkowski/bicseq2 -M compute1 /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/ /storage1/fs1/home1/Active/home/yigewu:/home/yigewu/ /storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/ /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/ 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.LSCC-subset.katmai/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore the dat folder 2 | dat/ 3 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.LSCC-subset.katmai/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, looking for all GBM cases with WGS hg38 data 3 | # However, for 
GBM project, we explicitly pass the case list 4 | 5 | source project_config-host.sh 6 | 7 | OUT="dat/case_names.dat" 8 | 9 | # grep GBM $BAMMAP \ 10 | # | grep WGS \ 11 | # | grep hg38 \ 12 | # | cut -f 2 \ 13 | # | sort -u > $OUT 14 | # >&2 echo Written to $OUT 15 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.LSCC-subset.katmai/2_make_dockermap.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | OUT="dat/Dockermap.dat" 5 | 6 | #bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 7 | bash ./make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 8 | 9 | >&2 echo Written to $OUT 10 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.LSCC-subset.katmai/3_make_case_list.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/CaseList.dat" 7 | 8 | bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.LSCC-subset.katmai/A.process_project_cases.sh: -------------------------------------------------------------------------------- 1 | # start processing of list of cases 2 | 3 | # Usage: 4 | # A.process_project_cases.sh [options] CASE1 CASE2 ... 5 | # or 6 | # cat CASES | A.process_project_cases.sh [options] - 7 | # 8 | # with CASES a list of case names. All options passed to src/process_cases.sh 9 | 10 | # Project config path is on host, and may be relative. 
Will be mounted as a file /project_config.sh 11 | PROJECT_CONFIG="./project_config.sh" 12 | source project_config-host.sh 13 | 14 | CASELIST="dat/CaseList.dat" 15 | DOCKERMAP="dat/Dockermap.dat" 16 | 17 | # DATAMAP lists directories mapped to /data1, /data2, etc. 18 | DATAMAP=" $OUTD_H $DATA2 $DATA3 $DATA4 " 19 | 20 | if [ $IS_MGI == 1 ]; then 21 | # -M for MGI 22 | MGI_ARGS="-M -g $MGI_LSF_GROUP" 23 | fi 24 | 25 | # If PARALLEL_CASES is not defined, on non-MGI run jobs sequentially 26 | # PARALLEL_CASES=1; PARGS="-J $PARALLEL_CASES" 27 | 28 | bash $BICSEQ_H/src/process_cases.sh $MGI_ARGS $PARGS -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST -m $DOCKERMAP -P "$DATAMAP" $@ 29 | 30 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.LSCC-subset.katmai/B.evaluate_project_cases.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case. 
Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.sh" 14 | source project_config-host.sh 15 | 16 | CASELIST="dat/CaseList.dat" 17 | 18 | # -M for MGI 19 | bash $BICSEQ_H/src/evaluate_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 20 | 21 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.LSCC-subset.katmai/C.make_analysis_description.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/"$BATCH_NAME".analysis_description.dat" 7 | 8 | bash $BICSEQ_H/src/make_analysis_description.sh -b $BAMMAP -D $DISEASE -O $OUTD_H -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.LSCC-subset.katmai/project_config-host.sh: -------------------------------------------------------------------------------- 1 | # Define host-specific project paths and configuration 2 | # This is for katmai 3 | 4 | DISEASE="LSCC" 5 | BATCH_NAME="LSCC-subset" 6 | PROJECT="run_cases.LSCC-subset" 7 | 8 | # All paths here are relative to host 9 | BAMMAP="/diskmnt/Projects/cptac_scratch/CPTAC3.workflow/CPTAC3.catalog/BamMap/katmai.BamMap.dat" 10 | 11 | # Path to the git repo root 12 | BICSEQ_H="/diskmnt/Projects/cptac_scratch_4/CPTAC3CNV/BICSEQ2/BICSEQ2" 13 | 14 | # Principal workflow output directory. 
/data1 will map to $OUTD_H 15 | # Change to folder to store output 16 | OUTBASE_H="/diskmnt/Projects/cptac_scratch_4/CPTAC3CNV/BICSEQ2/outputs" 17 | OUTD_H="$OUTBASE_H/$PROJECT" 18 | 19 | 20 | # Define directories to be mapped to /data2, etc. If more than DATA4, adjust call to process_cases.sh accordingly 21 | # data2: chrom reference (./hg38) and mappability 22 | DATA2="/diskmnt/Projects/CPTAC3CNV/BICSEQ2/inputs" 23 | # data3: gene annotation file. using updated one (19940 lines) copied from MGI 24 | DATA3="/diskmnt/Projects/CPTAC3CNV/gatk4wxscnv/inputs" # /gencode.v29.annotation.hg38.p12.protein_coding.bed 25 | 26 | # set this to 1 if running on MGI 27 | IS_MGI=0 28 | MGI_LSF_GROUP="/mwyczalk/BICSEQ2" 29 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.LUAD.MGI/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, looking for all LUAD cases with WGS hg38 data 3 | 4 | BAMMAP="/gscuser/mwyczalk/projects/CPTAC3/CPTAC3.catalog/MGI.BamMap.dat" 5 | OUT="dat/case_names.dat" 6 | 7 | grep LUAD $BAMMAP | grep WGS | grep hg38 | cut -f 2 | sort -u > $OUT 8 | >&2 echo Written to $OUT 9 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.LUAD.MGI/2_make_dockermap.sh: -------------------------------------------------------------------------------- 1 | CASES="dat/case_names.dat" 2 | BAMMAP="/gscuser/mwyczalk/projects/CPTAC3/CPTAC3.catalog/MGI.BamMap.dat" 3 | 4 | OUT="dat/Dockermap.dat" 5 | 6 | BICSEQ_H="/gscuser/mwyczalk/projects/BICSEQ2" 7 | 8 | bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 9 | 10 | >&2 echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.LUAD.MGI/3_make_case_list.sh: 
-------------------------------------------------------------------------------- 1 | CASES="dat/case_names.dat" 2 | BAMMAP="/gscuser/mwyczalk/projects/CPTAC3/CPTAC3.catalog/MGI.BamMap.dat" 3 | 4 | DOCKERMAP="dat/Dockermap.dat" 5 | OUT="dat/CaseList.dat" 6 | 7 | BICSEQ_H="/gscuser/mwyczalk/projects/BICSEQ2" 8 | 9 | bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP -D LUAD - < $CASES > $OUT 10 | 11 | echo Written to $OUT 12 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.LUAD.MGI/A.process_project_cases.sh: -------------------------------------------------------------------------------- 1 | # start processing of list of cases 2 | 3 | # Usage: 4 | # A.process_project_cases.sh [options] CASE1 CASE2 ... 5 | # or 6 | # cat CASES | A.process_project_cases.sh [options] - 7 | # 8 | # with CASES a list of case names. All options passed to src/process_cases.sh 9 | 10 | # Project config path is on host, and may be relative. Will be mounted as a file /project_config.sh 11 | PROJECT_CONFIG="./project_config.run_cases.LUAD.MGI.sh" 12 | source $PROJECT_CONFIG 13 | 14 | # installation location of this BICSEQ2 project 15 | BICSEQ_H="/gscuser/mwyczalk/projects/BICSEQ2" 16 | 17 | # Principal workflow output directory 18 | OUTBASE_H="/gscmnt/gc2508/dinglab/mwyczalk/BICSEQ2-dev.tmp" 19 | OUTD_H="$OUTBASE_H/$PROJECT" 20 | >&2 echo Creating output directory $OUTD_H 21 | mkdir -p $OUTD_H 22 | 23 | CASELIST="dat/CaseList.dat" 24 | DOCKERMAP="dat/Dockermap.dat" 25 | 26 | # this is new, specific to MGI. Might be moved to process_cases.sh 27 | # https://github.com/ding-lab/importGDC.CPTAC3 28 | 29 | NJOBS=3 30 | export LSF_GROUP="/mwyczalk/BICSEQ2" 31 | ##bgadd -L $NJOBS $LSF_GROUP # this is needed only the first time, when group created 32 | #bgmod -L $NJOBS $LSF_GROUP # this is to modify number of jobs running 33 | #bjgroup -s $LSF_GROUP # to see job group stats. 
This is relatively slow 34 | 35 | 36 | # DATAMAP lists directories mapped to /data1, /data2, etc. 37 | DATAMAP=" $OUTD_H \ 38 | /gscmnt/gc2521/dinglab/yigewu/Projects/CPTAC3CNV/BICSEQ2/inputs \ 39 | /gscmnt/gc2619/dinglab_cptac3/GDC_import/data \ 40 | /gscmnt/gc2508/dinglab/mwyczalk/BICSEQ2-dev.tmp/cached.annotation" 41 | 42 | # -M for MGI 43 | bash $BICSEQ_H/src/process_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST -m $DOCKERMAP -P "$DATAMAP" -g $LSF_GROUP $@ 44 | 45 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.LUAD.MGI/B.evaluate_project_cases.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case. Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.run_cases.LUAD.MGI.sh" 14 | source $PROJECT_CONFIG 15 | 16 | # installation location of this BICSEQ2 project 17 | BICSEQ_H="/gscuser/mwyczalk/projects/BICSEQ2" 18 | 19 | # Principal workflow output directory. 
this should be defined in project_config-host 20 | OUTBASE_H="/gscmnt/gc2508/dinglab/mwyczalk/BICSEQ2-dev.tmp" 21 | OUTD_H="$OUTBASE_H/$PROJECT" 22 | 23 | CASELIST="dat/CaseList.dat" 24 | 25 | # -M for MGI 26 | bash $BICSEQ_H/src/evaluate_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 27 | 28 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.LUAD.MGI/README.md: -------------------------------------------------------------------------------- 1 | Develop and test running of multiple cases on MGI 2 | 3 | ## 1. make list of case names 4 | First, create list of cases which we will process. For instance, LUAD WGS hg38 cases: 5 | ``` 6 | grep LUAD ~/projects/CPTAC3/CPTAC3.catalog/MGI.BamMap.dat | grep WGS | grep hg38 | cut -f 2 | sort -u | head > dat/case_names.dat 7 | ``` 8 | 9 | ## 2. Make Dockermap file 10 | Dockermap file which provides mapping from host to container paths. 11 | 12 | ## 3. Make CaseList file 13 | CaseList file contains BAM paths (mapped to container) and other details to process a given case. 14 | Reads in BamMap and a list of case names. 
15 | 16 | 17 | # Testing 18 | 19 | Launched first 5 cases 1/12/19 with, 20 | head -n 5 dat/case_names.dat | bash A.process_project_cases.sh - 21 | 22 | Follow first one along here: 23 | /gscmnt/gc2508/dinglab/mwyczalk/BICSEQ2-dev.tmp/run_cases.LUAD.MGI/11LU013/bsub/1547328929.err 24 | 25 | 26 | # Running 27 | 28 | ``` 29 | bash B.evaluate_project_cases.sh -f not_started -u | bash A.process_project_cases.sh - 30 | ``` 31 | 32 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.LUAD.MGI/dat/Dockermap.dat: -------------------------------------------------------------------------------- 1 | /gscmnt/gc2619/dinglab_cptac3/GDC_import/data:/import1 2 | /gscmnt/gc2741/ding/CPTAC3-data/GDC_import/data:/import2 3 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.LUAD.MGI/dat/case_names.dat: -------------------------------------------------------------------------------- 1 | 11LU013 2 | 11LU016 3 | 11LU022 4 | 11LU035 5 | C3L-00001 6 | C3L-00009 7 | C3L-00080 8 | C3L-00083 9 | C3L-00093 10 | C3L-00094 11 | C3L-00095 12 | C3L-00140 13 | C3L-00144 14 | C3L-00263 15 | C3L-00279 16 | C3L-00368 17 | C3L-00412 18 | C3L-00422 19 | C3L-00510 20 | C3L-00604 21 | C3L-00893 22 | C3L-00913 23 | C3L-00973 24 | C3L-01330 25 | C3L-01632 26 | C3L-01682 27 | C3L-01683 28 | C3L-01862 29 | C3L-01889 30 | C3L-01890 31 | C3L-01924 32 | C3L-02219 33 | C3L-02345 34 | C3L-02348 35 | C3L-02350 36 | C3L-02365 37 | C3L-02508 38 | C3L-02549 39 | C3N-00167 40 | C3N-00169 41 | C3N-00175 42 | C3N-00180 43 | C3N-00199 44 | C3N-00203 45 | C3N-00217 46 | C3N-00223 47 | C3N-00293 48 | C3N-00294 49 | C3N-00433 50 | C3N-00545 51 | C3N-00546 52 | C3N-00547 53 | C3N-00549 54 | C3N-00550 55 | C3N-00551 56 | C3N-00552 57 | C3N-00556 58 | C3N-00559 59 | C3N-00560 60 | C3N-00572 61 | C3N-00574 62 | C3N-00578 63 | C3N-00579 64 | C3N-00580 65 | C3N-00704 66 | C3N-00737 67 | C3N-00738 68 | 
C3N-00959 69 | C3N-01016 70 | C3N-01019 71 | C3N-01021 72 | C3N-01022 73 | C3N-01023 74 | C3N-01024 75 | C3N-01030 76 | C3N-01071 77 | C3N-01072 78 | C3N-01074 79 | C3N-01405 80 | C3N-01408 81 | C3N-01409 82 | C3N-01410 83 | C3N-01413 84 | C3N-01414 85 | C3N-01415 86 | C3N-01416 87 | C3N-01488 88 | C3N-01489 89 | C3N-01799 90 | C3N-01823 91 | C3N-01842 92 | C3N-02000 93 | C3N-02002 94 | C3N-02003 95 | C3N-02067 96 | C3N-02087 97 | C3N-02088 98 | C3N-02089 99 | C3N-02090 100 | C3N-02141 101 | C3N-02142 102 | C3N-02145 103 | C3N-02149 104 | C3N-02150 105 | C3N-02153 106 | C3N-02155 107 | C3N-02158 108 | C3N-02379 109 | C3N-02380 110 | C3N-02421 111 | C3N-02422 112 | C3N-02423 113 | C3N-02424 114 | C3N-02433 115 | C3N-02529 116 | C3N-02572 117 | C3N-02582 118 | C3N-02586 119 | C3N-02587 120 | C3N-02588 121 | C3N-02729 122 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.PDA.Y2.b2-noWXS/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, looking for all UCEC cases with WGS hg38 data 3 | 4 | source project_config-host.sh 5 | 6 | mkdir -p dat 7 | OUT="dat/case_names.dat" 8 | 9 | #grep Y2.b1 $CASEMAP | cut -f 1 | sort -u > $OUT 10 | grep PDA /diskmnt/Projects/Users/dcui/Projects/Fusion_hg38/Data_locations/CPTAC3.catalog/katmai.BamMap.dat | grep WGS | cut -f 2 | sort | uniq > $OUT 11 | >&2 echo Written to $OUT 12 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.PDA.Y2.b2-noWXS/2_make_dockermap.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | OUT="dat/Dockermap.dat" 5 | 6 | bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 7 | 8 | >&2 echo Written to $OUT 9 | 
-------------------------------------------------------------------------------- /testing/docker_call/run_cases.PDA.Y2.b2-noWXS/3_make_case_list.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/CaseList.dat" 7 | 8 | #bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 9 | bash src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 10 | 11 | echo Written to $OUT 12 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.PDA.Y2.b2-noWXS/A.process_project_cases.sh: -------------------------------------------------------------------------------- 1 | # start processing of list of cases 2 | 3 | # Usage: 4 | # A.process_project_cases.sh [options] CASE1 CASE2 ... 5 | # or 6 | # cat CASES | A.process_project_cases.sh [options] - 7 | # 8 | # with CASES a list of case names. All options passed to src/process_cases.sh 9 | 10 | # Project config path is on host, and may be relative. Will be mounted as a file /project_config.sh 11 | PROJECT_CONFIG="./project_config.sh" 12 | source project_config-host.sh 13 | 14 | CASELIST="dat/CaseList.dat" 15 | DOCKERMAP="dat/Dockermap.dat" 16 | 17 | # DATAMAP lists directories mapped to /data1, /data2, etc. 
18 | DATAMAP=" $OUTD_H $DATA2 $DATA3 $DATA4 " 19 | 20 | if [ $IS_MGI == 1 ]; then 21 | # -M for MGI 22 | MGI_ARGS="-M -g $MGI_LSF_GROUP" 23 | fi 24 | 25 | # If PARALLEL_CASES is not defined, on non-MGI run jobs sequentially 26 | PARALLEL_CASES=20; PARGS="-J $PARALLEL_CASES" 27 | 28 | bash $BICSEQ_H/src/process_cases.sh $MGI_ARGS $PARGS -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST -m $DOCKERMAP -P "$DATAMAP" $@ 29 | 30 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.PDA.Y2.b2-noWXS/B.evaluate_project_cases.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case. Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.sh" 14 | source project_config-host.sh 15 | 16 | CASELIST="dat/CaseList.dat" 17 | 18 | # -M for MGI 19 | bash $BICSEQ_H/src/evaluate_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 20 | 21 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.PDA.Y2.b2-noWXS/C.make_analysis_description.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/"$BATCH_NAME".analysis_description.dat" 7 | 8 | bash src/make_analysis_description.sh -b $BAMMAP -O $OUTD_H -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | 
-------------------------------------------------------------------------------- /testing/docker_call/run_cases.PDA.Y2.b2-noWXS/README.md: -------------------------------------------------------------------------------- 1 | Develop and test running of multiple cases on katmai 2 | 3 | # Configure scripts and config files 4 | 5 | ## 1. Edit `project_config-host.sh` 6 | 7 | First, create list of cases which we will process. 8 | 9 | 10 | 11 | ## 2. Make Dockermap file 12 | Dockermap file which provides mapping from host to container paths. 13 | 14 | ## 3. Make CaseList file 15 | CaseList file contains BAM paths (mapped to container) and other details to process a given case. 16 | Reads in BamMap and a list of case names. 17 | 18 | 19 | # Testing 20 | 21 | Good idea to test everything prior to run. Can do `dry run` to print out commands rather than executing them. The 22 | dry run argument -d can be repeated to get down to scripts which are called. the -1 argument will exit after one 23 | case is processed. 
24 | 25 | ``` 26 | bash A.process_project_cases.sh -d1 - < dat/case_names.dat 27 | ``` 28 | 29 | 30 | # Running 31 | 32 | To launch all cases with 3 running at once: 33 | ``` 34 | bash A.process_project_cases.sh -J 3 - < dat/case_names.dat 35 | ``` 36 | 37 | Alternatively, use `B` to find cases to run 38 | ``` 39 | bash B.evaluate_project_cases.sh -f not_started -u | bash A.process_project_cases.sh - 40 | ``` 41 | 42 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.PDA.Y2.b2-noWXS/dat/Dockermap.dat: -------------------------------------------------------------------------------- 1 | /diskmnt/Projects/cptac_downloads_6/GDC_import/data:/import1 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.PDA.Y2.b2-noWXS/dat/Dockermap_backup.dat: -------------------------------------------------------------------------------- 1 | /diskmnt/Projects/cptac_downloads_1/GDC_import/data:/import1 2 | /diskmnt/Projects/cptac_downloads_5/GDC_import/data:/import2 3 | /diskmnt/Projects/cptac_downloads_6/GDC_import/data:/import3 4 | /diskmnt/Projects/cptac/GDC_import/data:/import4 5 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.PDA.Y2.b2-noWXS/dat/case_names.dat: -------------------------------------------------------------------------------- 1 | C3L-00017 2 | C3L-00102 3 | C3L-00189 4 | C3L-00277 5 | C3L-00395 6 | C3L-00401 7 | C3L-00589 8 | C3L-00598 9 | C3L-00599 10 | C3L-00622 11 | C3L-00625 12 | C3L-00640 13 | C3L-00819 14 | C3L-00881 15 | C3L-00885 16 | C3L-00928 17 | C3L-01031 18 | C3L-01036 19 | C3L-01051 20 | C3L-01052 21 | C3L-01053 22 | C3L-01054 23 | C3L-01328 24 | C3L-01453 25 | C3L-01598 26 | C3L-01637 27 | C3L-01662 28 | C3L-01687 29 | C3L-01689 30 | C3L-01702 31 | C3L-01703 32 | C3L-01971 33 | C3L-02109 34 | C3L-02112 35 | C3L-02115 36 | C3L-02116 37 | C3L-02118 38 | C3L-02463 39 
| C3L-02606 40 | C3L-02610 41 | C3L-02613 42 | C3N-00198 43 | C3N-00249 44 | C3N-00302 45 | C3N-00303 46 | C3N-00436 47 | C3N-00511 48 | C3N-00512 49 | C3N-00513 50 | C3N-00514 51 | C3N-00516 52 | C3N-00517 53 | C3N-00518 54 | C3N-00954 55 | C3N-00957 56 | C3N-01011 57 | C3N-01165 58 | C3N-01166 59 | C3N-01167 60 | C3N-01168 61 | C3N-01169 62 | C3N-01375 63 | C3N-01378 64 | C3N-01379 65 | C3N-01380 66 | C3N-01381 67 | C3N-01382 68 | C3N-01383 69 | C3N-01388 70 | C3N-01389 71 | C3N-01502 72 | C3N-01714 73 | C3N-01715 74 | C3N-01716 75 | C3N-01997 76 | C3N-01998 77 | C3N-02069 78 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.PDA.Y2.b2-noWXS/project_config-host.sh: -------------------------------------------------------------------------------- 1 | # Define host-specific project paths and configuration 2 | # This is for katmai 3 | 4 | BATCH_NAME="PDA.Y2.b2-noWXS" 5 | PROJECT="run_cases."$BATCH_NAME 6 | 7 | # All paths here are relative to host 8 | BAMMAP="/home/mwyczalk_test/Projects/CPTAC3/CPTAC3.catalog/katmai.BamMap.dat" 9 | 10 | # The list of case list 11 | CASEMAP="/home/mwyczalk_test/Projects/CPTAC3/CPTAC3.catalog/CPTAC3.cases.dat" 12 | 13 | # Installation directory of BICSEQ2.DL 14 | #BICSEQ_H="/home/mwyczalk_test/Projects/BICSEQ2" 15 | BICSEQ_H="/diskmnt/Projects/CPTAC3CNV/BICSEQ2/BICSEQ2" 16 | 17 | # Principal workflow output directory. /data1 will map to $OUTD_H 18 | #OUTBASE_H="/diskmnt/Datasets/BICSEQ2-dev.tmp" 19 | OUTBASE_H="/diskmnt/Projects/CPTAC3CNV/BICSEQ2/outputs" 20 | OUTD_H="$OUTBASE_H/$PROJECT" 21 | 22 | 23 | # Define directories to be mapped to /data2, etc. If more than DATA4, adjust call to process_cases.sh accordingly 24 | # data2: chrom reference (./hg38) and mappability 25 | DATA2="/diskmnt/Projects/CPTAC3CNV/BICSEQ2/inputs" 26 | # data3: gene annotation file. 
using updated one (19940 lines) copied from MGI 27 | DATA3="/diskmnt/Datasets/BICSEQ2-dev.tmp/cached.annotation" # /gencode.v29.annotation.hg38.p12.bed 28 | #DATA3="/diskmnt/Datasets/BICSEQ2-dev.tmp/cached.annotation" # /gencode.v29.annotation.hg38.p12.bed 29 | 30 | # set this to 1 if running on MGI 31 | IS_MGI=0 32 | MGI_LSF_GROUP="/mwyczalk/BICSEQ2" 33 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.PanCan58.compute1/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, looking for all UCEC cases with WGS hg38 data 3 | 4 | source project_config-host.sh 5 | 6 | mkdir -p dat 7 | OUT="dat/case_names.dat" 8 | 9 | grep Y2.b2 $CASEMAP | cut -f 1 | sort -u > $OUT 10 | >&2 echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.PanCan58.compute1/2_make_dockermap.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | OUT="dat/Dockermap.dat" 5 | 6 | bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 7 | 8 | >&2 echo Written to $OUT 9 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.PanCan58.compute1/3_make_case_list.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/CaseList.dat" 7 | 8 | bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.PanCan58.compute1/A.process_project_cases.sh: 
-------------------------------------------------------------------------------- 1 | # start processing of list of cases 2 | 3 | # Usage: 4 | # A.process_project_cases.sh [options] CASE1 CASE2 ... 5 | # or 6 | # cat CASES | A.process_project_cases.sh [options] - 7 | # 8 | # with CASES a list of case names. All options passed to src/process_cases.sh 9 | 10 | # Project config path is on host, and may be relative. Will be mounted as a file /project_config.sh 11 | PROJECT_CONFIG="./project_config.sh" 12 | source project_config-host.sh 13 | 14 | CASELIST="dat/CaseList.dat" 15 | DOCKERMAP="dat/Dockermap.dat" 16 | 17 | # DATAMAP lists directories mapped to /data1, /data2, etc. 18 | DATAMAP=" $OUTD_H $DATA2 $DATA3 $DATA4 $DATA5" 19 | 20 | if [ $IS_MGI == 1 ]; then 21 | # -M for MGI 22 | MGI_ARGS="-M -g $MGI_LSF_GROUP -q research-hpc" 23 | fi 24 | 25 | if [ $IS_COMPUTE1 == 1 ]; then 26 | # -M for MGI 27 | COMPUTE1_ARGS="-Z -g $COMPUTE1_LSF_GROUP -q general -G 50" 28 | fi 29 | 30 | # If PARALLEL_CASES is not defined, on non-MGI run jobs sequentially 31 | PARALLEL_CASES=20; PARGS="-J $PARALLEL_CASES" 32 | 33 | bash $BICSEQ_H/src/process_cases.sh $MGI_ARGS $COMPUTE1_ARGS $PARGS -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST -m $DOCKERMAP -P "$DATAMAP" $@ 34 | 35 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.PanCan58.compute1/B.evaluate_project_cases.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case. 
Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.sh" 14 | source project_config-host.sh 15 | 16 | CASELIST="dat/CaseList.dat" 17 | 18 | # -M for MGI 19 | bash $BICSEQ_H/src/evaluate_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 20 | 21 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.PanCan58.compute1/C.make_analysis_description.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/"$BATCH_NAME".analysis_description.dat" 7 | 8 | bash $BICSEQ_H/src/make_analysis_description.sh -b $BAMMAP -O $OUTD_H -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.PanCan58.compute1/TODO: -------------------------------------------------------------------------------- 1 | # Error? It shows error in the B script but the get unique outputs look ok 2 | 3 | [ Thu Jul 23 23:45:40 UTC 2020 ] All jobs launched. 
Waiting for them to complete 4 | Traceback (most recent call last): 5 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 385, i 6 | n 7 | main(sys.argv[1:]) 8 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 378, i 9 | n main 10 | if driverMonitor.start_monitor() < 0 : 11 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 175, i 12 | n start_monitor 13 | if self.__collect_rusage() < 0 : 14 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 225, i 15 | n __collect_rusage 16 | self.dbHandler.update_db(resources) 17 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 131, i 18 | n update_db 19 | conn.rollback() 20 | sqlite3.OperationalError: cannot rollback - no transaction is active 21 | [ Fri Jul 24 05:08:35 UTC 2020 ] All jobs have completed, written to /data1/C3N-01719/unique_reads 22 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.PanCan58.compute1/dat/Dockermap.dat: -------------------------------------------------------------------------------- 1 | /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/import1 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.PanCan58.compute1/dat/case_names.dat: -------------------------------------------------------------------------------- 1 | C3L-01048 2 | C3L-01049 3 | C3L-01061 4 | C3L-01142 5 | C3L-01146 6 | C3L-01149 7 | C3L-01154 8 | C3L-01155 9 | C3L-01156 10 | C3L-01157 11 | C3L-02617 12 | C3L-02642 13 | C3L-02704 14 | C3L-02705 15 | C3L-02707 16 | C3L-02708 17 | C3L-02900 18 | C3L-02955 19 | C3L-02970 20 | C3L-02984 21 | C3L-03260 22 | C3L-03266 23 | C3L-03387 24 | C3L-03390 25 | C3L-03392 26 | C3L-03400 27 | C3L-03405 28 | C3L-03407 29 | C3L-03681 30 | C3L-03727 
31 | C3L-03728 32 | C3L-03744 33 | C3L-03748 34 | C3L-03968 35 | C3L-04084 36 | C3N-01851 37 | C3N-01852 38 | C3N-02190 39 | C3N-02255 40 | C3N-02256 41 | C3N-02727 42 | C3N-02769 43 | C3N-02770 44 | C3N-02782 45 | C3N-02783 46 | C3N-02784 47 | C3N-02785 48 | C3N-02786 49 | C3N-02788 50 | C3N-03070 51 | C3N-03088 52 | C3N-03180 53 | C3N-03182 54 | C3N-03183 55 | C3N-03184 56 | C3N-03186 57 | C3N-03188 58 | C3N-03473 59 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.PanCan58.compute1/dat/worklog: -------------------------------------------------------------------------------- 1 | cp /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/Case_Lists/UCEC.Discovery.22.dat case_names.dat 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.PanCan58.compute1/project_config-host.sh: -------------------------------------------------------------------------------- 1 | # Define host-specific project paths and configuration 2 | # This is for katmai 3 | 4 | BATCH_NAME="PanCan58" 5 | PROJECT="run_cases."$BATCH_NAME 6 | 7 | # All paths here are relative to host 8 | BAMMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/BamMap/storage1.BamMap.dat" 9 | 10 | # The list of case list 11 | CASEMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/CPTAC3.cases.dat" 12 | 13 | # Installation directory of BICSEQ2.DL 14 | BICSEQ_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.PanCan58/scripts/BICSEQ2" 15 | 16 | # Principal workflow output directory. /data1 will map to $OUTD_H 17 | OUTBASE_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.PanCan58/outputs" 18 | OUTD_H="$OUTBASE_H/$PROJECT" 19 | 20 | # Define directories to be mapped to /data2, etc. 
If more than DATA4, adjust call to process_cases.sh accordingly 21 | # data2: chrom reference (./hg38) and mappability 22 | DATA2="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/inputs/" 23 | # data3: gene annotation file. using updated one (19940 lines) copied from MGI 24 | DATA3="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/cached.annotation" 25 | # data4: directory to the scripts for running the current batch, espcially the project config 26 | DATA4=${BICSEQ_H}"/testing/docker_call/run_cases.PanCan58.compute1" 27 | ## data5: directory with all the scripts, including codes under src 28 | DATA5=${BICSEQ_H} 29 | 30 | # set this to 1 if running on MGI 31 | IS_MGI=0 32 | MGI_LSF_GROUP="/yigewu/bicseq2" 33 | 34 | # set this to 1 if running on Compute1 35 | IS_COMPUTE1=1 36 | COMPUTE1_LSF_GROUP="/yigewu/bicseq2" 37 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.PanCan58.compute1/start_docker_to_launch_jobs.compute1.sh: -------------------------------------------------------------------------------- 1 | bash /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.WGS_CNV_Somatic.Y3.UCEC.178/scripts/WUDocker/start_docker.sh -I mwyczalkowski/bicseq2 -M compute1 /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/ /storage1/fs1/home1/Active/home/yigewu:/home/yigewu/ /storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/ /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/ 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC-test.katmai/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, looking for all UCEC cases with WGS hg38 data 3 | 4 | source project_config-host.sh 5 | 6 | 
OUT="dat/case_names.dat" 7 | 8 | grep UCEC $BAMMAP | grep WGS | grep hg38 | cut -f 2 | sort -u > $OUT 9 | >&2 echo Written to $OUT 10 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC-test.katmai/2_make_dockermap.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | OUT="dat/Dockermap.dat" 5 | 6 | bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 7 | 8 | >&2 echo Written to $OUT 9 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC-test.katmai/3_make_case_list.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/CaseList.dat" 7 | 8 | bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC-test.katmai/A.process_project_cases.sh: -------------------------------------------------------------------------------- 1 | # start processing of list of cases 2 | 3 | # Usage: 4 | # A.process_project_cases.sh [options] CASE1 CASE2 ... 5 | # or 6 | # cat CASES | A.process_project_cases.sh [options] - 7 | # 8 | # with CASES a list of case names. All options passed to src/process_cases.sh 9 | 10 | # Project config path is on host, and may be relative. Will be mounted as a file /project_config.sh 11 | PROJECT_CONFIG="./project_config.sh" 12 | source project_config-host.sh 13 | 14 | CASELIST="dat/CaseList.dat" 15 | DOCKERMAP="dat/Dockermap.dat" 16 | 17 | # DATAMAP lists directories mapped to /data1, /data2, etc. 
18 | DATAMAP=" $OUTD_H $DATA2 $DATA3 $DATA4 " 19 | 20 | if [ $IS_MGI == 1 ]; then 21 | # -M for MGI 22 | MGI_ARGS="-M -g $MGI_LSF_GROUP" 23 | fi 24 | 25 | # If PARALLEL_CASES is not defined, on non-MGI run jobs sequentially 26 | # PARALLEL_CASES=1; PARGS="-J $PARALLEL_CASES" 27 | 28 | bash $BICSEQ_H/src/process_cases.sh $MGI_ARGS $PARGS -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST -m $DOCKERMAP -P "$DATAMAP" $@ 29 | 30 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC-test.katmai/B.evaluate_project_cases.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case. Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.sh" 14 | source project_config-host.sh 15 | 16 | CASELIST="dat/CaseList.dat" 17 | 18 | # -M for MGI 19 | bash $BICSEQ_H/src/evaluate_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 20 | 21 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC-test.katmai/README.md: -------------------------------------------------------------------------------- 1 | Develop and test running of multiple cases on katmai 2 | 3 | # Configure scripts and config files 4 | 5 | ## 1. Edit `project_config.sh` 6 | 7 | First, create list of cases which we will process. 8 | 9 | 10 | 11 | ## 2. Make Dockermap file 12 | Dockermap file which provides mapping from host to container paths. 
13 | 14 | ## 3. Make CaseList file 15 | CaseList file contains BAM paths (mapped to container) and other details to process a given case. 16 | Reads in BamMap and a list of case names. 17 | 18 | 19 | # Testing 20 | 21 | Good idea to test everything prior to run. Can do `dry run` to print out commands rather than executing them. The 22 | dry run argument -d can be repeated to get down to scripts which are called. the -1 argument will exit after one 23 | case is processed. 24 | 25 | ``` 26 | bash A.process_project_cases.sh -d1 - < dat/case_names.dat 27 | ``` 28 | 29 | 30 | # Running 31 | 32 | To launch all cases with 3 running at once: 33 | ``` 34 | bash A.process_project_cases.sh -J 3 - < dat/case_names.dat 35 | ``` 36 | 37 | Alternatively, use `B` to find cases to run 38 | ``` 39 | bash B.evaluate_project_cases.sh -f not_started -u | bash A.process_project_cases.sh - 40 | ``` 41 | 42 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC-test.katmai/project_config-host.sh: -------------------------------------------------------------------------------- 1 | # Define host-specific project paths and configuration 2 | # This is for katmai 3 | 4 | PROJECT="run_cases.UCEC-test" 5 | 6 | # All paths here are relative to host 7 | BAMMAP="/home/mwyczalk_test/Projects/CPTAC3/CPTAC3.catalog/katmai.BamMap.dat" 8 | 9 | # Installation directory of BICSEQ2.DL 10 | BICSEQ_H="/home/mwyczalk_test/Projects/BICSEQ2" 11 | 12 | # Principal workflow output directory. /data1 will map to $OUTD_H 13 | OUTBASE_H="/diskmnt/Datasets/BICSEQ2-dev.tmp" 14 | OUTD_H="$OUTBASE_H/$PROJECT" 15 | 16 | 17 | # Define directories to be mapped to /data2, etc. If more than DATA4, adjust call to process_cases.sh accordingly 18 | # data2: chrom reference (./hg38) and mappability 19 | DATA2="/diskmnt/Projects/CPTAC3CNV/BICSEQ2/inputs" 20 | # data3: gene annotation file. 
using updated one (19940 lines) copied from MGI 21 | DATA3="/diskmnt/Datasets/BICSEQ2-dev.tmp/cached.annotation" # /gencode.v29.annotation.hg38.p12.bed 22 | 23 | # set this to 1 if running on MGI 24 | IS_MGI=0 25 | MGI_LSF_GROUP="/mwyczalk/BICSEQ2" 26 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC.rerun.compute1/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, looking for all UCEC cases with WGS hg38 data 3 | 4 | source project_config-host.sh 5 | 6 | mkdir -p dat 7 | OUT="dat/case_names.dat" 8 | 9 | grep Y2.b2 $CASEMAP | cut -f 1 | sort -u > $OUT 10 | >&2 echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC.rerun.compute1/2_make_dockermap.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | OUT="dat/Dockermap.dat" 5 | 6 | bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 7 | 8 | >&2 echo Written to $OUT 9 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC.rerun.compute1/3_make_case_list.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/CaseList.dat" 7 | 8 | bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC.rerun.compute1/B.evaluate_project_cases.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. 
Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case. Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.sh" 14 | source project_config-host.sh 15 | 16 | CASELIST="dat/CaseList.dat" 17 | 18 | # -M for MGI 19 | bash $BICSEQ_H/src/evaluate_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 20 | 21 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC.rerun.compute1/C.make_analysis_description.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/"$BATCH_NAME".analysis_description.dat" 7 | 8 | bash $BICSEQ_H/src/make_analysis_description.sh -b $BAMMAP -O $OUTD_H -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC.rerun.compute1/TODO: -------------------------------------------------------------------------------- 1 | # Error? It shows error in the B script but the get unique outputs look ok 2 | 3 | [ Thu Jul 23 23:45:40 UTC 2020 ] All jobs launched. 
Waiting for them to complete 4 | Traceback (most recent call last): 5 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 385, i 6 | n 7 | main(sys.argv[1:]) 8 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 378, i 9 | n main 10 | if driverMonitor.start_monitor() < 0 : 11 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 175, i 12 | n start_monitor 13 | if self.__collect_rusage() < 0 : 14 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 225, i 15 | n __collect_rusage 16 | self.dbHandler.update_db(resources) 17 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 131, i 18 | n update_db 19 | conn.rollback() 20 | sqlite3.OperationalError: cannot rollback - no transaction is active 21 | [ Fri Jul 24 05:08:35 UTC 2020 ] All jobs have completed, written to /data1/C3N-01719/unique_reads 22 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC.rerun.compute1/dat/0322.evaluate.v4.txt: -------------------------------------------------------------------------------- 1 | C3L-05571 execute_workflow.sh:complete 2 | C3L-05848 execute_workflow.sh:complete 3 | C3L-05849 execute_workflow.sh:complete 4 | C3L-00006 execute_workflow.sh:not_started 5 | C3L-00008 execute_workflow.sh:not_started 6 | C3L-00032 execute_workflow.sh:not_started 7 | C3L-00090 execute_workflow.sh:not_started 8 | C3L-00098 execute_workflow.sh:not_started 9 | C3L-00136 execute_workflow.sh:complete 10 | C3L-00137 execute_workflow.sh:complete 11 | C3L-00139 execute_workflow.sh:not_started 12 | C3L-00143 execute_workflow.sh:not_started 13 | C3L-00145 execute_workflow.sh:not_started 14 | C3L-00156 execute_workflow.sh:complete 15 | C3L-00157 execute_workflow.sh:complete 16 | -------------------------------------------------------------------------------- 
/testing/docker_call/run_cases.UCEC.rerun.compute1/dat/Dockermap.dat: -------------------------------------------------------------------------------- 1 | /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/import1 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC.rerun.compute1/dat/backup/Dockermap.dat: -------------------------------------------------------------------------------- 1 | /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/import1 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC.rerun.compute1/dat/start_docker_to_get_filesize.compute1.sh: -------------------------------------------------------------------------------- 1 | bash /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.WGS_CNV_Somatic.Y3.UCEC.178/scripts/WUDocker/start_docker.sh -I mwyczalkowski/bicseq2 -M compute1 /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/ /storage1/fs1/home1/Active/home/yigewu:/home/yigewu/ /storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/ /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/ /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/import1 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC.rerun.compute1/dat/worklog: -------------------------------------------------------------------------------- 1 | cp /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/Case_Lists/4E.cases_work_list.dat case_names.dat 2 | head -7 UCEC.rerun.analysis_description.dat > UCEC_3.analysis_description.dat 3 | -------------------------------------------------------------------------------- 
/testing/docker_call/run_cases.UCEC.rerun.compute1/project_config-host.sh: -------------------------------------------------------------------------------- 1 | # Define host-specific project paths and configuration 2 | # This is for katmai 3 | 4 | BATCH_NAME="UCEC.rerun" 5 | PROJECT="run_cases."$BATCH_NAME 6 | 7 | # All paths here are relative to host 8 | BAMMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/BamMap/storage1.BamMap.dat" 9 | 10 | # The list of case list 11 | CASEMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/CPTAC3.cases.dat" 12 | 13 | # Installation directory of BICSEQ2.DL 14 | BICSEQ_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.UCEC.rerun/scripts/BICSEQ2" 15 | 16 | # Principal workflow output directory. /data1 will map to $OUTD_H 17 | OUTBASE_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.UCEC.rerun/outputs" 18 | OUTD_H="$OUTBASE_H/$PROJECT" 19 | 20 | # Define directories to be mapped to /data2, etc. If more than DATA4, adjust call to process_cases.sh accordingly 21 | # data2: chrom reference (./hg38) and mappability 22 | DATA2="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/inputs/" 23 | # data3: gene annotation file. 
using updated one (19940 lines) copied from MGI 24 | DATA3="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/cached.annotation" 25 | # data4: directory to the scripts for running the current batch, espcially the project config 26 | DATA4=${BICSEQ_H}"/testing/docker_call/run_cases.UCEC.rerun.compute1" 27 | ## data5: directory with all the scripts, including codes under src 28 | DATA5=${BICSEQ_H} 29 | 30 | # set this to 1 if running on MGI 31 | IS_MGI=0 32 | MGI_LSF_GROUP="/yigewu/bicseq2" 33 | 34 | # set this to 1 if running on Compute1 35 | IS_COMPUTE1=1 36 | COMPUTE1_LSF_GROUP="/yigewu/bicseq2" 37 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC.rerun.compute1/start_docker_to_launch_jobs.compute1.sh: -------------------------------------------------------------------------------- 1 | bash /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.WGS_CNV_Somatic.Y3.UCEC.178/scripts/WUDocker/start_docker.sh -I mwyczalkowski/bicseq2 -M compute1 /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/ /storage1/fs1/home1/Active/home/yigewu:/home/yigewu/ /storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/ /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/ 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC_3.compute1/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, looking for all UCEC cases with WGS hg38 data 3 | 4 | source project_config-host.sh 5 | 6 | mkdir -p dat 7 | OUT="dat/case_names.dat" 8 | 9 | grep Y2.b2 $CASEMAP | cut -f 1 | sort -u > $OUT 10 | >&2 echo Written to $OUT 11 | -------------------------------------------------------------------------------- 
/testing/docker_call/run_cases.UCEC_3.compute1/2_make_dockermap.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | OUT="dat/Dockermap.dat" 5 | 6 | bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 7 | 8 | >&2 echo Written to $OUT 9 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC_3.compute1/3_make_case_list.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/CaseList.dat" 7 | 8 | bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC_3.compute1/B.evaluate_project_cases.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case. 
Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.sh" 14 | source project_config-host.sh 15 | 16 | CASELIST="dat/CaseList.dat" 17 | 18 | # -M for MGI 19 | bash $BICSEQ_H/src/evaluate_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 20 | 21 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC_3.compute1/C.make_analysis_description.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/"$BATCH_NAME".analysis_description.dat" 7 | 8 | bash $BICSEQ_H/src/make_analysis_description.sh -b $BAMMAP -O $OUTD_H -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC_3.compute1/TODO: -------------------------------------------------------------------------------- 1 | # Error? It shows error in the B script but the get unique outputs look ok 2 | 3 | [ Thu Jul 23 23:45:40 UTC 2020 ] All jobs launched. 
Waiting for them to complete 4 | Traceback (most recent call last): 5 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 385, i 6 | n 7 | main(sys.argv[1:]) 8 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 378, i 9 | n main 10 | if driverMonitor.start_monitor() < 0 : 11 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 175, i 12 | n start_monitor 13 | if self.__collect_rusage() < 0 : 14 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 225, i 15 | n __collect_rusage 16 | self.dbHandler.update_db(resources) 17 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 131, i 18 | n update_db 19 | conn.rollback() 20 | sqlite3.OperationalError: cannot rollback - no transaction is active 21 | [ Fri Jul 24 05:08:35 UTC 2020 ] All jobs have completed, written to /data1/C3N-01719/unique_reads 22 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC_3.compute1/dat/CaseList.dat: -------------------------------------------------------------------------------- 1 | # case sample_name_A data_path_A UUID_A sample_name_B data_path_B UUID_B 2 | C3L-05571 C3L-05571.WGS.T.hg38 /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/0a1d1971-388c-472e-a6ac-c3608817ee7d/63c942b4-6dcf-4ed7-8c57-d9216fb8ac87_wgs_gdc_realn.bam 0a1d1971-388c-472e-a6ac-c3608817ee7d C3L-05571.WGS.N.hg38 /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/04fe5225-2758-4691-b709-934aa7656050/2154e8df-970e-44cf-a0d9-ee0a6cc3a8f6_wgs_gdc_realn.bam 04fe5225-2758-4691-b709-934aa7656050 3 | C3L-05848 C3L-05848.WGS.T.hg38 /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/11884c29-689e-4c70-adfb-9490351d8c07/4371e677-059d-4c39-9ec3-fdbad29b064e_wgs_gdc_realn.bam 
11884c29-689e-4c70-adfb-9490351d8c07 C3L-05848.WGS.N.hg38 /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/b9d8dbc3-28d5-4a81-99c1-88246d743f56/7d7cb0ec-0300-463a-bf17-b3b5126d3cba_wgs_gdc_realn.bam b9d8dbc3-28d5-4a81-99c1-88246d743f56 4 | C3L-05849 C3L-05849.WGS.T.hg38 /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/d3be3cd8-bc72-4e35-a6dd-1155cf4e9d0e/6966a7ce-9a95-4aa0-a41f-0ceb9404607b_wgs_gdc_realn.bam d3be3cd8-bc72-4e35-a6dd-1155cf4e9d0e C3L-05849.WGS.N.hg38 /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/5aa3512e-d383-486a-ac23-d66db53f7388/138f530d-492b-4fc7-aba2-be1c9dc10174_wgs_gdc_realn.bam 5aa3512e-d383-486a-ac23-d66db53f7388 5 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC_3.compute1/dat/Dockermap.dat: -------------------------------------------------------------------------------- 1 | /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/import1 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC_3.compute1/dat/case_names.dat: -------------------------------------------------------------------------------- 1 | C3L-05571 2 | C3L-05848 3 | C3L-05849 4 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC_3.compute1/project_config-host.sh: -------------------------------------------------------------------------------- 1 | # Define host-specific project paths and configuration 2 | # This is for katmai 3 | 4 | BATCH_NAME="UCEC_3" 5 | PROJECT="run_cases."$BATCH_NAME 6 | 7 | # All paths here are relative to host 8 | BAMMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/BamMap/storage1.BamMap.dat" 9 | 10 | # The list of case list 11 | 
CASEMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/CPTAC3.cases.dat" 12 | 13 | # Installation directory of BICSEQ2.DL 14 | BICSEQ_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.UCEC.rerun/scripts/BICSEQ2" 15 | 16 | # Principal workflow output directory. /data1 will map to $OUTD_H 17 | OUTBASE_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.UCEC.rerun/outputs" 18 | OUTD_H="$OUTBASE_H/$PROJECT" 19 | 20 | # Define directories to be mapped to /data2, etc. If more than DATA4, adjust call to process_cases.sh accordingly 21 | # data2: chrom reference (./hg38) and mappability 22 | DATA2="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/inputs/" 23 | # data3: gene annotation file. using updated one (19940 lines) copied from MGI 24 | DATA3="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/cached.annotation" 25 | # data4: directory to the scripts for running the current batch, espcially the project config 26 | DATA4=${BICSEQ_H}"/testing/docker_call/run_cases.UCEC_3.compute1" 27 | ## data5: directory with all the scripts, including codes under src 28 | DATA5=${BICSEQ_H} 29 | 30 | # set this to 1 if running on MGI 31 | IS_MGI=0 32 | MGI_LSF_GROUP="/yigewu/bicseq2" 33 | 34 | # set this to 1 if running on Compute1 35 | IS_COMPUTE1=1 36 | COMPUTE1_LSF_GROUP="/yigewu/bicseq2" 37 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.UCEC_3.compute1/start_docker_to_launch_jobs.compute1.sh: -------------------------------------------------------------------------------- 1 | #bash /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.WGS_CNV_Somatic.Y3.UCEC.178/scripts/WUDocker/start_docker.sh -I mwyczalkowski/bicseq2 -M compute1 /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/ 
/storage1/fs1/home1/Active/home/yigewu:/home/yigewu/ /storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/ /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/ 2 | bash /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.UCEC.rerun/scripts/WUDocker/start_docker.sh -I mwyczalkowski/bicseq2 -M compute1 /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/ /storage1/fs1/home1/Active/home/yigewu:/home/yigewu/ /storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/ /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/ 3 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y2.b1.katmai/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, looking for all UCEC cases with WGS hg38 data 3 | 4 | source project_config-host.sh 5 | 6 | mkdir -p dat 7 | OUT="dat/case_names.dat" 8 | 9 | grep Y2.b1 $CASEMAP | cut -f 1 | sort -u > $OUT 10 | >&2 echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y2.b1.katmai/2_make_dockermap.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | OUT="dat/Dockermap.dat" 5 | 6 | bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 7 | 8 | >&2 echo Written to $OUT 9 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y2.b1.katmai/3_make_case_list.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/CaseList.dat" 7 | 8 | #bash $BICSEQ_H/src/make_case_list.sh -b 
$BAMMAP -m $DOCKERMAP - < $CASES > $OUT 9 | bash src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 10 | 11 | echo Written to $OUT 12 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y2.b1.katmai/A.process_project_cases.sh: -------------------------------------------------------------------------------- 1 | # start processing of list of cases 2 | 3 | # Usage: 4 | # A.process_project_cases.sh [options] CASE1 CASE2 ... 5 | # or 6 | # cat CASES | A.process_project_cases.sh [options] - 7 | # 8 | # with CASES a list of case names. All options passed to src/process_cases.sh 9 | 10 | # Project config path is on host, and may be relative. Will be mounted as a file /project_config.sh 11 | PROJECT_CONFIG="./project_config.sh" 12 | source project_config-host.sh 13 | 14 | CASELIST="dat/CaseList.dat" 15 | DOCKERMAP="dat/Dockermap.dat" 16 | 17 | # DATAMAP lists directories mapped to /data1, /data2, etc. 18 | DATAMAP=" $OUTD_H $DATA2 $DATA3 $DATA4 " 19 | 20 | if [ $IS_MGI == 1 ]; then 21 | # -M for MGI 22 | MGI_ARGS="-M -g $MGI_LSF_GROUP" 23 | fi 24 | 25 | # If PARALLEL_CASES is not defined, on non-MGI run jobs sequentially 26 | PARALLEL_CASES=20; PARGS="-J $PARALLEL_CASES" 27 | 28 | bash $BICSEQ_H/src/process_cases.sh $MGI_ARGS $PARGS -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST -m $DOCKERMAP -P "$DATAMAP" $@ 29 | 30 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y2.b1.katmai/B.evaluate_project_cases.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case. 
Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.sh" 14 | source project_config-host.sh 15 | 16 | CASELIST="dat/CaseList.dat" 17 | 18 | # -M for MGI 19 | bash $BICSEQ_H/src/evaluate_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 20 | 21 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y2.b1.katmai/C.make_analysis_description.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/"$BATCH_NAME".analysis_description.dat" 7 | 8 | bash src/make_analysis_description.sh -b $BAMMAP -O $OUTD_H -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y2.b1.katmai/README.md: -------------------------------------------------------------------------------- 1 | Develop and test running of multiple cases on katmai 2 | 3 | # Configure scripts and config files 4 | 5 | ## 1. Edit `project_config-host.sh` 6 | 7 | First, create list of cases which we will process. 8 | 9 | 10 | 11 | ## 2. Make Dockermap file 12 | Dockermap file which provides mapping from host to container paths. 13 | 14 | ## 3. Make CaseList file 15 | CaseList file contains BAM paths (mapped to container) and other details to process a given case. 16 | Reads in BamMap and a list of case names. 17 | 18 | 19 | # Testing 20 | 21 | Good idea to test everything prior to run. Can do `dry run` to print out commands rather than executing them. The 22 | dry run argument -d can be repeated to get down to scripts which are called. 
The -1 argument will exit after one
C3N-00663 48 | C3N-00665 49 | C3N-00822 50 | C3N-00825 51 | C3N-00828 52 | C3N-00829 53 | C3N-00839 54 | C3N-00846 55 | C3N-00857 56 | C3N-00871 57 | C3N-01017 58 | C3N-01018 59 | C3N-01025 60 | C3N-01028 61 | C3N-01192 62 | C3N-01194 63 | C3N-01196 64 | C3N-01334 65 | C3N-01337 66 | C3N-01338 67 | C3N-01339 68 | C3N-01364 69 | C3N-01366 70 | C3N-01367 71 | C3N-01368 72 | C3N-01369 73 | C3N-01411 74 | C3N-01505 75 | C3N-01515 76 | C3N-01517 77 | C3N-01518 78 | C3N-01620 79 | C3N-01645 80 | C3N-01798 81 | C3N-01814 82 | C3N-01815 83 | C3N-01816 84 | C3N-01818 85 | C3N-01846 86 | C3N-01856 87 | C3N-01857 88 | C3N-02181 89 | C3N-02183 90 | C3N-02185 91 | C3N-02186 92 | C3N-02188 93 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y2.b1.katmai/project_config-host.sh: -------------------------------------------------------------------------------- 1 | # Define host-specific project paths and configuration 2 | # This is for katmai 3 | 4 | BATCH_NAME="Y2.b1" 5 | PROJECT="run_cases."$BATCH_NAME 6 | 7 | # All paths here are relative to host 8 | BAMMAP="/home/mwyczalk_test/Projects/CPTAC3/CPTAC3.catalog/katmai.BamMap.dat" 9 | 10 | # The list of case list 11 | CASEMAP="/home/mwyczalk_test/Projects/CPTAC3/CPTAC3.catalog/CPTAC3.cases.dat" 12 | 13 | # Installation directory of BICSEQ2.DL 14 | BICSEQ_H="/home/mwyczalk_test/Projects/BICSEQ2" 15 | 16 | # Principal workflow output directory. /data1 will map to $OUTD_H 17 | #OUTBASE_H="/diskmnt/Datasets/BICSEQ2-dev.tmp" 18 | OUTBASE_H="/diskmnt/Projects/CPTAC3CNV/BICSEQ2/outputs" 19 | OUTD_H="$OUTBASE_H/$PROJECT" 20 | 21 | 22 | # Define directories to be mapped to /data2, etc. If more than DATA4, adjust call to process_cases.sh accordingly 23 | # data2: chrom reference (./hg38) and mappability 24 | DATA2="/diskmnt/Projects/CPTAC3CNV/BICSEQ2/inputs" 25 | # data3: gene annotation file. 
using updated one (19940 lines) copied from MGI 26 | DATA3="/diskmnt/Datasets/BICSEQ2-dev.tmp/cached.annotation" # /gencode.v29.annotation.hg38.p12.bed 27 | 28 | # set this to 1 if running on MGI 29 | IS_MGI=0 30 | MGI_LSF_GROUP="/mwyczalk/BICSEQ2" 31 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y2.b2.katmai/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, looking for all UCEC cases with WGS hg38 data 3 | 4 | source project_config-host.sh 5 | 6 | mkdir -p dat 7 | OUT="dat/case_names.dat" 8 | 9 | grep Y2.b2 $CASEMAP | cut -f 1 | sort -u > $OUT 10 | >&2 echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y2.b2.katmai/2_make_dockermap.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | OUT="dat/Dockermap.dat" 5 | 6 | bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 7 | 8 | >&2 echo Written to $OUT 9 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y2.b2.katmai/3_make_case_list.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/CaseList.dat" 7 | 8 | #bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 9 | bash src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 10 | 11 | echo Written to $OUT 12 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y2.b2.katmai/A.process_project_cases.sh: -------------------------------------------------------------------------------- 1 | # start 
processing of list of cases 2 | 3 | # Usage: 4 | # A.process_project_cases.sh [options] CASE1 CASE2 ... 5 | # or 6 | # cat CASES | A.process_project_cases.sh [options] - 7 | # 8 | # with CASES a list of case names. All options passed to src/process_cases.sh 9 | 10 | # Project config path is on host, and may be relative. Will be mounted as a file /project_config.sh 11 | PROJECT_CONFIG="./project_config.sh" 12 | source project_config-host.sh 13 | 14 | CASELIST="dat/CaseList.dat" 15 | DOCKERMAP="dat/Dockermap.dat" 16 | 17 | # DATAMAP lists directories mapped to /data1, /data2, etc. 18 | DATAMAP=" $OUTD_H $DATA2 $DATA3 $DATA4 " 19 | 20 | if [ $IS_MGI == 1 ]; then 21 | # -M for MGI 22 | MGI_ARGS="-M -g $MGI_LSF_GROUP" 23 | fi 24 | 25 | # If PARALLEL_CASES is not defined, on non-MGI run jobs sequentially 26 | PARALLEL_CASES=20; PARGS="-J $PARALLEL_CASES" 27 | 28 | bash $BICSEQ_H/src/process_cases.sh $MGI_ARGS $PARGS -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST -m $DOCKERMAP -P "$DATAMAP" $@ 29 | 30 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y2.b2.katmai/B.evaluate_project_cases.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case. 
Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.sh" 14 | source project_config-host.sh 15 | 16 | CASELIST="dat/CaseList.dat" 17 | 18 | # -M for MGI 19 | bash $BICSEQ_H/src/evaluate_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 20 | 21 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y2.b2.katmai/C.make_analysis_description.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/"$BATCH_NAME".analysis_description.dat" 7 | 8 | bash src/make_analysis_description.sh -b $BAMMAP -O $OUTD_H -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y2.b2.katmai/README.md: -------------------------------------------------------------------------------- 1 | Develop and test running of multiple cases on katmai 2 | 3 | # Configure scripts and config files 4 | 5 | ## 1. Edit `project_config-host.sh` 6 | 7 | First, create list of cases which we will process. 8 | 9 | 10 | 11 | ## 2. Make Dockermap file 12 | Dockermap file which provides mapping from host to container paths. 13 | 14 | ## 3. Make CaseList file 15 | CaseList file contains BAM paths (mapped to container) and other details to process a given case. 16 | Reads in BamMap and a list of case names. 17 | 18 | 19 | # Testing 20 | 21 | Good idea to test everything prior to run. Can do `dry run` to print out commands rather than executing them. The 22 | dry run argument -d can be repeated to get down to scripts which are called. 
the -1 argument will exit after one 23 | case is processed. 24 | 25 | ``` 26 | bash A.process_project_cases.sh -d1 - < dat/case_names.dat 27 | ``` 28 | 29 | 30 | # Running 31 | 32 | To launch all cases with 3 running at once: 33 | ``` 34 | bash A.process_project_cases.sh -J 3 - < dat/case_names.dat 35 | ``` 36 | 37 | Alternatively, use `B` to find cases to run 38 | ``` 39 | bash B.evaluate_project_cases.sh -f not_started -u | bash A.process_project_cases.sh - 40 | ``` 41 | 42 | To generate analysis description file: https://docs.google.com/document/d/1Ho5cygpxd8sB_45nJ90d15DcdaGCiDqF0_jzIcc-9B4/edit 43 | ``` 44 | bash C.make_analysis_description.sh 45 | ``` 46 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y2.b2.katmai/dat/Dockermap.dat: -------------------------------------------------------------------------------- 1 | /diskmnt/Projects/cptac3_primary_1/GDC_import/data:/import1 2 | /diskmnt/Projects/cptac_downloads_6/GDC_import/data:/import2 3 | /diskmnt/Projects/cptac_downloads_7/GDC_import/data:/import3 4 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y2.b2.katmai/project_config-host.sh: -------------------------------------------------------------------------------- 1 | # Define host-specific project paths and configuration 2 | # This is for katmai 3 | 4 | BATCH_NAME="Y2.b2" 5 | PROJECT="run_cases."$BATCH_NAME 6 | 7 | # All paths here are relative to host 8 | BAMMAP="/home/mwyczalk_test/Projects/CPTAC3/CPTAC3.catalog/katmai.BamMap.dat" 9 | 10 | # The list of case list 11 | CASEMAP="/home/mwyczalk_test/Projects/CPTAC3/CPTAC3.catalog/CPTAC3.cases.dat" 12 | 13 | # Installation directory of BICSEQ2.DL 14 | BICSEQ_H="/diskmnt/Projects/cptac_scratch/CPTAC3_analysis/Somatic_CNV/BICSEQ2/Y2.b2/scripts" 15 | 16 | # Principal workflow output directory. 
/data1 will map to $OUTD_H 17 | OUTBASE_H="/diskmnt/Projects/cptac_scratch/CPTAC3_analysis/Somatic_CNV/BICSEQ2/Y2.b2/outputs" 18 | OUTD_H="$OUTBASE_H/$PROJECT" 19 | 20 | 21 | # Define directories to be mapped to /data2, etc. If more than DATA4, adjust call to process_cases.sh accordingly 22 | # data2: chrom reference (./hg38) and mappability 23 | DATA2="/diskmnt/Datasets/BICSEQ2/inputs" 24 | # data3: gene annotation file. using updated one (19940 lines) copied from MGI 25 | DATA3="/diskmnt/Datasets/BICSEQ2-dev.tmp/cached.annotation" # /gencode.v29.annotation.hg38.p12.bed 26 | 27 | # set this to 1 if running on MGI 28 | IS_MGI=0 29 | MGI_LSF_GROUP="/mwyczalk/BICSEQ2" 30 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.compute1/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, looking for all UCEC cases with WGS hg38 data 3 | 4 | source project_config-host.sh 5 | 6 | mkdir -p dat 7 | OUT="dat/case_names.dat" 8 | 9 | grep Y2.b2 $CASEMAP | cut -f 1 | sort -u > $OUT 10 | >&2 echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.compute1/2_make_dockermap.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | OUT="dat/Dockermap.dat" 5 | 6 | bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 7 | 8 | >&2 echo Written to $OUT 9 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.compute1/3_make_case_list.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/CaseList.dat" 7 | 8 | 
bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.compute1/A.process_project_cases.sh: -------------------------------------------------------------------------------- 1 | # start processing of list of cases 2 | 3 | # Usage: 4 | # A.process_project_cases.sh [options] CASE1 CASE2 ... 5 | # or 6 | # cat CASES | A.process_project_cases.sh [options] - 7 | # 8 | # with CASES a list of case names. All options passed to src/process_cases.sh 9 | 10 | # Project config path is on host, and may be relative. Will be mounted as a file /project_config.sh 11 | PROJECT_CONFIG="./project_config.sh" 12 | source project_config-host.sh 13 | 14 | CASELIST="dat/CaseList.dat" 15 | DOCKERMAP="dat/Dockermap.dat" 16 | 17 | # DATAMAP lists directories mapped to /data1, /data2, etc. 18 | DATAMAP=" $OUTD_H $DATA2 $DATA3 $DATA4 $DATA5" 19 | 20 | if [ $IS_MGI == 1 ]; then 21 | # -M for MGI 22 | MGI_ARGS="-M -g $MGI_LSF_GROUP -q research-hpc" 23 | fi 24 | 25 | if [ $IS_COMPUTE1 == 1 ]; then 26 | # -M for MGI 27 | COMPUTE1_ARGS="-Z -g $COMPUTE1_LSF_GROUP -q general -G 50" 28 | fi 29 | 30 | # If PARALLEL_CASES is not defined, on non-MGI run jobs sequentially 31 | PARALLEL_CASES=20; PARGS="-J $PARALLEL_CASES" 32 | 33 | bash $BICSEQ_H/src/process_cases.sh $MGI_ARGS $COMPUTE1_ARGS $PARGS -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST -m $DOCKERMAP -P "$DATAMAP" $@ 34 | 35 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.compute1/B.evaluate_project_cases.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. 
All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case. Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.sh" 14 | source project_config-host.sh 15 | 16 | CASELIST="dat/CaseList.dat" 17 | 18 | # -M for MGI 19 | bash $BICSEQ_H/src/evaluate_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 20 | 21 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.compute1/C.make_analysis_description.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/"$BATCH_NAME".analysis_description.dat" 7 | 8 | bash $BICSEQ_H/src/make_analysis_description.sh -b $BAMMAP -O $OUTD_H -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.compute1/TODO: -------------------------------------------------------------------------------- 1 | # Error? It shows error in the B script but the get unique outputs look ok 2 | 3 | [ Thu Jul 23 23:45:40 UTC 2020 ] All jobs launched. 
Waiting for them to complete 4 | Traceback (most recent call last): 5 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 385, i 6 | n 7 | main(sys.argv[1:]) 8 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 378, i 9 | n main 10 | if driverMonitor.start_monitor() < 0 : 11 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 175, i 12 | n start_monitor 13 | if self.__collect_rusage() < 0 : 14 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 225, i 15 | n __collect_rusage 16 | self.dbHandler.update_db(resources) 17 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 131, i 18 | n update_db 19 | conn.rollback() 20 | sqlite3.OperationalError: cannot rollback - no transaction is active 21 | [ Fri Jul 24 05:08:35 UTC 2020 ] All jobs have completed, written to /data1/C3N-01719/unique_reads 22 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.compute1/dat/Dockermap.dat: -------------------------------------------------------------------------------- 1 | /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/import1 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.compute1/dat/case_names.dat: -------------------------------------------------------------------------------- 1 | C3L-00165 2 | C3L-00966 3 | C3L-00968 4 | C3L-00969 5 | C3L-00970 6 | C3L-00976 7 | C3L-00980 8 | C3L-00981 9 | C3L-00982 10 | C3L-00991 11 | C3L-01033 12 | C3L-01034 13 | C3L-01102 14 | C3L-01106 15 | C3L-01107 16 | C3L-01279 17 | C3L-01665 18 | C3L-01676 19 | C3L-01681 20 | C3L-01869 21 | C3L-01929 22 | C3L-01951 23 | C3L-01952 24 | C3L-01954 25 | C3L-01958 26 | C3L-01963 27 | C3L-01976 28 | C3L-01978 29 | C3L-02175 30 | C3L-02176 31 | 
C3L-02177 32 | C3L-02201 33 | C3L-02202 34 | C3L-02208 35 | C3L-02211 36 | C3L-02214 37 | C3L-02220 38 | C3L-02346 39 | C3L-02352 40 | C3L-02364 41 | C3L-02544 42 | C3L-02551 43 | C3L-02553 44 | C3L-02556 45 | C3L-02841 46 | C3L-02842 47 | C3L-02856 48 | C3L-02857 49 | C3N-00647 50 | C3N-00715 51 | C3N-00721 52 | C3N-01077 53 | C3N-01078 54 | C3N-01080 55 | C3N-01081 56 | C3N-01082 57 | C3N-01083 58 | C3N-01085 59 | C3N-01086 60 | C3N-01088 61 | C3N-01090 62 | C3N-01091 63 | C3N-01536 64 | C3N-01654 65 | C3N-01655 66 | C3N-01656 67 | C3N-01657 68 | C3N-01806 69 | C3N-01828 70 | C3N-01902 71 | C3N-01905 72 | C3N-01989 73 | C3N-02005 74 | C3N-02015 75 | C3N-02063 76 | C3N-02066 77 | C3N-02068 78 | C3N-02070 79 | C3N-02082 80 | C3N-02084 81 | C3N-02085 82 | C3N-02160 83 | C3N-02224 84 | C3N-02248 85 | C3N-02262 86 | C3N-02263 87 | C3N-02264 88 | C3N-02265 89 | C3N-02266 90 | C3N-02332 91 | C3N-02338 92 | C3N-02429 93 | C3N-02431 94 | C3N-02439 95 | C3N-02577 96 | C3N-02723 97 | C3N-02726 98 | C3N-02761 99 | C3N-02763 100 | C3N-02811 101 | C3N-02945 102 | C3N-03018 103 | C3N-03019 104 | C3N-03020 105 | C3N-03021 106 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.compute1/dat/worklog: -------------------------------------------------------------------------------- 1 | cp /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/Case_Lists/Y3.620.WGS_CNV_Somatic.cases_work_list.dat case_names.dat 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.compute1/project_config-host.sh: -------------------------------------------------------------------------------- 1 | # Define host-specific project paths and configuration 2 | # This is for katmai 3 | 4 | BATCH_NAME="WGS_CNV_Somatic.Y3.620" 5 | PROJECT="run_cases."$BATCH_NAME 6 | 7 | # All paths here are relative to host 8 | 
BAMMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/BamMap/storage1.BamMap.dat" 9 | 10 | # The list of case list 11 | CASEMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/CPTAC3.cases.dat" 12 | 13 | # Installation directory of BICSEQ2.DL 14 | BICSEQ_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.WGS_CNV_Somatic.Y3.620/scripts/BICSEQ2" 15 | 16 | # Principal workflow output directory. /data1 will map to $OUTD_H 17 | OUTBASE_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.WGS_CNV_Somatic.Y3.620/outputs" 18 | OUTD_H="$OUTBASE_H/$PROJECT" 19 | 20 | # Define directories to be mapped to /data2, etc. If more than DATA4, adjust call to process_cases.sh accordingly 21 | # data2: chrom reference (./hg38) and mappability 22 | DATA2="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/inputs/" 23 | # data3: gene annotation file. 
using updated one (19940 lines) copied from MGI 24 | DATA3="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/cached.annotation" 25 | # data4: directory to the scripts for running the current batch, especially the project config 26 | DATA4=${BICSEQ_H}"/testing/docker_call/run_cases.Y3.620.compute1" 27 | ## data5: directory with all the scripts, including codes under src 28 | DATA5=${BICSEQ_H} 29 | 30 | # set this to 1 if running on MGI 31 | IS_MGI=0 32 | MGI_LSF_GROUP="/yigewu/bicseq2" 33 | 34 | # set this to 1 if running on Compute1 35 | IS_COMPUTE1=1 36 | COMPUTE1_LSF_GROUP="/yigewu/bicseq2" 37 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.compute1/start_docker_to_launch_jobs.compute1.sh: -------------------------------------------------------------------------------- 1 | bash /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.WGS_CNV_Somatic.Y3.620/scripts/WUDocker/start_docker.sh -I mwyczalkowski/bicseq2 -M compute1 /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/ /storage1/fs1/home1/Active/home/yigewu:/home/yigewu/ /storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/ /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/ 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.rerun1.compute1/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, selects all cases tagged Y2.b2 in $CASEMAP 3 | 4 | source project_config-host.sh 5 | 6 | mkdir -p dat 7 | OUT="dat/case_names.dat" 8 | 9 | grep Y2.b2 $CASEMAP | cut -f 1 | sort -u > $OUT 10 | >&2 echo Written to $OUT 11 | --------------------------------------------------------------------------------
/testing/docker_call/run_cases.Y3.620.rerun1.compute1/2_make_dockermap.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | OUT="dat/Dockermap.dat" 5 | 6 | bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 7 | 8 | >&2 echo Written to $OUT 9 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.rerun1.compute1/3_make_case_list.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/CaseList.dat" 7 | 8 | bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.rerun1.compute1/A.process_project_cases.sh: -------------------------------------------------------------------------------- 1 | # start processing of list of cases 2 | 3 | # Usage: 4 | # A.process_project_cases.sh [options] CASE1 CASE2 ... 5 | # or 6 | # cat CASES | A.process_project_cases.sh [options] - 7 | # 8 | # with CASES a list of case names. All options passed to src/process_cases.sh 9 | 10 | # Project config path is on host, and may be relative. Will be mounted as a file /project_config.sh 11 | PROJECT_CONFIG="./project_config.sh" 12 | source project_config-host.sh 13 | 14 | CASELIST="dat/CaseList.dat" 15 | DOCKERMAP="dat/Dockermap.dat" 16 | 17 | # DATAMAP lists directories mapped to /data1, /data2, etc. 
18 | DATAMAP=" $OUTD_H $DATA2 $DATA3 $DATA4 $DATA5" 19 | 20 | if [ $IS_MGI == 1 ]; then 21 | # -M for MGI 22 | MGI_ARGS="-M -g $MGI_LSF_GROUP -q research-hpc" 23 | fi 24 | 25 | if [ $IS_COMPUTE1 == 1 ]; then 26 | # -Z for compute1 27 | COMPUTE1_ARGS="-Z -g $COMPUTE1_LSF_GROUP -q general -G 75" 28 | fi 29 | 30 | # If PARALLEL_CASES is not defined, on non-MGI run jobs sequentially 31 | PARALLEL_CASES=20; PARGS="-J $PARALLEL_CASES" 32 | 33 | bash $BICSEQ_H/src/process_cases.sh $MGI_ARGS $COMPUTE1_ARGS $PARGS -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST -m $DOCKERMAP -P "$DATAMAP" $@ 34 | 35 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.rerun1.compute1/B.evaluate_project_cases.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case.
Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.sh" 14 | source project_config-host.sh 15 | 16 | CASELIST="dat/CaseList.dat" 17 | 18 | # -M for MGI 19 | bash $BICSEQ_H/src/evaluate_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 20 | 21 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.rerun1.compute1/C.make_analysis_description.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/"$BATCH_NAME".analysis_description.dat" 7 | 8 | bash $BICSEQ_H/src/make_analysis_description.sh -b $BAMMAP -O $OUTD_H -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.rerun1.compute1/TODO: -------------------------------------------------------------------------------- 1 | # Error? It shows error in the B script but the get unique outputs look ok 2 | 3 | [ Thu Jul 23 23:45:40 UTC 2020 ] All jobs launched. 
Waiting for them to complete 4 | Traceback (most recent call last): 5 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 385, i 6 | n 7 | main(sys.argv[1:]) 8 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 378, i 9 | n main 10 | if driverMonitor.start_monitor() < 0 : 11 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 175, i 12 | n start_monitor 13 | if self.__collect_rusage() < 0 : 14 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 225, i 15 | n __collect_rusage 16 | self.dbHandler.update_db(resources) 17 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 131, i 18 | n update_db 19 | conn.rollback() 20 | sqlite3.OperationalError: cannot rollback - no transaction is active 21 | [ Fri Jul 24 05:08:35 UTC 2020 ] All jobs have completed, written to /data1/C3N-01719/unique_reads 22 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.rerun1.compute1/dat/Dockermap.dat: -------------------------------------------------------------------------------- 1 | /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/import1 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.rerun1.compute1/dat/case_names.dat: -------------------------------------------------------------------------------- 1 | C3L-00966 2 | C3L-00969 3 | C3L-01106 4 | C3L-01963 5 | C3N-01081 6 | C3N-01989 7 | C3N-02224 8 | C3N-02248 9 | C3N-02262 10 | C3N-02263 11 | C3N-02265 12 | C3N-02338 13 | C3N-02429 14 | C3N-02763 15 | C3N-02811 16 | C3N-03018 17 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.rerun1.compute1/dat/worklog: 
-------------------------------------------------------------------------------- 1 | cp /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/Case_Lists/Y3.620.WGS_CNV_Somatic.cases_work_list.dat case_names.dat 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.rerun1.compute1/project_config-host.sh: -------------------------------------------------------------------------------- 1 | # Define host-specific project paths and configuration 2 | # This is for compute1 3 | 4 | BATCH_NAME="WGS_CNV_Somatic.Y3.620.rerun1" 5 | PROJECT="run_cases."$BATCH_NAME 6 | 7 | # All paths here are relative to host 8 | BAMMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/BamMap/storage1.BamMap.dat" 9 | 10 | # The list of cases 11 | CASEMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/CPTAC3.cases.dat" 12 | 13 | # Installation directory of BICSEQ2.DL 14 | BICSEQ_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.WGS_CNV_Somatic.Y3.620/scripts/BICSEQ2" 15 | 16 | # Principal workflow output directory. /data1 will map to $OUTD_H 17 | OUTBASE_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.WGS_CNV_Somatic.Y3.620/outputs" 18 | OUTD_H="$OUTBASE_H/$PROJECT" 19 | 20 | # Define directories to be mapped to /data2, etc. If more than DATA4, adjust call to process_cases.sh accordingly 21 | # data2: chrom reference (./hg38) and mappability 22 | DATA2="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/inputs/" 23 | # data3: gene annotation file.
using updated one (19940 lines) copied from MGI 24 | DATA3="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/cached.annotation" 25 | # data4: directory to the scripts for running the current batch, espcially the project config 26 | DATA4=${BICSEQ_H}"/testing/docker_call/run_cases.Y3.620.rerun1.compute1" 27 | ## data5: directory with all the scripts, including codes under src 28 | DATA5=${BICSEQ_H} 29 | 30 | # set this to 1 if running on MGI 31 | IS_MGI=0 32 | MGI_LSF_GROUP="/yigewu/bicseq2" 33 | 34 | # set this to 1 if running on Compute1 35 | IS_COMPUTE1=1 36 | COMPUTE1_LSF_GROUP="/yigewu/bicseq2" 37 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.620.rerun1.compute1/start_docker_to_launch_jobs.compute1.sh: -------------------------------------------------------------------------------- 1 | bash /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.WGS_CNV_Somatic.Y3.620/scripts/WUDocker/start_docker.sh -I mwyczalkowski/bicseq2 -M compute1 /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/ /storage1/fs1/home1/Active/home/yigewu:/home/yigewu/ /storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/ /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/ 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.UCEC.178.compute1/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, looking for all UCEC cases with WGS hg38 data 3 | 4 | source project_config-host.sh 5 | 6 | mkdir -p dat 7 | OUT="dat/case_names.dat" 8 | 9 | grep Y2.b2 $CASEMAP | cut -f 1 | sort -u > $OUT 10 | >&2 echo Written to $OUT 11 | -------------------------------------------------------------------------------- 
/testing/docker_call/run_cases.Y3.UCEC.178.compute1/2_make_dockermap.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | OUT="dat/Dockermap.dat" 5 | 6 | bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 7 | 8 | >&2 echo Written to $OUT 9 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.UCEC.178.compute1/3_make_case_list.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/CaseList.dat" 7 | 8 | bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.UCEC.178.compute1/A.process_project_cases.sh: -------------------------------------------------------------------------------- 1 | # start processing of list of cases 2 | 3 | # Usage: 4 | # A.process_project_cases.sh [options] CASE1 CASE2 ... 5 | # or 6 | # cat CASES | A.process_project_cases.sh [options] - 7 | # 8 | # with CASES a list of case names. All options passed to src/process_cases.sh 9 | 10 | # Project config path is on host, and may be relative. Will be mounted as a file /project_config.sh 11 | PROJECT_CONFIG="./project_config.sh" 12 | source project_config-host.sh 13 | 14 | CASELIST="dat/CaseList.dat" 15 | DOCKERMAP="dat/Dockermap.dat" 16 | 17 | # DATAMAP lists directories mapped to /data1, /data2, etc. 
18 | DATAMAP=" $OUTD_H $DATA2 $DATA3 $DATA4 $DATA5" 19 | 20 | if [ $IS_MGI == 1 ]; then 21 | # -M for MGI 22 | MGI_ARGS="-M -g $MGI_LSF_GROUP -q research-hpc" 23 | fi 24 | 25 | if [ $IS_COMPUTE1 == 1 ]; then 26 | # -M for MGI 27 | COMPUTE1_ARGS="-Z -g $COMPUTE1_LSF_GROUP -q general -G 50" 28 | fi 29 | 30 | # If PARALLEL_CASES is not defined, on non-MGI run jobs sequentially 31 | PARALLEL_CASES=20; PARGS="-J $PARALLEL_CASES" 32 | 33 | bash $BICSEQ_H/src/process_cases.sh $MGI_ARGS $COMPUTE1_ARGS $PARGS -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST -m $DOCKERMAP -P "$DATAMAP" $@ 34 | 35 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.UCEC.178.compute1/B.evaluate_project_cases.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case. 
Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.sh" 14 | source project_config-host.sh 15 | 16 | CASELIST="dat/CaseList.dat" 17 | 18 | # -M for MGI 19 | bash $BICSEQ_H/src/evaluate_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 20 | 21 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.UCEC.178.compute1/C.make_analysis_description.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/"$BATCH_NAME".analysis_description.dat" 7 | 8 | bash $BICSEQ_H/src/make_analysis_description.sh -b $BAMMAP -O $OUTD_H -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.UCEC.178.compute1/TODO: -------------------------------------------------------------------------------- 1 | # Error? It shows error in the B script but the get unique outputs look ok 2 | 3 | [ Thu Jul 23 23:45:40 UTC 2020 ] All jobs launched. 
Waiting for them to complete 4 | Traceback (most recent call last): 5 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 385, i 6 | n 7 | main(sys.argv[1:]) 8 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 378, i 9 | n main 10 | if driverMonitor.start_monitor() < 0 : 11 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 175, i 12 | n start_monitor 13 | if self.__collect_rusage() < 0 : 14 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 225, i 15 | n __collect_rusage 16 | self.dbHandler.update_db(resources) 17 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 131, i 18 | n update_db 19 | conn.rollback() 20 | sqlite3.OperationalError: cannot rollback - no transaction is active 21 | [ Fri Jul 24 05:08:35 UTC 2020 ] All jobs have completed, written to /data1/C3N-01719/unique_reads 22 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.UCEC.178.compute1/dat/Dockermap.dat: -------------------------------------------------------------------------------- 1 | /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/import1 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.UCEC.178.compute1/dat/worklog: -------------------------------------------------------------------------------- 1 | cp /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/Case_Lists/4E.cases_work_list.dat case_names.dat 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.UCEC.178.compute1/project_config-host.sh: -------------------------------------------------------------------------------- 1 | # Define host-specific project paths and configuration 2 | # 
This is for katmai 3 | 4 | BATCH_NAME="WGS_CNV_Somatic.Y3.UCEC.178" 5 | PROJECT="run_cases."$BATCH_NAME 6 | 7 | # All paths here are relative to host 8 | BAMMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/BamMap/storage1.BamMap.dat" 9 | 10 | # The list of case list 11 | CASEMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/CPTAC3.cases.dat" 12 | 13 | # Installation directory of BICSEQ2.DL 14 | BICSEQ_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.WGS_CNV_Somatic.Y3.UCEC.178/scripts/BICSEQ2" 15 | 16 | # Principal workflow output directory. /data1 will map to $OUTD_H 17 | OUTBASE_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.WGS_CNV_Somatic.Y3.UCEC.178/outputs" 18 | OUTD_H="$OUTBASE_H/$PROJECT" 19 | 20 | # Define directories to be mapped to /data2, etc. If more than DATA4, adjust call to process_cases.sh accordingly 21 | # data2: chrom reference (./hg38) and mappability 22 | DATA2="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/inputs/" 23 | # data3: gene annotation file. 
using updated one (19940 lines) copied from MGI 24 | DATA3="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/cached.annotation" 25 | # data4: directory to the scripts for running the current batch, espcially the project config 26 | DATA4=${BICSEQ_H}"/testing/docker_call/run_cases.Y3.UCEC.178.compute1" 27 | ## data5: directory with all the scripts, including codes under src 28 | DATA5=${BICSEQ_H} 29 | 30 | # set this to 1 if running on MGI 31 | IS_MGI=0 32 | MGI_LSF_GROUP="/yigewu/bicseq2" 33 | 34 | # set this to 1 if running on Compute1 35 | IS_COMPUTE1=1 36 | COMPUTE1_LSF_GROUP="/yigewu/bicseq2" 37 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.UCEC.178.compute1/start_docker_to_launch_jobs.compute1.sh: -------------------------------------------------------------------------------- 1 | bash /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.WGS_CNV_Somatic.Y3.UCEC.178/scripts/WUDocker/start_docker.sh -I mwyczalkowski/bicseq2 -M compute1 /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/ /storage1/fs1/home1/Active/home/yigewu:/home/yigewu/ /storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/ /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/ 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.UCEC.Discovery.22.compute1/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, looking for all UCEC cases with WGS hg38 data 3 | 4 | source project_config-host.sh 5 | 6 | mkdir -p dat 7 | OUT="dat/case_names.dat" 8 | 9 | grep Y2.b2 $CASEMAP | cut -f 1 | sort -u > $OUT 10 | >&2 echo Written to $OUT 11 | -------------------------------------------------------------------------------- 
/testing/docker_call/run_cases.Y3.UCEC.Discovery.22.compute1/2_make_dockermap.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | OUT="dat/Dockermap.dat" 5 | 6 | bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 7 | 8 | >&2 echo Written to $OUT 9 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.UCEC.Discovery.22.compute1/3_make_case_list.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/CaseList.dat" 7 | 8 | bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.UCEC.Discovery.22.compute1/A.process_project_cases.sh: -------------------------------------------------------------------------------- 1 | # start processing of list of cases 2 | 3 | # Usage: 4 | # A.process_project_cases.sh [options] CASE1 CASE2 ... 5 | # or 6 | # cat CASES | A.process_project_cases.sh [options] - 7 | # 8 | # with CASES a list of case names. All options passed to src/process_cases.sh 9 | 10 | # Project config path is on host, and may be relative. Will be mounted as a file /project_config.sh 11 | PROJECT_CONFIG="./project_config.sh" 12 | source project_config-host.sh 13 | 14 | CASELIST="dat/CaseList.dat" 15 | DOCKERMAP="dat/Dockermap.dat" 16 | 17 | # DATAMAP lists directories mapped to /data1, /data2, etc. 
18 | DATAMAP=" $OUTD_H $DATA2 $DATA3 $DATA4 $DATA5" 19 | 20 | if [ $IS_MGI == 1 ]; then 21 | # -M for MGI 22 | MGI_ARGS="-M -g $MGI_LSF_GROUP -q research-hpc" 23 | fi 24 | 25 | if [ $IS_COMPUTE1 == 1 ]; then 26 | # -M for MGI 27 | COMPUTE1_ARGS="-Z -g $COMPUTE1_LSF_GROUP -q general -G 50" 28 | fi 29 | 30 | # If PARALLEL_CASES is not defined, on non-MGI run jobs sequentially 31 | PARALLEL_CASES=20; PARGS="-J $PARALLEL_CASES" 32 | 33 | bash $BICSEQ_H/src/process_cases.sh $MGI_ARGS $COMPUTE1_ARGS $PARGS -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST -m $DOCKERMAP -P "$DATAMAP" $@ 34 | 35 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.UCEC.Discovery.22.compute1/B.evaluate_project_cases.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case. 
Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.sh" 14 | source project_config-host.sh 15 | 16 | CASELIST="dat/CaseList.dat" 17 | 18 | # -M for MGI 19 | bash $BICSEQ_H/src/evaluate_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 20 | 21 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.UCEC.Discovery.22.compute1/C.make_analysis_description.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/"$BATCH_NAME".analysis_description.dat" 7 | 8 | bash $BICSEQ_H/src/make_analysis_description.sh -b $BAMMAP -O $OUTD_H -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.UCEC.Discovery.22.compute1/TODO: -------------------------------------------------------------------------------- 1 | # Error? It shows error in the B script but the get unique outputs look ok 2 | 3 | [ Thu Jul 23 23:45:40 UTC 2020 ] All jobs launched. 
Waiting for them to complete 4 | Traceback (most recent call last): 5 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 385, i 6 | n 7 | main(sys.argv[1:]) 8 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 378, i 9 | n main 10 | if driverMonitor.start_monitor() < 0 : 11 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 175, i 12 | n start_monitor 13 | if self.__collect_rusage() < 0 : 14 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 225, i 15 | n __collect_rusage 16 | self.dbHandler.update_db(resources) 17 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 131, i 18 | n update_db 19 | conn.rollback() 20 | sqlite3.OperationalError: cannot rollback - no transaction is active 21 | [ Fri Jul 24 05:08:35 UTC 2020 ] All jobs have completed, written to /data1/C3N-01719/unique_reads 22 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.UCEC.Discovery.22.compute1/dat/Dockermap.dat: -------------------------------------------------------------------------------- 1 | /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/import1 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.UCEC.Discovery.22.compute1/dat/case_names.dat: -------------------------------------------------------------------------------- 1 | C3N-00321 2 | C3N-00322 3 | C3N-00323 4 | C3N-00324 5 | C3N-00326 6 | C3N-00328 7 | C3N-00333 8 | C3N-00334 9 | C3N-00335 10 | C3N-00337 11 | C3N-00339 12 | C3N-00340 13 | C3N-00377 14 | C3N-00379 15 | C3N-00383 16 | C3N-00386 17 | C3N-00388 18 | C3N-00389 19 | C3N-00836 20 | C3N-00847 21 | C3N-00848 22 | C3N-00850 23 | -------------------------------------------------------------------------------- 
/testing/docker_call/run_cases.Y3.UCEC.Discovery.22.compute1/dat/worklog: -------------------------------------------------------------------------------- 1 | cp /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/Case_Lists/UCEC.Discovery.22.dat case_names.dat 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.UCEC.Discovery.22.compute1/project_config-host.sh: -------------------------------------------------------------------------------- 1 | # Define host-specific project paths and configuration 2 | # This is for katmai 3 | 4 | BATCH_NAME="UCEC.Discovery.22" 5 | PROJECT="run_cases."$BATCH_NAME 6 | 7 | # All paths here are relative to host 8 | BAMMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/BamMap/storage1.BamMap.dat" 9 | 10 | # The list of case list 11 | CASEMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/CPTAC3.cases.dat" 12 | 13 | # Installation directory of BICSEQ2.DL 14 | BICSEQ_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.UCEC.Discovery.22/scripts/BICSEQ2" 15 | 16 | # Principal workflow output directory. /data1 will map to $OUTD_H 17 | OUTBASE_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.UCEC.Discovery.22/outputs" 18 | OUTD_H="$OUTBASE_H/$PROJECT" 19 | 20 | # Define directories to be mapped to /data2, etc. If more than DATA4, adjust call to process_cases.sh accordingly 21 | # data2: chrom reference (./hg38) and mappability 22 | DATA2="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/inputs/" 23 | # data3: gene annotation file. 
using updated one (19940 lines) copied from MGI 24 | DATA3="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/cached.annotation" 25 | # data4: directory to the scripts for running the current batch, especially the project config 26 | DATA4=${BICSEQ_H}"/testing/docker_call/run_cases.Y3.UCEC.Discovery.22.compute1" 27 | ## data5: directory with all the scripts, including codes under src 28 | DATA5=${BICSEQ_H} 29 | 30 | # set this to 1 if running on MGI 31 | IS_MGI=0 32 | MGI_LSF_GROUP="/yigewu/bicseq2" 33 | 34 | # set this to 1 if running on Compute1 35 | IS_COMPUTE1=1 36 | COMPUTE1_LSF_GROUP="/yigewu/bicseq2" 37 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.UCEC.Discovery.22.compute1/start_docker_to_launch_jobs.compute1.sh: -------------------------------------------------------------------------------- 1 | bash /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.WGS_CNV_Somatic.Y3.UCEC.178/scripts/WUDocker/start_docker.sh -I mwyczalkowski/bicseq2 -M compute1 /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/ /storage1/fs1/home1/Active/home/yigewu:/home/yigewu/ /storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/ /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/ 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun.compute1/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, looking for all UCEC cases with WGS hg38 data 3 | 4 | source project_config-host.sh 5 | 6 | mkdir -p dat 7 | OUT="dat/case_names.dat" 8 | 9 | grep Y2.b2 $CASEMAP | cut -f 1 | sort -u > $OUT 10 | >&2 echo Written to $OUT 11 | --------------------------------------------------------------------------------
/testing/docker_call/run_cases.Y3.b1.PDA.rerun.compute1/2_make_dockermap.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | OUT="dat/Dockermap.dat" 5 | 6 | bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 7 | 8 | >&2 echo Written to $OUT 9 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun.compute1/3_make_case_list.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/CaseList.dat" 7 | 8 | bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun.compute1/A.process_project_cases.sh: -------------------------------------------------------------------------------- 1 | # start processing of list of cases 2 | 3 | # Usage: 4 | # A.process_project_cases.sh [options] CASE1 CASE2 ... 5 | # or 6 | # cat CASES | A.process_project_cases.sh [options] - 7 | # 8 | # with CASES a list of case names. All options passed to src/process_cases.sh 9 | 10 | # Project config path is on host, and may be relative. Will be mounted as a file /project_config.sh 11 | PROJECT_CONFIG="./project_config.sh" 12 | source project_config-host.sh 13 | 14 | CASELIST="dat/CaseList.dat" 15 | DOCKERMAP="dat/Dockermap.dat" 16 | 17 | # DATAMAP lists directories mapped to /data1, /data2, etc. 
18 | DATAMAP=" $OUTD_H $DATA2 $DATA3 $DATA4 " 19 | 20 | if [ $IS_MGI == 1 ]; then 21 | # -M for MGI 22 | MGI_ARGS="-M -g $MGI_LSF_GROUP -q research-hpc" 23 | fi 24 | 25 | if [ $IS_COMPUTE1 == 1 ]; then 26 | # -Z for Compute1 27 | COMPUTE1_ARGS="-Z -g $COMPUTE1_LSF_GROUP -q general" 28 | fi 29 | 30 | # If PARALLEL_CASES is not defined, on non-MGI run jobs sequentially 31 | PARALLEL_CASES=20; PARGS="-J $PARALLEL_CASES" 32 | 33 | bash $BICSEQ_H/src/process_cases.sh $MGI_ARGS $COMPUTE1_ARGS $PARGS -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST -m $DOCKERMAP -P "$DATAMAP" $@ 34 | 35 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun.compute1/B.evaluate_project_cases.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case.
Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.sh" 14 | source project_config-host.sh 15 | 16 | CASELIST="dat/CaseList.dat" 17 | 18 | # -M for MGI 19 | bash $BICSEQ_H/src/evaluate_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 20 | 21 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun.compute1/C.make_analysis_description.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/"$BATCH_NAME".analysis_description.dat" 7 | 8 | bash $BICSEQ_H/src/make_analysis_description.sh -b $BAMMAP -O $OUTD_H -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun.compute1/TODO: -------------------------------------------------------------------------------- 1 | # Error? It shows error in the B script but the get unique outputs look ok 2 | 3 | [ Thu Jul 23 23:45:40 UTC 2020 ] All jobs launched. 
Waiting for them to complete 4 | Traceback (most recent call last): 5 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 385, i 6 | n 7 | main(sys.argv[1:]) 8 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 378, i 9 | n main 10 | if driverMonitor.start_monitor() < 0 : 11 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 175, i 12 | n start_monitor 13 | if self.__collect_rusage() < 0 : 14 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 225, i 15 | n __collect_rusage 16 | self.dbHandler.update_db(resources) 17 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 131, i 18 | n update_db 19 | conn.rollback() 20 | sqlite3.OperationalError: cannot rollback - no transaction is active 21 | [ Fri Jul 24 05:08:35 UTC 2020 ] All jobs have completed, written to /data1/C3N-01719/unique_reads 22 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun.compute1/dat/Dockermap.dat: -------------------------------------------------------------------------------- 1 | /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/import1 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun.compute1/dat/case_names.dat: -------------------------------------------------------------------------------- 1 | C3L-03356 2 | C3L-03630 3 | C3L-03371 4 | C3L-03388 5 | C3N-01900 6 | C3L-03628 7 | C3N-01907 8 | C3L-02809 9 | C3L-03743 10 | C3L-02897 11 | C3L-01037 12 | C3L-03635 13 | C3L-03632 14 | C3L-01124 15 | C3L-04072 16 | C3L-02604 17 | C3L-04853 18 | C3N-01719 19 | C3N-00709 20 | C3L-02701 21 | C3N-01012 22 | C3L-02899 23 | C3L-02890 24 | C3L-03129 25 | 
-------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun.compute1/dat/worklog: -------------------------------------------------------------------------------- 1 | cat Weirdp1.txt | cut -f 1 | grep -v Sample > case_names.dat 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun.compute1/project_config-host.sh: -------------------------------------------------------------------------------- 1 | # Define host-specific project paths and configuration 2 | # This is for katmai 3 | 4 | BATCH_NAME="Y3.b1.PDA.rerun" 5 | PROJECT="run_cases."$BATCH_NAME 6 | 7 | # All paths here are relative to host 8 | BAMMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/BamMap/storage1.BamMap.dat" 9 | 10 | # The list of case list 11 | CASEMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/CPTAC3.cases.dat" 12 | 13 | # Installation directory of BICSEQ2.DL 14 | BICSEQ_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.Y3.b1/scripts/BICSEQ2" 15 | 16 | # Principal workflow output directory. /data1 will map to $OUTD_H 17 | OUTBASE_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.Y3.b1/outputs" 18 | OUTD_H="$OUTBASE_H/$PROJECT" 19 | 20 | # Define directories to be mapped to /data2, etc. If more than DATA4, adjust call to process_cases.sh accordingly 21 | # data2: chrom reference (./hg38) and mappability 22 | DATA2="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/inputs/" 23 | # data3: gene annotation file. 
using updated one (19940 lines) copied from MGI 24 | DATA3="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/cached.annotation" 25 | # data4: directory to the scripts, especially the project config 26 | DATA4=${BICSEQ_H}"/testing/docker_call/run_cases.Y3.b1.PDA.rerun.compute1" 27 | 28 | # set this to 1 if running on MGI 29 | IS_MGI=0 30 | MGI_LSF_GROUP="/yigewu/bicseq2" 31 | 32 | # set this to 1 if running on Compute1 33 | IS_COMPUTE1=1 34 | COMPUTE1_LSF_GROUP="/yigewu/bicseq2" 35 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun.compute1/start_docker_interactive.sh: -------------------------------------------------------------------------------- 1 | export LSF_DOCKER_VOLUMES="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/:/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/ /storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/:/storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/ /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data /home/yigewu:/home/yigewu" 2 | bsub -Is -a 'docker1(mwyczalkowski/bicseq2)' bash 3 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun2.compute1/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, looking for all UCEC cases with WGS hg38 data 3 | 4 | source project_config-host.sh 5 | 6 | mkdir -p dat 7 | OUT="dat/case_names.dat" 8 | 9 | grep Y2.b2 $CASEMAP | cut -f 1 | sort -u > $OUT 10 | >&2 echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun2.compute1/2_make_dockermap.sh:
-------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | OUT="dat/Dockermap.dat" 5 | 6 | bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 7 | 8 | >&2 echo Written to $OUT 9 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun2.compute1/3_make_case_list.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/CaseList.dat" 7 | 8 | bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun2.compute1/A.process_project_cases.sh: -------------------------------------------------------------------------------- 1 | # start processing of list of cases 2 | 3 | # Usage: 4 | # A.process_project_cases.sh [options] CASE1 CASE2 ... 5 | # or 6 | # cat CASES | A.process_project_cases.sh [options] - 7 | # 8 | # with CASES a list of case names. All options passed to src/process_cases.sh 9 | 10 | # Project config path is on host, and may be relative. Will be mounted as a file /project_config.sh 11 | PROJECT_CONFIG="./project_config.sh" 12 | source project_config-host.sh 13 | 14 | CASELIST="dat/CaseList.dat" 15 | DOCKERMAP="dat/Dockermap.dat" 16 | 17 | # DATAMAP lists directories mapped to /data1, /data2, etc. 
18 | DATAMAP=" $OUTD_H $DATA2 $DATA3 $DATA4 " 19 | 20 | if [ $IS_MGI == 1 ]; then 21 | # -M for MGI 22 | MGI_ARGS="-M -g $MGI_LSF_GROUP -q research-hpc" 23 | fi 24 | 25 | if [ $IS_COMPUTE1 == 1 ]; then 26 | # -Z for Compute1 27 | COMPUTE1_ARGS="-Z -g $COMPUTE1_LSF_GROUP -q general -G 32" 28 | fi 29 | 30 | # If PARALLEL_CASES is not defined, on non-MGI run jobs sequentially 31 | PARALLEL_CASES=20; PARGS="-J $PARALLEL_CASES" 32 | 33 | bash $BICSEQ_H/src/process_cases.sh $MGI_ARGS $COMPUTE1_ARGS $PARGS -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST -m $DOCKERMAP -P "$DATAMAP" $@ 34 | 35 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun2.compute1/B.evaluate_project_cases.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case.
Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.sh" 14 | source project_config-host.sh 15 | 16 | CASELIST="dat/CaseList.dat" 17 | 18 | # -M for MGI 19 | bash $BICSEQ_H/src/evaluate_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 20 | 21 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun2.compute1/C.make_analysis_description.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/"$BATCH_NAME".analysis_description.dat" 7 | 8 | bash $BICSEQ_H/src/make_analysis_description.sh -b $BAMMAP -O $OUTD_H -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun2.compute1/TODO: -------------------------------------------------------------------------------- 1 | # Error? It shows error in the B script but the get unique outputs look ok 2 | 3 | [ Thu Jul 23 23:45:40 UTC 2020 ] All jobs launched. 
Waiting for them to complete 4 | Traceback (most recent call last): 5 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 385, i 6 | n 7 | main(sys.argv[1:]) 8 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 378, i 9 | n main 10 | if driverMonitor.start_monitor() < 0 : 11 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 175, i 12 | n start_monitor 13 | if self.__collect_rusage() < 0 : 14 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 225, i 15 | n __collect_rusage 16 | self.dbHandler.update_db(resources) 17 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 131, i 18 | n update_db 19 | conn.rollback() 20 | sqlite3.OperationalError: cannot rollback - no transaction is active 21 | [ Fri Jul 24 05:08:35 UTC 2020 ] All jobs have completed, written to /data1/C3N-01719/unique_reads 22 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun2.compute1/dat/Dockermap.dat: -------------------------------------------------------------------------------- 1 | /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/import1 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun2.compute1/dat/worklog: -------------------------------------------------------------------------------- 1 | cat Weirdp1.txt | cut -f 1 | grep -v Sample > case_names.dat 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun2.compute1/project_config-host.sh: -------------------------------------------------------------------------------- 1 | # Define host-specific project paths and configuration 2 | # This is for katmai 3 | 4 | BATCH_NAME="Y3.b1.PDA.rerun2" 
5 | PROJECT="run_cases."$BATCH_NAME 6 | 7 | # All paths here are relative to host 8 | BAMMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/BamMap/storage1.BamMap.dat" 9 | 10 | # The list of case list 11 | CASEMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/CPTAC3.cases.dat" 12 | 13 | # Installation directory of BICSEQ2.DL 14 | BICSEQ_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.Y3.b1/scripts/BICSEQ2" 15 | 16 | # Principal workflow output directory. /data1 will map to $OUTD_H 17 | OUTBASE_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.Y3.b1/outputs" 18 | OUTD_H="$OUTBASE_H/$PROJECT" 19 | 20 | # Define directories to be mapped to /data2, etc. If more than DATA4, adjust call to process_cases.sh accordingly 21 | # data2: chrom reference (./hg38) and mappability 22 | DATA2="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/inputs/" 23 | # data3: gene annotation file. 
using updated one (19940 lines) copied from MGI 24 | DATA3="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/cached.annotation" 25 | # data4: directory to the scripts, especially the project config 26 | DATA4=${BICSEQ_H}"/testing/docker_call/run_cases.Y3.b1.PDA.rerun2.compute1" 27 | 28 | # set this to 1 if running on MGI 29 | IS_MGI=0 30 | MGI_LSF_GROUP="/yigewu/bicseq2" 31 | 32 | # set this to 1 if running on Compute1 33 | IS_COMPUTE1=1 34 | COMPUTE1_LSF_GROUP="/yigewu/bicseq2" 35 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun2.compute1/start_docker_interactive.sh: -------------------------------------------------------------------------------- 1 | export LSF_DOCKER_VOLUMES="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/:/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/ /storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/:/storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/ /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data /home/yigewu:/home/yigewu" 2 | bsub -Is -a 'docker1(mwyczalkowski/bicseq2)' bash 3 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun3.compute1/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, looking for all UCEC cases with WGS hg38 data 3 | 4 | source project_config-host.sh 5 | 6 | mkdir -p dat 7 | OUT="dat/case_names.dat" 8 | 9 | grep Y2.b2 $CASEMAP | cut -f 1 | sort -u > $OUT 10 | >&2 echo Written to $OUT 11 | --------------------------------------------------------------------------------
/testing/docker_call/run_cases.Y3.b1.PDA.rerun3.compute1/2_make_dockermap.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | OUT="dat/Dockermap.dat" 5 | 6 | bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 7 | 8 | >&2 echo Written to $OUT 9 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun3.compute1/3_make_case_list.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/CaseList.dat" 7 | 8 | bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun3.compute1/A.process_project_cases.sh: -------------------------------------------------------------------------------- 1 | # start processing of list of cases 2 | 3 | # Usage: 4 | # A.process_project_cases.sh [options] CASE1 CASE2 ... 5 | # or 6 | # cat CASES | A.process_project_cases.sh [options] - 7 | # 8 | # with CASES a list of case names. All options passed to src/process_cases.sh 9 | 10 | # Project config path is on host, and may be relative. Will be mounted as a file /project_config.sh 11 | PROJECT_CONFIG="./project_config.sh" 12 | source project_config-host.sh 13 | 14 | CASELIST="dat/CaseList.dat" 15 | DOCKERMAP="dat/Dockermap.dat" 16 | 17 | # DATAMAP lists directories mapped to /data1, /data2, etc. 
18 | DATAMAP=" $OUTD_H $DATA2 $DATA3 $DATA4 " 19 | 20 | if [ $IS_MGI == 1 ]; then 21 | # -M for MGI 22 | MGI_ARGS="-M -g $MGI_LSF_GROUP -q research-hpc" 23 | fi 24 | 25 | if [ $IS_COMPUTE1 == 1 ]; then 26 | # -Z for Compute1 27 | COMPUTE1_ARGS="-Z -g $COMPUTE1_LSF_GROUP -q general -G 32" 28 | fi 29 | 30 | # If PARALLEL_CASES is not defined, on non-MGI run jobs sequentially 31 | PARALLEL_CASES=20; PARGS="-J $PARALLEL_CASES" 32 | 33 | bash $BICSEQ_H/src/process_cases.sh $MGI_ARGS $COMPUTE1_ARGS $PARGS -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST -m $DOCKERMAP -P "$DATAMAP" $@ 34 | 35 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun3.compute1/B.evaluate_project_cases.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case.
Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.sh" 14 | source project_config-host.sh 15 | 16 | CASELIST="dat/CaseList.dat" 17 | 18 | # -M for MGI 19 | bash $BICSEQ_H/src/evaluate_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 20 | 21 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun3.compute1/C.make_analysis_description.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/"$BATCH_NAME".analysis_description.dat" 7 | 8 | bash $BICSEQ_H/src/make_analysis_description.sh -b $BAMMAP -O $OUTD_H -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun3.compute1/TODO: -------------------------------------------------------------------------------- 1 | # Error? It shows error in the B script but the get unique outputs look ok 2 | 3 | [ Thu Jul 23 23:45:40 UTC 2020 ] All jobs launched. 
Waiting for them to complete 4 | Traceback (most recent call last): 5 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 385, i 6 | n 7 | main(sys.argv[1:]) 8 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 378, i 9 | n main 10 | if driverMonitor.start_monitor() < 0 : 11 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 175, i 12 | n start_monitor 13 | if self.__collect_rusage() < 0 : 14 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 225, i 15 | n __collect_rusage 16 | self.dbHandler.update_db(resources) 17 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 131, i 18 | n update_db 19 | conn.rollback() 20 | sqlite3.OperationalError: cannot rollback - no transaction is active 21 | [ Fri Jul 24 05:08:35 UTC 2020 ] All jobs have completed, written to /data1/C3N-01719/unique_reads 22 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun3.compute1/dat/Dockermap.dat: -------------------------------------------------------------------------------- 1 | /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/import1 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun3.compute1/dat/worklog: -------------------------------------------------------------------------------- 1 | cat Weirdp1.txt | cut -f 1 | grep -v Sample > case_names.dat 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun3.compute1/project_config-host.sh: -------------------------------------------------------------------------------- 1 | # Define host-specific project paths and configuration 2 | # This is for katmai 3 | 4 | BATCH_NAME="Y3.b1.PDA.rerun3" 
5 | PROJECT="run_cases."$BATCH_NAME 6 | 7 | # All paths here are relative to host 8 | BAMMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/BamMap/storage1.BamMap.dat" 9 | 10 | # The list of case list 11 | CASEMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/CPTAC3.cases.dat" 12 | 13 | # Installation directory of BICSEQ2.DL 14 | BICSEQ_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.Y3.b1/scripts/BICSEQ2" 15 | 16 | # Principal workflow output directory. /data1 will map to $OUTD_H 17 | OUTBASE_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.Y3.b1/outputs" 18 | OUTD_H="$OUTBASE_H/$PROJECT" 19 | 20 | # Define directories to be mapped to /data2, etc. If more than DATA4, adjust call to process_cases.sh accordingly 21 | # data2: chrom reference (./hg38) and mappability 22 | DATA2="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/inputs/" 23 | # data3: gene annotation file. 
using updated one (19940 lines) copied from MGI 24 | DATA3="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/cached.annotation" 25 | # data4: directory to the scripts, espcially the project config 26 | DATA4=${BICSEQ_H}"/testing/docker_call/run_cases.Y3.b1.PDA.rerun3.compute1" 27 | 28 | # set this to 1 if running on MGI 29 | IS_MGI=0 30 | MGI_LSF_GROUP="/yigewu/bicseq2" 31 | 32 | # set this to 1 if running on Compute1 33 | IS_COMPUTE1=1 34 | COMPUTE1_LSF_GROUP="/yigewu/bicseq2" 35 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun3.compute1/start_docker_interactive.sh: -------------------------------------------------------------------------------- 1 | export LSF_DOCKER_VOLUMES="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/:/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/ /storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/:/storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/ /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data /home/yigewu:/home/yigewu" 2 | bsub -Is -a 'docker1(mwyczalkowski/bicseq2)' bash 3 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun4.compute1/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, looking for all UCEC cases with WGS hg38 data 3 | 4 | source project_config-host.sh 5 | 6 | mkdir -p dat 7 | OUT="dat/case_names.dat" 8 | 9 | grep Y2.b2 $CASEMAP | cut -f 1 | sort -u > $OUT 10 | >&2 echo Written to $OUT 11 | -------------------------------------------------------------------------------- 
/testing/docker_call/run_cases.Y3.b1.PDA.rerun4.compute1/2_make_dockermap.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | OUT="dat/Dockermap.dat" 5 | 6 | bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 7 | 8 | >&2 echo Written to $OUT 9 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun4.compute1/3_make_case_list.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/CaseList.dat" 7 | 8 | bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun4.compute1/A.process_project_cases.sh: -------------------------------------------------------------------------------- 1 | # start processing of list of cases 2 | 3 | # Usage: 4 | # A.process_project_cases.sh [options] CASE1 CASE2 ... 5 | # or 6 | # cat CASES | A.process_project_cases.sh [options] - 7 | # 8 | # with CASES a list of case names. All options passed to src/process_cases.sh 9 | 10 | # Project config path is on host, and may be relative. Will be mounted as a file /project_config.sh 11 | PROJECT_CONFIG="./project_config.sh" 12 | source project_config-host.sh 13 | 14 | CASELIST="dat/CaseList.dat" 15 | DOCKERMAP="dat/Dockermap.dat" 16 | 17 | # DATAMAP lists directories mapped to /data1, /data2, etc. 
18 | DATAMAP=" $OUTD_H $DATA2 $DATA3 $DATA4 " 19 | 20 | if [ $IS_MGI == 1 ]; then 21 | # -M for MGI 22 | MGI_ARGS="-M -g $MGI_LSF_GROUP -q research-hpc" 23 | fi 24 | 25 | if [ $IS_COMPUTE1 == 1 ]; then 26 | # -M for MGI 27 | COMPUTE1_ARGS="-Z -g $COMPUTE1_LSF_GROUP -q general -G 50" 28 | fi 29 | 30 | # If PARALLEL_CASES is not defined, on non-MGI run jobs sequentially 31 | PARALLEL_CASES=20; PARGS="-J $PARALLEL_CASES" 32 | 33 | bash $BICSEQ_H/src/process_cases.sh $MGI_ARGS $COMPUTE1_ARGS $PARGS -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST -m $DOCKERMAP -P "$DATAMAP" $@ 34 | 35 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun4.compute1/B.evaluate_project_cases.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case. 
Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.sh" 14 | source project_config-host.sh 15 | 16 | CASELIST="dat/CaseList.dat" 17 | 18 | # -M for MGI 19 | bash $BICSEQ_H/src/evaluate_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 20 | 21 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun4.compute1/C.make_analysis_description.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/"$BATCH_NAME".analysis_description.dat" 7 | 8 | bash $BICSEQ_H/src/make_analysis_description.sh -b $BAMMAP -O $OUTD_H -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun4.compute1/TODO: -------------------------------------------------------------------------------- 1 | # Error? It shows error in the B script but the get unique outputs look ok 2 | 3 | [ Thu Jul 23 23:45:40 UTC 2020 ] All jobs launched. 
Waiting for them to complete 4 | Traceback (most recent call last): 5 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 385, i 6 | n 7 | main(sys.argv[1:]) 8 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 378, i 9 | n main 10 | if driverMonitor.start_monitor() < 0 : 11 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 175, i 12 | n start_monitor 13 | if self.__collect_rusage() < 0 : 14 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 225, i 15 | n __collect_rusage 16 | self.dbHandler.update_db(resources) 17 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 131, i 18 | n update_db 19 | conn.rollback() 20 | sqlite3.OperationalError: cannot rollback - no transaction is active 21 | [ Fri Jul 24 05:08:35 UTC 2020 ] All jobs have completed, written to /data1/C3N-01719/unique_reads 22 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun4.compute1/dat/Dockermap.dat: -------------------------------------------------------------------------------- 1 | /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/import1 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun4.compute1/dat/worklog: -------------------------------------------------------------------------------- 1 | cat Weirdp1.txt | cut -f 1 | grep -v Sample > case_names.dat 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun4.compute1/project_config-host.sh: -------------------------------------------------------------------------------- 1 | # Define host-specific project paths and configuration 2 | # This is for katmai 3 | 4 | BATCH_NAME="Y3.b1.PDA.rerun4" 
5 | PROJECT="run_cases."$BATCH_NAME 6 | 7 | # All paths here are relative to host 8 | BAMMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/BamMap/storage1.BamMap.dat" 9 | 10 | # The list of case list 11 | CASEMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/CPTAC3.cases.dat" 12 | 13 | # Installation directory of BICSEQ2.DL 14 | BICSEQ_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.Y3.b1/scripts/BICSEQ2" 15 | 16 | # Principal workflow output directory. /data1 will map to $OUTD_H 17 | OUTBASE_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.Y3.b1/outputs" 18 | OUTD_H="$OUTBASE_H/$PROJECT" 19 | 20 | # Define directories to be mapped to /data2, etc. If more than DATA4, adjust call to process_cases.sh accordingly 21 | # data2: chrom reference (./hg38) and mappability 22 | DATA2="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/inputs/" 23 | # data3: gene annotation file. 
using updated one (19940 lines) copied from MGI 24 | DATA3="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/cached.annotation" 25 | # data4: directory to the scripts, espcially the project config 26 | DATA4=${BICSEQ_H}"/testing/docker_call/run_cases.Y3.b1.PDA.rerun4.compute1" 27 | 28 | # set this to 1 if running on MGI 29 | IS_MGI=0 30 | MGI_LSF_GROUP="/yigewu/bicseq2" 31 | 32 | # set this to 1 if running on Compute1 33 | IS_COMPUTE1=1 34 | COMPUTE1_LSF_GROUP="/yigewu/bicseq2" 35 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.PDA.rerun4.compute1/start_docker_interactive.sh: -------------------------------------------------------------------------------- 1 | export LSF_DOCKER_VOLUMES="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/:/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/ /storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/:/storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/ /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data /home/yigewu:/home/yigewu" 2 | bsub -Is -a 'docker1(mwyczalkowski/bicseq2)' bash 3 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.compute1/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, looking for all UCEC cases with WGS hg38 data 3 | 4 | source project_config-host.sh 5 | 6 | mkdir -p dat 7 | OUT="dat/case_names.dat" 8 | 9 | grep Y2.b2 $CASEMAP | cut -f 1 | sort -u > $OUT 10 | >&2 echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.compute1/2_make_dockermap.sh: 
-------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | OUT="dat/Dockermap.dat" 5 | 6 | bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 7 | 8 | >&2 echo Written to $OUT 9 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.compute1/3_make_case_list.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/CaseList.dat" 7 | 8 | bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.compute1/A.process_project_cases.sh: -------------------------------------------------------------------------------- 1 | # start processing of list of cases 2 | 3 | # Usage: 4 | # A.process_project_cases.sh [options] CASE1 CASE2 ... 5 | # or 6 | # cat CASES | A.process_project_cases.sh [options] - 7 | # 8 | # with CASES a list of case names. All options passed to src/process_cases.sh 9 | 10 | # Project config path is on host, and may be relative. Will be mounted as a file /project_config.sh 11 | PROJECT_CONFIG="./project_config.sh" 12 | source project_config-host.sh 13 | 14 | CASELIST="dat/CaseList.dat" 15 | DOCKERMAP="dat/Dockermap.dat" 16 | 17 | # DATAMAP lists directories mapped to /data1, /data2, etc. 
18 | DATAMAP=" $OUTD_H $DATA2 $DATA3 $DATA4 " 19 | 20 | if [ $IS_MGI == 1 ]; then 21 | # -M for MGI 22 | MGI_ARGS="-M -g $MGI_LSF_GROUP -q research-hpc" 23 | fi 24 | 25 | if [ $IS_COMPUTE1 == 1 ]; then 26 | # -M for MGI 27 | COMPUTE1_ARGS="-Z -g $COMPUTE1_LSF_GROUP -q general" 28 | fi 29 | 30 | # If PARALLEL_CASES is not defined, on non-MGI run jobs sequentially 31 | PARALLEL_CASES=20; PARGS="-J $PARALLEL_CASES" 32 | 33 | bash $BICSEQ_H/src/process_cases.sh $MGI_ARGS $COMPUTE1_ARGS $PARGS -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST -m $DOCKERMAP -P "$DATAMAP" $@ 34 | 35 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.compute1/B.evaluate_project_cases.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case. 
Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.sh" 14 | source project_config-host.sh 15 | 16 | CASELIST="dat/CaseList.dat" 17 | 18 | # -M for MGI 19 | bash $BICSEQ_H/src/evaluate_cases.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 20 | 21 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.compute1/C.make_analysis_description.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/"$BATCH_NAME".analysis_description.dat" 7 | 8 | bash $BICSEQ_H/src/make_analysis_description.sh -b $BAMMAP -O $OUTD_H -m $DOCKERMAP - < $CASES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.compute1/TODO: -------------------------------------------------------------------------------- 1 | check samples 2 | C3L-02617 3 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.compute1/dat/Dockermap.dat: -------------------------------------------------------------------------------- 1 | /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/import1 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.compute1/dat/case_names_not_run.dat: -------------------------------------------------------------------------------- 1 | hg38 C3L-02617 WGS blood_normal sample not found in 
/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/BamMap/storage1.BamMap.dat 2 | hg38 C3N-02727 WGS blood_normal sample not found in /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/BamMap/storage1.BamMap.dat 3 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.compute1/project_config-host.sh: -------------------------------------------------------------------------------- 1 | # Define host-specific project paths and configuration 2 | # This is for katmai 3 | 4 | BATCH_NAME="Y3.b1" 5 | PROJECT="run_cases."$BATCH_NAME 6 | 7 | # All paths here are relative to host 8 | BAMMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/BamMap/storage1.BamMap.dat" 9 | 10 | # The list of case list 11 | CASEMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/CPTAC3.cases.dat" 12 | 13 | # Installation directory of BICSEQ2.DL 14 | BICSEQ_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.Y3.b1/scripts/BICSEQ2" 15 | 16 | # Principal workflow output directory. /data1 will map to $OUTD_H 17 | OUTBASE_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.Y3.b1/outputs" 18 | OUTD_H="$OUTBASE_H/$PROJECT" 19 | 20 | # Define directories to be mapped to /data2, etc. If more than DATA4, adjust call to process_cases.sh accordingly 21 | # data2: chrom reference (./hg38) and mappability 22 | DATA2="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/inputs/" 23 | # data3: gene annotation file. 
using updated one (19940 lines) copied from MGI 24 | DATA3="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/cached.annotation" 25 | # data4: directory to the scripts, espcially the project config 26 | DATA4=${BICSEQ_H}"/testing/docker_call/run_cases.Y3.b1.compute1" 27 | 28 | # set this to 1 if running on MGI 29 | IS_MGI=0 30 | MGI_LSF_GROUP="/yigewu/bicseq2" 31 | 32 | # set this to 1 if running on Compute1 33 | IS_COMPUTE1=1 34 | COMPUTE1_LSF_GROUP="/yigewu/bicseq2" 35 | -------------------------------------------------------------------------------- /testing/docker_call/run_cases.Y3.b1.compute1/start_docker_interactive.sh: -------------------------------------------------------------------------------- 1 | export LSF_DOCKER_VOLUMES="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/:/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/ /storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/:/storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/ /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data /home/yigewu:/home/yigewu" 2 | bsub -Is -a 'docker1(mwyczalkowski/bicseq2)' bash 3 | -------------------------------------------------------------------------------- /testing/docker_call/run_samples.CCRCC.ITH.compute1/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, looking for all UCEC cases with WGS hg38 data 3 | 4 | source project_config-host.sh 5 | 6 | mkdir -p dat 7 | OUT="dat/case_names.dat" 8 | 9 | grep Y2.b2 $CASEMAP | cut -f 1 | sort -u > $OUT 10 | >&2 echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_samples.CCRCC.ITH.compute1/2_make_dockermap.sh: 
-------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | SAMPLES="dat/tumor_sample_names.dat" 4 | OUT="dat/Dockermap.dat" 5 | 6 | #bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 7 | bash $BICSEQ_H/src/make_dockermap_bytumorsample.sh -b $BAMMAP - < $SAMPLES > $OUT 8 | 9 | >&2 echo Written to $OUT 10 | -------------------------------------------------------------------------------- /testing/docker_call/run_samples.CCRCC.ITH.compute1/3_make_case_list.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | SAMPLES="dat/tumor_sample_names.dat" 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/CaseList.dat" 7 | 8 | #bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 9 | bash $BICSEQ_H/src/make_case_list_bytumorsample.sh -b $BAMMAP -m $DOCKERMAP - < $SAMPLES > $OUT 10 | 11 | echo Written to $OUT 12 | -------------------------------------------------------------------------------- /testing/docker_call/run_samples.CCRCC.ITH.compute1/B.evaluate_tumorsamples.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case. 
Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.sh" 14 | source project_config-host.sh 15 | 16 | CASELIST="dat/CaseList.dat" 17 | 18 | # -M for MGI 19 | bash $BICSEQ_H/src/evaluate_tumorsamples.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 20 | 21 | -------------------------------------------------------------------------------- /testing/docker_call/run_samples.CCRCC.ITH.compute1/C.make_analysis_description.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | SAMPLES="dat/tumor_sample_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/"$BATCH_NAME".analysis_description.dat" 7 | 8 | bash $BICSEQ_H/src/make_analysis_description_bytumorsample.sh -b $BAMMAP -O $OUTD_H -m $DOCKERMAP - < $SAMPLES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_samples.CCRCC.ITH.compute1/TODO: -------------------------------------------------------------------------------- 1 | # Error? It shows error in the B script but the get unique outputs look ok 2 | 3 | [ Thu Jul 23 23:45:40 UTC 2020 ] All jobs launched. 
Waiting for them to complete 4 | Traceback (most recent call last): 5 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 385, i 6 | n 7 | main(sys.argv[1:]) 8 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 378, i 9 | n main 10 | if driverMonitor.start_monitor() < 0 : 11 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 175, i 12 | n start_monitor 13 | if self.__collect_rusage() < 0 : 14 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 225, i 15 | n __collect_rusage 16 | self.dbHandler.update_db(resources) 17 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 131, i 18 | n update_db 19 | conn.rollback() 20 | sqlite3.OperationalError: cannot rollback - no transaction is active 21 | [ Fri Jul 24 05:08:35 UTC 2020 ] All jobs have completed, written to /data1/C3N-01719/unique_reads 22 | -------------------------------------------------------------------------------- /testing/docker_call/run_samples.CCRCC.ITH.compute1/dat/Dockermap.dat: -------------------------------------------------------------------------------- 1 | /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/import1 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_samples.CCRCC.ITH.compute1/dat/worklog: -------------------------------------------------------------------------------- 1 | cp /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/Case_Lists/4E.cases_work_list.dat case_names.dat 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_samples.CCRCC.ITH.compute1/project_config-host.sh: -------------------------------------------------------------------------------- 1 | # Define host-specific project paths and configuration 2 | # 
This is for katmai 3 | 4 | BATCH_NAME="CCRCC.ITH" 5 | PROJECT="run_samples."$BATCH_NAME".compute1" 6 | 7 | # All paths here are relative to host 8 | BAMMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/BamMap/storage1.BamMap.dat" 9 | 10 | # The list of case list 11 | CASEMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/CPTAC3.cases.dat" 12 | 13 | # Installation directory of BICSEQ2.DL 14 | BICSEQ_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.UCEC.rerun/scripts/BICSEQ2" 15 | 16 | # Principal workflow output directory. /data1 will map to $OUTD_H 17 | OUTBASE_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.UCEC.rerun/outputs" 18 | OUTD_H="$OUTBASE_H/$PROJECT" 19 | 20 | # Define directories to be mapped to /data2, etc. If more than DATA4, adjust call to process_cases.sh accordingly 21 | # data2: chrom reference (./hg38) and mappability 22 | DATA2="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/inputs/" 23 | # data3: gene annotation file. 
using updated one (19940 lines) copied from MGI 24 | DATA3="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/cached.annotation" 25 | # data4: directory to the scripts for running the current batch, espcially the project config 26 | DATA4=${BICSEQ_H}"/testing/docker_call/${PROJECT}" 27 | ## data5: directory with all the scripts, including codes under src 28 | DATA5=${BICSEQ_H} 29 | 30 | # set this to 1 if running on MGI 31 | IS_MGI=0 32 | MGI_LSF_GROUP="/yigewu/bicseq2" 33 | 34 | # set this to 1 if running on Compute1 35 | IS_COMPUTE1=1 36 | COMPUTE1_LSF_GROUP="/yigewu/bicseq2" 37 | -------------------------------------------------------------------------------- /testing/docker_call/run_samples.CCRCC.ITH.compute1/start_docker_to_launch_jobs.compute1.sh: -------------------------------------------------------------------------------- 1 | bash /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.UCEC.rerun/scripts/WUDocker/start_docker.sh -I mwyczalkowski/bicseq2 -M compute1 /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/ /storage1/fs1/home1/Active/home/yigewu:/home/yigewu/ /storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/ /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/ 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_samples.CCRCC.ITH.rerun.compute1/1_make_case_names.sh: -------------------------------------------------------------------------------- 1 | # Make a list of case names 2 | # Specifically, looking for all UCEC cases with WGS hg38 data 3 | 4 | source project_config-host.sh 5 | 6 | mkdir -p dat 7 | OUT="dat/case_names.dat" 8 | 9 | grep Y2.b2 $CASEMAP | cut -f 1 | sort -u > $OUT 10 | >&2 echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_samples.CCRCC.ITH.rerun.compute1/2_make_dockermap.sh: 
-------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | SAMPLES="dat/tumor_sample_names.dat" 4 | OUT="dat/Dockermap.dat" 5 | 6 | #bash $BICSEQ_H/src/make_dockermap.sh -b $BAMMAP - < $CASES > $OUT 7 | bash $BICSEQ_H/src/make_dockermap_bytumorsample.sh -b $BAMMAP - < $SAMPLES > $OUT 8 | 9 | >&2 echo Written to $OUT 10 | -------------------------------------------------------------------------------- /testing/docker_call/run_samples.CCRCC.ITH.rerun.compute1/3_make_case_list.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | CASES="dat/case_names.dat" 4 | SAMPLES="dat/tumor_sample_names.dat" 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/CaseList.dat" 7 | 8 | #bash $BICSEQ_H/src/make_case_list.sh -b $BAMMAP -m $DOCKERMAP - < $CASES > $OUT 9 | bash $BICSEQ_H/src/make_case_list_bytumorsample.sh -b $BAMMAP -m $DOCKERMAP - < $SAMPLES > $OUT 10 | 11 | echo Written to $OUT 12 | -------------------------------------------------------------------------------- /testing/docker_call/run_samples.CCRCC.ITH.rerun.compute1/B.evaluate_tumorsamples.sh: -------------------------------------------------------------------------------- 1 | # Evaluate processing status for list of cases. Runs on host 2 | 3 | # Usage: 4 | # B.evaluate_project_cases.sh [options] 5 | # 6 | # Evaluate status of case processing. All options passed to src/evaluate_cases.sh 7 | # Reads host-directory log files and output directories to indicate status of each case. 
Status may be one of, 8 | # * not_started - ready to begin processing, not yet started 9 | # * running - processing is being performed 10 | # * complete - processing has completed 11 | # * error - processing has completed with an error 12 | 13 | PROJECT_CONFIG="./project_config.sh" 14 | source project_config-host.sh 15 | 16 | CASELIST="dat/CaseList.dat" 17 | 18 | # -M for MGI 19 | bash $BICSEQ_H/src/evaluate_tumorsamples.sh -M -L $OUTD_H -p $PROJECT_CONFIG -S $CASELIST $@ 20 | 21 | -------------------------------------------------------------------------------- /testing/docker_call/run_samples.CCRCC.ITH.rerun.compute1/C.make_analysis_description.sh: -------------------------------------------------------------------------------- 1 | source project_config-host.sh 2 | 3 | SAMPLES="dat/tumor_sample_names.dat" 4 | 5 | DOCKERMAP="dat/Dockermap.dat" 6 | OUT="dat/"$BATCH_NAME".analysis_description.dat" 7 | 8 | bash $BICSEQ_H/src/make_analysis_description_bytumorsample.sh -b $BAMMAP -O $OUTD_H -m $DOCKERMAP - < $SAMPLES > $OUT 9 | 10 | echo Written to $OUT 11 | -------------------------------------------------------------------------------- /testing/docker_call/run_samples.CCRCC.ITH.rerun.compute1/TODO: -------------------------------------------------------------------------------- 1 | # Error? It shows error in the B script but the get unique outputs look ok 2 | 3 | [ Thu Jul 23 23:45:40 UTC 2020 ] All jobs launched. 
Waiting for them to complete 4 | Traceback (most recent call last): 5 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 385, i 6 | n 7 | main(sys.argv[1:]) 8 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 378, i 9 | n main 10 | if driverMonitor.start_monitor() < 0 : 11 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 175, i 12 | n start_monitor 13 | if self.__collect_rusage() < 0 : 14 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 225, i 15 | n __collect_rusage 16 | self.dbHandler.update_db(resources) 17 | File "/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/etc/docker1_monitor.py", line 131, i 18 | n update_db 19 | conn.rollback() 20 | sqlite3.OperationalError: cannot rollback - no transaction is active 21 | [ Fri Jul 24 05:08:35 UTC 2020 ] All jobs have completed, written to /data1/C3N-01719/unique_reads 22 | -------------------------------------------------------------------------------- /testing/docker_call/run_samples.CCRCC.ITH.rerun.compute1/dat/Dockermap.dat: -------------------------------------------------------------------------------- 1 | /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data:/import1 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_samples.CCRCC.ITH.rerun.compute1/dat/tumor_sample_names.dat: -------------------------------------------------------------------------------- 1 | C3L-00968.WGS.T.hg38 2 | C3N-01083.WGS.T.hg38 3 | C3N-02224.WGS.T.hg38 4 | C3N-02265.WGS.T.hg38 5 | C3N-02338.WGS.T.hg38 6 | C3N-02429.WGS.T.hg38 7 | C3L-00581.WGS.T.hg38 8 | C3L-01560.WGS.T.hg38 9 | C3N-00194.WGS.T.HET_BL42vN.hg38 10 | C3N-00494.WGS.T.HET_oQ9OMK.hg38 11 | C3N-00194.WGS.T.HET_oQLVMY.hg38 12 | C3N-00150.WGS.T.HET_qo3N87.hg38 13 | C3N-00312.WGS.T.HET_r05gGk.hg38 14 | 
C3N-00312.WGS.T.HET_r05vJW.hg38 15 | C3L-00583.WGS.T.HET_r28DqE.hg38 16 | C3N-00168.WGS.T.HET_r2VOW4.hg38 17 | C3N-00168.WGS.T.HET_r2VP3K.hg38 18 | C3N-00577.WGS.T.HET_r5Nwy2.hg38 19 | C3L-00606.WGS.T.HET_r6mAW4.hg38 20 | C3L-00606.WGS.T.HET_r6mD5p.hg38 21 | C3N-00148.WGS.T.HET_r6wDN2.hg38 22 | C3L-00447.WGS.T.HET_rkL2E4.hg38 23 | C3L-00418.WGS.T.HET_rkwKwB.hg38 24 | C3L-01861.WGS.T.HET_rlrBXK.hg38 25 | C3N-00149.WGS.T.HET_rnlJZK.hg38 26 | C3N-00149.WGS.T.HET_rnlOL4.hg38 27 | C3L-01836.WGS.T.HET_ro9412.hg38 28 | C3L-01286.WGS.T.HET_roYrkB.hg38 29 | C3L-00907.WGS.T.HET_rp9Omk.hg38 30 | C3N-00953.WGS.T.HET_rpvExK.hg38 31 | C3N-01524.WGS.T.HET_rvrNDp.hg38 32 | C3N-01522.WGS.T.HET_rvWV16.hg38 33 | C3N-00437.WGS.T.HET_voJ7MV.hg38 34 | C3N-01220.WGS.T.HET_wAgPl1.hg38 35 | C3N-00733.WGS.T.HET_x2YPQz.hg38 36 | C3N-01200.WGS.T.HET_xBGLLn.hg38 37 | C3N-01200.WGS.T.HET_xBGV2J.hg38 38 | C3N-00390.WGS.T.HET_xnE37P.hg38 39 | C3N-00390.WGS.T.HET_xnEk99.hg38 40 | C3N-00317.WGS.T.HET_xv7PEE.hg38 41 | C3N-00314.WGS.T.HET_xvVWjl.hg38 42 | -------------------------------------------------------------------------------- /testing/docker_call/run_samples.CCRCC.ITH.rerun.compute1/dat/worklog: -------------------------------------------------------------------------------- 1 | cp /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/Case_Lists/4E.cases_work_list.dat case_names.dat 2 | -------------------------------------------------------------------------------- /testing/docker_call/run_samples.CCRCC.ITH.rerun.compute1/project_config-host.sh: -------------------------------------------------------------------------------- 1 | # Define host-specific project paths and configuration 2 | # This is for katmai 3 | 4 | BATCH_NAME="CCRCC.ITH.rerun" 5 | PROJECT="run_samples."$BATCH_NAME".compute1" 6 | 7 | # All paths here are relative to host 8 | 
BAMMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/BamMap/storage1.BamMap.dat" 9 | 10 | # The list of case list 11 | CASEMAP="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/CPTAC3.catalog/CPTAC3.cases.dat" 12 | 13 | # Installation directory of BICSEQ2.DL 14 | BICSEQ_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.UCEC.rerun/scripts/BICSEQ2" 15 | 16 | # Principal workflow output directory. /data1 will map to $OUTD_H 17 | OUTBASE_H="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.UCEC.rerun/outputs" 18 | OUTD_H="$OUTBASE_H/$PROJECT" 19 | 20 | # Define directories to be mapped to /data2, etc. If more than DATA4, adjust call to process_cases.sh accordingly 21 | # data2: chrom reference (./hg38) and mappability 22 | DATA2="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/inputs/" 23 | # data3: gene annotation file. using updated one (19940 lines) copied from MGI 24 | DATA3="/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/Datasets/cached.annotation" 25 | # data4: directory to the scripts for running the current batch, espcially the project config 26 | DATA4=${BICSEQ_H}"/testing/docker_call/${PROJECT}" 27 | ## data5: directory with all the scripts, including codes under src 28 | DATA5=${BICSEQ_H} 29 | 30 | # set this to 1 if running on MGI 31 | IS_MGI=0 32 | MGI_LSF_GROUP="/yigewu/bicseq2" 33 | 34 | # set this to 1 if running on Compute1 35 | IS_COMPUTE1=1 36 | COMPUTE1_LSF_GROUP="/yigewu/bicseq2" 37 | -------------------------------------------------------------------------------- /testing/docker_call/run_samples.CCRCC.ITH.rerun.compute1/start_docker_to_launch_jobs.compute1.sh: -------------------------------------------------------------------------------- 1 | bash 
/storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/BATCH.UCEC.rerun/scripts/WUDocker/start_docker.sh -I mwyczalkowski/bicseq2 -M compute1 /storage1/fs1/dinglab/Active/Projects/CPTAC3/Analysis/WGS_CNV_Somatic/ /storage1/fs1/home1/Active/home/yigewu:/home/yigewu/ /storage1/fs1/m.wyczalkowski/Active/Primary/Resources/References/ /storage1/fs1/m.wyczalkowski/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/ 2 | -------------------------------------------------------------------------------- /testing/test_data/chromosomes.18-20.dat: -------------------------------------------------------------------------------- 1 | chr18 2 | chr19 3 | chr20 4 | -------------------------------------------------------------------------------- /testing/test_data/chromosomes.20.dat: -------------------------------------------------------------------------------- 1 | chr20 2 | -------------------------------------------------------------------------------- /testing/test_data/chromosomes.8.11.dat: -------------------------------------------------------------------------------- 1 | 8 2 | 11 3 | -------------------------------------------------------------------------------- /testing/test_data/chromosomes.dat: -------------------------------------------------------------------------------- 1 | chr1 2 | chr2 3 | chr3 4 | chr4 5 | chr5 6 | chr6 7 | chr7 8 | chr8 9 | chr9 10 | chr10 11 | chr11 12 | chr12 13 | chr13 14 | chr14 15 | chr15 16 | chr16 17 | chr17 18 | chr18 19 | chr19 20 | chr20 21 | chr21 22 | chr22 23 | chrX 24 | chrY 25 | --------------------------------------------------------------------------------