├── ._README.MD
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.MD
├── build_pypi.sh
├── changeseq
    ├── NUC_SIMPLE
    ├── __init__.py
    ├── alignReads.py
    ├── callVariants.py
    ├── changeseq.py
    ├── findCleavageSites.py
    ├── log.py
    ├── mergeReads.py
    ├── referenceFree.py
    ├── refseq_gene_name.list
    ├── refseq_gene_name.py
    ├── test.yaml
    ├── utility.py
    ├── validation.py
    └── visualization.py
├── conda_build
    ├── conda_build_config.yaml
    └── meta.yaml
├── example_output.png
├── requirements.txt
├── scripts
    ├── NUC_SIMPLE
    ├── __init__.py
    ├── site_pvalue.R
    ├── test.py
    ├── test_align.py
    └── test_ga.py
├── setup.cfg
├── setup.py
└── test
    ├── CIRCLEseq_MergedTest.yaml
    ├── CIRCLEseq_StandardTest.yaml
    ├── __init__.py
    ├── data
        ├── MergedOutput
        │   ├── aligned
        │   │   ├── TestSample.bam
        │   │   ├── TestSample.bam.bai
        │   │   ├── TestSample.sam
        │   │   ├── TestSample_sorted.bam
        │   │   ├── control_TestSample.bam
        │   │   ├── control_TestSample.bam.bai
        │   │   ├── control_TestSample.sam
        │   │   └── control_TestSample_sorted.bam
        │   ├── fastq
        │   │   ├── TestSample_merged.fastq.gz
        │   │   └── control_TestSample_merged.fastq.gz
        │   ├── identified
        │   │   ├── TestSample_CONTROL_coordinates.txt
        │   │   ├── TestSample_NUCLEASE_coordinates.txt
        │   │   ├── TestSample_count.txt
        │   │   ├── TestSample_identified_matched.txt
        │   │   └── TestSample_identified_unmatched.txt
        │   └── visualization
        │   │   └── TestSample_offtargets.svg
        ├── StandardOutput
        │   ├── aligned
        │   │   ├── TestSample.bam
        │   │   ├── TestSample.bam.bai
        │   │   ├── TestSample.sam
        │   │   ├── TestSample_sorted.bam
        │   │   ├── control_TestSample.bam
        │   │   ├── control_TestSample.bam.bai
        │   │   ├── control_TestSample.sam
        │   │   └── control_TestSample_sorted.bam
        │   ├── identified
        │   │   ├── TestSample_CONTROL_coordinates.txt
        │   │   ├── TestSample_NUCLEASE_coordinates.txt
        │   │   ├── TestSample_count.txt
        │   │   ├── TestSample_identified_matched.txt
        │   │   └── TestSample_identified_unmatched.txt
        │   ├── variants
        │   │   ├── TestSample_Variants.txt
        │   │   └── TestSample_mpileupCall.txt
        │   └── visualization
        │   │   └── TestSample_offtargets.svg
        └── input
        │   ├── CIRCLEseq_test_genome.fa
        │   ├── CIRCLEseq_test_genome.fa.amb
        │   ├── CIRCLEseq_test_genome.fa.ann
        │   ├── CIRCLEseq_test_genome.fa.bwt
        │   ├── CIRCLEseq_test_genome.fa.fai
        │   ├── CIRCLEseq_test_genome.fa.pac
        │   ├── CIRCLEseq_test_genome.fa.sa
        │   ├── TEST.r1.fastq.gz
        │   ├── TEST.r2.fastq.gz
        │   ├── TEST_control.r1.fastq.gz
        │   └── TEST_control.r2.fastq.gz
    ├── scripts
        ├── CIRCLEseq_prepare_test_data.sh
        ├── CIRCLEseq_prepare_test_reference.sh
        ├── CIRCLEseq_test_bed.R
        └── Test.sh
    ├── test_circleseq_merged.py
    ├── test_circleseq_std.py
    └── utils.py


/._README.MD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/._README.MD


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | build/
 2 | dist/
 3 | changeseq.egg-info/
 4 | *.py[cod]
 5 | test/output
 6 | # macOS Finder metadata
 7 | *.DS_Store
 8 | .DS_Store
 9 | 
10 | 
11 | 
12 | # PyCharm Pref Folder
13 | .idea
14 | 
15 | # C extensions
16 | *.so
17 | 
18 | # Packages
19 | *.egg
20 | *.egg-info
21 | dist
22 | build
23 | eggs
24 | parts
25 | bin
26 | var
27 | sdist
28 | develop-eggs
29 | .installed.cfg
30 | lib64
31 | 
32 | # Installer logs
33 | pip-log.txt
34 | 
35 | # Unit test / coverage reports
36 | .coverage
37 | .tox
38 | nosetests.xml
39 | htmlcov
40 | 
41 | # Translations
42 | *.mo
43 | 
44 | # Mr Developer
45 | .mr.developer.cfg
46 | .project
47 | .pydevproject
48 | 
49 | # Complexity
50 | output/*.html
51 | output/*/index.html
52 | 
53 | # Sphinx
54 | docs/_build
55 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | # Config file for automatic testing at travis-ci.org
 2 | 
 3 | language: python
 4 | 
 5 | python:
 6 |   - "2.7"
 7 | 
 8 | before_install:
 9 |   - cd test
10 |   - git clone https://github.com/lh3/bwa.git
11 |   - cd bwa
12 |   - git checkout tags/v0.7.13
13 |   - make
14 |   - cd ..
15 |   - PATH=`pwd`/bwa:$PATH
16 |   - git clone https://github.com/samtools/htslib.git
17 |   - cd htslib
18 |   - git checkout tags/1.3
19 |   - make
20 |   - cd ..
21 |   - git clone https://github.com/samtools/samtools.git
22 |   - cd samtools
23 |   - git checkout tags/1.3
24 |   - make
25 |   - cd ..
26 |   - PATH=`pwd`/samtools:$PATH
27 |   - cd ..
28 |   - git clone git://github.com/samtools/htslib.git
29 |   - cd htslib
30 |   - git checkout tags/1.7
31 |   - cd ..
32 |   - git clone git://github.com/samtools/bcftools.git
33 |   - cd bcftools
34 |   - git checkout tags/1.6
35 |   - make
36 |   - cd ..
37 |   - PATH=`pwd`/bcftools:$PATH
38 | 
39 | # command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
40 | install:
41 |   - pip install --upgrade pip setuptools wheel
42 |   - pip install --only-binary=numpy,scipy numpy scipy
43 |   - pip install -r requirements.txt
44 | 
45 | # command to run tests, e.g. python setup.py test
46 | script:
47 |     cd test && nosetests --exe -v
48 | 


--------------------------------------------------------------------------------
/README.MD:
--------------------------------------------------------------------------------
  1 | 
  2 | [![Version][version-shield]][version-url]
  3 | [![Python versions][python-shield]][python-url]
  4 | [![Platforms][platform-shield]][python-url]
  5 | 
  6 | 
  7 | # CHANGE-seq: Circularization for High-throughput Analysis Nuclease Genome-wide Effects by Sequencing
  8 | 
  9 | This is a repository for CHANGE-seq analytical software, which takes sample-specific paired-end FASTQ files as input and produces a list of CHANGE-seq detected off-target cleavage sites as output.
 10 | 
 11 | # Summary
 12 | 
 13 | This package implements a pipeline that takes in reads from the CHANGE-seq assay and returns detected cleavage sites as output. The individual pipeline steps are:
 14 | 
 15 | 1. **Merge**: Merge read1 and read2 for easier mapping to the genome.
 16 | 2. **Read Alignment**: Merged paired end reads from the assay are aligned to the reference genome using the BWA-MEM algorithm with default parameters (Li. H, 2009).
 17 | 3. **Cleavage Site Identification**: Mapped sites are analyzed to determine which represent high-quality cleavage sites.
 18 | 4. **Visualization of Results**: Identified on-target and off-target cleavage sites are rendered as a color-coded alignment map for easy analysis of results.
 19 | 
 20 | # Installation
 21 | 
 22 | The easiest way to install the change-seq pipeline is via conda.
 23 | 
 24 | ```
 25 | 
 26 | conda create -n changeseq -c conda-forge -c bioconda -c anaconda -c omnia -c tsailabSJ changeseq
 27 | 
 28 | source activate changeseq
 29 | 
 30 | changeseq.py -h
 31 | 
 32 | ## BWA 0.7.17 and samtools 1.9 are automatically installed
 33 | 
 34 | ## If Homer is available, the identified off-targets will be annotated using "annotatePeaks.pl", specify the genome version in the YAML file.
 35 | 
 36 | 
 37 | ```
 38 | 
 39 | Alternatively, you can git clone this repository and install
 40 | 
 41 | ```
 42 | 
 43 | git clone https://github.com/tsailabSJ/changeseq
 44 | 
 45 | cd changeseq
 46 | 
 47 | pip install -r requirements.txt
 48 | 
 49 | python setup.py install
 50 | 
 51 | changeseq.py -h
 52 | 
 53 | ## Please install BWA and samtools if you choose this option
 54 | 
 55 | ```
 56 | 
 57 | ## Download Reference Genome
 58 | 
 59 | The CHANGEseq package requires a reference genome for read mapping. You can use any genome of your choosing, but for all of our testing and original CHANGE-seq analyses we use hg19 ([download](http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta)). Be sure to (g)unzip the FASTA file before use if it is compressed.
 60 | 
 61 | # Usage
 62 | 
 63 | The change-seq pipeline requires a manifest yaml file specifying input files, output directory, and pipeline parameters. Once the yaml file is created, users can simply run ``changeseq.py all --manifest /path/to/manifest.yaml``
 64 | 
 65 | 
 66 | Below is an example ``manifest.yaml`` file::
 67 | 
 68 |     reference_genome: /data/joung/genomes/Homo_sapiens_assembly19.fasta
 69 |     analysis_folder: /data/joung/CHANGE-Seq/test2
 70 |     
 71 |     bwa: bwa
 72 |     samtools: samtools
 73 |     
 74 |     read_threshold: 4
 75 |     window_size: 3
 76 |     mapq_threshold: 50
 77 |     start_threshold: 1
 78 |     gap_threshold: 3
 79 |     mismatch_threshold: 6
 80 |     search_radius: 30
 81 |     merged_analysis: True
 82 |     
 83 |     samples:
 84 |         U2OS_exp1_VEGFA_site_1:
 85 |             target: GGGTGGGGGGAGTTTGCTCCNGG
 86 |             read1: /data/joung/sequencing_fastq/150902_M01326_0235_000000000-AHLT8/fastq/1_S1_L001_R1_001.fastq.gz
 87 |             read2: /data/joung/sequencing_fastq/150902_M01326_0235_000000000-AHLT8/fastq/1_S1_L001_R2_001.fastq.gz
 88 |             controlread1: /data/joung/sequencing_fastq/150902_M01326_0235_000000000-AHLT8/fastq/4_S4_L001_R1_001.fastq.gz
 89 |             controlread2: /data/joung/sequencing_fastq/150902_M01326_0235_000000000-AHLT8/fastq/4_S4_L001_R2_001.fastq.gz
 90 |             description: U2OS_exp1
 91 |         U2OS_exp1_EMX1:
 92 |             target: GAGTCCGAGCAGAAGAAGAANGG
 93 |             read1: /data/joung/sequencing_fastq/150902_M01326_0235_000000000-AHLT8/fastq/2_S2_L001_R1_001.fastq.gz
 94 |             read2: /data/joung/sequencing_fastq/150902_M01326_0235_000000000-AHLT8/fastq/2_S2_L001_R2_001.fastq.gz
 95 |             controlread1: /data/joung/sequencing_fastq/150902_M01326_0235_000000000-AHLT8/fastq/4_S4_L001_R1_001.fastq.gz
 96 |             controlread2: /data/joung/sequencing_fastq/150902_M01326_0235_000000000-AHLT8/fastq/4_S4_L001_R2_001.fastq.gz
 97 |             description: U2OS_exp1
 98 | 
 99 | ## Quickstart
100 | 
101 | ```
102 | 
103 | git clone https://github.com/tsailabSJ/changeseq
104 | 
105 | cd changeseq/test
106 | 
107 | changeseq.py all --manifest CIRCLEseq_MergedTest.yaml
108 | 
109 | ```
110 | 
111 | ## Example Output
112 | 
113 | ![x](example_output.png)
114 | 
115 | # Writing A Manifest File
116 | When running the end-to-end analysis functionality of the CHANGEseq package a number of inputs are required. To simplify the formatting of these inputs and to encourage reproducibility, these parameters are inputted into the pipeline via a manifest formatted as a YAML file. YAML files allow easy-to-read specification of key-value pairs. This allows us to easily specify our parameters. The following fields are required in the manifest:
117 | 
118 | - `reference_genome`: The absolute path to the reference genome FASTA file.
119 | - `output_folder`: The absolute path to the folder in which all pipeline outputs will be saved.
120 | - `bwa`: The absolute path to the `bwa` executable
121 | - `samtools`: The absolute path to the `samtools` executable
122 | - `read_threshold`: The minimum number of reads at a location for that location to be called as a site. We recommend leaving it to the default value of 4.
123 | - `window_size`: Size of the sliding window, we recommend leaving it to the default value of 3.
124 | - `mapq_threshold`: Minimum read mapping quality score. We recommend leaving it to the default value of 50.
125 | - `start_threshold`: Tolerance for breakpoint location. We recommend leaving it to the default value of 1.
126 | - `gap_threshold`: Distance between breakpoints. We recommend leaving it to the default value of 3 for Cas9.
127 | - `mismatch_threshold`: Number of tolerated gaps in the fuzzy target search step. We recommend leaving it to the default value of 6.
128 | - `read_length`: Fastq file read length, default is 151.
129 | - `PAM`: PAM sequence, default is NGG.
130 | - `genome`: used for homer peak annotation, e.g., hg19, hg38, mm9, or mm10.
131 | - `merged_analysis`: Whether or not the paired read merging step should be performed (`True` or `False`).
132 | - `samples`: Lists the samples you wish to analyze and the details for each. Each sample name should be nested under the top level samples key, and each sample detail should be nested under the sample name. See the sample manifest for an example.
133 |     - For each sample, you must provide the following parameters:
134 |         - `target`: Target sequence for that sample. Accepts degenerate bases.
135 |         - `read1`: The absolute path to the .FASTQ(.gz) file containing the read1 reads.
136 |         - `read2`: The absolute path to the .FASTQ(.gz) file containing the read2 reads.
137 |         - `controlread1`: The absolute path to the .FASTQ(.gz) file containing the control read1 reads.
138 |         - `controlread2`: The absolute path to the .FASTQ(.gz) file containing the control read2 reads.
139 |         - `description`: A brief description of the sample
140 | 
141 | 
142 | # Pipeline Output
143 | When running the full pipeline, the results of each step are outputted to the `output_folder` in a separate folder for each step. The output folders and their respective contents are as follows:
144 | 
145 | - `output_folder/aligned`: Contains an alignment `.sam`, alignment `.bam`, sorted `bam`, and `.bai` index file for each sample.
146 | - `output_folder/fastq`: Merged `.fastq.gz` files for each sample.
147 | - `output_folder/identified`: Contains tab-delimited `.txt` files for each sample containing the identified DSBs, control DSBs, filtered DSBs, and read quantification.
148 | - `output_folder/visualization`: Contains a `.svg` vector image representing an alignment of all detected off-targets to the targetsite for each sample.
149 | 
150 | # FAQ
151 | 
152 | ## Homer installation
153 | 
154 | ```
155 | 
156 | conda install -c bioconda homer
157 | 
158 | # To install genome annotation
159 | # Ref: http://homer.ucsd.edu/homer/introduction/configure.html
160 | 
161 | ## Suppose you want to install hg19, follow the command here:
162 | 
163 | annotatePeaks.pl xxx hg19
164 | 
165 | ## You should be able to see:
166 | 
167 | !!!!Genome hg19 not found in /rgs01/project_space/tsaigrp/Genomics/common/anaconda3/envs/changeseq/share/homer-4.11-2/.//config.txt
168 | 
169 | 	To check if is available, run "perl /rgs01/project_space/tsaigrp/Genomics/common/anaconda3/envs/changeseq/share/homer-4.11-2/.//configureHomer.pl -list"
170 | 	If so, add it by typing "perl /rgs01/project_space/tsaigrp/Genomics/common/anaconda3/envs/changeseq/share/homer-4.11-2/.//configureHomer.pl -install hg19"
171 | 
172 | ## Copy and paste the perl command to install genome annotation
173 | ```
174 | 
175 | 
176 | 
177 | [version-shield]: https://img.shields.io/conda/v/tsailabsj/changeseq.svg
178 | [version-url]: https://anaconda.org/tsailabSJ/changeseq
179 | [python-shield]: https://img.shields.io/pypi/pyversions/changeseq.svg
180 | [python-url]: https://pypi.python.org/pypi/changeseq
181 | [platform-shield]: https://anaconda.org/tsailabsj/changeseq/badges/platforms.svg
182 | 


--------------------------------------------------------------------------------
/build_pypi.sh:
--------------------------------------------------------------------------------
1 | python setup.py sdist
2 | python setup.py bdist_wheel
3 | twine upload dist/*
4 | 
5 | 


--------------------------------------------------------------------------------
/changeseq/NUC_SIMPLE:
--------------------------------------------------------------------------------
 1 | #
 2 | # This matrix was created by Todd Lowe   12/10/92
 3 | #
 4 | # Uses ambiguous nucleotide codes, probabilities rounded to
 5 | #  nearest integer
 6 | #
 7 | # Lowest score = -4, Highest score = 5
 8 | #
 9 | # Modified by Shengdar Tsai 1/23/16
10 |     A   T   G   C   N
11 | A   10  -5  -5  -5   10
12 | T   -5  10  -5  -5   10
13 | G   -5  -5  10  -5   10
14 | C   -5  -5  -5  10   10
15 | N   10  10  10  10   10


--------------------------------------------------------------------------------
/changeseq/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Using __init__.py to organize the structure.
3 | """
4 | 
5 | __version__ = "1.2.9.1"


--------------------------------------------------------------------------------
/changeseq/alignReads.py:
--------------------------------------------------------------------------------
 1 | """
 2 | alignReads
 3 | """
 4 | 
 5 | from __future__ import print_function
 6 | 
 7 | import subprocess
 8 | import os
 9 | import logging
10 | 
11 | logger = logging.getLogger('root')
12 | logger.propagate = False
13 | 
def alignReads(BWA_path, HG19_path, read1, read2, outfile):
    """Map paired-end reads with BWA-MEM, then sort and index with samtools.

    The sample name is the basename of ``outfile`` up to its first dot. The
    following files are written in the directory of ``outfile``:

    * ``outfile`` itself: the SAM output of ``bwa mem``
    * ``<sample>.bam``: ``samtools sort`` of the SAM file, then indexed with
      ``samtools index``
    * ``<sample>_sorted.bam``: ``samtools sort -n`` (name sort) of the BAM

    If any of the BWA index files (``.pac``, ``.amb``, ``.ann``, ``.bwt``,
    ``.sa``) is missing next to the reference, ``bwa index`` is run first.

    Args:
        BWA_path: Command used to invoke bwa.
        HG19_path: Path to the reference genome FASTA.
        read1: Path to the first FASTQ file passed to ``bwa mem``.
        read2: Path to the second FASTQ file passed to ``bwa mem``.
        outfile: Path of the SAM file to create.

    Raises:
        subprocess.CalledProcessError: If indexing, alignment, sorting or
            BAM indexing exits with a non-zero status.

    Note:
        ``samtools`` is invoked by that bare name, so it must be on ``PATH``.
    """
    sample_name = os.path.basename(outfile).split('.')[0]
    output_folder = os.path.dirname(outfile)
    base_name = os.path.join(output_folder, sample_name)
    sam_filename = outfile
    bam_filename = base_name + '.bam'

    # dirname() is '' for a bare file name and os.makedirs('') would raise,
    # so only create the folder when there is one to create.
    if output_folder and not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Check if genome is already indexed by bwa
    index_files_extensions = ['.pac', '.amb', '.ann', '.bwt', '.sa']
    genome_indexed = all(os.path.isfile(HG19_path + extension)
                         for extension in index_files_extensions)

    # If the genome is not already indexed, index it
    if not genome_indexed:
        logger.info('Genome index files not detected. Running BWA to generate indices.')
        bwa_index_command = '{0} index {1}'.format(BWA_path, HG19_path)
        logger.info('Running bwa command: %s', bwa_index_command)
        # check_call (not call): aligning against a half-built index would
        # only fail later with a far less helpful error.
        subprocess.check_call(bwa_index_command.split())
        logger.info('BWA genome index generated')
    else:
        logger.info('BWA genome index found.')

    # Run paired end alignment against the genome
    logger.info('Running paired end mapping for {0}'.format(sample_name))
    bwa_alignment_command = '{0} mem {1} {2} {3} > {4}'.format(BWA_path, HG19_path, read1, read2, sam_filename)
    samtools_sam_to_bam_command = 'samtools sort -o {0} {1}'.format(bam_filename, sam_filename)
    samtools_index_command = 'samtools index {0}'.format(bam_filename)
    samtools_sort_by_name_command = 'samtools sort -o {0} -n {1}'.format("".join([base_name, '_sorted.bam']), bam_filename)

    # The shell performs the '>' redirection of the alignment into the SAM file.
    logger.info(bwa_alignment_command)
    subprocess.check_call(bwa_alignment_command, shell=True)
    logger.info('Paired end mapping for {0} completed.'.format(sample_name))

    # Coordinate-sort the SAM file into a BAM file
    logger.info(samtools_sam_to_bam_command)
    subprocess.check_call(samtools_sam_to_bam_command, shell=True)
    logger.info('Sorting by coordinate position for {0} complete.'.format(sample_name))

    # Index BAM file
    logger.info(samtools_index_command)
    subprocess.check_call(samtools_index_command, shell=True)
    logger.info('Indexing for {0} complete.'.format(sample_name))

    # Sort BAM file by name
    logger.info(samtools_sort_by_name_command)
    subprocess.check_call(samtools_sort_by_name_command, shell=True)
    logger.info('Sorting for {0} by name complete.'.format(sample_name))
70 | 
71 | 
72 | 
73 | 
74 | 


--------------------------------------------------------------------------------
/changeseq/callVariants.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | 
  3 | import subprocess
  4 | import sys
  5 | import os
  6 | import argparse
  7 | import regex
  8 | import re
  9 | import HTSeq
 10 | import pyfaidx
 11 | from findCleavageSites import get_sequence, regexFromSequence, alignSequences, reverseComplement, extendedPattern, realignedSequences
 12 | 
 13 | 
 14 | """
 15 | Run samtools:mpileup and get all identified variants in the window sequences
 16 | """
 17 | def snpCall(matched_file, reference, bam_file, out, search_radius):
 18 |     basename = os.path.basename(out)
 19 |     output_folder = os.path.dirname(out)
 20 | 
 21 |     # open matched file
 22 |     regions = list()
 23 |     with open(matched_file, 'rU') as f:
 24 |         f.readline()
 25 |         for line in f:
 26 |             site = line.strip().split('\t')
 27 |             #  chromosome, windowStart, windowEnd, strand, bam, region_basename (=Targetsite_Name)
 28 |             regions.append([site[0], int(site[6]) - search_radius, int(site[7]) + search_radius, '*', bam_file, '_'.join([site[26], site[3]])])
 29 | 
 30 |     print('Running samtools:mpileup for %s' % basename, file=sys.stderr)
 31 |     out_vcf = os.path.join(output_folder, basename + '_mpileup_output')
 32 |     if os.path.exists(out_vcf):
 33 |         subprocess.check_call('rm -r %s' % out_vcf, shell=True, env=os.environ.copy())
 34 |     os.makedirs(out_vcf)
 35 |     process_mpileup = open(os.path.join(out_vcf, 'logFile_mpileup'), 'w')
 36 | 
 37 |     for item in regions:
 38 |         chromosome, windowStart, windowEnd, strand, bam_file, region_basename = item
 39 |         region = '%s%s%s%s%s' % (chromosome, ":", int(windowStart), "-", int(windowEnd))
 40 |         output = os.path.join(out_vcf, region_basename + '.vcf')
 41 | 
 42 |         cl_vcf = 'samtools mpileup -v --region %s --fasta-ref %s %s > %s' % (region, reference, bam_file, output)
 43 |         subprocess.check_call(cl_vcf, shell=True, env=os.environ.copy(), stderr=process_mpileup, stdout=process_mpileup)
 44 |     process_mpileup.close()
 45 | 
 46 |     print('Collecting variants for %s' % basename, file=sys.stderr)
 47 |     out_bcf = os.path.join(output_folder, basename + '_output_bcftools')
 48 |     if os.path.exists(out_bcf):
 49 |         subprocess.check_call('rm -r %s' % out_bcf, shell=True, env=os.environ.copy())
 50 |     os.makedirs(out_bcf)
 51 |     process_bcftools = open(os.path.join(out_bcf, 'logFile_bcftools'), 'w')
 52 | 
 53 |     vcf_files = [f for f in os.listdir(out_vcf) if os.path.isfile(os.path.join(out_vcf, f))]
 54 |     for arch in vcf_files:
 55 |         if not arch.startswith('.') and arch.endswith('.vcf'):
 56 |             name = arch[:-4]
 57 |             output = os.path.join(out_bcf, name + '_BCFcall.vcf')
 58 | 
 59 |             cl_bcf = 'bcftools call -v -c %s > %s' % (os.path.join(out_vcf, arch), output)
 60 |             subprocess.check_call(cl_bcf, shell=True, env=os.environ.copy(), stderr=process_bcftools, stdout=process_bcftools)
 61 |     process_bcftools.close()
 62 | 
 63 |     print('Collecting significant variant calls for %s' % basename, file=sys.stderr)
 64 |     out_svc = os.path.join(output_folder, basename + '_output_svc')
 65 |     if os.path.exists(out_svc):
 66 |         subprocess.check_call('rm -r %s' % out_svc, shell=True, env=os.environ.copy())
 67 |     os.makedirs(out_svc)
 68 |     process_svc = open(os.path.join(out_svc, 'logFile_svc'), 'w')
 69 | 
 70 |     bcf_files = [f for f in os.listdir(out_bcf) if os.path.isfile(os.path.join(out_bcf, f))]
 71 |     for arch in bcf_files:
 72 |         if not arch.startswith('.') and arch.endswith('.vcf'):
 73 |             name = arch[:-12]
 74 |             output = os.path.join(out_svc, name + '_SIGNFcall.txt')
 75 | 
 76 |             cl_sed = "sed -n '/##/!p' %s | awk 'FNR>1' > %s" % (os.path.join(out_bcf, arch), output)
 77 |             subprocess.check_call(cl_sed, shell=True, env=os.environ.copy(), stderr=process_svc, stdout=process_svc)
 78 |     process_svc.close()
 79 | 
 80 |     print('Consolidating all the significant variant calls for %s' % basename, file=sys.stderr)
 81 |     header = ['targetsite', 'site_name', 'chromosome', 'one_based_position', 'reference', 'variant', 'quality', 'genotype', 'depth', 'PL']
 82 |     variants = list()
 83 | 
 84 |     svc_files = [f for f in os.listdir(out_svc) if os.path.isfile(os.path.join(out_svc, f))]
 85 |     for arch in svc_files:
 86 |         if not arch.startswith('.') and arch.endswith('.txt'):
 87 |             tag = arch[:-14]
 88 |             f = open(os.path.join(out_svc, arch), 'r')
 89 |             reads = f.readlines()
 90 |             f.close()
 91 | 
 92 |             for line in reads:
 93 |                 item = line.split()
 94 |                 if 'INDEL' in item[7]:
 95 |                     variants.append(
 96 |                         [basename, tag] + item[:2] + item[3:6] + [str(int(item[9][0])) + '|' + str(int(item[9][2]))] +
 97 |                         [item[7].split(';')[3][3:]] + ['_'.join(item[9][4:].split(','))])
 98 |                 else:
 99 |                     variants.append(
100 |                         [basename, tag] + item[:2] + item[3:6] + [str(int(item[9][0])) + '|' + str(int(item[9][2]))] +
101 |                         [item[7].split(';')[0][3:]] + ['_'.join(item[9][4:].split(','))])
102 | 
103 |     out_file = open(out + '_mpileupCall.txt', 'w')
104 |     print(*header, sep='\t', file=out_file)
105 |     for item in variants:
106 |         print(*item, sep='\t', file=out_file)
107 |     out_file.close()
108 | 
109 |     print('Cleaning up directive for %s' % basename, file=sys.stderr)
110 |     subprocess.check_call('rm -r %s' % out_vcf, shell=True, env=os.environ.copy())
111 |     subprocess.check_call('rm -r %s' % out_bcf, shell=True, env=os.environ.copy())
112 |     subprocess.check_call('rm -r %s' % out_svc, shell=True, env=os.environ.copy())
113 | 
114 |     print('Done running samtools:mpileup for %s' % basename, file=sys.stderr)
115 |     return variants
116 | 
117 | 
118 | """
119 | Obtain variant off-target sequences
120 | """
def realignVariantBulge(bulge_sequence, window_sequence_variant, bulge_strand):
    """Locate the bulged off-target in the variant window and re-insert its gap.

    The ``-`` characters are removed from ``bulge_sequence`` and the result is
    searched case-insensitively in ``window_sequence_variant`` (for the ``+``
    strand) or in its reverse complement (any other strand value). The matched
    text is returned with a single ``-`` inserted at the index of the first
    ``-`` in ``bulge_sequence``.

    Raises:
        AttributeError: If the ungapped sequence is not found in the window.
    """
    gap_index = bulge_sequence.find('-')
    ungapped = bulge_sequence.replace('-', '')

    if bulge_strand == '+':
        searched = window_sequence_variant
    else:
        searched = reverseComplement(window_sequence_variant)

    # The matched text keeps the letter case of the window, not of the query.
    hit = re.search(ungapped, searched, re.I).group()
    return hit[:gap_index] + '-' + hit[gap_index:]
130 | 
131 | 
def SNPreader(snp_file):
    """Load variant rows into an unstranded HTSeq genomic array.

    Each element of ``snp_file`` must unpack into ten fields: basename, site
    name, chromosome, one-based position, reference, variant, quality,
    genotype, depth and PL. For every row the single-base interval at the
    zero-based position is set to ``[position, reference, variant, genotype,
    quality, key]``, where ``key`` is ``<basename>_<chromosome>``.

    Returns:
        The populated ``HTSeq.GenomicArray``.
    """
    snp_array = HTSeq.GenomicArray("auto", stranded=False, typecode='O')

    for record in snp_file:
        (basename, _site_name, chromosome, one_based_position, reference,
         variant, quality, genotype, _depth, _pl) = record
        zero_based = int(one_based_position) - 1
        lookup_key = '_'.join([basename, chromosome])
        interval = HTSeq.GenomicInterval(chromosome, zero_based, zero_based + 1, ".")
        snp_array[interval] = [zero_based, reference, variant, genotype, quality, lookup_key]

    return snp_array
141 | 
142 | 
def arrayOffTargets(matched_file, search_radius):
    """Index the matched-sites table by site name.

    The first line of ``matched_file`` is skipped as a header. Every other
    line is split on tabs; column 3 is used as the site name.

    Returns:
        A pair ``(sites, windows)`` of dicts keyed by site name. ``sites``
        maps to the full list of columns. ``windows`` maps to an
        ``HTSeq.GenomicInterval`` on chromosome ``column 0`` spanning
        ``column 6 - search_radius`` to ``column 7 + search_radius``.
        A later row with the same name replaces an earlier one.
    """
    sites_by_name = {}
    windows_by_name = {}

    with open(matched_file, 'r') as handle:
        for line_number, row in enumerate(handle):
            if line_number == 0:
                # header line
                continue

            fields = row.strip().split('\t')
            window_start = int(fields[6]) - search_radius
            window_end = int(fields[7]) + search_radius
            site_name = fields[3]

            sites_by_name[site_name] = fields
            #  one genomic interval per window sequence
            windows_by_name[site_name] = HTSeq.GenomicInterval(fields[0], window_start, window_end, ".")

    return sites_by_name, windows_by_name
162 | 
163 | 
164 | def snpAdjustment(matched_file, snp_file, out, mismatch_threshold, search_radius):
165 |     output_file = open(out + '_Variants.txt', 'w')
166 |     print('Chromosome', 'Start', 'End', 'Name', 'ReadCount', 'Strand',
167 |           'Variant_WindowSequence',
168 |           'Variant_Site_SubstitutionsOnly.Sequence', 'Variant_Site_SubstitutionsOnly.NumSubstitutions',
169 |           'Variant_Site_SubstitutionsOnly.Strand',
170 |           'Variant_Site_GapsAllowed.Sequence', 'Variant_Site_GapsAllowed.Length', 
171 |           'Variant_Site_GapsAllowed.Substitutions', 'Variant_Site_GapsAllowed.Insertions', 'Variant_Site_GapsAllowed.Deletions',
172 |           'Variant_Site_GapsAllowed.Strand',
173 |           'Cell', 'Targetsite', 'TargetSequence', 'Variant_RealignedTargetSequence',
174 |           'Reference', 'Variant', 'Genotype', 'Quality',
175 |           sep='\t', file=output_file)
176 |     output_file.close()
177 | 
178 |     basename = os.path.basename(out)
179 |     offtargets, gi_offtargets = arrayOffTargets(matched_file, search_radius)
180 |     ga_snp = SNPreader(snp_file)
181 | 
182 |     for name in offtargets:
183 |         variant_flag = False
184 |         site = offtargets[name]
185 |         gi = gi_offtargets[name]
186 | 
187 |         chromosome = site[0]
188 |         window_sequence = site[9]
189 |         window_sequence = window_sequence.upper()
190 |         cell, targetsite = site[25:27]
191 |         TargetSequence = site[28]
192 |         output01 = site[0:6]
193 |         output03 = [cell, targetsite, TargetSequence]
194 |         ots_nb, ots_bu = site[10], site[15]
195 | 
196 |         #  obtain variant window sequence
197 |         wkey = '_'.join([basename, chromosome])
198 |         insert_start, insert_end, insert_var, snp_data = list(), list(), list(), {}
199 | 
200 |         for i, v in ga_snp[gi].steps():
201 |             if v:
202 |                 position, reference, variant, genotype, quality, key = v
203 |                 if key == wkey:
204 |                     variant = variant.split(',')[0]
205 |                     for n, pos in enumerate(range(gi.start, gi.end)):
206 |                         if pos == int(position):
207 |                             insert_var.append(variant.lower())
208 |                             insert_start.append(n)
209 |                             end_pos = n + len(reference)
210 |                             insert_end.append(end_pos)
211 |                             snp_data[str(position)] = [position, reference, variant, genotype, quality]
212 | 
213 |         tri = 0
214 |         window_sequence_variant = ''
215 |         for i in range(len(insert_var)):
216 |             variant = insert_var[i]
217 |             pos = insert_start[i]
218 |             window_sequence_variant += window_sequence[tri:pos] + variant.lower()
219 |             tri = insert_end[i]
220 |         window_sequence_variant += window_sequence[tri:]
221 | 
222 |         #  variant off-target sequences: only proceed if there is a variant in the window sequence
223 |         window_sequence_var = window_sequence_variant.upper()
224 |         if window_sequence_var != window_sequence:
225 |             offtarget_sequence_no_bulge, mismatches, offtarget_sequence_length, chosen_alignment_strand_m, start_no_bulge, end_no_bulge, \
226 |             realigned_target, \
227 |             bulged_offtarget_sequence, length, score, substitutions, insertions, deletions, chosen_alignment_strand_b, bulged_start, bulged_end = \
228 |                 alignSequences(TargetSequence, window_sequence_var, max_score=mismatch_threshold)
229 | 
230 |             variant_ots_no_bulge, variant_ots_bulge = '', ''
231 | 
232 |             #  get variant sequence if the off-target sequences have changed by considering the variant window
233 |             if ots_nb != offtarget_sequence_no_bulge:
234 |                 variant_flag = True
235 |                 if chosen_alignment_strand_m == '+':
236 |                     m_no_bulge = re.search(offtarget_sequence_no_bulge, window_sequence_variant, re.I)
237 |                 else:
238 |                     m_no_bulge = re.search(offtarget_sequence_no_bulge, reverseComplement(window_sequence_variant), re.I)
239 |                 variant_ots_no_bulge = m_no_bulge.group()
240 | 
241 |             if ots_bu != bulged_offtarget_sequence:
242 |                 variant_flag = True
243 |                 variant_ots_bulge = realignVariantBulge(bulged_offtarget_sequence, window_sequence_variant, chosen_alignment_strand_b)
244 | 
245 |             # collect and write variant data if we have variant off-target sequence(s)
246 |             if variant_flag:
247 |                 total_genotype, total_reference, total_variant, total_quality = '', '', '', ''
248 |                 for pos in snp_data:
249 |                     position, reference, variant, genotype, quality = snp_data[pos]
250 |                     if total_genotype != '':
251 |                         total_genotype += ''.join([':', genotype])
252 |                         total_reference += ''.join([':', reference])
253 |                         total_variant += ''.join([':', variant])
254 |                         total_quality += ''.join([':', quality])
255 |                     else:
256 |                         total_genotype += ''.join([genotype])
257 |                         total_reference += ''.join([reference])
258 |                         total_variant += ''.join([variant])
259 |                         total_quality += ''.join([quality])
260 | 
261 |                 output02 = [variant_ots_no_bulge, mismatches, chosen_alignment_strand_m,
262 |                             variant_ots_bulge, length, substitutions, insertions, deletions, chosen_alignment_strand_b]
263 |                 output04 = [total_reference, total_variant, total_genotype, total_quality]
264 |                 output_line = output01 + [window_sequence_variant] + output02 + output03 + [realigned_target] + output04
265 | 
266 |                 with open(out + '_Variants.txt', 'a') as output_file:
267 |                     print(*output_line, sep='\t', file=output_file)
268 | 
269 | 
270 | """
271 | Main function
272 | """
def getVariants(matched_file, ref, bam_file, out, search_radius, mismatch_threshold):
    """Call variants for the matched sites and derive variant off-target sequences.

    The SNP file returned by snpCall is handed to snpAdjustment, which appends
    its results to ``out + '_Variants.txt'``.

    Args:
        matched_file: path of the matched-sites file from the 'identified' folder.
        ref: reference genome FASTA, forwarded to snpCall.
        bam_file: BAM file, forwarded to snpCall.
        out: output basename, optionally prefixed by a directory.
        search_radius: forwarded to snpCall and snpAdjustment.
        mismatch_threshold: forwarded to snpAdjustment.
    """
    basename = os.path.basename(out)
    output_folder = os.path.dirname(out)
    # dirname() is '' when `out` is a bare basename; os.makedirs('') would raise,
    # so only create the folder when there actually is one to create.
    if output_folder and not os.path.exists(output_folder):
        os.makedirs(output_folder)

    snp_file = snpCall(matched_file, ref, bam_file, out, search_radius)

    print('Obtaining Variant Off-Target Sequences for %s' % basename, file=sys.stderr)
    snpAdjustment(matched_file, snp_file, out, mismatch_threshold, search_radius)
283 | 
284 | 
def main():
    """Command-line entry point: read the options and run getVariants."""
    cli = argparse.ArgumentParser(description='Implement samtools:mpileup to identify genomic variants and adjust the off-target sequence when required.')

    # Declared in one table so the registration order (and thus --help) is explicit.
    option_table = [
        ('--matched_file', dict(help="full_path_to/matched file in 'identified' folder", required=True)),
        ('--ref', dict(help="Reference Genome Fasta", required=True)),
        ('--bam', dict(help="Sorted BAM file", required=True)),
        ('--search_radius', dict(help="Search radius around the position window", default=20, type=int)),
        ('--mismatch_threshold', dict(help='Maximum score threshold', default=7, type=int)),
        ('--out', dict(help="Output file basename, with full path", required=True)),
    ]
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)

    options = cli.parse_args()
    getVariants(options.matched_file,
                options.ref,
                options.bam,
                options.out,
                options.search_radius,
                options.mismatch_threshold)
296 | 
297 | if __name__ == "__main__":
298 |     main()
299 | 


--------------------------------------------------------------------------------
/changeseq/changeseq.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding: utf-8 -*-
  3 | 
  4 | """
  5 | changeseq.py as the wrapper for CIRCLE-seq analysis
  6 | """
  7 | 
  8 | from alignReads import alignReads
  9 | from visualization import visualizeOfftargets
 10 | from mergeReads import mergeReads
 11 | import argparse
 12 | import os
 13 | import sys
 14 | import subprocess
 15 | import traceback
 16 | import log
 17 | import yaml
 18 | import validation
 19 | import findCleavageSites
 20 | import callVariants
 21 | 
 22 | logger = log.createCustomLogger('root')
 23 | p_dir = os.path.dirname(os.path.realpath(__file__))
 24 | 
 25 | class CircleSeq:
 26 | 
 27 |     def __init__(self):
 28 |         self.search_radius = 20
 29 |         self.window_size = 3
 30 |         self.mapq_threshold = 50
 31 |         self.start_threshold = 1
 32 |         self.gap_threshold = 3
 33 |         self.mismatch_threshold = 6
 34 |         self.read_threshold = 6
 35 |         self.merged_analysis = True
 36 |         self.all_chromosomes = False
 37 |         self.variant_analysis = False
 38 |         self.genome = None
 39 |         self.refseq_names = None
 40 | 
 41 | 
 42 |     def parseManifest(self, manifest_path, sample='all'):
 43 |         logger.info('Loading manifest...')
 44 | 
 45 |         with open(manifest_path, 'r') as f:
 46 |             manifest_data = yaml.load(f)
 47 | 
 48 |         try:
 49 |             # Validate manifest data
 50 |             validation.validateManifest(manifest_data)
 51 | 
 52 |             self.BWA_path  = manifest_data['bwa']
 53 |             self.reference_genome = manifest_data['reference_genome']
 54 |             self.analysis_folder = manifest_data['analysis_folder']
 55 | 
 56 |             # Allow the user to specify read threshold, window_size and search_radius if they'd like
 57 |             if 'search_radius' in manifest_data:
 58 |                 self.search_radius = manifest_data['search_radius']
 59 |             if 'window_size' in manifest_data:
 60 |                 self.window_size = manifest_data['window_size']
 61 |             if 'mapq_threshold' in manifest_data:
 62 |                 self.mapq_threshold = manifest_data['mapq_threshold']
 63 |             if 'start_threshold' in manifest_data:
 64 |                 self.start_threshold = manifest_data['start_threshold']
 65 |             if 'gap_threshold' in manifest_data:
 66 |                 self.gap_threshold = manifest_data['gap_threshold']
 67 |             if 'mismatch_threshold' in manifest_data:
 68 |                 self.mismatch_threshold = manifest_data['mismatch_threshold']
 69 |             if 'read_threshold' in manifest_data:
 70 |                 self.read_threshold = manifest_data['read_threshold']
 71 |             if 'merged_analysis' in manifest_data:
 72 |                 self.merged_analysis = manifest_data['merged_analysis']
 73 |             if 'all_chromosomes' in manifest_data:
 74 |                 self.all_chromosomes = manifest_data['all_chromosomes']
 75 |             if 'variant_analysis' in manifest_data:
 76 |                 self.variant_analysis = manifest_data['variant_analysis']
 77 |             if 'genome' in manifest_data:
 78 |                 self.genome = manifest_data['genome']
 79 |                 if self.genome in ['hg38','hg19']:
 80 |                     self.refseq_names = p_dir+"/refseq_gene_name.py"
 81 |             # Allow the user to specify PAM seq. Yichao 4/29/2020
 82 |             if 'PAM' in manifest_data:
 83 |                 self.PAM = manifest_data['PAM']
 84 |             else:
 85 |                 self.PAM = "NGG"
 86 |             # Allow the user to specify Read Length. Yichao 4/29/2020
 87 |             if 'read_length' in manifest_data:
 88 |                 self.read_length = manifest_data['read_length']
 89 |             else:
 90 |                 self.read_length = 151
 91 |             # Allow the user to specify Read Count cutoff. Yichao 4/29/2020
 92 |             if 'read_count_cutoff' in manifest_data:
 93 |                 self.read_count_cutoff = manifest_data['read_count_cutoff']
 94 |             else:
 95 |                 self.read_count_cutoff = 6
 96 | 
 97 |             # Do not allow to run variant_analysis with merged_analysis
 98 |             if self.merged_analysis and self.variant_analysis:
 99 |                 logger.error('merged_analysis is not compatible with variant_analysis. Please remove one option.')
100 |                 sys.exit()
101 | 
102 |             if sample == 'all':
103 |                 self.samples = manifest_data['samples']
104 |             else:
105 |                 self.samples = {}
106 |                 self.samples[sample] = manifest_data['samples'][sample]
107 |             # Make folders for output
108 |             for folder in ['aligned', 'identified', 'fastq', 'visualization', 'variants']:
109 |                 output_folder = os.path.join(self.analysis_folder, folder)
110 |                 if not os.path.exists(output_folder):
111 |                     os.makedirs(output_folder)
112 | 
113 |         except Exception as e:
114 |             logger.error('Incorrect or malformed manifest file. Please ensure your manifest contains all required fields.')
115 |             sys.exit()
116 | 
117 |     def alignReads(self):
118 |         if self.merged_analysis:
119 |             logger.info('Merging reads...')
120 |             try:
121 |                 self.merged = {}
122 |                 for sample in self.samples:
123 |                     sample_merge_path = os.path.join(self.analysis_folder, 'fastq', sample + '_merged.fastq.gz')
124 |                     control_sample_merge_path = os.path.join(self.analysis_folder, 'fastq', 'control_' + sample + '_merged.fastq.gz')
125 |                     mergeReads(self.samples[sample]['read1'],
126 |                                self.samples[sample]['read2'],
127 |                                sample_merge_path)
128 |                     mergeReads(self.samples[sample]['controlread1'],
129 |                                self.samples[sample]['controlread2'],
130 |                                control_sample_merge_path)
131 | 
132 |                     sample_alignment_path = os.path.join(self.analysis_folder, 'aligned', sample + '.sam')
133 |                     control_sample_alignment_path = os.path.join(self.analysis_folder, 'aligned', 'control_' + sample + '.sam')
134 | 
135 |                     alignReads(self.BWA_path,
136 |                                self.reference_genome,
137 |                                sample_merge_path,
138 |                                '',
139 |                                sample_alignment_path)
140 | 
141 |                     alignReads(self.BWA_path,
142 |                                self.reference_genome,
143 |                                control_sample_merge_path,
144 |                                '',
145 |                                control_sample_alignment_path)
146 | 
147 |                     self.merged[sample] = sample_alignment_path
148 |                     logger.info('Finished merging and aligning reads.')
149 | 
150 |             except Exception as e:
151 |                 logger.error('Error aligning')
152 |                 logger.error(traceback.format_exc())
153 |                 quit()
154 |         else:
155 |             logger.info('Aligning reads...')
156 |             try:
157 |                 self.aligned = {}
158 |                 self.aligned_sorted = {}
159 |                 for sample in self.samples:
160 |                     sample_alignment_path = os.path.join(self.analysis_folder, 'aligned', sample + '.sam')
161 |                     control_sample_alignment_path = os.path.join(self.analysis_folder, 'aligned', 'control_' + sample + '.sam')
162 |                     alignReads(self.BWA_path,
163 |                                self.reference_genome,
164 |                                self.samples[sample]['read1'],
165 |                                self.samples[sample]['read2'],
166 |                                sample_alignment_path)
167 |                     alignReads(self.BWA_path,
168 |                                self.reference_genome,
169 |                                self.samples[sample]['controlread1'],
170 |                                self.samples[sample]['controlread2'],
171 |                                control_sample_alignment_path)
172 |                     self.aligned[sample] = sample_alignment_path
173 |                     self.aligned_sorted[sample] = os.path.join(self.analysis_folder, 'aligned', sample + '_sorted.bam')
174 |                     logger.info('Finished aligning reads to genome.')
175 | 
176 |             except Exception as e:
177 |                 logger.error('Error aligning')
178 |                 logger.error(traceback.format_exc())
179 |                 quit()
180 | 
181 |     def findCleavageSites(self):
182 |         logger.info('Identifying off-target cleavage sites.')
183 | 
184 |         try:
185 |             for sample in self.samples:
186 |                 if self.merged_analysis:
187 |                     sorted_bam_file = os.path.join(self.analysis_folder, 'aligned', sample + '.bam')
188 |                     control_sorted_bam_file = os.path.join(self.analysis_folder, 'aligned', 'control_' + sample + '.bam')
189 |                 else:
190 |                     sorted_bam_file = os.path.join(self.analysis_folder, 'aligned', sample + '_sorted.bam')
191 |                     control_sorted_bam_file = os.path.join(self.analysis_folder, 'aligned', 'control_' + sample + '_sorted.bam')
192 |                 identified_sites_file = os.path.join(self.analysis_folder, 'identified', sample)
193 |                 logger.info('Window: {0}, MAPQ: {1}, Gap: {2}, Start {3}, Mismatches {4}, Search_Radius {5}'.format(self.window_size, self.mapq_threshold, self.gap_threshold, self.start_threshold, self.mismatch_threshold, self.search_radius))
194 |                 findCleavageSites.compare(self.reference_genome, sorted_bam_file, control_sorted_bam_file, self.samples[sample]['target'],
195 |                                           self.search_radius, self.window_size, self.mapq_threshold, self.gap_threshold,
196 |                                           self.start_threshold, self.mismatch_threshold, sample, self.samples[sample]['description'],
197 |                                           identified_sites_file, self.all_chromosomes, merged=self.merged_analysis,read_count_cutoff=self.read_threshold,read_length=self.read_length)
198 |         except Exception as e:
199 |             logger.error('Error identifying off-target cleavage site.')
200 |             logger.error(traceback.format_exc())
201 |             quit()
202 | 
203 |     def visualize(self):
204 |         logger.info('Visualizing off-target sites')
205 | 
206 |         # try:
207 |             # for sample in self.samples:
208 |                 # if sample != 'control':
209 |                     # infile = os.path.join(self.analysis_folder, 'identified', sample + '_identified_matched.txt')
210 |                     # outfile = os.path.join(self.analysis_folder, 'visualization', sample + '_offtargets')
211 |                     # visualizeOfftargets(infile, outfile, title=sample)
212 | 
213 |             # logger.info('Finished visualizing off-target sites')
214 | 
215 |         # except Exception as e:
216 |             # logger.error('Error visualizing off-target sites.')
217 |             # logger.error(traceback.format_exc())
218 | 
219 |         for sample in self.samples: ## 4/29/2020 Yichao solved: visualization stopped when sample has no off-target
220 |             if sample != 'control':
221 |                 try:
222 |                     infile = os.path.join(self.analysis_folder, 'identified', sample + '_identified_matched.txt')
223 |                     outfile = os.path.join(self.analysis_folder, 'visualization', sample + '_offtargets')
224 |                     visualizeOfftargets(infile, outfile, title=sample,PAM=self.PAM,genome=self.genome,refseq_names=self.refseq_names)
225 |                 except Exception as e:
226 |                     logger.error('Error visualizing off-target sites: %s'%(sample))
227 |                     logger.error(traceback.format_exc())
228 |         logger.info('Finished visualizing off-target sites')
229 | 
230 | 
231 |     def callVariants(self):
232 | 
233 |         try:
234 |             if self.variant_analysis:
235 |                 logger.info('Identifying genomic variants')
236 | 
237 |                 for sample in self.samples:
238 |                     sorted_bam_file = os.path.join(self.analysis_folder, 'aligned', sample + '.bam')
239 |                     identified_sites_file = os.path.join(self.analysis_folder, 'identified', sample + '_identified_matched.txt')
240 |                     variants_basename = os.path.join(self.analysis_folder, 'variants', sample)
241 |                     logger.info('Mismatches {0}, Search_Radius {1}'.format(self.mismatch_threshold, self.search_radius))
242 |                     callVariants.getVariants(identified_sites_file, self.reference_genome, sorted_bam_file, variants_basename, self.search_radius, self.mismatch_threshold)
243 | 
244 |                 logger.info('Finished identifying genomic variants')
245 | 
246 |         except Exception as e:
247 |             logger.error('Error identifying genomic variants.')
248 |             logger.error(traceback.format_exc())
249 |             quit()
250 | 
251 |     def parallel(self, manifest_path, lsf, run='all'):
252 |         logger.info('Submitting parallel jobs')
253 |         current_script = __file__
254 | 
255 |         try:
256 |             for sample in self.samples:
257 |                 cmd = 'python {0} {1} --manifest {2} --sample {3}'.format(current_script, run, manifest_path, sample)
258 |                 logger.info(cmd)
259 |                 subprocess.call(lsf.split() + [cmd])
260 |             logger.info('Finished job submission')
261 | 
262 |         except Exception as e:
263 |             logger.error('Error submitting jobs.')
264 |             logger.error(traceback.format_exc())
265 | 
266 |     def referenceFree(self):
267 |         pass
268 | 
def parse_args():
    """Parse the command line into a namespace with ``command`` plus its options.

    Every sub-command requires ``--manifest``. ``parallel`` additionally takes
    ``--lsf`` and ``--run``; all other sub-commands take ``--sample``
    (default 'all'). A missing sub-command is reported as a usage error.
    """
    parser = argparse.ArgumentParser()

    subparsers = parser.add_subparsers(description='Individual Step Commands',
                                       help='Use this to run individual steps of the pipeline',
                                       dest='command')
    # Python 3 made sub-commands optional by default; require one so a bare
    # invocation fails with a usage message instead of silently doing nothing.
    subparsers.required = True

    # (name, help) in the order they should appear in --help.
    commands = [
        ('all', 'Run all steps of the pipeline'),
        ('parallel', 'Run all steps of the pipeline in parallel'),
        ('align', 'Run alignment only'),
        ('merge', 'Merge paired end reads'),
        ('identify', 'Run identification only'),
        ('visualize', 'Run visualization only'),
        ('variants', 'Run variants analysis only'),
        ('reference-free', 'Run reference-free discovery only'),
    ]

    for name, description in commands:
        sub = subparsers.add_parser(name, help=description)
        sub.add_argument('--manifest', '-m', help='Specify the manifest Path', required=True)
        if name == 'parallel':
            sub.add_argument('--lsf', '-l', help='Specify LSF CMD',
                             default='bsub -R rusage[mem=32000] -P Genomics -q standard')
            sub.add_argument('--run', '-r',
                             help='Specify which steps of pipeline to run (all, align, identify, visualize, variants)',
                             default='all')
        else:
            sub.add_argument('--sample', '-s', help='Specify sample to process (default is all)', default='all')

    return parser.parse_args()
310 | 
def main():
    """Dispatch the parsed sub-command to the matching CircleSeq steps.

    'parallel' loads every sample and submits one job per sample; every other
    command loads the requested sample(s) and runs its steps in order. An
    unrecognised or missing command is logged and exits with status 1.
    """
    args = parse_args()
    command = args.command

    if command == 'parallel':
        c = CircleSeq()
        c.parseManifest(args.manifest)
        c.parallel(args.manifest, args.lsf, args.run)
        return

    # Names of the CircleSeq methods each command runs, in order.
    steps_by_command = {
        'all': ('alignReads', 'findCleavageSites', 'visualize', 'callVariants'),
        'align': ('alignReads',),
        'identify': ('findCleavageSites',),
        # Read merging is implemented inside alignReads (merged_analysis branch);
        # the formerly referenced mergeAlignReads method does not exist.
        'merge': ('alignReads',),
        'visualize': ('visualize',),
        'variants': ('callVariants',),
        'reference-free': ('referenceFree',),
    }

    if command not in steps_by_command:
        logger.error('Unknown or missing command: {0}'.format(command))
        sys.exit(1)

    c = CircleSeq()
    c.parseManifest(args.manifest, args.sample)
    if command == 'merge':
        # 'merge' explicitly asks for merged reads, whatever the manifest says.
        c.merged_analysis = True
    for step in steps_by_command[command]:
        getattr(c, step)()
345 | 
346 | if __name__ == '__main__':
347 |     main()
348 | 


--------------------------------------------------------------------------------
/changeseq/log.py:
--------------------------------------------------------------------------------
 1 | """
 2 | log.py
 3 | =====
 4 | 
 5 | Setup logging utils for nested module logging
 6 | 
 7 | Adapted from the accepted answer here: http://stackoverflow.com/questions/7621897/python-logging-module-globally
 8 | """
 9 | 
10 | import logging
11 | 
def createCustomLogger(name):
    """Return the logger called *name*, set to DEBUG with one formatted stream handler.

    logging.getLogger returns the same object for the same name, so the handler
    is attached only once; calling this function again does not duplicate every
    log line.
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    # Look for the handler installed by an earlier call (tagged below) rather
    # than for "any handler", so handlers added by other code do not suppress ours.
    for existing in logger.handlers:
        if getattr(existing, '_custom_logger_handler', False):
            return logger

    formatter = logging.Formatter(fmt='[%(asctime)s][%(levelname)s][%(module)s] %(message)s', datefmt='%m/%d %I:%M:%S%p')

    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    handler._custom_logger_handler = True

    logger.addHandler(handler)
    return logger
22 | 


--------------------------------------------------------------------------------
/changeseq/mergeReads.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import argparse
 3 | import itertools
 4 | import gzip
 5 | from utility import reverseComplement, fq
 6 | 
 7 | def mergeReads(fastq1_filename, fastq2_filename, out):
 8 |     fastq1_file = fq(fastq1_filename)
 9 |     fastq2_file = fq(fastq2_filename)
10 | 
11 |     with gzip.open(out, 'wb') as o:
12 |         for r1, r2 in itertools.izip(fastq1_file, fastq2_file):
13 |             merged_sequence = reverseComplement(r1[1]) + r2[1]
14 |             merged_quality_scores = r1[3][::-1] + r2[3]
15 |             print(r1[0], file=o)
16 |             print(merged_sequence, file=o)
17 |             print(r1[2], file=o)
18 |             print(merged_quality_scores, file=o)
19 | 
def main():
    """Command-line entry point: merge the two read files given as options."""
    cli = argparse.ArgumentParser(description='Merge CIRCLE-seq reads for alignment.')

    # All three options are mandatory and differ only in flag and help text.
    for flag, text in (('--read1', 'Read 1 filename'),
                       ('--read2', 'Read 2 filename'),
                       ('--out', 'Output filename')):
        cli.add_argument(flag, help=text, required=True)

    options = cli.parse_args()
    mergeReads(options.read1, options.read2, options.out)
29 | 
30 | if __name__ == "__main__":
31 |     main()


--------------------------------------------------------------------------------
/changeseq/referenceFree.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | 
  3 | import argparse
  4 | import itertools
  5 | import regex
  6 | import re
  7 | import gzip
  8 | import sys
  9 | import collections
 10 | from findCleavageSites import regexFromSequence, alignSequences, reverseComplement, extendedPattern, realignedSequences
 11 | 
 12 | """
 13 | FASTQ generator function from umi package
 14 | """
 15 | def fq(file):
 16 |     if re.search('.gz$', file):
 17 |         fastq = gzip.open(file, 'rb')
 18 |     else:
 19 |         fastq = open(file, 'r')
 20 |     with fastq as f:
 21 |         while True:
 22 |             l1 = f.readline()
 23 |             if not l1:
 24 |                 break
 25 |             l2 = f.readline()
 26 |             l3 = f.readline()
 27 |             l4 = f.readline()
 28 |             yield [l1, l2, l3, l4]
 29 | 
 30 | """
 31 | Main function to find off-target sites in reference-free fashion
 32 | """
 33 | def analyze(fastq1_filename, fastq2_filename, targetsite, out_base, name='', cells='', mismatch_threshold=7):
 34 | 
 35 |     read_count = 0
 36 |     c = collections.Counter()
 37 |     d = collections.defaultdict(list)
 38 | 
 39 |     fastq1_file = fq(fastq1_filename)
 40 |     fastq2_file = fq(fastq2_filename)
 41 |     for r1, r2 in itertools.izip(fastq1_file, fastq2_file):
 42 |         r1_sequence = r1[1].rstrip('\n')
 43 |         r2_sequence = r2[1].rstrip('\n')
 44 |         joined_seq = reverseComplement(r1_sequence) + r2_sequence
 45 |         truncated_joined_seq = joined_seq[130:170]
 46 | 
 47 |         sequence_data = alignSequences(targetsite, truncated_joined_seq, max_score=mismatch_threshold)
 48 |         offtarget, mismatch, length, strand, start, end, realigned_target = sequence_data[:7]
 49 | 
 50 |         if offtarget:
 51 |             c[offtarget] += 1
 52 |             d[offtarget].append(joined_seq)
 53 | 
 54 |         read_count += 1
 55 |         if not read_count % 100000:
 56 |             print(read_count/float(1000000), end=" ", file=sys.stderr)
 57 | 
 58 |     print('Finished tabulating reference-free discovery counts.', file=sys.stderr)
 59 |     out_filename = out_base + '.txt'
 60 | 
 61 |     with open(out_filename, 'w') as o:
 62 |         for target_sequence, target_count in c.most_common():
 63 |             print(target_sequence, target_count, file=o)
 64 |             off_target_fasta_filename = '{0}_{1:04d}_{2}.fasta'.format(out_base, target_count, target_sequence)
 65 |             with open(off_target_fasta_filename, 'w') as off_target_fasta_file:
 66 |                 j = 0
 67 |                 for sequence in d[target_sequence]:
 68 |                     j += 1
 69 |                     print('>{0:04d}_{1}_{2}'.format(target_count, target_sequence, j), file=off_target_fasta_file)
 70 |                     print(sequence, file=off_target_fasta_file)
 71 | 
 72 | def join_write_output(fastq1_filename, fastq2_filename, out):
 73 |     fastq1_file = fq(fastq1_filename)
 74 |     fastq2_file = fq(fastq2_filename)
 75 | 
 76 |     with open(out, 'w') as o:
 77 |         for r1, r2 in itertools.izip(fastq1_file, fastq2_file):
 78 |             header = '>{0}'.format(r1[0])
 79 |             r1_sequence = r1[1].rstrip('\n')
 80 |             r2_sequence = r2[1].rstrip('\n')
 81 |             joined_seq = reverseComplement(r1_sequence) + r2_sequence
 82 |             print(header, end='', file=o)
 83 |             print(joined_seq, file=o)
 84 | 
 85 | 
 86 | def main():
 87 |     parser = argparse.ArgumentParser(description='Identify off-target candidates from Illumina short read sequencing data.')
 88 |     parser.add_argument('--fq1', help='FASTQ Read 1', required=True)
 89 |     parser.add_argument('--fq2', help='FASTQ Read 2', required=True)
 90 |     parser.add_argument('--targetsite', help='Targetsite Sequence', required=True)
 91 |     parser.add_argument('--name', help='Targetsite Name', required=False)
 92 |     parser.add_argument('--cells', help='Cells', required=False)
 93 |     parser.add_argument('--mismatch_threshold', help='Maximum score threshold', default=7, type=int)
 94 |     parser.add_argument('--out', help='Output file base', required=True)
 95 |     args = parser.parse_args()
 96 | 
 97 |     analyze(args.fq1, args.fq2, args.targetsite, args.out, args.name, args.cells, args.mismatch_threshold)
 98 | 
 99 | if __name__ == "__main__":
100 |     main()
101 | 


--------------------------------------------------------------------------------
/changeseq/test.yaml:
--------------------------------------------------------------------------------
 1 | reference_genome: /Users/shengdar/genomes/Homo_sapiens_assembly19.fasta
 2 | analysis_folder: /Users/shengdar/Local/circleseq-test/merged
 3 | 
 4 | bwa: bwa
 5 | samtools: samtools
 6 | 
 7 | read_threshold: 4
 8 | window_size: 3
 9 | mapq_threshold: 50
10 | start_threshold: 1
11 | gap_threshold: 3
12 | mismatch_threshold: 6
13 | merged_analysis: True
14 | 
15 | samples:
16 |     U2OS_exp1_VEGFA_site_1:
17 |         target: GGGTGGGGGGAGTTTGCTCCNGG
18 |         read1: /Users/shengdar/Local/circleseq-test/1_S1_subset_100000_R1.fastq
19 |         read2: /Users/shengdar/Local/circleseq-test/1_S1_subset_100000_R2.fastq
20 |         controlread1: /Users/shengdar/Local/circleseq-test/4_S4_subset_R1.fastq
21 |         controlread2: /Users/shengdar/Local/circleseq-test/4_S4_subset_R2.fastq
22 |         description: U2OS_exp1_VEGFA_site_1


--------------------------------------------------------------------------------
/changeseq/utility.py:
--------------------------------------------------------------------------------
 1 | import string
 2 | import re
 3 | import gzip
 4 | """
 5 | FASTQ generator function from umi package
 6 | """
 7 | 
 8 | def fq(file):
 9 |     if re.search('.gz$', file):
10 |         fastq = gzip.open(file, 'rb')
11 |     else:
12 |         fastq = open(file, 'r')
13 |     with fastq as f:
14 |         while True:
15 |             l1 = f.readline().rstrip('\n')
16 |             if not l1:
17 |                 break
18 |             l2 = f.readline().rstrip('\n')
19 |             l3 = f.readline().rstrip('\n')
20 |             l4 = f.readline().rstrip('\n')
21 |             yield [l1, l2, l3, l4]
22 | 
def reverseComplement(sequence):
    """Return the reverse complement of an upper-case DNA string.

    Only ``A``, ``C``, ``G`` and ``T`` are complemented; any other character
    (for example ``N``) is kept as-is and only changes position.
    """
    # str.maketrans is the Python 3 spelling; Python 2 only provides
    # string.maketrans, which is looked up lazily as the fallback.
    maketrans = getattr(str, 'maketrans', None) or string.maketrans
    transtab = maketrans("ACGT", "TGCA")
    return sequence.translate(transtab)[::-1]
26 | 


--------------------------------------------------------------------------------
/changeseq/validation.py:
--------------------------------------------------------------------------------
  1 | """
  2 | validation.py
  3 | =============
  4 | 
  5 | Contains utils for validating the filetype and existence of manifest-defined files/folders
  6 | 
  7 | """
  8 | 
  9 | import logging
 10 | import os
 11 | import sys
 12 | from distutils.spawn import find_executable
 13 | 
 14 | logger = logging.getLogger('root')
 15 | 
 16 | 
 17 | def exists(filepath):
 18 |     if not os.path.isfile(filepath):
 19 |         logger.error('{0} does not exist'.format(filepath))
 20 |         sys.exit()
 21 | 
 22 | 
 23 | def checkIfBinary(filepath):
 24 |     executable = find_executable(filepath)
 25 | 
 26 |     if executable is None:
 27 |         logger.error('Executable binary not found at {0}'.format(filepath))
 28 |         sys.exit()
 29 | 
 30 |     # First check if file exists
 31 |     exists(executable)
 32 | 
 33 |     # Check if file is a valid binary
 34 |     # Adapted from http://stackoverflow.com/questions/898669/how-can-i-detect-if-a-file-is-binary-non-text-in-python
 35 |     textchars = bytearray({7,8,9,10,12,13,27} | set(range(0x20, 0x100)) - {0x7f})
 36 |     is_binary_string = lambda bytes: bool(bytes.translate(None, textchars))
 37 | 
 38 |     if not is_binary_string(open(executable, 'rb').read(1024)):
 39 |         logger.error('{0} is not a valid binary'.format(executable))
 40 |         sys.exit()
 41 | 
 42 | 
 43 | def checkIfFasta(filepath):
 44 |     # First check if file exists
 45 |     exists(os.path.abspath(filepath))
 46 | 
 47 | 
 48 | def checkIfFolder(folderpath):
 49 |     # Check if the folder exists
 50 |     if not os.path.isdir(os.path.abspath(folderpath)):
 51 |         logger.error('{0} is not a valid folder path'.format(folderpath))
 52 |         sys.exit()
 53 | 
 54 | 
 55 | def checkIfValidUndemultiplexed(undemultiplexed):
 56 |     # Check if read1, read2, index1, and index2 exist
 57 |     fields = ['forward', 'reverse', 'index1', 'index2']
 58 | 
 59 |     if set(fields) != set(undemultiplexed.keys()):
 60 |         logger.error('Undemultiplexed field must contain references to "forward", "reverse", "index1", "index2"')
 61 |         sys.exit()
 62 | 
 63 |     invalid_file = False
 64 |     for field in fields:
 65 |         if not os.path.isfile(undemultiplexed[field]):
 66 |             logger.error('"read1" undemultiplexed field does not reference a valid file')
 67 |             invalid_file = True
 68 | 
 69 |     if invalid_file:
 70 |         sys.exit()
 71 | 
 72 | 
 73 | def checkIfValidSamples(samples):
 74 |     # # Check if control is one of the samples
 75 |     # if 'control' not in samples:
 76 |     #     logger.error('A control sample must be specified')
 77 |     #     sys.exit()
 78 | 
 79 |     if len(samples.keys()) == 0:
 80 |         logger.error('No samples defined')
 81 |         sys.exit()
 82 | 
 83 |     for sample in samples:
 84 |         if 'read1' not in samples[sample] or 'read2' not in samples[sample]:
 85 |             logger.error('read1 and read2 must be specified for {0} sample'.format(sample))
 86 |             sys.exit()
 87 |         if 'controlread1' not in samples[sample] or 'controlread2' not in samples[sample]:
 88 |             logger.error('controlread1 and controlread2 must be specified for {0} sample'.format(sample))
 89 |             sys.exit()
 90 |         if 'target' not in samples[sample]:
 91 |             logger.error('target sequence must be specified for {0} sample'.format(sample))
 92 |             sys.exit()
 93 | 
 94 | def validateManifest(manifest_data):
 95 |     # Check if manifest contains the required fields
 96 |     fields = ['bwa', 'reference_genome', 'analysis_folder', 'samples']
 97 |     missing_fields = False
 98 | 
 99 |     for field in fields:
100 |         if field not in manifest_data.keys():
101 |             logger.error('"{0}" field must be specified in manifest'.format(field))
102 |             missing_fields = True
103 | 
104 |     if missing_fields:
105 |         sys.exit()
106 | 
107 |     # Now validate each field
108 |     checkIfBinary(manifest_data['bwa'])
109 |     checkIfBinary(manifest_data['samtools'])
110 |     checkIfFasta(manifest_data['reference_genome'])
111 |     checkIfValidSamples(manifest_data['samples'])


--------------------------------------------------------------------------------
/changeseq/visualization.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 |     
  3 | import svgwrite
  4 | import os
  5 | import logging
  6 | import argparse
  7 | import pandas as pd
  8 | 
  9 | ### 2017-October-11: Adapt plots to new output; inputs are managed using "argparse".
 10 | 
# Shared pipeline logger; propagate=False keeps records logged through it from
# also being passed to the handlers of ancestor loggers.
logger = logging.getLogger('root')
logger.propagate = False

# Drawing geometry, in SVG user units.
boxWidth = 10  # NOTE(review): not referenced anywhere else in this module
box_size = 15  # side length of one nucleotide cell; also used as the row height
v_spacing = 3  # NOTE(review): not referenced anywhere else in this module

# Fill color per nucleotide symbol. The four bases get distinct colors; N, R,
# the gap character '-' and every symbol added by the loop below share one grey.
# colors = {'G': '#F5F500', 'A': '#FF5454', 'T': '#00D118', 'C': '#26A8FF', 'N': '#B3B3B3', '-': '#B3B3B3'}
colors = {'G': '#F5F500', 'A': '#FF5454', 'T': '#00D118', 'C': '#26A8FF', 'N': '#B3B3B3', 'R': '#B3B3B3', '-': '#B3B3B3'}
# Remaining ambiguity codes plus '.' so that colors[c] never raises KeyError
# for these symbols when a row is drawn.
for c in ['Y','S','W','K','M','B','D','H','V','.']:
    colors[c] = "#B3B3B3"
 22 |     
 23 | def refseqID_to_HGNC_symbol(x,myDict):
 24 |     if "(" in x:
 25 |         ID = x.split()[1].split(",")[0].replace(")","").replace("(","")
 26 |         # print (ID)
 27 |         if ID in myDict:
 28 |             gene = myDict[ID]
 29 |             # print (ID,gene)
 30 |             return x.replace(ID,gene)
 31 |     return x
 32 | 
 33 | def reformat_homer_annotation(r):
 34 |     if r.Annotation =="Intergenic":
 35 |         return "%s (%s)"%(r.Annotation,r['Gene Name'])
 36 |     return r.Annotation
 37 | def parse_homer(identified,homer_output,genome,refseq_names=None):
 38 |     select_col="Annotation"
 39 |     command = "annotatePeaks.pl %s %s > %s"%(identified,genome,homer_output)
 40 |     os.system(command)
 41 |     df = pd.read_csv(identified,sep="\t")
 42 |     df.index = df['Genomic Coordinate'].to_list()
 43 |     df2 = pd.read_csv(homer_output,sep="\t",index_col=0)
 44 |     df2[select_col] = df2.apply(reformat_homer_annotation,axis=1)
 45 |     df['Annotation'] = df2[select_col]
 46 |     if refseq_names!=None:
 47 |         myDict = parse_HGNC(refseq_names)
 48 |         df['Annotation'] = [refseqID_to_HGNC_symbol(x,myDict) for x in df.Annotation]
 49 |     out = identified.replace(".txt",".annot.tsv")
 50 |     df.to_csv(out,sep="\t",index=False)
 51 |     return out
 52 | 
 53 | def get_int(x):
 54 | 	try:
 55 | 		x = float(x)
 56 | 	except:
 57 | 		return ""
 58 | 	return int(x)
 59 | 
 60 | def parse_HGNC(f):
 61 |     refseq = "#name"
 62 |     symbol = "name2"
 63 |     df = pd.read_csv(f,sep="\t")
 64 |     # print (df.head())
 65 |     df = df[[refseq,symbol]]
 66 |     df = df.dropna()
 67 |     df.index = df[refseq].to_list()
 68 |     # print (df.head())
 69 |     return df[symbol].to_dict()
 70 | def parseSitesFile(infile):
 71 |     offtargets = []
 72 |     total_seq = 0
 73 |     with open(infile, 'r') as f:
 74 |         f.readline()
 75 |         for line in f:
 76 |             line = line.rstrip('\n')
 77 |             line_items = line.split('\t')
 78 |             # offtarget_reads = line_items[4]
 79 |             # no_bulge_offtarget_sequence = line_items[10]
 80 |             # bulge_offtarget_sequence = line_items[15]
 81 |             # target_seq = line_items[28]
 82 |             # realigned_target_seq = line_items[29]
 83 |             offtarget_reads = line_items[4]
 84 |             no_bulge_offtarget_sequence = line_items[7]
 85 |             bulge_offtarget_sequence = line_items[9]
 86 |             target_seq = line_items[14]
 87 |             realigned_target_seq = line_items[15]
 88 |             coord = line_items[3]
 89 |             num_mismatch = get_int(line_items[8])
 90 |             try:
 91 |                 annot = line_items[16]
 92 |             except:
 93 |                 annot = ""
 94 | 
 95 |             if no_bulge_offtarget_sequence != '' or bulge_offtarget_sequence != '':
 96 |                 if no_bulge_offtarget_sequence:
 97 |                     total_seq += 1
 98 |                 if bulge_offtarget_sequence:
 99 |                     total_seq += 1
100 |                 offtargets.append({'seq': no_bulge_offtarget_sequence.strip(),
101 |                                    'bulged_seq': bulge_offtarget_sequence.strip(),
102 |                                    'reads': int(offtarget_reads.strip()),
103 |                                    'coord': str(coord),
104 |                                    'annot': str(annot),
105 |                                    'num_mismatch': str(num_mismatch),
106 |                                    'target_seq': target_seq.strip(),
107 |                                    'realigned_target_seq': realigned_target_seq.strip()
108 |                                    })
109 |     offtargets = sorted(offtargets, key=lambda x: x['reads'], reverse=True)
110 |     return offtargets, target_seq, total_seq
111 | 
112 | # 3/6/2020 Yichao
def check_mismatch(a,b):
    """Return True when nucleotide codes ``a`` and ``b`` cannot match.

    Each code is upper-cased and expanded through Biopython's
    ``ambiguous_dna_values`` table; the two codes mismatch when their
    expansions share no base.
    """
    from Bio.Data import IUPACData
    expansions = IUPACData.ambiguous_dna_values
    bases_a = set(expansions[a.upper()])
    bases_b = set(expansions[b.upper()])
    return not (bases_a & bases_b)
123 | from Bio import SeqUtils
def find_PAM(seq,PAM):
    """Return the 0-based start index of ``PAM`` within ``seq``.

    A literal occurrence of ``PAM`` is tried first. If there is none, the
    left end and then the right end of ``seq`` (``len(PAM)`` characters
    each) are searched with ``SeqUtils.nt_search``. When nothing matches, a
    message is printed and 20 is returned as a fallback.
    """
    try:
        return seq.index(PAM)
    except ValueError:
        # Only "substring not found" is expected here; anything else should
        # surface instead of being swallowed by a bare except.
        pass

    pam_length = len(PAM)
    # PAM on the left. nt_search returns [pattern, hit, hit, ...], so more
    # than one element means at least one hit.
    left_search = SeqUtils.nt_search(seq[:pam_length], PAM)
    if len(left_search)>1:
        return left_search[1]

    right_search = SeqUtils.nt_search(seq[-pam_length:], PAM)
    if len(right_search)>1:
        return len(seq)-pam_length

    print ("PAM: %s not found in %s. Set PAM index to 20"%(PAM,seq))
    return 20
140 | 
def _draw_alignment_row(dwg, offtarget_seq, reference_seq, x_offset, y):
    """Draw one off-target sequence aligned against ``reference_seq``.

    Matching bases are drawn as a dot, bases opposite an ``N`` in the
    reference as plain text, mismatches as a colored box, and bases opposite
    a ``-`` in the reference as a small box shifted a quarter cell left.
    """
    k = 0  # current column; not advanced for bases opposite a reference gap
    for c, r in zip(offtarget_seq, reference_seq):
        x = x_offset + k * box_size
        if r == '-':
            if 0 < k < len(reference_seq):
                x = x_offset + (k - 0.25) * box_size
                dwg.add(dwg.rect((x, box_size * 1.4 + y), (box_size*0.6, box_size*0.6), fill=colors[c]))
                dwg.add(dwg.text(c, insert=(x+1, 2 * box_size + y - 2), fill='black', style="font-size:10px; font-family:Courier"))
        elif c == r:
            dwg.add(dwg.text(u"\u2022", insert=(x + 4.5, 2 * box_size + y - 4), fill='black', style="font-size:10px; font-family:Courier"))
            k += 1
        elif r == 'N':
            dwg.add(dwg.text(c, insert=(x + 3, 2 * box_size + y - 3), fill='black', style="font-size:15px; font-family:Courier"))
            k += 1
        else:
            dwg.add(dwg.rect((x, box_size + y), (box_size, box_size), fill=colors[c]))
            dwg.add(dwg.text(c, insert=(x + 3, 2 * box_size + y - 3), fill='black', style="font-size:15px; font-family:Courier"))
            k += 1


def visualizeOfftargets(infile, outfile, title, PAM, genome=None,refseq_names=None):
    """Render the identified off-target sites as an SVG alignment plot.

    Parameters:
        infile: tab-separated identified-sites file read by ``parseSitesFile``.
        outfile: output path without extension; ``.svg`` is appended. Its
            directory is created when missing.
        title: text drawn above the plot, or ``None`` for no title.
        PAM: PAM sequence used to position the tick labels.
        genome: when not ``None``, ``infile`` is first annotated through
            ``parse_homer`` and an ``Annotation`` column is drawn.
        refseq_names: optional RefSeq-ID to gene-name table forwarded to
            ``parse_homer``.

    The plot has one reference row followed by one row per off-target
    sequence, with read count, mismatch count and coordinate to the right.
    A site that has both a non-bulged and a bulged alignment takes two rows
    joined by a brace.
    """
    # dirname is '' when outfile has no directory part; os.makedirs('')
    # would raise, so only create a folder when one is actually named.
    output_folder = os.path.dirname(outfile)
    if output_folder and not os.path.exists(output_folder):
        os.makedirs(output_folder)

    if genome is not None:
        infile = parse_homer(infile,outfile+".raw.homer.tsv",genome,refseq_names=refseq_names)
    # Get offtargets array from file
    offtargets, target_seq, total_seq = parseSitesFile(infile)

    # Initiate canvas
    dwg = svgwrite.Drawing(outfile + '.svg', profile='full', size=(u'100%', 100 + total_seq*(box_size + 1)))

    # Define top and left margins; a title needs extra room at the top
    x_offset = 20
    if title is not None:
        y_offset = 50
        dwg.add(dwg.text(title, insert=(x_offset, 30), style="font-size:20px; font-family:Courier"))
    else:
        y_offset = 20

    # Draw ticks
    ## Generic PAM: it can be on the left or the right end of the target
    tick_locations = []
    tick_legend = []
    PAM_index = find_PAM(target_seq,PAM)
    # PAM on the right: number every 10th protospacer position, counting
    # away from the PAM, then label the PAM columns.
    count = 0
    for i in range(PAM_index,0,-1):
        count = count+1
        if count % 10 == 0:
            tick_legend.append(count)
            tick_locations.append(i)
    if len(PAM)>=3:
        tick_legend+=['P', 'A', 'M']+['-']*(len(PAM)-3)
    else:
        tick_legend+=["PAM"]+['-']*(len(PAM)-3)
    tick_locations+=range(PAM_index+1,len(target_seq)+1)
    if PAM_index == 0:
        # PAM on the left: label the PAM columns first, then number
        # position 1 and every 10th position after the PAM.
        tick_legend = []
        tick_locations = []
        tick_legend+=['P', 'A', 'M']+['-']*(len(PAM)-3)
        tick_locations+=range(1,len(PAM)+1)
        count = 0
        for i in range(len(PAM)+1,len(target_seq)+1):
            count = count+1
            if count % 10 == 0 or count == 1:
                tick_legend.append(count)
                tick_locations.append(i)
    for x,y in zip(tick_locations, tick_legend):
        dwg.add(dwg.text(y, insert=(x_offset + (x - 1) * box_size + 2, y_offset - 2), style="font-size:10px; font-family:Courier"))

    # Draw reference sequence row
    for i, c in enumerate(target_seq):
        y = y_offset
        x = x_offset + i * box_size
        dwg.add(dwg.rect((x, y), (box_size, box_size), fill=colors[c]))
        dwg.add(dwg.text(c, insert=(x + 3, y + box_size - 3), fill='black', style="font-size:15px; font-family:Courier"))
    dwg.add(dwg.text('Reads', insert=(x_offset + box_size * len(target_seq) + 16, y_offset + box_size - 3), style="font-size:15px; font-family:Courier"))
    dwg.add(dwg.text('Mismatches', insert=(box_size * (len(target_seq) + 1) + 90, y_offset + box_size - 3), style="font-size:15px; font-family:Courier"))
    dwg.add(dwg.text('Coordinates', insert=(box_size * (len(target_seq) + 1) + 200, y_offset + box_size - 3), style="font-size:15px; font-family:Courier"))
    if genome is not None:
        dwg.add(dwg.text('Annotation', insert=(box_size * (len(target_seq) + 1) + 450, y_offset + box_size - 3), style="font-size:15px; font-family:Courier"))

    # Draw aligned sequence rows
    y_offset += 1  # leave some extra space after the reference row
    line_number = 0  # keep track of plotted sequences
    label_x = box_size * (len(target_seq) + 1)  # left edge of the text columns
    label_style = "font-size:15px; font-family:Courier"
    for seq in offtargets:
        realigned_target_seq = seq['realigned_target_seq']
        no_bulge_offtarget_sequence = seq['seq']
        bulge_offtarget_sequence = seq['bulged_seq']

        if no_bulge_offtarget_sequence != '':
            line_number += 1
            _draw_alignment_row(dwg, no_bulge_offtarget_sequence, target_seq,
                                x_offset, y_offset + line_number * box_size)
        if bulge_offtarget_sequence != '':
            line_number += 1
            _draw_alignment_row(dwg, bulge_offtarget_sequence, realigned_target_seq,
                                x_offset, y_offset + line_number * box_size)

        # With two rows for one site the labels sit between the rows and a
        # brace joins them; with a single row they sit on that row.
        paired = no_bulge_offtarget_sequence != '' and bulge_offtarget_sequence != ''
        if paired:
            label_y = y_offset + box_size * (line_number + 1) + 5
        else:
            label_y = y_offset + box_size * (line_number + 2) - 2

        dwg.add(dwg.text(str(seq['reads']), insert=(label_x + 20, label_y),
                         fill='black', style=label_style))
        dwg.add(dwg.text(seq['num_mismatch'], insert=(label_x + 130, label_y),
                         fill='black', style=label_style))
        dwg.add(dwg.text(seq['coord'], insert=(label_x + 200, label_y),
                         fill='black', style=label_style))
        if genome is not None:
            dwg.add(dwg.text(seq['annot'], insert=(label_x + 450, label_y),
                             fill='black', style=label_style))
        if paired:
            dwg.add(dwg.text(u"\u007D", insert=(label_x + 7, label_y),
                             fill='black', style="font-size:23px; font-family:Courier"))
    dwg.save()
316 | 
def main():
    """Command-line entry point: parse the options and draw the plot.

    Prints the parsed arguments, then calls ``visualizeOfftargets`` with
    them.
    """
    parser = argparse.ArgumentParser(description='Plot visualization plots for re-aligned reads.')

    # Mandatory options: (short flag, long flag, help text)
    required_options = [
        ("-f", "--identified_file", "FullPath/output file from reAlignment_circleseq.py"),
        ("-o", "--outfile", "FullPath/VIZ"),
        ("-t", "--title", "Plot title"),
    ]
    for short_flag, long_flag, help_text in required_options:
        parser.add_argument(short_flag, long_flag, help=help_text, required=True)

    # Optional settings
    parser.add_argument("-g", "--genome", default=None,
                        help="if specified, homer annotation will be performed")
    parser.add_argument("-a", "--annotation", default=None,
                        help="refseqID to gene name mapping")
    parser.add_argument("--PAM", default="NGG", help="PAM sequence")

    args = parser.parse_args()

    print(args)

    visualizeOfftargets(args.identified_file, args.outfile, args.title, args.PAM,
                        genome=args.genome, refseq_names=args.annotation)
330 | 
331 | if __name__ == "__main__":
332 | 
333 |     main()
334 | 


--------------------------------------------------------------------------------
/conda_build/conda_build_config.yaml:
--------------------------------------------------------------------------------
1 | python:
2 |   - 2.7
3 | 


--------------------------------------------------------------------------------
/conda_build/meta.yaml:
--------------------------------------------------------------------------------
  1 | {% set name = "changeseq" %}
  2 | {% set version = "1.2.8" %}
  3 | {% set file_ext = "tar.gz" %}
  4 | {% set hash_type = "sha256" %}
  5 | {% set hash_value = "42dde92e84e63369e4c0f2d6f1135952a6478644df9a6f303d3f93507e1f6573" %}
  6 | 
  7 | package:
  8 |   name: "{{ name|lower }}"
  9 |   version: "{{ version }}"
 10 | 
 11 | source:
 12 |   fn: '{{ name }}-{{ version }}.{{ file_ext }}'
 13 |   url: https://pypi.io/packages/source/{{ name[0] }}/{{ name }}/{{ name }}-{{ version }}.{{ file_ext }}
 14 |   '{{ hash_type }}': '{{ hash_value }}'
 15 | 
 16 | build:
 17 |   number: 0
 18 |   script: python setup.py install --single-version-externally-managed --record=record.txt
 19 | 
 20 | requirements:
 21 |   host:
 22 |     - pip
 23 |     - python
 24 |   run:
 25 |     - python
 26 |     - bwa=0.7.17
 27 |     - htseq
 28 |     - matplotlib
 29 |     - numpy
 30 |     - pandas
 31 |     - pyfaidx
 32 |     - pygments
 33 |     - pysam
 34 |     - pyyaml
 35 |     - regex
 36 |     - scipy
 37 |     - setuptools
 38 |     - sqlite
 39 |     - statsmodels
 40 |     - svgwrite
 41 |     - yaml
 42 |     - zlib
 43 |     - htslib=1.9
 44 |     - samtools=1.9
 45 | 
 46 | test:
 47 |   imports:
 48 |     - changeseq
 49 | 
 50 | about:
 51 |   home: https://github.com/tsailabSJ/changeseq
 52 |   license: GNU General Public License v2 (GPLv2)
 53 |   license_family: GPL2
 54 |   license_file: ''
 55 |   summary: Bioinformatic pipeline for the CHANGE-seq assay.
 56 |   description: "[![Version][version-shield]][version-url]\n[![Python versions][python-shield]][python-url]\n[![Platforms][platform-shield]][python-url]\n\n\n# CHANGE-seq: Circularization for High-throughput\
 57 |     \ Analysis Nuclease Genome-wide Effects by Sequencing\n\nThis is a repository for CHANGE-seq analytical software, which takes sample-specific paired-end FASTQ files as input and produces a list of CHANGE-seq\
 58 |     \ detected off-target cleavage sites as output.\n\n# Summary\n\nThis package implements a pipeline that takes in reads from the CHANGE-seq assay and returns detected cleavage sites as output. The individual\
 59 |     \ pipeline steps are:\n\n1. **Merge**: Merge read1 and read2 for easier mapping to genome.\n2. **Read Alignment**: Merged paired end reads from the assay are aligned to the reference genome using the\
 60 |     \ BWA-MEM algorithm with default parameters (Li. H, 2009).\n3. **Cleavage Site Identification**: Mapped sites are analyzed to determine which represent high-quality cleavage sites.\n4. **Visualization\
 61 |     \ of Results**: Identified on-target and off-target cleavage sites are rendered as a color-coded alignment map for easy analysis of results.\n\n# Installation\n\nThe easiest way to install change-seq\
 62 |     \ pipeline is via conda.\n\n```\n\nconda create -n changeseq -c conda-forge -c bioconda -c anaconda -c omnia -c tsailabSJ changeseq\n\nsource activate changeseq\n\nchangeseq.py -h\n\n## BWA 0.7.17 and\
 63 |     \ samtools 1.9 are automatically installed\n\n```\n\nAlternatively, you can git clone this repository and install\n\n```\n\ngit clone https://github.com/tsailabSJ/changeseq\n\ncd changeseq\n\npip install\
 64 |     \ -r requirements.txt\n\npython setup.py install\n\nchangeseq.py -h\n\n## Please install BWA and samtools if you choose this option\n\n```\n\n## Download Reference Genome\n\nThe CHANGEseq package requires\
 65 |     \ a reference genome for read mapping. You can use any genome of your choosing, but for all of our testing and original CHANGE-seq analyses we use hg19 ([download](http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta)).\
 66 |     \ Be sure to (g)unzip the FASTA file before use if it is compressed.\n\n# Usage\n\nThe change-seq pipeline requires a manifest yaml file specifying input files, output directory, and pipeline parameters.\
 67 |     \ Once the yaml file is created, users can simply run ``changeseq.py all --manifest /path/to/manifest.yaml``\n\n\nBelow is an example ``manifest.yaml`` file::\n\n    reference_genome: /data/joung/genomes/Homo_sapiens_assembly19.fasta\n\
 68 |     \    analysis_folder: /data/joung/CHANGE-Seq/test2\n\n    bwa: bwa\n    samtools: samtools\n\n    read_threshold: 4\n    window_size: 3\n    mapq_threshold: 50\n    start_threshold: 1\n    gap_threshold:\
 69 |     \ 3\n    mismatch_threshold: 6\n    search_radius: 30\n    merged_analysis: True\n\n    samples:\n        U2OS_exp1_VEGFA_site_1:\n            target: GGGTGGGGGGAGTTTGCTCCNGG\n            read1: /data/joung/sequencing_fastq/150902_M01326_0235_000000000-AHLT8/fastq/1_S1_L001_R1_001.fastq.gz\n\
 70 |     \            read2: /data/joung/sequencing_fastq/150902_M01326_0235_000000000-AHLT8/fastq/1_S1_L001_R2_001.fastq.gz\n            controlread1: /data/joung/sequencing_fastq/150902_M01326_0235_000000000-AHLT8/fastq/4_S4_L001_R1_001.fastq.gz\n\
 71 |     \            controlread2: /data/joung/sequencing_fastq/150902_M01326_0235_000000000-AHLT8/fastq/4_S4_L001_R2_001.fastq.gz\n            description: U2OS_exp1\n        U2OS_exp1_EMX1:\n            target:\
 72 |     \ GAGTCCGAGCAGAAGAAGAANGG\n            read1: /data/joung/sequencing_fastq/150902_M01326_0235_000000000-AHLT8/fastq/2_S2_L001_R1_001.fastq.gz\n            read2: /data/joung/sequencing_fastq/150902_M01326_0235_000000000-AHLT8/fastq/2_S2_L001_R2_001.fastq.gz\n\
 73 |     \            controlread1: /data/joung/sequencing_fastq/150902_M01326_0235_000000000-AHLT8/fastq/4_S4_L001_R1_001.fastq.gz\n            controlread2: /data/joung/sequencing_fastq/150902_M01326_0235_000000000-AHLT8/fastq/4_S4_L001_R2_001.fastq.gz\n\
 74 |     \            description: U2OS_exp1\n\n## Quickstart\n\n```\n\ngit clone https://github.com/tsailabSJ/changeseq\n\ncd changeseq/test\n\nchangeseq.py all --manifest CIRCLEseq_MergedTest.yaml\n\n```\n\
 75 |     \n# Writing A Manifest File\nWhen running the end-to-end analysis functionality of the CHANGEseq package a number of inputs are required. To simplify the formatting of these inputs and to encourage\
 76 |     \ reproducibility, these parameters are inputted into the pipeline via a manifest formatted as a YAML file. YAML files allow easy-to-read specification of key-value pairs. This allows us to easily specify\
 77 |     \ our parameters. The following fields are required in the manifest:\n\n- `reference_genome`: The absolute path to the reference genome FASTA file.\n- `output_folder`: The absolute path to the folder\
 78 |     \ in which all pipeline outputs will be saved.\n- `bwa`: The absolute path to the `bwa` executable\n- `samtools`: The absolute path to the `samtools` executable\n- `read_threshold`: The minimum number\
 79 |     \ of reads at a location for that location to be called as a site. We recommend leaving it to the default value of 4.\n- `window_size`: Size of the sliding window, we recommend leaving it to the default\
 80 |     \ value of 3.\n- `mapq_threshold`: Minimum read mapping quality score. We recommend leaving it to the default value of 50.\n- `start_threshold`: Tolerance for breakpoint location. We recommend leaving\
 81 |     \ it to the default value of 1.\n- `gap_threshold`: Distance between breakpoints. We recommend leaving it to the default value of 3 for Cas9.\n- `mismatch_threshold`: Number of tolerated gaps in the\
 82 |     \ fuzzy target search step. We recommend leaving it to the default value of 6.\n- `read_length`: Fastq file read length, default is 151.\n- `PAM`: PAM sequence, default is NGG.\n- `merged_analysis`:\
 83 |     \ Whether or not the paired read merging step should be performed (True or False).\n- `samples`: Lists the samples you wish to analyze and the details for each. Each sample name should be nested under the top level samples\
 84 |     \ key, and each sample detail should be nested under the sample name. See the sample manifest for an example.\n    - For each sample, you must provide the following parameters:\n        - `target`:\
 85 |     \ Target sequence for that sample. Accepts degenerate bases.\n        - `read1`: The absolute path to the .FASTQ(.gz) file containing the read1 reads.\n        - `read2`: The absolute path to the .FASTQ(.gz)\
 86 |     \ file containing the read2 reads.\n        - `controlread1`: The absolute path to the .FASTQ(.gz) file containing the control read1 reads.\n        - `controlread2`: The absolute path to the .FASTQ(.gz)\
 87 |     \ file containing the control read2 reads.\n        - `description`: A brief description of the sample\n\n\n# Pipeline Output\nWhen running the full pipeline, the results of each step are outputted\
 88 |     \ to the `output_folder` in a separate folder for each step. The output folders and their respective contents are as follows:\n\n- `output_folder/aligned`: Contains an alignment `.sam`, alignment `.bam`,\
 89 |     \ sorted `bam`, and `.bai` index file for each sample.\n- `output_folder/fastq`: Merged `.fastq.gz` files for each sample.\n- `output_folder/identified`: Contains tab-delimited `.txt` files for each\
 90 |     \ sample containing the identified DSBs, control DSBs, filtered DSBs, and read quantification.\n- `output_folder/visualization`: Contains a `.svg` vector image representing an alignment of all detected\
 91 |     \ off-targets to the targetsite for each sample.\n\n# FAQ\n\nNone yet, we will keep this updated as needed.\n\n[version-shield]: https://img.shields.io/conda/v/tsailabsj/changeseq.svg\n[version-url]:\
 92 |     \ https://anaconda.org/tsailabSJ/changeseq\n[python-shield]: https://img.shields.io/pypi/pyversions/changeseq.svg\n[python-url]: https://pypi.python.org/pypi/changeseq\n[platform-shield]: https://anaconda.org/tsailabsj/changeseq/badges/platforms.svg\n\
 93 |     \n\n"
 94 |   doc_url: ''
 95 |   dev_url: ''
 96 | 
 97 | extra:
 98 |   recipe-maintainers:
 99 |     - YichaoOU
100 | 


--------------------------------------------------------------------------------
/example_output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/example_output.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | argparse>=1.4.0
 2 | PyYAML>=3.11
 3 | regex>=2018.01.10
 4 | HTSeq>=0.6.1p1
 5 | pyfaidx>=0.2.7
 6 | statsmodels>=0.6.1
 7 | pysam>=0.9.1.4
 8 | svgwrite>=1.1.6
 9 | numpy>=1.11.1
10 | 
11 | 
12 | 
13 | 


--------------------------------------------------------------------------------
/scripts/NUC_SIMPLE:
--------------------------------------------------------------------------------
 1 | #
 2 | # This matrix was created by Todd Lowe   12/10/92
 3 | #
 4 | # Uses ambiguous nucleotide codes, probabilities rounded to
 5 | #  nearest integer
 6 | #
 7 | # Lowest score = -5, Highest score = 10
 8 | #
 9 | # Modified by Shengdar Tsai 1/23/16
10 |     A   T   G   C   N
11 | A   10  -5  -5  -5   10
12 | T   -5  10  -5  -5   10
13 | G   -5  -5  10  -5   10
14 | C   -5  -5  -5  10   10
15 | N   10  10  10  10   10


--------------------------------------------------------------------------------
/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'shengdar'
2 | 


--------------------------------------------------------------------------------
/scripts/site_pvalue.R:
--------------------------------------------------------------------------------
 1 | #!/apps/lab/aryee/R/R-3.2.3/bin/Rscript --vanilla
 2 | 
 3 | # Usage example using test data from the circleseq repository:
 4 | #  ./site_pvalue.R ../test/U2OS_EMX1_counts.txt ../test/U2OS_EMX1_counts_pval.txt 
 5 | 
 6 | # Usage example using a larger test dataset on erisone:
 7 | #  ./site_pvalue.R /data/joung/CIRCLE-Seq/complete_analysis/160122_937aa31/output/U2OS_EMX1_counts.txt U2OS_EMX1_counts_pval.txt 
 8 | 
 9 | library("ggplot2")
10 | library("scales")
11 | 
12 | args <- commandArgs(TRUE)
13 | infile <- args[1]
14 | outfile <- args[2]
15 | #infile <- "../test/U2OS_EMX1_counts.txt"
16 | # Read in counts
17 | message("Reading ", infile)
18 | dat <- read.delim(infile, comment.char = "", header=TRUE)
19 | 
20 | # Condition on having observed at least one read
21 | bg <- dat$Control_Position_Reads
22 | bg <- bg[bg>0] 
23 | 
24 | # Model control distribution as exponential
25 | message("Calculating p-values")
26 | lambda <- mean(bg)
27 | pval <- 1 - pexp(dat$Nuclease_Position_Reads, rate=1/lambda)
28 | dat$pvalue <- pval
29 | 
30 | # Model control distribution empirically
31 | background_cdf <- ecdf(bg)
32 | pval_empirical <- 1 - background_cdf(dat$Nuclease_Position_Reads)
33 | 
34 | message("Saving diagnostic plots to pvalue_diagnostics.pdf")
35 | # Diagnostic plots
36 | pdf(file="pvalue_diagnostics.pdf", width=6, height=2.5)
37 | p <- ggplot(dat, aes(1+Control_Position_Reads)) + scale_x_continuous(limits=c(0,100)) + scale_y_log10(labels=comma) + geom_histogram(binwidth=2, na.rm=TRUE) + theme_bw() + ggtitle("Control_Position_Reads")
38 | suppressWarnings(print(p))
39 | p <- ggplot(dat, aes(1+Nuclease_Position_Reads)) + scale_x_continuous(limits=c(0,100)) + scale_y_log10(labels=comma) + geom_histogram(binwidth=2, na.rm=TRUE) + theme_bw() + ggtitle("Nuclease_Position_Reads")
40 | suppressWarnings(print(p))
41 | idx <- sample(length(pval), min(length(pval), 10000))
42 | plot(pval_empirical[idx], pval[idx], xlab="Empirical p-value", ylab="Exponential model p-value")
43 | abline(0,1)
44 | dev.off()
45 | 
46 | message("Writing output table ", outfile)
47 | write.table(dat, file=outfile, sep="\t", quote=FALSE, row.names=FALSE)
48 | 
49 | 


--------------------------------------------------------------------------------
/scripts/test.py:
--------------------------------------------------------------------------------
  1 | import regex
  2 | import nwalign as nw
  3 | import swalign
  4 | import string
  5 | 
  6 | def reverseComplement(sequence):
  7 |     transtab = string.maketrans("ACGT","TGCA")
  8 |     return sequence.translate(transtab)[::-1]
  9 | 
 10 | def regexFromSequence(seq, lookahead=True, indels=1, errors=7):
 11 |     """
 12 |     Given a sequence with ambiguous base characters, returns a regex that matches for
 13 |     the explicit (unambiguous) base characters
 14 |     """
 15 |     IUPAC_notation_regex = {'N': '[ATCGN]',
 16 |                             'Y': '[CTY]',
 17 |                             'R': '[AGR]',
 18 |                             'W': '[ATW]',
 19 |                             'S': '[CGS]',
 20 |                             'A': 'A',
 21 |                             'T': 'T',
 22 |                             'C': 'C',
 23 |                             'G': 'G'}
 24 | 
 25 |     pattern = ''
 26 | 
 27 |     for c in seq:
 28 |         pattern += IUPAC_notation_regex[c]
 29 | 
 30 |     if lookahead:
 31 |         pattern = '(?b:' + pattern + ')'
 32 | 
 33 |     pattern_standard = pattern + '{{s<={0}}}'.format(errors)
 34 |     pattern_gap = pattern + '{{i<={0},d<={0},s<={1},3i+3d+1s<={1}}}'.format(indels, errors)
 35 |     return pattern_standard, pattern_gap
 36 | 
 37 | """
 38 | Given a targetsite and window, use a fuzzy regex to align the targetsite to
 39 | the window. Returns the best match.
 40 | """
 41 | def alignSequences(targetsite_sequence, window_sequence, max_mismatches=7):
 42 |     # Try both strands
 43 |     query_regex_standard, query_regex_gap = regexFromSequence(targetsite_sequence, errors=max_mismatches)
 44 | 
 45 |     alignments = list()
 46 |     alignments.append(('+', 'standard', regex.search(query_regex_standard, window_sequence, regex.BESTMATCH)))
 47 |     alignments.append(('-', 'standard', regex.search(query_regex_standard, reverseComplement(window_sequence), regex.BESTMATCH)))
 48 |     alignments.append(('+', 'gapped', regex.search(query_regex_gap, window_sequence, regex.BESTMATCH)))
 49 |     alignments.append(('-', 'gapped', regex.search(query_regex_gap, reverseComplement(window_sequence), regex.BESTMATCH)))
 50 | 
 51 |     top_distance_score = 0
 52 |     chosen_alignment = None
 53 |     for i, aln in enumerate(alignments):
 54 |         strand, alignment_type, match = aln
 55 |         if match != None:
 56 |             substitutions, insertions, deletions = match.fuzzy_counts
 57 |             distance_score = substitutions + (insertions + deletions) * 3
 58 |             if distance_score > top_distance_score:
 59 |                 chosen_alignment = match
 60 |                 top_distance_score = distance_score
 61 |                 print(match, distance_score)
 62 | 
 63 |     if chosen_alignment:
 64 |         match_sequence = chosen_alignment.group()
 65 |         distance = sum(chosen_alignment.fuzzy_counts)
 66 |         length = len(match_sequence)
 67 |         start = chosen_alignment.start()
 68 |         end = chosen_alignment.end()
 69 |         return [match_sequence, distance, length, strand, start, end]
 70 |     else:
 71 |         return [''] * 6
 72 | 
 73 | 
 74 | 
 75 | 
 76 |     # if forward_alignment is None and reverse_alignment is None:
 77 |     #     return ['', '', '', '', '', '']
 78 |     # else:
 79 |     #     if forward_alignment is None and reverse_alignment is not None:
 80 |     #         strand = '-'
 81 |     #         alignment = reverse_alignment
 82 |     #     elif reverse_alignment is None and forward_alignment is not None:
 83 |     #         strand = '+'
 84 |     #         alignment = forward_alignment
 85 |     #     elif forward_alignment is not None and reverse_alignment is not None:
 86 |     #         forward_distance = sum(forward_alignment.fuzzy_counts)
 87 |     #         reverse_distance = sum(reverse_alignment.fuzzy_counts)
 88 |     #
 89 |     #         if forward_distance > reverse_distance:
 90 |     #             strand = '-'
 91 |     #             alignment = reverse_alignment
 92 |     #         else:
 93 |     #             strand = '+'
 94 |     #             alignment = forward_alignment
 95 |     #
 96 |     #     match_sequence = alignment.group()
 97 |     #     distance = sum(alignment.fuzzy_counts)
 98 |     #     length = len(match_sequence)
 99 |     #     start = alignment.start()
100 |     #     end = alignment.end()
101 |     #
102 |     #     return [match_sequence, distance, length, strand, start, end]
103 | 
def alignSequences2(ref_seq, query_seq):
    """Locate ``ref_seq`` in ``query_seq`` with a Smith-Waterman local alignment.

    The reference is aligned against the query and against the query's
    reverse complement. The strand with strictly more matches wins, provided
    it reaches ``len(ref_seq) - 8`` matches; the aligned region is then
    extended by the unaligned reference overhang on both sides.

    Returns:
        ``[sequence, mismatches, length, strand, start, end]`` for the winning
        strand, or six empty strings when neither strand qualifies (this
        includes the case where both strands have the same match count).
    """
    target_len = len(ref_seq)
    min_matches = len(ref_seq) - 1 - 7  # allow up to 8 mismatches

    # match = 2, mismatch = -1
    score_matrix = swalign.NucleotideScoringMatrix(2, -1)
    aligner = swalign.LocalAlignment(score_matrix, gap_penalty=-3, gap_extension_penalty=-100, prefer_gap_runs=True)

    fwd = aligner.align(ref_seq, query_seq)
    rev = aligner.align(ref_seq, reverseComplement(query_seq))

    if fwd.matches >= min_matches and fwd.matches > rev.matches:
        winner, strand = fwd, "+"
    elif rev.matches >= min_matches and rev.matches > fwd.matches:
        winner, strand = rev, "-"
    else:
        return [""] * 6

    # Pad the aligned query span with the part of the reference that was
    # left unaligned at either end.
    start = winner.q_pos - winner.r_pos
    end = winner.q_end + (target_len - winner.r_end)
    return [winner.query[start:end], target_len - winner.matches - 1, end - start, strand, start, end]
130 | 
131 | 
def main():
    """Align one hard-coded target site to one hard-coded window and print the result."""
    # Alternative inputs kept for manual experiments:
    # target = 'TTTNCTGATGGTCCATGTCTGTTACTC'
    # windowsequence = 'AATGTGTGTCTGCTGGAAGCTCCTATTCTTCCGCCATTTTCCAGTCCTCCAGAAGTTTCCTGATGGTCCATGTCTGAATTAGACACCCCTCTTCTTTGTTCCAGTTGCACCTGTAATTCTTCAGCATAGTACTTCTTAAACTGTTTTTAA'
    # windowsequence = 'GGCCTGAGTCCGAGCAGAAGCAAGAAGGGCTCCCATCACATCAAC'

    guide = 'TTTNGGGACGGGGAGAAGGAAAAGAGG'
    window = 'AATTTGGGGGGATTCATTACTCTATTTGGATTTGTTAGGGAGGAAGGCAGGTGGGATTTTTCTTCTCATTCTTATCTCTTTCCTTCTTCCCGTCCCAGAAAGAAACTAAGAATAATAACCAAATTATTAAAATGACTCACCGCCCTTCCA'

    result = alignSequences(guide, window, max_mismatches=7)
    print(result)


if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------
/scripts/test_align.py:
--------------------------------------------------------------------------------
 1 | import nwalign as nw
 2 | import Levenshtein as l
 3 | import difflib
 4 | import os
 5 | 
 6 | def main():
 7 | 
 8 |     # a = 'GCCTGAGTCCGAGCAGAAGAAGAAGGGCTCCCATCACATCAAC'
 9 |     # b = 'GAGTCGAGCAGAAGAAGAANGG'
10 | 
11 |     a = 'AATGTGTGTCTGCTGGAAGCTCCTATTCTTCCGCCATTTTCCAGTCCTCCAGAAGTTTCCTGATGGTCCATGTCTGAATTAGACACCCCTCTTCTTTGTTCCAGTTGCACCTGTAATTCTTCAGCATAGTACTTCTTAAACTGTTTTTAA'
12 |     b= 'TTTNCTGATGGTCCATGTCTGTTACTC'
13 | 
14 |     print(l.distance(a, b))
15 |     print(l.editops(a, b))
16 |     print(l.matching_blocks(l.editops(a,b), a, b))
17 | 
18 | 
19 | 
20 | if __name__ == "__main__":
21 |     main()


--------------------------------------------------------------------------------
/scripts/test_ga.py:
--------------------------------------------------------------------------------
 1 | import HTSeq
 2 | 
 3 | def main():
 4 |     ga = HTSeq.GenomicArray("auto", typecode='O', stranded=False)
 5 |     position = HTSeq.GenomicPosition('chr1', 123203, '.')
 6 | 
 7 |     ga[HTSeq.GenomicInterval( "chr1", 100000, 101000 , "." )] = [0.05, 0.002, 0.04, 0.005]
 8 | 
 9 |     iv = HTSeq.GenomicInterval( "chr1", 100000, 130000 , "." )
10 | 
11 |     for interval, value in ga[iv].steps():
12 |         print(interval, value)
13 | 
14 | if __name__ == "__main__":
15 |     main()


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [nosetests]
2 | verbosity=1
3 | detailed-errors=1
4 | exe=1
5 | where=test/


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | # from distutils.core import setup
 5 | from setuptools import setup, find_packages
 6 | import changeseq
 7 | ## conda skeleton can't find readme
 8 | import os
 9 | if os.path.isfile("README.MD"):
10 | 	with open("README.MD", "r") as fh:
11 | 		long_description = fh.read()
12 | else:
13 | 	long_description="change-seq"
14 | 
15 | setup(
16 | 	name='changeseq',
17 | 	version=str(changeseq.__version__), # update visualization, run homer peak annotation if available
18 | 	description="Bioinformatic pipeline for the CHANGE-seq assay.",
19 | 	author="Shengdar Q Tsai, Martin Aryee, Ved V Topkar, Jose Malagon-Lopez",
20 | 	author_email='STSAI4@mgh.harvard.edu, Aryee.Martin@mgh.harvard.edu, vedtopkar@gmail.com, jose.lopez@mail.harvard.edu',
21 | 	url='https://github.com/tsailabSJ/changeseq',
22 | 	# packages=['changeseq','data'],
23 | 	packages=find_packages(),
24 | 	# package_dir={'changeseq':'changeseq'},
25 | 	license='LICENSE',
26 | 	scripts=['changeseq/changeseq.py','changeseq/alignReads.py','changeseq/visualization.py',
27 | 		'changeseq/callVariants.py','changeseq/findCleavageSites.py','changeseq/log.py',
28 | 		'changeseq/mergeReads.py','changeseq/referenceFree.py','changeseq/utility.py',
29 | 		'changeseq/validation.py','changeseq/refseq_gene_name.py'],
30 | 	package_data={'test': ["test/*"]},
31 | 	# package_data={'':["README.md","data/refseq_gene_name.py"]},
32 | 	include_package_data=True,
33 | 	long_description=long_description,
34 | 	long_description_content_type='text/markdown'	,
35 | 	keywords='changeseq',
36 | 	classifiers=[
37 | 		'Development Status :: 4 - Beta',
38 | 		'Intended Audience :: Science/Research',
39 | 		'Topic :: Scientific/Engineering :: Bio-Informatics',
40 | 		'Topic :: Scientific/Engineering :: Visualization',
41 | 		'Topic :: Scientific/Engineering :: Information Analysis',
42 | 		'License :: OSI Approved :: GNU General Public License v2 (GPLv2)',
43 | 		'Operating System :: Unix',
44 | 		'Natural Language :: English',
45 | 		"Programming Language :: Python :: 2",
46 | 		'Programming Language :: Python :: 2.6',
47 | 		'Programming Language :: Python :: 2.7'
48 | 	]
49 | )
50 | 


--------------------------------------------------------------------------------
/test/CIRCLEseq_MergedTest.yaml:
--------------------------------------------------------------------------------
 1 | reference_genome: data/input/CIRCLEseq_test_genome.fa
 2 | analysis_folder: data/MergedOutput
 3 | 
 4 | bwa: bwa
 5 | samtools: samtools
 6 | 
 7 | window_size: 3
 8 | mapq_threshold: 50
 9 | start_threshold: 1
10 | gap_threshold: 3
11 | mismatch_threshold: 6
12 | merged_analysis: True
13 | 
14 | samples:
15 |     TestSample:
16 |         target: GAGTCCGAGCAGAAGAAGAANGG
17 |         read1: data/input/TEST.r1.fastq.gz
18 |         read2: data/input/TEST.r2.fastq.gz
19 |         controlread1: data/input/TEST_control.r1.fastq.gz
20 |         controlread2: data/input/TEST_control.r2.fastq.gz
21 |         description: TestCell
22 | 


--------------------------------------------------------------------------------
/test/CIRCLEseq_StandardTest.yaml:
--------------------------------------------------------------------------------
 1 | reference_genome: data/input/CIRCLEseq_test_genome.fa
 2 | analysis_folder: data/StandardOutput
 3 | 
 4 | bwa: bwa
 5 | samtools: samtools
 6 | 
 7 | window_size: 3
 8 | mapq_threshold: 50
 9 | start_threshold: 1
10 | gap_threshold: 3
11 | mismatch_threshold: 6
12 | merged_analysis: False
13 | variant_analysis: True
14 | 
15 | samples:
16 |     TestSample:
17 |         target: GAGTCCGAGCAGAAGAAGAANGG
18 |         read1: data/input/TEST.r1.fastq.gz
19 |         read2: data/input/TEST.r2.fastq.gz
20 |         controlread1: data/input/TEST_control.r1.fastq.gz
21 |         controlread2: data/input/TEST_control.r2.fastq.gz
22 |         description: TestCell
23 | 


--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/__init__.py


--------------------------------------------------------------------------------
/test/data/MergedOutput/aligned/TestSample.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/MergedOutput/aligned/TestSample.bam


--------------------------------------------------------------------------------
/test/data/MergedOutput/aligned/TestSample.bam.bai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/MergedOutput/aligned/TestSample.bam.bai


--------------------------------------------------------------------------------
/test/data/MergedOutput/aligned/TestSample_sorted.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/MergedOutput/aligned/TestSample_sorted.bam


--------------------------------------------------------------------------------
/test/data/MergedOutput/aligned/control_TestSample.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/MergedOutput/aligned/control_TestSample.bam


--------------------------------------------------------------------------------
/test/data/MergedOutput/aligned/control_TestSample.bam.bai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/MergedOutput/aligned/control_TestSample.bam.bai


--------------------------------------------------------------------------------
/test/data/MergedOutput/aligned/control_TestSample_sorted.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/MergedOutput/aligned/control_TestSample_sorted.bam


--------------------------------------------------------------------------------
/test/data/MergedOutput/fastq/TestSample_merged.fastq.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/MergedOutput/fastq/TestSample_merged.fastq.gz


--------------------------------------------------------------------------------
/test/data/MergedOutput/fastq/control_TestSample_merged.fastq.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/MergedOutput/fastq/control_TestSample_merged.fastq.gz


--------------------------------------------------------------------------------
/test/data/MergedOutput/identified/TestSample_CONTROL_coordinates.txt:
--------------------------------------------------------------------------------
1 | #Name	Targetsite_Sequence	Cells	BAM	Read1_chr	Read1_start_position	Read1_strand	Read2_chr	Read2_start_position	Read2_strand
2 | 


--------------------------------------------------------------------------------
/test/data/MergedOutput/identified/TestSample_NUCLEASE_coordinates.txt:
--------------------------------------------------------------------------------
  1 | #Name	Targetsite_Sequence	Cells	BAM	Read1_chr	Read1_start_position	Read1_strand	Read2_chr	Read2_start_position	Read2_strand
  2 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
  3 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
  4 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
  5 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
  6 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
  7 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
  8 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
  9 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 10 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 11 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 12 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 13 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 14 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 15 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 16 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 17 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 18 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 19 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 20 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 21 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 22 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 23 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 24 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 25 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 26 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 27 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 28 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 29 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10018	+
 30 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 31 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 32 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 33 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10018	+
 34 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 35 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 36 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 37 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 38 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 39 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 40 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 41 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 42 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 43 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 44 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 45 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 46 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 47 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10018	+
 48 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 49 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 50 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 51 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 52 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 53 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 54 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 55 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 56 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 57 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 58 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 59 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 60 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 61 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 62 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 63 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 64 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10018	+
 65 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 66 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 67 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 68 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 69 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10018	+
 70 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 71 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 72 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 73 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 74 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 75 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 76 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 77 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 78 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 79 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10018	+
 80 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 81 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 82 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 83 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 84 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 85 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 86 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 87 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 88 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 89 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 90 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 91 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 92 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 93 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 94 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 95 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 96 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 97 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 98 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
 99 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
100 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
101 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
102 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
103 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
104 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	2	10016	-	2	10017	+
105 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10004	-	8	10006	+
106 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
107 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
108 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
109 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
110 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
111 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
112 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
113 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
114 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
115 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
116 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
117 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
118 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
119 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
120 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
121 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
122 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
123 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
124 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
125 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
126 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
127 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
128 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
129 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
130 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
131 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
132 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
133 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
134 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
135 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
136 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
137 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
138 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
139 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
140 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
141 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
142 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
143 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
144 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
145 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
146 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
147 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
148 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
149 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
150 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
151 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
152 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
153 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
154 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
155 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
156 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
157 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
158 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
159 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
160 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
161 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
162 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
163 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
164 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
165 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
166 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
167 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
168 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
169 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
170 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
171 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
172 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
173 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
174 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
175 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
176 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
177 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	8	10005	-	8	10006	+
178 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
179 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
180 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
181 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
182 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
183 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
184 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
185 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
186 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
187 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
188 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
189 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
190 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
191 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
192 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
193 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
194 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
195 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
196 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
197 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
198 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
199 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
200 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
201 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
202 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
203 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
204 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
205 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
206 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
207 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
208 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
209 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
210 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
211 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
212 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
213 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
214 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
215 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
216 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
217 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
218 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
219 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
220 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
221 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10005	-	1	10006	+
222 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
223 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
224 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
225 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
226 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
227 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
228 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
229 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
230 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
231 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
232 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
233 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
234 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
235 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
236 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10005	-	1	10006	+
237 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
238 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
239 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	1	10004	-	1	10005	+
240 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
241 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
242 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
243 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
244 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
245 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
246 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
247 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
248 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
249 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
250 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
251 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
252 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
253 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
254 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
255 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
256 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
257 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
258 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
259 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
260 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
261 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	12	10005	-	12	10006	+
262 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
263 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
264 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
265 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
266 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
267 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
268 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
269 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
270 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
271 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
272 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
273 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
274 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
275 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
276 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
277 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
278 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
279 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
280 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
281 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
282 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
283 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
284 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
285 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
286 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
287 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
288 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
289 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample.bam	4	10016	-	4	10017	+
290 | 


--------------------------------------------------------------------------------
/test/data/MergedOutput/identified/TestSample_count.txt:
--------------------------------------------------------------------------------
 1 | #Chromosome	zero_based_Position	Nuclease_Position_Reads	Control_Position_Reads	Nuclease_Window_Reads	Control_Window_Reads	p_Value	narrow_p_Value	control_p_Value	control_narrow_p_Value
 2 | 1	10004	60.0	0.0	124.0	0.0	0.0	0.0	0.0	0.0
 3 | 1	10005	62.0	0.0	124.0	0.0	0.0	0.0	0.0	0.0
 4 | 1	10006	2.0	0.0	124.0	0.0	0.0	0.0	0.0	0.0
 5 | 8	10004	1.0	0.0	146.0	0.0	0.0	0.0	0.0	0.0
 6 | 8	10005	72.0	0.0	146.0	0.0	0.0	0.0	0.0	0.0
 7 | 8	10006	73.0	0.0	146.0	0.0	0.0	0.0	0.0	0.0
 8 | 2	10016	103.0	0.0	206.0	0.0	0.0	0.0	0.0	0.0
 9 | 2	10017	97.0	0.0	206.0	0.0	0.0	0.0	0.0	0.0
10 | 2	10018	6.0	0.0	206.0	0.0	0.0	0.0	0.0	0.0
11 | 12	10005	22.0	0.0	44.0	0.0	0.0	0.0	0.0	0.0
12 | 12	10006	22.0	0.0	44.0	0.0	0.0	0.0	0.0	0.0
13 | 4	10016	28.0	0.0	56.0	0.0	0.0	0.0	0.0	0.0
14 | 4	10017	28.0	0.0	56.0	0.0	0.0	0.0	0.0	0.0
15 | 


--------------------------------------------------------------------------------
/test/data/MergedOutput/identified/TestSample_identified_matched.txt:
--------------------------------------------------------------------------------
1 | Chromosome	Start	End	Genomic Coordinate	Nuclease_Read_Count	Strand	Control_Read_Count	Site_Sequence	Site_Substitution_Number	Site_Sequence_Gaps_Allowed	File_Name	Cell	Target_site	Full_Name	Target_Sequence	Realigned_Target_Sequence
2 | 12	10000	10023	12:10000-10023	44	-	0.0	GAGTTAGAGCAGAAAAAAAATGG	4		TestSample.bam	TestCell	TestSample	TestSample_TestCell_12:10000-10023_44	GAGTCCGAGCAGAAGAAGAANGG	none
3 | 1	10000	10023	1:10000-10023	124	-	0.0	GAAGTAGAGCAGAAGAAGAAGCG	5	AAGT-AGAGCAGAAGAAGAAGCG	TestSample.bam	TestCell	TestSample	TestSample_TestCell_1:10000-10023_124	GAGTCCGAGCAGAAGAAGAANGG	GAGTCCGAGCAGAAGAAGAANGG
4 | 2	10000	10023	2:10000-10023	206	+	0.0	GAGTCCGAGCAGAAGAAGAAGGG	0		TestSample.bam	TestCell	TestSample	TestSample_TestCell_2:10000-10023_206	GAGTCCGAGCAGAAGAAGAANGG	none
5 | 4	10000	10023	4:10000-10023	56	+	0.0	CACTCCAAGTAGAAGAAGAAAAG	5		TestSample.bam	TestCell	TestSample	TestSample_TestCell_4:10000-10023_56	GAGTCCGAGCAGAAGAAGAANGG	none
6 | 8	10000	10023	8:10000-10023	146	-	0.0	AAGGCCAAGCAGAAGAGTAATGG	5		TestSample.bam	TestCell	TestSample	TestSample_TestCell_8:10000-10023_146	GAGTCCGAGCAGAAGAAGAANGG	none
7 | 


--------------------------------------------------------------------------------
/test/data/MergedOutput/identified/TestSample_identified_unmatched.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/MergedOutput/identified/TestSample_identified_unmatched.txt


--------------------------------------------------------------------------------
/test/data/MergedOutput/visualization/TestSample_offtargets.svg:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8" ?>
2 | <svg baseProfile="full" height="196" version="1.1" width="100%" xmlns="http://www.w3.org/2000/svg" xmlns:ev="http://www.w3.org/2001/xml-events" xmlns:xlink="http://www.w3.org/1999/xlink"><defs /><text style="font-size:20px; font-family:Courier" x="20" y="30">TestSample</text><text style="font-size:10px; font-family:Courier" x="172" y="48">10</text><text style="font-size:10px; font-family:Courier" x="22" y="48">20</text><text style="font-size:10px; font-family:Courier" x="322" y="48">P</text><text style="font-size:10px; font-family:Courier" x="337" y="48">A</text><text style="font-size:10px; font-family:Courier" x="352" y="48">M</text><rect fill="#F5F500" height="15" width="15" x="20" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="23" y="62">G</text><rect fill="#FF5454" height="15" width="15" x="35" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="38" y="62">A</text><rect fill="#F5F500" height="15" width="15" x="50" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="53" y="62">G</text><rect fill="#00D118" height="15" width="15" x="65" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="68" y="62">T</text><rect fill="#26A8FF" height="15" width="15" x="80" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="83" y="62">C</text><rect fill="#26A8FF" height="15" width="15" x="95" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="98" y="62">C</text><rect fill="#F5F500" height="15" width="15" x="110" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="113" y="62">G</text><rect fill="#FF5454" height="15" width="15" x="125" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="128" y="62">A</text><rect fill="#F5F500" height="15" width="15" x="140" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="143" y="62">G</text><rect fill="#26A8FF" height="15" 
width="15" x="155" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="158" y="62">C</text><rect fill="#FF5454" height="15" width="15" x="170" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="173" y="62">A</text><rect fill="#F5F500" height="15" width="15" x="185" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="188" y="62">G</text><rect fill="#FF5454" height="15" width="15" x="200" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="203" y="62">A</text><rect fill="#FF5454" height="15" width="15" x="215" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="218" y="62">A</text><rect fill="#F5F500" height="15" width="15" x="230" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="233" y="62">G</text><rect fill="#FF5454" height="15" width="15" x="245" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="248" y="62">A</text><rect fill="#FF5454" height="15" width="15" x="260" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="263" y="62">A</text><rect fill="#F5F500" height="15" width="15" x="275" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="278" y="62">G</text><rect fill="#FF5454" height="15" width="15" x="290" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="293" y="62">A</text><rect fill="#FF5454" height="15" width="15" x="305" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="308" y="62">A</text><rect fill="#B3B3B3" height="15" width="15" x="320" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="323" y="62">N</text><rect fill="#F5F500" height="15" width="15" x="335" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="338" y="62">G</text><rect fill="#F5F500" height="15" width="15" x="350" y="50" /><text fill="black" style="font-size:15px; 
font-family:Courier" x="353" y="62">G</text><text style="font-size:15px; font-family:Courier" x="381" y="62">Reads</text><text fill="black" style="font-size:10px; font-family:Courier" x="24.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="39.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="54.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="69.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="84.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="99.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="114.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="129.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="144.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="159.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="174.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="189.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="204.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="219.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="234.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="249.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="264.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="279.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="294.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="309.5" y="92">•</text><text fill="black" style="font-size:15px; font-family:Courier" x="323" y="93">G</text><text fill="black" 
style="font-size:10px; font-family:Courier" x="339.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="354.5" y="92">•</text><text fill="black" style="font-size:15px; font-family:Courier" x="380" y="94">206</text><rect fill="#FF5454" height="15" width="15" x="20" y="96" /><text fill="black" style="font-size:15px; font-family:Courier" x="23" y="108">A</text><text fill="black" style="font-size:10px; font-family:Courier" x="39.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="54.5" y="107">•</text><rect fill="#F5F500" height="15" width="15" x="65" y="96" /><text fill="black" style="font-size:15px; font-family:Courier" x="68" y="108">G</text><text fill="black" style="font-size:10px; font-family:Courier" x="84.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="99.5" y="107">•</text><rect fill="#FF5454" height="15" width="15" x="110" y="96" /><text fill="black" style="font-size:15px; font-family:Courier" x="113" y="108">A</text><text fill="black" style="font-size:10px; font-family:Courier" x="129.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="144.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="159.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="174.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="189.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="204.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="219.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="234.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="249.5" y="107">•</text><rect fill="#F5F500" height="15" width="15" x="260" y="96" /><text fill="black" style="font-size:15px; font-family:Courier" x="263" y="108">G</text><rect 
fill="#00D118" height="15" width="15" x="275" y="96" /><text fill="black" style="font-size:15px; font-family:Courier" x="278" y="108">T</text><text fill="black" style="font-size:10px; font-family:Courier" x="294.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="309.5" y="107">•</text><text fill="black" style="font-size:15px; font-family:Courier" x="323" y="108">T</text><text fill="black" style="font-size:10px; font-family:Courier" x="339.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="354.5" y="107">•</text><text fill="black" style="font-size:15px; font-family:Courier" x="380" y="109">146</text><text fill="black" style="font-size:10px; font-family:Courier" x="24.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="39.5" y="122">•</text><rect fill="#FF5454" height="15" width="15" x="50" y="111" /><text fill="black" style="font-size:15px; font-family:Courier" x="53" y="123">A</text><rect fill="#F5F500" height="15" width="15" x="65" y="111" /><text fill="black" style="font-size:15px; font-family:Courier" x="68" y="123">G</text><rect fill="#00D118" height="15" width="15" x="80" y="111" /><text fill="black" style="font-size:15px; font-family:Courier" x="83" y="123">T</text><rect fill="#FF5454" height="15" width="15" x="95" y="111" /><text fill="black" style="font-size:15px; font-family:Courier" x="98" y="123">A</text><text fill="black" style="font-size:10px; font-family:Courier" x="114.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="129.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="144.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="159.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="174.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="189.5" y="122">•</text><text fill="black" 
style="font-size:10px; font-family:Courier" x="204.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="219.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="234.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="249.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="264.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="279.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="294.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="309.5" y="122">•</text><text fill="black" style="font-size:15px; font-family:Courier" x="323" y="123">G</text><rect fill="#26A8FF" height="15" width="15" x="335" y="111" /><text fill="black" style="font-size:15px; font-family:Courier" x="338" y="123">C</text><text fill="black" style="font-size:10px; font-family:Courier" x="354.5" y="122">•</text><rect fill="#FF5454" height="15" width="15" x="20" y="126" /><text fill="black" style="font-size:15px; font-family:Courier" x="23" y="138">A</text><text fill="black" style="font-size:10px; font-family:Courier" x="39.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="54.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="69.5" y="137">•</text><rect fill="#B3B3B3" height="15" width="15" x="80" y="126" /><text fill="black" style="font-size:15px; font-family:Courier" x="83" y="138">-</text><rect fill="#FF5454" height="15" width="15" x="95" y="126" /><text fill="black" style="font-size:15px; font-family:Courier" x="98" y="138">A</text><text fill="black" style="font-size:10px; font-family:Courier" x="114.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="129.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="144.5" y="137">•</text><text 
fill="black" style="font-size:10px; font-family:Courier" x="159.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="174.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="189.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="204.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="219.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="234.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="249.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="264.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="279.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="294.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="309.5" y="137">•</text><text fill="black" style="font-size:15px; font-family:Courier" x="323" y="138">G</text><rect fill="#26A8FF" height="15" width="15" x="335" y="126" /><text fill="black" style="font-size:15px; font-family:Courier" x="338" y="138">C</text><text fill="black" style="font-size:10px; font-family:Courier" x="354.5" y="137">•</text><text fill="black" style="font-size:15px; font-family:Courier" x="380" y="131">124</text><text fill="black" style="font-size:23px; font-family:Courier" x="367" y="131">}</text><rect fill="#26A8FF" height="15" width="15" x="20" y="141" /><text fill="black" style="font-size:15px; font-family:Courier" x="23" y="153">C</text><text fill="black" style="font-size:10px; font-family:Courier" x="39.5" y="152">•</text><rect fill="#26A8FF" height="15" width="15" x="50" y="141" /><text fill="black" style="font-size:15px; font-family:Courier" x="53" y="153">C</text><text fill="black" style="font-size:10px; font-family:Courier" x="69.5" y="152">•</text><text fill="black" style="font-size:10px; 
font-family:Courier" x="84.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="99.5" y="152">•</text><rect fill="#FF5454" height="15" width="15" x="110" y="141" /><text fill="black" style="font-size:15px; font-family:Courier" x="113" y="153">A</text><text fill="black" style="font-size:10px; font-family:Courier" x="129.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="144.5" y="152">•</text><rect fill="#00D118" height="15" width="15" x="155" y="141" /><text fill="black" style="font-size:15px; font-family:Courier" x="158" y="153">T</text><text fill="black" style="font-size:10px; font-family:Courier" x="174.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="189.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="204.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="219.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="234.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="249.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="264.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="279.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="294.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="309.5" y="152">•</text><text fill="black" style="font-size:15px; font-family:Courier" x="323" y="153">A</text><rect fill="#FF5454" height="15" width="15" x="335" y="141" /><text fill="black" style="font-size:15px; font-family:Courier" x="338" y="153">A</text><text fill="black" style="font-size:10px; font-family:Courier" x="354.5" y="152">•</text><text fill="black" style="font-size:15px; font-family:Courier" x="380" y="154">56</text><text fill="black" style="font-size:10px; font-family:Courier" x="24.5" 
y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="39.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="54.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="69.5" y="167">•</text><rect fill="#00D118" height="15" width="15" x="80" y="156" /><text fill="black" style="font-size:15px; font-family:Courier" x="83" y="168">T</text><rect fill="#FF5454" height="15" width="15" x="95" y="156" /><text fill="black" style="font-size:15px; font-family:Courier" x="98" y="168">A</text><text fill="black" style="font-size:10px; font-family:Courier" x="114.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="129.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="144.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="159.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="174.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="189.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="204.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="219.5" y="167">•</text><rect fill="#FF5454" height="15" width="15" x="230" y="156" /><text fill="black" style="font-size:15px; font-family:Courier" x="233" y="168">A</text><text fill="black" style="font-size:10px; font-family:Courier" x="249.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="264.5" y="167">•</text><rect fill="#FF5454" height="15" width="15" x="275" y="156" /><text fill="black" style="font-size:15px; font-family:Courier" x="278" y="168">A</text><text fill="black" style="font-size:10px; font-family:Courier" x="294.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="309.5" y="167">•</text><text fill="black" style="font-size:15px; 
font-family:Courier" x="323" y="168">T</text><text fill="black" style="font-size:10px; font-family:Courier" x="339.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="354.5" y="167">•</text><text fill="black" style="font-size:15px; font-family:Courier" x="380" y="169">44</text></svg>


--------------------------------------------------------------------------------
/test/data/StandardOutput/aligned/TestSample.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/StandardOutput/aligned/TestSample.bam


--------------------------------------------------------------------------------
/test/data/StandardOutput/aligned/TestSample.bam.bai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/StandardOutput/aligned/TestSample.bam.bai


--------------------------------------------------------------------------------
/test/data/StandardOutput/aligned/TestSample_sorted.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/StandardOutput/aligned/TestSample_sorted.bam


--------------------------------------------------------------------------------
/test/data/StandardOutput/aligned/control_TestSample.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/StandardOutput/aligned/control_TestSample.bam


--------------------------------------------------------------------------------
/test/data/StandardOutput/aligned/control_TestSample.bam.bai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/StandardOutput/aligned/control_TestSample.bam.bai


--------------------------------------------------------------------------------
/test/data/StandardOutput/aligned/control_TestSample_sorted.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/StandardOutput/aligned/control_TestSample_sorted.bam


--------------------------------------------------------------------------------
/test/data/StandardOutput/identified/TestSample_CONTROL_coordinates.txt:
--------------------------------------------------------------------------------
1 | #Name	Targetsite_Sequence	Cells	BAM	Read1_chr	Read1_start_position	Read1_strand	Read2_chr	Read1_start_position	Read2_strand
2 | 


--------------------------------------------------------------------------------
/test/data/StandardOutput/identified/TestSample_NUCLEASE_coordinates.txt:
--------------------------------------------------------------------------------
  1 | #Name	Targetsite_Sequence	Cells	BAM	Read1_chr	Read1_start_position	Read1_strand	Read2_chr	Read1_start_position	Read2_strand
  2 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	+	12	10005	-
  3 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
  4 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10017	+	4	10016	-
  5 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10016	-	4	10017	+
  6 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
  7 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10004	-	1	10005	+
  8 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
  9 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
 10 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	+	2	10016	-
 11 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	+	12	10005	-
 12 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10004	-	1	10005	+
 13 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
 14 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
 15 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10016	-	4	10017	+
 16 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
 17 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
 18 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	-	12	10006	+
 19 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10016	-	4	10017	+
 20 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10016	-	4	10017	+
 21 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10017	+	4	10016	-
 22 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
 23 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
 24 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
 25 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
 26 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10016	-	4	10017	+
 27 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10004	-	1	10005	+
 28 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
 29 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
 30 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
 31 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10018	+
 32 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10004	-	1	10005	+
 33 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10005	-
 34 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
 35 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
 36 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	+	12	10005	-
 37 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10004	-	1	10005	+
 38 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	+	2	10016	-
 39 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
 40 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
 41 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10016	+
 42 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
 43 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10016	-	4	10017	+
 44 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10016	-	4	10017	+
 45 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
 46 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
 47 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
 48 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
 49 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
 50 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
 51 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10016	-	4	10017	+
 52 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
 53 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10016	-	4	10017	+
 54 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
 55 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
 56 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
 57 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
 58 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	+	12	10005	-
 59 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10016	+
 60 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10004	-	1	10005	+
 61 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
 62 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10004	-	1	10005	+
 63 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	-	8	10006	+
 64 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10016	+
 65 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	-	12	10006	+
 66 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10005	-	12	10006	+
 67 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	+	12	10005	-
 68 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
 69 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	+	12	10005	-
 70 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	+	12	10005	-
 71 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
 72 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
 73 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
 74 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
 75 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
 76 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
 77 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10017	+	4	10016	-
 78 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
 79 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
 80 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
 81 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
 82 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
 83 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10004	-	1	10005	+
 84 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
 85 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
 86 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	+	12	10005	-
 87 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	+	12	10005	-
 88 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
 89 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
 90 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
 91 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	+	12	10005	-
 92 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	+	12	10005	-
 93 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
 94 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
 95 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10004	-	1	10005	+
 96 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10004	-	1	10005	+
 97 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
 98 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
 99 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10016	-	4	10017	+
100 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
101 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
102 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
103 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
104 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
105 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
106 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10016	-	4	10017	+
107 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10016	+
108 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	+	12	10004	-
109 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	-	12	10006	+
110 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
111 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
112 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
113 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
114 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10018	+
115 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	+	12	10005	-
116 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
117 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
118 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	-	12	10006	+
119 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10017	+	2	10016	-
120 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
121 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
122 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
123 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
124 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
125 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
126 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
127 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10005	-	12	10006	+
128 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
129 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
130 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
131 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
132 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
133 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10018	+
134 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10004	-	1	10005	+
135 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
136 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10005	-
137 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
138 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10005	-	12	10006	+
139 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
140 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	+	2	10016	-
141 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
142 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
143 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
144 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	+	2	10016	-
145 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
146 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
147 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
148 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
149 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10016	-	4	10017	+
150 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
151 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10016	-	4	10017	+
152 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	-	12	10006	+
153 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	-	12	10006	+
154 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10004	-	1	10005	+
155 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
156 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
157 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
158 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10017	+	2	10016	-
159 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
160 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10017	+	2	10016	-
161 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
162 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
163 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
164 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
165 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10016	+
166 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10016	-	4	10017	+
167 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	+	2	10016	-
168 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
169 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
170 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
171 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
172 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
173 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10004	-	1	10005	+
174 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
175 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
176 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
177 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10017	+	2	10016	-
178 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10004	-	1	10005	+
179 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10004	-	1	10005	+
180 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
181 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10017	+	2	10016	-
182 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
183 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
184 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10016	-	4	10017	+
185 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10017	+	2	10016	-
186 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10017	+	2	10016	-
187 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
188 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
189 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
190 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
191 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
192 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	-	1	10005	+
193 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
194 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10017	+	2	10016	-
195 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
196 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
197 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10017	+	4	10016	-
198 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
199 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
200 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
201 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
202 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	+	12	10005	-
203 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10016	-	4	10017	+
204 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10017	+	2	10016	-
205 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10004	-
206 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
207 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	-	12	10006	+
208 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	-	12	10006	+
209 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
210 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
211 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	+	12	10005	-
212 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
213 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
214 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	+	12	10005	-
215 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
216 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10004	-	1	10005	+
217 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
218 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10016	-	4	10017	+
219 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
220 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	4	10016	-	4	10017	+
221 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10006	+	1	10005	-
222 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
223 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	+	2	10016	-
224 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	+	2	10016	-
225 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
226 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
227 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
228 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
229 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
230 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10004	-	1	10005	+
231 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	-	8	10006	+
232 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	+	2	10016	-
233 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	-	12	10006	+
234 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	+	12	10005	-
235 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10017	+	2	10016	-
236 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	+	12	10005	-
237 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
238 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	-	1	10006	+
239 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10018	+
240 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	+	2	10016	-
241 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
242 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
243 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10004	-	1	10005	+
244 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
245 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
246 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
247 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	-	12	10006	+
248 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	+	2	10016	-
249 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10004	-	1	10005	+
250 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10005	-	8	10006	+
251 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10005	-
252 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10017	+	2	10016	-
253 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	+	1	10004	-
254 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	+	12	10005	-
255 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
256 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
257 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
258 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	-	8	10006	+
259 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	2	10016	-	2	10017	+
260 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10005	-	1	10005	+
261 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	-	12	10006	+
262 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	-	12	10006	+
263 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
264 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
265 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	1	10004	-	1	10005	+
266 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	12	10006	-	12	10006	+
267 | TestSample	GAGTCCGAGCAGAAGAAGAANGG	TestCell	TestSample_sorted.bam	8	10006	+	8	10005	-
268 | 


--------------------------------------------------------------------------------
/test/data/StandardOutput/identified/TestSample_count.txt:
--------------------------------------------------------------------------------
 1 | #Chromosome	zero_based_Position	Nuclease_Position_Reads	Control_Position_Reads	Nuclease_Window_Reads	Control_Window_Reads	p_Value	narrow_p_Value	control_p_Value	control_narrow_p_Value
 2 | 1	10004	47.0	0.0	108.0	0.0	0.0	0.0	0.0	0.0
 3 | 1	10005	59.0	0.0	108.0	0.0	0.0	0.0	0.0	0.0
 4 | 1	10006	2.0	0.0	108.0	0.0	0.0	0.0	0.0	0.0
 5 | 8	10004	1.0	0.0	126.0	0.0	0.0	0.0	0.0	0.0
 6 | 8	10005	59.0	0.0	126.0	0.0	0.0	0.0	0.0	0.0
 7 | 8	10006	66.0	0.0	126.0	0.0	0.0	0.0	0.0	0.0
 8 | 12	10004	1.0	0.0	70.0	0.0	0.0	0.0	0.0	0.0
 9 | 12	10005	21.0	0.0	70.0	0.0	0.0	0.0	0.0	0.0
10 | 12	10006	48.0	0.0	70.0	0.0	0.0	0.0	0.0	0.0
11 | 2	10016	107.0	0.0	184.0	0.0	0.0	0.0	0.0	0.0
12 | 2	10017	73.0	0.0	184.0	0.0	0.0	0.0	0.0	0.0
13 | 2	10018	4.0	0.0	184.0	0.0	0.0	0.0	0.0	0.0
14 | 4	10016	22.0	0.0	44.0	0.0	0.0	0.0	0.0	0.0
15 | 4	10017	22.0	0.0	44.0	0.0	0.0	0.0	0.0	0.0
16 | 


--------------------------------------------------------------------------------
/test/data/StandardOutput/identified/TestSample_identified_matched.txt:
--------------------------------------------------------------------------------
1 | Chromosome	Start	End	Name	ReadCount	Strand	MappingPositionStart	MappingPositionEnd	WindowName	WindowSequence	Site_SubstitutionsOnly.Sequence	Site_SubstitutionsOnly.NumSubstitutions	Site_SubstitutionsOnly.Strand	Site_SubstitutionsOnly.Start	Site_SubstitutionsOnly.End	Site_GapsAllowed.Sequence	Site_GapsAllowed.Length	Site_GapsAllowed.Score	Site_GapsAllowed.Substitutions	Site_GapsAllowed.Insertions	Site_GapsAllowed.Deletions	Site_GapsAllowed.Strand	Site_GapsAllowed.Start	Site_GapsAllowed.End	FileName	Cell	Targetsite	FullName	TargetSequence	RealignedTargetSequence	Position.Pvalue	Narrow.Pvalue	Position.Control.Pvalue	Narrow.Control.Pvalue
2 | 12	10000	10023	12:10000-10023	70	-	10004	10007	12:[10004,10007)/.	TTTTCACTTTCCTTTACCATTTTTTTTCTGCTCTAACTCTACC	GAGTTAGAGCAGAAAAAAAATGG	4	-	10000	10023										TestSample_sorted.bam	TestCell	TestSample	TestSample_TestCell_12:10000-10023_70	GAGTCCGAGCAGAAGAAGAANGG	none	0.0	0.0	0.0	0.0
3 | 1	10000	10023	1:10000-10023	108	-	10004	10007	1:[10004,10007)/.	GAACTTGCGGAAGGTCCGCTTCTTCTTCTGCTCTACTTCTGCC	GAAGTAGAGCAGAAGAAGAAGCG	5	-	10000	10023	AAGTA-GAGCAGAAGAAGAAGCG	22	6	3	0	1	-	10000	10022	TestSample_sorted.bam	TestCell	TestSample	TestSample_TestCell_1:10000-10023_108	GAGTCCGAGCAGAAGAAGAANGG	GAGTCCGAGCAGAAGAAGAANGG	0.0	0.0	0.0	0.0
4 | 2	10000	10023	2:10000-10023	184	+	10016	10019	2:[10016,10019)/.	GCCTGAGTCCGAGCAGAAGAAGAAGGGCTCCCATCACATCAAC	GAGTCCGAGCAGAAGAAGAAGGG	0	+	10000	10023										TestSample_sorted.bam	TestCell	TestSample	TestSample_TestCell_2:10000-10023_184	GAGTCCGAGCAGAAGAAGAANGG	none	0.0	0.0	0.0	0.0
5 | 4	10000	10023	4:10000-10023	44	+	10016	10018	4:[10016,10018)/.	TGATCACTCCAAGTAGAAGAAGAAAAGCTAGCTTCCATATAA	CACTCCAAGTAGAAGAAGAAAAG	5	+	10000	10023										TestSample_sorted.bam	TestCell	TestSample	TestSample_TestCell_4:10000-10023_44	GAGTCCGAGCAGAAGAAGAANGG	none	0.0	0.0	0.0	0.0
6 | 8	10000	10023	8:10000-10023	126	-	10004	10007	8:[10004,10007)/.	GCACTAGAATCCCAGGCCATTACTCTTCTGCTTGGCCTTTTGG	AAGGCCAAGCAGAAGAGTAATGG	5	-	10000	10023										TestSample_sorted.bam	TestCell	TestSample	TestSample_TestCell_8:10000-10023_126	GAGTCCGAGCAGAAGAAGAANGG	none	0.0	0.0	0.0	0.0
7 | 


--------------------------------------------------------------------------------
/test/data/StandardOutput/identified/TestSample_identified_unmatched.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/StandardOutput/identified/TestSample_identified_unmatched.txt


--------------------------------------------------------------------------------
/test/data/StandardOutput/variants/TestSample_Variants.txt:
--------------------------------------------------------------------------------
1 | Chromosome	Start	End	Name	ReadCount	Strand	Variant_WindowSequence	Variant_Site_SubstitutionsOnly.Sequence	Variant_Site_SubstitutionsOnly.NumSubstitutions	Variant_Site_SubstitutionsOnly.Strand	Variant_Site_GapsAllowed.Sequence	Variant_Site_GapsAllowed.Length	Variant_Site_GapsAllowed.Substitutions	Variant_Site_GapsAllowed.Insertions	Variant_Site_GapsAllowed.Deletions	Variant_Site_GapsAllowed.Strand	Cell	Targetsite	TargetSequence	Variant_RealignedTargetSequence	Reference	Variant	Genotype	Quality
2 | 4	10000	10023	4:10000-10023	44	+	TGATCACTCCAAGcAGAAGAAGAAAAGCTAGCTTCCATATAA	CACTCCAAGcAGAAGAAGAAAAG	4	+							TestCell	TestSample	GAGTCCGAGCAGAAGAAGAANGG	none	T	C	1|1	162.998
3 | 


--------------------------------------------------------------------------------
/test/data/StandardOutput/variants/TestSample_mpileupCall.txt:
--------------------------------------------------------------------------------
1 | targetsite	site_name	chromosome	one_based_position	reference	variant	quality	genotype	depth	PL
2 | TestSample	TestSample_12:10000-10023	12	10000	A	T	162.998	1|1	25	196_75_0
3 | TestSample	TestSample_12:10000-10023	12	10025	A	G	119.008	0|1	23	149_0_159
4 | TestSample	TestSample_4:10000-10023	4	10010	T	C	162.998	1|1	23	196_69_0
5 | 


--------------------------------------------------------------------------------
/test/data/StandardOutput/visualization/TestSample_offtargets.svg:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8" ?>
2 | <svg baseProfile="full" height="196" version="1.1" width="100%" xmlns="http://www.w3.org/2000/svg" xmlns:ev="http://www.w3.org/2001/xml-events" xmlns:xlink="http://www.w3.org/1999/xlink"><defs /><text style="font-size:20px; font-family:Courier" x="20" y="30">TestSample</text><text style="font-size:10px; font-family:Courier" x="22" y="48">20</text><text style="font-size:10px; font-family:Courier" x="157" y="48">10</text><text style="font-size:10px; font-family:Courier" x="307" y="48">1</text><text style="font-size:10px; font-family:Courier" x="322" y="48">P</text><text style="font-size:10px; font-family:Courier" x="337" y="48">A</text><text style="font-size:10px; font-family:Courier" x="352" y="48">M</text><rect fill="#F5F500" height="15" width="15" x="20" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="23" y="62">G</text><rect fill="#FF5454" height="15" width="15" x="35" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="38" y="62">A</text><rect fill="#F5F500" height="15" width="15" x="50" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="53" y="62">G</text><rect fill="#00D118" height="15" width="15" x="65" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="68" y="62">T</text><rect fill="#26A8FF" height="15" width="15" x="80" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="83" y="62">C</text><rect fill="#26A8FF" height="15" width="15" x="95" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="98" y="62">C</text><rect fill="#F5F500" height="15" width="15" x="110" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="113" y="62">G</text><rect fill="#FF5454" height="15" width="15" x="125" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="128" y="62">A</text><rect fill="#F5F500" height="15" width="15" x="140" y="50" /><text fill="black" style="font-size:15px; 
font-family:Courier" x="143" y="62">G</text><rect fill="#26A8FF" height="15" width="15" x="155" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="158" y="62">C</text><rect fill="#FF5454" height="15" width="15" x="170" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="173" y="62">A</text><rect fill="#F5F500" height="15" width="15" x="185" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="188" y="62">G</text><rect fill="#FF5454" height="15" width="15" x="200" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="203" y="62">A</text><rect fill="#FF5454" height="15" width="15" x="215" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="218" y="62">A</text><rect fill="#F5F500" height="15" width="15" x="230" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="233" y="62">G</text><rect fill="#FF5454" height="15" width="15" x="245" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="248" y="62">A</text><rect fill="#FF5454" height="15" width="15" x="260" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="263" y="62">A</text><rect fill="#F5F500" height="15" width="15" x="275" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="278" y="62">G</text><rect fill="#FF5454" height="15" width="15" x="290" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="293" y="62">A</text><rect fill="#FF5454" height="15" width="15" x="305" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="308" y="62">A</text><rect fill="#B3B3B3" height="15" width="15" x="320" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="323" y="62">N</text><rect fill="#F5F500" height="15" width="15" x="335" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="338" y="62">G</text><rect fill="#F5F500" height="15" width="15" 
x="350" y="50" /><text fill="black" style="font-size:15px; font-family:Courier" x="353" y="62">G</text><text style="font-size:15px; font-family:Courier" x="381" y="62">Reads</text><text fill="black" style="font-size:10px; font-family:Courier" x="24.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="39.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="54.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="69.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="84.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="99.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="114.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="129.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="144.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="159.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="174.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="189.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="204.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="219.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="234.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="249.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="264.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="279.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="294.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="309.5" y="92">•</text><text fill="black" style="font-size:15px; font-family:Courier" 
x="323" y="93">G</text><text fill="black" style="font-size:10px; font-family:Courier" x="339.5" y="92">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="354.5" y="92">•</text><text fill="black" style="font-size:15px; font-family:Courier" x="380" y="94">184</text><rect fill="#FF5454" height="15" width="15" x="20" y="96" /><text fill="black" style="font-size:15px; font-family:Courier" x="23" y="108">A</text><text fill="black" style="font-size:10px; font-family:Courier" x="39.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="54.5" y="107">•</text><rect fill="#F5F500" height="15" width="15" x="65" y="96" /><text fill="black" style="font-size:15px; font-family:Courier" x="68" y="108">G</text><text fill="black" style="font-size:10px; font-family:Courier" x="84.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="99.5" y="107">•</text><rect fill="#FF5454" height="15" width="15" x="110" y="96" /><text fill="black" style="font-size:15px; font-family:Courier" x="113" y="108">A</text><text fill="black" style="font-size:10px; font-family:Courier" x="129.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="144.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="159.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="174.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="189.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="204.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="219.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="234.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="249.5" y="107">•</text><rect fill="#F5F500" height="15" width="15" x="260" y="96" /><text fill="black" style="font-size:15px; 
font-family:Courier" x="263" y="108">G</text><rect fill="#00D118" height="15" width="15" x="275" y="96" /><text fill="black" style="font-size:15px; font-family:Courier" x="278" y="108">T</text><text fill="black" style="font-size:10px; font-family:Courier" x="294.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="309.5" y="107">•</text><text fill="black" style="font-size:15px; font-family:Courier" x="323" y="108">T</text><text fill="black" style="font-size:10px; font-family:Courier" x="339.5" y="107">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="354.5" y="107">•</text><text fill="black" style="font-size:15px; font-family:Courier" x="380" y="109">126</text><text fill="black" style="font-size:10px; font-family:Courier" x="24.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="39.5" y="122">•</text><rect fill="#FF5454" height="15" width="15" x="50" y="111" /><text fill="black" style="font-size:15px; font-family:Courier" x="53" y="123">A</text><rect fill="#F5F500" height="15" width="15" x="65" y="111" /><text fill="black" style="font-size:15px; font-family:Courier" x="68" y="123">G</text><rect fill="#00D118" height="15" width="15" x="80" y="111" /><text fill="black" style="font-size:15px; font-family:Courier" x="83" y="123">T</text><rect fill="#FF5454" height="15" width="15" x="95" y="111" /><text fill="black" style="font-size:15px; font-family:Courier" x="98" y="123">A</text><text fill="black" style="font-size:10px; font-family:Courier" x="114.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="129.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="144.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="159.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="174.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" 
x="189.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="204.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="219.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="234.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="249.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="264.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="279.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="294.5" y="122">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="309.5" y="122">•</text><text fill="black" style="font-size:15px; font-family:Courier" x="323" y="123">G</text><rect fill="#26A8FF" height="15" width="15" x="335" y="111" /><text fill="black" style="font-size:15px; font-family:Courier" x="338" y="123">C</text><text fill="black" style="font-size:10px; font-family:Courier" x="354.5" y="122">•</text><rect fill="#FF5454" height="15" width="15" x="20" y="126" /><text fill="black" style="font-size:15px; font-family:Courier" x="23" y="138">A</text><text fill="black" style="font-size:10px; font-family:Courier" x="39.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="54.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="69.5" y="137">•</text><rect fill="#FF5454" height="15" width="15" x="80" y="126" /><text fill="black" style="font-size:15px; font-family:Courier" x="83" y="138">A</text><rect fill="#B3B3B3" height="15" width="15" x="95" y="126" /><text fill="black" style="font-size:15px; font-family:Courier" x="98" y="138">-</text><text fill="black" style="font-size:10px; font-family:Courier" x="114.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="129.5" y="137">•</text><text fill="black" style="font-size:10px; 
font-family:Courier" x="144.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="159.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="174.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="189.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="204.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="219.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="234.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="249.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="264.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="279.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="294.5" y="137">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="309.5" y="137">•</text><text fill="black" style="font-size:15px; font-family:Courier" x="323" y="138">G</text><rect fill="#26A8FF" height="15" width="15" x="335" y="126" /><text fill="black" style="font-size:15px; font-family:Courier" x="338" y="138">C</text><text fill="black" style="font-size:10px; font-family:Courier" x="354.5" y="137">•</text><text fill="black" style="font-size:15px; font-family:Courier" x="380" y="131">108</text><text fill="black" style="font-size:23px; font-family:Courier" x="367" y="131">}</text><text fill="black" style="font-size:10px; font-family:Courier" x="24.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="39.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="54.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="69.5" y="152">•</text><rect fill="#00D118" height="15" width="15" x="80" y="141" /><text fill="black" style="font-size:15px; 
font-family:Courier" x="83" y="153">T</text><rect fill="#FF5454" height="15" width="15" x="95" y="141" /><text fill="black" style="font-size:15px; font-family:Courier" x="98" y="153">A</text><text fill="black" style="font-size:10px; font-family:Courier" x="114.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="129.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="144.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="159.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="174.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="189.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="204.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="219.5" y="152">•</text><rect fill="#FF5454" height="15" width="15" x="230" y="141" /><text fill="black" style="font-size:15px; font-family:Courier" x="233" y="153">A</text><text fill="black" style="font-size:10px; font-family:Courier" x="249.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="264.5" y="152">•</text><rect fill="#FF5454" height="15" width="15" x="275" y="141" /><text fill="black" style="font-size:15px; font-family:Courier" x="278" y="153">A</text><text fill="black" style="font-size:10px; font-family:Courier" x="294.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="309.5" y="152">•</text><text fill="black" style="font-size:15px; font-family:Courier" x="323" y="153">T</text><text fill="black" style="font-size:10px; font-family:Courier" x="339.5" y="152">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="354.5" y="152">•</text><text fill="black" style="font-size:15px; font-family:Courier" x="380" y="154">70</text><rect fill="#26A8FF" height="15" width="15" x="20" y="156" /><text fill="black" 
style="font-size:15px; font-family:Courier" x="23" y="168">C</text><text fill="black" style="font-size:10px; font-family:Courier" x="39.5" y="167">•</text><rect fill="#26A8FF" height="15" width="15" x="50" y="156" /><text fill="black" style="font-size:15px; font-family:Courier" x="53" y="168">C</text><text fill="black" style="font-size:10px; font-family:Courier" x="69.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="84.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="99.5" y="167">•</text><rect fill="#FF5454" height="15" width="15" x="110" y="156" /><text fill="black" style="font-size:15px; font-family:Courier" x="113" y="168">A</text><text fill="black" style="font-size:10px; font-family:Courier" x="129.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="144.5" y="167">•</text><rect fill="#00D118" height="15" width="15" x="155" y="156" /><text fill="black" style="font-size:15px; font-family:Courier" x="158" y="168">T</text><text fill="black" style="font-size:10px; font-family:Courier" x="174.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="189.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="204.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="219.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="234.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="249.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="264.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="279.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="294.5" y="167">•</text><text fill="black" style="font-size:10px; font-family:Courier" x="309.5" y="167">•</text><text fill="black" style="font-size:15px; font-family:Courier" 
x="323" y="168">A</text><rect fill="#FF5454" height="15" width="15" x="335" y="156" /><text fill="black" style="font-size:15px; font-family:Courier" x="338" y="168">A</text><text fill="black" style="font-size:10px; font-family:Courier" x="354.5" y="167">•</text><text fill="black" style="font-size:15px; font-family:Courier" x="380" y="169">44</text></svg>


--------------------------------------------------------------------------------
/test/data/input/CIRCLEseq_test_genome.fa.amb:
--------------------------------------------------------------------------------
1 | 100270 5 0
2 | 


--------------------------------------------------------------------------------
/test/data/input/CIRCLEseq_test_genome.fa.ann:
--------------------------------------------------------------------------------
 1 | 100270 5 11
 2 | 0 2 (null)
 3 | 0 20178 0
 4 | 0 8 (null)
 5 | 20178 20023 0
 6 | 0 1 (null)
 7 | 40201 20023 0
 8 | 0 12 (null)
 9 | 60224 20023 0
10 | 0 4 (null)
11 | 80247 20023 0
12 | 


--------------------------------------------------------------------------------
/test/data/input/CIRCLEseq_test_genome.fa.bwt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/input/CIRCLEseq_test_genome.fa.bwt


--------------------------------------------------------------------------------
/test/data/input/CIRCLEseq_test_genome.fa.fai:
--------------------------------------------------------------------------------
1 | 2	20178	3	20178	20179
2 | 8	20023	20185	20023	20024
3 | 1	20023	40212	20023	20024
4 | 12	20023	60240	20023	20024
5 | 4	20023	80267	20023	20024
6 | 


--------------------------------------------------------------------------------
/test/data/input/CIRCLEseq_test_genome.fa.pac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/input/CIRCLEseq_test_genome.fa.pac


--------------------------------------------------------------------------------
/test/data/input/CIRCLEseq_test_genome.fa.sa:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/input/CIRCLEseq_test_genome.fa.sa


--------------------------------------------------------------------------------
/test/data/input/TEST.r1.fastq.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/input/TEST.r1.fastq.gz


--------------------------------------------------------------------------------
/test/data/input/TEST.r2.fastq.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/input/TEST.r2.fastq.gz


--------------------------------------------------------------------------------
/test/data/input/TEST_control.r1.fastq.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/input/TEST_control.r1.fastq.gz


--------------------------------------------------------------------------------
/test/data/input/TEST_control.r2.fastq.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsailabSJ/changeseq/73e1424c3713140e0d04ba227786a0144e43faaa/test/data/input/TEST_control.r2.fastq.gz


--------------------------------------------------------------------------------
/test/scripts/CIRCLEseq_prepare_test_data.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | #####################################################################################
 3 | ### CIRCLEseq_prepare_test_data.sh: assemble the fastq files for the test
 4 | #####################################################################################
 5 | ### Regions
 6 | on_target="2:73160981-73161004"
 7 | off_target01="8:120587494-120587517"
 8 | off_target02="1:234492858-234492881"
 9 | off_target03="12:73504668-73504691"
10 | off_target04="4:48639390-48639413"
11 | hotspots="1:121485221-121485228"
12 | 
13 | ### Get the names of reads that overlap with the selected test regionsq
14 | samtools view sample.bam $on_target $off_target01 $off_target02 $off_target03 $off_target04 $hotspots | cut -f1 | sort | uniq > sample_read_names.txt
15 | samtools view control.bam $on_target $off_target01 $off_target02 $off_target03 $off_target04 $hotspots | cut -f1 | sort | uniq > control_read_names.txt
16 | cat sample_read_names.txt control_read_names.txt > read_names.txt
17 | 
18 | ### Subset FASTQs to extract _all_ read pairs where at least one of the reads falls in a specified test region
19 | zcat fastq/128_S3_L001_R1_001.fastq.gz | grep -F -A3 --no-group-separator -f read_names.txt | gzip -c > TEST.r1.fastq.gz
20 | zcat fastq/128_S3_L001_R2_001.fastq.gz | grep -F -A3 --no-group-separator -f read_names.txt | gzip -c > TEST.r2.fastq.gz
21 | zcat fastq/Negative_S1_L001_R1_001.fastq.gz | grep -F -A3 --no-group-separator -f read_names.txt | gzip -c > TEST_control.r1.fastq.gz
22 | zcat fastq/Negative_S1_L001_R2_001.fastq.gz | grep -F -A3 --no-group-separator -f read_names.txt | gzip -c > TEST_control.r2.fastq.gz
23 | 


--------------------------------------------------------------------------------
/test/scripts/CIRCLEseq_prepare_test_reference.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | #####################################################################################
 3 | ### CIRCLEseq_prepare_test_reference.sh: assemble the reference genome for the test
 4 | #####################################################################################
 5 | ### Get chromosomes 1, 2, 4, 8 and 12 (GRCh37)
 6 | wget ftp://ftp.ensembl.org/pub/grch37/current/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.dna.chromosome.1.fa.gz
 7 | wget ftp://ftp.ensembl.org/pub/grch37/current/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.dna.chromosome.2.fa.gz
 8 | wget ftp://ftp.ensembl.org/pub/grch37/current/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.dna.chromosome.4.fa.gz
 9 | wget ftp://ftp.ensembl.org/pub/grch37/current/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.dna.chromosome.8.fa.gz
10 | wget ftp://ftp.ensembl.org/pub/grch37/current/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.dna.chromosome.12.fa.gz
11 | 
12 | ### Assemble reference
13 | ### Concatenate only the downloaded chromosome archives: a bare *.fa.gz would also pick up
14 | ### any other FASTA archive in this directory, including a stale copy of the output file.
15 | cat Homo_sapiens.GRCh37.dna.chromosome.*.fa.gz > Homo_sapiens.GRCh37.subset.fa.gz
16 | gunzip Homo_sapiens.GRCh37.subset.fa.gz
17 | samtools faidx Homo_sapiens.GRCh37.subset.fa
18 | 
19 | ### Pad test regions with 10kb on either side
20 | bedtools slop -i CIRCLEseq_test.bed -g Homo_sapiens.GRCh37.subset.fa.fai -b 10000 > CIRCLEseq_test_padded.bed
21 | 
22 | ### Extract sequences from reference file for each padded interval
23 | bedtools getfasta -fi Homo_sapiens.GRCh37.subset.fa -bed CIRCLEseq_test_padded.bed -fo CIRCLEseq_test_genome.fa -name
24 | 


--------------------------------------------------------------------------------
/test/scripts/CIRCLEseq_test_bed.R:
--------------------------------------------------------------------------------
 1 | ######################################################################################################
 2 | ### CIRCLEseq_test_bed.R: make bed file (CIRCLEseq_test.bed) with regions including
 3 | ###                       on-target site, 2 off-target sites without variants,
 4 | ###                       2 off-target sites with variants, and 1 region without off-targets.
 5 | ######################################################################################################
 6 | ### One row per region; the name column repeats the chromosome
 7 | bed <- data.frame(
 8 |   chr   = c('2', '8', '1', '12', '4'),
 9 |   start = c(73160981, 120587494, 234492858, 73504668, 48639390),
10 |   end   = c(73161159, 120587517, 234492881, 73504691, 48639413),
11 |   name  = c('2', '8', '1', '12', '4')
12 | )
13 | 
14 | ### Tab-separated, without header, row names or quoting
15 | write.table(bed, file='CIRCLEseq_test.bed', sep='\t', quote=FALSE, row.names=FALSE, col.names=FALSE)
16 | 
17 | ### Information about the sites
18 | on_target <- "2:73160981-73161004"
19 | off_target01 <- "8:120587494-120587517"
20 | off_target02 <- "1:234492858-234492881"
21 | off_target_with_variantWindowOnly <- "12:73504668-73504691"
22 | off_target_with_variants <- "4:48639390-48639413"
23 | hotspots <- "2:73161104-73161159"
24 | ######################################################################################################
25 | ######################################################################################################
26 | 


--------------------------------------------------------------------------------
/test/scripts/Test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Run the whole pipeline ("all") once per test manifest.
3 | # All paths are relative, so run this from test/scripts; the entry point is changeseq/changeseq.py
4 | # at the repository root (the repository has no circleseq/ directory).
5 | python ../../changeseq/changeseq.py all --manifest ../CIRCLEseq_MergedTest.yaml
6 | 
7 | python ../../changeseq/changeseq.py all --manifest ../CIRCLEseq_StandardTest.yaml
8 | 


--------------------------------------------------------------------------------
/test/test_circleseq_merged.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | """
 5 | test_circleseq_merged
 6 | ----------------------------------
 7 | 
 8 | Tests for `circleseq` module.
 9 | """
10 | 
11 | import yaml
12 | import unittest
13 | import os
14 | import shutil
15 | import utils
16 | from circleseq import circleseq
17 | 
TEST_OUTPUT_PATH = 'tmp'

# Manifest handed to parseManifest(). Like the reference folders below it is
# a relative path, so it is resolved against the current working directory.
TEST_MANIFEST_PATH = os.path.join('CIRCLEseq_MergedTest.yaml')

# Reference output folders that the produced output is compared against.
CORRECT_ALIGNED_OUTPUT = 'data/MergedOutput/aligned'
CORRECT_IDENTIFIED_OUTPUT = 'data/MergedOutput/identified'
CORRECT_MERGED_OUTPUT = 'data/MergedOutput/merged'
CORRECT_VISUALIZATION_OUTPUT = 'data/MergedOutput/visualization'

CORRECT_ALL_OUTPUT = 'data/MergedOutput'


class FullPipelineTestCase(unittest.TestCase):
    """Run the pipeline one stage at a time and compare each stage's output
    folder with the corresponding reference folder."""

    def setUp(self):
        """No fixtures are needed."""

    def _stageOutputMatches(self, pipeline, subfolder, expected_folder):
        """Return whether `subfolder` of the pipeline's analysis folder has the
        same contents as `expected_folder`."""
        produced_folder = os.path.join(pipeline.analysis_folder, subfolder)
        return utils.checkFolderEquality(produced_folder, expected_folder)

    def testFullPipeline(self):
        pipeline = circleseq.CircleSeq()
        pipeline.parseManifest(TEST_MANIFEST_PATH)

        # Stage 1: align the reads, then check the alignment output.
        pipeline.alignReads()
        aligned_ok = self._stageOutputMatches(pipeline, 'aligned', CORRECT_ALIGNED_OUTPUT)
        self.assertTrue(aligned_ok)

        # Stage 2: find cleavage sites.
        pipeline.findCleavageSites()
        identified_ok = self._stageOutputMatches(pipeline, 'identified', CORRECT_IDENTIFIED_OUTPUT)
        self.assertTrue(identified_ok)

        # Stage 3: visualize the filtered sites.
        pipeline.visualize()
        visualization_ok = self._stageOutputMatches(pipeline, 'visualization', CORRECT_VISUALIZATION_OUTPUT)
        self.assertTrue(visualization_ok)

    def tearDown(self):
        """Nothing to clean up."""


if __name__ == '__main__':
    unittest.main()


--------------------------------------------------------------------------------
/test/test_circleseq_std.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | """
 5 | test_circleseq_std
 6 | ----------------------------------
 7 | 
 8 | Tests for `circleseq` module.
 9 | """
10 | 
11 | import yaml
12 | import unittest
13 | import os
14 | import shutil
15 | import utils
16 | from circleseq import circleseq
17 | 
TEST_OUTPUT_PATH = 'tmp'

# Manifest handed to parseManifest(). Like the reference folders below it is
# a relative path, so it is resolved against the current working directory.
TEST_MANIFEST_PATH = os.path.join('CIRCLEseq_StandardTest.yaml')

# Reference output folders that the produced output is compared against.
CORRECT_ALIGNED_OUTPUT = 'data/StandardOutput/aligned'
CORRECT_IDENTIFIED_OUTPUT = 'data/StandardOutput/identified'
CORRECT_VARIANTS_OUTPUT = 'data/StandardOutput/variants'
CORRECT_VISUALIZATION_OUTPUT = 'data/StandardOutput/visualization'

CORRECT_ALL_OUTPUT = 'data'


class FullPipelineTestCase(unittest.TestCase):
    """Run the pipeline one stage at a time and compare each stage's output
    folder with the corresponding reference folder."""

    def setUp(self):
        """No fixtures are needed."""

    def _stageOutputMatches(self, pipeline, subfolder, expected_folder):
        """Return whether `subfolder` of the pipeline's analysis folder has the
        same contents as `expected_folder`."""
        produced_folder = os.path.join(pipeline.analysis_folder, subfolder)
        return utils.checkFolderEquality(produced_folder, expected_folder)

    def testFullPipeline(self):
        pipeline = circleseq.CircleSeq()
        pipeline.parseManifest(TEST_MANIFEST_PATH)

        # Stage 1: align the reads, then check the alignment output.
        pipeline.alignReads()
        aligned_ok = self._stageOutputMatches(pipeline, 'aligned', CORRECT_ALIGNED_OUTPUT)
        self.assertTrue(aligned_ok)

        # Stage 2: find cleavage sites.
        pipeline.findCleavageSites()
        identified_ok = self._stageOutputMatches(pipeline, 'identified', CORRECT_IDENTIFIED_OUTPUT)
        self.assertTrue(identified_ok)

        # Stage 3: visualize the filtered sites.
        pipeline.visualize()
        visualization_ok = self._stageOutputMatches(pipeline, 'visualization', CORRECT_VISUALIZATION_OUTPUT)
        self.assertTrue(visualization_ok)

        # Stage 4: look for genomic variants.
        pipeline.callVariants()
        variants_ok = self._stageOutputMatches(pipeline, 'variants', CORRECT_VARIANTS_OUTPUT)
        self.assertTrue(variants_ok)

    def tearDown(self):
        """Nothing to clean up."""


if __name__ == '__main__':
    unittest.main()


--------------------------------------------------------------------------------
/test/utils.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | 
 3 | import os
 4 | import sys
 5 | import inspect
 6 | import filecmp
 7 | from itertools import islice
 8 | 
 9 | def checkFolderEquality(folder1, folder2):
10 |     """
11 |     Given two folders, check if there are the same number of files,
12 |     that the names of files are the same, and that the files with the same
13 |     names are the same.
14 |     """
15 | 
16 |     folder1_files = [x for x in os.listdir(folder1) if not x.startswith('.')]
17 |     folder2_files = [x for x in os.listdir(folder2) if not x.startswith('.')]
18 | 
19 |     if set(folder1_files) != set(folder2_files):
20 |         print('Folders do not have the same filenames.')
21 |         return False
22 | 
23 |     for f in folder1_files:
24 |         file1 = os.path.join(folder1, f)
25 |         file2 = os.path.join(folder2, f)
26 | 
27 |         if f.split('.')[-1] == 'sam':
28 |             with open(file1, 'r') as a, open(file2, 'r') as b:
29 |                 for line1, line2 in zip(a,b):
30 |                     if line1.startswith('@'):
31 |                         continue
32 |                     elif line1 != line2:
33 |                         return False
34 |         else:
35 |             if not filecmp.cmp(file1, file2):
36 |                 print('{0} does not match between folders.'.format(f))
37 |                 return False
38 | 
39 |     return True
40 | 
41 | 
def head(filepath, n=10):
    """
    Print the first `n` lines of the file at `filepath` to standard output.

    Lines are printed exactly as they appear in the file. A file with fewer
    than `n` lines is printed in full.
    """
    with open(filepath) as f:
        for line in islice(f, n):
            # Each line already ends with its own newline; suppress print's
            # so the output is not double-spaced.
            print(line, end='')
46 | 


--------------------------------------------------------------------------------