├── HowToCompile.txt ├── README.md ├── groundTruth ├── Readme.md └── generateTruthPipeline.py ├── requirements.txt ├── src ├── .ipynb_checkpoints │ └── Parameter Tuning-checkpoint.ipynb ├── Makefile ├── build │ └── Makefile ├── exception │ ├── .DS_Store │ ├── FileDoesNotExistException.cpp │ ├── FileDoesNotExistException.h │ ├── InvalidInputException.cpp │ ├── InvalidInputException.h │ ├── InvalidOperationException.cpp │ ├── InvalidOperationException.h │ ├── InvalidOrderOfOperationsException.cpp │ ├── InvalidOrderOfOperationsException.h │ ├── InvalidScoreException.cpp │ ├── InvalidScoreException.h │ ├── InvalidStateException.cpp │ └── InvalidStateException.h ├── nonltr │ ├── .DS_Store │ ├── .joseph_client.cpp.swp │ ├── ChromDetector.cpp │ ├── ChromDetector.h │ ├── ChromDetectorMaxima.cpp │ ├── ChromDetectorMaxima.h │ ├── ChromListMaker.cpp │ ├── ChromListMaker.h │ ├── Chromosome.cpp │ ├── Chromosome.h │ ├── ChromosomeOneDigit.cpp │ ├── ChromosomeOneDigit.h │ ├── ChromosomeRandom.cpp │ ├── ChromosomeRandom.h │ ├── DetectorMaxima.cpp │ ├── DetectorMaxima.h │ ├── EnrichmentMarkovView.cpp │ ├── EnrichmentMarkovView.h │ ├── HMM.cpp │ ├── HMM.h │ ├── IChromosome.h │ ├── ITableView.h │ ├── KmerHashTable.cpp │ ├── KmerHashTable.h │ ├── LocationList.cpp │ ├── LocationList.h │ ├── LocationListCollection.cpp │ ├── LocationListCollection.h │ ├── Scanner.cpp │ ├── Scanner.cpp~ │ ├── Scanner.h │ ├── Scanner.h~ │ ├── Scorer.cpp │ ├── Scorer.h │ ├── TableBuilder.cpp │ ├── TableBuilder.h │ ├── Trainer.cpp │ ├── Trainer.h │ └── joseph_client.cpp ├── test │ ├── AlignUtility.cpp │ └── TestTr.cpp ├── tr │ ├── .DS_Store │ ├── BackwardTr.cpp │ ├── BackwardTr.h │ ├── Candidate.cpp │ ├── Candidate.h │ ├── DetectorTr.cpp │ ├── DetectorTr.h │ ├── FilterTr.cpp │ ├── FilterTr.h │ ├── ForwardTr.cpp │ ├── ForwardTr.h │ ├── ITrVisitor.h │ ├── LtrTe.cpp │ ├── LtrTe.h │ ├── MatchTr.cpp │ ├── MatchTr.h │ ├── PairContainer.cpp │ ├── PairContainer.h │ ├── ScorerTr.cpp │ ├── ScorerTr.h │ ├── Tr.cpp │ 
├── Tr.h │ ├── TrCollector.cpp │ ├── TrCollector.h │ ├── TrCsVisitor.cpp │ ├── TrCsVisitor.h │ ├── TrKVisitor.cpp │ ├── TrKVisitor.h │ ├── TrPptVisitor.cpp │ ├── TrPptVisitor.h │ ├── TrSineVisitor.cpp │ └── TrSineVisitor.h └── utility │ ├── .DS_Store │ ├── AlignVisitor.h │ ├── EmptyLocation.cpp │ ├── EmptyLocation.h │ ├── EmptyTSD.cpp │ ├── EmptyTSD.h │ ├── EmptyTail.cpp │ ├── EmptyTail.h │ ├── GlobAlign.cpp │ ├── GlobAlign.h │ ├── GlobAlignE.cpp │ ├── GlobAlignE.h │ ├── ILocation.h │ ├── ITSD.h │ ├── ITail.cpp │ ├── ITail.h │ ├── LCS.cpp │ ├── LCS.h │ ├── LCSLen.cpp │ ├── LCSLen.h │ ├── LCSubStr.cpp │ ├── LCSubStr.h │ ├── LocAlign.cpp │ ├── LocAlign.h │ ├── LocAlignE.cpp │ ├── LocAlignE.h │ ├── LocAlignUtility.cpp │ ├── Location.cpp │ ├── Location.h │ ├── NW.cpp │ ├── NW.h │ ├── TSD.cpp │ ├── TSD.h │ ├── Tail.cpp │ ├── Tail.h │ ├── TailFinder.cpp │ ├── TailFinder.h │ ├── Util.cpp │ └── Util.h └── visualize.py /HowToCompile.txt: -------------------------------------------------------------------------------- 1 | Compiling the source code 2 | 3 | Requirement: GNU gcc8.2 or higher. Please change the name (CXX) of the compiler 4 | in the Makefile. 5 | 6 | If on MacOS, please uncomment the compiler flags (CXXFLAGS) in the Makefile. (Marked #MacOS). 
7 | 8 | On MacOS you may need to run: 9 | > brew install libomp 10 | 11 | To locate the source directory: 12 | > cd src 13 | 14 | The following command makes the required directories: 15 | > make bin 16 | 17 | The following command makes the binary that is located under the ``bin'' directory: 18 | > make tr -j 19 | 20 | To find the binary: 21 | > cd ../bin 22 | 23 | To find help information: 24 | > LtrDetector -help 25 | 26 | Example run with the defaults: 27 | > LtrDetector -chromDir ~/Data/zmays/Fa -destDir ~/Data/zmays/detector 28 | 29 | Example run on 4 cores 30 | > LtrDetector -chromDir ~/Data/thaliana/Fa -destDir ~/Data/thaliana/detector -nThreads 4 31 | 32 | 33 | To run visualization tool: 34 | Step 1: Run LtrDetector with these three flags -rawScores -cleanedScores -bedFormat 35 | > LtrDetector -chromDir ~/Data/thaliana/Fa -destDir ~/Data/thaliana/detector -rawScores -cleanedScores -bedFormat 36 | 37 | Step 2: To run the visualization tool on chr1, pass LTR-RTs found in chr1 (.bed), output directory where the graphs will be stored, and the scores file (raw or cleaned). 38 | > python visualize.py ~/Data/thaliana/detector/chr1Detector.bed ~/Data/thaliana/detector/test ~/Data/thaliana/detector/chr1RawScores.csv -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LtrDetector 2 | 3 | https://www.biorxiv.org/content/biorxiv/early/2018/10/22/448969.full.pdf 4 | 5 | 6 | 7 | ## Compiling source code 8 | 9 | You must have the GNU compiler. Follow the steps to obtain a compatible version. 
10 | 11 | ### MacOS 12 | Use HomeBrew: 13 | ``` brew install gcc``` 14 | 15 | Run this command to see list of gcc files installed by HomeBrew: 16 | ```brew list gcc ``` 17 | 18 | Example: 19 | joseph:LtrDetector jdv$ brew list gcc 20 | 21 | /usr/local/Cellar/gcc/8.2.0/bin/c++-8 22 | 23 | /usr/local/Cellar/gcc/8.2.0/bin/cpp-8 24 | 25 | /usr/local/Cellar/gcc/8.2.0/bin/g++-8 26 | 27 | On Makefile, change the CXX value to g++-Version as indicated by the listing. In the example above, 28 | CXX = g++-8 29 | 30 | ### Linux (Ubuntu) 31 | The GNU compiler comes standard. If you do not have it, run: 32 | 33 | ``` sudo apt-get install gcc ``` 34 | 35 | ## Compile 36 | 37 | To locate the source directory: 38 | ``` cd src ``` 39 | 40 | The following command makes the required directories: 41 | ``` make bin``` 42 | 43 | The following command makes the binary that is located under the ``bin`` directory: 44 | ``` make tr -j``` 45 | 46 | To find the binary: 47 | ```cd ../bin``` 48 | 49 | To find help information: 50 | ``` LtrDetector -help ``` 51 | 52 | 53 | ### Example run with the defaults: 54 | 55 | ```LtrDetector -chromDir ~/Data/zmays/Fa -destDir ~/Data/zmays/detector``` 56 | 57 | ### Example run on 4 cores: 58 | 59 | ```LtrDetector -chromDir ~/Data/thaliana/Fa -destDir ~/Data/thaliana/detector -nThreads 4``` 60 | 61 | ### Output Explanation 62 | 63 | LtrDetector output is formatted as a BED file with 18 columns, which correspond to: 64 | 65 | 1. Sequence identifier. 66 | 2. Retrotransposon start coordinate within sequence. 67 | 3. Retrotransposon end coordinate within sequence. 68 | 4. Left LTR start coordinate. 69 | 5. Left LTR end coordinate. 70 | 6. Right LTR start coordinate. 71 | 7. Right LTR end coordinate. 72 | 8. % Identity between left and right LTRs (0-100). 73 | 9. Left Target Site Duplication start coordinate. 74 | 10. Left Target Site Duplication end coordinate. 75 | 11. Right Target Site Duplication start coordinate. 76 | 12. 
Right Target Site Duplication end coordinate. 77 | 13. Polypurine Tract start coordinate. 78 | 14. Polypurine Tract end coordinate. 79 | 15. Strand on chromosome (+ or -). 80 | 16. Percentage of purines in Polypurine Tract (0-100). 81 | 17. TG motif start coordinate. 82 | 18. CA motif end coordinate. 83 | 84 | Dashes on any column pertaining to the Target Site Duplication or Polypurine Tract indicate that no such element was found. 85 | 86 | This output format provides up to three alternatives for the boundary of the element. Columns 2 and 3 are set to be immediately inside of the TSDs if they exist, 87 | otherwise they are the boundaries denoted by the alignment adjustment step. Columns 4 and 7 report the alignment 88 | adjusted boundaries directly. Columns 17 and 18 report the boundaries based on the TG..CA box, if one exists. 89 | 90 | 91 | ### To run visualization tool: 92 | 93 | Step 1: Run LtrDetector with these three flags -rawScores -cleanedScores -bedFormat 94 | 95 | ```LtrDetector -chromDir ~/Data/thaliana/Fa -destDir ~/Data/thaliana/detector -rawScores -cleanedScores -bedFormat``` 96 | 97 | Step 2: Create a virtual environment using the venv module of Python 3 98 | ``` python3 -m venv myEnvironment``` 99 | myEnvironment is the name of a folder that will house your virtual environment. You can name it whatever you want. 100 | 101 | Step 3: Activate virtual environment 102 | ``` source myEnvironment/bin/activate ``` 103 | You must now run the following commands from the same terminal. If you want to use multiple terminals, the virtual environment must be activated in all of them. 104 | 105 | Step 4: Install python dependencies using the provided requirements.txt file. 106 | ``` pip install -r requirements.txt ``` 107 | 108 | Step 5: To run the visualization tool on chr1, pass LTR-RTs found in chr1 (.bed), output directory where the graphs will be stored, and the scores file (raw or cleaned). 
109 | 110 | ```python visualize.py ~/Data/thaliana/detector/chr1Detector.bed ~/Data/thaliana/detector/test ~/Data/thaliana/detector/chr1RawScores.csv``` 111 | 112 | | -arg | Description | Default | 113 | | ---------------- | ----------- | ------- | 114 | | -chromDir | Directory containing files to be scanned. Files must have .fa extension. | required | 115 | | -destDir | Output directory where the results are stored. | required | 116 | ( IMPORTANT: Files under the output directory are deleted at the start of the program.) 117 | | -minLen | Minimum length of complete LTR-RTs. Constrains scoring system and filters. | 400 | 118 | | -maxLen | Maximum length of complete LTR-RTs. Constrains scoring system and filters. | 22000 | 119 | | -minLenLTR | Minimum length of LTR direct repeat. Constrains filters. | 100 | 120 | | -maxLenLTR | Maximum length of LTR direct repeat. Constrains filters. | 6000 | 121 | ( Note run time is highly dependent on this parameter, as it provides an upper bound for alignment length in the boundary adjustment step.) 122 | | -id | Minimum identity [0-100] between 5' and 3' LTRs. | 85 | 123 | | -k | Length of k-mers to adjust scoring system. Tradeoff between noise and resistance to mutation. | 13 | 124 | | -plateauSeed | Minimum length of plateaus to be initially considered 'Keep' in merging step. | 10 | 125 | | -nThreads | Number of cores to be used. | 1 | 126 | | -gapTol | Number of base pairs that two plateaus can differ by in height/distance. Affects both plateau merging and pairing steps. | 200 | 127 | |-seqLevel| Forces parallel execution on sequences within multi-FASTA file. Loads all sequences into memory|disabled| 128 | | -rawScores | prints the raw scores to a file called xxxxRawScores.txt under the output directory. | disabled | 129 | | -cleanedScores | prints the scores after merging to a file called xxxxCleanedScores.txt under the output directory. | disabled | 130 | | -nested | searches for nested elements. 
Results are stored in separate files (marked as xxxxNestedDetector.bed) under the output directory | disabled | 131 | | -bedFormat | prints BED format without additional annotations (PPT and TSD). | disabled | 132 | | -help | prints this help message. | disabled | 133 | 134 | 135 | ### License 136 | 137 | Academic/non-profit use: The software is provided as-is under the GNU GPLv3. 138 | 139 | Any restrictions to use for-profit: License needed. 140 | -------------------------------------------------------------------------------- /groundTruth/Readme.md: -------------------------------------------------------------------------------- 1 | ### Build Ground Truth 2 | 3 | 1. Establish a parent directory Ex: zmays 4 | 5 | 2. Place all FASTA files (extension .fa) into /Fa Ex: zmays/Fa 6 | 7 | 3. Place all RepeatMasker output files into /Out Ex: zmays/Out 8 | All RepeatMasker files must start with the name of the chromosome (can exclude .fa) Ex: chromosome1.fa -> chromosome1.out or chrX.fa -> chrX.fa.out 9 | 10 | 4. Obtain the Repbase download of all LTRs for the given genome in FASTA format. Ex: Zea_mays_LTR_Retrotransposon.txt 11 | 12 | 5. 
```python generateTruthPipeline.py ``` 13 | Ex: ```python generateTruthPipeline.py zmays Zea_mays_LTR_Retrotransposon.txt``` -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib==3.0.3 2 | numpy==1.16.2 3 | pandas==0.24.2 4 | -------------------------------------------------------------------------------- /src/exception/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioinformaticsToolsmith/LtrDetector/c22633037e5c6740098227df7a1b2ead1ef21ceb/src/exception/.DS_Store -------------------------------------------------------------------------------- /src/exception/FileDoesNotExistException.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * FileDoesNotExistException.cpp 3 | * 4 | * Created on: Apr 30, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #include "FileDoesNotExistException.h" 9 | 10 | #include 11 | #include 12 | 13 | using namespace std; 14 | 15 | namespace exception{ 16 | 17 | FileDoesNotExistException::FileDoesNotExistException(string massage) { 18 | cerr << "File Does Not Exist Exception" << endl; 19 | cerr << massage << endl; 20 | } 21 | 22 | FileDoesNotExistException::~FileDoesNotExistException() { 23 | // TODO Auto-generated destructor stub 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/exception/FileDoesNotExistException.h: -------------------------------------------------------------------------------- 1 | /* 2 | * FileDoesNotExistException.h 3 | * 4 | * Created on: Apr 30, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef FILEDOESNOTEXISTEXCEPTION_H_ 9 | #define FILEDOESNOTEXISTEXCEPTION_H_ 10 | 11 | #include 12 | 13 | using namespace std; 14 | 15 | namespace exception { 16 | class 
FileDoesNotExistException { 17 | public: 18 | FileDoesNotExistException(string); 19 | ~FileDoesNotExistException(); 20 | }; 21 | } 22 | 23 | #endif /* FILEDOESNOTEXISTEXCEPTION_H_ */ 24 | -------------------------------------------------------------------------------- /src/exception/InvalidInputException.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * InvalidInputException.cpp 3 | * 4 | * Created on: May 1, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #include "InvalidInputException.h" 9 | 10 | #include 11 | #include 12 | 13 | using namespace std; 14 | namespace exception{ 15 | 16 | InvalidInputException::InvalidInputException(string msg) { 17 | cerr << "Invalid Input Exception" << endl; 18 | cerr << msg << endl; 19 | } 20 | 21 | InvalidInputException::~InvalidInputException() { 22 | // TODO Auto-generated destructor stub 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/exception/InvalidInputException.h: -------------------------------------------------------------------------------- 1 | /* 2 | * InvalidInputException.h 3 | * 4 | * Created on: May 1, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef INVALIDINPUTEXCEPTION_H_ 9 | #define INVALIDINPUTEXCEPTION_H_ 10 | 11 | #include 12 | 13 | using namespace std; 14 | 15 | namespace exception { 16 | class InvalidInputException { 17 | public: 18 | InvalidInputException(string); 19 | ~InvalidInputException(); 20 | }; 21 | } 22 | 23 | #endif /* INVALIDINPUTEXCEPTION_H_ */ 24 | -------------------------------------------------------------------------------- /src/exception/InvalidOperationException.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * InvalidOperationException.cpp 3 | * 4 | * Created on: Dec 20, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #include 9 | #include "InvalidOperationException.h" 10 | 11 | 12 | 
namespace exception { 13 | 14 | InvalidOperationException::InvalidOperationException(string msg) : std::runtime_error(msg) { 15 | cerr << "Invalid Operation Exception." << endl; 16 | cerr << what() << endl; 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /src/exception/InvalidOperationException.h: -------------------------------------------------------------------------------- 1 | /* 2 | * InvalidOperationException.h 3 | * 4 | * Created on: Dec 20, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef INVALIDOPERATIONEXCEPTION_H_ 9 | #define INVALIDOPERATIONEXCEPTION_H_ 10 | 11 | #include 12 | #include 13 | 14 | using namespace std; 15 | 16 | namespace exception { 17 | 18 | class InvalidOperationException : public std::runtime_error{ 19 | public: 20 | InvalidOperationException(string msg); 21 | //virtual ~InvalidOperationException(); 22 | }; 23 | 24 | } 25 | 26 | #endif /* INVALIDOPERATIONEXCEPTION_H_ */ 27 | -------------------------------------------------------------------------------- /src/exception/InvalidOrderOfOperationsException.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * InvalidOrderOfOperationsException.cpp 3 | * 4 | * Created on: Apr 26, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #include "InvalidOrderOfOperationsException.h" 9 | 10 | #include 11 | #include 12 | 13 | using namespace std; 14 | namespace exception{ 15 | 16 | InvalidOrderOfOperationsException::InvalidOrderOfOperationsException(string massage) { 17 | cerr << "Invalid Order Of Operations Exception" << endl; 18 | cerr << massage << endl; 19 | } 20 | 21 | InvalidOrderOfOperationsException::~InvalidOrderOfOperationsException() { 22 | // TODO Auto-generated destructor stub 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/exception/InvalidOrderOfOperationsException.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * InvalidOrderOfOperationsException.h 3 | * 4 | * Created on: Apr 26, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef INVALIDORDEROFOPERATIONSEXCEPTION_H_ 9 | #define INVALIDORDEROFOPERATIONSEXCEPTION_H_ 10 | 11 | #include 12 | 13 | using namespace std; 14 | 15 | namespace exception{ 16 | class InvalidOrderOfOperationsException { 17 | public: 18 | InvalidOrderOfOperationsException(string); 19 | ~InvalidOrderOfOperationsException(); 20 | }; 21 | } 22 | 23 | #endif /* INVALIDORDEROFOPERATIONSEXCEPTION_H_ */ 24 | -------------------------------------------------------------------------------- /src/exception/InvalidScoreException.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * InvalidScoreException.cpp 3 | * 4 | * Created on: Apr 27, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #include "InvalidScoreException.h" 9 | 10 | #include 11 | #include 12 | 13 | using namespace std; 14 | namespace exception{ 15 | 16 | InvalidScoreException::InvalidScoreException(string massage) { 17 | cerr << "Invalid Score Exception." 
<< endl; 18 | cerr << massage << endl; 19 | } 20 | 21 | InvalidScoreException::~InvalidScoreException() { 22 | // TODO Auto-generated destructor stub 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/exception/InvalidScoreException.h: -------------------------------------------------------------------------------- 1 | /* 2 | * InvalidScoreException.h 3 | * 4 | * Created on: Apr 27, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef INVALIDSCOREEXCEPTION_H_ 9 | #define INVALIDSCOREEXCEPTION_H_ 10 | 11 | #include 12 | 13 | using namespace std; 14 | 15 | namespace exception{ 16 | class InvalidScoreException { 17 | public: 18 | InvalidScoreException(string); 19 | virtual ~InvalidScoreException(); 20 | }; 21 | } 22 | 23 | #endif /* INVALIDSCOREEXCEPTION_H_ */ 24 | -------------------------------------------------------------------------------- /src/exception/InvalidStateException.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * InvalidStateException.cpp 3 | * 4 | * Created on: Aug 9, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #include 9 | #include 10 | #include "InvalidStateException.h" 11 | 12 | using namespace std; 13 | 14 | 15 | namespace exception { 16 | InvalidStateException::InvalidStateException(string msg) : 17 | std::runtime_error(msg) { 18 | cerr << "Invalid State Exception." 
<< endl; 19 | cerr << what() << endl; 20 | } 21 | } 22 | 23 | //InvalidStateException::~InvalidStateException() { 24 | // TODO Auto-generated destructor stub 25 | //} 26 | -------------------------------------------------------------------------------- /src/exception/InvalidStateException.h: -------------------------------------------------------------------------------- 1 | /* 2 | * InvalidStateException.h 3 | * 4 | * Created on: Aug 9, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef INVALIDSTATEEXCEPTION_H_ 9 | #define INVALIDSTATEEXCEPTION_H_ 10 | 11 | #include 12 | #include 13 | 14 | using namespace std; 15 | 16 | namespace exception { 17 | class InvalidStateException : public std::runtime_error{ 18 | public: 19 | InvalidStateException(string); 20 | }; 21 | } 22 | 23 | #endif /* INVALIDSTATEEXCEPTION_H_ */ 24 | -------------------------------------------------------------------------------- /src/nonltr/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioinformaticsToolsmith/LtrDetector/c22633037e5c6740098227df7a1b2ead1ef21ceb/src/nonltr/.DS_Store -------------------------------------------------------------------------------- /src/nonltr/.joseph_client.cpp.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioinformaticsToolsmith/LtrDetector/c22633037e5c6740098227df7a1b2ead1ef21ceb/src/nonltr/.joseph_client.cpp.swp -------------------------------------------------------------------------------- /src/nonltr/ChromDetector.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * ChromDetector.cpp 3 | * 4 | * Created on: Nov 8, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #include 9 | 10 | #include "ChromDetector.h" 11 | #include "Detector.h" 12 | #include "../utility/Util.h" 13 | 14 | using namespace std; 15 | using namespace nonltr; 16 | using 
namespace utility; 17 | 18 | ChromDetector::ChromDetector(double s, double w, double pDelta, double b, 19 | double mDelta, vector * scores, 20 | const vector *> * segmentList) { 21 | 22 | regions = new vector *>(); 23 | 24 | for (int i = 0; i < segmentList->size(); i++) { 25 | Detector * detector = new Detector(segmentList->at(i)->at(0), 26 | segmentList->at(i)->at(1), s, w, pDelta, b, mDelta, scores); 27 | vector *> * segRegions = detector->getRegions(); 28 | regions->insert(regions->end(), segRegions->begin(), segRegions->end()); 29 | delete detector; 30 | } 31 | } 32 | 33 | ChromDetector::~ChromDetector() { 34 | Util::deleteInVector(regions); 35 | regions->clear(); 36 | delete regions; 37 | } 38 | 39 | vector *> * ChromDetector::getRegions() { 40 | return regions; 41 | } 42 | -------------------------------------------------------------------------------- /src/nonltr/ChromDetector.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ChromDetector.h 3 | * 4 | * Created on: Nov 8, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef CHROMDETECTOR_H_ 9 | #define CHROMDETECTOR_H_ 10 | 11 | #include 12 | 13 | using namespace std; 14 | 15 | namespace nonltr{ 16 | class ChromDetector { 17 | 18 | private: 19 | vector *> * regions; 20 | 21 | public: 22 | ChromDetector(double, double, double, double, double, vector *, 23 | const vector *> *); 24 | virtual ~ChromDetector(); 25 | vector *> * getRegions(); 26 | }; 27 | } 28 | 29 | #endif /* CHROMDETECTOR_H_ */ 30 | -------------------------------------------------------------------------------- /src/nonltr/ChromDetectorMaxima.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * ChromDetectorMaxima.cpp 3 | * 4 | * Created on: Jun 6, 2013 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #include "ChromDetectorMaxima.h" 9 | 10 | namespace nonltr { 11 | 12 | ChromDetectorMaxima::ChromDetectorMaxima(double s, double w, 
double m, 13 | double t, double p, int e, vector * oScores, 14 | ChromosomeOneDigit * chrom) { 15 | header = chrom->getHeader(); 16 | start(s, w, m, t, p, e, oScores, chrom->getSegment()); 17 | 18 | } 19 | 20 | ChromDetectorMaxima::ChromDetectorMaxima(double s, double w, double m, 21 | double t, double p, int e, vector * oScores, const vector *> * segmentList) { 23 | header = string("chrUnknown"); 24 | start(s, w, m, t, p, e, oScores, segmentList); 25 | } 26 | 27 | void ChromDetectorMaxima::start(double s, double w, double m, double t, 28 | double p, int e, vector * oScores, 29 | const vector *> * segmentList) { 30 | 31 | regionList = new vector (); 32 | 33 | int segmentCount = segmentList->size(); 34 | for (int i = 0; i < segmentCount; i++) { 35 | int segStart = segmentList->at(i)->at(0); 36 | int segEnd = segmentList->at(i)->at(1); 37 | 38 | // The effective length is shorter than the actual length by 2w 39 | int effLen = 2 * w + 10; 40 | int segLen = segEnd - segStart + 1; 41 | 42 | if (segLen > effLen) { 43 | DetectorMaxima * detector = new DetectorMaxima(segStart, segEnd, s, 44 | w, m, t, p, e, oScores); 45 | 46 | const vector * segRegions = detector->getRegionList(); 47 | int segRegionCount = segRegions->size(); 48 | for (int h = 0; h < segRegionCount; h++) { 49 | regionList->push_back(new Location(*(segRegions->at(h)))); 50 | } 51 | 52 | delete detector; 53 | } else { 54 | cout << "\tSkipping a short segment: "; 55 | cout << segStart << "-" << segEnd << endl; 56 | } 57 | } 58 | } 59 | 60 | ChromDetectorMaxima::~ChromDetectorMaxima() { 61 | Util::deleteInVector(regionList); 62 | regionList->clear(); 63 | delete regionList; 64 | } 65 | 66 | void ChromDetectorMaxima::printIndex(string outputFile) { 67 | printIndex(outputFile, false); 68 | } 69 | 70 | void ChromDetectorMaxima::printIndex(string outputFile, bool canAppend) { 71 | ofstream outIndex; 72 | 73 | if (canAppend) { 74 | outIndex.open(outputFile.c_str(), ios::out | ios::app); 75 | } else { 76 | 
outIndex.open(outputFile.c_str(), ios::out); 77 | } 78 | 79 | // Write the index of the repeat segment [x,y[ 80 | for (int j = 0; j < regionList->size(); j++) { 81 | outIndex << header << ":"; 82 | outIndex << ((int) (regionList->at(j)->getStart())) << "-"; 83 | outIndex << ((int) (regionList->at(j)->getEnd() + 1)) << " "; 84 | outIndex << endl; 85 | } 86 | 87 | outIndex.close(); 88 | } 89 | 90 | const vector* ChromDetectorMaxima::getRegionList() const { 91 | return regionList; 92 | } 93 | 94 | } /* namespace nonltr */ 95 | -------------------------------------------------------------------------------- /src/nonltr/ChromDetectorMaxima.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ChromDetectorMaxima.h 3 | * 4 | * Created on: Jun 6, 2013 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef CHROMDETECTORMAXIMA_H_ 9 | #define CHROMDETECTORMAXIMA_H_ 10 | 11 | #include 12 | #include 13 | 14 | #include "ChromosomeOneDigit.h" 15 | #include "DetectorMaxima.h" 16 | 17 | #include "../utility/Util.h" 18 | #include "../utility/ILocation.h" 19 | #include "../utility/Location.h" 20 | 21 | using namespace std; 22 | using namespace utility; 23 | 24 | namespace nonltr { 25 | 26 | class ChromDetectorMaxima { 27 | private: 28 | vector * regionList; 29 | string header; 30 | 31 | void start(double, double, double, double, double, int, vector *, 32 | const vector *> *); 33 | 34 | public: 35 | ChromDetectorMaxima(double, double, double, double, double, int, 36 | vector *, ChromosomeOneDigit *); 37 | ChromDetectorMaxima(double, double, double, double, double, int, 38 | vector *, const vector *> *); 39 | virtual ~ChromDetectorMaxima(); 40 | const vector* getRegionList() const; 41 | void printIndex(string); 42 | void printIndex(string, bool); 43 | 44 | }; 45 | 46 | } /* namespace nonltr */ 47 | #endif /* CHROMDETECTORMAXIMA_H_ */ 48 | -------------------------------------------------------------------------------- 
/src/nonltr/ChromListMaker.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * ChromListMaker.cpp 3 | * 4 | * Created on: Mar 13, 2014 5 | * Author: Hani Zakaira Girgis 6 | * Modified by Alfredo Velasco II 7 | */ 8 | 9 | #include "ChromListMaker.h" 10 | 11 | namespace nonltr { 12 | 13 | ChromListMaker::ChromListMaker(string seqFileIn) { 14 | seqFile = seqFileIn; 15 | chromList = new vector(); 16 | chromOList = new vector(); 17 | chromSplitMap = new unordered_map>(); 18 | chromOSplitMap = 19 | new unordered_map>(); 20 | limit = 0; 21 | passedLimit = false; 22 | } 23 | 24 | ChromListMaker::ChromListMaker(string seqFileIn, int limitIn) { 25 | seqFile = seqFileIn; 26 | chromList = new vector(); 27 | chromOList = new vector(); 28 | chromSplitMap = new unordered_map>(); 29 | chromOSplitMap = 30 | new unordered_map>(); 31 | 32 | limit = limitIn; 33 | if (limit < 1) { 34 | cerr << "Limit must be positive!" << endl; 35 | cerr << "`" << limit << "`" << " is invalid." 
<< endl; 36 | throw std::exception(); 37 | } 38 | passedLimit = false; 39 | } 40 | 41 | ChromListMaker::~ChromListMaker() { 42 | Util::deleteInVector(chromList); 43 | delete chromList; 44 | Util::deleteInVector(chromOList); 45 | delete chromOList; 46 | delete chromSplitMap; 47 | delete chromOSplitMap; 48 | } 49 | 50 | const vector * ChromListMaker::makeChromList() { 51 | ifstream in(seqFile.c_str()); 52 | bool isFirst = true; 53 | bool passedLimit = false; 54 | Chromosome * chrom; 55 | int seqLen = 0; 56 | int seqIndex = 0; 57 | while (in.good()) { 58 | string line; 59 | getline(in, line); 60 | 61 | if (line[0] == '>') { 62 | if (!isFirst) { 63 | if (passedLimit) { 64 | passedLimit = false; 65 | } 66 | pair splitRegion(chrom->getHeader(), seqIndex); 67 | seqLen = 0; 68 | seqIndex = 0; 69 | chrom->finalize(); 70 | if (chrom->getEffectiveSize() > 0) { 71 | chromList->push_back(chrom); 72 | chromSplitMap->emplace(chrom, splitRegion); 73 | } else { 74 | delete chrom; 75 | } 76 | } else { 77 | isFirst = false; 78 | } 79 | 80 | chrom = new Chromosome(); 81 | chrom->setHeader(line); 82 | } else { 83 | seqLen += line.size(); 84 | 85 | if (limit && seqLen > limit) { 86 | passedLimit = true; 87 | while (limit && seqLen > limit) { 88 | const string * base = chrom->getBase(); 89 | string lineSubStr = line.substr(0, 90 | line.size() - seqLen + limit); 91 | line = line.substr(line.size() - seqLen + limit); 92 | 93 | chrom->appendToSequence(lineSubStr); 94 | string oldHeader = chrom->getHeader(); 95 | int baseSize = chrom->getBase()->size(); 96 | 97 | pair splitRegion(chrom->getHeader(), seqIndex); 98 | 99 | chrom->finalize(); 100 | 101 | if (chrom->getEffectiveSize() > 0 102 | || chrom->getSegment()->size() != 0) { 103 | chromList->push_back(chrom); 104 | chromSplitMap->emplace(chrom, splitRegion); 105 | } else { 106 | delete chrom; 107 | } 108 | chrom = new Chromosome(); 109 | chrom->setHeader(oldHeader); 110 | seqLen = line.size(); 111 | seqIndex += baseSize; 112 | } 113 | if 
(line.size() > 0) { 114 | chrom->appendToSequence(line); 115 | } 116 | 117 | } else { 118 | chrom->appendToSequence(line); 119 | } 120 | } 121 | line.clear(); 122 | } 123 | if (passedLimit) { 124 | passedLimit = false; 125 | } 126 | pair splitRegion(chrom->getHeader(), seqIndex); 127 | chrom->finalize(); 128 | 129 | if (chrom->getEffectiveSize() > 0 || chrom->getSegment()->size() != 0) { 130 | chromList->push_back(chrom); 131 | chromSplitMap->emplace(chrom, splitRegion); 132 | } else { 133 | delete chrom; 134 | } 135 | 136 | in.close(); 137 | 138 | return chromList; 139 | } 140 | 141 | 142 | const vector * ChromListMaker::makeChromOneDigitList() { 143 | ifstream in(seqFile.c_str()); 144 | bool isFirst = true; 145 | 146 | ChromosomeOneDigit * chrom; 147 | int seqLen = 0; 148 | int seqIndex = 0; 149 | while (in.good()) { 150 | string line; 151 | getline(in, line); 152 | if (line[0] == '>') { 153 | if (!isFirst) { 154 | if (passedLimit) { 155 | passedLimit = false; 156 | } 157 | 158 | pair splitRegion(chrom->getHeader(), seqIndex); 159 | seqLen = 0; 160 | seqIndex = 0; 161 | chrom->finalize(); 162 | if (chrom->getEffectiveSize() > 0) { 163 | chromOList->push_back(chrom); 164 | chromOSplitMap->emplace(chrom, splitRegion); 165 | } else { 166 | delete chrom; 167 | } 168 | } else { 169 | isFirst = false; 170 | } 171 | 172 | chrom = new ChromosomeOneDigit(); 173 | chrom->setHeader(line); 174 | } else { 175 | seqLen += line.size(); 176 | if (limit && seqLen > limit) { 177 | 178 | passedLimit = true; 179 | while (limit && seqLen > limit) { 180 | const string * base = chrom->getBase(); 181 | string lineSubStr = line.substr(0, 182 | line.size() - seqLen + limit); 183 | line = line.substr(line.size() - seqLen + limit); 184 | 185 | chrom->appendToSequence(lineSubStr); 186 | 187 | string oldHeader = chrom->getHeader(); 188 | 189 | pair splitRegion(chrom->getHeader(), seqIndex); 190 | chrom->finalize(); 191 | int baseSize = chrom->getBase()->size(); 192 | 193 | if 
(chrom->getEffectiveSize() > 0 194 | || chrom->getSegment()->size() != 0) { 195 | chromOList->push_back(chrom); 196 | chromOSplitMap->emplace(chrom, splitRegion); 197 | } else { 198 | delete chrom; 199 | } 200 | 201 | chrom = new ChromosomeOneDigit(); 202 | chrom->setHeader(oldHeader); 203 | seqLen = line.size(); 204 | seqIndex += baseSize; 205 | } 206 | 207 | if (line.size() > 0) { 208 | chrom->appendToSequence(line); 209 | } 210 | 211 | } else { 212 | chrom->appendToSequence(line); 213 | } 214 | } 215 | } 216 | if (passedLimit) { 217 | passedLimit = false; 218 | } 219 | pair splitRegion(chrom->getHeader(), seqIndex); 220 | chrom->finalize(); 221 | 222 | if (chrom->getEffectiveSize() > 0 || chrom->getSegment()->size() != 0) { 223 | chromOList->push_back(chrom); 224 | chromOSplitMap->emplace(chrom, splitRegion); 225 | } else { 226 | delete chrom; 227 | } 228 | 229 | in.close(); 230 | return chromOList; 231 | } 232 | 233 | pair ChromListMaker::getStartOfChromosome(Chromosome * chrom) { 234 | if (chromSplitMap->find(chrom) != chromSplitMap->end()) { 235 | return chromSplitMap->at(chrom); 236 | } else { 237 | cerr << "Chromosome is not in the list!" << endl; 238 | throw std::exception(); 239 | } 240 | } 241 | pair ChromListMaker::getStartOfChromosome( 242 | ChromosomeOneDigit * chrom) { 243 | if (chromOSplitMap->find(chrom) != chromOSplitMap->end()) { 244 | return chromOSplitMap->at(chrom); 245 | } else { 246 | cerr << "Chromosome is not in the list!" 
<< endl; 247 | throw std::exception(); 248 | } 249 | } 250 | 251 | } 252 | /* namespace nonltr */ 253 | -------------------------------------------------------------------------------- /src/nonltr/ChromListMaker.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ChromListMaker.h 3 | * 4 | * Created on: Mar 13, 2014 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef CHROMLISTMAKER_H_ 9 | #define CHROMLISTMAKER_H_ 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include "Chromosome.h" 16 | #include "ChromosomeOneDigit.h" 17 | 18 | #include "../utility/Util.h" 19 | 20 | using namespace std; 21 | using namespace utility; 22 | 23 | namespace nonltr { 24 | 25 | /* Reads the sequence file seqFile and builds one Chromosome (or ChromosomeOneDigit) per record; a record starts at a line beginning with '>'. When limit is non-zero, a record longer than limit bases is cut into consecutive pieces and the header and start offset of every piece are kept in the split maps. The lists, their elements and the maps are released by the destructor. */ class ChromListMaker { 26 | private: 27 | vector * chromList; 28 | vector * chromOList; 29 | unordered_map> * chromSplitMap; 30 | unordered_map> * chromOSplitMap; 31 | string seqFile; 32 | /* Maximum number of bases per produced chromosome; 0 disables splitting (ChromListMaker.cpp tests "limit && seqLen > limit"). */ int limit; 33 | bool passedLimit; 34 | 35 | public: 36 | ChromListMaker(string); 37 | ChromListMaker(string, int); 38 | virtual ~ChromListMaker(); 39 | /* Both makers return a list owned by this object. */ const vector * makeChromList(); 40 | const vector * makeChromOneDigitList(); 41 | /* Returns the (header, start offset) pair stored for a chromosome produced by this object; throws std::exception for an unknown pointer. */ pair getStartOfChromosome(Chromosome *); 42 | pair getStartOfChromosome(ChromosomeOneDigit *); 43 | }; 44 | 45 | } /* namespace nonltr */ 46 | #endif /* CHROMLISTMAKER_H_ */ 47 | -------------------------------------------------------------------------------- /src/nonltr/Chromosome.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Chromosome.h 3 | * 4 | * Created on: Mar 26, 2012 5 | * Author: Hani Zakaria Girgis, PhD - NCBI/NLM/NIH 6 | */ 7 | #ifndef CHROMOSOME_H_ 8 | #define CHROMOSOME_H_ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "IChromosome.h" 17 | #include "../exception/InvalidOperationException.h" 18 | #include "../exception/InvalidInputException.h" 19 | #include "../utility/Util.h" 20 | 21 | using namespace std; 22 | using 
namespace nonltr; 23 | using namespace utility; 24 | using namespace exception; 25 | 26 | namespace nonltr { 27 | /* Concrete IChromosome that stores a header and a base string plus a list of segments. Besides the file/string constructors, an instance can be filled step by step with setHeader(), appendToSequence() and finalize(), which is how ChromListMaker uses it. */ class Chromosome: public IChromosome { 28 | public: 29 | Chromosome(); 30 | Chromosome(string); 31 | Chromosome(string, bool); 32 | Chromosome(string, int); 33 | Chromosome(string, int, int); 34 | Chromosome(string &, string&); 35 | Chromosome(string &, string&, int); 36 | 37 | int getGcContent(); 38 | 39 | virtual ~Chromosome(); 40 | 41 | virtual string& getBaseRef(); 42 | virtual string& getHeaderRef(); 43 | 44 | virtual const string* getBase(); 45 | virtual const vector *> * getSegment(); 46 | virtual void printSegmentList(); 47 | virtual string getHeader(); 48 | virtual int size(); 49 | virtual int getEffectiveSize(); 50 | virtual void setHeader(string&); 51 | virtual void setSequence(string&); 52 | virtual void appendToSequence(string&); 53 | virtual void finalize(); 54 | virtual vector * getBaseCount(); 55 | 56 | protected: 57 | string chromFile; 58 | string header; 59 | string base; 60 | int effectiveSize; 61 | int segLength; 62 | 63 | /* Each segment is read as a [start, end] pair of indices into base (see ChromosomeOneDigit::encodeNucleotides). */ vector *> * segment; 64 | void readFasta(); 65 | void readFasta(int); 66 | void toUpperCase(); 67 | void removeN(); 68 | void mergeSegments(); 69 | virtual void help(int, bool); 70 | void makeSegmentList(); 71 | void calculateEffectiveSize(); 72 | 73 | private: 74 | bool isHeaderReady; 75 | bool isBaseReady; 76 | bool isFinalized; 77 | /* NOTE(review): ChromosomeOneDigit declares its own private canClean; confirm this one is still used by Chromosome.cpp. */ bool canClean = false; 78 | 79 | void reverseSegments(); 80 | void makeBaseCount(); 81 | vector * baseCount; 82 | }; 83 | } 84 | 85 | #endif /* CHROMOSOME_H_ */ 86 | -------------------------------------------------------------------------------- /src/nonltr/ChromosomeOneDigit.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * ChromosomeOneDigit.cpp 3 | * 4 | * Created on: Jul 31, 2012 5 | * Author: Hani Zakaria Girgis, PhD at the NCBI/NLM/NIH 6 | * A A 7 | * T T 8 | * G G 9 | * C C 10 | * R G or A 11 | * Y T or C 12 | * M A or C 13 | * 
K G or T 14 | * S G or C 15 | * W A or T 16 | * H A or C or T 17 | * B G or T or C 18 | * V G or C or A 19 | * D G or T or A 20 | * N G or T or A or C 21 | */ 22 | #include 23 | #include 24 | 25 | #include "Chromosome.h" 26 | #include "ChromosomeOneDigit.h" 27 | #include "../exception/InvalidInputException.h" 28 | 29 | using namespace exception; 30 | 31 | namespace nonltr { 32 | 33 | ChromosomeOneDigit::ChromosomeOneDigit() : 34 | Chromosome() { 35 | } 36 | 37 | ChromosomeOneDigit::ChromosomeOneDigit(string fileName) : 38 | Chromosome(fileName) { 39 | help(); 40 | } 41 | 42 | ChromosomeOneDigit::ChromosomeOneDigit(string fileName, int segmentLength, 43 | int maxLength) : 44 | Chromosome(fileName, segmentLength, maxLength) { 45 | help(); 46 | } 47 | 48 | ChromosomeOneDigit::ChromosomeOneDigit(string& seq, string& info) : 49 | Chromosome(seq, info) { 50 | help(); 51 | } 52 | 53 | ChromosomeOneDigit::ChromosomeOneDigit(string& seq, string& info, int length) : 54 | Chromosome(seq, info, length) { 55 | help(); 56 | } 57 | 58 | void ChromosomeOneDigit::help() { 59 | // Build codes 60 | buildCodes(); 61 | // Modify the sequence in the super class 62 | encodeNucleotides(); 63 | } 64 | 65 | void ChromosomeOneDigit::finalize() { 66 | Chromosome::finalize(); 67 | help(); 68 | } 69 | 70 | void ChromosomeOneDigit::buildCodes() { 71 | // Can delete the codes 72 | canClean = true; 73 | 74 | // Make map 75 | codes = new map(); 76 | 77 | // Certain nucleotides 78 | codes->insert(map::value_type('A', (char) 0)); 79 | codes->insert(map::value_type('C', (char) 1)); 80 | codes->insert(map::value_type('G', (char) 2)); 81 | codes->insert(map::value_type('T', (char) 3)); 82 | 83 | // Common uncertain nucleotide 84 | // codes->insert(map::value_type('N', (char) 4)); 85 | 86 | // Uncertain nucleotides 87 | codes->insert(map::value_type('R', codes->at('G'))); 88 | codes->insert(map::value_type('Y', codes->at('C'))); 89 | codes->insert(map::value_type('M', codes->at('A'))); 90 | 
codes->insert(map::value_type('K', codes->at('T'))); 91 | codes->insert(map::value_type('S', codes->at('G'))); 92 | codes->insert(map::value_type('W', codes->at('T'))); 93 | codes->insert(map::value_type('H', codes->at('C'))); 94 | codes->insert(map::value_type('B', codes->at('T'))); 95 | codes->insert(map::value_type('V', codes->at('A'))); 96 | codes->insert(map::value_type('D', codes->at('T'))); 97 | codes->insert(map::value_type('N', codes->at('C'))); 98 | codes->insert(map::value_type('X', codes->at('G'))); 99 | } 100 | 101 | ChromosomeOneDigit::~ChromosomeOneDigit() { 102 | if (canClean) { 103 | codes->clear(); 104 | delete codes; 105 | } 106 | } 107 | 108 | /** 109 | * This method converts nucleotides in the segments to single digit codes 110 | */ 111 | void ChromosomeOneDigit::encodeNucleotides() { 112 | 113 | for (int s = 0; s < segment->size(); s++) { 114 | int segStart = segment->at(s)->at(0); 115 | int segEnd = segment->at(s)->at(1); 116 | for (int i = segStart; i <= segEnd; i++) { 117 | 118 | if (codes->count(base[i]) > 0) { 119 | base[i] = codes->at(base[i]); 120 | } else { 121 | string msg = "Invalid nucleotide: "; 122 | msg.append(1, base[i]); 123 | throw InvalidInputException(msg); 124 | } 125 | } 126 | } 127 | 128 | // Digitize skipped segments 129 | int segNum = segment->size(); 130 | if (segNum > 0) { 131 | // The first interval - before the first segment 132 | int segStart = 0; 133 | int segEnd = segment->at(0)->at(0) - 1; 134 | 135 | for (int s = 0; s <= segNum; s++) { 136 | for (int i = segStart; i <= segEnd; i++) { 137 | char c = base[i]; 138 | 139 | if (c != 'N') { 140 | if (codes->count(c) > 0) { 141 | base[i] = codes->at(c); 142 | } else { 143 | string msg = "Invalid nucleotide: "; 144 | msg.append(1, c); 145 | throw InvalidInputException(msg); 146 | } 147 | } 148 | } 149 | 150 | // The regular intervals between two segments 151 | if (s < segNum - 1) { 152 | segStart = segment->at(s)->at(1) + 1; 153 | segEnd = segment->at(s + 1)->at(0) - 
1; 154 | } 155 | // The last interval - after the last segment 156 | else if (s == segNum - 1) { 157 | segStart = segment->at(s)->at(1) + 1; 158 | segEnd = base.size() - 1; 159 | } 160 | } 161 | } 162 | } 163 | 164 | /** 165 | * Cannot be called on already finalized object. 166 | */ 167 | void ChromosomeOneDigit::makeR() { 168 | makeReverse(); 169 | reverseSegments(); 170 | } 171 | 172 | /** 173 | * Cannot be called on already finalized object. 174 | */ 175 | void ChromosomeOneDigit::makeRC() { 176 | makeComplement(); 177 | makeReverse(); 178 | reverseSegments(); 179 | } 180 | 181 | void ChromosomeOneDigit::makeComplement() { 182 | map complement; 183 | 184 | // Certain nucleotides 185 | complement.insert(map::value_type((char) 0, (char) 3)); 186 | complement.insert(map::value_type((char) 1, (char) 2)); 187 | complement.insert(map::value_type((char) 2, (char) 1)); 188 | complement.insert(map::value_type((char) 3, (char) 0)); 189 | 190 | // Unknown nucleotide 191 | complement.insert(map::value_type('N', 'N')); 192 | 193 | // Convert a sequence to its complement 194 | int seqLen = base.size(); 195 | for (int i = 0; i < seqLen; i++) { 196 | if (complement.count(base[i]) > 0) { 197 | base[i] = complement.at(base[i]); 198 | } else { 199 | cerr << "Error: The digit " << (char) base[i]; 200 | cerr << " does not represent a base." 
<< endl; 201 | exit(2); 202 | } 203 | } 204 | } 205 | 206 | void ChromosomeOneDigit::makeReverse() { 207 | int last = base.size() - 1; 208 | 209 | // Last index to be switched 210 | int middle = base.size() / 2; 211 | 212 | for (int i = 0; i < middle; i++) { 213 | char temp = base[last - i]; 214 | base[last - i] = base[i]; 215 | base[i] = temp; 216 | } 217 | } 218 | 219 | void ChromosomeOneDigit::reverseSegments() { 220 | int segNum = segment->size(); 221 | int lastBase = size() - 1; 222 | 223 | // Calculate the coordinate on the main strand 224 | for (int i = 0; i < segNum; i++) { 225 | vector * seg = segment->at(i); 226 | 227 | int s = lastBase - seg->at(1); 228 | int e = lastBase - seg->at(0); 229 | seg->clear(); 230 | seg->push_back(s); 231 | seg->push_back(e); 232 | } 233 | 234 | // Reverse the regions within the list 235 | int lastRegion = segNum - 1; 236 | int middle = segNum / 2; 237 | for (int i = 0; i < middle; i++) { 238 | vector * temp = segment->at(lastRegion - i); 239 | (*segment)[lastRegion - i] = segment->at(i); 240 | (*segment)[i] = temp; 241 | } 242 | } 243 | 244 | } 245 | -------------------------------------------------------------------------------- /src/nonltr/ChromosomeOneDigit.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ChromosomeOneDigit.h 3 | * 4 | * Created on: Jul 31, 2012 5 | * Author: Hani Zakaria Girgis, PhD - NCBI/NLM/NIH 6 | */ 7 | 8 | #ifndef CHROMOSOMEONEDIGIT_H_ 9 | #define CHROMOSOMEONEDIGIT_H_ 10 | 11 | #include 12 | #include "Chromosome.h" 13 | 14 | namespace nonltr { 15 | class ChromosomeOneDigit: public Chromosome { 16 | 17 | private: 18 | bool canClean = false; 19 | 20 | /* Fields */ 21 | map * codes; 22 | 23 | /* Methods */ 24 | void help(); 25 | void buildCodes(); 26 | void encodeNucleotides(); 27 | 28 | void makeReverse(); 29 | void makeComplement(); 30 | void reverseSegments(); 31 | 32 | public: 33 | /* Methods */ 34 | ChromosomeOneDigit(); 35 | 
ChromosomeOneDigit(string); 36 | ChromosomeOneDigit(string, int, int); 37 | ChromosomeOneDigit(string&, string&); 38 | ChromosomeOneDigit(string&, string&, int); 39 | virtual ~ChromosomeOneDigit(); 40 | /* Finalizes the Chromosome part, then encodes the sequence as digits. */ virtual void finalize(); 41 | 42 | /* makeR() reverses the encoded sequence and its segment list; makeRC() complements the sequence first (see ChromosomeOneDigit.cpp). */ void makeR(); 43 | void makeRC(); 44 | }; 45 | } 46 | 47 | #endif /* CHROMOSOMEONEDIGIT_H_ */ 48 | -------------------------------------------------------------------------------- /src/nonltr/ChromosomeRandom.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ChromosomeRandom.h 3 | * 4 | * Created on: Feb 4, 2013 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef CHROMOSOMERANDOM_H_ 9 | #define CHROMOSOMERANDOM_H_ 10 | 11 | #include 12 | 13 | #include "IChromosome.h" 14 | 15 | namespace nonltr { 16 | 17 | /* IChromosome implementation constructed from an int, another IChromosome, a char and a vector. NOTE(review): judging by the member names (table, rBase, countWords, generateRandomSequence) it presumably generates a random sequence from word statistics of the original chromosome - confirm in ChromosomeRandom.cpp. */ class ChromosomeRandom: public nonltr::IChromosome { 18 | // Key-value pair type. 19 | typedef map::value_type valType; 20 | 21 | private: 22 | int n; 23 | char unread; 24 | IChromosome * oChrom; 25 | vector * alpha; 26 | map * table; 27 | string * rBase; 28 | vector * keyList; 29 | map * codes; 30 | 31 | void fillKeyList(); 32 | void initializeTable(); 33 | void countWords(); 34 | void convertToProbabilities(); 35 | void printTable(); 36 | void generateRandomSequence(); 37 | 38 | public: 39 | ChromosomeRandom(int, IChromosome*, char, vector*); 40 | virtual ~ChromosomeRandom(); 41 | 42 | virtual const string* getBase(); 43 | virtual const vector *> * getSegment(); 44 | virtual string getHeader(); 45 | virtual void printSequence(string); 46 | void printSequence(string, string *); 47 | void printEffectiveSequence(string); 48 | }; 49 | 50 | } /* namespace nonltr */ 51 | #endif /* CHROMOSOMERANDOM_H_ */ 52 | -------------------------------------------------------------------------------- /src/nonltr/DetectorMaxima.h: -------------------------------------------------------------------------------- 1 | /* 2 | * DetectorMaxima.h 3 | * 4 | * Created on: May 31, 2013 5 | * Author: 
Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef DETECTORMAXIMA_H_ 9 | #define DETECTORMAXIMA_H_ 10 | 11 | #include 12 | #include 13 | 14 | #include "../utility/ILocation.h" 15 | 16 | using namespace std; 17 | using namespace utility; 18 | 19 | namespace nonltr { 20 | 21 | /* NOTE(review): the private helpers (makeMask, smooth, deriveFirst, deriveSecond, findMaxima, findSeparators, findRegions, extendRegions) suggest that regions are found from local maxima of a smoothed score vector - confirm the exact pipeline and the meaning of s, w, m, t, p and e in DetectorMaxima.cpp. */ class DetectorMaxima { 22 | private: 23 | 24 | int segStart; 25 | int segEnd; 26 | double s; 27 | double w; 28 | double m; 29 | double t; 30 | double p; 31 | int e; 32 | int halfS; 33 | 34 | vector * oScores; 35 | vector * scores; 36 | vector * mask; 37 | vector * first; 38 | vector * second; 39 | vector * maxima; 40 | // vector *> * allMaxima; 41 | 42 | vector * separatorList; 43 | vector * regionList; 44 | 45 | void makeMask(); 46 | void smooth(); 47 | void deriveFirst(); 48 | void deriveSecond(); 49 | void findMaxima(); 50 | 51 | void findSeparators(); 52 | void findRegions(); 53 | 54 | void extendRegions(); 55 | 56 | int countLessThan(vector *, int, int, double); 57 | 58 | /** 59 | * Credit: http://stackoverflow.com/questions/554204/where-is-round-in-c 60 | */ 61 | /* Rounds to the nearest integer value, halves away from zero. */ inline double round(double number) { 62 | return number < 0.0 ? ceil(number - 0.5) : floor(number + 0.5); 63 | } 64 | 65 | public: 66 | DetectorMaxima(int, int, double, double, double, double, double, int, 67 | vector *); 68 | virtual ~DetectorMaxima(); 69 | const vector* getRegionList() const; 70 | const vector* getFirst() const; 71 | const vector* getSecond() const; 72 | 73 | // const vector *>* getAllMaxima() const; 74 | }; 75 | 76 | } /* namespace nonltr */ 77 | #endif /* DETECTORMAXIMA_H_ */ 78 | -------------------------------------------------------------------------------- /src/nonltr/EnrichmentMarkovView.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * EnrichmentMarkovView.cpp 3 | * 4 | * Created on: Apr 17, 2013 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | namespace nonltr { 9 | 10 | /** 11 | * The Markov order. It starts at 0. 
12 | */ 13 | /* k is forwarded to KmerHashTable, order is the Markov order checked by initialize(), m becomes minObs. */ template 14 | EnrichmentMarkovView::EnrichmentMarkovView(int k, int order, int m) : 15 | minObs(m), factor(10000.00), KmerHashTable(k) { 16 | initialize(order); 17 | } 18 | 19 | /* Same as above, with initValue forwarded to KmerHashTable as well. */ template 20 | EnrichmentMarkovView::EnrichmentMarkovView(int k, V initValue, int order, 21 | int m) : 22 | minObs(m), factor(10000.00), KmerHashTable(k, initValue) { 23 | initialize(order); 24 | } 25 | 26 | /* Stores the order, throws InvalidInputException unless 0 <= order < k, resets the total length l and allocates one background table per word length 1 .. order + 1. */ template 27 | void EnrichmentMarkovView::initialize(int order) { 28 | // Test start 29 | // cout << "Testing: " << minObs << endl; 30 | // Test end 31 | 32 | o = order; 33 | if (o < 0) { 34 | string msg("The Markov order must be non-negative integer. "); 35 | msg.append("The invalid input is: "); 36 | msg.append(Util::int2string(o)); 37 | msg.append("."); 38 | throw InvalidInputException(msg); 39 | } 40 | 41 | if (o >= KmerHashTable::k) { 42 | string msg("The Markov order cannot be >= k (k-mer)."); 43 | throw InvalidInputException(msg); 44 | } 45 | 46 | l = 0; 47 | modelList = new vector *>(); 48 | 49 | for (int i = 1; i <= o + 1; i++) { 50 | modelList->push_back(new KmerHashTable(i)); 51 | } 52 | } 53 | 54 | /* Hands the background tables to Util::deleteInVector() and deletes the list. */ template 55 | EnrichmentMarkovView::~EnrichmentMarkovView() { 56 | Util::deleteInVector(modelList); 57 | delete modelList; 58 | } 59 | 60 | /** 61 | * This method counts words of size 1 to order+1 in the input sequence. 62 | * In other words, it updates the background tables. In addition, it 63 | * updates the length of the genome. 64 | * 65 | * sequence: is the input sequence. 66 | * start: the start index - inclusive. 67 | * end: the end index - inclusive. 
68 | */ 69 | template 70 | void EnrichmentMarkovView::count(const char * sequence, int start, 71 | int end) { 72 | 73 | // Multiply by 2 if scanning the forward strand and its reverse complement 74 | // l = l + (2 * (end - start + 1)); 75 | l = l + (end - start + 1); 76 | 77 | int modelNumber = modelList->size(); 78 | for (int i = 0; i < modelNumber; i++) { 79 | KmerHashTable * t = modelList->at(i); 80 | /* Table i holds words of length i + 1, so its last word starts at end - i. */ t->wholesaleIncrement(sequence, start, end - i); 81 | } 82 | } 83 | 84 | /** 85 | * Normalize the count of words in each model. 86 | * Values stored in these models are multiplied by "factor." 87 | */ 88 | template 89 | void EnrichmentMarkovView::generateProbapilities() { 90 | int modelNumber = modelList->size(); 91 | 92 | for (int m = 0; m < modelNumber; m++) { 93 | KmerHashTable * t = modelList->at(m); 94 | int tSize = t->getMaxTableSize(); 95 | 96 | /* Keys are handled in groups of four consecutive indices; each group is normalized by its own sum. */ for (int i = 0; i < tSize; i += 4) { 97 | double sum = 0.0; 98 | 99 | for (int j = i; j < i + 4; j++) { 100 | sum += t->valueOf(j); 101 | } 102 | 103 | /* NOTE(review): if all four counts of a group are zero, sum is 0 and the division below is 0/0 (NaN) before the result is stored - confirm the tables can never hold an all-zero group here, or guard against it. */ for (int j = i; j < i + 4; j++) { 104 | t->insert(j, round(factor * ((double) t->valueOf(j) / sum))); 105 | } 106 | } 107 | } 108 | } 109 | 110 | /* Replaces every stored count by an enrichment value: the keys are enumerated as base-4 strings q, the expected count of each key is computed from the background tables, and the stored value becomes round(observed - expected) when observed >= minObs and observed > expected, otherwise 0. */ template 111 | void EnrichmentMarkovView::processTable() { 112 | char base = 4; 113 | int modelNumber = modelList->size(); 114 | 115 | // Make a zero in quaternary form as a string of length k. 116 | string q(""); 117 | for (int x = 0; x < KmerHashTable::k; x++) { 118 | q.append(1, 0); 119 | } 120 | 121 | double lowerP; 122 | double upperP; 123 | for (I y = 0; y < KmerHashTable::maxTableSize; y++) { 124 | if (y % 10000000 == 0) { 125 | cout << "Processing " << y << " keys out of " 126 | << KmerHashTable::maxTableSize; 127 | cout << endl; 128 | } 129 | 130 | const char * qc = q.c_str(); 131 | 132 | // Calculate the expected number of occurrences. 133 | 134 | // a. Calculate probability from lower order models. 
135 | // Lower probabilities are the same for four consecutive words of length of k-1 136 | if (y % 4 == 0) { 137 | lowerP = 1.0; 138 | for (int m = 0; m < modelNumber - 1; m++) { 139 | KmerHashTable * oTable = modelList->at(m); 140 | lowerP *= (((double) oTable->valueOf(qc, 0)) / factor); 141 | } 142 | } 143 | 144 | // b. Calculate probability based on the specified order. 145 | KmerHashTable * oTable = modelList->at(modelNumber - 1); 146 | int resultsSize = KmerHashTable::k - o - 1; 147 | 148 | // Upper probabilities are the same for four consecutive words of length of k-1 149 | // The scanning of words of length corresponding to the highest order + 1 150 | // This step is not needed if k = o + 1, i.e. resultsSize = 0. 151 | if (y % 4 == 0) { 152 | if (resultsSize > 0) { 153 | //Initialize the elements of the vector invalid index 154 | /* -987 is a placeholder that wholesaleValueOf() is expected to overwrite. */ vector results = vector(resultsSize, -987); 155 | oTable->wholesaleValueOf(qc, 0, resultsSize - 1, &results, 0); 156 | 157 | upperP = 1.0; 158 | for (int i = 0; i < resultsSize; i++) { 159 | upperP *= (((double) results.at(i)) / factor); 160 | } 161 | results.clear(); 162 | 163 | } else { 164 | upperP = 1.0; 165 | } 166 | } 167 | 168 | // The expected number of occurrences 169 | double exp = l * lowerP * upperP 170 | * (((double) oTable->valueOf(qc, resultsSize)) / factor); 171 | 172 | // Calculate the enrichment value. 
173 | // Log value 174 | // values[y] = round((log((double) values[y] + 1.0) - log(exp + 1.0))); 175 | 176 | // Raw value 177 | // Requirement: if observed is >= minObs && observed > expected then the value is the difference 178 | // otherwise the value is zero 179 | 180 | V observed = KmerHashTable::values[y]; 181 | 182 | if (observed >= minObs && observed > exp) { 183 | 184 | KmerHashTable::values[y] = round(observed - exp); 185 | } else { 186 | KmerHashTable::values[y] = 0; 187 | } 188 | 189 | /* 190 | KmerHashTable::values[y] = 191 | round( 192 | (((double) KmerHashTable::values[y] + 1.0) 193 | / (exp + 1.0))); 194 | */ 195 | 196 | // Increment the quaternary number: 197 | // 1 - guard against overflow. 198 | if (q[0] == base - 1) { 199 | string z(""); 200 | z.append(1, 0); 201 | q = z + q; 202 | } 203 | 204 | // 2 - increment the quaternary number by 1. 205 | int qLen = q.size(); 206 | for (int i = qLen - 1; i >= 0; i--) { 207 | if (q[i] + 1 < base) { 208 | q[i] = q[i] + 1; 209 | break; 210 | } else { 211 | q[i] = 0; 212 | } 213 | } 214 | } 215 | } 216 | 217 | } /* namespace nonltr */ 218 | -------------------------------------------------------------------------------- /src/nonltr/EnrichmentMarkovView.h: -------------------------------------------------------------------------------- 1 | /* 2 | * EnrichmentMarkovView.h 3 | * 4 | * Created on: Apr 17, 2013 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef ENRICHMENTMARKOVVIEW_H_ 9 | #define ENRICHMENTMARKOVVIEW_H_ 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include "KmerHashTable.h" 16 | #include "../utility/Util.h" 17 | #include "../exception/InvalidInputException.h" 18 | 19 | using namespace std; 20 | using namespace utility; 21 | using namespace exception; 22 | 23 | namespace nonltr { 24 | 25 | /* K-mer table whose stored counts are turned into enrichment values by processTable(): observed minus expected when observed >= minObs and observed > expected, otherwise 0. The expected count comes from background tables of word lengths 1 .. order + 1. */ template 26 | class EnrichmentMarkovView: public KmerHashTable{ 27 | 28 | private: 29 | // The minimum number of the observed k-mers 30 | const int minObs; 31 | 32 | // This template 
specification should work up to order of 14, 33 | // i.e. word length = 15 34 | vector *> * modelList; 35 | 36 | // Markov order 37 | int o; 38 | 39 | // Total length 40 | long l; 41 | 42 | // The probability of a word is multiplied by this factor 43 | // Equivalent to four decimal points. 44 | const double factor; // = 10000.00; 45 | 46 | // Initialize data members 47 | void initialize(int); 48 | 49 | /** 50 | * Credit: http://stackoverflow.com/questions/554204/where-is-round-in-c 51 | */ 52 | /* Rounds to the nearest integer value, halves away from zero. */ inline double round(double number) { 53 | return number < 0.0 ? ceil(number - 0.5) : floor(number + 0.5); 54 | } 55 | 56 | public: 57 | EnrichmentMarkovView(int, int, int); 58 | EnrichmentMarkovView(int, V, int, int); 59 | virtual ~EnrichmentMarkovView(); 60 | 61 | /* count() adds a sequence range to the background tables, generateProbapilities() normalizes those tables, processTable() converts the stored counts into enrichment values (see EnrichmentMarkovView.cpp). */ void count(const char *, int, int); 62 | void generateProbapilities(); 63 | void processTable(); 64 | }; 65 | } /* namespace nonltr */ 66 | 67 | #include "EnrichmentMarkovView.cpp" 68 | 69 | #endif /* ENRICHMENTMARKOVVIEW_H_ */ 70 | -------------------------------------------------------------------------------- /src/nonltr/HMM.h: -------------------------------------------------------------------------------- 1 | /* 2 | * HMM.h 3 | * 4 | * Created on: Jun 21, 2013 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef HMM_H_ 9 | #define HMM_H_ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "../utility/ILocation.h" 17 | 18 | using namespace std; 19 | using namespace utility; 20 | 21 | namespace nonltr { 22 | 23 | /* NOTE(review): going by the class name and its train()/normalize()/decode() members this is a hidden Markov model over score-derived states, built either from a file or from a base and an int - confirm the state layout and the meaning of pList, tList and oList in HMM.cpp. */ class HMM { 24 | private: 25 | const int PRECISION; 26 | double minusInf; 27 | vector * pList; 28 | vector *> * tList; 29 | vector * oList; 30 | 31 | void initializeHelper(); 32 | // Returns the index of the last candidate in the segment 33 | int trainHelper1(int, int, int); 34 | void trainHelper2(int, int, int, int); 35 | void trainPositive(int, int); 36 | void trainNegative(int, int); 37 | void move(int, int); 38 | void checkBase(double); 39 | 40 | /* 41 | inline int 
getPstvState(int score) { 42 | int state = ceil(log(score) / logBase); 43 | if (state < 0) { 44 | state = 0; 45 | } 46 | return state; 47 | } 48 | 49 | inline int getNgtvState(int score) { 50 | int state = ceil(log(score) / logBase); 51 | if (state < 0) { 52 | state = 0; 53 | } 54 | return state + positiveStateNumber; 55 | } 56 | */ 57 | 58 | /* Returns the value stored in scoreList at index, used directly as the state number. */ inline int getPstvState(int index) { 59 | int state = scoreList->at(index); 60 | return state; 61 | } 62 | 63 | /* Returns the value stored in scoreList at index, shifted by positiveStateNumber. */ inline int getNgtvState(int index) { 64 | int state = scoreList->at(index); 65 | return state + positiveStateNumber; 66 | } 67 | 68 | protected: 69 | double base; 70 | double logBase; 71 | int stateNumber; 72 | int positiveStateNumber; 73 | 74 | vector * scoreList; 75 | const vector *> * segmentList; 76 | const vector * candidateList; 77 | 78 | void initialize(double, int); 79 | /** 80 | * Credit: http://stackoverflow.com/questions/554204/where-is-round-in-c 81 | */ 82 | /* Rounds to the nearest integer value, halves away from zero. */ inline double round(double number) { 83 | return number < 0.0 ? ceil(number - 0.5) : floor(number + 0.5); 84 | } 85 | 86 | public: 87 | HMM(string); // Build a model from file 88 | HMM(double, int); 89 | // HMM(vector *, const vector *> *, 90 | // const vector *, double); 91 | virtual ~HMM(); 92 | /* NOTE(review): the three arguments presumably become scoreList, segmentList and candidateList - confirm in HMM.cpp. */ void train(vector *, const vector *> *, const vector *); 93 | void normalize(); 94 | double decode(int, int, vector *, vector&); 95 | double decode(int, int, vector *, vector&); 96 | int getPositiveStateNumber(); 97 | void print(); 98 | void print(string); 99 | double getBase(); 100 | }; 101 | 102 | } /* namespace nonltr */ 103 | #endif /* HMM_H_ */ 104 | -------------------------------------------------------------------------------- /src/nonltr/IChromosome.h: -------------------------------------------------------------------------------- 1 | /* 2 | * IChromosome.h 3 | * 4 | * Created on: Feb 4, 2013 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef ICHROMOSOME_H_ 9 | #define ICHROMOSOME_H_ 10 | 11 | #include 12 | #include 13 | 14 | using 
namespace std; 15 | 16 | namespace nonltr { 17 | 18 | class IChromosome { 19 | public: 20 | //IChromosome(); 21 | //virtual ~IChromosome(); 22 | virtual const string* getBase() = 0; 23 | virtual const vector *> * getSegment() = 0; 24 | virtual string getHeader() = 0; 25 | }; 26 | 27 | } /* namespace tr */ 28 | #endif /* ICHROMOSOME_H_ */ 29 | -------------------------------------------------------------------------------- /src/nonltr/ITableView.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ITableView.h 3 | * 4 | * Created on: Aug 9, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef ITABLEVIEW_H_ 9 | #define ITABLEVIEW_H_ 10 | 11 | #include 12 | 13 | using namespace std; 14 | 15 | namespace nonltr { 16 | 17 | template 18 | class ITableView { 19 | public: 20 | virtual V valueOf(const char*) = 0 ; 21 | virtual V valueOf(const char*, int) = 0; 22 | virtual V valueOf(I) = 0; 23 | 24 | virtual int getK() = 0; 25 | virtual I getMaxTableSize() = 0; 26 | virtual const V * getValues() const = 0; 27 | 28 | virtual void wholesaleValueOf(const char *, int, int, vector *) = 0; 29 | virtual void wholesaleValueOf(const char *, int, int, vector *, int) = 0; 30 | }; 31 | 32 | } 33 | 34 | #endif /* ITABLEVIEW_H_ */ 35 | -------------------------------------------------------------------------------- /src/nonltr/KmerHashTable.h: -------------------------------------------------------------------------------- 1 | /* 2 | * KmerHashTable.h 3 | * 4 | * Created on: Jul 25, 2012 5 | * Author: Hani Zakaria Girgis, PhD - NCBI/NLM/NIH 6 | */ 7 | 8 | #ifndef KMERHASHTABLE_H_ 9 | #define KMERHASHTABLE_H_ 10 | 11 | #include 12 | #include 13 | #include "ITableView.h" 14 | 15 | using namespace std; 16 | using namespace nonltr; 17 | 18 | namespace nonltr { 19 | 20 | template 21 | class KmerHashTable: public ITableView { 22 | 23 | protected: 24 | /* Fields */ 25 | static const int maxKeyLength = 15; 26 | int k; 27 | 28 | 29 | I 
maxTableSize; 30 | 31 | // The hashed values, i.e. the values of the hash table. 32 | // The index is the 4ry representation of the key 33 | V * values; 34 | V initialValue; 35 | 36 | private: 37 | // [4^0, 4^1, ... , 4^(k-1)] 38 | I * bases; 39 | I * mMinusOne; 40 | void initialize(int, V); 41 | 42 | public: 43 | /* Methods */ 44 | KmerHashTable(int); 45 | KmerHashTable(int, V); 46 | 47 | virtual ~KmerHashTable(); 48 | 49 | I hash(const char *); 50 | I hash(const char *, int); 51 | void hash(const char *, int, int, vector *); 52 | 53 | void insert(const char*, V); 54 | void insert(const char*, int, V); 55 | void insert(I, V); 56 | 57 | void increment(const char*); 58 | void increment(const char*, int); 59 | void wholesaleIncrement(const char*, int, int); 60 | 61 | void addReverseComplement(); 62 | I countNonInitialEntries(); 63 | void getKeys(vector& keys); 64 | void printTable(string); 65 | void checkOverflow(); 66 | 67 | /*Vritual methods from ITableView*/ 68 | virtual V valueOf(const char*); 69 | virtual V valueOf(const char*, int); 70 | virtual V valueOf(I); 71 | virtual void wholesaleValueOf(const char *, int, int, vector *); 72 | virtual void wholesaleValueOf(const char *, int, int, vector *, int); 73 | 74 | virtual int getK(); 75 | virtual I getMaxTableSize(); 76 | virtual V getMaxValue(); 77 | virtual const V * getValues() const; 78 | }; 79 | } 80 | 81 | #include "KmerHashTable.cpp" 82 | 83 | #endif /* KMERHASHTABLE_H_ */ 84 | -------------------------------------------------------------------------------- /src/nonltr/LocationList.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * LocationList.cpp 3 | * 4 | * Created on: Feb 19, 2015 5 | * Author: Hani Zakaria Girgis, PhD 6 | * 7 | * 8 | * An instance of this class holds a list of merged locations. 
9 | */ 10 | 11 | #include "LocationList.h" 12 | 13 | namespace nonltr { 14 | 15 | LocationList::LocationList(string chromNameIn) { 16 | chromName = chromNameIn; 17 | regionList = new vector(); 18 | merge(); 19 | } 20 | 21 | LocationList::~LocationList() { 22 | Util::deleteInVector(regionList); 23 | delete regionList; 24 | } 25 | 26 | void LocationList::add(int start, int end) { 27 | regionList->push_back(new Location(start, end)); 28 | } 29 | 30 | void LocationList::merge() { 31 | int regionCount = regionList->size(); 32 | int gg = 0; 33 | while (gg < regionCount) { 34 | ILocation * region = regionList->at(gg); 35 | 36 | int regionStart = region->getStart(); 37 | int regionEnd = region->getEnd(); 38 | 39 | if (gg > 0) { 40 | ILocation * pRegion = regionList->at(gg - 1); 41 | int pStart = pRegion->getStart(); 42 | int pEnd = pRegion->getEnd(); 43 | 44 | if (Util::isOverlapping(pStart, pEnd, regionStart, regionEnd)) { 45 | pRegion->setEnd(regionEnd > pEnd ? regionEnd : pEnd); 46 | regionList->erase(regionList->begin() + gg); 47 | delete region; 48 | regionCount = regionList->size(); 49 | } else { 50 | gg++; 51 | } 52 | } 53 | 54 | if (gg == 0) { 55 | gg++; 56 | } 57 | } 58 | } 59 | 60 | void LocationList::mergeWithAnotherList( 61 | const vector * const otherList) { 62 | //A pre-condition: Ensure that the other list is sorted 63 | for (int h = 1; h < otherList->size(); h++) { 64 | if (otherList->at(h)->getStart() < otherList->at(h - 1)->getStart()) { 65 | throw InvalidStateException( 66 | string("LocationList - The other list is not sorted.")); 67 | } 68 | } 69 | 70 | // Start 71 | vector * mergedList = new vector(); 72 | 73 | int i = 0; 74 | int j = 0; 75 | int iLimit = regionList->size(); 76 | int jLimit = otherList->size(); 77 | 78 | // Continue until one list is finished 79 | while (i < iLimit && j < jLimit) { 80 | ILocation * iLoc = regionList->at(i); 81 | ILocation * jLoc = otherList->at(j); 82 | 83 | if (iLoc->getStart() < jLoc->getStart()) { 84 | 
mergedList->push_back(iLoc); 85 | i++; 86 | } else { 87 | mergedList->push_back(new Location(*jLoc)); 88 | j++; 89 | } 90 | } 91 | 92 | // Once one list is finished, copy the rest of the other list 93 | if (i == iLimit) { 94 | for (; j < jLimit; j++) { 95 | mergedList->push_back(new Location(*(otherList->at(j)))); 96 | } 97 | } else if (j == jLimit) { 98 | for (; i < iLimit; i++) { 99 | mergedList->push_back(regionList->at(i)); 100 | } 101 | } 102 | 103 | // Once done 104 | // Util::deleteInVector(regionList); 105 | regionList->clear(); // Need to test this line 106 | delete regionList; 107 | regionList = mergedList; 108 | 109 | merge(); 110 | 111 | //A post-condition: Ensure that the list is sorted 112 | for (int h = 1; h < regionList->size(); h++) { 113 | if (regionList->at(h)->getStart() < regionList->at(h - 1)->getStart()) { 114 | throw InvalidStateException(string("This list is not sorted.")); 115 | } 116 | } 117 | } 118 | 119 | void LocationList::print() { 120 | cout << endl << chromName << endl; 121 | for (int i = 0; i < regionList->size(); i++) { 122 | int s = regionList->at(i)->getStart(); 123 | int e = regionList->at(i)->getEnd(); 124 | cout << s << "-" << e << endl; 125 | } 126 | } 127 | 128 | const vector * LocationList::getList() { 129 | return regionList; 130 | } 131 | 132 | void LocationList::convertToRedFormat() { 133 | trim(1); 134 | } 135 | 136 | void LocationList::trim(int x) { 137 | for (int i = regionList->size() - 1; i >= 0; i--) { 138 | ILocation * region = regionList->at(i); 139 | int start = region->getStart(); 140 | int newEnd = region->getEnd() - x; 141 | 142 | if (newEnd < 0 || start > newEnd) { 143 | regionList->erase(regionList->begin() + i); 144 | delete region; 145 | } else { 146 | region->setEnd(newEnd); 147 | } 148 | } 149 | } 150 | 151 | } 152 | 153 | /* namespace nonltr */ 154 | -------------------------------------------------------------------------------- /src/nonltr/LocationList.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * LocationList.h 3 | * 4 | * Created on: Feb 19, 2015 5 | * Author: Hani Z. Girgis, PhD 6 | */ 7 | 8 | #ifndef SRC_NONLTR_LOCATIONLIST_H_ 9 | #define SRC_NONLTR_LOCATIONLIST_H_ 10 | 11 | #include 12 | #include "../utility/Util.h" 13 | #include "../utility/ILocation.h" 14 | #include "../utility/Location.h" 15 | #include "../exception/InvalidStateException.h" 16 | 17 | using namespace std; 18 | using namespace utility; 19 | using namespace exception; 20 | 21 | namespace nonltr { 22 | 23 | class LocationList { 24 | private: 25 | string chromName; 26 | vector * regionList; 27 | void merge(); 28 | 29 | public: 30 | LocationList(string); 31 | virtual ~LocationList(); 32 | 33 | void add(int, int); 34 | 35 | /** 36 | * Take a sorted list 37 | */ 38 | void mergeWithAnotherList(const vector * const); 39 | 40 | 41 | /** 42 | * Print locations 43 | */ 44 | void print(); 45 | 46 | const vector * getList(); 47 | void convertToRedFormat(); 48 | void trim(int ); 49 | }; 50 | 51 | } /* namespace nonltr */ 52 | 53 | #endif /* SRC_NONLTR_LOCATIONLIST_H_ */ 54 | -------------------------------------------------------------------------------- /src/nonltr/LocationListCollection.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * LocationListCollection.cpp 3 | * 4 | * Created on: Feb 19, 2015 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #include "LocationListCollection.h" 9 | 10 | namespace nonltr { 11 | 12 | LocationListCollection::LocationListCollection(string fileNameIn) { 13 | fileName = fileNameIn; 14 | collection = new map(); 15 | readCoordinates(); 16 | } 17 | 18 | LocationListCollection::~LocationListCollection() { 19 | collection->clear(); 20 | delete collection; 21 | } 22 | 23 | void LocationListCollection::readCoordinates() { 24 | Util::checkFile(fileName); 25 | 26 | ifstream in(fileName.c_str()); 27 | LocationList * locList; 28 | 
string previousChromName(""); 29 | 30 | while (in.good()) { 31 | string line; 32 | getline(in, line); 33 | 34 | if (line.compare(string("")) != 0) { 35 | int colIndex = line.find_last_of(':'); 36 | int dashIndex = line.find_last_of('-'); 37 | 38 | string chromName = line.substr(0, colIndex); 39 | 40 | if (previousChromName.compare(chromName) != 0) { 41 | 42 | cout << "Processing regions of " << chromName << endl; 43 | 44 | locList = new LocationList(chromName); 45 | collection->insert( 46 | map::value_type(chromName, 47 | locList)); 48 | 49 | previousChromName = chromName; 50 | } 51 | 52 | int start = 53 | atoi( 54 | line.substr(colIndex + 1, dashIndex - colIndex - 1).c_str()); 55 | int end = atoi(line.substr(dashIndex + 1).c_str()); 56 | locList->add(start, end); 57 | } 58 | } 59 | 60 | in.close(); 61 | } 62 | 63 | void LocationListCollection::print() { 64 | map::iterator itr_s = collection->begin(); 65 | map::iterator itr_e = collection->end(); 66 | while (itr_s != itr_e) { 67 | collection->at(itr_s->first)->print(); 68 | ++itr_s; 69 | } 70 | } 71 | 72 | LocationList * const LocationListCollection::getLocationList(string header) { 73 | if (collection->count(header) == 0) { 74 | string msg("Regions of "); 75 | msg.append(header); 76 | msg.append(" cannot be found.\n"); 77 | throw InvalidStateException(msg); 78 | } 79 | 80 | return collection->at(header); 81 | } 82 | 83 | void LocationListCollection::convertToRedFormat() { 84 | map::iterator itr_s = collection->begin(); 85 | map::iterator itr_e = collection->end(); 86 | while (itr_s != itr_e) { 87 | collection->at(itr_s->first)->convertToRedFormat(); 88 | ++itr_s; 89 | } 90 | } 91 | 92 | void LocationListCollection::trim(int x) { 93 | map::iterator itr_s = collection->begin(); 94 | map::iterator itr_e = collection->end(); 95 | while (itr_s != itr_e) { 96 | collection->at(itr_s->first)->trim(x); 97 | ++itr_s; 98 | } 99 | } 100 | 101 | } /* namespace nonltr */ 102 | 
-------------------------------------------------------------------------------- /src/nonltr/LocationListCollection.h: -------------------------------------------------------------------------------- 1 | /* 2 | * LocationListCollection.h 3 | * 4 | * Created on: Feb 19, 2015 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef SRC_NONLTR_LOCATIONLISTCOLLECTION_H_ 9 | #define SRC_NONLTR_LOCATIONLISTCOLLECTION_H_ 10 | 11 | #include 12 | #include 13 | 14 | #include "LocationList.h" 15 | #include "../utility/Util.h" 16 | #include "../exception/InvalidStateException.h" 17 | 18 | using namespace std; 19 | using namespace utility; 20 | 21 | namespace nonltr { 22 | 23 | class LocationListCollection { 24 | 25 | private: 26 | string fileName; 27 | map * collection; 28 | void readCoordinates(); 29 | 30 | public: 31 | LocationListCollection(string); 32 | virtual ~LocationListCollection(); 33 | LocationList * const getLocationList(string); 34 | void print(); 35 | void convertToRedFormat(); 36 | void trim(int ); 37 | }; 38 | 39 | } /* namespace nonltr */ 40 | 41 | #endif /* SRC_NONLTR_LOCATIONLISTCOLLECTION_H_ */ 42 | -------------------------------------------------------------------------------- /src/nonltr/Scanner.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Scanner.h 3 | * 4 | * Created on: Aug 19, 2013 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef SCANNER_H_ 9 | #define SCANNER_H_ 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include "Chromosome.h" 16 | #include "ChromosomeOneDigit.h" 17 | #include "HMM.h" 18 | #include "ITableView.h" 19 | #include "Scorer.h" 20 | #include "../utility/Util.h" 21 | #include "../utility/ILocation.h" 22 | #include "../utility/Location.h" 23 | #include "../exception/InvalidInputException.h" 24 | #include "../exception/InvalidStateException.h" 25 | #include "../exception/FileDoesNotExistException.h" 26 | #include "../exception/InvalidOperationException.h" 27 
| 28 | using namespace std; 29 | using namespace utility; 30 | using namespace exception; 31 | 32 | namespace nonltr { 33 | 34 | class Scanner { 35 | private: 36 | //string chromFile; 37 | ChromosomeOneDigit * chrom; 38 | const vector *> * segmentList; 39 | Scorer * scorer; 40 | vector * scoreList; 41 | vector * regionList; 42 | int k; 43 | HMM * hmm; 44 | // bool isTrainMode; 45 | 46 | // Methods 47 | void start(); 48 | void check(); 49 | void decode(); 50 | void extendByK(); 51 | int extendByKHelper(int, int, int); 52 | void merge(); 53 | 54 | public: 55 | static const int FRMT_POS = 1; 56 | static const int FRMT_BED = 2; 57 | 58 | Scanner(HMM *, int, ChromosomeOneDigit *, string); 59 | Scanner(HMM *, int, ChromosomeOneDigit *, ITableView *); 60 | virtual ~Scanner(); 61 | void makeForwardCoordinates(); 62 | 63 | void printScores(string, bool); 64 | void printIndex(string, bool, int); 65 | void printMasked(string, Chromosome&, bool); 66 | void mergeWithOtherRegions(const vector *); 67 | const vector* getRegionList(); 68 | unsigned int getTotalRegionLength(); 69 | }; 70 | 71 | } /* namespace nonltr */ 72 | #endif /* SCANNER_H_ */ 73 | -------------------------------------------------------------------------------- /src/nonltr/Scanner.h~: -------------------------------------------------------------------------------- 1 | /* 2 | * Scanner.h 3 | * 4 | * Created on: Aug 19, 2013 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef SCANNER_H_ 9 | #define SCANNER_H_ 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include "Chromosome.h" 16 | #include "ChromosomeOneDigit.h" 17 | #include "HMM.h" 18 | #include "ITableView.h" 19 | #include "Scorer.h" 20 | #include "../utility/Util.h" 21 | #include "../utility/ILocation.h" 22 | #include "../utility/Location.h" 23 | #include "../exception/InvalidInputException.h" 24 | #include "../exception/InvalidStateException.h" 25 | #include "../exception/FileDoesNotExistException.h" 26 | #include 
"../exception/InvalidOperationException.h" 27 | 28 | using namespace std; 29 | using namespace utility; 30 | using namespace exception; 31 | 32 | namespace nonltr { 33 | 34 | class Scanner { 35 | private: 36 | //string chromFile; 37 | ChromosomeOneDigit * chrom; 38 | const vector *> * segmentList; 39 | Scorer * scorer; 40 | vector * scoreList; 41 | vector * regionList; 42 | int k; 43 | HMM * hmm; 44 | // bool isTrainMode; 45 | 46 | // Methods 47 | void start(); 48 | void check(); 49 | void decode(); 50 | void extendByK(); 51 | int extendByKHelper(int, int, int); 52 | void merge(); 53 | 54 | public: 55 | static const int FRMT_POS = 1; 56 | static const int FRMT_BED = 2; 57 | 58 | Scanner(HMM *, int, ChromosomeOneDigit *, string); 59 | Scanner(HMM *, int, ChromosomeOneDigit *, ITableView *); 60 | virtual ~Scanner(); 61 | void makeForwardCoordinates(); 62 | 63 | void printScores(string, bool); 64 | void printIndex(string, bool, int); 65 | void printMasked(string, Chromosome&, bool); 66 | void mergeWithOtherRegions(const vector *); 67 | const vector* getRegionList(); 68 | }; 69 | 70 | } /* namespace nonltr */ 71 | #endif /* SCANNER_H_ */ 72 | -------------------------------------------------------------------------------- /src/nonltr/Scorer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Scorer.cpp 3 | * 4 | * Created on: Aug 3, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | #include "Scorer.h" 8 | 9 | Scorer::Scorer(ChromosomeOneDigit * chromIn, 10 | ITableView * const table) { 11 | chrom = chromIn; 12 | kmerTable = table; 13 | scores = new vector(chrom->getBase()->size(), 0); 14 | k = kmerTable->getK(); 15 | max = -1; 16 | score(); 17 | calculateMax(); 18 | } 19 | 20 | Scorer::~Scorer() { 21 | scores->clear(); 22 | delete scores; 23 | } 24 | 25 | /** 26 | * This method scores each nucleotide in the chromosome. 27 | * The nucleotides represented by 'N' are assigned zero. 
28 | */ 29 | void Scorer::score() { 30 | const vector *> * segment = chrom->getSegment(); 31 | const char * segBases = chrom->getBase()->c_str(); 32 | 33 | for (int s = 0; s < segment->size(); s++) { 34 | int start = segment->at(s)->at(0); 35 | int end = segment->at(s)->at(1); 36 | kmerTable->wholesaleValueOf(segBases, start, end - k + 1, scores, 37 | start); 38 | 39 | // Handle the last word from end - k + 2 till the end, inclusive. 40 | for (int i = end - k + 2; i <= end; i++) { 41 | (*scores)[i] = scores->at(i - 1); 42 | } 43 | } 44 | } 45 | 46 | /** 47 | * This method takes the logarithm of the scores according to the base. 48 | * If the score equals zero, it is left the same. 49 | */ 50 | void Scorer::takeLog(double base) { 51 | // Handle the case where base is one 52 | bool isOne = false; 53 | if (fabs(base - 1.0) < std::numeric_limits::epsilon()) { 54 | isOne = true; 55 | } 56 | double logBase = isOne ? log(1.5) : log(base); 57 | 58 | const vector *> * segment = chrom->getSegment(); 59 | for (int s = 0; s < segment->size(); s++) { 60 | int start = segment->at(s)->at(0); 61 | int end = segment->at(s)->at(1); 62 | for (int h = start; h <= end; h++) { 63 | int score = scores->at(h); 64 | 65 | if (score != 0) { 66 | if (!isOne || (isOne && score > 1)) { 67 | (*scores)[h] = ceil(log(score) / logBase); 68 | } 69 | } 70 | } 71 | } 72 | } 73 | 74 | int Scorer::getK() { 75 | return k; 76 | } 77 | 78 | vector* Scorer::getScores() { 79 | return scores; 80 | } 81 | 82 | void Scorer::printScores(string outputFile, bool canAppend) { 83 | ofstream outScores; 84 | if (canAppend) { 85 | outScores.open(outputFile.c_str(), ios::out | ios::app); 86 | } else { 87 | outScores.open(outputFile.c_str(), ios::out); 88 | } 89 | 90 | int step = 50; 91 | outScores << chrom->getHeader() << endl; 92 | int len = scores->size(); 93 | for (int i = 0; i < len; i = i + step) { 94 | int e = (i + step - 1 > len - 1) ? 
len - 1 : i + step - 1; 95 | for (int k = i; k <= e; k++) { 96 | outScores << scores->at(k) << " "; 97 | } 98 | outScores << endl; 99 | } 100 | outScores << endl; 101 | 102 | outScores.close(); 103 | } 104 | 105 | int Scorer::countLessOrEqual(int thr) { 106 | int count = 0; 107 | const vector *> * segment = chrom->getSegment(); 108 | for (int s = 0; s < segment->size(); s++) { 109 | int start = segment->at(s)->at(0); 110 | int end = segment->at(s)->at(1); 111 | for (int h = start; h <= end; h++) { 112 | if (scores->at(h) <= thr) { 113 | count++; 114 | } 115 | } 116 | } 117 | return count; 118 | } 119 | 120 | void Scorer::calculateMax() { 121 | const vector *> * segmentList = chrom->getSegment(); 122 | int segmentCount = segmentList->size(); 123 | for (int jj = 0; jj < segmentCount; jj++) { 124 | vector * segment = segmentList->at(jj); 125 | int start = segment->at(0); 126 | int end = segment->at(1); 127 | for (int ss = start; ss <= end; ss++) { 128 | int score = scores->at(ss); 129 | if (score > max) { 130 | max = score; 131 | } 132 | } 133 | } 134 | 135 | if (max == -1) { 136 | string msg("Error occurred while finding the maximum score."); 137 | throw InvalidStateException(msg); 138 | } 139 | } 140 | 141 | int Scorer::getMax() { 142 | return max; 143 | } 144 | -------------------------------------------------------------------------------- /src/nonltr/Scorer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Scorer.h 3 | * 4 | * Created on: Aug 3, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef SCORER_H_ 9 | #define SCORER_H_ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "ITableView.h" 18 | #include "ChromosomeOneDigit.h" 19 | #include "../utility/Util.h" 20 | #include "../exception/InvalidStateException.h" 21 | 22 | using namespace std; 23 | using namespace nonltr; 24 | using namespace utility; 25 | using namespace exception; 26 | 27 | namespace nonltr 
{ 28 | class Scorer { 29 | private: 30 | /* Fields */ 31 | ChromosomeOneDigit * chrom; 32 | ITableView * kmerTable; 33 | vector * scores; 34 | int k; 35 | int max; 36 | 37 | /* Methods */ 38 | void score(); 39 | void calculateMax(); 40 | 41 | public: 42 | /* Methods */ 43 | Scorer(ChromosomeOneDigit *, ITableView *); 44 | virtual ~Scorer(); 45 | void printScores(string, bool); 46 | vector* getScores(); 47 | int getK(); 48 | void takeLog(double); 49 | int countLessOrEqual(int); 50 | int getMax(); 51 | }; 52 | } 53 | 54 | #endif /* Scorer_H_ */ 55 | -------------------------------------------------------------------------------- /src/nonltr/TableBuilder.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * TableBuilder.cpp 3 | * 4 | * Created on: Jul 31, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #include "TableBuilder.h" 9 | 10 | TableBuilder::TableBuilder(string dir, int motifSize, int order, int minObs) { 11 | genomeDir = dir; 12 | k = motifSize; 13 | genomeLength = 0; 14 | // kmerTable = new KmerHashTable(k); 15 | // kmerTable = new EnrichmentView(k); 16 | 17 | // Whenever you change the template, modify line 50 and 70 and the header file line 35 18 | kmerTable = new EnrichmentMarkovView(k, order, minObs); 19 | 20 | buildTable(); 21 | } 22 | 23 | TableBuilder::~TableBuilder() { 24 | delete kmerTable; 25 | } 26 | 27 | void TableBuilder::buildTable() { 28 | vector * fileList = new vector(); 29 | Util::readChromList(genomeDir, fileList, "fa"); 30 | 31 | for (int i = 0; i < fileList->size(); i++) { 32 | cout << "Counting k-mers in " << fileList->at(i) << " ..." 
<< endl; 33 | ChromListMaker * maker = new ChromListMaker(fileList->at(i)); 34 | const vector * chromList = maker->makeChromOneDigitList(); 35 | 36 | for (int h = 0; h < chromList->size(); h++) { 37 | ChromosomeOneDigit * chrom = 38 | dynamic_cast(chromList->at(h)); 39 | if (chrom) { 40 | genomeLength += chrom->getEffectiveSize(); 41 | updateTable(chrom); 42 | } else { 43 | throw InvalidStateException(string("Dynamic cast failed.")); 44 | } 45 | } 46 | 47 | delete maker; 48 | } 49 | // Check if overflow has occurred 50 | kmerTable->checkOverflow(); 51 | 52 | // View 53 | // EnrichmentView * view = dynamic_cast(kmerTable); 54 | EnrichmentMarkovView * view = 55 | dynamic_cast *>(kmerTable); 56 | 57 | if (view) { 58 | view->generateProbapilities(); 59 | view->processTable(); 60 | maxValue = view->getMaxValue(); 61 | } else { 62 | throw InvalidStateException(string("Dynamic cast failed.")); 63 | } 64 | cout << "Enrichment view is ready." << endl; 65 | 66 | fileList->clear(); 67 | delete fileList; 68 | 69 | /* If you would like to see the contents of the table.*/ 70 | // kmerTable-> printTable(); 71 | } 72 | 73 | void TableBuilder::updateTable(ChromosomeOneDigit * chrom) { 74 | // EnrichmentView * view = dynamic_cast(kmerTable); 75 | EnrichmentMarkovView * view = 76 | dynamic_cast *>(kmerTable); 77 | 78 | const vector *> * segment = chrom->getSegment(); 79 | const char * segBases = chrom->getBase()->c_str(); 80 | 81 | for (int s = 0; s < segment->size(); s++) { 82 | int start = segment->at(s)->at(0); 83 | int end = segment->at(s)->at(1); 84 | // cerr << "The segment length is: " << (end-start+1) << endl; 85 | 86 | // Fast, but require some memory proportional to the segment length. 
87 | kmerTable->wholesaleIncrement(segBases, start, end - k + 1); 88 | if (view) { 89 | view->count(segBases, start, end); 90 | } else { 91 | throw InvalidStateException(string("Dynamic cast failed.")); 92 | } 93 | 94 | // Slow, but memory efficient 95 | /* 96 | vector hashList = vector(); 97 | kmerTable->hash(segBases, start, end - k + 1, &hashList); 98 | 99 | for (int i = start; i <= end - k + 1; i++) { 100 | kmerTable->increment(segBases, i); 101 | } 102 | */ 103 | } 104 | } 105 | 106 | KmerHashTable * const TableBuilder::getKmerTable() { 107 | return kmerTable; 108 | } 109 | 110 | long TableBuilder::getGenomeLength() { 111 | if (genomeLength < 0) { 112 | string msg("The length of the genome cannot be negative."); 113 | throw InvalidStateException(msg); 114 | } 115 | 116 | return genomeLength; 117 | } 118 | 119 | int TableBuilder::getMaxValue() { 120 | return maxValue; 121 | } 122 | -------------------------------------------------------------------------------- /src/nonltr/TableBuilder.h: -------------------------------------------------------------------------------- 1 | /* 2 | * TableBuilder.h 3 | * 4 | * Created on: Jul 31, 2012 5 | * Author: Hani Zakaria Girgis, PhD - NCBI/NLM/NIH 6 | */ 7 | 8 | #ifndef TABLEBUILDER_H_ 9 | #define TABLEBUILDER_H_ 10 | 11 | #include "KmerHashTable.h" 12 | #include "EnrichmentMarkovView.h" 13 | #include "ChromosomeOneDigit.h" 14 | #include "ChromListMaker.h" 15 | #include "IChromosome.h" 16 | 17 | #include "../utility/Util.h" 18 | #include "../exception/InvalidStateException.h" 19 | 20 | #include 21 | 22 | using namespace std; 23 | using namespace nonltr; 24 | using namespace utility; 25 | using namespace exception; 26 | 27 | namespace nonltr { 28 | class TableBuilder { 29 | private: 30 | /** 31 | * k-mer table 32 | */ 33 | KmerHashTable * kmerTable; 34 | int maxValue; 35 | 36 | /** 37 | * Directory including the FASTA files comprising the genome. 
38 | * These files must have the 39 | */ 40 | string genomeDir; 41 | 42 | /** 43 | * The size of the motif 44 | */ 45 | int k; 46 | 47 | /** 48 | * The total length of the whole genome 49 | */ 50 | long genomeLength; 51 | 52 | /** 53 | * Methods 54 | */ 55 | void buildTable(); 56 | void updateTable(ChromosomeOneDigit *); 57 | 58 | public: 59 | TableBuilder(string, int, int, int); 60 | virtual ~TableBuilder(); 61 | KmerHashTable * const getKmerTable(); 62 | void printTable(); 63 | long getGenomeLength(); 64 | int getMaxValue(); 65 | }; 66 | } 67 | 68 | #endif /* TABLEBUILDER_H_ */ 69 | -------------------------------------------------------------------------------- /src/nonltr/Trainer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Trainer.h 3 | * 4 | * Created on: Aug 20, 2013 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef TRAINER_H_ 9 | #define TRAINER_H_ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "TableBuilder.h" 19 | #include "KmerHashTable.h" 20 | #include "HMM.h" 21 | #include "ChromDetectorMaxima.h" 22 | #include "Scorer.h" 23 | #include "ChromListMaker.h" 24 | #include "LocationListCollection.h" 25 | #include "../utility/Util.h" 26 | #include "../exception/InvalidStateException.h" 27 | 28 | using namespace std; 29 | using namespace utility; 30 | using namespace exception; 31 | 32 | namespace nonltr { 33 | 34 | class Trainer { 35 | private: 36 | string genomeDir; 37 | string candidateDir; 38 | string otherDir; 39 | bool canPrintCandidates; 40 | vector * fileList; 41 | int chromCount; 42 | int order; 43 | int k; 44 | int max; // Maximum score in the entire genome 45 | double t; // Score threshold 46 | double tDetector; // threshold for the detector because it uses < not <=; 47 | double p; // Percentage of scores below the threshold, t, in non-repeats 48 | //double r; 49 | double s; // Half width of the mask 50 | long genomeLength; 51 
| //vector * sampleList; 52 | TableBuilder * builder; 53 | KmerHashTable * table; 54 | HMM * hmm; 55 | int isCND; 56 | int isCON; 57 | // The minimum number of the observed k-mers 58 | const int minObs; 59 | 60 | void stage1(); 61 | void stage2(); 62 | void stage3(); 63 | //void stage4(); 64 | 65 | public: 66 | Trainer(string, int, int, double, double, string, int); 67 | Trainer(string, int, int, double, double, string, bool, string, int); 68 | Trainer(string, int, int, double, double, int); 69 | Trainer(string, int, int, double, double, bool, string, int); 70 | 71 | void initialize(string, int, int, double, double); 72 | virtual ~Trainer(); 73 | void printTable(string); 74 | void printHmm(string); 75 | HMM*& getHmm(); 76 | KmerHashTable * getTable(); 77 | 78 | }; 79 | 80 | } /* namespace nonltr */ 81 | #endif /* TRAINER_H_ */ 82 | -------------------------------------------------------------------------------- /src/nonltr/joseph_client.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "Chromosome.h" 3 | using namespace std; 4 | 5 | int main(){ 6 | 7 | Chromosome my_chrom("dm6.fa.gz"); 8 | my_chrom.printSegmentList(); 9 | 10 | 11 | } -------------------------------------------------------------------------------- /src/test/AlignUtility.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include "../utility/GlobAlignE.h" 7 | 8 | using namespace std; 9 | using namespace utility; 10 | 11 | int main(int argc, char * argv []){ 12 | 13 | string first = string(argv[1]); 14 | const char * seq1 = first.c_str(); 15 | int start1 = 0; 16 | int end1 = first.length(); 17 | string second = string(argv[2]); 18 | 19 | const char * seq2 = second.c_str(); 20 | int start2 = 0; 21 | int end2 = second.length(); 22 | int match = atoi(argv[3]); 23 | int mismatch = atoi(argv[4]); 24 | int gapOpen = atoi(argv[5]); 25 | 
cout<getIdentity()< 3 | #include 4 | using namespace std; 5 | 6 | namespace tr{ 7 | 8 | Candidate::Candidate(int startIn,int endIn,int heightIn){ 9 | 10 | start = startIn; 11 | end = endIn; 12 | height = heightIn; 13 | } 14 | 15 | Candidate::~Candidate(){ 16 | 17 | } 18 | 19 | int Candidate::getStart(){ 20 | 21 | return start; 22 | } 23 | 24 | int Candidate::getEnd(){ 25 | return end; 26 | } 27 | 28 | int Candidate::getHeight(){ 29 | return height; 30 | } 31 | int Candidate::getAbsHeight(){ 32 | return abs(height); 33 | } 34 | 35 | void Candidate::printCandidate(){ 36 | cout<<"start: "< 12 | #include 13 | #include "BackwardTr.h" 14 | #include "ForwardTr.h" 15 | 16 | using namespace std; 17 | 18 | namespace tr { 19 | 20 | class DetectorTr { 21 | private: 22 | vector* scoreList; 23 | int initValue; 24 | int gap; 25 | 26 | vector * bList; 27 | vector * fList; 28 | 29 | void findTr(); 30 | void findTrHelper(int, vector *); 31 | void findMatch(int, int, int, int); 32 | void sortLastTr(); 33 | void matchBackwardTr(BackwardTr &); 34 | int checkMatch(int, int); 35 | void addToBList(BackwardTr *); 36 | 37 | public: 38 | DetectorTr(vector*, int); 39 | virtual ~DetectorTr(); 40 | vector * getBList(); 41 | }; 42 | 43 | } /* namespace tr */ 44 | #endif /* DETECTORTR_H_ */ 45 | -------------------------------------------------------------------------------- /src/tr/FilterTr.h: -------------------------------------------------------------------------------- 1 | /* 2 | * FilterTr.h 3 | * 4 | * Created on: Dec 14, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef FILTERTR_H_ 9 | #define FILTERTR_H_ 10 | 11 | #include "BackwardTr.h" 12 | #include "LtrTe.h" 13 | 14 | #include 15 | #include 16 | 17 | using namespace std; 18 | 19 | namespace tr { 20 | 21 | class FilterTr { 22 | private: 23 | const string * seq; 24 | const char * cSeq; 25 | vector * bList; 26 | vector * teList; 27 | string bedFileName; 28 | string name; 29 | int k; 30 | int init; 31 | int ltrLen; 32 | 
int ltrId; 33 | int ltrSep; 34 | int tsdW; 35 | int tsdT; 36 | // int tailW; 37 | int min; 38 | int max; 39 | int maxLtrLen; 40 | int minLtrLen; 41 | 42 | const int tailT = 12; 43 | 44 | bool canUseLtr; 45 | bool canUseSine; 46 | bool canUsePpt; 47 | bool canUseTsd; 48 | bool canUseLength; 49 | bool canUseDNA; 50 | 51 | void tightenBounds(); 52 | void adjust(); 53 | void filter(); 54 | void fillTeList(); 55 | void filterAcc2Ltr(); 56 | void filterAcc2Sine(); 57 | void filterAcc2Tsd(); 58 | void filterAcc2Ppt(); 59 | void filterAcc2Length(); 60 | void filterAcc2DNA(); 61 | 62 | int calculateTailWindow(double,int,int); 63 | void removeOverlaps(); 64 | 65 | public: 66 | FilterTr(string,const string *, vector *, int,int,int,int,int,int); 67 | virtual ~FilterTr(); 68 | vector * getTeList(); 69 | bool orderFunction(BackwardTr *, BackwardTr *); 70 | void bedFormat(int,int); 71 | void fullFormat(int,int); 72 | string convertNucleotides(string); 73 | 74 | 75 | }; 76 | 77 | } /* namespace tr */ 78 | #endif /* FILTERTR_H_ */ 79 | -------------------------------------------------------------------------------- /src/tr/ForwardTr.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * ForwardTr.cpp 3 | * 4 | * Created on: Dec 12, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | #include "Tr.h" 8 | #include "ForwardTr.h" 9 | #include "BackwardTr.h" 10 | #include "../exception/InvalidStateException.h" 11 | #include "../exception/InvalidOperationException.h" 12 | #include "../utility/Util.h" 13 | 14 | #include 15 | using namespace std; 16 | 17 | using namespace exception; 18 | using namespace utility; 19 | 20 | namespace tr { 21 | 22 | ForwardTr::ForwardTr() : 23 | Tr() { 24 | 25 | } 26 | 27 | ForwardTr::ForwardTr(int s1In, int e1In, int s2In, int e2In) : 28 | Tr(s1In, e1In, s2In, e2In) { 29 | checkState(); 30 | } 31 | 32 | ForwardTr::~ForwardTr() { 33 | } 34 | 35 | ForwardTr::ForwardTr(ForwardTr& o) : 36 | Tr(o) { 37 | checkState(); 
38 | } 39 | 40 | void ForwardTr::checkState() { 41 | if (s1 > s2) { 42 | string msg("Invalid direction: s2 must be greater than s1."); 43 | msg.append(" s1: "); 44 | msg.append(Util::int2string(s1)); 45 | msg.append(" s2: "); 46 | msg.append(Util::int2string(s2)); 47 | throw InvalidStateException(msg); 48 | } 49 | } 50 | 51 | void ForwardTr::initialize(int s1, int e1, int s2, int e2) { 52 | Tr::initialize(s1, e1, s2, e2); 53 | checkState(); 54 | } 55 | 56 | /** 57 | * f: is the other TR to be merged with this TR 58 | * r: is the resulting TR 59 | */ 60 | void ForwardTr::merge(ForwardTr * f, ForwardTr& r) { 61 | int s1f = f->getS1(); 62 | int e1f = f->getE1(); 63 | int s2f = f->getS2(); 64 | int e2f = f->getE2(); 65 | 66 | int s1n = (s1 < s1f) ? s1 : s1f; 67 | int e1n = (e1 > e1f) ? e1 : e1f; 68 | int s2n = (s2 < s2f) ? s2 : s2f; 69 | int e2n = (e2 > e2f) ? e2 : e2f; 70 | 71 | r.initialize(s1n, e1n, s2n, e2n); 72 | } 73 | 74 | /** 75 | * b: is the backward TR to be merged with this TR; its second repeat (s2, e2) is paired with this TR's first repeat and its first repeat (s1, e1) with this TR's second repeat 76 | * r: is the resulting TR 77 | */ 78 | void ForwardTr::merge(BackwardTr * b, ForwardTr& r) { 79 | int s1b = b->getS1(); 80 | int e1b = b->getE1(); 81 | int s2b = b->getS2(); 82 | int e2b = b->getE2(); 83 | 84 | int s1n = (s1 < s2b) ? s1 : s2b; 85 | int e1n = (e1 > e2b) ? e1 : e2b; 86 | int s2n = (s2 < s1b) ? s2 : s1b; 87 | int e2n = (e2 > e1b) ? 
e2 : e1b; 88 | 89 | r.initialize(s1n, e1n, s2n, e2n); 90 | } 91 | 92 | /** 93 | * r: is the flipped TR 94 | */ 95 | void ForwardTr::flip(BackwardTr& r) { 96 | r.initialize(s2, e2, s1, e1); 97 | } 98 | 99 | bool ForwardTr::isOverlapping(BackwardTr* b) { 100 | return isOverlappingOpposite(b); 101 | } 102 | 103 | bool ForwardTr::isOverlapping(ForwardTr* f) { 104 | return isOverlappingSame(f); 105 | } 106 | 107 | int ForwardTr::getStart() const { 108 | return s1; 109 | } 110 | 111 | int ForwardTr::getEnd() const { 112 | return e2; 113 | } 114 | 115 | void ForwardTr::setStart(int sIn) { 116 | string msg("Setting the start of a TR instance is not allowed."); 117 | throw InvalidOperationException(msg); 118 | } 119 | 120 | void ForwardTr::setEnd(int eIn) { 121 | string msg("Setting the end of a TR instance is not allowed."); 122 | throw InvalidOperationException(msg); 123 | } 124 | 125 | int ForwardTr::getLength() { 126 | return e2 - s1 + 1; 127 | } 128 | 129 | string ForwardTr::toString() { 130 | string msg("TwoLTRs "); 131 | msg.append(Util::int2string(getStart())); 132 | msg.append("-"); 133 | msg.append(Util::int2string(getEnd())); 134 | 135 | msg.append(" L_LTR "); 136 | msg.append(Util::int2string(s1)); 137 | msg.append("-"); 138 | msg.append(Util::int2string(e1)); 139 | 140 | //msg.append(" L_LTR_Len "); 141 | //msg.append(Util::int2string((*e1) - (*s1) + 1)); 142 | 143 | msg.append(" R_LTR "); 144 | msg.append(Util::int2string(s2)); 145 | msg.append("-"); 146 | msg.append(Util::int2string(e2)); 147 | 148 | //msg.append(" R_LTR_Len "); 149 | //msg.append(Util::int2string((*e2) - (*s2) + 1)); 150 | 151 | return msg; 152 | } 153 | 154 | } 155 | -------------------------------------------------------------------------------- /src/tr/ForwardTr.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ForwardTr.h 3 | * 4 | * Created on: Dec 12, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef FORWARDTR_H_ 9 | 
#define FORWARDTR_H_ 10 | 11 | #include "Tr.h" 12 | 13 | namespace tr { 14 | 15 | class BackwardTr; 16 | 17 | class ForwardTr: public tr::Tr { 18 | public: 19 | ForwardTr(); 20 | ForwardTr(int, int, int, int); 21 | virtual ~ForwardTr(); 22 | ForwardTr(ForwardTr&); 23 | 24 | void merge(BackwardTr *, ForwardTr&); 25 | void merge(ForwardTr *, ForwardTr&); 26 | void flip(BackwardTr&); 27 | 28 | bool isOverlapping(BackwardTr *); 29 | bool isOverlapping(ForwardTr *); 30 | 31 | virtual void initialize(int, int, int, int); 32 | 33 | virtual void checkState(); 34 | virtual int getEnd() const; 35 | virtual int getStart() const; 36 | virtual void setEnd(int); 37 | virtual void setStart(int); 38 | virtual int getLength(); 39 | virtual string toString(); 40 | }; 41 | 42 | } 43 | 44 | #endif /* FORWARDTR_H_ */ 45 | -------------------------------------------------------------------------------- /src/tr/ITrVisitor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ITrVisitor.h 3 | * 4 | * Created on: Dec 14, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef ITRVISITOR_H_ 9 | #define ITRVISITOR_H_ 10 | 11 | namespace tr { 12 | 13 | class Tr; 14 | 15 | class ITrVisitor { 16 | public: 17 | virtual void visit(Tr *) = 0; 18 | }; 19 | 20 | } /* namespace tr */ 21 | #endif /* ITRVISITOR_H_ */ 22 | -------------------------------------------------------------------------------- /src/tr/LtrTe.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * LtrTe.cpp 3 | * 4 | * Created on: Dec 26, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #include "LtrTe.h" 9 | #include "BackwardTr.h" 10 | 11 | #include "../exception/InvalidOperationException.h" 12 | #include "../exception/InvalidStateException.h" 13 | #include "../utility/Util.h" 14 | #include "../utility/ITail.h" 15 | #include "../utility/Tail.h" 16 | #include "../utility/EmptyTail.h" 17 | #include "../utility/EmptyTSD.h" 18 
| #include "../utility/Util.h" 19 | 20 | using namespace utility; 21 | using namespace exception; 22 | 23 | namespace tr { 24 | 25 | LtrTe::LtrTe(BackwardTr * ltrIn, ITSD * tsdIn, ITail * pptIn) { 26 | initializer(ltrIn, tsdIn, pptIn); 27 | } 28 | 29 | LtrTe::LtrTe(LtrTe& copy) { 30 | initializer(copy.getLtr(), copy.getTsd(), copy.getPpt()); 31 | } 32 | 33 | LtrTe::LtrTe(LtrTe& another, int offset) { 34 | 35 | BackwardTr * ltr = another.getLtr(); 36 | int s1 = offset + ltr->getS1(); 37 | int e1 = offset + ltr->getE1(); 38 | int s2 = offset + ltr->getS2(); 39 | int e2 = offset + ltr->getE2(); 40 | BackwardTr * shiftedLtr = new BackwardTr(s1, e1, s2, e2); 41 | 42 | ITSD * tsd = another.getTsd(); 43 | 44 | ITSD * shiftedTSD = tsd; 45 | 46 | if(tsd != EmptyTSD::getInstance()){ 47 | 48 | shiftedTSD= new TSD(*tsd,offset); 49 | } 50 | 51 | ITail * ppt = another.getPpt(); 52 | 53 | ITail * shiftedTail = ppt; 54 | 55 | if(ppt!= EmptyTail::getInstance()){ 56 | 57 | shiftedTail= new Tail(*ppt,offset); 58 | } 59 | 60 | 61 | initializer(shiftedLtr,shiftedTSD,shiftedTail); 62 | delete shiftedLtr; 63 | 64 | if(tsd != EmptyTSD::getInstance()){ 65 | 66 | delete shiftedTSD; 67 | } 68 | 69 | if(ppt!= EmptyTail::getInstance()){ 70 | delete shiftedTail; 71 | } 72 | 73 | 74 | } 75 | 76 | void LtrTe::initializer(BackwardTr * ltrIn, ITSD * tsdIn, ITail * pptIn) { 77 | ltr = new BackwardTr(*ltrIn); 78 | tgCaMotif = EmptyLocation::getInstance(); 79 | 80 | setTsd(tsdIn); 81 | setPpt(pptIn); 82 | 83 | if (s > e) { 84 | string msg("The start of the TE must be <= its end. "); 85 | msg.append("The start is: "); 86 | msg.append(Util::int2string(s)); 87 | msg.append(" The end is: "); 88 | msg.append(Util::int2string(e)); 89 | msg.append("."); 90 | throw InvalidStateException(msg); 91 | } 92 | 93 | if (tsd != EmptyTSD::getInstance() 94 | && !(ltr->getStart() > s && ltr->getEnd() < e)) { 95 | string msg("The LTR must be within the TSD. 
"); 96 | msg.append("The start and the end of the TSD are: "); 97 | msg.append(Util::int2string(s)); 98 | msg.append(":"); 99 | msg.append(Util::int2string(e)); 100 | msg.append(". The start and the end of the LTR are: "); 101 | msg.append(Util::int2string(ltr->getStart())); 102 | msg.append(":"); 103 | msg.append(Util::int2string(ltr->getEnd())); 104 | msg.append("."); 105 | throw InvalidStateException(msg); 106 | } 107 | } 108 | 109 | LtrTe::~LtrTe() { 110 | delete ltr; 111 | 112 | if (tsd != EmptyTSD::getInstance()) { 113 | delete tsd; 114 | } 115 | 116 | if (ppt != EmptyTail::getInstance()) { 117 | delete ppt; 118 | } 119 | 120 | if(tgCaMotif != EmptyLocation::getInstance()){ 121 | delete tgCaMotif; 122 | } 123 | } 124 | 125 | int LtrTe::getStart() const { 126 | return s; 127 | } 128 | 129 | int LtrTe::getEnd() const { 130 | return e; 131 | } 132 | 133 | int LtrTe::getLength() { 134 | return e - s + 1; 135 | } 136 | 137 | BackwardTr * LtrTe::getLtr() { 138 | return ltr; 139 | } 140 | 141 | ITail * LtrTe::getPpt() { 142 | return ppt; 143 | } 144 | 145 | ITSD * LtrTe::getTsd() { 146 | return tsd; 147 | } 148 | 149 | void LtrTe::setNested(bool isNested){ 150 | 151 | nested = isNested; 152 | } 153 | 154 | bool LtrTe::getNested(){ 155 | 156 | return nested; 157 | } 158 | //11/9/17 changed to print in .bed format rather than .coor 159 | string LtrTe::toString(string header) { 160 | string msg(""); 161 | msg.append(header); 162 | msg.append(" "); 163 | msg.append(toStringHelper()); 164 | return msg; 165 | } 166 | 167 | string LtrTe::toString() { 168 | string msg("LTR_TE "); 169 | msg.append(toStringHelper()); 170 | return msg; 171 | } 172 | 173 | string LtrTe::toStringHelper() { 174 | string msg(Util::int2string(s)); 175 | msg.append(" "); 176 | msg.append(Util::int2string(e)); 177 | /*msg.append(" "); 178 | msg.append(ltr->toString()); 179 | msg.append(" "); 180 | msg.append(tsd->toString()); 181 | msg.append(" "); 182 | msg.append(ppt->toString());*/ 183 | return 
msg; 184 | } 185 | 186 | void LtrTe::setStart(int start) { 187 | string msg("Setting the start of an instance of LTR TE is not allowed."); 188 | throw InvalidOperationException(msg); 189 | } 190 | 191 | void LtrTe::setEnd(int end) { 192 | string msg("Setting the end of an instance of LTR TE is not allowed."); 193 | throw InvalidOperationException(msg); 194 | } 195 | 196 | void LtrTe::setPpt(ITail* pptIn) { 197 | if (pptIn == EmptyTail::getInstance()) { 198 | ppt = pptIn; 199 | } else { 200 | ppt = new Tail(*pptIn); 201 | } 202 | } 203 | 204 | void LtrTe::setTsd(ITSD* tsdIn) { 205 | if (tsdIn == EmptyTSD::getInstance()) { 206 | tsd = tsdIn; 207 | s = ltr->getStart(); 208 | e = ltr->getEnd(); 209 | 210 | } else { 211 | tsd = new TSD(*tsdIn); 212 | s = tsd->getLtTsd()->getEnd()+1; 213 | e = tsd->getRtTsd()->getStart()-1; 214 | //s = tsd->getLtTsd()->getStart(); 215 | //e = tsd->getRtTsd()->getEnd(); 216 | 217 | if(e<=s){ 218 | cerr<<"Error in setting TSD"<getStart() < b->getStart(); 227 | } 228 | 229 | bool LtrTe::getDeleted(){ 230 | return deleted; 231 | } 232 | 233 | void LtrTe::setDeleted(bool status){ 234 | deleted = status; 235 | } 236 | 237 | /* 238 | * Find the first TG in whole element 239 | * Find the last CA in the whole element 240 | * Author: Joseph Valencia and Hani Girgis 241 | */ 242 | ILocation * LtrTe::getTgCaMotif(const string* sequence){ 243 | 244 | int window = 20; 245 | string left =sequence->substr(s,window ); 246 | string right = sequence->substr(e-window+1,window); 247 | string leftWindow = Util::oneDigitToNuc(left); 248 | string rightWindow = Util::oneDigitToNuc(right); 249 | 250 | int startTG = leftWindow.find("TG"); 251 | int endCA = rightWindow.rfind("CA"); 252 | 253 | if(startTG != string::npos && endCA != string::npos){ 254 | tgCaMotif = new Location(startTG +s, endCA+e-window+1); 255 | } 256 | 257 | //cout<<"startTG "< answer = std::make_pair(startTG,endCA); 259 | return tgCaMotif; 260 | 261 | // Modify 262 | // If one of them is -1, 
return empty location 263 | // Otherwise, return location 264 | } 265 | 266 | } /* namespace tr */ 267 | -------------------------------------------------------------------------------- /src/tr/LtrTe.h: -------------------------------------------------------------------------------- 1 | /* 2 | * LtrTe.h 3 | * 4 | * Created on: Dec 26, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef LTRTE_H_ 9 | #define LTRTE_H_ 10 | 11 | #include "BackwardTr.h" 12 | #include "../utility/ITSD.h" 13 | #include "../utility/TSD.h" 14 | #include "../utility/ITail.h" 15 | #include "../utility/EmptyLocation.h" 16 | 17 | using namespace utility; 18 | 19 | namespace tr { 20 | 21 | class LtrTe: public ILocation { 22 | private: 23 | BackwardTr * ltr; 24 | ITSD * tsd; 25 | ITail * ppt; 26 | int s; 27 | int e; 28 | void initializer(BackwardTr *, ITSD *, ITail *); 29 | string toStringHelper(); 30 | bool nested = false; // no constructor sets this flag, so give getNested() a defined value before any setNested() call 31 | bool deleted = false; 32 | ILocation * tgCaMotif; 33 | 34 | public: 35 | LtrTe(BackwardTr *, ITSD *, ITail *); 36 | LtrTe(LtrTe &); 37 | LtrTe(LtrTe &,int); 38 | virtual ~LtrTe(); 39 | 40 | virtual int getEnd() const; 41 | virtual int getStart() const; 42 | virtual void setEnd(int); 43 | virtual void setStart(int); 44 | virtual int getLength(); 45 | virtual string toString(); 46 | string toString(string); 47 | 48 | BackwardTr * getLtr(); 49 | ITail * getPpt(); 50 | ITSD * getTsd(); 51 | 52 | void setPpt(ITail *); 53 | void setTsd(ITSD *); 54 | 55 | bool getNested(); 56 | void setNested(bool); 57 | 58 | 59 | bool getDeleted(); 60 | void setDeleted(bool); 61 | 62 | static bool lessThan(LtrTe* a, LtrTe* b); 63 | ILocation * getTgCaMotif(const string* sequence); 64 | 65 | 66 | }; 67 | 68 | } /* namespace tr */ 69 | #endif /* LTRTE_H_ */ 70 | -------------------------------------------------------------------------------- /src/tr/MatchTr.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Created by Joseph V 3 | encia 21 
February 2018 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "MatchTr.h" 15 | #include "ForwardTr.h" 16 | #include "Candidate.h" 17 | #include "PairContainer.h" 18 | #include "../utility/Util.h" 19 | #include "../exception/InvalidInputException.h" 20 | 21 | using namespace std; 22 | 23 | using namespace utility; 24 | using namespace exception; 25 | 26 | namespace tr{ 27 | 28 | MatchTr::MatchTr(vector * scoreListIn,int kIn, int initValueIn,int minIn, int maxIn, int ltrMinIn,int plateauLenIn, int gapTolIn, int id) { 29 | 30 | k = kIn; 31 | scoreList = scoreListIn; 32 | 33 | initValue = initValueIn; 34 | 35 | 36 | min = minIn; 37 | max = maxIn; 38 | ltrMin = ltrMinIn; 39 | 40 | minPlateauLen = plateauLenIn; 41 | diffThresh = gapTolIn; 42 | gapTol = gapTolIn; 43 | identity = id; 44 | 45 | bList = new vector(); 46 | 47 | cleanAndMerge(); 48 | 49 | 50 | } 51 | 52 | void MatchTr::cleanAndMerge(){ 53 | 54 | int len = scoreList->size(); 55 | 56 | 57 | for(int i = 0;iat(i); 60 | int next; 61 | 62 | if(curr != initValue){ 63 | 64 | int peakLength = 1; 65 | 66 | for(int j = i+1;jat(j); 68 | if(next == initValue){ 69 | break; 70 | } 71 | peakLength++; 72 | 73 | } 74 | if(peakLength < minPlateauLen){ 75 | 76 | std::tuple discard = make_tuple('D',i, i + peakLength); 77 | 78 | spikes.push_back(discard); 79 | 80 | } 81 | else{ 82 | std::tuple keep = make_tuple('K',i, i + peakLength); 83 | 84 | spikes.push_back(keep); 85 | } 86 | 87 | i += peakLength; 88 | } 89 | } 90 | 91 | forwardMerge(); 92 | 93 | 94 | backwardMerge(); 95 | 96 | //medianSmooth(); 97 | smoothAndCollect(); 98 | 99 | 100 | } 101 | 102 | 103 | void MatchTr::forwardMerge(){ 104 | // Thanks to Robert Hubley for finding and fixing a bug related to the following 3 lines. 
105 | if(spikes.size() ==0){ 106 | return; 107 | } 108 | 109 | for (int i = 0; i < spikes.size()-1; i++) 110 | { 111 | char curr_type; 112 | int curr_start; 113 | int curr_end; 114 | std::tie(curr_type, curr_start, curr_end) = spikes.at(i); 115 | 116 | int level = findMedian(curr_start, curr_end); 117 | 118 | char next_type; 119 | int next_start; 120 | int next_end; 121 | std::tie(next_type, next_start, next_end) = spikes.at(i + 1); 122 | 123 | int neighborScore = findMedian(next_start,next_end); 124 | 125 | if (next_type == 'K') 126 | { 127 | if (abs(neighborScore - level) < diffThresh && (curr_end + gapTol) >= next_start) // spikes are at same level and within distance of gapTol 128 | { 129 | 130 | for (int j = curr_end; j <= next_start; j++) //replace curr_end with curr_start 131 | { 132 | 133 | (*scoreList)[j] = neighborScore; 134 | } 135 | if (curr_type == 'D') 136 | { 137 | spikes[i] = make_tuple('K', curr_start,next_start-1); //flip curr to a section to keep 138 | 139 | 140 | } 141 | } 142 | 143 | 144 | } 145 | 146 | else if (curr_type =='K' && next_type == 'D'){ 147 | 148 | if (abs(neighborScore - level) < diffThresh && (curr_end + gapTol) >= next_start) // spikes are at same level and within distance of k 149 | 150 | { 151 | for (int j = curr_end; j <= next_start; j++) //replaced curr_end with curr_start. 
TODO: Find out why next-start bounds are off at 1824000-1832000 152 | { 153 | 154 | (*scoreList)[j] = level; 155 | } 156 | 157 | spikes[i + 1] = make_tuple('K', curr_start, next_end); //flip next to a section to keep 158 | } 159 | 160 | } 161 | 162 | } 163 | 164 | } 165 | // Median of scoreList[start, end): 0 for an empty range, the upper median for even sizes. 166 | int MatchTr::findMedian(int start, int end){ 167 | vector<int> section1(scoreList->begin() + start, scoreList->begin() + end); 168 | if(section1.size() ==0){ 169 | return 0; 170 | } 171 | else if(section1.size() ==1){ 172 | return section1.at(0); 173 | } 174 | else if (section1.size()%2 ==0){ 175 | std::nth_element(section1.begin(), section1.begin() + section1.size() / 2, section1.end()); 176 | 177 | return section1[section1.size() / 2]; 178 | } 179 | else{ 180 | std::nth_element(section1.begin(), section1.begin() + section1.size() / 2, section1.end()); 181 | // nth_element only positions the element at size/2; index size/2 + 1 held an arbitrary larger element, not the median. 182 | return section1[section1.size() / 2]; 183 | } 184 | 185 | } 186 | // Replaces each window of 20 scores by the window's median; the final window may be shorter. 187 | void MatchTr::medianSmooth() 188 | { 189 | 190 | vector<int> temp; 191 | temp.reserve(scoreList->size()); 192 | int step = 20; 193 | int i = 0; 194 | const int len = scoreList->size(); // signed, so len - step cannot wrap around when len < step 195 | while (i < len - step) 196 | { 197 | 198 | int median = findMedian(i, i + step); 199 | 200 | for (int j = i; j < i + step; j++) 201 | { 202 | temp.push_back(median); 203 | } 204 | 205 | i += step; 206 | } 207 | int score = findMedian(i, len); 208 | for (int j = i; j < len; j++) 209 | { 210 | temp.push_back(score); 211 | } 212 | // Swap the contents instead of re-seating the pointer: the vector is supplied by the caller, so re-seating leaked the new vector and left the caller's one empty. 213 | scoreList->swap(temp); 214 | 215 | } 216 | 217 | void MatchTr::backwardMerge() 218 | { 219 | // Thanks to Robert Hubley for finding and fixing a bug related to the following 3 lines. 
220 | if(spikes.size() ==0){ 221 | return; 222 | } 223 | 224 | for (int i = spikes.size()-1; i>=1; i--) 225 | { 226 | char curr_type; 227 | int curr_start; 228 | int curr_end; 229 | std::tie(curr_type, curr_start, curr_end) = spikes.at(i); 230 | int level = findMedian(curr_start, curr_end); 231 | 232 | char next_type; 233 | int next_start; 234 | int next_end; 235 | std::tie(next_type, next_start, next_end) = spikes.at(i - 1); 236 | 237 | int neighborScore = findMedian(next_start,next_end); 238 | 239 | 240 | if (curr_type == 'K' && next_type == 'D'){ 241 | 242 | if (abs(neighborScore - level) < diffThresh && (curr_start - gapTol) <= next_end) // spikes are at same level and within distance of k 243 | { 244 | for (int j = curr_start; j > next_start; j--) 245 | { 246 | (*scoreList)[j] = level; 247 | } 248 | } 249 | 250 | } 251 | } 252 | } 253 | 254 | void MatchTr::smoothAndCollect(){ 255 | //Repeat collection 256 | int len = scoreList->size(); 257 | for (int i = 0; i < len; i++) 258 | { 259 | 260 | int curr = scoreList->at(i); 261 | int next; 262 | 263 | if (curr != initValue) 264 | { 265 | 266 | int peakLength = 1; 267 | 268 | for (int j = i + 1; j < len; j++) 269 | { 270 | next = scoreList->at(j); 271 | 272 | if (next == initValue) 273 | { 274 | break; 275 | } 276 | peakLength++; 277 | } 278 | 279 | int minSizeKeep = identity * ltrMin / 100; 280 | 281 | if (peakLength >= minSizeKeep) //added this parameter 282 | { 283 | int height = findMedian(i,i+peakLength-1); 284 | 285 | Candidate *keep = new Candidate(i, i + peakLength - 1, height); 286 | plateaus.push_back(keep); 287 | 288 | } 289 | else 290 | { 291 | 292 | for (int k = i; k < i + peakLength; k++) 293 | { 294 | (*scoreList)[k] = initValue; 295 | } 296 | 297 | } 298 | i += peakLength - 1; 299 | } 300 | } 301 | 302 | PairContainer * matcher = new PairContainer(min,max,diffThresh); 303 | 304 | Candidate * curr; 305 | Candidate * match; 306 | BackwardTr * pair; 307 | for (int i = 0; i < plateaus.size(); i++) 308 | 
{ 309 | curr = plateaus.at(i); 310 | match = matcher->hashOrReturn(curr); 311 | if(match!=nullptr){ 312 | pair = new BackwardTr(match->getStart(), match->getEnd(), curr->getStart(), curr->getEnd()); 313 | bList->push_back(pair); 314 | 315 | } 316 | } 317 | delete matcher; // the container only stores pointers to the candidates held in plateaus, so releasing it does not free any Candidate 318 | } 319 | 320 | bool MatchTr::isMatch(int firstStart,int secondStart){ 321 | 322 | int firstHeight = scoreList->at(firstStart); 323 | 324 | int matchLoc = firstStart + firstHeight; 325 | 326 | return matchLoc == secondStart; 327 | 328 | } 329 | 330 | vector * MatchTr::getRepeatCandidates(){ 331 | return bList; 332 | } 333 | 334 | MatchTr::~MatchTr(){ 335 | // The plateau candidates are allocated in smoothAndCollect() and are only reachable through this object. 336 | for (Candidate * plateau : plateaus) { 337 | delete plateau; 338 | } 339 | plateaus.clear(); 340 | bList->clear(); 341 | delete bList; 342 | } 343 | 344 | vector* MatchTr::getScoreList(){ 345 | return scoreList; 346 | } 347 | 348 | } -------------------------------------------------------------------------------- /src/tr/MatchTr.h: -------------------------------------------------------------------------------- 1 | /* 2 | Created by Joseph Valencia 21 February 2018 3 | */ 4 | #ifndef MATCHTR_H_ 5 | #define MATCHTR_H_ 6 | 7 | #include 8 | #include 9 | #include "BackwardTr.h" 10 | #include "Candidate.h" 11 | //using namespace nonltr; 12 | using namespace std; 13 | namespace tr{ 14 | 15 | class MatchTr{ 16 | 17 | private: 18 | 19 | int initValue; 20 | int k; 21 | string bedFileName; 22 | int min; 23 | int max; 24 | int ltrMin; 25 | int minPlateauLen; 26 | int diffThresh; 27 | int gapTol; 28 | int identity; 29 | 30 | bool isMatch(int,int); 31 | void forwardMerge(); 32 | void backwardMerge(); 33 | void cleanAndMerge(); 34 | void medianSmooth(); 35 | 36 | vector * scoreList; 37 | vector> spikes; 38 | vector plateaus; 39 | vector * bList; 40 | 41 | void smoothAndCollect(); 42 | int findMedian(int,int); 43 | 44 | 45 | public: 46 | vector * getRepeatCandidates(); 47 | MatchTr(vector *,int,int,int,int,int,int,int,int); 48 | virtual ~MatchTr(); 49 | void bedFormat(int,int); 50 | void printFinalScores(int, int); 51 | void adjustBounds(); 52 | vector * 
getScoreList(); 53 | }; 54 | 55 | } 56 | #endif 57 | -------------------------------------------------------------------------------- /src/tr/PairContainer.cpp: -------------------------------------------------------------------------------- 1 | #include "PairContainer.h" 2 | #include 3 | #include "Candidate.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace std; 10 | 11 | namespace tr{ 12 | 13 | 14 | PairContainer::PairContainer(int minDistIn,int maxDistIn,int diffTolIn){ 15 | 16 | minDistance = minDistIn; 17 | maxDistance = maxDistIn; 18 | diffTolerance = diffTolIn; 19 | 20 | allocate(); 21 | 22 | } 23 | PairContainer::~PairContainer(){ 24 | // array is an array of bins (it is indexed as array[i] and sized by containerSize), so it must be released with delete[] 25 | delete[] array; 26 | } 27 | 28 | void PairContainer::allocate(){ 29 | 30 | containerSize = ((maxDistance-minDistance)/diffTolerance)+1; 31 | // cout<<"Size="<[containerSize]; 34 | 35 | // cout<<"allocate passed!"<getAbsHeight(); 50 | // cout<<"height"<printCandidate(); 56 | 57 | std::list midBin = array[searchLoc]; 58 | 59 | if (candidate->getHeight() > 0) 60 | { 61 | midBin.emplace_front(candidate); 62 | // cout<<"emplacing"<printCandidate(); 64 | array[searchLoc] = midBin; 65 | return nullptr; 66 | } 67 | 68 | // cout<<"Emplacing"<getHeight() + curr->getHeight()) <= diffTolerance) && ((curr->getStart() + curr->getHeight() + diffTolerance) >= candidate->getStart()) /*&& sameLength(candidate, curr)*/) 84 | { //cout<<1<printCandidate(); 90 | return curr; 91 | } 92 | 93 | 94 | 95 | else{ 96 | // cout<<"3"<printCandidate(); 98 | it = midBin.erase(it); 99 | //cout <<"4"<printCandidate(); 103 | } 104 | 105 | 106 | } 107 | array[searchLoc] = midBin; 108 | 109 | } 110 | 111 | if(searchLoc highBin = array[searchLoc+1]; 114 | 115 | if(!highBin.empty()){ 116 | 117 | //cout << "Inside highbin" << searchLoc+1 << endl; 118 | 119 | auto it = highBin.begin(); 120 | 121 | while (it != highBin.end()) 122 | //for (auto it = highBin.begin(); it != highBin.end(); it++) 123 | { 124 | Candidate *curr = *it; 125 | 126 
| if ((abs(candidate->getHeight() + curr->getHeight()) <= diffTolerance) && ((curr->getStart() + curr->getHeight() + diffTolerance) >= candidate->getStart()) /*&& sameLength(candidate, curr)*/) 127 | { 128 | it = highBin.erase(it); 129 | array[searchLoc+1] = highBin; 130 | //cout << "returning" << endl; 131 | // curr->printCandidate(); 132 | 133 | 134 | return curr; 135 | } 136 | 137 | else { 138 | 139 | it =highBin.erase(it); 140 | // array[searchLoc+1] =highBin; 141 | // cout << "erasing" << endl; 142 | // curr->printCandidate(); 143 | } 144 | 145 | } 146 | array[searchLoc+1] = highBin; 147 | } 148 | 149 | } 150 | 151 | if(searchLoc >0){ 152 | 153 | std::list lowBin = array[searchLoc-1]; 154 | 155 | if (!lowBin.empty()) 156 | { //cout<<"Inside lowBin"<getHeight() + curr->getHeight()) <= diffTolerance) && ((curr->getStart() + curr->getHeight() + diffTolerance) >= candidate->getStart()) /*&& sameLength(candidate,curr)*/) 166 | { 167 | it = lowBin.erase(it); 168 | array[searchLoc - 1] = lowBin; 169 | // cout << "returning" << endl; 170 | // curr->printCandidate(); 171 | return curr; 172 | } 173 | 174 | else{ 175 | it =lowBin.erase(it); 176 | //array[searchLoc - 1] = lowBin; 177 | // cout << "erasing" << endl; 178 | // curr->printCandidate(); 179 | } 180 | 181 | } 182 | 183 | array[searchLoc - 1] = lowBin; 184 | } 185 | } 186 | return nullptr; 187 | } 188 | 189 | 190 | 191 | } 192 | 193 | bool PairContainer :: sameLength(Candidate * curr, Candidate * next){ 194 | 195 | int len1 = curr->getEnd()-curr->getStart(); 196 | int len2 = next->getEnd()-next->getStart(); 197 | 198 | int diff = abs(len2-len1); 199 | 200 | return diff<=20; 201 | 202 | 203 | } 204 | 205 | void PairContainer:: empty(){ 206 | 207 | for(int i = 0;i bin = array[i]; 210 | if(!bin.empty()){ 211 | for(auto it = bin.begin();it!=bin.end();it++){ 212 | Candidate * curr = *it; 213 | cout<<"Start: "<< curr->getStart()<< "End: "<getEnd()<<"Height: "<getHeight()< 5 | #include 6 | 7 | using namespace std; 8 | 9 
| namespace tr{ 10 | 11 | class PairContainer{ 12 | 13 | private: 14 | std::list * array; 15 | int maxDistance; 16 | int minDistance; 17 | int diffTolerance; 18 | bool sameLength( Candidate *, Candidate *); 19 | int containerSize; 20 | void allocate(); 21 | int computeIndex(int); 22 | 23 | public: 24 | PairContainer(int,int,int); 25 | virtual ~PairContainer(); 26 | Candidate * hashOrReturn(Candidate *); 27 | void empty(); 28 | // void unHash(int); 29 | // Container removeMatch(); 30 | // tuple at(int); 31 | 32 | 33 | }; 34 | 35 | } 36 | #endif -------------------------------------------------------------------------------- /src/tr/ScorerTr.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * ScorerTr.cpp 3 | * 4 | * Created on: Nov 30, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #include "ScorerTr.h" 9 | #include "../nonltr/KmerHashTable.h" 10 | #include "ForwardTr.h" 11 | #include "../nonltr/ChromosomeOneDigit.h" 12 | #include "../utility/Util.h" 13 | #include "../exception/InvalidInputException.h" 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | using namespace std; 23 | 24 | using namespace nonltr; 25 | using namespace utility; 26 | using namespace exception; 27 | 28 | namespace tr { 29 | 30 | const int ScorerTr::INITIAL_VALUE = -10; 31 | const int ScorerTr::INITIAL_SCORE = 0; 32 | 33 | ScorerTr::ScorerTr(ChromosomeOneDigit *chromIn, int motifSizeIn, int minIn,int maxIn) 34 | { 35 | chrom = chromIn; 36 | k = motifSizeIn; 37 | min = minIn; 38 | max = maxIn; 39 | 40 | if (max < min) { 41 | string msg( 42 | "The maximum distance cannot be less than the minimum distance. "); 43 | msg.append("The minimum distance is: "); 44 | msg.append(Util::int2string(min)); 45 | msg.append(". 
The maximum distance is: "); 46 | msg.append(Util::int2string(max)); 47 | msg.append("."); 48 | throw InvalidInputException(msg); 49 | } 50 | 51 | kmerTable = new KmerHashTable(k, INITIAL_VALUE); 52 | scoreList = new vector(chrom->getBase()->size(), INITIAL_SCORE); 53 | 54 | scoreNew(); 55 | } 56 | 57 | ScorerTr::~ScorerTr() { 58 | delete kmerTable; 59 | scoreList->clear(); 60 | delete scoreList; 61 | 62 | } 63 | 64 | void ScorerTr::score() { 65 | 66 | const vector *> * segment = chrom->getSegment(); 67 | const char * segBases = chrom->getBase()->c_str(); 68 | 69 | for (int s = 0; s < segment->size(); s++) { 70 | 71 | int start = segment->at(s)->at(0); 72 | int end = segment->at(s)->at(1); 73 | 74 | vector * hashList = new vector(); 75 | kmerTable->hash(segBases, start, end - k + 1, hashList); 76 | 77 | // I commented out the +1. It does not make sense not to score the first word of a segment. 78 | for (int i = start ; i <= end - k + 1; i++) { 79 | int keyHash = hashList->at(i - start); 80 | int lastIndex = kmerTable->valueOf(keyHash); 81 | 82 | if (lastIndex != INITIAL_VALUE) { 83 | int d1 = abs(i - lastIndex); 84 | if (d1 >= min && d1 <= max) { 85 | (*scoreList)[i] = lastIndex; 86 | int secondLastIndex = scoreList->at(lastIndex); 87 | if (abs(lastIndex - secondLastIndex) > d1) { 88 | (*scoreList)[lastIndex] = i; 89 | } 90 | } 91 | } 92 | 93 | kmerTable->insert(keyHash, i); 94 | } 95 | hashList->clear(); 96 | delete hashList; 97 | 98 | // Handle last word 99 | for (int i = end - k + 2; i <= end; i++) { 100 | (*scoreList)[i] = scoreList->at(i - 1); 101 | } 102 | } 103 | 104 | // Test code 105 | ofstream output; 106 | string file1 = "scores.txt"; 107 | output.open(file1); 108 | cout<<"Before: " << endl; 109 | for(auto e : *scoreList){ 110 | output << e<< " "; 111 | } 112 | output<size();i++){ 117 | int score = scoreList->at(i); 118 | if(score != INITIAL_SCORE){ 119 | (*scoreList)[i] = score - i; 120 | } 121 | } 122 | 123 | // Test code 124 | cout << "After: " << 
endl; 125 | for (auto e : *scoreList){ 126 | output < *> * segment = chrom->getSegment(); 136 | const char * segBases = chrom->getBase()->c_str(); 137 | 138 | for (int s = 0; s < segment->size(); s++) { 139 | int start = segment->at(s)->at(0); 140 | int end = segment->at(s)->at(1); 141 | 142 | vector * hashList = new vector(); 143 | kmerTable->hash(segBases, start, end - k + 1, hashList); 144 | 145 | // I commented out the +1. It does not make sense not to score the first word of a segment. 146 | for (int i = start /*+ 1*/; i <= end - k + 1; i++) { 147 | int keyHash = hashList->at(i - start); 148 | int lastIndex = kmerTable->valueOf(keyHash); 149 | if (lastIndex != INITIAL_VALUE) { 150 | 151 | int d1 = abs(i - lastIndex); 152 | if (d1 >= min && d1 <= max) 153 | { 154 | (*scoreList)[i] = lastIndex -i; 155 | int scoreAtLastIndex = scoreList->at(lastIndex); 156 | if (scoreAtLastIndex == INITIAL_SCORE || d1 < abs(scoreAtLastIndex)) 157 | { 158 | (*scoreList)[lastIndex] = i - lastIndex; 159 | } 160 | } 161 | } 162 | 163 | kmerTable->insert(keyHash, i); 164 | } 165 | hashList->clear(); 166 | delete hashList; 167 | 168 | // Handle last word 169 | for (int i = end - k + 2; i <= end; i++) { 170 | (*scoreList)[i] = scoreList->at(i - 1); 171 | } 172 | } 173 | } 174 | // Median of scoreList[start, end): 0 for an empty range, the upper median for even sizes. 175 | int ScorerTr::findMedian(int start, int end) 176 | { 177 | vector<int> section1(scoreList->begin() + start, scoreList->begin() + end); 178 | if (section1.size() == 0) 179 | { 180 | return 0; 181 | } 182 | else if (section1.size() == 1) 183 | { 184 | return section1.at(0); 185 | } 186 | else if (section1.size() % 2 == 0) 187 | { 188 | std::nth_element(section1.begin(), section1.begin() + section1.size() / 2, section1.end()); 189 | 190 | return section1[section1.size() / 2]; 191 | } 192 | else 193 | { 194 | std::nth_element(section1.begin(), section1.begin() + section1.size() / 2, section1.end()); 195 | // nth_element only positions the element at size/2; index size/2 + 1 held an arbitrary larger element, not the median. 196 | return section1[section1.size() / 2]; 197 | } 198 | } 199 | 200 | void ScorerTr::medianSmooth() 201 
| { 202 | 203 | vector *temp = new vector(); 204 | 205 | int step = 20; 206 | int i = 0; 207 | 208 | while (i < scoreList->size() - step) 209 | { 210 | 211 | int median = findMedian(i, i + step); 212 | 213 | for (int j =i;j < i + step;j++) 214 | { 215 | temp->push_back(median); 216 | } 217 | 218 | i+=step; 219 | } 220 | int score = findMedian(i,scoreList->size()); 221 | for(int j=i;jsize();j++){ 222 | temp->push_back(score); 223 | } 224 | 225 | scoreList->clear(); 226 | scoreList = temp; 227 | } 228 | 229 | vector* ScorerTr::getScores() { 230 | return scoreList; 231 | } 232 | 233 | int ScorerTr::getInitialScore() { 234 | return INITIAL_SCORE; 235 | } 236 | 237 | } 238 | -------------------------------------------------------------------------------- /src/tr/ScorerTr.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ScorerTr.h 3 | * 4 | * Created on: Nov 30, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef SCORERTR_H_ 9 | #define SCORERTR_H_ 10 | 11 | #include "ForwardTr.h" 12 | #include "../nonltr/KmerHashTable.h" 13 | #include "../nonltr/ChromosomeOneDigit.h" 14 | 15 | #include 16 | 17 | using namespace nonltr; 18 | 19 | namespace tr { 20 | class ScorerTr { 21 | private: 22 | KmerHashTable * kmerTable; 23 | 24 | static const int INITIAL_VALUE; 25 | static const int INITIAL_SCORE; 26 | 27 | ChromosomeOneDigit *chrom; 28 | int k; 29 | int min; 30 | int max; 31 | // int minPlateauLen; 32 | // int diffThresh; 33 | // int gapTol; 34 | void medianSmooth(); 35 | int findMedian(int,int); 36 | void score(); 37 | void scoreNew(); 38 | 39 | vector * scoreList; 40 | std::string csvFileName; 41 | 42 | 43 | public: 44 | ScorerTr(ChromosomeOneDigit *, int, int, int); 45 | virtual ~ScorerTr(); 46 | vector* getScores(); 47 | int getInitialScore(); 48 | //void outputScores(int, int); 49 | void scoresFormat(int,int); 50 | //void bedFormat(int,int); 51 | //vector> getSpikes(); 52 | }; 53 | } 54 | 55 | #endif /* 
SCORERTR_H_ */ 56 | -------------------------------------------------------------------------------- /src/tr/Tr.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Tr.cpp 3 | * 4 | * Created on: Dec 12, 2012 5 | * Author: girgishz 6 | */ 7 | 8 | #include "Tr.h" 9 | #include "ITrVisitor.h" 10 | #include "../exception/InvalidStateException.h" 11 | #include "../utility/Util.h" 12 | 13 | // #include 14 | // ToDo: delete 15 | #include 16 | // end delete 17 | 18 | using namespace std; 19 | using namespace utility; 20 | using namespace exception; 21 | 22 | namespace tr { 23 | 24 | /** 25 | * No-parameter constructor 26 | */ 27 | Tr::Tr() { 28 | s1 = 0; 29 | e1 = 0; 30 | s2 = 0; 31 | e2 = 0; 32 | } 33 | 34 | Tr::Tr(int s1In, int e1In, int s2In, int e2In) { 35 | initialize(s1In, e1In, s2In, e2In); 36 | } 37 | 38 | Tr::Tr(Tr& o) { 39 | initialize(o.getS1(), o.getE1(), o.getS2(), o.getE2()); 40 | 41 | id = o.getIdentity(); 42 | } 43 | 44 | void Tr::initialize(int s1In, int e1In, int s2In, int e2In) { 45 | s1 = s1In; 46 | e1 = e1In; 47 | s2 = s2In; 48 | e2 = e2In; 49 | id = 0; 50 | 51 | if (e1In < s1In) { 52 | string msg("The start of the first TR must be <= its end. "); 53 | msg.append("The start is: "); 54 | msg.append(Util::int2string(s1In)); 55 | msg.append(" The end is: "); 56 | msg.append(Util::int2string(e1In)); 57 | throw InvalidStateException(msg); 58 | } 59 | 60 | if (e2In < s2In) { 61 | string msg("The start of the second TR must be <= its end. 
"); 62 | msg.append("The start is: "); 63 | msg.append(Util::int2string(s2In)); 64 | msg.append(" The end is: "); 65 | msg.append(Util::int2string(e2In)); 66 | throw InvalidStateException(msg); 67 | } 68 | 69 | // checkState(); 70 | } 71 | 72 | Tr::~Tr() { 73 | } 74 | 75 | int Tr::getS1() { 76 | return s1; 77 | } 78 | 79 | int Tr::getS2() { 80 | return s2; 81 | } 82 | 83 | int Tr::getE1() { 84 | return e1; 85 | } 86 | 87 | int Tr::getE2() { 88 | return e2; 89 | } 90 | 91 | void Tr::setS1(int s1In) { 92 | s1 = s1In; 93 | } 94 | 95 | void Tr::setS2(int s2In) { 96 | s2 = s2In; 97 | } 98 | 99 | void Tr::setE1(int e1In) { 100 | e1 = e1In; 101 | } 102 | 103 | void Tr::setE2(int e2In) { 104 | e2 = e2In; 105 | } 106 | 107 | bool Tr::isOverlappingSame(Tr* same) { 108 | bool cond1 = Util::isOverlapping(s1, e1, same->getS1(), same->getE1()); 109 | bool cond2 = Util::isOverlapping(s2, e2, same->getS2(), same->getE2()); 110 | return cond1 && cond2; 111 | } 112 | 113 | bool Tr::isOverlappingOpposite(Tr* opposite) { 114 | bool cond1 = Util::isOverlapping(s1, e1, opposite->getS2(), 115 | opposite->getE2()); 116 | bool cond2 = Util::isOverlapping(s2, e2, opposite->getS1(), 117 | opposite->getE1()); 118 | 119 | return cond1 && cond2; 120 | } 121 | 122 | void Tr::accept(ITrVisitor * v) { 123 | v->visit(this); 124 | } 125 | 126 | } 127 | -------------------------------------------------------------------------------- /src/tr/Tr.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Tr.h 3 | * 4 | * Created on: Dec 12, 2012 5 | * Author: Hani Zakaria Girgis. 
/**
 * Abstract base for a pair of TRs described by two coordinate ranges:
 * [s1, e1] for the first TR and [s2, e2] for the second one.
 *
 * The location-style accessors (getStart, getEnd, getLength, toString, ...)
 * and checkState() are pure virtual here and must be supplied by subclasses.
 */
class Tr: public ILocation {

protected:
	// Start and end of the first TR.
	int s1;
	int e1;
	// Start and end of the second TR.
	int s2;
	int e2;
	// Overlap tests against another Tr: first-with-first and
	// second-with-second ...
	bool isOverlappingSame(Tr *);
	// ... or first-with-second and second-with-first.
	bool isOverlappingOpposite(Tr *);
	// Identity value associated with the pair.
	// NOTE(review): stored as double, but getIdentity()/setIdentity() below
	// use int - confirm whether the truncation is intended.
	double id;


public:
	Tr();
	// Arguments are s1, e1, s2, e2 (see Tr::initialize).
	Tr(int, int, int, int);
	Tr(Tr&);
	virtual ~Tr();
	int getS1();
	int getE1();
	int getS2();
	int getE2();


	void setS1(int);
	void setE1(int);
	void setS2(int);
	void setE2(int);

	// Visitor entry point; calls visit(this) on the given visitor.
	void accept(ITrVisitor *);
	// Sets s1, e1, s2, e2 and resets the identity; throws
	// InvalidStateException when an end precedes its start.
	virtual void initialize(int, int, int, int);

	virtual void checkState() = 0;

	string toString() = 0;
	virtual int getEnd() const = 0;
	virtual int getStart() const = 0;
	virtual void setEnd(int) = 0;
	virtual void setStart(int) = 0;
	virtual int getLength() = 0;
	virtual int getIdentity();
	virtual void setIdentity(int);
};
gapTol; 31 | int identity; 32 | std::string csvFileName; 33 | std::string bedFileName; 34 | std::string name; 35 | // vector * teList; /*HZG commented out this file*/ 36 | bool bedFormat; 37 | bool printRaw; 38 | bool printClean; 39 | bool displayNested; 40 | 41 | // A list of regular (very likely not nested) TE 42 | vector * teList; 43 | // A list of nested TE 44 | vector * nestedTeList; 45 | 46 | void collect(); 47 | void findNested(); 48 | int findNestedHelper1(int, int, int); 49 | void findNestedHelper2(int, int, int, int); 50 | void scoresFormat(vector*,string,string); 51 | 52 | public: 53 | TrCollector(ChromosomeOneDigit * /*HZG changed this parameter*/,std::string,std::string, int, int,int,int,int,int,int,int,bool,bool,bool,bool); 54 | virtual ~TrCollector(); 55 | // vector * getTeList(); 56 | void printIndex(string, vector * ); 57 | void printMasked(string, vector *); 58 | void outputAnnotation(vector *,string); 59 | 60 | }; 61 | 62 | } /* namespace tr */ 63 | #endif /* TRCOLLECTOR_H_ */ 64 | -------------------------------------------------------------------------------- /src/tr/TrCsVisitor.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * TrCsVisitor.cpp 3 | * 4 | * Created on: Dec 14, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | //Finds Longest common subsequence 8 | #include "TrCsVisitor.h" 9 | #include "Tr.h" 10 | #include "../utility/LCS.h" 11 | #include "../utility/GlobAlignE.h" 12 | 13 | // ToDo: delete iostream and print statements 14 | #include 15 | 16 | using namespace utility; 17 | 18 | namespace tr { 19 | 20 | TrCsVisitor::TrCsVisitor(const char * seqIn, int minLenIn, int minIdIn) { 21 | seq = seqIn; 22 | isGood = false; 23 | minLen = minLenIn; 24 | minId = minIdIn; 25 | } 26 | 27 | TrCsVisitor::~TrCsVisitor() { 28 | // TODO Auto-generated destructor stub 29 | } 30 | 31 | void TrCsVisitor::visit(Tr* tr) { 32 | int s1 = tr->getS1(); 33 | int e1 = tr->getE1(); 34 | int s2 = tr->getS2(); 
35 | int e2 = tr->getE2(); 36 | 37 | //LCS * lcs = new LCS(seq, s1, e1, seq, s2, e2); 38 | //lcs->printLcs(); 39 | //int lcsScore = lcs->getLenCS(); 40 | 41 | double l1 = e1 - s1 + 1; 42 | //double id1 = 100.00 * (double) lcsScore / l1; 43 | 44 | double l2 = e2 - s2 + 1; 45 | //double id2 = 100.00 * (double) lcsScore / l2; 46 | //cout<<"Entering alignment"<getIdentity(); 50 | cout<<"ID="<getIdentity()<printLcs(); 76 | cout << "= = =" << endl; 77 | 78 | // Testing end 79 | 80 | } 81 | */ 82 | //delete lcs; 83 | delete align; 84 | } 85 | 86 | bool TrCsVisitor::getIsGood() { 87 | return isGood; 88 | } 89 | 90 | } /* namespace tr */ 91 | -------------------------------------------------------------------------------- /src/tr/TrCsVisitor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * TrCsVisitor.h 3 | * 4 | * Created on: Dec 14, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef TRCSVISITOR_H_ 9 | #define TRCSVISITOR_H_ 10 | 11 | #include "ITrVisitor.h" 12 | 13 | namespace tr { 14 | 15 | class TrCsVisitor: public tr::ITrVisitor { 16 | private: 17 | const char * seq; 18 | bool isGood; 19 | int minLen; 20 | int minId; 21 | 22 | public: 23 | TrCsVisitor(const char *, int, int); 24 | virtual ~TrCsVisitor(); 25 | virtual void visit(Tr *); 26 | bool getIsGood(); 27 | }; 28 | 29 | } /* namespace tr */ 30 | #endif /* TRCSVISITOR_H_ */ 31 | -------------------------------------------------------------------------------- /src/tr/TrKVisitor.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * TrKVisitor.cpp 3 | * 4 | * Created on: Dec 14, 2012 5 | * Author: zakarota 6 | */ 7 | 8 | #include "TrKVisitor.h" 9 | 10 | namespace tr { 11 | 12 | TrKVisitor::TrKVisitor(int kIn, int endIn) { 13 | k = kIn; 14 | end = endIn; 15 | } 16 | 17 | TrKVisitor::~TrKVisitor() { 18 | // TODO Auto-generated destructor stub 19 | } 20 | 21 | void TrKVisitor::visit(Tr* tr) { 22 | int e1 = 
tr->getE1() + k - 1; 23 | if (e1 > end) { 24 | e1 = end; 25 | } 26 | tr->setE1(e1); 27 | 28 | int e2 = tr->getE2() + k - 1; 29 | if (e2 > end) { 30 | e2 = end; 31 | } 32 | tr->setE2(e2); 33 | } 34 | 35 | } /* namespace tr */ 36 | -------------------------------------------------------------------------------- /src/tr/TrKVisitor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * TrKVisitor.h 3 | * 4 | * Created on: Dec 14, 2012 5 | * Author: zakarota 6 | */ 7 | 8 | #ifndef TRKVISITOR_H_ 9 | #define TRKVISITOR_H_ 10 | 11 | #include "Tr.h" 12 | #include "ITrVisitor.h" 13 | 14 | namespace tr { 15 | 16 | class TrKVisitor: public tr::ITrVisitor { 17 | private: 18 | int k; 19 | int end; 20 | 21 | public: 22 | TrKVisitor(int, int); 23 | virtual ~TrKVisitor(); 24 | virtual void visit(Tr *); 25 | }; 26 | 27 | } /* namespace tr */ 28 | #endif /* TRKVISITOR_H_ */ 29 | -------------------------------------------------------------------------------- /src/tr/TrPptVisitor.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * TrPptVisitor.cpp 3 | * 4 | * Created on: Dec 26, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #include "TrPptVisitor.h" 9 | #include "../exception/InvalidStateException.h" 10 | 11 | namespace tr { 12 | 13 | TrPptVisitor::TrPptVisitor(const string * seqIn) { 14 | seq = seqIn; 15 | win = 500; 16 | } 17 | 18 | TrPptVisitor::~TrPptVisitor() { 19 | 20 | } 21 | 22 | void TrPptVisitor::visit(Tr * trIn) { 23 | tr = trIn; 24 | segStart = tr->getStart(); 25 | } 26 | 27 | void TrPptVisitor:: searchPstv(){ 28 | int (* cTable) = new int[win][2]; 29 | 30 | 31 | } 32 | 33 | } /* namespace tr */ 34 | -------------------------------------------------------------------------------- /src/tr/TrPptVisitor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * TrPptVisitor.h 3 | * 4 | * Created on: Dec 26, 2012 5 | * Author: Hani Zakaria 
Girgis, PhD 6 | */ 7 | 8 | #ifndef TRPPTVISITOR_H_ 9 | #define TRPPTVISITOR_H_ 10 | 11 | #include "ITrVisitor.h" 12 | #include "../utility/ILocation.h" 13 | 14 | using namespace utility; 15 | 16 | namespace tr { 17 | 18 | class TrPptVisitor: public ITrVisitor { 19 | private: 20 | const string * seq; 21 | Tr * tr; 22 | int segStart; 23 | int win; 24 | 25 | void searchPstv(); 26 | 27 | public: 28 | TrPptVisitor(const string *); 29 | virtual ~TrPptVisitor(); 30 | virtual void visit(Tr *); 31 | }; 32 | 33 | } /* namespace tr */ 34 | #endif /* TRPPTVISITOR_H_ */ 35 | -------------------------------------------------------------------------------- /src/tr/TrSineVisitor.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * TrSineVisitor.cpp 3 | * 4 | * Created on: Feb 7, 2013 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | /* 8 | The purpose of this visitor is to distinguish between true LTR's and two consecutive Sines. 9 | The existence of TSD's along either side of the two peaks would indicate that they are Sines 10 | rather than LTRs 11 | */ 12 | #include 13 | 14 | #include "TrSineVisitor.h" 15 | #include "../utility/Location.h" 16 | #include "../utility/TailFinder.h" 17 | #include "../utility/TSD.h" 18 | #include 19 | #include 20 | 21 | using namespace utility; 22 | using namespace std; 23 | 24 | namespace tr { 25 | 26 | TrSineVisitor::TrSineVisitor(const string * seqIn, int tailTIn, int tsdWIn, 27 | int tsdTIn) { 28 | seq = seqIn; 29 | tailT = tailTIn; 30 | tsdW = tsdWIn; 31 | tsdT = tsdTIn; 32 | 33 | foundTwoSines = false; 34 | } 35 | 36 | TrSineVisitor::~TrSineVisitor() { 37 | 38 | } 39 | 40 | /** 41 | * ToDo: check if the length of the LTR is similar to that of SINE < 500bp 42 | */ 43 | void TrSineVisitor::visit(Tr* ltr) { 44 | 45 | int s1 = ltr->getS1(); 46 | int e1 = ltr->getE1(); 47 | int s2 = ltr->getS2(); 48 | int e2 = ltr->getE2(); 49 | 50 | // Make locations 51 | Location * loc1 = new Location(s1, e1); 52 | 
Location * loc2 = new Location(s2, e2); 53 | 54 | // Find PloyA tail in the first LTR 55 | //int win1 = (e1 - s1 + 1) / 2; // checking the first half 56 | 57 | int win1 = calculateTailWindow(0.02,loc1->getLength(),50); 58 | 59 | cout<<"PolyA window 1: "<getLength(),50); 70 | cout<<"PolyA window 2: "<isTailFound() && f2->isTailFound()) { 77 | 78 | vector * first = f1->getTail(); 79 | vector * second = f2->getTail(); 80 | 81 | cout<<"Tail1 ="<< first->at(0)<<":"<at(1)<at(1)<getTail()->at(3) == f2->getTail()->at(3)) { 85 | // Find the first TSD 86 | TSD * t1 = new TSD(seq, loc1, tsdW, (int) 'N'); 87 | TSD * t2 = new TSD(seq, loc2, tsdW, (int) 'N'); 88 | foundTwoSines = (t1->getTsdSize() > tsdT) 89 | && (t2->getTsdSize() > tsdT); 90 | delete t1; 91 | delete t2; 92 | }*/ 93 | foundTwoSines = true; 94 | } 95 | 96 | delete f1; 97 | delete f2; 98 | 99 | delete loc1; 100 | delete loc2; 101 | } 102 | //returns true if this sequence contains two sines rather than an ltr 103 | bool TrSineVisitor::isTwoSines() { 104 | return foundTwoSines; 105 | } 106 | 107 | 108 | // calculate search window based on minimum and size of interior 109 | int TrSineVisitor::calculateTailWindow(double ratio, int lengthElement, int minimum){ 110 | // cout << "length interior: " << lengthElement << endl; 111 | int limit = lengthElement > minimum ? minimum : lengthElement; 112 | int scaled = ceil(ratio*lengthElement); 113 | return scaled > limit ? 
scaled : limit; 114 | } 115 | 116 | 117 | /* 118 | bool TrSineVisitor::isLtrSine(int s, int e) { 119 | // Make a location object 120 | Location * l = new Location(s, e); 121 | 122 | // Search for Poly-A tail 123 | int tailW = (e - s + 1) / 2; 124 | TailFinder * f = new TailFinder(seq, l, TailFinder::MARK_A, tailW, tailT); 125 | bool isTailFound = f->isTailFound(); 126 | 127 | // Search for TSD 128 | TSD * t = new TSD(seq, l, tsdW, (int) 'N'); 129 | bool isTsdFound = t->getTsdSize() > tsdT; 130 | 131 | // Free memory 132 | delete t; 133 | delete f; 134 | delete l; 135 | if (f1->isTailFound() && f2->isTailFound()) { 136 | 137 | // Combine results 138 | bool result = isTailFound && isTsdFound; 139 | return result; 140 | } 141 | */ 142 | 143 | } /* namespace tr */ 144 | -------------------------------------------------------------------------------- /src/tr/TrSineVisitor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * TrSineVisitor.h 3 | * 4 | * Created on: Feb 7, 2013 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef TRSINEVISITOR_H_ 9 | #define TRSINEVISITOR_H_ 10 | 11 | #include "ITrVisitor.h" 12 | #include "Tr.h" 13 | 14 | namespace tr { 15 | 16 | class TrSineVisitor: public tr::ITrVisitor { 17 | 18 | private: 19 | const string * seq; 20 | int tsdW; 21 | int tsdT; 22 | int tailT; 23 | bool foundTwoSines; 24 | 25 | 26 | 27 | public: 28 | TrSineVisitor(const string *, int, int, int); 29 | virtual ~TrSineVisitor(); 30 | virtual void visit(Tr *); 31 | bool isTwoSines(); 32 | int calculateTailWindow(double,int,int); 33 | }; 34 | 35 | } /* namespace tr */ 36 | #endif /* TRSINEVISITOR_H_ */ 37 | -------------------------------------------------------------------------------- /src/utility/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioinformaticsToolsmith/LtrDetector/c22633037e5c6740098227df7a1b2ead1ef21ceb/src/utility/.DS_Store 
-------------------------------------------------------------------------------- /src/utility/AlignVisitor.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef ALIGNVISITOR_H_ 3 | #define ALIGNVISITOR_H_ 4 | 5 | #include "ITrVisitor.h" 6 | 7 | namespace tr { 8 | 9 | class TrCsVisitor: public tr::ITrVisitor { 10 | private: 11 | const char * seq; 12 | bool isGood; 13 | int minLen; 14 | int minId; 15 | 16 | public: 17 | TrCsVisitor(const char *, int, int); 18 | virtual ~TrCsVisitor(); 19 | virtual void visit(Tr *); 20 | bool getIsGood(); 21 | }; 22 | 23 | } /* namespace tr */ 24 | #endif /* TRCSVISITOR_H_ */ 25 | -------------------------------------------------------------------------------- /src/utility/EmptyLocation.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * EmptyLocation.cpp 3 | * 4 | * Created on: Dec 28, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #include "EmptyLocation.h" 9 | #include "../exception/InvalidOperationException.h" 10 | 11 | using namespace exception; 12 | 13 | namespace utility { 14 | 15 | EmptyLocation * EmptyLocation::INSTANCE = new EmptyLocation(); 16 | 17 | EmptyLocation * EmptyLocation::getInstance(){ 18 | return INSTANCE; 19 | } 20 | 21 | EmptyLocation::EmptyLocation() { 22 | msg = new string("Empty location does not allow this operation."); 23 | } 24 | 25 | EmptyLocation::~EmptyLocation() { 26 | delete msg; 27 | } 28 | 29 | string EmptyLocation::toString() { 30 | return string("Empty"); 31 | } 32 | 33 | int EmptyLocation::getEnd() const { 34 | throw InvalidOperationException(*msg); 35 | } 36 | 37 | int EmptyLocation::getStart() const { 38 | throw InvalidOperationException(*msg); 39 | } 40 | 41 | void EmptyLocation::setEnd(int int1) { 42 | throw InvalidOperationException(*msg); 43 | } 44 | 45 | void EmptyLocation::setStart(int int1) { 46 | throw InvalidOperationException(*msg); 47 | } 48 | 49 | int EmptyLocation::getLength() 
{ 50 | throw InvalidOperationException(*msg); 51 | } 52 | 53 | } /* namespace tr */ 54 | -------------------------------------------------------------------------------- /src/utility/EmptyLocation.h: -------------------------------------------------------------------------------- 1 | /* 2 | * EmptyLocation.h 3 | * 4 | * Created on: Dec 28, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef EMPTYLOCATION_H_ 9 | #define EMPTYLOCATION_H_ 10 | 11 | #include "ILocation.h" 12 | 13 | namespace utility { 14 | 15 | class EmptyLocation: public ILocation { 16 | private: 17 | string * msg; 18 | static EmptyLocation * INSTANCE; 19 | EmptyLocation(); 20 | virtual ~EmptyLocation(); 21 | 22 | public: 23 | virtual int getEnd() const; 24 | virtual int getStart() const; 25 | virtual void setEnd(int); 26 | virtual void setStart(int); 27 | virtual int getLength(); 28 | virtual string toString(); 29 | 30 | static EmptyLocation * getInstance(); 31 | 32 | }; 33 | 34 | } /* namespace tr */ 35 | #endif /* EMPTYLOCATION_H_ */ 36 | -------------------------------------------------------------------------------- /src/utility/EmptyTSD.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * EmptyTSD.cpp 3 | * 4 | * Created on: Dec 28, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #include "Util.h" 9 | #include "EmptyTSD.h" 10 | #include "../exception/InvalidOperationException.h" 11 | 12 | using namespace exception; 13 | 14 | namespace utility { 15 | 16 | EmptyTSD * EmptyTSD::INSTANCE = new EmptyTSD(); 17 | 18 | EmptyTSD::EmptyTSD() { 19 | msg = new string("Empty TSD does not allow this operation."); 20 | } 21 | 22 | EmptyTSD::~EmptyTSD() { 23 | delete msg; 24 | } 25 | 26 | string EmptyTSD::toString() { 27 | return string("Empty"); 28 | } 29 | 30 | EmptyTSD * EmptyTSD::getInstance() { 31 | return INSTANCE; 32 | } 33 | 34 | ILocation* EmptyTSD::getLtTsd() { 35 | throw InvalidOperationException(*msg); 36 | } 37 | 38 | 
ILocation* EmptyTSD::getRtTsd() { 39 | throw InvalidOperationException(*msg); 40 | } 41 | 42 | int EmptyTSD::getTsdSize() { 43 | throw InvalidOperationException(*msg); 44 | } 45 | 46 | } /* namespace utility */ 47 | -------------------------------------------------------------------------------- /src/utility/EmptyTSD.h: -------------------------------------------------------------------------------- 1 | /* 2 | * EmptyTSD.h 3 | * 4 | * Created on: Dec 28, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef EMPTYTSD_H_ 9 | #define EMPTYTSD_H_ 10 | 11 | #include "ITSD.h" 12 | #include "ILocation.h" 13 | 14 | namespace utility { 15 | 16 | class EmptyTSD : public ITSD{ 17 | private: 18 | string * msg; 19 | static EmptyTSD * INSTANCE; 20 | EmptyTSD(); 21 | 22 | public: 23 | virtual ~EmptyTSD(); 24 | virtual ILocation * getLtTsd(); 25 | virtual ILocation * getRtTsd(); 26 | virtual int getTsdSize(); 27 | virtual string toString(); 28 | static EmptyTSD * getInstance(); 29 | }; 30 | 31 | } /* namespace utility */ 32 | #endif /* EMPTYTSD_H_ */ 33 | -------------------------------------------------------------------------------- /src/utility/EmptyTail.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * EmptyTail.cpp 3 | * 4 | * Created on: Dec 28, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #include "EmptyTail.h" 9 | #include "../exception/InvalidOperationException.h" 10 | 11 | using namespace exception; 12 | 13 | namespace utility { 14 | 15 | EmptyTail * EmptyTail::INSTANCE = new EmptyTail(); 16 | 17 | EmptyTail::EmptyTail() { 18 | msg = new string("Empty Tail does not allow this operation."); 19 | } 20 | 21 | EmptyTail::~EmptyTail() { 22 | delete msg; 23 | } 24 | 25 | EmptyTail* EmptyTail::getInstance() { 26 | return INSTANCE; 27 | } 28 | 29 | string EmptyTail::toString() { 30 | return string("Empty"); 31 | } 32 | 33 | int EmptyTail::getEnd() const { 34 | throw InvalidOperationException(*msg); 35 | 
} 36 | 37 | int EmptyTail::getStart() const { 38 | throw InvalidOperationException(*msg); 39 | } 40 | 41 | void EmptyTail::setEnd(int int1) { 42 | throw InvalidOperationException(*msg); 43 | } 44 | 45 | void EmptyTail::setStart(int int1) { 46 | throw InvalidOperationException(*msg); 47 | } 48 | 49 | int EmptyTail::getLength() { 50 | throw InvalidOperationException(*msg); 51 | } 52 | 53 | double EmptyTail::getPercentage() const { 54 | throw InvalidOperationException(*msg); 55 | } 56 | 57 | void EmptyTail::setPercentage(double double1) { 58 | throw InvalidOperationException(*msg); 59 | } 60 | 61 | string EmptyTail::getStrand() const { 62 | throw InvalidOperationException(*msg); 63 | } 64 | 65 | void EmptyTail::setStrand(string allocator) { 66 | throw InvalidOperationException(*msg); 67 | } 68 | 69 | } /* namespace tr */ 70 | -------------------------------------------------------------------------------- /src/utility/EmptyTail.h: -------------------------------------------------------------------------------- 1 | /* 2 | * EmptyTail.h 3 | * 4 | * Created on: Dec 28, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef EMPTYTAIL_H_ 9 | #define EMPTYTAIL_H_ 10 | 11 | #include 12 | #include "ITail.h" 13 | 14 | using namespace std; 15 | 16 | namespace utility { 17 | 18 | class EmptyTail: public ITail { 19 | private: 20 | string * msg; 21 | static EmptyTail * INSTANCE; 22 | EmptyTail(); 23 | 24 | public: 25 | virtual ~EmptyTail(); 26 | static EmptyTail * getInstance(); 27 | 28 | // Inherited from ILocation 29 | virtual int getEnd() const; 30 | virtual int getStart() const; 31 | virtual void setEnd(int); 32 | virtual void setStart(int); 33 | virtual int getLength(); 34 | virtual string toString(); 35 | 36 | // Methods specific to tail objects. 
/**
 * Author: Joseph Valencia
 * Date: 12/14/17
 * Bioinformatics Toolsmith Laboratory, University of Tulsa
 * */
#ifndef Glob_Align_H_
// Fix: the guard macro was tested but never defined, so including this
// header twice in one translation unit redefined class GlobAlign.
#define Glob_Align_H_

#include <string>

using namespace std;

/**
 * Alignment of a segment of one sequence against a segment of another,
 * scored with match / mismatch scores and gap open / gap continue costs.
 */
class GlobAlign{

private:
    const char * seq1; //first sequence to be aligned
    int start1;
    int end1;
    const char * seq2;//second sequence to be aligned
    int start2;
    int end2;
    int len1;
    int len2;
    int lenTotal;
    int match; //score for base pair match
    int mismatch;//score for base pair mismatch
    int gapOpen; //cost to open a gap
    int gapContinue; //cost to continue a gap
    int alignmentScore;
    string topString;
    string bottomString;
public:
    // Arguments: seq1, start1, end1, seq2, start2, end2, then four scoring
    // values.
    // NOTE(review): the order of the scoring values is presumed to be
    // match, mismatch, gapOpen, gapContinue - confirm against GlobAlign.cpp.
    GlobAlign(const char*,int,int,const char *,int,int, int,int,int,int);
    // virtual GlobAlign();
    double getIdentity() const;
    void findAlignment();
    void printAlignment(); //display LocAlign
    int getScore();
    int getLength();

};
#endif
namespace utility; 19 | using namespace exception; 20 | 21 | GlobAlignE::GlobAlignE(const char * seq1In, int start1In, int end1In, const char * seq2In, 22 | int start2In, int end2In, int matchIn, int mismatchIn, int gapOpenIn, int gapContinueIn){ 23 | 24 | seq1 = seq1In; 25 | start1 = start1In; 26 | end1 = end1In; 27 | 28 | seq2 = seq2In; 29 | start2 = start2In; 30 | end2 = end2In; 31 | 32 | len1 = end1 - start1 + 2; 33 | len2 = end2 - start2 + 2; 34 | 35 | //Incremental score storage 36 | matches = new int[len1]; 37 | upperGap = new int[len1]; 38 | lowerGap = new int[len1]; 39 | 40 | 41 | 42 | //Incremental length storage 43 | matchLen = new int[len1]; 44 | upperLen = new int[len1]; 45 | lowerLen = new int[len1]; 46 | 47 | //Incremental identity storage 48 | matchId = new int[len1]; 49 | upperId = new int[len1]; 50 | lowerId = new int[len1]; 51 | 52 | match = matchIn; 53 | mismatch = mismatchIn; 54 | gapOpen = gapOpenIn; 55 | gapContinue = gapContinueIn; 56 | findAlignment(); 57 | 58 | } 59 | 60 | void GlobAlignE::findAlignment(){ 61 | 62 | int shorter = min(len2,len1)-1; 63 | int lenDiff = abs(len2-len1); 64 | int maxDiff=0; 65 | 66 | if (lenDiff >=1){ 67 | maxDiff += -gapOpen- (lenDiff*gapContinue); 68 | } 69 | 70 | maxDiff+= (mismatch* shorter)-1; 71 | 72 | const int negativeInf = maxDiff; 73 | 74 | matches[0]= 0; 75 | upperGap[0] = negativeInf; 76 | lowerGap[0] = negativeInf; 77 | 78 | matchLen[0] =0; 79 | upperLen[0] =0; 80 | lowerLen[0] =0; 81 | 82 | matchId[0] =0; 83 | upperId[0] = 0; 84 | lowerId[0] =0; 85 | 86 | //initial values 87 | for (int i = 1; i'){ 265 | 266 | while(c!='\n'){ 267 | c = ifs.get(); 268 | 269 | } 270 | } 271 | 272 | string string1 =""; 273 | 274 | while (ifs.good()) { 275 | 276 | 277 | if (c!='\n'){ 278 | string1+=c; 279 | } 280 | c = ifs.get(); 281 | } 282 | 283 | ifs.close(); 284 | 285 | 286 | ifstream ifs2; 287 | 288 | ifs2.open (argv[2], ifstream::in); 289 | 290 | c = ifs2.get(); 291 | 292 | if(c == '>'){ 293 | 294 | 
/**
 * Global aligner over two sub-sequences that keeps per-state working rows
 * instead of a full matrix.
 *
 * The constructor (GlobAlignE.cpp) stores the two sequence pointers and their
 * inclusive [start, end] ranges, sets len1/len2 to the range length plus one,
 * allocates every int* row below with len1 entries, stores the four scoring
 * values and then calls findAlignment().
 */
class GlobAlignE{

private:
	const char * seq1; //first sequence to be aligned
	int start1;        // first index of the aligned range in seq1
	int end1;          // last index of the aligned range in seq1
	const char * seq2;//second sequence to be aligned
	int start2;        // first index of the aligned range in seq2
	int end2;          // last index of the aligned range in seq2
	int len1;          // end1 - start1 + 2
	int len2;          // end2 - start2 + 2
	int lenTotal;      // NOTE(review): not set by the constructor
	int match; //score for base pair match
	int mismatch;//score for base pair mismatch
	int gapOpen; //cost to open a gap
	int gapContinue; //cost to continue a gap
	// Incremental score storage (each row has len1 entries)
	int * matches;
	int * upperGap;
	int * lowerGap;
	// Incremental length storage (each row has len1 entries)
	int * matchLen;
	int * upperLen;
	int * lowerLen;
	// Incremental identity storage (each row has len1 entries)
	int * matchId;
	int * upperId;
	int * lowerId;
	int alignmentScore;
	int alignmentLength;
	int totalMatches;
	string topString;
	string bottomString;
public:
	// Arguments: seq1, start1, end1, seq2, start2, end2,
	//            match, mismatch, gapOpen, gapContinue.
	GlobAlignE(const char*,int,int,const char *,int,int, int,int,int,int);
	//GlobAlignE(string,string,int,int,int,int);
	virtual ~GlobAlignE();
	// Called by the constructor; callers normally do not need to invoke it.
	void findAlignment();
	double getIdentity();
	int getLength();
	void printAlignment(); //display the alignment
	int getScore();
	int getLengthAlignment();

};
#ifndef ILOCATION_H_
#define ILOCATION_H_

#include <string>

using namespace std;

namespace utility {

/**
 * Interface of an inclusive [start, end] interval.
 *
 * Implementations are owned and destroyed through ILocation pointers
 * (for example the vector<ILocation *> members of LCSubStr), so the
 * interface provides a virtual destructor: deleting a derived object through
 * a base pointer without one is undefined behaviour.
 */
class ILocation {
public:
	//ILocation();

	/** Return the end coordinate. */
	virtual int getEnd() const = 0;

	/** Return the start coordinate. */
	virtual int getStart() const = 0;

	/** Replace the end coordinate. */
	virtual void setEnd(int) = 0;

	/** Replace the start coordinate. */
	virtual void setStart(int) = 0;

	/** Return the length of the interval. */
	virtual int getLength() = 0;

	/** Return a textual form of the interval. */
	virtual string toString() = 0;

	/** Virtual so that `delete` on an ILocation* runs the derived destructor. */
	virtual ~ILocation() {}
};

}

#endif /* ILOCATION_H_ */
/**
 * Interface of a tail: an ILocation that additionally carries a percentage
 * and a strand.
 *
 * NOTE(review): ITail.cpp in this tree defines ITail::ITail() and
 * ITail::~ITail(), but both declarations are commented out below, so that
 * source file cannot compile against this header -- confirm whether it is
 * still part of the build.
 */
class ITail : public utility :: ILocation {
public:

	//ITail();
	//virtual ~ITail();
	// Inherited from ILocation
	virtual int getEnd() const = 0;
	virtual int getStart() const= 0;
	virtual void setEnd(int)= 0;
	virtual void setStart(int)= 0;
	virtual int getLength()= 0;
	virtual string toString()= 0;

	// Methods specific to tail objects.
	// NOTE(review): the meaning and range of the percentage and the accepted
	// strand strings are not visible here -- confirm against the implementations.
	virtual double getPercentage() const= 0;
	virtual void setPercentage(double)= 0;
	virtual string getStrand() const= 0;
	virtual void setStrand(string)= 0;

};
(int i = 0; i < lenTotal; i++) { 39 | cTable[i] = 0; 40 | } 41 | 42 | bTable = new int[lenTotal]; 43 | for (int i = 0; i < lenTotal; i++) { 44 | bTable[i] = 0; 45 | } 46 | 47 | findLcs(); 48 | } 49 | 50 | LCS::~LCS() { 51 | delete[] cTable; 52 | delete[] bTable; 53 | } 54 | 55 | void LCS::findLcs() { 56 | for (int i = 1; i < len1; i++) { 57 | char base1 = seq1[start1 + i - 1]; 58 | int iM1Index = (i - 1) * len2; 59 | int iIndex = i * len2; 60 | 61 | for (int j = 1; j < len2; j++) { 62 | int ijIndex = iIndex + j; 63 | 64 | if (base1 == seq2[start2 + j - 1]) { 65 | cTable[ijIndex] = cTable[iM1Index + j - 1] + 1; 66 | bTable[ijIndex] = DIAGONAL; 67 | } else { 68 | if (cTable[iM1Index + j] > cTable[iIndex + j - 1]) { 69 | cTable[ijIndex] = cTable[iM1Index + j]; 70 | bTable[ijIndex] = UP; 71 | } else { 72 | cTable[ijIndex] = cTable[iIndex + j - 1]; 73 | bTable[ijIndex] = LEFT; 74 | } 75 | } 76 | } 77 | } 78 | 79 | // Testing 80 | /* 81 | for (int i = 0; i < len1; i++) { 82 | int iIndex = i * len2; 83 | for (int j = 0; j < len2; j++) { 84 | cout << cTable[iIndex + j] << " "; 85 | } 86 | cout << endl; 87 | } 88 | cout << endl; 89 | cout << endl; 90 | 91 | for (int i = 0; i < len1; i++) { 92 | int iIndex = i * len2; 93 | for (int j = 0; j < len2; j++) { 94 | cout << bTable[iIndex + j] << " "; 95 | } 96 | cout << endl; 97 | } 98 | cout << endl; 99 | */ 100 | // End 101 | } 102 | 103 | int LCS::getLenCS(){ 104 | return cTable[lenTotal-1]; 105 | } 106 | 107 | void LCS::printLcs() { 108 | vector * rev = new vector(); 109 | 110 | int i = len1 - 1; 111 | int j = len2 - 1; 112 | 113 | while (i != 0 && j != 0) { 114 | int iIndex = i * len2; 115 | switch (bTable[iIndex + j]) { 116 | case DIAGONAL: 117 | // Test start 118 | cout << start1 + i - 1 << endl; 119 | // Test end 120 | rev->push_back(seq1[start1 + i - 1]); 121 | i--; 122 | j--; 123 | break; 124 | case UP: 125 | i--; 126 | break; 127 | case LEFT: 128 | j--; 129 | break; 130 | default: 131 | string msg = "Invalid direction 
in the bTable: "; 132 | msg.append(1, bTable[iIndex + j]); 133 | msg.append(1, '.'); 134 | throw InvalidStateException(msg); 135 | break; 136 | } 137 | } 138 | 139 | int size = rev->size(); 140 | cout << "Rev size is: " << size << endl; 141 | 142 | for (int i = size - 1; i >= 0; i--) { 143 | cout << /*(int)*/ rev->at(i); 144 | } 145 | cout << endl; 146 | 147 | rev->clear(); 148 | delete rev; 149 | } 150 | -------------------------------------------------------------------------------- /src/utility/LCS.h: -------------------------------------------------------------------------------- 1 | /* 2 | * LCS.h 3 | * 4 | * Created on: Dec 4, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef LCS_H_ 9 | #define LCS_H_ 10 | 11 | #include 12 | 13 | using namespace std; 14 | 15 | namespace utility{ 16 | class LCS { 17 | private: 18 | const char * seq1; 19 | int start1; 20 | int end1; 21 | const char * seq2; 22 | int start2; 23 | int end2; 24 | 25 | int len1; 26 | int len2; 27 | int lenTotal; 28 | // int lenCS; 29 | 30 | int * cTable; 31 | int * bTable; 32 | static const int UP = 1; 33 | static const int DIAGONAL = 2; 34 | static const int LEFT = 3; 35 | 36 | public: 37 | static const string SAME; 38 | static const string OPPOSITE; 39 | 40 | LCS(const char *, int, int, const char *, int, int); 41 | virtual ~LCS(); 42 | void findLcs(); 43 | int getLenCS(); 44 | void printLcs(); 45 | }; 46 | } 47 | 48 | #endif /* LCS_H_ */ 49 | -------------------------------------------------------------------------------- /src/utility/LCSLen.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * LCSLen.cpp 3 | * 4 | * Created on: Dec 6, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #include "LCSLen.h" 9 | #include "Util.h" 10 | #include "../exception/InvalidInputException.h" 11 | 12 | #include 13 | 14 | using namespace std; 15 | using namespace exception; 16 | 17 | namespace utility { 18 | 19 | LCSLen::LCSLen(const char * 
seq1In, int start1In, int end1In, 20 | const char * seq2In, int start2In, int end2In) { 21 | seq1 = seq1In; 22 | start1 = start1In; 23 | end1 = end1In; 24 | 25 | seq2 = seq2In; 26 | start2 = start2In; 27 | end2 = end2In; 28 | 29 | if(start1 < 0 || end1 < 0 || start1 > end1){ 30 | string msg("Invalid Input. Start1 is "); 31 | msg.append(Util::int2string(start1)); 32 | msg.append(". End 1 is "); 33 | msg.append(Util::int2string(end1)); 34 | msg.append("."); 35 | throw InvalidInputException(msg); 36 | } 37 | 38 | if(start2 < 0 || end2 < 0 || start2 > end2){ 39 | string msg("Invalid Input. Start2 is "); 40 | msg.append(Util::int2string(start2)); 41 | msg.append(". End2 is "); 42 | msg.append(Util::int2string(end2)); 43 | msg.append("."); 44 | throw InvalidInputException(msg); 45 | } 46 | 47 | // Validate input 48 | cout << start1 << " " << end1 << endl; 49 | cout << start2 << " " << end2 << endl; 50 | 51 | 52 | len1 = end1 - start1 + 2; 53 | len2 = end2 - start2 + 2; 54 | 55 | lenTotal = 2 * len2; 56 | cTable = new int[lenTotal]; 57 | 58 | for (int i = 0; i < lenTotal; i++) { 59 | cTable[i] = 0; 60 | } 61 | 62 | findLcs(); 63 | } 64 | 65 | LCSLen::~LCSLen() { 66 | delete[] cTable; 67 | } 68 | 69 | void LCSLen::findLcs() { 70 | int iM1Index = 0; 71 | int iIndex = len2; 72 | 73 | for (int i = 1; i < len1; i++) { 74 | char base1 = seq1[start1 + i - 1]; 75 | 76 | for (int j = 1; j < len2; j++) { 77 | int ijIndex = iIndex + j; 78 | if (base1 == seq2[start2 + j - 1]) { 79 | cTable[ijIndex] = cTable[iM1Index + j - 1] + 1; 80 | } else { 81 | if (cTable[iM1Index + j] > cTable[iIndex + j - 1]) { 82 | cTable[ijIndex] = cTable[iM1Index + j]; 83 | } else { 84 | cTable[ijIndex] = cTable[iIndex + j - 1]; 85 | } 86 | } 87 | } 88 | 89 | if(i != len1-1){ 90 | for(int h = 0; h < len2; h++){ 91 | cTable[h] = cTable[len2+h]; 92 | } 93 | } 94 | } 95 | lenCS = cTable[lenTotal-1]; 96 | } 97 | 98 | int LCSLen::getLenCS(){ 99 | return lenCS; 100 | } 101 | 102 | } 103 | /* namespace utility */ 
104 | -------------------------------------------------------------------------------- /src/utility/LCSLen.h: -------------------------------------------------------------------------------- 1 | /* 2 | * LCSLen.h 3 | * 4 | * Created on: Dec 6, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef LCSLEN_H_ 9 | #define LCSLEN_H_ 10 | 11 | namespace utility { 12 | 13 | class LCSLen { 14 | private: 15 | const char * seq1; 16 | int start1; 17 | int end1; 18 | const char * seq2; 19 | int start2; 20 | int end2; 21 | 22 | int len1; 23 | int len2; 24 | int lenTotal; 25 | int lenCS; 26 | 27 | int * cTable; 28 | void findLcs(); 29 | 30 | public: 31 | LCSLen(const char *, int, int, const char *, int, int); 32 | virtual ~LCSLen(); 33 | int getLenCS(); 34 | }; 35 | 36 | } /* namespace utility */ 37 | #endif /* LCSLEN_H_ */ 38 | -------------------------------------------------------------------------------- /src/utility/LCSubStr.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * LCSubStr.cpp 3 | * 4 | * Created on: Dec 19, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #include "LCSubStr.h" 9 | #include "Util.h" 10 | #include "Location.h" 11 | 12 | // Delete start 13 | #include 14 | // Delete end 15 | 16 | namespace utility { 17 | 18 | LCSubStr::LCSubStr(const char * seq1In, ILocation * loc1, const char * seq2In, 19 | ILocation * loc2) { 20 | 21 | int start1In = loc1->getStart(); 22 | int end1In = loc1->getEnd(); 23 | int start2In = loc2->getStart(); 24 | int end2In = loc2->getEnd(); 25 | 26 | if (end1In - start1In > end2In - start1In) { 27 | seq1 = seq1In; 28 | start1 = start1In; 29 | end1 = end1In; 30 | len1 = end1 - start1 + 2; 31 | 32 | seq2 = seq2In; 33 | start2 = start2In; 34 | end2 = end2In; 35 | len2 = end2 - start2 + 2; 36 | 37 | isFirstShorter = false; 38 | } else { 39 | seq1 = seq2In; 40 | start1 = start2In; 41 | end1 = end2In; 42 | len1 = end1 - start1 + 2; 43 | 44 | seq2 = seq1In; 45 | start2 
= start1In; 46 | end2 = end1In; 47 | len2 = end2 - start2 + 2; 48 | 49 | isFirstShorter = true; 50 | } 51 | 52 | lenTotal = 2 * len2; 53 | cTable = new int[lenTotal]; 54 | 55 | for (int i = 0; i < lenTotal; i++) { 56 | cTable[i] = 0; 57 | } 58 | lenCS = 0; 59 | 60 | subStr1 = new vector(); 61 | subStr2 = new vector(); 62 | 63 | findLCSubStr(); 64 | } 65 | 66 | LCSubStr::~LCSubStr() { 67 | delete[] cTable; 68 | 69 | Util::deleteInVector(subStr1); 70 | delete subStr1; 71 | 72 | Util::deleteInVector(subStr2); 73 | delete subStr2; 74 | } 75 | 76 | void LCSubStr::findLCSubStr() { 77 | int iM1Index = 0; 78 | int iIndex = len2; 79 | 80 | for (int i = 1; i < len1; i++) { 81 | char base1 = seq1[start1 + i - 1]; 82 | 83 | for (int j = 1; j < len2; j++) { 84 | int ijIndex = iIndex + j; 85 | if (base1 == seq2[start2 + j - 1]) { 86 | cTable[ijIndex] = cTable[iM1Index + j - 1] + 1; 87 | 88 | if (cTable[ijIndex] == lenCS) { 89 | subStr1->push_back( 90 | new Location(start1 + i - lenCS, start1 + i - 1)); 91 | subStr2->push_back( 92 | new Location(start2 + j - lenCS, start2 + j - 1)); 93 | } else if (cTable[ijIndex] > lenCS) { 94 | lenCS = cTable[ijIndex]; 95 | 96 | Util::deleteInVector(subStr1); 97 | Util::deleteInVector(subStr2); 98 | 99 | subStr1->push_back( 100 | new Location(start1 + i - lenCS, start1 + i - 1)); 101 | subStr2->push_back( 102 | new Location(start2 + j - lenCS, start2 + j - 1)); 103 | } 104 | } else { 105 | cTable[ijIndex] = 0; 106 | } 107 | } 108 | 109 | if (i != len1 - 1) { 110 | for (int h = 0; h < len2; h++) { 111 | cTable[h] = cTable[len2 + h]; 112 | } 113 | } 114 | } 115 | 116 | // Test start 117 | // for (int i = 0; i < subStr1->size(); i++) { 118 | // cout << "STD 1: " << subStr1->at(i)->toString() << endl; 119 | // cout << "STD 2: " << subStr2->at(i)->toString() << endl; 120 | // } 121 | 122 | // for (int i = 0; i < lenTotal; i++) { 123 | // cout << cTable[i]; 124 | // } 125 | // cout << endl; 126 | // Test end 127 | 128 | } 129 | 130 | int 
LCSubStr::getLenCS() { 131 | return lenCS; 132 | } 133 | 134 | vector *> * LCSubStr::getCSubStr() { 135 | vector *> * r = new vector *>(); 136 | if (isFirstShorter) { 137 | r->push_back(subStr2); 138 | r->push_back(subStr1); 139 | } else { 140 | r->push_back(subStr1); 141 | r->push_back(subStr2); 142 | } 143 | 144 | return r; 145 | } 146 | 147 | } 148 | -------------------------------------------------------------------------------- /src/utility/LCSubStr.h: -------------------------------------------------------------------------------- 1 | /* 2 | * LCSubStr.h 3 | * 4 | * Created on: Dec 19, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef LCSUBSTR_H_ 9 | #define LCSUBSTR_H_ 10 | 11 | #include 12 | 13 | #include "ILocation.h" 14 | 15 | using namespace std; 16 | 17 | namespace utility { 18 | 19 | class LCSubStr { 20 | private: 21 | const char * seq1; 22 | int start1; 23 | int end1; 24 | int len1; 25 | 26 | const char * seq2; 27 | int start2; 28 | int end2; 29 | int len2; 30 | 31 | bool isFirstShorter; 32 | int lenTotal; 33 | 34 | int * cTable; 35 | int lenCS; 36 | 37 | vector< ILocation * > * subStr1; 38 | vector< ILocation * > * subStr2; 39 | 40 | void findLCSubStr(); 41 | 42 | public: 43 | LCSubStr(const char *, ILocation *, const char *, ILocation *); 44 | virtual ~LCSubStr(); 45 | int getLenCS(); 46 | vector< vector< ILocation * > *> * getCSubStr(); 47 | }; 48 | 49 | } 50 | 51 | #endif /* LCSUBSTR_H_ */ 52 | -------------------------------------------------------------------------------- /src/utility/LocAlign.h: -------------------------------------------------------------------------------- 1 | #ifndef Loc_Align_H_ 2 | #include 3 | #include 4 | 5 | using namespace std; 6 | 7 | namespace utility{ 8 | 9 | class LocAlign{ 10 | 11 | private: 12 | const char * seq1; //first sequence to be aligned 13 | int start1; 14 | int end1; 15 | const char * seq2;//second sequence to be aligned 16 | int start2; 17 | int end2; 18 | int len1; 19 | int len2; 20 | 
int lenTotal; 21 | int match; //score for base pair match 22 | int mismatch;//score for base pair mismatch 23 | int gapOpen; //cost to open a gap 24 | int gapContinue; //cost to continue a gap 25 | int * matches; 26 | int * upperGap; 27 | int * lowerGap; 28 | int * matchLen; 29 | int * upperLen; 30 | int * lowerLen; 31 | int * matchId; 32 | int * upperId; 33 | int * lowerId; 34 | int alignmentScore; 35 | int alignmentLength; 36 | int totalMatches; 37 | int queryStart; 38 | int queryEnd; 39 | int referenceStart; 40 | int referenceEnd; 41 | string topString; 42 | string bottomString; 43 | public: 44 | LocAlign(const char*,int,int,const char *,int,int, int,int,int,int); 45 | 46 | //virtual ~LocAlign(); 47 | void findAlignment(); 48 | tuplefindMax(int *); 49 | double getIdentity(); 50 | int getLength(); 51 | void printAlignment(); //display LocAlign 52 | int getScore(); 53 | int getLengthAlignment(); 54 | int getQueryStart(); 55 | int getQueryEnd(); 56 | int getReferenceStart(); 57 | int getReferenceEnd(); 58 | 59 | }; 60 | } 61 | #endif -------------------------------------------------------------------------------- /src/utility/LocAlignE.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Author: Joseph Valencia 3 | * Date: 12/14/17 4 | * Bioinformatics Toolsmith Laboratory, University of Tulsa 5 | * */ 6 | #include 7 | #include "../exception/InvalidStateException.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "GlobAlignE.h" 15 | 16 | using namespace std; 17 | using namespace utility; 18 | using namespace exception; 19 | 20 | GlobAlignE::GlobAlignE(const char * seq1In, int start1In, int end1In, const char * seq2In, 21 | int start2In, int end2In, int matchIn, int mismatchIn, int gapOpenIn, int gapContinueIn){ 22 | 23 | seq1 = seq1In; 24 | start1 = start1In; 25 | end1 = end1In; 26 | 27 | seq2 = seq2In; 28 | start2 = start2In; 29 | end2 = end2In; 30 | 31 | len1 = end1 - 
start1 + 2; 32 | len2 = end2 - start2 + 2; 33 | match = matchIn; 34 | mismatch = mismatchIn; 35 | gapOpen = gapOpenIn; 36 | gapContinue = gapContinueIn; 37 | findAlignment(); 38 | 39 | } 40 | 41 | int GlobAlignE::findAlignment(){ 42 | 43 | int shorter = min(len2,len1)-1; 44 | int lenDiff = abs(len2-len1); 45 | int maxDiff=0; 46 | 47 | if (lenDiff >=1){ 48 | maxDiff += -gapOpen- (lenDiff*gapContinue); 49 | } 50 | 51 | maxDiff+= (mismatch* shorter)-1; 52 | 53 | const int negativeInf = maxDiff; 54 | 55 | int matches[len1]; 56 | int upperGap[len1]; 57 | int lowerGap[len2]; 58 | 59 | matches[0]= 0; 60 | upperGap[0] = negativeInf; 61 | lowerGap[0] = negativeInf; 62 | 63 | for (int i = 1; ifindAlignment()< 9 | 10 | using namespace std; 11 | 12 | namespace utility{ 13 | 14 | class GlobAlignE{ 15 | 16 | private: 17 | const char * seq1; //first sequence to be aligned 18 | int start1; 19 | int end1; 20 | const char * seq2;//second sequence to be aligned 21 | int start2; 22 | int end2; 23 | int len1; 24 | int len2; 25 | int lenTotal; 26 | int match; //score for base pair match 27 | int mismatch;//score for base pair mismatch 28 | int gapOpen; //cost to open a gap 29 | int gapContinue; //cost to continue a gap 30 | int alignmentScore; 31 | string topString; 32 | string bottomString; 33 | public: 34 | GlobAlignE(const char*,int,int,const char *,int,int, int,int,int,int); 35 | // virtual ~LocAlign(); 36 | int findAlignment(); 37 | void printAlignment(); //display LocAlign 38 | int getScore(); 39 | int getLengthAlignment(); 40 | 41 | }; 42 | } 43 | #endif -------------------------------------------------------------------------------- /src/utility/LocAlignUtility.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "../utility/LocAlign.h" 7 | using namespace std; 8 | using namespace utility; 9 | 10 | 11 | int main(int argc, char *argv[]) 12 | { 13 | 14 | ifstream ifs; 15 | 16 | 
ifs.open(argv[1], ifstream::in); 17 | cout << "FILE OPENED" << endl; 18 | char c = ifs.get(); 19 | 20 | if (c == '>') 21 | { 22 | 23 | while (c != '\n') 24 | { 25 | c = ifs.get(); 26 | } 27 | } 28 | 29 | string string1 = ""; 30 | 31 | while (ifs.good()) 32 | { 33 | 34 | if (c != '\n') 35 | { 36 | string1 += c; 37 | } 38 | c = ifs.get(); 39 | } 40 | 41 | ifs.close(); 42 | 43 | ifstream ifs2; 44 | 45 | ifs2.open(argv[2], ifstream::in); 46 | 47 | c = ifs2.get(); 48 | 49 | if (c == '>') 50 | { 51 | 52 | while (c != '\n') 53 | { 54 | c = ifs2.get(); 55 | } 56 | } 57 | 58 | string string2 = ""; 59 | 60 | while (ifs2.good()) 61 | { 62 | 63 | 64 | if (c != '\n') 65 | { 66 | string2 += c; 67 | } 68 | c = ifs2.get(); 69 | } 70 | 71 | ifs2.close(); 72 | 73 | std::transform(string1.begin(), string1.end(), string1.begin(), ::toupper); 74 | std::transform(string2.begin(), string2.end(), string2.begin(), ::toupper); 75 | 76 | //cout <getIdentity() << endl; 84 | } 85 | -------------------------------------------------------------------------------- /src/utility/Location.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Location.cpp 3 | * 4 | * Created on: Dec 19, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #include "Location.h" 9 | #include "Util.h" 10 | #include "../exception/InvalidInputException.h" 11 | 12 | using namespace exception; 13 | 14 | namespace utility { 15 | 16 | Location::Location(int startIn, int endIn) { 17 | initialize(startIn, endIn); 18 | } 19 | 20 | Location::Location(ILocation& cp) { 21 | initialize(cp.getStart(), cp.getEnd()); 22 | } 23 | 24 | void Location::initialize(int startIn, int endIn) { 25 | start = startIn; 26 | end = endIn; 27 | check(); 28 | 29 | } 30 | 31 | void Location::check() { 32 | if (start < 0 || end < 0 || start > end) { 33 | string msg("Invalid Input. Start is "); 34 | msg.append(Util::int2string(start)); 35 | msg.append(". 
End is "); 36 | msg.append(Util::int2string(end)); 37 | msg.append("."); 38 | throw InvalidInputException(msg); 39 | } 40 | } 41 | 42 | Location::~Location() { 43 | } 44 | 45 | int Location::getEnd() const { 46 | return end; 47 | } 48 | 49 | int Location::getStart() const { 50 | return start; 51 | } 52 | 53 | void Location::setEnd(int endIn) { 54 | end = endIn; 55 | check(); 56 | } 57 | 58 | void Location::setStart(int startIn) { 59 | start = startIn; 60 | check(); 61 | } 62 | 63 | int Location::getLength() { 64 | return end - start + 1; 65 | } 66 | 67 | string Location::toString() { 68 | string msg = (Util::int2string(start)); 69 | msg.append("\t"); 70 | msg.append(Util::int2string(end)); 71 | 72 | return msg; 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/utility/Location.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Location.h 3 | * 4 | * Created on: Dec 19, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef LOCATION_H_ 9 | #define LOCATION_H_ 10 | 11 | #include "ILocation.h" 12 | 13 | #include 14 | 15 | using namespace std; 16 | 17 | namespace utility { 18 | 19 | class Location : public ILocation{ 20 | private: 21 | int start; 22 | int end; 23 | void initialize(int, int); 24 | void check(); 25 | 26 | public: 27 | Location(int, int); 28 | Location(ILocation&); 29 | virtual ~Location(); 30 | 31 | int getEnd() const; 32 | int getStart() const; 33 | void setEnd(int); 34 | void setStart(int); 35 | int getLength(); 36 | string toString(); 37 | }; 38 | 39 | } 40 | 41 | #endif /* LOCATION_H_ */ 42 | -------------------------------------------------------------------------------- /src/utility/NW.cpp: -------------------------------------------------------------------------------- 1 | /* -*- C++ -*- 2 | * 3 | * needleman_wunsch.cpp 4 | * 5 | * Author: Benjamin T James 6 | */ 7 | #include "NW.h" 8 | #include 9 | 10 | //flags that can be combined 11 
| #define HORIZ 1 12 | #define VERT 2 13 | #define DIAG 4 14 | using namespace std; 15 | void needleman_wunsch::fill(int i, int j) 16 | { 17 | if (i == 0 || j == 0) { 18 | if (i == j) { 19 | int offset = at(i, j); 20 | score[offset] = 0; 21 | direction[offset] = DIAG; // for backtracking 22 | horiz_gap_len[offset] = 0; 23 | vert_gap_len[offset] = 0; 24 | } else if (i == 0) { 25 | int offset = at(0, j); 26 | int last_offset = at(0, j-1); 27 | score[offset] = score[last_offset] + gap(j); 28 | horiz_gap_len[offset] = 0; 29 | vert_gap_len[offset] = j; 30 | direction[offset] = VERT; 31 | } else { // j == 0 32 | int offset = at(i, 0); 33 | int last_offset = at(i-1, 0); 34 | score[offset] = score[last_offset] + gap(i); 35 | horiz_gap_len[offset] = i; 36 | vert_gap_len[offset] = 0; 37 | direction[offset] = HORIZ; 38 | } 39 | return; 40 | } 41 | int i_diag = at(i-1, j-1); 42 | int i_horiz = at(i-1, j); 43 | int i_vert = at(i, j-1); 44 | int i_cur = at(i, j); 45 | 46 | int hlen = horiz_gap_len[i_horiz] + 1; 47 | int vlen = vert_gap_len[i_vert] + 1; 48 | 49 | int diag_score = score[i_diag] + match_score(s1[i], s2[j]); 50 | int horiz_score = score[i_horiz] + gap(hlen); 51 | int vert_score = score[i_vert] + gap(vlen); 52 | score[i_cur] = std::max(std::max(diag_score, horiz_score), vert_score); 53 | direction[i_cur] = 0; 54 | 55 | // we could match multiple high scores 56 | if (score[i_cur] == diag_score) { 57 | direction[i_cur] |= DIAG; 58 | } 59 | if (score[i_cur] == vert_score) { 60 | direction[i_cur] |= VERT; 61 | vert_gap_len[i_cur] = vlen; 62 | } else { 63 | vert_gap_len[i_cur] = 0; 64 | } 65 | if (score[i_cur] == horiz_score) { 66 | direction[i_cur] |= HORIZ; 67 | horiz_gap_len[i_cur] = hlen; 68 | } else { 69 | horiz_gap_len[i_cur] = 0; 70 | } 71 | } 72 | 73 | std::pair 74 | needleman_wunsch::backtrack() 75 | { 76 | std::string a1 = "", a2 = ""; 77 | int cur_i = l1 - 1; 78 | int cur_j = l2 - 1; 79 | while (cur_i >= 0 && cur_j >= 0) { 80 | uint8_t dir = direction[at(cur_i, 
cur_j)]; 81 | if (dir & DIAG) { 82 | a1 += s1[cur_i--]; 83 | a2 += s2[cur_j--]; 84 | } else if (dir & HORIZ) { 85 | a1 += s1[cur_i--]; 86 | a2 += '-'; 87 | } else if (dir & VERT) { 88 | a1 += '-'; 89 | a2 += s2[cur_j--]; 90 | } 91 | } 92 | std::string r1(a1.rbegin(), a1.rend()); 93 | std::string r2(a2.rbegin(), a2.rend()); 94 | return std::make_pair(r1, r2); 95 | } 96 | 97 | 98 | std::pair 99 | needleman_wunsch::align() 100 | { 101 | for (int i = 0; i < l1; i++) { 102 | for (int j = 0; j < l2; j++) { 103 | fill(i, j); 104 | } 105 | } 106 | return backtrack(); 107 | } 108 | double needleman_wunsch::identity(std::pair alignment) const 109 | { 110 | int len = alignment.first.length(); 111 | double count = 0; 112 | for (int i = 0; i < len; i++) { 113 | if (alignment.first[i] == alignment.second[i]) { 114 | count++; 115 | } 116 | } 117 | return (double)count / len; 118 | } 119 | 120 | int needleman_wunsch::gap(int gaplen) const 121 | { 122 | return sigma + (gaplen - 1) * epsilon; 123 | } 124 | 125 | int transform1(char a) 126 | { 127 | int ret = -1; 128 | switch (a) { 129 | case 'a': 130 | case 'A': 131 | ret = 0; 132 | break; 133 | case 'c': 134 | case 'C': 135 | ret = 1; 136 | break; 137 | case 'g': 138 | case 'G': 139 | ret = 2; 140 | break; 141 | case 't': 142 | case 'T': 143 | ret = 3; 144 | break; 145 | default: 146 | ret = a; 147 | break; 148 | } 149 | return ret; 150 | } 151 | int needleman_wunsch::match_score(char a, char b) const 152 | { 153 | int ta = transform1(a); 154 | int tb = transform1(b); 155 | return scoring_matrix[ta][tb]; 156 | } 157 | needleman_wunsch::needleman_wunsch(const std::string &s1_, const std::string& s2_, const int m[4][4], int sigma_, int epsilon_) 158 | { 159 | int l1_ = s1_.length(); 160 | int l2_ = s2_.length(); 161 | if (l1_ >= l2_) { 162 | l1 = l1_; 163 | l2 = l2_; 164 | s1 = s1_; 165 | s2 = s2_; 166 | } else { 167 | l1 = l2_; 168 | l2 = l1_; 169 | s1 = s2_; 170 | s2 = s1_; 171 | } 172 | sigma = sigma_; 173 | epsilon = epsilon_; 
174 | for (int i = 0; i < 4; i++) { 175 | for (int j = 0; j < 4; j++) { 176 | scoring_matrix[i][j] = (int)m[i][j]; 177 | } 178 | } 179 | int matlen = l1 * l2; 180 | score = new int[matlen]; 181 | direction = new uint8_t[matlen]; 182 | horiz_gap_len = new int[matlen]; 183 | vert_gap_len = new int[matlen]; 184 | } 185 | 186 | int main(){ 187 | // const std::string string1 = "GCTCCCTGGAGGTAGGAGCGTACGTCGGAGGCGTCAGCAGCATGTCTCAGTGGGACCGTTGGTTCCATTGTTAGATTGTTGATCACTACTTGTGTGTTTTGAAGCTATCTAACGCACGTTGACTTAGTATCTCTATAGTTCCGATTGACTAATTACACCTCGAGTACATTTAAGTGACTCTTAGGTAATGCGTTAGGCAAGCAAAATCTGACGCCCACGTACACCGATGCCCATAGAGTCAGGAGGGGCATTAGTCTCGGACGACATCGACGGCGATATCAGGCTTTCTACTCCGCCCTTAAGGACACCGAAACTCGGTTATGAAGAACGCGTGGCATTAAGCCGCTGCCCACTGTGGTTGTCAGGCTCGTGTATCAAGCATAACATAACAGCGGGACCAAGCATTCAGGCGTTTTGATTAAGACCGATGTACCAAGAACGACGAGGTACGGGGTGACAACAAAGTTCTCTAAGGATACATGATTGGGGGCTCAGCAATGAATCTGATCTTCCATAGAAGGATAGTACCTCTCCGTAGTCTCACTTCGCGGACTGCCGTTCAGTTTTCCTATACATTGCTCTCGAATTGCGCGTTTAAGTTTGCTTCAGTTGGGAACACGATTTTGGTGTAGAACGTTAGAAAAGTAACTCAGAGGGGTGCGGTGTAAGTTGTTCACCTTCTGCTGGGCAATCACGGTGAGCCCTTCCAGCGTGCCACGAATTCGATACCCCACGTGATCTAGCTGGCTGGCCCAACCGCATGTTGGAACGTGAGACGGCCAGACACCGAGCACAGGTATTGACCTCCGGGCAAACACTCGGATCGATCTTCGTACAACGTCTTTGTGTTTCCCTATTGAATTTTCCCCGCGTCATGTTCGATCCATCACGACCAACGAGGTGGACCAAGGAGTGAATTCTGAAGATCCGAAACTTTTTAATGTAAACTACCGATGTGAAAAACCAAAAAATTCGTTAGGCTTACTACCAGAATAGAGTT"; 188 | // const std::string string2 = 
"cCtCtCCtGGAGGTAGGAGCGTACGaCGGAGGCGTCAGCAGCATacGTCgCAGTGGGACccGTTGGgTCCATTGTTAGATTGTTGATCACTACTTGaTgtTTTGagCTATCTAACGccGTTGACTTAGTtATCTCTATAGtttcGATTGAaTAATTaaCAtCTCgAGttActTTAAGTGACTCTTAGGTAATGCGTtTAGGCaAGCAAAATCTGACGCCCACGTtccGATGCCCATAGgAGTCAGcGAggGCATTAGTCTCGGACGaaTCGcACGGCGATATCAGGCTTtttACTCgCGCCCTTAAGGACACCGAAACTCgGTttATGAAGtACgcgcGTcggATTAacaGCTGCccCACttGTGGTTGTCAGaGCTCGTGTATCAAggCATAACATaACAGCGGGACCAAGCtgcATTCAGaGCGaTTtgATTAAGACcaTGTACCAaaGgAACGACGcAGgacGGGGTGaaACAAatCTCTAAGGATACATGATtGGGGGcTCcAGCAATGAATCTgATCTTCCATAGAAaGGATAGtcCTCTCCGcAGTCTCatTCGCgaCTGCCGTTcatTTTCCTATcgACATtcTCTcaATTGaCGCGTTTagTTTGcCTTCAGTgGGGAACACGATTTTGGTGTAGAACGTTAGAAAAGTAAccAGAGGGGTGCGGTGTgAgtGTTCACCTTCTGCTGGGCAATCgCGGTaGAGCCCTTCCAGCGTGCCACGAatgATACccCCAcggTGATCTAGCTGGCTGGCCCAACCGCAgGTTGGAActGAGACGGCCAGACACccGAGCacacAGGTATTGACCTCCGGGCAAACACTcgATCacggATCTTCGTACAACGTCTTTtgGTTTCCCTATTggAATTTTccCGCGTCATGTTCGATCCATCACGACCAACGAGGaTGgACCAAGGAGcgaTTCTGAAGATCcgaCgaaaACTTttTTAATGTAAACTACCGATGTGAAAAACCAAAatTCGTTAGGCTcTACTACCAGAATAGAGTT"; 189 | string string1 = "GCTCCCTGGAGGTAGG"; 190 | string string2 ="cCtCtCCtGGAGGTAG"; 191 | 192 | const std::string& in1 = string1; 193 | const std::string& in2 = string2; 194 | 195 | const int array [4][4]={{1,-1,-1,1},{-1,1,-1,-1},{-1,-1,1,-1},{-1,-1,-1,1}}; 196 | 197 | 198 | needleman_wunsch * alignment = new needleman_wunsch(in1,in2,array,4,1); 199 | cout << "ALIGNMENT IS" << alignment->align().first<align().second< 12 | 13 | class needleman_wunsch { 14 | public: 15 | needleman_wunsch(const std::string& s1, const std::string& s2, const int matrix_[4][4], int sigma_, int epsilon_); 16 | ~needleman_wunsch() { 17 | delete[] score; 18 | delete[] direction; 19 | delete[] horiz_gap_len; 20 | delete[] vert_gap_len; 21 | } 22 | double identity(std::pair p) const; 23 | std::pair 24 | align(); 25 | private: 26 | int gap(int gap_len) const; 27 | int match_score(char a, char b) const; 28 | inline int at(int a, int b) const { return a * l2 + b; }; 29 | void fill(int,int); 
30 | std::pair backtrack(); 31 | int scoring_matrix[4][4]; 32 | int sigma, epsilon; 33 | std::string s1, s2; 34 | int l1, l2; 35 | 36 | int *score; 37 | uint8_t *direction; 38 | int *horiz_gap_len; 39 | int *vert_gap_len; 40 | }; 41 | 42 | 43 | #endif 44 | 45 | -------------------------------------------------------------------------------- /src/utility/TSD.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * TSD.cpp 3 | * 4 | * Created on: Dec 21, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #include "TSD.h" 9 | #include "Location.h" 10 | #include "EmptyLocation.h" 11 | #include "Util.h" 12 | #include "LCSubStr.h" 13 | #include "../exception/InvalidStateException.h" 14 | 15 | #include 16 | 17 | using namespace std; 18 | using namespace exception; 19 | 20 | namespace utility { 21 | 22 | TSD::TSD(const string * seq, ILocation * te, int w, int init) { 23 | const char * cSeq = seq->c_str(); 24 | 25 | // Determine the left window 26 | int ltEnd = te->getStart() - 1; 27 | if (ltEnd < 0) { 28 | ltEnd = 0; 29 | } 30 | int ltStart = ltEnd - w + 1; 31 | if (ltStart < 0) { 32 | ltStart = 0; 33 | } 34 | int temp = ltEnd; 35 | for (int i = ltEnd; i >= ltStart; i--) { 36 | if (cSeq[i] == init) { 37 | break; 38 | } else { 39 | temp = i; 40 | } 41 | } 42 | ltStart = temp; 43 | Location * ltWin = new Location(ltStart, ltEnd); 44 | 45 | // Determine the right window 46 | int rtStart = te->getEnd() + 1; 47 | int rtEnd = rtStart + w - 1; 48 | int lastIndex = seq->size() - 1; 49 | if (rtEnd > lastIndex) { 50 | rtEnd = lastIndex; 51 | } 52 | int temp1 = rtStart; 53 | for (int i = rtStart; i <= rtEnd; i++) { 54 | if (cSeq[i] == init) { 55 | break; 56 | } else { 57 | temp1 = i; 58 | } 59 | } 60 | rtEnd = temp1; 61 | Location * rtWin = new Location(rtStart, rtEnd); 62 | 63 | // Determine the closest TSD 64 | LCSubStr * lcss = new LCSubStr(cSeq, ltWin, cSeq, rtWin); 65 | 66 | vector *> * r = lcss->getCSubStr(); 67 | 68 | int min = 
1000000; 69 | int minIndex = -1; 70 | 71 | int tsdFound = r->at(0)->size(); 72 | 73 | for (int j = 0; j < tsdFound; j++) { 74 | 75 | ILocation * lt = r->at(0)->at(j); 76 | ILocation * rt = r->at(1)->at(j); 77 | 78 | int ltDis = te->getStart() - lt->getEnd(); 79 | int rtDis = rt->getStart() - te->getEnd(); 80 | int dis = ltDis + rtDis; 81 | 82 | if (ltDis < 0 || rtDis < 0) { 83 | string msg("Distance cannot be negative. The left distance is: "); 84 | msg.append(Util::int2string(ltDis)); 85 | msg.append(" The right distance is: "); 86 | msg.append(Util::int2string(rtDis)); 87 | throw InvalidStateException(msg); 88 | } 89 | 90 | if (dis < min) { 91 | min = dis; 92 | minIndex = j; 93 | } 94 | } 95 | 96 | int ltSize; 97 | int rtSize; 98 | if (minIndex == -1) { 99 | ltTsd = EmptyLocation::getInstance(); 100 | rtTsd = EmptyLocation::getInstance(); 101 | ltSize = 0; 102 | rtSize = 0; 103 | 104 | } else { 105 | ltTsd = new Location(*(r->at(0)->at(minIndex))); 106 | rtTsd = new Location(*(r->at(1)->at(minIndex))); 107 | ltSize = ltTsd->getLength(); 108 | rtSize = rtTsd->getLength(); 109 | 110 | } 111 | 112 | if (ltSize != rtSize) { 113 | string msg("The two sites must have the same length. 
"); 114 | msg.append("The length of the left site is: "); 115 | msg.append(Util::int2string(ltSize)); 116 | msg.append(" The length of the right site is: "); 117 | msg.append(Util::int2string(rtSize)); 118 | msg.append("."); 119 | throw InvalidStateException(msg); 120 | } 121 | tsdSize = ltSize; 122 | 123 | // Free resources 124 | delete lcss; 125 | delete ltWin; 126 | delete rtWin; 127 | } 128 | 129 | TSD::TSD(ITSD& copy) { 130 | ltTsd = new Location(*copy.getLtTsd()); 131 | rtTsd = new Location(*copy.getRtTsd()); 132 | tsdSize = copy.getTsdSize(); 133 | } 134 | 135 | TSD::TSD(ITSD& copy, int offset) { 136 | 137 | 138 | ltTsd = new Location(copy.getLtTsd()->getStart()+offset,copy.getLtTsd()->getEnd()+offset); 139 | 140 | rtTsd = new Location(copy.getRtTsd()->getStart()+offset,copy.getRtTsd()->getEnd()+offset); 141 | tsdSize = copy.getTsdSize(); 142 | } 143 | 144 | TSD::~TSD() { 145 | if (ltTsd != EmptyLocation::getInstance()) { 146 | delete ltTsd; 147 | } 148 | 149 | if (rtTsd != EmptyLocation::getInstance()) { 150 | delete rtTsd; 151 | } 152 | } 153 | 154 | ILocation * TSD::getLtTsd() { 155 | return ltTsd; 156 | } 157 | 158 | ILocation * TSD::getRtTsd() { 159 | return rtTsd; 160 | } 161 | 162 | int TSD::getTsdSize() { 163 | return tsdSize; 164 | } 165 | 166 | string TSD::toString() { 167 | string msg("L_TSD "); 168 | msg.append(ltTsd->toString()); 169 | msg.append(" R_TSD "); 170 | msg.append(rtTsd->toString()); 171 | return msg; 172 | } 173 | 174 | } /* namespace utility */ 175 | -------------------------------------------------------------------------------- /src/utility/TSD.h: -------------------------------------------------------------------------------- 1 | /* 2 | * TSD.h 3 | * 4 | * Created on: Dec 21, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef TSD_H_ 9 | #define TSD_H_ 10 | 11 | #include "ITSD.h" 12 | #include "ILocation.h" 13 | 14 | namespace utility { 15 | 16 | class TSD: public ITSD { 17 | private: 18 | ILocation * ltTsd; 19 | 
ILocation * rtTsd; 20 | int tsdSize; 21 | 22 | public: 23 | TSD(const string *, ILocation *, int, int); 24 | TSD(ITSD&); 25 | TSD(ITSD&,int); 26 | virtual ~TSD(); 27 | 28 | virtual ILocation * getLtTsd(); 29 | virtual ILocation * getRtTsd(); 30 | virtual int getTsdSize(); 31 | virtual string toString(); 32 | }; 33 | 34 | } /* namespace utility */ 35 | #endif /* TSD_H_ */ 36 | -------------------------------------------------------------------------------- /src/utility/Tail.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Tail.cpp 3 | * 4 | * Created on: Dec 27, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #include 9 | #include "Tail.h" 10 | #include "Util.h" 11 | 12 | namespace utility { 13 | 14 | Tail::Tail(int startIn, int endIn, string strandIn, double percentageIn) { 15 | initialize(startIn, endIn, strandIn, percentageIn); 16 | } 17 | 18 | Tail::Tail(ITail& copy) { 19 | initialize(copy.getStart(), copy.getEnd(), copy.getStrand(), 20 | copy.getPercentage()); 21 | } 22 | 23 | Tail::Tail(ITail& copy,int offset) { 24 | initialize(copy.getStart()+offset, copy.getEnd()+offset, copy.getStrand(), 25 | copy.getPercentage()); 26 | } 27 | 28 | void Tail::initialize(int startIn, int endIn, string strandIn, 29 | double percentageIn) { 30 | start = startIn; 31 | end = endIn; 32 | strand = strandIn; 33 | percentage = percentageIn; 34 | } 35 | 36 | Tail::~Tail() { 37 | // TODO Auto-generated destructor stub 38 | } 39 | 40 | int Tail::getStart() const { 41 | return start; 42 | } 43 | 44 | void Tail::setStart(int startIn) { 45 | start = startIn; 46 | } 47 | 48 | int Tail::getEnd() const { 49 | return end; 50 | } 51 | 52 | void Tail::setEnd(int endIn) { 53 | end = endIn; 54 | } 55 | 56 | int Tail::getLength() { 57 | return end - start + 1; 58 | } 59 | 60 | string Tail::toString() { 61 | string msg =""; 62 | msg.append(Util::int2string(start)); 63 | msg.append("\t"); 64 | msg.append(Util::int2string(end)); 65 | 
msg.append("\t"); 66 | msg.append(strand); 67 | msg.append("\t"); 68 | msg.append(Util::double2string(percentage)); 69 | return msg; 70 | } 71 | 72 | double Tail::getPercentage() const { 73 | return percentage; 74 | } 75 | 76 | void Tail::setPercentage(double percentageIn) { 77 | percentage = percentageIn; 78 | } 79 | 80 | string Tail::getStrand() const { 81 | return strand; 82 | } 83 | 84 | void Tail::setStrand(string strandIn) { 85 | strand = strandIn; 86 | } 87 | 88 | } /* namespace tr */ 89 | -------------------------------------------------------------------------------- /src/utility/Tail.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Tail.h 3 | * 4 | * Created on: Dec 27, 2012 5 | * Author: Hani Zakaria Girgis, PhD 6 | */ 7 | 8 | #ifndef TAIL_H_ 9 | #define TAIL_H_ 10 | 11 | #include 12 | #include "ITail.h" 13 | 14 | using namespace std; 15 | 16 | namespace utility { 17 | 18 | class Tail: public utility::ITail { 19 | private: 20 | int start; 21 | int end; 22 | double percentage; 23 | string strand; 24 | void initialize(int, int, string, double); 25 | 26 | public: 27 | Tail(int, int, string, double); 28 | Tail(ITail&); 29 | Tail(ITail&,int); 30 | virtual ~Tail(); 31 | 32 | // Inherited from ILocation 33 | virtual int getEnd() const; 34 | virtual int getStart() const; 35 | virtual void setEnd(int); 36 | virtual void setStart(int); 37 | virtual int getLength(); 38 | virtual string toString(); 39 | 40 | // Methods specific to tail objects. 
/*
 * Declaration of a helper that looks for a tail associated with a location
 * in a sequence. Only the interface is visible here; the notes below are
 * reviewer assumptions drawn from the declarations and should be confirmed
 * against TailFinder.cpp.
 *
 * NOTE(review): the element types of the vector declarations below are
 * missing in this copy of the header - restore them from the original file.
 */
class TailFinder {
private:
	// Sequence to search; held as a pointer to const, so presumably not owned.
	const string * seq;
	// Location the search is relative to - TODO confirm ownership.
	ILocation * loc;
	// Presumably one of MARK_A / MARK_P - TODO confirm.
	int whichTail;
	// Presumably the size of the search window - TODO confirm.
	int win;
	// Presumably the minimum accepted tail length - TODO confirm.
	int minLen;

	// Presumably seed and gap lengths used while extending a tail - TODO confirm.
	int seedLen;
	int gapLen;

	// Result container returned by getTail().
	vector * tail;

	void findMark();
	void findMarkA(string *, vector *, int, int);
	void findMarkP(string *, vector *, int, int);


public:
	string prettyFormatChrom(string *);
	// Selector values for the kind of tail to look for.
	static const int MARK_A = 1;
	static const int MARK_P = 2;
	// NOTE(review): the meaning and order of the five int arguments is not
	// visible from this header - confirm against the definition.
	TailFinder(const string *, ILocation *, int, int,int,int, int);
	virtual ~TailFinder();
	vector * getTail();
	bool isTailFound();
};
namespace utility; 25 | using namespace exception; 26 | 27 | namespace utility { 28 | class Util { 29 | private: 30 | Util(); 31 | ~Util(); 32 | 33 | public: 34 | static string * emptyString; 35 | static string fileSeparator; 36 | static void readFasta(string, vector *, vector *, bool); 37 | static void readFasta(string, vector *, vector *); 38 | static void readCoordinates(string, vector *); 39 | static void readChromList(string, vector *, string); 40 | static void toUpperCase(string*); 41 | static void toUpperCase(string&); 42 | static string int2string(int); 43 | static string double2string(double); 44 | static string long2string(long); 45 | static void deleteFile(string); 46 | static void deleteFilesUnderDirectory(string); 47 | static void checkFile(string); 48 | static bool isOverlapping(int, int, int, int); 49 | static void revCompDig(string *, string *); 50 | static void revCompDig(const char* sequence, int, int, string *); 51 | static string oneDigitToNuc(const string &input); 52 | 53 | static void writeFasta(const string&, const string&, const string&); 54 | 55 | static int sumTotalLength(const vector *); 56 | 57 | 58 | /** 59 | * Delete the objects pointed to by pointers in a vector. 60 | * It does not delete the vector itself. 
def slice(bedfile, name, score_file):
    """Plot the score profile of every record listed in a BED-like file.

    For each record the matching lines of *score_file* are plotted and the
    figure is saved as ``<name>/<start>-<end>.png``. Red triangles on the
    zero line mark the record's start, its two interior coordinates, and its
    last position (``end - 1``).

    Args:
        bedfile: Whitespace-delimited file. Columns 2 and 3 (1-based) hold
            the start and end; columns 5 and 6 hold the interior coordinates.
            Blank lines are ignored.
        name: Output directory; created (including parents) if missing.
        score_file: Text file with one ``index,score`` pair of integers per
            line. Lines ``start`` to ``end - 1`` (0-based) are used for a
            record.
    """
    os.makedirs(name, exist_ok=True)

    coords = []
    with open(bedfile, "r") as infile:
        for line in infile:
            fields = line.split()
            if not fields:
                continue  # skip blank lines instead of raising IndexError
            coords.append((fields[1], fields[2], fields[4], fields[5]))

    with open(score_file, "r") as infile:
        scores = infile.readlines()

    for el in coords:
        print(el)
        start = int(el[0])
        end = int(el[1])
        mid1 = int(el[2])
        mid2 = int(el[3])

        bounds = [start, mid1, mid2, end - 1]
        print(bounds)

        curr = scores[start:end]
        if not curr:
            # An empty frame cannot be plotted; report it and keep going.
            print(
                "no scores for " + el[0] + "-" + el[1] + "; skipping",
                file=sys.stderr,
            )
            continue

        pairs = [row.split(",") for row in curr]
        x = [int(pair[0]) for pair in pairs]
        y = [int(pair[1]) for pair in pairs]

        df = pd.DataFrame({"idx": x, "score": y}, index=x)
        graph = df.plot(x="idx", y="score")
        graph.plot(bounds, [0.0] * len(bounds), "rv")

        # Named out_path so the function (and the builtin) is not shadowed.
        out_path = name + "/" + el[0] + "-" + el[1] + ".png"
        fig = graph.get_figure()
        try:
            fig.savefig(out_path)
        finally:
            # pyplot keeps every figure alive until it is closed explicitly.
            plt.close(fig)


if __name__ == "__main__":
    if len(sys.argv) < 4:
        sys.exit("usage: visualize.py <bedfile> <output_dir> <score_file>")

    chrom = sys.argv[1]
    output_dir = sys.argv[2]
    score_file = sys.argv[3]

    slice(chrom, output_dir, score_file)