├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── external ├── CuTest.c ├── CuTest.h └── license.txt ├── inc ├── common.h ├── common.mk ├── sharedMaf.h ├── test.common.h └── test.sharedMaf.h ├── lib ├── Makefile ├── allTests.c ├── common.c ├── createVersionSources.py ├── mafToolsTest.py ├── sharedMaf.c ├── test.sharedMaf.c └── test.sharedMaf.py ├── mafComparator ├── .gitignore ├── Makefile ├── README.md ├── example │ ├── a.maf │ └── b.maf └── src │ ├── __init__.py │ ├── allTests.c │ ├── cString.c │ ├── cString.h │ ├── comparatorAPI.c │ ├── comparatorAPI.h │ ├── comparatorRandom.c │ ├── comparatorRandom.h │ ├── mafComparator.c │ ├── mafPairCounter.c │ ├── test.comparatorAPI.c │ ├── test.comparatorAPI.h │ ├── test.comparatorRandom.c │ ├── test.comparatorRandom.h │ ├── test.mafComparator.py │ └── testRand.c ├── mafCoverage ├── Makefile ├── README.md └── src │ ├── allTests.c │ ├── mafCoverage.c │ ├── mafCoverage.h │ ├── mafCoverageAPI.c │ ├── mafCoverageAPI.h │ ├── test.mafCoverage.py │ ├── test.mafCoverageAPI.c │ └── test.mafCoverageAPI.h ├── mafDuplicateFilter ├── Makefile ├── README.md └── src │ ├── mafDuplicateFilter.c │ └── test.mafDuplicateFilter.py ├── mafExtractor ├── Makefile ├── README.md └── src │ ├── allTests.c │ ├── mafExtractor.c │ ├── mafExtractor.h │ ├── mafExtractorAPI.c │ ├── mafExtractorAPI.h │ ├── test.mafExtractor.c │ ├── test.mafExtractor.h │ └── test.mafExtractor.py ├── mafFilter ├── Makefile ├── README.md └── src │ ├── mafFilter.c │ └── test.mafFilter.py ├── mafPairCoverage ├── Makefile ├── README.md └── src │ ├── allTests.c │ ├── mafPairCoverage.c │ ├── mafPairCoverage.h │ ├── mafPairCoverageAPI.c │ ├── mafPairCoverageAPI.h │ ├── test.mafPairCoverage.py │ ├── test.mafPairCoverageAPI.c │ └── test.mafPairCoverageAPI.h ├── mafPositionFinder ├── Makefile ├── README.md └── src │ ├── mafPositionFinder.c │ └── test.mafPositionFinder.py ├── mafRowOrderer ├── Makefile ├── README.md └── src │ ├── mafRowOrderer.c │ └── test.mafRowOrderer.py ├── 
mafSorter ├── Makefile ├── README.md └── src │ ├── mafSorter.c │ └── test.mafSorter.py ├── mafStats ├── Makefile ├── README.md └── src │ ├── allTests.c │ ├── mafStats.c │ ├── mafStats.h │ ├── test.mafStats.c │ └── test.mafStats.h ├── mafStrander ├── Makefile ├── README.md └── src │ ├── mafStrander.c │ └── test.mafStrander.py ├── mafToFastaStitcher ├── Makefile ├── README.md └── src │ ├── allTests.c │ ├── mafToFastaStitcher.c │ ├── mafToFastaStitcher.h │ ├── mafToFastaStitcherAPI.c │ ├── mafToFastaStitcherAPI.h │ ├── test.mafToFastaStitcher.py │ ├── test.mafToFastaStitcherAPI.c │ └── test.mafToFastaStitcherAPI.h ├── mafTransitiveClosure ├── Makefile ├── README.md └── src │ ├── allTests.c │ ├── mafTransitiveClosure.c │ ├── mafTransitiveClosure.h │ ├── test.mafTransitiveClosure.c │ ├── test.mafTransitiveClosure.h │ └── test.mafTransitiveClosure.py └── mafValidator ├── Makefile ├── README.md └── src ├── mafValidator.py └── test.mafValidator.py /.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | *.o 3 | *.pyc 4 | *.a 5 | buildVersion* 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2009-2014 by 2 | Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 3 | Benedict Paten (benedict@soe.ucsc.edu, benedictpaten@gmail.com) 4 | Mark Diekhans (markd@soe.ucsc.edu) 5 | ... and other members of the Reconstruction Team of David Haussler's 6 | lab (BME Dept. UCSC). 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | include inc/common.mk 2 | # NOTE(review): sonLibPath (inc/common.mk) is ../../sonLib/lib, which looks relative to a module subdirectory; confirm the wildcard checks below resolve as intended when make runs from this top-level directory. 3 | ############################## 4 | # These modules are dependent and are 5 | # only included if their dependencies exist! 6 | ifeq ($(wildcard ${sonLibPath}/../Makefile),) 7 | Comparator = 8 | TransitiveClosure = 9 | Stats = 10 | ToFasta = 11 | PairCoverage = 12 | Coverage = 13 | $(warning Because dependency ${sonLibPath} is missing mafComparator, mafTransitiveClosure, mafStats, mafToFastaStitcher, mafPairCoverage, mafCoverage will not be built / tested / cleaned. See README.md for information about dependencies.) 
14 | else 15 | Comparator = mafComparator 16 | Stats = mafStats 17 | ToFasta = mafToFastaStitcher 18 | PairCoverage = mafPairCoverage 19 | Coverage = mafCoverage 20 | ifeq ($(wildcard ${sonLibPath}/stPinchesAndCacti.a),) 21 | TransitiveClosure = 22 | $(warning Because dependency ${sonLibPath}/pinchesAndCacti is missing mafTransitiveClosure will not be built / tested / cleaned. See README.md for information about dependencies.) 23 | else 24 | TransitiveClosure = mafTransitiveClosure 25 | endif # pinches 26 | endif # sonlib 27 | ############################## 28 | dependentModules= ${Comparator} ${TransitiveClosure} ${Stats} ${ToFasta} ${PairCoverage} ${Coverage} 29 | 30 | modules = lib ${dependentModules} mafValidator mafPositionFinder mafExtractor mafSorter mafDuplicateFilter mafFilter mafStrander mafRowOrderer 31 | 32 | .PHONY: all %.all clean %.clean test %.test 33 | .SECONDARY: 34 | # Each top-level goal fans out to one module.goal target per entry in ${modules}. 35 | all: ${modules:%=%.all} 36 | # In the pattern rules below $* is the module directory; $(MAKE) rather than a literal make passes -j, -n and command-line variables to the sub-make. 37 | %.all: 38 | cd $* && $(MAKE) all 39 | 40 | clean: ${modules:%=%.clean} 41 | 42 | %.clean: 43 | cd $* && $(MAKE) clean 44 | 45 | test: ${modules:%=%.test} ${Warnings:%=%.warn} 46 | @echo 'mafTools tests complete.' 47 | 48 | %.test: 49 | cd $* && $(MAKE) test 50 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mafTools 2 | 3 | **mafTools** is a collection of tools that operate on Multiple Alignment Format ([maf](http://genome.ucsc.edu/FAQ/FAQformat.html#format5)) files. 4 | 5 | ## Authors 6 | [Dent Earl](https://github.com/dentearl/), [Benedict Paten](https://github.com/benedictpaten/), [Mark Diekhans](https://github.com/diekhans) 7 | 8 | ## Dependencies 9 | With the exception of the python dependencies, when a component is missing a dependency it will not be built, tested or cleaned by the Makefile. If the python dependencies are missing then some of the modules will fail to function and all of the modules' tests will fail. 
The sonLib and pinchesAndCacti dependencies should be built and placed in the same parent directory as mafTools. 10 | * [python 2.7](http://www.python.org/): all modules. 11 | * [scipy](http://www.scipy.org/) 12 | * [numpy](http://numpy.scipy.org/) 13 | * [sonLib](https://github.com/benedictpaten/sonLib/): mafComparator, mafCoverage, mafStats, mafTransitiveClosure, mafToFastaStitcher, mafPairCoverage. 14 | * [pinchesAndCacti](https://github.com/benedictpaten/pinchesAndCacti): mafTransitiveClosure. 15 | 16 | ## Installation 17 | 0. Install dependencies. 18 | 1. Download or clone the mafTools package. Consider making it a sibling directory to sonLib/ and pinchesAndCacti. 19 | 2. cd into mafTools directory. 20 | 3. Type make. 21 | 22 | ## Components 23 | * **mafComparator** A program to compare two maf files by sampling. Useful when testing predicted alignments against known true alignments. 24 | * **mafCoverage** A program to calculate the amount of alignment coverage between a target sequence and all other sequences in a maf file. 25 | * **mafDuplicateFilter** A program to filter alignment blocks to remove duplicate species. One sequence per species is allowed to remain, chosen by comparing the sequence to the consensus for the block and computing a similarity bit score between the IUPAC formatted consensus and the sequence. The highest scoring duplicate stays, or in the case of ties, the sequence closest to the start of the file stays. 26 | * **mafExtractor** A program to extract all alignment blocks that contain a region in a particular sequence. Useful for isolating regions of interest in large maf files. 27 | * **mafFilter** A program to filter a maf based on sequence names. Can be used to include or exclude sequence names. Useful for removing extraneous sequences from maf files. 28 | * **mafPairCoverage** A program to compare the number of aligned positions between any pair of sequences within a maf file. Can use the * wildcard character to specify a species name. 
Can use a BED file to limit region of inspection to just intervals specified in the bed. Outputs total lengths of sequences, number of aligned positions, percent coverage and in the case where a bed file was specified the number of bases within and outside of the region. 29 | * **mafPositionFinder** A program to search for a position in a particular sequence. Useful for determining where in maf a particular part of the alignment resides. 30 | * **mafRowOrderer** A program to order maf lines within blocks. Useful for moving a reference species to the top of all blocks. Species not specified in the ordering are automatically trimmed from the results. 31 | * **mafSorter** A program to sort all of the blocks in a MAF based on the (absolute) start position of one of the sequences. Blocks without the sequence are placed at the start of the output in their original order. 32 | * **mafStats** A program to read a maf file and report back summary statistics about the file contents. 33 | * **mafStrander** A program to enforce, when possible, a particular strandedness for blocks for a given species and strand orientation. 34 | * **mafToFastaStitcher** A program to convert a reference-based MAF file to a multiple sequence fasta. Requires both a .maf and a fasta containing complete sequences for all entries in the maf. 35 | * **mafTransitiveClosure** A program to perform the transitive closure on an alignment. That is it checks every column of the alignment and looks for situations where a position A is aligned to B in one part of a file and B is aligned to C in another part of the file. The transitive closure of this relationship would be a single column with A, B and C all present. Useful for when you have pairwise alignments and you wish to turn them into something more resembling a multiple alignment. 36 | * **mafValidator** A program to assess whether or not a given maf file's formatting is valid. 
37 | 38 | ## External tools 39 | * mafTools internal tests use Asim Jalis' [CuTest](http://cutest.sourceforge.net/) C unit testing framework (included in external/). The license for CuTest is spelled out in external/license.txt. 40 | * mafTools internal tests will use [valgrind](http://www.valgrind.org/) __if__ installed on your system. 41 | 42 | ## How to Cite: 43 | Genome Res. 2014 Dec;24(12):2077-89. doi: 10.1101/gr.174920.114. Epub 2014 Oct 1. 44 | Alignathon: a competitive assessment of whole-genome alignment methods. 45 | -------------------------------------------------------------------------------- /external/CuTest.h: -------------------------------------------------------------------------------- 1 | #ifndef CU_TEST_H 2 | #define CU_TEST_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #define CUTEST_VERSION "CuTest 1.5" 9 | 10 | /* CuString */ 11 | 12 | char* CuStrAlloc(int size); 13 | char* CuStrCopy(const char* old); 14 | 15 | #define CU_ALLOC(TYPE) ((TYPE*) malloc(sizeof(TYPE))) 16 | 17 | #define HUGE_STRING_LEN 8192 18 | #define STRING_MAX 256 19 | #define STRING_INC 256 20 | 21 | typedef struct 22 | { 23 | int length; 24 | int size; 25 | char* buffer; 26 | } CuString; 27 | 28 | void CuStringInit(CuString* str); 29 | CuString* CuStringNew(void); 30 | void CuStringRead(CuString* str, const char* path); 31 | void CuStringAppend(CuString* str, const char* text); 32 | void CuStringAppendChar(CuString* str, char ch); 33 | void CuStringAppendFormat(CuString* str, const char* format, ...); 34 | void CuStringInsert(CuString* str, const char* text, int pos); 35 | void CuStringResize(CuString* str, int newSize); 36 | void CuStringDelete(CuString* str); 37 | 38 | /* CuTest */ 39 | 40 | typedef struct CuTest CuTest; 41 | 42 | typedef void (*TestFunction)(CuTest *); 43 | 44 | struct CuTest 45 | { 46 | char* name; 47 | TestFunction function; 48 | int failed; 49 | int ran; 50 | const char* message; 51 | jmp_buf *jumpBuf; 52 | }; 53 | 54 | void 
CuTestInit(CuTest* t, const char* name, TestFunction function); 55 | CuTest* CuTestNew(const char* name, TestFunction function); 56 | void CuTestRun(CuTest* tc); 57 | void CuTestDelete(CuTest *t); 58 | 59 | /* Internal versions of assert functions -- use the public versions */ 60 | void CuFail_Line(CuTest* tc, const char* file, int line, const char* message2, const char* message); 61 | void CuAssert_Line(CuTest* tc, const char* file, int line, const char* message, int condition); 62 | void CuAssertStrEquals_LineMsg(CuTest* tc, 63 | const char* file, int line, const char* message, 64 | const char* expected, const char* actual); 65 | void CuAssertIntEquals_LineMsg(CuTest* tc, 66 | const char* file, int line, const char* message, 67 | int expected, int actual); 68 | void CuAssertDblEquals_LineMsg(CuTest* tc, 69 | const char* file, int line, const char* message, 70 | double expected, double actual, double delta); 71 | void CuAssertPtrEquals_LineMsg(CuTest* tc, 72 | const char* file, int line, const char* message, 73 | void* expected, void* actual); 74 | ////////////////////////////////////////////////// 75 | // added by dent earl, dent.earl (a) gmail com 76 | void CuAssertUInt32Equals_LineMsg(CuTest* tc, 77 | const char* file, int line, const char* message, 78 | uint64_t expected, uint64_t actual); 79 | ////////////////////////////////////////////////// 80 | 81 | /* public assert functions */ 82 | 83 | #define CuFail(tc, ms) CuFail_Line( (tc), __FILE__, __LINE__, NULL, (ms)) 84 | #define CuAssert(tc, ms, cond) CuAssert_Line((tc), __FILE__, __LINE__, (ms), (cond)) 85 | #define CuAssertTrue(tc, cond) CuAssert_Line((tc), __FILE__, __LINE__, "assert failed", (cond)) 86 | 87 | #define CuAssertStrEquals(tc,ex,ac) CuAssertStrEquals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac)) 88 | #define CuAssertStrEquals_Msg(tc,ms,ex,ac) CuAssertStrEquals_LineMsg((tc),__FILE__,__LINE__,(ms),(ex),(ac)) 89 | #define CuAssertIntEquals(tc,ex,ac) 
CuAssertIntEquals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac)) 90 | #define CuAssertIntEquals_Msg(tc,ms,ex,ac) CuAssertIntEquals_LineMsg((tc),__FILE__,__LINE__,(ms),(ex),(ac)) 91 | #define CuAssertDblEquals(tc,ex,ac,dl) CuAssertDblEquals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac),(dl)) 92 | #define CuAssertDblEquals_Msg(tc,ms,ex,ac,dl) CuAssertDblEquals_LineMsg((tc),__FILE__,__LINE__,(ms),(ex),(ac),(dl)) 93 | #define CuAssertPtrEquals(tc,ex,ac) CuAssertPtrEquals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac)) 94 | #define CuAssertPtrEquals_Msg(tc,ms,ex,ac) CuAssertPtrEquals_LineMsg((tc),__FILE__,__LINE__,(ms),(ex),(ac)) 95 | ////////////////////////////////////////////////// 96 | // added by dent earl, dent.earl (a) gmail com 97 | #define CuAssertUInt32Equals(tc,ex,ac) CuAssertUInt32Equals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac)) 98 | #define CuAssertUInt32Equals_Msg(tc,ex,ac) CuAssertUInt32Equals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac)) 99 | ////////////////////////////////////////////////// 100 | 101 | #define CuAssertPtrNotNull(tc,p) CuAssert_Line((tc),__FILE__,__LINE__,"null pointer unexpected",(p != NULL)) 102 | #define CuAssertPtrNotNullMsg(tc,msg,p) CuAssert_Line((tc),__FILE__,__LINE__,(msg),(p != NULL)) 103 | 104 | /* CuSuite */ 105 | 106 | #define MAX_TEST_CASES 1024 107 | 108 | #define SUITE_ADD_TEST(SUITE,TEST) CuSuiteAdd(SUITE, CuTestNew(#TEST, TEST)) 109 | 110 | typedef struct 111 | { 112 | int count; 113 | CuTest* list[MAX_TEST_CASES]; 114 | int failCount; 115 | 116 | } CuSuite; 117 | 118 | 119 | void CuSuiteInit(CuSuite* testSuite); 120 | CuSuite* CuSuiteNew(void); 121 | void CuSuiteDelete(CuSuite *testSuite); 122 | void CuSuiteAdd(CuSuite* testSuite, CuTest *testCase); 123 | void CuSuiteAddSuite(CuSuite* testSuite, CuSuite* testSuite2); 124 | void CuSuiteRun(CuSuite* testSuite); 125 | void CuSuiteSummary(CuSuite* testSuite, CuString* summary); 126 | void CuSuiteDetails(CuSuite* testSuite, CuString* details); 127 | 128 | #endif /* 
CU_TEST_H */ 129 | -------------------------------------------------------------------------------- /external/license.txt: -------------------------------------------------------------------------------- 1 | NOTE 2 | 3 | The license is based on the zlib/libpng license. For more details see 4 | http://www.opensource.org/licenses/zlib-license.html. The intent of the 5 | license is to: 6 | 7 | - keep the license as simple as possible 8 | - encourage the use of CuTest in both free and commercial applications 9 | and libraries 10 | - keep the source code together 11 | - give credit to the CuTest contributors for their work 12 | 13 | If you ship CuTest in source form with your source distribution, the 14 | following license document must be included with it in unaltered form. 15 | If you find CuTest useful we would like to hear about it. 16 | 17 | LICENSE 18 | 19 | Copyright (c) 2003 Asim Jalis 20 | 21 | This software is provided 'as-is', without any express or implied 22 | warranty. In no event will the authors be held liable for any damages 23 | arising from the use of this software. 24 | 25 | Permission is granted to anyone to use this software for any purpose, 26 | including commercial applications, and to alter it and redistribute it 27 | freely, subject to the following restrictions: 28 | 29 | 1. The origin of this software must not be misrepresented; you must not 30 | claim that you wrote the original software. If you use this software in 31 | a product, an acknowledgment in the product documentation would be 32 | appreciated but is not required. 33 | 34 | 2. Altered source versions must be plainly marked as such, and must not 35 | be misrepresented as being the original software. 36 | 37 | 3. This notice may not be removed or altered from any source 38 | distribution. 
39 | -------------------------------------------------------------------------------- /inc/common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 
24 | */ 25 | #ifndef COMMON_H_ 26 | #define COMMON_H_ 27 | #include 28 | #include 29 | 30 | extern int g_verbose_flag; 31 | extern int g_debug_flag; 32 | extern const int kMaxStringLength; 33 | extern const int kMaxMessageLength; 34 | extern const int kMaxSeqName; 35 | 36 | void de_verbose(char const *fmt, ...); 37 | void de_debug(char const *fmt, ...); 38 | void* de_malloc(size_t n); 39 | int64_t de_getline(char **s, int64_t *n, FILE *f); 40 | FILE* de_fopen(const char *s, char const *mode); 41 | char* de_strdup(const char *s); 42 | char* de_strndup(const char *s, size_t n); 43 | void failBadFormat(void); 44 | void usageMessage(char shortopt, const char *name, const char *description); 45 | char* stringReplace(const char *string, const char a, const char b); 46 | int minint(int a, int b); 47 | char* de_strtok(char **s, char t); 48 | unsigned countChar(char *s, const char c); 49 | char** extractSubStrings(char *nameList, unsigned n, const char delineator); 50 | 51 | #endif // COMMON_H_ 52 | -------------------------------------------------------------------------------- /inc/common.mk: -------------------------------------------------------------------------------- 1 | # we do specific stuff for specific host for now. 
2 | HOSTNAME := $(shell hostname) 3 | MACH := $(shell uname -m) 4 | SYS := $(shell uname -s) 5 | # The three probes above use := so each shell command runs once at parse time, not on every expansion. 6 | #C compiler 7 | ifeq (${SYS},FreeBSD) 8 | # default FreeBSD gcc (4.2.1) has warning bug 9 | # cxx = gcc46 -std=c99 -Wno-unused-but-set-variable 10 | cxx = gcc34 -std=c99 -Wno-unused-but-set-variable 11 | cpp = g++ 12 | lm = -lm 13 | else ifeq (${SYS},Darwin) # This is to deal with the Mavericks replacing gcc with clang fully 14 | cxx = clang -std=c99 -stdlib=libstdc++ 15 | cpp = clang++ -stdlib=libstdc++ 16 | lm = 17 | else 18 | cxx = gcc -std=c99 -Wno-unused-but-set-variable 19 | cpp = g++ 20 | lm = -lm 21 | endif 22 | 23 | # subset of JPL suggested flags (removed: -Wtraditional -Wcast-qual -Wconversion) 24 | jpl_flags = -Wshadow -Wpointer-arith -Wstrict-prototypes -Wmissing-prototypes 25 | 26 | #Release compiler flags 27 | cflags_opt = -O3 -Wall -Werror --pedantic -funroll-loops -DNDEBUG ${jpl_flags} 28 | 29 | #Debug flags (slow) 30 | cflags_dbg = -Wall -Werror --pedantic -g -fno-inline -DBEN_DEBUG ${jpl_flags} 31 | 32 | #Ultra Debug flags (really slow); both macros are passed with -D 33 | cflags_ultraDbg = -Wall -Werror --pedantic -g -fno-inline -DBEN_DEBUG -DBEN_ULTRA_DEBUG 34 | 35 | #Profile flags 36 | cflags_prof = -Wall -Werror --pedantic -pg -O3 -g 37 | 38 | sonLibPath = ../../sonLib/lib 39 | 40 | #Flags to use 41 | cflags = ${cflags_opt} -I ${sonLibPath} -I ../inc -I ../external 42 | testFlags = -O0 -g -Wall -Werror --pedantic -I ${sonLibPath} -I ../inc -I ../external 43 | #cflags = ${cflags_dbg} 44 | 45 | # location of Tokyo cabinet 46 | ifneq ($(wildcard /hive/groups/recon/local/include/tcbdb.h),) 47 | # hgwdev hive install 48 | tcPrefix = /hive/groups/recon/local 49 | tokyoCabinetIncl = -I ${tcPrefix}/include 50 | tokyoCabinetLib = -L${tcPrefix}/lib -Wl,-rpath,${tcPrefix}/lib -ltokyocabinet -lz -lbz2 -lpthread 51 | else ifneq ($(wildcard /opt/local/include/tcbdb.h),) 52 | # OS/X with TC installed from MacPorts 53 | tcPrefix = /opt/local 54 | tokyoCabinetIncl = -I ${tcPrefix}/include 55 | 
tokyoCabinetLib = -L${tcPrefix}/lib -Wl,-rpath,${tcPrefix}/lib -ltokyocabinet -lz -lbz2 -lpthread 56 | else ifneq ($(wildcard /usr/local/include/tcbdb.h),) 57 | # /usr/local install (FreeBSD, etc) 58 | tcPrefix = /usr/local 59 | tokyoCabinetIncl = -I ${tcPrefix}/include 60 | tokyoCabinetLib = -L ${tcPrefix}/lib -Wl,-rpath,${tcPrefix}/lib -ltokyocabinet -lz -lbz2 -lpthread 61 | else 62 | # default 63 | tokyoCabinetIncl = 64 | tokyoCabinetLib = -ltokyocabinet -lz -lbz2 -lpthread 65 | endif 66 | 67 | cflags += ${tokyoCabinetIncl} 68 | 69 | # location of mysql 70 | ifneq ($(wildcard /usr/include/mysql/mysql.h),) 71 | mysqlIncl = -I /usr/include/mysql -DHAVE_MYSQL=1 72 | ifneq ($(wildcard /usr/lib64/mysql/libmysqlclient.a),) 73 | mysqlLibs = /usr/lib64/mysql/libmysqlclient.a 74 | else 75 | mysqlLibs = /usr/lib/libmysqlclient.a 76 | endif 77 | else ifneq ($(wildcard /usr/local/mysql/include/mysql.h),) 78 | mysqlIncl = -I /usr/local/mysql/include -DHAVE_MYSQL=1 79 | mysqlLibs = -L/usr/local/mysql/lib -lmysqlclient 80 | endif 81 | 82 | # location of PostgreSQL 83 | ifneq ($(wildcard /usr/local/include/libpq-fe.h),) 84 | pgsqlIncl = -I /usr/local/include -DHAVE_POSTGRESQL=1 85 | pgsqlLibs = -L /usr/local/lib -lpq 86 | else ifneq ($(wildcard /usr/include/libpq-fe.h),) 87 | pgsqlIncl = -DHAVE_POSTGRESQL=1 88 | pgsqlLibs = /usr/lib64/libpq.a -lkrb5 -lgssapi -lcrypto -lssl -lcrypt -lldap 89 | endif 90 | 91 | dblibs = ${tokyoCabinetLib} ${mysqlLibs} ${pgsqlLibs} 92 | -------------------------------------------------------------------------------- /inc/sharedMaf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 
24 | */ 25 | #ifndef SHAREDMAF_H_ 26 | #define SHAREDMAF_H_ 27 | #include 28 | #include 29 | 30 | typedef struct mafFileApi mafFileApi_t; 31 | typedef struct mafBlock mafBlock_t; 32 | typedef struct mafLine mafLine_t; 33 | 34 | // creators, destroyers 35 | mafFileApi_t* maf_newMfa(const char *filename, char const *mode); 36 | mafBlock_t* maf_newMafBlock(void); 37 | mafBlock_t* maf_newMafBlockFromString(const char *s, uint64_t lineNumber); 38 | mafBlock_t* maf_newMafBlockListFromString(const char *s, uint64_t lineNumber); 39 | mafLine_t* maf_newMafLine(void); 40 | mafLine_t* maf_newMafLineFromString(const char *s, uint64_t lineNumber); 41 | mafBlock_t* maf_copyMafBlock(mafBlock_t *orig); 42 | mafBlock_t* maf_copyMafBlockList(mafBlock_t *orig); 43 | mafLine_t* maf_copyMafLine(mafLine_t *orig); 44 | mafLine_t* maf_copyMafLineList(mafLine_t *orig); 45 | void maf_destroyMafLineList(mafLine_t *ml); 46 | void maf_destroyMafBlockList(mafBlock_t *mb); 47 | void maf_destroyMfa(mafFileApi_t *mfa); 48 | void maf_mafBlock_destroySequenceMatrix(char **mat, unsigned n); 49 | // read / write 50 | mafBlock_t* maf_readAll(mafFileApi_t *mfa); 51 | mafBlock_t* maf_readBlock(mafFileApi_t *mfa); 52 | mafBlock_t* maf_readBlockHeader(mafFileApi_t *mfa); 53 | mafBlock_t* maf_readBlockBody(mafFileApi_t *mfa); 54 | void maf_writeAll(mafFileApi_t *mfa, mafBlock_t *mb); 55 | void maf_writeBlock(mafFileApi_t *mfa, mafBlock_t *mb); 56 | uint64_t maf_mafFileApi_getLineNumber(mafFileApi_t *mfa); 57 | // getters 58 | char* maf_mafFileApi_getFilename(mafFileApi_t *mfa); 59 | uint64_t maf_mafFileApi_getLineNumber(mafFileApi_t *mfa); 60 | mafLine_t* maf_mafBlock_getHeadLine(mafBlock_t *mb); 61 | mafLine_t* maf_mafBlock_getTailLine(mafBlock_t *mb); 62 | uint64_t maf_mafBlock_getLineNumber(mafBlock_t *mb); 63 | uint64_t maf_mafBlock_getNumberOfLines(mafBlock_t *b); 64 | uint64_t maf_mafBlock_getNumberOfSequences(mafBlock_t *b); 65 | char* maf_mafBlock_getStrandArray(mafBlock_t *mb); 66 | int* 
maf_mafBlock_getStrandIntArray(mafBlock_t *mb); 67 | uint64_t* maf_mafBlock_getPosCoordStartArray(mafBlock_t *mb); 68 | uint64_t* maf_mafBlock_getPosCoordLeftArray(mafBlock_t *mb); 69 | uint64_t* maf_mafBlock_getStartArray(mafBlock_t *mb); 70 | uint64_t* maf_mafBlock_getSourceLengthArray(mafBlock_t *mb); 71 | uint64_t* maf_mafBlock_getSequenceLengthArray(mafBlock_t *mb); 72 | char** maf_mafBlock_getSpeciesArray(mafBlock_t *mb); 73 | mafBlock_t* maf_mafBlock_getNext(mafBlock_t *mb); 74 | char** maf_mafBlock_getSequenceMatrix(mafBlock_t *mb, unsigned n, unsigned m); 75 | mafLine_t** maf_mafBlock_getMafLineArray_seqOnly(mafBlock_t *mb); 76 | uint64_t maf_mafBlock_getSequenceFieldLength(mafBlock_t *mb); 77 | char* maf_mafLine_getLine(mafLine_t *ml); 78 | uint64_t maf_mafLine_getLineNumber(mafLine_t *ml); 79 | char maf_mafLine_getType(mafLine_t *ml); 80 | char* maf_mafLine_getSpecies(mafLine_t *ml); 81 | uint64_t maf_mafLine_getStart(mafLine_t *ml); 82 | uint64_t maf_mafLine_getLength(mafLine_t *ml); 83 | char maf_mafLine_getStrand(mafLine_t *ml); 84 | uint64_t maf_mafLine_getSourceLength(mafLine_t *ml); 85 | char* maf_mafLine_getSequence(mafLine_t *ml); 86 | uint64_t maf_mafLine_getSequenceFieldLength(mafLine_t *ml); 87 | mafLine_t* maf_mafLine_getNext(mafLine_t *ml); 88 | // setters 89 | void maf_mafBlock_setHeadLine(mafBlock_t *mb, mafLine_t *ml); 90 | void maf_mafBlock_setTailLine(mafBlock_t *mb, mafLine_t *ml); 91 | void maf_mafBlock_setNumberOfLines(mafBlock_t *mb, uint64_t n); 92 | void maf_mafBlock_incrementNumberOfLines(mafBlock_t *mb); 93 | void maf_mafBlock_decrementNumberOfLines(mafBlock_t *mb); 94 | void maf_mafBlock_setNumberOfSequences(mafBlock_t *mb, uint64_t n); 95 | void maf_mafBlock_incrementNumberOfSequences(mafBlock_t *mb); 96 | void maf_mafBlock_decrementNumberOfSequences(mafBlock_t *mb); 97 | void maf_mafBlock_setLineNumber(mafBlock_t *mb, uint64_t n); 98 | void maf_mafBlock_incrementLineNumber(mafBlock_t *mb); 99 | void 
maf_mafBlock_decrementLineNumber(mafBlock_t *mb); 100 | void maf_mafBlock_setSequenceFieldLength(mafBlock_t *mb, uint64_t sfl); 101 | void maf_mafBlock_setNext(mafBlock_t *mb, mafBlock_t *next); 102 | void maf_mafBlock_appendToAlignmentBlock(mafBlock_t *m, char *s); 103 | void maf_mafLine_setLine(mafLine_t *ml, char *line); 104 | void maf_mafLine_setLineNumber(mafLine_t *ml, uint64_t n); 105 | void maf_mafLine_setType(mafLine_t *ml, char c); 106 | void maf_mafLine_setSpecies(mafLine_t *ml, char *s); 107 | void maf_mafLine_setStrand(mafLine_t *ml, char c); 108 | void maf_mafLine_setStart(mafLine_t *ml, uint64_t n); 109 | void maf_mafLine_setLength(mafLine_t *ml, uint64_t n); 110 | void maf_mafLine_setSourceLength(mafLine_t *ml, uint64_t n); 111 | void maf_mafLine_setSequence(mafLine_t *ml, char *s); 112 | void maf_mafLine_setNext(mafLine_t *ml, mafLine_t *next); 113 | // utilities 114 | unsigned maf_mafBlock_getNumberOfBlocks(mafBlock_t *b); 115 | bool maf_mafBlock_containsSequence(mafBlock_t *m); 116 | char* maf_mafLine_imputeLine(mafLine_t* ml); 117 | uint64_t maf_mafLine_getNumberOfSequences(mafLine_t *m); 118 | uint64_t maf_mafLine_getPositiveCoord(mafLine_t *ml); 119 | uint64_t maf_mafLine_getPositiveLeftCoord(mafLine_t *ml); 120 | unsigned umax(unsigned a, unsigned b); 121 | uint64_t countNonGaps(char *seq); 122 | void maf_mafBlock_flipStrand(mafBlock_t *mb); 123 | void reverseComplementSequence(char *s, size_t n); // in-place reverse complement 124 | void complementSequence(char *s, size_t n); 125 | char complementChar(char c); 126 | char *copySpeciesName(const char *s); // hg18.chr1 -> hg18 127 | char *copyChromosomeName(const char *s); // hg18.chr1 -> chr1 128 | // print 129 | void maf_mafBlock_printList(mafBlock_t *m); 130 | void maf_mafBlock_print(mafBlock_t *m); 131 | #endif // SHAREDMAF_H_ 132 | -------------------------------------------------------------------------------- /inc/test.common.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 
24 | */ 25 | #ifndef TEST_COMMON_H_ 26 | #define TEST_COMMON_H_ 27 | #include 28 | #include "CuTest.h" 29 | #include "common.h" 30 | /* Smoke test for de_malloc(): the 100-byte allocation must be non-NULL and every byte must be writable and read back as written. */ 31 | static void test_de_malloc(CuTest *testCase) { 32 | assert(testCase != NULL); 33 | char *t = (char *) de_malloc(100); 34 | CuAssertTrue(testCase, t != NULL); 35 | for (int i = 0; i < 100; ++i) { 36 | t[i] = 0; 37 | CuAssertIntEquals(testCase, t[i], 0); 38 | } 39 | free(t); 40 | } 41 | /* Returns a newly created suite whose only test is test_de_malloc. */ 42 | CuSuite* common_TestSuite(void) { 43 | CuSuite* suite = CuSuiteNew(); 44 | SUITE_ADD_TEST(suite, test_de_malloc); 45 | return suite; 46 | } 47 | 48 | #endif // TEST_COMMON_H_ 49 | -------------------------------------------------------------------------------- /inc/test.sharedMaf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 24 | */ 25 | #ifndef TEST_SHAREDMAF_H_ 26 | #define TEST_SHAREDMAF_H_ 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include "CuTest.h" 35 | #include "common.h" 36 | #include "sharedMaf.h" 37 | 38 | int createTmpFolder(void); 39 | void writeStringToTmpFile(char *s); 40 | bool filesAreIdentical(char *fileA, char *fileB); 41 | 42 | CuSuite* mafShared_TestSuite(void); 43 | #endif // TEST_SHAREDMAF_H_ 44 | -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/bash 2 | include ../inc/common.mk 3 | .SECONDARY: 4 | .PHONY: all clean test 5 | 6 | cc = gcc 7 | args = -std=c99 -O3 -Wextra -Wall -Werror -pedantic -I ../external/ -I ../inc/ 8 | inc = ../inc 9 | 10 | objects = common.o sharedMaf.o ../external/CuTest.a 11 | testObjects := test/sharedMaf.o test/common.o ../external/CuTest.a 12 | 13 | all: ${objects} 14 | # Also removes the scratch directories that `make test` only deletes after a successful run. 15 | clean: 16 | rm -f allTests *.o *.pyc && rm -rf ./test ./test_tmp 17 | # Unit-test driver, linked from the debug objects under test/. 18 | allTests: allTests.c ${inc}/test.sharedMaf.h test.sharedMaf.c ${testObjects} 19 | mkdir -p test 20 | ${cc} -g -O0 ${args} allTests.c test.sharedMaf.c ${testObjects} -o $@.tmp ${lm} 21 | mv $@.tmp $@ 22 | 23 | %.o: %.c ${inc}/%.h 24 | ${cc} -O3 -c ${args} $< -o $@.tmp 25 | mv $@.tmp $@ 26 | 27 | sharedMaf.o: sharedMaf.c ${inc}/sharedMaf.h 28 | ${cc} -O3 -c ${args} sharedMaf.c -o $@.tmp ${lm} 29 | mv $@.tmp $@ 30 | # Debug (-g -O0) builds of the library objects, kept under test/; the temp file sits next to the target like in every other rule. 31 | test/%.o: %.c ${inc}/%.h 32 | mkdir -p $(dir $@) 33 | ${cc} -g -O0 -c ${args} $< -o $@.tmp ${lm} 34 | mv $@.tmp $@ 35 | 36 | test/sharedMaf.o: sharedMaf.c ${inc}/sharedMaf.h 37 | mkdir -p $(dir $@) 38 | ${cc} -g -O0 -c
${args} sharedMaf.c -o $@.tmp ${lm} 39 | mv $@.tmp $@ 40 | # Runs the C unit tests, then test.sharedMaf.py; the build scratch is removed only when both succeed. 41 | test: allTests 42 | ./allTests && python2.7 test.sharedMaf.py --verbose && rm -rf ./allTests ./test ./test_tmp 43 | # Static archive of the bundled CuTest sources, assembled in the current directory and then moved into place. 44 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h 45 | ${cc} -c ${args} $< 46 | ar rc CuTest.a CuTest.o 47 | ranlib CuTest.a 48 | rm -f CuTest.o 49 | mv CuTest.a $@ 50 | -------------------------------------------------------------------------------- /lib/allTests.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE.
24 | */ 25 | #include 26 | #include 27 | #include "CuTest.h" 28 | #include "test.common.h" 29 | #include "test.sharedMaf.h" 30 | 31 | CuSuite* mafShared_TestSuite(void); 32 | /* Runs the common and sharedMaf suites, prints the CuTest summary and details, and returns 1 if any test failed (0 otherwise). */ 33 | int include_RunAllTests(void) { 34 | CuString *output = CuStringNew(); 35 | CuSuite *suite = CuSuiteNew(); 36 | CuSuite *common_s = common_TestSuite(); 37 | CuSuite *maf_s = mafShared_TestSuite(); 38 | CuSuiteAddSuite(suite, common_s); 39 | CuSuiteAddSuite(suite, maf_s); 40 | CuSuiteRun(suite); 41 | CuSuiteSummary(suite, output); 42 | CuSuiteDetails(suite, output); 43 | printf("%s\n", output->buffer); 44 | CuStringDelete(output); 45 | int status = (suite->failCount > 0); 46 | free(common_s); /* NOTE(review): the sub-suites are released with free() and only the aggregate with CuSuiteDelete() -- presumably because CuSuiteAddSuite() hands their tests to `suite`; confirm against CuTest.c */ 47 | free(maf_s); 48 | CuSuiteDelete(suite); 49 | return status; 50 | } 51 | int main(void) { 52 | return include_RunAllTests(); /* process exit status is non-zero when a test failed */ 53 | } 54 | -------------------------------------------------------------------------------- /lib/createVersionSources.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2.7 2 | import os 3 | import subprocess 4 | import sys 5 | import time 6 | sys.path.append( 7 | os.path.abspath( 8 | os.path.join(os.path.dirname(sys.argv[0]), '../../inc/'))) 9 | import mafToolsTest as mtt 10 | 11 | BOILERPLATE = '''/* 12 | * Copyright (C) 2009-2014 by 13 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 14 | * Benedict Paten (benedict@soe.ucsc.edu, benedictpaten@gmail.com) 15 | * Mark Diekhans (markd@soe.ucsc.edu) 16 | * ... and other members of the Reconstruction Team of David Haussler's 17 | * lab (BME Dept. UCSC).
18 | * 19 | * Permission is hereby granted, free of charge, to any person obtaining a copy 20 | * of this software and associated documentation files (the "Software"), to deal 21 | * in the Software without restriction, including without limitation the rights 22 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 23 | * copies of the Software, and to permit persons to whom the Software is 24 | * furnished to do so, subject to the following conditions: 25 | * 26 | * The above copyright notice and this permission notice shall be included in 27 | * all copies or substantial portions of the Software. 28 | * 29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 30 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 31 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 32 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 33 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 34 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 35 | * THE SOFTWARE. 
36 | */ 37 | ''' 38 | GIT = mtt.which('git') 39 | 40 | # Run cmd (an argv list), hand its return code to mtt.handleReturnCode, and return its captured stdout as text. 41 | def runCommand(cmd): 42 | p = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True) 43 | pout, perr = p.communicate() 44 | mtt.handleReturnCode(p.returncode, cmd) 45 | return pout 46 | 47 | # Current branch: the `git branch` entry prefixed with '* '; falls through (returning None) when no entry is marked. 48 | def getBranch(): 49 | branchList = runCommand([GIT, 'branch']).split('\n') 50 | for b in branchList: 51 | if b.startswith('* '): 52 | return b[2:] 53 | 54 | # SHA of HEAD as printed by `git rev-parse HEAD`, with surrounding whitespace stripped. 55 | def getSha(): 56 | return runCommand([GIT, 'rev-parse', 'HEAD']).strip() 57 | 58 | # Write <location>/buildVersion.h: the license boilerplate plus extern declarations of the three g_build_* strings. 59 | def writeHeader(location): 60 | f = open(os.path.join(location, 'buildVersion.h'), 'w') 61 | f.write(BOILERPLATE) 62 | f.write('#ifndef _BUILD_VERSION_H_\n') 63 | f.write('#define _BUILD_VERSION_H_\n') 64 | f.write('extern const char g_build_date[];\n') 65 | f.write('extern const char g_build_git_branch[];\n') 66 | f.write('extern const char g_build_git_sha[];\n') 67 | f.write('#endif // _BUILD_VERSION_H_\n') 68 | f.close() 69 | 70 | # Write <location>/buildVersion.c: the definitions of the g_build_* strings, filled in with the supplied values. 71 | def writeSource(location, buildDate, buildBranch, buildSha): 72 | f = open(os.path.join(location, 'buildVersion.c'), 'w') 73 | f.write(BOILERPLATE) 74 | f.write('#include "buildVersion.h"\n\n') 75 | f.write('const char g_build_date[] = "%s";\n' % buildDate) 76 | f.write('const char g_build_git_branch[] = "%s";\n' % buildBranch) 77 | f.write('const char g_build_git_sha[] = "%s";\n' % buildSha) 78 | f.close() 79 | 80 | # Entry point: requires git, then writes buildVersion.h and buildVersion.c into ./src using the local build time, branch and SHA. 81 | def main(): 82 | if GIT is None: 83 | raise RuntimeError('Error, unable to locate git, is it installed?') 84 | location = os.path.join(os.curdir, 'src') 85 | buildDate = time.strftime('%Y-%m-%dT%H:%M%Z', time.localtime()) # gmtime() 86 | buildBranch = getBranch() 87 | buildSha = getSha() 88 | writeHeader(location) 89 | writeSource(location, buildDate, buildBranch, buildSha) 90 | 91 | 92 | if __name__ == '__main__': 93 | main() 94 | -------------------------------------------------------------------------------- /lib/test.sharedMaf.py:
-------------------------------------------------------------------------------- 1 | ################################################## 2 | # Copyright (C) 2012 by 3 | # Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | # ... and other members of the Reconstruction Team of David Haussler's 5 | # lab (BME Dept. UCSC). 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | # THE SOFTWARE. 24 | ################################################## 25 | import os 26 | import sys 27 | import unittest 28 | import mafToolsTest as mtt 29 | # Memory check for the shared MAF library: runs ./allTests through the command built by mtt.genericValgrind (presumably a valgrind command line -- confirm in mafToolsTest.py). 30 | class SharedMafLibraryTest(unittest.TestCase): 31 | def testMemory(self): 32 | """ sharedMaf.h should be memory clean.
33 | """ 34 | mtt.makeTempDirParent() 35 | valgrind = mtt.which('valgrind') 36 | if valgrind is None: 37 | return # valgrind not found: nothing is checked and the test is reported as passing 38 | tmpDir = os.path.abspath(mtt.makeTempDir('allTests')) 39 | cmd = mtt.genericValgrind(tmpDir) 40 | cmd.append(os.path.abspath(os.path.join(os.curdir, 'allTests'))) 41 | mtt.runCommandsS([cmd], tmpDir) 42 | self.assertTrue(mtt.noMemoryErrors(os.path.join(tmpDir, 'valgrind.xml'))) # the verdict is read from valgrind.xml inside tmpDir 43 | mtt.removeDir(tmpDir) 44 | 45 | if __name__ == '__main__': 46 | unittest.main() 47 | -------------------------------------------------------------------------------- /mafComparator/.gitignore: -------------------------------------------------------------------------------- 1 | tempTestFiles/* 2 | test/* 3 | src/buildVersion.* 4 | -------------------------------------------------------------------------------- /mafComparator/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2009-2013 by 2 | # Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 3 | # Benedict Paten (benedict@soe.ucsc.edu, benedictpaten@gmail.com) 4 | # Mark Diekhans (markd@soe.ucsc.edu) 5 | # ... and other members of the Reconstruction Team of David Haussler's 6 | # lab (BME Dept. UCSC). 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software.
17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | # THE SOFTWARE. 25 | # Builds mafComparator and mafPairCounter. ${cxx}, ${cflags}, ${testFlags}, ${lm} and ${sonLibPath} are not set in this file; presumably they come from ../inc/common.mk. 26 | include ../inc/common.mk 27 | binPath = ../bin 28 | dependencies = $(wildcard ../inc/common.*) $(wildcard ../lib/common.*) $(wildcard ../inc/sharedMaf.*) $(wildcard ../lib/sharedMaf.*) $(wildcard ${sonLibPath}/*) ${sonLibPath}/sonLib.a ${sonLibPath}/stPinchesAndCacti.a src/allTests.c 29 | extraAPI = src/cString.c ../lib/sharedMaf.o ../external/CuTest.a ../lib/common.o src/comparatorRandom.o src/comparatorAPI.o ${sonLibPath}/sonLib.a src/buildVersion.o 30 | testAPI = src/cString.c test/sharedMaf.o ../external/CuTest.a test/common.o test/comparatorRandom.o test/comparatorAPI.o ${sonLibPath}/sonLib.a test/buildVersion.o 31 | progs = $(foreach f, mafComparator mafPairCounter, ${binPath}/$f) 32 | testObjects = test/test.comparatorAPI.o test/test.comparatorRandom.o 33 | sources = $(foreach f, comparatorAPI cString comparatorRandom test.comparatorAPI test.comparatorRandom, src/$f.c) src/allTests.c src/mafComparator.c src/mafPairCounter.c src/testRand.c 34 | 35 | .PHONY: all clean test buildVersion 36 | # The version stamp (src/buildVersion.c/.h) is regenerated whenever a source file or one of ${dependencies} is newer than it; every file named in ${dependencies} must therefore exist. 37 | all: buildVersion ${progs} 38 | buildVersion: src/buildVersion.c 39 | src/buildVersion.c: ${sources} ${dependencies} 40 | @python ../lib/createVersionSources.py 41 | # The shared objects are built by lib/Makefile; $(MAKE) passes -j/-n and command-line variables on to the sub-make. 42 | ../lib/%.o: ../lib/%.c ../inc/%.h 43 | cd ../lib/ && $(MAKE) 44 | 45 | ${binPath}/%: src/%.c ${extraAPI} 46 | @mkdir -p $(dir $@) 47 | ${cxx} -o $@.tmp $^ ${cflags} ${lm} 48 | mv $@.tmp $@ 49 | 50 | test/%: src/%.c ${testAPI} $(wildcard src/*.h) 51 | @mkdir -p $(dir $@) 52 | ${cxx} -o
$@.tmp $^ ${testFlags} ${lm} 53 | mv $@.tmp $@ 54 | 55 | ${binPath}/%.py: src/%.py 56 | @mkdir -p $(dir $@) 57 | cp $< $@.tmp 58 | chmod +x $@.tmp 59 | mv $@.tmp $@ 60 | 61 | %.o: %.c %.h 62 | ${cxx} -c $< -o $@.tmp ${cflags} 63 | mv $@.tmp $@ 64 | test/%.o: ../lib/%.c ../inc/%.h 65 | mkdir -p $(dir $@) 66 | ${cxx} -c $< -o $@.tmp ${testFlags} ${lm} 67 | mv $@.tmp $@ 68 | test/test.comparatorAPI.o: src/test.comparatorAPI.c src/test.comparatorAPI.h test/comparatorAPI.o 69 | mkdir -p $(dir $@) 70 | ${cxx} -c $< -o $@.tmp ${testFlags} ${lm} 71 | mv $@.tmp $@ 72 | test/%.o: src/%.c src/%.h ${sonLibPath}/sonLib.a 73 | mkdir -p $(dir $@) 74 | ${cxx} -c $< -o $@.tmp ${testFlags} ${lm} 75 | mv $@.tmp $@ 76 | 77 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h 78 | ${cxx} -c $< ${cflags} 79 | ar rc CuTest.a CuTest.o 80 | ranlib CuTest.a 81 | rm -f CuTest.o 82 | mv CuTest.a $@ 83 | # Runs the C unit tests, then the Python tests; ./test/ and ./tempTestDir are removed only when both pass. 84 | test: buildVersion test/allTests test/mafComparator test/testRand 85 | ./test/allTests && python2.7 src/test.mafComparator.py --verbose && rm -rf ./test/ && rmdir ./tempTestDir 86 | 87 | test/allTests: src/allTests.c ${testAPI} ${testObjects} ${sonLibPath}/sonLib.a 88 | mkdir -p $(dir $@) 89 | ${cxx} $^ -o $@.tmp ${testFlags} ${lm} 90 | mv $@.tmp $@ 91 | 92 | # to actually use the testRand program, comment out the rm -rf on the "test:" rule and run "make test", 93 | # then you may run test/testRand 94 | test/testRand: src/testRand.c ${testAPI} ${sonLibPath}/sonLib.a 95 | mkdir -p $(dir $@) 96 | ${cxx} $^ -o $@.tmp ${testFlags} ${lm} 97 | mv $@.tmp $@ 98 | 99 | clean: 100 | rm -f *.o ${progs} src/*.o && rm -rf ./test/ src/buildVersion.c src/buildVersion.h 101 | -------------------------------------------------------------------------------- /mafComparator/README.md: -------------------------------------------------------------------------------- 1 | # mafComparator 2 | 3 | February 2011 -- August 2012 4 | 5 | ## Authors 6 | 7 | [Dent Earl](https://github.com/dentearl/),
[Benedict Paten](https://github.com/benedictpaten/) 8 | 9 | ## Description 10 | This program takes two [MAF](http://genome.ucsc.edu/FAQ/FAQformat#format5) files and compares them to one another. 11 | Specifically, for each ordered pair of sequences in the first MAF it 12 | samples a predefined number of sample homology tests (see below), then 13 | reads the second MAF checking to see which, if any, of the sampled pairs 14 | are present. The comparison is then reversed and repeated. Statistics are 15 | then reported in an XML formatted file. MafComparator is suitable for 16 | running over very large alignments (those with many positions), because 17 | it does not attempt to hold everything in memory but instead takes a 18 | sampling approach. 19 | 20 | For two sets of pairwise alignments, **A** and **B**, a homology test is 21 | defined as follows. Pick a pair of aligned positions in **A**, called a 22 | homology pair -- the **AB** homology test returns _true_ if the pair is present in **B**, 23 | otherwise it returns _false_. The set of possible homology tests for the 24 | ordered pair (**A**, **B**) is not necessarily equivalent to the set of 25 | possible (**B**, **A**) homology tests. We call the proportion of _true_ tests 26 | (as a percentage of the total of a set of **C** many homology tests), from 27 | (**A**, **B**) **A~B**. 28 | 29 | If **A** is the set of true pairwise alignments and **B** the predicted set of 30 | alignments then **A~B** (over large enough **C**), is a proxy to 31 | [_sensitivity_](http://en.wikipedia.org/wiki/Sensitivity_and_specificity) 32 | of **B** in predicting the set of correctly aligned pairs in **A**. Conversely 33 | **B~A** (over large enough **C**) is a proxy to the 34 | [_specificity_](http://en.wikipedia.org/wiki/Sensitivity_and_specificity) of the 35 | aligned pairs in **B** with respect to the set of correctly aligned pairs 36 | in **A**.
37 | 38 | ## Dependencies 39 | * sonLib https://github.com/benedictpaten/sonLib/ 40 | 41 | ## Installation 42 | 1. Download the package. Consider making the parent of mafComparator a sibling directory to sonLib. 43 | 2. cd into the directory. 44 | 3. Type make. 45 | 46 | ## Use 47 | mafComparator --maf1=FILE1 --maf2=FILE2 --out=OUT.xml [options] 48 | 49 | ### Options 50 | * mafComparator, version 0.6 July 2012 51 | * -a --logLevel : Set the log level. [off, critical, info, debug] in ascending order 52 | * --maf1 : The location of the first MAF file. If comparing true to predicted alignments, this is the truth. 53 | * --maf2 : The location of the second MAF file. 54 | * --out : The output XML formatted results file. 55 | * --samples : The ideal number of sample homology tests to perform for the two comparisons (i.e. file1 -> file2 and file2 -> file1). This number is an ideal because pairs are sampled and thus the actual number may be slightly higher or slightly lower than this value. If this value is equal to or greater than the total number of pairs in a file, then all pairs will be tested. [default 1000000] 56 | * -g --near : The number of bases in either sequence to allow a match to slip by. I.e. --near=n (where _n_ is a non-negative integer) will consider a homology test for a given pair (**S1**:_x_, **S2**:_y_) where **S1** and **S2** are sequences and _x_ and _y_ are positions in the respective sequences, to be a true homology test so long as there is a pair within the other alignment (**S1**:_w_, **S2**:_z_) where EITHER (_w_ is equal to _x_ and _y_ - _n_ <= _z_ <= _y_ + _n_) OR (_x_ - _n_ <= _w_ <= _x_ + _n_ and _y_ is equal to _z_). 57 | * --bedFiles : The location of bed file(s) used to filter the pairwise comparisons. Comma separated list.
58 | * --wigglePairs : The key-value paired names of sequences (comma separated pairs, colon separated key values) to create output that isolates event counts to specific regions of one genome (the first genome in the pair). The asterisk, \*, can be used as a wildcard character. i.e. hg19\*:mm9\* will match hg19.chr1 and mm9.chr1 etc etc resulting in all pairs between hg19\* and mm9\*. This feature ignores any intervals described with the --bedFiles option. 59 | * --wiggleRegionStart : The starting base (inclusive) of the sub-region to analyze. Do not set if you wish to use the entire sequence. 60 | * --wiggleRegionStop : The ending base (inclusive) of the sub-region to analyze. Do not set if you wish to use the entire sequence. 61 | * --wiggleBinLength : The length of the bins when the --wigglePairs option is invoked. [default: 100000] 62 | * --numberOfPairs : A pair of comma separated positive integers representing the total number of pairs in maf1 and maf2 (in that order). These numbers are double checked by mafComparator as it runs; a discrepancy will cause an error. If these values are known prior to the analysis (either because the analysis has been run before or by use of the mafPairCounter program) this option provides about a 15% speedup. Example: --numberOfPairs 2847390129,228470192212 63 | * --legitSequences : A list of comma separated key value pairs, which themselves are colon (:) separated. Each pair is a sequence name and source length. These values are normally determined by reading all sequences and source lengths from maf1 and then again from maf2 and then finding the intersection of the two sets. The source lengths are verified by mafComparator as it runs and discrepancies will cause errors. If this option is invoked it can result in a speedup of about 15%. Example: --legitSequences apple.chr1:100,apple.chr2:102,pineapple.chr1:2010 64 | * -s --seed : An integer to seed the random number generator.
Omitting this causes the seed to be pseudorandom (via time() and getpid()). The seed value is always stored in the output xml. 65 | * -v --version : Print current version number. 66 | * -h --help : Print this help screen. 67 | 68 | ## Example 69 | Two mafs are included in the example/ directory and can be compared using the command: 70 | 71 | $ mafComparator --maf1 example/a.maf --maf2 example/b.maf --out comparison_a-b.xml 72 | 73 | You may note in the output that there are no comparisons for the sequences that are found only in b.maf, i.e. sequences D, E and F. The hash of sequence names used for comparisons is populated using the intersection of the sequence names from the --maf1 and --maf2 inputs. Sequences that only appear in --maf1 or only appear in --maf2 input are ignored. 74 | -------------------------------------------------------------------------------- /mafComparator/example/a.maf: -------------------------------------------------------------------------------- 1 | ##maf version=1 scoring=tba.v8 2 | 3 | a score=23262.0 4 | s A.chr0 116834 38 + 4622798 AAA-GGGAATGTTAACCAAATGA---GTTGTCTCTTATGGTG 5 | s B.chr1 116834 38 + 4622798 AAA-GGGAATGTTAACCAAATGA---GTTGTCTCTTATGGTG 6 | s C.chr1 28741140 38 + 161576975 AAA-GGGAATGTTAACCAAATGA---ATTGTCTCTTACGGTG 7 | 8 | a score=5062.0 9 | s A.chr0 241163 6 + 4622798 TAAAGA 10 | s C.chr1 28862317 6 + 161576975 TAAAGA 11 | 12 | a score=6636.0 13 | s A.chr0 249182 13 + 4622798 gcagctgaaaaca 14 | s C.chr1 28869787 13 + 161576975 gcagctgaaaaca 15 | 16 | -------------------------------------------------------------------------------- /mafComparator/example/b.maf: -------------------------------------------------------------------------------- 1 | ##maf version=1 scoring=tba.v8 2 | 3 | a score=23262.0 4 | s B.chr1 116834 38 + 4622798 AAA-GGGAATGTTAACCAAATGA---GTTGTCTCTTATGGTG 5 | s C.chr1 28741140 38 + 161576975 AAA-GGGAATGTTAACCAAATGA---ATTGTCTCTTACGGTG 6 | s D.chr0 28741140 35 + 161576975 
AAA----AATGTTAACCAAATGA---ATTGTCTCTTACGGTG 7 | 8 | a score=5062.0 9 | s A.chr0 241163 6 + 4622798 TAA----AGA 10 | s C.chr1 28862317 6 + 161576975 TAA----AGA 11 | s E.chr9 500000 10 + 100000000 TAACCCCAGA 12 | 13 | a score=6636.0 14 | s A.chr0 249182 13 + 4622798 gcagctgaaaaca 15 | s C.chr1 28869787 13 + 161576975 gcagctgaaaaca 16 | s F.chr5 800000 5 + 20000000 gca--------ca 17 | 18 | -------------------------------------------------------------------------------- /mafComparator/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComparativeGenomicsToolkit/mafTools/259e5b47fa2ee17ff5ad1bba9cebf2992cbb7228/mafComparator/src/__init__.py -------------------------------------------------------------------------------- /mafComparator/src/allTests.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 24 | */ 25 | 26 | #include 27 | #include 28 | #include "CuTest.h" 29 | #include "comparatorAPI.h" 30 | #include "test.comparatorAPI.h" 31 | #include "test.comparatorRandom.h" 32 | 33 | CuSuite* comparatorAPI_TestSuite(void); 34 | CuSuite* comparatorRandom_TestSuite(void); 35 | /* Runs the comparatorAPI and comparatorRandom suites, prints the CuTest summary and details, and returns 1 if any test failed (0 otherwise). */ 36 | int comparator_RunAllTests(void) { 37 | CuString *output = CuStringNew(); 38 | CuSuite *suite = CuSuiteNew(); 39 | CuSuite *comparatorAPI_s = comparatorAPI_TestSuite(); 40 | CuSuite *comparatorRandom_s = comparatorRandom_TestSuite(); 41 | CuSuiteAddSuite(suite, comparatorAPI_s); 42 | CuSuiteAddSuite(suite, comparatorRandom_s); 43 | CuSuiteRun(suite); 44 | CuSuiteSummary(suite, output); 45 | CuSuiteDetails(suite, output); 46 | printf("%s\n", output->buffer); 47 | CuStringDelete(output); 48 | int status = (suite->failCount > 0); 49 | free(comparatorAPI_s); /* NOTE(review): the sub-suites are released with free() and only the aggregate with CuSuiteDelete() -- presumably because CuSuiteAddSuite() hands their tests to `suite`; confirm against CuTest.c */ 50 | free(comparatorRandom_s); 51 | CuSuiteDelete(suite); 52 | return status; 53 | } 54 | int main(void) { 55 | return comparator_RunAllTests(); /* process exit status is non-zero when a test failed */ 56 | } 57 | -------------------------------------------------------------------------------- /mafComparator/src/cString.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2009-2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * Benedict Paten (benedict@soe.ucsc.edu, benedictpaten@gmail.com) 5 | * Mark Diekhans (markd@soe.ucsc.edu) 6 | * ... and other members of the Reconstruction Team of David Haussler's 7 | * lab (BME Dept. UCSC).
8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a copy 10 | * of this software and associated documentation files (the "Software"), to deal 11 | * in the Software without restriction, including without limitation the rights 12 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | * copies of the Software, and to permit persons to whom the Software is 14 | * furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included in 17 | * all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | * THE SOFTWARE. 
26 | */ 27 | #include <inttypes.h> 28 | #include <stdio.h> 29 | #include "cString.h" 30 | 31 | /* 32 | * Comparison function to sort strings alphabetically 33 | */ 34 | int cStr_compare(const void *a, const void *b) { 35 | const char **ia = (const char **)a; 36 | const char **ib = (const char **)b; 37 | return strcmp(*ia, *ib); 38 | } 39 | 40 | /* 41 | * Comparison function to sort strings in descending order 42 | */ 43 | int cStr_compareDesc(const void *a, const void *b) { 44 | const char **ia = (const char **)a; 45 | const char **ib = (const char **)b; 46 | return -1 * strcmp(*ia, *ib); 47 | } 48 | 49 | /* 50 | * In-place substitution to lower-case string 51 | */ 52 | void cStr_lowerCase(char *string) { 53 | char *p; 54 | for (p=string; *p != '\0'; p++) { 55 | *p = tolower((unsigned char)*p); /* the <ctype.h> functions require an unsigned char value (or EOF) */ 56 | } 57 | } 58 | 59 | /* 60 | * In-place substitution to upper-case string 61 | */ 62 | void cStr_upperCase(char *string) { 63 | char *p; 64 | for (p=string; *p != '\0'; p++) { 65 | *p = toupper((unsigned char)*p); /* the <ctype.h> functions require an unsigned char value (or EOF) */ 66 | } 67 | } 68 | 69 | /* 70 | * Check if "string" starts with "query" and ignores case 71 | * if "ignorecase" == 1 72 | */ 73 | int cStr_startsWith(char *string, char *query, int ignorecase) { 74 | assert(strlen(string) > 0); 75 | assert(strlen(query) > 0); 76 | 77 | int i = 0; 78 | while(1) { 79 | if (query[i] == '\0') { 80 | return 1; 81 | } 82 | if (ignorecase) { 83 | if (tolower((unsigned char)string[i]) != tolower((unsigned char)query[i])) { 84 | return 0; 85 | } 86 | } else { 87 | if (string[i] != query[i]) { 88 | return 0; 89 | } 90 | } 91 | i++; 92 | } 93 | } 94 | /* Number of characters in the decimal representation of n, including the leading '-' of a negative value. */ 95 | int64_t cStr_getIntLength(int64_t n) { 96 | int64_t count = (n < 0) ? 1 : 0; /* the '-' sign */ 97 | do { 98 | count++; 99 | } while ((n /= 10) != 0); 100 | /* 101 | * The loop tests != 0 (not > 0) so the digits of negative values are 102 | * counted too; C99 integer division truncates toward zero, so it terminates. 103 | */ 104 | return count; 105 | } 106 | 107 | /* reverse: reverse string s in place */ 108 | void cStr_reverse(char *s) { 109 | int i, j; 110 | char c; 111 | 112 | for (i = 0, j = strlen(s) - 1; i < j; i++, j--) { 113 | c = s[i]; 114 | s[i] = s[j]; 115 | s[j] = c; 116 | } 117 | } 118 | 119 | /* itoa: convert n to characters in s
*/ 120 | void cStr_itoa(int n, char *s) { 121 | int i, sign; 122 | 123 | if ((sign = n) < 0) /* record sign */ 124 | n = -n; /* make n positive */ 125 | i = 0; 126 | do { /* generate digits in reverse order */ 127 | s[i++] = n % 10 + '0'; /* get next digit */ 128 | } while ((n /= 10) > 0);/* delete it */ 129 | if (sign < 0) 130 | s[i++] = '-'; 131 | s[i] = '\0'; 132 | cStr_reverse(s); 133 | } 134 | /* Appends c to s in place; s must have room for one more character plus the terminator. */ 135 | void cStr_appendChar(char *s, char c) { 136 | int len = strlen(s); 137 | s[len] = c; 138 | s[len + 1] = '\0'; 139 | } 140 | /* Returns a newly st_malloc'ed string holding the `size` values of `array` in decimal, joined by `sep`; an empty string when size <= 0. */ 141 | char *cStr_getStringFromIntArray(int64_t *array, int64_t size, const char sep) { 142 | int64_t i; 143 | int numChars = 0; 144 | char *string = NULL; 145 | char buffer[64]; 146 | if (size <= 0) { string = st_malloc(1); string[0] = '\0'; return string; } /* nothing to format; array[0] must not be read */ 147 | for (i = 0; i < size; i++) { 148 | numChars += cStr_getIntLength(array[i]); 149 | } 150 | numChars += (size - 1); 151 | 152 | string = st_malloc(sizeof(char) * (numChars + 1)); 153 | string[0] = '\0'; 154 | 155 | snprintf(buffer, sizeof(buffer), "%" PRId64, array[0]); /* full 64-bit value; cStr_itoa() takes an int and would truncate */ 156 | strcat(string, buffer); 157 | for (i = 1; i < size; i++) { 158 | cStr_appendChar(string, sep); 159 | snprintf(buffer, sizeof(buffer), "%" PRId64, array[i]); 160 | strcat(string, buffer); 161 | } 162 | 163 | /* strcat() leaves the result NUL-terminated; the former string[strlen(string) + 1] = '\0' 164 | * wrote one byte past the (numChars + 1)-byte allocation. */ 165 | 166 | return string; 167 | } 168 | -------------------------------------------------------------------------------- /mafComparator/src/cString.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2009-2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * Benedict Paten (benedict@soe.ucsc.edu, benedictpaten@gmail.com) 5 | * Mark Diekhans (markd@soe.ucsc.edu) 6 | * ... and other members of the Reconstruction Team of David Haussler's 7 | * lab (BME Dept. UCSC).
8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a copy 10 | * of this software and associated documentation files (the "Software"), to deal 11 | * in the Software without restriction, including without limitation the rights 12 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | * copies of the Software, and to permit persons to whom the Software is 14 | * furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included in 17 | * all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | * THE SOFTWARE. 
26 | */ 27 | 28 | 29 | 30 | #ifndef CSTRING_H_ 31 | #define CSTRING_H_ 32 | 33 | #include "commonC.h" 34 | 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | /* 41 | * Comparison function to sort strings alphabetically 42 | */ 43 | int cStr_compare(const void *a, const void *b); 44 | 45 | /* 46 | * Comparison function to sort strings in descending order 47 | */ 48 | int cStr_compareDesc(const void *a, const void *b); 49 | 50 | /* 51 | * In-place substitution to lower-case string 52 | */ 53 | void cStr_lowerCase(char *string); 54 | 55 | /* 56 | * In-place substitution to upper-case string 57 | */ 58 | void cStr_upperCase(char *string); 59 | 60 | /* 61 | * Check if "string" starts with "query" and ignores case 62 | * if "ignorecase" == 1 63 | */ 64 | int cStr_startsWith(char *string, char *query, int ignorecase); 65 | 66 | int64_t cStr_getIntLength(int64_t n); 67 | 68 | void cStr_reverse(char *s); 69 | 70 | void cStr_itoa(int n, char *s); 71 | 72 | void cStr_appendChar(char *s, char c); 73 | 74 | char *cStr_getStringFromIntArray(int64_t *array, int64_t size, const char sep); 75 | 76 | #endif /* CSTRING_H_ */ 77 | -------------------------------------------------------------------------------- /mafComparator/src/comparatorRandom.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2009-2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 24 | */ 25 | 26 | #ifndef _COMPARATOR_RANDOM_H_ 27 | #define _COMPARATOR_RANDOM_H_ 28 | 29 | #include 30 | #include "sonLib.h" 31 | 32 | // Makes a draw from a random binomial with parameters n, p 33 | // Uses 34 | // BTPE (Binomial, Triangle, Parallelogram, Exponential) 35 | // Kachitvichyanukul, Voratas and Schmeiser, Bruce W. (1988) 36 | // Binomial Random Variate Generation, Communications of the ACM, 31(2): 216-222 37 | uint64_t rbinom(const uint64_t n, const double p); 38 | // NOT MULTITHREAD SAFE. 
39 | 40 | #endif // _COMPARATOR_RANDOM_H_ 41 | -------------------------------------------------------------------------------- /mafComparator/src/mafPairCounter.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2009-2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * Benedict Paten (benedict@soe.ucsc.edu, benedictpaten@gmail.com) 5 | * Mark Diekhans (markd@soe.ucsc.edu) 6 | * ... and other members of the Reconstruction Team of David Haussler's 7 | * lab (BME Dept. UCSC). 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a copy 10 | * of this software and associated documentation files (the "Software"), to deal 11 | * in the Software without restriction, including without limitation the rights 12 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | * copies of the Software, and to permit persons to whom the Software is 14 | * furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included in 17 | * all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | * THE SOFTWARE. 
26 | */ 27 | 28 | #include 29 | #include "sonLib.h" 30 | #include "common.h" 31 | #include "comparatorAPI.h" 32 | #include "buildVersion.h" 33 | 34 | const char *g_version = "version 0.1 July 2012"; 35 | 36 | void version(void); 37 | void usage(void); 38 | int parseOptions(int argc, char **argv, char **maf, char **maf2, char **seqList); 39 | stSet* buildSet(char *listOfLegitSequences); 40 | 41 | void version(void) { 42 | fprintf(stderr, "mafPairCounter, %s\nbuild: %s, %s, %s\n\n", g_version, g_build_date, 43 | g_build_git_branch, g_build_git_sha); 44 | } 45 | void usage(void) { 46 | version(); 47 | fprintf(stderr, "Usage: $ mafPairCounter --maf=FILE\n\n"); 48 | fprintf(stderr, "This program is used to count the number of pairs of aligned positions\n" 49 | "that are contained in a maf file. Can be run to determine all possible pairs, or\n" 50 | "a subset as defined either by using the --sequences option or by the intersection\n" 51 | "of the sequences present in --maf and present in --maf2.\n\n"); 52 | fprintf(stderr, "Options:\n"); 53 | usageMessage('h', "help", "Show this help message and exit."); 54 | usageMessage('\0', "maf", "The location of the MAF file. " 55 | "The number of pairs contained in the file will be counted and " 56 | "reported in stdout."); 57 | usageMessage('\0', "sequences", "Comma separated list of sequences allowed to be in pairs. " 58 | "To allow all sequences, either specify *every* sequence or don't invoke " 59 | "this option. Leaving --sequences off results in all sequences being used."); 60 | usageMessage('\0', "maf2", "IF specificied, this is the location of the second MAF file. " 61 | "Using this option causes --sequences option to be ignored. 
Sequences will " 62 | "be discovered by intersection of sequences present in both maf files, pairs " 63 | "reported will be from the --maf option."); 64 | usageMessage('v', "version", "Print current version number."); 65 | } 66 | int parseOptions(int argc, char **argv, char **maf, char **maf2, char **seqList) { 67 | static const char *optString = "v:h:"; 68 | static const struct option longOpts[] = { 69 | {"maf", required_argument, 0, 0}, 70 | {"maf2", required_argument, 0, 0}, 71 | {"sequences", required_argument, 0, 0}, 72 | {"version", no_argument, 0, 'v'}, 73 | {"help", no_argument, 0, 'h'}, 74 | {0, 0, 0, 0 }}; 75 | int longIndex = 0; 76 | int key = getopt_long(argc, argv, optString, longOpts, &longIndex); 77 | while (key != -1) { 78 | switch (key) { 79 | case 0: 80 | if (strcmp("maf", longOpts[longIndex].name) == 0) { 81 | *maf = stString_copy(optarg); 82 | break; 83 | } 84 | if (strcmp("maf2", longOpts[longIndex].name) == 0) { 85 | *maf2 = stString_copy(optarg); 86 | break; 87 | } 88 | if (strcmp("sequences", longOpts[longIndex].name) == 0) { 89 | *seqList = stString_copy(optarg); 90 | break; 91 | } 92 | case 'v': 93 | version(); 94 | exit(EXIT_SUCCESS); 95 | break; 96 | case 'h': 97 | usage(); 98 | exit(EXIT_SUCCESS); 99 | break; 100 | default: 101 | usage(); 102 | exit(EXIT_SUCCESS); 103 | break; 104 | } 105 | key = getopt_long(argc, argv, optString, longOpts, &longIndex); 106 | } 107 | if (*maf == NULL) { 108 | usage(); 109 | fprintf(stderr, "\nError, specify --maf\n"); 110 | exit(2); 111 | } 112 | FILE *fileHandle = de_fopen(*maf, "r"); 113 | fclose(fileHandle); 114 | if (*maf2 != NULL) { 115 | fileHandle = de_fopen(*maf2, "r"); 116 | fclose(fileHandle); 117 | if (*seqList != NULL) { 118 | free(seqList); 119 | seqList = NULL; 120 | } 121 | } 122 | return optind; 123 | } 124 | stSet* buildSet(char *listOfLegitSequences) { 125 | char *spaceSepFiles = stringReplace(listOfLegitSequences, ',', ' '); 126 | char *currentLocation = spaceSepFiles; 127 | char 
*currentWord; 128 | stSet *legitSeqsSet = stSet_construct3(stHash_stringKey, stHash_stringEqualKey, free); 129 | while ((currentWord = stString_getNextWord(¤tLocation)) != NULL) { 130 | stSet_insert(legitSeqsSet, stString_copy(currentWord)); 131 | free(currentWord); 132 | } 133 | free(spaceSepFiles); 134 | return legitSeqsSet; 135 | } 136 | int main(int argc, char **argv) { 137 | char *maf = NULL; 138 | char *maf2 = NULL; 139 | char *listOfLegitSequences = NULL; 140 | stSet *legitSeqsSet = NULL; 141 | stSet *maf1SeqSet = NULL; 142 | stSet *maf2SeqSet = NULL; 143 | stHash *sequenceLengthHash = stHash_construct3(stHash_stringKey, stHash_stringEqualKey, free, free); 144 | parseOptions(argc, argv, &maf, &maf2, &listOfLegitSequences); 145 | if (listOfLegitSequences != NULL) { 146 | legitSeqsSet = buildSet(listOfLegitSequences); 147 | } 148 | if (maf2 != NULL) { 149 | // build legitHash by intersection 150 | maf1SeqSet = stSet_construct3(stHash_stringKey, stHash_stringEqualKey, free); 151 | maf2SeqSet = stSet_construct3(stHash_stringKey, stHash_stringEqualKey, free); 152 | populateNames(maf, maf1SeqSet, sequenceLengthHash); 153 | populateNames(maf2, maf2SeqSet, sequenceLengthHash); 154 | legitSeqsSet = stSet_getIntersection(maf1SeqSet, maf2SeqSet); 155 | } 156 | uint64_t numberOfPairs = countPairsInMaf(maf, legitSeqsSet); 157 | printf("%"PRIu64"\n", numberOfPairs); 158 | // clean up 159 | if (legitSeqsSet != NULL) { 160 | stSet_destruct(legitSeqsSet); 161 | } 162 | if (maf1SeqSet != NULL) { 163 | stSet_destruct(maf1SeqSet); 164 | stSet_destruct(maf2SeqSet); 165 | } 166 | free(maf); 167 | free(maf2); 168 | free(listOfLegitSequences); 169 | stHash_destruct(sequenceLengthHash); 170 | return(EXIT_SUCCESS); 171 | } 172 | -------------------------------------------------------------------------------- /mafComparator/src/test.comparatorAPI.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2013 by 3 | * Dent Earl 
(dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 24 | */ 25 | #ifndef TEST_COMPARATOR_API_H_ 26 | #define TEST_COMPARATOR_API_H_ 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include "CuTest.h" 32 | #include "common.h" 33 | #include "sonLib.h" 34 | #include "comparatorAPI.h" 35 | 36 | CuSuite* comparatorAPI_TestSuite(void); 37 | 38 | #endif // TEST_COMPARATOR_API_H_ 39 | -------------------------------------------------------------------------------- /mafComparator/src/test.comparatorRandom.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... 
and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 24 | */ 25 | #ifndef TEST_COMPARATOR_RANDOM_H_ 26 | #define TEST_COMPARATOR_RANDOM_H_ 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include "CuTest.h" 32 | #include "common.h" 33 | #include "sonLib.h" 34 | #include "comparatorRandom.h" 35 | 36 | CuSuite* comparatorRandom_TestSuite(void); 37 | 38 | #endif // TEST_COMPARATOR_RANDOM_H_ 39 | -------------------------------------------------------------------------------- /mafComparator/src/testRand.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2009-2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 
24 | */ 25 | #include 26 | #include 27 | #include 28 | #include "CuTest.h" 29 | #include "comparatorAPI.h" 30 | #include "comparatorRandom.h" 31 | 32 | int main(int argc, char **argv) { 33 | if (argc == 5) { 34 | st_randomSeed(atoi(argv[4])); 35 | } else if (argc == 4) { 36 | st_randomSeed(time(NULL)); 37 | } else { 38 | fprintf(stderr, "Usage: %s numberOfSamples n p [optional: randomSeed]\n", argv[0]); 39 | return EXIT_FAILURE; 40 | } 41 | uint64_t numSamples = atoi(argv[1]); 42 | uint64_t n = atoi(argv[2]); 43 | double p = atof(argv[3]); 44 | for (uint64_t i = 0; i < numSamples; ++i) { 45 | printf("%" PRIu64 "\n", rbinom(n, p)); 46 | } 47 | return EXIT_SUCCESS; 48 | } 49 | -------------------------------------------------------------------------------- /mafCoverage/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2009-2013 by 2 | # Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 3 | # Benedict Paten (benedict@soe.ucsc.edu, benedictpaten@gmail.com) 4 | # Mark Diekhans (markd@soe.ucsc.edu) 5 | # ... and other members of the Reconstruction Team of David Haussler's 6 | # lab (BME Dept. UCSC). 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 
17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | # THE SOFTWARE. 25 |

# Build and test rules for mafCoverage. cxx, cflags, testFlags, lm and
# sonLibPath are not defined here; presumably they come from common.mk.
include ../inc/common.mk
SHELL:=/bin/bash
bin = ../bin
inc = ../inc
lib = ../lib
PROGS = mafCoverage
dependencies = ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c $(wildcard ${sonLibPath}/*) ${sonLibPath}/sonLib.a src/allTests.c
extraAPI := ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/mafCoverageAPI.o ${sonLibPath}/sonLib.a src/buildVersion.o
testAPI := test/sharedMaf.o test/common.o ../external/CuTest.a test/mafCoverageAPI.o ${sonLibPath}/sonLib.a test/buildVersion.o
testObjects := test/test.mafCoverageAPI.o
sources := src/mafCoverage.c src/mafCoverage.h

.PHONY: all clean test buildVersion

all: buildVersion $(foreach f,${PROGS}, ${bin}/$f)
buildVersion: src/buildVersion.c
src/buildVersion.c: ${sources} ${dependencies}
	@python ../lib/createVersionSources.py

# Shared objects are built by the lib Makefile. $(MAKE) (not a literal
# "make") keeps -j/-n and the jobserver working in the sub-make.
../lib/%.o: ../lib/%.c ../inc/%.h
	cd ../lib/ && $(MAKE)

# Every output is written to $@.tmp and renamed, so an interrupted build
# never leaves a truncated target behind.
${bin}/mafCoverage: src/mafCoverage.c ${dependencies} ${extraAPI}
	mkdir -p $(dir $@)
	${cxx} $< ${extraAPI} -o $@.tmp ${cflags} ${lm}
	mv $@.tmp $@
%.o: %.c %.h
	${cxx} -c $< -o $@.tmp ${cflags}
	mv $@.tmp $@
# Matches both src/ and test/; create the directory because test/ may not
# exist yet when this rule runs first (e.g. under make -j).
%/mafCoverageAPI.o: src/mafCoverageAPI.c src/mafCoverageAPI.h
	mkdir -p $(dir $@)
	${cxx} -c $< -o $@.tmp ${cflags}
	mv $@.tmp $@

# Runs the C unit tests, then the python tests, and removes the test build
# tree only when both succeed.
test: buildVersion test/allTests test/mafCoverage
	./test/allTests && python2.7 src/test.mafCoverage.py --verbose && rm -rf ./test/ && rmdir ./tempTestDir
test/allTests: src/allTests.c ${testAPI} ${testObjects} ${sonLibPath}/sonLib.a
	mkdir -p $(dir $@)
	${cxx} $^ -o $@.tmp ${testFlags} ${lm}
	mv $@.tmp $@
test/mafCoverage: src/mafCoverage.c ${dependencies} ${testAPI}
	mkdir -p $(dir $@)
	${cxx} $< ${testAPI} -o $@.tmp ${testFlags} ${lm}
	mv $@.tmp $@
test/%.o: ${lib}/%.c ${inc}/%.h
	mkdir -p $(dir $@)
	${cxx} -c $< -o $@.tmp ${testFlags}
	mv $@.tmp $@
test/test.mafCoverageAPI.o: src/test.mafCoverageAPI.c src/test.mafCoverageAPI.h test/mafCoverageAPI.o
	mkdir -p $(dir $@)
	${cxx} -c $< -o $@.tmp ${testFlags}
	mv $@.tmp $@
test/%.o: src/%.c src/%.h ${sonLibPath}/sonLib.a
	mkdir -p $(dir $@)
	${cxx} -c $< -o $@.tmp ${testFlags}
	mv $@.tmp $@

clean:
	rm -rf $(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ src/buildVersion.c src/buildVersion.h
# CuTest is compiled in the current directory and the archive is then moved
# into ../external.
../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h
	${cxx} -c $< ${cflags}
	ar rc CuTest.a CuTest.o
	ranlib CuTest.a
	rm -f CuTest.o
	mv CuTest.a $@
90 | -------------------------------------------------------------------------------- /mafCoverage/README.md: -------------------------------------------------------------------------------- 1 | # mafCoverage 2 | 3 | December 2013 4 | 5 | ## Author 6 | 7 | [Benedict Paten](https://github.com/benedictpaten/) 8 | 9 | ## Description 10 | mafCoverage is a program that will look through a maf file block by block and check for the coverage of all other sequences onto one user-specified sequence. 11 | 12 | The input need not be transitively closed as mafCoverage builds a bit array for the user-specified sequence and stores only presence-absence data. Duplications are only counted once. 13 | 14 | ## Installation 15 | 1. Download the package. 16 | 2. cd into the directory. 17 | 3. Type make. 18 | 19 | ## Use 20 | mafCoverage ... 
fill this in 21 | 22 | ### Options 23 | ```shell 24 | Usage: mafCoverage [maf file] 25 | 26 | Reports the pairwise (n-)coverage between a specified genome and all other genomes in the given maf, using a tab delimited format. 27 | Output table format has fields: querySpecies targetSpecies lengthOfQueryGenome coverage n-coverages (if specified) 28 | For a pair of genomes A and B, the coverage of B on A is the proportion of sites in A that align to a base in B. 29 | The n-coverage of B on A is the proportion of sites in A that align to n or more sites in B. 30 | Options: 31 | -h, --help show this help message and exit. 32 | -m, --maf path to maf file. 33 | -s, --speciesOrChr species or species.chromosome name, e.g. `hg19' or 'hg19.chr1', 34 | if not specified reports results for every possible species.wildcard at 35 | the end. 36 | -n, --nCoverage report all n-coverages, for 1 <= n <= 128 instead of just 37 | for n=1 (the default). 38 | -i, --identity report coverage of identical bases. 39 | -l, --logLevel Set logging level, either 'CRITICAL'/'INFO'/'DEBUG'. 40 | -a, --ignoreSpecies Do all chromosomes-against-all-chromosomes coverage. 41 | ``` 42 | 43 | 44 | ## Example 45 | $ mafCoverage --maf path/to/maf.maf --speciesOrChr hg19 ... 46 | ... 47 | 48 | -------------------------------------------------------------------------------- /mafCoverage/src/allTests.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 
24 | */ 25 | 26 | #include 27 | #include 28 | #include "CuTest.h" 29 | #include "mafCoverageAPI.h" 30 | #include "test.mafCoverageAPI.h" 31 | 32 | CuSuite* coverage_TestSuite(void); 33 | 34 | int coverage_RunAllTests(void) { 35 | CuString *output = CuStringNew(); 36 | CuSuite *suite = CuSuiteNew(); 37 | CuSuite *coverage_s = coverage_TestSuite(); 38 | CuSuiteAddSuite(suite, coverage_s); 39 | CuSuiteRun(suite); 40 | CuSuiteSummary(suite, output); 41 | CuSuiteDetails(suite, output); 42 | // printf("%s\n", output->buffer); 43 | CuStringDelete(output); 44 | int status = (suite->failCount > 0); 45 | free(coverage_s); 46 | CuSuiteDelete(suite); 47 | return status; 48 | } 49 | int main(void) { 50 | return coverage_RunAllTests(); 51 | } 52 | -------------------------------------------------------------------------------- /mafCoverage/src/mafCoverage.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2011-2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 24 | */ 25 | #include 26 | #include // ceil() 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include "common.h" 34 | #include "sharedMaf.h" 35 | #include "mafCoverage.h" 36 | #include "mafCoverageAPI.h" 37 | #include "buildVersion.h" 38 | #include "sonLib.h" 39 | 40 | static char *mafFileName = NULL; 41 | static stSet *speciesOrChromosomeNames = NULL; 42 | static bool nCoverage = 0, identity = 0, ignoreSpecies = 0; 43 | 44 | const char *g_version = "version 0.1 May 2013"; 45 | uint64_t getRegionSize(char *seq1, stHash *intervalsHash); 46 | 47 | void version(void) { 48 | fprintf(stderr, "mafCoverage, %s\nbuild: %s, %s, %s\n\n", g_version, g_build_date, g_build_git_branch, g_build_git_sha); 49 | } 50 | 51 | void usage(void) { 52 | version(); 53 | fprintf(stderr, "Usage: mafCoverage [maf file] \n\n" 54 | "Reports the pairwise (n-)coverage between a specified genome and all other genomes in the given maf, using a tab delimited format.\n" 55 | "Output table format has fields: querySpecies\ttargetSpecies\tlengthOfQueryGenome\tcoverage\tn-coverages (if specified)\n" 56 | "For a pair of genomes A and B, the coverage of B on A is the proportion of sites in A that align to a base in B.\n" 57 | "The n-coverage of B on A is the proportion of sites in A that align to n or more sites in B.\n"); 58 | fprintf(stderr, "Options: \n"); 59 | usageMessage('h', "help", "show this help message and exit."); 60 | usageMessage('m', "maf", "path to maf file. use - for stdin."); 61 | usageMessage('s', "speciesOrChr", 62 | "species or species.chromosome name, e.g. `hg19' or 'hg19.chr1', if not specified reports results for every possible species." 
63 | "wildcard at the end."); 64 | usageMessage('n', "nCoverage", "report all n-coverages, for 1 <= n <= 128 instead of just for n=1 (the default)."); 65 | usageMessage('i', "identity", "report coverage of identical bases."); 66 | usageMessage('l', "logLevel", "Set logging level, either 'CRITICAL'/'INFO'/'DEBUG'."); 67 | usageMessage('a', "ignoreSpecies", "Do all chromosomes-against-all-chromosomes coverage."); 68 | exit(EXIT_FAILURE); 69 | } 70 | 71 | static void parseOptions(int argc, char **argv) { 72 | int c; 73 | speciesOrChromosomeNames = stSet_construct3(stHash_stringKey, stHash_stringEqualKey, free); 74 | while (1) { 75 | static struct option longOptions[] = { { "help", no_argument, 0, 'h' }, { "maf", required_argument, 0, 'm' }, { "speciesOrChr", 76 | required_argument, 0, 's' }, { "nCoverage", no_argument, 0, 'n' }, { "identity", no_argument, 0, 'i' }, { "logLevel", 77 | required_argument, 0, 'l' }, { "ignoreSpecies", no_argument, 0, 'a' }, { 0, 0, 0, 0 } }; 78 | int longIndex = 0; 79 | c = getopt_long(argc, argv, "m:s:hnl:a", longOptions, &longIndex); 80 | if (c == -1) 81 | break; 82 | switch (c) { 83 | case 's': 84 | stSet_insert(speciesOrChromosomeNames, stString_copy(optarg)); 85 | break; 86 | case 'm': 87 | mafFileName = stString_copy(optarg); 88 | break; 89 | case 'n': 90 | nCoverage = 1; 91 | break; 92 | case 'i': 93 | identity = 1; 94 | break; 95 | case 'l': 96 | st_setLogLevelFromString(optarg); 97 | break; 98 | case 'h': 99 | usage(); 100 | break; 101 | case 'a': 102 | ignoreSpecies = 1; 103 | break; 104 | default: 105 | abort(); 106 | } 107 | } 108 | //Check we have the essentials. 
109 | if (mafFileName == NULL) { 110 | fprintf(stderr, "Error, specify --maf\n"); 111 | usage(); 112 | } 113 | // Check there's nothing left over on the command line 114 | if (optind < argc) { 115 | fprintf(stderr, "Unexpected input arguments\n"); 116 | usage(); 117 | } 118 | } 119 | 120 | int main(int argc, char **argv) { 121 | parseOptions(argc, argv); 122 | //Work out the structure of the chromosomes of the query sequence 123 | stHash *sequenceNamesToSequenceSizes = getMapOfSequenceNamesToSizesFromMaf(mafFileName); 124 | stHashIterator *sequenceNameIt = stHash_getIterator(sequenceNamesToSequenceSizes); 125 | char *sequenceName; 126 | while ((sequenceName = stHash_getNext(sequenceNameIt)) != NULL) { 127 | st_logDebug("Got a sequence name: %s with length %" PRId64 "\n", sequenceName, stIntTuple_get(stHash_search(sequenceNamesToSequenceSizes, sequenceName), 0)); 128 | } 129 | stHash_destructIterator(sequenceNameIt); 130 | stList *sequenceNames = stHash_getKeys(sequenceNamesToSequenceSizes); 131 | //If the species/chr name is not specified then replace with all possible species name. 
132 | if (stSet_size(speciesOrChromosomeNames) == 0) { 133 | st_logInfo("As no species name was specified, using all possible species names\n"); 134 | stSet_destruct(speciesOrChromosomeNames); 135 | speciesOrChromosomeNames = getSpeciesNames(sequenceNames, ignoreSpecies); 136 | } else { //Sanity checks on the input species/chr 137 | stList *names = stSet_getList(speciesOrChromosomeNames); 138 | assert(stList_length(names) == 1); 139 | char *speciesOrChrName = stList_get(names, 0); 140 | stList_destruct(names); 141 | if (ignoreSpecies) { 142 | if (stHash_search(sequenceNamesToSequenceSizes, speciesOrChrName) == NULL) { 143 | st_errAbort("Chromosome name not recognised (perhaps you gave a species name but have specified --ignoreSpecies?): %s\n", 144 | speciesOrChrName); 145 | } 146 | } else { 147 | stSet *speciesNames = getSpeciesNames(sequenceNames, ignoreSpecies); 148 | if (stSet_search(speciesNames, speciesOrChrName) == NULL && stHash_search(sequenceNamesToSequenceSizes, speciesOrChrName) 149 | == NULL) { 150 | st_errAbort("Species or chr name name not recognised: %s\n", speciesOrChrName); 151 | } 152 | stSet_destruct(speciesNames); 153 | } 154 | } 155 | //Print header 156 | nGenomeCoverage_reportHeader(stdout, nCoverage); 157 | //For each of the chosen species calculate species 158 | stSetIterator *speciesOrChrNamesIt = stSet_getIterator(speciesOrChromosomeNames); 159 | char *speciesOrChrName; 160 | while ((speciesOrChrName = stSet_getNext(speciesOrChrNamesIt)) != NULL) { 161 | st_logInfo("Computing the coverages for species/chr: %s\n", speciesOrChrName); 162 | //Build the coverage data structure 163 | NGenomeCoverage *nGC = nGenomeCoverage_construct(sequenceNamesToSequenceSizes, speciesOrChrName, ignoreSpecies); 164 | nGenomeCoverage_populate(nGC, mafFileName, identity); 165 | //Report 166 | nGenomeCoverage_report(nGC, stdout, nCoverage); 167 | //cleanup loop 168 | nGenomeCoverage_destruct(nGC); 169 | } 170 | //Cleanup 171 | stList_destruct(sequenceNames); 172 | 
stSet_destructIterator(speciesOrChrNamesIt); 173 | stHash_destruct(sequenceNamesToSequenceSizes); 174 | stSet_destruct(speciesOrChromosomeNames); 175 | free(mafFileName); 176 | // while(1); 177 | return EXIT_SUCCESS; 178 | } 179 | -------------------------------------------------------------------------------- /mafCoverage/src/mafCoverage.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2011-2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 
24 | */ 25 | #ifndef _MAF_COVERAGE_H_ 26 | #define _MAF_COVERAGE_H_ 27 | 28 | #include 29 | #include 30 | #include "common.h" 31 | #include "sharedMaf.h" 32 | #include "sonLib.h" 33 | 34 | void version(void); 35 | void usage(void); 36 | 37 | #endif // _MAF_COVERAGE_H_ 38 | -------------------------------------------------------------------------------- /mafCoverage/src/mafCoverageAPI.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2011-2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 
24 | */ 25 | 26 | #ifndef _MAF_COVERAGE_API_H_ 27 | #define _MAF_COVERAGE_API_H_ 28 | 29 | #include 30 | #include 31 | #include "common.h" 32 | #include "sharedMaf.h" 33 | #include "sonLib.h" 34 | #include "mafCoverage.h" 35 | 36 | bool is_wild(const char *s); 37 | bool searchMatched(mafLine_t *ml, const char *seq); 38 | bool searchMatched_(const char *target, const char *seq); 39 | 40 | /* 41 | * Iterates through the maf and builds a hash of sequence names to sequence sizes. 42 | * Sizes are specified by an stIntTuple. 43 | */ 44 | stHash *getMapOfSequenceNamesToSizesFromMaf(char *mafFileName); 45 | 46 | /* 47 | * Each sequence name is composed of two fields separated by a period. The first is the species field, the second is the 48 | * chromosome field. This function returns the set of distinct species names from the set of sequence names. If ignoreSpeciesNames 49 | * is true then it just returns the set of sequence names. 50 | */ 51 | stSet *getSpeciesNames(stList *sequenceNames, bool ignoreSpeciesNames); 52 | 53 | /* 54 | * Gets the subset of the hash for all sequences involving the given species. 55 | */ 56 | stHash *getMapOfSequenceNamesToSequenceSizesForGivenSpeciesOrChr(stHash *sequenceNamesToSequenceSizes, char *speciesOrChrName, bool ignoreSpeciesNames); 57 | 58 | /* 59 | * Returns the combined length of all the sequences in the set. 60 | */ 61 | int64_t getTotalLengthOfSequences(stHash *sequenceSizes); 62 | 63 | /* 64 | * The pairwise coverage object. 65 | */ 66 | 67 | typedef struct _pairwiseCoverage PairwiseCoverage; 68 | 69 | PairwiseCoverage *pairwiseCoverage_construct(const stHash *sequenceNamesToSequenceSizeForGivenSpecies); 70 | 71 | void pairwiseCoverage_destruct(PairwiseCoverage *pC); 72 | 73 | /* 74 | * Returns the coverage of the target genome on query species, that is the proportion of bases in the query aligned to one 75 | * or more positions in the target.
76 | */ 77 | double pairwiseCoverage_calculateCoverage(PairwiseCoverage *pC); 78 | 79 | /* 80 | * Returns an array of the n-coverages up to but excluding 128, with the index corresponding to n. 81 | */ 82 | double *pairwiseCoverage_calculateNCoverages(PairwiseCoverage *pC); 83 | 84 | /* 85 | * Gets the coverage array for the named sequence, for use with pairwiseCoverageArray_increase (presumably one counter per position; confirm in mafCoverageAPI.c). 86 | */ 87 | char *pairwiseCoverage_getCoverageArrayForSequence(PairwiseCoverage *pC, char *sequenceName); 88 | 89 | /* 90 | * Increases the coverage count of a given sequence position. Returns non-zero if successful; if maximum coverage is already achieved (so can't be increased) returns 0. 91 | */ 92 | bool pairwiseCoverageArray_increase(char *sequenceCoverageArray, int64_t position); 93 | 94 | /* 95 | * An all-against-a-given-species object. 96 | */ 97 | 98 | typedef struct _nGenomeCoverage NGenomeCoverage; 99 | 100 | void nGenomeCoverage_destruct(NGenomeCoverage *nGC); 101 | 102 | NGenomeCoverage *nGenomeCoverage_construct(stHash *sequenceSizes, char *speciesName, bool ignoreSpeciesNames); 103 | 104 | /* 105 | * Iterates through a maf file and populates the species coverages. 106 | */ 107 | void nGenomeCoverage_populate(NGenomeCoverage *nGC, char *mafFileName, bool requireIdentityForMatch); 108 | 109 | /* 110 | * Reports stats in tab delimited format. 111 | */ 112 | void nGenomeCoverage_reportHeader(FILE *out, bool includeNCoverage); 113 | void nGenomeCoverage_report(NGenomeCoverage *nGC, FILE *out, bool includeNCoverage); 114 | 115 | #endif // _MAF_COVERAGE_API_H_ 116 | -------------------------------------------------------------------------------- /mafCoverage/src/test.mafCoverageAPI.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC).
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 
24 | */ 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include "CuTest.h" 35 | #include "common.h" 36 | #include "sharedMaf.h" 37 | #include "mafCoverageAPI.h" 38 | 39 | static stHash *sequenceNamesToSequenceSizes = NULL; 40 | 41 | static void setup() { 42 | sequenceNamesToSequenceSizes = stHash_construct3(stHash_stringKey, stHash_stringEqualKey, free, (void(*)(void *)) stIntTuple_destruct); 43 | stHash_insert(sequenceNamesToSequenceSizes, stString_copy("bat.man"), stIntTuple_construct1(50)); 44 | stHash_insert(sequenceNamesToSequenceSizes, stString_copy("spider.man"), stIntTuple_construct1(1)); 45 | stHash_insert(sequenceNamesToSequenceSizes, stString_copy("bat.fink"), stIntTuple_construct1(7)); 46 | stHash_insert(sequenceNamesToSequenceSizes, stString_copy("danger.mouse"), stIntTuple_construct1(12)); 47 | stHash_insert(sequenceNamesToSequenceSizes, stString_copy("penfold"), stIntTuple_construct1(12)); 48 | } 49 | 50 | static void teardown() { 51 | if (sequenceNamesToSequenceSizes != NULL) { 52 | stHash_destruct(sequenceNamesToSequenceSizes); 53 | } 54 | } 55 | 56 | static void test_is_wild_0(CuTest *testCase) { 57 | CuAssertTrue(testCase, is_wild("hg19*")); 58 | CuAssertTrue(testCase, is_wild("hg19.chr19*")); 59 | CuAssertTrue(testCase, !is_wild("hg19.chr19")); 60 | CuAssertTrue(testCase, !is_wild("hg19.chr1*9")); 61 | CuAssertTrue(testCase, !is_wild("aoeuaoeunstaoeunshtonuts.chrcrhrc.huaoeunsatohunt.")); 62 | CuAssertTrue(testCase, is_wild("aoeuaoeunstaoeunshtonuts.chrcrhrc.huaoeunsatohunt.*")); 63 | } 64 | static void test_searchMatched_0(CuTest *testCase) { 65 | mafLine_t *ml = maf_newMafLineFromString("s hg19.chr19 123480 13 + 1234870098734 ACGTACGTACGTA", 1); 66 | CuAssertTrue(testCase, searchMatched(ml, "hg19.chr19")); 67 | CuAssertTrue(testCase, searchMatched(ml, "hg19*")); 68 | CuAssertTrue(testCase, searchMatched(ml, "h*")); 69 | CuAssertTrue(testCase, searchMatched(ml, 
"*")); 70 | CuAssertTrue(testCase, !searchMatched(ml, "mm9")); 71 | maf_destroyMafLineList(ml); 72 | } 73 | 74 | static void test_getSpeciesNames(CuTest *testCase) { 75 | setup(); 76 | stList *sequenceNames = stHash_getKeys(sequenceNamesToSequenceSizes); 77 | stSet *speciesNames = getSpeciesNames(sequenceNames, 0); 78 | CuAssertIntEquals(testCase, 4, stSet_size(speciesNames)); 79 | CuAssertTrue(testCase, stSet_search(speciesNames, "bat") != NULL); 80 | CuAssertTrue(testCase, stSet_search(speciesNames, "spider") != NULL); 81 | CuAssertTrue(testCase, stSet_search(speciesNames, "danger") != NULL); 82 | CuAssertTrue(testCase, stSet_search(speciesNames, "penfold") != NULL); 83 | stSet_destruct(speciesNames); 84 | stList_destruct(sequenceNames); 85 | teardown(); 86 | } 87 | 88 | static void test_getMapOfSequenceNamesToSequenceSizesForGivenSpecies(CuTest *testCase) { 89 | setup(); 90 | stHash *sequenceNamesToSequenceSizeForGivenSpecies = getMapOfSequenceNamesToSequenceSizesForGivenSpeciesOrChr(sequenceNamesToSequenceSizes, 91 | "bat", 0); 92 | CuAssertIntEquals(testCase, 2, stHash_size(sequenceNamesToSequenceSizeForGivenSpecies)); 93 | CuAssertIntEquals(testCase, 50, stIntTuple_get(stHash_search(sequenceNamesToSequenceSizeForGivenSpecies, "bat.man"), 0)); 94 | CuAssertIntEquals(testCase, 7, stIntTuple_get(stHash_search(sequenceNamesToSequenceSizeForGivenSpecies, "bat.fink"), 0)); 95 | stHash_destruct(sequenceNamesToSequenceSizeForGivenSpecies); 96 | teardown(); 97 | } 98 | 99 | static void test_getTotalLengthOfSequences(CuTest *testCase) { 100 | setup(); 101 | CuAssertIntEquals(testCase, 82, getTotalLengthOfSequences(sequenceNamesToSequenceSizes)); 102 | teardown(); 103 | } 104 | 105 | static void test_pairwiseCoverage(CuTest *testCase) { 106 | setup(); 107 | PairwiseCoverage *pC = pairwiseCoverage_construct(sequenceNamesToSequenceSizes); 108 | //Check coverage is 0 when we start 109 | CuAssertDblEquals(testCase, 0.0, pairwiseCoverage_calculateCoverage(pC), 0.0); 110 | 
double *nCoverages = pairwiseCoverage_calculateNCoverages(pC); 111 | CuAssertDblEquals(testCase, 1.0, nCoverages[0], 0.0); 112 | for (int64_t i = 1; i <= SCHAR_MAX; i++) { 113 | CuAssertDblEquals(testCase, 0.0, nCoverages[i], 0.0); 114 | } 115 | free(nCoverages); 116 | 117 | //Add some coverage 118 | char *coverageArray = pairwiseCoverage_getCoverageArrayForSequence(pC, "spider.man"); 119 | CuAssertTrue(testCase, coverageArray != NULL); 120 | pairwiseCoverageArray_increase(coverageArray, 0); 121 | coverageArray = pairwiseCoverage_getCoverageArrayForSequence(pC, "penfold"); 122 | CuAssertTrue(testCase, coverageArray != NULL); 123 | pairwiseCoverageArray_increase(coverageArray, 2); 124 | pairwiseCoverageArray_increase(coverageArray, 2); 125 | 126 | //Now recalculate the coverages 127 | CuAssertDblEquals(testCase, 2.0/82.0, pairwiseCoverage_calculateCoverage(pC), 0.0); 128 | nCoverages = pairwiseCoverage_calculateNCoverages(pC); 129 | CuAssertDblEquals(testCase, 1.0, nCoverages[0], 0.0); 130 | CuAssertDblEquals(testCase, 2.0/82.0, nCoverages[1], 0.0); 131 | CuAssertDblEquals(testCase, 1.0/82.0, nCoverages[2], 0.0); 132 | free(nCoverages); 133 | nCoverages = pairwiseCoverage_calculateNCoverages(pC); 134 | for (int64_t i = 3; i <= SCHAR_MAX; i++) { 135 | CuAssertDblEquals(testCase, 0.0, nCoverages[i], 0.0); 136 | } 137 | free(nCoverages); 138 | 139 | pairwiseCoverage_destruct(pC); 140 | teardown(); 141 | } 142 | 143 | static void test_nGenomeCoverage(CuTest *testCase) { 144 | setup(); 145 | //Just build a single nGenomeCoverage and check the report functions work as expected. 
146 | NGenomeCoverage *nGC = nGenomeCoverage_construct(sequenceNamesToSequenceSizes, "bat", 0); 147 | // nGenomeCoverage_reportHeader(stderr, 1); 148 | // nGenomeCoverage_report(nGC, stderr, 1); 149 | nGenomeCoverage_destruct(nGC); 150 | teardown(); 151 | } 152 | 153 | CuSuite* coverage_TestSuite(void) { 154 | CuSuite* suite = CuSuiteNew(); 155 | (void) test_is_wild_0; 156 | (void) test_searchMatched_0; 157 | SUITE_ADD_TEST(suite, test_is_wild_0); 158 | SUITE_ADD_TEST(suite, test_searchMatched_0); 159 | SUITE_ADD_TEST(suite, test_getSpeciesNames); 160 | SUITE_ADD_TEST(suite, test_getMapOfSequenceNamesToSequenceSizesForGivenSpecies); 161 | SUITE_ADD_TEST(suite, test_getTotalLengthOfSequences); 162 | SUITE_ADD_TEST(suite, test_pairwiseCoverage); 163 | SUITE_ADD_TEST(suite, test_nGenomeCoverage); 164 | return suite; 165 | } 166 | -------------------------------------------------------------------------------- /mafCoverage/src/test.mafCoverageAPI.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 
16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 24 | */ 25 | #ifndef TEST_MAF_COVERAGE_API_H_ 26 | #define TEST_MAF_COVERAGE_API_H_ 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include "CuTest.h" 33 | #include "common.h" 34 | #include "sharedMaf.h" 35 | #include "mafCoverageAPI.h" 36 | 37 | CuSuite* coverage_TestSuite(void); 38 | 39 | #endif // TEST_MAF_COVERAGE_API_H_ 40 | -------------------------------------------------------------------------------- /mafDuplicateFilter/Makefile: -------------------------------------------------------------------------------- 1 | include ../inc/common.mk 2 | SHELL:=/bin/bash 3 | bin = ../bin 4 | inc = ../inc 5 | lib = ../lib 6 | PROGS = mafDuplicateFilter 7 | dependencies = ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c 8 | objects = ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/buildVersion.o 9 | testObjects := test/sharedMaf.o test/common.o ../external/CuTest.a test/buildVersion.o 10 | sources = src/mafDuplicateFilter.c 11 | 12 | .PHONY: all clean test buildVersion 13 | 14 | all: buildVersion $(foreach f,${PROGS}, ${bin}/$f) 15 | buildVersion: src/buildVersion.c 16 | src/buildVersion.c: ${sources} ${dependencies} 17 | @python ../lib/createVersionSources.py 18 | 19 | ../lib/%.o: ../lib/%.c ../inc/%.h 20 | cd ../lib/ && make 21 | 22 | ${bin}/mafDuplicateFilter: src/mafDuplicateFilter.c ${dependencies} ${objects} 23 | mkdir -p $(dir $@) 24 | ${cxx} ${cflags} -O3 $< ${objects} -o $@.tmp -lm 25 | mv 
$@.tmp $@ 26 | 27 | test/mafDuplicateFilter: src/mafDuplicateFilter.c ${dependencies} ${testObjects} 28 | mkdir -p $(dir $@) 29 | ${cxx} ${cflags} -g -O0 $< ${testObjects} -o $@.tmp -lm 30 | mv $@.tmp $@ 31 | 32 | %.o: %.c %.h 33 | ${cxx} -c ${cflags} $< -o $@.tmp 34 | mv $@.tmp $@ 35 | test/%.o: ${lib}/%.c ${inc}/%.h 36 | mkdir -p $(dir $@) 37 | ${cxx} -g -O0 -c ${cflags} $< -o $@.tmp 38 | mv $@.tmp $@ 39 | test/%.o: src/%.c src/%.h 40 | mkdir -p $(dir $@) 41 | ${cxx} -g -O0 -c ${cflags} $< -o $@.tmp 42 | mv $@.tmp $@ 43 | 44 | clean: 45 | rm -rf $(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ src/buildVersion.c src/buildVersion.h 46 | 47 | test: buildVersion test/mafDuplicateFilter 48 | python2.7 src/test.mafDuplicateFilter.py --verbose && rm -rf test/ && rmdir ./tempTestDir 49 | 50 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h 51 | ${cxx} -c ${cflags} $< 52 | ar rc CuTest.a CuTest.o 53 | ranlib CuTest.a 54 | rm -f CuTest.o 55 | mv CuTest.a $@ 56 | -------------------------------------------------------------------------------- /mafDuplicateFilter/README.md: -------------------------------------------------------------------------------- 1 | # mafDuplicateFilter 2 | 3 | 16 April 2012 4 | 5 | ## Author 6 | [Dent Earl](https://github.com/dentearl/) 7 | 8 | ## Description 9 | mafDuplicateFilter is a program to filter out duplications from a Multiple Alignment Format (maf) file. This program assumes the sequence name field is formatted as in "speciesName.chromosomeName" using the first period charater, ".", as the delimiter between the species name and the chromosome name. For every block present in the alignment, mBDF looks for any duplicated species within the block. Instead of stripping out all copies of the duplication, the sequence with the highest similarity to the consensus of the block is left, all others are removed. Sequence similarity is computed as a bit score in comparison to the IUPAC-enabled consensus. 
Ties are resolved by picking the sequence that appears earliest in the file.

## Installation
1. Download the package.
2. cd into the directory.
3. Type make.

## Use
    mafDuplicateFilter --maf mafWithDuplicates.maf > pruned.maf

### Options
* -h, --help show this help message and exit.
* -m, --maf path to maf file.

## Example
    $ ./mafDuplicateFilter --maf mafWithDuplicates.maf > mafPruned.maf

--------------------------------------------------------------------------------
/mafExtractor/Makefile:
--------------------------------------------------------------------------------
include ../inc/common.mk
SHELL:=/bin/bash
bin = ../bin
inc = ../inc
lib = ../lib
PROGS = mafExtractor
dependencies = ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c src/mafExtractor.h
API = ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/mafExtractorAPI.o src/buildVersion.o
testAPI = test/sharedMaf.o ../external/CuTest.a test/common.o test/mafExtractorAPI.o test/buildVersion.o
testObjects := test/test.mafExtractor.o
sources = src/mafExtractor.c src/mafExtractor.h

.PHONY: all clean test buildVersion

all: buildVersion $(foreach f,${PROGS}, ${bin}/$f)
buildVersion: src/buildVersion.c
# Regenerate the version sources whenever a source or dependency changes.
src/buildVersion.c: ${sources} ${dependencies}
	@python ../lib/createVersionSources.py

# Shared library objects are built by the lib Makefile. $(MAKE) (rather than a
# literal "make") passes -j / -n and the jobserver through to the sub-make.
../lib/%.o: ../lib/%.c ../inc/%.h
	cd ../lib/ && $(MAKE)

# Release binary; written to a temporary name and moved into place so an
# interrupted link never leaves a truncated target behind.
${bin}/mafExtractor: src/mafExtractor.c ${dependencies} ${API}
	mkdir -p $(dir $@)
	${cxx} ${cflags} -O3 $< ${API} -o $@.tmp -lm
	mv $@.tmp $@

# Debug binary used by the python test driver.
test/mafExtractor: src/mafExtractor.c ${dependencies} ${testAPI}
	mkdir -p $(dir $@)
	${cxx} ${cflags} -g -O0 $< ${testAPI} -o $@.tmp
	mv $@.tmp $@

%.o: %.c %.h
	${cxx} -O3 -c ${cflags} $< -o $@.tmp
	mv $@.tmp $@

test/%.o: ${lib}/%.c ${inc}/%.h
	mkdir -p $(dir $@)
	${cxx} -c $< -o $@.tmp ${cflags} -g -O0
	mv $@.tmp $@
test/%.o: src/%.c src/%.h
	mkdir -p $(dir $@)
	${cxx} -c $< -o $@.tmp ${cflags} -g -O0
	mv $@.tmp $@

# tempTestDir/ is the scratch directory the test target removes on success;
# clean removes it too so a failed test run does not leave it behind.
clean:
	rm -rf $(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ tempTestDir/ src/buildVersion.c src/buildVersion.h

test: buildVersion test/allTests test/mafExtractor
	./test/allTests && python2.7 src/test.mafExtractor.py --verbose && rmdir ./tempTestDir && rm -rf ./test/

# C unit-test runner.
test/allTests: src/allTests.c ${testObjects} ${testAPI}
	mkdir -p $(dir $@)
	${cxx} $^ -o $@.tmp ${cflags} -g -O0
	mv $@.tmp $@

test/test.mafExtractor.o: src/test.mafExtractor.c src/test.mafExtractor.h ${testAPI}
	mkdir -p $(dir $@)
	${cxx} -c $< -o $@.tmp ${cflags} -I src/ -g -O0
	mv $@.tmp $@

../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h
	${cxx} -c ${cflags} $<
	ar rc CuTest.a CuTest.o
	ranlib CuTest.a
	rm -f CuTest.o
	mv CuTest.a $@

--------------------------------------------------------------------------------
/mafExtractor/README.md:
--------------------------------------------------------------------------------
# mafExtractor

14 Feb 2012

## Author

[Dent Earl](https://github.com/dentearl/)

## Description
mafExtractor is a program that will look through a maf file for a particular sequence name and region. If a match is found then the block containing the query will be printed to standard out. By default blocks are trimmed such that only columns that contain the targeted sequence region are included. Use --soft to include an entire block if any part of the block falls within the targeted region.

__BE AWARE!__ At present mafExtractor doesn't handle maf lines of type e, q, or i. The s lines will be properly processed but these other types of lines will be ignored which could lead to inconsistent data and confusion.
13 | 14 | ## Installation 15 | 1. Download the package. 16 | 2. cd into the directory. 17 | 3. Type make. 18 | 19 | ## Use 20 | mafExtractor --seq [sequence name (and possibly chr)] --start [start of region] --stop [end of region] [options] < myFile.maf 21 | 22 | ### Options 23 | * -h, --help show this help message and exit. 24 | * -s, --seq sequence _name.chr_ e.g. `hg18.chr2'. 25 | * --start start of the region, inclusive. Must be a positive number. 26 | * --stop end of the region, inclusive. Must be a positive number. 27 | * --soft include entire block even if it has gaps or over-hangs. default=false. 28 | * -v, --verbose turns on verbose output. 29 | 30 | ## Example 31 | $ ./mafExtractor --seq hg19.chr20 --start 500 --stop 1000 < example.maf 32 | ##maf version=1 33 | 34 | #a score=0 pctid=99.2 35 | #s hg19.chr20 0 795 + 73767698 GAT... 36 | ... 37 | 38 | -------------------------------------------------------------------------------- /mafExtractor/src/allTests.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2014 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software.
16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 24 | */ 25 | 26 | #include 27 | #include 28 | #include "CuTest.h" 29 | #include "mafExtractorAPI.h" 30 | #include "test.mafExtractor.h" 31 | 32 | CuSuite* extractor_TestSuite(void); 33 | int extractor_RunAllTests(void); 34 | 35 | int extractor_RunAllTests(void) { 36 | CuString *output = CuStringNew(); 37 | CuSuite *suite = CuSuiteNew(); 38 | CuSuite *extractor_s = extractor_TestSuite(); 39 | CuSuiteAddSuite(suite, extractor_s); 40 | CuSuiteRun(suite); 41 | CuSuiteSummary(suite, output); 42 | CuSuiteDetails(suite, output); 43 | printf("%s\n", output->buffer); 44 | CuStringDelete(output); 45 | int status = (suite->failCount > 0); 46 | free(extractor_s); 47 | CuSuiteDelete(suite); 48 | return status; 49 | } 50 | int main(void) { 51 | return extractor_RunAllTests(); 52 | } 53 | -------------------------------------------------------------------------------- /mafExtractor/src/mafExtractor.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2011-2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 24 | */ 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include "common.h" 32 | #include "sharedMaf.h" 33 | #include "mafExtractor.h" 34 | #include "mafExtractorAPI.h" 35 | #include "buildVersion.h" 36 | 37 | const char *g_version = "version 0.2 September 2012"; 38 | 39 | void version(void) { 40 | fprintf(stderr, "mafExtractor, %s\nbuild: %s, %s, %s\n\n", g_version, g_build_date, 41 | g_build_git_branch, g_build_git_sha); 42 | } 43 | void usage(void) { 44 | version(); 45 | fprintf(stderr, "Usage: mafExtractor --maf [maf file] --seq [sequence name (and possibly chr)] " 46 | "--start [start of region, inclusive, 0 based] --stop [end of region, inclusive] " 47 | "[options]\n\n" 48 | "mafExtractor is a program that will look through a maf file for a\n" 49 | "particular sequence name and region. 
If a match is found then the block\n" 50 | "containing the querry will be printed to standard out.\n\n"); 51 | fprintf(stderr, "Options: \n"); 52 | usageMessage('h', "help", "show this help message and exit."); 53 | usageMessage('m', "maf", "path to maf file. use - for stdin."); 54 | usageMessage('s', "seq", "sequence name, e.g. `hg18.chr2'."); 55 | usageMessage('\0', "start", "start of region, inclusive, 0 based."); 56 | usageMessage('\0', "stop", "end of region, inclusive, 0 based."); 57 | usageMessage('\0', "soft", "include entire block even if it has gaps or over-hangs. default=false."); 58 | usageMessage('\0', "first", "only check the first line of each block."); 59 | usageMessage('v', "verbose", "turns on verbose output."); 60 | exit(EXIT_FAILURE); 61 | } 62 | void parseOptions(int argc, char **argv, char *filename, char *seqName, uint64_t *start, 63 | uint64_t *stop, bool *isSoft, bool *checkFirstLineOnly) { 64 | extern int g_debug_flag; 65 | extern int g_verbose_flag; 66 | int c; 67 | bool setSName = false, setStart = false, setStop = false, setMName = false; 68 | int64_t value = 0; 69 | while (1) { 70 | static struct option longOptions[] = { 71 | {"debug", no_argument, 0, 'd'}, 72 | {"verbose", no_argument, 0, 'v'}, 73 | {"help", no_argument, 0, 'h'}, 74 | {"version", no_argument, 0, 0}, 75 | {"maf", required_argument, 0, 'm'}, 76 | {"seq", required_argument, 0, 's'}, 77 | {"start", required_argument, 0, 0}, 78 | {"stop", required_argument, 0, 0}, 79 | {"soft", no_argument, 0, 0}, 80 | {"first", no_argument, 0, 0}, 81 | {0, 0, 0, 0} 82 | }; 83 | int longIndex = 0; 84 | c = getopt_long(argc, argv, "m:s:h:v:d", 85 | longOptions, &longIndex); 86 | if (c == -1) 87 | break; 88 | switch (c) { 89 | case 0: 90 | if (strcmp("start", longOptions[longIndex].name) == 0) { 91 | value = strtoll(optarg, NULL, 10); 92 | if (value < 0) { 93 | fprintf(stderr, "Error, --start %" PRIi64 " must be nonnegative.\n", value); 94 | usage(); 95 | } 96 | *start = value; 97 | setStart 
= true; 98 | } else if (strcmp("stop", longOptions[longIndex].name) == 0) { 99 | value = strtoll(optarg, NULL, 10); 100 | if (value < 0) { 101 | fprintf(stderr, "Error, --stop %" PRIi64 " must be nonnegative.\n", value); 102 | usage(); 103 | } 104 | *stop = value; 105 | setStop = true; 106 | } else if (strcmp("soft", longOptions[longIndex].name) == 0) { 107 | *isSoft = true; 108 | } else if (strcmp("first", longOptions[longIndex].name) == 0) { 109 | *checkFirstLineOnly = true; 110 | } else if (strcmp("version", longOptions[longIndex].name) == 0) { 111 | version(); 112 | exit(EXIT_SUCCESS); 113 | } 114 | break; 115 | case 'm': 116 | setMName = true; 117 | strncpy(filename, optarg, kMaxSeqName); 118 | break; 119 | case 's': 120 | setSName = true; 121 | strncpy(seqName, optarg, kMaxSeqName); 122 | break; 123 | case 'v': 124 | g_verbose_flag++; 125 | break; 126 | case 'd': 127 | g_debug_flag = 1; 128 | break; 129 | case 'h': 130 | case '?': 131 | usage(); 132 | break; 133 | default: 134 | abort(); 135 | } 136 | } 137 | if (!(setMName && setSName && setStart && setStop)) { 138 | fprintf(stderr, "Error, specify --maf --seq --start --stop\n"); 139 | usage(); 140 | } 141 | if (*start > *stop) { 142 | uint64_t t = *start; 143 | *start = *stop; 144 | *stop = t; 145 | } 146 | // Check there's nothing left over on the command line 147 | if (optind < argc) { 148 | char *errorString = de_malloc(kMaxSeqName); 149 | strcpy(errorString, "Unexpected arguments:"); 150 | while (optind < argc) { 151 | strcat(errorString, " "); 152 | strcat(errorString, argv[optind++]); 153 | } 154 | fprintf(stderr, "%s\n", errorString); 155 | usage(); 156 | } 157 | } 158 | 159 | int main(int argc, char **argv) { 160 | extern const int kMaxStringLength; 161 | char seq[kMaxSeqName]; 162 | char filename[kMaxStringLength]; 163 | uint64_t start, stop; 164 | bool isSoft = false; 165 | bool checkFirstLineOnly = false; 166 | parseOptions(argc, argv, filename, seq, &start, &stop, &isSoft, &checkFirstLineOnly); 
167 | mafFileApi_t *mfa = maf_newMfa(filename, "r"); 168 | 169 | processBody(mfa, seq, start, stop, isSoft, checkFirstLineOnly); 170 | maf_destroyMfa(mfa); 171 | 172 | return EXIT_SUCCESS; 173 | } 174 | -------------------------------------------------------------------------------- /mafExtractor/src/mafExtractor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2011-2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 
24 | */ 25 | #ifndef _BLOCK_EXTRACTOR_H_ 26 | #define _BLOCK_EXTRACTOR_H_ 27 | 28 | #include 29 | #include 30 | #include "common.h" 31 | #include "sharedMaf.h" 32 | 33 | void version(void); 34 | void usage(void); 35 | void parseOptions(int argc, char **argv, char *filename, char *seqName, uint64_t *start, 36 | uint64_t *stop, bool *isSoft, bool *checkFirstLineOnly); 37 | 38 | #endif // _BLOCK_EXTRACTOR_H_ 39 | -------------------------------------------------------------------------------- /mafExtractor/src/mafExtractorAPI.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2011-2014 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 
24 | */ 25 | 26 | #ifndef _BLOCK_EXTRACTOR_API_H_ 27 | #define _BLOCK_EXTRACTOR_API_H_ 28 | 29 | #include 30 | #include 31 | #include "common.h" 32 | #include "sharedMaf.h" 33 | 34 | bool checkRegion(uint64_t targetStart, uint64_t targetStop, uint64_t lineStart, 35 | uint64_t length, uint64_t sourceLength, char strand); 36 | bool searchMatched(mafLine_t *ml, const char *seq, uint64_t start, uint64_t stop); 37 | void printHeader(void); 38 | uint64_t getTargetColumns(bool **targetColumns, uint64_t *n, mafBlock_t *b, const char *seq, 39 | uint64_t start, uint64_t stop); 40 | void printTargetColumns(bool *targetColumns, uint64_t n); 41 | int64_t **createOffsets(uint64_t n); 42 | void destroyOffsets(int64_t **offs, uint64_t n); 43 | mafBlock_t *processBlockForSplice(mafBlock_t *b, uint64_t blockNumber, const char *seq, 44 | uint64_t start, uint64_t stop, bool store); 45 | mafBlock_t *spliceBlock(mafBlock_t *mb, uint64_t l, uint64_t r, int64_t **offsetArray); 46 | void checkBlock(mafBlock_t *b, uint64_t blockNumber, const char *seq, uint64_t start, 47 | uint64_t stop, bool *printedHeader, bool isSoft, bool checkFirstLineOnly); 48 | void processBody(mafFileApi_t *mfa, char *seq, uint64_t start, uint64_t stop, bool isSoft, 49 | bool checkFirstLineOnly); 50 | uint64_t sumBool(bool *array, uint64_t n); 51 | void printOffsetArray(int64_t **offsetArray, uint64_t n); 52 | 53 | #endif // _BLOCK_EXTRACTOR_API_H_ 54 | -------------------------------------------------------------------------------- /mafExtractor/src/test.mafExtractor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2014 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 
24 | */ 25 | #ifndef TEST_EXTRACTOR_API_H_ 26 | #define TEST_EXTRACTOR_API_H_ 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include "CuTest.h" 33 | #include "common.h" 34 | #include "sharedMaf.h" 35 | #include "mafExtractorAPI.h" 36 | 37 | CuSuite* extractor_TestSuite(void); 38 | 39 | #endif // TEST_EXTRACTOR_API_H_ 40 | -------------------------------------------------------------------------------- /mafFilter/Makefile: -------------------------------------------------------------------------------- 1 | include ../inc/common.mk 2 | SHELL:=/bin/bash 3 | bin = ../bin 4 | inc = ../inc 5 | lib = ../lib 6 | PROGS = mafFilter 7 | dependencies = ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c 8 | objects = ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/buildVersion.o 9 | testObjects = test/common.o test/sharedMaf.o ../external/CuTest.a test/buildVersion.o 10 | sources = src/mafFilter.c 11 | 12 | .PHONY: all clean test buildVersion 13 | 14 | all: buildVersion $(foreach f,${PROGS}, ${bin}/$f) 15 | buildVersion: src/buildVersion.c 16 | src/buildVersion.c: ${sources} ${dependencies} 17 | @python ../lib/createVersionSources.py 18 | 19 | ../lib/%.o: ../lib/%.c ../inc/%.h 20 | cd ../lib/ && $(MAKE) 21 | 22 | ${bin}/mafFilter: src/mafFilter.c ${dependencies} ${objects} 23 | mkdir -p $(dir $@) 24 | ${cxx} ${cflags} -O3 $< ${objects} -o $@.tmp -lm 25 | mv $@.tmp $@ 26 | 27 | test/mafFilter: src/mafFilter.c ${dependencies} ${testObjects} 28 | mkdir -p $(dir $@) 29 | ${cxx} ${cflags} -g -O0 $< ${testObjects} -o $@.tmp -lm 30 | mv $@.tmp $@ 31 | 32 | %.o: %.c %.h 33 | ${cxx} -O3 -c ${cflags} $< -o $@.tmp 34 | mv $@.tmp $@ 35 | test/%.o: ${lib}/%.c ${inc}/%.h 36 | mkdir -p $(dir $@) 37 | ${cxx} -g -O0 -c ${cflags} $< -o $@.tmp 38 | mv $@.tmp $@ 39 | test/%.o: src/%.c src/%.h 40 | mkdir -p $(dir $@) 41 | ${cxx} -g -O0 -c ${cflags} $< -o $@.tmp 42 | mv $@.tmp $@ 43 | 44 | clean: 45 | rm -rf $(foreach f,${PROGS}, ${bin}/$f)
src/*.o test/ src/buildVersion.c src/buildVersion.h 46 | 47 | test: buildVersion test/mafFilter 48 | python2.7 src/test.mafFilter.py --verbose && rm -rf test/ && rmdir ./tempTestDir 49 | 50 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h 51 | ${cxx} -c ${cflags} $< 52 | ar rc CuTest.a CuTest.o 53 | ranlib CuTest.a 54 | rm -f CuTest.o 55 | mv CuTest.a $@ 56 | -------------------------------------------------------------------------------- /mafFilter/README.md: -------------------------------------------------------------------------------- 1 | # mafFilter 2 | 3 | 28 May 2012 4 | 5 | ## Author 6 | 7 | [Dent Earl](https://github.com/dentearl/) 8 | 9 | ## Description 10 | mafFilter is a program that will look through a maf file block by block and excise out sequence lines that match criteria established by the user on the command line. For example one can filter out all sequence lines that start with 'hg18' using --exclude or filter for sequence lines starting with only 'hg19', 'mm9' and 'rn4' using --include. 11 | 12 | ## Installation 13 | 1. Download the package. 14 | 2. cd into the directory. 15 | 3. Type make. 16 | 17 | ## Use 18 | mafFilter --maf [path to maf] [options] 19 | 20 | ### Options 21 | * -h, --help show this help message and exit. 22 | * -m, --maf path to maf file. 23 | * -i, --includeSeq comma separated list of sequence names to include 24 | * -e, --excludeSeq comma separated list of sequence names to exclude 25 | * -g, --noDegreeGT filter out all blocks with degree greater than this value. 26 | * -l, --noDegreeLT filter out all blocks with degree less than this value. 27 | * -v, --verbose turns on verbose output. 28 | 29 | ## Example 30 | $ ./mafFilter --maf example.maf --include hg18,mm9,rn4,banana 31 | ##maf version=1 32 | a score=0 33 | s banana.chr1 0 10 + 1000000 ACGTACGTAC 34 | ... 
35 | 36 | 37 | -------------------------------------------------------------------------------- /mafPairCoverage/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2009-2013 by 2 | # Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 3 | # Benedict Paten (benedict@soe.ucsc.edu, benedictpaten@gmail.com) 4 | # Mark Diekhans (markd@soe.ucsc.edu) 5 | # ... and other members of the Reconstruction Team of David Haussler's 6 | # lab (BME Dept. UCSC). 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | # THE SOFTWARE. 
25 | 26 | include ../inc/common.mk 27 | SHELL:=/bin/bash 28 | bin = ../bin 29 | inc = ../inc 30 | lib = ../lib 31 | PROGS = mafPairCoverage 32 | dependencies = ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c $(wildcard ${sonLibPath}/*) ${sonLibPath}/sonLib.a src/allTests.c 33 | extraAPI := ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/mafPairCoverageAPI.o ${sonLibPath}/sonLib.a src/buildVersion.o 34 | testAPI := test/sharedMaf.o test/common.o ../external/CuTest.a test/mafPairCoverageAPI.o ${sonLibPath}/sonLib.a test/buildVersion.o 35 | testObjects := test/test.mafPairCoverageAPI.o 36 | sources := src/mafPairCoverage.c src/mafPairCoverage.h 37 | 38 | .PHONY: all clean test buildVersion 39 | 40 | all: buildVersion $(foreach f,${PROGS}, ${bin}/$f) 41 | buildVersion: src/buildVersion.c 42 | src/buildVersion.c: ${sources} ${dependencies} 43 | @python ../lib/createVersionSources.py 44 | 45 | ../lib/%.o: ../lib/%.c ../inc/%.h 46 | cd ../lib/ && $(MAKE) 47 | 48 | ${bin}/mafPairCoverage: src/mafPairCoverage.c ${dependencies} ${extraAPI} 49 | mkdir -p $(dir $@) 50 | ${cxx} $< ${extraAPI} -o $@.tmp ${cflags} -lm 51 | mv $@.tmp $@ 52 | %.o: %.c %.h 53 | ${cxx} -c $< -o $@.tmp ${cflags} 54 | mv $@.tmp $@ 55 | %/mafPairCoverageAPI.o: src/mafPairCoverageAPI.c src/mafPairCoverageAPI.h 56 | mkdir -p $(dir $@) && ${cxx} -c $< -o $@.tmp ${cflags} 57 | mv $@.tmp $@ 58 | 59 | test: buildVersion test/allTests test/mafPairCoverage 60 | ./test/allTests && python2.7 src/test.mafPairCoverage.py --verbose && rm -rf ./test/ && rmdir ./tempTestDir 61 | test/allTests: src/allTests.c ${testAPI} ${testObjects} ${sonLibPath}/sonLib.a 62 | mkdir -p $(dir $@) 63 | ${cxx} $^ -o $@.tmp ${testFlags} -lm 64 | mv $@.tmp $@ 65 | test/mafPairCoverage: src/mafPairCoverage.c ${dependencies} ${testAPI} 66 | mkdir -p $(dir $@) 67 | ${cxx} $< ${testAPI} -o $@.tmp ${testFlags} -lm 68 | mv $@.tmp $@ 69 | test/%.o: ${lib}/%.c ${inc}/%.h 70 | mkdir -p $(dir $@) 71 | ${cxx} -c $< -o $@.tmp ${testFlags} 72
| mv $@.tmp $@ 73 | test/test.mafPairCoverageAPI.o: src/test.mafPairCoverageAPI.c src/test.mafPairCoverageAPI.h test/mafPairCoverageAPI.o 74 | mkdir -p $(dir $@) 75 | ${cxx} -c $< -o $@.tmp ${testFlags} 76 | mv $@.tmp $@ 77 | test/%.o: src/%.c src/%.h ${sonLibPath}/sonLib.a 78 | mkdir -p $(dir $@) 79 | ${cxx} -c $< -o $@.tmp ${testFlags} 80 | mv $@.tmp $@ 81 | 82 | clean: 83 | rm -rf $(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ src/buildVersion.c src/buildVersion.h 84 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h 85 | ${cxx} -c $< ${cflags} 86 | ar rc CuTest.a CuTest.o 87 | ranlib CuTest.a 88 | rm -f CuTest.o 89 | mv CuTest.a $@ 90 | -------------------------------------------------------------------------------- /mafPairCoverage/README.md: -------------------------------------------------------------------------------- 1 | # mafPairCoverage 2 | 3 | 7 May 2013 4 | 5 | ## Author 6 | 7 | [Dent Earl](https://github.com/dentearl/) 8 | 9 | ## Description 10 | mafPairCoverage is a program that will look through a maf file block by block and check for a particular pair of sequences (allowing input sequence to end in wildcard *) and count the number of aligned positions where the two sequences have residues aligned. Coverage of genome A onto genome B is then symmetrically calculated as the number of aligned positions divided by the total size of genome B. 11 | 12 | __BE AWARE!__ The input maf should be transitively closed (if you are unsure you can use the tool mafTransitiveClosure to transitively close the alignment) to ensure that the coverage numbers are accurate. 13 | 14 | ## Installation 15 | 1. Download the package. 16 | 2. cd into the directory. 17 | 3. Type make. 18 | 19 | ## Use 20 | mafPairCoverage --seq1 [sequence name] --seq2 [sequence name] --maf myFile.maf [options] 21 | 22 | ### Options 23 | * -h, --help show this help message and exit. 24 | * --seq1 sequence _name.chr_ e.g. `hg19*'. May end in * to indicate wildcard.
25 | * --seq2 sequence _name.chr_ e.g. `mm9.chr2'. May end in * to indicate wildcard. 26 | * --maf input maf file. 27 | * --bed path to 3 column bedfile that will define regions of interest in output. 28 | * --bin_start starting position (inclusive) of the sub-region to analyze. 29 | * --bin_end ending position (inclusive) of the sub-region to analyze. 30 | * --bin_length the length of each bin within the region. default=1000 31 | * -v, --verbose turns on verbose output. 32 | 33 | ## Example 34 | $ ./mafPairCoverage --seq1 hg19* --seq2 mm9* --maf example.maf 35 | ... 36 | 37 | -------------------------------------------------------------------------------- /mafPairCoverage/src/allTests.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 24 | */ 25 | 26 | #include 27 | #include 28 | #include "CuTest.h" 29 | #include "mafPairCoverageAPI.h" 30 | #include "test.mafPairCoverageAPI.h" 31 | 32 | CuSuite* pairCoverage_TestSuite(void); 33 | 34 | int pairCoverage_RunAllTests(void) { 35 | CuString *output = CuStringNew(); 36 | CuSuite *suite = CuSuiteNew(); 37 | CuSuite *pairCoverage_s = pairCoverage_TestSuite(); 38 | CuSuiteAddSuite(suite, pairCoverage_s); 39 | CuSuiteRun(suite); 40 | CuSuiteSummary(suite, output); 41 | CuSuiteDetails(suite, output); 42 | printf("%s\n", output->buffer); 43 | CuStringDelete(output); 44 | int status = (suite->failCount > 0); 45 | free(pairCoverage_s); 46 | CuSuiteDelete(suite); 47 | return status; 48 | } 49 | int main(void) { 50 | return pairCoverage_RunAllTests(); 51 | } 52 | -------------------------------------------------------------------------------- /mafPairCoverage/src/mafPairCoverage.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2011-2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 
24 | */ 25 | #ifndef _PAIR_COVERAGE_H_ 26 | #define _PAIR_COVERAGE_H_ 27 | 28 | #include 29 | #include 30 | #include "common.h" 31 | #include "sharedMaf.h" 32 | #include "sonLib.h" 33 | 34 | void version(void); 35 | void usage(void); 36 | void parseOptions(int argc, char **argv, char *filename, char *seq1Name, 37 | char *seq2Name, stHash *intervalsHashn, int64_t *bin_start, 38 | int64_t *bin_end, int64_t *bin_length); 39 | void reportResults(char *seq1, char *seq2, stHash *seq1Hash, stHash *seq2Hash, 40 | uint64_t *alignedPositions); 41 | void reportResultsRegion(char *seq1, char *seq2, stHash *seq1Hash, 42 | stHash *seq2Hash, uint64_t *alignedPositions, 43 | stHash *intervalsHash); 44 | 45 | #endif // _PAIR_COVERAGE_H_ 46 | -------------------------------------------------------------------------------- /mafPairCoverage/src/mafPairCoverageAPI.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2011-2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 24 | */ 25 | 26 | #ifndef _PAIR_COVERAGE_API_H_ 27 | #define _PAIR_COVERAGE_API_H_ 28 | 29 | #include 30 | #include 31 | #include "common.h" 32 | #include "sharedMaf.h" 33 | #include "sonLib.h" 34 | #include "mafPairCoverage.h" 35 | 36 | typedef struct mafCoverageCount mafCoverageCount_t; 37 | typedef struct _BinContainer BinContainer; 38 | 39 | mafCoverageCount_t* createMafCoverageCount(void); 40 | uint64_t mafCoverageCount_getSourceLength(mafCoverageCount_t *mcct); 41 | uint64_t mafCoverageCount_getObservedLength(mafCoverageCount_t *mcct); 42 | uint64_t mafCoverageCount_getCount(mafCoverageCount_t *mcct); 43 | uint64_t mafCoverageCount_getInRegion(mafCoverageCount_t *mcct); 44 | uint64_t mafCoverageCount_getOutRegion(mafCoverageCount_t *mcct); 45 | void mafCoverageCount_setSourceLength(mafCoverageCount_t *mcct, uint64_t n); 46 | void mafCoverageCount_setCount(mafCoverageCount_t *mcct, uint64_t n); 47 | void mafCoverageCount_setInRegion(mafCoverageCount_t *mcct, uint64_t n); 48 | void mafCoverageCount_setOutRegion(mafCoverageCount_t *mcct, uint64_t n); 49 | int64_t binContainer_getBinStart(BinContainer *bc); 50 | int64_t binContainer_getBinEnd(BinContainer *bc); 51 | int64_t binContainer_getBinLength(BinContainer *bc); 52 | int64_t binContainer_getNumBins(BinContainer *bc); 53 | uint64_t* binContainer_getBins(BinContainer *bc); 54 | uint64_t binContainer_accessBin(BinContainer *bc, int64_t i); 55 | void binContainer_setBinStart(BinContainer *bc, int64_t i); 56 | void binContainer_setBinEnd(BinContainer *bc, int64_t i); 57 | void binContainer_setBinLength(BinContainer *bc, int64_t); 58 | void binContainer_incrementPosition(BinContainer *bc, int64_t i); 59 | void 
binContainer_incrementBin(BinContainer *bc, int64_t i); 60 | void binContainer_setBinValue(BinContainer *bc, int64_t i, int64_t v); 61 | bool is_wild(const char *s); 62 | bool inInterval(stHash *intervalsHash, char *seq, uint64_t pos); 63 | bool searchMatched(mafLine_t *ml, const char *seq); 64 | bool searchMatched_(const char *target, const char *seq); 65 | void compareLines(mafLine_t *ml1, mafLine_t *ml2, stHash *seq1Hash, 66 | stHash *seq2Hash, uint64_t *alignedPositions, 67 | stHash *intervalsHash, BinContainer *bc); 68 | void wrapDestroyMafLine(void *p); 69 | void checkBlock(mafBlock_t *b, const char *seq1, const char *seq2, 70 | stHash *seq1Hash, stHash *seq2Hash, uint64_t *alignedPositions, 71 | stHash *intervalsHash, BinContainer *bc); 72 | void processBody(mafFileApi_t *mfa, char *seq1, char *seq2, stHash *seq1Hash, 73 | stHash *seq2Hash, 74 | uint64_t *alignedPositions, stHash *intervalsHash, 75 | BinContainer *bc); 76 | void parseBedFile(const char *filepath, stHash *intervalsHash); 77 | void reportResultsBins(char *seq1, char *seq2, BinContainer *bin_container); 78 | BinContainer* binContainer_init(void); 79 | BinContainer* binContainer_construct(int64_t bin_start, int64_t bin_end, 80 | int64_t bin_length); 81 | void binContainer_destruct(BinContainer *bc); 82 | 83 | #endif // _PAIR_COVERAGE_API_H_ 84 | -------------------------------------------------------------------------------- /mafPairCoverage/src/test.mafPairCoverageAPI.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 
24 | */ 25 | #ifndef TEST_PAIR_COVERAGE_API_H_ 26 | #define TEST_PAIR_COVERAGE_API_H_ 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include "CuTest.h" 33 | #include "common.h" 34 | #include "sharedMaf.h" 35 | #include "mafPairCoverageAPI.h" 36 | 37 | CuSuite* pairCoverage_TestSuite(void); 38 | 39 | #endif // TEST_PAIR_COVERAGE_API_H_ 40 | -------------------------------------------------------------------------------- /mafPositionFinder/Makefile: -------------------------------------------------------------------------------- 1 | include ../inc/common.mk 2 | SHELL:=/bin/bash 3 | bin = ../bin 4 | inc = ../inc 5 | lib = ../lib 6 | PROGS = mafPositionFinder 7 | dependencies = ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c 8 | objects = ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/buildVersion.o 9 | testObjects = test/common.o test/sharedMaf.o ../external/CuTest.a test/buildVersion.o 10 | sources = src/mafPositionFinder.c 11 | 12 | .PHONY: all clean test buildVersion 13 | 14 | all: buildVersion $(foreach f,${PROGS}, ${bin}/$f) 15 | buildVersion: src/buildVersion.c 16 | src/buildVersion.c: ${sources} ${dependencies} 17 | @python ../lib/createVersionSources.py 18 | 19 | ../lib/%.o: ../lib/%.c ../inc/%.h 20 | cd ../lib/ && $(MAKE) 21 | 22 | ${bin}/mafPositionFinder: src/mafPositionFinder.c ${dependencies} ${objects} 23 | mkdir -p $(dir $@) 24 | ${cxx} ${cflags} -O3 $< ${objects} -o $@.tmp -lm 25 | mv $@.tmp $@ 26 | 27 | test/mafPositionFinder: src/mafPositionFinder.c ${dependencies} ${testObjects} 28 | mkdir -p $(dir $@) 29 | ${cxx} ${cflags} -g -O0 $< ${testObjects} -o $@.tmp -lm 30 | mv $@.tmp $@ 31 | 32 | %.o: %.c %.h 33 | ${cxx} -O3 -c ${cflags} $< -o $@.tmp -lm 34 | mv $@.tmp $@ 35 | test/%.o: ${lib}/%.c ${inc}/%.h 36 | mkdir -p $(dir $@) 37 | ${cxx} -g -O0 -c ${cflags} $< -o $@.tmp -lm 38 | mv $@.tmp $@ 39 | test/%.o: src/%.c src/%.h 40 | mkdir -p $(dir $@) 41 | ${cxx} -c $< -o $@.tmp ${cflags} -g -O0 -lm
42 | mv $@.tmp $@ 43 | 44 | clean: 45 | rm -rf $(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ src/buildVersion.c src/buildVersion.h 46 | 47 | test: buildVersion test/mafPositionFinder 48 | python2.7 src/test.mafPositionFinder.py --verbose && rm -rf test/ && rmdir ./tempTestDir 49 | 50 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h 51 | ${cxx} -c ${cflags} $< 52 | ar rc CuTest.a CuTest.o 53 | ranlib CuTest.a 54 | rm -f CuTest.o 55 | mv CuTest.a $@ 56 | -------------------------------------------------------------------------------- /mafPositionFinder/README.md: -------------------------------------------------------------------------------- 1 | # mafPositionFinder 2 | 3 | 10 Feb 2012 4 | 5 | ## Author 6 | 7 | [Dent Earl](https://github.com/dentearl/) 8 | 9 | ## Description 10 | mafPositionFinder is a program that will look through a maf file for a particular sequence name and location. If a match is found, the line number and first few fields are returned. If no match is found, nothing is returned. 11 | 12 | ## Installation 13 | 1. Download the package. 14 | 2. cd into the directory. 15 | 3. Type make. 16 | 17 | ## Use 18 | mafPositionFinder --maf [path to maf] --seq [sequence name (and possibly chr)] --pos [position to search for, zero based coordinates] [options] 19 | 20 | ### Options 21 | * -h, --help show this help message and exit. 22 | * -m, --maf path to maf file. 23 | * -s, --seq sequence _name.chr_ e.g. `hg18.chr2`. 24 | * -p, --pos position along the chromosome you are searching for. Must be a non-negative number. 25 | * -v, --verbose turns on verbose output. 26 | 27 | ## Example 28 | $ ./mafPositionFinder --maf example.maf --seq apple.chr20 --pos 500 29 | block 3, line 4: s apple.chr20 0 795 + 73767698 ...AATTG ->G<- ACCCG...
30 | 31 | We see from this example that position 500 of apple.chr20 is located at line 4 of example.maf, is part of a block that starts at line 3, and that the base at this position is G flanked by AATTG on the left and ACCCG on the right. 32 | -------------------------------------------------------------------------------- /mafRowOrderer/Makefile: -------------------------------------------------------------------------------- 1 | include ../inc/common.mk 2 | SHELL:=/bin/bash 3 | bin = ../bin 4 | inc = ../inc 5 | lib = ../lib 6 | PROGS = mafRowOrderer 7 | dependencies = ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c 8 | objects = ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/buildVersion.o 9 | testObjects = test/common.o test/sharedMaf.o ../external/CuTest.a test/buildVersion.o 10 | sources = src/mafRowOrderer.c 11 | 12 | .PHONY: all clean test buildVersion 13 | 14 | all: buildVersion $(foreach f,${PROGS}, ${bin}/$f) 15 | buildVersion: src/buildVersion.c 16 | src/buildVersion.c: ${sources} ${dependencies} 17 | @python ../lib/createVersionSources.py 18 | 19 | ../lib/%.o: ../lib/%.c ../inc/%.h 20 | cd ../lib/ && make 21 | 22 | ${bin}/mafRowOrderer: src/mafRowOrderer.c ${dependencies} ${objects} 23 | mkdir -p $(dir $@) 24 | ${cxx} ${cflags} -O3 $< ${objects} -o $@.tmp -lm 25 | mv $@.tmp $@ 26 | 27 | test/mafRowOrderer: src/mafRowOrderer.c ${dependencies} ${testObjects} 28 | mkdir -p $(dir $@) 29 | ${cxx} ${cflags} -g -O0 $< ${testObjects} -o $@.tmp -lm 30 | mv $@.tmp $@ 31 | 32 | %.o: %.c %.h 33 | ${cxx} -O3 -c ${cflags} $< -o $@.tmp 34 | mv $@.tmp $@ 35 | test/%.o: ${lib}/%.c ${inc}/%.h 36 | mkdir -p $(dir $@) 37 | ${cxx} -g -O0 -c ${cflags} $< -o $@.tmp 38 | mv $@.tmp $@ 39 | test/%.o: src/%.c src/%.h 40 | mkdir -p $(dir $@) 41 | ${cxx} -g -O0 -c ${cflags} $< -o $@.tmp 42 | mv $@.tmp $@ 43 | 44 | clean: 45 | rm -rf $(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ src/buildVersion.c src/buildVersion.h 46 | 47 | test: buildVersion 
test/mafRowOrderer 48 | python2.7 src/test.mafRowOrderer.py --verbose && rm -rf test/ && rmdir ./tempTestDir 49 | 50 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h 51 | ${cxx} -c ${cflags} $< 52 | ar rc CuTest.a CuTest.o 53 | ranlib CuTest.a 54 | rm -f CuTest.o 55 | mv CuTest.a $@ 56 | -------------------------------------------------------------------------------- /mafRowOrderer/README.md: -------------------------------------------------------------------------------- 1 | # mafRowOrderer 2 | 3 | 4 October 2012 4 | 5 | ## Author 6 | 7 | [Dent Earl](https://github.com/dentearl/) 8 | 9 | ## Description 10 | mafRowOrderer is a program that will look through a maf file block by block and order the maf lines within a block according to the order provided. Species not in the established order are excised. Comments are excised. Non-sequence lines (lines not matching '^s') are excised. 11 | 12 | ## Installation 13 | 1. Download the package. 14 | 2. cd into the directory. 15 | 3. Type make. 16 | 17 | ## Use 18 | mafRowOrderer --maf [path to maf] --order [comma separated list of species] 19 | 20 | ### Options 21 | * -h, --help show this help message and exit. 22 | * -m, --maf path to maf file. 23 | * --order comma separated list of species names. 24 | * -v, --verbose turns on verbose output. 25 | 26 | ## Example 27 | $ ./mafRowOrderer --maf example.maf --order hg18,mm9,rn4,banana 28 | ##maf version=1 29 | a score=0 30 | s banana.chr1 0 10 + 1000000 ACGTACGTAC 31 | ...
32 | 33 | 34 | -------------------------------------------------------------------------------- /mafSorter/Makefile: -------------------------------------------------------------------------------- 1 | include ../inc/common.mk 2 | SHELL:=/bin/bash 3 | bin = ../bin 4 | inc = ../inc 5 | lib = ../lib 6 | PROGS = mafSorter 7 | dependencies = ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c 8 | objects = ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/buildVersion.o 9 | testObjects = ./test/common.o ./test/sharedMaf.o ../external/CuTest.a test/buildVersion.o 10 | sources = src/mafSorter.c 11 | 12 | .PHONY: all clean test buildVersion 13 | 14 | all: buildVersion $(foreach f,${PROGS}, ${bin}/$f) 15 | buildVersion: src/buildVersion.c 16 | src/buildVersion.c: ${sources} ${dependencies} 17 | @python ${lib}/createVersionSources.py 18 | 19 | ../lib/%.o: ../lib/%.c ../inc/%.h 20 | cd ../lib/ && make 21 | 22 | ${bin}/mafSorter: src/mafSorter.c ${dependencies} ${objects} 23 | mkdir -p $(dir $@) 24 | ${cxx} ${cflags} -O3 $< ${objects} -o $@.tmp -lm 25 | mv $@.tmp $@ 26 | 27 | test/mafSorter: src/mafSorter.c ${dependencies} ${testObjects} 28 | mkdir -p $(dir $@) 29 | ${cxx} ${cflags} -g -O0 $< ${testObjects} -o $@.tmp -lm 30 | mv $@.tmp $@ 31 | 32 | %.o: %.c %.h 33 | ${cxx} -O3 -c ${cflags} $< -o $@.tmp 34 | mv $@.tmp $@ 35 | test/%.o: ${lib}/%.c ${inc}/%.h 36 | mkdir -p $(dir $@) 37 | ${cxx} -g -O0 -c ${cflags} $< -o $@.tmp 38 | mv $@.tmp $@ 39 | test/%.o: src/%.c src/%.h 40 | mkdir -p $(dir $@) 41 | ${cxx} -g -O0 -c ${cflags} $< -o $@.tmp 42 | mv $@.tmp $@ 43 | 44 | clean: 45 | rm -rf $(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ src/buildVersion.c src/buildVersion.h 46 | 47 | test: buildVersion test/mafSorter 48 | python2.7 src/test.mafSorter.py --verbose && rm -rf test/ && rmdir ./tempTestDir 49 | 50 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h 51 | ${cxx} -c ${cflags} $< 52 | ar rc CuTest.a CuTest.o 53 | ranlib CuTest.a 54 
| rm -f CuTest.o 55 | mv CuTest.a $@ 56 | -------------------------------------------------------------------------------- /mafSorter/README.md: -------------------------------------------------------------------------------- 1 | # mafSorter 2 | 3 | 19 March 2012 4 | 5 | ## Author 6 | [Dent Earl](https://github.com/dentearl/) 7 | 8 | ## Description 9 | mafSorter is a program that will sort the blocks of a maf in ascending order of the sequence start field of the specified sequence name. Blocks that do not contain the specified sequence will be output at the start of the maf in the order they appear in the input, followed by the sorted blocks. Blocks where the target sequence appears twice will be tagged with the largest start value. 10 | 11 | ## Installation 12 | 1. Download the package. 13 | 2. cd into the directory. 14 | 3. Type make. 15 | 16 | ## Use 17 | mafSorter --seq [sequence name (and possibly chr)] [options] < myFile.maf 18 | 19 | ### Options 20 | * -h, --help show this help message and exit. 21 | * -s, --seq sequence _name.chr_ e.g. `hg18.chr2`. 22 | * -v, --verbose turns on verbose output. 23 | 24 | ## Example 25 | $ ./mafSorter --seq hg19.chr20 < example.maf 26 | ##maf version=1 27 | 28 | #a score=0 pctid=99.2 29 | #s hg19.chr20 0 795 + 73767698 GAT... 30 | ...
31 | 32 | -------------------------------------------------------------------------------- /mafStats/Makefile: -------------------------------------------------------------------------------- 1 | include ../inc/common.mk 2 | SHELL:=/bin/bash 3 | bin = ../bin 4 | inc = ../inc 5 | lib = ../lib 6 | PROGS = mafStats 7 | dependencies = ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c 8 | objects := ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/test.mafStats.o ${sonLibPath}/sonLib.a src/buildVersion.o 9 | testObjects := test/sharedMaf.o test/common.o ../external/CuTest.a src/test.mafStats.o ${sonLibPath}/sonLib.a test/buildVersion.o 10 | sources = src/mafStats.c src/mafStats.h 11 | 12 | .PHONY: all clean test buildVersion 13 | 14 | all: buildVersion $(foreach f,${PROGS}, ${bin}/$f) 15 | buildVersion: src/buildVersion.c 16 | src/buildVersion.c: ${sources} ${dependencies} 17 | @python ../lib/createVersionSources.py 18 | 19 | ../lib/%.o: ../lib/%.c ../inc/%.h 20 | cd ../lib/ && make 21 | 22 | ${bin}/mafStats: src/mafStats.c ${dependencies} ${objects} 23 | mkdir -p $(dir $@) 24 | ${cxx} $< ${objects} -o $@.tmp ${cflags} ${lm} 25 | mv $@.tmp $@ 26 | 27 | test/mafStats: src/mafStats.c ${dependencies} ${testObjects} 28 | mkdir -p $(dir $@) 29 | ${cxx} $< src/allTests.c ${testObjects} -o $@.tmp ${testFlags} ${lm} 30 | mv $@.tmp $@ 31 | %.o: %.c %.h 32 | ${cxx} -c $< -o $@.tmp ${cflags} 33 | mv $@.tmp $@ 34 | test/%.o: ${lib}/%.c ${inc}/%.h 35 | @echo apple 36 | mkdir -p $(dir $@) 37 | ${cxx} -c $< -o $@.tmp ${testFlags} ${lm} 38 | mv $@.tmp $@ 39 | test/%.o: src/%.c src/%.h 40 | @echo orange 41 | mkdir -p $(dir $@) 42 | ${cxx} -c $< -o $@.tmp ${testFlags} 43 | mv $@.tmp $@ 44 | 45 | clean: 46 | rm -rf $(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ src/buildVersion.c src/buildVersion.h 47 | 48 | test: buildVersion test/allTests 49 | test/allTests && rm -rf ./test/ 50 | 51 | test/allTests: src/allTests.c ${testObjects} 52 | mkdir -p $(dir $@) 53 
| ${cxx} $^ -o $@.tmp ${testFlags} ${lm} 54 | mv $@.tmp $@ 55 | 56 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h 57 | ${cxx} -c $< ${cflags} 58 | ar rc CuTest.a CuTest.o 59 | ranlib CuTest.a 60 | rm -f CuTest.o 61 | mv CuTest.a $@ 62 | -------------------------------------------------------------------------------- /mafStats/README.md: -------------------------------------------------------------------------------- 1 | # mafStats 2 | 5 July 2012 3 | 4 | ## Author 5 | [Dent Earl](https://github.com/dentearl/) 6 | 7 | ## Description 8 | A program to read MAF file and report back statistics about the contents. 9 | 10 | ## Installation 11 | 1. Download the package. 12 | 2. cd into the directory. 13 | 3. Type make. 14 | 15 | ## Use 16 | mafStats --maf mafFile.maf [options] 17 | 18 | ### Options 19 | * -h, --help show this help message and exit. 20 | * -m, --maf path to maf file. 21 | 22 | ### Example 23 | $ mafStats --maf smallDemo.maf 24 | smallDemo.maf 25 | ------------------------------ 26 | File size: 66.13 MB 27 | Lines: 212986 28 | Header lines: 5 29 | s lines: 144592 30 | e lines: 0 31 | i lines: 0 32 | q lines: 0 33 | Blank lines: 68388 34 | Comment lines: 1 35 | Sequence chars: 49181016 ( 77.65%) 36 | Gap chars: 14154166 ( 22.35%) 37 | Blocks: 34194 38 | Ave block area: 1852.23 39 | Max block area: 37840 40 | Ave seq field length: 340.14 41 | Max seq field length: 7568 42 | Ave seq count in block: 4.23 43 | Max seq count in block: 5 44 | 10 unique sequences, ordered by # bases present: 45 | simHuman.chr1: 5311230 ( 10.80%) 46 | simHuman.chr0: 5225141 ( 10.62%) 47 | simDog.chr1: 5048843 ( 10.27%) 48 | simDog.chr0: 5023883 ( 10.22%) 49 | simCow.chr1: 4989381 ( 10.14%) 50 | simCow.chr0: 4979544 ( 10.12%) 51 | simMouse.chr1: 4671434 ( 9.50%) 52 | simMouse.chr0: 4654920 ( 9.46%) 53 | simRat.chr1: 4654607 ( 9.46%) 54 | simRat.chr0: 4622033 ( 9.40%) 55 | total: 49181016 (100.00%) 56 | 57 | 
-------------------------------------------------------------------------------- /mafStats/src/allTests.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 
24 | */ 25 | 26 | #include 27 | #include 28 | #include "CuTest.h" 29 | 30 | CuSuite* mafStats_TestSuite(void); 31 | 32 | int mafStats_RunAllTests(void) { 33 | return 0; 34 | } 35 | int main(void) { 36 | return mafStats_RunAllTests(); 37 | } 38 | -------------------------------------------------------------------------------- /mafStats/src/mafStats.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 
24 | */ 25 | #ifndef _MAFSTATS_H_ 26 | #define _MAFSTATS_H_ 27 | 28 | #include 29 | #include 30 | 31 | typedef struct stats { 32 | char *filename; 33 | uint64_t numLines; 34 | uint64_t numHeaderLines; 35 | uint64_t numSeqLines; 36 | uint64_t numBlocks; 37 | uint64_t numELines; 38 | uint64_t numILines; 39 | uint64_t numQLines; 40 | uint64_t numCommentLines; 41 | uint64_t numGapCharacters; 42 | uint64_t numSeqCharacters; 43 | uint64_t numColumns; 44 | uint64_t sumSeqField; 45 | uint64_t maxSeqField; 46 | uint64_t sumNumSpeciesInBlock; 47 | uint64_t maxNumSpeciesInBlock; 48 | uint64_t sumBlockArea; 49 | uint64_t maxBlockArea; 50 | stHash *seqHash; // keyed with names, valued with uint64_t count of bases present 51 | } stats_t; 52 | typedef struct seq { 53 | char *name; 54 | uint64_t count; 55 | } seq_t; 56 | 57 | void version(void); 58 | void usage(void); 59 | void parseOptions(int argc, char **argv, char **filename); 60 | stats_t* stats_create(char *filename); 61 | void stats_destroy(stats_t *stats); 62 | void countCharacters(char *seq, stats_t *stats); 63 | void processBlock(mafBlock_t *mb, stats_t *stats); 64 | void recordStats(mafFileApi_t *mfa, stats_t *stats); 65 | void readFilesize(struct stat *fileStat, char **filesizeString); 66 | int cmp_seq(const void *a, const void *b); 67 | void reportHash(stHash *hash); 68 | void reportStats(stats_t *stats); 69 | 70 | #endif // _MAFSTATS_H_ 71 | -------------------------------------------------------------------------------- /mafStats/src/test.mafStats.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 
24 | */ 25 | 26 | #include "CuTest.h" 27 | -------------------------------------------------------------------------------- /mafStats/src/test.mafStats.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComparativeGenomicsToolkit/mafTools/259e5b47fa2ee17ff5ad1bba9cebf2992cbb7228/mafStats/src/test.mafStats.h -------------------------------------------------------------------------------- /mafStrander/Makefile: -------------------------------------------------------------------------------- 1 | include ../inc/common.mk 2 | SHELL:=/bin/bash 3 | bin = ../bin 4 | inc = ../inc 5 | lib = ../lib 6 | PROGS = mafStrander 7 | dependencies = ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c 8 | objects = ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/buildVersion.o 9 | testObjects := test/sharedMaf.o test/common.o ../external/CuTest.a test/buildVersion.o 10 | sources = src/mafStrander.c 11 | 12 | .PHONY: all clean test buildVersion 13 | 14 | all: buildVersion $(foreach f,${PROGS}, ${bin}/$f) 15 | buildVersion: src/buildVersion.c 16 | src/buildVersion.c: ${sources} ${dependencies} 17 | @python ../lib/createVersionSources.py 18 | 19 | ../lib/%.o: ../lib/%.c ../inc/%.h 20 | cd ../lib/ && make 21 | 22 | ${bin}/mafStrander: src/mafStrander.c ${dependencies} ${objects} 23 | mkdir -p $(dir $@) 24 | ${cxx} ${cflags} -O3 $< ${objects} -o $@.tmp -lm 25 | mv $@.tmp $@ 26 | 27 | test/mafStrander: src/mafStrander.c ${dependencies} ${testObjects} 28 | mkdir -p $(dir $@) 29 | ${cxx} ${cflags} -g -O0 $< ${testObjects} -o $@.tmp -lm 30 | mv $@.tmp $@ 31 | 32 | %.o: %.c %.h 33 | ${cxx} -c ${cflags} $< -o $@.tmp 34 | mv $@.tmp $@ 35 | test/%.o: ${lib}/%.c ${inc}/%.h 36 | mkdir -p $(dir $@) 37 | ${cxx} -g -O0 -c ${cflags} $< -o $@.tmp 38 | mv $@.tmp $@ 39 | test/%.o: src/%.c src/%.h 40 | mkdir -p $(dir $@) 41 | ${cxx} -g -O0 -c ${cflags} $< -o $@.tmp 42 | mv $@.tmp $@ 43 | 44 | clean: 45 | rm -rf 
$(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ src/buildVersion.c src/buildVersion.h 46 | 47 | test: buildVersion test/mafStrander 48 | python2.7 src/test.mafStrander.py --verbose && rm -rf test/ && rmdir ./tempTestDir 49 | 50 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h 51 | ${cxx} -c ${cflags} $< 52 | ar rc CuTest.a CuTest.o 53 | ranlib CuTest.a 54 | rm -f CuTest.o 55 | mv CuTest.a $@ 56 | -------------------------------------------------------------------------------- /mafStrander/README.md: -------------------------------------------------------------------------------- 1 | # mafStrander 2 | 3 | 3 October 2012 4 | 5 | ## Author 6 | [Dent Earl](https://github.com/dentearl/) 7 | 8 | ## Description 9 | mafStrander is a program to coerce a particular strandedness for all blocks based on the strandedness of a target sequence. When a block contains the target sequence but in the flipped orientation (relative to the --strand option) then the block is flipped, i.e. all start coordinates are transformed, and all sequence fields are reverse-complemented. If the block contains the target sequence multiple times and with conflicting strands (i.e. both + and - strands are observed), then nothing is done. 10 | 11 | ## Installation 12 | 1. Download the package. 13 | 2. cd into the directory. 14 | 3. Type make. 15 | 16 | ## Use 17 | mafStrander --maf alignment.maf --seq hg18 --strand + > positive.maf 18 | 19 | ### Options 20 | * -h, --help show this help message and exit. 21 | * --maf input alignment maf file. 22 | * --seq sequence to base block strandedness upon. (string comparison only done for length of input, i.e. --seq=hg18 will match hg18.chr1, hg18.chr2, etc.) 23 | * --strand strand to enforce, when possible. may be + or -, defaults to +.
24 | 25 | ## Example 26 | $ mafStrander --maf alignment.maf --seq hg18 --strand + > positive.maf 27 | 28 | -------------------------------------------------------------------------------- /mafToFastaStitcher/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2009-2013 by 2 | # Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 3 | # Benedict Paten (benedict@soe.ucsc.edu, benedictpaten@gmail.com) 4 | # Mark Diekhans (markd@soe.ucsc.edu) 5 | # ... and other members of the Reconstruction Team of David Haussler's 6 | # lab (BME Dept. UCSC). 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | # THE SOFTWARE. 
25 | 26 | include ../inc/common.mk 27 | SHELL:=/bin/bash 28 | bin = ../bin 29 | inc = ../inc 30 | lib = ../lib 31 | PROGS = mafToFastaStitcher 32 | dependencies = ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c $(wildcard ${sonLibPath}/*) ${sonLibPath}/sonLib.a src/allTests.c 33 | extraAPI := ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/mafToFastaStitcherAPI.o ${sonLibPath}/sonLib.a src/buildVersion.o 34 | testAPI := test/sharedMaf.o test/common.o ../external/CuTest.a test/mafToFastaStitcherAPI.o ${sonLibPath}/sonLib.a test/buildVersion.o 35 | testObjects := test/test.mafToFastaStitcherAPI.o 36 | sources := src/mafToFastaStitcher.c src/mafToFastaStitcher.h 37 | 38 | .PHONY: all clean test buildVersion 39 | 40 | all: buildVersion $(foreach f,${PROGS}, ${bin}/$f) 41 | buildVersion: src/buildVersion.c 42 | src/buildVersion.c: ${sources} ${dependencies} 43 | @python ../lib/createVersionSources.py 44 | 45 | ../lib/%.o: ../lib/%.c ../inc/%.h 46 | cd ../lib/ && make 47 | 48 | ${bin}/mafToFastaStitcher: src/mafToFastaStitcher.c ${dependencies} ${extraAPI} 49 | mkdir -p $(dir $@) 50 | ${cxx} $< ${extraAPI} -o $@.tmp ${cflags} ${lm} 51 | mv $@.tmp $@ 52 | %.o: %.c %.h 53 | ${cxx} -c $< -o $@.tmp ${cflags} 54 | mv $@.tmp $@ 55 | %/mafToFastaStitcherAPI.o: src/mafToFastaStitcherAPI.c src/mafToFastaStitcherAPI.h 56 | ${cxx} -c $< -o $@.tmp ${cflags} ${lm} 57 | mv $@.tmp $@ 58 | 59 | test: buildVersion test/allTests test/mafToFastaStitcher 60 | ./test/allTests && python2.7 src/test.mafToFastaStitcher.py --verbose && rm -rf ./test/ && rmdir ./tempTestDir 61 | test/allTests: src/allTests.c ${testAPI} ${testObjects} ${sonLibPath}/sonLib.a 62 | mkdir -p $(dir $@) 63 | ${cxx} $^ -o $@.tmp ${testFlags} ${lm} 64 | mv $@.tmp $@ 65 | test/mafToFastaStitcher: src/mafToFastaStitcher.c ${dependencies} ${testAPI} 66 | mkdir -p $(dir $@) 67 | ${cxx} $< ${testAPI} -o $@.tmp ${testFlags} ${lm} 68 | mv $@.tmp $@ 69 | test/%.o: ${lib}/%.c ${inc}/%.h 70 | mkdir 
-p $(dir $@) 71 | ${cxx} -c $< -o $@.tmp ${testFlags} 72 | mv $@.tmp $@ 73 | test/test.mafToFastaStitcherAPI.o: src/test.mafToFastaStitcherAPI.c src/test.mafToFastaStitcherAPI.h test/mafToFastaStitcherAPI.o 74 | mkdir -p $(dir $@) 75 | ${cxx} -c $< -o $@.tmp ${testFlags} 76 | mv $@.tmp $@ 77 | test/%.o: src/%.c src/%.h ${sonLibPath}/sonLib.a 78 | mkdir -p $(dir $@) 79 | ${cxx} -c $< -o $@.tmp ${testFlags} 80 | mv $@.tmp $@ 81 | 82 | clean: 83 | rm -rf $(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ src/buildVersion.c src/buildVersion.h 84 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h 85 | ${cxx} -c $< ${cflags} 86 | ar rc CuTest.a CuTest.o 87 | ranlib CuTest.a 88 | rm -f CuTest.o 89 | mv CuTest.a $@ 90 | -------------------------------------------------------------------------------- /mafToFastaStitcher/README.md: -------------------------------------------------------------------------------- 1 | # mafToFastaStitcher 2 | 3 | 15 October 2012 4 | 5 | ## Author 6 | [Dent Earl](https://github.com/dentearl/) 7 | 8 | ## Description 9 | mafToFastaStitcher is a program that takes a multiple alignment format (MAF) file and some sequences, and stitches the alignment together into a single multiple sequence fasta (MFA) file. 10 | 11 | As an aside, the intended output is just dissimilar enough to what is created by multiz's maf2fasta that this tool is necessary but the output is similar enough as to be frustrating. 12 | 13 | ## Dependencies 14 | * sonLib https://github.com/benedictpaten/sonLib/ 15 | 16 | ## Installation 17 | 1. Download the package. 18 | 2. cd into the directory. 19 | 3. Type make. 20 | 21 | ## Use 22 | mafToFastaStitcher --maf alignment.maf --seqs seq.fa[,seq2.fa,...] --outMfa output.mfa --breakpointPenalty 10 [options] 23 | 24 | ### Options 25 | * -h, --help show this help message and exit. 26 | * --maf input alignment maf file. 27 | * --seqs comma separated list of fasta sequences. each fasta may contain multiple entries.
all sequences in the input alignment must be accounted for with an element in a fasta. 28 | * --outMfa multiple sequence fasta output file. 29 | * --breakpointPenalty number of N characters to insert into a sequence when a breakpoint is detected. 30 | * --interstitialSequence maximum length of interstitial sequence to be added (from a fasta) into the fasta before a breakpoint is declared and the --breakpointPenalty number of N's is added instead. 31 | * --outMaf optional output to single block maf in addition to multiple sequence fasta output. 32 | 33 | ## Example 34 | $ mafToFastaStitcher --maf alignment.maf --seqs seq.fa,seq2.fa --breakpointPenalty 5 --outMfa output.mfa 35 | 36 | ## Detailed input and output example 37 | 38 | # Input maf 39 | ## maf version=1 40 | 41 | a score=0.0 status=test.input 42 | s ref.chr1 10 10 + 100 ACGTACGTAC 43 | s seq1.chr@ 0 10 + 100 AAAAAAAAAA 44 | s seq2.chr& 10 5 + 100 -----CCCCC 45 | s seq6.chr1 10 5 + 100 -----GGGGG 46 | s seq7.chr20 0 5 + 100 AAAAA----- 47 | 48 | a score=0.0 status=test.input 49 | s ref.chr1 20 10 + 100 GTACGTACGT 50 | s seq2.chr!! 5 5 + 100 CCCCC----- 51 | s seq3.chr0 20 5 + 100 -----GGGGG 52 | s seq6.chr1 22 5 + 100 GGGGG----- 53 | 54 | a score=0.0 status=test.input 55 | s ref.chr1 30 10 + 100 ACGTACGTAC 56 | s seq4.chr1 0 5 - 100 GG-----GGG 57 | s seq5.chr2 0 10 + 100 CCCCCCCCCC 58 | s seq7.chr20 42 5 + 100 -----AAAAA 59 | 60 | # Input sequences 61 | Here in a single file, but could be broken across multiple files 62 | > ref.chr1 63 | ggggggggggACGTACGTACGTACGTACGTACGTACGTACgg 64 | > seq1.chr@ 65 | AAAAAAAAAAgg 66 | > seq2.chr& 67 | aaaaaaaaaaCCCCCaa 68 | > seq2.chr!! 
69 | aaaaaCCCCCaa 70 | > seq3.chr0 71 | aaaaaaaaaaaaaaaaaaaGGGGGaa 72 | seq4.chr1 73 | aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 74 | aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaCCCCC 75 | > seq6.chr1 76 | aaaaaaaaaGGGGGaaaaaaaGGGGGaa 77 | > seq7.chr20 78 | AAAAAGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAAAAATT 79 | 80 | # Expected multiple sequence fasta output 81 | Note that when a sequence is only represented with a single chromosome, that chromosome will persist (as in ref.chr1) but when multiple chromosomes are present in the MAF that they are collapsed together. 82 | > ref.chr1 83 | ACGTACGTAC------------GTACGTACGT------------------ 84 | -------------------ACGTACGTAC 85 | > seq1 86 | AAAAAAAAAA-----------------AAAAA------------------ 87 | ----------------------------- 88 | > seq2 89 | -----CCCCCNNNNN-------CCCCC----------------------- 90 | ----------------------------- 91 | > seq6 92 | -----GGGGG-----aaaaaaaGGGGG----------------------- 93 | ----------------------------- 94 | > seq7 95 | AAAAA---------------------------gggggggggggggggggg 96 | ggggggggggggggggggg-----AAAAA 97 | > seq3 98 | ---------------------------GGGGG------------------ 99 | ----------------------------- 100 | > seq4 101 | -------------------------------------------------- 102 | -------------------GG-----GGG 103 | > seq5 104 | -------------------------------------------------- 105 | -------------------CCCCCCCCCC 106 | 107 | 108 | # optional maf output 109 | where --breakpointPenalty is 5 (as seen in seq2) and --interstitialSequence is *at least* 17, as seen in seq7 (the long string of g's is pulled in from the fasta). 
110 | a score=0.0 status=test.expected 111 | s ref.chr1 10 30 + 100 ACGTACGTAC------------GTACGTACGT-------------------------------------ACGTACGTAC 112 | s seq1 0 15 + 15 AAAAAAAAAA-----------------AAAAA----------------------------------------------- 113 | s seq2 0 15 + 15 -----CCCCCNNNNN-------CCCCC---------------------------------------------------- 114 | s seq6 0 17 + 17 -----GGGGG-----aaaaaaaGGGGG---------------------------------------------------- 115 | s seq7 0 47 + 47 AAAAA---------------------------ggggggggggggggggggggggggggggggggggggg-----AAAAA 116 | s seq3 0 5 + 5 ---------------------------GGGGG----------------------------------------------- 117 | s seq4 0 5 - 5 ---------------------------------------------------------------------GG-----GGG 118 | s seq5 0 10 + 10 ---------------------------------------------------------------------CCCCCCCCCC 119 | 120 | -------------------------------------------------------------------------------- /mafToFastaStitcher/src/allTests.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 
16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 24 | */ 25 | 26 | #include 27 | #include 28 | #include "CuTest.h" 29 | #include "sonLib.h" 30 | #include "mafToFastaStitcher.h" 31 | #include "mafToFastaStitcherAPI.h" 32 | #include "test.mafToFastaStitcherAPI.h" 33 | 34 | CuSuite* mafToFastaStitcher_TestSuite(void); /* NOTE(review): test.mafToFastaStitcherAPI.h declares mtfs_TestSuite() instead -- confirm which name the test source actually defines */ 35 | /* Build and run the mafToFastaStitcher CuTest suite, print its summary and details to stdout, and return 1 if any test failed, 0 otherwise. */ 36 | int mtfs_RunAllTests(void) { 37 | CuString *output = CuStringNew(); 38 | CuSuite *suite = CuSuiteNew(); 39 | CuSuite *mtfs_s = mafToFastaStitcher_TestSuite(); 40 | CuSuiteAddSuite(suite, mtfs_s); 41 | CuSuiteRun(suite); 42 | CuSuiteSummary(suite, output); 43 | CuSuiteDetails(suite, output); 44 | printf("%s\n", output->buffer); 45 | CuStringDelete(output); 46 | int status = (suite->failCount > 0); /* read the result before the suite is deleted below */ 47 | free(mtfs_s); /* only the sub-suite struct is released here; presumably its test cases now belong to `suite` -- TODO confirm against CuSuiteAddSuite */ 48 | CuSuiteDelete(suite); 49 | return status; 50 | } 51 | int main(void) { 52 | return mtfs_RunAllTests(); 53 | } 54 | -------------------------------------------------------------------------------- /mafToFastaStitcher/src/mafToFastaStitcher.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 
24 | */ 25 | 26 | #include // mac os x toupper() 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include "common.h" 33 | #include "CuTest.h" 34 | #include "sharedMaf.h" 35 | #include "sonLib.h" 36 | #include "mafToFastaStitcher.h" 37 | #include "mafToFastaStitcherAPI.h" 38 | #include "buildVersion.h" 39 | 40 | const char *g_version = "v0.1 Oct 2012"; 41 | 42 | void version(void); 43 | void usage(void); 44 | 45 | void parseOptions(int argc, char **argv, options_t *options) { 46 | int c; 47 | bool setMafName = false, setSeqNames = false, setOutName = false, 48 | setBreakpointPenalty = false, setInterstitialSequence = false; 49 | 50 | size_t i; 51 | while (1) { 52 | static struct option long_options[] = { 53 | {"debug", no_argument, 0, 'd'}, 54 | {"verbose", no_argument, 0, 'v'}, 55 | {"help", no_argument, 0, 'h'}, 56 | {"version", no_argument, 0, 0}, 57 | {"maf", required_argument, 0, 0}, 58 | {"seqs", required_argument, 0, 0}, 59 | {"outMfa", required_argument, 0, 0}, 60 | {"outMaf", required_argument, 0, 0}, 61 | {"breakpointPenalty", required_argument, 0, 0}, 62 | {"interstitialSequence", required_argument, 0, 0}, 63 | {"referenceSequence", required_argument, 0, 0}, 64 | {0, 0, 0, 0} 65 | }; 66 | int option_index = 0; 67 | c = getopt_long(argc, argv, "d:m:s:h:v:t", long_options, &option_index); 68 | if (c == -1) 69 | break; 70 | switch (c) { 71 | case 0: 72 | if (strcmp("version", long_options[option_index].name) == 0) { 73 | version(); 74 | exit(EXIT_SUCCESS); 75 | } 76 | if (strcmp("maf", long_options[option_index].name) == 0) { 77 | setMafName = true; 78 | options->maf = stString_copy(optarg); 79 | break; 80 | } 81 | if (strcmp("seqs", long_options[option_index].name) == 0) { 82 | setSeqNames = true; 83 | options->seqs = stString_copy(optarg); 84 | break; 85 | } 86 | if (strcmp("outMaf", long_options[option_index].name) == 0) { 87 | setOutName = true; 88 | options->outMaf = stString_copy(optarg); 89 | break; 90 | } 91 | if 
(strcmp("outMfa", long_options[option_index].name) == 0) { 92 | setOutName = true; 93 | options->outMfa = stString_copy(optarg); 94 | break; 95 | } 96 | if (strcmp("breakpointPenalty", long_options[option_index].name) == 0) { 97 | setBreakpointPenalty = true; 98 | i = sscanf(optarg, "%" PRIu64, &(options->breakpointPenalty)); 99 | assert(i == 1); 100 | break; 101 | } 102 | if (strcmp("interstitialSequence", long_options[option_index].name) == 0) { 103 | setInterstitialSequence = true; 104 | i = sscanf(optarg, "%" PRIu64, &(options->interstitialSequence)); 105 | assert(i == 1); 106 | break; 107 | } 108 | if (strcmp("referenceSequence", long_options[option_index].name) == 0) { 109 | options->reference = stString_copy(optarg); 110 | break; 111 | } 112 | break; 113 | case 'v': 114 | g_verbose_flag++; 115 | break; 116 | case 'd': 117 | g_debug_flag = 1; 118 | break; 119 | case 'h': 120 | case '?': 121 | usage(); 122 | break; 123 | default: 124 | abort(); 125 | } 126 | } 127 | if (!setMafName) { 128 | fprintf(stderr, "specify --maf\n"); 129 | usage(); 130 | } 131 | if (!setOutName) { 132 | fprintf(stderr, "specify --outMaf or --outMfa\n"); 133 | usage(); 134 | } 135 | if (!setSeqNames) { 136 | fprintf(stderr, "specify --seqs\n"); 137 | usage(); 138 | } 139 | if (!setBreakpointPenalty) { 140 | fprintf(stderr, "specify --breakpointPenalty\n"); 141 | usage(); 142 | } 143 | if (!setInterstitialSequence) { 144 | fprintf(stderr, "specify --interstitialSequence\n"); 145 | usage(); 146 | } 147 | // Check there's nothing left over on the command line 148 | if (optind < argc) { 149 | char *errorString = st_malloc(kMaxStringLength); 150 | strcpy(errorString, "Unexpected arguments:"); 151 | while (optind < argc) { 152 | strcat(errorString, " "); 153 | strcat(errorString, argv[optind++]); 154 | } 155 | fprintf(stderr, "%s\n", errorString); 156 | free(errorString); 157 | usage(); 158 | } 159 | } 160 | void version(void) { 161 | fprintf(stderr, "mafToFastaStitcher, %s\nbuild: %s, %s, 
%s\n\n", g_version, g_build_date, 162 | g_build_git_branch, g_build_git_sha); 163 | } 164 | void usage(void) { 165 | version(); 166 | fprintf(stderr, "Usage: mafToFastaStitcher --maf mafFile.maf --seqs seq1.fa,seq2.fa[,...] --breakpointPenalty 5 --interstitialSequence 20 --outMfa output.mfa \n\n" 167 | "\n\n"); 168 | fprintf(stderr, "Options: \n"); 169 | usageMessage('h', "help", "show this message and exit."); 170 | usageMessage('m', "maf", "path to the maf file."); 171 | usageMessage('\0', "seqs", "comma separated list of fasta sequences. each fasta may contain multiple entries. all sequences in the input alignment must be accounted for with an element in a fasta."); 172 | usageMessage('\0', "outMfa", "multiple sequence fasta output file."); 173 | usageMessage('\0', "breakpointPenalty", "number of `N' characters to insert into a sequence when a breakpoint is detected."); 174 | usageMessage('\0', "interstitialSequence", "maximum length of interstitial sequence to be added (from a fasta) into the fasta before a breakpoint is declared and the --breakpointPenalty number of N's is added instead."); 175 | usageMessage('\0', "outMaf", "multiple alignment format output file."); 176 | usageMessage('\0', "reference", "optional. The name of the reference sequence. 
All intervening reference sequence between the first and last block of the input --maf will be read out in the output."); 177 | usageMessage('v', "verbose", "turns on verbose output."); 178 | exit(EXIT_FAILURE); 179 | } 180 | int main(int argc, char **argv) { 181 | options_t *options = options_construct(); 182 | stHash *sequenceHash = NULL; // keyed on fasta headers, valued with mtfseq_t pointers 183 | stHash *alignmentHash = stHash_construct3(stHash_stringKey, stHash_stringEqualKey, free, destroyRow); // keyed on species names, valued with row_t pointers 184 | stList *rowOrder = stList_construct3(0, free); // when adding keys to alignmentHash, append to this list 185 | parseOptions(argc, argv, options); 186 | // read fastas, populate sequenceHash 187 | de_verbose("Creating sequence hash.\n"); 188 | sequenceHash = createSequenceHash(options->seqs); 189 | mafFileApi_t *mfapi = maf_newMfa(options->maf, "r"); 190 | de_verbose("Creating alignment hash.\n"); 191 | buildAlignmentHash(mfapi, alignmentHash, sequenceHash, rowOrder, options); 192 | if (options->outMfa != NULL) { 193 | // fasta output 194 | de_verbose("Writing fasta output.\n"); 195 | writeFastaOut(alignmentHash, rowOrder, options); 196 | } 197 | if (options->outMaf != NULL) { 198 | // maf output 199 | de_verbose("Writing maf output.\n"); 200 | writeMafOut(alignmentHash, rowOrder, options); 201 | } 202 | // cleanup 203 | maf_destroyMfa(mfapi); 204 | stHash_destruct(alignmentHash); 205 | stHash_destruct(sequenceHash); 206 | stList_destruct(rowOrder); 207 | destroyOptions(options); 208 | return(EXIT_SUCCESS); 209 | } 210 | -------------------------------------------------------------------------------- /mafToFastaStitcher/src/mafToFastaStitcher.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. 
UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 24 | */ 25 | #ifndef MAFTOFASTASTITCHER_H_ 26 | #define MAFTOFASTASTITCHER_H_ 27 | #include 28 | #include "sonLib.h" 29 | #include "common.h" 30 | #include "CuTest.h" 31 | #include "sharedMaf.h" 32 | #include "mafToFastaStitcherAPI.h" 33 | 34 | void usage(void); 35 | void parseOptions(int argc, char **argv, options_t *options); 36 | 37 | #endif // MAFTOFASTASTITCHER_H_ 38 | -------------------------------------------------------------------------------- /mafToFastaStitcher/src/mafToFastaStitcherAPI.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 
24 | */ 25 | #ifndef MAFTOFASTASTITCHER_API_H_ 26 | #define MAFTOFASTASTITCHER_API_H_ 27 | #include 28 | #include "common.h" 29 | #include "CuTest.h" 30 | #include "sharedMaf.h" 31 | #include "sonLib.h" 32 | 33 | typedef struct _options { 34 | // used to hold all the command line options 35 | char *maf; // --maf: path to the input maf file 36 | char *seqs; // --seqs: comma separated list of fasta files 37 | char *outMfa; // --outMfa: multiple sequence fasta output file, NULL-checked before writing 38 | char *outMaf; // --outMaf: maf output file, NULL-checked before writing 39 | char *reference; // --referenceSequence: optional name of the reference sequence 40 | uint64_t breakpointPenalty; // --breakpointPenalty: number of N characters inserted at a breakpoint 41 | uint64_t interstitialSequence; // --interstitialSequence: maximum interstitial length pulled in from a fasta 42 | } options_t; 43 | typedef struct _sequence { 44 | // used to store fasta sequence elements 45 | char *seq; // DNA sequence 46 | uint64_t index; // first empty position in *seq 47 | uint64_t memLength; // size of the *seq buffer 48 | } mtfseq_t; 49 | typedef struct _row { 50 | // used to store the ultimate output of the utility, 51 | // a single element of either a multiple fasta alignment (mfa) 52 | // or a single row in a multiple alignment format (maf) file. 53 | char *name; 54 | char *prevName; // presumably the source name of the previous maf line added to this row -- TODO confirm 55 | char *sequence; 56 | bool multipleNames; // initialized false, if prevName is ever != name, then this should be set permanently true 57 | uint64_t start; 58 | uint64_t length; 59 | uint64_t prevRightPos; // rightmost position in the sequence, 0 based 60 | char strand; // `+' `-' or `*' when both strands have been observed (multipleNames should be set true) 61 | char prevStrand; // presumably the strand of the previous maf line added to this row -- TODO confirm 62 | uint64_t sourceLength; 63 | uint64_t index; // first empty position in *sequence 64 | uint64_t memLength; //size of the *sequence buffer 65 | } row_t; 66 | 67 | options_t* options_construct(void); 68 | void destroyOptions(options_t *o); 69 | mtfseq_t* newMtfseq(uint64_t length); 70 | void resizeMtfseq(mtfseq_t *m); 71 | void resizeRowSequence(row_t *r); 72 | void destroyMtfseq(void *p); 73 | row_t* newRow(uint64_t length); 74 | void destroyRow(void *row); 75 | row_t* mafLineToRow(mafLine_t *ml); 76 | stHash* mafBlockToBlockHash(mafBlock_t *mb, stList *orderList); 77 | stHash* createSequenceHash(char *fastas); 78 | void 
seq_copyIn(mtfseq_t *mtfss, char *src); 79 | void row_copyIn(row_t *row, char *src); 80 | void addSequencesToHash(stHash *hash, char *filename); 81 | void reportSequenceHash(stHash *hash); 82 | void penalize(stHash *hash, char *name, uint64_t n); 83 | void extendSequence(row_t *r, uint64_t n); 84 | void interstitialInsert(stHash *alignHash, stHash *seqHash, char *name, uint64_t pos, char strand, uint64_t n); 85 | char* extractSubSequence(mtfseq_t *mtfs, char strand, uint64_t pos, uint64_t n); 86 | void addMafLineToRow(row_t *row, mafLine_t *ml); 87 | void addMafBlockToRowHash(stHash *alignHash, stHash *seqHash, stList *order, mafBlock_t *mb, options_t *options); 88 | void prependGaps(row_t *r, uint64_t n); 89 | void buildAlignmentHash(mafFileApi_t *mfapi, stHash *alignmentHash, stHash *sequenceHash, 90 | stList *rowOrder, options_t *options); 91 | void writeFastaOut(stHash *alignmentHash, stList *rowOrder, options_t *options); 92 | void writeMafOut(stHash *alignmentHash, stList *rowOrder, options_t *options); 93 | uint64_t nearestTwo(uint64_t n); 94 | #endif // MAFTOFASTASTITCHER_API_H_ 95 | -------------------------------------------------------------------------------- /mafToFastaStitcher/src/test.mafToFastaStitcherAPI.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 
24 | */ 25 | #ifndef TEST_MAFTOFASTASTITCHER_H_ 26 | #define TEST_MAFTOFASTASTITCHER_H_ 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include "CuTest.h" 32 | #include "common.h" 33 | #include "sonLib.h" 34 | #include "mafToFastaStitcher.h" 35 | #include "mafToFastaStitcherAPI.h" 36 | 37 | CuSuite* mtfs_TestSuite(void); 38 | 39 | #endif // TEST_MAFTOFASTASTITCHER_H_ 40 | -------------------------------------------------------------------------------- /mafTransitiveClosure/Makefile: -------------------------------------------------------------------------------- 1 | include ../inc/common.mk 2 | SHELL:=/bin/bash 3 | bin = ../bin 4 | inc = ../inc 5 | lib = ../lib 6 | PROGS = mafTransitiveClosure 7 | dependencies = ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c $(wildcard ${sonLibPath}/*) ${sonLibPath}/stPinchesAndCacti.a ${sonLibPath}/sonLib.a src/allTests.c 8 | objects := ${lib}/common.o ${lib}/sharedMaf.o ${sonLibPath}/stPinchesAndCacti.a ${sonLibPath}/sonLib.a ../external/CuTest.a src/test.mafTransitiveClosure.o src/buildVersion.o 9 | testObjects := test/sharedMaf.o test/common.o ${sonLibPath}/stPinchesAndCacti.a ${sonLibPath}/sonLib.a ../external/CuTest.a src/test.mafTransitiveClosure.o test/buildVersion.o 10 | sources := src/mafTransitiveClosure.c src/mafTransitiveClosure.h 11 | 12 | .PHONY: all clean test buildVersion 13 | # Default goal: refresh src/buildVersion.c via createVersionSources.py, then build every program listed in PROGS. 14 | all: buildVersion $(foreach f,${PROGS}, ${bin}/$f) 15 | buildVersion: src/buildVersion.c 16 | src/buildVersion.c: ${sources} ${dependencies} 17 | @python ${lib}/createVersionSources.py 18 | # Shared objects are built by the Makefile in ../lib; $(MAKE) (not a literal make) passes -j, -n and the jobserver on to the sub-make. 19 | ../lib/%.o: ../lib/%.c ../inc/%.h 20 | cd ../lib/ && $(MAKE) 21 | # Link under a temporary name and rename it, so an interrupted link never leaves a partial binary at the target path. 22 | ${bin}/mafTransitiveClosure: src/mafTransitiveClosure.c ${dependencies} ${objects} 23 | mkdir -p $(dir $@) 24 | ${cxx} $< src/allTests.c ${objects} -o $@.tmp ${cflags} -lm 25 | mv $@.tmp $@ 26 | 27 | test/mafTransitiveClosure: src/mafTransitiveClosure.c ${dependencies} ${testObjects} 28 | mkdir -p $(dir $@) 29 | ${cxx} $< src/allTests.c 
${testObjects} -o $@.tmp ${testFlags} -lm 30 | mv $@.tmp $@ 31 | %.o: %.c ${inc}/%.h 32 | ${cxx} -c $< -o $@.tmp ${cflags} 33 | mv $@.tmp $@ 34 | %.o: %.c %.h 35 | ${cxx} -c $< -o $@.tmp ${cflags} 36 | mv $@.tmp $@ 37 | test/%.o: ${lib}/%.c ${inc}/%.h 38 | mkdir -p $(dir $@) 39 | ${cxx} -c $< -o $@.tmp ${testFlags} 40 | mv $@.tmp $@ 41 | test/%.o: src/%.c src/%.h 42 | mkdir -p $(dir $@) 43 | ${cxx} -c $< -o $@.tmp ${testFlags} 44 | mv $@.tmp $@ 45 | test/allTests: src/allTests.c ${testObjects} ${sonLibPath}/sonLib.a 46 | mkdir -p $(dir $@) 47 | ${cxx} $^ -o $@.tmp ${testFlags} ${lm} 48 | mv $@.tmp $@ 49 | 50 | clean: 51 | rm -rf $(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ src/buildVersion.c src/buildVersion.h 52 | # Runs the C unit tests built into the test binary, then the Python tests; the scratch directories are removed only when both pass. 53 | test: buildVersion test/mafTransitiveClosure 54 | test/mafTransitiveClosure --test && python2.7 src/test.mafTransitiveClosure.py --verbose && rm -rf ./test/ && rmdir ./tempTestDir 55 | # CuTest is compiled and archived in the current directory, and the archive is then moved next to its sources. 56 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h 57 | ${cxx} -c $< ${cflags} 58 | ar rc CuTest.a CuTest.o 59 | ranlib CuTest.a 60 | rm -f CuTest.o 61 | mv CuTest.a $@ 62 | -------------------------------------------------------------------------------- /mafTransitiveClosure/README.md: -------------------------------------------------------------------------------- 1 | # mafTransitiveClosure 2 | 24 May 2012 3 | 4 | ## Author 5 | [Dent Earl](https://github.com/dentearl/) 6 | 7 | ## Description 8 | A program to perform the transitive closure on an alignment. That is, it checks every column of the alignment and looks for situations where a position A is aligned to B in one part of a file and B is aligned to C in another part of the file. The transitive closure of this relationship would be a single column with A, B and C all present. Useful for when you have pairwise alignments and you wish to turn them into something more resembling a multiple alignment. 
9 | 10 | ## Dependencies 11 | * sonLib https://github.com/benedictpaten/sonLib/ 12 | * pinchesAndCacti https://github.com/benedictpaten/pinchesAndCacti 13 | 14 | ## Installation 15 | 1. Download the package. 16 | 2. cd into the directory. 17 | 3. Type make. 18 | 19 | ## Use 20 | mafTransitiveClosure --maf mafFile.maf > transitivelyClosed.maf 21 | 22 | ### Options 23 | * -h, --help show this help message and exit. 24 | * -m, --maf path to maf file. 25 | * -v, --verbose turns on verbose output. 26 | -------------------------------------------------------------------------------- /mafTransitiveClosure/src/allTests.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 24 | */ 25 | #include 26 | #include 27 | #include "CuTest.h" 28 | #include "mafTransitiveClosure.h" 29 | #include "test.mafTransitiveClosure.h" 30 | /* Build and run the mafTransitiveClosure CuTest suite, print its summary and details to stdout, and return 1 if any test failed, 0 otherwise. */ 31 | int mafTransitiveClosure_RunAllTests(void) { 32 | CuString *output = CuStringNew(); 33 | CuSuite *suite = CuSuiteNew(); 34 | CuSuite *maf_s = mafTransitiveClosure_TestSuite(); 35 | CuSuiteAddSuite(suite, maf_s); 36 | printf("\n"); 37 | CuSuiteRun(suite); 38 | CuSuiteSummary(suite, output); 39 | CuSuiteDetails(suite, output); 40 | printf("%s\n", output->buffer); 41 | CuStringDelete(output); 42 | int status = (suite->failCount > 0); /* read the result before the suite is deleted below */ 43 | free(maf_s); /* only the sub-suite struct is released here; presumably its test cases now belong to `suite` -- TODO confirm against CuSuiteAddSuite */ 44 | CuSuiteDelete(suite); 45 | return status; 46 | } 47 | -------------------------------------------------------------------------------- /mafTransitiveClosure/src/mafTransitiveClosure.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 
16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 24 | */ 25 | #ifndef MAFTRANSITIVECLOSURE_H_ 26 | #define MAFTRANSITIVECLOSURE_H_ 27 | #include "sonLib.h" 28 | #include "stPinchGraphs.h" 29 | #include "common.h" 30 | #include "CuTest.h" 31 | #include "sharedMaf.h" 32 | 33 | typedef struct mafTcSeq { 34 | // maf tc (transitive closure) sequence 35 | char *name; 36 | char *sequence; 37 | uint64_t length; 38 | } mafTcSeq_t; 39 | typedef struct mafTcRegion { 40 | // region or interval 41 | uint64_t start; 42 | uint64_t end; 43 | struct mafTcRegion *next; // next region in the singly linked list 44 | } mafTcRegion_t; 45 | typedef struct mafTcComparisonOrder { 46 | // This struct is used in adding alignment information into the thread set. 47 | /* for this sequence matrix: 48 | 0123456789 49 | 0 AC---ACG-G 50 | 1 ACG--ACGGC 51 | 2 A-G-TACGGC 52 | 3 ACGTTACGGC 53 | a comparison order is {3: [3, 3]}, {2: [4, 4]}, 54 | {1: [8, 8]}, {1: [2, 2]}, {0: [9, 9]}, {0: [5, 7]}, {0: [0, 1]} 55 | [note that the ordering of these values is arbitrary, though consistent with the 56 | output of the algo in the code which appends new structs to the head of the list] 57 | e.g. the first block to process uses 0 as its reference and it starts at column 1 58 | and ends at column 1. The second block to process uses 1 as its reference and it 59 | starts at column 2 and ends at column 2 (it is only one column), etc. NOTE(review): these column numbers do not match the example order listed above ({0: [0, 1]} spans columns 0-1) -- confirm which is intended. 
60 | */ 61 | uint64_t ref; // row of the sequence matrix used as the reference (the key in the example above) 62 | mafTcRegion_t *region; // presumably the inclusive [start, end] column interval compared against that reference -- TODO confirm 63 | struct mafTcComparisonOrder *next; 64 | } mafTcComparisonOrder_t; 65 | typedef struct mafCoordinatePair { 66 | /* this struct is used to store pairs of coordinates 67 | */ 68 | int64_t a; 69 | int64_t b; 70 | } mafCoordinatePair_t; 71 | typedef struct mafBlockSort { 72 | /* this struct is used to sort a sequence matrix by the number of gaps in each row 73 | */ 74 | int64_t value; // value to sort upon 75 | mafLine_t *ml; 76 | } mafBlockSort_t; 77 | 78 | void usage(void); 79 | mafTcSeq_t* newMafTcSeq(char *name, uint64_t length); 80 | mafTcComparisonOrder_t* newMafTcComparisonOrder(void); 81 | mafTcRegion_t* newMafTcRegion(uint64_t start, uint64_t end); 82 | mafCoordinatePair_t* newCoordinatePairArray(uint64_t numSeqs, char **seqs); 83 | void destroyMafTcSeq(void *p); 84 | void destroyMafTcRegionList(mafTcRegion_t *r); 85 | void destroyMafTcRegion(mafTcRegion_t *r); 86 | void destroyMafTcComparisonOrder(mafTcComparisonOrder_t *c); 87 | void destroyCoordinatePairArray(mafCoordinatePair_t *cp); 88 | uint64_t hashMafTcSeq(const mafTcSeq_t *mtcs); 89 | int hashCompareMafTcSeq(const mafTcSeq_t *m1, const mafTcSeq_t *m2); 90 | char* createNSequence(uint64_t length); 91 | void addSequenceValuesToMtcSeq(mafLine_t *ml, mafTcSeq_t *mtcs); 92 | void parseOptions(int argc, char **argv, char *filename); 93 | stPinchThreadSet* buildThreadSet(stHash *hash); 94 | void walkBlockAddingAlignments(mafBlock_t *mb, stPinchThreadSet *threadSet); 95 | void addAlignmentsToThreadSet(mafFileApi_t *mfa, stPinchThreadSet *threadSet); 96 | void createSequenceHash(mafFileApi_t *mfa, stHash **hash, stHash **nameHash); 97 | mafTcRegion_t* getComparisonOrderFromRow(char **mat, uint64_t row, mafTcComparisonOrder_t **done, 98 | mafTcRegion_t *todo, int containsGaps); 99 | mafTcComparisonOrder_t *getComparisonOrderFromMatrix(char **mat, uint64_t rowLength, uint64_t colLength, 100 | uint64_t *lengths, int **vizMat); 101 | void 
processPairForPinching(stPinchThreadSet *threadSet, stPinchThread *a, uint64_t aGlobalStart, 102 | uint64_t aGlobalLength, int aStrand, 103 | char *aSeq, stPinchThread *b, uint64_t bGlobalStart, uint64_t bGlobalLength, 104 | int bStrand, char *bSeq, uint64_t regionStart, uint64_t regionEnd, 105 | mafCoordinatePair_t aBookmark, mafCoordinatePair_t bBookmark, 106 | int aContainsGaps, int bContainsGaps, 107 | void (*pinchFunction)(stPinchThread *, stPinchThread *, int64_t, int64_t, int64_t, bool)); 108 | int64_t localSeqCoords(uint64_t p, char *s, mafCoordinatePair_t *bookmark, int containsGaps); 109 | int64_t localSeqCoordsToGlobalPositiveCoords(int64_t c, uint64_t start, uint64_t sourceLength, char strand); 110 | int64_t localSeqCoordsToGlobalPositiveStartCoords(int64_t c, uint64_t start, uint64_t sourceLength, 111 | char strand, uint64_t length); 112 | void mafBlock_sortBlockByIncreasingGap(mafBlock_t *mb); 113 | void walkBlockAddingSequence(mafBlock_t *mb, stHash *hash, stHash *nameHash); 114 | void reportSequenceHash(stHash *hash, stHash *nameHash); 115 | void destroyVizMatrix(int **mat, unsigned n); 116 | int cmp_by_gaps(const void *a, const void *b); 117 | uint64_t getMaxNameLength(stHash *hash); 118 | void getMaxFieldLengths(stHash *hash, stHash *nameHash, stPinchBlock *block, uint64_t *maxStart, 119 | uint64_t *maxLength, uint64_t *maxSource); 120 | char* getSequenceSubset(char *seq, int64_t start, char strand, int64_t length); 121 | void reportTransitiveClosure(stPinchThreadSet *threadSet, stHash *hash, stHash *nameHash); 122 | // debugging tools 123 | int** getVizMatrix(mafBlock_t *mb, unsigned n, unsigned m); 124 | void updateVizMatrix(int **mat, mafTcComparisonOrder_t *co); 125 | void printVizMatrix(int **mat, uint64_t n, uint64_t m); 126 | void printTodoArray(mafTcRegion_t *reg, unsigned max); 127 | // test suite 128 | CuSuite* mafTransitiveClosure_TestSuite(void); 129 | int mafTransitiveClosure_RunAllTests(void); 130 | 131 | #endif // 
MAFTRANSITIVECLOSURE_H_ 132 | -------------------------------------------------------------------------------- /mafTransitiveClosure/src/test.mafTransitiveClosure.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2013 by 3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com) 4 | * ... and other members of the Reconstruction Team of David Haussler's 5 | * lab (BME Dept. UCSC). 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 
24 | */ 25 | #ifndef TEST_MAFTRANSITIVECLOSURE_H_ 26 | #define TEST_MAFTRANSITIVECLOSURE_H_ 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include "CuTest.h" 32 | #include "common.h" 33 | #include "sonLib.h" 34 | #include "stPinchGraphs.h" 35 | #include "mafTransitiveClosure.h" 36 | 37 | void printRegionList(mafTcRegion_t *reg, FILE *ofp); 38 | bool regionListsAreEqual(mafTcRegion_t *expected, mafTcRegion_t *obs, bool verbose); 39 | void printTestComparisonOrder(mafTcComparisonOrder_t *co); 40 | bool comparisonOrdersAreEqual(mafTcComparisonOrder_t *eo, mafTcComparisonOrder_t *oo, bool verbose); 41 | bool mafBlocksAreEqual(mafBlock_t *input, mafBlock_t *expected, bool verbose); 42 | void test_reverseComplement(CuTest *testCase); 43 | void test_rowAlignmentBlockComparisonOrdering_0(CuTest *testCase); 44 | void test_rowAlignmentBlockComparisonOrdering_1(CuTest *testCase); 45 | void test_rowAlignmentBlockComparisonOrdering_2(CuTest *testCase); 46 | void test_rowAlignmentBlockComparisonOrdering_3(CuTest *testCase); 47 | void test_matrixAlignmentBlockComparisonOrdering_0(CuTest *testCase); 48 | void test_matrixAlignmentBlockComparisonOrdering_1(CuTest *testCase); 49 | void test_matrixAlignmentBlockComparisonOrdering_2(CuTest *testCase); 50 | void test_matrixAlignmentBlockComparisonOrdering_3(CuTest *testCase); 51 | void test_matrixAlignmentBlockComparisonOrdering_4(CuTest *testCase); 52 | void test_addSequenceValuesToMtcSeq_0(CuTest *testCase); 53 | void test_localSeqCoords_0(CuTest *testCase); 54 | void test_localSeqCoordsToGlobalPositiveCoords_0(CuTest *testCase); 55 | void test_localSeqCoordsToGlobalPositiveStartCoords_0(CuTest *testCase); 56 | void test_coordinateTransforms_0(CuTest *testCase); 57 | void test_coordinateTransforms_1(CuTest *testCase); 58 | void test_mafBlockGapSorting_0(CuTest *testCase); 59 | CuSuite* mafTransitiveClosure_TestSuite(void); 60 | 61 | #endif // TEST_MAFTRANSITIVECLOSURE_H_ 62 | 
-------------------------------------------------------------------------------- /mafValidator/Makefile: -------------------------------------------------------------------------------- 1 | include ../inc/common.mk 2 | binPath = ../bin 3 | 4 | progs = $(foreach f,mafValidator.py, ${binPath}/$f) 5 | 6 | .PHONY: all clean test 7 | 8 | all: ${progs} 9 | 10 | ${binPath}/%.py : src/%.py 11 | @mkdir -p $(dir $@) 12 | cp $< $@.tmp 13 | chmod +x $@.tmp 14 | mv $@.tmp $@ 15 | 16 | test : 17 | python src/test.mafValidator.py -v && rmdir tempTestDir 18 | 19 | clean : 20 | rm -f ${progs} 21 | -------------------------------------------------------------------------------- /mafValidator/README.md: -------------------------------------------------------------------------------- 1 | # mafValidator 2 | 3 | 10 October 2011 4 | 5 | ## Authors 6 | 7 | [Dent Earl](https://github.com/dentearl/) 8 | 9 | ## Description 10 | mafValidator is a script to validate the formatting and basic data contained in a maf file. 11 | 12 | ## Dependencies 13 | * Python 2.6 ≤ version < 3.0 14 | 15 | ## Installation 16 | 1. Download the package. 17 | 2. cd into the directory. 18 | 3. Type make. 19 | 20 | ## Use 21 | mafValidator.py --maf=FILE [options] 22 | 23 | ### Options 24 | * mafValidator.py 25 | * --help : show this help message and exit 26 | * --maf : path to the maf file to test 27 | * --testChromNames : Expects that the source field will be formatted with .chrN e.g. "hg19.chr1" default=False 28 | * --ignoreDuplicateColumns : Turn off the checks for duplicate columns, may be useful for pairwise-only alignments. default=duplicate checking is on. 29 | * --validateSequence : Turn on checks to make sure all sequence fields are consistent. Slows things down considerably. 30 | 31 | ## Test 32 | make test 33 | --------------------------------------------------------------------------------