├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── external
├── CuTest.c
├── CuTest.h
└── license.txt
├── inc
├── common.h
├── common.mk
├── sharedMaf.h
├── test.common.h
└── test.sharedMaf.h
├── lib
├── Makefile
├── allTests.c
├── common.c
├── createVersionSources.py
├── mafToolsTest.py
├── sharedMaf.c
├── test.sharedMaf.c
└── test.sharedMaf.py
├── mafComparator
├── .gitignore
├── Makefile
├── README.md
├── example
│ ├── a.maf
│ └── b.maf
└── src
│ ├── __init__.py
│ ├── allTests.c
│ ├── cString.c
│ ├── cString.h
│ ├── comparatorAPI.c
│ ├── comparatorAPI.h
│ ├── comparatorRandom.c
│ ├── comparatorRandom.h
│ ├── mafComparator.c
│ ├── mafPairCounter.c
│ ├── test.comparatorAPI.c
│ ├── test.comparatorAPI.h
│ ├── test.comparatorRandom.c
│ ├── test.comparatorRandom.h
│ ├── test.mafComparator.py
│ └── testRand.c
├── mafCoverage
├── Makefile
├── README.md
└── src
│ ├── allTests.c
│ ├── mafCoverage.c
│ ├── mafCoverage.h
│ ├── mafCoverageAPI.c
│ ├── mafCoverageAPI.h
│ ├── test.mafCoverage.py
│ ├── test.mafCoverageAPI.c
│ └── test.mafCoverageAPI.h
├── mafDuplicateFilter
├── Makefile
├── README.md
└── src
│ ├── mafDuplicateFilter.c
│ └── test.mafDuplicateFilter.py
├── mafExtractor
├── Makefile
├── README.md
└── src
│ ├── allTests.c
│ ├── mafExtractor.c
│ ├── mafExtractor.h
│ ├── mafExtractorAPI.c
│ ├── mafExtractorAPI.h
│ ├── test.mafExtractor.c
│ ├── test.mafExtractor.h
│ └── test.mafExtractor.py
├── mafFilter
├── Makefile
├── README.md
└── src
│ ├── mafFilter.c
│ └── test.mafFilter.py
├── mafPairCoverage
├── Makefile
├── README.md
└── src
│ ├── allTests.c
│ ├── mafPairCoverage.c
│ ├── mafPairCoverage.h
│ ├── mafPairCoverageAPI.c
│ ├── mafPairCoverageAPI.h
│ ├── test.mafPairCoverage.py
│ ├── test.mafPairCoverageAPI.c
│ └── test.mafPairCoverageAPI.h
├── mafPositionFinder
├── Makefile
├── README.md
└── src
│ ├── mafPositionFinder.c
│ └── test.mafPositionFinder.py
├── mafRowOrderer
├── Makefile
├── README.md
└── src
│ ├── mafRowOrderer.c
│ └── test.mafRowOrderer.py
├── mafSorter
├── Makefile
├── README.md
└── src
│ ├── mafSorter.c
│ └── test.mafSorter.py
├── mafStats
├── Makefile
├── README.md
└── src
│ ├── allTests.c
│ ├── mafStats.c
│ ├── mafStats.h
│ ├── test.mafStats.c
│ └── test.mafStats.h
├── mafStrander
├── Makefile
├── README.md
└── src
│ ├── mafStrander.c
│ └── test.mafStrander.py
├── mafToFastaStitcher
├── Makefile
├── README.md
└── src
│ ├── allTests.c
│ ├── mafToFastaStitcher.c
│ ├── mafToFastaStitcher.h
│ ├── mafToFastaStitcherAPI.c
│ ├── mafToFastaStitcherAPI.h
│ ├── test.mafToFastaStitcher.py
│ ├── test.mafToFastaStitcherAPI.c
│ └── test.mafToFastaStitcherAPI.h
├── mafTransitiveClosure
├── Makefile
├── README.md
└── src
│ ├── allTests.c
│ ├── mafTransitiveClosure.c
│ ├── mafTransitiveClosure.h
│ ├── test.mafTransitiveClosure.c
│ ├── test.mafTransitiveClosure.h
│ └── test.mafTransitiveClosure.py
└── mafValidator
├── Makefile
├── README.md
└── src
├── mafValidator.py
└── test.mafValidator.py
/.gitignore:
--------------------------------------------------------------------------------
1 | bin
2 | *.o
3 | *.pyc
4 | *.a
5 | buildVersion*
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (C) 2009-2014 by
2 | Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
3 | Benedict Paten (benedict@soe.ucsc.edu, benedictpaten@gmail.com)
4 | Mark Diekhans (markd@soe.ucsc.edu)
5 | ... and other members of the Reconstruction Team of David Haussler's
6 | lab (BME Dept. UCSC).
7 |
8 | Permission is hereby granted, free of charge, to any person obtaining a copy
9 | of this software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 |
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 |
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | include inc/common.mk
2 |
3 | ##############################
4 | # Optional modules: each one is built only when the library it needs is found, otherwise its variable stays empty.
5 | # NOTE(review): sonLibPath (inc/common.mk) is written relative to a module directory but is probed here from the top level -- confirm it resolves as intended.
6 | ifeq ($(wildcard ${sonLibPath}/../Makefile),)
7 | Comparator =
8 | TransitiveClosure =
9 | Stats =
10 | ToFasta =
11 | PairCoverage =
12 | Coverage =
13 | $(warning Because dependency ${sonLibPath} is missing mafComparator, mafTransitiveClosure, mafStats, mafToFastaStitcher, mafPairCoverage, mafCoverage will not be built / tested / cleaned. See README.md for information about dependencies.)
14 | else
15 | Comparator = mafComparator
16 | Stats = mafStats
17 | ToFasta = mafToFastaStitcher
18 | PairCoverage = mafPairCoverage
19 | Coverage = mafCoverage
20 | ifeq ($(wildcard ${sonLibPath}/stPinchesAndCacti.a),)
21 | TransitiveClosure =
22 | $(warning Because dependency ${sonLibPath}/stPinchesAndCacti.a is missing mafTransitiveClosure will not be built / tested / cleaned. See README.md for information about dependencies.)
23 | else
24 | TransitiveClosure = mafTransitiveClosure
25 | endif # pinches
26 | endif # sonlib
27 | ##############################
28 | dependentModules= ${Comparator} ${TransitiveClosure} ${Stats} ${ToFasta} ${PairCoverage} ${Coverage}
29 | # Every directory that the all / clean / test targets recurse into.
30 | modules = lib ${dependentModules} mafValidator mafPositionFinder mafExtractor mafSorter mafDuplicateFilter mafFilter mafStrander mafRowOrderer
31 | # .PHONY takes names literally, so the %.x entries below do not mark the per-module targets as phony.
32 | .PHONY: all %.all clean %.clean test %.test
33 | .SECONDARY:
34 |
35 | all: ${modules:%=%.all}
36 | # <module>.all / .clean / .test: run the same-named target of that module's own Makefile ($* is the module directory).
37 | %.all:
38 | 	cd $* && $(MAKE) all
39 |
40 | clean: ${modules:%=%.clean}
41 | # $(MAKE) rather than a literal "make" so command-line flags and the jobserver reach the sub-make.
42 | %.clean:
43 | 	cd $* && $(MAKE) clean
44 |
45 | test: ${modules:%=%.test}
46 | 	@echo 'mafTools tests complete.'
47 |
48 | %.test:
49 | 	cd $* && $(MAKE) test
50 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # mafTools
2 |
3 | **mafTools** is a collection of tools that operate on Multiple Alignment Format ([maf](http://genome.ucsc.edu/FAQ/FAQformat.html#format5)) files.
4 |
5 | ## Authors
6 | [Dent Earl](https://github.com/dentearl/), [Benedict Paten](https://github.com/benedictpaten/), [Mark Diekhans](https://github.com/diekhans)
7 |
8 | ## Dependencies
9 | With the exception of the python dependencies, when a component is missing a dependency it will not be built, tested or cleaned by the Makefile. If the python dependencies are missing then some of the modules will fail to function and all of the modules' tests will fail. The `sonLib` and `pinchesAndCacti` dependencies should be built and placed in the same parent directory as `mafTools`.
10 | * [python 2.7](http://www.python.org/): all modules.
11 | * [scipy](http://www.scipy.org/)
12 | * [numpy](http://numpy.scipy.org/)
13 | * [sonLib](https://github.com/benedictpaten/sonLib/): mafComparator, mafStats, mafTransitiveClosure, mafToFastaStitcher, mafPairCoverage, mafCoverage.
14 | * [pinchesAndCacti](https://github.com/benedictpaten/pinchesAndCacti): mafTransitiveClosure.
15 |
16 | ## Installation
17 | 0. Install dependencies.
18 | 1. Download or clone the `mafTools` package. Consider making it a sibling directory to `sonLib/` and `pinchesAndCacti`.
19 | 2. `cd` into the `mafTools` directory.
20 | 3. Type `make`.
21 |
22 | ## Components
23 | * **mafComparator** A program to compare two maf files by sampling. Useful when testing predicted alignments against known true alignments.
24 | * **mafCoverage** A program to calculate the amount of alignment coverage between a target sequence and all other sequences in a maf file.
25 | * **mafDuplicateFilter** A program to filter alignment blocks to remove duplicate species. One sequence per species is allowed to remain, chosen by comparing the sequence to the consensus for the block and computing a similarity bit score between the IUPAC formatted consensus and the sequence. The highest scoring duplicate stays, or in the case of ties, the sequence closest to the start of the file stays.
26 | * **mafExtractor** A program to extract all alignment blocks that contain a region in a particular sequence. Useful for isolating regions of interest in large maf files.
27 | * **mafFilter** A program to filter a maf based on sequence names. Can be used to include or exclude sequence names. Useful for removing extraneous sequences from maf files.
28 | * **mafPairCoverage** A program to compare the number of aligned positions between any pair of sequences within a maf file. Can use the * wildcard character to specify a species name. Can use a BED file to limit region of inspection to just intervals specified in the bed. Outputs total lengths of sequences, number of aligned positions, percent coverage and, in the case where a bed file was specified, the number of bases within and outside of the region.
29 | * **mafPositionFinder** A program to search for a position in a particular sequence. Useful for determining where in maf a particular part of the alignment resides.
30 | * **mafRowOrderer** A program to order maf lines within blocks. Useful for moving a reference species to the top of all blocks. Species not specified in the ordering are automatically trimmed from the results.
31 | * **mafSorter** A program to sort all of the blocks in a MAF based on the (absolute) start position of one of the sequences. Blocks without the sequence are placed at the start of the output in their original order.
32 | * **mafStats** A program to read a maf file and report back summary statistics about the file contents.
33 | * **mafStrander** A program to enforce, when possible, a particular strandedness for blocks for a given species and strand orientation.
34 | * **mafToFastaStitcher** A program to convert a reference-based MAF file to a multiple sequence fasta. Requires both a .maf and a fasta containing complete sequences for all entries in the maf.
35 | * **mafTransitiveClosure** A program to perform the transitive closure on an alignment. That is, it checks every column of the alignment and looks for situations where a position A is aligned to B in one part of a file and B is aligned to C in another part of the file. The transitive closure of this relationship would be a single column with A, B and C all present. Useful for when you have pairwise alignments and you wish to turn them into something more resembling a multiple alignment.
36 | * **mafValidator** A program to assess whether or not a given maf file's formatting is valid.
37 |
38 | ## External tools
39 | * mafTools internal tests use Asim Jalis' [CuTest](http://cutest.sourceforge.net/) C unit testing framework (included in `external/`). The license for CuTest is spelled out in external/license.txt.
40 | * mafTools internal tests will use [valgrind](http://www.valgrind.org/) __if__ installed on your system.
41 |
42 | ## How to Cite:
43 | Genome Res. 2014 Dec;24(12):2077-89. doi: 10.1101/gr.174920.114. Epub 2014 Oct 1.
44 | Alignathon: a competitive assessment of whole-genome alignment methods.
45 |
--------------------------------------------------------------------------------
/external/CuTest.h:
--------------------------------------------------------------------------------
1 | #ifndef CU_TEST_H
2 | #define CU_TEST_H
3 |
4 | #include
5 | #include
6 | #include
7 |
8 | #define CUTEST_VERSION "CuTest 1.5"
9 |
10 | /* CuString: a character buffer with explicit length/size bookkeeping, plus the helpers that operate on it. */
11 |
12 | char* CuStrAlloc(int size);
13 | char* CuStrCopy(const char* old);
14 | /* CU_ALLOC(TYPE): malloc storage for one TYPE and cast the result to TYPE*; the result is not NULL-checked here. */
15 | #define CU_ALLOC(TYPE) ((TYPE*) malloc(sizeof(TYPE)))
16 | /* String sizing constants; how each one is applied is not visible from this header (presumably in CuTest.c). */
17 | #define HUGE_STRING_LEN 8192
18 | #define STRING_MAX 256
19 | #define STRING_INC 256
20 | /* Fields are presumably chars in use (length), allocated capacity (size) and the storage itself (buffer) -- confirm in CuTest.c. */
21 | typedef struct
22 | {
23 | int length;
24 | int size;
25 | char* buffer;
26 | } CuString;
27 | /* Every CuString helper below takes the string it operates on as its first argument, except CuStringNew, which returns one. */
28 | void CuStringInit(CuString* str);
29 | CuString* CuStringNew(void);
30 | void CuStringRead(CuString* str, const char* path);
31 | void CuStringAppend(CuString* str, const char* text);
32 | void CuStringAppendChar(CuString* str, char ch);
33 | void CuStringAppendFormat(CuString* str, const char* format, ...);
34 | void CuStringInsert(CuString* str, const char* text, int pos);
35 | void CuStringResize(CuString* str, int newSize);
36 | void CuStringDelete(CuString* str);
37 |
38 | /* CuTest: one named test case -- a TestFunction together with its run/failure bookkeeping. */
39 |
40 | typedef struct CuTest CuTest;
41 | /* TestFunction: a test body receives the CuTest it belongs to and returns nothing. */
42 | typedef void (*TestFunction)(CuTest *);
43 | /* failed / ran / message / jumpBuf are presumably filled in by CuTestRun and the assert helpers -- confirm in CuTest.c. */
44 | struct CuTest
45 | {
46 | char* name;
47 | TestFunction function;
48 | int failed;
49 | int ran;
50 | const char* message;
51 | jmp_buf *jumpBuf;
52 | };
53 | /* CuTestInit fills in a caller-provided CuTest; CuTestNew returns a CuTest* built from the same name/function pair. */
54 | void CuTestInit(CuTest* t, const char* name, TestFunction function);
55 | CuTest* CuTestNew(const char* name, TestFunction function);
56 | void CuTestRun(CuTest* tc);
57 | void CuTestDelete(CuTest *t);
58 |
59 | /* Internal versions of assert functions -- use the public versions (the Cu* macros in this header pass __FILE__ and __LINE__ as the file/line arguments) */
60 | void CuFail_Line(CuTest* tc, const char* file, int line, const char* message2, const char* message);
61 | void CuAssert_Line(CuTest* tc, const char* file, int line, const char* message, int condition);
62 | void CuAssertStrEquals_LineMsg(CuTest* tc,
63 | const char* file, int line, const char* message,
64 | const char* expected, const char* actual);
65 | void CuAssertIntEquals_LineMsg(CuTest* tc,
66 | const char* file, int line, const char* message,
67 | int expected, int actual);
68 | void CuAssertDblEquals_LineMsg(CuTest* tc,
69 | const char* file, int line, const char* message,
70 | double expected, double actual, double delta);
71 | void CuAssertPtrEquals_LineMsg(CuTest* tc,
72 | const char* file, int line, const char* message,
73 | void* expected, void* actual);
74 | //////////////////////////////////////////////////
75 | // added by dent earl, dent.earl (a) gmail com -- note: despite "UInt32" in the name, expected/actual are uint64_t
76 | void CuAssertUInt32Equals_LineMsg(CuTest* tc,
77 | const char* file, int line, const char* message,
78 | uint64_t expected, uint64_t actual);
79 | //////////////////////////////////////////////////
80 |
81 | /* public assert functions: each macro forwards to a *_Line / *_LineMsg function, filling in __FILE__ and __LINE__ */
82 |
83 | #define CuFail(tc, ms) CuFail_Line( (tc), __FILE__, __LINE__, NULL, (ms))
84 | #define CuAssert(tc, ms, cond) CuAssert_Line((tc), __FILE__, __LINE__, (ms), (cond))
85 | #define CuAssertTrue(tc, cond) CuAssert_Line((tc), __FILE__, __LINE__, "assert failed", (cond))
86 |
87 | #define CuAssertStrEquals(tc,ex,ac) CuAssertStrEquals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac))
88 | #define CuAssertStrEquals_Msg(tc,ms,ex,ac) CuAssertStrEquals_LineMsg((tc),__FILE__,__LINE__,(ms),(ex),(ac))
89 | #define CuAssertIntEquals(tc,ex,ac) CuAssertIntEquals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac))
90 | #define CuAssertIntEquals_Msg(tc,ms,ex,ac) CuAssertIntEquals_LineMsg((tc),__FILE__,__LINE__,(ms),(ex),(ac))
91 | #define CuAssertDblEquals(tc,ex,ac,dl) CuAssertDblEquals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac),(dl))
92 | #define CuAssertDblEquals_Msg(tc,ms,ex,ac,dl) CuAssertDblEquals_LineMsg((tc),__FILE__,__LINE__,(ms),(ex),(ac),(dl))
93 | #define CuAssertPtrEquals(tc,ex,ac) CuAssertPtrEquals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac))
94 | #define CuAssertPtrEquals_Msg(tc,ms,ex,ac) CuAssertPtrEquals_LineMsg((tc),__FILE__,__LINE__,(ms),(ex),(ac))
95 | //////////////////////////////////////////////////
96 | // added by dent earl, dent.earl (a) gmail com; NOTE(review): the _Msg variant below takes no message argument and passes NULL, unlike the other *_Msg macros
97 | #define CuAssertUInt32Equals(tc,ex,ac) CuAssertUInt32Equals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac))
98 | #define CuAssertUInt32Equals_Msg(tc,ex,ac) CuAssertUInt32Equals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac))
99 | //////////////////////////////////////////////////
100 | /* Altered from upstream: p is parenthesized so an expression argument (e.g. a ?: expression) is compared against NULL as a whole. */
101 | #define CuAssertPtrNotNull(tc,p) CuAssert_Line((tc),__FILE__,__LINE__,"null pointer unexpected",((p) != NULL))
102 | #define CuAssertPtrNotNullMsg(tc,msg,p) CuAssert_Line((tc),__FILE__,__LINE__,(msg),((p) != NULL))
103 |
104 | /* CuSuite: a fixed-capacity collection of CuTest pointers (list[MAX_TEST_CASES]) with run/failure counters. */
105 | /* MAX_TEST_CASES: number of slots in CuSuite.list. */
106 | #define MAX_TEST_CASES 1024
107 | /* SUITE_ADD_TEST: build a CuTest named after the stringified TEST identifier and hand it to CuSuiteAdd. */
108 | #define SUITE_ADD_TEST(SUITE,TEST) CuSuiteAdd(SUITE, CuTestNew(#TEST, TEST))
109 | /* count and failCount are presumably the number of stored tests and of failed tests -- confirm in CuTest.c. */
110 | typedef struct
111 | {
112 | int count;
113 | CuTest* list[MAX_TEST_CASES];
114 | int failCount;
115 |
116 | } CuSuite;
117 |
118 | /* CuSuiteSummary and CuSuiteDetails take a CuString* that presumably receives the report text -- confirm in CuTest.c. */
119 | void CuSuiteInit(CuSuite* testSuite);
120 | CuSuite* CuSuiteNew(void);
121 | void CuSuiteDelete(CuSuite *testSuite);
122 | void CuSuiteAdd(CuSuite* testSuite, CuTest *testCase);
123 | void CuSuiteAddSuite(CuSuite* testSuite, CuSuite* testSuite2);
124 | void CuSuiteRun(CuSuite* testSuite);
125 | void CuSuiteSummary(CuSuite* testSuite, CuString* summary);
126 | void CuSuiteDetails(CuSuite* testSuite, CuString* details);
127 |
128 | #endif /* CU_TEST_H */
129 |
--------------------------------------------------------------------------------
/external/license.txt:
--------------------------------------------------------------------------------
1 | NOTE
2 |
3 | The license is based on the zlib/libpng license. For more details see
4 | http://www.opensource.org/licenses/zlib-license.html. The intent of the
5 | license is to:
6 |
7 | - keep the license as simple as possible
8 | - encourage the use of CuTest in both free and commercial applications
9 | and libraries
10 | - keep the source code together
11 | - give credit to the CuTest contributors for their work
12 |
13 | If you ship CuTest in source form with your source distribution, the
14 | following license document must be included with it in unaltered form.
15 | If you find CuTest useful we would like to hear about it.
16 |
17 | LICENSE
18 |
19 | Copyright (c) 2003 Asim Jalis
20 |
21 | This software is provided 'as-is', without any express or implied
22 | warranty. In no event will the authors be held liable for any damages
23 | arising from the use of this software.
24 |
25 | Permission is granted to anyone to use this software for any purpose,
26 | including commercial applications, and to alter it and redistribute it
27 | freely, subject to the following restrictions:
28 |
29 | 1. The origin of this software must not be misrepresented; you must not
30 | claim that you wrote the original software. If you use this software in
31 | a product, an acknowledgment in the product documentation would be
32 | appreciated but is not required.
33 |
34 | 2. Altered source versions must be plainly marked as such, and must not
35 | be misrepresented as being the original software.
36 |
37 | 3. This notice may not be removed or altered from any source
38 | distribution.
39 |
--------------------------------------------------------------------------------
/inc/common.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2012 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #ifndef COMMON_H_
26 | #define COMMON_H_
27 | #include
28 | #include
29 |
30 | extern int g_verbose_flag; // the flags and limits in this group are only declared here (extern); they are defined in another source file
31 | extern int g_debug_flag;
32 | extern const int kMaxStringLength;
33 | extern const int kMaxMessageLength;
34 | extern const int kMaxSeqName;
35 | // The de_* names mirror libc routines (malloc, getline, fopen, strdup, strndup, strtok); presumably checked wrappers -- confirm in lib/common.c.
36 | void de_verbose(char const *fmt, ...); // format string followed by variadic arguments; presumably gated on g_verbose_flag -- confirm
37 | void de_debug(char const *fmt, ...); // format string followed by variadic arguments; presumably gated on g_debug_flag -- confirm
38 | void* de_malloc(size_t n);
39 | int64_t de_getline(char **s, int64_t *n, FILE *f);
40 | FILE* de_fopen(const char *s, char const *mode);
41 | char* de_strdup(const char *s);
42 | char* de_strndup(const char *s, size_t n);
43 | void failBadFormat(void);
44 | void usageMessage(char shortopt, const char *name, const char *description);
45 | char* stringReplace(const char *string, const char a, const char b); // returns char* from a const input; presumably a new string the caller owns -- confirm
46 | int minint(int a, int b);
47 | char* de_strtok(char **s, char t);
48 | unsigned countChar(char *s, const char c); // presumably the number of occurrences of c in s -- confirm
49 | char** extractSubStrings(char *nameList, unsigned n, const char delineator);
50 |
51 | #endif // COMMON_H_
52 |
--------------------------------------------------------------------------------
/inc/common.mk:
--------------------------------------------------------------------------------
1 | # Host facts, captured once at parse time (:= so the shell commands are not re-run on every expansion); SYS selects the compiler below.
2 | HOSTNAME := $(shell hostname)
3 | MACH := $(shell uname -m)
4 | SYS := $(shell uname -s)
5 |
6 | # C compiler (cxx), C++ compiler (cpp) and math-library link flag (lm), chosen per operating system
7 | ifeq (${SYS},FreeBSD)
8 | # default FreeBSD gcc (4.2.1) has warning bug
9 | # cxx = gcc46 -std=c99 -Wno-unused-but-set-variable
10 | cxx = gcc34 -std=c99 -Wno-unused-but-set-variable
11 | cpp = g++
12 | lm = -lm
13 | else ifeq (${SYS},Darwin) # This is to deal with the Mavericks replacing gcc with clang fully
14 | cxx = clang -std=c99 -stdlib=libstdc++
15 | cpp = clang++ -stdlib=libstdc++
16 | lm =
17 | else
18 | cxx = gcc -std=c99 -Wno-unused-but-set-variable
19 | cpp = g++
20 | lm = -lm
21 | endif
22 |
23 | # subset of JPL suggested flags (removed: -Wtraditional -Wcast-qual -Wconversion)
24 | jpl_flags = -Wshadow -Wpointer-arith -Wstrict-prototypes -Wmissing-prototypes
25 |
26 | # Release compiler flags (the set selected by cflags below)
27 | cflags_opt = -O3 -Wall -Werror --pedantic -funroll-loops -DNDEBUG ${jpl_flags}
28 |
29 | # Debug flags (slow)
30 | cflags_dbg = -Wall -Werror --pedantic -g -fno-inline -DBEN_DEBUG ${jpl_flags}
31 |
32 | # Ultra Debug flags (really slow); -D was missing, so the compiler read -BEN_ULTRA_DEBUG as a -B search prefix and never defined the macro
33 | cflags_ultraDbg = -Wall -Werror --pedantic -g -fno-inline -DBEN_DEBUG -DBEN_ULTRA_DEBUG
34 |
35 | # Profile flags
36 | cflags_prof = -Wall -Werror --pedantic -pg -O3 -g
37 | # Relative to a module directory (one level below the top), like the ../inc and ../external include paths below.
38 | sonLibPath = ../../sonLib/lib
39 |
40 | # Flags to use: cflags for regular builds, testFlags (unoptimized, with debug info) for test builds
41 | cflags = ${cflags_opt} -I ${sonLibPath} -I ../inc -I ../external
42 | testFlags = -O0 -g -Wall -Werror --pedantic -I ${sonLibPath} -I ../inc -I ../external
43 | #cflags = ${cflags_dbg}
44 |
45 | # location of Tokyo cabinet: probe known install prefixes for tcbdb.h; the first hit sets the include and link flags
46 | ifneq ($(wildcard /hive/groups/recon/local/include/tcbdb.h),)
47 | # hgwdev hive install
48 | tcPrefix = /hive/groups/recon/local
49 | tokyoCabinetIncl = -I ${tcPrefix}/include
50 | tokyoCabinetLib = -L${tcPrefix}/lib -Wl,-rpath,${tcPrefix}/lib -ltokyocabinet -lz -lbz2 -lpthread
51 | else ifneq ($(wildcard /opt/local/include/tcbdb.h),)
52 | # OS/X with TC installed from MacPorts
53 | tcPrefix = /opt/local
54 | tokyoCabinetIncl = -I ${tcPrefix}/include
55 | tokyoCabinetLib = -L${tcPrefix}/lib -Wl,-rpath,${tcPrefix}/lib -ltokyocabinet -lz -lbz2 -lpthread
56 | else ifneq ($(wildcard /usr/local/include/tcbdb.h),)
57 | # /usr/local install (FreeBSD, etc)
58 | tcPrefix = /usr/local
59 | tokyoCabinetIncl = -I ${tcPrefix}/include
60 | tokyoCabinetLib = -L ${tcPrefix}/lib -Wl,-rpath,${tcPrefix}/lib -ltokyocabinet -lz -lbz2 -lpthread
61 | else
62 | # default: no header found at any probed prefix, so add no include path and link by library name only
63 | tokyoCabinetIncl =
64 | tokyoCabinetLib = -ltokyocabinet -lz -lbz2 -lpthread
65 | endif
66 | # Only the include path is appended to cflags here; the link flags are collected into dblibs.
67 | cflags += ${tokyoCabinetIncl}
68 |
69 | # location of mysql: when a header is found, mysqlIncl gains -DHAVE_MYSQL=1 and mysqlLibs names the client library; otherwise neither is set here
70 | ifneq ($(wildcard /usr/include/mysql/mysql.h),)
71 | mysqlIncl = -I /usr/include/mysql -DHAVE_MYSQL=1
72 | ifneq ($(wildcard /usr/lib64/mysql/libmysqlclient.a),)
73 | mysqlLibs = /usr/lib64/mysql/libmysqlclient.a
74 | else
75 | mysqlLibs = /usr/lib/libmysqlclient.a
76 | endif
77 | else ifneq ($(wildcard /usr/local/mysql/include/mysql.h),)
78 | mysqlIncl = -I /usr/local/mysql/include -DHAVE_MYSQL=1
79 | mysqlLibs = -L/usr/local/mysql/lib -lmysqlclient
80 | endif
81 |
82 | # location of PostgreSQL: probe /usr/local first, then /usr; a hit defines HAVE_POSTGRESQL=1 and sets pgsqlLibs, otherwise neither is set here
83 | ifneq ($(wildcard /usr/local/include/libpq-fe.h),)
84 | pgsqlIncl = -I /usr/local/include -DHAVE_POSTGRESQL=1
85 | pgsqlLibs = -L /usr/local/lib -lpq
86 | else ifneq ($(wildcard /usr/include/libpq-fe.h),)
87 | pgsqlIncl = -DHAVE_POSTGRESQL=1
88 | pgsqlLibs = /usr/lib64/libpq.a -lkrb5 -lgssapi -lcrypto -lssl -lcrypt -lldap
89 | endif
90 | # All database link flags combined; any piece left unset above expands to nothing.
91 | dblibs = ${tokyoCabinetLib} ${mysqlLibs} ${pgsqlLibs}
92 |
--------------------------------------------------------------------------------
/inc/sharedMaf.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2012 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #ifndef SHAREDMAF_H_
26 | #define SHAREDMAF_H_
27 | #include
28 | #include
29 |
30 | typedef struct mafFileApi mafFileApi_t;
31 | typedef struct mafBlock mafBlock_t;
32 | typedef struct mafLine mafLine_t;
33 | // The three structs above are only forward-declared in this header, so callers work through the maf_* functions below.
34 | // creators, destroyers (a name ending in "List" presumably covers a whole chain linked through the *_getNext accessors -- confirm in sharedMaf.c)
35 | mafFileApi_t* maf_newMfa(const char *filename, char const *mode);
36 | mafBlock_t* maf_newMafBlock(void);
37 | mafBlock_t* maf_newMafBlockFromString(const char *s, uint64_t lineNumber);
38 | mafBlock_t* maf_newMafBlockListFromString(const char *s, uint64_t lineNumber);
39 | mafLine_t* maf_newMafLine(void);
40 | mafLine_t* maf_newMafLineFromString(const char *s, uint64_t lineNumber);
41 | mafBlock_t* maf_copyMafBlock(mafBlock_t *orig);
42 | mafBlock_t* maf_copyMafBlockList(mafBlock_t *orig);
43 | mafLine_t* maf_copyMafLine(mafLine_t *orig);
44 | mafLine_t* maf_copyMafLineList(mafLine_t *orig);
45 | void maf_destroyMafLineList(mafLine_t *ml);
46 | void maf_destroyMafBlockList(mafBlock_t *mb);
47 | void maf_destroyMfa(mafFileApi_t *mfa);
48 | void maf_mafBlock_destroySequenceMatrix(char **mat, unsigned n);
49 | // read / write (every call goes through a mafFileApi_t handle, the type returned by maf_newMfa)
50 | mafBlock_t* maf_readAll(mafFileApi_t *mfa);
51 | mafBlock_t* maf_readBlock(mafFileApi_t *mfa);
52 | mafBlock_t* maf_readBlockHeader(mafFileApi_t *mfa);
53 | mafBlock_t* maf_readBlockBody(mafFileApi_t *mfa);
54 | void maf_writeAll(mafFileApi_t *mfa, mafBlock_t *mb);
55 | void maf_writeBlock(mafFileApi_t *mfa, mafBlock_t *mb);
56 | // maf_mafFileApi_getLineNumber() was declared here a second time; the declaration in the getters section is the one kept.
57 | // getters (maf_mafFileApi_*, maf_mafBlock_* and maf_mafLine_* each read from the handle type named in the prefix)
58 | char* maf_mafFileApi_getFilename(mafFileApi_t *mfa);
59 | uint64_t maf_mafFileApi_getLineNumber(mafFileApi_t *mfa);
60 | mafLine_t* maf_mafBlock_getHeadLine(mafBlock_t *mb);
61 | mafLine_t* maf_mafBlock_getTailLine(mafBlock_t *mb);
62 | uint64_t maf_mafBlock_getLineNumber(mafBlock_t *mb);
63 | uint64_t maf_mafBlock_getNumberOfLines(mafBlock_t *b);
64 | uint64_t maf_mafBlock_getNumberOfSequences(mafBlock_t *b);
65 | char* maf_mafBlock_getStrandArray(mafBlock_t *mb); // NOTE(review): ownership of the returned arrays in this group is not visible from the header -- confirm in sharedMaf.c
66 | int* maf_mafBlock_getStrandIntArray(mafBlock_t *mb);
67 | uint64_t* maf_mafBlock_getPosCoordStartArray(mafBlock_t *mb);
68 | uint64_t* maf_mafBlock_getPosCoordLeftArray(mafBlock_t *mb);
69 | uint64_t* maf_mafBlock_getStartArray(mafBlock_t *mb);
70 | uint64_t* maf_mafBlock_getSourceLengthArray(mafBlock_t *mb);
71 | uint64_t* maf_mafBlock_getSequenceLengthArray(mafBlock_t *mb);
72 | char** maf_mafBlock_getSpeciesArray(mafBlock_t *mb);
73 | mafBlock_t* maf_mafBlock_getNext(mafBlock_t *mb);
74 | char** maf_mafBlock_getSequenceMatrix(mafBlock_t *mb, unsigned n, unsigned m); // presumably released with maf_mafBlock_destroySequenceMatrix -- confirm
75 | mafLine_t** maf_mafBlock_getMafLineArray_seqOnly(mafBlock_t *mb);
76 | uint64_t maf_mafBlock_getSequenceFieldLength(mafBlock_t *mb);
77 | char* maf_mafLine_getLine(mafLine_t *ml);
78 | uint64_t maf_mafLine_getLineNumber(mafLine_t *ml);
79 | char maf_mafLine_getType(mafLine_t *ml);
80 | char* maf_mafLine_getSpecies(mafLine_t *ml);
81 | uint64_t maf_mafLine_getStart(mafLine_t *ml);
82 | uint64_t maf_mafLine_getLength(mafLine_t *ml);
83 | char maf_mafLine_getStrand(mafLine_t *ml);
84 | uint64_t maf_mafLine_getSourceLength(mafLine_t *ml);
85 | char* maf_mafLine_getSequence(mafLine_t *ml);
86 | uint64_t maf_mafLine_getSequenceFieldLength(mafLine_t *ml);
87 | mafLine_t* maf_mafLine_getNext(mafLine_t *ml);
88 | // setters (each takes the handle to modify first, then the new value; the increment/decrement variants take only the handle)
89 | void maf_mafBlock_setHeadLine(mafBlock_t *mb, mafLine_t *ml);
90 | void maf_mafBlock_setTailLine(mafBlock_t *mb, mafLine_t *ml);
91 | void maf_mafBlock_setNumberOfLines(mafBlock_t *mb, uint64_t n);
92 | void maf_mafBlock_incrementNumberOfLines(mafBlock_t *mb);
93 | void maf_mafBlock_decrementNumberOfLines(mafBlock_t *mb);
94 | void maf_mafBlock_setNumberOfSequences(mafBlock_t *mb, uint64_t n);
95 | void maf_mafBlock_incrementNumberOfSequences(mafBlock_t *mb);
96 | void maf_mafBlock_decrementNumberOfSequences(mafBlock_t *mb);
97 | void maf_mafBlock_setLineNumber(mafBlock_t *mb, uint64_t n);
98 | void maf_mafBlock_incrementLineNumber(mafBlock_t *mb);
99 | void maf_mafBlock_decrementLineNumber(mafBlock_t *mb);
100 | void maf_mafBlock_setSequenceFieldLength(mafBlock_t *mb, uint64_t sfl);
101 | void maf_mafBlock_setNext(mafBlock_t *mb, mafBlock_t *next);
102 | void maf_mafBlock_appendToAlignmentBlock(mafBlock_t *m, char *s);
103 | void maf_mafLine_setLine(mafLine_t *ml, char *line); // NOTE(review): whether the char* setters copy or keep the caller's pointer is not visible from the header -- confirm in sharedMaf.c
104 | void maf_mafLine_setLineNumber(mafLine_t *ml, uint64_t n);
105 | void maf_mafLine_setType(mafLine_t *ml, char c);
106 | void maf_mafLine_setSpecies(mafLine_t *ml, char *s);
107 | void maf_mafLine_setStrand(mafLine_t *ml, char c);
108 | void maf_mafLine_setStart(mafLine_t *ml, uint64_t n);
109 | void maf_mafLine_setLength(mafLine_t *ml, uint64_t n);
110 | void maf_mafLine_setSourceLength(mafLine_t *ml, uint64_t n);
111 | void maf_mafLine_setSequence(mafLine_t *ml, char *s);
112 | void maf_mafLine_setNext(mafLine_t *ml, mafLine_t *next);
113 | // utilities (umax, countNonGaps and the sequence/name helpers carry no maf_ prefix but are still exported by this header)
114 | unsigned maf_mafBlock_getNumberOfBlocks(mafBlock_t *b); // returns unsigned, whereas the other count getters in this header return uint64_t
115 | bool maf_mafBlock_containsSequence(mafBlock_t *m);
116 | char* maf_mafLine_imputeLine(mafLine_t* ml);
117 | uint64_t maf_mafLine_getNumberOfSequences(mafLine_t *m);
118 | uint64_t maf_mafLine_getPositiveCoord(mafLine_t *ml);
119 | uint64_t maf_mafLine_getPositiveLeftCoord(mafLine_t *ml);
120 | unsigned umax(unsigned a, unsigned b);
121 | uint64_t countNonGaps(char *seq);
122 | void maf_mafBlock_flipStrand(mafBlock_t *mb);
123 | void reverseComplementSequence(char *s, size_t n); // in-place reverse complement
124 | void complementSequence(char *s, size_t n); // presumably also in place (void return, mutable char*) -- confirm
125 | char complementChar(char c);
126 | char *copySpeciesName(const char *s); // hg18.chr1 -> hg18
127 | char *copyChromosomeName(const char *s); // hg18.chr1 -> chr1
128 | // print (the destination is not visible from the header; presumably stdout -- confirm in sharedMaf.c)
129 | void maf_mafBlock_printList(mafBlock_t *m);
130 | void maf_mafBlock_print(mafBlock_t *m);
131 | #endif // SHAREDMAF_H_
132 |
--------------------------------------------------------------------------------
/inc/test.common.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2012 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #ifndef TEST_COMMON_H_
26 | #define TEST_COMMON_H_
27 | #include
28 | #include "CuTest.h"
29 | #include "common.h"
30 |
31 | static void test_de_malloc(CuTest *testCase) { /* de_malloc(100) must give a writable buffer */
32 |     assert(testCase != NULL);
33 |     char *buffer = (char *) de_malloc(100);
34 |     CuAssertTrue(testCase, buffer != NULL);
35 |     for (char *cell = buffer; cell != buffer + 100; ++cell) {
36 |         *cell = 0; /* store a byte, then read the same byte back */
37 |         CuAssertIntEquals(testCase, *cell, 0);
38 |     }
39 |     free(buffer);
40 | }
41 |
42 | CuSuite* common_TestSuite(void) { /* new suite holding the de_malloc test */
43 |     CuSuite* commonSuite = CuSuiteNew();
44 |     SUITE_ADD_TEST(commonSuite, test_de_malloc); /* the only test registered here */
45 |     return commonSuite;
46 | }
47 |
48 | #endif // TEST_COMMON_H_
49 |
--------------------------------------------------------------------------------
/inc/test.sharedMaf.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2012 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #ifndef TEST_SHAREDMAF_H_
26 | #define TEST_SHAREDMAF_H_
27 | #include
28 | #include
29 | #include
30 | #include
31 | #include
32 | #include
33 | #include
34 | #include "CuTest.h"
35 | #include "common.h"
36 | #include "sharedMaf.h"
37 |
38 | int createTmpFolder(void);
39 | void writeStringToTmpFile(char *s);
40 | bool filesAreIdentical(char *fileA, char *fileB);
41 |
42 | CuSuite* mafShared_TestSuite(void);
43 | #endif // TEST_SHAREDMAF_H_
44 |
--------------------------------------------------------------------------------
/lib/Makefile:
--------------------------------------------------------------------------------
1 | SHELL=/bin/bash
2 | # ${lm} is used below but never defined in this file; presumably it is
3 | # supplied by common.mk -- confirm there.
4 | include ../inc/common.mk
5 | # With no prerequisites, .SECONDARY stops make from auto-deleting any
6 | # intermediate file it builds.
7 | .SECONDARY:
8 | .PHONY: all clean test
9 |
10 | cc = gcc
11 | # Flags shared by every compile here. They contain -O3, and gcc honours only
12 | # the LAST -O option on its command line, so a rule that wants an
13 | # unoptimised build must place "-g -O0" after ${args}.
14 | args = -std=c99 -O3 -Wextra -Wall -Werror -pedantic -I ../external/ -I ../inc/
15 | inc = ../inc
16 |
17 | objects = common.o sharedMaf.o ../external/CuTest.a
18 | testObjects := test/sharedMaf.o test/common.o ../external/CuTest.a
19 |
20 | all: ${objects}
21 |
22 | clean:
23 | 	rm -f allTests *.o *.pyc
24 |
25 | # Unit-test driver, linked against the debug objects under test/.
26 | allTests: allTests.c ${inc}/test.sharedMaf.h test.sharedMaf.c ${testObjects}
27 | 	mkdir -p test
28 | 	${cc} ${args} -g -O0 allTests.c test.sharedMaf.c ${testObjects} -o $@.tmp ${lm}
29 | 	mv $@.tmp $@
30 |
31 | # Each output is written to a .tmp name and renamed on success, so an
32 | # interrupted compile never leaves a truncated file that looks up to date.
33 | %.o: %.c ${inc}/%.h
34 | 	${cc} -O3 -c ${args} $< -o $@.tmp
35 | 	mv $@.tmp $@
36 |
37 | sharedMaf.o: sharedMaf.c ${inc}/sharedMaf.h
38 | 	${cc} -O3 -c ${args} sharedMaf.c -o $@.tmp ${lm}
39 | 	mv $@.tmp $@
40 |
41 | # Debug (-g -O0) objects used by allTests; the temporary file lives next to
42 | # the target (test/...) rather than in the current directory.
43 | test/%.o: %.c ${inc}/%.h
44 | 	mkdir -p $(dir $@)
45 | 	${cc} -c ${args} -g -O0 $< -o $@.tmp ${lm}
46 | 	mv $@.tmp $@
47 |
48 | test/sharedMaf.o: sharedMaf.c ${inc}/sharedMaf.h
49 | 	mkdir -p $(dir $@)
50 | 	${cc} -c ${args} -g -O0 sharedMaf.c -o $@.tmp ${lm}
51 | 	mv $@.tmp $@
52 |
53 | # Build artefacts are removed only when both test stages succeed.
54 | test: allTests
55 | 	./allTests && python2.7 test.sharedMaf.py --verbose && rm -rf ./allTests ./test ./test_tmp
56 |
57 | # The compile leaves CuTest.o in the current directory; it is archived here
58 | # and the finished archive is then moved next to the CuTest sources.
59 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h
60 | 	${cc} -c ${args} $<
61 | 	ar rc CuTest.a CuTest.o
62 | 	ranlib CuTest.a
63 | 	rm -f CuTest.o
64 | 	mv CuTest.a $@
65 |
--------------------------------------------------------------------------------
/lib/allTests.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2012 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #include
26 | #include
27 | #include "CuTest.h"
28 | #include "test.common.h"
29 | #include "test.sharedMaf.h"
30 |
31 | CuSuite* mafShared_TestSuite(void);
32 |
33 | int include_RunAllTests(void) { /* run both suites, print the report, return 1 if any test failed */
34 |     CuString *report = CuStringNew();
35 |     CuSuite *master = CuSuiteNew();
36 |     CuSuite *commonSuite = common_TestSuite();
37 |     CuSuite *mafSuite = mafShared_TestSuite();
38 |     CuSuiteAddSuite(master, commonSuite);
39 |     CuSuiteAddSuite(master, mafSuite);
40 |     CuSuiteRun(master);
41 |     CuSuiteSummary(master, report);
42 |     CuSuiteDetails(master, report);
43 |     printf("%s\n", report->buffer);
44 |     CuStringDelete(report);
45 |     const int anyFailed = (master->failCount > 0) ? 1 : 0;
46 |     free(commonSuite); /* NOTE(review): plain free() of the sub-suites -- presumably master now owns their tests; confirm in CuTest.c */
47 |     free(mafSuite);
48 |     CuSuiteDelete(master);
49 |     return anyFailed;
50 | }
51 | int main(void) { /* exit status is whatever include_RunAllTests() returns */
52 | return include_RunAllTests();
53 | }
54 |
--------------------------------------------------------------------------------
/lib/createVersionSources.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python2.7
2 | import os
3 | import subprocess
4 | import sys
5 | import time
6 | sys.path.append(  # add <directory of this script>/../../inc/ to the module search path
7 | os.path.abspath(
8 | os.path.join(os.path.dirname(sys.argv[0]), '../../inc/')))
9 | import mafToolsTest as mtt
10 |
11 | BOILERPLATE = '''/*
12 | * Copyright (C) 2009-2014 by
13 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
14 | * Benedict Paten (benedict@soe.ucsc.edu, benedictpaten@gmail.com)
15 | * Mark Diekhans (markd@soe.ucsc.edu)
16 | * ... and other members of the Reconstruction Team of David Haussler's
17 | * lab (BME Dept. UCSC).
18 | *
19 | * Permission is hereby granted, free of charge, to any person obtaining a copy
20 | * of this software and associated documentation files (the "Software"), to deal
21 | * in the Software without restriction, including without limitation the rights
22 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
23 | * copies of the Software, and to permit persons to whom the Software is
24 | * furnished to do so, subject to the following conditions:
25 | *
26 | * The above copyright notice and this permission notice shall be included in
27 | * all copies or substantial portions of the Software.
28 | *
29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
34 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
35 | * THE SOFTWARE.
36 | */
37 | '''
38 | GIT = mtt.which('git')
39 |
40 |
41 | def runCommand(cmd):
42 | p = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)
43 | pout, perr = p.communicate()
44 | mtt.handleReturnCode(p.returncode, cmd)
45 | return pout
46 |
47 |
48 | def getBranch():
49 | branchList = runCommand([GIT, 'branch']).split('\n')
50 | for b in branchList:
51 | if b.startswith('* '):
52 | return b[2:]
53 |
54 |
55 | def getSha():  # output of `git rev-parse HEAD` with surrounding whitespace stripped
56 | return runCommand([GIT, 'rev-parse', 'HEAD']).strip()
57 |
58 |
59 | def writeHeader(location):
60 | f = open(os.path.join(location, 'buildVersion.h'), 'w')
61 | f.write(BOILERPLATE)
62 | f.write('#ifndef _BUILD_VERSION_H_\n')
63 | f.write('#define _BUILD_VERSION_H_\n')
64 | f.write('extern const char g_build_date[];\n')
65 | f.write('extern const char g_build_git_branch[];\n')
66 | f.write('extern const char g_build_git_sha[];\n')
67 | f.write('#endif // _BUILD_VERSION_H_\n')
68 | f.close()
69 |
70 |
71 | def writeSource(location, buildDate, buildBranch, buildSha):
72 | f = open(os.path.join(location, 'buildVersion.c'), 'w')
73 | f.write(BOILERPLATE)
74 | f.write('#include "buildVersion.h"\n\n')
75 | f.write('const char g_build_date[] = "%s";\n' % buildDate)
76 | f.write('const char g_build_git_branch[] = "%s";\n' % buildBranch)
77 | f.write('const char g_build_git_sha[] = "%s";\n' % buildSha)
78 | f.close()
79 |
80 |
81 | def main():
82 | if GIT is None:
83 | raise RuntimeError('Error, unable to locate git, is it installed?')
84 | location = os.path.join(os.curdir, 'src')
85 | buildDate = time.strftime('%Y-%m-%dT%H:%M%Z', time.localtime()) # gmtime()
86 | buildBranch = getBranch()
87 | buildSha = getSha()
88 | writeHeader(location)
89 | writeSource(location, buildDate, buildBranch, buildSha)
90 |
91 |
92 | if __name__ == '__main__':
93 | main()
94 |
--------------------------------------------------------------------------------
/lib/test.sharedMaf.py:
--------------------------------------------------------------------------------
1 | ##################################################
2 | # Copyright (C) 2012 by
3 | # Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | # ... and other members of the Reconstruction Team of David Haussler's
5 | # lab (BME Dept. UCSC).
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
8 | # of this software and associated documentation files (the "Software"), to deal
9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in
15 | # all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | # THE SOFTWARE.
24 | ##################################################
25 | import os
26 | import sys
27 | import unittest
28 | import mafToolsTest as mtt
29 |
30 | class SharedMafLibraryTest(unittest.TestCase):
31 | def testMemory(self):
32 | """ sharedMaf.h should be memory clean.
33 | """
34 | mtt.makeTempDirParent()
35 | valgrind = mtt.which('valgrind')
36 | if valgrind is None:
37 | return
38 | tmpDir = os.path.abspath(mtt.makeTempDir('allTests'))
39 | cmd = mtt.genericValgrind(tmpDir)
40 | cmd.append(os.path.abspath(os.path.join(os.curdir, 'allTests')))
41 | mtt.runCommandsS([cmd], tmpDir)
42 | self.assertTrue(mtt.noMemoryErrors(os.path.join(tmpDir, 'valgrind.xml')))
43 | mtt.removeDir(tmpDir)
44 |
45 | if __name__ == '__main__':
46 | unittest.main()
47 |
--------------------------------------------------------------------------------
/mafComparator/.gitignore:
--------------------------------------------------------------------------------
1 | tempTestFiles/*
2 | test/*
3 | src/buildVersion.*
4 |
--------------------------------------------------------------------------------
/mafComparator/Makefile:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2009-2013 by
2 | # Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
3 | # Benedict Paten (benedict@soe.ucsc.edu, benedictpaten@gmail.com)
4 | # Mark Diekhans (markd@soe.ucsc.edu)
5 | # ... and other members of the Reconstruction Team of David Haussler's
6 | # lab (BME Dept. UCSC).
7 | #
8 | # Permission is hereby granted, free of charge, to any person obtaining a copy
9 | # of this software and associated documentation files (the "Software"), to deal
10 | # in the Software without restriction, including without limitation the rights
11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | # copies of the Software, and to permit persons to whom the Software is
13 | # furnished to do so, subject to the following conditions:
14 | #
15 | # The above copyright notice and this permission notice shall be included in
16 | # all copies or substantial portions of the Software.
17 | #
18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | # THE SOFTWARE.
25 |
26 | # ${sonLibPath}, ${cxx}, ${cflags}, ${testFlags} and ${lm} are not defined in
27 | # this file; presumably they are supplied by common.mk -- confirm there.
28 | include ../inc/common.mk
29 | binPath = ../bin
30 | dependencies = $(wildcard ../inc/common.*) $(wildcard ../lib/common.*) $(wildcard ../inc/sharedMaf.*) $(wildcard ../lib/sharedMaf.*) $(wildcard ${sonLibPath}/*) ${sonLibPath}/sonLib.a ${sonLibPath}/stPinchesAndCacti.a src/allTests.c
31 | extraAPI = src/cString.c ../lib/sharedMaf.o ../external/CuTest.a ../lib/common.o src/comparatorRandom.o src/comparatorAPI.o ${sonLibPath}/sonLib.a src/buildVersion.o
32 | testAPI = src/cString.c test/sharedMaf.o ../external/CuTest.a test/common.o test/comparatorRandom.o test/comparatorAPI.o ${sonLibPath}/sonLib.a test/buildVersion.o
33 | progs = $(foreach f, mafComparator mafPairCounter, ${binPath}/$f)
34 | testObjects = test/test.comparatorAPI.o test/test.comparatorRandom.o
35 | sources = $(foreach f, comparatorAPI cString comparatorRandom test.comparatorAPI test.comparatorRandom, src/$f.c) src/allTests.c src/mafComparator.c src/mafPairCounter.c src/testRand.c
36 |
37 | .PHONY: all clean test buildVersion
38 |
39 | all: buildVersion ${progs}
40 | buildVersion: src/buildVersion.c
41 | # Regenerate the version stamp whenever a source or a dependency changes.
42 | # $(wildcard ...) keeps only the dependencies that currently exist, so a file
43 | # that is absent on this machine does not become a "no rule to make target" error.
44 | src/buildVersion.c: ${sources} $(wildcard ${dependencies})
45 | 	@python ../lib/createVersionSources.py
46 |
47 | # Delegate to the library Makefile. $(MAKE) rather than a literal "make" lets
48 | # options such as -j and -n reach the sub-make.
49 | ../lib/%.o: ../lib/%.c ../inc/%.h
50 | 	$(MAKE) -C ../lib
51 |
52 | ${binPath}/%: src/%.c ${extraAPI}
53 | 	@mkdir -p $(dir $@)
54 | 	${cxx} -o $@.tmp $^ ${cflags} ${lm}
55 | 	mv $@.tmp $@
56 |
57 | test/%: src/%.c ${testAPI} $(wildcard src/*.h)
58 | 	@mkdir -p $(dir $@)
59 | 	${cxx} -o $@.tmp $^ ${testFlags} ${lm}
60 | 	mv $@.tmp $@
61 |
62 | ${binPath}/%.py: src/%.py
63 | 	@mkdir -p $(dir $@)
64 | 	cp $< $@.tmp
65 | 	chmod +x $@.tmp
66 | 	mv $@.tmp $@
67 |
68 | %.o: %.c %.h
69 | 	${cxx} -c $< -o $@.tmp ${cflags}
70 | 	mv $@.tmp $@
71 | test/%.o: ../lib/%.c ../inc/%.h
72 | 	mkdir -p $(dir $@)
73 | 	${cxx} -c $< -o $@.tmp ${testFlags} ${lm}
74 | 	mv $@.tmp $@
75 | test/test.comparatorAPI.o: src/test.comparatorAPI.c src/test.comparatorAPI.h test/comparatorAPI.o
76 | 	mkdir -p $(dir $@)
77 | 	${cxx} -c $< -o $@.tmp ${testFlags} ${lm}
78 | 	mv $@.tmp $@
79 | test/%.o: src/%.c src/%.h ${sonLibPath}/sonLib.a
80 | 	mkdir -p $(dir $@)
81 | 	${cxx} -c $< -o $@.tmp ${testFlags} ${lm}
82 | 	mv $@.tmp $@
83 |
84 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h
85 | 	${cxx} -c $< ${cflags}
86 | 	ar rc CuTest.a CuTest.o
87 | 	ranlib CuTest.a
88 | 	rm -f CuTest.o
89 | 	mv CuTest.a $@
90 |
91 | test: buildVersion test/allTests test/mafComparator test/testRand
92 | 	./test/allTests && python2.7 src/test.mafComparator.py --verbose && rm -rf ./test/ && rmdir ./tempTestDir
93 |
94 | test/allTests: src/allTests.c ${testAPI} ${testObjects} ${sonLibPath}/sonLib.a
95 | 	mkdir -p $(dir $@)
96 | 	${cxx} $^ -o $@.tmp ${testFlags} ${lm}
97 | 	mv $@.tmp $@
98 |
99 | # to actually use the testRand program, comment out the rm -rf on the "test:" rule and run "make test",
100 | # then you may run test/testRand
101 | test/testRand: src/testRand.c ${testAPI} ${sonLibPath}/sonLib.a
102 | 	mkdir -p $(dir $@)
103 | 	${cxx} $^ -o $@.tmp ${testFlags} ${lm}
104 | 	mv $@.tmp $@
105 |
106 | clean:
107 | 	rm -f *.o ${progs} src/*.o && rm -rf ./test/ src/buildVersion.c src/buildVersion.h
108 |
--------------------------------------------------------------------------------
/mafComparator/README.md:
--------------------------------------------------------------------------------
1 | # mafComparator
2 |
3 | February 2011 -- August 2012
4 |
5 | ## Authors
6 |
7 | [Dent Earl](https://github.com/dentearl/), [Benedict Paten](https://github.com/benedictpaten/)
8 |
9 | ## Description
10 | This program takes two [MAF](http://genome.ucsc.edu/FAQ/FAQformat#format5) files and compares them to one another.
11 | Specifically, for each ordered pair of sequences in the first MAF it
12 | samples a predefined number of sample homology tests (see below), then
13 | reads the second MAF checking to see which, if any, of the sampled pairs,
14 | is present. The comparison is then reversed and repeated. Statistics are
15 | then reported in an XML formatted file. MafComparator is suitable for
16 | running over very large alignments (those with many positions), because
17 | it does not attempt to hold everything in memory but instead takes a
18 | sampling approach.
19 |
20 | For two sets of pairwise alignments, **A** and **B**, a homology test is
21 | defined as follows. Pick a pair of aligned positions in **A**, called a
22 | homology pair -- the **AB** homology test returns _true_ if the pair is present in **B**,
23 | otherwise it returns _false_. The set of possible homology tests for the
24 | ordered pair (**A**, **B**) is not necessarily equivalent to the set of
25 | possible (**B**, **A**) homology tests. We call the proportion of _true_ tests
26 | (as a percentage of the total of a set of **C** many homology tests), from
27 | (**A**, **B**) **A~B**.
28 |
29 | If **A** is the set of true pairwise alignments and **B** the predicted set of
30 | alignments then **A~B** (over large enough **C**), is a proxy to
31 | [_sensitivity_](http://en.wikipedia.org/wiki/Sensitivity_and_specificity)
32 | of **B** in predicting the set of correctly aligned pairs in **A**. Conversely
33 | **B~A** (over large enough **C**) is a proxy to the
34 | [_specificity_](http://en.wikipedia.org/wiki/Sensitivity_and_specificity) of the
35 | aligned pairs in **B** with respect to the set of correctly aligned pairs
36 | in **A**.
37 |
38 | ## Dependencies
39 | * sonLib https://github.com/benedictpaten/sonLib/
40 |
41 | ## Installation
42 | 1. Download the package. Consider making the parent of mafComparator a sibling directory to `sonLib`.
43 | 2. `cd` into the directory.
44 | 3. Type `make`.
45 |
46 | ## Use
47 | mafComparator --maf1=FILE1 --maf2=FILE2 --out=OUT.xml [options]
48 |
49 | ### Options
50 | * mafComparator, version 0.6 July 2012
51 | * `-a --logLevel` : Set the log level. [off, critical, info, debug] in ascending order
52 | * `--maf1` : The location of the first MAF file. If comparing true to predicted alignments, this is the truth.
53 | * `--maf2` : The location of the second MAF file.
54 | * `--out` : The output XML formatted results file.
55 | * `--samples` : The ideal number of sample homology tests to perform for the two comparisons (i.e. file1 -> file2 and file2 -> file1). This number is an ideal because pairs are sampled and thus the actual number may be slightly higher or slightly lower than this value. If this value is equal to or greater than the total number of pairs in a file, then all pairs will be tested. [default 1000000]
56 | * `-g --near` : The number of bases in either sequence to allow a match to slip by. I.e. `--near=n` (where _n_ is a non-negative integer) will consider a homology test for a given pair (**S1**:_x_, **S2**:_y_) where **S1** and **S2** are sequences and _x_ and _y_ are positions in the respective sequences, to be a true homology test so long as there is a pair within the other alignment (**S1**:_w_, **S2**:_z_) where EITHER (_w_ is equal to _x_ and _y_ - _n_ <= _z_ <= _y_ + _n_) OR (_x_ - _n_ <= _w_ <= _x_ + _n_ and _y_ is equal to _z_).
57 | * `--bedFiles` : The location of bed file(s) used to filter the pairwise comparisons. Comma separated list.
58 | * `--wigglePairs` : The key-value paired names of sequences (comma separated pairs, colon separated key values) to create output that isolates event counts to specific regions of one genome (the first genome in the pair). The asterisk, \*, can be used as wildcard character. i.e. hg19\*:mm9\* will match hg19.chr1 and mm9.chr1 etc etc resulting in all pairs between hg19\* and mm9\*. This feature ignores any intervals described with the `--bedFiles` option.
59 | * `--wiggleRegionStart` : The starting base (inclusive) of the sub-region to analyze. Do not set if you wish to use the entire sequence.
60 | * `--wiggleRegionStop` : The ending base (inclusive) of the sub-region to analyze. Do not set if you wish to use the entire sequence.
61 | * `--wiggleBinLength` : The length of the bins when the `--wigglePairs` option is invoked. [default: 100000]
62 | * `--numberOfPairs` : A pair of comma separated positive integers representing the total number of pairs in maf1 and maf2 (in that order). These numbers are double checked by mafComparator as it runs, a discrepancy will cause an error. If these values are known prior to the analysis (either because the analysis has been run before or by use of the mafPairCounter program) this option provides about a 15% speedup. Example: --numberOfPairs 2847390129,228470192212
63 | * `--legitSequences` : A list of comma separated key value pairs, which themselves are colon (:) separated. Each pair is a sequence name and source length. These values are normally determined by reading all sequences and source lengths from maf1 and then again from maf2 and then finding the intersection of the two sets. The source lengths are verified by mafComparator as it runs and discrepancies will cause errors. If this option is invoked it can result in a speedup of about 15%. Example: --legitSequences apple.chr1:100,apple.chr2:102,pineapple.chr1:2010
64 | * `-s --seed` : An integer to seed the random number generator. Omitting this causes the seed to be pseudorandom (via `time()` and `getpid()`). The seed value is always stored in the output xml.
65 | * `-v --version` : Print current version number.
66 | * `-h --help` : Print this help screen.
67 |
68 | ## Example
69 | Two mafs are included in the example/ directory and can be compared using the command:
70 |
71 | $ mafComparator --maf1 example/a.maf --maf2 example/b.maf --out comparison_a-b.xml
72 |
73 | You may note in the output that there are no comparisons for the sequences that are found only in `b.maf`, i.e. sequences D, E and F. The hash of sequence names used for comparisons is populated using the intersection of the sequence names from the `--maf1` and `--maf2` inputs. Sequences that only appear in `--maf1` or only appear in `--maf2` input are ignored.
74 |
--------------------------------------------------------------------------------
/mafComparator/example/a.maf:
--------------------------------------------------------------------------------
1 | ##maf version=1 scoring=tba.v8
2 |
3 | a score=23262.0
4 | s A.chr0 116834 38 + 4622798 AAA-GGGAATGTTAACCAAATGA---GTTGTCTCTTATGGTG
5 | s B.chr1 116834 38 + 4622798 AAA-GGGAATGTTAACCAAATGA---GTTGTCTCTTATGGTG
6 | s C.chr1 28741140 38 + 161576975 AAA-GGGAATGTTAACCAAATGA---ATTGTCTCTTACGGTG
7 |
8 | a score=5062.0
9 | s A.chr0 241163 6 + 4622798 TAAAGA
10 | s C.chr1 28862317 6 + 161576975 TAAAGA
11 |
12 | a score=6636.0
13 | s A.chr0 249182 13 + 4622798 gcagctgaaaaca
14 | s C.chr1 28869787 13 + 161576975 gcagctgaaaaca
15 |
16 |
--------------------------------------------------------------------------------
/mafComparator/example/b.maf:
--------------------------------------------------------------------------------
1 | ##maf version=1 scoring=tba.v8
2 |
3 | a score=23262.0
4 | s B.chr1 116834 38 + 4622798 AAA-GGGAATGTTAACCAAATGA---GTTGTCTCTTATGGTG
5 | s C.chr1 28741140 38 + 161576975 AAA-GGGAATGTTAACCAAATGA---ATTGTCTCTTACGGTG
6 | s D.chr0 28741140 35 + 161576975 AAA----AATGTTAACCAAATGA---ATTGTCTCTTACGGTG
7 |
8 | a score=5062.0
9 | s A.chr0 241163 6 + 4622798 TAA----AGA
10 | s C.chr1 28862317 6 + 161576975 TAA----AGA
11 | s E.chr9 500000 10 + 100000000 TAACCCCAGA
12 |
13 | a score=6636.0
14 | s A.chr0 249182 13 + 4622798 gcagctgaaaaca
15 | s C.chr1 28869787 13 + 161576975 gcagctgaaaaca
16 | s F.chr5 800000 5 + 20000000 gca--------ca
17 |
18 |
--------------------------------------------------------------------------------
/mafComparator/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ComparativeGenomicsToolkit/mafTools/259e5b47fa2ee17ff5ad1bba9cebf2992cbb7228/mafComparator/src/__init__.py
--------------------------------------------------------------------------------
/mafComparator/src/allTests.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 |
26 | #include
27 | #include
28 | #include "CuTest.h"
29 | #include "comparatorAPI.h"
30 | #include "test.comparatorAPI.h"
31 | #include "test.comparatorRandom.h"
32 |
33 | CuSuite* comparatorAPI_TestSuite(void);
34 | CuSuite* comparatorRandom_TestSuite(void);
35 |
36 | int comparator_RunAllTests(void) { /* run both suites, print the report, return 1 if any test failed */
37 |     CuString *report = CuStringNew();
38 |     CuSuite *master = CuSuiteNew();
39 |     CuSuite *apiSuite = comparatorAPI_TestSuite();
40 |     CuSuite *randomSuite = comparatorRandom_TestSuite();
41 |     CuSuiteAddSuite(master, apiSuite);
42 |     CuSuiteAddSuite(master, randomSuite);
43 |     CuSuiteRun(master);
44 |     CuSuiteSummary(master, report);
45 |     CuSuiteDetails(master, report);
46 |     printf("%s\n", report->buffer);
47 |     CuStringDelete(report);
48 |     const int anyFailed = (master->failCount > 0) ? 1 : 0;
49 |     free(apiSuite); /* NOTE(review): plain free() of the sub-suites -- presumably master now owns their tests; confirm in CuTest.c */
50 |     free(randomSuite);
51 |     CuSuiteDelete(master);
52 |     return anyFailed;
53 | }
54 | int main(void) { /* exit status is whatever comparator_RunAllTests() returns */
55 | return comparator_RunAllTests();
56 | }
57 |
--------------------------------------------------------------------------------
/mafComparator/src/cString.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2009-2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * Benedict Paten (benedict@soe.ucsc.edu, benedictpaten@gmail.com)
5 | * Mark Diekhans (markd@soe.ucsc.edu)
6 | * ... and other members of the Reconstruction Team of David Haussler's
7 | * lab (BME Dept. UCSC).
8 | *
9 | * Permission is hereby granted, free of charge, to any person obtaining a copy
10 | * of this software and associated documentation files (the "Software"), to deal
11 | * in the Software without restriction, including without limitation the rights
12 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 | * copies of the Software, and to permit persons to whom the Software is
14 | * furnished to do so, subject to the following conditions:
15 | *
16 | * The above copyright notice and this permission notice shall be included in
17 | * all copies or substantial portions of the Software.
18 | *
19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 | * THE SOFTWARE.
26 | */
27 |
28 |
29 | #include "cString.h"
30 |
/*
 * Comparator for sorting arrays of C strings in ascending strcmp() order.
 * Both arguments point at array elements, i.e. at `char *` values; the
 * return value is the strcmp() result for the two strings.
 */
int cStr_compare(const void *a, const void *b) {
    const char *left = *(const char **)a;
    const char *right = *(const char **)b;

    return strcmp(left, right);
}
39 |
/*
 * Comparator for sorting arrays of C strings in descending strcmp() order.
 * Both arguments point at array elements, i.e. at `char *` values; the
 * return value is the negated strcmp() result for the two strings.
 */
int cStr_compareDesc(const void *a, const void *b) {
    const char *left = *(const char **)a;
    const char *right = *(const char **)b;

    return -strcmp(left, right);
}
48 |
/*
 * In-place conversion of a NUL-terminated string to lower case.
 *
 * Each character is passed to tolower() as an unsigned char: handing
 * tolower() a negative plain-char value (any byte >= 0x80 where char is
 * signed) is undefined behaviour.
 */
void cStr_lowerCase(char *string) {
    char *p;
    for (p = string; *p != '\0'; p++) {
        *p = (char)tolower((unsigned char)*p);
    }
}
58 |
/*
 * In-place conversion of a NUL-terminated string to upper case.
 *
 * Each character is passed to toupper() as an unsigned char: handing
 * toupper() a negative plain-char value (any byte >= 0x80 where char is
 * signed) is undefined behaviour.
 */
void cStr_upperCase(char *string) {
    char *p;
    for (p = string; *p != '\0'; p++) {
        *p = (char)toupper((unsigned char)*p);
    }
}
68 |
/*
 * Returns 1 if "string" starts with "query", 0 otherwise. The comparison
 * ignores case when "ignorecase" is non-zero.
 *
 * Both arguments must be non-empty; this is enforced with assert().
 * A "string" shorter than "query" is never read past its terminator: its
 * NUL byte differs from the corresponding query character, which ends the
 * scan with 0.
 */
int cStr_startsWith(char *string, char *query, int ignorecase) {
    assert(strlen(string) > 0);
    assert(strlen(query) > 0);

    int i;
    for (i = 0; query[i] != '\0'; i++) {
        if (ignorecase) {
            /* tolower() requires an unsigned char value; a negative plain
             * char would be undefined behaviour. */
            if (tolower((unsigned char)string[i]) != tolower((unsigned char)query[i])) {
                return 0;
            }
        } else if (string[i] != query[i]) {
            return 0;
        }
    }
    /* Every character of query matched. */
    return 1;
}
94 |
/*
 * Returns the number of characters needed to print n in decimal, including
 * a leading '-' for negative values (the terminating NUL is not counted).
 *
 * The sign is recorded before n is consumed and the digit loop runs until
 * the quotient reaches zero, so negative values -- INT64_MIN included -- are
 * measured without negating n.
 */
int64_t cStr_getIntLength(int64_t n) {
    int64_t count = (n < 0) ? 1 : 0;
    do {
        count++;
        n /= 10;
    } while (n != 0);
    return count;
}
106 |
/*
 * reverse: reverse the NUL-terminated string s in place.
 *
 * Indices are size_t so strings longer than INT_MAX are handled, and the
 * empty / single-character cases return early instead of relying on
 * strlen(s) - 1 wrapping around.
 */
void cStr_reverse(char *s) {
    size_t length = strlen(s);
    size_t front, back;
    char c;

    if (length < 2) {
        return;
    }
    for (front = 0, back = length - 1; front < back; front++, back--) {
        c = s[front];
        s[front] = s[back];
        s[back] = c;
    }
}
118 |
/*
 * itoa: write the decimal representation of n into s, NUL-terminated.
 *
 * s must have room for the digits, an optional leading '-' and the
 * terminator. The magnitude is computed in unsigned arithmetic so that
 * INT_MIN, whose negation does not fit in an int, is converted correctly.
 */
void cStr_itoa(int n, char *s) {
    /* A byte contributes at most 3 decimal digits; +2 leaves room for the sign. */
    char reversed[3 * sizeof(int) + 2];
    size_t count = 0;
    unsigned int magnitude = (n < 0) ? 0u - (unsigned int)n : (unsigned int)n;

    do { /* generate digits, least significant first */
        reversed[count++] = (char)('0' + (int)(magnitude % 10u));
        magnitude /= 10u;
    } while (magnitude != 0u);
    if (n < 0) {
        reversed[count++] = '-';
    }
    /* copy out most significant character first */
    while (count > 0) {
        *s++ = reversed[--count];
    }
    *s = '\0';
}
134 |
/*
 * Appends the character c to the NUL-terminated string s and re-terminates
 * it. The caller must provide room for one extra character in s.
 */
void cStr_appendChar(char *s, char c) {
    int end = strlen(s);

    s[end++] = c;
    s[end] = '\0';
}
140 |
/*
 * Formats value in decimal, right-aligned at the end of buffer (capacity
 * bytes, NUL-terminated), and returns a pointer to the first character.
 * 21 bytes are enough for any int64_t. The magnitude is taken in unsigned
 * arithmetic so INT64_MIN is handled.
 */
static char *cStr_int64ToDecimal(int64_t value, char *buffer, size_t capacity) {
    char *cursor = buffer + capacity;
    uint64_t magnitude = (value < 0) ? (uint64_t)0 - (uint64_t)value : (uint64_t)value;

    *--cursor = '\0';
    do {
        *--cursor = (char)('0' + (int)(magnitude % 10));
        magnitude /= 10;
    } while (magnitude != 0);
    if (value < 0) {
        *--cursor = '-';
    }
    return cursor;
}

/*
 * Builds a newly allocated string holding the "size" integers of "array" in
 * decimal, joined by the character "sep" (e.g. {1, -20, 3} with ',' gives
 * "1,-20,3"). A sep of '\0' joins the numbers with nothing in between.
 *
 * Returns an empty string when size <= 0. The buffer comes from st_malloc();
 * NOTE(review): presumably the caller releases it with free() -- confirm
 * against sonLib.
 *
 * Each value is formatted as a full int64_t and the buffer is sized from the
 * formatted text itself, so the result always fits its allocation exactly.
 */
char *cStr_getStringFromIntArray(int64_t *array, int64_t size, const char sep) {
    char buffer[32];
    char *string = NULL;
    char *out = NULL;
    size_t numChars = 0;
    int64_t i;

    if (size <= 0) {
        string = st_malloc(sizeof(char));
        string[0] = '\0';
        return string;
    }

    /* First pass: total length of all numbers plus one separator per gap. */
    for (i = 0; i < size; i++) {
        numChars += strlen(cStr_int64ToDecimal(array[i], buffer, sizeof(buffer)));
    }
    numChars += (size_t)(size - 1);

    string = st_malloc(sizeof(char) * (numChars + 1));

    /* Second pass: copy each number, preceded by the separator after the first. */
    out = string;
    for (i = 0; i < size; i++) {
        const char *digits = cStr_int64ToDecimal(array[i], buffer, sizeof(buffer));
        size_t length = strlen(digits);
        if (i > 0 && sep != '\0') {
            *out++ = sep;
        }
        memcpy(out, digits, length);
        out += length;
    }
    *out = '\0';

    return string;
}
168 |
--------------------------------------------------------------------------------
/mafComparator/src/cString.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2009-2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * Benedict Paten (benedict@soe.ucsc.edu, benedictpaten@gmail.com)
5 | * Mark Diekhans (markd@soe.ucsc.edu)
6 | * ... and other members of the Reconstruction Team of David Haussler's
7 | * lab (BME Dept. UCSC).
8 | *
9 | * Permission is hereby granted, free of charge, to any person obtaining a copy
10 | * of this software and associated documentation files (the "Software"), to deal
11 | * in the Software without restriction, including without limitation the rights
12 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 | * copies of the Software, and to permit persons to whom the Software is
14 | * furnished to do so, subject to the following conditions:
15 | *
16 | * The above copyright notice and this permission notice shall be included in
17 | * all copies or substantial portions of the Software.
18 | *
19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 | * THE SOFTWARE.
26 | */
27 |
28 |
29 |
30 | #ifndef CSTRING_H_
31 | #define CSTRING_H_
32 |
33 | #include "commonC.h"
34 |
35 | #include
36 | #include
37 | #include
38 | #include
39 |
40 | /*
41 | * Comparison function to sort strings alphabetically
42 | */
43 | int cStr_compare(const void *a, const void *b);
44 |
45 | /*
46 | * Comparison function to sort strings in descending order
47 | */
48 | int cStr_compareDesc(const void *a, const void *b);
49 |
50 | /*
51 | * In-place substitution to lower-case string
52 | */
53 | void cStr_lowerCase(char *string);
54 |
55 | /*
56 | * In-place substitution to upper-case string
57 | */
58 | void cStr_upperCase(char *string);
59 |
60 | /*
61 | * Check if "string" starts with "query" and ignores case
62 | * if "ignorecase" == 1
63 | */
64 | int cStr_startsWith(char *string, char *query, int ignorecase);
65 |
66 | int64_t cStr_getIntLength(int64_t n);
67 |
68 | void cStr_reverse(char *s);
69 |
70 | void cStr_itoa(int n, char *s);
71 |
72 | void cStr_appendChar(char *s, char c);
73 |
74 | char *cStr_getStringFromIntArray(int64_t *array, int64_t size, const char sep);
75 |
76 | #endif /* CSTRING_H_ */
77 |
--------------------------------------------------------------------------------
/mafComparator/src/comparatorRandom.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2009-2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 |
26 | #ifndef _COMPARATOR_RANDOM_H_
27 | #define _COMPARATOR_RANDOM_H_
28 |
29 | #include
30 | #include "sonLib.h"
31 |
32 | // Makes a draw from a random binomial with parameters n, p
33 | // Uses
34 | // BTPE (Binomial, Triangle, Parallelogram, Exponential)
35 | // Kachitvichyanukul, Voratas and Schmeiser, Bruce W. (1988)
36 | // Binomial Random Variate Generation, Communications of the ACM, 31(2): 216-222
37 | uint64_t rbinom(const uint64_t n, const double p);
38 | // NOT MULTITHREAD SAFE.
39 |
40 | #endif // _COMPARATOR_RANDOM_H_
41 |
--------------------------------------------------------------------------------
/mafComparator/src/mafPairCounter.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2009-2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * Benedict Paten (benedict@soe.ucsc.edu, benedictpaten@gmail.com)
5 | * Mark Diekhans (markd@soe.ucsc.edu)
6 | * ... and other members of the Reconstruction Team of David Haussler's
7 | * lab (BME Dept. UCSC).
8 | *
9 | * Permission is hereby granted, free of charge, to any person obtaining a copy
10 | * of this software and associated documentation files (the "Software"), to deal
11 | * in the Software without restriction, including without limitation the rights
12 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 | * copies of the Software, and to permit persons to whom the Software is
14 | * furnished to do so, subject to the following conditions:
15 | *
16 | * The above copyright notice and this permission notice shall be included in
17 | * all copies or substantial portions of the Software.
18 | *
19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 | * THE SOFTWARE.
26 | */
27 |
28 | #include
29 | #include "sonLib.h"
30 | #include "common.h"
31 | #include "comparatorAPI.h"
32 | #include "buildVersion.h"
33 |
34 | const char *g_version = "version 0.1 July 2012";
35 |
36 | void version(void);
37 | void usage(void);
38 | int parseOptions(int argc, char **argv, char **maf, char **maf2, char **seqList);
39 | stSet* buildSet(char *listOfLegitSequences);
40 |
41 | void version(void) {
42 | fprintf(stderr, "mafPairCounter, %s\nbuild: %s, %s, %s\n\n", g_version, g_build_date,
43 | g_build_git_branch, g_build_git_sha);
44 | }
/*
 * Prints the version banner followed by the usage synopsis, a description of
 * the program and one help entry per command line option. The banner and
 * description go to stderr; the option entries are emitted by usageMessage().
 */
void usage(void) {
    version();
    fprintf(stderr, "Usage: $ mafPairCounter --maf=FILE\n\n");
    fprintf(stderr, "This program is used to count the number of pairs of aligned positions\n"
            "that are contained in a maf file. Can be run to determine all possible pairs, or\n"
            "a subset as defined either by using the --sequences option or by the intersection\n"
            "of the sequences present in --maf and present in --maf2.\n\n");
    fprintf(stderr, "Options:\n");
    usageMessage('h', "help", "Show this help message and exit.");
    usageMessage('\0', "maf", "The location of the MAF file. "
                 "The number of pairs contained in the file will be counted and "
                 "reported in stdout.");
    usageMessage('\0', "sequences", "Comma separated list of sequences allowed to be in pairs. "
                 "To allow all sequences, either specify *every* sequence or don't invoke "
                 "this option. Leaving --sequences off results in all sequences being used.");
    /* Help text typo fixed: was "IF specificied". */
    usageMessage('\0', "maf2", "If specified, this is the location of the second MAF file. "
                 "Using this option causes --sequences option to be ignored. Sequences will "
                 "be discovered by intersection of sequences present in both maf files, pairs "
                 "reported will be from the --maf option.");
    usageMessage('v', "version", "Print current version number.");
}
/*
 * Parses the command line.
 *
 * On return *maf, *maf2 and *seqList hold newly allocated copies of the
 * --maf, --maf2 and --sequences arguments (or keep their incoming value if
 * the option was not given). The caller must initialise all three to NULL
 * and owns the strings afterwards. When --maf2 is given, any --sequences
 * value is discarded and *seqList is reset to NULL.
 *
 * -v/--version and -h/--help print and exit successfully; an unrecognised
 * option prints the usage and exits with EXIT_FAILURE; a missing --maf
 * prints the usage plus an error and exits with status 2.
 *
 * Returns optind, the index of the first non-option argument.
 */
int parseOptions(int argc, char **argv, char **maf, char **maf2, char **seqList) {
    /* No colons: -v and -h take no argument, matching the long options below. */
    static const char *optString = "vh";
    static const struct option longOpts[] = {
        {"maf", required_argument, 0, 0},
        {"maf2", required_argument, 0, 0},
        {"sequences", required_argument, 0, 0},
        {"version", no_argument, 0, 'v'},
        {"help", no_argument, 0, 'h'},
        {0, 0, 0, 0 }};
    int longIndex = 0;
    int key;

    while ((key = getopt_long(argc, argv, optString, longOpts, &longIndex)) != -1) {
        switch (key) {
        case 0: {
            /* Long option without a short form: pick the destination by name. */
            const char *name = longOpts[longIndex].name;
            char **destination = NULL;
            if (strcmp("maf", name) == 0) {
                destination = maf;
            } else if (strcmp("maf2", name) == 0) {
                destination = maf2;
            } else if (strcmp("sequences", name) == 0) {
                destination = seqList;
            }
            if (destination != NULL) {
                /* Drop the value of an earlier occurrence of the same option. */
                free(*destination);
                *destination = stString_copy(optarg);
            }
            break;
        }
        case 'v':
            version();
            exit(EXIT_SUCCESS);
            break;
        case 'h':
            usage();
            exit(EXIT_SUCCESS);
            break;
        default:
            /* Unknown option or missing argument: report it as a failure. */
            usage();
            exit(EXIT_FAILURE);
            break;
        }
    }
    if (*maf == NULL) {
        usage();
        fprintf(stderr, "\nError, specify --maf\n");
        exit(2);
    }
    /* Open and immediately close each input to check it is readable.
     * NOTE(review): presumably de_fopen() aborts on failure -- confirm in common.c. */
    FILE *fileHandle = de_fopen(*maf, "r");
    fclose(fileHandle);
    if (*maf2 != NULL) {
        fileHandle = de_fopen(*maf2, "r");
        fclose(fileHandle);
        if (*seqList != NULL) {
            /* --maf2 overrides --sequences: release the caller's string, not
             * the address of the caller's variable. */
            free(*seqList);
            *seqList = NULL;
        }
    }
    return optind;
}
124 | stSet* buildSet(char *listOfLegitSequences) {
125 | char *spaceSepFiles = stringReplace(listOfLegitSequences, ',', ' ');
126 | char *currentLocation = spaceSepFiles;
127 | char *currentWord;
128 | stSet *legitSeqsSet = stSet_construct3(stHash_stringKey, stHash_stringEqualKey, free);
129 | while ((currentWord = stString_getNextWord(¤tLocation)) != NULL) {
130 | stSet_insert(legitSeqsSet, stString_copy(currentWord));
131 | free(currentWord);
132 | }
133 | free(spaceSepFiles);
134 | return legitSeqsSet;
135 | }
136 | int main(int argc, char **argv) {
137 | char *maf = NULL;
138 | char *maf2 = NULL;
139 | char *listOfLegitSequences = NULL;
140 | stSet *legitSeqsSet = NULL;
141 | stSet *maf1SeqSet = NULL;
142 | stSet *maf2SeqSet = NULL;
143 | stHash *sequenceLengthHash = stHash_construct3(stHash_stringKey, stHash_stringEqualKey, free, free);
144 | parseOptions(argc, argv, &maf, &maf2, &listOfLegitSequences);
145 | if (listOfLegitSequences != NULL) {
146 | legitSeqsSet = buildSet(listOfLegitSequences);
147 | }
148 | if (maf2 != NULL) {
149 | // build legitHash by intersection
150 | maf1SeqSet = stSet_construct3(stHash_stringKey, stHash_stringEqualKey, free);
151 | maf2SeqSet = stSet_construct3(stHash_stringKey, stHash_stringEqualKey, free);
152 | populateNames(maf, maf1SeqSet, sequenceLengthHash);
153 | populateNames(maf2, maf2SeqSet, sequenceLengthHash);
154 | legitSeqsSet = stSet_getIntersection(maf1SeqSet, maf2SeqSet);
155 | }
156 | uint64_t numberOfPairs = countPairsInMaf(maf, legitSeqsSet);
157 | printf("%"PRIu64"\n", numberOfPairs);
158 | // clean up
159 | if (legitSeqsSet != NULL) {
160 | stSet_destruct(legitSeqsSet);
161 | }
162 | if (maf1SeqSet != NULL) {
163 | stSet_destruct(maf1SeqSet);
164 | stSet_destruct(maf2SeqSet);
165 | }
166 | free(maf);
167 | free(maf2);
168 | free(listOfLegitSequences);
169 | stHash_destruct(sequenceLengthHash);
170 | return(EXIT_SUCCESS);
171 | }
172 |
--------------------------------------------------------------------------------
/mafComparator/src/test.comparatorAPI.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #ifndef TEST_COMPARATOR_API_H_
26 | #define TEST_COMPARATOR_API_H_
27 | #include
28 | #include
29 | #include
30 | #include
31 | #include "CuTest.h"
32 | #include "common.h"
33 | #include "sonLib.h"
34 | #include "comparatorAPI.h"
35 |
36 | CuSuite* comparatorAPI_TestSuite(void);
37 |
38 | #endif // TEST_COMPARATOR_API_H_
39 |
--------------------------------------------------------------------------------
/mafComparator/src/test.comparatorRandom.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #ifndef TEST_COMPARATOR_RANDOM_H_
26 | #define TEST_COMPARATOR_RANDOM_H_
27 | #include
28 | #include
29 | #include
30 | #include
31 | #include "CuTest.h"
32 | #include "common.h"
33 | #include "sonLib.h"
34 | #include "comparatorRandom.h"
35 |
36 | CuSuite* comparatorRandom_TestSuite(void);
37 |
38 | #endif // TEST_COMPARATOR_RANDOM_H_
39 |
--------------------------------------------------------------------------------
/mafComparator/src/testRand.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2009-2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #include
26 | #include
27 | #include
28 | #include "CuTest.h"
29 | #include "comparatorAPI.h"
30 | #include "comparatorRandom.h"
31 |
/*
 * Parses text as a non-negative base-10 integer into *value.
 * Returns 1 on success, 0 if text does not start with a digit or has
 * trailing characters. Values beyond the unsigned long long range are not
 * detected here (strtoull saturates).
 */
static int testRand_parseUint64(const char *text, uint64_t *value) {
    char *end = NULL;

    if (text[0] < '0' || text[0] > '9') {
        return 0;
    }
    *value = strtoull(text, &end, 10);
    return *end == '\0';
}

/*
 * Prints numberOfSamples draws from rbinom(n, p), one per line.
 *
 * Usage: testRand numberOfSamples n p [randomSeed]
 * Without a seed argument the generator is seeded from the current time.
 * numberOfSamples and n are parsed as full 64-bit unsigned values and all
 * three numeric arguments are rejected if they are not well-formed numbers.
 */
int main(int argc, char **argv) {
    uint64_t numSamples = 0;
    uint64_t n = 0;
    double p = 0.0;
    char *end = NULL;

    if (argc == 5) {
        st_randomSeed(atoi(argv[4]));
    } else if (argc == 4) {
        st_randomSeed(time(NULL));
    } else {
        fprintf(stderr, "Usage: %s numberOfSamples n p [optional: randomSeed]\n", argv[0]);
        return EXIT_FAILURE;
    }
    if (!testRand_parseUint64(argv[1], &numSamples) || !testRand_parseUint64(argv[2], &n)) {
        fprintf(stderr, "Error, numberOfSamples and n must be non-negative integers\n");
        return EXIT_FAILURE;
    }
    p = strtod(argv[3], &end);
    if (end == argv[3] || *end != '\0') {
        fprintf(stderr, "Error, p must be a number\n");
        return EXIT_FAILURE;
    }
    for (uint64_t i = 0; i < numSamples; ++i) {
        printf("%" PRIu64 "\n", rbinom(n, p));
    }
    return EXIT_SUCCESS;
}
49 |
--------------------------------------------------------------------------------
/mafCoverage/Makefile:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2009-2013 by
2 | # Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
3 | # Benedict Paten (benedict@soe.ucsc.edu, benedictpaten@gmail.com)
4 | # Mark Diekhans (markd@soe.ucsc.edu)
5 | # ... and other members of the Reconstruction Team of David Haussler's
6 | # lab (BME Dept. UCSC).
7 | #
8 | # Permission is hereby granted, free of charge, to any person obtaining a copy
9 | # of this software and associated documentation files (the "Software"), to deal
10 | # in the Software without restriction, including without limitation the rights
11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | # copies of the Software, and to permit persons to whom the Software is
13 | # furnished to do so, subject to the following conditions:
14 | #
15 | # The above copyright notice and this permission notice shall be included in
16 | # all copies or substantial portions of the Software.
17 | #
18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | # THE SOFTWARE.
25 |
26 | include ../inc/common.mk
# Run recipes with bash.
SHELL:=/bin/bash

# Locations shared with the rest of the repository.
bin := ../bin
inc := ../inc
lib := ../lib

# Programs built by the default target.
PROGS := mafCoverage

# Files whose modification forces a rebuild of the program and of the generated
# version source. Simply expanded so the $(wildcard ...) scan of sonLibPath runs
# once at parse time instead of on every reference.
dependencies := ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c $(wildcard ${sonLibPath}/*) ${sonLibPath}/sonLib.a src/allTests.c

# Objects and archives linked into the regular build (extraAPI) and into the
# test build (testAPI), plus the test-only objects.
extraAPI := ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/mafCoverageAPI.o ${sonLibPath}/sonLib.a src/buildVersion.o
testAPI := test/sharedMaf.o test/common.o ../external/CuTest.a test/mafCoverageAPI.o ${sonLibPath}/sonLib.a test/buildVersion.o
testObjects := test/test.mafCoverageAPI.o
sources := src/mafCoverage.c src/mafCoverage.h
37 |
.PHONY: all clean test buildVersion

# Default target: refresh the generated version source, then build every
# program listed in PROGS into $(bin).
all: buildVersion $(addprefix $(bin)/,$(PROGS))

# Phony alias for the generated version source.
buildVersion: src/buildVersion.c

# Regenerated whenever a program source or one of the shared dependencies changes.
src/buildVersion.c: $(sources) $(dependencies)
	@python ../lib/createVersionSources.py
44 |
# Objects under ../lib are built by that directory's own Makefile.
# $(MAKE) (rather than a literal `make`) passes -j/jobserver, -n and other
# command line flags down to the sub-make.
../lib/%.o: ../lib/%.c ../inc/%.h
	cd ../lib/ && $(MAKE)
47 |
# Link the mafCoverage program. The output is written under a temporary name
# and renamed afterwards, so a failed link never leaves a file at the target path.
$(bin)/mafCoverage: src/mafCoverage.c $(dependencies) $(extraAPI)
	mkdir -p $(@D)
	$(cxx) $< $(extraAPI) -o $@.tmp $(cflags) $(lm)
	mv $@.tmp $@

# Generic object rule: an object built from the source and header sharing its stem.
%.o: %.c %.h
	$(cxx) -c $< -o $@.tmp $(cflags)
	mv $@.tmp $@

# Matches mafCoverageAPI.o in any directory; always compiled from the sources in src/.
%/mafCoverageAPI.o: src/mafCoverageAPI.c src/mafCoverageAPI.h
	$(cxx) -c $< -o $@.tmp $(cflags)
	mv $@.tmp $@
58 |
# Run the C unit tests, then the Python tests; the test build directory and
# tempTestDir are removed only when both succeed.
test: buildVersion test/allTests test/mafCoverage
	./test/allTests && python2.7 src/test.mafCoverage.py --verbose && rm -rf ./test/ && rmdir ./tempTestDir

# Unit test runner, linked from every prerequisite.
test/allTests: src/allTests.c $(testAPI) $(testObjects) $(sonLibPath)/sonLib.a
	mkdir -p $(@D)
	$(cxx) $^ -o $@.tmp $(testFlags) $(lm)
	mv $@.tmp $@

# Copy of the program built with testFlags against the test objects.
test/mafCoverage: src/mafCoverage.c $(dependencies) $(testAPI)
	mkdir -p $(@D)
	$(cxx) $< $(testAPI) -o $@.tmp $(testFlags) $(lm)
	mv $@.tmp $@

# Test objects compiled from the shared library sources.
test/%.o: $(lib)/%.c $(inc)/%.h
	mkdir -p $(@D)
	$(cxx) -c $< -o $@.tmp $(testFlags)
	mv $@.tmp $@

# Object holding the API unit tests.
test/test.mafCoverageAPI.o: src/test.mafCoverageAPI.c src/test.mafCoverageAPI.h test/mafCoverageAPI.o
	mkdir -p $(@D)
	$(cxx) -c $< -o $@.tmp $(testFlags)
	mv $@.tmp $@

# Test objects compiled from this tool's own sources.
test/%.o: src/%.c src/%.h $(sonLibPath)/sonLib.a
	mkdir -p $(@D)
	$(cxx) -c $< -o $@.tmp $(testFlags)
	mv $@.tmp $@
81 |
# Remove the built programs, objects, the test build directory and the
# generated version sources.
clean:
	rm -rf $(addprefix $(bin)/,$(PROGS)) src/*.o test/ src/buildVersion.c src/buildVersion.h

# Static CuTest library: the object and archive are produced in the current
# directory and the finished archive is then moved to its target path.
../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h
	$(cxx) -c $< $(cflags)
	ar rc CuTest.a CuTest.o
	ranlib CuTest.a
	rm -f CuTest.o
	mv CuTest.a $@
90 |
--------------------------------------------------------------------------------
/mafCoverage/README.md:
--------------------------------------------------------------------------------
1 | # mafCoverage
2 |
3 | December 2013
4 |
5 | ## Author
6 |
7 | [Benedict Paten](https://github.com/benedictpaten/)
8 |
9 | ## Description
10 | mafCoverage is a program that will look through a maf file block by block and check for the coverage of all other sequences onto one user-specified sequence.
11 |
12 | The input need not be transitively closed, as `mafCoverage` builds a bit array for the user-specified sequence and stores only presence-absence data. Duplications are only counted once.
13 |
14 | ## Installation
15 | 1. Download the package.
16 | 2. `cd` into the directory.
17 | 3. Type `make`.
18 |
19 | ## Use
20 | mafCoverage ... fill this in
21 |
22 | ### Options
23 | ```shell
24 | Usage: mafCoverage [maf file]
25 |
26 | Reports the pairwise (n-)coverage between a specified genome and all other genomes in the given maf, using a tab delimited format.
27 | Output table format has fields: querySpecies targetSpecies lengthOfQueryGenome coverage n-coverages (if specified)
28 | For a pair of genomes A and B, the coverage of B on A is the proportion of sites in A that align to a base in B.
29 | The n-coverage of B on A is the proportion of sites in A that align to n or more sites in B.
30 | Options:
31 | -h, --help show this help message and exit.
32 | -m, --maf path to maf file.
33 | -s, --speciesOrChr species or species.chromosome name, e.g. `hg19' or 'hg19.chr1',
34 | if not specified reports results for every possible species.wildcard at
35 | the end.
36 | -n, --nCoverage report all n-coverages, for 1 <= n <= 128 instead of just
37 | for n=1 (the default).
38 | -i, --identity report coverage of identical bases.
39 | -l, --logLevel Set logging level, either 'CRITICAL'/'INFO'/'DEBUG'.
40 | -a, --ignoreSpecies Do all chromosomes-against-all-chromosomes coverage.
41 | ```
42 |
43 |
44 | ## Example
45 | $ mafCoverage --maf path/to/maf.maf --speciesOrChr hg19 ...
46 | ...
47 |
48 |
--------------------------------------------------------------------------------
/mafCoverage/src/allTests.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 |
26 | #include
27 | #include
28 | #include "CuTest.h"
29 | #include "mafCoverageAPI.h"
30 | #include "test.mafCoverageAPI.h"
31 |
32 | CuSuite* coverage_TestSuite(void);
33 |
34 | int coverage_RunAllTests(void) {
35 | CuString *output = CuStringNew();
36 | CuSuite *suite = CuSuiteNew();
37 | CuSuite *coverage_s = coverage_TestSuite();
38 | CuSuiteAddSuite(suite, coverage_s);
39 | CuSuiteRun(suite);
40 | CuSuiteSummary(suite, output);
41 | CuSuiteDetails(suite, output);
42 | // printf("%s\n", output->buffer);
43 | CuStringDelete(output);
44 | int status = (suite->failCount > 0);
45 | free(coverage_s);
46 | CuSuiteDelete(suite);
47 | return status;
48 | }
/* Entry point: the process exit status is the result of coverage_RunAllTests(). */
int main(void) {
    const int exitStatus = coverage_RunAllTests();
    return exitStatus;
}
52 |
--------------------------------------------------------------------------------
/mafCoverage/src/mafCoverage.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2011-2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #include
26 | #include // ceil()
27 | #include
28 | #include
29 | #include
30 | #include
31 | #include
32 | #include
33 | #include "common.h"
34 | #include "sharedMaf.h"
35 | #include "mafCoverage.h"
36 | #include "mafCoverageAPI.h"
37 | #include "buildVersion.h"
38 | #include "sonLib.h"
39 |
40 | static char *mafFileName = NULL;
41 | static stSet *speciesOrChromosomeNames = NULL;
42 | static bool nCoverage = 0, identity = 0, ignoreSpecies = 0;
43 |
44 | const char *g_version = "version 0.1 May 2013";
45 | uint64_t getRegionSize(char *seq1, stHash *intervalsHash);
46 |
47 | void version(void) {
48 | fprintf(stderr, "mafCoverage, %s\nbuild: %s, %s, %s\n\n", g_version, g_build_date, g_build_git_branch, g_build_git_sha);
49 | }
50 |
/*
 * Prints the version banner and the full help text to stderr, then ends the
 * process with EXIT_FAILURE. This function does not return.
 */
void usage(void) {
    version();
    fprintf(stderr, "Usage: mafCoverage [maf file] \n\n"
            "Reports the pairwise (n-)coverage between a specified genome and all other genomes in the given maf, using a tab delimited format.\n"
            "Output table format has fields: querySpecies\ttargetSpecies\tlengthOfQueryGenome\tcoverage\tn-coverages (if specified)\n"
            "For a pair of genomes A and B, the coverage of B on A is the proportion of sites in A that align to a base in B.\n"
            "The n-coverage of B on A is the proportion of sites in A that align to n or more sites in B.\n");
    fprintf(stderr, "Options: \n");
    usageMessage('h', "help", "show this help message and exit.");
    usageMessage('m', "maf", "path to maf file. use - for stdin.");
    /* Adjacent string literals are concatenated verbatim, so the first one
     * must end in a space to keep the two sentences apart. */
    usageMessage('s', "speciesOrChr",
            "species or species.chromosome name, e.g. `hg19' or 'hg19.chr1', if not specified reports results for every possible species. "
            "wildcard at the end.");
    usageMessage('n', "nCoverage", "report all n-coverages, for 1 <= n <= 128 instead of just for n=1 (the default).");
    usageMessage('i', "identity", "report coverage of identical bases.");
    usageMessage('l', "logLevel", "Set logging level, either 'CRITICAL'/'INFO'/'DEBUG'.");
    usageMessage('a', "ignoreSpecies", "Do all chromosomes-against-all-chromosomes coverage.");
    exit(EXIT_FAILURE);
}
70 |
71 | static void parseOptions(int argc, char **argv) {
72 | int c;
73 | speciesOrChromosomeNames = stSet_construct3(stHash_stringKey, stHash_stringEqualKey, free);
74 | while (1) {
75 | static struct option longOptions[] = { { "help", no_argument, 0, 'h' }, { "maf", required_argument, 0, 'm' }, { "speciesOrChr",
76 | required_argument, 0, 's' }, { "nCoverage", no_argument, 0, 'n' }, { "identity", no_argument, 0, 'i' }, { "logLevel",
77 | required_argument, 0, 'l' }, { "ignoreSpecies", no_argument, 0, 'a' }, { 0, 0, 0, 0 } };
78 | int longIndex = 0;
79 | c = getopt_long(argc, argv, "m:s:hnl:a", longOptions, &longIndex);
80 | if (c == -1)
81 | break;
82 | switch (c) {
83 | case 's':
84 | stSet_insert(speciesOrChromosomeNames, stString_copy(optarg));
85 | break;
86 | case 'm':
87 | mafFileName = stString_copy(optarg);
88 | break;
89 | case 'n':
90 | nCoverage = 1;
91 | break;
92 | case 'i':
93 | identity = 1;
94 | break;
95 | case 'l':
96 | st_setLogLevelFromString(optarg);
97 | break;
98 | case 'h':
99 | usage();
100 | break;
101 | case 'a':
102 | ignoreSpecies = 1;
103 | break;
104 | default:
105 | abort();
106 | }
107 | }
108 | //Check we have the essentials.
109 | if (mafFileName == NULL) {
110 | fprintf(stderr, "Error, specify --maf\n");
111 | usage();
112 | }
113 | // Check there's nothing left over on the command line
114 | if (optind < argc) {
115 | fprintf(stderr, "Unexpected input arguments\n");
116 | usage();
117 | }
118 | }
119 |
120 | int main(int argc, char **argv) {
121 | parseOptions(argc, argv);
122 | //Work out the structure of the chromosomes of the query sequence
123 | stHash *sequenceNamesToSequenceSizes = getMapOfSequenceNamesToSizesFromMaf(mafFileName);
124 | stHashIterator *sequenceNameIt = stHash_getIterator(sequenceNamesToSequenceSizes);
125 | char *sequenceName;
126 | while ((sequenceName = stHash_getNext(sequenceNameIt)) != NULL) {
127 | st_logDebug("Got a sequence name: %s with length %" PRId64 "\n", sequenceName, stIntTuple_get(stHash_search(sequenceNamesToSequenceSizes, sequenceName), 0));
128 | }
129 | stHash_destructIterator(sequenceNameIt);
130 | stList *sequenceNames = stHash_getKeys(sequenceNamesToSequenceSizes);
131 | //If the species/chr name is not specified then replace with all possible species name.
132 | if (stSet_size(speciesOrChromosomeNames) == 0) {
133 | st_logInfo("As no species name was specified, using all possible species names\n");
134 | stSet_destruct(speciesOrChromosomeNames);
135 | speciesOrChromosomeNames = getSpeciesNames(sequenceNames, ignoreSpecies);
136 | } else { //Sanity checks on the input species/chr
137 | stList *names = stSet_getList(speciesOrChromosomeNames);
138 | assert(stList_length(names) == 1);
139 | char *speciesOrChrName = stList_get(names, 0);
140 | stList_destruct(names);
141 | if (ignoreSpecies) {
142 | if (stHash_search(sequenceNamesToSequenceSizes, speciesOrChrName) == NULL) {
143 | st_errAbort("Chromosome name not recognised (perhaps you gave a species name but have specified --ignoreSpecies?): %s\n",
144 | speciesOrChrName);
145 | }
146 | } else {
147 | stSet *speciesNames = getSpeciesNames(sequenceNames, ignoreSpecies);
148 | if (stSet_search(speciesNames, speciesOrChrName) == NULL && stHash_search(sequenceNamesToSequenceSizes, speciesOrChrName)
149 | == NULL) {
150 | st_errAbort("Species or chr name name not recognised: %s\n", speciesOrChrName);
151 | }
152 | stSet_destruct(speciesNames);
153 | }
154 | }
155 | //Print header
156 | nGenomeCoverage_reportHeader(stdout, nCoverage);
157 | //For each of the chosen species calculate species
158 | stSetIterator *speciesOrChrNamesIt = stSet_getIterator(speciesOrChromosomeNames);
159 | char *speciesOrChrName;
160 | while ((speciesOrChrName = stSet_getNext(speciesOrChrNamesIt)) != NULL) {
161 | st_logInfo("Computing the coverages for species/chr: %s\n", speciesOrChrName);
162 | //Build the coverage data structure
163 | NGenomeCoverage *nGC = nGenomeCoverage_construct(sequenceNamesToSequenceSizes, speciesOrChrName, ignoreSpecies);
164 | nGenomeCoverage_populate(nGC, mafFileName, identity);
165 | //Report
166 | nGenomeCoverage_report(nGC, stdout, nCoverage);
167 | //cleanup loop
168 | nGenomeCoverage_destruct(nGC);
169 | }
170 | //Cleanup
171 | stList_destruct(sequenceNames);
172 | stSet_destructIterator(speciesOrChrNamesIt);
173 | stHash_destruct(sequenceNamesToSequenceSizes);
174 | stSet_destruct(speciesOrChromosomeNames);
175 | free(mafFileName);
176 | // while(1);
177 | return EXIT_SUCCESS;
178 | }
179 |
--------------------------------------------------------------------------------
/mafCoverage/src/mafCoverage.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2011-2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #ifndef _MAF_COVERAGE_H_
26 | #define _MAF_COVERAGE_H_
27 |
28 | #include
29 | #include
30 | #include "common.h"
31 | #include "sharedMaf.h"
32 | #include "sonLib.h"
33 |
34 | void version(void);
35 | void usage(void);
36 |
37 | #endif // _MAF_COVERAGE_H_
38 |
--------------------------------------------------------------------------------
/mafCoverage/src/mafCoverageAPI.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2011-2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 |
26 | #ifndef _MAF_COVERAGE_API_H_
27 | #define _MAF_COVERAGE_API_H_
28 |
29 | #include
30 | #include
31 | #include "common.h"
32 | #include "sharedMaf.h"
33 | #include "sonLib.h"
34 | #include "mafCoverage.h"
35 |
36 | bool is_wild(const char *s);
37 | bool searchMatched(mafLine_t *ml, const char *seq);
38 | bool searchMatched_(const char *target, const char *seq);
39 |
40 | /*
41 | * Iterates through the maf and builds a hash of sequence names to sequence sizes.
42 | * Lengths are specified by an stIntTuple.
43 | */
44 | stHash *getMapOfSequenceNamesToSizesFromMaf(char *mafFileName);
45 |
46 | /*
47 | * Each sequence name is comprised of two fields separated by a period. The first is the species field, the second is the
48 | * chromosome field. This function returns the set of distinct species names from the set of sequence names. If ignoreSpeciesNames
49 | * is true then it just returns the set of sequence names.
50 | */
51 | stSet *getSpeciesNames(stList *sequenceNames, bool ignoreSpeciesNames);
52 |
53 | /*
54 | * Gets the subset of the hash for all sequences involving the given species.
55 | */
56 | stHash *getMapOfSequenceNamesToSequenceSizesForGivenSpeciesOrChr(stHash *sequenceNamesToSequenceSizes, char *speciesOrChrName, bool ignoreSpeciesNames);
57 |
58 | /*
59 | * Returns the combined length of all the sequences in the set.
60 | */
61 | int64_t getTotalLengthOfSequences(stHash *sequenceSizes);
62 |
63 | /*
64 | * The pairwise coverage object.
65 | */
66 |
67 | typedef struct _pairwiseCoverage PairwiseCoverage;
68 |
69 | PairwiseCoverage *pairwiseCoverage_construct(const stHash *sequenceNamesToSequenceSizeForGivenSpecies);
70 |
71 | void pairwiseCoverage_destruct(PairwiseCoverage *pC);
72 |
73 | /*
74 | * Returns the coverage of the target genome on query species, that is the proportion of bases in the query aligned to one
75 | * or more positions in the target.
76 | */
77 | double pairwiseCoverage_calculateCoverage(PairwiseCoverage *pC);
78 |
79 | /*
80 | * Returns an array of the n-coverages up to but excluding 128, with the index corresponding to n.
81 | */
82 | double *pairwiseCoverage_calculateNCoverages(PairwiseCoverage *pC);
83 |
84 | /*
85 | * Gets the coverage array for the named sequence, for use with pairwiseCoverageArray_increase.
86 | */
87 | char *pairwiseCoverage_getCoverageArrayForSequence(PairwiseCoverage *pC, char *sequenceName);
88 |
89 | /*
90 | * Increases the coverage count at the given position. Returns non-zero if successful; returns 0 if the maximum coverage has already been reached (so it can't be increased).
91 | */
92 | bool pairwiseCoverageArray_increase(char *sequenceCoverageArray, int64_t position);
93 |
94 | /*
95 | * An all-against-a-given-species object.
96 | */
97 |
98 | typedef struct _nGenomeCoverage NGenomeCoverage;
99 |
100 | void nGenomeCoverage_destruct(NGenomeCoverage *nGC);
101 |
102 | NGenomeCoverage *nGenomeCoverage_construct(stHash *sequenceSizes, char *speciesName, bool ignoreSpeciesNames);
103 |
104 | /*
105 | * Iterate through a maf file and populate the species coverages.
106 | */
107 | void nGenomeCoverage_populate(NGenomeCoverage *nGC, char *mafFileName, bool requireIdentityForMatch);
108 |
109 | /*
110 | * Reports stats in tab delimited format.
111 | */
112 | void nGenomeCoverage_reportHeader(FILE *out, bool includeNCoverage);
113 | void nGenomeCoverage_report(NGenomeCoverage *nGC, FILE *out, bool includeNCoverage);
114 |
115 | #endif // _MAF_COVERAGE_API_H_
116 |
--------------------------------------------------------------------------------
/mafCoverage/src/test.mafCoverageAPI.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #include
26 | #include
27 | #include
28 | #include
29 | #include
30 | #include
31 | #include
32 | #include
33 | #include
34 | #include "CuTest.h"
35 | #include "common.h"
36 | #include "sharedMaf.h"
37 | #include "mafCoverageAPI.h"
38 |
39 | static stHash *sequenceNamesToSequenceSizes = NULL;
40 |
41 | static void setup() {
42 | sequenceNamesToSequenceSizes = stHash_construct3(stHash_stringKey, stHash_stringEqualKey, free, (void(*)(void *)) stIntTuple_destruct);
43 | stHash_insert(sequenceNamesToSequenceSizes, stString_copy("bat.man"), stIntTuple_construct1(50));
44 | stHash_insert(sequenceNamesToSequenceSizes, stString_copy("spider.man"), stIntTuple_construct1(1));
45 | stHash_insert(sequenceNamesToSequenceSizes, stString_copy("bat.fink"), stIntTuple_construct1(7));
46 | stHash_insert(sequenceNamesToSequenceSizes, stString_copy("danger.mouse"), stIntTuple_construct1(12));
47 | stHash_insert(sequenceNamesToSequenceSizes, stString_copy("penfold"), stIntTuple_construct1(12));
48 | }
49 |
50 | static void teardown() {
51 | if (sequenceNamesToSequenceSizes != NULL) {
52 | stHash_destruct(sequenceNamesToSequenceSizes);
53 | }
54 | }
55 |
56 | static void test_is_wild_0(CuTest *testCase) {
57 | CuAssertTrue(testCase, is_wild("hg19*"));
58 | CuAssertTrue(testCase, is_wild("hg19.chr19*"));
59 | CuAssertTrue(testCase, !is_wild("hg19.chr19"));
60 | CuAssertTrue(testCase, !is_wild("hg19.chr1*9"));
61 | CuAssertTrue(testCase, !is_wild("aoeuaoeunstaoeunshtonuts.chrcrhrc.huaoeunsatohunt."));
62 | CuAssertTrue(testCase, is_wild("aoeuaoeunstaoeunshtonuts.chrcrhrc.huaoeunsatohunt.*"));
63 | }
64 | static void test_searchMatched_0(CuTest *testCase) {
65 | mafLine_t *ml = maf_newMafLineFromString("s hg19.chr19 123480 13 + 1234870098734 ACGTACGTACGTA", 1);
66 | CuAssertTrue(testCase, searchMatched(ml, "hg19.chr19"));
67 | CuAssertTrue(testCase, searchMatched(ml, "hg19*"));
68 | CuAssertTrue(testCase, searchMatched(ml, "h*"));
69 | CuAssertTrue(testCase, searchMatched(ml, "*"));
70 | CuAssertTrue(testCase, !searchMatched(ml, "mm9"));
71 | maf_destroyMafLineList(ml);
72 | }
73 |
74 | static void test_getSpeciesNames(CuTest *testCase) {
75 | setup();
76 | stList *sequenceNames = stHash_getKeys(sequenceNamesToSequenceSizes);
77 | stSet *speciesNames = getSpeciesNames(sequenceNames, 0);
78 | CuAssertIntEquals(testCase, 4, stSet_size(speciesNames));
79 | CuAssertTrue(testCase, stSet_search(speciesNames, "bat") != NULL);
80 | CuAssertTrue(testCase, stSet_search(speciesNames, "spider") != NULL);
81 | CuAssertTrue(testCase, stSet_search(speciesNames, "danger") != NULL);
82 | CuAssertTrue(testCase, stSet_search(speciesNames, "penfold") != NULL);
83 | stSet_destruct(speciesNames);
84 | stList_destruct(sequenceNames);
85 | teardown();
86 | }
87 |
88 | static void test_getMapOfSequenceNamesToSequenceSizesForGivenSpecies(CuTest *testCase) {
89 | setup();
90 | stHash *sequenceNamesToSequenceSizeForGivenSpecies = getMapOfSequenceNamesToSequenceSizesForGivenSpeciesOrChr(sequenceNamesToSequenceSizes,
91 | "bat", 0);
92 | CuAssertIntEquals(testCase, 2, stHash_size(sequenceNamesToSequenceSizeForGivenSpecies));
93 | CuAssertIntEquals(testCase, 50, stIntTuple_get(stHash_search(sequenceNamesToSequenceSizeForGivenSpecies, "bat.man"), 0));
94 | CuAssertIntEquals(testCase, 7, stIntTuple_get(stHash_search(sequenceNamesToSequenceSizeForGivenSpecies, "bat.fink"), 0));
95 | stHash_destruct(sequenceNamesToSequenceSizeForGivenSpecies);
96 | teardown();
97 | }
98 |
99 | static void test_getTotalLengthOfSequences(CuTest *testCase) {
100 | setup();
101 | CuAssertIntEquals(testCase, 82, getTotalLengthOfSequences(sequenceNamesToSequenceSizes));
102 | teardown();
103 | }
104 |
105 | static void test_pairwiseCoverage(CuTest *testCase) {
106 | setup();
107 | PairwiseCoverage *pC = pairwiseCoverage_construct(sequenceNamesToSequenceSizes);
108 | //Check coverage is 0 when we start
109 | CuAssertDblEquals(testCase, 0.0, pairwiseCoverage_calculateCoverage(pC), 0.0);
110 | double *nCoverages = pairwiseCoverage_calculateNCoverages(pC);
111 | CuAssertDblEquals(testCase, 1.0, nCoverages[0], 0.0);
112 | for (int64_t i = 1; i <= SCHAR_MAX; i++) {
113 | CuAssertDblEquals(testCase, 0.0, nCoverages[i], 0.0);
114 | }
115 | free(nCoverages);
116 |
117 | //Add some coverage
118 | char *coverageArray = pairwiseCoverage_getCoverageArrayForSequence(pC, "spider.man");
119 | CuAssertTrue(testCase, coverageArray != NULL);
120 | pairwiseCoverageArray_increase(coverageArray, 0);
121 | coverageArray = pairwiseCoverage_getCoverageArrayForSequence(pC, "penfold");
122 | CuAssertTrue(testCase, coverageArray != NULL);
123 | pairwiseCoverageArray_increase(coverageArray, 2);
124 | pairwiseCoverageArray_increase(coverageArray, 2);
125 |
126 | //Now recalculate the coverages
127 | CuAssertDblEquals(testCase, 2.0/82.0, pairwiseCoverage_calculateCoverage(pC), 0.0);
128 | nCoverages = pairwiseCoverage_calculateNCoverages(pC);
129 | CuAssertDblEquals(testCase, 1.0, nCoverages[0], 0.0);
130 | CuAssertDblEquals(testCase, 2.0/82.0, nCoverages[1], 0.0);
131 | CuAssertDblEquals(testCase, 1.0/82.0, nCoverages[2], 0.0);
132 | free(nCoverages);
133 | nCoverages = pairwiseCoverage_calculateNCoverages(pC);
134 | for (int64_t i = 3; i <= SCHAR_MAX; i++) {
135 | CuAssertDblEquals(testCase, 0.0, nCoverages[i], 0.0);
136 | }
137 | free(nCoverages);
138 |
139 | pairwiseCoverage_destruct(pC);
140 | teardown();
141 | }
142 |
143 | static void test_nGenomeCoverage(CuTest *testCase) {
144 | setup();
145 | //Just build a single nGenomeCoverage and check the report functions work as expected.
146 | NGenomeCoverage *nGC = nGenomeCoverage_construct(sequenceNamesToSequenceSizes, "bat", 0);
147 | // nGenomeCoverage_reportHeader(stderr, 1);
148 | // nGenomeCoverage_report(nGC, stderr, 1);
149 | nGenomeCoverage_destruct(nGC);
150 | teardown();
151 | }
152 |
153 | CuSuite* coverage_TestSuite(void) {
154 | CuSuite* suite = CuSuiteNew();
155 | (void) test_is_wild_0;
156 | (void) test_searchMatched_0;
157 | SUITE_ADD_TEST(suite, test_is_wild_0);
158 | SUITE_ADD_TEST(suite, test_searchMatched_0);
159 | SUITE_ADD_TEST(suite, test_getSpeciesNames);
160 | SUITE_ADD_TEST(suite, test_getMapOfSequenceNamesToSequenceSizesForGivenSpecies);
161 | SUITE_ADD_TEST(suite, test_getTotalLengthOfSequences);
162 | SUITE_ADD_TEST(suite, test_pairwiseCoverage);
163 | SUITE_ADD_TEST(suite, test_nGenomeCoverage);
164 | return suite;
165 | }
166 |
--------------------------------------------------------------------------------
/mafCoverage/src/test.mafCoverageAPI.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #ifndef TEST_MAF_COVERAGE_API_H_
26 | #define TEST_MAF_COVERAGE_API_H_
27 | #include
28 | #include
29 | #include
30 | #include
31 | #include
32 | #include "CuTest.h"
33 | #include "common.h"
34 | #include "sharedMaf.h"
35 | #include "mafCoverageAPI.h"
36 |
37 | CuSuite* coverage_TestSuite(void);
38 |
39 | #endif // TEST_MAF_COVERAGE_API_H_
40 |
--------------------------------------------------------------------------------
/mafDuplicateFilter/Makefile:
--------------------------------------------------------------------------------
# Build, test and clean rules for mafDuplicateFilter.
# ${cxx} and ${cflags} are not assigned in this file; presumably they are
# provided by the included ../inc/common.mk.
include ../inc/common.mk
SHELL:=/bin/bash
bin = ../bin
inc = ../inc
lib = ../lib
PROGS = mafDuplicateFilter
dependencies = ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c
objects = ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/buildVersion.o
testObjects := test/sharedMaf.o test/common.o ../external/CuTest.a test/buildVersion.o
sources = src/mafDuplicateFilter.c

.PHONY: all clean test buildVersion

all: buildVersion $(foreach f,${PROGS}, ${bin}/$f)
buildVersion: src/buildVersion.c
# Regenerated whenever the program source or a shared dependency changes.
src/buildVersion.c: ${sources} ${dependencies}
	@python ../lib/createVersionSources.py

# The shared objects are built by the lib Makefile. $(MAKE) rather than a
# literal `make` keeps -j/-n and the jobserver working in the sub-make.
../lib/%.o: ../lib/%.c ../inc/%.h
	$(MAKE) -C ../lib/

# Release binary. Linked to a .tmp name and renamed so an interrupted link
# never leaves a truncated file that looks up to date.
${bin}/mafDuplicateFilter: src/mafDuplicateFilter.c ${dependencies} ${objects}
	mkdir -p $(dir $@)
	${cxx} ${cflags} -O3 $< ${objects} -o $@.tmp -lm
	mv $@.tmp $@

# Debug binary exercised by the python tests.
test/mafDuplicateFilter: src/mafDuplicateFilter.c ${dependencies} ${testObjects}
	mkdir -p $(dir $@)
	${cxx} ${cflags} -g -O0 $< ${testObjects} -o $@.tmp -lm
	mv $@.tmp $@

%.o: %.c %.h
	${cxx} -c ${cflags} $< -o $@.tmp
	mv $@.tmp $@
# Debug objects, from the shared library sources ...
test/%.o: ${lib}/%.c ${inc}/%.h
	mkdir -p $(dir $@)
	${cxx} -g -O0 -c ${cflags} $< -o $@.tmp
	mv $@.tmp $@
# ... and from this program's own sources.
test/%.o: src/%.c src/%.h
	mkdir -p $(dir $@)
	${cxx} -g -O0 -c ${cflags} $< -o $@.tmp
	mv $@.tmp $@

# tempTestDir/ is only removed by the test recipe when the tests pass, so a
# failed run leaves it behind; remove it here as well.
clean:
	rm -rf $(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ tempTestDir/ src/buildVersion.c src/buildVersion.h

test: buildVersion test/mafDuplicateFilter
	python2.7 src/test.mafDuplicateFilter.py --verbose && rm -rf test/ && rmdir ./tempTestDir

# CuTest is compiled and archived in the current directory, then moved into
# ../external.
../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h
	${cxx} -c ${cflags} $<
	ar rc CuTest.a CuTest.o
	ranlib CuTest.a
	rm -f CuTest.o
	mv CuTest.a $@
56 |
--------------------------------------------------------------------------------
/mafDuplicateFilter/README.md:
--------------------------------------------------------------------------------
1 | # mafDuplicateFilter
2 |
3 | 16 April 2012
4 |
5 | ## Author
6 | [Dent Earl](https://github.com/dentearl/)
7 |
8 | ## Description
9 | mafDuplicateFilter is a program to filter out duplications from a Multiple Alignment Format (maf) file. This program assumes the sequence name field is formatted as in "speciesName.chromosomeName" using the first period character, ".", as the delimiter between the species name and the chromosome name. For every block present in the alignment, mafDuplicateFilter looks for any duplicated species within the block. Instead of stripping out all copies of the duplication, the sequence with the highest similarity to the consensus of the block is left, all others are removed. Sequence similarity is computed as a bit score in comparison to the IUPAC-enabled consensus. Ties are resolved by picking the sequence that appears earliest in the file.
10 |
11 | ## Installation
12 | 1. Download the package.
13 | 2. `cd` into the directory.
14 | 3. Type `make`.
15 |
16 | ## Use
17 | mafDuplicateFilter --maf mafWithDuplicates.maf > pruned.maf
18 |
19 | ### Options
20 | * `-h, --help` show this help message and exit.
21 | * `-m, --maf` path to maf file.
22 |
23 | ## Example
24 | $ ./mafDuplicateFilter --maf mafWithDuplicates.maf > mafPruned.maf
25 |
26 |
--------------------------------------------------------------------------------
/mafExtractor/Makefile:
--------------------------------------------------------------------------------
# Build, test and clean rules for mafExtractor.
# ${cxx} and ${cflags} are not assigned in this file; presumably they are
# provided by the included ../inc/common.mk.
include ../inc/common.mk
SHELL:=/bin/bash
bin = ../bin
inc = ../inc
lib = ../lib
PROGS = mafExtractor
dependencies = ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c src/mafExtractor.h
API = ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/mafExtractorAPI.o src/buildVersion.o
testAPI = test/sharedMaf.o ../external/CuTest.a test/common.o test/mafExtractorAPI.o test/buildVersion.o
testObjects := test/test.mafExtractor.o
sources = src/mafExtractor.c src/mafExtractor.h

.PHONY: all clean test buildVersion

all: buildVersion $(foreach f,${PROGS}, ${bin}/$f)
buildVersion: src/buildVersion.c
# Regenerated whenever a program source or a shared dependency changes.
src/buildVersion.c: ${sources} ${dependencies}
	@python ../lib/createVersionSources.py

# The shared objects are built by the lib Makefile. $(MAKE) rather than a
# literal `make` keeps -j/-n and the jobserver working in the sub-make.
../lib/%.o: ../lib/%.c ../inc/%.h
	$(MAKE) -C ../lib/

# Release binary. Linked to a .tmp name and renamed so an interrupted link
# never leaves a truncated file that looks up to date.
${bin}/mafExtractor: src/mafExtractor.c ${dependencies} ${API}
	mkdir -p $(dir $@)
	${cxx} ${cflags} -O3 $< ${API} -o $@.tmp -lm
	mv $@.tmp $@

# Debug binary exercised by the python tests; linked with -lm like the
# release binary built from the same source.
test/mafExtractor: src/mafExtractor.c ${dependencies} ${testAPI}
	mkdir -p $(dir $@)
	${cxx} ${cflags} -g -O0 $< ${testAPI} -o $@.tmp -lm
	mv $@.tmp $@

%.o: %.c %.h
	${cxx} -O3 -c ${cflags} $< -o $@.tmp
	mv $@.tmp $@

# Debug objects, from the shared library sources ...
test/%.o: ${lib}/%.c ${inc}/%.h
	mkdir -p $(dir $@)
	${cxx} -c $< -o $@.tmp ${cflags} -g -O0
	mv $@.tmp $@
# ... and from this program's own sources.
test/%.o: src/%.c src/%.h
	mkdir -p $(dir $@)
	${cxx} -c $< -o $@.tmp ${cflags} -g -O0
	mv $@.tmp $@

# The directory left behind by a failed test run is tempTestDir (see the
# rmdir in the test recipe); the old spelling "temTestDir" never matched it.
clean:
	rm -rf $(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ tempTestDir/ src/buildVersion.c src/buildVersion.h

test: buildVersion test/allTests test/mafExtractor
	./test/allTests && python2.7 src/test.mafExtractor.py --verbose && rmdir ./tempTestDir && rm -rf ./test/

# C unit-test runner.
test/allTests: src/allTests.c ${testObjects} ${testAPI}
	mkdir -p $(dir $@)
	${cxx} $^ -o $@.tmp ${cflags} -g -O0
	mv $@.tmp $@

test/test.mafExtractor.o: src/test.mafExtractor.c src/test.mafExtractor.h ${testAPI}
	mkdir -p $(dir $@)
	${cxx} -c $< -o $@.tmp ${cflags} -I src/ -g -O0
	mv $@.tmp $@

# CuTest is compiled and archived in the current directory, then moved into
# ../external.
../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h
	${cxx} -c ${cflags} $<
	ar rc CuTest.a CuTest.o
	ranlib CuTest.a
	rm -f CuTest.o
	mv CuTest.a $@
68 |
--------------------------------------------------------------------------------
/mafExtractor/README.md:
--------------------------------------------------------------------------------
1 | # mafExtractor
2 |
3 | 14 Feb 2012
4 |
5 | ## Author
6 |
7 | [Dent Earl](https://github.com/dentearl/)
8 |
9 | ## Description
10 | mafExtractor is a program that will look through a maf file for a particular sequence name and region. If a match is found then the block containing the query will be printed to standard out. By default blocks are trimmed such that only columns that contain the targeted sequence region are included. Use `--soft` to include an entire block if any part of the block falls within the targeted region.
11 |
12 | __BE AWARE!__ At present mafExtractor doesn't handle maf lines of type `e`, `q`, or `i`. The `s` lines will be properly processed, but these other types of lines will be ignored, which could lead to inconsistent data and confusion.
13 |
14 | ## Installation
15 | 1. Download the package.
16 | 2. `cd` into the directory.
17 | 3. Type `make`.
18 |
19 | ## Use
20 | mafExtractor --seq [sequence name (and possibly chr)] --start [start of the region] --stop [end of the region] [options] < myFile.maf
21 |
22 | ### Options
23 | * `-h, --help` show this help message and exit.
24 | * `-s, --seq` sequence _name.chr_ e.g. `hg18.chr2`.
25 | * `--start` start of the region, inclusive. Must be a positive number.
26 | * `--stop` end of the region, inclusive. Must be a positive number.
27 | * `--soft` include entire block even if it has gaps or over-hangs. default=false.
28 | * `-v, --verbose` turns on verbose output.
29 |
30 | ## Example
31 | $ ./mafExtractor --seq hg19.chr20 --start 500 --stop 1000 < example.maf
32 | ##maf version=1
33 |
34 | #a score=0 pctid=99.2
35 | #s hg19.chr20 0 795 + 73767698 GAT...
36 | ...
37 |
38 |
--------------------------------------------------------------------------------
/mafExtractor/src/allTests.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2014 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 |
26 | #include
27 | #include
28 | #include "CuTest.h"
29 | #include "mafExtractorAPI.h"
30 | #include "test.mafExtractor.h"
31 |
32 | CuSuite* extractor_TestSuite(void);
33 | int extractor_RunAllTests(void);
34 |
35 | int extractor_RunAllTests(void) { /* Run the extractor suite, print its report to stdout; returns 1 if any test failed, else 0. */
36 |     CuSuite *all = CuSuiteNew();
37 |     CuString *report = CuStringNew();
38 |     CuSuite *extractorSuite = extractor_TestSuite();
39 |     CuSuiteAddSuite(all, extractorSuite);
40 |     CuSuiteRun(all);
41 |     CuSuiteSummary(all, report);
42 |     CuSuiteDetails(all, report);
43 |     printf("%s\n", report->buffer);
44 |     int failed = (all->failCount > 0) ? 1 : 0; /* read before the suite is torn down */
45 |     CuStringDelete(report);
46 |     free(extractorSuite); /* NOTE(review): plain free(), not CuSuiteDelete - presumably its tests now belong to the parent suite; confirm in CuTest.c */
47 |     CuSuiteDelete(all);
48 |     return failed;
49 | }
50 | int main(void) { /* The process exit status is whatever extractor_RunAllTests() returns. */
51 | return extractor_RunAllTests();
52 | }
53 |
--------------------------------------------------------------------------------
/mafExtractor/src/mafExtractor.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2011-2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #include
26 | #include
27 | #include
28 | #include
29 | #include
30 | #include
31 | #include "common.h"
32 | #include "sharedMaf.h"
33 | #include "mafExtractor.h"
34 | #include "mafExtractorAPI.h"
35 | #include "buildVersion.h"
36 |
37 | const char *g_version = "version 0.2 September 2012";
38 |
39 | void version(void) { /* Write the program name, g_version and the g_build_* strings to stderr. */
40 | fprintf(stderr, "mafExtractor, %s\nbuild: %s, %s, %s\n\n", g_version, g_build_date,
41 | g_build_git_branch, g_build_git_sha);
42 | }
43 | void usage(void) { /* Print the version banner, the synopsis and one line per option to stderr; never returns. */
44 | version();
45 | fprintf(stderr, "Usage: mafExtractor --maf [maf file] --seq [sequence name (and possibly chr)] "
46 | "--start [start of region, inclusive, 0 based] --stop [end of region, inclusive] "
47 | "[options]\n\n"
48 | "mafExtractor is a program that will look through a maf file for a\n"
49 | "particular sequence name and region. If a match is found then the block\n"
50 | "containing the querry will be printed to standard out.\n\n");
51 | fprintf(stderr, "Options: \n");
52 | usageMessage('h', "help", "show this help message and exit.");
53 | usageMessage('m', "maf", "path to maf file. use - for stdin.");
54 | usageMessage('s', "seq", "sequence name, e.g. `hg18.chr2'.");
55 | usageMessage('\0', "start", "start of region, inclusive, 0 based."); /* '\0' is passed for options that have no short form */
56 | usageMessage('\0', "stop", "end of region, inclusive, 0 based.");
57 | usageMessage('\0', "soft", "include entire block even if it has gaps or over-hangs. default=false.");
58 | usageMessage('\0', "first", "only check the first line of each block.");
59 | usageMessage('v', "verbose", "turns on verbose output.");
60 | exit(EXIT_FAILURE); /* always a failure status, including when reached through -h/--help */
61 | }
62 | void parseOptions(int argc, char **argv, char *filename, char *seqName, uint64_t *start,
63 |                   uint64_t *stop, bool *isSoft, bool *checkFirstLineOnly) {
64 |     /* Parse argv into the caller's outputs. --maf, --seq, --start and --stop are mandatory; any
65 |      * error prints usage() and exits. At most kMaxSeqName bytes (terminating NUL included) are
66 |      * written to filename and to seqName. *start and *stop are swapped if given in reverse. */
67 |     extern int g_debug_flag;
68 |     extern int g_verbose_flag;
69 |     int c;
70 |     bool setSName = false, setStart = false, setStop = false, setMName = false;
71 |     int64_t value = 0;
72 |     while (1) {
73 |         static struct option longOptions[] = {
74 |             {"debug", no_argument, 0, 'd'},
75 |             {"verbose", no_argument, 0, 'v'},
76 |             {"help", no_argument, 0, 'h'},
77 |             {"version", no_argument, 0, 0},
78 |             {"maf", required_argument, 0, 'm'},
79 |             {"seq", required_argument, 0, 's'},
80 |             {"start", required_argument, 0, 0},
81 |             {"stop", required_argument, 0, 0},
82 |             {"soft", no_argument, 0, 0},
83 |             {"first", no_argument, 0, 0},
84 |             {0, 0, 0, 0}
85 |         };
86 |         int longIndex = 0;
87 |         /* -h, -v and -d are plain flags: a ':' after them would make getopt swallow the next argument. */
88 |         c = getopt_long(argc, argv, "m:s:hvd", longOptions, &longIndex);
89 |         if (c == -1) break;
90 |         switch (c) {
91 |         case 0: /* long-only option: identify it by name */
92 |             if (strcmp("start", longOptions[longIndex].name) == 0) {
93 |                 value = strtoll(optarg, NULL, 10);
94 |                 if (value < 0) {
95 |                     fprintf(stderr, "Error, --start %" PRIi64 " must be nonnegative.\n", value);
96 |                     usage();
97 |                 }
98 |                 *start = value;
99 |                 setStart = true;
100 |             } else if (strcmp("stop", longOptions[longIndex].name) == 0) {
101 |                 value = strtoll(optarg, NULL, 10);
102 |                 if (value < 0) {
103 |                     fprintf(stderr, "Error, --stop %" PRIi64 " must be nonnegative.\n", value);
104 |                     usage();
105 |                 }
106 |                 *stop = value;
107 |                 setStop = true;
108 |             } else if (strcmp("soft", longOptions[longIndex].name) == 0) {
109 |                 *isSoft = true;
110 |             } else if (strcmp("first", longOptions[longIndex].name) == 0) {
111 |                 *checkFirstLineOnly = true;
112 |             } else if (strcmp("version", longOptions[longIndex].name) == 0) {
113 |                 version();
114 |                 exit(EXIT_SUCCESS);
115 |             }
116 |             break;
117 |         case 'm':
118 |             setMName = true;
119 |             snprintf(filename, kMaxSeqName, "%s", optarg); /* unlike strncpy, always NUL terminates */
120 |             break;
121 |         case 's':
122 |             setSName = true;
123 |             snprintf(seqName, kMaxSeqName, "%s", optarg);
124 |             break;
125 |         case 'v':
126 |             g_verbose_flag++;
127 |             break;
128 |         case 'd':
129 |             g_debug_flag = 1;
130 |             break;
131 |         case 'h':
132 |         case '?':
133 |             usage();
134 |             break;
135 |         default:
136 |             abort();
137 |         }
138 |     }
139 |     if (!(setMName && setSName && setStart && setStop)) {
140 |         fprintf(stderr, "Error, specify --maf --seq --start --stop\n");
141 |         usage();
142 |     }
143 |     if (*start > *stop) {
144 |         uint64_t t = *start;
145 |         *start = *stop;
146 |         *stop = t;
147 |     }
148 |     // Check there's nothing left over on the command line; leftovers are streamed to stderr
149 |     // one by one so no fixed-size buffer can overflow, whatever their combined length.
150 |     if (optind < argc) {
151 |         fprintf(stderr, "Unexpected arguments:");
152 |         while (optind < argc) {
153 |             fprintf(stderr, " %s", argv[optind++]);
154 |         }
155 |         fprintf(stderr, "\n");
156 |         usage();
157 |     }
158 | }
158 |
159 | int main(int argc, char **argv) {
160 |     /* Entry point: parse the command line, open the maf for reading, process it, release it. */
161 |     extern const int kMaxStringLength;
162 |     bool checkFirstLineOnly = false, isSoft = false;
163 |     uint64_t regionStart, regionStop;
164 |     char mafPath[kMaxStringLength];
165 |     char seqName[kMaxSeqName];
166 |     mafFileApi_t *maf;
167 |     parseOptions(argc, argv, mafPath, seqName, &regionStart, &regionStop, &isSoft, &checkFirstLineOnly);
168 |     maf = maf_newMfa(mafPath, "r");
169 |     /* Hand the open maf plus the requested sequence and region to processBody. */
170 |     processBody(maf, seqName, regionStart, regionStop, isSoft, checkFirstLineOnly);
171 |     maf_destroyMfa(maf);
172 |     return EXIT_SUCCESS;
173 | }
174 |
--------------------------------------------------------------------------------
/mafExtractor/src/mafExtractor.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2011-2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #ifndef _BLOCK_EXTRACTOR_H_
26 | #define _BLOCK_EXTRACTOR_H_
27 |
28 | #include
29 | #include
30 | #include "common.h"
31 | #include "sharedMaf.h"
32 |
33 | void version(void);
34 | void usage(void);
35 | void parseOptions(int argc, char **argv, char *filename, char *seqName, uint64_t *start,
36 | uint64_t *stop, bool *isSoft, bool *checkFirstLineOnly);
37 |
38 | #endif // _BLOCK_EXTRACTOR_H_
39 |
--------------------------------------------------------------------------------
/mafExtractor/src/mafExtractorAPI.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2011-2014 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 |
26 | #ifndef _BLOCK_EXTRACTOR_API_H_
27 | #define _BLOCK_EXTRACTOR_API_H_
28 |
29 | #include
30 | #include
31 | #include "common.h"
32 | #include "sharedMaf.h"
33 |
34 | bool checkRegion(uint64_t targetStart, uint64_t targetStop, uint64_t lineStart,
35 | uint64_t length, uint64_t sourceLength, char strand);
36 | bool searchMatched(mafLine_t *ml, const char *seq, uint64_t start, uint64_t stop);
37 | void printHeader(void);
38 | uint64_t getTargetColumns(bool **targetColumns, uint64_t *n, mafBlock_t *b, const char *seq,
39 | uint64_t start, uint64_t stop);
40 | void printTargetColumns(bool *targetColumns, uint64_t n);
41 | int64_t **createOffsets(uint64_t n);
42 | void destroyOffsets(int64_t **offs, uint64_t n);
43 | mafBlock_t *processBlockForSplice(mafBlock_t *b, uint64_t blockNumber, const char *seq,
44 | uint64_t start, uint64_t stop, bool store);
45 | mafBlock_t *spliceBlock(mafBlock_t *mb, uint64_t l, uint64_t r, int64_t **offsetArray);
46 | void checkBlock(mafBlock_t *b, uint64_t blockNumber, const char *seq, uint64_t start,
47 | uint64_t stop, bool *printedHeader, bool isSoft, bool checkFirstLineOnly);
48 | void processBody(mafFileApi_t *mfa, char *seq, uint64_t start, uint64_t stop, bool isSoft,
49 | bool checkFirstLineOnly);
50 | uint64_t sumBool(bool *array, uint64_t n);
51 | void printOffsetArray(int64_t **offsetArray, uint64_t n);
52 |
53 | #endif // _BLOCK_EXTRACTOR_API_H_
54 |
--------------------------------------------------------------------------------
/mafExtractor/src/test.mafExtractor.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2014 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #ifndef TEST_EXTRACTOR_API_H_
26 | #define TEST_EXTRACTOR_API_H_
27 | #include
28 | #include
29 | #include
30 | #include
31 | #include
32 | #include "CuTest.h"
33 | #include "common.h"
34 | #include "sharedMaf.h"
35 | #include "mafExtractorAPI.h"
36 |
37 | CuSuite* extractor_TestSuite(void);
38 |
39 | #endif // TEST_EXTRACTOR_API_H_
40 |
--------------------------------------------------------------------------------
/mafFilter/Makefile:
--------------------------------------------------------------------------------
1 | include ../inc/common.mk
2 | SHELL:=/bin/bash
3 | bin = ../bin
4 | inc = ../inc
5 | lib = ../lib
6 | PROGS = mafFilter
7 | dependencies = ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c
8 | objects = ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/buildVersion.o
9 | testObjects = test/common.o test/sharedMaf.o ../external/CuTest.a test/buildVersion.o
10 | sources = src/mafFilter.c
11 |
12 | .PHONY: all clean test buildVersion
13 |
14 | all: buildVersion $(foreach f,${PROGS}, ${bin}/$f)
15 | buildVersion: src/buildVersion.c
16 | src/buildVersion.c: ${sources} ${dependencies}
17 | @python ../lib/createVersionSources.py
18 |
19 | ../lib/%.o: ../lib/%.c ../inc/%.h # shared objects are built by the Makefile in ../lib; MAKE (not a literal make) passes -j/-n down
20 | cd ../lib/ && $(MAKE)
21 |
22 | ${bin}/mafFilter: src/mafFilter.c ${dependencies} ${objects}
23 | mkdir -p $(dir $@)
24 | ${cxx} ${cflags} -O3 $< ${objects} -o $@.tmp -lm
25 | mv $@.tmp $@
26 |
27 | test/mafFilter: src/mafFilter.c ${dependencies} ${testObjects}
28 | mkdir -p $(dir $@)
29 | ${cxx} ${cflags} -g -O0 $< ${testObjects} -o $@.tmp -lm
30 | mv $@.tmp $@
31 |
32 | %.o: %.c %.h
33 | ${cxx} -O3 -c ${cflags} $< -o $@.tmp
34 | mv $@.tmp $@
35 | test/%.o: ${lib}/%.c ${inc}/%.h
36 | mkdir -p $(dir $@)
37 | ${cxx} -g -O0 -c ${cflags} $< -o $@.tmp
38 | mv $@.tmp $@
39 | test/%.o: src/%.c src/%.h
40 | mkdir -p $(dir $@)
41 | ${cxx} -g -O0 -c ${cflags} $< -o $@.tmp
42 | mv $@.tmp $@
43 |
44 | clean:
45 | rm -rf $(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ src/buildVersion.c src/buildVersion.h
46 |
47 | test: buildVersion test/mafFilter
48 | python2.7 src/test.mafFilter.py --verbose && rm -rf test/ && rmdir ./tempTestDir
49 |
50 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h
51 | ${cxx} -c ${cflags} $<
52 | ar rc CuTest.a CuTest.o
53 | ranlib CuTest.a
54 | rm -f CuTest.o
55 | mv CuTest.a $@
56 |
--------------------------------------------------------------------------------
/mafFilter/README.md:
--------------------------------------------------------------------------------
1 | # mafFilter
2 |
3 | 28 May 2012
4 |
5 | ## Author
6 |
7 | [Dent Earl](https://github.com/dentearl/)
8 |
9 | ## Description
10 | mafFilter is a program that will look through a maf file block by block and excise out sequence lines that match criteria established by the user on the command line. For example, one can filter out all sequence lines that start with 'hg18' using `--exclude` or filter for sequence lines starting with only 'hg19', 'mm9' and 'rn4' using `--include`.
11 |
12 | ## Installation
13 | 1. Download the package.
14 | 2. `cd` into the directory.
15 | 3. Type `make`.
16 |
17 | ## Use
18 | mafFilter --maf [path to maf] [options]
19 |
20 | ### Options
21 | * `-h, --help` show this help message and exit.
22 | * `-m, --maf` path to maf file.
23 | * `-i, --includeSeq` comma separated list of sequence names to include.
24 | * `-e, --excludeSeq` comma separated list of sequence names to exclude.
25 | * `-g, --noDegreeGT` filter out all blocks with degree greater than this value.
26 | * `-l, --noDegreeLT` filter out all blocks with degree less than this value.
27 | * `-v, --verbose` turns on verbose output.
28 |
29 | ## Example
30 | $ ./mafFilter --maf example.maf --include hg18,mm9,rn4,banana
31 | ##maf version=1
32 | a score=0
33 | s banana.chr1 0 10 + 1000000 ACGTACGTAC
34 | ...
35 |
36 |
37 |
--------------------------------------------------------------------------------
/mafPairCoverage/Makefile:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2009-2013 by
2 | # Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
3 | # Benedict Paten (benedict@soe.ucsc.edu, benedictpaten@gmail.com)
4 | # Mark Diekhans (markd@soe.ucsc.edu)
5 | # ... and other members of the Reconstruction Team of David Haussler's
6 | # lab (BME Dept. UCSC).
7 | #
8 | # Permission is hereby granted, free of charge, to any person obtaining a copy
9 | # of this software and associated documentation files (the "Software"), to deal
10 | # in the Software without restriction, including without limitation the rights
11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | # copies of the Software, and to permit persons to whom the Software is
13 | # furnished to do so, subject to the following conditions:
14 | #
15 | # The above copyright notice and this permission notice shall be included in
16 | # all copies or substantial portions of the Software.
17 | #
18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | # THE SOFTWARE.
25 |
26 | include ../inc/common.mk
27 | SHELL:=/bin/bash
28 | bin = ../bin
29 | inc = ../inc
30 | lib = ../lib
31 | PROGS = mafPairCoverage
32 | dependencies = ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c $(wildcard ${sonLibPath}/*) ${sonLibPath}/sonLib.a src/allTests.c
33 | extraAPI := ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/mafPairCoverageAPI.o ${sonLibPath}/sonLib.a src/buildVersion.o
34 | testAPI := test/sharedMaf.o test/common.o ../external/CuTest.a test/mafPairCoverageAPI.o ${sonLibPath}/sonLib.a test/buildVersion.o
35 | testObjects := test/test.mafPairCoverageAPI.o
36 | sources := src/mafPairCoverage.c src/mafPairCoverage.h
37 |
38 | .PHONY: all clean test buildVersion
39 |
40 | all: buildVersion $(foreach f,${PROGS}, ${bin}/$f)
41 | buildVersion: src/buildVersion.c
42 | src/buildVersion.c: ${sources} ${dependencies}
43 | @python ../lib/createVersionSources.py
44 |
45 | ../lib/%.o: ../lib/%.c ../inc/%.h # shared objects are built by the Makefile in ../lib; MAKE (not a literal make) passes -j/-n down
46 | cd ../lib/ && $(MAKE)
47 |
48 | ${bin}/mafPairCoverage: src/mafPairCoverage.c ${dependencies} ${extraAPI}
49 | mkdir -p $(dir $@)
50 | ${cxx} $< ${extraAPI} -o $@.tmp ${cflags} -lm
51 | mv $@.tmp $@
52 | %.o: %.c %.h
53 | ${cxx} -c $< -o $@.tmp ${cflags}
54 | mv $@.tmp $@
55 | %/mafPairCoverageAPI.o: src/mafPairCoverageAPI.c src/mafPairCoverageAPI.h
56 | ${cxx} -c $< -o $@.tmp ${cflags}
57 | mv $@.tmp $@
58 |
59 | test: buildVersion test/allTests test/mafPairCoverage
60 | ./test/allTests && python2.7 src/test.mafPairCoverage.py --verbose && rm -rf ./test/ && rmdir ./tempTestDir
61 | test/allTests: src/allTests.c ${testAPI} ${testObjects} ${sonLibPath}/sonLib.a
62 | mkdir -p $(dir $@)
63 | ${cxx} $^ -o $@.tmp ${testFlags} -lm
64 | mv $@.tmp $@
65 | test/mafPairCoverage: src/mafPairCoverage.c ${dependencies} ${testAPI}
66 | mkdir -p $(dir $@)
67 | ${cxx} $< ${testAPI} -o $@.tmp ${testFlags} -lm
68 | mv $@.tmp $@
69 | test/%.o: ${lib}/%.c ${inc}/%.h
70 | mkdir -p $(dir $@)
71 | ${cxx} -c $< -o $@.tmp ${testFlags}
72 | mv $@.tmp $@
73 | test/test.mafPairCoverageAPI.o: src/test.mafPairCoverageAPI.c src/test.mafPairCoverageAPI.h test/mafPairCoverageAPI.o
74 | mkdir -p $(dir $@)
75 | ${cxx} -c $< -o $@.tmp ${testFlags}
76 | mv $@.tmp $@
77 | test/%.o: src/%.c src/%.h ${sonLibPath}/sonLib.a
78 | mkdir -p $(dir $@)
79 | ${cxx} -c $< -o $@.tmp ${testFlags}
80 | mv $@.tmp $@
81 |
82 | clean:
83 | rm -rf $(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ src/buildVersion.c src/buildVersion.h
84 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h
85 | ${cxx} -c $< ${cflags}
86 | ar rc CuTest.a CuTest.o
87 | ranlib CuTest.a
88 | rm -f CuTest.o
89 | mv CuTest.a $@
90 |
--------------------------------------------------------------------------------
/mafPairCoverage/README.md:
--------------------------------------------------------------------------------
1 | # mafPairCoverage
2 |
3 | 7 May 2013
4 |
5 | ## Author
6 |
7 | [Dent Earl](https://github.com/dentearl/)
8 |
9 | ## Description
10 | mafPairCoverage is a program that will look through a maf file block by block and check for a particular pair of sequences (allowing input sequence to end in wildcard *) and count the number of aligned positions where the two sequences have residues aligned. Coverage of genome A onto genome B is then symmetrically calculated as the number of aligned positions divided by the total size of genome B.
11 |
12 | __BE AWARE!__ The input maf should be transitively closed (if you are unsure you can use the tool mafTransitiveClosure to transitively close the alignment) to ensure that the coverage numbers are accurate.
13 |
14 | ## Installation
15 | 1. Download the package.
16 | 2. `cd` into the directory.
17 | 3. Type `make`.
18 |
19 | ## Use
20 | mafPairCoverage --seq1 [sequence name] --seq2 [sequence name] --maf myFile.maf [options]
21 |
22 | ### Options
23 | * `-h, --help` show this help message and exit.
24 | * `--seq1` sequence _name.chr_ e.g. `hg19*`. May end in * to indicate wildcard.
25 | * `--seq2` sequence _name.chr_ e.g. `mm9.chr2`. May end in * to indicate wildcard.
26 | * `--maf` input maf file.
27 | * `--bed` path to 3 column bedfile that will define regions of interest in output.
28 | * `--bin_start` starting position (inclusive) of the sub-region to analyze.
29 | * `--bin_end` ending position (inclusive) of the sub-region to analyze.
30 | * `--bin_length` the length of each bin within the region. default=1000
31 | * `-v, --verbose` turns on verbose output.
32 |
33 | ## Example
34 | $ ./mafPairCoverage --seq1 hg19* --seq2 mm9* --maf example.maf
35 | ...
36 |
37 |
--------------------------------------------------------------------------------
/mafPairCoverage/src/allTests.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 |
26 | #include
27 | #include
28 | #include "CuTest.h"
29 | #include "mafPairCoverageAPI.h"
30 | #include "test.mafPairCoverageAPI.h"
31 |
32 | CuSuite* pairCoverage_TestSuite(void);
33 |
34 | int pairCoverage_RunAllTests(void) { /* Run the pair-coverage suite, print its report to stdout; returns 1 if any test failed, else 0. */
35 |     CuSuite *all = CuSuiteNew();
36 |     CuString *report = CuStringNew();
37 |     CuSuite *pairCoverageSuite = pairCoverage_TestSuite();
38 |     CuSuiteAddSuite(all, pairCoverageSuite);
39 |     CuSuiteRun(all);
40 |     CuSuiteSummary(all, report);
41 |     CuSuiteDetails(all, report);
42 |     printf("%s\n", report->buffer);
43 |     int failed = (all->failCount > 0) ? 1 : 0; /* read before the suite is torn down */
44 |     CuStringDelete(report);
45 |     free(pairCoverageSuite); /* NOTE(review): plain free(), not CuSuiteDelete - presumably its tests now belong to the parent suite; confirm in CuTest.c */
46 |     CuSuiteDelete(all);
47 |     return failed;
48 | }
49 | int main(void) { /* The process exit status is whatever pairCoverage_RunAllTests() returns. */
50 | return pairCoverage_RunAllTests();
51 | }
52 |
--------------------------------------------------------------------------------
/mafPairCoverage/src/mafPairCoverage.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2011-2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #ifndef _PAIR_COVERAGE_H_
26 | #define _PAIR_COVERAGE_H_
27 |
28 | #include
29 | #include
30 | #include "common.h"
31 | #include "sharedMaf.h"
32 | #include "sonLib.h"
33 |
34 | void version(void);
35 | void usage(void);
36 | void parseOptions(int argc, char **argv, char *filename, char *seq1Name,
37 | char *seq2Name, stHash *intervalsHashn, int64_t *bin_start,
38 | int64_t *bin_end, int64_t *bin_length);
39 | void reportResults(char *seq1, char *seq2, stHash *seq1Hash, stHash *seq2Hash,
40 | uint64_t *alignedPositions);
41 | void reportResultsRegion(char *seq1, char *seq2, stHash *seq1Hash,
42 | stHash *seq2Hash, uint64_t *alignedPositions,
43 | stHash *intervalsHash);
44 |
45 | #endif // _PAIR_COVERAGE_H_
46 |
--------------------------------------------------------------------------------
/mafPairCoverage/src/mafPairCoverageAPI.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2011-2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 |
26 | #ifndef _PAIR_COVERAGE_API_H_
27 | #define _PAIR_COVERAGE_API_H_
28 |
29 | #include
30 | #include
31 | #include "common.h"
32 | #include "sharedMaf.h"
33 | #include "sonLib.h"
34 | #include "mafPairCoverage.h"
35 |
36 | typedef struct mafCoverageCount mafCoverageCount_t; /* only forward-declared in this header; callers go through the accessors below */
37 | typedef struct _BinContainer BinContainer; /* only forward-declared in this header; callers go through the binContainer_* functions */
38 |
39 | mafCoverageCount_t* createMafCoverageCount(void); /* mafCoverageCount_t: constructor, then getters and setters */
40 | uint64_t mafCoverageCount_getSourceLength(mafCoverageCount_t *mcct);
41 | uint64_t mafCoverageCount_getObservedLength(mafCoverageCount_t *mcct);
42 | uint64_t mafCoverageCount_getCount(mafCoverageCount_t *mcct);
43 | uint64_t mafCoverageCount_getInRegion(mafCoverageCount_t *mcct);
44 | uint64_t mafCoverageCount_getOutRegion(mafCoverageCount_t *mcct);
45 | void mafCoverageCount_setSourceLength(mafCoverageCount_t *mcct, uint64_t n);
46 | void mafCoverageCount_setCount(mafCoverageCount_t *mcct, uint64_t n);
47 | void mafCoverageCount_setInRegion(mafCoverageCount_t *mcct, uint64_t n);
48 | void mafCoverageCount_setOutRegion(mafCoverageCount_t *mcct, uint64_t n);
49 | int64_t binContainer_getBinStart(BinContainer *bc); /* BinContainer: getters, then setters and bin updates */
50 | int64_t binContainer_getBinEnd(BinContainer *bc);
51 | int64_t binContainer_getBinLength(BinContainer *bc);
52 | int64_t binContainer_getNumBins(BinContainer *bc);
53 | uint64_t* binContainer_getBins(BinContainer *bc);
54 | uint64_t binContainer_accessBin(BinContainer *bc, int64_t i);
55 | void binContainer_setBinStart(BinContainer *bc, int64_t i);
56 | void binContainer_setBinEnd(BinContainer *bc, int64_t i);
57 | void binContainer_setBinLength(BinContainer *bc, int64_t i); /* parameter named like the sibling setters */
58 | void binContainer_incrementPosition(BinContainer *bc, int64_t i);
59 | void binContainer_incrementBin(BinContainer *bc, int64_t i);
60 | void binContainer_setBinValue(BinContainer *bc, int64_t i, int64_t v);
61 | bool is_wild(const char *s);
62 | bool inInterval(stHash *intervalsHash, char *seq, uint64_t pos);
63 | bool searchMatched(mafLine_t *ml, const char *seq);
64 | bool searchMatched_(const char *target, const char *seq);
65 | void compareLines(mafLine_t *ml1, mafLine_t *ml2, stHash *seq1Hash,
66 | stHash *seq2Hash, uint64_t *alignedPositions,
67 | stHash *intervalsHash, BinContainer *bc);
68 | void wrapDestroyMafLine(void *p); /* takes void*, presumably so it can be used as a generic destructor callback -- confirm in the .c file */
69 | void checkBlock(mafBlock_t *b, const char *seq1, const char *seq2,
70 | stHash *seq1Hash, stHash *seq2Hash, uint64_t *alignedPositions,
71 | stHash *intervalsHash, BinContainer *bc);
72 | void processBody(mafFileApi_t *mfa, char *seq1, char *seq2, stHash *seq1Hash,
73 | stHash *seq2Hash,
74 | uint64_t *alignedPositions, stHash *intervalsHash,
75 | BinContainer *bc);
76 | void parseBedFile(const char *filepath, stHash *intervalsHash);
77 | void reportResultsBins(char *seq1, char *seq2, BinContainer *bin_container);
78 | BinContainer* binContainer_init(void); /* BinContainer lifecycle: init / construct / destruct */
79 | BinContainer* binContainer_construct(int64_t bin_start, int64_t bin_end,
80 | int64_t bin_length);
81 | void binContainer_destruct(BinContainer *bc);
82 |
83 | #endif // _PAIR_COVERAGE_API_H_
84 |
--------------------------------------------------------------------------------
/mafPairCoverage/src/test.mafPairCoverageAPI.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #ifndef TEST_PAIR_COVERAGE_API_H_
26 | #define TEST_PAIR_COVERAGE_API_H_ /* include guard for the mafPairCoverage API test declarations */
27 | #include
28 | #include
29 | #include
30 | #include
31 | #include
32 | #include "CuTest.h"
33 | #include "common.h"
34 | #include "sharedMaf.h"
35 | #include "mafPairCoverageAPI.h"
36 |
37 | CuSuite* extractor_TestSuite(void); /* NOTE(review): named "extractor" although this header belongs to mafPairCoverage -- confirm the name matches the definition in test.mafPairCoverageAPI.c */
38 |
39 | #endif // TEST_PAIR_COVERAGE_API_H_
40 |
--------------------------------------------------------------------------------
/mafPositionFinder/Makefile:
--------------------------------------------------------------------------------
1 | # mafPositionFinder: builds ${bin}/mafPositionFinder (-O3) and a debug copy in test/ used by the `test` target.
2 | include ../inc/common.mk
3 | SHELL:=/bin/bash
4 | bin = ../bin
5 | inc = ../inc
6 | lib = ../lib
7 | PROGS = mafPositionFinder
8 | dependencies = ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c
9 | objects = ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/buildVersion.o
10 | testObjects = test/common.o test/sharedMaf.o ../external/CuTest.a test/buildVersion.o
11 | sources = src/mafPositionFinder.c
12 |
13 | .PHONY: all clean test buildVersion
14 |
15 | all: buildVersion $(foreach f,${PROGS}, ${bin}/$f)
16 | buildVersion: src/buildVersion.c
17 | src/buildVersion.c: ${sources} ${dependencies}
18 | 	@python ../lib/createVersionSources.py
19 |
20 | # Shared objects are built by the Makefile in ../lib. Use $(MAKE) rather than a
21 | # bare `make` so -j, -n and command-line variables reach the sub-make.
22 | ../lib/%.o: ../lib/%.c ../inc/%.h
23 | 	cd ../lib/ && $(MAKE)
24 |
25 | ${bin}/mafPositionFinder: src/mafPositionFinder.c ${dependencies} ${objects}
26 | 	mkdir -p $(dir $@)
27 | 	${cxx} ${cflags} -O3 $< ${objects} -o $@.tmp -lm
28 | 	mv $@.tmp $@
29 |
30 | test/mafPositionFinder: src/mafPositionFinder.c ${dependencies} ${testObjects}
31 | 	mkdir -p $(dir $@)
32 | 	${cxx} ${cflags} -g -O0 $< ${testObjects} -o $@.tmp -lm
33 | 	mv $@.tmp $@
34 |
35 | # Compile-only rules: same ${cflags} as the link rules, and no -lm because nothing is linked under -c.
36 | %.o: %.c %.h
37 | 	${cxx} -O3 -c ${cflags} $< -o $@.tmp
38 | 	mv $@.tmp $@
39 | test/%.o: ${lib}/%.c ${inc}/%.h
40 | 	mkdir -p $(dir $@)
41 | 	${cxx} -g -O0 -c ${cflags} $< -o $@.tmp
42 | 	mv $@.tmp $@
43 | test/%.o: src/%.c src/%.h
44 | 	mkdir -p $(dir $@)
45 | 	${cxx} -c $< -o $@.tmp ${cflags} -g -O0
46 | 	mv $@.tmp $@
47 |
48 | clean:
49 | 	rm -rf $(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ src/buildVersion.c src/buildVersion.h
50 |
51 | test: buildVersion test/mafPositionFinder
52 | 	python2.7 src/test.mafPositionFinder.py --verbose && rm -rf test/ && rmdir ./tempTestDir
53 |
54 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h
55 | 	${cxx} -c ${cflags} $<
56 | 	ar rc CuTest.a CuTest.o
57 | 	ranlib CuTest.a
58 | 	rm -f CuTest.o
59 | 	mv CuTest.a $@
60 |
--------------------------------------------------------------------------------
/mafPositionFinder/README.md:
--------------------------------------------------------------------------------
1 | # mafPositionFinder
2 |
3 | 10 Feb 2012
4 |
5 | ## Author
6 |
7 | [Dent Earl](https://github.com/dentearl/)
8 |
9 | ## Description
10 | mafPositionFinder is a program that will look through a maf file for a particular sequence name and location. If a match is found the line number and first few fields are returned. If no match is found nothing is returned.
11 |
12 | ## Installation
13 | 1. Download the package.
14 | 2. `cd` into the directory.
15 | 3. Type `make`.
16 |
17 | ## Use
18 | mafPositionFinder --maf [path to maf] --seq [sequence name (and possibly chr)] --pos [position to search for, zero based coordinates] [options]
19 |
20 | ### Options
21 | * `-h, --help` show this help message and exit.
22 | * `-m, --maf` path to maf file.
23 | * `-s, --seq` sequence _name.chr_, e.g. `hg18.chr2`.
24 | * `-p, --pos` position along the chromosome you are searching for. Must be a non-negative number.
25 | * `-v, --verbose` turns on verbose output.
26 |
27 | ## Example
28 | $ ./mafPositionFinder --maf example.maf --seq apple.chr20 --pos 500
29 | block 3, line 4: s apple.chr20 0 795 + 73767698 ...AATTG ->G<- ACCCG...
30 |
31 | We see from this example that position 500 of apple.chr20 is located at line 4 of example.maf, is part of a block that starts at line 3, and that the base at this position is G flanked by AATTG on the left and ACCCG on the right.
32 |
--------------------------------------------------------------------------------
/mafRowOrderer/Makefile:
--------------------------------------------------------------------------------
1 | # mafRowOrderer: builds ${bin}/mafRowOrderer (-O3) and a debug copy in test/ used by the `test` target.
2 | include ../inc/common.mk
3 | SHELL:=/bin/bash
4 | bin = ../bin
5 | inc = ../inc
6 | lib = ../lib
7 | PROGS = mafRowOrderer
8 | dependencies = ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c
9 | objects = ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/buildVersion.o
10 | testObjects = test/common.o test/sharedMaf.o ../external/CuTest.a test/buildVersion.o
11 | sources = src/mafRowOrderer.c
12 |
13 | .PHONY: all clean test buildVersion
14 |
15 | all: buildVersion $(foreach f,${PROGS}, ${bin}/$f)
16 | buildVersion: src/buildVersion.c
17 | src/buildVersion.c: ${sources} ${dependencies}
18 | 	@python ../lib/createVersionSources.py
19 |
20 | # Shared objects are built by the Makefile in ../lib. Use $(MAKE) rather than a
21 | # bare `make` so -j, -n and command-line variables reach the sub-make.
22 | ../lib/%.o: ../lib/%.c ../inc/%.h
23 | 	cd ../lib/ && $(MAKE)
24 |
25 | ${bin}/mafRowOrderer: src/mafRowOrderer.c ${dependencies} ${objects}
26 | 	mkdir -p $(dir $@)
27 | 	${cxx} ${cflags} -O3 $< ${objects} -o $@.tmp -lm
28 | 	mv $@.tmp $@
29 |
30 | test/mafRowOrderer: src/mafRowOrderer.c ${dependencies} ${testObjects}
31 | 	mkdir -p $(dir $@)
32 | 	${cxx} ${cflags} -g -O0 $< ${testObjects} -o $@.tmp -lm
33 | 	mv $@.tmp $@
34 |
35 | %.o: %.c %.h
36 | 	${cxx} -O3 -c ${cflags} $< -o $@.tmp
37 | 	mv $@.tmp $@
38 | test/%.o: ${lib}/%.c ${inc}/%.h
39 | 	mkdir -p $(dir $@)
40 | 	${cxx} -g -O0 -c ${cflags} $< -o $@.tmp
41 | 	mv $@.tmp $@
42 | test/%.o: src/%.c src/%.h
43 | 	mkdir -p $(dir $@)
44 | 	${cxx} -g -O0 -c ${cflags} $< -o $@.tmp
45 | 	mv $@.tmp $@
46 |
47 | clean:
48 | 	rm -rf $(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ src/buildVersion.c src/buildVersion.h
49 |
50 | test: buildVersion test/mafRowOrderer
51 | 	python2.7 src/test.mafRowOrderer.py --verbose && rm -rf test/ && rmdir ./tempTestDir
52 |
53 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h
54 | 	${cxx} -c ${cflags} $<
55 | 	ar rc CuTest.a CuTest.o
56 | 	ranlib CuTest.a
57 | 	rm -f CuTest.o
58 | 	mv CuTest.a $@
59 |
--------------------------------------------------------------------------------
/mafRowOrderer/README.md:
--------------------------------------------------------------------------------
1 | # mafRowOrderer
2 |
3 | 4 October 2012
4 |
5 | ## Author
6 |
7 | [Dent Earl](https://github.com/dentearl/)
8 |
9 | ## Description
10 | mafRowOrderer is a program that will look through a maf file block by block and order the maf lines within a block according to the order provided. Species not in the established order are excised. Comments are excised. Non-sequence lines ('^s') are excised.
11 |
12 | ## Installation
13 | 1. Download the package.
13 | 2. `cd` into the directory.
14 | 3. Type `make`.
16 |
17 | ## Use
18 | mafRowOrderer --maf [path to maf] --order [comma separated list of species]
19 |
20 | ### Options
21 | * `-h, --help` show this help message and exit.
22 | * `-m, --maf` path to maf file.
23 | * `--order` comma separated list of species names.
24 | * `-v, --verbose` turns on verbose output.
25 |
26 | ## Example
27 | $ ./mafRowOrderer --maf example.maf --order hg18,mm9,rn4,banana
28 | ##maf version=1
29 | a score=0
30 | s banana.chr1 0 10 + 1000000 ACGTACGTAC
31 | ...
32 |
33 |
34 |
--------------------------------------------------------------------------------
/mafSorter/Makefile:
--------------------------------------------------------------------------------
1 | # mafSorter: builds ${bin}/mafSorter (-O3) and a debug copy in test/ used by the `test` target.
2 | include ../inc/common.mk
3 | SHELL:=/bin/bash
4 | bin = ../bin
5 | inc = ../inc
6 | lib = ../lib
7 | PROGS = mafSorter
8 | dependencies = ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c
9 | objects = ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/buildVersion.o
10 | testObjects = ./test/common.o ./test/sharedMaf.o ../external/CuTest.a test/buildVersion.o
11 | sources = src/mafSorter.c
12 |
13 | .PHONY: all clean test buildVersion
14 |
15 | all: buildVersion $(foreach f,${PROGS}, ${bin}/$f)
16 | buildVersion: src/buildVersion.c
17 | src/buildVersion.c: ${sources} ${dependencies}
18 | 	@python ${lib}/createVersionSources.py
19 |
20 | # Shared objects are built by the Makefile in ../lib. Use $(MAKE) rather than a
21 | # bare `make` so -j, -n and command-line variables reach the sub-make.
22 | ../lib/%.o: ../lib/%.c ../inc/%.h
23 | 	cd ../lib/ && $(MAKE)
24 |
25 | ${bin}/mafSorter: src/mafSorter.c ${dependencies} ${objects}
26 | 	mkdir -p $(dir $@)
27 | 	${cxx} ${cflags} -O3 $< ${objects} -o $@.tmp -lm
28 | 	mv $@.tmp $@
29 |
30 | test/mafSorter: src/mafSorter.c ${dependencies} ${testObjects}
31 | 	mkdir -p $(dir $@)
32 | 	${cxx} ${cflags} -g -O0 $< ${testObjects} -o $@.tmp -lm
33 | 	mv $@.tmp $@
34 |
35 | %.o: %.c %.h
36 | 	${cxx} -O3 -c ${cflags} $< -o $@.tmp
37 | 	mv $@.tmp $@
38 | test/%.o: ${lib}/%.c ${inc}/%.h
39 | 	mkdir -p $(dir $@)
40 | 	${cxx} -g -O0 -c ${cflags} $< -o $@.tmp
41 | 	mv $@.tmp $@
42 | test/%.o: src/%.c src/%.h
43 | 	mkdir -p $(dir $@)
44 | 	${cxx} -g -O0 -c ${cflags} $< -o $@.tmp
45 | 	mv $@.tmp $@
46 |
47 | clean:
48 | 	rm -rf $(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ src/buildVersion.c src/buildVersion.h
49 |
50 | test: buildVersion test/mafSorter
51 | 	python2.7 src/test.mafSorter.py --verbose && rm -rf test/ && rmdir ./tempTestDir
52 |
53 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h
54 | 	${cxx} -c ${cflags} $<
55 | 	ar rc CuTest.a CuTest.o
56 | 	ranlib CuTest.a
57 | 	rm -f CuTest.o
58 | 	mv CuTest.a $@
59 |
--------------------------------------------------------------------------------
/mafSorter/README.md:
--------------------------------------------------------------------------------
1 | # mafSorter
2 |
3 | 19 March 2012
4 |
5 | ## Author
6 | [Dent Earl](https://github.com/dentearl/)
7 |
8 | ## Description
9 | mafSorter is a program that will sort the blocks of a maf in ascending order of the sequence start field of the specified sequence name. Blocks that do not contain the specified sequence will be output at the start of the maf in the order they appear in the input, followed by the sorted blocks. Blocks where the target sequence appears twice will be tagged with the largest start value.
10 |
11 | ## Installation
12 | 1. Download the package.
12 | 2. `cd` into the directory.
13 | 3. Type `make`.
15 |
16 | ## Use
17 | mafSorter --seq [sequence name (and possibly chr)] [options] < myFile.maf
18 |
19 | ### Options
20 | * `-h, --help` show this help message and exit.
21 | * `-s, --seq` sequence _name.chr_, e.g. `hg18.chr2`.
22 | * `-v, --verbose` turns on verbose output.
23 |
24 | ## Example
25 | $ ./mafSorter --seq hg19.chr20 < example.maf
26 | ##maf version=1
27 |
28 | #a score=0 pctid=99.2
29 | #s hg19.chr20 0 795 + 73767698 GAT...
30 | ...
31 |
32 |
--------------------------------------------------------------------------------
/mafStats/Makefile:
--------------------------------------------------------------------------------
1 | # mafStats: builds ${bin}/mafStats and the CuTest driver test/allTests run by the `test` target.
2 | include ../inc/common.mk
3 | SHELL:=/bin/bash
4 | bin = ../bin
5 | inc = ../inc
6 | lib = ../lib
7 | PROGS = mafStats
8 | dependencies = ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c
9 | objects := ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/test.mafStats.o ${sonLibPath}/sonLib.a src/buildVersion.o
10 | testObjects := test/sharedMaf.o test/common.o ../external/CuTest.a src/test.mafStats.o ${sonLibPath}/sonLib.a test/buildVersion.o
11 | sources = src/mafStats.c src/mafStats.h
12 |
13 | .PHONY: all clean test buildVersion
14 |
15 | all: buildVersion $(foreach f,${PROGS}, ${bin}/$f)
16 | buildVersion: src/buildVersion.c
17 | src/buildVersion.c: ${sources} ${dependencies}
18 | 	@python ../lib/createVersionSources.py
19 |
20 | # Shared objects are built by the Makefile in ../lib. Use $(MAKE) rather than a
21 | # bare `make` so -j, -n and command-line variables reach the sub-make.
22 | ../lib/%.o: ../lib/%.c ../inc/%.h
23 | 	cd ../lib/ && $(MAKE)
24 |
25 | ${bin}/mafStats: src/mafStats.c ${dependencies} ${objects}
26 | 	mkdir -p $(dir $@)
27 | 	${cxx} $< ${objects} -o $@.tmp ${cflags} ${lm}
28 | 	mv $@.tmp $@
29 |
30 | # NOTE(review): this links src/mafStats.c together with src/allTests.c, which
31 | # defines main(); confirm mafStats.c does not define one too. The `test` target
32 | # below does not depend on this rule.
33 | test/mafStats: src/mafStats.c ${dependencies} ${testObjects}
34 | 	mkdir -p $(dir $@)
35 | 	${cxx} $< src/allTests.c ${testObjects} -o $@.tmp ${testFlags} ${lm}
36 | 	mv $@.tmp $@
37 | %.o: %.c %.h
38 | 	${cxx} -c $< -o $@.tmp ${cflags}
39 | 	mv $@.tmp $@
40 | # Compile-only rules for the test objects: no ${lm}, because nothing is linked under -c.
41 | test/%.o: ${lib}/%.c ${inc}/%.h
42 | 	mkdir -p $(dir $@)
43 | 	${cxx} -c $< -o $@.tmp ${testFlags}
44 | 	mv $@.tmp $@
45 | test/%.o: src/%.c src/%.h
46 | 	mkdir -p $(dir $@)
47 | 	${cxx} -c $< -o $@.tmp ${testFlags}
48 | 	mv $@.tmp $@
49 |
50 | clean:
51 | 	rm -rf $(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ src/buildVersion.c src/buildVersion.h
52 |
53 | test: buildVersion test/allTests
54 | 	test/allTests && rm -rf ./test/
55 |
56 | test/allTests: src/allTests.c ${testObjects}
57 | 	mkdir -p $(dir $@)
58 | 	${cxx} $^ -o $@.tmp ${testFlags} ${lm}
59 | 	mv $@.tmp $@
60 |
61 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h
62 | 	${cxx} -c $< ${cflags}
63 | 	ar rc CuTest.a CuTest.o
64 | 	ranlib CuTest.a
65 | 	rm -f CuTest.o
66 | 	mv CuTest.a $@
67 |
--------------------------------------------------------------------------------
/mafStats/README.md:
--------------------------------------------------------------------------------
1 | # mafStats
2 | 5 July 2012
3 |
4 | ## Author
5 | [Dent Earl](https://github.com/dentearl/)
6 |
7 | ## Description
8 | A program to read a MAF file and report statistics about its contents.
9 |
10 | ## Installation
11 | 1. Download the package.
11 | 2. `cd` into the directory.
12 | 3. Type `make`.
14 |
15 | ## Use
16 | mafStats --maf mafFile.maf [options]
17 |
18 | ### Options
19 | * `-h, --help` show this help message and exit.
20 | * `-m, --maf` path to maf file.
21 |
22 | ### Example
23 | $ mafStats --maf smallDemo.maf
24 | smallDemo.maf
25 | ------------------------------
26 | File size: 66.13 MB
27 | Lines: 212986
28 | Header lines: 5
29 | s lines: 144592
30 | e lines: 0
31 | i lines: 0
32 | q lines: 0
33 | Blank lines: 68388
34 | Comment lines: 1
35 | Sequence chars: 49181016 ( 77.65%)
36 | Gap chars: 14154166 ( 22.35%)
37 | Blocks: 34194
38 | Ave block area: 1852.23
39 | Max block area: 37840
40 | Ave seq field length: 340.14
41 | Max seq field length: 7568
42 | Ave seq count in block: 4.23
43 | Max seq count in block: 5
44 | 10 unique sequences, ordered by # bases present:
45 | simHuman.chr1: 5311230 ( 10.80%)
46 | simHuman.chr0: 5225141 ( 10.62%)
47 | simDog.chr1: 5048843 ( 10.27%)
48 | simDog.chr0: 5023883 ( 10.22%)
49 | simCow.chr1: 4989381 ( 10.14%)
50 | simCow.chr0: 4979544 ( 10.12%)
51 | simMouse.chr1: 4671434 ( 9.50%)
52 | simMouse.chr0: 4654920 ( 9.46%)
53 | simRat.chr1: 4654607 ( 9.46%)
54 | simRat.chr0: 4622033 ( 9.40%)
55 | total: 49181016 (100.00%)
56 |
57 |
--------------------------------------------------------------------------------
/mafStats/src/allTests.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 |
26 | #include
27 | #include
28 | #include "CuTest.h"
29 |
30 | CuSuite* mafStats_TestSuite(void);
31 |
32 | int mafStats_RunAllTests(void) { /* placeholder: runs no suites (mafStats_TestSuite is declared above but never called) and always reports 0 */
33 | return 0;
34 | }
35 | int main(void) { /* the process exit status is whatever mafStats_RunAllTests() returns */
36 | return mafStats_RunAllTests();
37 | }
38 |
--------------------------------------------------------------------------------
/mafStats/src/mafStats.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #ifndef _MAFSTATS_H_
26 | #define _MAFSTATS_H_ /* NOTE(review): this header uses uint64_t, stHash, mafBlock_t, mafFileApi_t and struct stat; the targets of the two #include lines below are not visible in this listing */
27 |
28 | #include
29 | #include
30 |
31 | typedef struct stats { /* per-file tallies; presumably filled in by recordStats()/processBlock() -- confirm in mafStats.c */
32 | char *filename;
33 | uint64_t numLines;
34 | uint64_t numHeaderLines;
35 | uint64_t numSeqLines;
36 | uint64_t numBlocks;
37 | uint64_t numELines;
38 | uint64_t numILines;
39 | uint64_t numQLines;
40 | uint64_t numCommentLines;
41 | uint64_t numGapCharacters;
42 | uint64_t numSeqCharacters;
43 | uint64_t numColumns;
44 | uint64_t sumSeqField; /* sum*/ /* and max* pairs: presumably a running total and the largest single value seen -- confirm */
45 | uint64_t maxSeqField;
46 | uint64_t sumNumSpeciesInBlock;
47 | uint64_t maxNumSpeciesInBlock;
48 | uint64_t sumBlockArea;
49 | uint64_t maxBlockArea;
50 | stHash *seqHash; // keyed with names, valued with uint64_t count of bases present
51 | } stats_t;
52 | typedef struct seq { /* a sequence name paired with a count; presumably the record ordered by cmp_seq() -- confirm */
53 | char *name;
54 | uint64_t count;
55 | } seq_t;
56 |
57 | void version(void);
58 | void usage(void);
59 | void parseOptions(int argc, char **argv, char **filename); /* filename is an out-parameter (char **) */
60 | stats_t* stats_create(char *filename);
61 | void stats_destroy(stats_t *stats);
62 | void countCharacters(char *seq, stats_t *stats);
63 | void processBlock(mafBlock_t *mb, stats_t *stats);
64 | void recordStats(mafFileApi_t *mfa, stats_t *stats);
65 | void readFilesize(struct stat *fileStat, char **filesizeString); /* filesizeString is an out-parameter (char **) */
66 | int cmp_seq(const void *a, const void *b); /* signature matches a qsort()-style comparator */
67 | void reportHash(stHash *hash);
68 | void reportStats(stats_t *stats);
69 |
70 | #endif // _MAFSTATS_H_
71 |
--------------------------------------------------------------------------------
/mafStats/src/test.mafStats.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 |
26 | #include "CuTest.h"
27 |
--------------------------------------------------------------------------------
/mafStats/src/test.mafStats.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ComparativeGenomicsToolkit/mafTools/259e5b47fa2ee17ff5ad1bba9cebf2992cbb7228/mafStats/src/test.mafStats.h
--------------------------------------------------------------------------------
/mafStrander/Makefile:
--------------------------------------------------------------------------------
1 | # mafStrander: builds ${bin}/mafStrander (-O3) and a debug copy in test/ used by the `test` target.
2 | include ../inc/common.mk
3 | SHELL:=/bin/bash
4 | bin = ../bin
5 | inc = ../inc
6 | lib = ../lib
7 | PROGS = mafStrander
8 | dependencies = ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c
9 | objects = ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/buildVersion.o
10 | testObjects := test/sharedMaf.o test/common.o ../external/CuTest.a test/buildVersion.o
11 | sources = src/mafStrander.c
12 |
13 | .PHONY: all clean test buildVersion
14 |
15 | all: buildVersion $(foreach f,${PROGS}, ${bin}/$f)
16 | buildVersion: src/buildVersion.c
17 | src/buildVersion.c: ${sources} ${dependencies}
18 | 	@python ../lib/createVersionSources.py
19 |
20 | # Shared objects are built by the Makefile in ../lib. Use $(MAKE) rather than a
21 | # bare `make` so -j, -n and command-line variables reach the sub-make.
22 | ../lib/%.o: ../lib/%.c ../inc/%.h
23 | 	cd ../lib/ && $(MAKE)
24 |
25 | ${bin}/mafStrander: src/mafStrander.c ${dependencies} ${objects}
26 | 	mkdir -p $(dir $@)
27 | 	${cxx} ${cflags} -O3 $< ${objects} -o $@.tmp -lm
28 | 	mv $@.tmp $@
29 |
30 | test/mafStrander: src/mafStrander.c ${dependencies} ${testObjects}
31 | 	mkdir -p $(dir $@)
32 | 	${cxx} ${cflags} -g -O0 $< ${testObjects} -o $@.tmp -lm
33 | 	mv $@.tmp $@
34 |
35 | %.o: %.c %.h
36 | 	${cxx} -c ${cflags} $< -o $@.tmp
37 | 	mv $@.tmp $@
38 | test/%.o: ${lib}/%.c ${inc}/%.h
39 | 	mkdir -p $(dir $@)
40 | 	${cxx} -g -O0 -c ${cflags} $< -o $@.tmp
41 | 	mv $@.tmp $@
42 | test/%.o: src/%.c src/%.h
43 | 	mkdir -p $(dir $@)
44 | 	${cxx} -g -O0 -c ${cflags} $< -o $@.tmp
45 | 	mv $@.tmp $@
46 |
47 | clean:
48 | 	rm -rf $(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ src/buildVersion.c src/buildVersion.h
49 |
50 | test: buildVersion test/mafStrander
51 | 	python2.7 src/test.mafStrander.py --verbose && rm -rf test/ && rmdir ./tempTestDir
52 |
53 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h
54 | 	${cxx} -c ${cflags} $<
55 | 	ar rc CuTest.a CuTest.o
56 | 	ranlib CuTest.a
57 | 	rm -f CuTest.o
58 | 	mv CuTest.a $@
59 |
--------------------------------------------------------------------------------
/mafStrander/README.md:
--------------------------------------------------------------------------------
1 | # mafStrander
2 |
3 | 3 October 2012
4 |
5 | ## Author
6 | [Dent Earl](https://github.com/dentearl/)
7 |
8 | ## Description
9 | mafStrander is a program to coerce a particular strandedness out for all blocks based on the strandedness of a target sequence. When a block contains the target sequence but in the flipped orientation (relative to the `--strand` option) then the block is flipped, i.e. all start coordinates are transformed, and all sequence fields are reverse-complemented. If the block contains the target sequence multiple times and with conflicting strands (i.e. both + and - strands are observed), then nothing is done.
10 |
11 | ## Installation
12 | 1. Download the package.
12 | 2. `cd` into the directory.
13 | 3. Type `make`.
15 |
16 | ## Use
17 | mafStrander --maf alignment.maf --seq hg18 --strand + > positive.maf
18 |
19 | ### Options
20 | * `-h, --help` show this help message and exit.
21 | * `--maf` input alignment maf file.
22 | * `--seq` sequence to base block strandedness upon. (string comparison only done for length of input, i.e. `--seq=hg18` will match hg18.chr1, hg18.chr2, etc.)
23 | * `--strand` strand to enforce, when possible. May be + or -, defaults to +.
24 |
25 | ## Example
26 | $ mafStrander --maf alignment.maf --seq hg18 --strand + > positive.maf
27 |
28 |
--------------------------------------------------------------------------------
/mafToFastaStitcher/Makefile:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2009-2013 by
2 | # Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
3 | # Benedict Paten (benedict@soe.ucsc.edu, benedictpaten@gmail.com)
4 | # Mark Diekhans (markd@soe.ucsc.edu)
5 | # ... and other members of the Reconstruction Team of David Haussler's
6 | # lab (BME Dept. UCSC).
7 | #
8 | # Permission is hereby granted, free of charge, to any person obtaining a copy
9 | # of this software and associated documentation files (the "Software"), to deal
10 | # in the Software without restriction, including without limitation the rights
11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | # copies of the Software, and to permit persons to whom the Software is
13 | # furnished to do so, subject to the following conditions:
14 | #
15 | # The above copyright notice and this permission notice shall be included in
16 | # all copies or substantial portions of the Software.
17 | #
18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | # THE SOFTWARE.
25 |
26 | include ../inc/common.mk
27 | SHELL:=/bin/bash
28 | bin = ../bin
29 | inc = ../inc
30 | lib = ../lib
31 | PROGS = mafToFastaStitcher
32 | # Expanded once (:=) so the $(wildcard) scan of ${sonLibPath} is not repeated on every reference.
33 | dependencies := ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c $(wildcard ${sonLibPath}/*) ${sonLibPath}/sonLib.a src/allTests.c
34 | extraAPI := ${lib}/common.o ${lib}/sharedMaf.o ../external/CuTest.a src/mafToFastaStitcherAPI.o ${sonLibPath}/sonLib.a src/buildVersion.o
35 | testAPI := test/sharedMaf.o test/common.o ../external/CuTest.a test/mafToFastaStitcherAPI.o ${sonLibPath}/sonLib.a test/buildVersion.o
36 | testObjects := test/test.mafToFastaStitcherAPI.o
37 | sources := src/mafToFastaStitcher.c src/mafToFastaStitcher.h
38 |
39 | .PHONY: all clean test buildVersion
40 |
41 | all: buildVersion $(foreach f,${PROGS}, ${bin}/$f)
42 | buildVersion: src/buildVersion.c
43 | src/buildVersion.c: ${sources} ${dependencies}
44 | 	@python ../lib/createVersionSources.py
45 |
46 | # Shared objects are built by the Makefile in ../lib. Use $(MAKE) rather than a
47 | # bare `make` so -j, -n and command-line variables reach the sub-make.
48 | ../lib/%.o: ../lib/%.c ../inc/%.h
49 | 	cd ../lib/ && $(MAKE)
50 |
51 | ${bin}/mafToFastaStitcher: src/mafToFastaStitcher.c ${dependencies} ${extraAPI}
52 | 	mkdir -p $(dir $@)
53 | 	${cxx} $< ${extraAPI} -o $@.tmp ${cflags} ${lm}
54 | 	mv $@.tmp $@
55 | %.o: %.c %.h
56 | 	${cxx} -c $< -o $@.tmp ${cflags}
57 | 	mv $@.tmp $@
58 | # Matches both src/ and test/ copies of the API object, so create the output
59 | # directory first; no ${lm} because nothing is linked under -c.
60 | %/mafToFastaStitcherAPI.o: src/mafToFastaStitcherAPI.c src/mafToFastaStitcherAPI.h
61 | 	mkdir -p $(dir $@)
62 | 	${cxx} -c $< -o $@.tmp ${cflags}
63 | 	mv $@.tmp $@
64 |
65 | test: buildVersion test/allTests test/mafToFastaStitcher
66 | 	./test/allTests && python2.7 src/test.mafToFastaStitcher.py --verbose && rm -rf ./test/ && rmdir ./tempTestDir
67 | test/allTests: src/allTests.c ${testAPI} ${testObjects} ${sonLibPath}/sonLib.a
68 | 	mkdir -p $(dir $@)
69 | 	${cxx} $^ -o $@.tmp ${testFlags} ${lm}
70 | 	mv $@.tmp $@
71 | test/mafToFastaStitcher: src/mafToFastaStitcher.c ${dependencies} ${testAPI}
72 | 	mkdir -p $(dir $@)
73 | 	${cxx} $< ${testAPI} -o $@.tmp ${testFlags} ${lm}
74 | 	mv $@.tmp $@
75 | test/%.o: ${lib}/%.c ${inc}/%.h
76 | 	mkdir -p $(dir $@)
77 | 	${cxx} -c $< -o $@.tmp ${testFlags}
78 | 	mv $@.tmp $@
79 | test/test.mafToFastaStitcherAPI.o: src/test.mafToFastaStitcherAPI.c src/test.mafToFastaStitcherAPI.h test/mafToFastaStitcherAPI.o
80 | 	mkdir -p $(dir $@)
81 | 	${cxx} -c $< -o $@.tmp ${testFlags}
82 | 	mv $@.tmp $@
83 | test/%.o: src/%.c src/%.h ${sonLibPath}/sonLib.a
84 | 	mkdir -p $(dir $@)
85 | 	${cxx} -c $< -o $@.tmp ${testFlags}
86 | 	mv $@.tmp $@
87 |
88 | clean:
89 | 	rm -rf $(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ src/buildVersion.c src/buildVersion.h
90 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h
91 | 	${cxx} -c $< ${cflags}
92 | 	ar rc CuTest.a CuTest.o
93 | 	ranlib CuTest.a
94 | 	rm -f CuTest.o
95 | 	mv CuTest.a $@
96 |
--------------------------------------------------------------------------------
/mafToFastaStitcher/README.md:
--------------------------------------------------------------------------------
1 | # mafToFastaStitcher
2 |
3 | 15 October 2012
4 |
5 | ## Author
6 | [Dent Earl](https://github.com/dentearl/)
7 |
8 | ## Description
9 | mafToFastaStitcher is a program to take a multiple alignment format (MAF) file, some sequences, and then stitch together the alignment into a single multiple sequence fasta (MFA) file.
10 |
11 | As an aside, the intended output is just dissimilar enough to what is created by multiz's maf2fasta that this tool is necessary but the output is similar enough as to be frustrating.
12 |
13 | ## Dependencies
14 | * sonLib https://github.com/benedictpaten/sonLib/
15 |
16 | ## Installation
17 | 1. Download the package.
18 | 2. `cd` into the directory.
19 | 3. Type `make`.
20 |
21 | ## Use
22 | mafToFastaStitcher --maf alignment.maf --seqs seq.fa[,seq2.fa,...] --outMfa output.mfa --breakpointPenalty 10 [options]
23 |
24 | ### Options
25 | * `-h, --help` show this help message and exit.
26 | * `--maf` input alignment maf file.
27 | * `--seqs` comma separated list of fasta sequences. each fasta may contain multiple entries. all sequences in the input alignment must be accounted for with an element in a fasta.
28 | * `--outMfa` multiple sequence fasta output file.
29 | * `--breakpointPenalty` number of `N` characters to insert into a sequence when a breakpoint is detected.
30 | * `--interstitialSequence` maximum length of interstitial sequence to be added (from a fasta) into the fasta before a breakpoint is declared and the `--breakpointPenalty` number of `N`'s is added instead.
31 | * `--outMaf` optional output to single block maf in addition to multiple sequence fasta output.
32 |
33 | ## Example
34 | $ mafToFastaStitcher --maf alignment.maf --seqs seq.fa,seq2.fa --breakpointPenalty 5 --outMfa output.mfa
35 |
36 | ## Detailed input and output example
37 |
38 | # Input maf
39 | ## maf version=1
40 |
41 | a score=0.0 status=test.input
42 | s ref.chr1 10 10 + 100 ACGTACGTAC
43 | s seq1.chr@ 0 10 + 100 AAAAAAAAAA
44 | s seq2.chr& 10 5 + 100 -----CCCCC
45 | s seq6.chr1 10 5 + 100 -----GGGGG
46 | s seq7.chr20 0 5 + 100 AAAAA-----
47 |
48 | a score=0.0 status=test.input
49 | s ref.chr1 20 10 + 100 GTACGTACGT
50 | s seq2.chr!! 5 5 + 100 CCCCC-----
51 | s seq3.chr0 20 5 + 100 -----GGGGG
52 | s seq6.chr1 22 5 + 100 GGGGG-----
53 |
54 | a score=0.0 status=test.input
55 | s ref.chr1 30 10 + 100 ACGTACGTAC
56 | s seq4.chr1 0 5 - 100 GG-----GGG
57 | s seq5.chr2 0 10 + 100 CCCCCCCCCC
58 | s seq7.chr20 42 5 + 100 -----AAAAA
59 |
60 | # Input sequences
61 | Here in a single file, but could be broken across multiple files
62 | > ref.chr1
63 | ggggggggggACGTACGTACGTACGTACGTACGTACGTACgg
64 | > seq1.chr@
65 | AAAAAAAAAAgg
66 | > seq2.chr&
67 | aaaaaaaaaaCCCCCaa
68 | > seq2.chr!!
69 | aaaaaCCCCCaa
70 | > seq3.chr0
71 | aaaaaaaaaaaaaaaaaaaGGGGGaa
72 | seq4.chr1
73 | aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
74 | aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaCCCCC
75 | > seq6.chr1
76 | aaaaaaaaaGGGGGaaaaaaaGGGGGaa
77 | > seq7.chr20
78 | AAAAAGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAAAAATT
79 |
80 | # Expected multiple sequence fasta output
81 | Note that when a sequence is only represented with a single chromosome, that chromosome will persist (as in ref.chr1) but when multiple chromosomes are present in the MAF that they are collapsed together.
82 | > ref.chr1
83 | ACGTACGTAC------------GTACGTACGT------------------
84 | -------------------ACGTACGTAC
85 | > seq1
86 | AAAAAAAAAA-----------------AAAAA------------------
87 | -----------------------------
88 | > seq2
89 | -----CCCCCNNNNN-------CCCCC-----------------------
90 | -----------------------------
91 | > seq6
92 | -----GGGGG-----aaaaaaaGGGGG-----------------------
93 | -----------------------------
94 | > seq7
95 | AAAAA---------------------------gggggggggggggggggg
96 | ggggggggggggggggggg-----AAAAA
97 | > seq3
98 | ---------------------------GGGGG------------------
99 | -----------------------------
100 | > seq4
101 | --------------------------------------------------
102 | -------------------GG-----GGG
103 | > seq5
104 | --------------------------------------------------
105 | -------------------CCCCCCCCCC
106 |
107 |
108 | # optional maf output
109 | where `--breakpointPenalty` is 5 (as seen in seq2) and `--interstitialSequence` is *at least* 17, as seen in seq7 (the long string of `g`'s is pulled in from the fasta).
110 | a score=0.0 status=test.expected
111 | s ref.chr1 10 30 + 100 ACGTACGTAC------------GTACGTACGT-------------------------------------ACGTACGTAC
112 | s seq1 0 15 + 15 AAAAAAAAAA-----------------AAAAA-----------------------------------------------
113 | s seq2 0 15 + 15 -----CCCCCNNNNN-------CCCCC----------------------------------------------------
114 | s seq6 0 17 + 17 -----GGGGG-----aaaaaaaGGGGG----------------------------------------------------
115 | s seq7 0 47 + 47 AAAAA---------------------------ggggggggggggggggggggggggggggggggggggg-----AAAAA
116 | s seq3 0 5 + 5 ---------------------------GGGGG-----------------------------------------------
117 | s seq4 0 5 - 5 ---------------------------------------------------------------------GG-----GGG
118 | s seq5 0 10 + 10 ---------------------------------------------------------------------CCCCCCCCCC
119 |
120 |
--------------------------------------------------------------------------------
/mafToFastaStitcher/src/allTests.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 |
26 | #include
27 | #include
28 | #include "CuTest.h"
29 | #include "sonLib.h"
30 | #include "mafToFastaStitcher.h"
31 | #include "mafToFastaStitcherAPI.h"
32 | #include "test.mafToFastaStitcherAPI.h"
33 |
34 | CuSuite* mafToFastaStitcher_TestSuite(void);
35 |
36 | int mtfs_RunAllTests(void) { // run the registered suite, print the report to stdout, return 1 if any test failed
37 |     CuSuite *stitcherSuite = mafToFastaStitcher_TestSuite();
38 |     CuSuite *master = CuSuiteNew();
39 |     CuString *report = CuStringNew();
40 |     CuSuiteAddSuite(master, stitcherSuite);
41 |     CuSuiteRun(master);
42 |     CuSuiteSummary(master, report);
43 |     CuSuiteDetails(master, report);
44 |     printf("%s\n", report->buffer);
45 |     CuStringDelete(report);
46 |     const int anyFailed = (master->failCount > 0) ? 1 : 0;
47 |     free(stitcherSuite); // only the container; presumably its tests now belong to master -- TODO confirm against CuTest
48 |     CuSuiteDelete(master);
49 |     return anyFailed;
50 | }
51 | int main(void) { // exit status is 1 when any test failed, 0 otherwise
52 | return mtfs_RunAllTests();
53 | }
54 |
--------------------------------------------------------------------------------
/mafToFastaStitcher/src/mafToFastaStitcher.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 |
26 | #include // mac os x toupper()
27 | #include
28 | #include
29 | #include
30 | #include
31 | #include
32 | #include "common.h"
33 | #include "CuTest.h"
34 | #include "sharedMaf.h"
35 | #include "sonLib.h"
36 | #include "mafToFastaStitcher.h"
37 | #include "mafToFastaStitcherAPI.h"
38 | #include "buildVersion.h"
39 |
40 | const char *g_version = "v0.1 Oct 2012";
41 |
42 | void version(void);
43 | void usage(void);
44 |
45 | /* Parse `text` as the unsigned 64-bit value of option --`name`; report and exit via usage() if malformed. */
46 | static void parseUint64Option(const char *name, const char *text, uint64_t *dest) {
47 |     // SCNu64 is the scanf conversion macro; PRIu64 is only specified for the printf family
48 |     if (sscanf(text, "%" SCNu64, dest) != 1) {
49 |         fprintf(stderr, "--%s expects an unsigned integer, got: %s\n", name, text);
50 |         usage();
51 |     }
52 | }
53 | /*
54 |  * Fill *options from the command line. --version prints the banner and exits with
55 |  * EXIT_SUCCESS. usage() is called for -h/--help, an unknown option, a malformed number,
56 |  * a missing --maf, --seqs, --breakpointPenalty or --interstitialSequence, neither
57 |  * --outMfa nor --outMaf being given, or leftover positional arguments.
58 |  */
59 | void parseOptions(int argc, char **argv, options_t *options) {
60 |     static struct option long_options[] = {
61 |         {"debug", no_argument, 0, 'd'},
62 |         {"verbose", no_argument, 0, 'v'},
63 |         {"help", no_argument, 0, 'h'},
64 |         {"version", no_argument, 0, 0},
65 |         {"maf", required_argument, 0, 0},
66 |         {"seqs", required_argument, 0, 0},
67 |         {"outMfa", required_argument, 0, 0},
68 |         {"outMaf", required_argument, 0, 0},
69 |         {"breakpointPenalty", required_argument, 0, 0},
70 |         {"interstitialSequence", required_argument, 0, 0},
71 |         {"referenceSequence", required_argument, 0, 0},
72 |         {0, 0, 0, 0}
73 |     };
74 |     bool setMafName = false, setSeqNames = false, setOutName = false,
75 |         setBreakpointPenalty = false, setInterstitialSequence = false;
76 |     int c;
77 |     while (1) {
78 |         int option_index = 0;
79 |         // -d, -v, -h are plain flags and -m takes the maf path; the old "d:m:s:h:v:t" made the
80 |         // flags swallow the next word and let -m/-s/-t through to the abort() in default.
81 |         c = getopt_long(argc, argv, "dvhm:", long_options, &option_index);
82 |         if (c == -1)
83 |             break;
84 |         switch (c) {
85 |         case 0: {
86 |             const char *name = long_options[option_index].name;
87 |             if (strcmp("version", name) == 0) {
88 |                 version();
89 |                 exit(EXIT_SUCCESS);
90 |             } else if (strcmp("maf", name) == 0) {
91 |                 setMafName = true;
92 |                 options->maf = stString_copy(optarg);
93 |             } else if (strcmp("seqs", name) == 0) {
94 |                 setSeqNames = true;
95 |                 options->seqs = stString_copy(optarg);
96 |             } else if (strcmp("outMaf", name) == 0) {
97 |                 setOutName = true;
98 |                 options->outMaf = stString_copy(optarg);
99 |             } else if (strcmp("outMfa", name) == 0) {
100 |                 setOutName = true;
101 |                 options->outMfa = stString_copy(optarg);
102 |             } else if (strcmp("breakpointPenalty", name) == 0) {
103 |                 setBreakpointPenalty = true;
104 |                 parseUint64Option(name, optarg, &(options->breakpointPenalty));
105 |             } else if (strcmp("interstitialSequence", name) == 0) {
106 |                 setInterstitialSequence = true;
107 |                 parseUint64Option(name, optarg, &(options->interstitialSequence));
108 |             } else if (strcmp("referenceSequence", name) == 0) {
109 |                 options->reference = stString_copy(optarg);
110 |             }
111 |             break;
112 |         }
113 |         case 'm':
114 |             setMafName = true;
115 |             options->maf = stString_copy(optarg);
116 |             break;
117 |         case 'v':
118 |             g_verbose_flag++;
119 |             break;
120 |         case 'd':
121 |             g_debug_flag = 1;
122 |             break;
123 |         case 'h':
124 |         case '?':
125 |             usage();
126 |             break;
127 |         default:
128 |             abort();
129 |         }
130 |     }
131 |     if (!setMafName) {
132 |         fprintf(stderr, "specify --maf\n");
133 |         usage();
134 |     }
135 |     if (!setOutName) {
136 |         fprintf(stderr, "specify --outMaf or --outMfa\n");
137 |         usage();
138 |     }
139 |     if (!setSeqNames) {
140 |         fprintf(stderr, "specify --seqs\n");
141 |         usage();
142 |     }
143 |     if (!setBreakpointPenalty) {
144 |         fprintf(stderr, "specify --breakpointPenalty\n");
145 |         usage();
146 |     }
147 |     if (!setInterstitialSequence) {
148 |         fprintf(stderr, "specify --interstitialSequence\n");
149 |         usage();
150 |     }
151 |     // Leftover words go straight to stderr; the old fixed-size strcat buffer could overflow.
152 |     if (optind < argc) {
153 |         fprintf(stderr, "Unexpected arguments:");
154 |         while (optind < argc)
155 |             fprintf(stderr, " %s", argv[optind++]);
156 |         fprintf(stderr, "\n");
157 |         usage();
158 |     }
159 | }
160 | void version(void) { // print the tool version plus build date, git branch and git sha to stderr
161 | fprintf(stderr, "mafToFastaStitcher, %s\nbuild: %s, %s, %s\n\n", g_version, g_build_date,
162 | g_build_git_branch, g_build_git_sha);
163 | }
164 | void usage(void) { // print the version banner and the option summary to stderr, then exit(EXIT_FAILURE)
165 |     version();
166 |     fprintf(stderr, "Usage: mafToFastaStitcher --maf mafFile.maf --seqs seq1.fa,seq2.fa[,...] --breakpointPenalty 5 --interstitialSequence 20 --outMfa output.mfa \n\n"
167 |             "\n\n");
168 |     fprintf(stderr, "Options: \n");
169 |     usageMessage('h', "help", "show this message and exit.");
170 |     usageMessage('m', "maf", "path to the maf file.");
171 |     usageMessage('\0', "seqs", "comma separated list of fasta sequences. each fasta may contain multiple entries. all sequences in the input alignment must be accounted for with an element in a fasta.");
172 |     usageMessage('\0', "outMfa", "multiple sequence fasta output file.");
173 |     usageMessage('\0', "breakpointPenalty", "number of `N' characters to insert into a sequence when a breakpoint is detected.");
174 |     usageMessage('\0', "interstitialSequence", "maximum length of interstitial sequence to be added (from a fasta) into the fasta before a breakpoint is declared and the --breakpointPenalty number of `N's is added instead."); // one literal; option and `N' shown as plain terminal text
175 |     usageMessage('\0', "outMaf", "multiple alignment format output file.");
176 |     usageMessage('\0', "referenceSequence", "optional. The name of the reference sequence. All intervening reference sequence between the first and last block of the input --maf will be read out in the output."); // name matches the long option registered in parseOptions()
177 |     usageMessage('v', "verbose", "turns on verbose output.");
178 |     exit(EXIT_FAILURE);
179 | }
180 | int main(int argc, char **argv) { // build the sequence and alignment hashes, write the requested outputs, release everything
181 |     options_t *opts = options_construct();
182 |     stList *order = stList_construct3(0, free); // keys are appended here as they are added to rowsBySpecies
183 |     stHash *rowsBySpecies = stHash_construct3(stHash_stringKey, stHash_stringEqualKey, free, destroyRow); // species name -> row_t pointer
184 |     stHash *seqsByHeader = NULL; // fasta header -> mtfseq_t pointer
185 |     parseOptions(argc, argv, opts);
186 |     // fastas first: seqsByHeader is filled from the --seqs list
187 |     de_verbose("Creating sequence hash.\n");
188 |     seqsByHeader = createSequenceHash(opts->seqs);
189 |     mafFileApi_t *mafReader = maf_newMfa(opts->maf, "r");
190 |     de_verbose("Creating alignment hash.\n");
191 |     buildAlignmentHash(mafReader, rowsBySpecies, seqsByHeader, order, opts);
192 |     if (opts->outMfa) {
193 |         // multiple sequence fasta was requested
194 |         de_verbose("Writing fasta output.\n");
195 |         writeFastaOut(rowsBySpecies, order, opts);
196 |     }
197 |     if (opts->outMaf) {
198 |         // maf was requested
199 |         de_verbose("Writing maf output.\n");
200 |         writeMafOut(rowsBySpecies, order, opts);
201 |     }
202 |     // release everything allocated above
203 |     maf_destroyMfa(mafReader);
204 |     stHash_destruct(rowsBySpecies);
205 |     stHash_destruct(seqsByHeader);
206 |     stList_destruct(order);
207 |     destroyOptions(opts);
208 |     return EXIT_SUCCESS;
209 | }
210 |
--------------------------------------------------------------------------------
/mafToFastaStitcher/src/mafToFastaStitcher.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #ifndef MAFTOFASTASTITCHER_H_
26 | #define MAFTOFASTASTITCHER_H_
27 | #include
28 | #include "sonLib.h"
29 | #include "common.h"
30 | #include "CuTest.h"
31 | #include "sharedMaf.h"
32 | #include "mafToFastaStitcherAPI.h"
33 |
34 | void usage(void);
35 | void parseOptions(int argc, char **argv, options_t *options);
36 |
37 | #endif // MAFTOFASTASTITCHER_H_
38 |
--------------------------------------------------------------------------------
/mafToFastaStitcher/src/mafToFastaStitcherAPI.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #ifndef MAFTOFASTASTITCHER_API_H_
26 | #define MAFTOFASTASTITCHER_API_H_
27 | #include
28 | #include "common.h"
29 | #include "CuTest.h"
30 | #include "sharedMaf.h"
31 | #include "sonLib.h"
32 |
33 | typedef struct _options {
34 | // used to hold all the command line options
35 | char *maf; // value of --maf
36 | char *seqs; // value of --seqs, the comma separated fasta list
37 | char *outMfa; // value of --outMfa; tested against NULL before fasta output is written
38 | char *outMaf; // value of --outMaf; tested against NULL before maf output is written
39 | char *reference; // value of --referenceSequence
40 | uint64_t breakpointPenalty; // value of --breakpointPenalty
41 | uint64_t interstitialSequence; // value of --interstitialSequence
42 | } options_t;
43 | typedef struct _sequence {
44 | // used to store fasta sequence elements; a growable character buffer
45 | char *seq; // DNA sequence
46 | uint64_t index; // first empty position in *seq
47 | uint64_t memLength; // size of the *seq buffer
48 | } mtfseq_t;
49 | typedef struct _row {
50 | // used to store the ultimate output of the utility,
51 | // a single element of either a multiple fasta alignment (mfa)
52 | // or a single row in a multiple alignment format (maf) file.
53 | char *name;
54 | char *prevName; // presumably the name seen on the previously added line -- TODO confirm
55 | char *sequence;
56 | bool multipleNames; // initialized false, if prevName is ever != name, then this should be set permanently true
57 | uint64_t start;
58 | uint64_t length;
59 | uint64_t prevRightPos; // rightmost position in the sequence, 0 based
60 | char strand; // `+' `-' or `*' when both strands have been observed (multipleNames should be set true)
61 | char prevStrand; // presumably the strand seen on the previously added line -- TODO confirm
62 | uint64_t sourceLength;
63 | uint64_t index; // first empty position in *sequence
64 | uint64_t memLength; //size of the *sequence buffer
65 | } row_t;
66 |
67 | options_t* options_construct(void);
68 | void destroyOptions(options_t *o);
69 | mtfseq_t* newMtfseq(uint64_t length);
70 | void resizeMtfseq(mtfseq_t *m);
71 | void resizeRowSequence(row_t *r);
72 | void destroyMtfseq(void *p);
73 | row_t* newRow(uint64_t length);
74 | void destroyRow(void *row);
75 | row_t* mafLineToRow(mafLine_t *ml);
76 | stHash* mafBlockToBlockHash(mafBlock_t *mb, stList *orderList);
77 | stHash* createSequenceHash(char *fastas);
78 | void seq_copyIn(mtfseq_t *mtfss, char *src);
79 | void row_copyIn(row_t *row, char *src);
80 | void addSequencesToHash(stHash *hash, char *filename);
81 | void reportSequenceHash(stHash *hash);
82 | void penalize(stHash *hash, char *name, uint64_t n);
83 | void extendSequence(row_t *r, uint64_t n);
84 | void interstitialInsert(stHash *alignHash, stHash *seqHash, char *name, uint64_t pos, char strand, uint64_t n);
85 | char* extractSubSequence(mtfseq_t *mtfs, char strand, uint64_t pos, uint64_t n);
86 | void addMafLineToRow(row_t *row, mafLine_t *ml);
87 | void addMafBlockToRowHash(stHash *alignHash, stHash *seqHash, stList *order, mafBlock_t *mb, options_t *options);
88 | void prependGaps(row_t *r, uint64_t n);
89 | void buildAlignmentHash(mafFileApi_t *mfapi, stHash *alignmentHash, stHash *sequenceHash,
90 | stList *rowOrder, options_t *options);
91 | void writeFastaOut(stHash *alignmentHash, stList *rowOrder, options_t *options);
92 | void writeMafOut(stHash *alignmentHash, stList *rowOrder, options_t *options);
93 | uint64_t nearestTwo(uint64_t n);
94 | #endif // MAFTOFASTASTITCHER_API_H_
95 |
--------------------------------------------------------------------------------
/mafToFastaStitcher/src/test.mafToFastaStitcherAPI.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #ifndef TEST_MAFTOFASTASTITCHER_H_
26 | #define TEST_MAFTOFASTASTITCHER_H_
27 | #include
28 | #include
29 | #include
30 | #include
31 | #include "CuTest.h"
32 | #include "common.h"
33 | #include "sonLib.h"
34 | #include "mafToFastaStitcher.h"
35 | #include "mafToFastaStitcherAPI.h"
36 |
37 | CuSuite* mtfs_TestSuite(void);
38 |
39 | #endif // TEST_MAFTOFASTASTITCHER_H_
40 |
--------------------------------------------------------------------------------
/mafTransitiveClosure/Makefile:
--------------------------------------------------------------------------------
1 | include ../inc/common.mk
2 | SHELL:=/bin/bash
3 | bin = ../bin
4 | inc = ../inc
5 | lib = ../lib
6 | PROGS = mafTransitiveClosure
7 | dependencies := ${inc}/common.h ${inc}/sharedMaf.h ${lib}/common.c ${lib}/sharedMaf.c $(wildcard ${sonLibPath}/*) ${sonLibPath}/stPinchesAndCacti.a ${sonLibPath}/sonLib.a src/allTests.c # simply expanded so the wildcard is evaluated once, not on every reference
8 | objects := ${lib}/common.o ${lib}/sharedMaf.o ${sonLibPath}/stPinchesAndCacti.a ${sonLibPath}/sonLib.a ../external/CuTest.a src/test.mafTransitiveClosure.o src/buildVersion.o
9 | testObjects := test/sharedMaf.o test/common.o ${sonLibPath}/stPinchesAndCacti.a ${sonLibPath}/sonLib.a ../external/CuTest.a src/test.mafTransitiveClosure.o test/buildVersion.o
10 | sources := src/mafTransitiveClosure.c src/mafTransitiveClosure.h
11 |
12 | .PHONY: all clean test buildVersion
13 |
14 | all: buildVersion $(addprefix $(bin)/,$(PROGS)) # default goal: refresh the version sources, then build each program in PROGS
15 | buildVersion: src/buildVersion.c # phony alias for the generated version source
16 | src/buildVersion.c: $(sources) $(dependencies) # regenerated when any listed source or dependency is newer
17 | 	@python $(lib)/createVersionSources.py
18 |
19 | ../lib/%.o: ../lib/%.c ../inc/%.h # shared objects are produced by the lib directory's own Makefile
20 | 	$(MAKE) -C ../lib
21 |
22 | $(bin)/mafTransitiveClosure: src/mafTransitiveClosure.c $(dependencies) $(objects) # program linked together with src/allTests.c and the objects
23 | 	mkdir -p $(@D)
24 | 	$(cxx) $< src/allTests.c $(objects) -o $@.tmp $(cflags) -lm
25 | 	mv $@.tmp $@
26 |
27 | test/mafTransitiveClosure: src/mafTransitiveClosure.c $(dependencies) $(testObjects) # same program built with the test flags and test objects
28 | 	mkdir -p $(@D)
29 | 	$(cxx) $< src/allTests.c $(testObjects) -o $@.tmp $(testFlags) -lm
30 | 	mv $@.tmp $@
31 | %.o: %.c ${inc}/%.h # object for a source whose header lives in ${inc}
32 | ${cxx} -c $< -o $@.tmp ${cflags}
33 | mv $@.tmp $@
34 | %.o: %.c %.h # object for a source with a same-named header beside it
35 | ${cxx} -c $< -o $@.tmp ${cflags}
36 | mv $@.tmp $@
37 | test/%.o: ${lib}/%.c ${inc}/%.h # test-flag object for a source in the shared lib directory
38 | mkdir -p $(dir $@)
39 | ${cxx} -c $< -o $@.tmp ${testFlags}
40 | mv $@.tmp $@
41 | test/%.o: src/%.c src/%.h # test-flag object for a source in src/
42 | mkdir -p $(dir $@)
43 | ${cxx} -c $< -o $@.tmp ${testFlags}
44 | mv $@.tmp $@
45 | test/allTests: src/allTests.c ${testObjects} ${sonLibPath}/sonLib.a # NOTE(review): not a prerequisite of the test goal in this Makefile -- confirm this rule is still used
46 | mkdir -p $(dir $@)
47 | ${cxx} $^ -o $@.tmp ${testFlags} ${lm}
48 | mv $@.tmp $@
49 |
50 | clean: # remove the built programs, objects, the test directory and the generated version sources
51 | rm -rf $(foreach f,${PROGS}, ${bin}/$f) src/*.o test/ src/buildVersion.c src/buildVersion.h
52 |
53 | test: buildVersion test/mafTransitiveClosure # run the binary with --test, then the python tests; ./test/ and ./tempTestDir are removed only when both succeed
54 | test/mafTransitiveClosure --test && python2.7 src/test.mafTransitiveClosure.py --verbose && rm -rf ./test/ && rmdir ./tempTestDir
55 |
56 | ../external/CuTest.a: ../external/CuTest.c ../external/CuTest.h # static CuTest archive; CuTest.o and CuTest.a are created in the current directory and the archive is then moved to $@
57 | ${cxx} -c $< ${cflags}
58 | ar rc CuTest.a CuTest.o
59 | ranlib CuTest.a
60 | rm -f CuTest.o
61 | mv CuTest.a $@
62 |
--------------------------------------------------------------------------------
/mafTransitiveClosure/README.md:
--------------------------------------------------------------------------------
1 | # mafTransitiveClosure
2 | 24 May 2012
3 |
4 | ## Author
5 | [Dent Earl](https://github.com/dentearl/)
6 |
7 | ## Description
8 | A program to perform the transitive closure on an alignment. That is it checks every column of the alignment and looks for situations where a position A is aligned to B in one part of a file and B is aligned to C in another part of the file. The transitive closure of this relationship would be a single column with A, B and C all present. Useful for when you have pairwise alignments and you wish to turn them into something more resembling a multiple alignment.
9 |
10 | ## Dependencies
11 | * sonLib https://github.com/benedictpaten/sonLib/
12 | * pinchesAndCacti https://github.com/benedictpaten/pinchesAndCacti
13 |
14 | ## Installation
15 | 1. Download the package.
16 | 2. `cd` into the directory.
17 | 3. Type `make`.
18 |
19 | ## Use
20 | mafTransitiveClosure --maf mafFile.maf > transitivelyClosed.maf
21 |
22 | ### Options
23 | * `-h, --help` show this help message and exit.
24 | * `-m, --maf` path to maf file.
25 | * `-v, --verbose` turns on verbose output.
26 |
--------------------------------------------------------------------------------
/mafTransitiveClosure/src/allTests.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #include
26 | #include
27 | #include "CuTest.h"
28 | #include "mafTransitiveClosure.h"
29 | #include "test.mafTransitiveClosure.h"
30 |
31 | int mafTransitiveClosure_RunAllTests(void) { // run the registered suite, print the report to stdout, return 1 if any test failed
32 |     CuSuite *closureSuite = mafTransitiveClosure_TestSuite();
33 |     CuSuite *master = CuSuiteNew();
34 |     CuString *report = CuStringNew();
35 |     CuSuiteAddSuite(master, closureSuite);
36 |     printf("\n");
37 |     CuSuiteRun(master);
38 |     CuSuiteSummary(master, report);
39 |     CuSuiteDetails(master, report);
40 |     printf("%s\n", report->buffer);
41 |     CuStringDelete(report);
42 |     const int anyFailed = (master->failCount > 0) ? 1 : 0;
43 |     free(closureSuite); // only the container; presumably its tests now belong to master -- TODO confirm against CuTest
44 |     CuSuiteDelete(master);
45 |     return anyFailed;
46 | }
47 |
--------------------------------------------------------------------------------
/mafTransitiveClosure/src/mafTransitiveClosure.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #ifndef MAFTRANSITIVECLOSURE_H_
26 | #define MAFTRANSITIVECLOSURE_H_
27 | #include "sonLib.h"
28 | #include "stPinchGraphs.h"
29 | #include "common.h"
30 | #include "CuTest.h"
31 | #include "sharedMaf.h"
32 |
33 | typedef struct mafTcSeq {
34 | // maf tc (transitive closure) sequence
35 | char *name;
36 | char *sequence;
37 | uint64_t length; // NOTE(review): presumably the length of `sequence` -- confirm in newMafTcSeq()
38 | } mafTcSeq_t;
39 | typedef struct mafTcRegion {
40 | // region or interval; regions form a singly linked list through `next`
41 | uint64_t start;
42 | uint64_t end; // NOTE(review): appears to be inclusive -- the comparison-order example in this header writes a one-column region as [3, 3]; confirm
43 | struct mafTcRegion *next;
44 | } mafTcRegion_t;
45 | typedef struct mafTcComparisonOrder {
46 | // This struct is used in adding alignment information into the thread set.
47 | /* for this sequence matrix:
48 | 0123456789
49 | 0 AC---ACG-G
50 | 1 ACG--ACGGC
51 | 2 A-G-TACGGC
52 | 3 ACGTTACGGC
53 | a comparison order is {3: [3, 3]}, {2: [4, 4]},
54 | {1: [8, 8]}, {1: [2, 2]}, {0: [9, 9]}, {0: [5, 7]}, {0: [0, 1]}
55 | [note that the ordering of these values is arbitrary, though consistent with the
56 | output of the algo in the code which appends new structs to the head of the list]
57 | e.g. one block uses row 0 as its reference and it starts at column 0
58 | and ends at column 1. Another block uses row 1 as its reference and it
59 | starts at column 2 and ends at column 2 (it is only one column), etc.
60 | */
61 | uint64_t ref; // row index of the reference sequence (the key in the {ref: [start, end]} notation above)
62 | mafTcRegion_t *region;
63 | struct mafTcComparisonOrder *next;
64 | } mafTcComparisonOrder_t;
65 | typedef struct mafCoordinatePair {
66 | /* this struct is used to store pairs of coordinates
67 | */
68 | int64_t a; // first coordinate of the pair; passed as a "bookmark" to localSeqCoords() and processPairForPinching()
69 | int64_t b; // second coordinate of the pair; NOTE(review): exact meaning of a vs. b is not visible in this header -- confirm in the .c file
70 | } mafCoordinatePair_t;
71 | typedef struct mafBlockSort {
72 | /* this struct is used to sort a sequence matrix by the number of gaps in each row
73 | */
74 | int64_t value; // value to sort upon
75 | mafLine_t *ml; // the maf line carried along with `value`; NOTE(review): presumably the row whose gaps were counted -- confirm
76 | } mafBlockSort_t;
77 |
78 | void usage(void);
79 | mafTcSeq_t* newMafTcSeq(char *name, uint64_t length); // new* constructors; the matching destroy* functions follow
80 | mafTcComparisonOrder_t* newMafTcComparisonOrder(void);
81 | mafTcRegion_t* newMafTcRegion(uint64_t start, uint64_t end);
82 | mafCoordinatePair_t* newCoordinatePairArray(uint64_t numSeqs, char **seqs);
83 | void destroyMafTcSeq(void *p); // takes void*, unlike the other destroy* functions; NOTE(review): presumably so it can serve as a generic destructor callback -- confirm
84 | void destroyMafTcRegionList(mafTcRegion_t *r); // NOTE(review): presumably frees the whole `next` chain, while destroyMafTcRegion frees one node -- confirm
85 | void destroyMafTcRegion(mafTcRegion_t *r);
86 | void destroyMafTcComparisonOrder(mafTcComparisonOrder_t *c);
87 | void destroyCoordinatePairArray(mafCoordinatePair_t *cp);
88 | uint64_t hashMafTcSeq(const mafTcSeq_t *mtcs); // hash / compare pair over mafTcSeq_t; NOTE(review): presumably supplied to an stHash -- confirm
89 | int hashCompareMafTcSeq(const mafTcSeq_t *m1, const mafTcSeq_t *m2);
90 | char* createNSequence(uint64_t length);
91 | void addSequenceValuesToMtcSeq(mafLine_t *ml, mafTcSeq_t *mtcs);
92 | void parseOptions(int argc, char **argv, char *filename);
93 | stPinchThreadSet* buildThreadSet(stHash *hash);
94 | void walkBlockAddingAlignments(mafBlock_t *mb, stPinchThreadSet *threadSet);
95 | void addAlignmentsToThreadSet(mafFileApi_t *mfa, stPinchThreadSet *threadSet);
96 | void createSequenceHash(mafFileApi_t *mfa, stHash **hash, stHash **nameHash); // hash and nameHash are passed as stHash** (pointer-to-pointer)
97 | mafTcRegion_t* getComparisonOrderFromRow(char **mat, uint64_t row, mafTcComparisonOrder_t **done, // `done` is a pointer to the list head pointer
98 | mafTcRegion_t *todo, int containsGaps);
99 | mafTcComparisonOrder_t *getComparisonOrderFromMatrix(char **mat, uint64_t rowLength, uint64_t colLength,
100 | uint64_t *lengths, int **vizMat);
101 | void processPairForPinching(stPinchThreadSet *threadSet, stPinchThread *a, uint64_t aGlobalStart, // parameters come in parallel a* / b* sets, one per sequence of the pair
102 | uint64_t aGlobalLength, int aStrand,
103 | char *aSeq, stPinchThread *b, uint64_t bGlobalStart, uint64_t bGlobalLength,
104 | int bStrand, char *bSeq, uint64_t regionStart, uint64_t regionEnd,
105 | mafCoordinatePair_t aBookmark, mafCoordinatePair_t bBookmark,
106 | int aContainsGaps, int bContainsGaps,
107 | void (*pinchFunction)(stPinchThread *, stPinchThread *, int64_t, int64_t, int64_t, bool)); // caller-supplied pinch callback; NOTE(review): meaning of its int64_t/bool arguments is not visible here -- confirm against stPinchGraphs.h
108 | int64_t localSeqCoords(uint64_t p, char *s, mafCoordinatePair_t *bookmark, int containsGaps);
109 | int64_t localSeqCoordsToGlobalPositiveCoords(int64_t c, uint64_t start, uint64_t sourceLength, char strand);
110 | int64_t localSeqCoordsToGlobalPositiveStartCoords(int64_t c, uint64_t start, uint64_t sourceLength,
111 | char strand, uint64_t length);
112 | void mafBlock_sortBlockByIncreasingGap(mafBlock_t *mb);
113 | void walkBlockAddingSequence(mafBlock_t *mb, stHash *hash, stHash *nameHash);
114 | void reportSequenceHash(stHash *hash, stHash *nameHash);
115 | void destroyVizMatrix(int **mat, unsigned n);
116 | int cmp_by_gaps(const void *a, const void *b); // qsort-style comparator signature (two const void*)
117 | uint64_t getMaxNameLength(stHash *hash);
118 | void getMaxFieldLengths(stHash *hash, stHash *nameHash, stPinchBlock *block, uint64_t *maxStart, // maxStart, maxLength and maxSource are pointer parameters
119 | uint64_t *maxLength, uint64_t *maxSource);
120 | char* getSequenceSubset(char *seq, int64_t start, char strand, int64_t length);
121 | void reportTransitiveClosure(stPinchThreadSet *threadSet, stHash *hash, stHash *nameHash);
122 | // debugging tools
123 | int** getVizMatrix(mafBlock_t *mb, unsigned n, unsigned m);
124 | void updateVizMatrix(int **mat, mafTcComparisonOrder_t *co);
125 | void printVizMatrix(int **mat, uint64_t n, uint64_t m);
126 | void printTodoArray(mafTcRegion_t *reg, unsigned max);
127 | // test suite
128 | CuSuite* mafTransitiveClosure_TestSuite(void); // also declared in test.mafTransitiveClosure.h
129 | int mafTransitiveClosure_RunAllTests(void); // returns 1 when at least one test failed, 0 otherwise
130 |
131 | #endif // MAFTRANSITIVECLOSURE_H_
132 |
--------------------------------------------------------------------------------
/mafTransitiveClosure/src/test.mafTransitiveClosure.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2013 by
3 | * Dent Earl (dearl@soe.ucsc.edu, dentearl@gmail.com)
4 | * ... and other members of the Reconstruction Team of David Haussler's
5 | * lab (BME Dept. UCSC).
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in
15 | * all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | * THE SOFTWARE.
24 | */
25 | #ifndef TEST_MAFTRANSITIVECLOSURE_H_
26 | #define TEST_MAFTRANSITIVECLOSURE_H_
27 | #include
28 | #include
29 | #include
30 | #include
31 | #include "CuTest.h"
32 | #include "common.h"
33 | #include "sonLib.h"
34 | #include "stPinchGraphs.h"
35 | #include "mafTransitiveClosure.h"
36 |
37 | void printRegionList(mafTcRegion_t *reg, FILE *ofp); // helpers: print to the given FILE*, or compare expected vs. observed structures
38 | bool regionListsAreEqual(mafTcRegion_t *expected, mafTcRegion_t *obs, bool verbose);
39 | void printTestComparisonOrder(mafTcComparisonOrder_t *co);
40 | bool comparisonOrdersAreEqual(mafTcComparisonOrder_t *eo, mafTcComparisonOrder_t *oo, bool verbose);
41 | bool mafBlocksAreEqual(mafBlock_t *input, mafBlock_t *expected, bool verbose);
42 | void test_reverseComplement(CuTest *testCase); // CuTest cases: each receives the CuTest* it reports into
43 | void test_rowAlignmentBlockComparisonOrdering_0(CuTest *testCase);
44 | void test_rowAlignmentBlockComparisonOrdering_1(CuTest *testCase);
45 | void test_rowAlignmentBlockComparisonOrdering_2(CuTest *testCase);
46 | void test_rowAlignmentBlockComparisonOrdering_3(CuTest *testCase);
47 | void test_matrixAlignmentBlockComparisonOrdering_0(CuTest *testCase);
48 | void test_matrixAlignmentBlockComparisonOrdering_1(CuTest *testCase);
49 | void test_matrixAlignmentBlockComparisonOrdering_2(CuTest *testCase);
50 | void test_matrixAlignmentBlockComparisonOrdering_3(CuTest *testCase);
51 | void test_matrixAlignmentBlockComparisonOrdering_4(CuTest *testCase);
52 | void test_addSequenceValuesToMtcSeq_0(CuTest *testCase);
53 | void test_localSeqCoords_0(CuTest *testCase);
54 | void test_localSeqCoordsToGlobalPositiveCoords_0(CuTest *testCase);
55 | void test_localSeqCoordsToGlobalPositiveStartCoords_0(CuTest *testCase);
56 | void test_coordinateTransforms_0(CuTest *testCase);
57 | void test_coordinateTransforms_1(CuTest *testCase);
58 | void test_mafBlockGapSorting_0(CuTest *testCase);
59 | CuSuite* mafTransitiveClosure_TestSuite(void); // also declared in mafTransitiveClosure.h; NOTE(review): presumably registers the test_* cases above -- confirm in the .c file
60 |
61 | #endif // TEST_MAFTRANSITIVECLOSURE_H_
62 |
--------------------------------------------------------------------------------
/mafValidator/Makefile:
--------------------------------------------------------------------------------
1 | include ../inc/common.mk # project-wide makefile fragment from ../inc
2 | binPath = ../bin
3 |
4 | progs = $(foreach f,mafValidator.py, ${binPath}/$f)
5 |
6 | .PHONY: all clean test # these goals are commands, not files to be built
7 |
8 | all: ${progs} # installs every script listed in progs under ${binPath}
9 |
10 | ${binPath}/%.py : src/%.py # copy to $@.tmp, mark it executable, then rename, so $@ only ever appears complete
11 | @mkdir -p $(dir $@)
12 | cp $< $@.tmp
13 | chmod +x $@.tmp
14 | mv $@.tmp $@
15 |
16 | test : # runs the Python test script verbosely; only on success removes tempTestDir (rmdir fails if it is missing or not empty)
17 | python src/test.mafValidator.py -v && rmdir tempTestDir
18 |
19 | clean : # removes only the installed scripts listed in progs
20 | rm -f ${progs}
--------------------------------------------------------------------------------
/mafValidator/README.md:
--------------------------------------------------------------------------------
1 | # mafValidator
2 |
3 | 10 October 2011
4 |
5 | ## Authors
6 |
7 | [Dent Earl](https://github.com/dentearl/)
8 |
9 | ## Description
10 | mafValidator is a script to validate the formatting and basic data contained in a maf file.
11 |
12 | ## Dependencies
13 | * Python 2.6 ≤ version < 3.0
14 |
15 | ## Installation
16 | 1. Download the package.
17 | 2. `cd` into the directory.
18 | 3. Type `make`.
19 |
20 | ## Use
21 | mafValidator.py --maf=FILE [options]
22 |
23 | ### Options
24 | * mafValidator.py
25 | * `--help`: show this help message and exit
26 | * `--maf`: path to the maf file to test
27 | * `--testChromNames`: Expects that the source field will be formatted with .chrN e.g. "hg19.chr1" default=False
28 | * `--ignoreDuplicateColumns`: Turn off the checks for duplicate columns, may be useful for pairwise-only alignments. default=duplicate checking is on.
29 | * `--validateSequence`: Turn on checks to make sure all sequence fields are consistent. Slows things down considerably.
30 |
31 | ## Test
32 | make test
33 |
--------------------------------------------------------------------------------