├── test_public
├── check_uTR
│ ├── test.sh
│ ├── acc
│ ├── main.o
│ ├── Makefile
│ └── main.c
├── gendata
│ ├── gen
│ ├── main.o
│ ├── Makefile
│ ├── main.c
│ └── MT.h
├── checkTRF
│ ├── main.o
│ ├── checkTRF
│ ├── Makefile
│ └── main.c
├── tmp
│ ├── accuracy_time.xlsx
│ ├── time_RM.txt
│ ├── time_TRF.txt
│ ├── accuracy_TRF_allowance0.txt
│ ├── accuracy_RM_allowance0.txt
│ ├── accuracy_TRF_allowance0.01.txt
│ ├── accuracy_TRF_allowance0.02.txt
│ ├── accuracy_TRF_allowance0.03.txt
│ ├── accuracy_RM_allowance0.01.txt
│ ├── accuracy_RM_allowance0.02.txt
│ ├── accuracy_RM_allowance0.03.txt
│ ├── accuracy_uTR.txt
│ ├── accuracy_uTR_allowance0.txt
│ ├── accuracy_uTR_allowance0.01.txt
│ ├── accuracy_uTR_allowance0.02.txt
│ ├── accuracy_uTR_allowance0.03.txt
│ └── time_uTR.txt
├── check_RepeatMasker
│ ├── checkRM
│ ├── main.o
│ ├── Makefile
│ └── main.c
├── parse_RepeatMasker
│ ├── main.o
│ ├── parseRM
│ ├── Makefile
│ └── main.c
├── make.sh
├── test.sh
├── test_allowance.sh
├── README.md
└── test_gendata_decompose.sh
├── nsop_test
├── gendata
│ ├── gen
│ ├── main.o
│ ├── Makefile
│ ├── main.c
│ └── MT.h
├── nsop_compression_ratio.xlsx
├── README.md
├── uTR
│ ├── Makefile
│ ├── main.c
│ ├── uTR.h
│ ├── MT.h
│ ├── SAIS.c
│ ├── coverage_by_units.c
│ └── handle_one_file.c
├── test.sh
└── nsop_compression.csv
├── realdata
├── test.sh
├── README.md
├── realdata.fasta
└── realdata_result.fasta
├── Makefile
├── LICENSE.txt
├── smooth.c
├── Kawahara_nsop_Z.cpp
├── wrap_around_DP.c
├── MT.h
├── SAIS.c
├── handle_one_file.c
├── uTR.h
├── README.md
└── coverage_by_long_units_nsop_Z.cpp
/test_public/check_uTR/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Run the acc executable (the PROGRAM named in this directory's Makefile) on sample.fasta; both paths are relative to the current directory.
3 | ./acc sample.fasta
4 |
--------------------------------------------------------------------------------
/nsop_test/gendata/gen:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morisUtokyo/uTR/HEAD/nsop_test/gendata/gen
--------------------------------------------------------------------------------
/test_public/gendata/gen:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morisUtokyo/uTR/HEAD/test_public/gendata/gen
--------------------------------------------------------------------------------
/nsop_test/gendata/main.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morisUtokyo/uTR/HEAD/nsop_test/gendata/main.o
--------------------------------------------------------------------------------
/test_public/check_uTR/acc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morisUtokyo/uTR/HEAD/test_public/check_uTR/acc
--------------------------------------------------------------------------------
/test_public/gendata/main.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morisUtokyo/uTR/HEAD/test_public/gendata/main.o
--------------------------------------------------------------------------------
/test_public/checkTRF/main.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morisUtokyo/uTR/HEAD/test_public/checkTRF/main.o
--------------------------------------------------------------------------------
/test_public/check_uTR/main.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morisUtokyo/uTR/HEAD/test_public/check_uTR/main.o
--------------------------------------------------------------------------------
/realdata/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Run the uTR executable from the parent directory on realdata.fasta; -o names realdata_result.fasta (presumably the output file). NOTE(review): the meaning of -sda is not visible here; confirm against uTR's usage text.
3 | ../uTR -f realdata.fasta -o realdata_result.fasta -sda
4 |
5 | exit 0
6 |
--------------------------------------------------------------------------------
/test_public/checkTRF/checkTRF:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morisUtokyo/uTR/HEAD/test_public/checkTRF/checkTRF
--------------------------------------------------------------------------------
/test_public/tmp/accuracy_time.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morisUtokyo/uTR/HEAD/test_public/tmp/accuracy_time.xlsx
--------------------------------------------------------------------------------
/nsop_test/nsop_compression_ratio.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morisUtokyo/uTR/HEAD/nsop_test/nsop_compression_ratio.xlsx
--------------------------------------------------------------------------------
/test_public/check_RepeatMasker/checkRM:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morisUtokyo/uTR/HEAD/test_public/check_RepeatMasker/checkRM
--------------------------------------------------------------------------------
/test_public/check_RepeatMasker/main.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morisUtokyo/uTR/HEAD/test_public/check_RepeatMasker/main.o
--------------------------------------------------------------------------------
/test_public/parse_RepeatMasker/main.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morisUtokyo/uTR/HEAD/test_public/parse_RepeatMasker/main.o
--------------------------------------------------------------------------------
/test_public/parse_RepeatMasker/parseRM:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morisUtokyo/uTR/HEAD/test_public/parse_RepeatMasker/parseRM
--------------------------------------------------------------------------------
/test_public/make.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | cd check_uTR; make clean; make;
4 | cd ../gendata; make clean; make;
5 | cd ../parse_RepeatMasker; make clean; make;
6 | cd ../check_RepeatMasker; make clean; make;
7 | cd ../checkTRF; make clean; make;
8 | cd ..
9 |
10 | exit 0
11 |
--------------------------------------------------------------------------------
/nsop_test/README.md:
--------------------------------------------------------------------------------
1 | ## Introduction
2 |
3 | To calculate the compression ratios of typical fasta files with 1000 mosaic tandem repeats, issue:
4 |
5 | bash test.sh
6 |
7 | It generates two executable modules in the directories named uTR and gendata. Afterwards, it starts computing the compression ratios of fasta files.
8 |
9 |
10 |
--------------------------------------------------------------------------------
/nsop_test/gendata/Makefile:
--------------------------------------------------------------------------------
1 | # Builds the gen executable from main.o.
2 | PROGRAM = gen
3 | OBJS = main.o
4 | CC = gcc
5 | CPP = g++
6 | CFLAGS = -std=c99 -fPIC -fcommon
7 |
8 | # Suffix rules: C++ sources are compiled with $(CPP), C sources with $(CC) $(CFLAGS).
9 | .cpp.o:
10 | 	$(CPP) -c $<
11 | .c.o:
12 | 	$(CC) $(CFLAGS) -c $<
13 |
14 | # g++ must be used to link libraries required
15 | $(PROGRAM): $(OBJS)
16 | 	$(CPP) $(OBJS) -o $(PROGRAM)
17 |
18 | # clean is not a file name; -f keeps it from failing when nothing has been built yet.
19 | .PHONY: clean
20 | clean:
21 | 	rm -f $(PROGRAM) $(OBJS)
22 |
--------------------------------------------------------------------------------
/test_public/gendata/Makefile:
--------------------------------------------------------------------------------
1 | # Builds the gen executable from main.o.
2 | PROGRAM = gen
3 | OBJS = main.o
4 | CC = gcc
5 | CPP = g++
6 | CFLAGS = -std=c99 -fPIC -fcommon
7 |
8 | # Suffix rules: C++ sources are compiled with $(CPP), C sources with $(CC) $(CFLAGS).
9 | .cpp.o:
10 | 	$(CPP) -c $<
11 | .c.o:
12 | 	$(CC) $(CFLAGS) -c $<
13 |
14 | # g++ must be used to link libraries required
15 | $(PROGRAM): $(OBJS)
16 | 	$(CPP) $(OBJS) -o $(PROGRAM)
17 |
18 | # clean is not a file name; -f keeps it from failing when nothing has been built yet.
19 | .PHONY: clean
20 | clean:
21 | 	rm -f $(PROGRAM) $(OBJS)
22 |
--------------------------------------------------------------------------------
/test_public/check_uTR/Makefile:
--------------------------------------------------------------------------------
1 | # Builds the acc executable from main.o.
2 | PROGRAM = acc
3 | OBJS = main.o
4 | CC = gcc
5 | CPP = g++
6 | CFLAGS = -std=c99 -fPIC -fcommon
7 |
8 | # Suffix rules: C++ sources are compiled with $(CPP), C sources with $(CC) $(CFLAGS).
9 | .cpp.o:
10 | 	$(CPP) -c $<
11 | .c.o:
12 | 	$(CC) $(CFLAGS) -c $<
13 |
14 | # g++ must be used to link libraries required
15 | $(PROGRAM): $(OBJS)
16 | 	$(CPP) $(OBJS) -o $(PROGRAM)
17 |
18 | # clean is not a file name; -f keeps it from failing when nothing has been built yet.
19 | .PHONY: clean
20 | clean:
21 | 	rm -f $(PROGRAM) $(OBJS)
22 |
--------------------------------------------------------------------------------
/test_public/checkTRF/Makefile:
--------------------------------------------------------------------------------
1 | # Builds the checkTRF executable from main.o.
2 | PROGRAM = checkTRF
3 | OBJS = main.o
4 | CC = gcc
5 | CPP = g++
6 | CFLAGS = -std=c99 -fPIC -fcommon
7 |
8 | # Suffix rules: C++ sources are compiled with $(CPP), C sources with $(CC) $(CFLAGS).
9 | .cpp.o:
10 | 	$(CPP) -c $<
11 | .c.o:
12 | 	$(CC) $(CFLAGS) -c $<
13 |
14 | # g++ must be used to link libraries required
15 | $(PROGRAM): $(OBJS)
16 | 	$(CPP) $(OBJS) -o $(PROGRAM)
17 |
18 | # clean is not a file name; -f keeps it from failing when nothing has been built yet.
19 | .PHONY: clean
20 | clean:
21 | 	rm -f $(PROGRAM) $(OBJS)
22 |
--------------------------------------------------------------------------------
/test_public/check_RepeatMasker/Makefile:
--------------------------------------------------------------------------------
1 | # Builds the checkRM executable from main.o.
2 | PROGRAM = checkRM
3 | OBJS = main.o
4 | CC = gcc
5 | CPP = g++
6 | CFLAGS = -std=c99 -fPIC -fcommon
7 |
8 | # Suffix rules: C++ sources are compiled with $(CPP), C sources with $(CC) $(CFLAGS).
9 | .cpp.o:
10 | 	$(CPP) -c $<
11 | .c.o:
12 | 	$(CC) $(CFLAGS) -c $<
13 |
14 | # g++ must be used to link libraries required
15 | $(PROGRAM): $(OBJS)
16 | 	$(CPP) $(OBJS) -o $(PROGRAM)
17 |
18 | # clean is not a file name; -f keeps it from failing when nothing has been built yet.
19 | .PHONY: clean
20 | clean:
21 | 	rm -f $(PROGRAM) $(OBJS)
22 |
--------------------------------------------------------------------------------
/test_public/parse_RepeatMasker/Makefile:
--------------------------------------------------------------------------------
1 | # Builds the parseRM executable from main.o.
2 | PROGRAM = parseRM
3 | OBJS = main.o
4 | CC = gcc
5 | CPP = g++
6 | CFLAGS = -std=c99 -fPIC -fcommon
7 |
8 | # Suffix rules: C++ sources are compiled with $(CPP), C sources with $(CC) $(CFLAGS).
9 | .cpp.o:
10 | 	$(CPP) -c $<
11 | .c.o:
12 | 	$(CC) $(CFLAGS) -c $<
13 |
14 | # g++ must be used to link libraries required
15 | $(PROGRAM): $(OBJS)
16 | 	$(CPP) $(OBJS) -o $(PROGRAM)
17 |
18 | # clean is not a file name; -f keeps it from failing when nothing has been built yet.
19 | .PHONY: clean
20 | clean:
21 | 	rm -f $(PROGRAM) $(OBJS)
22 |
--------------------------------------------------------------------------------
/test_public/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Generate benchmark data and decompose them into mosaic tandem repeats.
4 | bash test_gendata_decompose.sh
5 |
6 | # Compute the accuracy of predicting mosaic tandem repeats of the form U_i V_j W_k, allowing the predicted values of i, j, and k to differ from the true values by at most 1%, 2%, or 3%.
7 | bash test_allowance.sh
8 |
9 | exit 0
10 |
--------------------------------------------------------------------------------
/nsop_test/uTR/Makefile:
--------------------------------------------------------------------------------
1 | # Builds the uTR executable from the object files listed in OBJS.
2 | PROGRAM = uTR
3 | OBJS = main.o handle_one_file.o SAIS.o coverage_by_units.o units.o coverage_by_long_units_nsop_Z.o
4 | CC = gcc
5 | CPP = g++
6 | CFLAGS = -std=c99 -fPIC -fcommon
7 |
8 | # Suffix rules: C++ sources are compiled with $(CPP), C sources with $(CC) $(CFLAGS).
9 | .cpp.o:
10 | 	$(CPP) -c $<
11 | .c.o:
12 | 	$(CC) $(CFLAGS) -c $<
13 |
14 | # g++ must be used to link libraries required
15 | $(PROGRAM): $(OBJS)
16 | 	$(CPP) $(OBJS) -o $(PROGRAM)
17 |
18 | # clean is not a file name; -f keeps it from failing when nothing has been built yet.
19 | .PHONY: clean
20 | clean:
21 | 	rm -f $(PROGRAM) $(OBJS)
22 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Builds the uTR executable from the object files listed in OBJS.
2 | PROGRAM = uTR
3 | OBJS = main.o handle_one_file.o SAIS.o coverage_by_units.o units.o coverage_by_long_units_nsop_Z.o string_decomposer.o smooth.o wrap_around_DP.o
4 | CC = gcc
5 | CPP = g++
6 | CFLAGS = -std=c99 -fPIC -fcommon
7 |
8 | # Suffix rules: C++ sources are compiled with $(CPP), C sources with $(CC) $(CFLAGS).
9 | .cpp.o:
10 | 	$(CPP) -c $<
11 | .c.o:
12 | 	$(CC) $(CFLAGS) -c $<
13 |
14 | # g++ must be used to link libraries required
15 | $(PROGRAM): $(OBJS)
16 | 	$(CPP) $(OBJS) -o $(PROGRAM)
17 |
18 | # clean is not a file name; -f keeps it from failing when nothing has been built yet.
19 | .PHONY: clean
20 | clean:
21 | 	rm -f $(PROGRAM) $(OBJS)
22 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | GNU General Public License version 3
2 | Copyright (C) 2021- Shinichi Morishita, University of Tokyo
3 |
4 | This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
5 |
6 | This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
7 |
8 | You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
9 |
--------------------------------------------------------------------------------
/realdata/README.md:
--------------------------------------------------------------------------------
1 | ## Real data
2 |
3 | The fasta file includes the following DNA sequences:
4 |
5 | - SAMD12(control,BAFME) hg38_dna range=chr8:118366813-118366928 in the 4th intron of SAMD12, pattern=(AAAAT)23
6 |
7 | - SAMD12(case,BAFME) Tandem repeat in the 4th intron of SAMD12 found in Patient II-1 in family F6115 (Supplementary Figure 6, Ishiura, H. et al. Expansions of intronic TTTCA and TTTTA repeats in benign adult familial myoclonic epilepsy. Nat Genet 50, 581–590 (2018)) pattern=(ATTTT)221(ATTTC)221(ATTTT)82
8 |
9 | - SAMD12(case,BAFME) Tandem repeat in the 4th intron of SAMD12 found in (Supplementary Figure 6, Ishiura, H. et al. Expansions of intronic TTTCA and TTTTA repeats in benign adult familial myoclonic epilepsy. Nat Genet 50, 581–590 (2018)) pattern=(ATTTT)613(ATTTC)320(ATTTT)5(ATTTC)130
10 |
11 | - RFC1(control,CANVAS) hg38_dna range=chr4:39348425-39348483 pattern=(AAAAG)11
12 |
13 | - KAZN(control) hg38_dna range=chr1:14883297-14883426 pattern=(AAAG)6(AG)11(AAAG)20
14 |
15 | - ZNF37A(control) hg38_dna range=chr10:38112731-38112826 pattern=(CTTTT)12(CTTGT)3(CTTTT)2
16 |
17 | To apply uTR, use
18 |
19 | bash test.sh
20 |
--------------------------------------------------------------------------------
/nsop_test/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Build the data generator (gendata/gen) and uTR (uTR/uTR). Then, for every
4 | # combination of error ratio and unit pattern, generate a fasta file of tandem
5 | # repeats, run uTR on it, and append the run name followed by uTR's output to
6 | # nsop_compression.csv.
7 |
8 | echo "----------------------------"
9 | echo "Compile gendata"
10 | # Stop if the directory is missing so that make never runs in the wrong place.
11 | cd gendata || exit 1
12 | make clean; make
13 |
14 | echo "----------------------------"
15 | echo "Compile uTR"
16 | cd ../uTR || exit 1
17 | make clean; make
18 | cd ..
19 |
20 | echo "----------------------------"
21 | echo "Calculate compression ratio"
22 | gen_units=gendata/gen
23 | uTR=uTR/uTR
24 |
25 | num_TRs=1000
26 | min_unit_occ=10
27 | max_unit_occ=200
28 |
29 | result="nsop_compression.csv"
30 | # -f: start from an empty result file without complaining on the first run,
31 | # when no previous result exists.
32 | rm -f "$result"
33 |
34 |
35 | listError=(0.0 0.01 0.03 0.05 0.1 0.15)
36 | listPattern=("AC_AG" "ACC_GTT" "AAG_AG" "AAG_AGG" "AAAG_AG" "AAAG_AG_AAAG" "AAAG_AG_AGGG_AG_AAAG" "AGGGG_AAAAGAAAGAGAGGG_AGGGG")
37 |
38 | for error_ratio in "${listError[@]}"
39 | do
40 |
41 |   for units_name in "${listPattern[@]}"
42 |   do
43 |     # Replace every underscore with a space: one word per unit.
44 |     units=${units_name//\_/ }
45 |     run_name="${units_name}_${min_unit_occ}_${max_unit_occ}_${error_ratio}"
46 |
47 |     # Generate data of tandem repeats for the given pattern
48 |     TR_file="${run_name}.fasta"
49 |     # Keep only the underscores; the number of units is their count plus one.
50 |     res="${units_name//[^_]}"
51 |     numUnits=$(( ${#res}+1 ))
52 |
53 |     # $units is intentionally left unquoted so that each unit is passed as a
54 |     # separate argument.
55 |     "$gen_units" -k "$min_unit_occ" -l "$max_unit_occ" -n "$num_TRs" -e "$error_ratio" -m "$numUnits" $units > "$TR_file"
56 |
57 |     # Run name without a trailing newline, immediately followed by uTR's output.
58 |     echo -n "$run_name" >> "$result"
59 |     "$uTR" -f "$TR_file" >> "$result"
60 |     rm -f "$TR_file"
61 |
62 |   done
63 | done
64 |
65 | exit 0
66 |
--------------------------------------------------------------------------------
/test_public/tmp/time_RM.txt:
--------------------------------------------------------------------------------
1 | AC_AG_10_200_0.0 23
2 | ACC_GTT_10_200_0.0 19
3 | AAG_AG_10_200_0.0 19
4 | AAG_AGG_10_200_0.0 19
5 | AAAG_AG_10_200_0.0 21
6 | AAAG_AG_AAAG_10_200_0.0 31
7 | AAAG_AG_AGGG_AG_AAAG_10_200_0.0 89
8 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.0 79
9 | AC_AG_10_200_0.01 24
10 | ACC_GTT_10_200_0.01 25
11 | AAG_AG_10_200_0.01 25
12 | AAG_AGG_10_200_0.01 25
13 | AAAG_AG_10_200_0.01 28
14 | AAAG_AG_AAAG_10_200_0.01 53
15 | AAAG_AG_AGGG_AG_AAAG_10_200_0.01 106
16 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.01 159
17 | AC_AG_10_200_0.03 25
18 | ACC_GTT_10_200_0.03 26
19 | AAG_AG_10_200_0.03 28
20 | AAG_AGG_10_200_0.03 31
21 | AAAG_AG_10_200_0.03 32
22 | AAAG_AG_AAAG_10_200_0.03 66
23 | AAAG_AG_AGGG_AG_AAAG_10_200_0.03 122
24 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.03 190
25 | AC_AG_10_200_0.05 26
26 | ACC_GTT_10_200_0.05 31
27 | AAG_AG_10_200_0.05 37
28 | AAG_AGG_10_200_0.05 37
29 | AAAG_AG_10_200_0.05 40
30 | AAAG_AG_AAAG_10_200_0.05 74
31 | AAAG_AG_AGGG_AG_AAAG_10_200_0.05 132
32 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.05 215
33 | AC_AG_10_200_0.1 30
34 | ACC_GTT_10_200_0.1 25
35 | AAG_AG_10_200_0.1 51
36 | AAG_AGG_10_200_0.1 60
37 | AAAG_AG_10_200_0.1 60
38 | AAAG_AG_AAAG_10_200_0.1 104
39 | AAAG_AG_AGGG_AG_AAAG_10_200_0.1 186
40 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.1 305
41 | AC_AG_10_200_0.15 29
42 | ACC_GTT_10_200_0.15 24
43 | AAG_AG_10_200_0.15 44
44 | AAG_AGG_10_200_0.15 56
45 | AAAG_AG_10_200_0.15 57
46 | AAAG_AG_AAAG_10_200_0.15 112
47 | AAAG_AG_AGGG_AG_AAAG_10_200_0.15 178
48 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.15 329
49 |
--------------------------------------------------------------------------------
/test_public/tmp/time_TRF.txt:
--------------------------------------------------------------------------------
1 | AC_AG_10_200_0.0 14
2 | ACC_GTT_10_200_0.0 18
3 | AAG_AG_10_200_0.0 18
4 | AAG_AGG_10_200_0.0 21
5 | AAAG_AG_10_200_0.0 20
6 | AAAG_AG_AAAG_10_200_0.0 55
7 | AAAG_AG_AGGG_AG_AAAG_10_200_0.0 201
8 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.0 147
9 | AC_AG_10_200_0.01 19
10 | ACC_GTT_10_200_0.01 34
11 | AAG_AG_10_200_0.01 32
12 | AAG_AGG_10_200_0.01 38
13 | AAAG_AG_10_200_0.01 41
14 | AAAG_AG_AAAG_10_200_0.01 124
15 | AAAG_AG_AGGG_AG_AAAG_10_200_0.01 197
16 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.01 313
17 | AC_AG_10_200_0.03 23
18 | ACC_GTT_10_200_0.03 45
19 | AAG_AG_10_200_0.03 42
20 | AAG_AGG_10_200_0.03 52
21 | AAAG_AG_10_200_0.03 54
22 | AAAG_AG_AAAG_10_200_0.03 146
23 | AAAG_AG_AGGG_AG_AAAG_10_200_0.03 222
24 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.03 411
25 | AC_AG_10_200_0.05 25
26 | ACC_GTT_10_200_0.05 50
27 | AAG_AG_10_200_0.05 50
28 | AAG_AGG_10_200_0.05 56
29 | AAAG_AG_10_200_0.05 61
30 | AAAG_AG_AAAG_10_200_0.05 160
31 | AAAG_AG_AGGG_AG_AAAG_10_200_0.05 240
32 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.05 452
33 | AC_AG_10_200_0.1 24
34 | ACC_GTT_10_200_0.1 42
35 | AAG_AG_10_200_0.1 49
36 | AAG_AGG_10_200_0.1 54
37 | AAAG_AG_10_200_0.1 60
38 | AAAG_AG_AAAG_10_200_0.1 154
39 | AAAG_AG_AGGG_AG_AAAG_10_200_0.1 226
40 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.1 516
41 | AC_AG_10_200_0.15 21
42 | ACC_GTT_10_200_0.15 38
43 | AAG_AG_10_200_0.15 43
44 | AAG_AGG_10_200_0.15 49
45 | AAAG_AG_10_200_0.15 55
46 | AAAG_AG_AAAG_10_200_0.15 133
47 | AAAG_AG_AGGG_AG_AAAG_10_200_0.15 203
48 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.15 505
49 |
--------------------------------------------------------------------------------
/test_public/tmp/accuracy_TRF_allowance0.txt:
--------------------------------------------------------------------------------
1 | AC_AG_10_200_0.0 1000 1000
2 | ACC_GTT_10_200_0.0 1000 1000
3 | AAG_AG_10_200_0.0 1000 1000
4 | AAG_AGG_10_200_0.0 1000 1000
5 | AAAG_AG_10_200_0.0 1000 1000
6 | AAAG_AG_AAAG_10_200_0.0 2 1000
7 | AAAG_AG_AGGG_AG_AAAG_10_200_0.0 0 1000
8 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.0 0 1000
9 | AC_AG_10_200_0.01 679 1000
10 | ACC_GTT_10_200_0.01 880 1000
11 | AAG_AG_10_200_0.01 508 1000
12 | AAG_AGG_10_200_0.01 895 1000
13 | AAAG_AG_10_200_0.01 464 1000
14 | AAAG_AG_AAAG_10_200_0.01 1 1000
15 | AAAG_AG_AGGG_AG_AAAG_10_200_0.01 2 1000
16 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.01 0 1000
17 | AC_AG_10_200_0.03 258 1000
18 | ACC_GTT_10_200_0.03 528 1000
19 | AAG_AG_10_200_0.03 178 1000
20 | AAG_AGG_10_200_0.03 549 1000
21 | AAAG_AG_10_200_0.03 125 1000
22 | AAAG_AG_AAAG_10_200_0.03 1 1000
23 | AAAG_AG_AGGG_AG_AAAG_10_200_0.03 0 1000
24 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.03 0 1000
25 | AC_AG_10_200_0.05 111 1000
26 | ACC_GTT_10_200_0.05 208 1000
27 | AAG_AG_10_200_0.05 78 1000
28 | AAG_AGG_10_200_0.05 294 1000
29 | AAAG_AG_10_200_0.05 51 1000
30 | AAAG_AG_AAAG_10_200_0.05 0 1000
31 | AAAG_AG_AGGG_AG_AAAG_10_200_0.05 1 1000
32 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.05 0 1000
33 | AC_AG_10_200_0.1 20 1000
34 | ACC_GTT_10_200_0.1 30 1000
35 | AAG_AG_10_200_0.1 4 1000
36 | AAG_AGG_10_200_0.1 41 1000
37 | AAAG_AG_10_200_0.1 7 1000
38 | AAAG_AG_AAAG_10_200_0.1 1 1000
39 | AAAG_AG_AGGG_AG_AAAG_10_200_0.1 0 1000
40 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.1 0 1000
41 | AC_AG_10_200_0.15 4 1000
42 | ACC_GTT_10_200_0.15 0 1000
43 | AAG_AG_10_200_0.15 1 1000
44 | AAG_AGG_10_200_0.15 1 1000
45 | AAAG_AG_10_200_0.15 1 1000
46 | AAAG_AG_AAAG_10_200_0.15 0 1000
47 | AAAG_AG_AGGG_AG_AAAG_10_200_0.15 0 1000
48 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.15 0 1000
49 |
--------------------------------------------------------------------------------
/test_public/tmp/accuracy_RM_allowance0.txt:
--------------------------------------------------------------------------------
1 | AC_AG_10_200_0.0 1000/1000
2 | ACC_GTT_10_200_0.0 1000/1000
3 | AAG_AG_10_200_0.0 1000/1000
4 | AAG_AGG_10_200_0.0 1000/1000
5 | AAAG_AG_10_200_0.0 1000/1000
6 | AAAG_AG_AAAG_10_200_0.0 47/1000
7 | AAAG_AG_AGGG_AG_AAAG_10_200_0.0 0/1000
8 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.0 0/916
9 | AC_AG_10_200_0.01 870/1000
10 | ACC_GTT_10_200_0.01 925/1000
11 | AAG_AG_10_200_0.01 816/1000
12 | AAG_AGG_10_200_0.01 940/1000
13 | AAAG_AG_10_200_0.01 863/1000
14 | AAAG_AG_AAAG_10_200_0.01 28/1000
15 | AAAG_AG_AGGG_AG_AAAG_10_200_0.01 24/1000
16 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.01 0/962
17 | AC_AG_10_200_0.03 596/1000
18 | ACC_GTT_10_200_0.03 706/1000
19 | AAG_AG_10_200_0.03 535/999
20 | AAG_AGG_10_200_0.03 705/1000
21 | AAAG_AG_10_200_0.03 634/1000
22 | AAAG_AG_AAAG_10_200_0.03 34/1000
23 | AAAG_AG_AGGG_AG_AAAG_10_200_0.03 22/1000
24 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.03 0/1000
25 | AC_AG_10_200_0.05 455/1000
26 | ACC_GTT_10_200_0.05 504/1000
27 | AAG_AG_10_200_0.05 296/996
28 | AAG_AGG_10_200_0.05 401/1000
29 | AAAG_AG_10_200_0.05 375/1000
30 | AAAG_AG_AAAG_10_200_0.05 22/1000
31 | AAAG_AG_AGGG_AG_AAAG_10_200_0.05 6/1000
32 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.05 0/989
33 | AC_AG_10_200_0.1 283/1000
34 | ACC_GTT_10_200_0.1 284/1000
35 | AAG_AG_10_200_0.1 10/998
36 | AAG_AGG_10_200_0.1 10/1000
37 | AAAG_AG_10_200_0.1 24/1000
38 | AAAG_AG_AAAG_10_200_0.1 1/1000
39 | AAAG_AG_AGGG_AG_AAAG_10_200_0.1 0/1000
40 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.1 0/878
41 | AC_AG_10_200_0.15 123/1000
42 | ACC_GTT_10_200_0.15 158/1000
43 | AAG_AG_10_200_0.15 3/997
44 | AAG_AGG_10_200_0.15 1/1000
45 | AAAG_AG_10_200_0.15 2/1000
46 | AAAG_AG_AAAG_10_200_0.15 0/1000
47 | AAAG_AG_AGGG_AG_AAAG_10_200_0.15 0/1000
48 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.15 0/931
49 |
--------------------------------------------------------------------------------
/test_public/tmp/accuracy_TRF_allowance0.01.txt:
--------------------------------------------------------------------------------
1 | AC_AG_10_200_0.0 1000 1000
2 | ACC_GTT_10_200_0.0 1000 1000
3 | AAG_AG_10_200_0.0 1000 1000
4 | AAG_AGG_10_200_0.0 1000 1000
5 | AAAG_AG_10_200_0.0 1000 1000
6 | AAAG_AG_AAAG_10_200_0.0 2 1000
7 | AAAG_AG_AGGG_AG_AAAG_10_200_0.0 159 1000
8 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.0 0 1000
9 | AC_AG_10_200_0.01 952 1000
10 | ACC_GTT_10_200_0.01 915 1000
11 | AAG_AG_10_200_0.01 831 1000
12 | AAG_AGG_10_200_0.01 909 1000
13 | AAAG_AG_10_200_0.01 748 1000
14 | AAAG_AG_AAAG_10_200_0.01 5 1000
15 | AAAG_AG_AGGG_AG_AAAG_10_200_0.01 63 1000
16 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.01 0 1000
17 | AC_AG_10_200_0.03 589 1000
18 | ACC_GTT_10_200_0.03 601 1000
19 | AAG_AG_10_200_0.03 408 1000
20 | AAG_AGG_10_200_0.03 583 1000
21 | AAAG_AG_10_200_0.03 276 1000
22 | AAAG_AG_AAAG_10_200_0.03 3 1000
23 | AAAG_AG_AGGG_AG_AAAG_10_200_0.03 3 1000
24 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.03 0 1000
25 | AC_AG_10_200_0.05 300 1000
26 | ACC_GTT_10_200_0.05 299 1000
27 | AAG_AG_10_200_0.05 173 1000
28 | AAG_AGG_10_200_0.05 345 1000
29 | AAAG_AG_10_200_0.05 110 1000
30 | AAAG_AG_AAAG_10_200_0.05 1 1000
31 | AAAG_AG_AGGG_AG_AAAG_10_200_0.05 3 1000
32 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.05 0 1000
33 | AC_AG_10_200_0.1 62 1000
34 | ACC_GTT_10_200_0.1 56 1000
35 | AAG_AG_10_200_0.1 18 1000
36 | AAG_AGG_10_200_0.1 61 1000
37 | AAAG_AG_10_200_0.1 18 1000
38 | AAAG_AG_AAAG_10_200_0.1 1 1000
39 | AAAG_AG_AGGG_AG_AAAG_10_200_0.1 0 1000
40 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.1 0 1000
41 | AC_AG_10_200_0.15 15 1000
42 | ACC_GTT_10_200_0.15 3 1000
43 | AAG_AG_10_200_0.15 5 1000
44 | AAG_AGG_10_200_0.15 4 1000
45 | AAAG_AG_10_200_0.15 2 1000
46 | AAAG_AG_AAAG_10_200_0.15 0 1000
47 | AAAG_AG_AGGG_AG_AAAG_10_200_0.15 0 1000
48 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.15 0 1000
49 |
--------------------------------------------------------------------------------
/test_public/tmp/accuracy_TRF_allowance0.02.txt:
--------------------------------------------------------------------------------
1 | AC_AG_10_200_0.0 1000 1000
2 | ACC_GTT_10_200_0.0 1000 1000
3 | AAG_AG_10_200_0.0 1000 1000
4 | AAG_AGG_10_200_0.0 1000 1000
5 | AAAG_AG_10_200_0.0 1000 1000
6 | AAAG_AG_AAAG_10_200_0.0 2 1000
7 | AAAG_AG_AGGG_AG_AAAG_10_200_0.0 159 1000
8 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.0 0 1000
9 | AC_AG_10_200_0.01 973 1000
10 | ACC_GTT_10_200_0.01 916 1000
11 | AAG_AG_10_200_0.01 888 1000
12 | AAG_AGG_10_200_0.01 912 1000
13 | AAAG_AG_10_200_0.01 786 1000
14 | AAAG_AG_AAAG_10_200_0.01 5 1000
15 | AAAG_AG_AGGG_AG_AAAG_10_200_0.01 71 1000
16 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.01 0 1000
17 | AC_AG_10_200_0.03 776 1000
18 | ACC_GTT_10_200_0.03 605 1000
19 | AAG_AG_10_200_0.03 543 1000
20 | AAG_AGG_10_200_0.03 590 1000
21 | AAAG_AG_10_200_0.03 357 1000
22 | AAAG_AG_AAAG_10_200_0.03 7 1000
23 | AAAG_AG_AGGG_AG_AAAG_10_200_0.03 4 1000
24 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.03 0 1000
25 | AC_AG_10_200_0.05 459 1000
26 | ACC_GTT_10_200_0.05 314 1000
27 | AAG_AG_10_200_0.05 251 1000
28 | AAG_AGG_10_200_0.05 353 1000
29 | AAAG_AG_10_200_0.05 151 1000
30 | AAAG_AG_AAAG_10_200_0.05 2 1000
31 | AAAG_AG_AGGG_AG_AAAG_10_200_0.05 4 1000
32 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.05 0 1000
33 | AC_AG_10_200_0.1 100 1000
34 | ACC_GTT_10_200_0.1 64 1000
35 | AAG_AG_10_200_0.1 25 1000
36 | AAG_AGG_10_200_0.1 64 1000
37 | AAAG_AG_10_200_0.1 22 1000
38 | AAAG_AG_AAAG_10_200_0.1 1 1000
39 | AAAG_AG_AGGG_AG_AAAG_10_200_0.1 0 1000
40 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.1 0 1000
41 | AC_AG_10_200_0.15 19 1000
42 | ACC_GTT_10_200_0.15 4 1000
43 | AAG_AG_10_200_0.15 5 1000
44 | AAG_AGG_10_200_0.15 5 1000
45 | AAAG_AG_10_200_0.15 2 1000
46 | AAAG_AG_AAAG_10_200_0.15 0 1000
47 | AAAG_AG_AGGG_AG_AAAG_10_200_0.15 0 1000
48 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.15 0 1000
49 |
--------------------------------------------------------------------------------
/test_public/tmp/accuracy_TRF_allowance0.03.txt:
--------------------------------------------------------------------------------
1 | AC_AG_10_200_0.0 1000 1000
2 | ACC_GTT_10_200_0.0 1000 1000
3 | AAG_AG_10_200_0.0 1000 1000
4 | AAG_AGG_10_200_0.0 1000 1000
5 | AAAG_AG_10_200_0.0 1000 1000
6 | AAAG_AG_AAAG_10_200_0.0 2 1000
7 | AAAG_AG_AGGG_AG_AAAG_10_200_0.0 159 1000
8 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.0 0 1000
9 | AC_AG_10_200_0.01 977 1000
10 | ACC_GTT_10_200_0.01 916 1000
11 | AAG_AG_10_200_0.01 902 1000
12 | AAG_AGG_10_200_0.01 912 1000
13 | AAAG_AG_10_200_0.01 795 1000
14 | AAAG_AG_AAAG_10_200_0.01 5 1000
15 | AAAG_AG_AGGG_AG_AAAG_10_200_0.01 76 1000
16 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.01 0 1000
17 | AC_AG_10_200_0.03 844 1000
18 | ACC_GTT_10_200_0.03 605 1000
19 | AAG_AG_10_200_0.03 591 1000
20 | AAG_AGG_10_200_0.03 591 1000
21 | AAAG_AG_10_200_0.03 404 1000
22 | AAAG_AG_AAAG_10_200_0.03 8 1000
23 | AAAG_AG_AGGG_AG_AAAG_10_200_0.03 9 1000
24 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.03 0 1000
25 | AC_AG_10_200_0.05 580 1000
26 | ACC_GTT_10_200_0.05 315 1000
27 | AAG_AG_10_200_0.05 294 1000
28 | AAG_AGG_10_200_0.05 356 1000
29 | AAAG_AG_10_200_0.05 195 1000
30 | AAAG_AG_AAAG_10_200_0.05 3 1000
31 | AAAG_AG_AGGG_AG_AAAG_10_200_0.05 6 1000
32 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.05 0 1000
33 | AC_AG_10_200_0.1 144 1000
34 | ACC_GTT_10_200_0.1 68 1000
35 | AAG_AG_10_200_0.1 43 1000
36 | AAG_AGG_10_200_0.1 68 1000
37 | AAAG_AG_10_200_0.1 32 1000
38 | AAAG_AG_AAAG_10_200_0.1 1 1000
39 | AAAG_AG_AGGG_AG_AAAG_10_200_0.1 0 1000
40 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.1 0 1000
41 | AC_AG_10_200_0.15 22 1000
42 | ACC_GTT_10_200_0.15 4 1000
43 | AAG_AG_10_200_0.15 6 1000
44 | AAG_AGG_10_200_0.15 5 1000
45 | AAAG_AG_10_200_0.15 2 1000
46 | AAAG_AG_AAAG_10_200_0.15 0 1000
47 | AAAG_AG_AGGG_AG_AAAG_10_200_0.15 0 1000
48 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.15 0 1000
49 |
--------------------------------------------------------------------------------
/test_public/tmp/accuracy_RM_allowance0.01.txt:
--------------------------------------------------------------------------------
1 | AC_AG_10_200_0.0 1000/1000
2 | ACC_GTT_10_200_0.0 1000/1000
3 | AAG_AG_10_200_0.0 1000/1000
4 | AAG_AGG_10_200_0.0 1000/1000
5 | AAAG_AG_10_200_0.0 1000/1000
6 | AAAG_AG_AAAG_10_200_0.0 48/1000
7 | AAAG_AG_AGGG_AG_AAAG_10_200_0.0 495/1000
8 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.0 0/916
9 | AC_AG_10_200_0.01 992/1000
10 | ACC_GTT_10_200_0.01 999/1000
11 | AAG_AG_10_200_0.01 958/1000
12 | AAG_AGG_10_200_0.01 995/1000
13 | AAAG_AG_10_200_0.01 978/1000
14 | AAAG_AG_AAAG_10_200_0.01 46/1000
15 | AAAG_AG_AGGG_AG_AAAG_10_200_0.01 209/1000
16 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.01 0/962
17 | AC_AG_10_200_0.03 947/1000
18 | ACC_GTT_10_200_0.03 967/1000
19 | AAG_AG_10_200_0.03 838/999
20 | AAG_AGG_10_200_0.03 965/1000
21 | AAAG_AG_10_200_0.03 909/1000
22 | AAAG_AG_AAAG_10_200_0.03 55/1000
23 | AAAG_AG_AGGG_AG_AAAG_10_200_0.03 158/1000
24 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.03 0/1000
25 | AC_AG_10_200_0.05 872/1000
26 | ACC_GTT_10_200_0.05 904/1000
27 | AAG_AG_10_200_0.05 539/996
28 | AAG_AGG_10_200_0.05 700/1000
29 | AAAG_AG_10_200_0.05 685/1000
30 | AAAG_AG_AAAG_10_200_0.05 47/1000
31 | AAAG_AG_AGGG_AG_AAAG_10_200_0.05 89/1000
32 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.05 0/989
33 | AC_AG_10_200_0.1 617/1000
34 | ACC_GTT_10_200_0.1 662/1000
35 | AAG_AG_10_200_0.1 15/998
36 | AAG_AGG_10_200_0.1 35/1000
37 | AAAG_AG_10_200_0.1 44/1000
38 | AAAG_AG_AAAG_10_200_0.1 1/1000
39 | AAAG_AG_AGGG_AG_AAAG_10_200_0.1 0/1000
40 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.1 0/878
41 | AC_AG_10_200_0.15 365/1000
42 | ACC_GTT_10_200_0.15 464/1000
43 | AAG_AG_10_200_0.15 5/997
44 | AAG_AGG_10_200_0.15 5/1000
45 | AAAG_AG_10_200_0.15 3/1000
46 | AAAG_AG_AAAG_10_200_0.15 0/1000
47 | AAAG_AG_AGGG_AG_AAAG_10_200_0.15 0/1000
48 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.15 0/931
49 |
--------------------------------------------------------------------------------
/test_public/tmp/accuracy_RM_allowance0.02.txt:
--------------------------------------------------------------------------------
1 | AC_AG_10_200_0.0 1000/1000
2 | ACC_GTT_10_200_0.0 1000/1000
3 | AAG_AG_10_200_0.0 1000/1000
4 | AAG_AGG_10_200_0.0 1000/1000
5 | AAAG_AG_10_200_0.0 1000/1000
6 | AAAG_AG_AAAG_10_200_0.0 48/1000
7 | AAAG_AG_AGGG_AG_AAAG_10_200_0.0 495/1000
8 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.0 0/916
9 | AC_AG_10_200_0.01 998/1000
10 | ACC_GTT_10_200_0.01 999/1000
11 | AAG_AG_10_200_0.01 963/1000
12 | AAG_AGG_10_200_0.01 1000/1000
13 | AAAG_AG_10_200_0.01 988/1000
14 | AAAG_AG_AAAG_10_200_0.01 47/1000
15 | AAAG_AG_AGGG_AG_AAAG_10_200_0.01 221/1000
16 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.01 0/962
17 | AC_AG_10_200_0.03 979/1000
18 | ACC_GTT_10_200_0.03 991/1000
19 | AAG_AG_10_200_0.03 866/999
20 | AAG_AGG_10_200_0.03 982/1000
21 | AAAG_AG_10_200_0.03 937/1000
22 | AAAG_AG_AAAG_10_200_0.03 64/1000
23 | AAAG_AG_AGGG_AG_AAAG_10_200_0.03 180/1000
24 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.03 0/1000
25 | AC_AG_10_200_0.05 936/1000
26 | ACC_GTT_10_200_0.05 962/1000
27 | AAG_AG_10_200_0.05 594/996
28 | AAG_AGG_10_200_0.05 738/1000
29 | AAAG_AG_10_200_0.05 736/1000
30 | AAAG_AG_AAAG_10_200_0.05 52/1000
31 | AAAG_AG_AGGG_AG_AAAG_10_200_0.05 115/1000
32 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.05 0/989
33 | AC_AG_10_200_0.1 782/1000
34 | ACC_GTT_10_200_0.1 798/1000
35 | AAG_AG_10_200_0.1 17/998
36 | AAG_AGG_10_200_0.1 37/1000
37 | AAAG_AG_10_200_0.1 50/1000
38 | AAAG_AG_AAAG_10_200_0.1 1/1000
39 | AAAG_AG_AGGG_AG_AAAG_10_200_0.1 0/1000
40 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.1 0/878
41 | AC_AG_10_200_0.15 495/1000
42 | ACC_GTT_10_200_0.15 618/1000
43 | AAG_AG_10_200_0.15 5/997
44 | AAG_AGG_10_200_0.15 5/1000
45 | AAAG_AG_10_200_0.15 3/1000
46 | AAAG_AG_AAAG_10_200_0.15 0/1000
47 | AAAG_AG_AGGG_AG_AAAG_10_200_0.15 0/1000
48 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.15 0/931
49 |
--------------------------------------------------------------------------------
/test_public/tmp/accuracy_RM_allowance0.03.txt:
--------------------------------------------------------------------------------
1 | AC_AG_10_200_0.0 1000/1000
2 | ACC_GTT_10_200_0.0 1000/1000
3 | AAG_AG_10_200_0.0 1000/1000
4 | AAG_AGG_10_200_0.0 1000/1000
5 | AAAG_AG_10_200_0.0 1000/1000
6 | AAAG_AG_AAAG_10_200_0.0 48/1000
7 | AAAG_AG_AGGG_AG_AAAG_10_200_0.0 495/1000
8 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.0 0/916
9 | AC_AG_10_200_0.01 998/1000
10 | ACC_GTT_10_200_0.01 999/1000
11 | AAG_AG_10_200_0.01 967/1000
12 | AAG_AGG_10_200_0.01 1000/1000
13 | AAAG_AG_10_200_0.01 990/1000
14 | AAAG_AG_AAAG_10_200_0.01 47/1000
15 | AAAG_AG_AGGG_AG_AAAG_10_200_0.01 232/1000
16 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.01 0/962
17 | AC_AG_10_200_0.03 991/1000
18 | ACC_GTT_10_200_0.03 992/1000
19 | AAG_AG_10_200_0.03 882/999
20 | AAG_AGG_10_200_0.03 987/1000
21 | AAAG_AG_10_200_0.03 946/1000
22 | AAAG_AG_AAAG_10_200_0.03 65/1000
23 | AAAG_AG_AGGG_AG_AAAG_10_200_0.03 206/1000
24 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.03 0/1000
25 | AC_AG_10_200_0.05 965/1000
26 | ACC_GTT_10_200_0.05 973/1000
27 | AAG_AG_10_200_0.05 611/996
28 | AAG_AGG_10_200_0.05 748/1000
29 | AAAG_AG_10_200_0.05 753/1000
30 | AAAG_AG_AAAG_10_200_0.05 53/1000
31 | AAAG_AG_AGGG_AG_AAAG_10_200_0.05 126/1000
32 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.05 0/989
33 | AC_AG_10_200_0.1 864/1000
34 | ACC_GTT_10_200_0.1 869/1000
35 | AAG_AG_10_200_0.1 22/998
36 | AAG_AGG_10_200_0.1 37/1000
37 | AAAG_AG_10_200_0.1 52/1000
38 | AAAG_AG_AAAG_10_200_0.1 1/1000
39 | AAAG_AG_AGGG_AG_AAAG_10_200_0.1 0/1000
40 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.1 0/878
41 | AC_AG_10_200_0.15 581/1000
42 | ACC_GTT_10_200_0.15 725/1000
43 | AAG_AG_10_200_0.15 5/997
44 | AAG_AGG_10_200_0.15 5/1000
45 | AAAG_AG_10_200_0.15 4/1000
46 | AAAG_AG_AAAG_10_200_0.15 0/1000
47 | AAAG_AG_AGGG_AG_AAAG_10_200_0.15 0/1000
48 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.15 0/931
49 |
--------------------------------------------------------------------------------
/test_public/tmp/accuracy_uTR.txt:
--------------------------------------------------------------------------------
1 | AC_AG_10_200_0.0 992 1000 414
2 | ACC_GTT_10_200_0.0 998 1000 617
3 | AAG_AG_10_200_0.0 991 1000 524
4 | AAG_AGG_10_200_0.0 998 1000 617
5 | AAAG_AG_10_200_0.0 982 1000 624
6 | AAAG_AG_AAAG_10_200_0.0 922 1000 1040
7 | AAAG_AG_AGGG_AG_AAAG_10_200_0.0 957 1000 1674
8 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.0 996 1000 2617
9 | AC_AG_10_200_0.01 996 1000 421
10 | ACC_GTT_10_200_0.01 1000 1000 636
11 | AAG_AG_10_200_0.01 992 1000 523
12 | AAG_AGG_10_200_0.01 999 1000 617
13 | AAAG_AG_10_200_0.01 990 1000 628
14 | AAAG_AG_AAAG_10_200_0.01 940 1000 1042
15 | AAAG_AG_AGGG_AG_AAAG_10_200_0.01 948 1000 1671
16 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.01 999 1000 2564
17 | AC_AG_10_200_0.03 992 1000 413
18 | ACC_GTT_10_200_0.03 999 1000 627
19 | AAG_AG_10_200_0.03 983 1000 514
20 | AAG_AGG_10_200_0.03 999 1000 633
21 | AAAG_AG_10_200_0.03 987 1000 625
22 | AAAG_AG_AAAG_10_200_0.03 948 1000 1031
23 | AAAG_AG_AGGG_AG_AAAG_10_200_0.03 934 1000 1670
24 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.03 958 1000 2608
25 | AC_AG_10_200_0.05 993 1000 416
26 | ACC_GTT_10_200_0.05 999 1000 650
27 | AAG_AG_10_200_0.05 971 1000 528
28 | AAG_AGG_10_200_0.05 998 1000 619
29 | AAAG_AG_10_200_0.05 973 1000 640
30 | AAAG_AG_AAAG_10_200_0.05 924 1000 1053
31 | AAAG_AG_AGGG_AG_AAAG_10_200_0.05 925 1000 1704
32 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.05 306 1000 2599
33 | AC_AG_10_200_0.1 983 1000 415
34 | ACC_GTT_10_200_0.1 995 1000 612
35 | AAG_AG_10_200_0.1 885 1000 529
36 | AAG_AGG_10_200_0.1 984 1000 629
37 | AAAG_AG_10_200_0.1 943 1000 629
38 | AAAG_AG_AAAG_10_200_0.1 886 1000 1047
39 | AAAG_AG_AGGG_AG_AAAG_10_200_0.1 819 1000 1681
40 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.1 1 1000 2629
41 | AC_AG_10_200_0.15 963 1000 419
42 | ACC_GTT_10_200_0.15 946 1000 614
43 | AAG_AG_10_200_0.15 660 1000 521
44 | AAG_AGG_10_200_0.15 908 1000 636
45 | AAAG_AG_10_200_0.15 793 1000 627
46 | AAAG_AG_AAAG_10_200_0.15 638 1000 1035
47 | AAAG_AG_AGGG_AG_AAAG_10_200_0.15 539 1000 1668
48 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.15 0 1000 2625
49 |
--------------------------------------------------------------------------------
/test_public/tmp/accuracy_uTR_allowance0.txt:
--------------------------------------------------------------------------------
1 | AC_AG_10_200_0.0 992 1000 414
2 | ACC_GTT_10_200_0.0 998 1000 617
3 | AAG_AG_10_200_0.0 991 1000 524
4 | AAG_AGG_10_200_0.0 998 1000 617
5 | AAAG_AG_10_200_0.0 982 1000 624
6 | AAAG_AG_AAAG_10_200_0.0 922 1000 1040
7 | AAAG_AG_AGGG_AG_AAAG_10_200_0.0 954 1000 1674
8 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.0 996 1000 2617
9 | AC_AG_10_200_0.01 923 1000 421
10 | ACC_GTT_10_200_0.01 967 1000 636
11 | AAG_AG_10_200_0.01 849 1000 523
12 | AAG_AGG_10_200_0.01 926 1000 617
13 | AAAG_AG_10_200_0.01 878 1000 628
14 | AAAG_AG_AAAG_10_200_0.01 771 1000 1042
15 | AAAG_AG_AGGG_AG_AAAG_10_200_0.01 641 1000 1671
16 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.01 986 1000 2564
17 | AC_AG_10_200_0.03 664 1000 413
18 | ACC_GTT_10_200_0.03 809 1000 627
19 | AAG_AG_10_200_0.03 556 1000 514
20 | AAG_AGG_10_200_0.03 705 1000 633
21 | AAAG_AG_10_200_0.03 633 1000 625
22 | AAAG_AG_AAAG_10_200_0.03 430 1000 1031
23 | AAAG_AG_AGGG_AG_AAAG_10_200_0.03 234 1000 1670
24 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.03 854 1000 2608
25 | AC_AG_10_200_0.05 502 1000 416
26 | ACC_GTT_10_200_0.05 659 1000 650
27 | AAG_AG_10_200_0.05 361 1000 528
28 | AAG_AGG_10_200_0.05 524 1000 619
29 | AAAG_AG_10_200_0.05 444 1000 640
30 | AAAG_AG_AAAG_10_200_0.05 268 1000 1053
31 | AAAG_AG_AGGG_AG_AAAG_10_200_0.05 104 1000 1704
32 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.05 223 1000 2599
33 | AC_AG_10_200_0.1 286 1000 415
34 | ACC_GTT_10_200_0.1 515 1000 612
35 | AAG_AG_10_200_0.1 191 1000 529
36 | AAG_AGG_10_200_0.1 330 1000 629
37 | AAAG_AG_10_200_0.1 266 1000 629
38 | AAAG_AG_AAAG_10_200_0.1 121 1000 1047
39 | AAAG_AG_AGGG_AG_AAAG_10_200_0.1 27 1000 1681
40 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.1 0 1000 2629
41 | AC_AG_10_200_0.15 219 1000 419
42 | ACC_GTT_10_200_0.15 327 1000 614
43 | AAG_AG_10_200_0.15 90 1000 521
44 | AAG_AGG_10_200_0.15 223 1000 636
45 | AAAG_AG_10_200_0.15 176 1000 627
46 | AAAG_AG_AAAG_10_200_0.15 67 1000 1035
47 | AAAG_AG_AGGG_AG_AAAG_10_200_0.15 4 1000 1668
48 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.15 0 1000 2625
49 |
--------------------------------------------------------------------------------
/test_public/tmp/accuracy_uTR_allowance0.01.txt:
--------------------------------------------------------------------------------
1 | AC_AG_10_200_0.0 992 1000 414
2 | ACC_GTT_10_200_0.0 998 1000 617
3 | AAG_AG_10_200_0.0 991 1000 524
4 | AAG_AGG_10_200_0.0 998 1000 617
5 | AAAG_AG_10_200_0.0 982 1000 624
6 | AAAG_AG_AAAG_10_200_0.0 922 1000 1040
7 | AAAG_AG_AGGG_AG_AAAG_10_200_0.0 957 1000 1674
8 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.0 996 1000 2617
9 | AC_AG_10_200_0.01 994 1000 421
10 | ACC_GTT_10_200_0.01 1000 1000 636
11 | AAG_AG_10_200_0.01 980 1000 523
12 | AAG_AGG_10_200_0.01 998 1000 617
13 | AAAG_AG_10_200_0.01 978 1000 628
14 | AAAG_AG_AAAG_10_200_0.01 928 1000 1042
15 | AAAG_AG_AGGG_AG_AAAG_10_200_0.01 910 1000 1671
16 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.01 999 1000 2564
17 | AC_AG_10_200_0.03 964 1000 413
18 | ACC_GTT_10_200_0.03 995 1000 627
19 | AAG_AG_10_200_0.03 876 1000 514
20 | AAG_AGG_10_200_0.03 981 1000 633
21 | AAAG_AG_10_200_0.03 941 1000 625
22 | AAAG_AG_AAAG_10_200_0.03 816 1000 1031
23 | AAAG_AG_AGGG_AG_AAAG_10_200_0.03 680 1000 1670
24 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.03 956 1000 2608
25 | AC_AG_10_200_0.05 910 1000 416
26 | ACC_GTT_10_200_0.05 978 1000 650
27 | AAG_AG_10_200_0.05 753 1000 528
28 | AAG_AGG_10_200_0.05 893 1000 619
29 | AAAG_AG_10_200_0.05 840 1000 640
30 | AAAG_AG_AAAG_10_200_0.05 714 1000 1053
31 | AAAG_AG_AGGG_AG_AAAG_10_200_0.05 518 1000 1704
32 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.05 302 1000 2599
33 | AC_AG_10_200_0.1 707 1000 415
34 | ACC_GTT_10_200_0.1 894 1000 612
35 | AAG_AG_10_200_0.1 465 1000 529
36 | AAG_AGG_10_200_0.1 719 1000 629
37 | AAAG_AG_10_200_0.1 619 1000 629
38 | AAAG_AG_AAAG_10_200_0.1 478 1000 1047
39 | AAAG_AG_AGGG_AG_AAAG_10_200_0.1 222 1000 1681
40 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.1 1 1000 2629
41 | AC_AG_10_200_0.15 563 1000 419
42 | ACC_GTT_10_200_0.15 747 1000 614
43 | AAG_AG_10_200_0.15 260 1000 521
44 | AAG_AGG_10_200_0.15 539 1000 636
45 | AAAG_AG_10_200_0.15 433 1000 627
46 | AAAG_AG_AAAG_10_200_0.15 234 1000 1035
47 | AAAG_AG_AGGG_AG_AAAG_10_200_0.15 77 1000 1668
48 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.15 0 1000 2625
49 |
--------------------------------------------------------------------------------
/test_public/tmp/accuracy_uTR_allowance0.02.txt:
--------------------------------------------------------------------------------
1 | AC_AG_10_200_0.0 992 1000 414
2 | ACC_GTT_10_200_0.0 998 1000 617
3 | AAG_AG_10_200_0.0 991 1000 524
4 | AAG_AGG_10_200_0.0 998 1000 617
5 | AAAG_AG_10_200_0.0 982 1000 624
6 | AAAG_AG_AAAG_10_200_0.0 922 1000 1040
7 | AAAG_AG_AGGG_AG_AAAG_10_200_0.0 957 1000 1674
8 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.0 996 1000 2617
9 | AC_AG_10_200_0.01 996 1000 421
10 | ACC_GTT_10_200_0.01 1000 1000 636
11 | AAG_AG_10_200_0.01 990 1000 523
12 | AAG_AGG_10_200_0.01 998 1000 617
13 | AAAG_AG_10_200_0.01 987 1000 628
14 | AAAG_AG_AAAG_10_200_0.01 934 1000 1042
15 | AAAG_AG_AGGG_AG_AAAG_10_200_0.01 932 1000 1671
16 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.01 999 1000 2564
17 | AC_AG_10_200_0.03 989 1000 413
18 | ACC_GTT_10_200_0.03 999 1000 627
19 | AAG_AG_10_200_0.03 936 1000 514
20 | AAG_AGG_10_200_0.03 993 1000 633
21 | AAAG_AG_10_200_0.03 965 1000 625
22 | AAAG_AG_AAAG_10_200_0.03 884 1000 1031
23 | AAAG_AG_AGGG_AG_AAAG_10_200_0.03 799 1000 1670
24 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.03 957 1000 2608
25 | AC_AG_10_200_0.05 967 1000 416
26 | ACC_GTT_10_200_0.05 998 1000 650
27 | AAG_AG_10_200_0.05 848 1000 528
28 | AAG_AGG_10_200_0.05 962 1000 619
29 | AAAG_AG_10_200_0.05 921 1000 640
30 | AAAG_AG_AAAG_10_200_0.05 816 1000 1053
31 | AAAG_AG_AGGG_AG_AAAG_10_200_0.05 701 1000 1704
32 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.05 304 1000 2599
33 | AC_AG_10_200_0.1 852 1000 415
34 | ACC_GTT_10_200_0.1 977 1000 612
35 | AAG_AG_10_200_0.1 611 1000 529
36 | AAG_AGG_10_200_0.1 862 1000 629
37 | AAAG_AG_10_200_0.1 759 1000 629
38 | AAAG_AG_AAAG_10_200_0.1 616 1000 1047
39 | AAAG_AG_AGGG_AG_AAAG_10_200_0.1 390 1000 1681
40 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.1 1 1000 2629
41 | AC_AG_10_200_0.15 736 1000 419
42 | ACC_GTT_10_200_0.15 875 1000 614
43 | AAG_AG_10_200_0.15 374 1000 521
44 | AAG_AGG_10_200_0.15 702 1000 636
45 | AAAG_AG_10_200_0.15 564 1000 627
46 | AAAG_AG_AAAG_10_200_0.15 350 1000 1035
47 | AAAG_AG_AGGG_AG_AAAG_10_200_0.15 151 1000 1668
48 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.15 0 1000 2625
49 |
--------------------------------------------------------------------------------
/test_public/tmp/accuracy_uTR_allowance0.03.txt:
--------------------------------------------------------------------------------
1 | AC_AG_10_200_0.0 992 1000 414
2 | ACC_GTT_10_200_0.0 998 1000 617
3 | AAG_AG_10_200_0.0 991 1000 524
4 | AAG_AGG_10_200_0.0 998 1000 617
5 | AAAG_AG_10_200_0.0 982 1000 624
6 | AAAG_AG_AAAG_10_200_0.0 922 1000 1040
7 | AAAG_AG_AGGG_AG_AAAG_10_200_0.0 957 1000 1674
8 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.0 996 1000 2617
9 | AC_AG_10_200_0.01 996 1000 421
10 | ACC_GTT_10_200_0.01 1000 1000 636
11 | AAG_AG_10_200_0.01 991 1000 523
12 | AAG_AGG_10_200_0.01 999 1000 617
13 | AAAG_AG_10_200_0.01 988 1000 628
14 | AAAG_AG_AAAG_10_200_0.01 937 1000 1042
15 | AAAG_AG_AGGG_AG_AAAG_10_200_0.01 939 1000 1671
16 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.01 999 1000 2564
17 | AC_AG_10_200_0.03 989 1000 413
18 | ACC_GTT_10_200_0.03 999 1000 627
19 | AAG_AG_10_200_0.03 953 1000 514
20 | AAG_AGG_10_200_0.03 996 1000 633
21 | AAAG_AG_10_200_0.03 974 1000 625
22 | AAAG_AG_AAAG_10_200_0.03 920 1000 1031
23 | AAAG_AG_AGGG_AG_AAAG_10_200_0.03 862 1000 1670
24 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.03 958 1000 2608
25 | AC_AG_10_200_0.05 984 1000 416
26 | ACC_GTT_10_200_0.05 999 1000 650
27 | AAG_AG_10_200_0.05 901 1000 528
28 | AAG_AGG_10_200_0.05 980 1000 619
29 | AAAG_AG_10_200_0.05 949 1000 640
30 | AAAG_AG_AAAG_10_200_0.05 864 1000 1053
31 | AAAG_AG_AGGG_AG_AAAG_10_200_0.05 789 1000 1704
32 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.05 304 1000 2599
33 | AC_AG_10_200_0.1 914 1000 415
34 | ACC_GTT_10_200_0.1 992 1000 612
35 | AAG_AG_10_200_0.1 712 1000 529
36 | AAG_AGG_10_200_0.1 922 1000 629
37 | AAAG_AG_10_200_0.1 833 1000 629
38 | AAAG_AG_AAAG_10_200_0.1 718 1000 1047
39 | AAAG_AG_AGGG_AG_AAAG_10_200_0.1 531 1000 1681
40 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.1 1 1000 2629
41 | AC_AG_10_200_0.15 836 1000 419
42 | ACC_GTT_10_200_0.15 914 1000 614
43 | AAG_AG_10_200_0.15 450 1000 521
44 | AAG_AGG_10_200_0.15 782 1000 636
45 | AAAG_AG_10_200_0.15 639 1000 627
46 | AAAG_AG_AAAG_10_200_0.15 414 1000 1035
47 | AAAG_AG_AGGG_AG_AAAG_10_200_0.15 229 1000 1668
48 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.15 0 1000 2625
49 |
--------------------------------------------------------------------------------
/test_public/parse_RepeatMasker/main.c:
--------------------------------------------------------------------------------
1 | // Retrieve TRs from a RepeatMasker output file
2 |
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 |
11 | #define BLK 4096
12 |
13 | int main(int argc, char *argv[])
14 | {
15 |
16 | int opt;
17 | int print_homopolymer = 0;
18 | char inputFile[BLK], outputFile[BLK];
19 | while ((opt = getopt(argc, argv, "i:o:h")) != -1) {
20 | switch(opt){
21 | case 'i':
22 | strcpy(inputFile,optarg);
23 | break;
24 | case 'o':
25 | strcpy(outputFile,optarg);
26 | break;
27 | case 'h': // Print homopolymer
28 | print_homopolymer = 1;
29 | break;
30 | default:
31 | exit(EXIT_FAILURE);
32 | }
33 | }
34 |
35 | // Count the frequency of each SNV
36 | FILE *fp_in = fopen(inputFile, "r");
37 | FILE *fp_out = fopen(outputFile, "w");
38 |
39 | char *s = (char *)malloc(sizeof(char)*BLK);
40 | char a1[BLK], a2[BLK], a3[BLK], a4[BLK], ID[BLK], strand[BLK], a_repeat[BLK], repeat_unit[BLK], repeat_class[BLK], others[BLK];
41 | int beginTR, endTR, a_left;
42 |
43 | int i=0;
44 | while (fgets(s, BLK, fp_in) != NULL) {
45 | if(i++ > 2){ // Skip the first three lines
46 | //printf("%s", s);
47 | sscanf(s, "%s %s %s %s %s %d %d (%d) %s %s %s %[^\n]",
48 | a1,a2,a3,a4,ID,&beginTR,&endTR,&a_left,strand,a_repeat,repeat_class,others);
49 | if(strcmp(repeat_class,"Simple_repeat")==0){
50 | sscanf(a_repeat,"(%[^)])n",repeat_unit);
51 | strcpy(a_repeat,repeat_unit);
52 | }
53 | if(1 < strlen(a_repeat) || print_homopolymer == 1){
54 | fprintf(fp_out, "%s\t%d\t%d\t%d\t%d\t%s\t%s\n", ID,(endTR+a_left),beginTR,endTR,(endTR-beginTR+1),a_repeat,repeat_class);
55 | }
56 | }
57 | }
58 | fclose(fp_in);
59 | fclose(fp_out);
60 |
61 | free(s);
62 |
63 | return EXIT_SUCCESS;
64 | }
65 |
--------------------------------------------------------------------------------
/nsop_test/nsop_compression.csv:
--------------------------------------------------------------------------------
1 | AC_AG_10_200_0.0 0.011494 2 174
2 | ACC_GTT_10_200_0.0 0.017021 4 235
3 | AAG_AG_10_200_0.0 0.018182 2 110
4 | AAG_AGG_10_200_0.0 0.022059 3 136
5 | AAAG_AG_10_200_0.0 0.026087 3 115
6 | AAAG_AG_AAAG_10_200_0.0 0.019737 3 152
7 | AAAG_AG_AGGG_AG_AAAG_10_200_0.0 0.018182 5 275
8 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.0 0.202290 53 262
9 | AC_AG_10_200_0.01 0.018305 208 11363
10 | ACC_GTT_10_200_0.01 0.019250 351 18234
11 | AAG_AG_10_200_0.01 0.027468 278 10121
12 | AAG_AGG_10_200_0.01 0.028037 346 12341
13 | AAAG_AG_10_200_0.01 0.035021 366 10451
14 | AAAG_AG_AAAG_10_200_0.01 0.027636 373 13497
15 | AAAG_AG_AGGG_AG_AAAG_10_200_0.01 0.030392 652 21453
16 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.01 0.058439 1993 34104
17 | AC_AG_10_200_0.03 0.011574 362 31278
18 | ACC_GTT_10_200_0.03 0.012996 600 46168
19 | AAG_AG_10_200_0.03 0.015381 433 28152
20 | AAG_AGG_10_200_0.03 0.014526 502 34558
21 | AAAG_AG_10_200_0.03 0.020507 600 29259
22 | AAAG_AG_AAAG_10_200_0.03 0.020685 770 37225
23 | AAAG_AG_AGGG_AG_AAAG_10_200_0.03 0.021847 1331 60923
24 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.03 0.030139 2928 97149
25 | AC_AG_10_200_0.05 0.012278 704 57340
26 | ACC_GTT_10_200_0.05 0.012714 1050 82586
27 | AAG_AG_10_200_0.05 0.015096 781 51734
28 | AAG_AGG_10_200_0.05 0.017897 1117 62414
29 | AAAG_AG_10_200_0.05 0.019228 1050 54607
30 | AAAG_AG_AAAG_10_200_0.05 0.023374 1557 66612
31 | AAAG_AG_AGGG_AG_AAAG_10_200_0.05 0.022826 2537 111143
32 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.05 0.029978 5146 171657
33 | AC_AG_10_200_0.1 0.011350 1643 144763
34 | ACC_GTT_10_200_0.1 0.011454 2348 204987
35 | AAG_AG_10_200_0.1 0.013429 1821 135607
36 | AAG_AGG_10_200_0.1 0.013660 2242 164131
37 | AAAG_AG_10_200_0.1 0.015586 2122 136146
38 | AAAG_AG_AAAG_10_200_0.1 0.017713 3037 171459
39 | AAAG_AG_AGGG_AG_AAAG_10_200_0.1 0.017846 5037 282244
40 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.1 0.023757 10297 433435
41 | AC_AG_10_200_0.15 0.008204 2179 265609
42 | ACC_GTT_10_200_0.15 0.009086 3317 365081
43 | AAG_AG_10_200_0.15 0.010143 2494 245883
44 | AAG_AGG_10_200_0.15 0.011134 3344 300334
45 | AAAG_AG_10_200_0.15 0.012219 3004 245846
46 | AAAG_AG_AAAG_10_200_0.15 0.014500 4542 313242
47 | AAAG_AG_AGGG_AG_AAAG_10_200_0.15 0.014307 7303 510446
48 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.15 0.018779 14522 773303
49 |
--------------------------------------------------------------------------------
/test_public/tmp/time_uTR.txt:
--------------------------------------------------------------------------------
1 | AC_AG_10_200_0.0.fasta 21.147736 sec
2 | ACC_GTT_10_200_0.0.fasta 21.771969 sec
3 | AAG_AG_10_200_0.0.fasta 21.427761 sec
4 | AAG_AGG_10_200_0.0.fasta 20.788939 sec
5 | AAAG_AG_10_200_0.0.fasta 19.791121 sec
6 | AAAG_AG_AAAG_10_200_0.0.fasta 21.229429 sec
7 | AAAG_AG_AGGG_AG_AAAG_10_200_0.0.fasta 23.324635 sec
8 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.0.fasta 27.386393 sec
9 | AC_AG_10_200_0.01.fasta 21.699499 sec
10 | ACC_GTT_10_200_0.01.fasta 22.978775 sec
11 | AAG_AG_10_200_0.01.fasta 22.058565 sec
12 | AAG_AGG_10_200_0.01.fasta 22.848272 sec
13 | AAAG_AG_10_200_0.01.fasta 22.830925 sec
14 | AAAG_AG_AAAG_10_200_0.01.fasta 24.864824 sec
15 | AAAG_AG_AGGG_AG_AAAG_10_200_0.01.fasta 30.533117 sec
16 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.01.fasta 51.646774 sec
17 | AC_AG_10_200_0.03.fasta 22.234692 sec
18 | ACC_GTT_10_200_0.03.fasta 23.793291 sec
19 | AAG_AG_10_200_0.03.fasta 27.605684 sec
20 | AAG_AGG_10_200_0.03.fasta 23.725142 sec
21 | AAAG_AG_10_200_0.03.fasta 23.259960 sec
22 | AAAG_AG_AAAG_10_200_0.03.fasta 26.080307 sec
23 | AAAG_AG_AGGG_AG_AAAG_10_200_0.03.fasta 34.167931 sec
24 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.03.fasta 61.479633 sec
25 | AC_AG_10_200_0.05.fasta 22.052540 sec
26 | ACC_GTT_10_200_0.05.fasta 24.236048 sec
27 | AAG_AG_10_200_0.05.fasta 22.805019 sec
28 | AAG_AGG_10_200_0.05.fasta 24.181370 sec
29 | AAAG_AG_10_200_0.05.fasta 24.580435 sec
30 | AAAG_AG_AAAG_10_200_0.05.fasta 29.060202 sec
31 | AAAG_AG_AGGG_AG_AAAG_10_200_0.05.fasta 40.593407 sec
32 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.05.fasta 79.609673 sec
33 | AC_AG_10_200_0.1.fasta 23.727283 sec
34 | ACC_GTT_10_200_0.1.fasta 27.058134 sec
35 | AAG_AG_10_200_0.1.fasta 25.016262 sec
36 | AAG_AGG_10_200_0.1.fasta 30.091669 sec
37 | AAAG_AG_10_200_0.1.fasta 26.549988 sec
38 | AAAG_AG_AAAG_10_200_0.1.fasta 34.315407 sec
39 | AAAG_AG_AGGG_AG_AAAG_10_200_0.1.fasta 54.932568 sec
40 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.1.fasta 125.210846 sec
41 | AC_AG_10_200_0.15.fasta 24.514071 sec
42 | ACC_GTT_10_200_0.15.fasta 25.871542 sec
43 | AAG_AG_10_200_0.15.fasta 23.046970 sec
44 | AAG_AGG_10_200_0.15.fasta 28.966978 sec
45 | AAAG_AG_10_200_0.15.fasta 28.361736 sec
46 | AAAG_AG_AAAG_10_200_0.15.fasta 39.555313 sec
47 | AAAG_AG_AGGG_AG_AAAG_10_200_0.15.fasta 67.106201 sec
48 | AGGGG_AAAAGAAAGAGAGGG_AGGGG_10_200_0.15.fasta 164.649063 sec
49 |
--------------------------------------------------------------------------------
/smooth.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include "uTR.h"
8 |
9 | int majority(int *blocks, int start, int end, int *unit_freq, int numKeyUnits){
10 | for(int i=0; i> $acc_table_uTR #"Min Max num unit occ"
42 | $check_uTR -i $TR_uTR_decomp_file -a $allowance >> $acc_table_uTR
43 |
44 | # RepeatMasker
45 | parse_RM=$tmp"/"$run_name".fasta.result.txt"
46 | match_RM=$tmp"/"$run_name".fasta.match.txt"
47 | $checkRM -i $parse_RM -a $allowance -p > $match_RM
48 | echo -n -e $run_name" " >> $acc_table_RM
49 | tail -n1 $match_RM >> $acc_table_RM
50 |
51 | # TRF
52 | result_TRF=$tmp"/"$run_name"_TRF.txt"
53 | accuracy_TRF=$tmp"/"$run_name"_TRF_acc.txt"
54 | $checkTRF -i $result_TRF -a $allowance -o $accuracy_TRF
55 | echo -n -e $run_name" " >> $acc_table_TRF
56 | tail -n1 $accuracy_TRF >> $acc_table_TRF
57 | done
58 | done
59 | done
60 | done
61 | done
62 |
63 | exit 0
64 |
--------------------------------------------------------------------------------
/Kawahara_nsop_Z.cpp:
--------------------------------------------------------------------------------
1 | // Riki Kawahara's algorithm that lists all non-self-overlapping substrings for a given string S in O(n^2)-time
2 |
3 | //#define DUMP_Kawahara_nsop_Z
4 |
5 | #ifndef DUMP_Kawahara_nsop_Z
6 | #include "uTR.h"
7 | #endif
8 |
9 | #include
10 | #include
11 | #include
12 | using namespace std;
13 |
14 | void dump_int_array(int *a, int len, string name){
15 | cout << name << "\t";
16 | for(int i=0; i
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include "uTR.h"
9 |
10 | void put_qualified_read(Read *currentRead, int i){
11 | int numReads, diameter, radius;
12 | // The annotation starts with a space " " !!!
13 | sscanf(currentRead->ID, " GroupSize = %d, Diameter = %d, RadiusFromCentroid = %d", &numReads, &diameter, &radius);
14 | Qreads[i].numReads = numReads;
15 | Qreads[i].len = currentRead->len;
16 | Qreads[i].numKeyUnits = currentRead->numKeyUnits;
17 | Qreads[i].mosaic_mode = currentRead->mosaic_mode;
18 | }
19 |
20 | int main(int argc, char *argv[])
21 | {
22 | char inputFile[500]; // For the input file name
23 | char repUnit[1000]; // For storing a representative unit
24 | char outputFile[500]; // For the output file name
25 | int inputFile_given = 0;
26 | int repUnit_given = 0;
27 | int print_time = 0;
28 | int print_EDDC = 0;
29 | int opt;
30 | while ((opt = getopt(argc, argv, "f:u:o:t")) != -1) {
31 | switch(opt){
32 | case 'f':
33 | strcpy(inputFile,optarg); inputFile_given = 1; break;
34 | case 'u':
35 | strcpy(repUnit, optarg); repUnit_given= 1; break;
36 | case 'o':
37 | strcpy(outputFile, optarg); print_EDDC = 1; break;
38 | case 't':
39 | print_time = 1; break;
40 | default:
41 | fprintf(stderr, "Usage: uTR -f (-u ) -o