├── .gitignore ├── 01_dna ├── .gitignore ├── Makefile ├── README.md ├── solution1_iter.py ├── solution2_unit.py ├── solution3_str_count.py ├── solution4_dict.py ├── solution5_dict.py ├── solution6_defaultdict.py ├── solution7_counter.py └── tests │ ├── dna_test.py │ └── inputs │ ├── input1.txt │ ├── input2.txt │ └── input3.txt ├── 02_rna ├── .gitignore ├── Makefile ├── README.md ├── all_test.sh ├── genseq.py ├── solution1_str_replace.py ├── solution2_re_sub.py └── tests │ ├── inputs │ ├── input1.txt │ ├── input2.txt │ └── input3.txt │ └── rna_test.py ├── 03_revc ├── .gitignore ├── Makefile ├── README.md ├── sample.dna ├── solution1_for_loop.py ├── solution2_dict_lookup_list.py ├── solution2_dict_lookup_string.py ├── solution3_list_comprehension.py ├── solution4_str_translate.py ├── solution5_bio_seq.py └── tests │ ├── inputs │ ├── input1.txt │ ├── input2.txt │ ├── output1.txt │ └── output2.txt │ └── revc_test.py ├── 04_fib ├── .gitignore ├── Makefile ├── README.md ├── solution1_list_closure.py ├── solution1_list_function.py ├── solution2_generator.py ├── solution2_generator_for_loop.py ├── solution2_generator_islice.py ├── solution3_recursion.py ├── solution3_recursion_lru_cache.py ├── solution3_recursion_memoize.py ├── solution3_recursion_memoize_decorator.py └── tests │ └── fib_test.py ├── 05_gc ├── .gitignore ├── Makefile ├── README.md ├── bench.sh ├── gc.orig ├── genseq.py ├── solution1_list.py ├── solution2_unit.py ├── solution3_max_var.py ├── solution4_list_comp.py ├── solution5_filter.py ├── solution6_map.py ├── solution7_re.py ├── solution8_list_comp_map.py └── tests │ ├── cgc_test.py │ └── inputs │ ├── 1.fa │ └── 2.fa ├── 06_hamm ├── .gitignore ├── Makefile ├── README.md ├── solution1_abs_iterate.py ├── solution2_unit_test.py ├── solution3_zip.py ├── solution4_zip_longest.py ├── solution5_list_comp.py ├── solution6_filter.py ├── solution7_map.py ├── solution8_operator_starmap.py └── tests │ ├── hamm_test.py │ └── inputs │ ├── 1.txt │ └── 2.txt ├── 07_prot 
├── .gitignore ├── Makefile ├── README.md ├── bench.sh ├── solution1_for.py ├── solution2_unit.py ├── solution3_list_comp_slice.py ├── solution4_map_takewhile.py ├── solution5_bio_seq.py └── tests │ ├── inputs │ ├── input1.txt │ └── input1.txt.out │ └── prot_test.py ├── 08_subs ├── .gitignore ├── Makefile ├── README.md ├── bench.sh ├── solution1_str_find.py ├── solution2_str_index.py ├── solution3_functional.py ├── solution4_kmers_functional.py ├── solution4_kmers_imperative.py ├── solution5_re.py └── tests │ ├── inputs │ ├── input1.txt │ └── input1.txt.out │ └── subs_test.py ├── 09_grph ├── .gitignore ├── Makefile ├── README.md ├── log.py ├── rosalind_grph.txt ├── solution1.py ├── solution2_graph.py └── tests │ ├── grph_test.py │ └── inputs │ ├── 1.fa │ ├── 1.fa.3.out │ ├── 1.fa.4.out │ ├── 1.fa.5.out │ ├── 2.fa │ ├── 2.fa.3.out │ ├── 2.fa.4.out │ ├── 2.fa.5.out │ ├── 3.fa │ ├── 3.fa.3.out │ ├── 3.fa.4.out │ └── 3.fa.5.out ├── 10_lcsm ├── .gitignore ├── Makefile ├── README.md ├── binsearch.py ├── genseq.py ├── scan_fh.py ├── scan_mem.py ├── solution1_kmers_functional.py ├── solution1_kmers_imperative.py ├── solution2_binary_search.py └── tests │ ├── inputs │ ├── 1.fa │ ├── 2.fa │ ├── empty.fa │ └── none.fa │ └── lcsm_test.py ├── 11_mprt ├── .gitignore ├── Makefile ├── README.md ├── fetch_fasta.sh ├── solution1_regex.py ├── solution2_manual.py └── tests │ ├── inputs │ ├── 1.txt │ ├── 1.txt.out │ ├── 2.txt │ └── 2.txt.out │ └── mprt_test.py ├── 12_mrna ├── .gitignore ├── Makefile ├── README.md ├── show_patterns.py ├── solution1_dict.py ├── solution2_rev_dict.py ├── solution3_slim_dict.py └── tests │ ├── inputs │ ├── 1.txt │ ├── 2.txt │ └── 3.txt │ └── mrna_test.py ├── 13_revp ├── .gitignore ├── Makefile ├── README.md ├── common.py ├── solution1_zip_enumerate.py ├── solution2_operator_eq_if.py ├── solution2_operator_eq_lc.py ├── solution3_revp.py └── tests │ ├── inputs │ ├── 1.fa │ ├── 1.fa.out │ ├── 2.fa │ ├── 2.fa.out │ ├── empty.fa │ └── empty.fa.out │ └── 
revp_test.py ├── 14_orf ├── .gitignore ├── Makefile ├── README.md ├── solution1_iterate_set.py ├── solution2_str_partition.py ├── solution3_regex.py └── tests │ ├── inputs │ ├── 1.fa │ ├── 1.fa.out │ ├── 2.fa │ ├── 2.fa.out │ ├── 3.fa │ ├── 3.fa.out │ └── empty.fa │ └── orf_test.py ├── 15_seqmagique ├── .gitignore ├── Makefile ├── README.md ├── mk-outs.sh ├── seqmagique_rich.py ├── solution1.py ├── tests │ ├── inputs │ │ ├── 1.fa │ │ ├── 1.fa.grid.out │ │ ├── 1.fa.latex.out │ │ ├── 1.fa.latex_booktabs.out │ │ ├── 1.fa.latex_raw.out │ │ ├── 1.fa.mediawiki.out │ │ ├── 1.fa.orgtbl.out │ │ ├── 1.fa.out │ │ ├── 1.fa.pipe.out │ │ ├── 1.fa.plain.out │ │ ├── 1.fa.rst.out │ │ ├── 1.fa.simple.out │ │ ├── 2.fa │ │ ├── 2.fa.grid.out │ │ ├── 2.fa.latex.out │ │ ├── 2.fa.latex_booktabs.out │ │ ├── 2.fa.latex_raw.out │ │ ├── 2.fa.mediawiki.out │ │ ├── 2.fa.orgtbl.out │ │ ├── 2.fa.out │ │ ├── 2.fa.pipe.out │ │ ├── 2.fa.plain.out │ │ ├── 2.fa.rst.out │ │ ├── 2.fa.simple.out │ │ ├── all.fa.out │ │ ├── checksums.md5 │ │ ├── empty.fa │ │ └── empty.fa.out │ └── seqmagique_test.py └── unit.py ├── 16_fastx_grep ├── .gitignore ├── .out ├── Makefile ├── README.md ├── asciitbl.py ├── mk-outs.sh ├── solution.py └── tests │ ├── fastx_grep_test.py │ └── inputs │ ├── empty.fa │ ├── empty.fa.out │ ├── lsu.fa │ ├── lsu.fq │ ├── lsu.fq.2fa.out │ ├── lsu.fq.fa.out │ ├── lsu.fq.i.lower.out │ ├── lsu.fq.i.upper.out │ ├── lsu.fq.lower.out │ ├── lsu.fq.upper.out │ └── lsu.fx ├── 17_synth ├── .gitignore ├── Makefile ├── README.md ├── kmer_tiler.py ├── mk-outs.sh ├── solution.py └── tests │ ├── inputs │ ├── CAM_SMPL_GS108.fa │ ├── CAM_SMPL_GS108.fa.default.out │ ├── CAM_SMPL_GS108.fa.n1.k4.out │ ├── CAM_SMPL_GS108.fa.n1.k5.out │ ├── CAM_SMPL_GS108.fa.n1.m20.x40.out │ ├── CAM_SMPL_GS108.fa.n1.out │ ├── CAM_SMPL_GS112.fa │ ├── lsu.fq │ ├── lsu.fq.n1.out │ └── mult.n10.out │ ├── synth_test.py │ └── unit_test.py ├── 18_fastx_sampler ├── .gitignore ├── Makefile ├── README.md ├── requirements.txt ├── 
sampler_dir_reader.py ├── sampler_gzip_reader.py ├── solution.py └── tests │ ├── inputs │ └── .gitkeep │ └── sampler_test.py ├── 19_blastomatic ├── .gitignore ├── Makefile ├── README.md ├── solution1_manual.py ├── solution2_dict_writer.py ├── solution3_pandas.py ├── solution4_pandas_join.py └── tests │ ├── blastomatic_test.py │ ├── inputs │ ├── gos.fa │ ├── hits1.csv │ ├── hits2.csv │ └── meta.csv │ └── unit_test.py ├── LICENSE ├── Makefile ├── README.md ├── SETUP.md ├── app01_makefiles ├── .gitignore ├── c-hello │ ├── .gitignore │ ├── Makefile │ └── hello.c ├── hello │ └── Makefile ├── pie │ ├── .gitignore │ ├── Makefile │ ├── combine.sh │ └── cook.sh └── yeast │ ├── .gitignore │ ├── Makefile │ ├── download.sh │ ├── palinsreg.txt │ └── test.py ├── bin ├── all_test.py └── uber_test.sh ├── docker ├── Dockerfile310 ├── Dockerfile38 ├── Dockerfile390 ├── Dockerfile391 ├── Makefile └── README.md ├── mypy.ini ├── pylintrc └── requirements.txt /01_dna/.gitignore: -------------------------------------------------------------------------------- 1 | dna.py 2 | -------------------------------------------------------------------------------- /01_dna/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test 2 | 3 | test: 4 | python3 -m pytest -xv --flake8 --pylint --pylint-rcfile=../pylintrc --mypy dna.py tests/dna_test.py 5 | 6 | all: 7 | ../bin/all_test.py dna.py 8 | -------------------------------------------------------------------------------- /01_dna/README.md: -------------------------------------------------------------------------------- 1 | # Counting tetranucleotide frequency 2 | 3 | http://rosalind.info/problems/dna/ 4 | 5 | Create a program called `dna.py` that will accept a sequence of DNA as a single positional argument. 
def get_args() -> Args:
    """ Get command-line arguments

    Parses the single positional ``DNA`` argument. When the value names an
    existing file, the sequence is read from that file (with trailing
    whitespace stripped) instead of being used literally.

    Returns:
        Args: the DNA sequence to analyze.
    """

    parser = argparse.ArgumentParser(
        description='Tetranucleotide frequency',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('dna', metavar='DNA', help='Input DNA sequence')

    args = parser.parse_args()

    if os.path.isfile(args.dna):
        # A context manager closes the handle deterministically rather than
        # leaving it to the garbage collector
        with open(args.dna) as fh:
            args.dna = fh.read().rstrip()

    return Args(args.dna)
""" Make a jazz noise here """ 35 | 36 | args = get_args() 37 | count_a, count_c, count_g, count_t = count(args.dna) 38 | print(f'{count_a} {count_c} {count_g} {count_t}') 39 | 40 | 41 | # -------------------------------------------------- 42 | def count(dna: str) -> Tuple[int, int, int, int]: 43 | """ Count bases in DNA """ 44 | 45 | return (dna.count('A'), dna.count('C'), dna.count('G'), dna.count('T')) 46 | 47 | 48 | # -------------------------------------------------- 49 | def test_count() -> None: 50 | """ Test count """ 51 | 52 | assert count('') == (0, 0, 0, 0) 53 | assert count('123XYZ') == (0, 0, 0, 0) 54 | assert count('A') == (1, 0, 0, 0) 55 | assert count('C') == (0, 1, 0, 0) 56 | assert count('G') == (0, 0, 1, 0) 57 | assert count('T') == (0, 0, 0, 1) 58 | assert count('ACCGGGTTTT') == (1, 2, 3, 4) 59 | 60 | 61 | # -------------------------------------------------- 62 | if __name__ == '__main__': 63 | main() 64 | -------------------------------------------------------------------------------- /01_dna/solution4_dict.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ Tetranucleotide frequency """ 3 | 4 | import argparse 5 | import os 6 | from typing import NamedTuple, Tuple 7 | 8 | 9 | class Args(NamedTuple): 10 | """ Command-line arguments """ 11 | dna: str 12 | 13 | 14 | # -------------------------------------------------- 15 | def get_args() -> Args: 16 | """ Get command-line arguments """ 17 | 18 | parser = argparse.ArgumentParser( 19 | description='Tetranucleotide frequency', 20 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 21 | 22 | parser.add_argument('dna', metavar='DNA', help='Input DNA sequence') 23 | 24 | args = parser.parse_args() 25 | 26 | if os.path.isfile(args.dna): 27 | args.dna = open(args.dna).read() 28 | 29 | return Args(args.dna) 30 | 31 | 32 | # -------------------------------------------------- 33 | def main() -> None: 34 | """ Make a jazz noise here 
""" 35 | 36 | args = get_args() 37 | count_a, count_c, count_g, count_t = count(args.dna) 38 | print(f'{count_a} {count_c} {count_g} {count_t}') 39 | 40 | 41 | # -------------------------------------------------- 42 | def count(dna: str) -> Tuple[int, int, int, int]: 43 | """ Count bases in DNA """ 44 | 45 | counts = {} 46 | for base in dna: 47 | if base not in counts: 48 | counts[base] = 0 49 | counts[base] += 1 50 | 51 | return (counts.get('A', 0), 52 | counts.get('C', 0), 53 | counts.get('G', 0), 54 | counts.get('T', 0)) 55 | 56 | 57 | # -------------------------------------------------- 58 | def test_count() -> None: 59 | """ Test count """ 60 | 61 | assert count('') == (0, 0, 0, 0) 62 | assert count('123XYZ') == (0, 0, 0, 0) 63 | assert count('A') == (1, 0, 0, 0) 64 | assert count('C') == (0, 1, 0, 0) 65 | assert count('G') == (0, 0, 1, 0) 66 | assert count('T') == (0, 0, 0, 1) 67 | assert count('ACCGGGTTTT') == (1, 2, 3, 4) 68 | 69 | 70 | # -------------------------------------------------- 71 | if __name__ == '__main__': 72 | main() 73 | -------------------------------------------------------------------------------- /01_dna/solution5_dict.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ Tetranucleotide frequency """ 3 | 4 | import argparse 5 | import os 6 | from typing import NamedTuple, Dict 7 | 8 | 9 | class Args(NamedTuple): 10 | """ Command-line arguments """ 11 | dna: str 12 | 13 | 14 | # -------------------------------------------------- 15 | def get_args() -> Args: 16 | """ Get command-line arguments """ 17 | 18 | parser = argparse.ArgumentParser( 19 | description='Tetranucleotide frequency', 20 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 21 | 22 | parser.add_argument('dna', metavar='DNA', help='Input DNA sequence') 23 | 24 | args = parser.parse_args() 25 | 26 | if os.path.isfile(args.dna): 27 | args.dna = open(args.dna).read() 28 | 29 | return Args(args.dna) 30 | 31 
def count(dna: str) -> Dict[str, int]:
    """ Tally the four DNA bases in a sequence

    Every one of A, C, G, and T is present in the result (zero when absent
    from the input); any other character is ignored.
    """

    tally = dict.fromkeys('ACGT', 0)

    for symbol in dna:
        # Skip anything that is not one of the four tracked bases
        if symbol not in tally:
            continue
        tally[symbol] = tally[symbol] + 1

    return tally
def count(dna: str) -> Dict[str, int]:
    """ Tally every character found in a sequence

    The result is a ``defaultdict(int)``: characters that occur are mapped
    to their frequency, and looking up an unseen character yields 0.
    """

    seen: Dict[str, int] = defaultdict(int)

    for symbol in dna:
        # Missing keys start at int() == 0, so no membership test is needed
        seen[symbol] = 1 + seen[symbol]

    return seen
def main() -> None:
    """ Print the A, C, G, and T frequencies of the input sequence """

    sequence = get_args().dna
    frequencies = Counter(sequence)

    # A Counter reports 0 for absent keys, so bases missing from the input
    # still print as 0; print() separates the four values with spaces
    print(*(frequencies.get(base, 0) for base in 'ACGT'))
def test_arg() -> None:
    """ Uses command-line arg

    For each fixture, the sequence is read from the input file and the
    sequence text itself is passed on the command line; the program must
    exit with status 0 and print the expected base counts.
    """

    for file, expected in [TEST1, TEST2, TEST3]:
        # Close the fixture promptly instead of leaking the handle
        with open(file) as fh:
            dna = fh.read()

        retval, out = getstatusoutput(f'{RUN} {dna}')
        assert retval == 0
        assert out == expected
TTGGATAACATTTGCCCCGTGTTAGTGTTCGAAATCAGCAGGGATAAGCACAAGTCCATGGGGTCCCTCTAGGGCGTTGTTGAGCGGAGAATGAAACACATGTTTGGCGAGGTTACTGTCGTCCACGAACATGTACTTACGTCTCTAACCCGTAGGCTCCCCACCAAAAACCACGGGCGCGGTCATACCATGCTCGCGGTTAGCCTGAAGGCGGTGTTCGCGTCCCTCCTCCTCCTGGCTAGTTATCGACCGCATGGCAGAGTTTGCGGAGCTTCGAGTCCACCCACCAGTTGTGCGAGTTGGCCGCTGTACCTTCCGGTGAATCGACTCCGATAAAGAAACTCTTGCGCTGGATTCTACTCGGCGCCCAAGTATGCGTTCGTTCCAAGCGAGGCTGTAGTAGCTAAATGGCAAAATAGCGACGATAGACCTCCAAGAGATATGCCGTACCTAGTTACTTGAATTATCGGCACGATCGCTTATTTGGATTACTTCGCTCGCGAATGAGAGCCGCCGGAGGATGTGTGCAAAGCTGAGTAGGAAGTTTGGTTACTCGCTCGTGTCCGTTTCACCGGCACTCGGAGATTATAGAATGAGGTGGATACGGTGTATTGAAAATTCAGTCAGATGACGTCCGCTTGTACTGTAGCTAATTGCGTCGACGCCGGGCATATTGGGCACTAATGCTCCCCGTCTCTCTATCGTATCGTCGATCGAATCTATTTTGTTTCATCCATCTTCTGAAATGCATCGGTCCACCTTCTTTTAAAGGCGTGATTCAAATTCTATGCTCCATATACATCTGGTCCGGCGCTCGCCGTCTCCGGAGGGCTCGGTCTTTGTGCAGCTTATTAGACTGTCCGGGAAGGCAGAACTGGAATCCCTTGGATCCCTTGTTCAGGTGCTTAGC 2 | -------------------------------------------------------------------------------- /02_rna/.gitignore: -------------------------------------------------------------------------------- 1 | out 2 | seq.txt 3 | rna.py 4 | -------------------------------------------------------------------------------- /02_rna/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test 2 | 3 | test: 4 | python3 -m pytest -xv --flake8 --pylint --pylint-rcfile=../pylintrc --mypy rna.py tests/rna_test.py 5 | 6 | all: 7 | ../bin/all_test.py rna.py 8 | -------------------------------------------------------------------------------- /02_rna/all_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -eu -o pipefail 4 | 5 | PRG="rna.py" 6 | for FILE in solution*.py; do 7 | echo "==> ${FILE} <==" 8 | cp "$FILE" "$PRG" 9 | make test 10 | done 11 | 12 | echo "Done." 
def main() -> None:
    """ Write the requested number of random DNA sequences, one per line """

    args = get_args()
    alphabet = 'ACGT'
    remaining = args.num_seqs

    while remaining > 0:
        # One random.choice() call per base, exactly seq_len bases per line
        sequence = ''.join(
            random.choice(alphabet) for _ in range(args.seq_len))
        print(sequence, file=args.out_file)
        remaining -= 1

    print(f'Done, see "{args.out_file.name}".')
def main() -> None:
    """ Transcribe every input file into the output directory

    Each input file is copied line by line into a file of the same basename
    inside ``args.out_dir``, with every ``T`` replaced by ``U``. A summary
    of the number of sequences (lines) and files processed is printed at
    the end.
    """

    args = get_args()

    # exist_ok avoids the check-then-create race of isdir() + makedirs()
    os.makedirs(args.out_dir, exist_ok=True)

    num_files, num_seqs = 0, 0
    for fh in args.files:
        num_files += 1
        out_file = os.path.join(args.out_dir, os.path.basename(fh.name))

        # The context manager flushes and closes the output even when a
        # read or write fails part-way through the file
        with open(out_file, 'wt') as out_fh:
            for dna in fh:
                num_seqs += 1
                # Lines keep their own newline, so write() is sufficient
                out_fh.write(dna.replace('T', 'U'))

    print(f'Done, wrote {num_seqs} sequence{"" if num_seqs == 1 else "s"} '
          f'in {num_files} file{"" if num_files == 1 else "s"} '
          f'to directory "{args.out_dir}".')
def main() -> None:
    """ Transcribe every input file into the output directory

    Each line of each input file has its trailing whitespace removed and
    every ``T`` replaced by ``U`` before being written, one per line, to a
    file of the same basename inside ``args.out_dir``. A summary of the
    number of sequences (lines) and files processed is printed at the end.
    """

    args = get_args()

    # exist_ok avoids the check-then-create race of isdir() + makedirs()
    os.makedirs(args.out_dir, exist_ok=True)

    num_files, num_seqs = 0, 0
    for fh in args.files:
        num_files += 1
        out_file = os.path.join(args.out_dir, os.path.basename(fh.name))

        # The context manager flushes and closes the output even when a
        # read or write fails part-way through the file
        with open(out_file, 'wt') as out_fh:
            for dna in fh:
                num_seqs += 1
                # print() restores the newline that rstrip() removed
                print(re.sub('T', 'U', dna.rstrip()), file=out_fh)

    print(f'Done, wrote {num_seqs} sequence{"" if num_seqs == 1 else "s"} '
          f'in {num_files} file{"" if num_files == 1 else "s"} '
          f'to directory "{args.out_dir}".')
-------------------------------------------------------------------------------- 1 | TTAGCCCAGACTAGGACTTT 2 | AACTAGTCAAAGTACACC 3 | -------------------------------------------------------------------------------- /02_rna/tests/inputs/input3.txt: -------------------------------------------------------------------------------- 1 | CTTAGGTCAGTGGTCTCTAAACTTTCGGTTCTGTCGTCTTCATAGGCAAATTTTTGAACCGGCAGACAAGCTAATCCCTGTGCGGTTAGCTCAAGCAACAGAATGTCCGATCTTTGAACTTCCTAACGAACCGAACCTACTATAATTACATACGAATAATGTATGGGCTAGCGTTGGCTCATCATCAAGTCTGCGGTGAAATGGGAACATATTCGCATTGCATATAGGGCGTATCTGACGATCGATTCGAGTTGGCTAGTCGTACCAAATGATTATGGGCTGGAGGGCCAATGTATACGTCAGCCAGGCTAAACCACTGGACCGCTTGCAATCCATAGGAAGTAAAATTACCCTTTTTAAACTCTCTAAGATGTGGCGTCTCGTTCTTAAGGAGTAATGAGACTGTGACAACATTGGCAAGCACAGCCTCAGTATAGCTACAGCACCGGTGCTAATAGTAAATGCAAACACCGTTTCAAGAGCCGAGCCTTTTTTTAATGCAAGGTGACTTCAGAGGGAGTAAATCGTGGCCGGGGACTGTCCAGAGCAATGCATTCCCGAGTGCGGGTACCCGTGGTGTGAGAGGAATCGATTTCGCGTGTGATACCATTAATGGTCCTGTACTACTGTCAGTCAGCTTGATTTGAAGTCGGCCGACAAGGTTGGTACATAATGGGCTTACTGGGAGCTTAGGTTAGCCTCTGGAAAACTTTAGAATTTATATGGGTGTTTCTGTGTTCGTACAGGCCCCAGTCGGGCCATCGTTGTTGAGCATAGACCGGTGTAACCTTAATTATTCACAGGCCAATCCCCGTATACGCATCTGAAAGGCACACCGCCTATTACCAATTTGCGCTTCCTTACATAGGAGGACCTGTTATCGTCTTCTCAATCGCTGAGTTACCTTAAAACTAGGATC 2 | 
ACCGAGTAAAAGGCGACGGTTCGTTTCCGAACCTATTTGCTCTTATTTCTACGGGCTGCTAGTGTTGTAGGCTGCAAAACCTACGTAGTCCCATCTATCATGCTCGACCCTACGAGGCTAATGTCTTGTCAGAGGCCCGTCATGTGCCACGTACATACACCAATGTATACCGCTCTAGCGGTTTGGTGTAGTAGGACTTGTGTATGCACGCTACAGCGAACAACGTTGATCCCTAACTGAAGTCGGGCTCCGCAGGCCTACTCACGCCGTTTCTATAGGTTGAGCCGCATCAAACATTGGGTTGAGTCTCGAGTATAGAGGAAGGCTCTGGTGGCAGGCGCGACGTTGATCGGGAGGAGTATGGATGGTGATCAATCCCCGTGCCAATCGCGAGTACTACAGGAGGAGGGGGCGGCTCTGTTCAATCATCACCCGTTCCATCACACGGGCAGCACAGTTGACCTCCCGAGCCGTCTCACGGACCTAGTGGCAACAGGTGTATTGAAGCGCCGGGAATAGTCATACCCGTGGGCTTGATTGAGAGACCGAAATTCCGACCGCCAAAACTGCTGATATCGTACGCCTTACTACAAAACAAATGACGTCACTACCGGCCAGGGACAAGCTTATTAATTAAGTAGGAACCCTATACCTTGCACATCCTAAATCTAGCAGCGGGTCCAGGATTGGTTCCAGTCCAACGCGCGATGCGCGTCAAGCTAGGCGAATGACCACGGTCGAAACACCACTTATGTGACCCACCTTGGCCAACTCTCCCGATTCTCCTCGCTACTATCTTGAAGGTCACTGAGAATATCCCTTATGGGTCGCATACGGAGACAGCCGCAGGAGCCTTAACGGAGAATACGCCAATACTATGTTCTGGGTCGGTGGGTGTAATGCGATGCAATCCGATCGTGCGAACGTTCCCTTTGATGACTATAGGGTCTAGTGATCGTACATGTGC 3 | -------------------------------------------------------------------------------- /03_revc/.gitignore: -------------------------------------------------------------------------------- 1 | revc.py 2 | -------------------------------------------------------------------------------- /03_revc/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test 2 | 3 | test: 4 | python3 -m pytest -xv --flake8 --pylint --pylint-rcfile=../pylintrc --mypy revc.py tests/revc_test.py 5 | 6 | all: 7 | ../bin/all_test.py revc.py 8 | -------------------------------------------------------------------------------- /03_revc/sample.dna: -------------------------------------------------------------------------------- 1 | AAAACCCGGT 2 | -------------------------------------------------------------------------------- /03_revc/solution1_for_loop.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ Reverse complement """ 3 | 4 | 
def get_args() -> Args:
    """ Get command-line arguments

    Parses one positional argument, DNA.  When the value names an existing
    file, the sequence is read from that file with trailing whitespace
    stripped; otherwise the value itself is used as the sequence.

    Returns:
        Args: the DNA sequence to process.
    """

    parser = argparse.ArgumentParser(
        description='Print the reverse complement of DNA',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('dna', metavar='DNA', help='Input sequence or file')

    args = parser.parse_args()

    if os.path.isfile(args.dna):
        # Use a context manager so the handle is closed as soon as the
        # contents are read rather than whenever it is garbage collected.
        with open(args.dna) as fh:
            args.dna = fh.read().rstrip()

    return Args(args.dna)
def get_args() -> Args:
    """ Get command-line arguments

    Parses one positional argument, DNA.  When the value names an existing
    file, the sequence is read from that file with trailing whitespace
    stripped; otherwise the value itself is used as the sequence.

    Returns:
        Args: the DNA sequence to process.
    """

    parser = argparse.ArgumentParser(
        description='Print the reverse complement of DNA',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('dna', metavar='DNA', help='Input sequence or file')

    args = parser.parse_args()

    if os.path.isfile(args.dna):
        # Use a context manager so the handle is closed as soon as the
        # contents are read rather than whenever it is garbage collected.
        with open(args.dna) as fh:
            args.dna = fh.read().rstrip()

    return Args(args.dna)
def get_args() -> Args:
    """ Get command-line arguments

    Parses one positional argument, DNA.  When the value names an existing
    file, the sequence is read from that file with trailing whitespace
    stripped; otherwise the value itself is used as the sequence.

    Returns:
        Args: the DNA sequence to process.
    """

    parser = argparse.ArgumentParser(
        description='Print the reverse complement of DNA',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('dna', metavar='DNA', help='Input sequence or file')

    args = parser.parse_args()

    if os.path.isfile(args.dna):
        # Use a context manager so the handle is closed as soon as the
        # contents are read rather than whenever it is garbage collected.
        with open(args.dna) as fh:
            args.dna = fh.read().rstrip()

    return Args(args.dna)
def get_args() -> Args:
    """ Get command-line arguments

    Parses one positional argument, DNA.  When the value names an existing
    file, the sequence is read from that file with trailing whitespace
    stripped; otherwise the value itself is used as the sequence.

    Returns:
        Args: the DNA sequence to process.
    """

    parser = argparse.ArgumentParser(
        description='Print the reverse complement of DNA',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('dna', metavar='DNA', help='Input sequence or file')

    args = parser.parse_args()

    if os.path.isfile(args.dna):
        # Use a context manager so the handle is closed as soon as the
        # contents are read rather than whenever it is garbage collected.
        with open(args.dna) as fh:
            args.dna = fh.read().rstrip()

    return Args(args.dna)
def get_args() -> Args:
    """ Get command-line arguments

    Parses one positional argument, DNA.  When the value names an existing
    file, the sequence is read from that file with trailing whitespace
    stripped; otherwise the value itself is used as the sequence.

    Returns:
        Args: the DNA sequence to process.
    """

    parser = argparse.ArgumentParser(
        description='Print the reverse complement of DNA',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('dna', metavar='DNA', help='Input sequence or file')

    args = parser.parse_args()

    if os.path.isfile(args.dna):
        # Use a context manager so the handle is closed as soon as the
        # contents are read rather than whenever it is garbage collected.
        with open(args.dna) as fh:
            args.dna = fh.read().rstrip()

    return Args(args.dna)
TACATTACAGTCGCTGGGTAAGACGCCTTCGATAGGTCCCTCAATGCGGCGGACGGGGGTCAATCCCCTAGAACTTAGCCGTATTTGAGGGAATCAGCTGCTCTACGGTAGGTGTCTGTAGCCCTTCCGTTCTATAAAAGTTGACGAATGTTGCAGTGGACTCTTATTATACAGCGGCACCCTTGACACCGCCCGTGTAGAGCGTTCAGGCCCGGAGAGTACGGAGGATGTGGAGAACACTACTGGTTCGACCCCGTAATCTATGCACTTGCCCTATGTGAGGCCATGACATCCGAACCGCTAATTATCGACCGACCATTGGTCCACTGCTCCGCTCTGACTCTGGGGCATAGACTATGATGTTCACGGCGTGTGTACTGCTTTTTTAGGCACGGCCGGGCTGCTCTGTAAGAGGAAATCACGGTGCAACCAGTCAGTAGAGGGCTTTGCTAGCTACGGATAGGAGATAAAGGTCTTTCCATACACGGTGTGTATACAGCTGTTCCCCGTGTACAAAGGGCCAACAGCATGCTGGGCATTTGAGACGTCAGTGGCCCGAAGCTTAGAAATTCTAACAGTCTCCTCAAGAGCATGGAGCTTTCATCATCTCAAATTGGACAAGCTGTACCAGACTTATTCTGCTGTGATTCACCATCTGAGCTAAACCTGTTACCGACAGACAGTAAAAATAGAGGTTACTTATACTCCAAGCATAATCTTGCTAGGGTAGTTGGACTGTCAAGCATCAAATATCCAATTGCCGACGCCCTTCCGATAATAATGATACCAGCACGCGGAAGATATCCACAGATTCGATGTGTCGAGAATGGACCCCTTCGGGGACAAGCTGAGCCAGCCAGAAATTAAGCCGCAGTGGCTACGAACACGGTCTTTACACACAGCACGTGTTGGAGCATGGGGAGGTCTGGCGAACTCCCCTATGCTCGCTGTTTTTAGGCTGCTTAACACGATCCTAACAGAGTACGAGAGAGCTACAAC 2 | -------------------------------------------------------------------------------- /03_revc/tests/inputs/output1.txt: -------------------------------------------------------------------------------- 1 | ACCGGGTTTT 2 | -------------------------------------------------------------------------------- /03_revc/tests/inputs/output2.txt: -------------------------------------------------------------------------------- 1 | 
GTTGTAGCTCTCTCGTACTCTGTTAGGATCGTGTTAAGCAGCCTAAAAACAGCGAGCATAGGGGAGTTCGCCAGACCTCCCCATGCTCCAACACGTGCTGTGTGTAAAGACCGTGTTCGTAGCCACTGCGGCTTAATTTCTGGCTGGCTCAGCTTGTCCCCGAAGGGGTCCATTCTCGACACATCGAATCTGTGGATATCTTCCGCGTGCTGGTATCATTATTATCGGAAGGGCGTCGGCAATTGGATATTTGATGCTTGACAGTCCAACTACCCTAGCAAGATTATGCTTGGAGTATAAGTAACCTCTATTTTTACTGTCTGTCGGTAACAGGTTTAGCTCAGATGGTGAATCACAGCAGAATAAGTCTGGTACAGCTTGTCCAATTTGAGATGATGAAAGCTCCATGCTCTTGAGGAGACTGTTAGAATTTCTAAGCTTCGGGCCACTGACGTCTCAAATGCCCAGCATGCTGTTGGCCCTTTGTACACGGGGAACAGCTGTATACACACCGTGTATGGAAAGACCTTTATCTCCTATCCGTAGCTAGCAAAGCCCTCTACTGACTGGTTGCACCGTGATTTCCTCTTACAGAGCAGCCCGGCCGTGCCTAAAAAAGCAGTACACACGCCGTGAACATCATAGTCTATGCCCCAGAGTCAGAGCGGAGCAGTGGACCAATGGTCGGTCGATAATTAGCGGTTCGGATGTCATGGCCTCACATAGGGCAAGTGCATAGATTACGGGGTCGAACCAGTAGTGTTCTCCACATCCTCCGTACTCTCCGGGCCTGAACGCTCTACACGGGCGGTGTCAAGGGTGCCGCTGTATAATAAGAGTCCACTGCAACATTCGTCAACTTTTATAGAACGGAAGGGCTACAGACACCTACCGTAGAGCAGCTGATTCCCTCAAATACGGCTAAGTTCTAGGGGATTGACCCCCGTCCGCCGCATTGAGGGACCTATCGAAGGCGTCTTACCCAGCGACTGTAATGTA 2 | -------------------------------------------------------------------------------- /03_revc/tests/revc_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ Tests for revc.py """ 3 | 4 | from subprocess import getstatusoutput 5 | import platform 6 | import os 7 | import re 8 | 9 | PRG = './revc.py' 10 | RUN = f'python {PRG}' if platform.system() == 'Windows' else PRG 11 | TEST1 = ('./tests/inputs/input1.txt', './tests/inputs/output1.txt') 12 | TEST2 = ('./tests/inputs/input2.txt', './tests/inputs/output2.txt') 13 | 14 | 15 | # -------------------------------------------------- 16 | def test_exists() -> None: 17 | """ Program exists """ 18 | 19 | assert os.path.isfile(PRG) 20 | 21 | 22 | # -------------------------------------------------- 23 | def test_usage() -> None: 24 | """ Prints usage """ 25 | 26 | for arg in ['-h', '--help']: 27 | rv, out = getstatusoutput(f'{RUN} {arg}') 28 | assert rv == 0 29 | assert 
def test_input1() -> None:
    """ Runs on file input

    Runs the program on the first input file and compares its output with
    the matching expected-output file (trailing whitespace stripped).
    """

    file, expected = TEST1
    rv, out = getstatusoutput(f'{RUN} {file}')
    assert rv == 0

    # Read the expectation inside a context manager so the handle is closed
    # even when the assertion fails.
    with open(expected) as fh:
        assert out == fh.read().rstrip()
def main() -> None:
    """ Print the population for the requested generation """

    args = get_args()

    def fib(n: int) -> int:
        """ Grow the sequence as a list; the litter size comes from args """

        nums = [0, 1]
        steps = n - 1
        while steps > 0:
            older, newer = nums[-2:]
            nums.append((older * args.litter) + newer)
            steps -= 1

        return nums[-1]

    print(fib(args.generations))
def fib(n: int, litter: int) -> int:
    """ Find Fibonacci

    Computes the generalized sequence F(0) = 0, F(1) = 1,
    F(i) = F(i - 2) * litter + F(i - 1) by growing a list.

    Args:
        n: index of the term to return (0 or greater).
        litter: multiplier applied to the term two positions back.

    Returns:
        The n-th term of the sequence.

    Raises:
        ValueError: if n is negative.
    """

    if n < 0:
        raise ValueError(f'n "{n}" must not be negative')

    nums = [0, 1]
    for _ in range(n - 1):
        nums.append((nums[-2] * litter) + nums[-1])

    # Index by n rather than taking the last item so that n == 0 yields
    # F(0) == 0 instead of the seed value 1.
    return nums[n]
def fib(k: int) -> Generator[int, None, None]:
    """ Endless generator of the sequence 0, 1, 1, ... scaled by k

    Each new term is the previous term plus k times the term before it.
    """

    previous, current = 0, 1
    yield previous

    while True:
        yield current
        previous, current = current, previous * k + current
def main() -> None:
    """ Advance the generator to the requested generation and print it """

    args = get_args()
    population = fib(args.litter)

    # The generator starts at F(0), so generations + 1 values are consumed
    answer, remaining = 0, args.generations + 1
    while remaining > 0:
        answer = next(population)
        remaining -= 1

    print(answer)
def main() -> None:
    """ Print the population after the requested number of generations """

    args = get_args()

    # fib() starts at F(0), so the answer is the item at index `generations`.
    # islice skips directly to it instead of collecting every earlier term
    # into a list just to read the last element.
    print(next(islice(fib(args.litter), args.generations, None)))
def main() -> None:
    """ Print the population for the requested generation """

    args = get_args()

    def fib(n: int) -> int:
        """ Plain recursion; generations 1 and 2 are the base cases """

        if n in (1, 2):
            return 1

        return fib(n - 2) * args.litter + fib(n - 1)

    print(fib(args.generations))
def main() -> None:
    """ Print the population for the requested generation """

    args = get_args()

    @lru_cache()
    def fib(n: int) -> int:
        """ Cached recursion; generations 1 and 2 are the base cases """

        if n in (1, 2):
            return 1

        return fib(n - 2) * args.litter + fib(n - 1)

    print(fib(args.generations))
def memoize(f: Callable) -> Callable:
    """ Memoize a function

    Returns a wrapper that remembers the result of every distinct call to
    `f`, keyed on the tuple of positional arguments, so repeated calls with
    the same arguments do not re-run `f`.

    Args:
        f: function taking hashable positional arguments.

    Returns:
        A wrapper with the same name and docstring as `f`.  The cache is
        held by the wrapper and entries are never evicted.

    Raises:
        TypeError: from the wrapper, if an argument is not hashable.
    """

    # Imported locally so this block does not depend on the file's imports
    from functools import wraps

    cache: dict = {}

    @wraps(f)
    def memo(*args):
        if args not in cache:
            cache[args] = f(*args)
        return cache[args]

    return memo
def memoize(f: Callable) -> Callable:
    """ Memoize a function

    Returns a wrapper that remembers the result of every distinct call to
    `f`, keyed on the tuple of positional arguments, so repeated calls with
    the same arguments do not re-run `f`.

    Args:
        f: function taking hashable positional arguments.

    Returns:
        A wrapper with the same name and docstring as `f`.  The cache is
        held by the wrapper and entries are never evicted.

    Raises:
        TypeError: from the wrapper, if an argument is not hashable.
    """

    # Imported locally so this block does not depend on the file's imports
    from functools import wraps

    cache: dict = {}

    @wraps(f)
    def memo(*args):
        if args not in cache:
            cache[args] = f(*args)
        return cache[args]

    return memo
def test_bad_litter() -> None:
    """ Dies when litter size is bad """

    # A valid generations value paired with a litter size outside 1-5
    generations = random.randint(1, 40)
    bad_sizes = [*range(-10, 0), *range(6, 20)]
    litter = random.choice(bad_sizes)

    status, output = getstatusoutput(f'{RUN} {generations} {litter}')

    # The program must exit non-zero and print usage plus the range message
    assert status != 0
    assert output.lower().startswith('usage:')
    expected = f'litter "{litter}" must be between 1 and 5'
    assert re.search(expected, output)
def gc(rec):
    """ Return the GC content, record ID for a sequence

    ``rec`` must expose ``seq`` and ``id`` attributes.  The result is a
    ``(percentage, id)`` tuple where the percentage is the share of G/C
    characters (matched case-insensitively) in ``str(rec.seq)``.  An empty
    sequence yields ``0.0`` instead of dividing by zero.
    """

    seq = str(rec.seq)
    if not seq:
        return (0.0, rec.id)

    # Named ``matches`` so the local does not shadow this function's name
    matches = re.findall('[gc]', seq, re.IGNORECASE)
    return ((len(matches) / len(seq)) * 100, rec.id)
def main() -> None:
    """ Make a jazz noise here

    Parse the input as FASTA, compute the GC percentage of every record,
    and print the ID and percentage (six decimal places) of the record
    with the highest value.
    """

    args = get_args()
    seqs: List[Tuple[float, str]] = []

    for rec in SeqIO.parse(args.file, 'fasta'):
        # Iterate each base and compare to G or C, add 1 to counter
        gc = 0
        for base in rec.seq.upper():
            if base in ('C', 'G'):
                gc += 1

        # A record with no bases counts as 0% rather than dividing by zero
        pct = (gc * 100) / len(rec.seq) if len(rec.seq) else 0.
        seqs.append((pct, rec.id))

    # Tuples compare on percentage first; the default covers input that
    # contains no records at all, where max() would otherwise raise.
    high = max(seqs, default=(0., ''))
    print(f'{high[1]} {high[0]:0.6f}')
def find_gc(seq: str) -> float:
    """ Calculate GC content

    Return the percentage (0-100) of characters in ``seq`` that are G or
    C, ignoring case.  An empty sequence yields 0.0.
    """

    if not seq:
        # Nothing to divide by; return a float to match the annotation
        return 0.

    gc = 0
    for base in seq.upper():
        if base in ('C', 'G'):
            gc += 1

    return (gc * 100) / len(seq)
def find_gc(seq: str) -> float:
    """ Calculate GC content

    Return the percentage (0-100) of characters in ``seq`` that are G or
    C, ignoring case.  An empty sequence yields 0.0.
    """

    if not seq:
        # Nothing to divide by; return a float to match the annotation
        return 0.

    # Upper-case once and reuse it for both counts
    upper = seq.upper()
    return (upper.count('C') + upper.count('G')) * 100 / len(seq)
def find_gc(seq: str) -> float:
    """ Calculate GC content

    Return the percentage (0-100) of characters in ``seq`` that are G or
    C, ignoring case.  An empty sequence yields 0.0.
    """

    if not seq:
        # Nothing to divide by; return a float to match the annotation
        return 0.

    # The comprehension keeps only the G/C bases; its length is the count
    gc = len([base for base in seq.upper() if base in 'CG'])
    return (gc * 100) / len(seq)
def find_gc(seq: str) -> float:
    """ Calculate GC content

    Return the percentage (0-100) of characters in ``seq`` that are G or
    C, ignoring case.  An empty sequence yields 0.0.
    """

    if not seq:
        # Nothing to divide by; return a float to match the annotation
        return 0.

    # filter() keeps only the G/C bases; the list length is the count
    gc = len(list(filter(lambda base: base in 'CG', seq.upper())))
    return (gc * 100) / len(seq)
def find_gc(seq: str) -> float:
    """ Calculate GC content

    Return the percentage (0-100) of characters in ``seq`` that are G or
    C, ignoring case.  An empty sequence yields 0.0.
    """

    if not seq:
        # Nothing to divide by; return a float to match the annotation
        return 0.

    # Each membership test is a bool; sum() counts True as 1
    gc = sum(map(lambda base: base in 'CG', seq.upper()))
    return (gc * 100) / len(seq)
def find_gc(seq: str) -> float:
    """ Calculate GC content

    Return the percentage (0-100) of characters in ``seq`` that are G or
    C, ignoring case.  An empty sequence yields 0.0.
    """

    if not seq:
        # Nothing to divide by; return a float to match the annotation
        return 0.

    # Count the matches first and then scale the count; multiplying the
    # list of matches itself would build a list 100 times as long.
    return len(re.findall('[GC]', seq.upper())) * 100 / len(seq)
def random_string() -> str:
    """ Generate a random string """

    # Pick a length from 5 to 10, then draw that many ASCII letters/digits
    length = random.randint(5, 10)
    alphabet = string.ascii_letters + string.digits
    chars = random.choices(alphabet, k=length)
    return ''.join(chars)
def main():
    """ Make a jazz noise here """

    args = get_args()
    first, second = args.seq1, args.seq2
    len1, len2 = len(first), len(second)

    # Start from the difference in lengths
    extra = abs(len1 - len2)

    # Compare the letters at each index up to the shorter length
    shared = min(len1, len2)
    mismatches = sum(1 for i in range(shared) if first[i] != second[i])

    print(extra + mismatches)
def hamming(seq1: str, seq2: str) -> int:
    """ Calculate Hamming distance """

    len1, len2 = len(seq1), len(seq2)

    # Start from the difference in lengths
    extra = abs(len1 - len2)

    # Compare the letters at each index up to the shorter length
    shared = min(len1, len2)
    mismatches = sum(1 for i in range(shared) if seq1[i] != seq2[i])

    return extra + mismatches
def hamming(seq1: str, seq2: str) -> int:
    """ Calculate Hamming distance """

    # zip stops at the shorter sequence, so the difference in lengths is
    # counted separately
    overhang = abs(len(seq1) - len(seq2))

    # Keep only the paired letters that differ
    mismatched = [pair for pair in zip(seq1, seq2) if pair[0] != pair[1]]

    return overhang + len(mismatched)
def hamming(seq1: str, seq2: str) -> int:
    """ Calculate Hamming distance """

    # zip_longest pads the shorter sequence with None, which never equals
    # a letter, so the extra positions are counted as differences too
    padded_pairs = zip_longest(seq1, seq2)
    return sum(1 for left, right in padded_pairs if left != right)
def hamming(seq1: str, seq2: str) -> int:
    """ Calculate Hamming distance """

    # List comprehension over the padded pairs.  Each comparison is a
    # bool and sum() counts True as 1, so no explicit 1/0 values or
    # guard clause are needed.
    padded_pairs = zip_longest(seq1, seq2)
    differs = [left != right for left, right in padded_pairs]
    return sum(differs)
def hamming(seq1: str, seq2: str) -> int:
    """ Calculate Hamming distance """

    def differs(pair) -> bool:
        """ True when the two items of the pair are not equal """
        return pair[0] != pair[1]

    # filter() keeps only the differing pairs; the distance is how many
    # of them remain
    mismatched = filter(differs, zip_longest(seq1, seq2))
    return len(list(mismatched))
def hamming(seq1: str, seq2: str) -> int:
    """ Calculate Hamming distance

    Returns how many aligned positions hold different characters.
    zip_longest pads the shorter sequence, so extra characters in the
    longer one are each counted as a mismatch.
    """

    def differs(pair) -> bool:
        """ True when the two members of the pair are not equal """
        first, second = pair
        return first != second

    # Method 7: use map, zip_longest
    pairs = zip_longest(seq1, seq2)

    # Each True contributes 1 to the sum
    return sum(map(differs, pairs))
def hamming(seq1: str, seq2: str) -> int:
    """ Calculate Hamming distance

    Counts the positions at which the two sequences differ.  Pairing is
    done with zip_longest, so characters of the longer sequence that
    have no partner are each counted as a difference.
    """

    # Method 8: operator.ne and starmap
    # starmap unpacks each (char1, char2) pair into operator.ne; summing
    # the resulting booleans counts the mismatches.
    return sum(starmap(operator.ne, zip_longest(seq1, seq2)))
def run(file: str) -> None:
    """ Run with input

    The input file must hold exactly three lines: sequence 1, sequence 2
    and the expected output.  The program is run with the two sequences
    as arguments; it must exit with status 0 and print the expected
    value.
    """

    assert os.path.isfile(file)

    # Context manager so the handle is closed as soon as the file is read
    with open(file) as fh:
        seq1, seq2, expected = fh.read().splitlines()

    rv, out = getstatusoutput(f'{RUN} {seq1} {seq2}')
    assert rv == 0
    assert out.rstrip() == expected
TCACCATCCGGACCATGTTTATCTAGAAGATGATTTTTGGTGTGTGAAATGCGGAGACGGCATTGATCGCGTCCTGGAACTGGTCTACCAGTTGACACGATGGACCATATGTATGTTAGCCTCCCCGGTGGTCAATCTACCCCCGCGAATATACTAACCGCTAATGGTTAATATAGTCGATAGCCAAGTGCGTGCCTGCGCTGCGCGTATAGTCGTATGGGGTCGTTAGTAACCTAGCAGGAGTATGTAACATGGACGCCGGGCGGGTTACATTATCAACCTACGTTAGTCGTCGCAAATTGGCTAGCGCAGCGCTGTATGGGTGTTCACTGAGAGAATTAGTTCTGAGTCCTACAGGATGTGGAGTAACTCTAACTTTGAGTAGAGGACACTTTCTCTCACCCGTGGGGGGGTGACCATATAGCGTGCAAAACGAACAATCTCACTATATTTTCTCCTCTCCAGCTAGGATATCAAATTGGCCCGGGCCCTTTCGAAATCTAACTAAACGAACCCCTTCCGAAACGATCATTCTGCATTGACCACCTTTTCAGGACACACAGCACCAACTCCCAGACCGCGCCTGCCTCCGAAAATACTAGAGTGTACCTAACTATCAAGGGCGGGCCGAACCTTTCAAGACATCAACGCTGGAGAGTAATGATGATCGAATGAAGCCAATTGCCTCGACTCTAAAACGCTGGTCTAGGACGCTGAGCAACGTGGGATTGGGACTTTCCAGCATTTGAGATTACTAAGGTGAAAATGAGGACAGTAGGGGATGTGTGACTGCTTAATCTAATGTATGGCGTCCGGGGGTGGCTGCAAGTAAACGCTCGAACCGTGCTTTGGCCGTCAGTGAATTACGCTCCTGTGTTCTGTGTAGTCTCGTTGTTTGGTGTACCCGGCCGTGGACGAACCTATAGCCAAGATAGTGAGAAGCCAGCCACGCGCAGAGGGGCAGAAACAACTGGGTGCC 2 | CTCCCAGGTGGACTACGCAAATGTCAACTAAAAGGGTTCGTGCGAGAACGGATGACGAGGCGAGTATAAATATCTGGCATTCGACTACTACTTGCTCACTTGAGCAACCATGATTTTATGTCCCTAGGTGAGCATCATACGAGCCTGTCTAGTCTGACTAGGCTTGGACACATTTCACGAGCCTCAAGACCCACTCAAGGCGAAGCGGGTTATACGATGGGATGATTCGCGATCAACCAGCTCTAGTGGACCTCCACCTCTGGTGGATTAATTGACGAAGTTGCGTGGGGCTGGCATGTTTGTAAGGGTCACCTATTTAGTAGGATGCCCTGGGGGACCTTTATCTCCGGCCGGCCTGCTGTTTAGAGTCTAGCAATCTAGCTAGCGAACCGTTCCTGGCGCCACGGCCACGTCCACCCTAGAACTTGCAAAAAGTATCACTTCGCTGTACTTCATCCATTTGGACGAGTTTACCAGGTTGTACCGAGCCCCGTGGAACCGTAAATAATCTAATCGTCTAGGCCTCTCTTTCTCGGCAGGAACCATCCTTTTACCACGCTGATACCTTACGCCAAGCTAGGGTCGAGGACTGATTCGAGTATGTGCTCCCTCTCATATAATCCGGGATGGACCCTGCCAATACAAAATCACTAGACTGTAAAGATTAAACGACATGGACAGTTACCTCGCTTCGAAACGACTCGTTTGCTACGGATCCCAACCTGGGAAGGTGAGTGCCCATCTTTTCGCAGTTATTTATGAGAAACGAACACAGGAGTTGCAAAGAGACGGTTCAGGTAACCCCAAGGCGCGCCTTGAAGTATGCCACTTAACGAAGGAGCTGTGCGAATTTGGAACGAGACGTACGTTCCATCGTACTATGTGCTTTCACTGTTCGTACTCCCAAGGAATCGACTAGGAAATCGTTAAATTTAGCGGAAAGCAACGGGGGACAGATATGCTTAAACAATCGGGAAGC 3 | 503 4 | 
-------------------------------------------------------------------------------- /07_prot/.gitignore: -------------------------------------------------------------------------------- 1 | prot.py 2 | -------------------------------------------------------------------------------- /07_prot/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test 2 | 3 | test: 4 | python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint --pylint-rcfile=../pylintrc --mypy prot.py tests/prot_test.py 5 | 6 | all: 7 | ../bin/all_test.py prot.py 8 | -------------------------------------------------------------------------------- /07_prot/README.md: -------------------------------------------------------------------------------- 1 | # Translating RNA into Protein 2 | 3 | http://rosalind.info/problems/prot/ 4 | 5 | Write a Python program called `prot.py` that takes a sequence of RNA as a single positional argument and prints the protein translation. 6 | 7 | ``` 8 | $ ./prot.py AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA 9 | MAMAPRTEINSTRING 10 | ``` 11 | 12 | The program should print a "usage" statement for `-h` or `--help` flags: 13 | 14 | ``` 15 | $ ./prot.py -h 16 | usage: prot.py [-h] RNA 17 | 18 | Translate RNA to proteins 19 | 20 | positional arguments: 21 | RNA RNA sequence 22 | 23 | optional arguments: 24 | -h, --help show this help message and exit 25 | ``` 26 | 27 | A passing test suite looks like this: 28 | 29 | ``` 30 | $ make test 31 | python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint 32 | --mypy prot.py tests/prot_test.py 33 | ============================= test session starts ============================== 34 | platform darwin -- Python 3.9.1, pytest-6.1.2, py-1.9.0, pluggy-0.13.1 -- /Library/Frameworks/Python.framework/Versions/3.9/bin/python3 35 | cachedir: .pytest_cache 36 | rootdir: /Users/kyclark/work/bio/code/07_prot 37 | plugins: mypy-0.7.0, flake8-1.0.6, pylint-0.17.0 38 | collected 9 
items 39 | 40 | prot.py::FLAKE8 SKIPPED [ 10%] 41 | prot.py::mypy PASSED [ 20%] 42 | tests/prot_test.py::FLAKE8 SKIPPED [ 30%] 43 | tests/prot_test.py::mypy PASSED [ 40%] 44 | tests/prot_test.py::test_exists PASSED [ 50%] 45 | tests/prot_test.py::test_usage PASSED [ 60%] 46 | tests/prot_test.py::test_input1 PASSED [ 70%] 47 | tests/prot_test.py::test_stop_codon PASSED [ 80%] 48 | tests/prot_test.py::test_input2 PASSED [ 90%] 49 | ::mypy PASSED [100%] 50 | ===================================== mypy ===================================== 51 | 52 | Success: no issues found in 2 source files 53 | ========================= 8 passed, 2 skipped in 1.72s ========================= 54 | ``` 55 | 56 | ## Author 57 | 58 | Ken Youens-Clark 59 | -------------------------------------------------------------------------------- /07_prot/bench.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Benchmark all the solutions 4 | hyperfine -m 1000 -L prg ./solution1_for.py,./solution2_unit.py,\ 5 | ./solution3_list_comp_slice.py,./solution4_map_takewhile.py,\ 6 | ./solution5_bio_seq.py \ 7 | '{prg} AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA' \ 8 | --prepare 'rm -rf __pycache__' 9 | -------------------------------------------------------------------------------- /07_prot/solution1_for.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ Translate DNA/RNA to proteins """ 3 | 4 | import argparse 5 | from typing import NamedTuple 6 | 7 | 8 | class Args(NamedTuple): 9 | """ Command-line arguments """ 10 | rna: str 11 | 12 | 13 | # -------------------------------------------------- 14 | def get_args() -> Args: 15 | """Get command-line arguments""" 16 | 17 | parser = argparse.ArgumentParser( 18 | description='Translate RNA to proteins', 19 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 20 | 21 | parser.add_argument('rna', type=str, 
def main() -> None:
    """ Translate the RNA argument into a protein and print it

    The sequence is upper-cased and read three bases at a time.  Each
    codon is looked up in the codon table; a codon missing from the
    table (including a trailing fragment shorter than three bases) is
    shown as '-'.  Translation stops at the first stop codon ('*'),
    which is not included in the output.
    """

    args = get_args()
    rna = args.rna.upper()
    codon_to_aa = {
        'AAA': 'K', 'AAC': 'N', 'AAG': 'K', 'AAU': 'N', 'ACA': 'T',
        'ACC': 'T', 'ACG': 'T', 'ACU': 'T', 'AGA': 'R', 'AGC': 'S',
        'AGG': 'R', 'AGU': 'S', 'AUA': 'I', 'AUC': 'I', 'AUG': 'M',
        'AUU': 'I', 'CAA': 'Q', 'CAC': 'H', 'CAG': 'Q', 'CAU': 'H',
        'CCA': 'P', 'CCC': 'P', 'CCG': 'P', 'CCU': 'P', 'CGA': 'R',
        'CGC': 'R', 'CGG': 'R', 'CGU': 'R', 'CUA': 'L', 'CUC': 'L',
        'CUG': 'L', 'CUU': 'L', 'GAA': 'E', 'GAC': 'D', 'GAG': 'E',
        'GAU': 'D', 'GCA': 'A', 'GCC': 'A', 'GCG': 'A', 'GCU': 'A',
        'GGA': 'G', 'GGC': 'G', 'GGG': 'G', 'GGU': 'G', 'GUA': 'V',
        'GUC': 'V', 'GUG': 'V', 'GUU': 'V', 'UAC': 'Y', 'UAU': 'Y',
        'UCA': 'S', 'UCC': 'S', 'UCG': 'S', 'UCU': 'S', 'UGC': 'C',
        'UGG': 'W', 'UGU': 'C', 'UUA': 'L', 'UUC': 'F', 'UUG': 'L',
        'UUU': 'F', 'UAA': '*', 'UAG': '*', 'UGA': '*',
    }

    # Method 1: for loop
    k = 3
    residues = []
    # Slice codons on the fly so nothing past the stop codon is built
    for start in range(0, len(rna), k):
        aa = codon_to_aa.get(rna[start:start + k], '-')
        if aa == '*':
            break
        residues.append(aa)

    # Join once instead of growing a string with += on every codon
    print(''.join(residues))
def main() -> None:
    """ Print the protein translation of the RNA argument """

    rna = get_args().rna

    # to_stop=True is passed so the translation ends at the first stop
    # codon (behavior per the Biopython Seq.translate documentation)
    protein = Seq.translate(rna, to_stop=True)
    print(protein)
def test_input2() -> None:
    """ Runs on file input """

    file, expected = TEST1

    def cat(filename: str) -> str:
        """ Return the contents of filename without trailing whitespace """

        # Context manager so the handle is closed once the file is read
        with open(filename) as fh:
            return fh.read().rstrip()

    run(cat(file), cat(expected))
6 | The output should be all the start positions where the subsequence can be found in the sequence: 7 | 8 | ``` 9 | $ ./subs.py GATATATGCATATACTT ATAT 10 | 2 4 10 11 | ``` 12 | 13 | The program should print a "usage" statement for `-h` or `--help` flags: 14 | 15 | ``` 16 | $ ./subs.py -h 17 | usage: subs.py [-h] seq subseq 18 | 19 | Find subsequences 20 | 21 | positional arguments: 22 | seq Sequence 23 | subseq Sub-sequence 24 | 25 | optional arguments: 26 | -h, --help show this help message and exit 27 | ``` 28 | 29 | A passing test suite looks like this: 30 | 31 | ``` 32 | $ make test 33 | python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint 34 | --mypy subs.py tests/subs_test.py 35 | ============================ test session starts ============================ 36 | ... 37 | 38 | subs.py::FLAKE8 SKIPPED [ 11%] 39 | subs.py::mypy PASSED [ 22%] 40 | tests/subs_test.py::FLAKE8 SKIPPED [ 33%] 41 | tests/subs_test.py::mypy PASSED [ 44%] 42 | tests/subs_test.py::test_exists PASSED [ 55%] 43 | tests/subs_test.py::test_usage PASSED [ 66%] 44 | tests/subs_test.py::test_input1 PASSED [ 77%] 45 | tests/subs_test.py::test_input2 PASSED [ 88%] 46 | ::mypy PASSED [100%] 47 | =================================== mypy ==================================== 48 | 49 | Success: no issues found in 2 source files 50 | ======================= 7 passed, 2 skipped in 0.28s ======================== 51 | ``` 52 | 53 | ## Author 54 | 55 | Ken Youens-Clark 56 | -------------------------------------------------------------------------------- /08_subs/bench.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Benchmark all the solutions 4 | hyperfine -m 1000 -L prg ./solution1_str_find.py,./solution2_str_index.py,\ 5 | ./solution3_functional.py,./solution4_kmers_functional.py,\ 6 | ./solution4_kmers_imperative.py,./solution5_re.py \ 7 | '{prg} GATATATGCATATACTT ATAT' --prepare 'rm -rf __pycache__' 8 | 
def main() -> None:
    """ Print the 1-based start positions of the subsequence """

    args = get_args()
    seq, subseq = args.seq, args.subseq

    # Method 1: str.find()
    hits = []
    pos = seq.find(subseq)
    while pos != -1:
        # Report 1-based positions
        hits.append(pos + 1)
        # Resume one character past this hit so overlapping matches count
        pos = seq.find(subseq, pos + 1)

    print(*hits)
def main() -> None:
    """ Print the 1-based start positions of the subsequence

    str.find is called from every offset at which the subsequence could
    still fit; misses (-1) are filtered out, duplicate hits are removed
    with a set, and the remaining 0-based positions are shifted to
    1-based and printed in ascending order.
    """

    args = get_args()
    seq, subseq = args.seq, args.subseq

    # Include the last possible start offset, len(seq) - len(subseq).
    # Without the "+ 1" the range is empty when both strings have the
    # same length, so a match of the whole sequence was never reported.
    r = range(len(seq) - len(subseq) + 1)
    ok = partial(operator.le, 0)      # keep only real hits (0 <= pos)
    find = partial(seq.find, subseq)  # find(start) -> position or -1
    add1 = partial(operator.add, 1)   # 0-based -> 1-based
    print(*sorted(map(add1, set(filter(ok, map(find, r))))))
def main() -> None:
    """ Print the 1-based start positions of the subsequence """

    args = get_args()
    seq, subseq = args.seq, args.subseq
    k = len(subseq)

    # Compare every k-long window of seq with subseq and keep the
    # 1-based start of each window that matches
    found = [
        start + 1 for start in range(len(seq) - k + 1)
        if seq[start:start + k] == subseq
    ]
    print(*found)
def main() -> None:
    """ Print the 1-based start positions of the subsequence

    The subsequence is matched literally, like the str.find/str.index
    and k-mer solutions do, so characters that are special in regular
    expressions cannot change the meaning of the search.
    """

    args = get_args()
    seq, subseq = args.seq, args.subseq

    # re.escape: treat the user-supplied subsequence as plain text.
    # (?=...) is a zero-width look-ahead, so the scan advances one
    # character at a time and overlapping occurrences are all reported.
    pattern = re.compile(f'(?={re.escape(subseq)})')
    print(*[m.start() + 1 for m in pattern.finditer(seq)])
def cat(file: str) -> str:
    """ Return contents of file with trailing whitespace removed """

    # Context manager so the handle is closed as soon as the file is read
    with open(file) as fh:
        return fh.read().rstrip()
-------------------------------------------------------------------------------- /09_grph/.gitignore: -------------------------------------------------------------------------------- 1 | .log 2 | grph.py 3 | graph.txt* 4 | out 5 | err 6 | -------------------------------------------------------------------------------- /09_grph/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test 2 | 3 | test: 4 | python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint --pylint-rcfile=../pylintrc --mypy grph.py tests/grph_test.py 5 | 6 | all: 7 | ../bin/all_test.py grph.py 8 | 9 | out1: 10 | ./solution2_graph.py -o 1.txt tests/inputs/1.fa 11 | 12 | out2: 13 | ./solution2_graph.py -o 2.txt tests/inputs/2.fa 14 | -------------------------------------------------------------------------------- /09_grph/log.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | 5 | print('This is STDOUT.') 6 | print('This is also STDOUT.', file=sys.stdout) 7 | print('This is STDERR.', file=sys.stderr) 8 | -------------------------------------------------------------------------------- /09_grph/tests/inputs/1.fa: -------------------------------------------------------------------------------- 1 | >Rosalind_0498 2 | AAATAAA 3 | >Rosalind_2391 4 | AAATTTT 5 | >Rosalind_2323 6 | TTTTCCC 7 | >Rosalind_0442 8 | AAATCCC 9 | >Rosalind_5013 10 | GGGTGGG 11 | -------------------------------------------------------------------------------- /09_grph/tests/inputs/1.fa.3.out: -------------------------------------------------------------------------------- 1 | Rosalind_0498 Rosalind_0442 2 | Rosalind_0498 Rosalind_2391 3 | Rosalind_2391 Rosalind_2323 4 | -------------------------------------------------------------------------------- /09_grph/tests/inputs/1.fa.4.out: -------------------------------------------------------------------------------- 1 | Rosalind_2391 
Rosalind_2323 2 | -------------------------------------------------------------------------------- /09_grph/tests/inputs/1.fa.5.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyclark/biofx_python/65888923d35e6cf63bf0b43ecd7b51e42d261cd4/09_grph/tests/inputs/1.fa.5.out -------------------------------------------------------------------------------- /09_grph/tests/inputs/2.fa.4.out: -------------------------------------------------------------------------------- 1 | Rosalind_0505 Rosalind_7158 2 | Rosalind_1195 Rosalind_4924 3 | Rosalind_1890 Rosalind_7493 4 | Rosalind_2576 Rosalind_2554 5 | Rosalind_2840 Rosalind_0491 6 | Rosalind_3147 Rosalind_2208 7 | Rosalind_4304 Rosalind_0691 8 | Rosalind_4566 Rosalind_0045 9 | Rosalind_4566 Rosalind_2666 10 | Rosalind_4581 Rosalind_0614 11 | Rosalind_5472 Rosalind_3293 12 | Rosalind_5472 Rosalind_3966 13 | Rosalind_6802 Rosalind_5168 14 | Rosalind_6802 Rosalind_9390 15 | Rosalind_7135 Rosalind_0726 16 | Rosalind_7364 Rosalind_8145 17 | Rosalind_7595 Rosalind_3624 18 | Rosalind_7751 Rosalind_1517 19 | Rosalind_8145 Rosalind_5168 20 | Rosalind_8145 Rosalind_9390 21 | Rosalind_9149 Rosalind_2943 22 | Rosalind_9149 Rosalind_5193 23 | Rosalind_9421 Rosalind_6002 24 | -------------------------------------------------------------------------------- /09_grph/tests/inputs/2.fa.5.out: -------------------------------------------------------------------------------- 1 | Rosalind_0378 Rosalind_7135 2 | Rosalind_0914 Rosalind_2840 3 | Rosalind_1144 Rosalind_5870 4 | Rosalind_2095 Rosalind_0045 5 | Rosalind_2840 Rosalind_4493 6 | Rosalind_2943 Rosalind_1144 7 | Rosalind_3470 Rosalind_0281 8 | Rosalind_4581 Rosalind_4374 9 | Rosalind_5867 Rosalind_6515 10 | Rosalind_7148 Rosalind_6515 11 | Rosalind_7364 Rosalind_6988 12 | Rosalind_7751 Rosalind_1517 13 | Rosalind_9983 Rosalind_2130 14 | -------------------------------------------------------------------------------- 
/09_grph/tests/inputs/3.fa.4.out: -------------------------------------------------------------------------------- 1 | Rosalind_0467 Rosalind_1083 2 | Rosalind_0610 Rosalind_6708 3 | Rosalind_1032 Rosalind_5108 4 | Rosalind_1032 Rosalind_6425 5 | Rosalind_1083 Rosalind_9047 6 | Rosalind_1181 Rosalind_5581 7 | Rosalind_1489 Rosalind_2350 8 | Rosalind_1998 Rosalind_9039 9 | Rosalind_2267 Rosalind_4227 10 | Rosalind_2343 Rosalind_8935 11 | Rosalind_2350 Rosalind_4964 12 | Rosalind_2772 Rosalind_0821 13 | Rosalind_3213 Rosalind_8754 14 | Rosalind_3260 Rosalind_1532 15 | Rosalind_3545 Rosalind_9503 16 | Rosalind_3612 Rosalind_9549 17 | Rosalind_3612 Rosalind_9909 18 | Rosalind_3679 Rosalind_4513 19 | Rosalind_3679 Rosalind_9138 20 | Rosalind_3679 Rosalind_9524 21 | Rosalind_4116 Rosalind_8786 22 | Rosalind_4292 Rosalind_9503 23 | Rosalind_4407 Rosalind_0040 24 | Rosalind_4513 Rosalind_8935 25 | Rosalind_4771 Rosalind_1078 26 | Rosalind_4964 Rosalind_7026 27 | Rosalind_5025 Rosalind_2343 28 | Rosalind_5365 Rosalind_8870 29 | Rosalind_5627 Rosalind_4963 30 | Rosalind_6125 Rosalind_8452 31 | Rosalind_6425 Rosalind_8452 32 | Rosalind_6895 Rosalind_8870 33 | Rosalind_6938 Rosalind_6969 34 | Rosalind_6969 Rosalind_8162 35 | Rosalind_6983 Rosalind_8452 36 | Rosalind_7170 Rosalind_9549 37 | Rosalind_7170 Rosalind_9909 38 | Rosalind_7444 Rosalind_2772 39 | Rosalind_7444 Rosalind_3662 40 | Rosalind_8162 Rosalind_8270 41 | Rosalind_8270 Rosalind_4407 42 | Rosalind_8442 Rosalind_5025 43 | Rosalind_8964 Rosalind_3545 44 | Rosalind_9039 Rosalind_9143 45 | Rosalind_9143 Rosalind_6125 46 | Rosalind_9475 Rosalind_9039 47 | Rosalind_9503 Rosalind_2771 48 | Rosalind_9595 Rosalind_5221 49 | Rosalind_9909 Rosalind_4513 50 | Rosalind_9909 Rosalind_9138 51 | Rosalind_9909 Rosalind_9524 52 | -------------------------------------------------------------------------------- /09_grph/tests/inputs/3.fa.5.out: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python3
""" Binary Search """

import argparse
import sys
from typing import List, NamedTuple


class Args(NamedTuple):
    """ Command-line arguments """
    num: int
    maximum: int


# --------------------------------------------------
def get_args() -> Args:
    """ Get command-line arguments """

    parser = argparse.ArgumentParser(
        description='Binary Search',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('-n',
                        '--num',
                        help='The number to guess',
                        metavar='int',
                        type=int,
                        required=True)

    parser.add_argument('-m',
                        '--max',
                        help='The maximum range',
                        metavar='int',
                        type=int,
                        required=True)

    args = parser.parse_args()

    return Args(args.num, args.max)


# --------------------------------------------------
def main() -> None:
    """ Search the numbers 0..maximum for the requested number """

    args = get_args()
    nums = list(range(args.maximum + 1))
    pos = binary_search(args.num, nums, 0, args.maximum)

    # binary_search() returns -1 for a miss, so index 0 is a valid hit
    # and must be reported as found.
    print(f'Found {args.num}!' if pos >= 0 else f'{args.num} not present.')


# --------------------------------------------------
def binary_search(x: int, xs: List[int], low: int, high: int) -> int:
    """ Binary search

    Look for ``x`` in the sorted list ``xs`` between the indexes ``low``
    and ``high`` (both inclusive). Returns the index of ``x`` or -1 when
    it is not present. Each call prints the current bounds to STDERR so
    the narrowing of the search window can be observed.
    """

    print(f'{low:4} {high:4}', file=sys.stderr)

    if high >= low:
        mid = (high + low) // 2

        if xs[mid] == x:
            return mid

        # The target can only be in the half that could still contain it
        if xs[mid] > x:
            return binary_search(x, xs, low, mid - 1)

        return binary_search(x, xs, mid + 1, high)

    return -1


# --------------------------------------------------
if __name__ == '__main__':
    main()
def main() -> None:
    """ Report the shortest sequence length and the number of sequences """

    args = get_args()

    # Get a list of the sequences as strings (all held in memory at once)
    seqs = [str(rec.seq) for rec in SeqIO.parse(args.file, 'fasta')]

    # Find the length of the shortest sequence, total num of sequences.
    # min() raises ValueError on an empty iterable, so a file without any
    # records is reported with a shortest length of 0 instead of crashing.
    shortest = min(map(len, seqs), default=0)
    num_seqs = len(seqs)

    print(f'shortest = "{shortest}", num = "{num_seqs}"')
assert out.lower().startswith('usage:') 31 | 32 | 33 | # -------------------------------------------------- 34 | def test_bad_file() -> None: 35 | """ Dies on bad file """ 36 | 37 | bad = random_string() 38 | rv, out = getstatusoutput(f'{RUN} {bad}') 39 | assert rv != 0 40 | assert out.lower().startswith('usage:') 41 | assert re.search(f"No such file or directory: '{bad}'", out) 42 | 43 | 44 | # -------------------------------------------------- 45 | def test_short() -> None: 46 | """ Runs OK """ 47 | 48 | rv, out = getstatusoutput(f'{RUN} {INPUT1}') 49 | assert rv == 0 50 | assert out in ['AC', 'CA', 'TA'] 51 | 52 | 53 | # -------------------------------------------------- 54 | def test_long() -> None: 55 | """ Runs OK """ 56 | 57 | rv, out = getstatusoutput(f'{RUN} {INPUT2}') 58 | assert rv == 0 59 | expected = ('GCCTTTTGATTTTAACGTTTATCGGGTGTAGTAAGATTGCGCGC' 60 | 'TAATTCCAATAAACGTATGGAGGACATTCCCCGT') 61 | assert out == expected 62 | 63 | 64 | # -------------------------------------------------- 65 | def test_no_shared() -> None: 66 | """ Correctly reports when no sequences are shared """ 67 | 68 | rv, out = getstatusoutput(f'{RUN} {NO_SHARED}') 69 | assert rv == 0 70 | assert out == 'No common subsequence.' 
#!/usr/bin/env bash

# Download one FASTA file per protein ID listed in FILE (one ID per line).
# Each result is written to fasta/<ID>.fasta.

if [[ $# -ne 1 ]]; then
    # Quote the command substitution so a path with spaces stays one word
    printf "usage: %s FILE\n" "$(basename "$0")"
    exit 1
fi

OUT_DIR="fasta"

# mkdir -p is a no-op when the directory already exists
mkdir -p "$OUT_DIR"

# The "|| [[ -n ... ]]" clause still processes a final line that has no
# trailing newline, which a plain "read" would silently drop.
while read -r PROT_ID || [[ -n "$PROT_ID" ]]; do
    # Skip blank lines rather than requesting a nameless ".fasta" file
    [[ -z "$PROT_ID" ]] && continue

    echo "$PROT_ID"
    URL="http://www.uniprot.org/uniprot/${PROT_ID}.fasta"
    echo "$URL"
    OUT_FILE="$OUT_DIR/${PROT_ID}.fasta"
    wget -q -O "$OUT_FILE" "$URL"
done < "$1"

echo "Done, see output in \"$OUT_DIR\"."
21 | -------------------------------------------------------------------------------- /11_mprt/tests/inputs/1.txt: -------------------------------------------------------------------------------- 1 | A2Z669 2 | B5ZC00 3 | P07204_TRBM_HUMAN 4 | P20840_SAG1_YEAST 5 | -------------------------------------------------------------------------------- /11_mprt/tests/inputs/1.txt.out: -------------------------------------------------------------------------------- 1 | B5ZC00 2 | 85 118 142 306 395 3 | P07204_TRBM_HUMAN 4 | 47 115 116 382 409 5 | P20840_SAG1_YEAST 6 | 79 109 135 248 306 348 364 402 485 501 614 7 | -------------------------------------------------------------------------------- /11_mprt/tests/inputs/2.txt: -------------------------------------------------------------------------------- 1 | P13473_LMP2_HUMAN 2 | P42098_ZP3_PIG 3 | P80069_A45K_MYCBO 4 | Q13VE3 5 | P20840_SAG1_YEAST 6 | P01042_KNH_HUMAN 7 | P07204_TRBM_HUMAN 8 | Q7S432 9 | A3DF24 10 | Q6GEK4 11 | P07585_PGS2_HUMAN 12 | A9QYN2 13 | Q5WFN0 14 | Q9QSP4 15 | -------------------------------------------------------------------------------- /11_mprt/tests/inputs/2.txt.out: -------------------------------------------------------------------------------- 1 | P13473_LMP2_HUMAN 2 | 32 38 49 58 75 101 123 179 229 242 257 275 300 307 317 356 3 | P42098_ZP3_PIG 4 | 124 146 179 271 5 | P80069_A45K_MYCBO 6 | 7 161 7 | Q13VE3 8 | 95 9 | P20840_SAG1_YEAST 10 | 79 109 135 248 306 348 364 402 485 501 614 11 | P01042_KNH_HUMAN 12 | 48 169 205 294 13 | P07204_TRBM_HUMAN 14 | 47 115 116 382 409 15 | Q7S432 16 | 173 17 | A3DF24 18 | 178 19 | P07585_PGS2_HUMAN 20 | 211 262 303 21 | Q9QSP4 22 | 196 250 326 443 23 | -------------------------------------------------------------------------------- /11_mprt/tests/mprt_test.py: -------------------------------------------------------------------------------- 1 | """ Tests for mprt.py """ 2 | 3 | import os 4 | import platform 5 | import random 6 | import re 7 | import string 8 
def run(file: str) -> None:
    """ Run test

    Runs the program on ``file`` and checks that it exits with status 0
    and that its output matches the contents of ``file + '.out'``, both
    compared without trailing whitespace.
    """

    expected_file = file + '.out'
    assert os.path.isfile(expected_file)

    # Read inside a context manager so the handle is closed immediately
    # rather than being left open for the rest of the test run.
    with open(expected_file) as fh:
        expected = fh.read().rstrip()

    rv, out = getstatusoutput(f'{RUN} {file}')
    assert rv == 0
    assert out.rstrip() == expected
-------------------------------------------------------------------------------- 1 | mrna.py 2 | -------------------------------------------------------------------------------- /12_mrna/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test 2 | 3 | test: 4 | python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint --pylint-rcfile=../pylintrc --mypy mrna.py tests/mrna_test.py 5 | 6 | all: 7 | ../bin/all_test.py mrna.py 8 | -------------------------------------------------------------------------------- /12_mrna/README.md: -------------------------------------------------------------------------------- 1 | # Inferring mRNA from Protein 2 | 3 | http://rosalind.info/problems/mrna/ 4 | 5 | Write a Python program called `mrna.py` that will accept either a protein sequence or the name of a file containing one as a positional argument, along with an optional "modulo" argument that defaults to 1,000,000. 6 | 7 | The program should print a "usage" statement for `-h` or `--help` flags: 8 | 9 | ``` 10 | $ ./mrna.py -h 11 | usage: mrna.py [-h] [-m int] str 12 | 13 | Inferring mRNA from Protein 14 | 15 | positional arguments: 16 | str Input protein or file 17 | 18 | optional arguments: 19 | -h, --help show this help message and exit 20 | -m int, --modulo int Modulo value (default: 1000000) 21 | ``` 22 | 23 | The output of the program should be the number of different RNA strings from which the protein could have been translated, modulo the given argument, e.g.: 24 | 25 | ``` 26 | $ ./mrna.py MA 27 | 12 28 | ``` 29 | 30 | A passing test suite looks like this: 31 | 32 | ``` 33 | $ make test 34 | python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint --mypy mrna.py tests/mrna_test.py 35 | ============================ test session starts ============================ 36 | ... 
#!/usr/bin/env python3
""" Infer mRNA from Protein """

import argparse
import math
import os
from typing import NamedTuple


class Args(NamedTuple):
    """ Command-line arguments """
    protein: str
    modulo: int


# --------------------------------------------------
def get_args() -> Args:
    """ Get command-line arguments

    The positional argument is taken as the protein itself unless it names
    an existing file, in which case the file's contents (without trailing
    whitespace) are used as the protein.
    """

    parser = argparse.ArgumentParser(
        description='Infer mRNA from Protein',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('protein',
                        metavar='protein',
                        type=str,
                        help='Input protein or file')

    parser.add_argument('-m',
                        '--modulo',
                        metavar='int',
                        type=int,
                        default=1000000,
                        help='Modulo value')

    args = parser.parse_args()

    if os.path.isfile(args.protein):
        # Close the handle right after reading instead of leaking it
        with open(args.protein) as fh:
            args.protein = fh.read().rstrip()

    return Args(args.protein, args.modulo)


# --------------------------------------------------
def main() -> None:
    """ Print the number of mRNA strings encoding the protein, modulo m """

    args = get_args()

    # Number of codons that encode each residue; '*' is the stop codon
    codons = {
        'A': 4, 'C': 2, 'D': 2, 'E': 2, 'F': 2, 'G': 4, 'H': 2, 'I': 3,
        'K': 2, 'L': 6, 'M': 1, 'N': 2, 'P': 4, 'Q': 2, 'R': 6, 'S': 6,
        'T': 4, 'V': 4, 'W': 1, 'Y': 2, '*': 3,
    }

    # A stop codon is appended to the protein; characters that are not in
    # the table contribute a factor of 1.
    nums = [codons.get(aa, 1) for aa in args.protein + '*']
    print(math.prod(nums) % args.modulo)


# --------------------------------------------------
if __name__ == '__main__':
    main()
MQLSGSYMIECNFRRKDYDPWSLWLDMAYMMRFKWFVTASAQYIMCFDAVLQSPFVHGRTHEPFHSQPTHTVLITQQYEWSREMTVCENEIDVFGVALYWEGMGVDCDFFHYRFICQAWKKLQSHYGIWTDLQGRHMFDTFFHRQQQWIDRANHLGGVNCSSQSTFFVMCHNKFVPRAAHVQTVIMTFTAQCIGVVCDRIMNQMPWLGPSLKEQFFRRNDDSTVPERSFFEMGTHHNLAQRATDTYCGFGDQSCHEHSFRSIMREHYVAMYNYGSLGVTFQAPGIFFLHIPPHGEFHCDKQSEARWDQQGYWQLLWTPEIEHIKYYLNASWSYSYFPETHHTNSNTETLSVFTLGLVPLHEENDPQIFAKYMVFAQHHYFKLGHKPRLGGDYFVAWHRVRTPFQMENGNDDCWRQSVPIPNHNRKMMTYPEWIRYAHGHGGTILQPRMHIRLYMCWSTCWHVMMGAIWWPKAPTERSTDCLCRCEHFQGCGILYKFMQWPEIDKGHEQSQIEHATFCQWTMATMNWFRKRSFDPCDSQTIKYRPLSPPIMSAAESVCKDHGQMDNAPITYDTGKKPYEHMMIENHRGANVTKIVPSTEDTMDLDLCPMVASNVPYPPAGNWSELKIMENITNDIHNKSYWVRYELQRVGAHHIPYVFYYVVHVQYRSGYTKTRSKNIPWYMQICRWGEIKLECAFMGAPWDICVFNHSYHVGFWWVIGPNVLEETFGGFHTFLYCWMPGAFMEHENEHFWHPIYLFMVQGWQLMVFWKFGKWTLHVIRCLHPQWQLVNYYPYCSCGAWPNAMYNALFKDSFTSVNCEWAVVHRKDETTAGSMEDQQPVCNCVDQKTGIVHQFARLRCSMHGSKQFDDNNRQEYVNDHFLVLRNLYSYSIKRFHRPRSFTTCNCSAHQFFPEGWWRYQIGCGRQHWFLDYTWVWRTLSRRDIANEDEIAHVRKSSRQYCFHVKHPQGGPQFPDPLNNYYVMLMSYFQNRLYSENVEWVW 2 | -------------------------------------------------------------------------------- /12_mrna/tests/inputs/3.txt: -------------------------------------------------------------------------------- 1 | 
MWWFYRGAIHSENRDQLVCCQRERIHDRQSIPQIHYTVDLVAAITPWNILAVLSHDRGTLYGINVKHYTGVRYCLKCCSKIFYEIMESHADTQMSTVGWVKYMWWYSLEEAIYLCLEQAQDATHMPIEKICLFEMVMYKPQVISYWAQFFCPEYTSIMDIGQSVCWTLLDNHINPYSSRPPQIGCWPFGEPATGHTWYGNNMNKAEIPLFYQYLDSEMRWEMAPDNTWWMMQCVYWRYFRHHPHEDLIHAVIAIYFLVCWWYQSFASRQEPLLSMEVCASRIIVMCGFTKYHEAIQWYRVLHENDSSSLSSGPGVCIQWMQKCHMTICKRCYHLDFRACDFVSDHGTFQRGTVHRTEVLFNHNSTYEDMSMYNQIILVTRICHYDVKKFTQTVYCGAFDSPSQMQTQCYKEMAYYNQWRNSTYRMIARLFWGWRFRPVLAEDIMMDMHARARCQMCNLWYEIYHWHTGKVDYIGIMQKWGTIPGLGPNWCEEHPNVVHFTGRWKHRGLCVQSYDCEPYFDLIDRNCVPNCILSNHGWLAKVCAPIGFTMSTQTTKCNIDSGQIYHCSKNQFWQRCAVMNMDWARYFPVMKSDPWHTIEINVVMQFPCAQIKHCFHSLMHSTWCHMKIAIWSPGRMGYGKEAILLYYPKQNQVFYESMCGPMQKTLHMPGIVRICQCTIMTCLYYSPKRWEPEGHHPSRFCFKCKDQKMQWYKKWGVNQMQPKKIPFSCGKMCHSLVNDIYNPRTVDAPFCGSAGEHYKPAYIMNPYVMMMPTGHRQMVFKKGDSHKGFVEKNHRWFMHEQCNNICGSYAMNHHGDGLGQDYAKNMGRMKDWYMEFVDMPFSHFVWNHFKGIVAGIMCCNNHHMYWCTFTLGRYFRALKLFSSIGKKLVGIQMDKYLWYTCEAGVGKCNCYSVHFVIVHLYHLDCYSFVLICLIGVFKNVWKCWQEHARINQHASKIAQRSFIYTAQHSRGWSCMGKRELKMCEKIIHKQVRKLIFQSNFW 2 | -------------------------------------------------------------------------------- /12_mrna/tests/mrna_test.py: -------------------------------------------------------------------------------- 1 | """ Tests for mrna.py """ 2 | 3 | import os 4 | import platform 5 | from subprocess import getstatusoutput 6 | 7 | PRG = './mrna.py' 8 | RUN = f'python {PRG}' if platform.system() == 'Windows' else PRG 9 | TEST1 = ('MA', '12') 10 | TEST2 = ('./tests/inputs/1.txt', '448832') 11 | TEST3 = ('./tests/inputs/2.txt', '415872') 12 | TEST4 = ('./tests/inputs/3.txt', '283264') 13 | 14 | 15 | # -------------------------------------------------- 16 | def test_exists() -> None: 17 | """ Program exists """ 18 | 19 | assert os.path.isfile(PRG) 20 | 21 | 22 | # -------------------------------------------------- 23 | def test_usage() -> None: 24 | """ Usage """ 25 | 26 | rv, out = getstatusoutput(RUN) 27 | assert rv != 0 28 | assert out.lower().startswith('usage:') 29 | 30 | 31 | # 
-------------------------------------------------- 32 | def run(protein: str, expected: str) -> None: 33 | """ Run test """ 34 | 35 | rv, out = getstatusoutput(f'{RUN} {protein}') 36 | assert rv == 0 37 | assert out.rstrip() == expected 38 | 39 | 40 | # -------------------------------------------------- 41 | def test_01() -> None: 42 | """ OK """ 43 | 44 | run(*TEST1) 45 | 46 | 47 | # -------------------------------------------------- 48 | def test_02() -> None: 49 | """ OK """ 50 | 51 | run(*TEST2) 52 | 53 | 54 | # -------------------------------------------------- 55 | def test_03() -> None: 56 | """ OK """ 57 | 58 | run(*TEST3) 59 | 60 | 61 | # -------------------------------------------------- 62 | def test_04() -> None: 63 | """ OK """ 64 | 65 | run(*TEST4) 66 | -------------------------------------------------------------------------------- /13_revp/.gitignore: -------------------------------------------------------------------------------- 1 | revp.py 2 | -------------------------------------------------------------------------------- /13_revp/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test 2 | 3 | test: 4 | python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint --pylint-rcfile=../pylintrc --mypy revp.py tests/revp_test.py 5 | 6 | all: 7 | ../bin/all_test.py revp.py 8 | -------------------------------------------------------------------------------- /13_revp/common.py: -------------------------------------------------------------------------------- 1 | """ Common functions """ 2 | 3 | from typing import Any, List, Tuple 4 | 5 | 6 | # -------------------------------------------------- 7 | def fst(tup: Tuple[Any, Any]) -> Any: 8 | """ Return first member of tuple """ 9 | 10 | return tup[0] 11 | 12 | 13 | # -------------------------------------------------- 14 | def test_fst() -> None: 15 | """Test fst""" 16 | 17 | assert fst((1, 'A')) == 1 18 | assert fst(('A', 1)) == 'A' 19 | 20 | 21 | # 
def find_kmers(seq: str, k: int) -> List[str]:
    """ Find k-mers in string

    Returns every substring of length ``k`` in ``seq``, ordered by start
    position. The result is empty when ``k`` is less than 1 or greater
    than the length of ``seq``.
    """

    # A non-positive k makes the slices below produce empty or truncated
    # strings rather than k-mers, so there is nothing valid to return.
    if k < 1:
        return []

    # Number of start positions; range() is empty when this is below 1
    n = len(seq) - k + 1
    return [seq[i:i + k] for i in range(n)]
Args(args.file) 31 | 32 | 33 | # -------------------------------------------------- 34 | def main() -> None: 35 | """ Make a jazz noise here """ 36 | 37 | args = get_args() 38 | for rec in SeqIO.parse(args.file, 'fasta'): 39 | for k in range(4, 13): 40 | kmers = find_kmers(str(rec.seq), k) 41 | revc = list(map(Seq.reverse_complement, kmers)) 42 | 43 | for pos, pair in enumerate(zip(kmers, revc)): 44 | if fst(pair) == snd(pair): 45 | print(pos + 1, k) 46 | 47 | 48 | # -------------------------------------------------- 49 | if __name__ == '__main__': 50 | main() 51 | -------------------------------------------------------------------------------- /13_revp/solution2_operator_eq_if.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ Locating Restriction Sites """ 3 | 4 | import argparse 5 | import operator 6 | from typing import NamedTuple, TextIO 7 | from Bio import SeqIO, Seq 8 | from common import find_kmers 9 | 10 | 11 | class Args(NamedTuple): 12 | """ Command-line arguments """ 13 | file: TextIO 14 | 15 | 16 | # -------------------------------------------------- 17 | def get_args() -> Args: 18 | """ Get command-line arguments """ 19 | 20 | parser = argparse.ArgumentParser( 21 | description='Locating Restriction Sites', 22 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 23 | 24 | parser.add_argument('file', 25 | help='Input FASTA file', 26 | metavar='FILE', 27 | type=argparse.FileType('rt')) 28 | 29 | args = parser.parse_args() 30 | 31 | return Args(args.file) 32 | 33 | 34 | # -------------------------------------------------- 35 | def main() -> None: 36 | """ Make a jazz noise here """ 37 | 38 | args = get_args() 39 | for rec in SeqIO.parse(args.file, 'fasta'): 40 | for k in range(4, 13): 41 | kmers = find_kmers(str(rec.seq), k) 42 | revc = list(map(Seq.reverse_complement, kmers)) 43 | 44 | for pos, pair in enumerate(zip(kmers, revc)): 45 | if operator.eq(*pair): 46 | print(pos + 1, k) 
47 | 48 | 49 | # -------------------------------------------------- 50 | if __name__ == '__main__': 51 | main() 52 | -------------------------------------------------------------------------------- /13_revp/solution2_operator_eq_lc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ Locating Restriction Sites """ 3 | 4 | import argparse 5 | import operator 6 | from typing import NamedTuple, TextIO 7 | from Bio import SeqIO, Seq 8 | from common import find_kmers 9 | 10 | 11 | class Args(NamedTuple): 12 | """ Command-line arguments """ 13 | file: TextIO 14 | 15 | 16 | # -------------------------------------------------- 17 | def get_args() -> Args: 18 | """ Get command-line arguments """ 19 | 20 | parser = argparse.ArgumentParser( 21 | description='Locating Restriction Sites', 22 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 23 | 24 | parser.add_argument('file', 25 | help='Input FASTA file', 26 | metavar='FILE', 27 | type=argparse.FileType('rt')) 28 | 29 | args = parser.parse_args() 30 | 31 | return Args(args.file) 32 | 33 | 34 | # -------------------------------------------------- 35 | def main() -> None: 36 | """ Make a jazz noise here """ 37 | 38 | args = get_args() 39 | for rec in SeqIO.parse(args.file, 'fasta'): 40 | for k in range(4, 13): 41 | kmers = find_kmers(str(rec.seq), k) 42 | revc = map(Seq.reverse_complement, kmers) 43 | pairs = enumerate(zip(kmers, revc)) 44 | 45 | for pos in [pos + 1 for pos, pair in pairs if operator.eq(*pair)]: 46 | print(pos, k) 47 | 48 | 49 | # -------------------------------------------------- 50 | if __name__ == '__main__': 51 | main() 52 | -------------------------------------------------------------------------------- /13_revp/solution3_revp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ Locating Restriction Sites """ 3 | 4 | import argparse 5 | import operator 6 | from 
typing import List, NamedTuple, TextIO 7 | from Bio import SeqIO, Seq 8 | from common import find_kmers 9 | 10 | 11 | class Args(NamedTuple): 12 | """ Command-line arguments """ 13 | file: TextIO 14 | 15 | 16 | # -------------------------------------------------- 17 | def get_args() -> Args: 18 | """ Get command-line arguments """ 19 | 20 | parser = argparse.ArgumentParser( 21 | description='Locating Restriction Sites', 22 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 23 | 24 | parser.add_argument('file', 25 | help='Input FASTA file', 26 | metavar='FILE', 27 | type=argparse.FileType('rt')) 28 | 29 | args = parser.parse_args() 30 | 31 | return Args(args.file) 32 | 33 | 34 | # -------------------------------------------------- 35 | def main() -> None: 36 | """ Make a jazz noise here """ 37 | 38 | args = get_args() 39 | for rec in SeqIO.parse(args.file, 'fasta'): 40 | for k in range(4, 13): 41 | for pos in revp(str(rec.seq), k): 42 | print(pos, k) 43 | 44 | # for k, pos in [(k, p) for k in range(4, 13) for p in revp(seq, k)]: 45 | # print(pos, k) 46 | 47 | 48 | # -------------------------------------------------- 49 | def revp(seq: str, k: int) -> List[int]: 50 | """ Return positions of reverse palindromes """ 51 | 52 | kmers = find_kmers(seq, k) 53 | revc = map(Seq.reverse_complement, kmers) 54 | pairs = enumerate(zip(kmers, revc)) 55 | return [pos + 1 for pos, pair in pairs if operator.eq(*pair)] 56 | 57 | 58 | # -------------------------------------------------- 59 | def test_revp() -> None: 60 | """ Test revp """ 61 | 62 | assert revp('CGCATGCATTGA', 4) == [3, 5] 63 | assert revp('CGCATGCATTGA', 5) == [] 64 | assert revp('CGCATGCATTGA', 6) == [2, 4] 65 | assert revp('CGCATGCATTGA', 7) == [] 66 | assert revp('CCCGCATGCATT', 4) == [5, 7] 67 | assert revp('CCCGCATGCATT', 5) == [] 68 | assert revp('CCCGCATGCATT', 6) == [4, 6] 69 | 70 | 71 | # -------------------------------------------------- 72 | if __name__ == '__main__': 73 | main() 74 | 
-------------------------------------------------------------------------------- /13_revp/tests/inputs/1.fa: -------------------------------------------------------------------------------- 1 | >Rosalind_24 2 | TCAATGCATGCGGGTCTATATGCAT 3 | -------------------------------------------------------------------------------- /13_revp/tests/inputs/1.fa.out: -------------------------------------------------------------------------------- 1 | 5 4 2 | 7 4 3 | 17 4 4 | 18 4 5 | 21 4 6 | 4 6 7 | 6 6 8 | 20 6 9 | -------------------------------------------------------------------------------- /13_revp/tests/inputs/2.fa: -------------------------------------------------------------------------------- 1 | >Rosalind_2002 2 | GCGCTCCTGATTTAATACGACGAGACGACCAGCCCCAGCCGAGATTTGTGCTGATCCGGT 3 | CAAGCAATCCCTTGAACACGTAGCGCGGACAGTGAGACCTAGGGCGCACGTGCAGGGTCG 4 | ATACTCACTGAAAGGCGCAGACGGATAGCTGGGCTCCGTAATCGTGTACGCAGTCGCCGG 5 | ATGGGCCTCACCAAGAATGACCTGTCACATCCTTCGCGAATGGCGCGGATTATCACTTAC 6 | TACATGGTAAGACATGTTCCTAACCGGGGAGATGCGTGGCTACGCCTGAACTTTCGTCTC 7 | TTTTGCGTGGTACCAGTGGAGGCGGTGATACCTGATATATGTTAGCGGTCCCAACGACTC 8 | TGGGTTCGGACCTATGGCTGGCAAAATACATCAATTCTCTAGTTGGCGCCCGAGTAGGAT 9 | ATATCCTTGCCGGTTATGAACACAGTTTCTACAAATTGGCTTACTGCGCTTTTGCGGCGT 10 | GACGACTATGCCTTAGCATCGTATCGCTTCTGCTGACATGGTCGCGGTCTGTCGATATCA 11 | TTAGGGAGATTATGCAATAACAACAAGTCGTACCGAGAGTTGCGTTATTACTCCAGGGTA 12 | CGAGGCAGGTATCGCGTAAGATAAGCAGCCACACATACACCTTCCACACTAGAACGTCTC 13 | CTTTCAGCGGAAATATGTCATCATCAGGGGTTAATGTTATGTGGGTCAACTCGATGGGCT 14 | AACCCCGCTTTAAGATCCGCTATGGTCGCCTTAGCATCTGCTCAAGTTTCTGTGTGACAT 15 | GGGCATTGGGGAAACCCGACATTGTGTAAACGAATGATCCGAGTGGGCGTGGATCATAAT 16 | AT 17 | -------------------------------------------------------------------------------- /13_revp/tests/inputs/2.fa.out: -------------------------------------------------------------------------------- 1 | 1 4 2 | 12 4 3 | 53 4 4 | 56 4 5 | 78 4 6 | 83 4 7 | 84 4 8 | 99 4 9 | 104 4 10 | 108 4 11 | 111 4 12 | 118 4 13 | 135 4 14 | 147 4 15 | 166 4 16 | 177 4 17 
| 184 4 18 | 215 4 19 | 223 4 20 | 224 4 21 | 243 4 22 | 253 4 23 | 264 4 24 | 310 4 25 | 335 4 26 | 336 4 27 | 337 4 28 | 393 4 29 | 399 4 30 | 406 4 31 | 419 4 32 | 420 4 33 | 421 4 34 | 430 4 35 | 454 4 36 | 466 4 37 | 517 4 38 | 523 4 39 | 532 4 40 | 535 4 41 | 553 4 42 | 570 4 43 | 598 4 44 | 613 4 45 | 649 4 46 | 654 4 47 | 673 4 48 | 691 4 49 | 711 4 50 | 730 4 51 | 734 4 52 | 778 4 53 | 816 4 54 | 832 4 55 | 839 4 56 | 98 6 57 | 107 6 58 | 214 6 59 | 252 6 60 | 309 6 61 | 335 6 62 | 405 6 63 | 419 6 64 | 534 6 65 | 106 8 66 | 213 8 67 | 308 8 68 | 418 8 69 | 417 10 70 | 416 12 71 | -------------------------------------------------------------------------------- /13_revp/tests/inputs/empty.fa: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyclark/biofx_python/65888923d35e6cf63bf0b43ecd7b51e42d261cd4/13_revp/tests/inputs/empty.fa -------------------------------------------------------------------------------- /13_revp/tests/inputs/empty.fa.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyclark/biofx_python/65888923d35e6cf63bf0b43ecd7b51e42d261cd4/13_revp/tests/inputs/empty.fa.out -------------------------------------------------------------------------------- /13_revp/tests/revp_test.py: -------------------------------------------------------------------------------- 1 | """ Tests for revp.py """ 2 | 3 | import os 4 | import platform 5 | import random 6 | import re 7 | import string 8 | from subprocess import getstatusoutput 9 | 10 | PRG = './revp.py' 11 | RUN = f'python {PRG}' if platform.system() == 'Windows' else PRG 12 | INPUT1 = './tests/inputs/1.fa' 13 | INPUT2 = './tests/inputs/2.fa' 14 | EMPTY = './tests/inputs/empty.fa' 15 | 16 | 17 | # -------------------------------------------------- 18 | def test_exists() -> None: 19 | """ Program exists """ 20 | 21 | assert os.path.isfile(PRG) 22 | 23 | 24 | # 
-------------------------------------------------- 25 | def test_usage() -> None: 26 | """ Usage """ 27 | 28 | rv, out = getstatusoutput(RUN) 29 | assert rv != 0 30 | assert out.lower().startswith('usage:') 31 | 32 | 33 | # -------------------------------------------------- 34 | def test_bad_file() -> None: 35 | """ Dies on bad file """ 36 | 37 | bad = random_string() 38 | rv, out = getstatusoutput(f'{RUN} {bad}') 39 | assert rv != 0 40 | assert out.lower().startswith('usage:') 41 | assert re.search(f"No such file or directory: '{bad}'", out) 42 | 43 | 44 | # -------------------------------------------------- 45 | def run(file: str) -> None: 46 | """ Run the test """ 47 | 48 | expected_file = file + '.out' 49 | assert os.path.isfile(expected_file) 50 | 51 | rv, out = getstatusoutput(f'{RUN} {file}') 52 | assert rv == 0 53 | 54 | expected = set(open(expected_file).read().splitlines()) 55 | assert set(out.splitlines()) == expected 56 | 57 | 58 | # -------------------------------------------------- 59 | def test_ok1() -> None: 60 | """ Runs ok """ 61 | 62 | run(INPUT1) 63 | 64 | 65 | # -------------------------------------------------- 66 | def test_ok2() -> None: 67 | """ Runs ok """ 68 | 69 | run(INPUT2) 70 | 71 | 72 | # -------------------------------------------------- 73 | def test_mepty() -> None: 74 | """ Runs ok """ 75 | 76 | run(EMPTY) 77 | 78 | 79 | # -------------------------------------------------- 80 | def random_string() -> str: 81 | """ Generate a random string """ 82 | 83 | k = random.randint(5, 10) 84 | return ''.join(random.choices(string.ascii_letters + string.digits, k=k)) 85 | -------------------------------------------------------------------------------- /14_orf/.gitignore: -------------------------------------------------------------------------------- 1 | orf.py 2 | -------------------------------------------------------------------------------- /14_orf/Makefile: -------------------------------------------------------------------------------- 
1 | .PHONY: test 2 | 3 | test: 4 | python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint --pylint-rcfile=../pylintrc --mypy orf.py tests/orf_test.py 5 | 6 | all: 7 | ../bin/all_test.py orf.py 8 | -------------------------------------------------------------------------------- /14_orf/tests/inputs/1.fa: -------------------------------------------------------------------------------- 1 | >Rosalind_99 2 | AGCCATGTAGCTAACTCAGGTTACATGGGGATGACCCCGCGACTTGGATTAGAGTCTCTTTTGGAATAAGCCTGAATGATCCGAGTAGCATCTCAG 3 | -------------------------------------------------------------------------------- /14_orf/tests/inputs/1.fa.out: -------------------------------------------------------------------------------- 1 | M 2 | MGMTPRLGLESLLE 3 | MLLGSFRLIPKETLIQVAGSSPCNLS 4 | MTPRLGLESLLE 5 | -------------------------------------------------------------------------------- /14_orf/tests/inputs/2.fa: -------------------------------------------------------------------------------- 1 | >Rosalind_2116 2 | CGTGTTGAAGCACCAGATCGTGGGTCCCCTCGCCACCAACAGTCTGAAGTAGGTTGCTGC 3 | TCTTAGCCAAAGACTGGGGGTTCGAGTCACGCCTGCAATTGTTACCACTTCGGCAGACAC 4 | CGCACGTATTGACGGGGGTATGCCATGCTATGGACAAACACAAGAAATGGACGCCCGTAT 5 | TTCTACTACCTTTGAGCTGCCCTATCTCACTATCTGTCAGGCATTTACATTCTGATGGCT 6 | GAGGGGGGTTATCGTACTTCCAATCACGGGTCGTCCCTCTGATTCTCCTTGCGGCTAATC 7 | CGCACACTCTATGACGACCCAAAAAGCTGGGATACAATGGGCTTACAATAAGTTGTTGAC 8 | AACTGCCTTTGGAGTAACTAAGCTAGATTAAGAAATTCCTGGTAGCAGTCAATAACGCAA 9 | GCTAGAATAAAAATACCCTCACTCTAAGCCGACCCTCTCTGTAATGTCAGAGGAAACGGA 10 | CTAGCTAGTCCGTTTCCTCTGACATGCGCTTATCCAGAATCGCACTCAACCGTTTCCCCA 11 | CTGGACAAAGAAACCGTCTGGCATACGACAAGTAACCTTGCAAAAGCAGGCCGGCGACGT 12 | TCTTGCTGAGTGAGGAGAAACCGTGGCCTCCGGGCGGGCATGACAAGAATGTTAGAGGCT 13 | CAGACAGACGGATAGTATGGTTTTCTGGTACACCAGGCGATAGAATTAAAGATCTATCAT 14 | TGCGGGTGCCCCAGTCAGCGCGAGGTGCAGGGAGCAGCCGAGCTTCGAATTGGCACCTTC 15 | CTATTATAACCGCCCCTCCGGGAGAGCCGGGTTCCGAGTACAGCTTGTCTTAACAGAAAG 16 | GCATGGAAGACGAGGTCCACCCAGCCTTCGGGGTTCGGCATCAAAGTCGGCAACAGAGTA 17 | 
CAGCCCAACAGGGGGAAGATTTACACGCGTACACCTTATATACCACCTTGCTGTCGCGGA 18 | CTAATCTA 19 | -------------------------------------------------------------------------------- /14_orf/tests/inputs/2.fa.out: -------------------------------------------------------------------------------- 1 | M 2 | MAEGGYRTSNHGSSL 3 | MAYPRQYVRCLPKW 4 | MDARISTTFELPYLTICQAFTF 5 | MDKHKKWTPVFLLPLSCPISLSVRHLHSDG 6 | MGLQ 7 | MIDL 8 | MLEAQTDG 9 | MLWTNTRNGRPYFYYL 10 | MPARRPRFLLTQQERRRPAFARLLVVCQTVSLSSGETVECDSG 11 | MPCYGQTQEMDARISTTFELPYLTICQAFTF 12 | MPDGFFVQWGNG 13 | MPDR 14 | MPFC 15 | MPNPEGWVDLVFHAFLLRQAVLGTRLSRRGGYNRKVPIRSSAAPCTSR 16 | MRLSRIALNRFPTGQRNRLAYDK 17 | MSEETD 18 | MTRMLEAQTDG 19 | MTTQKAGIQWAYNKLLTTAFGVTKLD 20 | MVFWYTRR 21 | -------------------------------------------------------------------------------- /14_orf/tests/inputs/3.fa: -------------------------------------------------------------------------------- 1 | >Rosalind_4620 2 | GTGGATATGACCTCACCAGAATATCGGACATCGGCGCGGGCTTGATTAAACAGCAAGACT 3 | GCATTAAAGGACGTTACCGGAGACCTACCAGGACAATAGATGCTCGCGGGACATGTTGGG 4 | TGGCCTTAATTACGGGTAAGAGTGAGCCTAGGATTTGGATCCAAGAAGCCTTCTGAGAAT 5 | AATCCAGCGGTCGTAAATTCCAGCCACCATGCGCATGAAGCATGCCTTCCGTATCAGTTT 6 | TCAGGCAAACAATTGTGTGTGGGTCAACTAAACTGCTGGTGATCACCATGAATGTCCTGG 7 | CGCTATGCTATTTTCTGCGGTTTAGAACGAATCGCCGCCCAGGAGGAGCGTCGGAAGTTG 8 | CAATCGATACTTATTATTATCTGCACGCAACCGCCTGTCGATGGCTATTGGCGTAGTCTG 9 | GGTATAGCTATACCCAGACTACGCCAATAGCCATCGCGCGCCGATCCATCGGAACCGGCT 10 | AGGTCACAGGAGCGAGTAGAACGGTACCGAAGCTGTATTGCTCGCCGTGCTTTATAATAT 11 | CGTTATCGGATGCTCGGATATCGGCTACATAGGATGCGGACTATACCAAGGCCCAGACAT 12 | GAATCGTTGTAGATTTTACATACCCCCCGAAATGCCAACAGCAGAAGGCTACCCGCATGG 13 | TGAAGGCCCTTCCACCTTATGAGTATACGAGTAAATGGCCAACGGCGTGGTAGCGACCGG 14 | ATTGGGCCGCTCCCTTCTTGCCTAATCCTCTAATCTATGGCCTGCCTAGCGCCGAGGGTG 15 | CCTGTGAGTTGATTCAATACGCTCTCAGCATTACATATAGCACCCTTTGAGGGTACTCGT 16 | AGAAGACATTGGAC 17 | -------------------------------------------------------------------------------- /14_orf/tests/inputs/3.fa.out: 
-------------------------------------------------------------------------------- 1 | M 2 | MACLAPRVPVS 3 | MAIGVVWV 4 | MANGVVATGLGRSLLA 5 | MDRRAMAIGVVWV 6 | MKHAFRISFQANNCVWVN 7 | MLAGHVGWP 8 | MLFSAV 9 | MLGGLNYG 10 | MLGYRLHRMRTIPRPRHESL 11 | MLHAHGGWNLRPLDYSQKASWIQILGSLLPVIKATQHVPRASIVLVGLR 12 | MLRAY 13 | MNRCRFYIPPEMPTAEGYPHGEGPSTL 14 | MNVLALCYFLRFRTNRRPGGASEVAIDTYYYLHATACRWLLA 15 | MPSVSVFRQTIVCGSTKLLVITMNVLALCYFLRFRTNRRPGGASEVAIDTYYYLHATACRWLLA 16 | MPTAEGYPHGEGPSTL 17 | MRMKHAFRISFQANNCVWVN 18 | MRMVAGIYDRWIILRRLLGSKS 19 | MRTIPRPRHESL 20 | MRVAFCCWHFGGYVKSTTIHVWALV 21 | MSGPWYSPHPM 22 | MSIRVNGQRRGSDRIGPLPSCLIL 23 | MSREHLLSW 24 | MSSTSTLKGCYM 25 | MSWRYAIFCGLERIAAQEERRKLQSILIIICTQPPVDGYWRSLGIAIPRLRQ 26 | MTSPEYRTSARA 27 | MVAGIYDRWIILRRLLGSKS 28 | MVITSSLVDPHTIVCLKTDTEGMLHAHGGWNLRPLDYSQKASWIQILGSLLPVIKATQHVPRASIVLVGLR 29 | MVKALPPYEYTSKWPTAW 30 | -------------------------------------------------------------------------------- /14_orf/tests/inputs/empty.fa: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyclark/biofx_python/65888923d35e6cf63bf0b43ecd7b51e42d261cd4/14_orf/tests/inputs/empty.fa -------------------------------------------------------------------------------- /15_seqmagique/.gitignore: -------------------------------------------------------------------------------- 1 | CAM_SMPL_GS108.fa* 2 | seqmagique.py 3 | -------------------------------------------------------------------------------- /15_seqmagique/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test 2 | 3 | data: 4 | wget ftp://ftp.imicrobe.us/projects/26/samples/578/CAM_SMPL_GS108.fa.gz 5 | 6 | test: 7 | python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint --pylint-rcfile=../pylintrc --mypy seqmagique.py tests/seqmagique_test.py 8 | 9 | all: 10 | ../bin/all_test.py seqmagique.py 11 | 
-------------------------------------------------------------------------------- /15_seqmagique/mk-outs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PRG="./seqmagique.py" 4 | DIR="./tests/inputs" 5 | INPUT1="${DIR}/1.fa" 6 | INPUT2="${DIR}/2.fa" 7 | EMPTY="${DIR}/empty.fa" 8 | 9 | $PRG $INPUT1 > "${INPUT1}.out" 10 | $PRG $INPUT2 > "${INPUT2}.out" 11 | $PRG $EMPTY > "${EMPTY}.out" 12 | $PRG $INPUT1 $INPUT2 $EMPTY > "$DIR/all.fa.out" 13 | 14 | STYLES="plain simple grid pipe orgtbl rst mediawiki latex latex_raw latex_booktabs" 15 | 16 | for FILE in $INPUT1 $INPUT2; do 17 | for STYLE in $STYLES; do 18 | $PRG -t $STYLE $FILE > "$FILE.${STYLE}.out" 19 | done 20 | done 21 | 22 | echo Done. 23 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/1.fa: -------------------------------------------------------------------------------- 1 | >SEQ0 2 | GGATAAAGCGAGAGGCTGGATCATGCACCAACTGCGTGCAACGAAGGAAT 3 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/1.fa.grid.out: -------------------------------------------------------------------------------- 1 | +---------------------+-----------+-----------+-----------+------------+ 2 | | name | min_len | max_len | avg_len | num_seqs | 3 | +=====================+===========+===========+===========+============+ 4 | | ./tests/inputs/1.fa | 50 | 50 | 50.00 | 1 | 5 | +---------------------+-----------+-----------+-----------+------------+ 6 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/1.fa.latex.out: -------------------------------------------------------------------------------- 1 | \begin{tabular}{lrrrr} 2 | \hline 3 | name & min\_len & max\_len & avg\_len & num\_seqs \\ 4 | \hline 5 | ./tests/inputs/1.fa & 50 & 50 & 50.00 & 1 \\ 6 | \hline 7 | \end{tabular} 8 | 
-------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/1.fa.latex_booktabs.out: -------------------------------------------------------------------------------- 1 | \begin{tabular}{lrrrr} 2 | \toprule 3 | name & min\_len & max\_len & avg\_len & num\_seqs \\ 4 | \midrule 5 | ./tests/inputs/1.fa & 50 & 50 & 50.00 & 1 \\ 6 | \bottomrule 7 | \end{tabular} 8 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/1.fa.latex_raw.out: -------------------------------------------------------------------------------- 1 | \begin{tabular}{lrrrr} 2 | \hline 3 | name & min_len & max_len & avg_len & num_seqs \\ 4 | \hline 5 | ./tests/inputs/1.fa & 50 & 50 & 50.00 & 1 \\ 6 | \hline 7 | \end{tabular} 8 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/1.fa.mediawiki.out: -------------------------------------------------------------------------------- 1 | {| class="wikitable" style="text-align: left;" 2 | |+ 3 | |- 4 | ! name !! align="right"| min_len !! align="right"| max_len !! align="right"| avg_len !! 
align="right"| num_seqs 5 | |- 6 | | ./tests/inputs/1.fa || align="right"| 50 || align="right"| 50 || align="right"| 50.00 || align="right"| 1 7 | |} 8 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/1.fa.orgtbl.out: -------------------------------------------------------------------------------- 1 | | name | min_len | max_len | avg_len | num_seqs | 2 | |---------------------+-----------+-----------+-----------+------------| 3 | | ./tests/inputs/1.fa | 50 | 50 | 50.00 | 1 | 4 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/1.fa.out: -------------------------------------------------------------------------------- 1 | name min_len max_len avg_len num_seqs 2 | ./tests/inputs/1.fa 50 50 50.00 1 3 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/1.fa.pipe.out: -------------------------------------------------------------------------------- 1 | | name | min_len | max_len | avg_len | num_seqs | 2 | |:--------------------|----------:|----------:|----------:|-----------:| 3 | | ./tests/inputs/1.fa | 50 | 50 | 50.00 | 1 | 4 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/1.fa.plain.out: -------------------------------------------------------------------------------- 1 | name min_len max_len avg_len num_seqs 2 | ./tests/inputs/1.fa 50 50 50.00 1 3 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/1.fa.rst.out: -------------------------------------------------------------------------------- 1 | =================== ========= ========= ========= ========== 2 | name min_len max_len avg_len num_seqs 3 | =================== ========= ========= ========= ========== 4 | ./tests/inputs/1.fa 50 50 50.00 1 5 | =================== ========= ========= ========= ========== 6 | 
-------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/1.fa.simple.out: -------------------------------------------------------------------------------- 1 | name min_len max_len avg_len num_seqs 2 | ------------------- --------- --------- --------- ---------- 3 | ./tests/inputs/1.fa 50 50 50.00 1 4 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/2.fa: -------------------------------------------------------------------------------- 1 | >SEQ0 2 | GGGCATTGCTAACGATACGTACTCTATACGCCTAATGAATAAGGAATTTTGACTAGATAGAGTCAGACTCGAGATATCC 3 | >SEQ1 4 | TCCTAAATACTATCCTGCTGTGCAAGAGCCCCCACTCCCCACTGAGAATCTCCTGTGCG 5 | >SEQ2 6 | AATTTTTAGTCCTCCATATTTTCGCGGTGCTCAACCGAATCAAGTTCGTATCACGACAC 7 | >SEQ3 8 | GCAAGGTGGTCTGTCTTGATGTAGTTCTGGCAAGGTTTATGTATCGTGTTAACCTACTTTCTCAAGGCTTCCAA 9 | >SEQ4 10 | GAGATCGACCTACCTCGAGAGGAAGCTCCGCAAAATATTTAGGGAAAGG 11 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/2.fa.grid.out: -------------------------------------------------------------------------------- 1 | +---------------------+-----------+-----------+-----------+------------+ 2 | | name | min_len | max_len | avg_len | num_seqs | 3 | +=====================+===========+===========+===========+============+ 4 | | ./tests/inputs/2.fa | 49 | 79 | 64.00 | 5 | 5 | +---------------------+-----------+-----------+-----------+------------+ 6 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/2.fa.latex.out: -------------------------------------------------------------------------------- 1 | \begin{tabular}{lrrrr} 2 | \hline 3 | name & min\_len & max\_len & avg\_len & num\_seqs \\ 4 | \hline 5 | ./tests/inputs/2.fa & 49 & 79 & 64.00 & 5 \\ 6 | \hline 7 | \end{tabular} 8 | -------------------------------------------------------------------------------- 
/15_seqmagique/tests/inputs/2.fa.latex_booktabs.out: -------------------------------------------------------------------------------- 1 | \begin{tabular}{lrrrr} 2 | \toprule 3 | name & min\_len & max\_len & avg\_len & num\_seqs \\ 4 | \midrule 5 | ./tests/inputs/2.fa & 49 & 79 & 64.00 & 5 \\ 6 | \bottomrule 7 | \end{tabular} 8 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/2.fa.latex_raw.out: -------------------------------------------------------------------------------- 1 | \begin{tabular}{lrrrr} 2 | \hline 3 | name & min_len & max_len & avg_len & num_seqs \\ 4 | \hline 5 | ./tests/inputs/2.fa & 49 & 79 & 64.00 & 5 \\ 6 | \hline 7 | \end{tabular} 8 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/2.fa.mediawiki.out: -------------------------------------------------------------------------------- 1 | {| class="wikitable" style="text-align: left;" 2 | |+ 3 | |- 4 | ! name !! align="right"| min_len !! align="right"| max_len !! align="right"| avg_len !! 
align="right"| num_seqs 5 | |- 6 | | ./tests/inputs/2.fa || align="right"| 49 || align="right"| 79 || align="right"| 64.00 || align="right"| 5 7 | |} 8 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/2.fa.orgtbl.out: -------------------------------------------------------------------------------- 1 | | name | min_len | max_len | avg_len | num_seqs | 2 | |---------------------+-----------+-----------+-----------+------------| 3 | | ./tests/inputs/2.fa | 49 | 79 | 64.00 | 5 | 4 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/2.fa.out: -------------------------------------------------------------------------------- 1 | name min_len max_len avg_len num_seqs 2 | ./tests/inputs/2.fa 49 79 64.00 5 3 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/2.fa.pipe.out: -------------------------------------------------------------------------------- 1 | | name | min_len | max_len | avg_len | num_seqs | 2 | |:--------------------|----------:|----------:|----------:|-----------:| 3 | | ./tests/inputs/2.fa | 49 | 79 | 64.00 | 5 | 4 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/2.fa.plain.out: -------------------------------------------------------------------------------- 1 | name min_len max_len avg_len num_seqs 2 | ./tests/inputs/2.fa 49 79 64.00 5 3 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/2.fa.rst.out: -------------------------------------------------------------------------------- 1 | =================== ========= ========= ========= ========== 2 | name min_len max_len avg_len num_seqs 3 | =================== ========= ========= ========= ========== 4 | ./tests/inputs/2.fa 49 79 64.00 5 5 | =================== ========= ========= ========= ========== 6 | 
-------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/2.fa.simple.out: -------------------------------------------------------------------------------- 1 | name min_len max_len avg_len num_seqs 2 | ------------------- --------- --------- --------- ---------- 3 | ./tests/inputs/2.fa 49 79 64.00 5 4 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/all.fa.out: -------------------------------------------------------------------------------- 1 | name min_len max_len avg_len num_seqs 2 | ./tests/inputs/1.fa 50 50 50.00 1 3 | ./tests/inputs/2.fa 49 79 64.00 5 4 | ./tests/inputs/empty.fa 0 0 0.00 0 5 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/checksums.md5: -------------------------------------------------------------------------------- 1 | c383c386a44d83c37ae287f0aa5ae11d 1.fa 2 | 863ebc53e28fdfe6689278e40992db9d 2.fa 3 | d41d8cd98f00b204e9800998ecf8427e empty.fa 4 | -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/empty.fa: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyclark/biofx_python/65888923d35e6cf63bf0b43ecd7b51e42d261cd4/15_seqmagique/tests/inputs/empty.fa -------------------------------------------------------------------------------- /15_seqmagique/tests/inputs/empty.fa.out: -------------------------------------------------------------------------------- 1 | name min_len max_len avg_len num_seqs 2 | ./tests/inputs/empty.fa 0 0 0.00 0 3 | -------------------------------------------------------------------------------- /15_seqmagique/unit.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import mock_open 2 | from seqmagique import process 3 | 4 | 5 | # 
-------------------------------------------------- 6 | def test_process() -> None: 7 | """ Test process """ 8 | 9 | empty = process(mock_open(read_data='')()) 10 | assert empty.min_len == 0 11 | assert empty.max_len == 0 12 | assert empty.avg_len == 0 13 | assert empty.num_seqs == 0 14 | 15 | one = process(mock_open(read_data='>SEQ0\nAAA')()) 16 | assert one.min_len == 3 17 | assert one.max_len == 3 18 | assert one.avg_len == 3 19 | assert one.num_seqs == 1 20 | 21 | two = process(mock_open(read_data='>SEQ0\nAAA\n>SEQ1\nCCCC')()) 22 | assert two.min_len == 3 23 | assert two.max_len == 4 24 | assert two.avg_len == 3.5 25 | assert two.num_seqs == 2 26 | -------------------------------------------------------------------------------- /16_fastx_grep/.gitignore: -------------------------------------------------------------------------------- 1 | fastx_grep.py 2 | out.* 3 | -------------------------------------------------------------------------------- /16_fastx_grep/.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyclark/biofx_python/65888923d35e6cf63bf0b43ecd7b51e42d261cd4/16_fastx_grep/.out -------------------------------------------------------------------------------- /16_fastx_grep/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test 2 | 3 | test: 4 | python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint --pylint-rcfile=../pylintrc --mypy fastx_grep.py tests/fastx_grep_test.py 5 | 6 | all: 7 | ../bin/all_test.py fastx_grep.py 8 | -------------------------------------------------------------------------------- /16_fastx_grep/mk-outs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -u 4 | 5 | PRG="./fastx_grep.py" 6 | EMPTY="./tests/inputs/empty.fa" 7 | LSU="./tests/inputs/lsu.fq" 8 | LSU_FA="./tests/inputs/lsu.fa" 9 | 10 | rm -f ./tests/inputs/*.out 11 | 12 
| "$PRG" -o "$EMPTY.out" XXX "$EMPTY" 13 | 14 | "$PRG" -o "$LSU.upper.out" LSU "$LSU" 15 | "$PRG" -o "$LSU.lower.out" lsu "$LSU" 16 | 17 | "$PRG" -o "$LSU.i.upper.out" -i LSU "$LSU" 18 | "$PRG" -o "$LSU.i.lower.out" -i lsu "$LSU" 19 | 20 | "$PRG" -O fasta -o "$LSU.fa.out" LSU "$LSU" 21 | "$PRG" -O fasta-2line -o "$LSU.2fa.out" LSU "$LSU" 22 | 23 | echo "Done." 24 | -------------------------------------------------------------------------------- /16_fastx_grep/tests/inputs/empty.fa: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyclark/biofx_python/65888923d35e6cf63bf0b43ecd7b51e42d261cd4/16_fastx_grep/tests/inputs/empty.fa -------------------------------------------------------------------------------- /16_fastx_grep/tests/inputs/empty.fa.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyclark/biofx_python/65888923d35e6cf63bf0b43ecd7b51e42d261cd4/16_fastx_grep/tests/inputs/empty.fa.out -------------------------------------------------------------------------------- /16_fastx_grep/tests/inputs/lsu.fa: -------------------------------------------------------------------------------- 1 | >ITSLSUmock2p.ITS_M01380:138:000000000-C9GKM:1:1101:14440:2042 2:N:0 2 | CAAGTTACTTCCTCTAAATGACCAAGCCTAGTGTAGAACCATGTCGTCAGTGTCAGTCTG 3 | AGTGTAGATCTCGGTGGTCGCCGTATCATTAAAAAAAAAAATGTAATACTACTAGTAATT 4 | ATTAATATTATAATTTTGTCTATTAGCATCTTATTATAGATAGAAGATATTATTCATATT 5 | TCACTATCTTATACTGATATCAGCTTTATCAGATCACACTCTAGTGAAGATTGTTCTTAA 6 | CTGAAATTTCCTTCTTCATACAGACACATTAATCTTACCTA 7 | >ITSLSUmock2p.ITS_M01384:138:000000000-C9GKM:1:1101:14440:2043 2:N:0 8 | ACCCGTCAATTTCTTTAAGTTTTAGCCTTGCGACCGTACTCCCCAGGCGGTGCACTTAGT 9 | GGTTTTCCGGCGACCCGGGCGGCGTCAGAGCCCCCCAAGTCTCGTGCACATCGTTTACGG 10 | CGTGGACTACCAGGGTATCTAATCCTGTTTGATCCCCACGCTTTCGTGCCTCAGCGTCAG 11 | TACCGGCCCAGCCACCCGTCTTCACCTTCGGCGTTCCTGTAGATATCTACGCATTTCACC 12 | 
GCTACACCTACAGTTCCGGTGGCGCCTACCGGCCTCAAGAAACGCAGTATGCCCAGCTAT 13 | T 14 | -------------------------------------------------------------------------------- /16_fastx_grep/tests/inputs/lsu.fq.2fa.out: -------------------------------------------------------------------------------- 1 | >ITSLSUmock2p.ITS_M01380:138:000000000-C9GKM:1:1101:14440:2042 2:N:0 2 | CAAGTTACTTCCTCTAAATGACCAAGCCTAGTGTAGAACCATGTCGTCAGTGTCAGTCTGAGTGTAGATCTCGGTGGTCGCCGTATCATTAAAAAAAAAAATGTAATACTACTAGTAATTATTAATATTATAATTTTGTCTATTAGCATCTTATTATAGATAGAAGATATTATTCATATTTCACTATCTTATACTGATATCAGCTTTATCAGATCACACTCTAGTGAAGATTGTTCTTAACTGAAATTTCCTTCTTCATACAGACACATTAATCTTACCTA 3 | >ITSLSUmock2p.ITS_M01384:138:000000000-C9GKM:1:1101:14440:2043 2:N:0 4 | ACCCGTCAATTTCTTTAAGTTTTAGCCTTGCGACCGTACTCCCCAGGCGGTGCACTTAGTGGTTTTCCGGCGACCCGGGCGGCGTCAGAGCCCCCCAAGTCTCGTGCACATCGTTTACGGCGTGGACTACCAGGGTATCTAATCCTGTTTGATCCCCACGCTTTCGTGCCTCAGCGTCAGTACCGGCCCAGCCACCCGTCTTCACCTTCGGCGTTCCTGTAGATATCTACGCATTTCACCGCTACACCTACAGTTCCGGTGGCGCCTACCGGCCTCAAGAAACGCAGTATGCCCAGCTATT 5 | -------------------------------------------------------------------------------- /16_fastx_grep/tests/inputs/lsu.fq.fa.out: -------------------------------------------------------------------------------- 1 | >ITSLSUmock2p.ITS_M01380:138:000000000-C9GKM:1:1101:14440:2042 2:N:0 2 | CAAGTTACTTCCTCTAAATGACCAAGCCTAGTGTAGAACCATGTCGTCAGTGTCAGTCTG 3 | AGTGTAGATCTCGGTGGTCGCCGTATCATTAAAAAAAAAAATGTAATACTACTAGTAATT 4 | ATTAATATTATAATTTTGTCTATTAGCATCTTATTATAGATAGAAGATATTATTCATATT 5 | TCACTATCTTATACTGATATCAGCTTTATCAGATCACACTCTAGTGAAGATTGTTCTTAA 6 | CTGAAATTTCCTTCTTCATACAGACACATTAATCTTACCTA 7 | >ITSLSUmock2p.ITS_M01384:138:000000000-C9GKM:1:1101:14440:2043 2:N:0 8 | ACCCGTCAATTTCTTTAAGTTTTAGCCTTGCGACCGTACTCCCCAGGCGGTGCACTTAGT 9 | GGTTTTCCGGCGACCCGGGCGGCGTCAGAGCCCCCCAAGTCTCGTGCACATCGTTTACGG 10 | CGTGGACTACCAGGGTATCTAATCCTGTTTGATCCCCACGCTTTCGTGCCTCAGCGTCAG 11 | TACCGGCCCAGCCACCCGTCTTCACCTTCGGCGTTCCTGTAGATATCTACGCATTTCACC 12 | 
GCTACACCTACAGTTCCGGTGGCGCCTACCGGCCTCAAGAAACGCAGTATGCCCAGCTAT 13 | T 14 | -------------------------------------------------------------------------------- /16_fastx_grep/tests/inputs/lsu.fq.i.lower.out: -------------------------------------------------------------------------------- 1 | @ITSLSUmock2p.ITS_M01380:138:000000000-C9GKM:1:1101:14440:2042 2:N:0 2 | CAAGTTACTTCCTCTAAATGACCAAGCCTAGTGTAGAACCATGTCGTCAGTGTCAGTCTGAGTGTAGATCTCGGTGGTCGCCGTATCATTAAAAAAAAAAATGTAATACTACTAGTAATTATTAATATTATAATTTTGTCTATTAGCATCTTATTATAGATAGAAGATATTATTCATATTTCACTATCTTATACTGATATCAGCTTTATCAGATCACACTCTAGTGAAGATTGTTCTTAACTGAAATTTCCTTCTTCATACAGACACATTAATCTTACCTA 3 | + 4 | EFGGGGGGGGGCGGGGGFCFFFGGGGGFGGGGGGGGGGGFGGGGGGGFGFFFCFGGFFGGGGGGGGGFGGGGFGGGDGEGGGGGGF7=9:A@FBF>DGCGFF=75C=DBCF74DFFFF*/91B>9>?9?>>:B?>F>FBB:??200:699>?AA2)34F?2))54 9 | -------------------------------------------------------------------------------- /16_fastx_grep/tests/inputs/lsu.fq.i.upper.out: -------------------------------------------------------------------------------- 1 | @ITSLSUmock2p.ITS_M01380:138:000000000-C9GKM:1:1101:14440:2042 2:N:0 2 | CAAGTTACTTCCTCTAAATGACCAAGCCTAGTGTAGAACCATGTCGTCAGTGTCAGTCTGAGTGTAGATCTCGGTGGTCGCCGTATCATTAAAAAAAAAAATGTAATACTACTAGTAATTATTAATATTATAATTTTGTCTATTAGCATCTTATTATAGATAGAAGATATTATTCATATTTCACTATCTTATACTGATATCAGCTTTATCAGATCACACTCTAGTGAAGATTGTTCTTAACTGAAATTTCCTTCTTCATACAGACACATTAATCTTACCTA 3 | + 4 | EFGGGGGGGGGCGGGGGFCFFFGGGGGFGGGGGGGGGGGFGGGGGGGFGFFFCFGGFFGGGGGGGGGFGGGGFGGGDGEGGGGGGF7=9:A@FBF>DGCGFF=75C=DBCF74DFFFF*/91B>9>?9?>>:B?>F>FBB:??200:699>?AA2)34F?2))54 9 | -------------------------------------------------------------------------------- /16_fastx_grep/tests/inputs/lsu.fq.lower.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyclark/biofx_python/65888923d35e6cf63bf0b43ecd7b51e42d261cd4/16_fastx_grep/tests/inputs/lsu.fq.lower.out -------------------------------------------------------------------------------- 
/16_fastx_grep/tests/inputs/lsu.fq.upper.out: -------------------------------------------------------------------------------- 1 | @ITSLSUmock2p.ITS_M01380:138:000000000-C9GKM:1:1101:14440:2042 2:N:0 2 | CAAGTTACTTCCTCTAAATGACCAAGCCTAGTGTAGAACCATGTCGTCAGTGTCAGTCTGAGTGTAGATCTCGGTGGTCGCCGTATCATTAAAAAAAAAAATGTAATACTACTAGTAATTATTAATATTATAATTTTGTCTATTAGCATCTTATTATAGATAGAAGATATTATTCATATTTCACTATCTTATACTGATATCAGCTTTATCAGATCACACTCTAGTGAAGATTGTTCTTAACTGAAATTTCCTTCTTCATACAGACACATTAATCTTACCTA 3 | + 4 | EFGGGGGGGGGCGGGGGFCFFFGGGGGFGGGGGGGGGGGFGGGGGGGFGFFFCFGGFFGGGGGGGGGFGGGGFGGGDGEGGGGGGF7=9:A@FBF>DGCGFF=75C=DBCF74DFFFF*/91B>9>?9?>>:B?>F>FBB:??200:699>?AA2)34F?2))54 9 | -------------------------------------------------------------------------------- /16_fastx_grep/tests/inputs/lsu.fx: -------------------------------------------------------------------------------- 1 | @ITSLSUmock2p.ITS_M01380:138:000000000-C9GKM:1:1101:14440:2042 2:N:0 2 | CAAGTTACTTCCTCTAAATGACCAAGCCTAGTGTAGAACCATGTCGTCAGTGTCAGTCTGAGTGTAGATCTCGGTGGTCGCCGTATCATTAAAAAAAAAAATGTAATACTACTAGTAATTATTAATATTATAATTTTGTCTATTAGCATCTTATTATAGATAGAAGATATTATTCATATTTCACTATCTTATACTGATATCAGCTTTATCAGATCACACTCTAGTGAAGATTGTTCTTAACTGAAATTTCCTTCTTCATACAGACACATTAATCTTACCTA 3 | + 4 | EFGGGGGGGGGCGGGGGFCFFFGGGGGFGGGGGGGGGGGFGGGGGGGFGFFFCFGGFFGGGGGGGGGFGGGGFGGGDG1 2 | TTGACATGGGAAGAGTAATCCATCCATGAATTTAAAAAGATTCCACCTGCCATCCCAATAGTTTCTTT 3 | -------------------------------------------------------------------------------- /17_synth/tests/inputs/CAM_SMPL_GS108.fa.n1.k5.out: -------------------------------------------------------------------------------- 1 | >1 2 | ACTTGCTGGACCTGTATCATTAATACTAAAAAATTTTTTATTTCCACCTGTAATACATCATTGTTTGT 3 | -------------------------------------------------------------------------------- /17_synth/tests/inputs/CAM_SMPL_GS108.fa.n1.m20.x40.out: -------------------------------------------------------------------------------- 1 | >1 2 | AACTATGTTTTGGTTAATATTAAAAACGAATCCGCGTT 3 | 
-------------------------------------------------------------------------------- /17_synth/tests/inputs/CAM_SMPL_GS108.fa.n1.out: -------------------------------------------------------------------------------- 1 | >1 2 | AACTATGTTTTGGTTAATATTAAAAACGAATCCGCGTTATCATCAAACGCTTAAAACTACCCTCGGAA 3 | -------------------------------------------------------------------------------- /17_synth/tests/inputs/lsu.fq.n1.out: -------------------------------------------------------------------------------- 1 | >1 2 | TACCTCCCACACTTAGTGGTTTTCCGGCGACCCGGGCGGCGTCAGAGCCC 3 | -------------------------------------------------------------------------------- /17_synth/tests/inputs/mult.n10.out: -------------------------------------------------------------------------------- 1 | >1 2 | TATTGCTCTTTTAGCTTGGCAAGGTATCAAGCTTTTTATTTTGTGTTAGAGACACTTCAAGCTGTTAG 3 | >2 4 | AAGGGAAATAGTCCATCCACTGTATAAGAATAATTTTAGCCAATCATTAGTGTCATAAATT 5 | >3 6 | CATGAAATTTTAGATGTAATGCTTGGTGAAAATCATAAGTTCTATGAATGATATTAGAAAGGCCATTTTC 7 | >4 8 | ATAGCACTAATAAATTAGTCTTGAAAATGGTTTGGGAACAAATGAAGAAAATCATAATGCAGCTCCCATTCCG 9 | >5 10 | TCTGTGATATCCAGTGGAGATTTAATAACTTGGCAACACGGGCATTGTATAAAA 11 | >6 12 | GAAAGAGCTGGGTTAAGTATGCAACTTGCAGCAGCCATTGTGTGGCAAATAA 13 | >7 14 | TCATTGCTTTTTGTTCCAATCTGTCAGCCCCCTGATGAATGTATCAATTTCTT 15 | >8 16 | GAGACTGCCGGTGCAAACCGNGAGGAAGGTGGGGTTAAACTTATCAAAATCTTCAATAGCGAAGAACAGATG 17 | >9 18 | AAAGTTGCTAAAACTTGTTTTCTAATCATTGTAGATTCTATAGGTTAAGTCCTTCTTCTTTAAGATATATATTT 19 | >10 20 | CTTAATCTTCCTCGAAGGAAAATATTCCATAAGCAACAAGTAAGAGACCAAAAAGAAAATATTTAACAAAACTAT 21 | -------------------------------------------------------------------------------- /17_synth/tests/unit_test.py: -------------------------------------------------------------------------------- 1 | """ Unit tests for synth.py """ 2 | 3 | import io 4 | import random 5 | from synth import read_training, find_kmers, gen_seq 6 | 7 | 8 | # -------------------------------------------------- 9 | def test_gen_seq() -> None: 10 | """ Test gen_seq """ 11 | 
12 | chain = { 13 | 'ACG': { 14 | 'T': 0.5, 15 | 'C': 0.5 16 | }, 17 | 'CGT': { 18 | 'A': 1.0 19 | }, 20 | 'GTA': { 21 | 'C': 1.0 22 | }, 23 | 'TAC': { 24 | 'G': 1.0 25 | } 26 | } 27 | 28 | state = random.getstate() 29 | random.seed(1) 30 | assert gen_seq(chain, k=4, min_len=6, max_len=12) == 'CGTACGTACG' 31 | random.seed(2) 32 | assert gen_seq(chain, k=4, min_len=5, max_len=10) == 'ACGTA' 33 | random.setstate(state) 34 | 35 | 36 | # -------------------------------------------------- 37 | def test_read_training() -> None: 38 | """ Test read_training """ 39 | 40 | f1 = io.StringIO('>1\nACGTACGC\n') 41 | assert read_training([f1], 'fasta', 4) == { 42 | 'ACG': { 43 | 'T': 0.5, 44 | 'C': 0.5 45 | }, 46 | 'CGT': { 47 | 'A': 1.0 48 | }, 49 | 'GTA': { 50 | 'C': 1.0 51 | }, 52 | 'TAC': { 53 | 'G': 1.0 54 | } 55 | } 56 | 57 | f2 = io.StringIO('@1\nACGTACGC\n+\n!!!!!!!!') 58 | assert read_training([f2], 'fastq', 5) == { 59 | 'ACGT': { 60 | 'A': 1.0 61 | }, 62 | 'CGTA': { 63 | 'C': 1.0 64 | }, 65 | 'GTAC': { 66 | 'G': 1.0 67 | }, 68 | 'TACG': { 69 | 'C': 1.0 70 | } 71 | } 72 | 73 | 74 | # -------------------------------------------------- 75 | def test_find_kmers() -> None: 76 | """ Test find_kmers """ 77 | 78 | assert find_kmers('ACTG', 2) == ['AC', 'CT', 'TG'] 79 | assert find_kmers('ACTG', 3) == ['ACT', 'CTG'] 80 | assert find_kmers('ACTG', 4) == ['ACTG'] 81 | -------------------------------------------------------------------------------- /18_fastx_sampler/.gitignore: -------------------------------------------------------------------------------- 1 | tests/inputs/n*.fa 2 | sampler.py 3 | -------------------------------------------------------------------------------- /18_fastx_sampler/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY = test 2 | SYNTH = ../17_synth/synth.py 3 | TRAIN = ../17_synth/tests/inputs/*.fa 4 | 5 | test: fasta 6 | python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint 
--pylint-rcfile=../pylintrc --mypy sampler.py tests/sampler_test.py 7 | 8 | all: 9 | ../bin/all_test.py sampler.py 10 | 11 | fasta: n1k.fa n10k.fa n100k.fa 12 | 13 | n1k.fa: 14 | $(SYNTH) -s 1 -n 1000 -m 75 -x 200 -o tests/inputs/n1k.fa $(TRAIN) 15 | 16 | n10k.fa: 17 | $(SYNTH) -s 1 -n 10000 -m 75 -x 200 -o tests/inputs/n10k.fa $(TRAIN) 18 | 19 | n100k.fa: 20 | $(SYNTH) -s 1 -n 100000 -m 75 -x 200 -o tests/inputs/n100k.fa $(TRAIN) 21 | -------------------------------------------------------------------------------- /18_fastx_sampler/requirements.txt: -------------------------------------------------------------------------------- 1 | biopython 2 | -------------------------------------------------------------------------------- /18_fastx_sampler/tests/inputs/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyclark/biofx_python/65888923d35e6cf63bf0b43ecd7b51e42d261cd4/18_fastx_sampler/tests/inputs/.gitkeep -------------------------------------------------------------------------------- /19_blastomatic/.gitignore: -------------------------------------------------------------------------------- 1 | blastomatic.py 2 | -------------------------------------------------------------------------------- /19_blastomatic/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test 2 | 3 | test: 4 | python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint --pylint-rcfile=../pylintrc --mypy blastomatic.py tests/*_test.py 5 | 6 | all: 7 | ../bin/all_test.py blastomatic.py 8 | 9 | taxdump: 10 | wget ftp://ftp.ncbi.nih.gov/pub/taxonomy/gi_taxid_nucl.dmp.gz 11 | -------------------------------------------------------------------------------- /19_blastomatic/tests/unit_test.py: -------------------------------------------------------------------------------- 1 | """ Unit tests for blastomatic """ 2 | 3 | from blastomatic import guess_delimiter 4 | 5 | 6 | 
# -------------------------------------------------- 7 | def test_guess_delimiter() -> None: 8 | """ Test guess_delimiter """ 9 | 10 | assert guess_delimiter('/foo/bar.csv') == ',' 11 | assert guess_delimiter('/foo/bar.txt') == '\t' 12 | assert guess_delimiter('/foo/bar.tsv') == '\t' 13 | assert guess_delimiter('/foo/bar.tab') == '\t' 14 | assert guess_delimiter('') == '\t' 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Ken Youens-Clark 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | install: 2 | python3 -m pip install -r requirements.txt 3 | 4 | test: 5 | ./bin/uber_test.sh 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Mastering Python for Bioinformatics 2 | 3 | This is the repository for the book [Mastering Python for Bioinformatics](https://learning.oreilly.com/library/view/mastering-python-for/9781098100872/) (O'Reilly, 2021, ISBN 9781098100889). 4 | 5 | See [O'Reilly's website](https://get.oreilly.com/ind_mastering-python-for-bioinformatics-ch1.html) for a free download of the preface and first chapter. 6 | 7 | # Author 8 | 9 | Ken Youens-Clark 10 | -------------------------------------------------------------------------------- /SETUP.md: -------------------------------------------------------------------------------- 1 | # Setup Instructions 2 | 3 | ## Mac 4 | 5 | Use Terminal or iTerm2. 6 | 7 | ## Windows 8 | 9 | ### Native shells: cmd.exe, PowerShell 10 | 11 | Works if you prefix commands with "python". 12 | 13 | ### Git Bash 14 | 15 | A partial Unix-like shell that works fairly well, but you still need to prefix commands with "python". 16 | 17 | ### Windows Subsystem for Linux 18 | 19 | This is my recommended option.
20 | 21 | ## Author 22 | 23 | Ken Youens-Clark 24 | -------------------------------------------------------------------------------- /app01_makefiles/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .pytest_cache 3 | -------------------------------------------------------------------------------- /app01_makefiles/c-hello/.gitignore: -------------------------------------------------------------------------------- 1 | hello 2 | a.out 3 | -------------------------------------------------------------------------------- /app01_makefiles/c-hello/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean 2 | 3 | hello: clean 4 | gcc -o hello hello.c 5 | 6 | clean: 7 | rm -f hello 8 | -------------------------------------------------------------------------------- /app01_makefiles/c-hello/hello.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | int main() { 3 | printf("Hello, World!\n"); 4 | return 0; 5 | } 6 | -------------------------------------------------------------------------------- /app01_makefiles/hello/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: hello 2 | 3 | hello: 4 | echo "Hello, World!"
5 | -------------------------------------------------------------------------------- /app01_makefiles/pie/.gitignore: -------------------------------------------------------------------------------- 1 | crust.txt 2 | filling.txt 3 | meringue.txt 4 | pie.txt 5 | -------------------------------------------------------------------------------- /app01_makefiles/pie/Makefile: -------------------------------------------------------------------------------- 1 | all: crust.txt filling.txt meringue.txt 2 | ./combine.sh pie.txt crust.txt filling.txt meringue.txt 3 | ./cook.sh pie.txt 375 45 4 | 5 | filling.txt: 6 | ./combine.sh filling.txt lemon butter sugar 7 | 8 | meringue.txt: 9 | ./combine.sh meringue.txt eggwhites sugar 10 | 11 | crust.txt: 12 | ./combine.sh crust.txt flour butter water 13 | 14 | clean: 15 | rm -f crust.txt meringue.txt filling.txt pie.txt 16 | -------------------------------------------------------------------------------- /app01_makefiles/pie/combine.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ $# -gt 1 ]]; then 4 | FILE=$1 5 | shift 1 6 | echo "Will combine $@" > "$FILE" 7 | else 8 | echo "usage: $(basename "$0") FILE ingredients" 9 | fi 10 | -------------------------------------------------------------------------------- /app01_makefiles/pie/cook.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ $# -eq 3 ]]; then 4 | ITEM=$1 5 | TEMP=$2 6 | TIME=$3 7 | echo "Will cook \"${ITEM}\" at ${TEMP} degrees for ${TIME} minutes." 
8 | else 9 | echo "usage: $(basename $0) ITEM TEMP TIME" 10 | fi 11 | -------------------------------------------------------------------------------- /app01_makefiles/yeast/.gitignore: -------------------------------------------------------------------------------- 1 | fasta 2 | chr-count 3 | chr-size 4 | gene-count 5 | verified-genes 6 | uncharacterized-genes 7 | gene-types 8 | terminated-genes 9 | SGD_features.tab 10 | -------------------------------------------------------------------------------- /app01_makefiles/yeast/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all fasta features test clean 2 | 3 | FEATURES = http://downloads.yeastgenome.org/curation/$\ 4 | chromosomal_feature/SGD_features.tab 5 | 6 | all: fasta genome chr-count chr-size features gene-count verified-genes uncharacterized-genes gene-types terminated-genes test 7 | 8 | clean: 9 | find . \( -name \*gene\* -o -name chr-\* \) -exec rm {} \; 10 | rm -rf fasta SGD_features.tab 11 | 12 | fasta: 13 | ./download.sh 14 | 15 | genome: fasta 16 | (cd fasta && cat *.fsa > genome.fa) 17 | 18 | chr-count: genome 19 | grep -e '^>' "fasta/genome.fa" | grep 'chromosome' | wc -l > chr-count 20 | 21 | chr-size: genome 22 | grep -ve '^>' "fasta/genome.fa" | wc -c > chr-size 23 | 24 | features: 25 | wget -nc $(FEATURES) 26 | 27 | gene-count: features 28 | cut -f 2 SGD_features.tab | grep ORF | wc -l > gene-count 29 | 30 | verified-genes: features 31 | awk -F"\t" '$$3 == "Verified" {print}' SGD_features.tab | \ 32 | wc -l > verified-genes 33 | 34 | uncharacterized-genes: features 35 | awk -F"\t" '$$2 == "ORF" && $$3 == "Uncharacterized" {print $$2}' \ 36 | SGD_features.tab | wc -l > uncharacterized-genes 37 | 38 | gene-types: features 39 | awk -F"\t" '{print $$3}' SGD_features.tab | sort | uniq -c > gene-types 40 | 41 | terminated-genes: 42 | grep -o '/G=[^ ]*' palinsreg.txt | cut -d = -f 2 | \ 43 | sort -u > terminated-genes 44 | 45 | test: 46 | pytest -xv 
./test.py 47 | -------------------------------------------------------------------------------- /app01_makefiles/yeast/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -u 4 | 5 | OUT_DIR="fasta" 6 | [[ ! -d "$OUT_DIR" ]] && mkdir -p "$OUT_DIR" 7 | 8 | URLS=$(mktemp) 9 | echo "http://downloads.yeastgenome.org/sequence/S288C_reference/chromosomes/fasta/chrmt.fsa" > "$URLS" 10 | 11 | for i in $(seq 1 16); do 12 | printf "http://downloads.yeastgenome.org/sequence/S288C_reference/chromosomes/fasta/chr%02d.fsa\n" "$i" >> "$URLS" 13 | done 14 | 15 | cd "$OUT_DIR" 16 | wget -nc -i "$URLS" 17 | rm "$URLS" 18 | 19 | echo "Done." 20 | -------------------------------------------------------------------------------- /app01_makefiles/yeast/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """tests for yeast/Makefile exercise""" 3 | 4 | import os 5 | import re 6 | from subprocess import getstatusoutput 7 | 8 | 9 | # -------------------------------------------------- 10 | def test_files(): 11 | """files exist, have correct answers""" 12 | 13 | files = [('chr-count', '16'), ('chr-size', '12359733'), 14 | ('gene-count', '6604'), ('verified-genes', '5155'), 15 | ('uncharacterized-genes', '728')] 16 | 17 | for file, answer in files: 18 | assert os.path.isfile(file) 19 | contents = open(file).read().strip() 20 | assert contents == answer 21 | 22 | 23 | # -------------------------------------------------- 24 | def test_terminated_genes(): 25 | """terminated-genes""" 26 | 27 | file = 'terminated-genes' 28 | assert os.path.isfile(file) 29 | lines = open(file).readlines() 30 | assert len(lines) == 951 31 | 32 | 33 | # -------------------------------------------------- 34 | def test_gene_types(): 35 | """gene-types""" 36 | 37 | file = 'gene-types' 38 | assert os.path.isfile(file) 39 | 40 | expected = { 41 | 'Dubious': '717', 42 | 
'Uncharacterized': '728', 43 | 'Verified': '5155', 44 | 'Verified|silenced_gene': '4', 45 | 'silenced_gene': '2', 46 | } 47 | 48 | regex = re.compile(r'^\s*(\d+)\s(.+)$') 49 | for line in open(file): 50 | match = regex.search(line) 51 | if match: 52 | num, gene_type = match.groups() 53 | if gene_type in expected: 54 | assert num == expected[gene_type] 55 | -------------------------------------------------------------------------------- /bin/all_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ Run the test suite on all solution*.py """ 3 | 4 | import argparse 5 | import os 6 | import re 7 | import shutil 8 | import sys 9 | import subprocess 10 | from subprocess import getstatusoutput 11 | from functools import partial 12 | from typing import NamedTuple 13 | 14 | 15 | class Args(NamedTuple): 16 | """ Command-line arguments """ 17 | program: str 18 | quiet: bool 19 | 20 | 21 | # -------------------------------------------------- 22 | def get_args() -> Args: 23 | """ Get command-line arguments """ 24 | 25 | parser = argparse.ArgumentParser( 26 | description='Run the test suite on all solution*.py', 27 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 28 | 29 | parser.add_argument('program', metavar='prg', help='Program to test') 30 | 31 | parser.add_argument('-q', '--quiet', action='store_true', help='Be quiet') 32 | 33 | args = parser.parse_args() 34 | 35 | return Args(args.program, args.quiet) 36 | 37 | 38 | # -------------------------------------------------- 39 | def main() -> None: 40 | """ Make a jazz noise here """ 41 | 42 | args = get_args() 43 | cwd = os.getcwd() 44 | solutions = list( 45 | filter(partial(re.match, r'solution.*\.py'), os.listdir(cwd))) 46 | 47 | for solution in sorted(solutions): 48 | print(f'==> {solution} <==') 49 | shutil.copyfile(solution, os.path.join(cwd, args.program)) 50 | subprocess.run(['chmod', '+x', args.program], check=True) 51 | rv, out = 
getstatusoutput('make test') 52 | if rv != 0: 53 | sys.exit(out) 54 | 55 | if not args.quiet: 56 | print(out) 57 | 58 | print('Done.') 59 | 60 | 61 | # -------------------------------------------------- 62 | if __name__ == '__main__': 63 | main() 64 | -------------------------------------------------------------------------------- /bin/uber_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Run all the tests for the entire repo 4 | 5 | set -eu -o pipefail 6 | 7 | ROOT=$(cd "$(dirname "$0")/.." && pwd) 8 | echo "ROOT $ROOT" 9 | 10 | PYLINTRC="$ROOT/.pylintrc" 11 | [[ -f "$PYLINTRC" ]] && export PYLINTRC 12 | 13 | DIRS=$(find "$ROOT" -mindepth 1 -maxdepth 1 -type d -name \[01\]\* | sort) 14 | for DIR in $DIRS; do 15 | BASE=$(basename "$DIR") 16 | echo "==> $BASE <==" 17 | cd "$DIR" 18 | [[ -f Makefile ]] && make all 19 | cd "$ROOT" 20 | done 21 | -------------------------------------------------------------------------------- /docker/Dockerfile310: -------------------------------------------------------------------------------- 1 | FROM python:3.8.6-buster 2 | RUN apt-get -y update 3 | RUN apt-get install -y git vim emacs 4 | 5 | WORKDIR /app 6 | ARG BAR=foo 7 | RUN git clone https://github.com/kyclark/biofx_python && python3 -m pip install -r /app/biofx_python/requirements.txt 8 | 9 | RUN cp /app/biofx_python/mypy.ini ~/.mypy.ini 10 | RUN cp /app/biofx_python/pylintrc ~/.pylintrc 11 | 12 | CMD ["python3", "--version"] 13 | -------------------------------------------------------------------------------- /docker/Dockerfile38: -------------------------------------------------------------------------------- 1 | FROM python:3.8.6-buster 2 | RUN apt-get -y update 3 | RUN apt-get install -y git vim emacs 4 | 5 | WORKDIR /app 6 | ARG BAR=foo 7 | RUN git clone https://github.com/kyclark/biofx_python && python3 -m pip install -r /app/biofx_python/requirements.txt 8 | 9 | RUN cp /app/biofx_python/mypy.ini 
~/.mypy.ini 10 | RUN cp /app/biofx_python/pylintrc ~/.pylintrc 11 | 12 | CMD ["python3", "--version"] 13 | -------------------------------------------------------------------------------- /docker/Dockerfile390: -------------------------------------------------------------------------------- 1 | FROM python:3.9.0-buster 2 | RUN apt-get -y update 3 | RUN apt-get install -y git vim emacs 4 | 5 | WORKDIR /app 6 | ARG BAR=foo 7 | RUN git clone https://github.com/kyclark/biofx_python && python3 -m pip install -r /app/biofx_python/requirements.txt 8 | 9 | RUN cp /app/biofx_python/mypy.ini ~/.mypy.ini 10 | RUN cp /app/biofx_python/pylintrc ~/.pylintrc 11 | 12 | CMD ["python3", "--version"] 13 | -------------------------------------------------------------------------------- /docker/Dockerfile391: -------------------------------------------------------------------------------- 1 | FROM python:3.9.1-buster 2 | RUN apt-get -y update 3 | RUN apt-get install -y git vim emacs 4 | 5 | WORKDIR /app 6 | ARG BAR=foo 7 | RUN git clone https://github.com/kyclark/biofx_python && python3 -m pip install -r /app/biofx_python/requirements.txt 8 | 9 | RUN cp /app/biofx_python/mypy.ini ~/.mypy.ini 10 | RUN cp /app/biofx_python/pylintrc ~/.pylintrc 11 | 12 | CMD ["python3", "--version"] 13 | -------------------------------------------------------------------------------- /docker/Makefile: -------------------------------------------------------------------------------- 1 | TAG390 = kyclark/biofx_python_3.9.0:0.1.0 2 | TAG391 = kyclark/biofx_python_3.9.1:0.1.0 3 | TAG38 = kyclark/biofx_python_3.8:0.1.0 4 | TAG310 = kyclark/biofx_python_3.10:0.1.0 5 | DOCKER = sudo docker 6 | 7 | imgs: docker38 docker390 docker391 docker310 8 | 9 | docker391: 10 | $(DOCKER) build --tag=$(TAG39) -f Dockerfile391 . 11 | 12 | docker390: 13 | $(DOCKER) build --tag=$(TAG39) -f Dockerfile390 . 14 | 15 | docker38: 16 | $(DOCKER) build --tag=$(TAG38) -f Dockerfile38 . 
17 | 18 | docker310: 19 | $(DOCKER) build --tag=$(TAG310) -f Dockerfile310 20 | 21 | test: test38 test39 test310 22 | 23 | test38: 24 | $(DOCKER) run $(TAG38) /app/biofx_python/bin/uber_test.sh 25 | 26 | test390: 27 | $(DOCKER) run $(TAG390) /app/biofx_python/bin/uber_test.sh 28 | 29 | test391: 30 | $(DOCKER) run $(TAG391) /app/biofx_python/bin/uber_test.sh 31 | 32 | test310: 33 | $(DOCKER) run $(TAG310) /app/biofx_python/bin/uber_test.sh 34 | 35 | shell38: 36 | $(DOCKER) run -it $(TAG38) bash 37 | 38 | shell390: 39 | $(DOCKER) run -it $(TAG390) bash 40 | 41 | shell391: 42 | $(DOCKER) run -it $(TAG391) bash 43 | 44 | shell310: 45 | $(DOCKER) run -it $(TAG310) bash 46 | 47 | push38: 48 | $(DOCKER) push $(TAG38) 49 | 50 | push390: 51 | $(DOCKER) push $(TAG390) 52 | 53 | push391: 54 | $(DOCKER) push $(TAG391) 55 | 56 | push310: 57 | $(DOCKER) push $(TAG310) 58 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Tiny Python Projects Docker 2 | 3 | If you like, you can run and test all the code using Python 3.8.3 in a Docker image: 4 | 5 | ``` 6 | $ docker pull kyclark/tiny_python_projects:0.2.0 7 | $ docker run -it --rm kyclark/tiny_python_projects:0.2.0 bash 8 | ``` 9 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | ignore_missing_imports = True 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | biopython 2 | black 3 | csvkit 4 | csvchk 5 | flake8 6 | graphviz 7 | iteration_utilities 8 | mypy 9 | new-py 10 | pandas 11 | pylint 12 | pytest 13 | pytest-flake8 14 | pytest-mypy 15 | pytest-pylint 16 | requests 17 | rich 18 | tabulate 19 | yapf 20 | seqmagick 21 | 
--------------------------------------------------------------------------------