# ├── ts.txt
# ├── README.rst
# └── markov.py
#
# NOTE: the non-Python files of the repository are kept below as comments
# so that this file stays importable as the ``markov`` module.
#
# /ts.txt:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/mattharrison/pycon-beg-markov-2017/master/ts.txt
# --------------------------------------------------------------------------------
# /README.rst:
# --------------------------------------------------------------------------------
# ========================================
#  Hands-on Beginning Python 3.6 Bootcamp
# ========================================
#
# This repository contains the final output from the 2017 PyCon
# tutorial `Beginning Python Bootcamp
# `_.
#
#
# Resources
# -----------
#
# * https://github.com/mattharrison/Tiny-Python-3.6-Notebook
# * http://setosa.io/blog/2014/07/26/markov-chains/
# * http://www.gutenberg.org/ebooks/74
# * https://pymotw.com/3/argparse/
#
# --------------------------------------------------------------------------------
# /markov.py:
# --------------------------------------------------------------------------------
"""
Character-level Markov chain text predictor.

Here is an example of running a Markov prediction:

>>> m = Markov('ab')
>>> m.predict('a')
'b'

>>> m.predict('c')
Traceback (most recent call last):
  ...
KeyError: 'c'

>>> get_table('ab')
{'a': {'b': 1}}

>>> random.seed(42)
>>> m = Markov('Find a city, find yourself a city to live in', 4)
>>> test_predict(m, 20, 'F', 4)
'Find a city, find a c'

>>> with open('ts.txt', encoding='windows_1252') as fin:
...     data = fin.read()
>>> m2 = Markov(data, 4)
>>> len(test_predict(m2, 100, 'T', 4))
101
"""


import argparse
import random
import sys


class Markov:
    """Markov model over the characters of a training text.

    ``tables[n - 1]`` maps every ``n``-character context that occurs in the
    training text to a ``{next_char: count}`` dict, for ``n`` from 1 through
    ``size``.
    """

    def __init__(self, data, size=1):
        """Build one transition table per context length 1..``size``."""
        self.tables = [get_table(data, n) for n in range(1, size + 1)]

    def predict(self, data_in):
        """Return a random next character for the context ``data_in``.

        The character is drawn with probability proportional to how often
        it followed ``data_in`` in the training text.

        Raises:
            KeyError: if ``data_in`` is empty, longer than the model's
                ``size``, or was never seen with a following character.
        """
        context_len = len(data_in)
        # There is no table for an empty context or for one longer than
        # ``size``.  Report both like an unseen context so callers only have
        # to handle KeyError (indexing ``self.tables`` directly would raise
        # IndexError, or silently pick the last table for an empty context).
        if not 1 <= context_len <= len(self.tables):
            raise KeyError(data_in)
        options = self.tables[context_len - 1].get(data_in)
        if not options:
            raise KeyError(data_in)
        # Repeat each candidate ``count`` times so that a uniform choice over
        # the resulting string is a count-weighted choice over candidates.
        possible = ''.join(char * count for char, count in options.items())
        return random.choice(possible)


def get_table(line, numchars=1):
    """Count which character follows each ``numchars``-long slice of ``line``.

    Returns a dict mapping each context (a substring of length ``numchars``
    that has at least one character after it) to a ``{next_char: count}``
    dict.  The result is empty when ``line`` is too short to contain a
    context plus a following character.
    """
    results = {}
    # Stop at the last start index whose following character still exists.
    for i in range(len(line) - numchars):
        chars = line[i:i + numchars]
        out = line[i + numchars]
        followers = results.setdefault(chars, {})
        followers[out] = followers.get(out, 0) + 1
    return results


def test_predict(m, num_chars, start, size=1):
    """Generate ``num_chars`` characters from ``m``, seeded with ``start``.

    After every prediction the context becomes the last ``size`` characters
    of the text produced so far.  Returns ``start`` followed by the
    generated characters.
    """
    res = [start]
    for _ in range(num_chars):
        let = m.predict(start)
        res.append(let)
        # Slide the context window instead of re-joining the whole output.
        start = (start + let)[-size:]
    return ''.join(res)


# The name matches pytest's ``test_*`` pattern, but this is a helper that
# needs explicit arguments; opt out of test collection.
test_predict.__test__ = False


def repl(m, size=1):
    """
    This starts a repl, provide a Markov and
    optional size

    Each line read from the prompt is cut down to its last ``size``
    characters and the predicted next character is printed.  Contexts the
    model cannot predict print "Try again...".  The loop ends on
    end-of-input.
    """
    while True:
        try:
            txt = input(">")
        except EOFError:
            # Ctrl-D or exhausted piped input: leave the loop cleanly.
            break
        try:
            res = m.predict(txt[-size:])
        except KeyError:
            print("Try again...")
            # Nothing was predicted this round, so there is nothing to show.
            continue
        print(res)


def main(args):
    """Command-line entry point; ``args`` is the argument list sans argv[0].

    With ``--file`` a model of ``--size`` is trained on the file and an
    interactive prompt is started.  With ``--test`` the module doctests are
    run (after the prompt ends, when both are given).
    """
    p = argparse.ArgumentParser()
    p.add_argument('-f', '--file', help='Input file')
    p.add_argument('--encoding', help='File encoding default(utf8)',
                   default='utf8')
    p.add_argument('-s', '--size', help='Size of input default(1)',
                   default=1, type=int)
    p.add_argument('-t', '--test', help='run tests',
                   action='store_true')

    opts = p.parse_args(args)
    if opts.file:
        with open(opts.file, encoding=opts.encoding) as fin:
            data = fin.read()
        m = Markov(data, opts.size)
        # Pass the size on so the prompt uses the same context length the
        # model was trained with.
        repl(m, opts.size)
    if opts.test:
        import doctest
        doctest.testmod()


if __name__ == '__main__':
    print("EXECUTED")
    main(sys.argv[1:])
else:
    print("IMPORTED")

# --------------------------------------------------------------------------------