├── .gitignore ├── mboxes └── download.py ├── tipsum.py ├── markov.py └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .DS_Store 3 | mboxes/*.txt 4 | -------------------------------------------------------------------------------- /mboxes/download.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import subprocess 3 | 4 | months = 'April May June July August September October November December'.split() 5 | years = range(2013, 2006, -1) 6 | 7 | for month, year in itertools.product(months, years): 8 | subprocess.call(['wget', 9 | 'http://lists.idyll.org/pipermail/testing-in-python/%s-%s.txt.gz' % (year, month)]) 10 | -------------------------------------------------------------------------------- /tipsum.py: -------------------------------------------------------------------------------- 1 | import mailbox 2 | import re 3 | import os 4 | 5 | import markov 6 | 7 | 8 | WORD_RE = r'^[a-zA-Z][a-zA-Z-\']*$' 9 | 10 | 11 | def words(body): 12 | for word in body.split(): 13 | word = word.strip(' ,.[]') 14 | if re.match(WORD_RE, word): 15 | yield word 16 | 17 | 18 | def main(): 19 | mkv = markov.make_markov() 20 | mboxes = os.listdir('mboxes') 21 | for mbox_file in mboxes: 22 | mbox = mailbox.mbox('mboxes/' + mbox_file) 23 | for message in mbox: 24 | markov.feed_markov(mkv, words(message.get_payload())) 25 | 26 | w = markov.walk_markov(mkv) 27 | for i in xrange(500): 28 | print w.next(), 29 | 30 | 31 | if __name__ == '__main__': 32 | main() 33 | -------------------------------------------------------------------------------- /markov.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import random 3 | 4 | 5 | def make_markov(): 6 | return collections.defaultdict(lambda: collections.defaultdict(int)) 7 | 8 | 9 | def feed_markov(markov, words): 10 | last_word = None 11 | for word in words: 12 | if last_word is None: 13 | last_word = word 14 | continue 15 | markov[last_word.lower()][word.lower()] += 1 16 | markov[last_word.lower()][None] += 1 17 | last_word = word 18 | return markov 19 | 20 | 21 | def walk_markov(markov): 22 | current = random.choice(markov.keys()) 23 | while True: 24 | yield current 25 | current = _step_markov(markov[current.lower()]) 26 | if current is None: 27 | current = random.choice(markov.keys()) 28 | 29 | 30 | def _step_markov(word_counts): 31 | total = word_counts[None] 32 | lucky = random.randint(0, total) 33 | for word, val in word_counts.iteritems(): 34 | if word is None: 35 | continue 36 | lucky -= val 37 | if lucky <= 0: 38 | return word 39 | 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Implementation of lorem ipsum using the [testing-in-python][] mailing list as a 2 | corpus text. 3 | 4 | [testing-in-python]: http://lists.idyll.org/listinfo/testing-in-python 5 | 6 | $ cd mboxes/ 7 | $ python download.py 8 | $ cd ../ 9 | $ python tipsum.py 10 | 11 | ## Example 12 | 13 | > weigand you expect that http error before shooting yourself and not a 14 | > reusable a dummy string on that all conference wide web status codes and new 15 | > piece of this yes i find module level directory but it did you don't want to 16 | > relicense the original message back to use in function undeclared use in 17 | > something not a way it can has been discussing stuff aiui the above have you 18 | > talk on the correctness true now we go into the dotted object using coverage 19 | > script made a fair enough code page without having to how it's intentionally 20 | > with side effect am andrew dalke hi all of it a public interface at why yet 21 | > told this undeclared use in many problems we have postgresql database 22 | > managing a python and core-python testing universe with the notation for 23 | > member in something not a structure or that approach to interoperate and i 24 | > know any and stores the road to add it can try to do you want to the test is 25 | > the python world theory behind that will give meaningful bug checkrow action 26 | > taken that setup i run from the issue here it's possible but i can't teach 27 | > the expected however the test libraries require added the dsl but when it 28 | > seams almost nothing sensitive about it is nice too different differently the 29 | > changes until i can succeed since you could change the file line in something 30 | > not a microcode controlling the latest release every time i get and in 31 | > function undeclared use in function undeclared use in something not a side of 32 | > this undeclared use in something not a structure or union syntax error before 33 | > token syntax error before token request for called during tests with all 34 | > thanks a structure or union request for an html attachment was scrubbed not a 35 | > bit clunky constraint that you could happen sooner than that only sprang into 36 | > nose's pattern which is of getting the variants will thanks benji york at top 37 | > syntax error before token syntax error before token in a good at pm michael 38 | > foord tdd and i'm not automated tests test plugins from the slow i need to 39 | > pluck out here's all the filter specifies what i don't think it seems i will 40 | > break broken web apps the behaviour although i believe this at hey there a 41 | > unit tests it seems to enumerate the first one of time outlining some mucking 42 | > internal methods of info on and reading this makes pointer at least python 43 | > and providing such frameworks would suppress the other def foo as long as it 44 | > seems like a bunch of the ben finney at the error before any ratio between 45 | > all active discussion with python our teams use in currently doing is the 46 | > testing project tree but i then i started regards bhargav next part url echo 47 | > server and acceptable use in doubt there's probably a structure or i can 48 | > think about oe jskit contains the content management wants a structure or 49 | > union request for the same thing i would also considering 50 | --------------------------------------------------------------------------------