├── bsky.sh
├── motifs.txt
├── mutate-requirements.txt
├── mutate.py
├── mythologybot.sh
├── mythologybutt.sh
├── new_motifs.txt
└── prepare-mutate.py

/bsky.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# shamelessly stolen from
# https://gist.github.com/pojntfx/72403066a96593c1ba8fd5df2b531f2d
# this takes the TWEET variable from the mythologybot.sh script and pushes it out to bsky

# Resolve DID for handle
DID_URL="https://bsky.social/xrpc/com.atproto.identity.resolveHandle"
export DID=$(curl -s -G \
    --data-urlencode "handle=$BSKY_USERNAME" \
    "$DID_URL" | jq -r .did)

# Create a session with the app password and grab the access JWT
API_KEY_URL='https://bsky.social/xrpc/com.atproto.server.createSession'
POST_DATA='{ "identifier": "'"${DID}"'", "password": "'"${BSKY_PASSWORD}"'" }'
export API_KEY=$(curl -s -X POST \
    -H 'Content-Type: application/json' \
    -d "$POST_DATA" \
    "$API_KEY_URL" | jq -r .accessJwt)

# Escape literal newlines so they survive inside the JSON string;
# date needs -u, since the trailing Z claims the timestamp is UTC
BSKY_STATUS=${TWEET//$'\n'/\\n}
POST_FEED_URL='https://bsky.social/xrpc/com.atproto.repo.createRecord'
POST_RECORD='{ "collection": "app.bsky.feed.post", "repo": "'"${DID}"'", "record": { "text": "'"${BSKY_STATUS}"'", "createdAt": "'"$(date -u +%Y-%m-%dT%H:%M:%S.%3NZ)"'", "$type": "app.bsky.feed.post" } }'
curl -s -X POST \
    -H "Authorization: Bearer ${API_KEY}" \
    -H 'Content-Type: application/json' \
    -d "$POST_RECORD" \
    "$POST_FEED_URL" | jq -c
--------------------------------------------------------------------------------
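A note on bsky.sh: POST_RECORD is assembled by string interpolation, so a motif containing a double quote or backslash would produce invalid JSON. Since the script already depends on jq for parsing, a sketch of a safer construction (same endpoint and fields, just letting jq do the quoting) could look like:

    # jq --arg escapes quotes, backslashes, and newlines itself,
    # so the BSKY_STATUS newline substitution becomes unnecessary
    POST_RECORD=$(jq -cn \
        --arg repo "$DID" \
        --arg text "$TWEET" \
        --arg createdAt "$(date -u +%Y-%m-%dT%H:%M:%S.%3NZ)" \
        '{collection: "app.bsky.feed.post", repo: $repo,
          record: {text: $text, createdAt: $createdAt, "$type": "app.bsky.feed.post"}}')
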
/mutate-requirements.txt:
--------------------------------------------------------------------------------
wordfilter==0.2.6.2
numpy==1.15.0
spacy==2.0.12
simpleneighbors==0.0.1
--------------------------------------------------------------------------------
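These pins target spaCy 2.x, whose en_vectors_web_lg package provides the 300-dimensional vectors that both mutate.py and prepare-mutate.py assume. A typical setup, assuming a Python 3 virtualenv, would be:

    pip install -r mutate-requirements.txt
    python -m spacy download en_vectors_web_lg
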
/mutate.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

import sys, re
import datetime
import numpy  # needed for the ndarray check in find_similar()
import wordfilter
import spacy
from simpleneighbors import SimpleNeighbors
from random import choice, sample

nlp = None
vocab_forest = SimpleNeighbors(300)
all_motifs = None

def populate_motifs(infile="motifs.txt"):
    global all_motifs
    with open(infile) as f:
        all_motifs = [line.strip() for line in f]
    return all_motifs

def init_nlp(**kwargs):
    global nlp, vocab_forest
    nlp = nlp or spacy.load(kwargs.get('model', 'en_vectors_web_lg'))

    # stop words from spacy.en:
    stop_words = ['other', 'she', 'alone', 'hers', 'enough', 'becoming', 'amount', 'himself', 'such', 'sometime', 'noone', 'though', 'thereupon', 'wherever', 'will', 'now', 'therefore', 'forty', 'name', 'whom', 'often', 'unless', 'this', 'whether', 'nothing', 'well', 'along', 'from', 'on', 'should', 'hundred', 'much', 'seems', 'wherein', 'beyond', 'used', 'you', 'except', 'so', 'top', 'even', 'without', 'give', 'and', 'whoever', 'about', 'nor', 'which', 'together', 'an', 'everyone', 'below', 'itself', 'doing', 'mostly', 'many', 'else', 'already', 'elsewhere', 'whereupon', 'were', 'using', 'until', 'mine', 'made', 'nobody', 'some', 'down', 'toward', 'with', 'out', 'has', 'although', 'their', 'sixty', 'somehow', 'full', 'next', 'between', 'by', 'yourselves', 'throughout', 'few', 'own', 'hereafter', 'up', 'done', 'indeed', 'anywhere', 'then', 'latter', 'our', 'same', 'over', 're', 'not', 'regarding', 'nowhere', 'really', 'former', 'any', 'through', 'they', 'whole', 'becomes', 'around', 'yet', 'less', 'is', 'these', 'whatever', 'otherwise', 'as', 'anything', 'among', 'have', 'however', 'go', 'afterwards', 'since', 'still', 'can', 'beforehand', 'everywhere', 'why', 'seem', 'because', 'last', 'due', 'had', 'get', 'while', 'all', 'him', 'who', 'most', 'to', 'only', 'serious', 'meanwhile', 'are', 'show', 'several', 'at', 'might', 'onto', 'anyone', 'her', 'hereby', 'seemed', 'am', 'again', 'move', 'therein', 'than', 'did', 'very', 'it', 'anyhow', 'both', 'please', 'i', 'make', 'more', 'no', 'off', 'various', 'been', 'thereby', 'against', 'whence', 'third', 'there', 'ever', 'sometimes', 'every', 'take', 'we', 'say', 'each', 'also', 'what', 'me', 'us', 'anyway', 'none', 'per', 'thru', 'his', 'moreover', 'a', 'perhaps', 'how', 'yours', 'besides', 'whenever', 'empty', 'least', 'under', 'he', 'back', 'myself', 'namely', 'first', 'herself', 'into', 'someone', 'quite', 'never', 'always', 'here', 'via', 'cannot', 'must', 'ca', 'would', 'nevertheless', 'above', 'front', 'part', 'became', 'yourself', 'after', 'everything', 'your', 'somewhere', 'before', 'too', 'the', 'those', 'once', 'does', 'do', 'towards', 'could', 'keep', 'them', 'for', 'twenty', 'something', 'but', 'my', 'see', 'that', 'in', 'others', 'side', 'of', 'further', 'during', 'upon', 'behind', 'become', 'almost', 'whose', 'another', 'its', 'within', 'thereafter', 'bottom', 'whereas', 'when', 'seeming', 'just', 'either', 'put', 'or', 'call', 'being', 'be', 'fifty', 'beside', 'across', 'may', 'whereby', 'neither', 'was', 'rather', 'if', 'formerly', 'amongst', 'where', 'thus', 'ourselves', 'themselves', 'hence', 'ours']
    # custom stop words:
    stop_words += ['\'s', 'St.', 'tabu']
    for stop_word in stop_words:
        nlp.vocab[stop_word].is_stop = True

    print("loading up the prepared Annoy object...")
    vocab_forest = SimpleNeighbors.load('vocab_forest')

    return nlp

# Get the vector for a string (assumed to be a single word) or a spaCy token.
def vector(w):
    if isinstance(w, str):
        return nlp(w)[0].vector
    elif isinstance(w, (spacy.lexeme.Lexeme, spacy.tokens.token.Token)):
        return w.vector
    else:
        return None

def find_similar(target, count=20, offset=0):
    target_string = target

    if isinstance(target, str):
        target_vector = vector(target)
    elif isinstance(target, (spacy.lexeme.Lexeme, spacy.tokens.token.Token)):
        target_string = target.orth_
        target_vector = target.vector
    elif isinstance(target, numpy.ndarray):
        target_string = ''
        target_vector = target
    else:
        print("Invalid target for finding similar word by vector...")
        return []

    if vocab_forest.vec(target_string) is not None:
        neighbors = vocab_forest.neighbors(target_string, offset + count)
    else:
        neighbors = vocab_forest.nearest(target_vector, offset + count)

    similar = [n for n in neighbors if n.lower() != target_string.lower()][offset:offset + count]
    return similar

def mutation_candidates(tokens):
    return [t for t in tokens if not (t.is_stop or t.is_punct)]

def get_mutation_substitute(w):
    return choice(find_similar(w, offset=0, count=32))

def ok_to_tweet(m):
    # too long to tweet?
    if len(m) > 200:
        print("status is too long")
        return False
    # any bad words?
    elif wordfilter.blacklisted(m):
        print("found a bad word")
        return False
    else:
        return True

def mutate(motif, verbose=False, index=None):
    try:
        pieces = motif.split()        # => ["A13.1.1", "Cow", "as", "creator."]
        index = index or pieces[0]    # => "A13.1.1"
        body = " ".join(pieces[1:])   # => "Cow as creator."
        tokens = nlp(body)
        candidates = mutation_candidates(tokens)

        if len(candidates) < 2:
            if verbose:
                print("Motif is not a good candidate for mutation:\n\t%s" % body)
            else:
                print(".", end="", flush=True)
            return None
        new_motif = body

        if verbose:
            print("Finding ~similar words for %s @ %s" % (candidates, datetime.datetime.now().time()))
        to_sub = [(c.orth_, get_mutation_substitute(c)) for c in candidates]

        for candidate, replacement in to_sub:
            if verbose:
                print("Replacing", candidate, "with", replacement)
            # escape the candidate in case it contains regex metacharacters
            new_motif = re.sub(re.escape(candidate), replacement, new_motif, count=1)

        if not ok_to_tweet(new_motif):
            if verbose:
                print("Mutated motif is not a good candidate to tweet:\n\t%s" % new_motif)
            return None
        new_motif = "%s %s" % (index, new_motif)
        if not verbose:
            print(".", end="", flush=True)
        return new_motif
    except Exception:
        return None

def make_new_motifs(count=1, outfile=None, wipe=False, verbose=False, everything=False, offset=None, model=None, **kwargs):
    if everything:
        old_motifs = list(all_motifs)  # copy, since we null out entries below
    elif offset is not None:
        print("offset: %s" % offset)
        print("count: %s" % count)
        old_motifs = all_motifs[offset:offset + count]
    else:
        old_motifs = sample(all_motifs, count)

    new_motifs = [mutate(m, verbose) for m in old_motifs]

    # drop the motifs that failed to mutate, keeping the two lists in step:
    for i, m in enumerate(new_motifs):
        if m is None:
            old_motifs[i] = None
    old_motifs = list(filter(None, old_motifs))
    new_motifs = list(filter(None, new_motifs))

    transforms = ["'%s' \n\t=> '%s'" % (old_motifs[i], n) for i, n in enumerate(new_motifs) if n]
    if verbose:
        for t in transforms:
            print(t)

    if outfile and new_motifs:
        with open(outfile, 'w' if wipe else 'a', encoding="utf-8") as f:
            # trailing newline so successive appends don't glue lines together
            f.write("\n".join(new_motifs) + "\n")

    return new_motifs

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description="Mutate lines using spaCy")
    parser.add_argument("-o", "--outfile", type=str, default="new_motifs.txt",
                        help="append to given file")
    parser.add_argument("-i", "--infile", type=str, default="motifs.txt",
                        help="read lines from given file")
    parser.add_argument("-c", "--count", type=int, default=1,
                        help="generate this many mutated motifs")
    parser.add_argument("-s", "--start", type=int, default=None, dest='offset',
                        help="offset to work from in motif file")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="verbose stdout printing")
    parser.add_argument("-w", "--wipe", action="store_true",
                        help="wipe out contents of outfile instead of appending")
    parser.add_argument("-m", "--model", type=str, default='en_vectors_web_lg',
                        help="which spaCy model to load (default: en_vectors_web_lg)")
    parser.add_argument("-e", "--everything", action="store_true",
                        help="run through every line from infile, not a random sample")
    args = parser.parse_args()

    if args.verbose:
        print("Mutating %s motifs from %s and %swriting out to %s" % ("ALL" if args.everything else args.count, args.infile, "OVER" if args.wipe else "", args.outfile))
    else:
        print("Mutating %s motifs..." % ("ALL" if args.everything else args.count))

    all_motifs = populate_motifs(args.infile)

    print("Loading spacy parser...")
    nlp = init_nlp(model=args.model)

    print("Making motifs:")
    make_new_motifs(**vars(args))
--------------------------------------------------------------------------------
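Before mutate.py can run, the Annoy index it loads via SimpleNeighbors.load('vocab_forest') has to exist, so prepare-mutate.py (shown further below) goes first. A plausible invocation using only the defaults above:

    python prepare-mutate.py    # one-time: writes vocab_forest.annoy and vocab_forest-data.pkl
    python mutate.py -c 5 -v    # mutate five random motifs, appending to new_motifs.txt
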
/mythologybot.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# bash, not plain sh: we rely on $RANDOM and ${var//...} substitutions below
source ~/.cronrc

# pick a random line from motifs.txt, then break it into verse-ish lines
# by inserting newlines after punctuation and before common conjunctions
export TWEET=$(awk "NR==$((RANDOM % $(wc -l < motifs.txt) + 1))" motifs.txt | sed -r 's/(^\S+? )|(: |;|,|\?|!)| ([\(\"].{3,}[\)\"]) |\b(--|int?o?|about|while|he|she|they|s?o? ?that|as|who|by|when|for|witho?u?t?|at|on|and|because|but|to b?e?|from|under|has|where|how many|how long|of|or|whiche?v?e?r?)\b/\1\2\n\3\4/g')
echo -e "Should tweet:\n$TWEET"

# Twitter/X
twurl set default MythologyBot
twurl '/2/tweets' --data '{"text": "'"${TWEET//$'\n'/\\n}"'"}' --header 'Content-Type: application/json' --consumer-key ${MYTHBOT_CONSUMERKEY} --consumer-secret ${MYTHBOT_CONSUMERSECRET} --access-token ${MYTHBOT_ACCESSTOKEN} --token-secret ${MYTHBOT_TOKENSECRET}

# Mastodon; --data-urlencode keeps newlines and '&' in the status intact
curl -H "Authorization: Bearer $MYTHOLOGYBOT_MASTODON" --data-urlencode "status=$TWEET" https://botsin.space/api/v1/statuses

# Bluesky (reads the exported TWEET variable)
/home/boodoo/apps/mythology/bsky.sh
--------------------------------------------------------------------------------

/mythologybutt.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# same as mythologybot.sh, but drawing from the mutated motifs
source ~/.cronrc

TWEET=$(awk "NR==$((RANDOM % $(wc -l < new_motifs.txt) + 1))" new_motifs.txt | sed -r 's/(^\S+? )|(: |;|,|\?|!)| ([\(\"].{3,}[\)\"]) |\b(--|int?o?|about|while|he|she|they|s?o? ?that|as|who|by|when|for|witho?u?t?|at|on|and|because|but|to b?e?|from|under|has|where|of|or|whiche?v?e?r?)\b/\1\2\n\3\4/g')
echo -e "Should tweet:\n$TWEET"

twurl set default MythologyButt
twurl -d "status=$TWEET" /1.1/statuses/update.json

curl -H "Authorization: Bearer $MYTHOLOGYBUTT_MASTODON" --data-urlencode "status=$TWEET" https://botsin.space/api/v1/statuses
--------------------------------------------------------------------------------
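The sed one-liner shared by both scripts is the line-break formatter: branch 1 breaks after the motif index, branch 2 after punctuation, branch 4 before common conjunctions and prepositions. Feeding it the sample motif from mutate.py's comments should yield approximately this (modulo trailing spaces):

    $ echo "A13.1.1 Cow as creator." | sed -r 's/(^\S+? )|(: |;|,|\?|!)| ([\(\"].{3,}[\)\"]) |\b(--|int?o?|about|while|he|she|they|s?o? ?that|as|who|by|when|for|witho?u?t?|at|on|and|because|but|to b?e?|from|under|has|where|how many|how long|of|or|whiche?v?e?r?)\b/\1\2\n\3\4/g'
    A13.1.1
    Cow
    as creator.
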
~~~") 18 | return 19 | 20 | if __name__ == '__main__': 21 | import argparse 22 | example_text = '''examples: 23 | 24 | python prepare-mutate.py -m en_vectors_web_lg -o vocab_forest -t 30 (defaults) 25 | python prepare-mutate.py -m en_core_web_md -o core_web_md_forest 26 | python prepare-mutate.py -t 10 -o quick_forest''' 27 | 28 | parser = argparse.ArgumentParser(prog="prepare-mutate", 29 | description="Prepare mutation script with Annoy mmap", 30 | epilog=example_text, 31 | formatter_class=argparse.RawDescriptionHelpFormatter) 32 | parser.add_argument("-m", "--model", type=str, default="en_vectors_web_lg", 33 | help="which spacy model to use (should have vector data)") 34 | parser.add_argument("-o", "--outprefix", type=str, default="vocab_forest", 35 | help="save Annoy index and data PKL with filenames prfefixed this way") 36 | parser.add_argument("-t", "--trees", type=int, default=30, 37 | help="how many trees in our binary forest. More trees takes longer but yields accuracy.") 38 | args = parser.parse_args() 39 | 40 | prepare(**vars(args)) 41 | --------------------------------------------------------------------------------