├── bsky.sh
├── motifs.txt
├── mutate-requirements.txt
├── mutate.py
├── mythologybot.sh
├── mythologybutt.sh
├── new_motifs.txt
└── prepare-mutate.py

/bsky.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# shamelessly stolen from
# https://gist.github.com/pojntfx/72403066a96593c1ba8fd5df2b531f2d
# this takes the TWEET variable from the mythologybot.sh script and pushes it out to bsky

# Resolve DID for handle
DID_URL="https://bsky.social/xrpc/com.atproto.identity.resolveHandle"
export DID=$(curl -s -G \
    --data-urlencode "handle=$BSKY_USERNAME" \
    "$DID_URL" | jq -r .did)

# Create a session with the app password and grab the access JWT
API_KEY_URL='https://bsky.social/xrpc/com.atproto.server.createSession'
POST_DATA='{ "identifier": "'"${DID}"'", "password": "'"${BSKY_PASSWORD}"'" }'
export API_KEY=$(curl -s -X POST \
    -H 'Content-Type: application/json' \
    -d "$POST_DATA" \
    "$API_KEY_URL" | jq -r .accessJwt)

# Escape literal newlines so they survive inside the JSON string;
# date needs -u, since the trailing Z claims the timestamp is UTC
BSKY_STATUS=${TWEET//$'\n'/\\n}
POST_FEED_URL='https://bsky.social/xrpc/com.atproto.repo.createRecord'
POST_RECORD='{ "collection": "app.bsky.feed.post", "repo": "'"${DID}"'", "record": { "text": "'"${BSKY_STATUS}"'", "createdAt": "'"$(date -u +%Y-%m-%dT%H:%M:%S.%3NZ)"'", "$type": "app.bsky.feed.post" } }'
curl -s -X POST \
    -H "Authorization: Bearer ${API_KEY}" \
    -H 'Content-Type: application/json' \
    -d "$POST_RECORD" \
    "$POST_FEED_URL" | jq -c
--------------------------------------------------------------------------------
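A note on bsky.sh: POST_RECORD is assembled by string interpolation, so a motif containing a double quote or backslash would produce invalid JSON. Since the script already depends on jq for parsing, a sketch of a safer construction (same endpoint and fields, just letting jq do the quoting) could look like:

    # jq --arg escapes quotes, backslashes, and newlines itself,
    # so the BSKY_STATUS newline substitution becomes unnecessary
    POST_RECORD=$(jq -cn \
        --arg repo "$DID" \
        --arg text "$TWEET" \
        --arg createdAt "$(date -u +%Y-%m-%dT%H:%M:%S.%3NZ)" \
        '{collection: "app.bsky.feed.post", repo: $repo,
          record: {text: $text, createdAt: $createdAt, "$type": "app.bsky.feed.post"}}')
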
/mutate-requirements.txt:
--------------------------------------------------------------------------------
wordfilter==0.2.6.2
numpy==1.15.0
spacy==2.0.12
simpleneighbors==0.0.1
--------------------------------------------------------------------------------
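These pins target spaCy 2.x, whose en_vectors_web_lg package provides the 300-dimensional vectors that both mutate.py and prepare-mutate.py assume. A typical setup, assuming a Python 3 virtualenv, would be:

    pip install -r mutate-requirements.txt
    python -m spacy download en_vectors_web_lg
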
/mutate.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

import sys, re
import datetime
import numpy  # needed for the ndarray check in find_similar()
import wordfilter
import spacy
from simpleneighbors import SimpleNeighbors
from random import choice, sample

nlp = None
vocab_forest = SimpleNeighbors(300)
all_motifs = None

def populate_motifs(infile="motifs.txt"):
    global all_motifs
    with open(infile) as f:
        all_motifs = [line.strip() for line in f]
    return all_motifs

def init_nlp(**kwargs):
    global nlp, vocab_forest
    nlp = nlp or spacy.load(kwargs.get('model', 'en_vectors_web_lg'))

    # stop words from spacy.en:
    stop_words = ['other', 'she', 'alone', 'hers', 'enough', 'becoming', 'amount', 'himself', 'such', 'sometime', 'noone', 'though', 'thereupon', 'wherever', 'will', 'now', 'therefore', 'forty', 'name', 'whom', 'often', 'unless', 'this', 'whether', 'nothing', 'well', 'along', 'from', 'on', 'should', 'hundred', 'much', 'seems', 'wherein', 'beyond', 'used', 'you', 'except', 'so', 'top', 'even', 'without', 'give', 'and', 'whoever', 'about', 'nor', 'which', 'together', 'an', 'everyone', 'below', 'itself', 'doing', 'mostly', 'many', 'else', 'already', 'elsewhere', 'whereupon', 'were', 'using', 'until', 'mine', 'made', 'nobody', 'some', 'down', 'toward', 'with', 'out', 'has', 'although', 'their', 'sixty', 'somehow', 'full', 'next', 'between', 'by', 'yourselves', 'throughout', 'few', 'own', 'hereafter', 'up', 'done', 'indeed', 'anywhere', 'then', 'latter', 'our', 'same', 'over', 're', 'not', 'regarding', 'nowhere', 'really', 'former', 'any', 'through', 'they', 'whole', 'becomes', 'around', 'yet', 'less', 'is', 'these', 'whatever', 'otherwise', 'as', 'anything', 'among', 'have', 'however', 'go', 'afterwards', 'since', 'still', 'can', 'beforehand', 'everywhere', 'why', 'seem', 'because', 'last', 'due', 'had', 'get', 'while', 'all', 'him', 'who', 'most', 'to', 'only', 'serious', 'meanwhile', 'are', 'show', 'several', 'at', 'might', 'onto', 'anyone', 'her', 'hereby', 'seemed', 'am', 'again', 'move', 'therein', 'than', 'did', 'very', 'it', 'anyhow', 'both', 'please', 'i', 'make', 'more', 'no', 'off', 'various', 'been', 'thereby', 'against', 'whence', 'third', 'there', 'ever', 'sometimes', 'every', 'take', 'we', 'say', 'each', 'also', 'what', 'me', 'us', 'anyway', 'none', 'per', 'thru', 'his', 'moreover', 'a', 'perhaps', 'how', 'yours', 'besides', 'whenever', 'empty', 'least', 'under', 'he', 'back', 'myself', 'namely', 'first', 'herself', 'into', 'someone', 'quite', 'never', 'always', 'here', 'via', 'cannot', 'must', 'ca', 'would', 'nevertheless', 'above', 'front', 'part', 'became', 'yourself', 'after', 'everything', 'your', 'somewhere', 'before', 'too', 'the', 'those', 'once', 'does', 'do', 'towards', 'could', 'keep', 'them', 'for', 'twenty', 'something', 'but', 'my', 'see', 'that', 'in', 'others', 'side', 'of', 'further', 'during', 'upon', 'behind', 'become', 'almost', 'whose', 'another', 'its', 'within', 'thereafter', 'bottom', 'whereas', 'when', 'seeming', 'just', 'either', 'put', 'or', 'call', 'being', 'be', 'fifty', 'beside', 'across', 'may', 'whereby', 'neither', 'was', 'rather', 'if', 'formerly', 'amongst', 'where', 'thus', 'ourselves', 'themselves', 'hence', 'ours']
    # custom stop words:
    stop_words += ['\'s', 'St.', 'tabu']
    for stop_word in stop_words:
        nlp.vocab[stop_word].is_stop = True

    print("loading up the prepared Annoy object...")
    vocab_forest = SimpleNeighbors.load('vocab_forest')

    return nlp

# Get the vector for a string (assumed to be a single word) or a spaCy token.
def vector(w):
    if isinstance(w, str):
        return nlp(w)[0].vector
    elif isinstance(w, (spacy.lexeme.Lexeme, spacy.tokens.token.Token)):
        return w.vector
    else:
        return None

def find_similar(target, count=20, offset=0):
    target_string = target

    if isinstance(target, str):
        target_vector = vector(target)
    elif isinstance(target, (spacy.lexeme.Lexeme, spacy.tokens.token.Token)):
        target_string = target.orth_
        target_vector = target.vector
    elif isinstance(target, numpy.ndarray):
        target_string = ''
        target_vector = target
    else:
        print("Invalid target for finding similar word by vector...")
        return []

    if vocab_forest.vec(target_string) is not None:
        neighbors = vocab_forest.neighbors(target_string, offset + count)
    else:
        neighbors = vocab_forest.nearest(target_vector, offset + count)

    similar = [n for n in neighbors if n.lower() != target_string.lower()][offset:offset + count]
    return similar

def mutation_candidates(tokens):
    return [t for t in tokens if not (t.is_stop or t.is_punct)]

def get_mutation_substitute(w):
    return choice(find_similar(w, offset=0, count=32))

def ok_to_tweet(m):
    # too long to tweet?
    if len(m) > 200:
        print("status is too long")
        return False
    # any bad words?
    elif wordfilter.blacklisted(m):
        print("found a bad word")
        return False
    else:
        return True

def mutate(motif, verbose=False, index=None):
    try:
        pieces = motif.split()        # => ["A13.1.1", "Cow", "as", "creator."]
        index = index or pieces[0]    # => "A13.1.1"
        body = " ".join(pieces[1:])   # => "Cow as creator."
        tokens = nlp(body)
        candidates = mutation_candidates(tokens)

        if len(candidates) < 2:
            if verbose:
                print("Motif is not a good candidate for mutation:\n\t%s" % body)
            else:
                print(".", end="", flush=True)
            return None
        new_motif = body

        if verbose:
            print("Finding ~similar words for %s @ %s" % (candidates, datetime.datetime.now().time()))
        to_sub = [(c.orth_, get_mutation_substitute(c)) for c in candidates]

        for candidate, replacement in to_sub:
            if verbose:
                print("Replacing", candidate, "with", replacement)
            # escape the candidate in case it contains regex metacharacters
            new_motif = re.sub(re.escape(candidate), replacement, new_motif, count=1)

        if not ok_to_tweet(new_motif):
            if verbose:
                print("Mutated motif is not a good candidate to tweet:\n\t%s" % new_motif)
            return None
        new_motif = "%s %s" % (index, new_motif)
        if not verbose:
            print(".", end="", flush=True)
        return new_motif
    except Exception:
        return None

def make_new_motifs(count=1, outfile=None, wipe=False, verbose=False, everything=False, offset=None, model=None, **kwargs):
    if everything:
        old_motifs = list(all_motifs)  # copy, since we null out entries below
    elif offset is not None:
        print("offset: %s" % offset)
        print("count: %s" % count)
        old_motifs = all_motifs[offset:offset + count]
    else:
        old_motifs = sample(all_motifs, count)

    new_motifs = [mutate(m, verbose) for m in old_motifs]

    # drop the motifs that failed to mutate, keeping the two lists in step:
    for i, m in enumerate(new_motifs):
        if m is None:
            old_motifs[i] = None
    old_motifs = list(filter(None, old_motifs))
    new_motifs = list(filter(None, new_motifs))

    transforms = ["'%s' \n\t=> '%s'" % (old_motifs[i], n) for i, n in enumerate(new_motifs) if n]
    if verbose:
        for t in transforms:
            print(t)

    if outfile and new_motifs:
        with open(outfile, 'w' if wipe else 'a', encoding="utf-8") as f:
            # trailing newline so successive appends don't glue lines together
            f.write("\n".join(new_motifs) + "\n")

    return new_motifs

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description="Mutate lines using spaCy")
    parser.add_argument("-o", "--outfile", type=str, default="new_motifs.txt",
                        help="append to given file")
    parser.add_argument("-i", "--infile", type=str, default="motifs.txt",
                        help="read lines from given file")
    parser.add_argument("-c", "--count", type=int, default=1,
                        help="generate this many mutated motifs")
    parser.add_argument("-s", "--start", type=int, default=None, dest='offset',
                        help="offset to work from in motif file")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="verbose stdout printing")
    parser.add_argument("-w", "--wipe", action="store_true",
                        help="wipe out contents of outfile instead of appending")
    parser.add_argument("-m", "--model", type=str, default='en_vectors_web_lg',
                        help="which spaCy model to load (default: en_vectors_web_lg)")
    parser.add_argument("-e", "--everything", action="store_true",
                        help="run through every line from infile, not a random sample")
    args = parser.parse_args()

    if args.verbose:
        print("Mutating %s motifs from %s and %swriting out to %s" % ("ALL" if args.everything else args.count, args.infile, "OVER" if args.wipe else "", args.outfile))
    else:
        print("Mutating %s motifs..." % ("ALL" if args.everything else args.count))

    all_motifs = populate_motifs(args.infile)

    print("Loading spacy parser...")
    nlp = init_nlp(model=args.model)

    print("Making motifs:")
    make_new_motifs(**vars(args))
--------------------------------------------------------------------------------
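Before mutate.py can run, the Annoy index it loads via SimpleNeighbors.load('vocab_forest') has to exist, so prepare-mutate.py (shown further below) goes first. A plausible invocation using only the defaults above:

    python prepare-mutate.py    # one-time: writes vocab_forest.annoy and vocab_forest-data.pkl
    python mutate.py -c 5 -v    # mutate five random motifs, appending to new_motifs.txt
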
/mythologybot.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# bash, not plain sh: we rely on $RANDOM and ${var//...} substitutions below
source ~/.cronrc

# pick a random line from motifs.txt, then break it into verse-ish lines
# by inserting newlines after punctuation and before common conjunctions
export TWEET=$(awk "NR==$((RANDOM % $(wc -l < motifs.txt) + 1))" motifs.txt | sed -r 's/(^\S+? )|(: |;|,|\?|!)| ([\(\"].{3,}[\)\"]) |\b(--|int?o?|about|while|he|she|they|s?o? ?that|as|who|by|when|for|witho?u?t?|at|on|and|because|but|to b?e?|from|under|has|where|how many|how long|of|or|whiche?v?e?r?)\b/\1\2\n\3\4/g')
echo -e "Should tweet:\n$TWEET"

# Twitter/X
twurl set default MythologyBot
twurl '/2/tweets' --data '{"text": "'"${TWEET//$'\n'/\\n}"'"}' --header 'Content-Type: application/json' --consumer-key ${MYTHBOT_CONSUMERKEY} --consumer-secret ${MYTHBOT_CONSUMERSECRET} --access-token ${MYTHBOT_ACCESSTOKEN} --token-secret ${MYTHBOT_TOKENSECRET}

# Mastodon; --data-urlencode keeps newlines and '&' in the status intact
curl -H "Authorization: Bearer $MYTHOLOGYBOT_MASTODON" --data-urlencode "status=$TWEET" https://botsin.space/api/v1/statuses

# Bluesky (reads the exported TWEET variable)
/home/boodoo/apps/mythology/bsky.sh
--------------------------------------------------------------------------------

/mythologybutt.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# same as mythologybot.sh, but drawing from the mutated motifs
source ~/.cronrc

TWEET=$(awk "NR==$((RANDOM % $(wc -l < new_motifs.txt) + 1))" new_motifs.txt | sed -r 's/(^\S+? )|(: |;|,|\?|!)| ([\(\"].{3,}[\)\"]) |\b(--|int?o?|about|while|he|she|they|s?o? ?that|as|who|by|when|for|witho?u?t?|at|on|and|because|but|to b?e?|from|under|has|where|of|or|whiche?v?e?r?)\b/\1\2\n\3\4/g')
echo -e "Should tweet:\n$TWEET"

twurl set default MythologyButt
twurl -d "status=$TWEET" /1.1/statuses/update.json

curl -H "Authorization: Bearer $MYTHOLOGYBUTT_MASTODON" --data-urlencode "status=$TWEET" https://botsin.space/api/v1/statuses
--------------------------------------------------------------------------------
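The sed one-liner shared by both scripts is the line-break formatter: branch 1 breaks after the motif index, branch 2 after punctuation, branch 4 before common conjunctions and prepositions. Feeding it the sample motif from mutate.py's comments should yield approximately this (modulo trailing spaces):

    $ echo "A13.1.1 Cow as creator." | sed -r 's/(^\S+? )|(: |;|,|\?|!)| ([\(\"].{3,}[\)\"]) |\b(--|int?o?|about|while|he|she|they|s?o? ?that|as|who|by|when|for|witho?u?t?|at|on|and|because|but|to b?e?|from|under|has|where|how many|how long|of|or|whiche?v?e?r?)\b/\1\2\n\3\4/g'
    A13.1.1
    Cow
    as creator.
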
~~~") 18 | return 19 | 20 | if __name__ == '__main__': 21 | import argparse 22 | example_text = '''examples: 23 | 24 | python prepare-mutate.py -m en_vectors_web_lg -o vocab_forest -t 30 (defaults) 25 | python prepare-mutate.py -m en_core_web_md -o core_web_md_forest 26 | python prepare-mutate.py -t 10 -o quick_forest''' 27 | 28 | parser = argparse.ArgumentParser(prog="prepare-mutate", 29 | description="Prepare mutation script with Annoy mmap", 30 | epilog=example_text, 31 | formatter_class=argparse.RawDescriptionHelpFormatter) 32 | parser.add_argument("-m", "--model", type=str, default="en_vectors_web_lg", 33 | help="which spacy model to use (should have vector data)") 34 | parser.add_argument("-o", "--outprefix", type=str, default="vocab_forest", 35 | help="save Annoy index and data PKL with filenames prfefixed this way") 36 | parser.add_argument("-t", "--trees", type=int, default=30, 37 | help="how many trees in our binary forest. More trees takes longer but yields accuracy.") 38 | args = parser.parse_args() 39 | 40 | prepare(**vars(args)) 41 | --------------------------------------------------------------------------------