├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── abbrase.c
├── abbrase.js
├── abbrase.py
├── abbrase_test.py
├── digest.py
├── forkme.png
├── generate_word.py
├── groupby.c
├── index.html
├── trigrams.json
├── wordgen.c
├── wordgen.py
└── wordlist_bigrams.txt


/.gitignore:
--------------------------------------------------------------------------------
1 | data
2 | groupby
3 | *.pyc
4 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2014 Ryan Hitchman
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 | 
21 | NGram Data (c) 2009 Google
22 | 
23 | wordlist_bigrams.txt is derived from the "English One Million" dataset,
24 | an is licensed under the Creative Commons Attribution 3.0 Unported License.
25 | http://storage.googleapis.com/books/ngrams/books/datasetsv2.html
26 | http://creativecommons.org/licenses/by/3.0/
27 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | all: abbrase wordlist_bigrams.txt wordgen
 2 | 
 3 | PREFIX?=/usr/local
 4 | datadir?=${PREFIX}/share/abbrase
 5 | CFLAGS=-Wall -Wextra -Os -DDATADIR=${datadir}
 6 | 
 7 | CORPUS_EXEMPLAR=googlebooks-eng-1M-2gram-20090715-99.csv.zip
 8 | 
 9 | data/${CORPUS_EXEMPLAR}:
10 | 	mkdir -p data
11 | 	cd data
12 | 	gsutil && gsutil -m cp -n gs://books/ngrams/books/googlebooks-eng-1M-1gram-20090715-* . \
13 | 		&& gsutil -m cp -n gs://books/ngrams/books/googlebooks-eng-1M-2gram-20090715-* .
14 | 	gsutil || (curl -O -C - 'http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-1M-1gram-20090715-[0-9].csv.zip' \
15 | 		&& curl -O -C - 'http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-1M-2gram-20090715-[0-99].csv.zip')
16 | 
17 | 
18 | # the ngrams data is 'mostly sorted' -- lines tend to be in order, but it occasionally restarts
19 | # do a groupby (join records from different years into one) to reduce the data volume, then final sort+groupby
20 | data/1gram.csv.gz: | data/${CORPUS_EXEMPLAR} groupby
21 | 	zcat data/googlebooks-eng-1M-1gram-*.csv.zip | pv | ./groupby 3 | LC_ALL=c sort | ./groupby 2 | gzip -9 > $@
22 | 
23 | data/2gram.csv.gz: | data/${CORPUS_EXEMPLAR} groupby
24 | 	zcat data/googlebooks-eng-1M-2gram-*.csv.zip | pv | ./groupby 3 | LC_ALL=c sort | ./groupby 2 | gzip -9 > $@
25 | 
26 | # extract the 100,000 most common words
27 | data/1gram_common.csv: data/1gram.csv.gz
28 | 	zcat $< | sort -rgk2 | head -n 100000 > $@
29 | 
30 | data/prefixes.txt: data/1gram_common.csv
31 | 	cat $< | sed 's/^\(...\).*\t/\1\t/' | grep '^[a-z]\{3\}' | LC_ALL=c sort | ./groupby 2 | sort -rgk2 | grep -v iii | head -n 1024 > $@
32 | 
33 | wordlist_bigrams.txt:
34 | 	# relies on data/prefixes.txt data/2gram.csv.gz,
35 | 	# but I don't know how to tell Make to only generate those if
36 | 	# this target is missing
37 | 	pypy3 digest.py
38 | 
39 | clean:
40 | 	rm -f abbrase wordgen
41 | 
42 | install: all
43 | 	install -d $(DESTDIR)$(PREFIX)/bin $(DESTDIR)$(datadir)/
44 | 	install abbrase $(DESTDIR)$(PREFIX)/bin
45 | 	install wordgen $(DESTDIR)$(PREFIX)/bin
46 | 	install wordlist_bigrams.txt $(DESTDIR)$(datadir)/
47 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Abbrase
 2 | 
 3 | Abbrase is an abbreviated passphrase generator. An abbrase is one of the passwords it produces. It generates a password and a phrase like "phyeigdolrejutt" and "physical eight dollars rejected utterly".
 4 | 
 5 | Try the [web version](https://rmmh.github.io/abbrase).
 6 | 
 7 | Creating secure passwords is easy. Remembering them is hard. [Pwgen](http://sourceforge.net/projects/pwgen/) makes them memorable though prounouncability. XKCD [suggests](http://xkcd.com/936/) using a series of random common words, but memorizing series of unrelated words can be difficult, and typing long phrases can be tedious.
 8 | 
 9 | Abbrase is an experiment in generating probable phrases using markov chains, and abbreviating each word to the first few letters. This strikes a balance between excessive password length and excessive mnemonic length. Passwords generated by Abbrase are as secure as a number with the same length. "122079103" and "toldulbal" (tolerably dull ball) are equally hard to attack.
10 | 
11 | The wordlist and bigram graph are generated with data from Google NGrams [dataset](http://storage.googleapis.com/books/ngrams/books/datasetsv2.html), used under the [Creative Commons Attribution 3.0 Unported License](http://creativecommons.org/licenses/by/3.0/).
12 | 
13 | ## Theory
14 | 
15 | Language is the most information-dense thing people memorize. Brains don't operate on bits.
16 | 
17 | Pi recitation record-holders don't have thousands of digits in their minds. They map clusters of digits to far more mentally palatable words, memorizing a long story instead of a sequence of digits.
18 | 
19 | Memorizing a grammatically-sensible sentence fragment is easier than a sequence of randomly chosen words.
20 | 
21 | Picking a favorite phrase from the ones generated by Abbrase could make them very slightly easier to attack. A sophisticated attacker could check passwords that are likely to be picked before others. If the attacker can perfectly model which passwords you would prefer, this reduces the security of your password in a proportional amount to the number of passwords you selected it from -- if you picked from 32 passwords generated by abbrase, it makes your password 32x easier to attack (5 bits of security lost).
22 | 
23 | ## Building
24 | 
25 |     git clone https://github.com/rmmh/abbrase.git
26 |     cd abbrase
27 |     make
28 |     ./abbrase
29 | 
30 | The abbrase executable can optionally be supplied with `length` (a number), `count` (a number), and `hook` (a word).
31 | 
32 | ## FAQ
33 | 
34 | *Q:* Isn't using a phrase more secure than abbreviating it?
35 | 
36 | *A:* Not at all for phrases Abbrase generates. Displayed phrases are generated deterministically from the password, so they have no added security. Otherwise yes, 4 words have more security than 4 abbreviated words, but they're less convenient to type, and the added characters aren't as valuable as the first few characters.
37 | 
38 | ## Sample output
39 | 
40 | (don't actually use any of these passwords!)
41 | 
42 |     Generating 32 passwords with 50 bits of entropy
43 |     Password           Phrase
44 |     ---------------    --------------------
45 |     fibpsygotetemol    fibres psychology got eternal molecules
46 |     crohismacwrosiz    cross his machine wrong size
47 |     haswrigooalsarm    has written good also army
48 |     livfoctelduerow    living focus telescope due row
49 |     achtexmeaimiela    achieved text means imitation elaborate
50 |     poiatmengdocpea    point atmospheric engine doctor pearson
51 |     phosinwayladpha    photographs since way laden phagocytes
52 |     henraptopsawgif    henry rapidly top saw gifford
53 |     sednaravameaago    sed narrative available means agony
54 |     roomecgammedgoo    room mechanical game medical good
55 |     iniglouncmomkey    initial gloomy uncomfortable moment key
56 |     taswirhalpetgue    task wire hall peter guessed
57 |     nutdauliemesaba    nutrition daughter lie messenians abandoned
58 |     vanashairnumedi    van ashamed air number edited
59 |     groiniyarcampec    group initial yards came peculiarly
60 |     putmetvilburbor    put methodist village burnt borax
61 |     iniporadaneradh    initial portion adapted nerves adhere
62 |     liqpriproreapeo    liquid principle process reason people
63 |     nodnotmanpetedi    nodded not man peters edition
64 |     negfaslawelsbec    negative fashion law else because
65 |     nosyesroosoiask    nose yes room soissons asked
66 |     somextdiestumea    some extent die study means
67 |     vietumunrenapop    view tumultuous unrest enable popular
68 |     phyeigdolrejutt    physical eight dollars rejected utterly
69 |     somtoptexguibri    some top text guides bring
70 |     beyhabtoldulbal    beyond habit tolerably dull ball
71 |     timsirskyeldske    time sir sky elder sketch
72 |     bigownupoavolak    big own upon avon lake
73 |     phinowdirunalac    philosophy now direction unavoidable lack
74 |     darexppotegggat    dark experience potential eggs gathered
75 |     floalibadwhyour    flow alike bad why our
76 |     darmanfirpopnoi    dark man first popular noise
77 | 


--------------------------------------------------------------------------------
/abbrase.c:
--------------------------------------------------------------------------------
  1 | #include <ctype.h>
  2 | #include <err.h>
  3 | #include <errno.h>
  4 | #include <fcntl.h>
  5 | #include <libgen.h>
  6 | #include <stdio.h>
  7 | #include <stdlib.h>
  8 | #include <string.h>
  9 | #include <unistd.h>
 10 | 
 11 | #define MAX_PREFIXES 1024
 12 | #define PREFIX_LEN 3
 13 | 
 14 | struct IntVec {
 15 |   int len;
 16 |   int cap;
 17 |   int *data;
 18 | };
 19 | 
 20 | struct IntVec *intvec_alloc() {
 21 |   struct IntVec *vec = malloc(sizeof *vec);
 22 |   vec->len = 0;
 23 |   vec->cap = 1;
 24 |   vec->data = malloc(sizeof(int) * vec->cap);
 25 |   return vec;
 26 | }
 27 | 
 28 | void intvec_free(struct IntVec *vec) {
 29 |   free(vec->data);
 30 |   free(vec);
 31 | }
 32 | 
 33 | void intvec_append(struct IntVec *vec, int val) {
 34 |   if (vec->len == vec->cap) {
 35 |     vec->cap *= 2;
 36 |     vec->data = realloc(vec->data, sizeof(int) * vec->cap);
 37 |   }
 38 |   vec->data[vec->len++] = val;
 39 | }
 40 | 
 41 | int intvec_get(struct IntVec *vec, int pos) {
 42 |   if (pos < 0 || pos >= vec->len)
 43 |     err(10, "invalid vector index %d not in [0, %d)", pos, vec->len);
 44 |   return vec->data[pos];
 45 | }
 46 | 
 47 | struct IntVec *intvec_copy(struct IntVec *vec) {
 48 |   /* could be faster, but no need to optimize */
 49 |   struct IntVec *ret = intvec_alloc();
 50 |   int i;
 51 |   for (i = 0; i < vec->len; i++)
 52 |     intvec_append(ret, vec->data[i]);
 53 |   return ret;
 54 | }
 55 | 
 56 | void intvec_print(struct IntVec *vec) {
 57 |   int i;
 58 |   printf("[");
 59 |   for (i = 0; i < vec->len; i++) {
 60 |     if (i != 0)
 61 |       printf(", ");
 62 |     printf("%d", vec->data[i]);
 63 |   }
 64 |   printf("]");
 65 | }
 66 | 
 67 | /* return a new IntVec with the elements in common between a and b.
 68 |    Requires a and b to be sorted. */
 69 | struct IntVec *intvec_intersect(struct IntVec *a, struct IntVec *b) {
 70 |   struct IntVec *ret = intvec_alloc();
 71 |   int ai = 0, bi = 0;
 72 |   while (ai < a->len && bi < b->len) {
 73 |     int diff = a->data[ai] - b->data[bi];
 74 |     if (diff == 0) {
 75 |       intvec_append(ret, a->data[ai]);
 76 |       ai++, bi++;
 77 |     } else if (diff < 0) {
 78 |       ai++;
 79 |     } else if (diff > 0) {
 80 |       bi++;
 81 |     }
 82 |   }
 83 |   return ret;
 84 | }
 85 | 
 86 | struct Prefix {
 87 |   char prefix[PREFIX_LEN];
 88 |   struct IntVec *words;
 89 | };
 90 | 
 91 | int prefix_cmp(const void *a, const void *b) {
 92 |   return memcmp(((const struct Prefix*)a)->prefix,
 93 |                 ((const struct Prefix*)b)->prefix, PREFIX_LEN);
 94 | }
 95 | 
 96 | struct WordGraph {
 97 |   int n_words;
 98 |   int n_prefixes;
 99 |   char **words;
100 |   char **followers_compressed;
101 |   struct Prefix prefixes[MAX_PREFIXES];
102 | };
103 | 
104 | void getline_trimmed(char **target, FILE *stream) {
105 |   size_t n, len;
106 |   n = 0;
107 |   *target = NULL;
108 |   if (getline(target, &n, stream) == -1)
109 |     err(1, "corrupted wordgraph file");
110 |   len = strlen(*target);
111 |   if ((*target)[len - 1] == '\n')
112 |     (*target)[len - 1] = 0;
113 | }
114 | 
115 | struct WordGraph *wordgraph_init(FILE *graph_file) {
116 |   int i, j;
117 |   struct WordGraph *g = malloc(sizeof *g);
118 |   if (fscanf(graph_file, "%d ", &g->n_words) != 1)
119 |     err(1, "corrupted wordgraph file");
120 |   g->n_prefixes = 0;
121 |   g->words = calloc(g->n_words, sizeof g->words[0]);
122 |   g->followers_compressed = calloc(g->n_words, sizeof g->words[0]);
123 |   for (i = 1; i < g->n_words; i++) {
124 |     getline_trimmed(&g->words[i], graph_file);
125 |     /* extract lowercase prefix */
126 |     char prefix[PREFIX_LEN];
127 |     for (j = 0; j < PREFIX_LEN; j++)
128 |         prefix[j] = tolower(g->words[i][j]);
129 |     /* add word to a prefix group */
130 |     for (j = 0; j <= g->n_prefixes; ++j) {
131 |       if (j == g->n_prefixes) {
132 |         /* none found, need to insert */
133 |         if (g->n_prefixes == MAX_PREFIXES)
134 |           errx(2, "corrupted wordgraph file: too many prefixes");
135 |         g->n_prefixes++;
136 |         memcpy(g->prefixes[j].prefix, prefix, PREFIX_LEN);
137 |         g->prefixes[j].words = intvec_alloc();
138 |       }
139 |       if (!memcmp(g->prefixes[j].prefix, prefix, PREFIX_LEN)) {
140 |         intvec_append(g->prefixes[j].words, i);
141 |         break;
142 |       }
143 |     }
144 |   }
145 |   /* sort prefixes (so passwords can be enumerated in order) */
146 |   qsort(g->prefixes, g->n_prefixes, sizeof(struct Prefix), prefix_cmp);
147 | 
148 |   if (g->n_prefixes != MAX_PREFIXES)
149 |     errx(3, "corrupted wordgraph file: not enough prefixes");
150 |   for (i = 0; i < g->n_words; i++)
151 |     getline_trimmed(&g->followers_compressed[i], graph_file);
152 |   return g;
153 | }
154 | 
155 | void wordgraph_free(struct WordGraph *g) {
156 |   int i;
157 |   for (i = 0; i < g->n_words; i++) {
158 |     free(g->words[i]);
159 |     free(g->followers_compressed[i]);
160 |   }
161 |   for (i = 0; i < g->n_prefixes; i++) {
162 |     intvec_free(g->prefixes[i].words);
163 |   }
164 |   free(g->words);
165 |   free(g->followers_compressed);
166 |   free(g);
167 | }
168 | 
169 | /* decode an adjacency list encoded as a string */
170 | struct IntVec *decode(char *enc) {
171 |   /*
172 |   general encoding steps:
173 |   input: [1, 2, 3, 5, 80]
174 |   subtract previous value: [1, 1, 1, 2, 75]
175 |   subtract 1: [0, 0, 0, 1, 74]
176 |   contract runs of zeros: [0x3, 1, 74]
177 |   printably encode numbers as base-32 varints,
178 |   and runs of zeros as the 31 leftover characters:
179 |   output: "bA*B"
180 | 
181 |   this function reverses the steps.
182 | 
183 |   Cf. decode in digest.py
184 |   */
185 |   int enc_ind = 0;
186 |   struct IntVec *dec = intvec_alloc();
187 |   int last_num = 0;
188 |   int zero_run = 0;
189 |   while (enc[enc_ind] || zero_run) {
190 |     int delta = 0;
191 |     int delta_ind = 0;
192 |     if (zero_run)
193 |       zero_run--;
194 |     else {
195 |       unsigned char val = enc[enc_ind];
196 |       if (val >= 0x60) {
197 |         zero_run = enc[enc_ind] & 0x1f;
198 |         delta_ind++;
199 |       } else {
200 |         /* decode base-32 varint */
201 |         do {
202 |           val = enc[enc_ind + delta_ind];
203 |           delta |= (val & 0x1f) << (5 * delta_ind);
204 |           delta_ind++;
205 |         } while (val & 0x20);
206 |       }
207 |     }
208 |     enc_ind += delta_ind;
209 |     last_num += delta + 1;
210 |     intvec_append(dec, last_num);
211 |   }
212 |   return dec;
213 | }
214 | 
215 | void wordgraph_dump(struct WordGraph *g, int a, int b) {
216 |   int i;
217 |   for (i = a; i < b; i++) {
218 |     printf("#%d: %s: %.30s ", i, g->words[i], g->followers_compressed[i]);
219 |     struct IntVec *followers = decode(g->followers_compressed[i]);
220 |     intvec_print(followers);
221 |     intvec_free(followers);
222 |     printf("\n");
223 |   }
224 | }
225 | 
226 | static int min(int a, int b) {
227 |   if (a <= b)
228 |     return a;
229 |   return b;
230 | }
231 | 
232 | int edit_distance(const char *a, const char *b) {
233 |   // code based off http://hetland.org/coding/python/levenshtein.py
234 | 
235 |   int n = strlen(a), m = strlen(b);
236 | 
237 |   if (n > m) {
238 |     // ensure n <= m, to use O(min(n,m)) space
239 |     const char *tmp_s = a;
240 |     a = b;
241 |     b = tmp_s;
242 |     int tmp_i = n;
243 |     n = m;
244 |     m = tmp_i;
245 |   }
246 | 
247 |   int cost[n + 1];
248 | 
249 |   int i, j;
250 |   int ins, del, sub;
251 |   int prevdiag; // lets us store only one row + one cell at a time
252 | 
253 |   const int insert_cost = 1;
254 |   const int gap_cost = 1;
255 |   const int mismatch_cost = 1;
256 | 
257 |   for (i = 0; i < n + 1; ++i)
258 |     cost[i] = i * insert_cost;
259 | 
260 |   for (i = 1; i < m + 1; ++i) {
261 |     prevdiag = cost[0];
262 |     cost[0] = i * gap_cost;
263 | 
264 |     for (j = 1; j < n + 1; ++j) {
265 |       ins = cost[j] + gap_cost;
266 |       del = cost[j - 1] + gap_cost;
267 |       sub = prevdiag;
268 |       if (a[j - 1] != b[i - 1])
269 |         sub += mismatch_cost;
270 |       prevdiag = cost[j];
271 |       cost[j] = min(ins, min(del, sub));
272 |     }
273 |   }
274 | 
275 |   return cost[n];
276 | }
277 | 
278 | /* find the closest word to the input */
279 | int wordgraph_find_word(struct WordGraph *g, const char *word) {
280 |   int i, best_word = 0, best_dist = 10000;
281 |   for (i = 1; i < g->n_words; i++) {
282 |     int dist = edit_distance(word, g->words[i]);
283 |     if (dist < best_dist) {
284 |       best_dist = dist;
285 |       best_word = i;
286 |     }
287 |   }
288 |   return best_word;
289 | }
290 | 
291 | /* pick series of prefixes that will make up the passwords */
292 | void generate_prefixes(int *prefix_indices_out, int length) {
293 |   int fd_crypto, i;
294 |   if ((fd_crypto = open("/dev/urandom", O_RDONLY)) < 0)
295 |     err(5, "unable to get secure random numbers");
296 | 
297 |   int read_len = sizeof(*prefix_indices_out) * length;
298 |   if (read(fd_crypto, prefix_indices_out, read_len) != read_len)
299 |     err(6, "unable to read random numbers");
300 | 
301 |   for (i = 0; i < length; i++) {
302 |     prefix_indices_out[i] &= MAX_PREFIXES - 1;
303 |   }
304 | 
305 |   close(fd_crypto);
306 | }
307 | 
308 | /* generate a reasonably memorable phrase for the prefixes */
309 | int generate_mnemonic(
310 |   struct WordGraph *g, int *prefix_indices, int *word_indices_out, int length) {
311 |   /* find possible words for each of the chosen prefixes */
312 |   int i, j;
313 | 
314 |   struct IntVec *word_sets[length];
315 |   for (i = 0; i < length; i++) {
316 |     word_sets[i] = intvec_copy(g->prefixes[prefix_indices[i]].words);
317 |   }
318 | 
319 |   /* working backwards, reduce possible words for each prefix to only
320 |      those words that have a link to a word in the next set of possible words
321 |    */
322 |   int mismatch = 0; /* track how many links were impossible */
323 |   struct IntVec *next_words, *new_words, *followers, *words, *intersect;
324 |   next_words = NULL;
325 |   for (i = length - 1; i >= 0; i--) {
326 |     words = word_sets[i];
327 |     new_words = intvec_alloc();
328 |     if (next_words) {
329 |       for (j = 0; j < words->len; j++) {
330 |         int word = intvec_get(words, j);
331 |         followers = decode(g->followers_compressed[word]);
332 |         intersect = intvec_intersect(next_words, followers);
333 |         if (intersect->len)
334 |           intvec_append(new_words, word);
335 |         intvec_free(intersect);
336 |         intvec_free(followers);
337 |       }
338 |     }
339 |     if (new_words->len) {
340 |       intvec_free(word_sets[i]);
341 |       word_sets[i] = new_words;
342 |     } else {
343 |       intvec_free(new_words);
344 |       mismatch++;
345 |     }
346 | 
347 |     next_words = word_sets[i];
348 |   }
349 | 
350 |   /* working forwards, pick a word for each prefix */
351 |   int last_word = word_indices_out[0];
352 |   for (i = 0; i < length; i++) {
353 |     followers = decode(g->followers_compressed[last_word]);
354 |     intersect = intvec_intersect(word_sets[i], followers);
355 |     /* Picking the first word available biases the phrase towards more
356 |      * common words, and produces generally satisfactory results.
357 |      * N.B.: to save space, adjacency lists don't encode probabilities */
358 |     last_word = intvec_get(intersect->len ? intersect : word_sets[i], 0);
359 |     word_indices_out[i] = last_word;
360 |     intvec_free(followers);
361 |     intvec_free(intersect);
362 |   }
363 | 
364 |   for (i = 0; i < length; i++) {
365 |     intvec_free(word_sets[i]);
366 |   }
367 | 
368 |   return mismatch;
369 | }
370 | 
371 | void write_passphrase(
372 |     struct Prefix *prefixes, int *prefix_indices, int length, char *buf) {
373 |   int i;
374 |   for (i = 0; i < length; i++)
375 |     sprintf(buf + 3 * i, "%.3s", prefixes[prefix_indices[i]].prefix);
376 | }
377 | 
378 | void run_tests(struct WordGraph *g) {
379 |   /* enumerate passphrases, assert that there are as many as we expect */
380 |   int i, length = 2;
381 |   long expected = 1L << (length * 10L);
382 |   long actual = 0;
383 | 
384 |   int prefix_indices[length];
385 |   char passphrase[length * 3 + 1], last_passphrase[length * 3 + 1];
386 | 
387 |   last_passphrase[0] = 0;
388 |   bzero(prefix_indices, sizeof(prefix_indices));
389 |   while (1) {
390 |     write_passphrase(g->prefixes, prefix_indices, length, passphrase);
391 |     if (strcmp(passphrase, last_passphrase) > 0) {
392 |       actual++;
393 |     } else {
394 |       errx(40, "passphrase out of order! %s -> %s",
395 |         last_passphrase, passphrase);
396 |     }
397 |     memcpy(last_passphrase, passphrase, sizeof(last_passphrase));
398 | 
399 |     /* next passphrase */
400 |     for (i = length - 1; i >= 0; --i) {
401 |       prefix_indices[i]++;
402 |       if (prefix_indices[i] != MAX_PREFIXES)
403 |         break;
404 |       prefix_indices[i] = 0;
405 |     }
406 |     if (i == -1)
407 |       break;
408 |   }
409 | 
410 |   if (actual != expected)
411 |     errx(42, "test failed! expected: %ld != actual: %ld", expected, actual);
412 | }
413 | 
414 | FILE *open_graph_file(char *executable, const char *graph_filename) {
415 |   // #1 try cwd
416 |   FILE *graph_file = fopen(graph_filename, "r");
417 |   if (!graph_file) {
418 |     // #2: try executable directory
419 |     char *binary_dir = dirname(executable);
420 |     char path[256];
421 |     snprintf(path, 255, "%s/%s", binary_dir, graph_filename);
422 |     graph_file = fopen(path, "r");
423 |   }
424 | #ifdef DATADIR
425 | #define QUOTE(name) #name
426 | #define STR(macro) QUOTE(macro)
427 |   if (!graph_file) {
428 |     // #3: datadir
429 |     char path[256];
430 |     snprintf(path, 255, STR(DATADIR) "/%s", graph_filename);
431 |     graph_file = fopen(path, "r");
432 |   }
433 | #endif
434 |   return graph_file;
435 | }
436 | 
437 | int main(int argc, char *argv[]) {
438 |   struct WordGraph *g ;
439 |   const char *graph_filename = "wordlist_bigrams.txt";
440 |   FILE *graph_file = open_graph_file(argv[0], graph_filename);
441 | 
442 |   if (!graph_file)
443 |     err(1, "unable to find %s", graph_filename);
444 | 
445 |   g = wordgraph_init(graph_file);
446 |   //wordgraph_dump(g, 0, 3000);
447 | 
448 | 
449 |   long length = 0;
450 |   long count = 0;
451 |   int start_word = 0;
452 |   int i, c;
453 | 
454 |   while ((c = getopt(argc, argv, "ht")) != -1) {
455 |     switch (c) {
456 |       case 't':
457 |         run_tests(g);
458 |         return 0;
459 |       case 'h':
460 |       default:
461 |         errx(1, "usage: [-h]/[-t] [length] [count] [hook word]");
462 |     }
463 |   }
464 | 
465 |   for (i = 1; i < argc; i++) {
466 |     errno = 0;
467 |     if (length == 0) {
468 |       length = strtol(argv[i], NULL, 10);
469 |       if (!errno && length > 0)
470 |         continue;
471 |       length = 0;
472 |     } else if (count == 0) {
473 |       count = strtol(argv[i], NULL, 10);
474 |       if (!errno && count > 0)
475 |         continue;
476 |       count = 0;
477 |     }
478 |     start_word = wordgraph_find_word(g, argv[i]);
479 |   }
480 | 
481 |   if (!length)
482 |     length = 5;
483 | 
484 |   if (!count)
485 |     count = 32;
486 | 
487 |   printf("Generating %ld passwords with %ld bits of entropy\n", count,
488 |          length * 10);
489 | 
490 |   if (start_word)
491 |     printf("    hook: %s\n", g->words[start_word]);
492 | 
493 |   int pass_len = length * 3;
494 |   printf("%-*s    %s\n", pass_len, "Password", "Mnemonic");
495 |   for (i = 0; i < pass_len; i++)
496 |     putchar('-');
497 |   printf("    ");
498 |   for (i = 0; i < length * 4; i++)
499 |     putchar('-');
500 |   printf("\n");
501 | 
502 |   while (count--) {
503 |     int prefix_indices[length];
504 |     int word_indices[length];
505 |     char passphrase[3 * length + 1];
506 |     word_indices[0] = start_word;
507 |     generate_prefixes(prefix_indices, length);
508 |     generate_mnemonic(g, prefix_indices, word_indices, length);
509 | 
510 |     write_passphrase(g->prefixes, prefix_indices, length, passphrase);
511 |     printf("%s   ", passphrase);
512 | 
513 |     if (start_word)
514 |       printf(" %s", g->words[start_word]);
515 | 
516 |     for (i = 0; i < length; i++)
517 |       printf(" %s", g->words[word_indices[i]]);
518 | 
519 |     printf("\n");
520 |   }
521 | 
522 |   wordgraph_free(g);
523 | 
524 |   return 0;
525 | }
526 | 


--------------------------------------------------------------------------------
/abbrase.js:
--------------------------------------------------------------------------------
  1 | function WordGraph(url) {
  2 | 	this.state = "init";
  3 | 	this.n_words = 0;
  4 | 	this.words = [];
  5 | 	this.prefixes = {};
  6 | 	this.prefix_list = [];
  7 | 	this.followers_compressed = [];
  8 | 	this.url = url;
  9 | 	this.onprogress = null;
 10 | 	this.onready = null;
 11 | }
 12 | 
 13 | WordGraph.prototype.load = function() {
 14 | 	if (this.state != "init")
 15 | 		return;
 16 | 	this.state = "downloading";
 17 | 	var xhr = new XMLHttpRequest();
 18 | 	xhr.open('GET', this.url, true);
 19 | 	xhr.totalsize = 20670394;
 20 | 	var self = this;
 21 | 	xhr.onreadystatechange = function(evt) {
 22 | 		if (xhr.status >= 400) {
 23 | 			if (self.onprogress)
 24 | 				self.onprogress(-xhr.status);
 25 | 		}
 26 | 		if (xhr.readyState === 3) {
 27 | 			var perc = (100 * xhr.response.length / xhr.totalsize)|0;
 28 | 			if (xhr.perc != perc) {
 29 | 				xhr.perc = perc;
 30 | 				if (self.onprogress) {
 31 | 					self.onprogress(perc);
 32 | 				}
 33 | 			}
 34 | 		} else if (xhr.readyState === 4) {
 35 | 			console.log("Done");
 36 | 			self.parse(xhr.responseText);
 37 | 		}
 38 | 	}
 39 | 	xhr.send();
 40 | }
 41 | 
 42 | WordGraph.prototype.parse = function(text) {
 43 | 	var lines = text.split("\n");
 44 | 	lines.reverse();
 45 | 
 46 | 	this.n_words = parseInt(lines.pop());
 47 | 	this.words.push('');
 48 | 	for (var n = 1; n < this.n_words; n++) {
 49 | 		var word = lines.pop();
 50 | 		var prefix = word.substr(0, 3).toLowerCase();
 51 | 		this.words.push(word);
 52 | 		this.prefixes[prefix] = this.prefixes[prefix] || [];
 53 | 		this.prefixes[prefix].push(n);
 54 | 	}
 55 | 
 56 | 	this.prefix_list = Object.keys(this.prefixes);
 57 | 	this.prefix_list.sort();
 58 | 
 59 | 	for (var n = 1; n <= this.n_words; n++) {
 60 | 		this.followers_compressed.push(lines.pop());
 61 | 	}
 62 | 
 63 | 	this.state = "ready";
 64 | 
 65 | 	if (this.onready !== null) {
 66 | 		this.onready(this);
 67 | 	}
 68 | }
 69 | 
 70 | WordGraph.prototype.get_followers = function(word_number) {
 71 | 	var enc = this.followers_compressed[word_number];
 72 | 	/*
 73 | 	general encoding steps:
 74 | 	input: [1, 2, 3, 5, 80]
 75 | 	subtract previous value: [1, 1, 1, 2, 75]
 76 | 	subtract 1: [0, 0, 0, 1, 74]
 77 | 	contract runs of zeros: [0x3, 1, 74]
 78 | 	printably encode numbers as base-32 varints,
 79 | 	and runs of zeros as the 31 leftover characters:
 80 | 	output: "bA*B"
 81 | 
 82 | 	this function reverses the steps.
 83 | 
 84 | 	Cf. decode in digest.py
 85 | 	*/
 86 | 	var enc_ind = 0;
 87 | 	var dec = [];
 88 | 	var last_num = 0;
 89 | 	var zero_run = 0;
 90 | 	while (enc_ind < enc.length || zero_run) {
 91 | 		var delta = 0;
 92 | 		var delta_ind = 0;
 93 | 		if (zero_run) {
 94 | 			zero_run--;
 95 | 		} else {
 96 | 			var val = enc.charCodeAt(enc_ind);
 97 | 			if (val >= 0x60) {
 98 | 				zero_run = val & 0x1f
 99 | 				delta_ind += 1
100 | 			} else {
101 | 				do {
102 | 					val = enc.charCodeAt(enc_ind + delta_ind);
103 | 					delta |= (val & 0x1f) << (5 * delta_ind);
104 | 					delta_ind++;
105 | 				} while (!(val & 0x40));
106 | 			}
107 | 		}
108 | 		enc_ind += delta_ind;
109 | 		num = last_num + delta + 1;
110 | 		last_num = num;
111 | 		dec.push(num);
112 | 	}
113 | 	return dec;
114 | };
115 | 
116 | function intersect(a, b) {
117 | 	var ret = [];
118 | 	var ai = 0, bi = 0;
119 | 	while (ai < a.length && bi < b.length) {
120 | 		var diff = a[ai] - b[bi];
121 | 		if (diff == 0) {
122 | 			ret.push(a[ai]);
123 | 			ai++, bi++;
124 | 		} else if (diff < 0) {
125 | 			ai++;
126 | 		} else if (diff > 0) {
127 | 			bi++;
128 | 		}
129 | 	}
130 | 	return ret;
131 | }
132 | 
133 | function PassphraseGenerator(graph) {
134 | 	this.graph = graph;
135 | }
136 | 
137 | PassphraseGenerator.prototype.gen_password = function(length) {
138 | 	if (this.graph.state !== "ready")
139 | 		throw new Error("wordgraph isn't loaded yet");
140 | 
141 | 	if (this.graph.prefix_list.length != 1024)
142 | 		throw new RangeError("unexpected number of prefixes");
143 | 
144 | 	/* pick series of prefixes that will make up the passwords */
145 | 	var prefix_numbers = new Uint16Array(length);
146 | 	crypto.getRandomValues(prefix_numbers);
147 | 	var prefixes_chosen = [];
148 | 
149 | 	var word_sets = [];
150 | 	var out_password = "";
151 | 
152 | 	/* find possible words for each of the chosen prefixes */
153 | 	for (var i = 0; i < length; i++) {
154 | 		prefix_numbers[i] = prefix_numbers[i] % 1024;
155 | 		prefixes_chosen[i] = this.graph.prefix_list[prefix_numbers[i]];
156 | 		out_password += prefixes_chosen[i];
157 | 		word_sets.push(this.graph.prefixes[prefixes_chosen[i]]);
158 | 	}
159 | 
160 | 	/* working backwards, reduce possible words for each prefix to only
161 | 	   those words that have a link to a word in the next set of possible words
162 | 	*/
163 | 
164 | 	var mismatch = 0; /* track how many links were impossible */
165 | 	var next_words = [];
166 | 	for (var i = length - 1; i >= 0; i--) {
167 | 		var words = word_sets[i];
168 | 		var new_words = [];
169 | 		if (next_words.length > 0) {
170 | 			for (var j = 0; j < words.length; j++) {
171 | 				var word = words[j];
172 | 				var followers = this.graph.get_followers(word);
173 | 				if (intersect(next_words, followers).length)
174 | 					new_words.push(word);
175 | 			}
176 | 		}
177 | 		if (new_words.length > 0) {
178 | 			word_sets[i] = new_words;
179 | 		} else {
180 | 			mismatch++;
181 | 		}
182 | 
183 | 		next_words = word_sets[i];
184 | 	}
185 | 
186 | 	/* working forwards, pick a word for each prefix */
187 | 	var out_words = [];
188 | 	var phrase_rank = 0;
189 | 	var last_word = 0;
190 | 	for (var i = 0; i < length; i++) {
191 | 		var followers = this.graph.get_followers(last_word);
192 | 		var forward = intersect(word_sets[i], followers);
193 | 		/* Picking the first word available biases the phrase towards more
194 | 		* common words, and produces generally satisfactory results.
195 | 		* N.B.: to save space, adjacency lists don't encode probabilities */
196 | 		last_word = (forward.length > 0 ? forward : word_sets[i])[0];
197 | 		phrase_rank += last_word;
198 | 		out_words.push(this.graph.words[last_word]);
199 | 	}
200 | 
201 | 	return {"password": out_password,
202 | 		"mnemonic": out_words.join(" "),
203 | 		"phrase_rank": phrase_rank,
204 | 		"mismatch": mismatch,
205 | 		"numbers": prefix_numbers};
206 | };
207 | 
208 | function ljust(inp, length, pad) {
209 | 	if (pad === undefined)
210 | 		pad = ' ';
211 | 	while (inp.length < length) {
212 | 		inp += pad;
213 | 	}
214 | 	return inp;
215 | }
216 | 
217 | PassphraseGenerator.prototype.make_table = function(length, count) {
218 | 	var passwords = [];
219 | 
220 | 	for (var i = 0; i < count; i++) {
221 | 		passwords.push(generator.gen_password(length));
222 | 	}
223 | 
224 | 	// Sort by the sum of the word positions -- meaning phrases with more common
225 | 	// words come first.
226 | 	//
227 | 	// This tends to rank desirable passwords higher, but could aid an attacker
228 | 	// leveraging selection biases (maximum entropy loss is log2(count) bits).
229 | 
230 | 	passwords.sort(function (a, b) {
231 | 		return a.phrase_rank - b.phrase_rank;
232 | 	});
233 | 
234 | 	output = 'Generating ' + count + ' passwords with ';
235 | 	output += (length * 10) + ' bits of entropy\n';
236 | 
237 | 	output += ljust("Password", 3 * length) + '    ' + 'Mnemonic\n';
238 | 	output += ljust('', 3 * length, '-') + '    ' + ljust('', 4 * length, '-') + '\n';
239 | 
240 | 	for (var i = 0; i < count; i++) {
241 | 		var gen = passwords[i];
242 | 		output += gen.password + '    ' + gen.mnemonic;
243 | 		// output += ' ' + gen.phrase_rank + ' ' + gen.mismatch;
244 | 		output += '\n';
245 | 	}
246 | 
247 | 	return output;
248 | };
249 | 
250 | function pretty_arraybuffer(ab) {
251 | 	var ret = [];
252 | 	for (var i = 0; i < ab.length; i++)
253 | 		ret.push(ab[i]);
254 | 	return '[' + ret.join(', ') + ']';
255 | }
256 | 


--------------------------------------------------------------------------------
/abbrase.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import argparse
  4 | import math
  5 | import secrets
  6 | import sys
  7 | 
  8 | import digest
  9 | 
 10 | 
 11 | class WordGraph(object):
 12 | 
 13 |     def __init__(self, fname):
 14 |         with open(fname) as compressed_graph:
 15 |             n_words = int(compressed_graph.readline())
 16 |             self.wordlist = ['']  # ['', 'and', 'the', ...]
 17 |             self.prefixes = {}  # {'and': [1, ...], ...}
 18 |             for n in range(1, n_words):
 19 |                 word = compressed_graph.readline().strip()
 20 |                 self.wordlist.append(word)
 21 |                 self.prefixes.setdefault(word[:3].lower(), []).append(n)
 22 | 
 23 |             self.followers = []
 24 | 
 25 |             for a in range(n_words):
 26 |                 self.followers.append(compressed_graph.readline().rstrip('\n'))
 27 | 
 28 |     def get_followers(self, node_number):
 29 |         return set(digest.decode(self.followers[node_number]))
 30 | 
 31 |     def gen_password(self, length, seed=0):
 32 |         # pick the series of prefixes (3-letter abbreviations)
 33 |         # that will make up the password
 34 |         prefix_list = list(self.prefixes)
 35 |         assert len(prefix_list) == 1024
 36 |         out = []
 37 |         if seed:
 38 |             while seed:
 39 |                 out.append(prefix_list[seed & 1023])
 40 |                 seed >>= 10
 41 |         else:
 42 |             for _ in range(length):
 43 |                 out.append(prefix_list[secrets.randbelow(1024)])
 44 |         return ''.join(out)
 45 | 
 46 |     def split_password(self, password):
 47 |         assert len(password) % 3 == 0
 48 |         return [password[x:x + 3].lower() for x in range(0, len(password), 3)]
 49 | 
 50 |     def numbered_to_phrase(self, word_numbers):
 51 |         return ' '.join(self.wordlist[n] for n in word_numbers)
 52 | 
 53 |     def gen_passphrase_numbered(self, prefixes, skip_sets=None):
 54 |         # find possible words for each of the chosen prefixes
 55 |         word_sets = [set(self.prefixes[p]) for p in prefixes]
 56 | 
 57 |         if skip_sets is not None:
 58 |             assert len(skip_sets) == len(word_sets)
 59 |             for words, skips in zip(word_sets, skip_sets):
 60 |                 if len(skips) < len(words):
 61 |                     words.difference_update(skips)
 62 |                 assert words, "no words left!"
 63 | 
 64 |         # working backwards, reduce possible words for each prefix
 65 |         # to only those words that have an outgoing edge to a word
 66 |         # in the next set of possible words
 67 |         next_words = set()
 68 | 
 69 |         # sometimes a transition between two prefixes is impossible
 70 |         # (~13% of prefix pairs don't have associated bigrams)
 71 |         # it doesn't seem to matter very much, but let's keep track of it
 72 |         mismatch = -1
 73 |         for words in word_sets[::-1]:
 74 |             new_words = set(word for word in words
 75 |                             if self.get_followers(word) & next_words)
 76 |             if not new_words:
 77 |                 mismatch += 1
 78 |                 new_words = words
 79 |             words.intersection_update(new_words)
 80 |             next_words = words
 81 | 
 82 |         # working forwards, pick a word for each prefix
 83 |         last_word = 0
 84 |         out_word_numbers = []
 85 |         for words in word_sets:
 86 |             words = (words & self.get_followers(last_word)) or words
 87 |             # heuristic: try to chain with the most common word
 88 |             # (smallest node number)
 89 |             word = min(words)
 90 |             out_word_numbers.append(word)
 91 |             last_word = word
 92 | 
 93 |         return out_word_numbers
 94 | 
 95 |     def gen_passphrase(self, password):
 96 |         prefixes = self.split_password(password)
 97 | 
 98 |         word_numbers = self.gen_passphrase_numbered(prefixes)
 99 | 
100 |         return self.numbered_to_phrase(word_numbers)
101 | 
102 |     def gen_passphrases(self, password, count=16):
103 |         prefixes = self.split_password(password)
104 |         skip_sets = [set() for _ in prefixes]
105 |         phrases = []
106 | 
107 |         for _ in range(count):
108 |             phrase_numbers = self.gen_passphrase_numbered(prefixes, skip_sets)
109 |             for word, skips in zip(phrase_numbers, skip_sets):
110 |                 skips.add(word)
111 |             phrases.append(self.numbered_to_phrase(phrase_numbers))
112 | 
113 |         return phrases
114 | 
115 | 
116 | def wordgraph_dump(a, b):
117 |     for n in range(a, b):
118 |         print('#%d: %s: %.30s %s' % (n, graph.wordlist[n], graph.followers[n],
119 |                                      digest.decode(graph.followers[n])))
120 | 
121 | def table(strings):
122 |     split_strings = [s.split() for s in strings]
123 |     position_lengths = [[len(w) for w in s] for s in split_strings]
124 |     widths = [max(lens) for lens in zip(*position_lengths)]
125 |     return [' '.join(word.ljust(width) for word, width in zip(words, widths))
126 |             for words in split_strings]
127 | 
128 | class PhraseGenerator(object):
129 |     def __init__(self, graph, n_words=None):
130 |         self.graph = graph
131 |         self.n_words = n_words = n_words or len(graph.wordlist) - 1
132 |         assert self.graph.wordlist[0] == ''
133 |         self.idx_to_graph = sorted(range(1, n_words + 1), key=self.graph.wordlist.__getitem__)
134 | 
135 |         graph_to_idx = [self.idx_to_graph.index(n) for n in range(1, n_words+1)]
136 |         graph_to_idx = [None] + sorted(range(n_words), key=self.idx_to_graph.__getitem__)
137 |         assert 0 not in self.idx_to_graph
138 |         assert 0 in graph_to_idx
139 |         for n in range(n_words):
140 |             assert n == graph_to_idx[self.idx_to_graph[n]], n
141 |         # print(self.idx_to_graph, [self.graph.wordlist[x] for x in self.idx_to_graph])
142 |         self.adjacency_lists = [0] * n_words
143 |         for n in range(1, n_words + 1):
144 |             self.adjacency_lists[graph_to_idx[n]] = sorted(
145 |                 [graph_to_idx[x] for x in digest.decode(self.graph.followers[n])
146 |                  if x <= n_words])
147 |         self.path_counts = []
148 |         self.total_paths = 0
149 | 
150 |     def _prepare_path_counts(self, length):
151 |         if len(self.path_counts) != length:
152 |             path_counts = [[0] * self.n_words for _ in range(length)]
153 | 
154 |             for n in range(self.n_words):
155 |                 path_counts[length - 1][n] = 1
156 | 
157 |             for level in range(length - 2, -1, -1):
158 |                 for n in range(self.n_words):
159 |                     count = 0
160 |                     for out in self.adjacency_lists[n]:
161 |                         count += path_counts[level + 1][out]
162 |                     path_counts[level][n] = count
163 |             self.path_counts = path_counts
164 |             self.total_paths = sum(path_counts[0])
165 | 
166 |         return self.path_counts
167 | 
168 |     def n_bits(self, length):
169 |         self._prepare_path_counts(length)
170 |         return math.log2(self.total_paths)
171 | 
172 |     def generate(self, length, chosen_path=None):
173 |         ''' generate a random phrase '''
174 |         # pick a phrase at random
175 |         # or, pick a path through a DAG uniformly from all paths possible
176 | 
177 |         path_counts = self._prepare_path_counts(length)
178 | 
179 |         # 2) pick a path to follow
180 |         if chosen_path is None:
181 |             chosen_path = secrets.randbelow(self.total_paths)
182 |             #print('%.2f bits of entropy' % math.log(self.total_paths, 2), end=' ')
183 |             #print("chose %d/%d" % (chosen_path, self.total_paths))
184 |         if not 0 <= chosen_path < self.total_paths:
185 |             raise ValueError('chosen path %d not in [0,%d)' % (chosen_path, self.total_paths))
186 | 
187 |         # 3) working forwards, pick the word that contributed our chosen_path
188 |         path = chosen_path
189 |         words = []
190 |         for level in range(length):
191 |             for n in range(self.n_words) if level == 0 else self.adjacency_lists[words[-1]]:
192 |                 #print(words, n, path_counts[level][n], path, self.adjacency_lists[n])
193 |                 if path_counts[level][n] <= path:
194 |                     path -= path_counts[level][n]
195 |                 else:
196 |                     words.append(n)
197 |                     break
198 |             else:
199 |                 print("couldn't find a successor :(", words, level)
200 |         assert len(words) == length, chosen_path
201 |         return ' '.join(self.graph.wordlist[self.idx_to_graph[word]] for word in words)
202 | 
203 | def main(args):
204 |     parser = argparse.ArgumentParser()
205 |     parser.add_argument('-p', '--phrase', action='store_true',
206 |                         help='Generate passphrases instead of abbrases.')
207 |     parser.add_argument('-m', '--multiple', action='store_true',
208 |                         help='generate multiple mnemonics for each password')
209 |     parser.add_argument('length', default=5, type=int, nargs='?')
210 |     parser.add_argument('count', default=32, type=int, nargs='?')
211 |     parser.add_argument('-s', '--seed', type=int, help='convert number into passphrase')
212 |     options = parser.parse_args(args)
213 | 
214 |     if options.seed:
215 |         options.count = 1
216 | 
217 |     graph = WordGraph('wordlist_bigrams.txt')
218 | 
219 |     # wordgraph_dump(1, 3000)
220 |     count = options.count
221 |     length = options.length
222 |     n_bits = length * 10
223 |     if options.phrase:
224 |         pg = PhraseGenerator(graph)
225 |         n_bits = '%.2f' % pg.n_bits(length)
226 |     if not options.seed:
227 |         print('Generating %d passwords with %s bits of entropy' % (
228 |             count, n_bits))
229 |     if options.phrase:
230 |         print('Passphrase')
231 |         print('-' * length * (1 + sum(len(w) for w in graph.wordlist) // len(graph.wordlist)))
232 |     else:
233 |         pass_len = length * 3
234 |         print('Password'.ljust(pass_len), '  ', 'Mnemonic')
235 |         print('-' * pass_len, '  ', '-' * (4 * length))
236 |     for _ in range(count):
237 |         if options.phrase:
238 |             print(pg.generate(length, chosen_path=options.seed))
239 |         else:
240 |             if options.seed:
241 |                 password = graph.gen_password(0, seed=options.seed)
242 |             else:
243 |                 password = graph.gen_password(length)
244 |             if options.multiple:
245 |                 phrases = graph.gen_passphrases(password)
246 |                 print('%s   ' % (password))
247 |                 print('\t' + '\n\t'.join(table(phrases)))
248 |             else:
249 |                 phrase = graph.gen_passphrase(password)
250 |                 print('%s   %s' % (password, phrase))
251 | 
252 | if __name__ == '__main__':
253 |     sys.exit(main(sys.argv[1:]))
254 | 


--------------------------------------------------------------------------------
/abbrase_test.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import unittest
 4 | import secrets
 5 | 
 6 | import abbrase
 7 | 
 8 | 
 9 | class AbbraseTest(unittest.TestCase):
10 |     def setUp(self):
11 |         self.graph = abbrase.WordGraph('wordlist_bigrams.txt')
12 | 
13 |     def test_gen_passphrase(self):
14 |         for phrase in (
15 |                 'until nerve agent dropped nice',
16 |                 'obedience observed Tom instead engaged',
17 |                 'determined off agricultural winter squadron'):
18 |             words = phrase.split()
19 |             password = ''.join(x[:3] for x in words)
20 |             assert self.graph.gen_passphrase(password) == phrase
21 | 
22 |     def test_gen_password(self):
23 |         passwords = set()
24 |         while len(passwords) < 1024:
25 |             passwords.add(self.graph.gen_password(1))
26 | 
27 | class WordGraphTest(unittest.TestCase):
28 |     def setUp(self):
29 |         self.graph = abbrase.WordGraph('wordlist_bigrams.txt')
30 | 
31 |     def test_generate(self):
32 |         gen = abbrase.PhraseGenerator(self.graph, 256)
33 |         for c in [1, 2]:
34 |             gen.generate(c, 1)
35 |             ps = [gen.generate(c, n) for n in range(gen.total_paths)]
36 |             self.assertEqual(ps, sorted(ps))
37 | 
38 |     def test_generate_large(self):
39 |         # do probabilistic testing for large phrase generation:
40 |         # pick a random phrase, then test narrowing bounds around the phrase to ensure ordering
41 |         gen = abbrase.PhraseGenerator(self.graph)
42 |         gen.generate(5, 1)
43 |         print("testing", gen.total_paths)
44 |         for trial in range(100):
45 |             target = secrets.randbelow(gen.total_paths)
46 |             last_lo, last_hi = '', '~'
47 |             lo_idx = 0
48 |             hi_idx = gen.total_paths - 1
49 |             n = 0
50 |             while lo_idx < target < hi_idx:
51 |                 n += 1
52 |                 lo_gen = gen.generate(5, lo_idx)
53 |                 hi_gen = gen.generate(5, hi_idx)
54 |                 # print(lo_gen, hi_gen)
55 |                 self.assertLess(lo_gen, hi_gen, (lo_idx, hi_idx, lo_gen, hi_gen))
56 |                 self.assertLess(last_lo, lo_gen)
57 |                 self.assertLess(hi_gen, last_hi)
58 |                 lo_idx += (target - lo_idx + 1) // 2
59 |                 hi_idx -= (hi_idx - target + 1) // 2
60 |                 last_lo = lo_gen
61 |                 last_hi = hi_gen
62 |             target_gen = gen.generate(5, target)
63 |             self.assertLess(lo_gen, target_gen)
64 |             self.assertLess(target_gen, hi_gen)
65 |             print(trial, n, target, target - lo_idx, target - hi_idx, target_gen)
66 | 
67 | 
68 | 
69 | if __name__ == '__main__':
70 |     unittest.main()
71 | 


--------------------------------------------------------------------------------
/digest.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import gzip
  3 | 
  4 | 
  5 | def edit_dist(s1, s2, lim=0):
  6 |     last = list(range(len(s1) + 1))
  7 |     this = [0] * len(last)
  8 |     for j in range(0, len(s2)):
  9 |         this[0] = j + 1
 10 |         for i in range(1, len(this)):
 11 |             this[i] = min(last[i] + 1,
 12 |                              this[i - 1] + 1,
 13 |                              last[i - 1] + int(s2[j] != s1[i-1]))
 14 |         if lim and min(this) >= lim:
 15 |             return min(this)
 16 |         last, this = this, last
 17 |     return last[-1]
 18 | 
 19 | 
 20 | def build_common(digest, min_dist):
 21 |     common = {}
 22 |     prefixes_missing = set(prefixes)
 23 |     n = 1
 24 |     out = ''
 25 |     for line in open('data/1gram_common.csv', encoding='utf-8', errors='replace'):
 26 |         word_orig = line.split()[0]
 27 |         word = word_orig.lower()
 28 |         if word in common:
 29 |             continue
 30 |         prefix = word[:3]
 31 |         if prefix in prefixes:
 32 |             if min_dist and any(edit_dist(word, k, min_dist) < min_dist for k in common):
 33 |                 continue
 34 |             if prefix in prefixes_missing:
 35 |                 prefixes_missing.remove(prefix)
 36 |             common[word] = n
 37 |             out += word_orig + '\n'
 38 |             n += 1
 39 |             if n & 0xff == 0:
 40 |                 print(word, n)
 41 |     if prefixes_missing:
 42 |         print(('unable to find %d prefixes in input!: %s'
 43 |                % (len(prefixes_missing), ' '.join(sorted(prefixes_missing)))))
 44 |     digest.write('%s\n' % n)
 45 |     digest.write(out)
 46 |     print('words:', n)
 47 | 
 48 |     return common
 49 | 
 50 | def build_edges(common):
 51 |     edges = [[] for _ in range(len(common) + 1)]
 52 |     last_a = ''
 53 |     prefix_transitions = set()
 54 |     for line in gzip.GzipFile('data/2gram.csv.gz'):
 55 |         parts = line.decode('utf8').lower().split()
 56 |         if len(parts) == 3:
 57 |             a, b, count = parts
 58 |             if a not in common or b not in common:
 59 |                 continue
 60 |             prefix_a = a[:3]
 61 |             prefix_b = b[:3]
 62 |             prefix_transitions.add(prefix_a + prefix_b)
 63 |             if a != last_a:
 64 |                 if last_a:
 65 |                     if a[0] != last_a[0]:
 66 |                         print('!', last_a)
 67 |                 last_a = a
 68 |             edges[common[a]].append(common[b])
 69 | 
 70 |     print('Attested prefix-prefix combinations:', end=' ')
 71 |     print('%.2f%%' % (100.0 * len(prefix_transitions) /
 72 |                      (1.0 * len(prefixes) * len(prefixes))))
 73 | 
 74 |     return edges
 75 | 
 76 | 
 77 | def encode(l):
 78 |     ''' pack list of monotonically increasing positive integers into a string
 79 | 
 80 |     replace elements with the difference from the previous element minus one,
 81 |     runs of zeros with special 'zero repeat' characters (RLE for zeros),
 82 |     otherwise encode as printable base-32 varints
 83 | 
 84 |     >>> encode([1, 2, 3, 5, 8, 13, 21])
 85 |     'bABDG'
 86 |     '''
 87 |     enc = ''
 88 |     last_num = 0
 89 |     zero_run = 0
 90 |     for num in l:
 91 |         delta = num - last_num - 1
 92 |         assert delta >= 0, "input must be strictly increasing positive integers"
 93 |         last_num = num
 94 |         if delta == 0:
 95 |             zero_run += 1
 96 |             continue
 97 |         while zero_run:
 98 |             enc += chr(0x60 + min(0x1f, zero_run) - 1)
 99 |             zero_run = max(0, zero_run - 0x1f)
100 |         while True:
101 |             enc += chr((0x40 if delta < 0x20 else 0x20) | (delta & 0x1f))
102 |             delta >>= 5
103 |             if not delta:
104 |                 break
105 |     while zero_run:
106 |         enc += chr(0x60 + min(0x1f, zero_run) - 1)
107 |         zero_run = max(0, zero_run - 0x1f)
108 | 
109 |     return enc
110 | 
111 | 
112 | def decode(enc):
113 |     ''' inverse of encode
114 | 
115 |     >>> deencode('bABDG')
116 |     [1, 2, 3, 5, 8, 13, 21]
117 |     '''
118 |     enc_ind = 0
119 |     dec = []
120 |     last_num = 0
121 |     zero_run = 0
122 |     while enc_ind < len(enc) or zero_run:
123 |         delta = 0
124 |         delta_ind = 0
125 |         if zero_run:
126 |             zero_run -= 1
127 |         else:
128 |             val = ord(enc[enc_ind])
129 |             if val >= 0x60:
130 |                 zero_run = val & 0x1f
131 |                 delta_ind += 1
132 |             else:
133 |                 while True:
134 |                     val = ord(enc[enc_ind + delta_ind])
135 |                     delta |= (val & 0x1f) << (5 * delta_ind)
136 |                     delta_ind += 1
137 |                     if val & 0x40:
138 |                         break
139 |         enc_ind += delta_ind
140 |         num = last_num + delta + 1
141 |         last_num = num
142 |         dec.append(num)
143 |     return dec
144 | 
145 | for l in ([1, 2, 3, 5], [1], [1, 2, 3, 5, 6, 8, 9, 10, 11, 12, 3000000], list(range(1, 500))):
146 |     assert decode(encode(l)) == l
147 | 
148 | if __name__ == '__main__':
149 |     parser = argparse.ArgumentParser()
150 |     parser.add_argument('--prefixes', default='data/prefixes.txt')
151 |     parser.add_argument('--output', default='wordlist_bigrams.txt')
152 |     parser.add_argument('--min_dist', default=0, type=int)
153 | 
154 |     options = parser.parse_args()
155 | 
156 |     prefixes = set()
157 |     for line in open(options.prefixes):
158 |         prefixes.add(line.split()[0])
159 | 
160 |     digest = open(options.output, 'w')
161 | 
162 |     common = build_common(digest, options.min_dist)
163 |     edges = build_edges(common)
164 | 
165 |     for n, out in enumerate(edges):
166 |         digest.write(encode(sorted(set(out))))
167 |         digest.write('\n')
168 | 
169 |     digest.close()
170 | 


--------------------------------------------------------------------------------
/forkme.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmmh/abbrase/c142f7fd07002aa6f27c4be7c177d41db15b76e5/forkme.png


--------------------------------------------------------------------------------
/generate_word.py:
--------------------------------------------------------------------------------
 1 | # example of word generation using trigrams
 2 | 
 3 | import abbrase
 4 | 
 5 | import random
 6 | import re
 7 | 
 8 | graph = abbrase.WordGraph('wordlist_bigrams.txt')
 9 | words = [' %s.' % word.lower()
10 |          for word in graph.wordlist if re.match('^[A-za-z]*$', word)]
11 | 
12 | # find common trigrams
13 | trigrams = set()
14 | 
15 | for word in words[:10000]:
16 |     for pos in xrange(0, len(word) - 2):
17 |         trigrams.add(word[pos:pos + 3])
18 | 
19 | trigrams = sorted(trigrams)
20 | 
21 | print len(trigrams)
22 | 
23 | def gen_word(length):
24 |     gen = ' '
25 |     while True:
26 |         possible = [ngram for ngram in trigrams if ngram.startswith(gen[-2:])]
27 |         if len(gen) == length:
28 |             if any('.' in word for word in possible):
29 |                 return gen
30 |             else:
31 |                 # restart
32 |                 possible = []
33 |         if not possible:
34 |             gen = ' '
35 |             continue
36 |         choice = random.choice(possible)
37 |         if len(gen) == 1:
38 |             gen = choice[1:]
39 |         else:
40 |             gen += choice[-1]
41 | 
42 | 
43 | for _ in range(20):
44 |     print gen_word(10),
45 | 


--------------------------------------------------------------------------------
/groupby.c:
--------------------------------------------------------------------------------
 1 | /* group series of tab-separated values by their first column,
 2 | outputting the first field and the total of a configurable count field */
 3 | 
 4 | #include <err.h>
 5 | #include <stdio.h>
 6 | #include <stdlib.h>
 7 | #include <string.h>
 8 | 
 9 | int main(int argc, char *argv[]) {
10 |     size_t bufsize = 256;
11 |     char* buf = malloc(bufsize);
12 |     char last[512];
13 |     long long total = 0;
14 |     int count_field;
15 |     if (argc != 2)
16 |         errx(1, "usage: %s <count_field>", argv[0]);
17 |     if ((count_field = atoi(argv[1])) < 2)
18 |         errx(1, "count_field must be at least 2");
19 | 
20 |     while (!feof(stdin)) {
21 |         int field_index = 0;
22 |         getline(&buf, &bufsize, stdin);
23 | 
24 |         char *tab = buf;
25 |         while (++field_index != count_field && *tab) {
26 |             while (*tab && *tab != '\t')
27 |                 tab++;
28 |             *tab++ = 0;
29 |         }
30 |         if (field_index != count_field)
31 |             continue;
32 | 
33 |         if (strcmp(buf, last)) {
34 |             if (total)
35 |                 printf("%s\t%lld\n", last, total);
36 |             strncpy(last, buf, sizeof(last)-1);
37 |             total = 0;
38 |         }
39 | 
40 |         total += atoll(tab);
41 |     }
42 | 
43 |     if (total)
44 |         printf("%s\t%lld\n", last, total);
45 | }
46 | 


--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 | 	<title>Abbrase.js</title>
 5 | 	<script src="abbrase.js"></script>
 6 | 	<style type="text/css">
 7 | 	body { background-color: #222; color: #ccc; font-family: sans-serif; }
 8 | 	pre { border: 1px solid #888; padding: 10px; overflow-x: auto; }
 9 | 	a { color: #44f; }
10 | 	#content { width: 800px; margin: auto;}
11 | 	#wordcount { width: 3em; }
12 | 	</style>
13 | </head>
14 | <body>
15 | <a href="https://github.com/rmmh/abbrase"><img style="position: absolute; top: 0; left: 0; border: 0;" src="forkme.png"></a>
16 | 
17 | <div id="content">
18 | 
19 | <h1>Abbrase</h1>
20 | Password generation using abbreviated phrases<br><br>
21 | 
22 | 
23 | <input type="button" onclick="newbatch();" value="Generate"> passwords using
24 | <input id="wordcount" type="number" min="1" max="100" value="5" onchange="newbatch();"> word phrases.
25 | 
26 | <pre id="output"></pre>
27 | 
28 | 
29 | <h2>How does this work?</h2>
30 | First, <span id="explain_count"></span> secure random numbers are generated. Each number ranges from 0-1023, giving 10 bits of entropy.
31 | <pre id="explain_numbers"></pre>
32 | Each number is converted into a three letter abbreviation. The password is made by joining them together. Abbreviations are taken from a pool of the 1024 most common English word prefixes.
33 | <pre id="explain_password"></pre>
34 | Finally, a phrase is found that abbreviates to the password. Words that are normally found together are chosen to make the phrase more memorable.
35 | <pre id="explain_mnemonic"></pre>
36 | The mnemonic is much easier to remember than the input numbers, but equally secure!<br>
37 | <br>
38 | All steps are performed locally in your browser, and not sent to any other server. For more security, try the Python or C versions in the Abbrase <a href="https://github.com/rmmh/abbrase">github repository</a>.
39 | 
40 | </div>
41 | 
42 | <script type="text/javascript">
43 | var graph = new WordGraph("wordlist_bigrams.txt");
44 | var generator = new PassphraseGenerator(graph);
45 | 
46 | function setTextContent(id, text) {
47 | 	document.getElementById(id).textContent = text;
48 | }
49 | 
50 | graph.onprogress = function(perc) {
51 | 	if (perc < 0)
52 | 		setTextContent("output", "Error downloading: HTTP Error " + -perc);
53 | 	else
54 | 		setTextContent("output", "Downloading 20MB word list... " + perc + "% done");
55 | }
56 | graph.onprogress(0);
57 | 
58 | function newbatch() {
59 | 	var length = parseInt(document.getElementById("wordcount").value) || 5;
60 | 
61 | 	var example_password = generator.gen_password(length);
62 | 	setTextContent("explain_count", length);
63 | 	setTextContent("explain_numbers", pretty_arraybuffer(example_password.numbers));
64 | 	setTextContent("explain_password", example_password.password);
65 | 	setTextContent("explain_mnemonic", example_password.mnemonic);
66 | 
67 | 	setTextContent("output", generator.make_table(length, 32));
68 | }
69 | 
70 | graph.onready = newbatch;
71 | graph.load();
72 | </script>
73 | </body>
74 | </html>
75 | 


--------------------------------------------------------------------------------
/trigrams.json:
--------------------------------------------------------------------------------
1 | [0, 33518332, 35951138, 35951394, 44335650, 58685182, 2396706, 35967778, 35684898, 1896600, 2130466, 53794, 35684898, 35684898, 2130466, 33387102, 36475682, 2097152, 2130722, 45349674, 44335906, 1929216, 2130466, 295714, 0, 2130466, 33314, 0, 0, 36492855, 37657387, 49133567, 794641, 1086051, 2945699, 557091, 6058136, 32770, 34079267, 41794303, 36565543, 108714943, 0, 37589799, 2097152, 48234495, 37468971, 104657775, 18706589, 33314, 547447, 33315, 8958583, 67142178, 0, 45963932, 33591842, 32770, 2097664, 48133115, 0, 0, 32768, 6150399, 2097184, 0, 35684898, 544, 32800, 62780055, 0, 0, 2130466, 3179049, 299042, 68973276, 32, 0, 0, 1638401, 0, 0, 8222941, 0, 2396962, 0, 10318455, 0, 0, 45937527, 73265407, 0, 46003063, 2130466, 0, 0, 117306367, 0, 2097152, 35684898, 1, 2945635, 1962620, 0, 0, 0, 1048589, 0, 0, 117273309, 299010, 2097154, 33853986, 31323135, 2130434, 8736, 32800, 75362558, 2129920, 0, 33587746, 33314, 34, 100528825, 262658, 0, 35684898, 1094441, 256, 913454, 546, 33314, 0, 541217, 0, 0, 83720413, 2396710, 37526314, 36491959, 98401117, 37524067, 2945699, 2130466, 6060248, 2129954, 528929, 41529983, 2748967, 75364347, 4026520, 4035363, 2097152, 50200575, 45853499, 45912943, 6119440, 33314, 824119, 3244843, 565797, 67109408, 0, 16546444, 0, 0, 0, 3961403, 36475427, 0, 0, 85741822, 0, 0, 35684898, 0, 0, 28103306, 0, 0, 2130466, 1, 34079265, 68972728, 0, 0, 0, 513, 0, 0, 113078941, 32768, 0, 0, 43840307, 0, 33853986, 10015271, 73199806, 0, 0, 35684898, 34, 565795, 8180407, 0, 0, 2130466, 1048833, 256, 1896994, 0, 2, 0, 286725, 0, 0, 117275389, 32770, 0, 32768, 31322747, 2097152, 0, 32768, 73267455, 0, 0, 33587744, 34, 33312, 16644799, 512, 0, 2130466, 1048577, 34100067, 1863900, 0, 32802, 0, 1650705, 0, 0, 1929373, 2888231, 37526315, 44618419, 16535769, 36737632, 11334563, 512, 0, 0, 546, 41466623, 2748967, 66846719, 4026521, 4043555, 2097152, 45988543, 4177919, 104657771, 8192, 4227618, 512, 1049123, 0, 67142178, 0, 44114572, 0, 0, 0, 10227752, 0, 0, 0, 1065092, 0, 0, 0, 0, 0, 36442764, 0, 0, 0, 0, 0, 1860240, 0, 0, 0, 0, 0, 0, 35393685, 33282, 0, 32770, 43831349, 2097664, 0, 32770, 1929276, 0, 0, 35684896, 2, 33314, 2146304, 33312, 0, 32770, 1061123, 32768, 69632, 0, 32770, 0, 4099, 0, 0, 134052541, 33314, 2130722, 823905, 66974719, 2884129, 546, 32768, 73398527, 0, 524835, 36299647, 557603, 32, 50328319, 1082146, 0, 33587712, 1114915, 103580451, 18742014, 546, 2, 0, 77423531, 0, 0, 121470909, 2920995, 0, 0, 10285247, 2129920, 0, 0, 85750011, 0, 0, 33554976, 35684898, 547, 16644767, 37524259, 0, 32768, 1049345, 0, 1863896, 0, 0, 0, 1572865, 0, 0, 16612093, 2392610, 36999970, 44954479, 67105535, 2396706, 37540711, 2130466, 75233791, 2129952, 34361891, 33587746, 32802, 35684898, 16642813, 299522, 2097152, 2130466, 11662315, 44888867, 1962750, 2130466, 33538, 32, 8193, 514, 0, 18708696, 7919143, 3447594, 44602043, 6046229, 3703395, 36500387, 33314, 1855576, 32, 533027, 41466495, 34202215, 50331647, 73235197, 37597987, 2097152, 46004991, 37331755, 37589807, 1921212, 2130466, 1700663, 33554977, 533027, 33312, 0, 50295485, 294912, 32768, 2, 10342651, 0, 0, 35426851, 85850362, 0, 0, 35684898, 34, 32800, 33422008, 35951138, 0, 2130466, 36733731, 2654755, 1929436, 0, 2, 0, 262657, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33314, 0, 0, 0, 0, 0, 0, 117276669, 2920995, 2396962, 2921251, 67108863, 2658851, 35951138, 33587746, 75233535, 2097186, 528931, 34112051, 36213539, 2663011, 66977791, 2921251, 2097152, 35685154, 3261227, 111997935, 1667838, 33314, 295202, 0, 9040419, 0, 0, 50166493, 2129954, 2396962, 546, 67073023, 2130466, 2392064, 45412911, 90042622, 2097152, 36176419, 35684898, 2654755, 2130466, 12448511, 36475683, 2097152, 2129952, 45216623, 44954471, 881406, 32, 33314, 0, 553481, 0, 0, 66943709, 295458, 299266, 32768, 29225215, 2130434, 262146, 44954231, 73398527, 2, 0, 33587746, 32802, 544, 29227775, 299522, 0, 35684898, 3154729, 36475427, 1962622, 0, 295458, 0, 340481, 545, 0, 6050460, 4044607, 3443498, 34079411, 1855607, 4160, 2642851, 0, 73224250, 0, 32, 7994107, 2744935, 16776191, 3408384, 37589807, 0, 41810687, 37313327, 79557631, 8192, 544, 0, 2097185, 0, 67108896, 0, 4027037, 0, 0, 0, 52196019, 0, 0, 0, 73191678, 0, 0, 0, 0, 0, 45357576, 0, 0, 0, 0, 0, 4096, 512, 0, 0, 1, 0, 0, 56392412, 294946, 0, 266784, 1856054, 2097152, 0, 33314, 6123768, 0, 512, 524833, 2, 524833, 10809344, 0, 0, 33314, 66337, 33024, 0, 0, 0, 0, 1, 0, 0, 1056904, 0, 2396706, 0, 811032, 0, 0, 33282, 1634362, 0, 0, 0, 0, 0, 278528, 2396706, 0, 0, 0, 2392610, 262146, 0, 0, 0, 1, 0, 0, 8681480, 32802, 37154, 262658, 1839123, 2097152, 294914, 32768, 540704, 0, 0, 37410, 123430, 1081898, 2375824, 1622818, 0, 33282, 1057315, 800, 8, 0, 32802, 0, 0, 32, 0, 1851393, 0, 0, 0, 806931, 0, 0, 0, 86048, 0, 0, 544, 0, 0, 49152, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 33559075]


--------------------------------------------------------------------------------
/wordgen.c:
--------------------------------------------------------------------------------
  1 | #include <ctype.h>
  2 | #include <err.h>
  3 | #include <errno.h>
  4 | #include <stdbool.h>
  5 | #include <stdint.h>
  6 | #include <stdio.h>
  7 | #include <stdlib.h>
  8 | #include <sys/random.h>
  9 | #include <unistd.h>
 10 | 
 11 | const uint32_t trigrams[] = {
 12 |     0,         33518332,  35951138,  35951394,  44335650,  58685182,  2396706,
 13 |     35967778,  35684898,  1896600,   2130466,   53794,     35684898,  35684898,
 14 |     2130466,   33387102,  36475682,  2097152,   2130722,   45349674,  44335906,
 15 |     1929216,   2130466,   295714,    0,         2130466,   33314,     0,
 16 |     0,         36492855,  37657387,  49133567,  794641,    1086051,   2945699,
 17 |     557091,    6058136,   32770,     34079267,  41794303,  36565543,  108714943,
 18 |     0,         37589799,  2097152,   48234495,  37468971,  104657775, 18706589,
 19 |     33314,     547447,    33315,     8958583,   67142178,  0,         45963932,
 20 |     33591842,  32770,     2097664,   48133115,  0,         0,         32768,
 21 |     6150399,   2097184,   0,         35684898,  544,       32800,     62780055,
 22 |     0,         0,         2130466,   3179049,   299042,    68973276,  32,
 23 |     0,         0,         1638401,   0,         0,         8222941,   0,
 24 |     2396962,   0,         10318455,  0,         0,         45937527,  73265407,
 25 |     0,         46003063,  2130466,   0,         0,         117306367, 0,
 26 |     2097152,   35684898,  1,         2945635,   1962620,   0,         0,
 27 |     0,         1048589,   0,         0,         117273309, 299010,    2097154,
 28 |     33853986,  31323135,  2130434,   8736,      32800,     75362558,  2129920,
 29 |     0,         33587746,  33314,     34,        100528825, 262658,    0,
 30 |     35684898,  1094441,   256,       913454,    546,       33314,     0,
 31 |     541217,    0,         0,         83720413,  2396710,   37526314,  36491959,
 32 |     98401117,  37524067,  2945699,   2130466,   6060248,   2129954,   528929,
 33 |     41529983,  2748967,   75364347,  4026520,   4035363,   2097152,   50200575,
 34 |     45853499,  45912943,  6119440,   33314,     824119,    3244843,   565797,
 35 |     67109408,  0,         16546444,  0,         0,         0,         3961403,
 36 |     36475427,  0,         0,         85741822,  0,         0,         35684898,
 37 |     0,         0,         28103306,  0,         0,         2130466,   1,
 38 |     34079265,  68972728,  0,         0,         0,         513,       0,
 39 |     0,         113078941, 32768,     0,         0,         43840307,  0,
 40 |     33853986,  10015271,  73199806,  0,         0,         35684898,  34,
 41 |     565795,    8180407,   0,         0,         2130466,   1048833,   256,
 42 |     1896994,   0,         2,         0,         286725,    0,         0,
 43 |     117275389, 32770,     0,         32768,     31322747,  2097152,   0,
 44 |     32768,     73267455,  0,         0,         33587744,  34,        33312,
 45 |     16644799,  512,       0,         2130466,   1048577,   34100067,  1863900,
 46 |     0,         32802,     0,         1650705,   0,         0,         1929373,
 47 |     2888231,   37526315,  44618419,  16535769,  36737632,  11334563,  512,
 48 |     0,         0,         546,       41466623,  2748967,   66846719,  4026521,
 49 |     4043555,   2097152,   45988543,  4177919,   104657771, 8192,      4227618,
 50 |     512,       1049123,   0,         67142178,  0,         44114572,  0,
 51 |     0,         0,         10227752,  0,         0,         0,         1065092,
 52 |     0,         0,         0,         0,         0,         36442764,  0,
 53 |     0,         0,         0,         0,         1860240,   0,         0,
 54 |     0,         0,         0,         0,         35393685,  33282,     0,
 55 |     32770,     43831349,  2097664,   0,         32770,     1929276,   0,
 56 |     0,         35684896,  2,         33314,     2146304,   33312,     0,
 57 |     32770,     1061123,   32768,     69632,     0,         32770,     0,
 58 |     4099,      0,         0,         134052541, 33314,     2130722,   823905,
 59 |     66974719,  2884129,   546,       32768,     73398527,  0,         524835,
 60 |     36299647,  557603,    32,        50328319,  1082146,   0,         33587712,
 61 |     1114915,   103580451, 18742014,  546,       2,         0,         77423531,
 62 |     0,         0,         121470909, 2920995,   0,         0,         10285247,
 63 |     2129920,   0,         0,         85750011,  0,         0,         33554976,
 64 |     35684898,  547,       16644767,  37524259,  0,         32768,     1049345,
 65 |     0,         1863896,   0,         0,         0,         1572865,   0,
 66 |     0,         16612093,  2392610,   36999970,  44954479,  67105535,  2396706,
 67 |     37540711,  2130466,   75233791,  2129952,   34361891,  33587746,  32802,
 68 |     35684898,  16642813,  299522,    2097152,   2130466,   11662315,  44888867,
 69 |     1962750,   2130466,   33538,     32,        8193,      514,       0,
 70 |     18708696,  7919143,   3447594,   44602043,  6046229,   3703395,   36500387,
 71 |     33314,     1855576,   32,        533027,    41466495,  34202215,  50331647,
 72 |     73235197,  37597987,  2097152,   46004991,  37331755,  37589807,  1921212,
 73 |     2130466,   1700663,   33554977,  533027,    33312,     0,         50295485,
 74 |     294912,    32768,     2,         10342651,  0,         0,         35426851,
 75 |     85850362,  0,         0,         35684898,  34,        32800,     33422008,
 76 |     35951138,  0,         2130466,   36733731,  2654755,   1929436,   0,
 77 |     2,         0,         262657,    0,         0,         0,         0,
 78 |     0,         0,         0,         0,         0,         0,         0,
 79 |     0,         0,         0,         0,         0,         0,         0,
 80 |     0,         0,         0,         0,         33314,     0,         0,
 81 |     0,         0,         0,         0,         117276669, 2920995,   2396962,
 82 |     2921251,   67108863,  2658851,   35951138,  33587746,  75233535,  2097186,
 83 |     528931,    34112051,  36213539,  2663011,   66977791,  2921251,   2097152,
 84 |     35685154,  3261227,   111997935, 1667838,   33314,     295202,    0,
 85 |     9040419,   0,         0,         50166493,  2129954,   2396962,   546,
 86 |     67073023,  2130466,   2392064,   45412911,  90042622,  2097152,   36176419,
 87 |     35684898,  2654755,   2130466,   12448511,  36475683,  2097152,   2129952,
 88 |     45216623,  44954471,  881406,    32,        33314,     0,         553481,
 89 |     0,         0,         66943709,  295458,    299266,    32768,     29225215,
 90 |     2130434,   262146,    44954231,  73398527,  2,         0,         33587746,
 91 |     32802,     544,       29227775,  299522,    0,         35684898,  3154729,
 92 |     36475427,  1962622,   0,         295458,    0,         340481,    545,
 93 |     0,         6050460,   4044607,   3443498,   34079411,  1855607,   4160,
 94 |     2642851,   0,         73224250,  0,         32,        7994107,   2744935,
 95 |     16776191,  3408384,   37589807,  0,         41810687,  37313327,  79557631,
 96 |     8192,      544,       0,         2097185,   0,         67108896,  0,
 97 |     4027037,   0,         0,         0,         52196019,  0,         0,
 98 |     0,         73191678,  0,         0,         0,         0,         0,
 99 |     45357576,  0,         0,         0,         0,         0,         4096,
100 |     512,       0,         0,         1,         0,         0,         56392412,
101 |     294946,    0,         266784,    1856054,   2097152,   0,         33314,
102 |     6123768,   0,         512,       524833,    2,         524833,    10809344,
103 |     0,         0,         33314,     66337,     33024,     0,         0,
104 |     0,         0,         1,         0,         0,         1056904,   0,
105 |     2396706,   0,         811032,    0,         0,         33282,     1634362,
106 |     0,         0,         0,         0,         0,         278528,    2396706,
107 |     0,         0,         0,         2392610,   262146,    0,         0,
108 |     0,         1,         0,         0,         8681480,   32802,     37154,
109 |     262658,    1839123,   2097152,   294914,    32768,     540704,    0,
110 |     0,         37410,     123430,    1081898,   2375824,   1622818,   0,
111 |     33282,     1057315,   800,       8,         0,         32802,     0,
112 |     0,         32,        0,         1851393,   0,         0,         0,
113 |     806931,    0,         0,         0,         86048,     0,         0,
114 |     544,       0,         0,         49152,     0,         0,         0,
115 |     0,         0,         0,         0,         0,         0,         1,
116 |     33559075};
117 | 
118 | #define BIG_WORDS 16
119 | typedef struct {
120 |   uint32_t val[BIG_WORDS];
121 | } big;
122 | 
123 | void big_iadd(big *dst, big *src) {
124 |   int carry = 0;
125 |   for (int i = 0; i < BIG_WORDS; i++) {
126 |     uint64_t tmp =
127 |         (uint64_t)dst->val[i] + (uint64_t)src->val[i] + +(uint64_t)carry;
128 |     dst->val[i] = (uint32_t)tmp;
129 |     carry = (tmp >> 32) & 1;
130 |   }
131 |   if (carry) errx(5, "addition overflowed");
132 | }
133 | 
134 | void big_isub(big *dst, big *src) {
135 |   int borrow = 0;
136 |   for (int i = 0; i < BIG_WORDS; i++) {
137 |     int64_t tmp = (int64_t)dst->val[i] - (int64_t)src->val[i] - borrow;
138 |     if (tmp < 0) {
139 |       borrow = 1;
140 |       tmp += 1L << 32;
141 |     } else {
142 |       borrow = 0;
143 |     }
144 |     dst->val[i] = tmp;
145 |   }
146 |   if (borrow) errx(5, "subraction underflowed");
147 | }
148 | 
149 | bool big_lte(big *a, big *b) {
150 |   for (int i = BIG_WORDS - 1; i >= 0; i--) {
151 |     if (a->val[i] != b->val[i]) {
152 |       return a->val[i] <= b->val[i];
153 |     }
154 |   }
155 |   return true;  // equal
156 | }
157 | 
158 | void big_irand(big *dst, big *below) {
159 |   while (1) {
160 |     if (getrandom(dst, sizeof *dst, 0) < 0) err(6, "unable to get randomness");
161 |     for (int i = BIG_WORDS - 1; i >= 0; i--) {
162 |       if (below->val[i] == 0) {
163 |         dst->val[i] = 0;
164 |       } else {
165 |         dst->val[i] &= (1L << (32L - __builtin_clz(below->val[i]))) - 1;
166 |         break;
167 |       }
168 |     }
169 |     if (big_lte(dst, below)) return;
170 |   }
171 | }
172 | 
173 | int big_idiv(big *dst, int divisor) {
174 |   int remainder = 0;
175 |   for (int i = BIG_WORDS - 1; i >= 0; i--) {
176 |     int64_t tmp = (int64_t)dst->val[i] + ((int64_t)(remainder) << 32);
177 |     dst->val[i] = tmp / divisor;
178 |     remainder = tmp % divisor;
179 |   }
180 |   return remainder;
181 | }
182 | 
183 | void big_print(big a) {
184 |   int chunks[BIG_WORDS * 4];
185 |   int c = 0;
186 |   big zero = {};
187 |   while (!big_lte(&a, &zero)) chunks[c++] = big_idiv(&a, 1000);
188 |   for (int i = c - 1; i >= 0; i--)
189 |     printf("%0*d%s", i == c - 1 ? 1 : 3, chunks[i], i > 0 ? "," : "");
190 | }
191 | 
192 | big prepare_path_counts(big paths[][27 * 27], int length) {
193 |   for (int i = 0; i < 27 * 27; i++)
194 |     paths[length - 1][i] = (big){{trigrams[i] & 1}};
195 | 
196 |   for (int level = length - 2; level > 0; level--) {
197 |     for (int i = 27; i < 27 * 27; i++) {
198 |       big count = {};
199 |       for (int j = 1; j < 27; j++) {
200 |         if (trigrams[i] & (1 << j)) {
201 |           big_iadd(&count, &paths[level + 1][(i % 27) * 27 + j]);
202 |         }
203 |       }
204 |       paths[level][i] = count;
205 |     }
206 |   }
207 | 
208 |   big total_paths = {};
209 |   for (int i = 0; i < 27; i++) {
210 |     big count = {};
211 |     for (int j = 1; j < 27; j++) {
212 |       if (trigrams[i] & (1 << j)) {
213 |         big_iadd(&count, &paths[1][(i % 27) * 27 + j]);
214 |       }
215 |     }
216 |     paths[0][i] = count;
217 |     big_iadd(&total_paths, &count);
218 |   }
219 | 
220 |   return total_paths;
221 | }
222 | 
223 | void generate(char *out, int length, big paths[][27 * 27], big chosen) {
224 |   int grams[length];
225 |   for (int i = 0; i < 27 * 27; i++) {
226 |     if (big_lte(&paths[0][i], &chosen)) {
227 |       big_isub(&chosen, &paths[0][i]);
228 |     } else {
229 |       grams[0] = i;
230 |       break;
231 |     }
232 |   }
233 |   for (int level = 1; level < length; level++) {
234 |     int base = grams[level - 1] % 27 * 27;
235 |     for (int i = 0; i < 27; i++) {
236 |       if (trigrams[grams[level - 1]] & (1 << i)) {
237 |         int n = base + i;
238 |         if (big_lte(&paths[level][n], &chosen)) {
239 |           big_isub(&chosen, &paths[level][n]);
240 |         } else {
241 |           grams[level] = n;
242 |           break;
243 |         }
244 |       }
245 |     }
246 |   }
247 |   for (int i = 0; i < length; i++) {
248 |     out[i] = (grams[i] % 27) + '`';
249 |   }
250 |   out[length] = 0;
251 | }
252 | 
253 | void usage(char *name) {
254 |   errx(1, "usage: %s [-halv] [length] [count]\n", name);
255 | }
256 | 
257 | int main(int argc, char **argv) {
258 |   int c;
259 |   bool all = false;
260 |   bool columns = true;
261 |   bool verbose = false;
262 |   long length = 8;
263 |   long count = 0;
264 | 
265 |   while ((c = getopt(argc, argv, "halv")) != -1) {
266 |     switch (c) {
267 |       case 'h':
268 |         usage(argv[0]);
269 |         break;
270 |       case 'a':
271 |         all = true;
272 |         break;
273 |       case 'l':
274 |         columns = false;
275 |         break;
276 |       case 'v':
277 |         verbose = true;
278 |         break;
279 |       case '?':
280 |         return 1;
281 |     }
282 |   }
283 | 
284 |   char *tmp;
285 |   if (optind < argc) {
286 |     length = strtol(argv[optind], &tmp, 10);
287 |     if (errno || *tmp != 0) errx(2, "invalid length %s", argv[optind]);
288 |     optind++;
289 |   }
290 | 
291 |   if (length > BIG_WORDS * 9)
292 |     errx(3, "length %ld too large, max is %d", length, BIG_WORDS * 9);
293 |   if (length < 3) errx(3, "length %ld too small, min is 3", length);
294 | 
295 |   if (optind < argc) {
296 |     count = strtol(argv[optind], &tmp, 10);
297 |     if (errno || *tmp != 0) errx(2, "invalid count %s", argv[optind]);
298 |     optind++;
299 |   }
300 | 
301 |   if (argv[optind] != NULL) usage(argv[0]);
302 | 
303 |   big paths[length][27 * 27];
304 | 
305 |   big total = prepare_path_counts(paths, length);
306 |   int bits = 0;
307 |   for (int i = BIG_WORDS - 1; i >= 0; i--) {
308 |     if (total.val[i]) {
309 |       bits = 32 * i + (31 - __builtin_clz(total.val[i]));
310 |       break;
311 |     }
312 |   }
313 | 
314 |   int words_per_line = 1;
315 |   if (columns) {
316 |     words_per_line = 1 + (80 - length) / (2 + length);
317 |   }
318 |   if (count == 0) count = words_per_line * 8;
319 |   int words = 0;
320 |   char word[length + 1];
321 |   big chosen = {};
322 |   if (all) {
323 |     if (verbose) {
324 |       printf("Generating all ");
325 |       big_print(total);
326 |       printf(" %ld-letter words with %d bits of entropy\n", length, bits);
327 |     }
328 |     big one = {{1}};
329 |     while (1) {
330 |       generate(word, length, paths, chosen);
331 |       fputs(word, stdout);
332 |       if (++words == words_per_line) {
333 |         words = 0;
334 |         putchar('\n');
335 |       } else {
336 |         fputs("  ", stdout);
337 |       }
338 |       big_iadd(&chosen, &one);
339 |       if (big_lte(&total, &chosen)) break;
340 |     }
341 |   } else {
342 |     if (verbose) {
343 |       printf("Generating %ld %ld-letter words with %d bits of entropy (", count,
344 |              length, bits);
345 |       big_print(total);
346 |       printf(" possible)\n");
347 |     }
348 |     for (int i = 0; i < count; i++) {
349 |       big_irand(&chosen, &total);
350 |       generate(word, length, paths, chosen);
351 |       fputs(word, stdout);
352 |       if (++words == words_per_line) {
353 |         words = 0;
354 |         putchar('\n');
355 |       } else if (i + 1 < count) {
356 |         fputs("  ", stdout);
357 |       }
358 |     }
359 |   }
360 |   if (words) putchar('\n');
361 | }
362 | 


--------------------------------------------------------------------------------
/wordgen.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | import argparse
  3 | import collections
  4 | import itertools
  5 | import json
  6 | import math
  7 | import re
  8 | import secrets
  9 | import string
 10 | import shutil
 11 | 
 12 | N = 3
 13 | V = None
 14 | 
 15 | ROMAN_RE = re.compile('^(?=[MDCLXVI])M*(C[MD]|D?C*)(X[CL]|L?X*)(I[XV]|V?I*)$', re.I)
 16 | 
 17 | 
 18 | def build_ngrams():
 19 |     counts = collections.defaultdict(int)
 20 |     for line in open('/usr/share/dict/words'):
 21 |         word = line.strip()
 22 |         if not re.match(r'^[a-z]{3,}$', word):
 23 |             continue
 24 |         word = line.strip()
 25 |         counts[' ' + word[:N-1]] += 1
 26 |         for n in range(len(word) - N + 2):
 27 |             counts[word[n:n+N].ljust(N)] += 1
 28 | 
 29 |     allowed = set()
 30 |     for k, v in counts.items():
 31 |         if v > 7 and k.strip().isalpha():
 32 |             allowed.add(k)
 33 | 
 34 |     print(' '.join(x.replace(' ', '`') for x in sorted(allowed)))
 35 |     print(len(allowed), 26 ** N + (N-1) * 26 ** 2)
 36 |     assert 'fbt' not in counts, counts['fbt']
 37 | 
 38 |     if 0:
 39 |         length = 10
 40 |         trials = 1 << 18
 41 |         valid = 0
 42 |         for _ in range(trials):
 43 |             word = ''.join(secrets.choice(string.ascii_lowercase) for _ in range(length))
 44 |             if ' ' + word[:N-1] in allowed and all(word[n:n+N].ljust(N) in allowed for n in range(len(word) - N + 2)):
 45 |                 valid += 1
 46 |                 print(word)
 47 |         print(valid, trials)
 48 |         est = int(valid/trials * 26 ** length)
 49 |         print(est, math.log2(est))
 50 |     if 0:
 51 |         global V
 52 |         V = []
 53 |         valid = 0
 54 |         att = 0
 55 |         for word in itertools.product(string.ascii_lowercase, repeat=6):
 56 |             word = ''.join(word)
 57 |             att += 1
 58 |             if ' ' + word[:N-1] in allowed and all(word[n:n+N].ljust(N) in allowed for n in range(len(word) - N + 2)):
 59 |                 valid += 1
 60 |                 V.append(word)
 61 |                 if valid % 65536 == 0:
 62 |                     print(word)
 63 |         print('V:', valid, att)
 64 | 
 65 |     out = [0] * 27 ** 2
 66 |     assert 96 == ord('`')
 67 |     for w in allowed:
 68 |         w = w.replace(' ', '`').encode('ascii')
 69 |         out[(w[0] - 96) * 27 + w[1] - 96] |= 1 << (w[2] - 96)
 70 | 
 71 |     return out
 72 | 
 73 | 
 74 | class WordGenerator(object):
 75 |     def __init__(self, allowed, n_words=None):
 76 |         self.allowed = allowed
 77 |         self.path_counts = []
 78 |         self.total_paths = 0
 79 | 
 80 |     def _prepare_path_counts(self, length):
 81 |         if len(self.path_counts) != length:
 82 |             path_counts = [[0] * 27 ** 2 for _ in range(length)]
 83 | 
 84 |             suff = []
 85 |             for n in range(27 ** 2):
 86 |                 if self.allowed[n] & 1:
 87 |                     path_counts[length - 1][n] = 1
 88 |                     suff.append('%s%s' % (chr(96 + (n // 27)), chr(96 + (n % 27))))
 89 | 
 90 |             for level in range(length - 2, 0, -1):
 91 |                 for n in range(27, 27 ** 2):
 92 |                     count = 0
 93 |                     for x in range(1, 27):
 94 |                         if self.allowed[n] & (1 << x):
 95 |                             count += path_counts[level + 1][(n % 27) * 27 + x]
 96 |                     path_counts[level][n] = count
 97 |             for n in range(27):
 98 |                 count = 0
 99 |                 for x in range(1, 27):
100 |                     if self.allowed[n] & (1 << x):
101 |                         count += path_counts[1][(n % 27) * 27 + x]
102 |                 path_counts[0][n] = count
103 |             self.path_counts = path_counts
104 |             self.total_paths = sum(path_counts[0])
105 | 
106 |         return self.path_counts
107 | 
108 |     def n_bits(self, length):
109 |         self._prepare_path_counts(length)
110 |         return math.log2(self.total_paths)
111 | 
112 |     def generate(self, length, chosen_path=None):
113 |         ''' generate a random Englishy word '''
114 |         # pick a word at random
115 |         # or, pick a path through a DAG uniformly from all paths possible
116 | 
117 |         path_counts = self._prepare_path_counts(length)
118 | 
119 |         # 2) pick a path to follow
120 |         if chosen_path is None:
121 |             chosen_path = secrets.randbelow(self.total_paths)
122 |             #print('%.2f bits of entropy' % math.log(self.total_paths, 2), end=' ')
123 |             #print("chose %d/%d" % (chosen_path, self.total_paths))
124 |         if not 0 <= chosen_path < self.total_paths:
125 |             raise ValueError('chosen path %d not in [0,%d)' % (chosen_path, self.total_paths))
126 | 
127 |         # 3) working forwards, pick the character that contributed our chosen_path
128 |         path = chosen_path
129 |         grams = []
130 |         for level in range(length):
131 |             if level == 0:
132 |                 adj = range(len(path_counts[0]))  # first level: (space, char, char)
133 |             else:
134 |                 base = grams[-1] % 27 * 27
135 |                 adj = (base + x for x in range(27) if self.allowed[grams[-1]] & (1 << x))
136 |             for n in adj:
137 |                 #print(grams, n, path_counts[level][n], path, self.adjacency_lists[n])
138 |                 if path_counts[level][n] <= path:
139 |                     path -= path_counts[level][n]
140 |                 else:
141 |                     grams.append(n)
142 |                     break
143 |             else:
144 |                 print("couldn't find a successor :(", grams, level)
145 |         assert len(grams) == length, chosen_path
146 |         return ''.join(chr(c % 27 + 96) for c in grams)
147 | 
148 | 
149 | if __name__ == '__main__':
150 |     parser = argparse.ArgumentParser()
151 |     parser.add_argument('--digest', action='store_true', help='build trigrams from data file')
152 |     parser.add_argument('-a', '--all', action='store_true', help='emit all possibilities')
153 |     parser.add_argument('-l', '--line', action='store_true', help='one output per line')
154 |     parser.add_argument('length', default=9, type=int, nargs='?')
155 |     parser.add_argument('count', default=64, type=int, nargs='?')
156 |     options = parser.parse_args()
157 | 
158 |     if options.digest:
159 |         allowed = build_ngrams()
160 |         with open('trigrams.json', 'w') as f:
161 |             json.dump(allowed, f)
162 | 
163 |     gen = WordGenerator(json.load(open('trigrams.json')))
164 | 
165 |     if options.digest and V:
166 |         for n, v in enumerate(V):
167 |             assert gen.generate(len(v), n) == v, (n, v)
168 | 
169 |     cols = shutil.get_terminal_size((80, 0)).columns
170 |     if options.line:
171 |         cols = 0
172 | 
173 |     if options.all:
174 |         print('Generating all {2:,} {0}-letter words with {1:.1f} bits of entropy'.format(
175 |             options.length, gen.n_bits(options.length), gen.total_paths))
176 |     else:
177 |         print('Generating {} {}-letter words with {:.1f} bits of entropy ({:,} possible)'.format(
178 |             options.count, options.length, gen.n_bits(options.length), gen.total_paths))
179 | 
180 |     words = (gen.generate(options.length) for _ in range(options.count))
181 |     if options.all:
182 |         words = (gen.generate(options.length, n) for n in range(gen.total_paths))
183 | 
184 |     if cols > (2 + options.length * 2):
185 |         words = list(words)
186 |         cols = 1 + (cols - options.length) // (2 + options.length)
187 |         for x in range(0, len(words), cols):
188 |             print('  '.join(words[x:x+cols]))
189 |         if len(words) % cols:
190 |             print('  '.join(words[x+cols:]))
191 |     else:
192 |         for w in words:
193 |             print(w)
194 | 


--------------------------------------------------------------------------------