├── .gitignore
├── requirements.txt
├── phrase_test.py
├── explore.py
├── .github
    └── workflows
    │   └── ci.yml
├── recover_algo_word.py
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | venv
2 | __pycache__
3 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | py-algorand-sdk==1.7.0
2 | pytest==6.2.4
3 | 


--------------------------------------------------------------------------------
/phrase_test.py:
--------------------------------------------------------------------------------
1 | 
2 | import recover_algo_word as raw
3 | 
4 | 
def test_bip39_choices():
    """Expanding the pattern "stop" is expected to produce no choices."""
    expanded = raw.bip39_choices("stop")
    assert expanded == []
7 | 


--------------------------------------------------------------------------------
/explore.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sys
 4 | 
 5 | import json
 6 | from urllib.request import urlopen, Request
 7 | 
 8 | 
# Endpoint prefix; algos() appends the account address to it.
url = "https://algoexplorerapi.io/v1/account/"
# Headers sent with every request made by algos().
headers = {"accept": "application/json", "user-agent": "please"}
11 | 
12 | 
def algos(addr):
    """Fetch the account record for *addr* and return its "amount" field.

    The address is appended to the module-level ``url`` and requested with
    the module-level ``headers``; the response body is decoded as UTF-8
    and parsed as JSON.
    """
    request = Request(url + addr,  headers=headers)
    with urlopen(request) as response:
        payload = response.read().decode("utf-8")
    record = json.loads(payload)
    return record["amount"]
17 | 
18 | 
def active(addr):
    """Return True when the amount ``algos`` reports for *addr* is above zero."""
    balance = algos(addr)
    return balance > 0
21 | 
22 | 
if __name__ == "__main__":
    # Print one True/False line per address given on the command line.
    for addr in sys.argv[1:]:
        is_active = active(addr)
        print(is_active)
26 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: Run Python Tests
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - master
 6 |   pull_request:
 7 |     branches:
 8 |       - master
 9 | 
10 | jobs:
11 |   build:
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       - uses: actions/checkout@v2
15 |       - name: Install Python 3
16 |         uses: actions/setup-python@v1
17 |         with:
18 |           python-version: 3.6
19 |       - name: Install dependencies
20 |         run: |
21 |           python -m pip install --upgrade pip
22 |           pip install -r requirements.txt
23 |       - name: Run tests with pytest
24 |         run: pytest
25 | 


--------------------------------------------------------------------------------
/recover_algo_word.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | import argparse
  3 | import difflib
  4 | import math
  5 | import sys
  6 | 
  7 | from algosdk.wordlist import word_list_raw
  8 | import algosdk.mnemonic as mnemonic
  9 | import algosdk.account as account
 10 | 
# A sample 25-word phrase.  It is not referenced by any other code visible
# in this module.
known = """
sugar police obvious access unit blur
situate brown home useful manual coffee
erase pipe deputy panic make radar
scrap print glide abstract kind absorb
matrix
"""

# The SDK's raw word list split on whitespace into a Python list.
bip39 = word_list_raw().split()


# Patterns for which bip39_choices has already printed its substitution
# notice.  Used like a set: only the keys matter.
reported = {}
 23 | 
 24 | 
 25 | def bip39_choices(pattern):
 26 |     if pattern in mnemonic.word_to_index:
 27 |         return [pattern]
 28 |     comma = pattern.find(',')
 29 |     if comma >= 0:
 30 |         return bip39_choices(pattern[:comma])+bip39_choices(pattern[comma+1:])
 31 | 
 32 |     underscore = pattern.find('_')
 33 |     if underscore >= 0:
 34 |         if underscore > 3:
 35 |             print(f"Useless _ in '{pattern}' " +
 36 |                   "bip39 words are unique in the first four characters.")
 37 |         prefix = pattern[:underscore]
 38 |         return [w for w in bip39 if w.startswith(prefix)]
 39 | 
 40 |     if pattern.endswith("~"):
 41 |         return difflib.get_close_matches(pattern[:-1], bip39, 6, .6)
 42 | 
 43 |     if pattern not in reported:
 44 |         print(f"{pattern} is not a bip39 word.")
 45 | 
 46 |     if len(pattern) > 4 and pattern[:4] in mnemonic.word_to_index:
 47 |         word = mnemonic.index_to_word[mnemonic.word_to_index[pattern[:4]]]
 48 |         if pattern not in reported:
 49 |             print(f"Using {word} for {pattern}.")
 50 |         reported[pattern] = 1
 51 |         return [word]
 52 | 
 53 |     matches = difflib.get_close_matches(pattern, bip39, 6, .6)
 54 |     if matches:
 55 |         print(f"Consider '{','.join(matches)}' or equivalently '{pattern}~'.")
 56 | 
 57 |     return []
 58 | 
 59 | 
 60 | def chk25(words):
 61 |     check = mnemonic.word_to_index[words[-1]]
 62 |     m_indexes = [mnemonic.word_to_index[w] for w in words[:-1]]
 63 |     m_bytes = mnemonic._to_bytes(m_indexes)
 64 |     if not m_bytes[-1:] == b'\x00':
 65 |         return False
 66 |     return check == mnemonic._checksum(m_bytes[:32])
 67 | 
 68 | 
 69 | def candidates(options):
 70 |     if not options:
 71 |         yield []
 72 |         return
 73 | 
 74 |     head = options[0]
 75 |     for candidate in candidates(options[1:]):
 76 |         for h in head:
 77 |             yield [h, *candidate]
 78 | 
 79 | 
 80 | def has_algos(addr):
 81 |     import explore
 82 |     explore.active(addr)
 83 | 
 84 | 
# Every [address, phrase] pair that print_candidate has printed so far.
found = []
 86 | 
 87 | 
 88 | def print_candidate(candidate, prefix):
 89 |     phrase = " ".join([mnemonic.index_to_word[mnemonic.word_to_index[c]]
 90 |                        for c in candidate])
 91 |     sk = mnemonic.to_private_key(phrase)
 92 |     address = account.address_from_private_key(sk)
 93 |     if address.startswith(prefix):
 94 |         if args.explore and not has_algos(address):
 95 |             return
 96 |         print(address, phrase)
 97 |         found.append([address, phrase])
 98 | 
 99 | 
def check_choices(choices):
    """Try every combination in *choices* and count those passing ``chk25``.

    Each passing combination is handed to ``print_candidate`` together
    with the upper-cased ``args.address`` prefix.  The returned count is
    of checksum-valid combinations, whether or not they were printed.
    """
    passing = 0
    for combo in candidates(choices):
        if not chk25(combo):
            continue
        passing += 1
        print_candidate(combo, args.address.upper())
    return passing
107 | 
108 | 
def count_choices(choices):
    """Return how many word combinations *choices* describes.

    This is the product of the lengths of the per-position option lists:
    1 for an empty sequence, 0 if any position has no options.
    """
    sizes = map(len, choices)
    return math.prod(sizes)
111 | 
112 | 
def index_pairs(top):
    """Yield every pair ``(lo, hi)`` with ``0 <= lo < hi < top``.

    Pairs come out ordered by ``lo`` and then by ``hi``.
    """
    yield from ((lo, hi)
                for lo in range(top - 1)
                for hi in range(lo + 1, top))
117 | 
118 | 
if __name__ == "__main__":
    # Command-line driver: expand each supplied pattern, then search the
    # space of mnemonics implied by how many words were given.
    parser = argparse.ArgumentParser(
        description='Recover Algorand mnemonics when some is missing or wrong.')
    parser.add_argument('words', metavar='N', nargs='+',
                        help='sequence of words in account mnemonic')
    parser.add_argument('--address', default='',
                        help='the account being recovered (prefix), if known')
    parser.add_argument('--explore', action='store_true',
                        help='use algoexplorer API to filter inactive accounts')

    # args must stay a module-level name: print_candidate and
    # check_choices read it as a global.
    args = parser.parse_args()
    words = [w.lower() for w in args.words]

    choices = [bip39_choices(w) for w in words]
    count = count_choices(choices)

    # index_pairs(25) yields each unordered pair of positions exactly once.
    pair_count = 25 * 24 // 2

    if len(words) > 25:
        print("Too many words. An Algorand mnemonic has 25 words.")

    if len(words) == 25:
        if count == 1:          # 25 words given, no wildcarding
            if check_choices(choices) == 0:
                print("Bad checksum. Finding similar mnemonics")
                print(f" Trying swaps of all pairs. {pair_count} possibilities")
                # Maybe this should be a switch that affects all
                # check_choices calls.  That would change all our
                # reporting about possibility count, but it would be
                # cool to always handle swaps.
                for lo, hi in index_pairs(25):
                    # Swap, test, then swap back so choices is unchanged.
                    choices[hi], choices[lo] = choices[lo], choices[hi]
                    check_choices(choices)
                    choices[hi], choices[lo] = choices[lo], choices[hi]
                if found:  # Add a switch to keep going?
                    sys.exit(0)
                print(f" Trying to replace each word. {25*2048} possibilities")
                for i in range(25):
                    wild = choices[:i] + [bip39] + choices[i+1:]
                    check_choices(wild)
        elif count > 1:
            print(f"Trying {count} possibilities")
            check_choices(choices)

    if len(words) == 24:        # Missing one word. Insert _ in each slot
        if count > 0:
            print(f"Trying {25*2048*count} possibilities")
            for i in range(25):
                wild = choices[:i] + [bip39] + choices[i:]
                check_choices(wild)

    if len(words) == 23:
        # This is at least 300 * 4M = 1.26B possibilities (more if any
        # words have wildcards).  Utterly hopeless without an
        # --address to winnow them down, and will take days anyway.
        if count > 0:
            print(f"Trying {pair_count*2048*2048*count} possibilities")
            for lo, hi in index_pairs(25):
                # lo and hi are the positions of the two missing words in
                # the full 25-word phrase, so hi-lo-1 of the known words
                # sit between them (none when the gaps are adjacent).
                wild = (choices[:lo] + [bip39] + choices[lo:hi-1] +
                        [bip39] + choices[hi-1:])
                check_choices(wild)

    if 1 < len(words) <= 22:
        print("No. I can't work miracles. " +
              "Finding >= 3 words is only possible if _ indicates their positions.")

    if len(words) == 1:
        # Useful for debugging a pattern
        print(str(choices[0]))
    elif count == 0:
        print("Unable to find candidates to check.")

    if len(found) > 1 and not args.address:
        print("Multiple possibilities. Narrow possibilities with --address")
187 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Purpose
  2 | 
  3 | This is a utility to help out when you've misplaced _a little bit_ of
  4 | your Algorand account mnemonic.
  5 | 
  6 | Algorand uses a 25 word, BIP39 account mnemonic.  This confuses some
  7 | users, since they may be used to 24 word mnemonics used by other
  8 | projects.  Never fear! The 25th word is a checksum word, it's derived
  9 | entirely from the first 24 words. If you fail to record it, this
 10 | utility will figure it out for you.
 11 | 
 12 | And it can do more. Suppose you wrote down 24 words, not because you
 13 | thought the 25th word was some sort of junk, but just because you
 14 | skipped one.  Then since you were used to 24 word projects, you didn't
 15 | notice that you had too few words.  Now, it's unclear which word is
 16 | missing, so the exact mnemonic can't be reconstructed. But it is
 17 | possible to figure out about 25 different possibilities by assuming
 18 | you dropped the word from each possible spot, and reconstructing what
 19 | would need to be there to make the checksum work.  Now you can try
 20 | them all until you find the right one.
 21 | 
 22 | Trying 25 mnemonics is a pain in the butt!  If you recall the address
 23 | you're recovering, or even just the start of it, you can supply it as
 24 | `--address AF32...` If you do, then the candidate mnemonics will be
 25 | filtered to only those that start with the given prefix, which is
 26 | likely to winnow things down quickly.
 27 | 
 28 | But you don't remember the address! While not implemented yet, the
 29 | next trick is to hit the actual blockchain to see if the address in
 30 | question has any Algos.  After all, you probably wouldn't be so
 31 | interested in recovery if you had nothing in the account.
 32 | 
 33 | # Usage
 34 | 
 35 | py-algorand-sdk is the only module you need to install.  Use a
 36 | virtualenv, or install it globally as you see fit. Then try:
 37 | 
 38 | ```
 39 | ./recover_algo_word.py sugar police obvious access unit blur situate brown home useful manual coffee erase pipe deputy panic make radar scrap print glide abstract kind absorb matrix
 40 | ```
 41 | 
 42 | There's nothing to recover there, since you've supplied a 25 word
 43 | mnemonic, and the checksum works. The associated address is printed
 44 | along with the mnemonic.
 45 | 
 46 | ## You're missing a word
 47 | 
 48 | Now, try again, specifying that you forgot to record the final word by
 49 | using an underscore in its place.
 50 | 
 51 | ```
 52 | ./recover_algo_word.py sugar police obvious access unit blur situate brown home useful manual coffee erase pipe deputy panic make radar scrap print glide abstract kind absorb _
 53 | ```
 54 | 
 55 | This time 2048 options are considered - one for each possible bip39
 56 | word. Only one is the proper checksum, so the same final output is
 57 | obtained after a tiny delay.
 58 | 
 59 | Suppose you didn't know which word you skipped when you recorded your
 60 | mnemonic.  Let's try without `unit` from the original. But we don't
 61 | replace `unit` with `_` because we are pretending we didn't know which
 62 | spot we forgot.
 63 | 
 64 | ```
 65 | ./recover_algo_word.py sugar police obvious access blur situate brown home useful manual coffee erase pipe deputy panic make radar scrap print glide abstract kind absorb matrix
 66 | ```
 67 | 
 68 | Since you only gave 24 words and no indication of where the 25th
 69 | should go, 51,200 possibilities must be considered (2048 possibilities
 70 | in each of 25 locations).  That still doesn't take long, but an
 71 | annoyance is that 20 valid mnemonics are found. Usually, I'd expect
 72 | closer to 25 in this situation, but there are some constraints that
 73 | make it impossible to find a mnemonic for each possible missing
 74 | spot. The address for each mnemonic is printed, and perhaps that will
 75 | jog your memory.  If you had recalled your address started with G,
 76 | you might have given `--address G` as an extra command line argument,
 77 | which would have narrowed the field to just two possibilities. You
 78 | could try to recover them in your wallet, or look them up in
 79 | [AlgoExplorer](https://algoexplorer.io/) to figure out which holds your
 80 | account.
 81 | 
 82 | ## You have one word wrong
 83 | 
 84 | If you had tried to supply all 25 words, but one was wrong, the
 85 | checksum would (usually) fail.  When that happens, the script performs
 86 | 25 wildcard searches, replacing each word with _ in turn.  In effect,
 87 | it assumes that one of your words was mistyped, and tries to find all
 88 | the possible mnemonics that would work with the rest, in the given
 89 | order.
 90 | 
 91 | Let's try our first example with `dolphin` as the fourth word.
 92 | 
 93 | ```
 94 | ./recover_algo_word.py sugar police obvious dolphin unit blur situate brown home useful manual coffee erase pipe deputy panic make radar scrap print glide abstract kind absorb matrix
 95 | ```
 96 | 
 97 | Searching 51,200 candidates, about 30 mnemonics are found that meet the
 98 | checksum.  Now would be a good time to use `--address`.  Sometimes
 99 | you'll get lucky, and far fewer candidates pass the checksum.
100 | 
101 | # Obscure uses
102 | 
103 | Those are the most likely cases, but you can do more.
104 | 
105 | ## Underscore as a prefix wildcard
106 | 
107 | Using an _ in place of a word indicates that a full 2048 word search
108 | in that position must be done.  If you just have sloppy handwriting
109 | and know the word starts with certain letter(s), xy_ will limit the
110 | search to bip39 words that begin with xy.
111 | 
112 | For example, if you only remember that your first two words started
113 | with s and p:
114 | 
115 | ```
116 | ./recover_algo_word.py s_ p_ obvious access unit blur situate brown home useful manual coffee erase pipe deputy panic make radar scrap print glide abstract kind absorb matrix
117 | ```
118 | 
119 | checks 33,000 mnemonics and finds 16 possibilities. `--address` or
120 | AlgoExplorer could narrow things down further.  By the way, if you also
121 | forgot the third word, and used o_, you'd be searching 1.8M choices
122 | which is much slower, but doable.  Any more and you're going to be
123 | waiting a while.
124 | 
125 | ## Comma, to try multiple choices
126 | 
127 | If, on the other hand, you don't have a prefix, but somehow think you
128 | know that a particular spot is one of a few words, you can separate
129 | them with commas, and it will try each, along with whatever other
130 | wildcarding you're doing.
131 | 
132 | ```
133 | ./recover_algo_word.py s_ police,favorite obvious access unit blur situate brown home useful manual coffee erase pipe deputy panic make radar scrap print glide abstract kind absorb matrix
134 | ```
135 | 
136 | will try 500 combos - 250 from the `s_` doubled by trying `police` and
137 | `favorite` as the second word.
138 | 
139 | 
140 | The comma doesn't seem very useful on its own - why would you know the
141 | word is one from a small list?  The functionality exists so that when
142 | a non-bip39 word is noticed, similar words can be found from the bip39
143 | list and searched that same way. However, since bip39 is unique in the
144 | first four characters, if a non-bip39 word is supplied that
145 | nonetheless matches the first four characters of a bip39 word, a
146 | warning is printed but the typo is assumed after character 4 and the
147 | indicated bip39 word is used.
148 | 
149 | ```
150 | ./recover_algo_word.py sugary police obvious access unit blur situate brown home useful manual coffee erase pipe deputy panic make radar scrap print glide abstract kind absorb matrix
151 | ```
152 | 
153 | works fine, substituting `sugar` for `sugary`.  Because of this, you
154 | can stop typing all of your words at the fourth character. You'll get no
155 | warnings, as shortened words are assumed to be intentional:
156 | 
157 | ```
158 | ./recover_algo_word.py suga poli obvi acce unit blur situ brow home usef manu coff eras pipe depu pani make rada scra prin glid abst kind abso matr
159 | ```
160 | 
161 | ## Tilde for fuzzy matching
162 | 
163 | If you wrote your mnemonic down, but now you doubt your ability to
164 | read your own handwriting, maybe you can tell which words are
165 | especially poorly written.  In that case, end them with `~` and they
166 | will be expanded to a set of similar words from the bip39 list.
167 | 
168 | Swap `aces~` for `access` and you'll be fine.
169 | 
170 | ```
171 | ./recover_algo_word.py stupor~ police obvious aces~ unit blur situate brown home useful manual coffee erase pipe deputy panic make radar scrap print glide abstract kind absorb matrix
172 | ```
173 | 
174 | In fact, if you had tried `aces`, it would be reported as a non bip39
175 | word, and suggestions for replacement would have been shown.
176 | 
177 | ## Debug your searches
178 | 
179 | If you supply only one word, it will be expanded according to the
180 | rules above and printed.
181 | 
182 | ```
183 | ./recover_algo_word.py bl_
184 | ./recover_algo_word.py aces~,ble_
185 | ./recover_algo_word.py wrong
186 | ```
187 | 


--------------------------------------------------------------------------------