├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── blockchain_parser ├── __init__.py ├── address.py ├── block.py ├── block_header.py ├── blockchain.py ├── index.py ├── input.py ├── output.py ├── script.py ├── tests │ ├── __init__.py │ ├── bip69_false.txt │ ├── bip69_true.txt │ ├── segwit.txt │ ├── test_address.py │ ├── test_block.py │ ├── test_index.py │ ├── test_output.py │ ├── test_script.py │ ├── test_transaction.py │ └── test_utils.py ├── transaction.py └── utils.py ├── examples ├── non-standard-outputs.py ├── ordered-blocks.py └── texts-in-coinbases.py ├── requirements.txt ├── setup.cfg ├── setup.py ├── tests.sh ├── tox.ini └── travis.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | 47 | # Translations 48 | *.mo 49 | *.pot 50 | 51 | # Django stuff: 52 | *.log 53 | 54 | # Sphinx documentation 55 | docs/_build/ 56 | 57 | # PyBuilder 58 | target/ 59 | 60 | # PyCharm 61 | .idea/ 62 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | 3 | language: 4 | - python 5 | 6 | python: 7 | - "3.3" 8 | - "3.4" 9 | - "3.5" 10 | - "3.6" 11 | 12 | before_install: 13 | - ./travis.sh 14 | 15 | install: 16 | - pip install -r requirements.txt 17 | - pip install coveralls 18 | 19 | script: 20 | - coverage run --append --include='blockchain_parser/*' --omit='*/tests/*' setup.py test 21 | 22 | after_success: 23 | - if [[ $TRAVIS_PYTHON_VERSION == '3.6' ]]; then 24 | coveralls; 25 | fi 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | bitcoin-blockchain-parser is free software: you can redistribute it and/or 2 | modify it under the terms of the GNU Lesser General Public License as 3 | published by the Free Software Foundation, either version 3 of the License, 4 | or (at your option) any later version. 5 | 6 | bitcoin-blockchain-parser is distributed in the hope that it will be useful, but 7 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 8 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License 9 | below for more details. 10 | 11 | 12 | 13 | GNU LESSER GENERAL PUBLIC LICENSE 14 | Version 3, 29 June 2007 15 | 16 | Copyright (C) 2007 Free Software Foundation, Inc. 17 | Everyone is permitted to copy and distribute verbatim copies 18 | of this license document, but changing it is not allowed. 19 | 20 | 21 | This version of the GNU Lesser General Public License incorporates 22 | the terms and conditions of version 3 of the GNU General Public 23 | License, supplemented by the additional permissions listed below. 24 | 25 | 0. Additional Definitions. 26 | 27 | As used herein, "this License" refers to version 3 of the GNU Lesser 28 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 29 | General Public License. 30 | 31 | "The Library" refers to a covered work governed by this License, 32 | other than an Application or a Combined Work as defined below. 33 | 34 | An "Application" is any work that makes use of an interface provided 35 | by the Library, but which is not otherwise based on the Library. 36 | Defining a subclass of a class defined by the Library is deemed a mode 37 | of using an interface provided by the Library. 38 | 39 | A "Combined Work" is a work produced by combining or linking an 40 | Application with the Library. The particular version of the Library 41 | with which the Combined Work was made is also called the "Linked 42 | Version". 43 | 44 | The "Minimal Corresponding Source" for a Combined Work means the 45 | Corresponding Source for the Combined Work, excluding any source code 46 | for portions of the Combined Work that, considered in isolation, are 47 | based on the Application, and not on the Linked Version. 48 | 49 | The "Corresponding Application Code" for a Combined Work means the 50 | object code and/or source code for the Application, including any data 51 | and utility programs needed for reproducing the Combined Work from the 52 | Application, but excluding the System Libraries of the Combined Work. 53 | 54 | 1. Exception to Section 3 of the GNU GPL. 55 | 56 | You may convey a covered work under sections 3 and 4 of this License 57 | without being bound by section 3 of the GNU GPL. 58 | 59 | 2. Conveying Modified Versions. 60 | 61 | If you modify a copy of the Library, and, in your modifications, a 62 | facility refers to a function or data to be supplied by an Application 63 | that uses the facility (other than as an argument passed when the 64 | facility is invoked), then you may convey a copy of the modified 65 | version: 66 | 67 | a) under this License, provided that you make a good faith effort to 68 | ensure that, in the event an Application does not supply the 69 | function or data, the facility still operates, and performs 70 | whatever part of its purpose remains meaningful, or 71 | 72 | b) under the GNU GPL, with none of the additional permissions of 73 | this License applicable to that copy. 74 | 75 | 3. Object Code Incorporating Material from Library Header Files. 76 | 77 | The object code form of an Application may incorporate material from 78 | a header file that is part of the Library. You may convey such object 79 | code under terms of your choice, provided that, if the incorporated 80 | material is not limited to numerical parameters, data structure 81 | layouts and accessors, or small macros, inline functions and templates 82 | (ten or fewer lines in length), you do both of the following: 83 | 84 | a) Give prominent notice with each copy of the object code that the 85 | Library is used in it and that the Library and its use are 86 | covered by this License. 87 | 88 | b) Accompany the object code with a copy of the GNU GPL and this license 89 | document. 90 | 91 | 4. Combined Works. 92 | 93 | You may convey a Combined Work under terms of your choice that, 94 | taken together, effectively do not restrict modification of the 95 | portions of the Library contained in the Combined Work and reverse 96 | engineering for debugging such modifications, if you also do each of 97 | the following: 98 | 99 | a) Give prominent notice with each copy of the Combined Work that 100 | the Library is used in it and that the Library and its use are 101 | covered by this License. 102 | 103 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 104 | document. 105 | 106 | c) For a Combined Work that displays copyright notices during 107 | execution, include the copyright notice for the Library among 108 | these notices, as well as a reference directing the user to the 109 | copies of the GNU GPL and this license document. 110 | 111 | d) Do one of the following: 112 | 113 | 0) Convey the Minimal Corresponding Source under the terms of this 114 | License, and the Corresponding Application Code in a form 115 | suitable for, and under terms that permit, the user to 116 | recombine or relink the Application with a modified version of 117 | the Linked Version to produce a modified Combined Work, in the 118 | manner specified by section 6 of the GNU GPL for conveying 119 | Corresponding Source. 120 | 121 | 1) Use a suitable shared library mechanism for linking with the 122 | Library. A suitable mechanism is one that (a) uses at run time 123 | a copy of the Library already present on the user's computer 124 | system, and (b) will operate properly with a modified version 125 | of the Library that is interface-compatible with the Linked 126 | Version. 127 | 128 | e) Provide Installation Information, but only if you would otherwise 129 | be required to provide such information under section 6 of the 130 | GNU GPL, and only to the extent that such information is 131 | necessary to install and execute a modified version of the 132 | Combined Work produced by recombining or relinking the 133 | Application with a modified version of the Linked Version. (If 134 | you use option 4d0, the Installation Information must accompany 135 | the Minimal Corresponding Source and Corresponding Application 136 | Code. If you use option 4d1, you must provide the Installation 137 | Information in the manner specified by section 6 of the GNU GPL 138 | for conveying Corresponding Source.) 139 | 140 | 5. Combined Libraries. 141 | 142 | You may place library facilities that are a work based on the 143 | Library side by side in a single library together with other library 144 | facilities that are not Applications and are not covered by this 145 | License, and convey such a combined library under terms of your 146 | choice, if you do both of the following: 147 | 148 | a) Accompany the combined library with a copy of the same work based 149 | on the Library, uncombined with any other library facilities, 150 | conveyed under the terms of this License. 151 | 152 | b) Give prominent notice with the combined library that part of it 153 | is a work based on the Library, and explaining where to find the 154 | accompanying uncombined form of the same work. 155 | 156 | 6. Revised Versions of the GNU Lesser General Public License. 157 | 158 | The Free Software Foundation may publish revised and/or new versions 159 | of the GNU Lesser General Public License from time to time. Such new 160 | versions will be similar in spirit to the present version, but may 161 | differ in detail to address new problems or concerns. 162 | 163 | Each version is given a distinguishing version number. If the 164 | Library as you received it specifies that a certain numbered version 165 | of the GNU Lesser General Public License "or any later version" 166 | applies to it, you have the option of following the terms and 167 | conditions either of that published version or of any later version 168 | published by the Free Software Foundation. If the Library as you 169 | received it does not specify a version number of the GNU Lesser 170 | General Public License, you may choose any version of the GNU Lesser 171 | General Public License ever published by the Free Software Foundation. 172 | 173 | If the Library as you received it specifies that a proxy can decide 174 | whether future versions of the GNU Lesser General Public License shall 175 | apply, that proxy's public statement of acceptance of any version is 176 | permanent authorization for you to choose that version for the 177 | Library. 178 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bitcoin-blockchain-parser [![Build Status](https://travis-ci.org/alecalve/python-bitcoin-blockchain-parser.svg?branch=master)] 2 | 3 | ------------------------- 4 | ### Run Google Colab 5 | 6 | https://colab.research.google.com/drive/1OShIMVcFZ_khsUIBOIV1lzrqAGo1gfm_?usp=sharing 7 | 8 | ------------------------- 9 | 10 | (https://travis-ci.org/alecalve/python-bitcoin-blockchain-parser) [![Coverage Status](https://coveralls.io/repos/alecalve/python-bitcoin-blockchain-parser/badge.svg?branch=master&service=github)](https://coveralls.io/github/alecalve/python-bitcoin-blockchain-parser?branch=master) 11 | This Python 3 library provides a parser for the raw data stored by bitcoind. 12 | 13 | ## Features 14 | - Detects outputs types 15 | - Detects addresses in outputs 16 | - Interprets scripts 17 | - Supports SegWit 18 | - Supports ordered block parsing 19 | 20 | ## Examples 21 | 22 | Below are two basic examples for parsing the blockchain. More examples are available in the examples directory. 23 | 24 | ### Unordered Blocks 25 | 26 | This blockchain parser parses raw blocks saved in Bitcoin Core's `.blk` file format. Bitcoin Core does not guarantee that these blocks are saved in order. If your application does not require that blocks are parsed in order, the `Blockchain.get_unordered_blocks(...)` method can be used: 27 | 28 | ```python 29 | import os 30 | from blockchain_parser.blockchain import Blockchain 31 | 32 | 33 | 34 | # Instantiate the Blockchain by giving the path to the directory 35 | # containing the .blk files created by bitcoind 36 | blockchain = Blockchain(os.path.expanduser('~/.bitcoin/blocks')) 37 | for block in blockchain.get_unordered_blocks(): 38 | for tx in block.transactions: 39 | for no, output in enumerate(tx.outputs): 40 | print("tx=%s outputno=%d type=%s value=%s" % (tx.hash, no, output.type, output.value)) 41 | ``` 42 | 43 | ### Ordered Blocks 44 | 45 | If maintaining block order is necessary for your application, you should use the `Blockchain.get_ordered_blocks(...)` method. This method uses Bitcoin Core's LevelDB index to locate ordered block data in it's `.blk` files. 46 | 47 | ```python 48 | import os 49 | from blockchain_parser.blockchain import Blockchain 50 | 51 | # To get the blocks ordered by height, you need to provide the path of the 52 | # `index` directory (LevelDB index) being maintained by bitcoind. It contains 53 | # .ldb files and is present inside the `blocks` directory. 54 | for block in blockchain.get_ordered_blocks(os.path.expanduser('~/.bitcoin/blocks/index'), end=1000): 55 | print("height=%d block=%s" % (block.height, block.hash)) 56 | ``` 57 | 58 | Blocks can be iterated in reverse by specifying a start parameter that is greater than the end parameter. 59 | 60 | ```python 61 | for block in blockchain.get_ordered_blocks(os.path.expanduser('~/.bitcoin/blocks/index'), start=510000, end=0): 62 | print("height=%d block=%s" % (block.height, block.hash)) 63 | ``` 64 | 65 | Building the LevelDB index can take a while which can make iterative development and debugging challenging. For this reason, `Blockchain.get_ordered_blocks(...)` supports caching the LevelDB index database using [pickle](https://docs.python.org/3.6/library/pickle.html). To use a cache simply pass `cache=filename` to the ordered blocks method. If the cached file does not exist it will be created for faster parsing the next time the method is run. If the cached file already exists it will be used instead of re-parsing the LevelDB database. 66 | 67 | ```python 68 | for block in blockchain.get_ordered_blocks(os.path.expanduser('~/.bitcoin/blocks/index'), cache='index-cache.pickle'): 69 | print("height=%d block=%s" % (block.height, block.hash)) 70 | ``` 71 | 72 | **NOTE**: You must manually/programmatically delete the cache file in order to rebuild the cache. Don't forget to do this each time you would like to re-parse the blockchain with a higher block height than the first time you saved the cache file as the new blocks will not be included in the cache. 73 | 74 | ## Installing 75 | 76 | Requirements : python-bitcoinlib, plyvel, coverage for tests 77 | 78 | plyvel requires leveldb development libraries for LevelDB >1.2.X 79 | 80 | On Linux, install libleveldb-dev 81 | 82 | ``` 83 | sudo apt-get install libleveldb-dev 84 | ``` 85 | 86 | Then, just run 87 | ``` 88 | python setup.py install 89 | ``` 90 | 91 | ## Tests 92 | 93 | Run the test suite by lauching 94 | ``` 95 | ./tests.sh 96 | ``` 97 | 98 | ---- 99 | 100 | | | Donation Address | 101 | | --- | --- | 102 | | ♥ __BTC__ | 1Lw2kh9WzCActXSGHxyypGLkqQZfxDpw8v | 103 | | ♥ __ETH__ | 0xaBd66CF90898517573f19184b3297d651f7b90bf | 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /blockchain_parser/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2015-2016 The bitcoin-blockchain-parser developers 2 | # 3 | # This file is part of bitcoin-blockchain-parser. 4 | # 5 | # It is subject to the license terms in the LICENSE file found in the top-level 6 | # directory of this distribution. 7 | # 8 | # No part of bitcoin-blockchain-parser, including this file, may be copied, 9 | # modified, propagated, or distributed except according to the terms contained 10 | # in the LICENSE file. 11 | 12 | __version__ = "0.1.4" 13 | -------------------------------------------------------------------------------- /blockchain_parser/address.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2015-2016 The bitcoin-blockchain-parser developers 2 | # 3 | # This file is part of bitcoin-blockchain-parser. 4 | # 5 | # It is subject to the license terms in the LICENSE file found in the top-level 6 | # directory of this distribution. 7 | # 8 | # No part of bitcoin-blockchain-parser, including this file, may be copied, 9 | # modified, propagated, or distributed except according to the terms contained 10 | # in the LICENSE file. 11 | 12 | from bitcoin import base58 13 | from .utils import btc_ripemd160, double_sha256 14 | 15 | 16 | class Address(object): 17 | """Represents a bitcoin address""" 18 | 19 | def __init__(self, hash, public_key, address, type): 20 | self._hash = hash 21 | self.public_key = public_key 22 | self._address = address 23 | self.type = type 24 | 25 | def __repr__(self): 26 | return "Address(addr=%s)" % self.address 27 | 28 | @classmethod 29 | def from_public_key(cls, public_key): 30 | """Constructs an Address object from a public key""" 31 | return cls(None, public_key, None, "normal") 32 | 33 | @classmethod 34 | def from_ripemd160(cls, hash, type="normal"): 35 | """Constructs an Address object from a RIPEMD-160 hash, it may be a 36 | normal address or a P2SH address, the latter is indicated by setting 37 | type to 'p2sh'""" 38 | return cls(hash, None, None, type) 39 | 40 | @property 41 | def hash(self): 42 | """Returns the RIPEMD-160 hash corresponding to this address""" 43 | if self.public_key is not None and self._hash is None: 44 | self._hash = btc_ripemd160(self.public_key) 45 | 46 | return self._hash 47 | 48 | @property 49 | def address(self): 50 | """Returns the base58 encoded representation of this address""" 51 | if self._address is None: 52 | version = b'\x00' if self.type == "normal" else b'\x05' 53 | checksum = double_sha256(version + self.hash) 54 | 55 | self._address = base58.encode(version + self.hash + checksum[:4]) 56 | return self._address 57 | 58 | def is_p2sh(self): 59 | return self.type == "p2sh" 60 | -------------------------------------------------------------------------------- /blockchain_parser/block.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2015-2016 The bitcoin-blockchain-parser developers 2 | # 3 | # This file is part of bitcoin-blockchain-parser. 4 | # 5 | # It is subject to the license terms in the LICENSE file found in the top-level 6 | # directory of this distribution. 7 | # 8 | # No part of bitcoin-blockchain-parser, including this file, may be copied, 9 | # modified, propagated, or distributed except according to the terms contained 10 | # in the LICENSE file. 11 | 12 | from .transaction import Transaction 13 | from .block_header import BlockHeader 14 | from .utils import format_hash, decode_varint, double_sha256 15 | 16 | 17 | def get_block_transactions(raw_hex): 18 | """Given the raw hexadecimal representation of a block, 19 | yields the block's transactions 20 | """ 21 | # Skipping the header 22 | transaction_data = raw_hex[80:] 23 | 24 | # Decoding the number of transactions, offset is the size of 25 | # the varint (1 to 9 bytes) 26 | n_transactions, offset = decode_varint(transaction_data) 27 | 28 | for i in range(n_transactions): 29 | # Try from 1024 (1KiB) -> 1073741824 (1GiB) slice widths 30 | for j in range(0, 20): 31 | try: 32 | offset_e = offset + (1024 * 2 ** j) 33 | transaction = Transaction.from_hex( 34 | transaction_data[offset:offset_e]) 35 | yield transaction 36 | break 37 | except: 38 | continue 39 | 40 | # Skipping to the next transaction 41 | offset += transaction.size 42 | 43 | 44 | class Block(object): 45 | """ 46 | Represents a Bitcoin block, contains its header and its transactions. 47 | """ 48 | 49 | def __init__(self, raw_hex, height=None): 50 | self.hex = raw_hex 51 | self._hash = None 52 | self._transactions = None 53 | self._header = None 54 | self._n_transactions = None 55 | self.size = len(raw_hex) 56 | self.height = height 57 | 58 | def __repr__(self): 59 | return "Block(%s)" % self.hash 60 | 61 | @classmethod 62 | def from_hex(cls, raw_hex): 63 | """Builds a block object from its bytes representation""" 64 | return cls(raw_hex) 65 | 66 | @property 67 | def hash(self): 68 | """Returns the block's hash (double sha256 of its 80 bytes header""" 69 | if self._hash is None: 70 | self._hash = format_hash(double_sha256(self.hex[:80])) 71 | return self._hash 72 | 73 | @property 74 | def n_transactions(self): 75 | """Return the number of transactions contained in this block, 76 | it is faster to use this than to use len(block.transactions) 77 | as there's no need to parse all transactions to get this information 78 | """ 79 | if self._n_transactions is None: 80 | self._n_transactions = decode_varint(self.hex[80:])[0] 81 | 82 | return self._n_transactions 83 | 84 | @property 85 | def transactions(self): 86 | """Returns a list of the block's transactions represented 87 | as Transaction objects""" 88 | if self._transactions is None: 89 | self._transactions = list(get_block_transactions(self.hex)) 90 | 91 | return self._transactions 92 | 93 | @property 94 | def header(self): 95 | """Returns a BlockHeader object corresponding to this block""" 96 | if self._header is None: 97 | self._header = BlockHeader.from_hex(self.hex[:80]) 98 | return self._header 99 | -------------------------------------------------------------------------------- /blockchain_parser/block_header.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2015-2016 The bitcoin-blockchain-parser developers 2 | # 3 | # This file is part of bitcoin-blockchain-parser. 4 | # 5 | # It is subject to the license terms in the LICENSE file found in the top-level 6 | # directory of this distribution. 7 | # 8 | # No part of bitcoin-blockchain-parser, including this file, may be copied, 9 | # modified, propagated, or distributed except according to the terms contained 10 | # in the LICENSE file. 11 | 12 | from datetime import datetime 13 | from bitcoin.core import CBlockHeader 14 | 15 | from .utils import decode_uint32, format_hash 16 | 17 | 18 | class BlockHeader(object): 19 | """Represents a block header""" 20 | 21 | def __init__(self, raw_hex): 22 | self._version = None 23 | self._previous_block_hash = None 24 | self._merkle_root = None 25 | self._timestamp = None 26 | self._bits = None 27 | self._nonce = None 28 | self._difficulty = None 29 | 30 | self.hex = raw_hex[:80] 31 | 32 | def __repr__(self): 33 | return "BlockHeader(previous_block_hash=%s)" % self.previous_block_hash 34 | 35 | @classmethod 36 | def from_hex(cls, raw_hex): 37 | """Builds a BlockHeader object from its bytes representation""" 38 | return cls(raw_hex) 39 | 40 | @property 41 | def version(self): 42 | """Return the block's version""" 43 | if self._version is None: 44 | self._version = decode_uint32(self.hex[:4]) 45 | return self._version 46 | 47 | @property 48 | def previous_block_hash(self): 49 | """Return the hash of the previous block""" 50 | if self._previous_block_hash is None: 51 | self._previous_block_hash = format_hash(self.hex[4:36]) 52 | return self._previous_block_hash 53 | 54 | @property 55 | def merkle_root(self): 56 | """Returns the block's merkle root""" 57 | if self._merkle_root is None: 58 | self._merkle_root = format_hash(self.hex[36:68]) 59 | return self._merkle_root 60 | 61 | @property 62 | def timestamp(self): 63 | """Returns the timestamp of the block as a UTC datetime object""" 64 | if self._timestamp is None: 65 | self._timestamp = datetime.utcfromtimestamp( 66 | decode_uint32(self.hex[68:72]) 67 | ) 68 | return self._timestamp 69 | 70 | @property 71 | def bits(self): 72 | """Returns the bits (difficulty target) of the block""" 73 | if self._bits is None: 74 | self._bits = decode_uint32(self.hex[72:76]) 75 | return self._bits 76 | 77 | @property 78 | def nonce(self): 79 | """Returns the block's nonce""" 80 | if self._nonce is None: 81 | self._nonce = decode_uint32(self.hex[76:80]) 82 | return self._nonce 83 | 84 | @property 85 | def difficulty(self): 86 | """Returns the block's difficulty target as a float""" 87 | if self._difficulty is None: 88 | self._difficulty = CBlockHeader.calc_difficulty(self.bits) 89 | 90 | return self._difficulty 91 | -------------------------------------------------------------------------------- /blockchain_parser/blockchain.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2015-2016 The bitcoin-blockchain-parser developers 2 | # 3 | # This file is part of bitcoin-blockchain-parser. 4 | # 5 | # It is subject to the license terms in the LICENSE file found in the top-level 6 | # directory of this distribution. 7 | # 8 | # No part of bitcoin-blockchain-parser, including this file, may be copied, 9 | # modified, propagated, or distributed except according to the terms contained 10 | # in the LICENSE file. 11 | 12 | import os 13 | import mmap 14 | import struct 15 | import pickle 16 | import stat 17 | import plyvel 18 | 19 | from .block import Block 20 | from .index import DBBlockIndex 21 | from .utils import format_hash 22 | 23 | 24 | # Constant separating blocks in the .blk files 25 | BITCOIN_CONSTANT = b"\xf9\xbe\xb4\xd9" 26 | 27 | 28 | def get_files(path): 29 | """ 30 | Given the path to the .bitcoin directory, returns the sorted list of .blk 31 | files contained in that directory 32 | """ 33 | if not stat.S_ISDIR(os.stat(path)[stat.ST_MODE]): 34 | return [path] 35 | files = os.listdir(path) 36 | files = [f for f in files if f.startswith("blk") and f.endswith(".dat")] 37 | files = map(lambda x: os.path.join(path, x), files) 38 | return sorted(files) 39 | 40 | 41 | def get_blocks(blockfile): 42 | """ 43 | Given the name of a .blk file, for every block contained in the file, 44 | yields its raw hexadecimal value 45 | """ 46 | with open(blockfile, "rb") as f: 47 | if os.name == 'nt': 48 | size = os.path.getsize(f.name) 49 | raw_data = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ) 50 | else: 51 | # Unix-only call, will not work on Windows, see python doc. 52 | raw_data = mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ) 53 | length = len(raw_data) 54 | offset = 0 55 | block_count = 0 56 | while offset < (length - 4): 57 | if raw_data[offset:offset+4] == BITCOIN_CONSTANT: 58 | offset += 4 59 | size = struct.unpack(" -1: 185 | # if this block is the same height as the last block an orphan 186 | # occurred, now we have to figure out which of the two to keep 187 | if blockIdx.height == last_height: 188 | 189 | # loop through future blocks until we find a chain 6 blocks 190 | # long that includes this block. If we can't find one 191 | # remove this block as it is invalid 192 | if self._index_confirmed(blockIndexes[i:]): 193 | 194 | # if this block is confirmed, the unconfirmed block is 195 | # the previous one. Remove it. 196 | orphans.append(blockIndexes[i - 1].hash) 197 | else: 198 | 199 | # if this block isn't confirmed, remove it. 200 | orphans.append(blockIndexes[i].hash) 201 | 202 | last_height = blockIdx.height 203 | 204 | # filter out the orphan blocks, so we are left only with block indexes 205 | # that have been confirmed 206 | # (or are new enough that they haven't yet been confirmed) 207 | blockIndexes = list(filter(lambda block: block.hash not in orphans, blockIndexes)) 208 | 209 | if end is None: 210 | end = len(blockIndexes) 211 | 212 | if end < start: 213 | blockIndexes = list(reversed(blockIndexes)) 214 | start = len(blockIndexes) - start 215 | end = len(blockIndexes) - end 216 | 217 | for blkIdx in blockIndexes[start:end]: 218 | if blkIdx.file == -1 or blkIdx.data_pos == -1: 219 | break 220 | blkFile = os.path.join(self.path, "blk%05d.dat" % blkIdx.file) 221 | yield Block(get_block(blkFile, blkIdx.data_pos), blkIdx.height) 222 | -------------------------------------------------------------------------------- /blockchain_parser/index.py: -------------------------------------------------------------------------------- 1 | from struct import unpack 2 | 3 | from .utils import format_hash 4 | 5 | BLOCK_HAVE_DATA = 8 6 | BLOCK_HAVE_UNDO = 16 7 | 8 | 9 | def _read_varint(raw_hex): 10 | """ 11 | Reads the weird format of VarInt present in src/serialize.h of bitcoin core 12 | and being used for storing data in the leveldb. 13 | This is not the VARINT format described for general bitcoin serialization 14 | use. 15 | """ 16 | n = 0 17 | pos = 0 18 | while True: 19 | data = raw_hex[pos] 20 | pos += 1 21 | n = (n << 7) | (data & 0x7f) 22 | if data & 0x80 == 0: 23 | return n, pos 24 | n += 1 25 | 26 | 27 | class DBBlockIndex(object): 28 | def __init__(self, blk_hash, raw_hex): 29 | self.hash = blk_hash 30 | pos = 0 31 | n_version, i = _read_varint(raw_hex[pos:]) 32 | pos += i 33 | self.height, i = _read_varint(raw_hex[pos:]) 34 | pos += i 35 | self.status, i = _read_varint(raw_hex[pos:]) 36 | pos += i 37 | self.n_tx, i = _read_varint(raw_hex[pos:]) 38 | pos += i 39 | if self.status & (BLOCK_HAVE_DATA | BLOCK_HAVE_UNDO): 40 | self.file, i = _read_varint(raw_hex[pos:]) 41 | pos += i 42 | else: 43 | self.file = -1 44 | 45 | if self.status & BLOCK_HAVE_DATA: 46 | self.data_pos, i = _read_varint(raw_hex[pos:]) 47 | pos += i 48 | else: 49 | self.data_pos = -1 50 | if self.status & BLOCK_HAVE_UNDO: 51 | self.undo_pos, i = _read_varint(raw_hex[pos:]) 52 | pos += i 53 | 54 | assert(pos + 80 == len(raw_hex)) 55 | self.version, p, m, time, bits, self.nonce = unpack( 56 | " 0) 44 | size = int(data[0]) 45 | assert(size <= 255) 46 | 47 | if size < 253: 48 | return size, 1 49 | 50 | if size == 253: 51 | format_ = ' 3 \ 35 | and is_ascii_text(operation): 36 | print(block.header.timestamp, operation.decode("ascii")) 37 | break 38 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | python-bitcoinlib==0.5.0 2 | plyvel==1.0.4 3 | coverage==4.0.2 -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from blockchain_parser import __version__ 3 | 4 | 5 | setup( 6 | name='blockchain-parser', 7 | version=__version__, 8 | packages=find_packages(), 9 | url='https://github.com/alecalve/python-bitcoin-blockchain-parser', 10 | author='Antoine Le Calvez', 11 | author_email='antoine@p2sh.info', 12 | description='Bitcoin blockchain parser', 13 | test_suite='blockchain_parser.tests', 14 | classifiers=[ 15 | 'Development Status :: 5 - Production/Stable', 16 | 'Environment :: Console', 17 | 'Intended Audience :: Developers', 18 | 'License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)', 19 | 'Topic :: Software Development :: Libraries', 20 | ], 21 | install_requires=[ 22 | 'python-bitcoinlib==0.5.0', 23 | 'plyvel==1.0.4' 24 | ] 25 | ) 26 | -------------------------------------------------------------------------------- /tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | coverage run --append --include='blockchain_parser/*' --omit='*/tests/*' setup.py test 4 | coverage report 5 | coverage erase 6 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | #-*-mode: ini; encoding: utf-8-*- 2 | 3 | [tox] #------------------------------------------------------------------- 4 | 5 | envlist = reset,py27,py33,py34,py35,pypy,pypy3,stats 6 | skip_missing_interpreters = True 7 | 8 | [testenv] #--------------------------------------------------------------- 9 | 10 | commands = 11 | coverage run --append --include='blockchain_parser/*' --omit='tests/*' setup.py test -q 12 | 13 | deps = 14 | coverage 15 | 16 | setenv = 17 | PYTHONWARNINGS = all 18 | 19 | [testenv:reset] #--------------------------------------------------------- 20 | 21 | commands = 22 | coverage erase 23 | 24 | [testenv:stats] #--------------------------------------------------------- 25 | 26 | commands = 27 | coverage report 28 | coverage html 29 | -------------------------------------------------------------------------------- /travis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # based on https://github.com/wbolster/plyvel/blob/fa460e431982e94034fe226faef570ce498c89ac/travis.sh 4 | set -e -u -x 5 | 6 | LEVELDB_VERSION=1.20 7 | 8 | wget https://github.com/google/leveldb/archive/v${LEVELDB_VERSION}.tar.gz 9 | tar xf v${LEVELDB_VERSION}.tar.gz 10 | cd leveldb-${LEVELDB_VERSION}/ 11 | make 12 | 13 | # based on https://gist.github.com/dustismo/6203329 14 | sudo scp -r out-static/lib* out-shared/lib* /usr/local/lib/ 15 | cd include/ 16 | sudo scp -r leveldb /usr/local/include/ 17 | sudo ldconfig 18 | --------------------------------------------------------------------------------