├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── blockchain_parser
├── __init__.py
├── address.py
├── block.py
├── block_header.py
├── blockchain.py
├── index.py
├── input.py
├── output.py
├── script.py
├── tests
│ ├── __init__.py
│ ├── bip69_false.txt
│ ├── bip69_true.txt
│ ├── segwit.txt
│ ├── test_address.py
│ ├── test_block.py
│ ├── test_index.py
│ ├── test_output.py
│ ├── test_script.py
│ ├── test_transaction.py
│ └── test_utils.py
├── transaction.py
└── utils.py
├── examples
├── non-standard-outputs.py
├── ordered-blocks.py
└── texts-in-coinbases.py
├── requirements.txt
├── setup.cfg
├── setup.py
├── tests.sh
├── tox.ini
└── travis.sh
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 |
47 | # Translations
48 | *.mo
49 | *.pot
50 |
51 | # Django stuff:
52 | *.log
53 |
54 | # Sphinx documentation
55 | docs/_build/
56 |
57 | # PyBuilder
58 | target/
59 |
60 | # PyCharm
61 | .idea/
62 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: required
2 |
3 | language:
4 | - python
5 |
6 | python:
7 | - "3.3"
8 | - "3.4"
9 | - "3.5"
10 | - "3.6"
11 |
12 | before_install:
13 | - ./travis.sh
14 |
15 | install:
16 | - pip install -r requirements.txt
17 | - pip install coveralls
18 |
19 | script:
20 | - coverage run --append --include='blockchain_parser/*' --omit='*/tests/*' setup.py test
21 |
22 | after_success:
23 | - if [[ $TRAVIS_PYTHON_VERSION == '3.6' ]]; then
24 | coveralls;
25 | fi
26 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | bitcoin-blockchain-parser is free software: you can redistribute it and/or
2 | modify it under the terms of the GNU Lesser General Public License as
3 | published by the Free Software Foundation, either version 3 of the License,
4 | or (at your option) any later version.
5 |
6 | bitcoin-blockchain-parser is distributed in the hope that it will be useful, but
7 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
8 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
9 | below for more details.
10 |
11 |
12 |
13 | GNU LESSER GENERAL PUBLIC LICENSE
14 | Version 3, 29 June 2007
15 |
16 | Copyright (C) 2007 Free Software Foundation, Inc.
17 | Everyone is permitted to copy and distribute verbatim copies
18 | of this license document, but changing it is not allowed.
19 |
20 |
21 | This version of the GNU Lesser General Public License incorporates
22 | the terms and conditions of version 3 of the GNU General Public
23 | License, supplemented by the additional permissions listed below.
24 |
25 | 0. Additional Definitions.
26 |
27 | As used herein, "this License" refers to version 3 of the GNU Lesser
28 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
29 | General Public License.
30 |
31 | "The Library" refers to a covered work governed by this License,
32 | other than an Application or a Combined Work as defined below.
33 |
34 | An "Application" is any work that makes use of an interface provided
35 | by the Library, but which is not otherwise based on the Library.
36 | Defining a subclass of a class defined by the Library is deemed a mode
37 | of using an interface provided by the Library.
38 |
39 | A "Combined Work" is a work produced by combining or linking an
40 | Application with the Library. The particular version of the Library
41 | with which the Combined Work was made is also called the "Linked
42 | Version".
43 |
44 | The "Minimal Corresponding Source" for a Combined Work means the
45 | Corresponding Source for the Combined Work, excluding any source code
46 | for portions of the Combined Work that, considered in isolation, are
47 | based on the Application, and not on the Linked Version.
48 |
49 | The "Corresponding Application Code" for a Combined Work means the
50 | object code and/or source code for the Application, including any data
51 | and utility programs needed for reproducing the Combined Work from the
52 | Application, but excluding the System Libraries of the Combined Work.
53 |
54 | 1. Exception to Section 3 of the GNU GPL.
55 |
56 | You may convey a covered work under sections 3 and 4 of this License
57 | without being bound by section 3 of the GNU GPL.
58 |
59 | 2. Conveying Modified Versions.
60 |
61 | If you modify a copy of the Library, and, in your modifications, a
62 | facility refers to a function or data to be supplied by an Application
63 | that uses the facility (other than as an argument passed when the
64 | facility is invoked), then you may convey a copy of the modified
65 | version:
66 |
67 | a) under this License, provided that you make a good faith effort to
68 | ensure that, in the event an Application does not supply the
69 | function or data, the facility still operates, and performs
70 | whatever part of its purpose remains meaningful, or
71 |
72 | b) under the GNU GPL, with none of the additional permissions of
73 | this License applicable to that copy.
74 |
75 | 3. Object Code Incorporating Material from Library Header Files.
76 |
77 | The object code form of an Application may incorporate material from
78 | a header file that is part of the Library. You may convey such object
79 | code under terms of your choice, provided that, if the incorporated
80 | material is not limited to numerical parameters, data structure
81 | layouts and accessors, or small macros, inline functions and templates
82 | (ten or fewer lines in length), you do both of the following:
83 |
84 | a) Give prominent notice with each copy of the object code that the
85 | Library is used in it and that the Library and its use are
86 | covered by this License.
87 |
88 | b) Accompany the object code with a copy of the GNU GPL and this license
89 | document.
90 |
91 | 4. Combined Works.
92 |
93 | You may convey a Combined Work under terms of your choice that,
94 | taken together, effectively do not restrict modification of the
95 | portions of the Library contained in the Combined Work and reverse
96 | engineering for debugging such modifications, if you also do each of
97 | the following:
98 |
99 | a) Give prominent notice with each copy of the Combined Work that
100 | the Library is used in it and that the Library and its use are
101 | covered by this License.
102 |
103 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
104 | document.
105 |
106 | c) For a Combined Work that displays copyright notices during
107 | execution, include the copyright notice for the Library among
108 | these notices, as well as a reference directing the user to the
109 | copies of the GNU GPL and this license document.
110 |
111 | d) Do one of the following:
112 |
113 | 0) Convey the Minimal Corresponding Source under the terms of this
114 | License, and the Corresponding Application Code in a form
115 | suitable for, and under terms that permit, the user to
116 | recombine or relink the Application with a modified version of
117 | the Linked Version to produce a modified Combined Work, in the
118 | manner specified by section 6 of the GNU GPL for conveying
119 | Corresponding Source.
120 |
121 | 1) Use a suitable shared library mechanism for linking with the
122 | Library. A suitable mechanism is one that (a) uses at run time
123 | a copy of the Library already present on the user's computer
124 | system, and (b) will operate properly with a modified version
125 | of the Library that is interface-compatible with the Linked
126 | Version.
127 |
128 | e) Provide Installation Information, but only if you would otherwise
129 | be required to provide such information under section 6 of the
130 | GNU GPL, and only to the extent that such information is
131 | necessary to install and execute a modified version of the
132 | Combined Work produced by recombining or relinking the
133 | Application with a modified version of the Linked Version. (If
134 | you use option 4d0, the Installation Information must accompany
135 | the Minimal Corresponding Source and Corresponding Application
136 | Code. If you use option 4d1, you must provide the Installation
137 | Information in the manner specified by section 6 of the GNU GPL
138 | for conveying Corresponding Source.)
139 |
140 | 5. Combined Libraries.
141 |
142 | You may place library facilities that are a work based on the
143 | Library side by side in a single library together with other library
144 | facilities that are not Applications and are not covered by this
145 | License, and convey such a combined library under terms of your
146 | choice, if you do both of the following:
147 |
148 | a) Accompany the combined library with a copy of the same work based
149 | on the Library, uncombined with any other library facilities,
150 | conveyed under the terms of this License.
151 |
152 | b) Give prominent notice with the combined library that part of it
153 | is a work based on the Library, and explaining where to find the
154 | accompanying uncombined form of the same work.
155 |
156 | 6. Revised Versions of the GNU Lesser General Public License.
157 |
158 | The Free Software Foundation may publish revised and/or new versions
159 | of the GNU Lesser General Public License from time to time. Such new
160 | versions will be similar in spirit to the present version, but may
161 | differ in detail to address new problems or concerns.
162 |
163 | Each version is given a distinguishing version number. If the
164 | Library as you received it specifies that a certain numbered version
165 | of the GNU Lesser General Public License "or any later version"
166 | applies to it, you have the option of following the terms and
167 | conditions either of that published version or of any later version
168 | published by the Free Software Foundation. If the Library as you
169 | received it does not specify a version number of the GNU Lesser
170 | General Public License, you may choose any version of the GNU Lesser
171 | General Public License ever published by the Free Software Foundation.
172 |
173 | If the Library as you received it specifies that a proxy can decide
174 | whether future versions of the GNU Lesser General Public License shall
175 | apply, that proxy's public statement of acceptance of any version is
176 | permanent authorization for you to choose that version for the
177 | Library.
178 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # bitcoin-blockchain-parser
2 |
3 | -------------------------
4 | ### Run Google Colab
5 |
6 | https://colab.research.google.com/drive/1OShIMVcFZ_khsUIBOIV1lzrqAGo1gfm_?usp=sharing
7 |
8 | -------------------------
9 |
10 | [Build Status](https://travis-ci.org/alecalve/python-bitcoin-blockchain-parser) [Coverage Status](https://coveralls.io/github/alecalve/python-bitcoin-blockchain-parser?branch=master)
11 | This Python 3 library provides a parser for the raw data stored by bitcoind.
12 |
13 | ## Features
14 | - Detects output types
15 | - Detects addresses in outputs
16 | - Interprets scripts
17 | - Supports SegWit
18 | - Supports ordered block parsing
19 |
20 | ## Examples
21 |
22 | Below are two basic examples for parsing the blockchain. More examples are available in the examples directory.
23 |
24 | ### Unordered Blocks
25 |
26 | This blockchain parser parses raw blocks saved in Bitcoin Core's `.blk` file format. Bitcoin Core does not guarantee that these blocks are saved in order. If your application does not require that blocks are parsed in order, the `Blockchain.get_unordered_blocks(...)` method can be used:
27 |
28 | ```python
29 | import os
30 | from blockchain_parser.blockchain import Blockchain
31 |
32 |
33 |
34 | # Instantiate the Blockchain by giving the path to the directory
35 | # containing the .blk files created by bitcoind
36 | blockchain = Blockchain(os.path.expanduser('~/.bitcoin/blocks'))
37 | for block in blockchain.get_unordered_blocks():
38 | for tx in block.transactions:
39 | for no, output in enumerate(tx.outputs):
40 | print("tx=%s outputno=%d type=%s value=%s" % (tx.hash, no, output.type, output.value))
41 | ```
42 |
43 | ### Ordered Blocks
44 |
45 | If maintaining block order is necessary for your application, you should use the `Blockchain.get_ordered_blocks(...)` method. This method uses Bitcoin Core's LevelDB index to locate ordered block data in its `.blk` files.
46 |
47 | ```python
48 | import os
49 | from blockchain_parser.blockchain import Blockchain
50 |
51 | # To get the blocks ordered by height, you need to provide the path of the
52 | # `index` directory (LevelDB index) being maintained by bitcoind. It contains
53 | # .ldb files and is present inside the `blocks` directory.
54 | for block in blockchain.get_ordered_blocks(os.path.expanduser('~/.bitcoin/blocks/index'), end=1000):
55 | print("height=%d block=%s" % (block.height, block.hash))
56 | ```
57 |
58 | Blocks can be iterated in reverse by specifying a start parameter that is greater than the end parameter.
59 |
60 | ```python
61 | for block in blockchain.get_ordered_blocks(os.path.expanduser('~/.bitcoin/blocks/index'), start=510000, end=0):
62 | print("height=%d block=%s" % (block.height, block.hash))
63 | ```
64 |
65 | Building the LevelDB index can take a while which can make iterative development and debugging challenging. For this reason, `Blockchain.get_ordered_blocks(...)` supports caching the LevelDB index database using [pickle](https://docs.python.org/3.6/library/pickle.html). To use a cache simply pass `cache=filename` to the ordered blocks method. If the cached file does not exist it will be created for faster parsing the next time the method is run. If the cached file already exists it will be used instead of re-parsing the LevelDB database.
66 |
67 | ```python
68 | for block in blockchain.get_ordered_blocks(os.path.expanduser('~/.bitcoin/blocks/index'), cache='index-cache.pickle'):
69 | print("height=%d block=%s" % (block.height, block.hash))
70 | ```
71 |
72 | **NOTE**: You must manually/programmatically delete the cache file in order to rebuild the cache. Don't forget to do this each time you would like to re-parse the blockchain with a higher block height than the first time you saved the cache file as the new blocks will not be included in the cache.
73 |
74 | ## Installing
75 |
76 | Requirements : python-bitcoinlib, plyvel, coverage for tests
77 |
78 | plyvel requires leveldb development libraries for LevelDB >1.2.X
79 |
80 | On Linux, install libleveldb-dev
81 |
82 | ```
83 | sudo apt-get install libleveldb-dev
84 | ```
85 |
86 | Then, just run
87 | ```
88 | python setup.py install
89 | ```
90 |
91 | ## Tests
92 |
93 | Run the test suite by launching
94 | ```
95 | ./tests.sh
96 | ```
97 |
98 | ----
99 |
100 | | | Donation Address |
101 | | --- | --- |
102 | | ♥ __BTC__ | 1Lw2kh9WzCActXSGHxyypGLkqQZfxDpw8v |
103 | | ♥ __ETH__ | 0xaBd66CF90898517573f19184b3297d651f7b90bf |
104 |
105 |
106 |
107 |
--------------------------------------------------------------------------------
/blockchain_parser/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2015-2016 The bitcoin-blockchain-parser developers
2 | #
3 | # This file is part of bitcoin-blockchain-parser.
4 | #
5 | # It is subject to the license terms in the LICENSE file found in the top-level
6 | # directory of this distribution.
7 | #
8 | # No part of bitcoin-blockchain-parser, including this file, may be copied,
9 | # modified, propagated, or distributed except according to the terms contained
10 | # in the LICENSE file.
11 |
12 | __version__ = "0.1.4"
13 |
--------------------------------------------------------------------------------
/blockchain_parser/address.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2015-2016 The bitcoin-blockchain-parser developers
2 | #
3 | # This file is part of bitcoin-blockchain-parser.
4 | #
5 | # It is subject to the license terms in the LICENSE file found in the top-level
6 | # directory of this distribution.
7 | #
8 | # No part of bitcoin-blockchain-parser, including this file, may be copied,
9 | # modified, propagated, or distributed except according to the terms contained
10 | # in the LICENSE file.
11 |
12 | from bitcoin import base58
13 | from .utils import btc_ripemd160, double_sha256
14 |
15 |
class Address(object):
    """A bitcoin address built from either a public key or a RIPEMD-160 hash.

    The hash and the base58 string are derived on first access and cached.
    """

    def __init__(self, hash, public_key, address, type):
        # `type` is compared against "normal" and "p2sh" by the methods below.
        self.type = type
        self.public_key = public_key
        # Private caches; either may be None until first requested.
        self._hash = hash
        self._address = address

    def __repr__(self):
        return "Address(addr=%s)" % self.address

    @classmethod
    def from_public_key(cls, public_key):
        """Builds a "normal" Address whose hash will be derived from
        the given public key"""
        return cls(None, public_key, None, "normal")

    @classmethod
    def from_ripemd160(cls, hash, type="normal"):
        """Builds an Address from an existing RIPEMD-160 hash; pass
        type='p2sh' to mark it as a P2SH address"""
        return cls(hash, None, None, type)

    @property
    def hash(self):
        """The RIPEMD-160 hash of this address, computed from the public
        key the first time it is needed"""
        if self._hash is None and self.public_key is not None:
            self._hash = btc_ripemd160(self.public_key)
        return self._hash

    @property
    def address(self):
        """The base58 encoded form of this address"""
        if self._address is not None:
            return self._address

        # Version byte: 0x00 for "normal" addresses, 0x05 for anything else
        if self.type == "normal":
            version = b'\x00'
        else:
            version = b'\x05'

        payload = version + self.hash
        # The first four bytes of the double SHA-256 act as the checksum
        checksum = double_sha256(payload)
        self._address = base58.encode(payload + checksum[:4])
        return self._address

    def is_p2sh(self):
        """Tells whether this address was created with type 'p2sh'"""
        return self.type == "p2sh"
60 |
--------------------------------------------------------------------------------
/blockchain_parser/block.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2015-2016 The bitcoin-blockchain-parser developers
2 | #
3 | # This file is part of bitcoin-blockchain-parser.
4 | #
5 | # It is subject to the license terms in the LICENSE file found in the top-level
6 | # directory of this distribution.
7 | #
8 | # No part of bitcoin-blockchain-parser, including this file, may be copied,
9 | # modified, propagated, or distributed except according to the terms contained
10 | # in the LICENSE file.
11 |
12 | from .transaction import Transaction
13 | from .block_header import BlockHeader
14 | from .utils import format_hash, decode_varint, double_sha256
15 |
16 |
def get_block_transactions(raw_hex):
    """Given the raw hexadecimal representation of a block,
    yields the block's transactions

    :param raw_hex: the serialized block; its first 80 bytes (the header)
        are skipped
    :raises ValueError: if a transaction cannot be parsed from the data
        that follows the previously parsed one
    """
    # Skipping the header
    transaction_data = raw_hex[80:]
    data_length = len(transaction_data)

    # Decoding the number of transactions, offset is the size of
    # the varint (1 to 9 bytes)
    n_transactions, offset = decode_varint(transaction_data)

    for index in range(n_transactions):
        transaction = None
        last_error = None

        # Try from 1024 (1KiB) -> 536870912 (512MiB) slice widths
        for exponent in range(20):
            window_end = offset + (1024 << exponent)
            try:
                transaction = Transaction.from_hex(
                    transaction_data[offset:window_end])
            except Exception as error:
                # Only parsing failures are retried; BaseException such as
                # KeyboardInterrupt is left to propagate.
                last_error = error
                if window_end >= data_length:
                    # The slice already covers all remaining data, a wider
                    # window would hand over exactly the same bytes.
                    break
            else:
                break

        if transaction is None:
            # Previously this fell through and advanced the offset using an
            # unbound or stale transaction; fail explicitly instead.
            raise ValueError(
                "Unable to parse transaction %d at offset %d: %r"
                % (index, offset, last_error))

        # Yielding outside of the try block so that closing the generator
        # (GeneratorExit) is never mistaken for a parsing failure.
        yield transaction

        # Skipping to the next transaction
        offset += transaction.size
42 |
43 |
class Block(object):
    """
    A Bitcoin block wrapping its raw bytes. The hash, header, transaction
    count and transactions are decoded on first access and then cached.
    """

    def __init__(self, raw_hex, height=None):
        self.hex = raw_hex
        self.size = len(raw_hex)
        self.height = height
        # Lazily filled caches backing the properties below
        self._hash = None
        self._header = None
        self._n_transactions = None
        self._transactions = None

    def __repr__(self):
        return "Block(%s)" % self.hash

    @classmethod
    def from_hex(cls, raw_hex):
        """Creates a block object from its bytes representation"""
        return cls(raw_hex)

    @property
    def hash(self):
        """The block's hash (double sha256 of its 80 bytes header)"""
        if self._hash is not None:
            return self._hash
        header_digest = double_sha256(self.hex[:80])
        self._hash = format_hash(header_digest)
        return self._hash

    @property
    def n_transactions(self):
        """The number of transactions contained in this block, read from
        the varint following the header. This is cheaper than
        len(block.transactions) since no transaction has to be parsed.
        """
        if self._n_transactions is not None:
            return self._n_transactions
        after_header = self.hex[80:]
        self._n_transactions = decode_varint(after_header)[0]
        return self._n_transactions

    @property
    def transactions(self):
        """The block's transactions as a list of Transaction objects"""
        if self._transactions is not None:
            return self._transactions
        self._transactions = list(get_block_transactions(self.hex))
        return self._transactions

    @property
    def header(self):
        """The BlockHeader object built from the first 80 bytes"""
        if self._header is not None:
            return self._header
        self._header = BlockHeader.from_hex(self.hex[:80])
        return self._header
99 |
--------------------------------------------------------------------------------
/blockchain_parser/block_header.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2015-2016 The bitcoin-blockchain-parser developers
2 | #
3 | # This file is part of bitcoin-blockchain-parser.
4 | #
5 | # It is subject to the license terms in the LICENSE file found in the top-level
6 | # directory of this distribution.
7 | #
8 | # No part of bitcoin-blockchain-parser, including this file, may be copied,
9 | # modified, propagated, or distributed except according to the terms contained
10 | # in the LICENSE file.
11 |
12 | from datetime import datetime
13 | from bitcoin.core import CBlockHeader
14 |
15 | from .utils import decode_uint32, format_hash
16 |
17 |
class BlockHeader(object):
    """Represents a block header

    Only the first 80 bytes of the given data are kept. Every field is
    decoded from its fixed slice on first access and then cached.
    """

    def __init__(self, raw_hex):
        # Caches for the lazily decoded fields
        self._version = None
        self._previous_block_hash = None
        self._merkle_root = None
        self._timestamp = None
        self._bits = None
        self._nonce = None
        self._difficulty = None

        self.hex = raw_hex[:80]

    def __repr__(self):
        return "BlockHeader(previous_block_hash=%s)" % self.previous_block_hash

    @classmethod
    def from_hex(cls, raw_hex):
        """Builds a BlockHeader object from its bytes representation"""
        return cls(raw_hex)

    @property
    def version(self):
        """Return the block's version (bytes 0 to 4)"""
        if self._version is None:
            self._version = decode_uint32(self.hex[:4])
        return self._version

    @property
    def previous_block_hash(self):
        """Return the hash of the previous block (bytes 4 to 36)"""
        if self._previous_block_hash is None:
            self._previous_block_hash = format_hash(self.hex[4:36])
        return self._previous_block_hash

    @property
    def merkle_root(self):
        """Returns the block's merkle root (bytes 36 to 68)"""
        if self._merkle_root is None:
            self._merkle_root = format_hash(self.hex[36:68])
        return self._merkle_root

    @property
    def timestamp(self):
        """Returns the timestamp of the block as a UTC datetime object

        The value is decoded from bytes 68 to 72 and returned as a naive
        datetime (tzinfo is None) expressed in UTC.
        """
        if self._timestamp is None:
            # datetime.utcfromtimestamp() is deprecated since Python 3.12:
            # build an aware UTC datetime and drop the tzinfo so that the
            # returned value stays the same naive datetime as before.
            from datetime import timezone

            self._timestamp = datetime.fromtimestamp(
                decode_uint32(self.hex[68:72]), timezone.utc
            ).replace(tzinfo=None)
        return self._timestamp

    @property
    def bits(self):
        """Returns the bits (difficulty target) of the block
        (bytes 72 to 76)"""
        if self._bits is None:
            self._bits = decode_uint32(self.hex[72:76])
        return self._bits

    @property
    def nonce(self):
        """Returns the block's nonce (bytes 76 to 80)"""
        if self._nonce is None:
            self._nonce = decode_uint32(self.hex[76:80])
        return self._nonce

    @property
    def difficulty(self):
        """Returns the block's difficulty, computed from its bits by
        CBlockHeader.calc_difficulty"""
        if self._difficulty is None:
            self._difficulty = CBlockHeader.calc_difficulty(self.bits)

        return self._difficulty
91 |
--------------------------------------------------------------------------------
/blockchain_parser/blockchain.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2015-2016 The bitcoin-blockchain-parser developers
2 | #
3 | # This file is part of bitcoin-blockchain-parser.
4 | #
5 | # It is subject to the license terms in the LICENSE file found in the top-level
6 | # directory of this distribution.
7 | #
8 | # No part of bitcoin-blockchain-parser, including this file, may be copied,
9 | # modified, propagated, or distributed except according to the terms contained
10 | # in the LICENSE file.
11 |
12 | import os
13 | import mmap
14 | import struct
15 | import pickle
16 | import stat
17 | import plyvel
18 |
19 | from .block import Block
20 | from .index import DBBlockIndex
21 | from .utils import format_hash
22 |
23 |
24 | # Constant separating blocks in the .blk files
25 | BITCOIN_CONSTANT = b"\xf9\xbe\xb4\xd9"
26 |
27 |
def get_files(path):
    """
    Returns the sorted list of full paths of the blk*.dat files found
    directly inside the directory `path`. When `path` is not a directory,
    it is returned as the only element of the list.
    """
    mode = os.stat(path).st_mode
    if not stat.S_ISDIR(mode):
        return [path]

    return sorted(
        os.path.join(path, name)
        for name in os.listdir(path)
        if name.startswith("blk") and name.endswith(".dat")
    )
39 |
40 |
41 | def get_blocks(blockfile):
42 | """
43 | Given the name of a .blk file, for every block contained in the file,
44 | yields its raw hexadecimal value
45 | """
46 | with open(blockfile, "rb") as f:
47 | if os.name == 'nt':
48 | size = os.path.getsize(f.name)
49 | raw_data = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
50 | else:
51 | # Unix-only call, will not work on Windows, see python doc.
52 | raw_data = mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ)
53 | length = len(raw_data)
54 | offset = 0
55 | block_count = 0
56 | while offset < (length - 4):
57 | if raw_data[offset:offset+4] == BITCOIN_CONSTANT:
58 | offset += 4
59 | size = struct.unpack(" -1:
185 | # if this block is the same height as the last block an orphan
186 | # occurred, now we have to figure out which of the two to keep
187 | if blockIdx.height == last_height:
188 |
189 | # loop through future blocks until we find a chain 6 blocks
190 | # long that includes this block. If we can't find one
191 | # remove this block as it is invalid
192 | if self._index_confirmed(blockIndexes[i:]):
193 |
194 | # if this block is confirmed, the unconfirmed block is
195 | # the previous one. Remove it.
196 | orphans.append(blockIndexes[i - 1].hash)
197 | else:
198 |
199 | # if this block isn't confirmed, remove it.
200 | orphans.append(blockIndexes[i].hash)
201 |
202 | last_height = blockIdx.height
203 |
204 | # filter out the orphan blocks, so we are left only with block indexes
205 | # that have been confirmed
206 | # (or are new enough that they haven't yet been confirmed)
207 | blockIndexes = list(filter(lambda block: block.hash not in orphans, blockIndexes))
208 |
209 | if end is None:
210 | end = len(blockIndexes)
211 |
212 | if end < start:
213 | blockIndexes = list(reversed(blockIndexes))
214 | start = len(blockIndexes) - start
215 | end = len(blockIndexes) - end
216 |
217 | for blkIdx in blockIndexes[start:end]:
218 | if blkIdx.file == -1 or blkIdx.data_pos == -1:
219 | break
220 | blkFile = os.path.join(self.path, "blk%05d.dat" % blkIdx.file)
221 | yield Block(get_block(blkFile, blkIdx.data_pos), blkIdx.height)
222 |
--------------------------------------------------------------------------------
/blockchain_parser/index.py:
--------------------------------------------------------------------------------
1 | from struct import unpack
2 |
3 | from .utils import format_hash
4 |
5 | BLOCK_HAVE_DATA = 8
6 | BLOCK_HAVE_UNDO = 16
7 |
8 |
9 | def _read_varint(raw_hex):
10 | """
11 | Reads the weird format of VarInt present in src/serialize.h of bitcoin core
12 | and being used for storing data in the leveldb.
13 | This is not the VARINT format described for general bitcoin serialization
14 | use.
15 | """
16 | n = 0
17 | pos = 0
18 | while True:
19 | data = raw_hex[pos]
20 | pos += 1
21 | n = (n << 7) | (data & 0x7f)
22 | if data & 0x80 == 0:
23 | return n, pos
24 | n += 1
25 |
26 |
27 | class DBBlockIndex(object):
28 | def __init__(self, blk_hash, raw_hex):
29 | self.hash = blk_hash
30 | pos = 0
31 | n_version, i = _read_varint(raw_hex[pos:])
32 | pos += i
33 | self.height, i = _read_varint(raw_hex[pos:])
34 | pos += i
35 | self.status, i = _read_varint(raw_hex[pos:])
36 | pos += i
37 | self.n_tx, i = _read_varint(raw_hex[pos:])
38 | pos += i
39 | if self.status & (BLOCK_HAVE_DATA | BLOCK_HAVE_UNDO):
40 | self.file, i = _read_varint(raw_hex[pos:])
41 | pos += i
42 | else:
43 | self.file = -1
44 |
45 | if self.status & BLOCK_HAVE_DATA:
46 | self.data_pos, i = _read_varint(raw_hex[pos:])
47 | pos += i
48 | else:
49 | self.data_pos = -1
50 | if self.status & BLOCK_HAVE_UNDO:
51 | self.undo_pos, i = _read_varint(raw_hex[pos:])
52 | pos += i
53 |
54 | assert(pos + 80 == len(raw_hex))
55 | self.version, p, m, time, bits, self.nonce = unpack(
56 | " 0)
44 | size = int(data[0])
45 | assert(size <= 255)
46 |
47 | if size < 253:
48 | return size, 1
49 |
50 | if size == 253:
51 | format_ = ' 3 \
35 | and is_ascii_text(operation):
36 | print(block.header.timestamp, operation.decode("ascii"))
37 | break
38 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | python-bitcoinlib==0.5.0
2 | plyvel==1.0.4
3 | coverage==4.0.2
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | from blockchain_parser import __version__
3 |
4 |
# Package metadata for the blockchain-parser distribution.
setup(
    name='blockchain-parser',
    # Single source of truth: the version string defined in the package itself
    version=__version__,
    packages=find_packages(),
    url='https://github.com/alecalve/python-bitcoin-blockchain-parser',
    author='Antoine Le Calvez',
    author_email='antoine@p2sh.info',
    description='Bitcoin blockchain parser',
    # Test package picked up by `setup.py test`
    test_suite='blockchain_parser.tests',
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Environment :: Console',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)',
        'Topic :: Software Development :: Libraries',
    ],
    # Runtime dependencies, pinned to exact versions
    install_requires=[
        'python-bitcoinlib==0.5.0',
        'plyvel==1.0.4'
    ]
)
26 |
--------------------------------------------------------------------------------
/tests.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Runs the test suite under coverage, prints the coverage report and then
# discards the collected data.
coverage run --append --include='blockchain_parser/*' --omit='*/tests/*' setup.py test
# Remember the outcome of the test run: without this the script would exit
# with the status of `coverage erase` and hide failing tests.
status=$?

coverage report
coverage erase

exit "$status"
6 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | #-*-mode: ini; encoding: utf-8-*-
2 |
3 | [tox] #-------------------------------------------------------------------
4 |
5 | envlist = reset,py27,py33,py34,py35,pypy,pypy3,stats
6 | skip_missing_interpreters = True
7 |
8 | [testenv] #---------------------------------------------------------------
9 |
10 | commands =
11 | coverage run --append --include='blockchain_parser/*' --omit='tests/*' setup.py test -q
12 |
13 | deps =
14 | coverage
15 |
16 | setenv =
17 | PYTHONWARNINGS = all
18 |
19 | [testenv:reset] #---------------------------------------------------------
20 |
21 | commands =
22 | coverage erase
23 |
24 | [testenv:stats] #---------------------------------------------------------
25 |
26 | commands =
27 | coverage report
28 | coverage html
29 |
--------------------------------------------------------------------------------
/travis.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | # based on https://github.com/wbolster/plyvel/blob/fa460e431982e94034fe226faef570ce498c89ac/travis.sh
4 | set -e -u -x
5 |
6 | LEVELDB_VERSION=1.20
7 |
8 | wget https://github.com/google/leveldb/archive/v${LEVELDB_VERSION}.tar.gz
9 | tar xf v${LEVELDB_VERSION}.tar.gz
10 | cd leveldb-${LEVELDB_VERSION}/
11 | make
12 |
13 | # based on https://gist.github.com/dustismo/6203329
14 | sudo scp -r out-static/lib* out-shared/lib* /usr/local/lib/
15 | cd include/
16 | sudo scp -r leveldb /usr/local/include/
17 | sudo ldconfig
18 |
--------------------------------------------------------------------------------