├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.rst ├── bitcoinquery ├── __init__.py ├── cli.py ├── test │ ├── __init__.py │ └── test_cli.py └── util │ ├── __init__.py │ ├── config.py │ └── mongodb.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[co] 2 | 3 | # Packages 4 | *.egg 5 | *.egg-info 6 | dist 7 | build 8 | eggs 9 | parts 10 | bin 11 | include 12 | lib 13 | local 14 | share 15 | var 16 | sdist 17 | develop-eggs 18 | .installed.cfg 19 | 20 | # Installer logs 21 | pip-log.txt 22 | 23 | # Unit test / coverage reports 24 | .coverage 25 | .tox 26 | 27 | #Translations 28 | *.mo 29 | 30 | #Mr Developer 31 | .mr.developer.cfg 32 | 33 | #Virtualenv 34 | .virtual 35 | .virt 36 | 37 | #Ropemacs 38 | .ropeproject 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Andres Buritica 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst 2 | include LICENSE 3 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | bitcoinquery 3 | ============ 4 | 5 | bitcoinquery stores the Bitcoin blockchain in a MongoDB database to 6 | allow querying of block and transaction data. 7 | 8 | Usage 9 | ===== 10 | 11 | In order to use bitcoinquery you need a Bitcoind RPC server. The 12 | server must maintain a full index in order to retrieve all 13 | transactions. To do this, run the server with the -txindex option or 14 | set txindex=1 in the server's conf file. If you are already running a 15 | server without a full index you must reindex the server with the 16 | -reindex option. 17 | 18 | Once the Bitcoind server is set up properly you can start collecting 19 | data using the service described in the Collection_ section. You can 20 | start making queries right away but it will take a while to store the 21 | whole blockchain. 22 | 23 | Installation 24 | ============ 25 | 26 | System dependencies 27 | ------------------- 28 | 29 | - Python 2.7 30 | - MongoDB 2.4.0 31 | - Bitcoind 0.8.1.0 32 | 33 | Python external dependencies 34 | ---------------------------- 35 | 36 | - python-setuptools 37 | - python-virtualenv 38 | 39 | Setup 40 | ----- 41 | 42 | To install bitcoinquery run the following commands from the project's 43 | base directory. 
You can download the source code from github_:: 44 | 45 | virtualenv .virtual 46 | .virtual/bin/python setup.py install 47 | # At this point, bitcoinquery will already be in easy-install.pth. 48 | # So, pip will not attempt to download it 49 | .virtual/bin/pip install bitcoinquery[test] 50 | 51 | # The test requirement installs all the dependencies. But, 52 | # depending on the service you wish to run you might want to 53 | # install only the appropriate dependencies as listed in 54 | # setup.py. For example to run blockchain-collect you only need 55 | # the mongo and bitcoin requirements which install the pymongo and 56 | # python-bitcoinrpc dependencies 57 | .virtual/bin/pip install bitcoinquery[mongo,bitcoin] 58 | 59 | Services 60 | ======== 61 | 62 | It is recommended that you use an init daemon such as upstart_ or 63 | runit_ to run the bitcoinquery services. 64 | 65 | Collection 66 | ---------- 67 | 68 | To start the service which collects and stores block and transaction 69 | data call the ``blockchain-collect`` cli with the ``CONFIG`` 70 | argument:: 71 | 72 | .virtual/bin/blockchain-collect collect.conf 73 | 74 | where ``collect.conf`` looks like:: 75 | 76 | [bitcoind] 77 | url = http://:@: 78 | 79 | [mongodb] 80 | host = : 81 | database = bitcoinquery 82 | collections = blocks,transactions,errors 83 | 84 | You can also specify a MongoDB replica set with the replica-set 85 | option. 86 | 87 | Querying 88 | ======== 89 | 90 | Block data is stored in the ``blocks`` MongoDB collection with the 91 | block height as the document _id. Transaction data is stored in 92 | ``transactions`` with the transaction hash as the document _id. Errors 93 | encountered during transaction retrieval are stored in the ``errors`` 94 | collection. 
95 | 96 | Example 97 | ------- 98 | 99 | Find the number of public keys:: 100 | 101 | import pymongo 102 | 103 | db = pymongo.Connection().bitcoinquery 104 | def fn(): 105 | for t in db.transactions.find(): 106 | for v in t['vout']: 107 | if v['scriptPubKey']['type'] == 'pubkey': 108 | for a in v['scriptPubKey']['addresses']: 109 | yield a 110 | 111 | keys = set([i for i in fn()]) 112 | print 'There are {count} public keys'.format(count=len(keys)) 113 | 114 | Developing 115 | ========== 116 | 117 | To start developing follow the instructions in the Installation_ 118 | section but replace:: 119 | 120 | .virtual/bin/python setup.py install 121 | 122 | with:: 123 | 124 | .virtual/bin/python setup.py develop 125 | 126 | If you like to use IPython you can install it with the dev 127 | requirement:: 128 | 129 | .virtual/bin/pip install bitcoinquery[dev] 130 | 131 | .. _runit: http://smarden.org/runit/ 132 | .. _upstart: http://upstart.ubuntu.com/ 133 | .. _github: https://github.com/thelinuxkid/bitcoinquery 134 | -------------------------------------------------------------------------------- /bitcoinquery/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thelinuxkid/bitcoinquery/54daa68ab5d0c6e1c582855eb666728f40cb1160/bitcoinquery/__init__.py -------------------------------------------------------------------------------- /bitcoinquery/cli.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import argparse 3 | import time 4 | import decimal 5 | import json 6 | import random 7 | 8 | import pymongo 9 | import bson.json_util 10 | 11 | from bitcoinrpc.authproxy import AuthServiceProxy, JSONRPCException 12 | 13 | from bitcoinquery.util.config import config_parser 14 | from bitcoinquery.util import mongodb 15 | 16 | log = logging.getLogger(__name__) 17 | 18 | 19 | def parse_args(): 20 | parser = argparse.ArgumentParser( 21 | description='Start 
def collect(database, service):
    """Copy blocks and their transactions from bitcoind into MongoDB.

    Collection resumes at the highest block height already stored in
    ``database.blocks`` (that block is processed again) or at block 1
    when the collection is empty. From there the chain is walked by
    following each block's ``nextblockhash`` until none is reported.

    Every block is upserted into ``database.blocks`` keyed by its
    height and every decoded transaction into ``database.transactions``
    keyed by its txid. A transaction whose retrieval or decoding raises
    ``JSONRPCException`` is recorded in ``database.errors`` and skipped.

    :Parameters:
      - `database`: object exposing the ``blocks``, ``transactions``
        and ``errors`` collections
      - `service`: bitcoind RPC proxy providing ``getblockcount``,
        ``getblockhash``, ``getblock``, ``getrawtransaction`` and
        ``decoderawtransaction``

    Returns the node's block count minus the starting height; the
    reprocessed last block is not counted as work.
    """
    # NOTE(review): pymongo 2.x spells this keyword ``fields``; ``field``
    # is kept as-is because test_cli.py asserts this exact call. Confirm
    # and change both together.
    last = database.blocks.find_one(
        sort=[('_id', pymongo.DESCENDING)],
        field=['_id'],
    )
    resuming = last is not None
    # Skip genesis block on a first run; otherwise always reprocess the
    # last stored block in case we missed transactions
    start = last['_id'] if resuming else 1

    count = service.getblockcount()
    diff = work = count - start
    log.info('Starting at block {start}'.format(start=start))
    if resuming:
        # Only claim a reprocess when there is a stored block to redo
        log.info('Reprocessing last block in database')
    log.info('Processing {diff} new blocks'.format(diff=diff))

    current = service.getblockhash(start)
    while current:
        block = service.getblock(current)
        kwargs = _bson_upsert(block)
        mongodb.safe_upsert(
            collection=database.blocks,
            _id=block['height'],
            **kwargs
        )
        for tx in block['tx']:
            # Keep the try body limited to the RPC calls that can fail
            try:
                raw = service.getrawtransaction(tx)
                decoded = service.decoderawtransaction(raw)
            except JSONRPCException as e:
                log.debug(
                    'Failed to retrieve transaction {tx} in '
                    'block {current}'.format(
                        tx=tx,
                        current=current,
                    )
                )
                database.errors.insert({
                    'txid': tx,
                    'error': e.error,
                })
                continue
            # Tag the transaction with the block it was found in
            decoded['bitcoinquery'] = {
                'blockhash': current,
                'blockheight': block['height'],
            }
            kwargs = _bson_upsert(decoded)
            mongodb.safe_upsert(
                collection=database.transactions,
                _id=decoded['txid'],
                **kwargs
            )
        current = block.get('nextblockhash')
    # Processing last block does not count as work
    return work
@mock.patch('bitcoinquery.util.mongodb.safe_upsert') 8 | def test_collect_simple(fake_upsert, fake_bson): 9 | fake_db = mock.Mock() 10 | one = dict([ 11 | ('_id', 2), 12 | ]) 13 | fake_db.blocks.find_one.return_value = one 14 | 15 | fake_sv = mock.Mock() 16 | fake_sv.getblockcount.return_value = 2 17 | fake_sv.getblockhash.return_value = 'foo hash' 18 | block = dict([ 19 | ('tx', ['foo tx']), 20 | ('nextblockhash', None), 21 | ('height', 2), 22 | ]) 23 | fake_sv.getblock.return_value = block 24 | fake_sv.getrawtransaction.return_value = 'foo raw' 25 | decoded = dict([ 26 | ('txid', 'foo txid'), 27 | ]) 28 | fake_sv.decoderawtransaction.return_value = decoded 29 | 30 | block_bson = dict([('block', 'bson')]) 31 | transaction_bson = dict([('transaction', 'bson')]) 32 | fake_bson.side_effect = [block_bson, transaction_bson] 33 | 34 | cli.collect(fake_db, fake_sv) 35 | 36 | query = mock.call.blocks.find_one( 37 | sort=[('_id', -1)], 38 | field=['_id'], 39 | ) 40 | db_calls = [query] 41 | assert fake_db.mock_calls == db_calls 42 | 43 | count = mock.call.getblockcount() 44 | hash_ = mock.call.getblockhash(2) 45 | block = mock.call.getblock('foo hash') 46 | raw = mock.call.getrawtransaction('foo tx') 47 | decode = mock.call.decoderawtransaction('foo raw') 48 | sv_calls = [count, hash_, block, raw, decode] 49 | assert fake_sv.mock_calls == sv_calls 50 | 51 | block_upsert = mock.call( 52 | collection=fake_db.blocks, 53 | _id=2, 54 | block='bson', 55 | ) 56 | transaction_upsert = mock.call( 57 | collection=fake_db.transactions, 58 | _id='foo txid', 59 | transaction='bson', 60 | ) 61 | upsert_calls = [block_upsert, transaction_upsert] 62 | assert fake_upsert.mock_calls == upsert_calls 63 | -------------------------------------------------------------------------------- /bitcoinquery/util/__init__.py: -------------------------------------------------------------------------------- 
def config_option(fn, section, option):
    """Look up a config option, yielding None when it is not set.

    :Parameters:
      - `fn`: getter called as ``fn(section, option)``
      - `section`: name of the config section to read from
      - `option`: name of the option within that section

    Returns whatever ``fn`` returns, or None if ``fn`` raises
    ``NoOptionError``. Any other exception propagates to the caller.
    """
    try:
        value = fn(section, option)
    except NoOptionError:
        # A missing option is not an error for callers of this helper
        value = None
    return value
def safe_upsert(
    collection,
    _id,
    **kwargs
):
    """Upsert the document whose ``_id`` equals the given value.

    Does nothing when no keyword arguments are supplied.

    :Parameters:
      - `collection`: object whose ``update`` method is called
      - `_id`: value used to build the ``{'_id': _id}`` match spec
      - `kwargs`: extra keyword arguments forwarded unchanged to
        ``collection.update`` along with ``upsert=True`` and
        ``safe=True``
    """
    if not kwargs:
        # No update arguments were given, so there is nothing to write
        return
    selector = {'_id': _id}
    collection.update(spec=selector, upsert=True, safe=True, **kwargs)
19 | ) 20 | 21 | # Pypi package documentation 22 | root = os.path.dirname(__file__) 23 | path = os.path.join(root, 'README.rst') 24 | with open(path) as fp: 25 | long_description = fp.read() 26 | 27 | # Tests always depend on all other requirements, except dev 28 | for k,v in EXTRAS_REQUIRES.iteritems(): 29 | if k == 'test' or k == 'dev': 30 | continue 31 | EXTRAS_REQUIRES['test'] += v 32 | 33 | setup( 34 | name='bitcoinquery', 35 | version='0.0.2', 36 | description='Bitcoinquery -- Query blockchain data', 37 | long_description=long_description, 38 | author='Andres Buritica', 39 | author_email='andres@thelinuxkid.com', 40 | maintainer='Andres Buritica', 41 | maintainer_email='andres@thelinuxkid.com', 42 | url='https://github.com/thelinuxkid/bitcoinquery', 43 | license='MIT', 44 | packages = find_packages(), 45 | test_suite='nose.collector', 46 | install_requires=[ 47 | 'setuptools', 48 | ], 49 | entry_points={ 50 | 'console_scripts': [ 51 | 'blockchain-collect = bitcoinquery.cli:blockchain_collect[mongo,bitcoin]', 52 | ], 53 | }, 54 | extras_require=EXTRAS_REQUIRES, 55 | dependency_links=[ 56 | 'http://github.com/jgarzik/python-bitcoinrpc/tarball/master#egg=python-bitcoinrpc-0.1', 57 | ], 58 | classifiers=[ 59 | 'Development Status :: 4 - Beta', 60 | 'Intended Audience :: Developers', 61 | 'Natural Language :: English', 62 | 'License :: OSI Approved :: MIT License', 63 | 'Programming Language :: Python', 64 | 'Programming Language :: Python :: 2.7' 65 | ], 66 | ) 67 | --------------------------------------------------------------------------------