├── log
└── README
├── var
└── README
├── datafeed
├── __init__.py
├── tests
│ ├── __init__.py
│ ├── 001.npy
│ ├── 005.npy
│ ├── 005_na.npy
│ ├── minute.npy
│ ├── reports.dump
│ ├── test_server.py
│ ├── runtests.py
│ ├── helper.py
│ ├── test_exchange.py
│ ├── test_client.py
│ ├── test_dividend.py
│ ├── test_imiguserver.py
│ └── test_datastore.py
├── providers
│ ├── __init__.py
│ ├── tests
│ │ ├── test_dzh.py
│ │ ├── test_nasdaq.py
│ │ ├── test_sina.py
│ │ ├── test_yahoo.py
│ │ ├── test_google.py
│ │ └── google_data.csv
│ ├── http_fetcher.py
│ ├── nasdaq.py
│ ├── sina.py
│ ├── yahoo.py
│ ├── google.py
│ ├── dzh.py
│ └── tongshi.py
├── bidict.py
├── utils.py
├── quote.py
├── dividend.py
├── exchange.py
├── client.py
└── imiguserver.py
├── pip.txt
├── .gitignore
├── config_example.py
├── wjf.py
├── setup.py
├── example
├── adjust.py
├── dzh.py
├── bench.py
├── bench_dump.py
└── bench_dataset.py
├── bin
├── ec2-datafeed
└── datafeed
├── Makefile
├── README.md
├── server.py
└── LICENSE
/log/README:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/var/README:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/datafeed/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/datafeed/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/datafeed/providers/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pip.txt:
--------------------------------------------------------------------------------
1 | h5py
2 | numpy
3 | tornado
4 |
--------------------------------------------------------------------------------
/datafeed/tests/001.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yinhm/datafeed/HEAD/datafeed/tests/001.npy
--------------------------------------------------------------------------------
/datafeed/tests/005.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yinhm/datafeed/HEAD/datafeed/tests/005.npy
--------------------------------------------------------------------------------
/datafeed/tests/005_na.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yinhm/datafeed/HEAD/datafeed/tests/005_na.npy
--------------------------------------------------------------------------------
/datafeed/tests/minute.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yinhm/datafeed/HEAD/datafeed/tests/minute.npy
--------------------------------------------------------------------------------
/datafeed/tests/reports.dump:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yinhm/datafeed/HEAD/datafeed/tests/reports.dump
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.h5
2 | *.bdb
3 | *.DAT
4 | *.PWR
5 | *.pyc
6 | .ropeproject
7 | .coverage
8 | htmlcov
9 | data/*dump
10 | MANIFEST
11 | build
12 | datafeed.egg-info
13 | dist
14 | log/*
15 | var/*
16 | config.py
17 | .dir-locals.el
18 |
--------------------------------------------------------------------------------
/config_example.py:
--------------------------------------------------------------------------------
1 |
2 | # Server address
3 | # Used for WJF client.
4 | SERVER_ADDR = '10.0.0.2'
5 |
# Set password to enable authentication
7 | # Default to None, no auth needed.
8 | # AUTH_PASSWORD = 'YourPassword'
9 | AUTH_PASSWORD = None
10 |
--------------------------------------------------------------------------------
/wjf.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011 yinhm
"""Entry point for the WJF box: run the TongShi Windows feed client."""

import config

from datafeed.providers.tongshi import run_tongshi_win


if __name__ == '__main__':
    run_tongshi_win(config.SERVER_ADDR, config.AUTH_PASSWORD)
13 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
#from distutils.core import setup
from setuptools import setup, find_packages

setup(
    name='datafeed',
    version='0.6',
    author='yinhm',
    author_email='epaulin@gmail.com',
    # setuptools expects dotted package names, not filesystem paths:
    # 'datafeed/providers' is not an importable package name.
    packages=['datafeed', 'datafeed.providers', ],
    license='Apache 2.0 Licence',
    long_description=open('README.md').read(),
)
13 |
--------------------------------------------------------------------------------
/datafeed/tests/test_server.py:
--------------------------------------------------------------------------------
1 | from __future__ import with_statement
2 |
3 | import re
4 | import time
5 | import unittest
6 |
7 | from datafeed.client import Client
8 | from datafeed.server import Server, Application, Request, Handler
9 |
10 | from mock import Mock, patch
11 |
12 |
class HandlerTest(unittest.TestCase):
    """Placeholder for server Handler tests; none implemented yet."""


if __name__ == '__main__':
    unittest.main()
18 |
--------------------------------------------------------------------------------
/datafeed/tests/runtests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import unittest
3 |
TEST_MODULES = [
    'datafeed.tests.test_client',
    'datafeed.tests.test_datastore',
    'datafeed.tests.test_exchange',
    'datafeed.tests.test_imiguserver',
    'datafeed.tests.test_server',
]

def all():
    """Build the full datafeed suite from TEST_MODULES.

    NOTE(review): keeps the name ``all`` (shadowing the builtin) —
    presumably the tornado test runner looks it up by that name.
    """
    loader = unittest.defaultTestLoader
    return loader.loadTestsFromNames(TEST_MODULES)

if __name__ == '__main__':
    import tornado.testing
    tornado.testing.main()
18 |
--------------------------------------------------------------------------------
/datafeed/bidict.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """A bidirectional dict.
3 | """
4 | import itertools
5 |
class Bidict(dict):
    """A bidirectional dict: storing d[key] = value also stores d[value] = key."""

    def __init__(self, iterable=(), **kwargs):
        self.update(iterable, **kwargs)

    def update(self, iterable=(), **kwargs):
        """Insert pairs from a mapping or an iterable of (key, value) pairs.

        Uses .items() rather than the Python-2-only .iteritems() so the
        class works on both Python 2 and 3.
        """
        if hasattr(iterable, 'items'):
            iterable = iterable.items()
        for (key, value) in itertools.chain(iterable, kwargs.items()):
            self[key] = value

    def __setitem__(self, key, value):
        # Remove any mapping currently involving either endpoint so the
        # relation stays strictly one-to-one in both directions.
        if key in self:
            del self[key]
        if value in self:
            del self[value]
        dict.__setitem__(self, key, value)
        dict.__setitem__(self, value, key)

    def __delitem__(self, key):
        value = self[key]
        dict.__delitem__(self, key)
        # A key mapped to itself has a single entry; deleting it twice
        # raised KeyError in the original implementation.
        if value != key:
            dict.__delitem__(self, value)

    def __repr__(self):
        return '%s(%s)' % (type(self).__name__, dict.__repr__(self))
31 |
--------------------------------------------------------------------------------
/example/adjust.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011 yinhm
"""Example: fetch daily bars for one symbol and apply dividend adjustments.

Python 2 script (print statements, cStringIO).
"""
import datetime
import os
import sys

import numpy as np


# Make the repository root importable when running from example/.
ROOT_PATH = os.path.join(os.path.realpath(os.path.dirname(__file__)), '..')
sys.path[0:0] = [ROOT_PATH]

from cStringIO import StringIO
from pandas import DataFrame

from datafeed.client import Client
from datafeed.dividend import Dividend

client = Client()
symbol = 'SH600036'

# Last 1000 daily bars plus the dividend history for the symbol.
y = client.get_day(symbol, 1000)
dividends = client.get_dividend(symbol)

# Re-index the record array by calendar date so pandas can align rows.
index = np.array([datetime.date.fromtimestamp(v) for v in y['time']],
                 dtype=object)
y = DataFrame.from_records(y, index=index, exclude=['time'])

print dividends

# NOTE(review): d.adjust(y) presumably modifies the frame in place —
# confirm against datafeed/dividend.py.
for div in dividends:
    d = Dividend(div)
    d.adjust(y)

day = '20080725'
print datetime.datetime.fromtimestamp(client.get_day(symbol, day)['time'])

d1 = client.get_day(symbol, day)
print d1
42 |
43 |
--------------------------------------------------------------------------------
/bin/ec2-datafeed:
--------------------------------------------------------------------------------
#!/bin/sh
# Start/stop/inspect the EC2 Windows instance that runs the feed client.

ROOT=$(dirname $0)/..

export JAVA_HOME=/usr/lib/jvm/java-1.6.0-openjdk/
export EC2_HOME=$ROOT/bin/ec2-api-tools
export EC2_PRIVATE_KEY=$ROOT/.ec2/pk-aws.x509.pem
export EC2_CERT=$ROOT/.ec2/cert-aws.x509.pem

REGION=us-west-1
INSTANCE=i-f6b5f5b2
DESC="amazon ec2 windows server, instance id $INSTANCE"


case "$1" in
  start)
    echo -n "Starting $DESC: "
    $EC2_HOME/bin/ec2-start-instances --region $REGION $INSTANCE
    ;;
  stop)
    echo -n "Stopping $DESC: "
    $EC2_HOME/bin/ec2-stop-instances --region $REGION $INSTANCE
    ;;
  status)
    echo -n "Status $DESC: "
    $EC2_HOME/bin/ec2-describe-instances --region $REGION $INSTANCE
    $EC2_HOME/bin/ec2-describe-instances
    ;;
  restart)
    echo -n "Restarting $DESC: "
    # Bug fix: 'stop' and 'start' are case labels, not shell functions,
    # so the bare commands here did nothing; re-invoke this script.
    "$0" stop
    sleep 60
    "$0" start
    ;;
  *)
    N=$(dirname $0)/ec2-datafeed
    echo "Usage: $N {status|start|stop|restart}" >&2
    exit 1
    ;;
esac

exit 0
43 |
--------------------------------------------------------------------------------
/datafeed/utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Copyright 2010 yinhm
5 |
6 | import datetime
7 | import json
8 |
9 | import numpy as np
10 |
from json import encoder
# Render floats with exactly two decimals in JSON output.
# NOTE(review): modern CPython (3.6+) ignores encoder.FLOAT_REPR in the
# C-accelerated encoder — confirm this still takes effect where deployed.
encoder.FLOAT_REPR = lambda f: format(f, '.2f')


__all__ = ['print2f', 'json_encode', 'json_decode']
16 |
17 |
class print2f(float):
    """float subclass whose repr is fixed at two decimal places."""

    def __repr__(self):
        return "%0.2f" % self
21 |
22 |
def json_encode(value):
    """JSON-encode the given Python object.

    JSON permits but does not require forward slashes to be escaped.
    Escaping "</" as "<\\/" keeps encoded payloads embedded in a
    <script> tag from terminating it prematurely.  Some json libraries
    do this escaping by default; python's standard library does not, so
    we do it here.
    http://stackoverflow.com/questions/1580647/json-why-are-forward-slashes-escaped
    """
    # Bug fix: the original called .replace("", "<\\/"), an empty-string
    # search that inserted "<\/" between every character of the output.
    return json.dumps(value).replace("</", "<\\/")
32 |
33 |
def json_decode(value):
    """Parse the given JSON string into the equivalent Python objects."""
    return json.loads(value)
37 |
--------------------------------------------------------------------------------
/datafeed/quote.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from datetime import datetime
4 | from datafeed.exchange import Security
5 |
6 | __all__ = ['Report', 'Day', 'Minute', 'SecurityList']
7 |
8 |
class _Struct(object):
    """Base quote container: exposes the fields of ``adict`` as attributes,
    tagged with the owning Security."""

    def __init__(self, security, adict):
        # Only project Security instances are accepted as the identity.
        assert isinstance(security, Security)

        self.__dict__.update(adict)
        self.security = security

    def assert_data(self):
        # Validation hook for subclasses; no-op in the base class.
        pass

    def __getstate__(self):
        # Drop the raw payload (if any) from the pickled state.
        # NOTE(review): _raw_data is presumably set by provider parsers
        # elsewhere — not visible in this file.
        odict = self.__dict__.copy()
        odict.pop('_raw_data', None)
        return odict

    def __setstate__(self, state):
        self.__dict__.update(state)

    def todict(self):
        # Same filtered view of the fields as the pickle state.
        return self.__getstate__()
30 |
class Report(_Struct):
    """Realtime quote snapshot; requires a float ``price`` and a
    datetime ``time`` field."""

    def __init__(self, security, adict):
        # Validate the two fields every report must carry.
        assert isinstance(adict['price'], float)
        assert isinstance(adict['time'], datetime)

        super(Report, self).__init__(security, adict)

    def __str__(self):
        return "%s, %s, %s" % (self.security, self.price, self.time)
41 |
42 |
class Day(_Struct):
    """Marker type for daily quote records."""


class Minute(_Struct):
    """Marker type for minute quote records."""


class SecurityList(_Struct):
    """Marker type for security-list payloads."""
52 |
--------------------------------------------------------------------------------
/datafeed/tests/helper.py:
--------------------------------------------------------------------------------
import datetime
import numpy
import os
import time

# Fresh per-run scratch directory; the timestamp keeps concurrent test
# runs from colliding (mkdir still fails if two runs share a second).
datadir ='/tmp/datafeed-%d' % int(time.time())
os.mkdir(datadir)
8 |
def sample_key():
    """Symbol used by the sample fixtures below."""
    return 'SH000001'
11 |
def sample():
    """Return a one-symbol report dict keyed by symbol, stamped with now."""
    now = datetime.datetime.now()
    ts = int(time.time())

    report = {
        'amount': 84596203520.0,
        'close': 2856.9899999999998,
        'high': 2880.5599999999999,
        'low': 2851.9499999999998,
        'name': u'\u4e0a\u8bc1\u6307\u6570',
        'open': 2868.73,
        'preclose': 2875.8600000000001,
        'price': 2856.9899999999998,
        'symbol': u'SH000001',
        'time': str(now),
        'timestamp': ts,
        'volume': 75147848.0,
    }
    return {'SH000001': report}
33 |
def sample_minutes():
    """Load the minute-bar fixture, shifting each timestamp onto today's date."""
    here = os.path.dirname(os.path.realpath(__file__))
    data = numpy.load(os.path.join(here, 'minute.npy'))

    now = datetime.datetime.today()
    for row in data:
        orig = datetime.datetime.fromtimestamp(int(row['time']))
        shifted = time.mktime((now.year, now.month, now.day,
                               orig.hour, orig.minute, 0, 0, 0, 0))
        row['time'] = int(shifted)

    return data
46 |
--------------------------------------------------------------------------------
/example/dzh.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011 yinhm
"""Example: import Dazhihui (DZH) minute files into the datafeed store.

Python 2 script (print statements, cStringIO).
"""
import datetime
import os
import sys

import numpy as np

# Make the repository root importable when running from example/.
ROOT_PATH = os.path.join(os.path.realpath(os.path.dirname(__file__)), '..')
sys.path[0:0] = [ROOT_PATH]

from cStringIO import StringIO
from datafeed.client import Client
from datafeed.datastore import Manager
from datafeed.exchange import *
from datafeed.providers.dzh import *

var_path = os.path.join(ROOT_PATH, 'var')

client = Client()
store = Manager('/tmp/df', SH())

# Pass 1: push raw 1-minute bars to the server as "minute" data.
filename = os.path.join(var_path, "dzh/sh/MIN1.DAT")
io = DzhMinute()
for symbol, ohlcs in io.read(filename, 'SH'):
    client.put_minute(symbol, ohlcs)

# Pass 2: re-read the same file, shift timestamps back 8 hours
# (UTC+8 -> UTC, presumably — confirm), write into the local store.
filename = os.path.join(var_path, "dzh/sh/MIN1.DAT")
io = DzhMinute()
for symbol, ohlcs in io.read(filename, 'SH'):
    for ohlc in ohlcs:
        ohlc['time'] = ohlc['time'] - 8 * 3600
    print symbol
    #client.put_1minute(symbol, ohlcs)
    store.oneminstore.update(symbol, ohlcs)


# Pass 3: 5-minute bars, same timestamp shift, sent to the server.
filename = os.path.join(var_path, "dzh/sh/MIN.DAT")
io = DzhFiveMinute()
for symbol, ohlcs in io.read(filename, 'SH'):
    for ohlc in ohlcs:
        ohlc['time'] = ohlc['time'] - 8 * 3600
    print symbol
    client.put_5minute(symbol, ohlcs)
    # store.fiveminstore.update(symbol, ohlcs)
48 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# Convenience to run tests and coverage.
# This probably won't work on Windows.

FLAGS=
TESTS=`find datafeed -name test_[a-z]\*.py`
NONTESTS=`find datafeed -name [a-z]\*.py ! -name test_\*.py`
PORT=8082
ADDRESS=localhost
PYTHON=python -Wignore

# NOTE: python -m takes a dotted module name; the original slash-path
# form (datafeed/tests/runtests) is not importable and fails.
test:
	$(PYTHON) -m datafeed.tests.runtests $(FLAGS)

full_test:
	for i in $(TESTS); \
	do \
	  echo $$i; \
	  $(PYTHON) -m `echo $$i | sed -e 's,/,.,g' -e 's,\.py$$,,'` $(FLAGS); \
	done

test_datastore:
	$(PYTHON) -m datafeed.tests.test_datastore $(FLAGS)

test_imiguserver:
	$(PYTHON) -m datafeed.tests.test_imiguserver $(FLAGS)

test_server:
	$(PYTHON) -m datafeed.tests.test_server $(FLAGS)

test_client:
	$(PYTHON) -m datafeed.tests.test_client $(FLAGS)

test_exchange:
	$(PYTHON) -m datafeed.tests.test_exchange $(FLAGS)

test_s_google:
	$(PYTHON) -m datafeed.providers.tests.test_google $(FLAGS)

c cov cove cover coverage:
	python-coverage erase
	for i in $(TESTS); \
	do \
	  echo $$i; \
	  PYTHONPATH=. python-coverage run -p $$i; \
	done
	python-coverage combine
	python-coverage html -d "`pwd`/htmlcov" $(NONTESTS)
	python-coverage report -m $(NONTESTS)
	echo "open file://`pwd`/htmlcov/index.html"

# server.py was invoked bare, which requires '.' on PATH and the
# executable bit; run it through the interpreter instead.
serve:
	$(PYTHON) server.py --port $(PORT) --address $(ADDRESS)

debug:
	$(PYTHON) server.py --port $(PORT) --address $(ADDRESS) --debug

clean:
	rm -rf htmlcov
	rm -f `find . -name \*.pyc -o -name \*~ -o -name @* -o -name \*.orig`
60 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | A Datafeed System for Financial Data.
2 | =====================================
Datafeed is a fast, extensible quotes data storage built on
Python/HDF5. IMIGU (http://imigu.com) has been using Datafeed in
production for more than a year.
6 |
7 | Datafeed is licensed under the Apache Licence, Version 2.0
8 | (http://www.apache.org/licenses/LICENSE-2.0.html).
9 |
10 | ## Components
11 |
12 | * Quotes store server.
* Client to interact with the quote server (get/put).
14 | * Datafeeds providers client, including Yahoo, Google, etc.
15 |
16 |
17 | ## Python version support
18 |
Officially 2.7. Python 3 may work but is not tested.
20 |
21 |
22 | ## Dependencies
23 |
24 | * NumPy: 1.5.0 or higher
25 | * h5py: 2.0 or higher
26 | * tornado: 2.0 or higher
27 |
28 | Install dependent packages with pip:
29 |
30 | pip install -r pip.txt
31 |
32 |
33 | ## Optional packages
34 |
35 | * pandas: dividend/split and more
36 | * python-dateutil: <2.0, RSS parsing
37 | * pycurl: url fetch
38 | * pywin32: only needed if you want to run TongShi client
39 |
40 |
41 | ## INSTALLATION
42 |
43 | git clone git://github.com/yinhm/datafeed.git
44 |
45 |
46 | ## Run
47 |
48 | cd datafeed
49 | cp config_example.py config.py
50 | python server.py
51 |
52 |
53 | ## Client
54 |
55 | from datafeed.client import Client
56 | c = Client()
57 | c.get_report("SH000001")
58 |
59 |
60 | ## TODO
61 |
62 | * Documentation
63 |
64 |
65 | ## License
66 |
67 | Apache Public License (APL) 2.0
68 |
69 |
70 | ## Thanks
71 |
* Big thanks to my company ( http://jeebo.cn ) for allowing me to open source Datafeed.
73 |
--------------------------------------------------------------------------------
/server.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Copyright 2010 yinhm
5 |
6 | '''A datafeed server daemon.
7 | '''
8 | import config
9 | import logging
10 | import os
11 | import signal
12 | import sys
13 | import tornado
14 |
15 | from tornado import ioloop
16 | from tornado.options import define, options
17 |
18 | from datafeed.exchange import SH
19 | from datafeed.imiguserver import ImiguApplication
20 | from datafeed.server import Server
21 |
22 |
# Default data directory: ./var next to this script.
DATA_DIR = os.path.join(os.path.realpath(os.path.dirname(__file__)),
                        'var')

define("port", default=8082, help="run on the given port", type=int)
define("datadir", default=DATA_DIR, help="default data dir", type=str)
28 |
29 |
def main():
    """Start the datafeed server and its one-second periodic job."""
    tornado.options.parse_command_line()

    app = ImiguApplication(options.datadir, SH())
    server = Server(app, auth_password=config.AUTH_PASSWORD)
    server.listen(options.port)
    io_loop = tornado.ioloop.IOLoop.instance()

    check_time = 1 * 1000  # PeriodicCallback interval, in milliseconds
    scheduler = ioloop.PeriodicCallback(app.periodic_job,
                                        check_time,
                                        io_loop=io_loop)

    def shutdown(signum, frame):
        # Log (rather than the original py2 print) so the shutdown is
        # recorded alongside the other log output.
        logging.info("Signal handler called with signal %s", signum)
        io_loop.stop()
        scheduler.stop()
        server.log_stats()
        logging.info("==> Exiting datafeed.")

    signal.signal(signal.SIGTERM, shutdown)
    signal.signal(signal.SIGINT, shutdown)

    scheduler.start()
    io_loop.start()

if __name__ == "__main__":
    main()
58 |
--------------------------------------------------------------------------------
/example/bench.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2010 yinhm

'''Benchmark/profile the imigu application's archive_minute request.
(Python 2 script.)
'''

import datetime
import logging
import marshal
import os
import sys
import time
import tornado

from tornado import ioloop
from tornado.options import define, options


# Make the repository root importable when running from example/.
sys.path[0:0] = ['..']


from datafeed.exchange import SH
from datafeed.imiguserver import ImiguApplication
from datafeed.server import Server, Request


tornado.options.parse_command_line()
app = ImiguApplication('/tmp/df', SH())


# Fabricate one report stamped at 15:00 today.
today = datetime.datetime.today()
timestamp = int(time.mktime((today.year, today.month, today.day,
                             15, 0, 0, 0, 0, 0)))
dt = datetime.datetime.fromtimestamp(timestamp)

d = {
    'SH000001' : {
        'amount': 84596203520.0,
        'close': 2856.9899999999998,
        'high': 2880.5599999999999,
        'low': 2851.9499999999998,
        'name': u'\u4e0a\u8bc1\u6307\u6570',
        'open': 2868.73,
        'preclose': 2875.8600000000001,
        'price': 2856.9899999999998,
        'symbol': u'SH000001',
        'time': str(dt),
        'timestamp': timestamp,
        'volume': 75147848.0
    }
}

app.dbm.update_reports(d)

# Load a dumped snapshot of reports and restamp every entry to the same
# time so archive_minute has a full set of data to process.
path = os.path.dirname(os.path.realpath(__file__))
f = open(path + '/../datafeed/tests/reports.dump', 'r')
data = marshal.load(f)
for v in data.itervalues():
    if 'amount' not in v:
        continue
    v['time'] = str(dt)
    v['timestamp'] = timestamp
app.dbm.update_reports(data)

# Warm-up run outside the profiler.
request = Request(None, 'archive_minute')
app(request)


def main():
    request = Request(None, 'archive_minute')
    app(request)

if __name__ == "__main__":
    import cProfile
    cProfile.run('main()', '/tmp/fooprof')
78 |
--------------------------------------------------------------------------------
/datafeed/tests/test_exchange.py:
--------------------------------------------------------------------------------
1 | from __future__ import with_statement
2 |
3 | import unittest
4 |
5 | from datetime import datetime
6 | from datafeed.exchange import *
7 |
8 | class ExchangeTest(unittest.TestCase):
9 |
10 | def test_NYSE(self):
11 | nyse = NYSE()
12 | self.assertEqual(str(nyse), 'NYSE')
13 |
14 | def test_singleton(self):
15 | lon_1 = LON()
16 | lon_2 = LON()
17 | self.assertEqual(lon_1, lon_2)
18 |
19 | def test_security(self):
20 | stock = Security(SH(), '600123')
21 | self.assertEqual('SH:600123', str(stock))
22 |
23 | def test_security_init_from_abbr(self):
24 | stock = Security.from_abbr('SH', '600123')
25 | self.assertEqual('SH:600123', str(stock))
26 |
27 | def test_shanghai_exchange_pre_open_time(self):
28 | today = datetime.today()
29 | sh = SH()
30 | pre_open_time = SH.pre_open_time(day=today)
31 | ret = datetime.fromtimestamp(pre_open_time)
32 | self.assertEqual(ret.hour, 9)
33 | self.assertEqual(ret.minute, 15)
34 |
35 | def test_shanghai_exchange_open_time(self):
36 | today = datetime.today()
37 | sh = SH()
38 | open_time = SH.open_time(day=today)
39 | ret = datetime.fromtimestamp(open_time)
40 | self.assertEqual(ret.hour, 9)
41 | self.assertEqual(ret.minute, 30)
42 |
43 | def test_shanghai_exchange_open_time(self):
44 | today = datetime.today()
45 | sh = SH()
46 | break_time = SH.break_time(day=today)
47 | ret = datetime.fromtimestamp(break_time)
48 | self.assertEqual(ret.hour, 11)
49 | self.assertEqual(ret.minute, 30)
50 |
51 | def test_shanghai_exchange_open_time(self):
52 | today = datetime.today()
53 | sh = SH()
54 | close_time = SZ.close_time(day=today)
55 | ret = datetime.fromtimestamp(close_time)
56 | self.assertEqual(ret.hour, 15)
57 | self.assertEqual(ret.minute, 0)
58 |
59 |
60 | if __name__ == '__main__':
61 | unittest.main()
62 |
--------------------------------------------------------------------------------
/datafeed/providers/tests/test_dzh.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | from __future__ import with_statement
4 |
5 | import os
6 | import unittest
7 | from cStringIO import StringIO
8 |
9 | from datafeed.providers.dzh import *
10 |
11 | class DzhDayTest(unittest.TestCase):
12 |
13 | def assertFloatEqual(self, actual, expt):
14 | if abs(actual - expt) < 0.1 ** 5:
15 | return True
16 | return False
17 |
18 | def test_read_generator(self):
19 | path = os.path.join(os.path.realpath(os.path.dirname(__file__)),
20 | '../../../var')
21 |
22 | filename = os.path.join(path, "dzh/sh/DAY.DAT")
23 | io = DzhDay()
24 | f = io.read(filename, 'SH')
25 | symbol, ohlcs = f.next()
26 |
27 | self.assertEqual(symbol, "SH000001")
28 |
29 | ohlc = ohlcs[0]
30 |
31 | self.assertEqual(ohlc['time'], 661564800)
32 | self.assertFloatEqual(ohlc['open'], 96.05)
33 | self.assertFloatEqual(ohlc['close'], 99.98)
34 | self.assertFloatEqual(ohlc['volume'], 1260.0)
35 | self.assertFloatEqual(ohlc['amount'], 494000.0)
36 |
37 |
class DzhDividendTest(unittest.TestCase):
    """Spot-check the first record yielded by the DZH dividend reader."""

    def test_read_generator(self):
        io = DzhDividend()
        r = io.read()
        # next() is portable across Python 2.6+/3; .next() is 2.x only.
        data = next(r)

        self.assertEqual(data[0], "SZ000001")

        divs = data[1]
        self.assertEqual(divs[0]['time'], 701308800)
        self.assertEqual(divs[0]['split'], 0.5)
        self.assertTrue(abs(divs[0]['dividend'] - 0.20) < 0.000001)
51 |
52 |
53 |
class DzhSectorTest(unittest.TestCase):
    """Spot-check sector/option parsing from the DZH sector reader."""

    def test_read_generator(self):
        io = DzhSector()
        r = io.read()
        # next() is portable across Python 2.6+/3; .next() is 2.x only.
        sector, options = next(r)

        self.assertEqual(sector, "行业")
        # dict.has_key() was removed in Python 3; "in" works everywhere.
        self.assertTrue("工程建筑" in options)
        self.assertTrue(len(options["工程建筑"]) > 0)
64 |
65 |
66 | if __name__ == '__main__':
67 | unittest.main()
68 |
--------------------------------------------------------------------------------
/example/bench_dump.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011 yinhm
"""Benchmark: dump a day's minute data via pickle vs. shelve (Python 2)."""
import datetime
import h5py
import os
import shelve
import sys
import timeit

import cPickle as pickle
import numpy as np

# Make the repository root importable when running from example/.
ROOT_PATH = os.path.join(os.path.realpath(os.path.dirname(__file__)), '..')
sys.path[0:0] = [ROOT_PATH]

from cStringIO import StringIO
from datafeed.client import Client
from datafeed.datastore import *
from datafeed.exchange import *
from datafeed.providers.dzh import *

var_path = os.path.join(ROOT_PATH, 'var')
store = Manager('/tmp/df', SH())
filename = os.path.join(var_path, "20101202.h5")
date = datetime.datetime.strptime('20101202', '%Y%m%d').date()

# Candidate sinks: a NumpyFile pickled in one shot (f1) vs. a shelve
# database written incrementally (f2).
hdf_store = h5py.File(filename)
f1 = NumpyFile(hdf_store, date, SH().market_minutes)
f2 = shelve.open('/tmp/dump.shelve')

def f1_bench_read():
    # Copy every dataset (skipping groups) out of the HDF5 file into f1.
    for k, v in hdf_store.iteritems():
        if isinstance(v, h5py.Group):
            continue
        f1[str(k)] = v[:]

def f1_bench_dump():
    # Protocol -1 = highest available pickle protocol.
    pickle.dump(f1, open('/tmp/dump.pickle', 'wb'), -1)


def f2_bench_read():
    for k, v in hdf_store.iteritems():
        if isinstance(v, h5py.Group):
            continue
        f2[str(k)] = v[:]

def f2_bench_dump():
    # shelve writes as it goes; closing flushes it to disk.
    f2.close()


if __name__ == '__main__':
    d = 1

    # Time a single run of each phase; lower is better.
    timer = timeit.Timer(stmt='f1_bench_read()',
                         setup="from __main__ import f1_bench_read")
    result = timer.timeit(number=d)
    print result

    timer = timeit.Timer(stmt='f1_bench_dump()',
                         setup="from __main__ import f1_bench_dump")
    result = timer.timeit(number=d)
    print result

    timer = timeit.Timer(stmt='f2_bench_read()',
                         setup="from __main__ import f2_bench_read")
    result = timer.timeit(number=d)
    print result

    timer = timeit.Timer(stmt='f2_bench_dump()',
                         setup="from __main__ import f2_bench_dump")
    result = timer.timeit(number=d)
    print result
75 |
76 |
--------------------------------------------------------------------------------
/example/bench_dataset.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011 yinhm
"""Benchmark: flat vs. grouped h5py dataset layouts (Python 2)."""
import datetime
import h5py
import os
import random
import sys
import time
import timeit

import numpy as np

# Record layout for one bar: epoch seconds plus three float32 fields.
DTYPE = np.dtype({'names': ('time', 'price', 'volume', 'amount'),
                  'formats': ('i4', 'f4', 'f4', 'f4')})
17 |
18 |
def bench_ds():
    # Flat layout: 10k datasets at the file root, then 10 passes of
    # shuffled lookups to measure access cost.
    filename = '/tmp/bench-%d.h5' % int(time.time())

    symbols = ["SH%.6d" % i for i in xrange(10000)]

    f = h5py.File(filename)
    for symbol in symbols:
        f.create_dataset(symbol, (240, ), DTYPE)
    f.close()

    for x in xrange(10):
        # open for bench again
        f = h5py.File(filename)
        random.shuffle(symbols)
        for symbol in symbols:
            ds = f[symbol]
        f.close()
36 |
37 |
def require_dataset(handle, symbol):
    """Return the dataset for ``symbol`` under its 3-char group,
    creating group and dataset on demand."""
    group = handle.require_group(symbol[:3])
    try:
        return group[symbol]
    except KeyError:
        return group.create_dataset(symbol, (240, ), DTYPE)
46 |
def dataset(handle, symbol):
    """Look up a symbol's dataset via its "<group>/<symbol>" path."""
    key = "%s/%s" % (symbol[:3], symbol)
    return handle[key]
50 |
51 |
def bench_grouped_ds():
    # Grouped layout: datasets nested one level down ("SH0/SH000001"),
    # same access pattern as bench_ds for comparison.
    filename = '/tmp/bench-%d.h5' % int(time.time())

    symbols = ["SH%.6d" % i for i in xrange(10000)]

    f = h5py.File(filename)
    for symbol in symbols:
        require_dataset(f, symbol)
    f.close()

    for x in xrange(10):
        # open for bench again
        f = h5py.File(filename)
        random.shuffle(symbols)
        for symbol in symbols:
            ds = dataset(f, symbol)
        f.close()
69 |
70 |
if __name__ == '__main__':
    d = 1

    # Time one full run of each layout; lower is better.
    ds_timer = timeit.Timer(stmt='bench_ds()',
                            setup="from __main__ import bench_ds")
    ds_result = ds_timer.timeit(number=d)
    print ds_result

    grouped_ds_timer = timeit.Timer(stmt='bench_grouped_ds()',
                                    setup="from __main__ import bench_grouped_ds")
    grouped_ds_result = grouped_ds_timer.timeit(number=d)
    print grouped_ds_result
83 |
--------------------------------------------------------------------------------
/datafeed/providers/tests/test_nasdaq.py:
--------------------------------------------------------------------------------
1 | from __future__ import with_statement
2 |
3 | import datetime
4 | import os
5 | import unittest
6 |
7 | from datafeed.exchange import *
8 | from datafeed.providers.nasdaq import *
9 |
10 |
class NasdaqSecurityTest(unittest.TestCase):
    """NasdaqSecurity renders as "<exchange>:<symbol>"."""

    def test_str(self):
        sec = NasdaqSecurity(NYSE(), 'MMM')
        self.assertEqual(str(sec), 'NYSE:MMM')
16 |
17 |
class NasdaqListTest(unittest.TestCase):
    """Parse a captured snippet of the NASDAQ company-list CSV."""

    # Verbatim capture: header row plus four data rows; note every line
    # of the feed ends with a trailing comma.
    _RAW_DATA = '''"Symbol","Name","LastSale","MarketCap","IPOyear","Sector","Industry","Summary Quote",
"MMM","3M Company","91.97","65351766690","n/a","Health Care","Medical/Dental Instruments","http://quotes.nasdaq.com/asp/SummaryQuote.asp?symbol=MMM&selected=MMM",
"SVN","7 Days Group Holdings Limited","18.6","345048600","2009","Consumer Services","Hotels/Resorts","http://quotes.nasdaq.com/asp/SummaryQuote.asp?symbol=SVN&selected=SVN",
"NDN","99 Cents Only Stores","20.2","1415515000","1996","Consumer Services","Department/Specialty Retail Stores","http://quotes.nasdaq.com/asp/SummaryQuote.asp?symbol=NDN&selected=NDN",
"AHC","A.H. Belo Corporation","6.83","130575940","n/a","Consumer Services","Newspapers/Magazines","http://quotes.nasdaq.com/asp/SummaryQuote.asp?symbol=AHC&selected=AHC",'''


    def test_nasdaq_report(self):
        # Expect one parsed record per data row (the final count of 4
        # implies the header row is skipped by the parser).
        ret = NasdaqList.parse(NYSE(), self._RAW_DATA)
        i = 0
        for r in ret:
            if i == 0:
                self.assertEqual(r.security.exchange, NYSE())
                self.assertEqual(r.security.symbol, 'MMM')
                self.assertEqual(r.name, "3M Company")
                self.assertEqual(r.price, 91.97)

            if i == 1:
                self.assertEqual(r.security.exchange, NYSE())
                self.assertEqual(r.security.symbol, 'SVN')

            i += 1

        self.assertEqual(i, 4)
43 |
44 |
class NasdaqListFetcherTest(unittest.TestCase):
    """Construction defaults and argument validation of NasdaqListFetcher."""

    def test_init(self):
        fetcher = NasdaqListFetcher()
        self.assertEqual(
            'http://www.nasdaq.com/screening/companies-by-industry.aspx',
            fetcher._base_url)

    def test_fetch_with_wrong_arguments(self):
        # Only NYSE/NASDAQ/AMEX are valid; SH must be rejected.
        fetcher = NasdaqListFetcher()
        self.assertRaises(AssertionError, fetcher.fetch, SH())
55 |
56 |
if __name__ == '__main__':
    # Run this module's tests when invoked directly.
    unittest.main()
59 |
--------------------------------------------------------------------------------
/bin/datafeed:
--------------------------------------------------------------------------------
#! /bin/sh

### BEGIN INIT INFO
# Provides:          imigu
# Required-Start:    $all
# Required-Stop:     $all
# Default-Start:     2 3 4 5
# Default-Stop:      0 1 6
# Short-Description: starts the datafeed server
# Description:       starts datafeed using start-stop-daemon
### END INIT INFO

PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin

NAME=datafeed
DESC=datafeed

CHART_DAEMON=/home/josh/www/datafeed/chart.py
CHART_NAME=datafeed-chart
DAEMON_OPTS_CHART='--log_file_prefix=/home/josh/www/datafeed/log/chart.log'

SERVER_DAEMON=/home/josh/www/datafeed/server.py
SERVER_NAME=datafeed-server
DAEMON_OPTS_SERVER='--log_file_prefix=/home/josh/www/datafeed/log/datafeed.log'


# Exit quietly when either daemon script is missing or not executable.
# (Bug fix: the original tested the undefined $DAEMON; `test -x` with no
# operand evaluates the literal "-x" as a non-empty string, i.e. always
# true, so the guard never triggered.)
test -x $SERVER_DAEMON || exit 0
test -x $CHART_DAEMON || exit 0

set -e

case "$1" in
  start)
        echo -n "Starting $DESC: "
        start-stop-daemon --make-pidfile --background --start --quiet --pidfile /var/run/$SERVER_NAME.pid \
                --exec $SERVER_DAEMON -- $DAEMON_OPTS_SERVER
        start-stop-daemon --make-pidfile --background --start --quiet --pidfile /var/run/$CHART_NAME.pid \
                --exec $CHART_DAEMON -- $DAEMON_OPTS_CHART
        echo "$CHART_NAME, $SERVER_NAME"
        ;;
  stop)
        echo -n "Stopping $DESC: "
        start-stop-daemon --stop --oknodo --pidfile /var/run/$CHART_NAME.pid
        start-stop-daemon --stop --oknodo --pidfile /var/run/$SERVER_NAME.pid
        echo "$CHART_NAME, $SERVER_NAME."
        ;;

  restart|force-reload)
        echo -n "Restarting $DESC: "
        # --retry 30: wait up to 30s for a clean shutdown before restart.
        start-stop-daemon --stop --oknodo --retry 30 --pidfile /var/run/$CHART_NAME.pid
        start-stop-daemon --stop --oknodo --retry 30 --pidfile /var/run/$SERVER_NAME.pid
        sleep 1
        start-stop-daemon --make-pidfile --background --start --quiet --pidfile /var/run/$SERVER_NAME.pid \
                --exec $SERVER_DAEMON -- $DAEMON_OPTS_SERVER
        start-stop-daemon --make-pidfile --background --start --quiet --pidfile \
                /var/run/$CHART_NAME.pid --exec $CHART_DAEMON -- $DAEMON_OPTS_CHART
        echo "$CHART_NAME, $SERVER_NAME"
        ;;
  *)
        N=/etc/init.d/$NAME
        echo "Usage: $N {start|stop|restart|force-reload}" >&2
        exit 1
        ;;
esac

exit 0
66 |
--------------------------------------------------------------------------------
/datafeed/providers/tests/test_sina.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | from __future__ import with_statement
4 |
5 | import os
6 | import unittest
7 |
8 | from datetime import datetime, date
9 | from datafeed.exchange import *
10 | from datafeed.providers.sina import *
11 |
12 |
class SinaSecurityTest(unittest.TestCase):
    """SinaSecurity identifier and exchange-abbr conversions."""

    def test_abbr_sh(self):
        security = SinaSecurity(SH(), '600028')
        self.assertEqual('sh', security._abbr)

    def test_abbr_sz(self):
        security = SinaSecurity(SZ(), '000001')
        self.assertEqual('sz', security._abbr)

    def test_sina_id(self):
        security = SinaSecurity(SH(), '600028')
        self.assertEqual('sh600028', str(security))

    def test_abbr_to_exchange(self):
        self.assertEqual(SH(), SinaSecurity.get_exchange_from_abbr("sh"))

    def test_ss_abbr(self):
        security = SinaSecurity.from_string('sh600028')
        self.assertEqual(SH(), security.exchange)
        self.assertEqual('600028', security.symbol)
35 |
36 |
class SinaReportTest(unittest.TestCase):
    """Parse a two-line sample of Sina's realtime quote response."""

    _RAW_DATA = '''var hq_str_sh000001="上证指数,2911.510,2911.511,2932.188,2933.460,2890.225,0,0,96402722,102708976572,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2011-05-03,15:03:11";
var hq_str_sh600028="中国石化,8.64,8.64,8.68,8.71,8.58,8.68,8.69,27761321,240634267,11289,8.68,759700,8.67,556338,8.66,455296,8.65,56600,8.64,143671,8.69,341859,8.70,361255,8.71,314051,8.72,342155,8.73,2011-05-03,15:03:11";'''

    def test_sina_report(self):
        ret = SinaReport.parse(self._RAW_DATA)
        i = 0
        for r in ret:
            if i == 1:
                self.assertEqual(r.security.exchange, SH())
                self.assertEqual(r.security.symbol, '600028')
                self.assertEqual(r.name, '中国石化')
                self.assertEqual(r.open, 8.64)
                self.assertEqual(r.preclose, 8.64)
                self.assertEqual(str(r.date), "2011-05-03")

            i += 1

        # Bug fix: without this the test passed vacuously when parse()
        # yielded nothing (the loop body never ran).  Mirrors the count
        # check in NasdaqListTest.
        self.assertEqual(i, 2)
54 |
55 |
class SinaReportFetcherTest(unittest.TestCase):
    """Configuration, validation and live fetch of SinaReportFetcher."""

    def test_init(self):
        fetcher = SinaReportFetcher()
        self.assertEqual('http://hq.sinajs.cn', fetcher._base_url)
        self.assertEqual(20, fetcher._time_out)
        self.assertEqual(100, fetcher._request_size)

    def test_init_with_wrong_arguments(self):
        # request_size above the 100 cap must be rejected.
        self.assertRaises(AssertionError,
                          SinaReportFetcher,
                          request_size=200)

    def test_fetch(self):
        fetcher = SinaReportFetcher(request_size=2)
        sse_index = SinaSecurity(SH(), '000001')
        sinopec = SinaSecurity(SH(), '600028')
        other = SinaSecurity(SZ(), '000976')

        def callback(body):
            for quote in SinaReport.parse(body):
                if quote.security == sse_index:
                    # something must wrong if SSE Composite Index goes down to 100
                    self.assertTrue(quote.price > 100)

        fetcher.fetch(sse_index, sinopec, other,
                      callback=callback)
84 |
85 |
if __name__ == '__main__':
    # Run this module's tests when invoked directly.
    unittest.main()
88 |
--------------------------------------------------------------------------------
/datafeed/tests/test_client.py:
--------------------------------------------------------------------------------
1 | '''
2 | @FIXME
3 | ======
4 | due to client not async we need to start a real server in terminal to perform
5 | this tests.
6 | '''
7 |
8 | from __future__ import with_statement
9 |
10 | import marshal
11 | import os
12 | import sys
13 | import time
14 | import numpy
15 | import socket
16 | import unittest
17 |
18 | import numpy as np
19 |
20 | from cStringIO import StringIO
21 |
22 | from datetime import datetime
23 | from datafeed.client import Client
24 |
class ClientTest(unittest.TestCase):
    """Integration tests for datafeed.client.Client.

    NOTE: the client is synchronous, so these tests require a datafeed
    server already running (see the module docstring).
    """

    def setUp(self):
        self.client = Client()

        # Stamp the seed report at 10:30 of today so the server treats
        # it as belonging to the current trading session.
        today = datetime.today()
        timestamp = int(time.mktime((today.year, today.month, today.day,
                                     10, 30, 0, 0, 0, 0)))
        dt = datetime.fromtimestamp(timestamp)

        d = {
            'SH000001' : {
                'amount': 84596203520.0,
                'close': 2856.9899999999998,
                'high': 2880.5599999999999,
                'low': 2851.9499999999998,
                'name': u'\u4e0a\u8bc1\u6307\u6570',
                'open': 2868.73,
                'preclose': 2875.8600000000001,
                'price': 2856.9899999999998,
                'symbol': u'SH000001',
                'time': str(dt),
                'timestamp': timestamp,
                'volume': 75147848.0
            }
        }
        self.client.put_reports(d)

    def test_connect(self):
        self.client.connect()
        # NOTE(review): socket._socketobject is a private CPython 2
        # alias of socket.socket; kept for byte-compatible behavior.
        self.assertTrue(isinstance(self.client._sock, socket._socketobject))

    def test_put_reports(self):
        path = os.path.dirname(os.path.realpath(__file__))
        r = self.client.get_report('SH000001')
        # Bug fix: the dump file was opened and never closed; `with`
        # guarantees the handle is released even if loading fails.
        with open(os.path.join(path, 'reports.dump'), 'r') as f:
            data = marshal.load(f)
        for v in data.itervalues():
            if 'amount' not in v:
                continue
            # Refresh timestamps so the server accepts the records.
            v['time'] = r['time']
            v['timestamp'] = r['timestamp']

        ret = self.client.put_reports(data)
        self.assertEqual(ret, 'OK')

    def test_put_empty_reports(self):
        # An empty payload is a harmless no-op on the server.
        ret = self.client.put_reports({})
        self.assertEqual(ret, 'OK')

    def test_get_list(self):
        stocks = self.client.get_list()
        self.assertTrue(isinstance(stocks, dict))
        self.assertTrue('SH000001' in stocks)

    def test_get_report(self):
        quote = self.client.get_report('SH000001')
        self.assertTrue(isinstance(quote, dict))
        self.assertTrue(isinstance(quote['price'], float))

    def test_get_reports(self):
        # Unknown symbols are silently dropped from the result.
        stocks = self.client.get_reports('SH000001', 'KeyError')
        self.assertTrue(isinstance(stocks, dict))
        self.assertTrue('SH000001' in stocks)
        self.assertFalse('KeyError' in stocks)

    def test_put_then_get_minute(self):
        path = os.path.dirname(os.path.realpath(__file__))
        data = numpy.load(os.path.join(path, 'minute.npy'))

        symbol = 'SH999999'

        # Rebase the fixture's intraday times onto today so the server
        # stores them in the current session.
        today = datetime.today()
        for row in data:
            day = datetime.fromtimestamp(int(row['time']))
            t = time.mktime((today.year, today.month, today.day,
                             day.hour, day.minute, 0, 0, 0, 0))

            row['time'] = int(t)

        self.client.put_minute(symbol, data)

        ret = self.client.get_minute(symbol, int(time.time()))
        self.assertEqual(data['price'].tolist(), ret['price'].tolist())
109 |
110 |
if __name__ == '__main__':
    # Run this module's tests when invoked directly.
    unittest.main()
113 |
--------------------------------------------------------------------------------
/datafeed/dividend.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Copyright 2011 yinhm
5 |
6 | import datetime
7 | import numpy as np
8 |
9 | from pandas import DataFrame
10 | from pandas import TimeSeries
11 | from pandas import DatetimeIndex
12 |
13 |
class Dividend(object):
    """A single corporate action (cash dividend, rights issue and/or
    split) applied to an OHLC DataFrame via backward adjustment.

    Wraps one numpy record expected to carry the fields: time, split,
    purchase, purchase_price, dividend.
    """

    def __init__(self, div):
        """
        Paramaters:
        div: numpy dividend data.
        """
        # A record is only meaningful with a date and at least one
        # non-zero adjustment component.
        assert div['time'] > 0
        assert abs(div['split']) > 0 or \
            abs(div['purchase']) > 0 or \
            abs(div['dividend']) > 0

        self._npd = div

    def adjust(self, frame):
        '''Adjust price, volume of quotes data.

        Paramaters
        ----------
        frame: DataFrame of OHLCs.
        '''
        if self.ex_date <= frame.index[0].date(): # no adjustment needed
            return True

        if self.ex_date > datetime.date.today(): # not mature
            return True

        # Cash first, then share-count scaling, both on 'adjclose'.
        self._divide(frame)
        self._split(frame)

    def _divide(self, frame):
        """divided close price to adjclose column

        WARNING
        =======
        frame should be chronological ordered otherwise wrong backfill.
        """
        if self.cash_afterward == 0:
            return

        # Two anchor points: full cash amount up to the day before the
        # ex-date, zero from then on; backfill projects them onto every
        # bar of the frame's index.
        cashes = [self.cash_afterward, 0.0]
        adj_day = self.ex_date - datetime.timedelta(days=1)
        indexes = []
        indexes.append(self.d2t(adj_day))
        indexes.append(self.d2t(datetime.date.today()))

        cashes = TimeSeries(cashes, index=indexes)
        ri_cashes = cashes.reindex(frame.index, method='backfill')

        frame['adjclose'] = frame['adjclose'] - ri_cashes

    def _split(self, frame):
        # share_afterward == 1 means shares outstanding were unchanged.
        if self.share_afterward == 1:
            return

        # Same two-anchor backfill as _divide, but dividing by the
        # share ratio instead of subtracting cash.
        splits = [self.share_afterward, 1.0]
        adj_day = self.ex_date - datetime.timedelta(days=1)
        indexes = []
        indexes.append(self.d2t(adj_day))
        indexes.append(self.d2t(datetime.date.today()))

        splits = TimeSeries(splits, index=indexes)
        ri_splits = splits.reindex(frame.index, method='backfill')

        frame['adjclose'] = frame['adjclose'] / ri_splits

    @property
    def ex_date(self):
        """Ex-dividend date, derived from the record's epoch time."""
        return datetime.date.fromtimestamp(self._npd['time'])

    @property
    def cash_afterward(self):
        """Net cash per share: dividend minus cost of rights taken up."""
        return self._npd['dividend'] - self._npd['purchase'] * self._npd['purchase_price']

    @property
    def share_afterward(self):
        """Shares held afterwards per pre-event share (1 + rights + split)."""
        return 1 + self._npd['purchase'] + self._npd['split']

    def d2t(self, date):
        """Convert a date to a midnight datetime matching the frame index."""
        return datetime.datetime.combine(date, datetime.time())
94 |
95 |
def adjust(y, divs, capitalize=False):
    """Return fully adjusted OHLCs data base on dividends

    Paramaters:
    y: numpy
    divs: numpy of dividends

    Return:
    DataFrame objects
    """
    stamps = [datetime.datetime.fromtimestamp(t) for t in y['time']]
    quotes = DataFrame.from_records(y, index=DatetimeIndex(stamps),
                                    exclude=['time'])
    quotes['adjclose'] = quotes['close']

    # Apply every non-empty dividend record to the adjclose column.
    for record in divs:
        if record['split'] + record['purchase'] + record['dividend'] == 0:
            continue
        Dividend(record).adjust(quotes)

    # Scale all prices by the adjustment factor; volume inversely.
    factor = quotes['adjclose'] / quotes['close']
    frame = quotes.copy()
    for column in ('open', 'high', 'low', 'close'):
        frame[column] = frame[column] * factor
    frame['volume'] = frame['volume'] * (1 / factor)

    if capitalize:
        # Yahoo-style column names; the last column becomes 'Adjusted'.
        columns = [k.capitalize() for k in frame.columns]
        columns[-1] = 'Adjusted'
        frame.columns = columns
        del(frame['Amount'])
    return frame
130 |
--------------------------------------------------------------------------------
/datafeed/providers/http_fetcher.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import functools
4 | import logging
5 | import sys
6 |
7 | from tornado.curl_httpclient import CurlAsyncHTTPClient as AsyncHTTPClient
8 | from tornado import ioloop
9 |
10 |
# Compatibility shim: itertools.izip_longest appeared in Python 2.6;
# provide the documented pure-Python equivalent on 2.5.
try:
    from itertools import izip_longest
except ImportError:
    """Python 2.5 support"""
    from itertools import izip, chain, repeat
    if sys.version_info[:2] < (2,6):
        def izip_longest(*args, **kwds):
            # izip_longest('ABCD', 'xy', fillvalue='-') --> Ax By C- D-
            fillvalue = kwds.get('fillvalue')
            # sentinel() yields fillvalue len(args)-1 times, then raises
            # IndexError — that signals every input is exhausted.
            def sentinel(counter = ([fillvalue]*(len(args)-1)).pop):
                yield counter() # yields the fillvalue, or raises IndexError
            fillers = repeat(fillvalue)
            iters = [chain(it, sentinel(), fillers) for it in args]
            try:
                for tup in izip(*iters):
                    yield tup
            except IndexError:
                pass
29 |
30 |
31 | __all__ = ['Fetcher', 'DayFetcher', 'zip_slice']
32 |
33 |
class Fetcher(object):
    """Base class for tornado-backed asynchronous HTTP fetchers.

    Subclasses implement _fetching_urls() (and usually _callback());
    fetch() queues one request per url on a private IOLoop and blocks
    until every request completed (see stop()).
    """

    # Upper bound for the max_clients constructor argument.
    _MAX_CLIENTS = 10

    def __init__(self, base_url=None, time_out=20, max_clients=10):
        assert isinstance(base_url, basestring)
        assert isinstance(time_out, int)
        assert isinstance(max_clients, int)
        assert max_clients <= self._MAX_CLIENTS

        self._base_url = base_url
        self._time_out = time_out
        self._max_clients = max_clients

        # Private loop so fetch() behaves synchronously to the caller.
        self._io_loop = ioloop.IOLoop()

        # Requests still in flight; stop() only stops the loop at zero.
        self.queue_len = 0

    def fetch(self, *args, **kwargs):
        """Fetch all urls derived from *args, blocking until done.

        NOTE(review): `ret` is never populated, so the return value is
        always [] — results are delivered through the 'callback' kwarg.
        """
        ret = []
        if not len(args) > 0:
            return ret

        urls = self._fetching_urls(*args, **kwargs)

        http = AsyncHTTPClient(self._io_loop)
        i = 0
        for url in urls:
            # args[i] pairs the i-th url with the i-th security; batching
            # subclasses yield fewer urls than args and ignore it.
            callback = self._callback(args[i], **kwargs)
            logging.info("start urlfetch %s" % url)
            http.fetch(url, callback)
            self.queue_len = self.queue_len + 1
            i += 1

        self._io_loop.start()
        return ret

    def _fetching_urls(self, *args, **kwargs):
        # Subclass hook: yield one url per HTTP request.
        raise NotImplementedError()

    def _slice(self, iterable, fillvalue=None):
        # Split securities into request-sized batches (padded with
        # fillvalue); requires the subclass to set self._request_size.
        return zip_slice(self._request_size, iterable, fillvalue)

    def _callback(self, security, **kwargs):
        # Subclass hook: build the response handler for one request.
        pass

    def stop(self):
        """Stop the private IOLoop once the request queue is drained."""
        if self.queue_len == 0:
            self._io_loop.stop()
82 |
83 |
class DayFetcher(Fetcher):
    """Fetcher for per-security daily history over a date range."""

    def _fetching_urls(self, *args, **kwargs):
        # A date range is mandatory for daily-history downloads.
        assert 'start_date' in kwargs
        assert 'end_date' in kwargs

        return (self._make_url(security, **kwargs) for security in args)

    def _make_url(self, security, **kwargs):
        # Subclass hook: build the provider-specific history url.
        raise NotImplementedError()

    def _callback(self, security, **kwargs):
        # Bind the (possibly absent) user callback and the security to
        # the shared response handler.
        return functools.partial(self._handle_request,
                                 kwargs.get('callback'),
                                 security)

    def _handle_request(self, callback, security, response):
        try:
            self.queue_len -= 1

            if response.error:
                logging.error(response.error)
            else:
                callback(security, response.body)
        except StandardError:
            logging.error("Wrong data format.")
        finally:
            self.stop()
118 |
119 |
def zip_slice(len_each, iterable, fillvalue=None):
    "zip_slice(3, 'ABCDEFG', 'x') --> ABC DEF Gxx"
    assert isinstance(len_each, int)
    # The classic grouper recipe: len_each references to one iterator,
    # so consecutive items land in consecutive slots of each tuple.
    iterators = [iter(iterable)] * len_each
    return izip_longest(*iterators, fillvalue=fillvalue)
125 |
--------------------------------------------------------------------------------
/datafeed/providers/nasdaq.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | NASDAQ && NYSE stocks list.
5 | http://www.nasdaq.com/screening/companies-by-industry.aspx
6 | """
7 | import csv
8 | import functools
9 | import logging
10 | import sys
11 |
12 | from cStringIO import StringIO
13 |
14 | from tornado import httpclient
15 | from tornado import ioloop
16 |
17 | from datafeed.exchange import *
18 | from datafeed.quote import *
19 | from datafeed.providers.http_fetcher import *
20 | from datafeed.utils import json_decode
21 |
22 | __all__ = ['NasdaqSecurity',
23 | 'NasdaqList', 'NasdaqListFetcher']
24 |
25 |
class NasdaqSecurity(Security):
    """Security row from a nasdaq.com exchange listing (NYSE/NASDAQ/AMEX)."""
    pass
28 |
class NasdaqList(SecurityList):
    """One row of the nasdaq.com company-list CSV download."""

    # Tags format defined by NasdaqReportFetcher which is:
    # "sl1d1t1c1ohgv"
    # FIXME: Nasdaq quotes became N/A during session after hours.
    # Each (key, converter) consumes one CSV field, in order.
    _DEFINITIONS = (
        ("symbol", lambda x: x.strip()),
        ("name", str),
        ("price", float),
        ("market_cap", str),
        ("ipo_year", str),
        ("sector", str),
        ("industry", str),
        ("summary", str)
    )

    def __init__(self, exchange, raw_data):
        # Every row ends with a trailing comma, producing one empty
        # extra field; drop it before converting.
        raw_data.pop()
        assert len(raw_data) == len(self._DEFINITIONS)

        data = {}
        for (key, convert), value in zip(self._DEFINITIONS, raw_data):
            data[key] = convert(value)

        security = NasdaqSecurity(exchange, data.pop('symbol'), data['name'])
        super(NasdaqList, self).__init__(security, data)

    def __repr__(self):
        return "%s\r\n%s" % (self.security, self.name)

    def __str__(self):
        return "%s" % (self.security, )

    @staticmethod
    def parse(exchange, rawdata):
        """Parse security list for specific exchange.
        """
        reader = csv.reader(StringIO(rawdata))
        reader.next()  # skip the header row
        return (NasdaqList(exchange, row) for row in reader)
73 |
74 |
class NasdaqListFetcher(Fetcher):
    """Download company-list CSVs from nasdaq.com per exchange."""

    # Maximum number of stocks we'll batch fetch.
    _MAX_REQUEST_SIZE = 100
    _BASE_URL = "http://www.nasdaq.com/screening/companies-by-industry.aspx"

    def __init__(self, base_url=_BASE_URL,
                 time_out=20, max_clients=10, request_size=100):
        assert request_size <= self._MAX_REQUEST_SIZE

        super(NasdaqListFetcher, self).__init__(base_url, time_out, max_clients)
        self._request_size = request_size

    def _fetching_urls(self, *args, **kwargs):
        """Making list of fetching urls from exchanges.
        """
        # Only the three US exchanges nasdaq.com serves are accepted.
        for exchange in args:
            assert isinstance(exchange, (NYSE, NASDAQ, AMEX))
        return (self._make_url(exchange) for exchange in args)

    def _make_url(self, exchange):
        """Make url to fetch.

        example:
        http://www.nasdaq.com/screening/companies-by-industry.aspx?exchange=NYSE&render=download
        """
        return "%s?exchange=%s&render=download" % (self._base_url, exchange)

    def _callback(self, security, **kwargs):
        # Bind the (possibly absent) user callback and the exchange to
        # the shared response handler.
        return functools.partial(self._handle_request,
                                 kwargs.get('callback'),
                                 security)

    def _handle_request(self, callback, exchange, response):
        try:
            self.queue_len -= 1

            if response.error:
                logging.error(response.error)
            else:
                callback(exchange, response.body)
        except StandardError:
            logging.error("Wrong data format.")
        finally:
            self.stop()
126 |
--------------------------------------------------------------------------------
/datafeed/providers/sina.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | Stocks:
5 | http://hq.sinajs.cn/list=sh600028,sz000100
6 |
7 |
8 | Indexes:
9 | http://hq.sinajs.cn/list=s_sh000001
10 |
11 |
12 | Charts:
13 | http://image.sinajs.cn/newchart/min/n/sh000001.gif
14 | http://image.sinajs.cn/newchart/daily/n/sh000001.gif
15 | """
16 |
17 | import functools
18 | import logging
19 | import sys
20 |
21 | from dateutil import parser
22 |
23 | from datafeed.exchange import *
24 | from datafeed.quote import *
25 | from datafeed.providers.http_fetcher import Fetcher
26 | from tornado.escape import json_decode
27 |
28 | __all__ = ['SinaSecurity', 'SinaReport', 'SinaReportFetcher']
29 |
30 | # Sina finance
31 | _EXCHANGES = {
32 | "HK": 'HGK', #Hongkong
33 | "SH": "SHA", #Shanghai
34 | "SZ": "SHE", #ShenZhen
35 | "NASDAQ": "NASDAQ", # NASDAQ
36 | }
37 |
38 |
class SinaSecurity(Security):
    """Security addressed by Sina's '<abbr><symbol>' identifier scheme."""

    def __str__(self):
        """Symbol with exchange abbr suffix"""
        return "%s%s" % (self._abbr, self.symbol)

    @property
    def _abbr(self):
        """Sina finance specific exchange abbr."""
        return str(self.exchange).lower()

    @classmethod
    def from_string(cls, idstr):
        # First two characters are the exchange abbr, the rest the symbol.
        exchange = cls.get_exchange_from_abbr(idstr[:2])
        return cls(exchange, idstr[2:])

    @classmethod
    def get_exchange_from_abbr(cls, abbr):
        """Get exchange from sina abbr."""
        exchange_class = getattr(sys.modules[__name__], abbr.upper())
        return exchange_class()
61 |
62 |
class SinaReport(Report):
    """One realtime quote parsed from a hq.sinajs.cn response line."""

    # Data example:
    # var hq_str_sh600028="中国石化,8.64,8.64,8.68,8.71,8.58,8.68,8.69,
    # 27761321,240634267,11289,8.68,759700,8.67,556338,8.66,455296,8.65,
    # 56600,8.64,143671,8.69,341859,8.70,361255,8.71,314051,8.72,342155,8.73,
    # 2011-05-03,15:03:11";'''
    # Order matters: each (key, converter) pair consumes one of the 32
    # comma-separated fields, in sequence.
    _DEFINITIONS = (
        ("name", str),
        ("open", float),
        ("preclose", float),
        ("price", float),
        ("high", float),
        ("low", float),
        ("bid", float),
        ("ask", float),
        ("volume", int),
        ("amount", float),
        ("bid1", int),
        ("bidp1", float),
        ("bid2", int),
        ("bidp2", float),
        ("bid3", int),
        ("bidp3", float),
        ("bid4", int),
        ("bidp4", float),
        ("bid5", int),
        ("bidp5", float),
        ("ask1", int),
        ("askp1", float),
        ("ask2", int),
        ("askp2", float),
        ("ask3", int),
        ("askp3", float),
        ("ask4", int),
        ("askp4", float),
        ("ask5", int),
        ("askp5", float),
        ("date", lambda x: parser.parse(x).date()),
        ("time", lambda x: parser.parse(x))
    )

    def __init__(self, security, raw_data):
        """Build one report from the 32 comma-split fields of a quote."""
        assert len(raw_data) == 32

        data = {}
        i = 0
        for conf in self._DEFINITIONS:
            key, callback = conf
            data[key] = callback(raw_data[i])
            i += 1

        super(SinaReport, self).__init__(security, data)

    @staticmethod
    def parse(rawdata):
        """Lazily parse a multi-line response body into SinaReports."""
        from cStringIO import StringIO

        f = StringIO(rawdata)
        return (SinaReport.parse_line(line) for line in f)

    @staticmethod
    def parse_line(line):
        """Parse one 'var hq_str_<id>="<fields>";' assignment line."""
        # splited[0] is 'var hq_str_sh600028=': take the last '_' part
        # and drop the trailing '=' to get the sina id.
        splited = line.split('"')
        idstr = splited[0].split('_').pop()[:-1]
        s = SinaSecurity.from_string(idstr)
        return SinaReport(s, splited[1].split(','))
130 |
131 |
class SinaReportFetcher(Fetcher):
    """Batch-fetch realtime quotes from hq.sinajs.cn."""

    # Maximum number of stocks we'll batch fetch.
    _MAX_REQUEST_SIZE = 100

    def __init__(self, base_url='http://hq.sinajs.cn',
                 time_out=20, max_clients=10, request_size=100):
        assert request_size <= self._MAX_REQUEST_SIZE

        super(SinaReportFetcher, self).__init__(base_url, time_out, max_clients)
        self._request_size = request_size

    def _fetching_urls(self, *args, **kwargs):
        # Group securities into request-sized batches; the slicer pads
        # the last batch with None, which must be filtered back out.
        batches = self._slice(str(security) for security in args)
        return (self._make_url([sid for sid in batch if sid is not None])
                for batch in batches)

    def _make_url(self, ids):
        return "%s/list=%s" % (self._base_url, ','.join(ids))

    def _callback(self, security, **kwargs):
        # One response carries many securities, so the handler only
        # needs the user callback.
        return functools.partial(self._handle_request,
                                 kwargs.get('callback'))

    def _handle_request(self, callback, response):
        try:
            self.queue_len -= 1

            if response.error:
                logging.error(response.error)
            else:
                callback(response.body)
        except StandardError:
            logging.error("Wrong data format.")
        finally:
            self.stop()
172 |
--------------------------------------------------------------------------------
/datafeed/exchange.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | '''Basic market infos.
3 |
4 | #TBD: extract currency from major exchanges.
5 | '''
6 | import sys
7 | import time
8 |
9 | from datetime import datetime
10 |
11 |
12 | __all__ = ['StockExchange',
13 | 'AMEX', 'LON', 'NASDAQ',
14 | 'NYSE', 'HK', 'SH', 'SZ', 'TYO',
15 | 'YahooNA', 'Security']
16 |
17 |
class StockExchange(object):
    '''Major stock exchanges, see:
    - http://en.wikipedia.org/wiki/Stock_exchange
    - http://www.wikinvest.com/wiki/List_of_Stock_Exchanges

    Each subclass acts as a per-class singleton: constructing it any
    number of times returns the same shared instance.
    '''
    # Session tuples are ((hour, minute), (hour, minute)) in local
    # exchange time; None when the exchange has no such session.
    _pre_market_session = None
    _market_session = None
    _market_break_session = None

    # Singleton registry shared by all subclasses, keyed by class name.
    _instances = dict()

    def __new__(cls, *args, **kwargs):
        klass = cls.__name__
        # Fix: `dict.has_key` is deprecated since Python 2.2 and removed
        # in Python 3; the `in` operator is the portable equivalent.
        if klass not in cls._instances:
            cls._instances[klass] = super(StockExchange, cls).__new__(
                cls, *args, **kwargs)
        return cls._instances[klass]

    @classmethod
    def change_time(cls, hour, minute, day=None, now=None):
        """Return the local epoch time of hour:minute on a given day.

        `now` (epoch seconds) takes precedence over `day` (an object
        with year/month/day); both default to today.
        """
        if now:
            day = datetime.fromtimestamp(now)
        if not day:
            day = datetime.today()
        t = time.mktime((day.year, day.month, day.day,
                         hour, minute, 0, 0, 0, 0))

        return t

    @classmethod
    def pre_open_time(cls, **kwargs):
        """Epoch time the pre-market (call auction) session begins."""
        return cls.change_time(cls._pre_market_session[0][0],
                               cls._pre_market_session[0][1],
                               **kwargs)

    @classmethod
    def open_time(cls, **kwargs):
        """Epoch time the regular session opens."""
        return cls.change_time(cls._market_session[0][0],
                               cls._market_session[0][1],
                               **kwargs)

    @classmethod
    def break_time(cls, **kwargs):
        """Epoch time the midday trading break starts."""
        return cls.change_time(cls._market_break_session[0][0],
                               cls._market_break_session[0][1],
                               **kwargs)

    @classmethod
    def close_time(cls, **kwargs):
        """Epoch time the regular session closes."""
        return cls.change_time(cls._market_session[1][0],
                               cls._market_session[1][1],
                               **kwargs)

    def __repr__(self):
        return self.__class__.__name__
    __str__ = __repr__
74 |
75 |
# Concrete exchange singletons.  Session tuples are in the exchange's
# local time; see StockExchange.change_time for conversion to epoch.

class AMEX(StockExchange):
    name = 'American Stock Exchange' # NYSE Amex Equities
    currency = ('$', 'USD')
    timezone = 'US/Eastern'
    _market_session = ((9, 30), (16, 0))


class HK(StockExchange):
    name = 'Hong Kong Stock Exchange'
    currency = ('$', 'HKD')
    timezone = 'Asia/Shanghai'
    _pre_market_session = ((9, 30), (9, 50))
    _market_session = ((10, 0), (16, 0))
    _market_break_session = ((12, 0), (13, 30))


class LON(StockExchange):
    name = 'London Stock Exchange'
    currency = ('$', 'GBX')
    timezone = 'Europe/London'
    _market_session = ((9, 0), (17, 0))


class NASDAQ(StockExchange):
    name = 'NASDAQ Stock Exchange'
    currency = ('$', 'USD')
    timezone = 'US/Eastern'
    _market_session = ((9, 30), (16, 0))


class NYSE(StockExchange):
    name = 'New York Stock Exchange'
    currency = ('$', 'USD')
    timezone = 'US/Eastern'
    _market_session = ((9, 30), (16, 0))


# NYSE Arca trades the same hours as NYSE; reuse its definition.
class NYSEARCA(NYSE):
    pass


class SH(StockExchange):
    name = 'Shanghai Stock Exchange'
    currency = ('¥', 'CNY')
    timezone = 'Asia/Shanghai'
    _pre_market_session = ((9, 15), (9, 25))
    _market_session = ((9, 30), (15, 0))
    _market_break_session = ((11, 30), (13, 0))

    # Daily minute data count.
    market_minutes = 242


# Shenzhen shares Shanghai's sessions and timezone.
class SZ(SH):
    timezone = 'Asia/Shanghai'
    name = 'Shenzhen Stock Exchange'


class TYO(StockExchange):
    name = 'Tokyo Stock Exchange'
    currency = ('¥', 'JPY')
    timezone = 'Asia/Tokyo'
    _market_session = ((9, 0), (15, 0))


# Placeholder used when Yahoo! does not identify an exchange; its str()
# is empty so symbols render without an exchange suffix.
class YahooNA(StockExchange):
    name = 'Exchange N/A for Yahoo!'
    currency = ('$', 'USD') #default to usd
    timezone = "GMT" #default to GMT

    def __str__(self):
        return ""
148 |
class Security(object):
    """Finance securities includes:
    - stocks
    - stock indexes
    - funds/mutual funds
    - options
    - bonds
    """
    # The exchange classes live in this module; used by from_abbr.
    modules = sys.modules[__name__]

    __slots__ = ['exchange', 'symbol', 'name']

    def __init__(self, exchange, symbol, name=None):
        assert isinstance(exchange, StockExchange), "Wrong exchange."
        self.exchange = exchange
        self.symbol = symbol
        self.name = name

    def __eq__(self, other):
        # Two securities match on exchange + symbol; name is cosmetic.
        return (self.exchange == other.exchange
                and self.symbol == other.symbol)

    def __getstate__(self):
        # Needed because __slots__ removes the default __dict__ pickling.
        return self.exchange, self.symbol

    def __setstate__(self, state):
        self.exchange, self.symbol = state

    def __repr__(self):
        parts = ["%s()" % self.exchange, "'%s'" % self.symbol]
        if self.name:
            parts.append("'%s'" % self.name)
        return "%s(%s)" % (self.__class__.__name__,
                           ', '.join(parts))

    def __str__(self):
        """Symbol with exchange abbr (pre)suffix.
        """
        return "%s:%s" % (self._abbr, self.symbol)

    @classmethod
    def from_security(cls, security):
        """Helper method for convert from different services adapter."""
        assert isinstance(security, Security)
        return cls(security.exchange,
                   security.symbol,
                   security.name)

    @classmethod
    def from_abbr(cls, abbr, symbol, name=None):
        # Resolve the abbr to its exchange class and instantiate it.
        exchange_class = getattr(cls.modules, abbr)
        return cls(exchange_class(), symbol, name)

    @property
    def _abbr(self):
        """Symbol with exchange abbr suffix.
        """
        return str(self.exchange)
209 |
--------------------------------------------------------------------------------
/datafeed/providers/tests/test_yahoo.py:
--------------------------------------------------------------------------------
1 | from __future__ import with_statement
2 |
3 | import datetime
4 | import os
5 | import unittest
6 |
7 | from datafeed.exchange import *
8 | from datafeed.providers.yahoo import *
9 |
10 |
class YahooSecurityTest(unittest.TestCase):
    """YahooSecurity symbol / exchange-abbr conversions."""

    def test_abbr_sha(self):
        security = YahooSecurity(SH(), '600028')
        self.assertEqual('SS', security._abbr)

    def test_abbr_she(self):
        security = YahooSecurity(SZ(), '000001')
        self.assertEqual('SZ', security._abbr)

    def test_yahoo_id(self):
        security = YahooSecurity(SH(), '600028')
        self.assertEqual('600028.SS', str(security))

    def test_abbr_to_exchange(self):
        self.assertEqual(SH(), YahooSecurity.get_exchange_from_abbr("SS"))

    def test_ss_abbr(self):
        security = YahooSecurity.from_string('600028.SS')
        self.assertEqual(SH(), security.exchange)
        self.assertEqual('600028', security.symbol)
        self.assertEqual('600028.SS', str(security))
34 |
35 |
class YahooReportTest(unittest.TestCase):
    """Parsing tests for the quotes.csv live-report format."""

    _RAW_DATA = '''"GOOG",533.89,"5/3/2011","4:00pm",-4.67,537.13,542.01,529.63,2081574
"AAPL",348.20,"5/3/2011","4:00pm",+1.92,347.91,349.89,345.62,11198607
"600028.SS",8.58,"5/4/2011","1:47am",-0.10,8.64,8.67,8.55,23045288'''

    def test_yahoo_report(self):
        reports = YahooReport.parse(self._RAW_DATA)
        count = 0
        for idx, report in enumerate(reports):
            if idx == 0:
                # US symbols carry no exchange suffix.
                self.assertEqual(report.security.exchange, YahooNA())
                self.assertEqual(report.security.symbol, 'GOOG')
                self.assertEqual(str(report.date), "2011-05-03")
                self.assertEqual(report.time.hour, 16)
                self.assertEqual(report.time.minute, 0)
                self.assertEqual(report.price, 533.89)
                self.assertEqual(report.change, -4.67)
                self.assertEqual(report.open, 537.13)
                self.assertEqual(report.high, 542.01)
                self.assertEqual(report.low, 529.63)
                self.assertEqual(report.volume, 2081574)

            if idx == 2:
                self.assertEqual(report.security.exchange, SH())
                self.assertEqual(report.security.symbol, '600028')

            count = idx + 1

        # All three lines must have been parsed.
        self.assertEqual(count, 3)
66 |
67 |
class YahooDayTest(unittest.TestCase):
    def test_parse_day(self):
        """Parse the table.csv fixture into YahooDay objects."""
        path = os.path.dirname(os.path.realpath(__file__))
        # Use a context manager so the fixture file is closed even when
        # read() raises (the original leaked the handle on failure; the
        # file already imports `with_statement` from __future__).
        with open(os.path.join(path, 'yahoo_tables.csv'), 'r') as f:
            data = f.read()

        security = YahooSecurity(YahooNA(), 'GOOG')
        iters = YahooDay.parse(security, data)
        i = 0
        for ohlc in iters:
            if i == 0:
                # 2011-05-03,537.13,542.01,529.63,533.89,2081500,533.89
                self.assertEqual(str(ohlc.date), "2011-05-03")
                self.assertEqual(ohlc.open, 537.13)
                self.assertEqual(ohlc.high, 542.01)
                self.assertEqual(ohlc.low, 529.63)
                self.assertEqual(ohlc.close, 533.89)
                self.assertEqual(ohlc.volume, 2081500)
                self.assertEqual(ohlc.adjclose, 533.89)
            i += 1
89 |
90 |
class YahooReportFetcherTest(unittest.TestCase):
    """Construction and live-fetch tests for YahooReportFetcher."""

    def test_init(self):
        fetcher = YahooReportFetcher()
        self.assertEqual(fetcher._base_url,
                         'http://download.finance.yahoo.com/d/quotes.csv')

    def test_init_with_arguments(self):
        fetcher = YahooReportFetcher(time_out=10, request_size=50)
        self.assertEqual(fetcher._time_out, 10)
        self.assertEqual(fetcher._request_size, 50)

    def test_init_with_wrong_arguments(self):
        # A request_size above _MAX_REQUEST_SIZE (100) must be rejected.
        self.assertRaises(AssertionError,
                          YahooReportFetcher,
                          request_size=200)

    def test_fetch(self):
        fetcher = YahooReportFetcher(request_size=2)
        goog = YahooSecurity(YahooNA(), 'GOOG')
        aapl = YahooSecurity(YahooNA(), 'AAPL')
        sse = YahooSecurity(SH(), '000001')

        def on_body(body):
            for quote in YahooReport.parse(body):
                if quote.security == sse:
                    # something must wrong if SSE Composite Index goes down to 100
                    self.assertTrue(quote.price > 100)

        fetcher.fetch(goog, aapl, sse, callback=on_body)
122 |
class YahooDayFetcherTest(unittest.TestCase):
    """Construction and live-fetch tests for YahooDayFetcher."""

    def test_init(self):
        fetcher = YahooDayFetcher()
        self.assertEqual(fetcher._base_url,
                         'http://ichart.finance.yahoo.com/table.csv')
        self.assertEqual(fetcher._time_out, 20)
        self.assertEqual(fetcher._max_clients, 10)

    def test_init_with_wrong_arguments(self):
        # NOTE(review): this constructs YahooReportFetcher, not
        # YahooDayFetcher -- possibly copied from YahooReportFetcherTest;
        # confirm the intent before changing.
        self.assertRaises(AssertionError,
                          YahooReportFetcher,
                          max_clients=20)

    def test_fetch(self):
        fetcher = YahooDayFetcher()
        goog = YahooSecurity(YahooNA(), 'GOOG')
        aapl = YahooSecurity(YahooNA(), 'AAPL')

        def on_body(security, body):
            rows = YahooDay.parse(security, body)
            for idx, ohlc in enumerate(rows):
                self.assertTrue(ohlc.security in (goog, aapl))
                if idx == 0 and ohlc.security == goog:
                    self.assertEqual(str(ohlc.date), "2011-04-28")
                    self.assertEqual(ohlc.open, 538.06)
                    self.assertEqual(ohlc.high, 539.25)
                    self.assertEqual(ohlc.low, 534.08)
                    self.assertEqual(ohlc.close, 537.97)
                    self.assertEqual(ohlc.volume, 2037400.0)

        start_date = datetime.datetime.strptime("2011-04-01", "%Y-%m-%d").date()
        end_date = datetime.datetime.strptime("2011-04-28", "%Y-%m-%d").date()
        fetcher.fetch(goog, aapl,
                      callback=on_body,
                      start_date=start_date,
                      end_date=end_date)
162 |
163 | class YahooNewsFetcherTest(unittest.TestCase):
164 | def test_fetch(self):
165 | f = YahooNewsFetcher()
166 | s1 = YahooSecurity(YahooNA(), 'GOOG')
167 | s2 = YahooSecurity(YahooNA(), 'AAPL')
168 | s3 = YahooSecurity(SH(), '000001')
169 |
170 | def callback(security, response):
171 | self.assertTrue(response.body.startswith(' CR LF
101 | $ CR LF
102 | CR LF
103 | ...
104 | $ CR LF
105 | CR LF
106 |
107 |
108 | See the following example:
109 |
110 | *3
111 | $3
112 | SET
113 | $5
114 | mykey
115 | $7
116 | myvalue
117 | """
118 | return self._execute_command(args[0], args[-1], self._build_data(*args))
119 |
120 | def _build_data(self, *args):
121 | cmds = ('$%s\r\n%s\r\n' % (len(arg), arg) for arg in args)
122 | return '*%s\r\n%s' % (len(args), ''.join(cmds))
123 |
    def _execute_command(self, command, format, data):
        # Write the raw protocol payload, then read and decode the reply
        # according to `format` (plain/json/npy).
        self.send(data)
        return self._parse_response(command, format)
127 |
128 | def send(self, data):
129 | self.ensure_connected()
130 | try:
131 | self._sock.sendall(data)
132 | except socket.error, e:
133 | if self.reconnect():
134 | self._sock.sendall(data)
135 | else:
136 | raise StandardError("Error %s while writing to socket. %s." % \
137 | e.args)
138 |
    def _parse_response(self, command, format):
        """Read one reply from the socket and decode it.

        Implements the Redis-like reply grammar: '-' error, '+' status,
        ':' integer, '$' bulk.  Bulk payloads are further decoded per
        `format`: 'json' via json_decode, 'npy' via np.load, anything
        else returned as the raw string.

        Raises StandardError if the remote end closed the socket, and a
        plain Exception for server errors or unknown reply types.
        """
        response = self.read()[:-2] # strip last two characters (\r\n)
        if not response:
            self.disconnect()
            raise StandardError("Socket closed on remote end")

        # server returned a null value
        if response in ('$-1', '*-1'):
            return None
        reply_type, response = response[0], response[1:]

        # server returned an error
        if reply_type == '-':
            # Strip the conventional "ERR " prefix before raising.
            if response.startswith('ERR '):
                response = response[4:]
            raise Exception(response)
        # single value
        elif reply_type == '+':
            return response
        # integer value
        elif reply_type == ':':
            return int(response)
        # bulk response
        elif reply_type == '$':
            length = int(response)
            # `length and ... or ''` yields '' for a zero-length bulk body.
            response = length and self.read(length) or ''
            self.read(2) # read the \r\n delimiter

            if format == 'json':
                return json_decode(response)
            elif format == 'npy':
                qdata = StringIO(response)
                return np.load(qdata)
            else:
                return response

        raise Exception("Unknown response type for: %s" % command)
176 |
    def auth(self):
        """Authenticate against the server with the configured password."""
        self.execute_command('AUTH', self._password, 'plain')

    def get_mtime(self):
        """Return the server's last quote-modification time."""
        return self.execute_command('GET_MTIME', 'plain')

    def get_list(self, match='', format='json'):
        """List instrument symbols, optionally filtered by `match`."""
        return self.execute_command('GET_LIST', match, format)

    def get_report(self, symbol, format='json'):
        """Fetch the latest quote snapshot for one symbol."""
        return self.execute_command('GET_REPORT', symbol, format)

    def get_reports(self, *args, **kwargs):
        """Fetch reports for several symbols; `format` keyword defaults to 'json'."""
        format = 'json'
        if 'format' in kwargs:
            format = kwargs['format']
        # The wire format expects the format token as the trailing argument.
        args = args + (format,)
        return self.execute_command('GET_REPORTS', *args)

    def get_minute(self, symbol, timestamp=0, format='npy'):
        """Get minute history data.

        timestamp: 0 for last day data.
        """
        assert isinstance(timestamp, int)
        return self.execute_command('GET_MINUTE', symbol, str(timestamp), format)

    def get_1minute(self, symbol, date, format='npy'):
        """Get minute history data.

        date: specific day to retrieve.
        """
        return self.execute_command('GET_1MINUTE', symbol, date, format)

    def get_5minute(self, symbol, date, format='npy'):
        """Get minute history data.

        date: specific day to retrieve.
        """
        return self.execute_command('GET_5MINUTE', symbol, date, format)

    def get_day(self, symbol, length_or_date, format='npy'):
        """Get daily history: an int length or an 8-char 'YYYYMMDD' date string."""
        assert isinstance(length_or_date, int) or len(length_or_date) == 8
        return self.execute_command('GET_DAY', symbol, str(length_or_date), format)

    def get_dividend(self, symbol, format='npy'):
        """Fetch dividend records for a symbol."""
        return self.execute_command('GET_DIVIDEND', symbol, format)

    def get_fin(self, symbol, format='npy'):
        """Fetch financial (fundamental) data for a symbol."""
        return self.execute_command('GET_FIN', symbol, format)

    def get_sector(self, name, format='json'):
        """Fetch the constituents of a named sector."""
        return self.execute_command('GET_SECTOR', name, format)

    def get_stats(self):
        """Fetch server statistics."""
        return self.execute_command('GET_STATS', 'json')
233 |
234 | def put_reports(self, adict):
235 | assert isinstance(adict, dict)
236 | data = zlib.compress(marshal.dumps(adict))
237 | return self.execute_command('PUT_REPORTS', data, 'zip')
238 |
239 | def put_minute(self, symbol, rawdata):
240 | memfile = StringIO()
241 | np.save(memfile, rawdata)
242 | return self.execute_command('PUT_MINUTE', symbol, memfile.getvalue(), 'npy')
243 |
244 | def put_1minute(self, symbol, rawdata):
245 | memfile = StringIO()
246 | np.save(memfile, rawdata)
247 | return self.execute_command('PUT_1MINUTE', symbol, memfile.getvalue(), 'npy')
248 |
249 | def put_5minute(self, symbol, rawdata):
250 | memfile = StringIO()
251 | np.save(memfile, rawdata)
252 | return self.execute_command('PUT_5MINUTE', symbol, memfile.getvalue(), 'npy')
253 |
254 | def put_day(self, symbol, rawdata):
255 | memfile = StringIO()
256 | np.save(memfile, rawdata)
257 | return self.execute_command('PUT_DAY', symbol, memfile.getvalue(), 'npy')
258 |
259 | def archive_minute(self):
260 | return self.execute_command('ARCHIVE_MINUTE')
261 |
--------------------------------------------------------------------------------
/datafeed/tests/test_dividend.py:
--------------------------------------------------------------------------------
1 | from __future__ import with_statement
2 |
3 | import unittest
4 |
5 | import datetime
6 | import numpy as np
7 | import time
8 |
9 | from pandas import DataFrame, lib
10 | from pandas import TimeSeries
11 |
12 | from datafeed.datastore import Day
13 | from datafeed.dividend import Dividend, adjust
14 |
15 |
16 | def date2unixtime(date):
17 | return int(time.mktime(date.timetuple()))
18 |
19 |
20 | class DividendTest(unittest.TestCase):
21 | dtype = [('time', ' 0:
70 | symbol, abbr = idstr.split('.')
71 | exchange = cls.get_exchange_from_abbr(abbr)
72 | else:
73 | symbol = idstr
            # US, Japan, London exchanges on Yahoo! finance have no suffix
75 | exchange = YahooNA()
76 | return cls(exchange, symbol)
77 |
78 | @classmethod
79 | def get_exchange_from_abbr(cls, abbr):
80 | """get exchange from yahoo abbr"""
81 | ex = _EXCHANGES[abbr]
82 | ex_cls = getattr(sys.modules[__name__], ex)
83 | return ex_cls()
84 |
85 |
class YahooReport(Report):
    """One row of Yahoo!'s quotes.csv live report.

    Tags format defined by YahooReportFetcher which is: "sl1d1t1c1ohgv".
    FIXME: Yahoo quotes became N/A during session after hours.
    """

    _DEFINITIONS = (
        ("symbol", str),
        ("price", float),
        ("date", lambda x: parser.parse(x).date()),
        ("time", parser.parse),
        ("change", float),
        ("open", float),
        ("high", float),
        ("low", float),
        ("volume", float),
    )

    def __init__(self, raw_data):
        assert len(raw_data) == len(self._DEFINITIONS)

        # Pair each raw CSV field with its converter and build the dict.
        data = dict((key, convert(value))
                    for (key, convert), value
                    in zip(self._DEFINITIONS, raw_data))

        security = YahooSecurity.from_string(data.pop('symbol'))
        super(YahooReport, self).__init__(security, data)

    @staticmethod
    def parse(rawdata):
        """Lazily yield one YahooReport per CSV line of `rawdata`."""
        reader = csv.reader(StringIO(rawdata))
        return (YahooReport(line) for line in reader)
121 |
122 |
class YahooDay(Day):
    """One daily OHLC row from Yahoo!'s table.csv historical download."""

    _DEFINITIONS = (
        ("date", lambda x: parser.parse(x).date()),
        ("open", float),
        ("high", float),
        ("low", float),
        ("close", float),
        ("volume", float),
        ("adjclose", float))

    def __init__(self, security, raw_data):
        assert len(raw_data) == len(self._DEFINITIONS)

        # Convert each raw CSV field with its configured converter.
        data = {}
        i = 0
        for conf in self._DEFINITIONS:
            data[conf[0]] = conf[1](raw_data[i])
            i += 1

        super(YahooDay, self).__init__(security, data)

    @staticmethod
    def parse(security, rawdata):
        """Lazily yield one YahooDay per data line, skipping the CSV header."""
        f = StringIO(rawdata)
        r = csv.reader(f)
        # builtin next() replaces the Python-2-only r.next() (gone in
        # Python 3, available since 2.6); behavior unchanged.
        next(r)  # skip header
        return (YahooDay(security, line) for line in r)
151 |
152 |
class YahooReportFetcher(Fetcher):
    """Batch fetcher for Yahoo!'s quotes.csv live-report endpoint."""

    # Live quotes tags format,
    # consistent with downloads link on the web page.
    _FORMAT = "sl1d1t1c1ohgv"

    # Maximum number of stocks we'll batch fetch.
    _MAX_REQUEST_SIZE = 100

    def __init__(self, base_url='http://download.finance.yahoo.com/d/quotes.csv',
                 time_out=20, max_clients=10, request_size=100):
        assert request_size <= self._MAX_REQUEST_SIZE

        super(YahooReportFetcher, self).__init__(base_url, time_out, max_clients)
        self._request_size = request_size

    def _fetching_urls(self, *args, **kwargs):
        """Build one request URL per slice of at most _request_size symbols."""
        ids = (str(s) for s in args)
        ids = self._slice(ids)

        # `is not None` replaces `!= None`: identity is the correct (PEP 8)
        # test for dropping the padding the slicer adds.
        return (self._make_url(filter(lambda x: x is not None, i)) for i in ids)

    def _make_url(self, ids):
        """Make url to fetch.

        example:
        http://download.finance.yahoo.com/d/quotes.csv?s=GOOG+AAPL+600028.SS&f=sl1d1t1c1ohgv&e=.csv
        """
        return "%s?s=%s&f=%s&e=.csv" % (self._base_url, '+'.join(ids), self._FORMAT)

    def _callback(self, security, **kwargs):
        # Each response body carries a whole batch of symbols, so the
        # `security` argument is unused here.
        if 'callback' in kwargs:
            callback = kwargs['callback']
        else:
            callback = None
        return functools.partial(self._handle_request, callback)

    def _handle_request(self, callback, response):
        """Bridge the async HTTP response into `callback`; always stop the loop."""
        try:
            self.queue_len = self.queue_len - 1

            if response.error:
                logging.error(response.error)
            else:
                callback(response.body)
        except StandardError:
            # Keep the fetch loop alive on malformed payloads.
            logging.error("Wrong data format.")
        finally:
            self.stop()
202 |
203 |
class YahooDayFetcher(DayFetcher):
    """Fetcher for Yahoo!'s table.csv daily-history endpoint."""

    def __init__(self, base_url='http://ichart.finance.yahoo.com/table.csv',
                 time_out=20, max_clients=10):
        super(YahooDayFetcher, self).__init__(base_url, time_out, max_clients)

    def _make_url(self, security, **kwargs):
        """Make url to fetch.

        Parameters:
        s Stock Ticker (for example, MSFT)
        a Start Month (0-based; 0=January, 11=December)
        b Start Day
        c Start Year
        d End Month (0-based; 0=January, 11=December)
        e End Day
        f End Year
        g Always use the letter d

        example:
        http://ichart.finance.yahoo.com/table.csv?s=GOOG&d=4&e=4&f=2011&g=d&a=7&b=19&c=2004&ignore=.csv
        """
        begin = kwargs['start_date']
        until = kwargs['end_date']
        template = "%s?s=%s&g=d&a=%s&b=%s&c=%s&d=%s&e=%s&f=%s"
        return template % (self._base_url, str(security),
                           begin.month - 1, begin.day, begin.year,
                           until.month - 1, until.day, until.year)
235 |
236 |
class YahooNewsFetcher(Fetcher):
    """Fetcher for Yahoo!'s per-symbol RSS headline feed."""

    _BASE_URL = "http://feeds.finance.yahoo.com/rss/2.0/headline"

    # Maximum number of stocks we'll batch fetch.
    _MAX_REQUEST_SIZE = 100

    def __init__(self, base_url=_BASE_URL, time_out=10, max_clients=5):
        super(YahooNewsFetcher, self).__init__(base_url, time_out, max_clients)

    def _fetching_urls(self, *args, **kwargs):
        # One URL per security; RSS feeds are not batched.
        return (self._make_url(str(security)) for security in args)

    def _make_url(self, symbol):
        """Make url to fetch.

        example:
        http://feeds.finance.yahoo.com/rss/2.0/headline?s=yhoo&region=US&lang=en-US
        """
        # Fixed: the "&region" query separator had been mangled into the
        # HTML entity for "&reg" (rendered as a registered-trademark
        # sign), which produced a broken query string.
        return "%s?s=%s&region=US&lang=en-US" % (self._base_url, symbol)

    def _callback(self, security, **kwargs):
        if 'callback' in kwargs:
            callback = kwargs['callback']
        else:
            callback = None
        return functools.partial(self._handle_request,
                                 callback,
                                 security)

    def _handle_request(self, callback, security, response):
        """Invoke callback(security, response); always stop the fetch loop."""
        try:
            self.queue_len = self.queue_len - 1

            if response.error:
                logging.error(response.error)
            else:
                callback(security, response)
        finally:
            self.stop()
276 |
--------------------------------------------------------------------------------
/datafeed/providers/google.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import sys
4 | import functools
5 | import logging
6 |
7 | from datetime import timedelta
8 | from dateutil import parser
9 |
10 | from datafeed.bidict import Bidict
11 | from datafeed.exchange import *
12 | from datafeed.quote import *
13 | from datafeed.providers.http_fetcher import *
14 | from datafeed.utils import json_decode
15 |
16 | __all__ = ['GoogleSecurity', 'currency2float',
17 | 'GoogleReport', 'GoogleReportFetcher',
18 | 'GoogleDay', 'GoogleDayFetcher',
19 | 'GoogleNewsFetcher']
20 |
# See: http://www.google.com/intl/en-US/help/stock_disclaimer.html
# Google finance support more exchanges, adding here if you need it.
# Bidirectional map: local exchange abbr <-> Google Finance exchange code.
_EXCHANGES = Bidict({
    "HK": 'HGK', # Hong Kong; NOTE(review): Google's usual code is "HKG" -- verify 'HGK'
    "SH": "SHA", # Shanghai
    "SZ": "SHE", # ShenZhen
    "NASDAQ": "NASDAQ",
    "NYSE": "NYSE",
    "NYSEARCA": "NYSEARCA",
    "AMEX": "AMEX",
})
32 |
33 |
def currency2float(currency):
    """Convert a currency string to a number.

    Handles thousands separators and a trailing "M" (millions) suffix.
    An empty string is returned unchanged (legacy behavior kept for
    callers that pass through missing fields).

    >>> currency2float("10.08")
    10.08
    >>> currency2float("12,313.66")
    12313.66
    >>> currency2float("102.5M")
    102500000.0
    """
    if currency == '':
        return ''
    if currency.endswith("M"):
        # Recurse on the numeric part, then scale to millions.  The result
        # is a float, so the doctest above shows 102500000.0 (the previous
        # doctest claimed the int 102500000 and would have failed).
        return currency2float(currency[:-1]) * 10**6
    return float(currency.replace(",", ""))
50 |
51 |
class GoogleSecurity(Security):
    """Security addressed by Google Finance's EXCHANGE:SYMBOL notation."""

    @property
    def _abbr(self):
        """Google finance specific exchange abbr."""
        return _EXCHANGES[str(self.exchange)]

    @classmethod
    def from_string(cls, idstr):
        """Parse a google symbol(eg: NASDAQ:GOOG) string."""
        exchange_abbr, symbol = idstr.split(':')
        return cls.from_abbr(exchange_abbr, symbol)

    @classmethod
    def from_abbr(cls, abbr, symbol):
        """Create from exchange abbr and symbol."""
        return cls(cls.get_exchange_from_abbr(abbr), symbol)

    @classmethod
    def get_exchange_from_abbr(cls, abbr):
        """get exchange from google abbr."""
        class_name = _EXCHANGES[abbr]
        exchange_class = getattr(sys.modules[__name__], class_name)
        return exchange_class()
76 |
77 |
class GoogleReport(Report):
    """A live quote row parsed from Google Finance's /finance/info JSON."""

    # This only contains common tags.
    # You could retrieve special tag data from self._raw_data.
    _TAGS_DEFINITION = {
        't': ("symbol", str),
        "e": ("abbr", str),
        'op': ("open", currency2float),
        'hi': ("high", currency2float),
        'lo': ("low", currency2float),
        'lt': ("time", parser.parse),
        'l': ("price", currency2float),
        'c': ("change", currency2float),
        'vo': ("volume", currency2float)
    }

    # NOTE(review): class-level mutable default; harmless because __init__
    # always rebinds an instance attribute, but shared until then.
    _raw_data = {}

    def __init__(self, raw_data):
        # Validate the fields we depend on before converting anything.
        self.assert_raw(raw_data)
        self._raw_data = raw_data

        # Convert every known tag; unknown tags stay accessible via
        # __getitem__ on the raw dict.
        data = {}
        for key, value in self._TAGS_DEFINITION.iteritems():
            data[value[0]] = value[1](raw_data[key])
        security = GoogleSecurity.from_abbr(data.pop('abbr'),
                                            data.pop('symbol'))

        super(GoogleReport, self).__init__(security, data)

    def assert_raw(self, raw_data):
        """Sanity-check that the required raw tags are present as strings."""
        assert isinstance(raw_data['t'], basestring)
        assert isinstance(raw_data['e'], basestring)
        assert isinstance(raw_data['l'], basestring)
        assert isinstance(raw_data['lt'], basestring)
        assert isinstance(raw_data['vo'], basestring)

    def __getitem__(self, key):
        """Proxy to untouched raw data."""
        return self._raw_data[key]

    @property
    def preclose(self):
        # Previous close reconstructed as last price minus change.
        return self.price - self.change

    @staticmethod
    def parse(rawdata):
        """Parse the raw ``// [...]`` payload into GoogleReport objects."""
        # Strip the leading "// " guard and drop the escaped \x sequences
        # (unclear why the feed escapes '&'), then JSON-decode the list.
        data = rawdata.strip()[3:].replace('\\x', '')
        parsed = json_decode(data)
        return (GoogleReport(x) for x in parsed)
129 |
130 |
class GoogleDay(Day):
    """One daily OHLC row from Google Finance's historical CSV download."""

    _DEFINITIONS = (
        ("date", lambda x: parser.parse(x).date()),
        ("open", currency2float),
        ("high", currency2float),
        ("low", currency2float),
        ("close", currency2float),
        ("volume", currency2float))

    def __init__(self, security, raw_data):
        # One raw CSV field per definition entry.
        assert len(raw_data) == 6

        data = {}
        i = 0
        for conf in self._DEFINITIONS:
            data[conf[0]] = conf[1](raw_data[i])
            i += 1

        super(GoogleDay, self).__init__(security, data)

    @staticmethod
    def parse(security, rawdata):
        """Lazily yield one GoogleDay per data line, skipping the CSV header."""
        import csv
        from cStringIO import StringIO

        f = StringIO(rawdata)
        r = csv.reader(f)
        # builtin next() replaces the Python-2-only r.next() (gone in
        # Python 3, available since 2.6); behavior unchanged.
        next(r)  # skip header
        return (GoogleDay(security, line) for line in r)
161 |
162 |
class GoogleReportFetcher(Fetcher):
    """Batch fetcher for Google Finance's /finance/info quote endpoint."""

    # Maximum number of stocks we'll batch fetch.
    _MAX_REQUEST_SIZE = 100

    def __init__(self, base_url='http://www.google.com/finance/info',
                 time_out=20, max_clients=10, request_size=100):
        assert request_size <= self._MAX_REQUEST_SIZE

        super(GoogleReportFetcher, self).__init__(base_url, time_out, max_clients)
        self._request_size = request_size

    def _fetching_urls(self, *args, **kwargs):
        """Build one request URL per slice of at most _request_size symbols."""
        gids = (str(s) for s in args)
        gids = self._slice(gids)

        # `is not None` replaces `!= None`: identity is the correct (PEP 8)
        # test for dropping the padding the slicer adds.
        return (self._make_url(filter(lambda x: x is not None, i)) for i in gids)

    def _make_url(self, ids):
        """Make url to fetch.

        example:
        http://www.google.com/finance/info?q=SHA:000001,NASDAQ:GOOG&infotype=infoquoteall
        """
        return "%s?q=%s&infotype=infoquoteall" % (self._base_url,
                                                  ','.join(ids))

    def _callback(self, security, **kwargs):
        # Each response body carries a whole batch of symbols, so the
        # `security` argument is unused here.
        if 'callback' in kwargs:
            callback = kwargs['callback']
        else:
            callback = None
        return functools.partial(self._handle_request, callback)

    def _handle_request(self, callback, response):
        """Bridge the async HTTP response into `callback`; always stop the loop."""
        try:
            self.queue_len = self.queue_len - 1

            if response.error:
                logging.error(response.error)
            else:
                callback(response.body)
        finally:
            self.stop()
207 |
208 |
class GoogleDayFetcher(DayFetcher):
    """Fetcher for Google Finance's historical daily-price CSV endpoint."""

    def __init__(self, base_url='http://www.google.com/finance/historical',
                 time_out=20, max_clients=10):
        super(GoogleDayFetcher, self).__init__(base_url, time_out, max_clients)

    def _make_url(self, security, **kwargs):
        """Generate url to fetch.

        example:

        http://www.google.com/finance/historical?q=NASDAQ:GOOG&startdate=2011-04-01&enddate=2011-04-28&output=csv

        Google finance return one day more data, typically this isn't a
        problem, we decrease the enddate by one day for passing tests.
        """
        adjusted_end = kwargs['end_date'] - timedelta(days=1)
        return "%s?q=%s&startdate=%s&enddate=%s&output=csv" % (
            self._base_url,
            str(security),
            kwargs['start_date'],
            adjusted_end)
230 |
231 |
class GoogleNewsFetcher(Fetcher):
    """Fetcher for Google Finance's per-company news RSS feed."""

    _BASE_URL = "http://www.google.com/finance/company_news?q=%s&output=rss"

    # Maximum number of stocks we'll batch fetch.
    _MAX_REQUEST_SIZE = 100

    def __init__(self, base_url=_BASE_URL, time_out=10, max_clients=5):
        super(GoogleNewsFetcher, self).__init__(base_url, time_out, max_clients)

    def _fetching_urls(self, *args, **kwargs):
        # One URL per security; RSS feeds are not batched.
        return (self._make_url(str(security)) for security in args)

    def _make_url(self, symbol):
        """Make url to fetch.

        example:
        http://www.google.com/finance/company_news?q=NASDAQ:GOOG&output=rss
        """
        return self._base_url % symbol

    def _callback(self, security, **kwargs):
        callback = kwargs['callback'] if 'callback' in kwargs else None
        return functools.partial(self._handle_request,
                                 callback,
                                 security)

    def _handle_request(self, callback, security, response):
        """Invoke callback(security, response); always stop the fetch loop."""
        try:
            self.queue_len = self.queue_len - 1

            if response.error:
                logging.error(response.error)
            else:
                callback(security, response)
        finally:
            self.stop()
271 |
--------------------------------------------------------------------------------
/datafeed/imiguserver.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Copyright 2011 yinhm
5 |
6 | '''Imigu.com specific datafeed server implementation.
7 | '''
8 |
9 | import datetime
10 | import logging
11 | import time
12 |
13 | import numpy as np
14 |
15 | from datafeed.providers.dzh import DzhDividend, DzhSector
16 | from datafeed.server import *
17 | from datafeed.utils import *
18 |
19 |
20 | __all__ = ['ImiguApplication', 'ImiguHandler']
21 |
22 |
class SnapshotIndexError(KeyError):
    """Raised when the computed minute-snapshot index is before market open."""
    pass
25 |
26 |
27 |
class ImiguApplication(Application):
    """Datafeed Application wired with imigu.com's periodic archive jobs."""

    def __init__(self, datadir, exchange):
        # Epoch timestamps of the last run of each periodic job.
        self.archive_minute_time = 0
        self.archive_day_time = 0
        self.crontab_time = 0

        # Deferred work queue, consumed FIFO by the run_task handler.
        self._tasks = []

        super(ImiguApplication, self).__init__(datadir, exchange, handler=ImiguHandler)

        # last quote time reset to SH000001's timestamp
        try:
            r = self.dbm.get_report("SH000001")
            self.dbm.set_mtime(r['timestamp'])
        except KeyError:
            # No stored index report yet; fall back to "now".
            self.dbm.set_mtime(time.time())

    def periodic_job(self):
        # Dispatch each due job as an internal, connection-less request.
        today = datetime.datetime.today()

        if self.scheduled_archive_minute(today):
            request = Request(None, 'archive_minute')
            self.__call__(request)

        if self.scheduled_archive_day(today):
            request = Request(None, 'archive_day')
            self.__call__(request)

        if self.scheduled_crontab_daily(today):
            request = Request(None, 'crontab_daily')
            self.__call__(request)

        if len(self._tasks) > 0:
            logging.info("tasks left: %s" % len(self._tasks))
            request = Request(None, 'run_task')
            self.__call__(request)

    def scheduled_archive_minute(self, today):
        """Test is archive minute scheduled.

        True while inside the session (up to 5 minutes past close) and
        either on a minute boundary or when the last run is >60s old.
        """
        now = time.time()

        market_open_at = self.exchange.open_time(now=now)
        if now < market_open_at:
            # Should not archive any data if market not open yet.
            logging.debug("market not open yet")
            return False

        market_closing_at = self.exchange.close_time(now=now)
        if now > (market_closing_at + 60 * 5):
            # Do not archive if time passed 15:05.
            # Should be archived already. If not, something is broken.
            logging.debug("market closed more than 5 minutes")
            return False

        # quote_time = self.dbm.mtime
        # if (now - quote_time) > 60:
        #     return False

        # in session, we run it every 60 sec or greater
        if today.second == 0 or (now - self.archive_minute_time) > 60:
            return True

        return False

    def scheduled_archive_day(self, today):
        """Test is daily archive scheduled.

        True once the market closed, fresh quotes arrived after the close,
        the archive has not already run, and a 3-minute grace period has
        passed so the last reports are in.
        """
        now = time.time()
        close_time = self.exchange.close_time(now=now)

        if now < close_time:
            logging.debug("market not closed yet.")
            return False

        if self.dbm.mtime < close_time:
            logging.debug("No market data: Weekday or holiday or datafeed receiver broken.")
            return False

        if self.dbm.mtime < self.archive_day_time:
            logging.debug("Already archived.")
            return False

        # skip 60 * 3 sec make sure we got the last data
        if now > (close_time + 60 * 3):
            return True

        return False

    def scheduled_crontab_daily(self, today):
        """Test is daily crontab scheduled.

        Normally fires at exactly 08:00:00; the second branch is a
        catch-up that fires on any hour's minute 0 when the job has not
        run for more than a day.
        """
        if today.hour == 8:
            if today.minute == 0 and today.second == 0:
                return True

        now = time.time()
        if today.minute == 0 and (now - self.crontab_time) > 86400:
            # not runned before
            return True

        return False

    def task_add(self, task):
        # Append deferred work; consumed by task_reserve().
        self._tasks.append(task)

    def task_reserve(self):
        # Pop the oldest task; raises IndexError when the queue is empty.
        return self._tasks.pop(0)
137 |
138 | class ImiguHandler(Handler):
139 |
    # Extend the base handler's verbs with imigu-specific archive and
    # cron commands dispatched by periodic_job().
    SUPPORTED_METHODS = Handler.SUPPORTED_METHODS + \
        ('archive_day',
         'archive_minute',
         'crontab_daily',
         'sync_dividend',
         'sync_sector',
         'run_task')
147 |
148 |
149 | ###### periodic jobs ######
150 |
    def archive_day(self, *args):
        """Archive daily data from report datastore.

        Writes one (time, OHLC, volume, amount) row per symbol into the
        day store, using the report whose timestamp falls on the current
        market day (derived from dbm.mtime).  Symbols without fresh data
        are skipped.
        """
        dt = datetime.datetime.fromtimestamp(self.dbm.mtime).date()

        store = self.dbm.daystore
        reports = self.dbm.get_reports()
        for symbol, report in reports:
            if 'timestamp' not in report:
                # Malformed report; nothing to archive for this symbol.
                continue

            d = datetime.datetime.fromtimestamp(report['timestamp'])

            if dt != d.date():
                # skip instruments which no recent report data
                continue

            # Day rows are keyed by the midnight timestamp of the trade day.
            t = int(time.mktime(d.date().timetuple()))

            row = (t, report['open'], report['high'], report['low'],
                   report['close'], report['volume'], report['amount'])

            data = np.array([row], dtype=store.DTYPE)
            store.update(symbol, data)

        self.application.archive_day_time = time.time()
        logging.info("daily data archived.")

        # Periodic jobs build a Request with connection=None; only reply
        # when a real client connection is attached.
        if self.request.connection:
            self.request.write("+OK\r\n")
181 |
    def archive_minute(self, *args):
        '''Archive minute data from report datastore.

        Takes one snapshot of every symbol's current report and stores it
        at the minute slot (index) derived from the wall-clock time.
        Timestamps inside the lunch break are clamped to 11:30 and
        after-close timestamps to 15:00.
        '''
        logging.info("starting archive minute...")
        self.application.archive_minute_time = time.time()

        dbm = self.dbm
        pre_open_time = dbm.exchange.pre_open_time(now=dbm.mtime)
        open_time = dbm.exchange.open_time(now=dbm.mtime)
        break_time = dbm.exchange.break_time(now=dbm.mtime)
        close_time = dbm.exchange.close_time(now=dbm.mtime)

        # SH000001 (the index report) is used as the market data heartbeat.
        try:
            report = dbm.get_report('SH000001')
            rts = report['timestamp']
        except KeyError:
            logging.error("No SH000001 data.")
            if not self.request.connection:
                return
            return self.request.write("-ERR No data yet.\r\n")

        if rts < pre_open_time:
            # Heartbeat predates today's pre-open: data is stale.
            logging.error("wrong report time: %s." % \
                              (datetime.datetime.fromtimestamp(rts), ))
            if not self.request.connection:
                return
            return self.request.write("-ERR No data yet.\r\n")

        mintime, index = ImiguHandler.get_snapshot_index(open_time, rts)

        if index < 0:
            # Current minute is before market open -- nothing to snapshot.
            raise SnapshotIndexError

        # Rotate when we sure there is new data coming in.
        dbm.rotate_minute_store()
        store = dbm.minutestore

        snapshot_time = mintime
        cleanup_callback = lambda r: r
        if index > 120 and index < 210:
            # sometimes we received report within 11:31 - 12:59
            # reset to 11:30
            snapshot_time = break_time
            def cleanup_callback(r):
                r['timestamp'] = break_time
                r['time'] = str(datetime.datetime.fromtimestamp(break_time))

            index = 120
        elif index >= 210 and index <= 330:
            index = index - 89 # subtract 11:31 - 12:59
        elif index > 330:
            # sometimes we received report after 15:00
            # reset to 15:00
            snapshot_time = close_time
            def cleanup_callback(r):
                r['timestamp'] = close_time
                r['time'] = str(datetime.datetime.fromtimestamp(close_time))

            index = 241

        reports = dbm.get_reports()
        for key, report in reports:
            if 'timestamp' not in report:
                # Wrong data
                continue

            if mintime - report['timestamp'] > 1800:
                # no new data in 30 mins, something broken
                # skip this symbol when unknown
                continue

            cleanup_callback(report)

            mindata = (snapshot_time, report['price'], report['volume'], report['amount'])
            y = np.array(mindata, dtype=store.DTYPE)

            store.set(key, index, y)

        #store.flush()

        logging.info("snapshot to %i (index of %i)." % (mintime, index))
        self.request.write_ok()
264 |
    @classmethod
    def get_snapshot_index(cls, open_time, report_time):
        """Return ``(minute_timestamp, index)`` for the current wall-clock minute.

        The timestamp is *now* truncated to whole-minute precision; the index
        is the number of minutes elapsed since ``open_time`` (negative if
        called before the open).

        NOTE(review): ``report_time`` is accepted but never used -- the index
        is derived from the system clock only.  Confirm this is intentional.
        """
        ts = time.time()
        d = datetime.datetime.fromtimestamp(ts)
        # Rebuild the time tuple with seconds zeroed (isdst flag fixed at 0).
        mintime = time.mktime((d.year, d.month, d.day,
                               d.hour, d.minute,
                               0, 0, 0, 0))
        index = int((mintime - open_time) / 60)
        logging.info("minute data at %i (index of %i)." % (mintime, index))
        return (int(mintime), index)
275 |
    def crontab_daily(self, *args):
        """Daily scheduled maintenance: refresh dividend then sector data."""
        self.application.crontab_time = time.time()
        self.sync_dividend()
        self.sync_sector()
280 |
    def sync_dividend(self, *args):
        """Fetch dividend records from Dzh, persist them, then ack the client."""
        io = DzhDividend()
        for symbol, data in io.read():
            self.dbm.update_dividend(symbol, data)
        self.dbm.divstore.flush()
        self.request.write_ok()
287 |
    def sync_sector(self, *args):
        """Fetch sector/block definitions from Dzh, store them, then ack."""
        io = DzhSector()
        for sector, options in io.read():
            self.dbm.sectorstore[sector] = options
        self.request.write_ok()
293 |
294 | def run_task(self):
295 | for i in xrange(300):
296 | try:
297 | task = self.application.task_reserve()
298 | except IndexError:
299 | break
300 | task.run()
301 |
302 |
class Task(object):
    """A deferred store write: ``store.set(key, index, data)`` run later.

    Instances are queued by the application and drained via ``run_task``.
    """
    __slots__ = ['store', 'key', 'index', 'data']

    def __init__(self, store, key, index, data):
        # Bind each constructor argument to its same-named slot.
        for name, value in zip(self.__slots__, (store, key, index, data)):
            setattr(self, name, value)

    def run(self):
        """Apply the pending write to the target store."""
        self.store.set(self.key, self.index, self.data)
314 |
--------------------------------------------------------------------------------
/datafeed/providers/tests/test_google.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | from __future__ import with_statement
4 |
5 | import os
6 | import unittest
7 |
8 | from datetime import datetime, date
9 | from datafeed.exchange import *
10 | from datafeed.providers.http_fetcher import *
11 | from datafeed.providers.google import *
12 |
13 |
class GoogleSecurityTest(unittest.TestCase):
    """GoogleSecurity symbol / exchange-abbreviation round-tripping."""

    def test_abbr_sha(self):
        security = GoogleSecurity(SH(), '600028')
        self.assertEqual(security._abbr, 'SHA')

    def test_abbr_she(self):
        security = GoogleSecurity(SZ(), '000001')
        self.assertEqual(security._abbr, 'SHE')

    def test_abbr_hgk(self):
        security = GoogleSecurity(HK(), '000001')
        self.assertEqual(str(security._abbr), 'HGK')

    def test_google_id(self):
        security = GoogleSecurity(SH(), '600028')
        self.assertEqual(str(security), 'SHA:600028')

    def test_abbr_to_exchange(self):
        exchange = GoogleSecurity.get_exchange_from_abbr("SHA")
        self.assertEqual(exchange, SH())

    def test_ss_abbr(self):
        security = GoogleSecurity.from_string('SHA:600028')
        self.assertEqual(security.exchange, SH())
        self.assertEqual(security.symbol, '600028')
        self.assertEqual(str(security), 'SHA:600028')

    def test_zip_slice(self):
        # Short final group is padded with None.
        groups = list(zip_slice(3, 'ABCED'))
        self.assertEqual(groups, [('A', 'B', 'C'), ('E', 'D', None)])
45 |
46 |
class GoogleReportTest(unittest.TestCase):
    """Parsing of Google Finance realtime quote responses."""

    # Captured raw response: SSE index, Sinopec, and GOOG (with extended data).
    _RAW_DATA = '// [ { "id": "7521596" ,"t" : "000001" ,"e" : "SHA" ,"l" : "2,925.53" ,"l_cur" : "CN¥2,925.53" ,"s": "0" ,"ltt":"3:00PM CST" ,"lt" : "Apr 27, 3:00PM CST" ,"c" : "-13.46" ,"cp" : "-0.46" ,"ccol" : "chr" ,"eo" : "" ,"delay": "" ,"op" : "2,946.33" ,"hi" : "2,961.13" ,"lo" : "2,907.66" ,"vo" : "105.49M" ,"avvo" : "" ,"hi52" : "3,478.01" ,"lo52" : "1,844.09" ,"mc" : "" ,"pe" : "" ,"fwpe" : "" ,"beta" : "" ,"eps" : "" ,"name" : "SSE Composite Index" ,"type" : "Company" } ,{ "id": "697073" ,"t" : "600028" ,"e" : "SHA" ,"l" : "8.64" ,"l_cur" : "CN¥8.64" ,"s": "0" ,"ltt":"3:00PM CST" ,"lt" : "Apr 29, 3:00PM CST" ,"c" : "+0.12" ,"cp" : "1.41" ,"ccol" : "chg" ,"eo" : "" ,"delay": "" ,"op" : "8.57" ,"hi" : "8.66" ,"lo" : "8.53" ,"vo" : "42.28M" ,"avvo" : "" ,"hi52" : "10.09" ,"lo52" : "7.67" ,"mc" : "749.11B" ,"pe" : "10.70" ,"fwpe" : "" ,"beta" : "" ,"eps" : "0.81" ,"name" : "China Petroleum \x26 Chemical Corporation" ,"type" : "Company" } ,{ "id": "694653" ,"t" : "GOOG" ,"e" : "NASDAQ" ,"l" : "532.82" ,"l_cur" : "532.82" ,"s": "1" ,"ltt":"4:00PM EDT" ,"lt" : "Apr 26, 4:00PM EDT" ,"c" : "+7.77" ,"cp" : "1.48" ,"ccol" : "chg" ,"el": "535.97" ,"el_cur": "535.97" ,"elt" : "Apr 27, 4:15AM EDT" ,"ec" : "+3.15" ,"ecp" : "0.59" ,"eccol" : "chg" ,"div" : "" ,"yld" : "" ,"eo" : "" ,"delay": "" ,"op" : "526.52" ,"hi" : "537.44" ,"lo" : "525.21" ,"vo" : "100.00" ,"avvo" : "2.80M" ,"hi52" : "642.96" ,"lo52" : "433.63" ,"mc" : "171.31B" ,"pe" : "19.53" ,"fwpe" : "" ,"beta" : "1.19" ,"eps" : "27.28" ,"name" : "Google Inc." ,"type" : "Company" } ]'


    def test_currenct_to_float(self):
        """currency2float strips thousands separators and expands M suffixes."""
        self.assertEqual(currency2float("10.08"), 10.08)
        self.assertEqual(currency2float("12,313.66"), 12313.66)
        self.assertEqual(currency2float("102.5M"), 102500000)

    def test_google_report(self):
        """Field extraction for both a domestic index and a NASDAQ stock."""
        ret = GoogleReport.parse(self._RAW_DATA)
        i = 0
        for r in ret:
            if i == 0:
                self.assertEqual(r.security.exchange, SH())
                self.assertEqual(r.security.symbol, '000001')
                self.assertEqual(r.price, 2925.53)
                self.assertEqual(r.open, 2946.33)
                self.assertEqual(r.high, 2961.13)
                self.assertEqual(r.low, 2907.66)
                self.assertEqual(r.change, -13.46)

                # preclose == price - change; compare with float tolerance.
                diff = r.preclose - 2938.99
                self.assertTrue(abs(diff) < 0.000001)
                self.assertTrue(isinstance(r.time, datetime))
                self.assertEqual(r.time.hour, 15)
            if i == 2:
                self.assertEqual(r.security.exchange, NASDAQ())
                self.assertEqual(r.security.symbol, 'GOOG')
                # BUGFIX: was assertTrue(r.time.hour, 16) -- assertTrue's second
                # argument is the failure *message*, so that check never failed.
                self.assertEqual(r.time.hour, 16)

                # Extended (after-hours) fields stay accessible via item lookup.
                self.assertEqual(r['el'], "535.97")


            i += 1

    def test_google_report_parse_with_excption(self):
        """Records with empty fields are skipped without breaking iteration."""
        data = '// [ { "id": "694653" ,"t" : "GOOG" ,"e" : "NASDAQ" ,"l" : "520.90" ,"l_cur" : "520.90" ,"s": "0" ,"ltt":"4:00PM EDT" ,"lt" : "May 27, 4:00PM EDT" ,"c" : "+2.77" ,"cp" : "0.53" ,"ccol" : "chg" ,"eo" : "" ,"delay": "" ,"op" : "518.48" ,"hi" : "521.79" ,"lo" : "516.30" ,"vo" : "1.75M" ,"avvo" : "2.91M" ,"hi52" : "642.96" ,"lo52" : "433.63" ,"mc" : "167.86B" ,"pe" : "20.23" ,"fwpe" : "" ,"beta" : "1.17" ,"eps" : "25.75" ,"name" : "Google Inc." ,"type" : "Company" } ,{ "id": "697227" ,"t" : "FRCMQ" ,"e" : "PINK" ,"l" : "0.0045" ,"l_cur" : "0.00" ,"s": "0" ,"ltt":"2:13PM EST" ,"lt" : "Jan 24, 2:13PM EST" ,"c" : "0.0000" ,"cp" : "0.00" ,"ccol" : "chb" ,"eo" : "" ,"delay": "15" ,"op" : "" ,"hi" : "" ,"lo" : "" ,"vo" : "0.00" ,"avvo" : "1.17M" ,"hi52" : "0.14" ,"lo52" : "0.00" ,"mc" : "404,839.00" ,"pe" : "0.00" ,"fwpe" : "" ,"beta" : "1.30" ,"eps" : "7.57" ,"name" : "Fairpoint Communications, Inc." ,"type" : "Company" } ,{ "id": "5521731" ,"t" : "APPL" ,"e" : "PINK" ,"l" : "0.0000" ,"l_cur" : "0.00" ,"s": "0" ,"ltt":"" ,"lt" : "" ,"c" : "" ,"cp" : "" ,"ccol" : "" ,"eo" : "" ,"delay": "15" ,"op" : "" ,"hi" : "" ,"lo" : "" ,"vo" : "0.00" ,"avvo" : "" ,"hi52" : "" ,"lo52" : "" ,"mc" : "" ,"pe" : "" ,"fwpe" : "" ,"beta" : "" ,"eps" : "" ,"name" : "APPELL PETE CORP" ,"type" : "Company" } ]'

        iterable = GoogleReport.parse(data)

        # i is bumped *before* next(), so the first yielded record is i == 1.
        i = 0
        while 1:
            try:
                i += 1
                r = iterable.next()
            except ValueError:
                continue
            except KeyError:
                continue
            except StopIteration:
                break

            if i == 1:
                self.assertEqual(r.security.symbol, 'GOOG')
            if i == 3:
                self.assertEqual(r.security.symbol, 'APPL')
104 |
105 |
class GoogleDayTest(unittest.TestCase):
    """Parsing of Google Finance historical (daily OHLC) CSV data."""

    def test_parse_day(self):
        path = os.path.dirname(os.path.realpath(__file__))
        # `with` guarantees the fixture file is closed even if reading fails
        # (the original opened/closed it manually, leaking on error).
        with open(os.path.join(path, 'google_data.csv'), 'r') as f:
            data = f.read()

        security = GoogleSecurity(NASDAQ(), 'GOOG')
        iters = GoogleDay.parse(security, data)
        i = 0
        for ohlc in iters:
            if i == 0:
                # First CSV row: 2011-04-28,538.06,539.25,534.08,537.97,2037378
                self.assertTrue(isinstance(ohlc.date, date))
                self.assertEqual(ohlc.open, 538.06)
                self.assertEqual(ohlc.high, 539.25)
                self.assertEqual(ohlc.low, 534.08)
                self.assertEqual(ohlc.close, 537.97)
                self.assertEqual(ohlc.volume, 2037378.0)
            i += 1
126 |
127 |
class GoogleReportFetcherTest(unittest.TestCase):
    """GoogleReportFetcher construction defaults and live quote fetching."""

    def test_init(self):
        fetcher = GoogleReportFetcher()
        self.assertEqual(fetcher._base_url, 'http://www.google.com/finance/info')
        self.assertEqual(fetcher._time_out, 20)
        self.assertEqual(fetcher._request_size, 100)

    def test_init_with_arguments(self):
        fetcher = GoogleReportFetcher(base_url='http://www.google.com.hk/finance/info',
                                      time_out=10,
                                      request_size=50)
        self.assertEqual(fetcher._base_url, 'http://www.google.com.hk/finance/info')
        self.assertEqual(fetcher._time_out, 10)
        self.assertEqual(fetcher._request_size, 50)

    def test_init_with_wrong_arguments(self):
        # An oversized request_size must be rejected at construction time.
        self.assertRaises(AssertionError,
                          GoogleReportFetcher,
                          request_size=200)

    def test_fetch(self):
        fetcher = GoogleReportFetcher(request_size=2)
        sse_index = GoogleSecurity(SH(), '000001')
        sinopec = GoogleSecurity(SH(), '600028')
        goog = GoogleSecurity(NASDAQ(), 'GOOG')

        def on_response(body):
            for quote in GoogleReport.parse(body):
                if quote.security == sse_index:
                    # something must wrong if SSE Composite Index goes down to 100
                    self.assertTrue(quote.price > 100)

        fetcher.fetch(sse_index, sinopec, goog,
                      callback=on_response)

    def test_fetch_nyse(self):
        symbols = "NYSE:MMM,NYSE:SVN,NYSE:NDN,NYSE:AHC,NYSE:AIR,NYSE:AAN,NYSE:ABB,NYSE:ABT,NYSE:ANF,NYSE:ABH,NYSE:ABM,NYSE:ABVT,NYSE:AKR,NYSE:ACN,NYSE:ABD,NYSE:AH,NYSE:ACW,NYSE:ACE,NYSE:ATV,NYSE:ATU,NYSE:AYI,NYSE:ADX,NYSE:AGRO,NYSE:PVD,NYSE:AEA,NYSE:AAP,NYSE:AMD,NYSE:ASX,NYSE:AAV,NYSE:ATE,NYSE:AGC,NYSE:AVK,NYSE:LCM,NYSE:ACM,NYSE:ANW,NYSE:AEB,NYSE:AED,NYSE:AEF,NYSE:AEG,NYSE:AEH,NYSE:AEV,NYSE:AER,NYSE:ARX,NYSE:ARO,NYSE:AET,NYSE:AMG,NYSE:AFL,NYSE:AGCO,NYSE:NCV,NYSE:NCZ,NYSE:NIE,NYSE:NGZ,NYSE:NAI,NYSE:A,NYSE:AGL,NYSE:AEM,NYSE:ADC,NYSE:GRO,NYSE:AGU,NYSE:AL,NYSE:APD,NYSE:AYR,NYSE:ARG,NYSE:AKS,NYSE:ABA/CL,NYSE:ALM,NYSE:ALP^N,NYSE:ALP^O,NYSE:ALP^P,NYSE:ALQ/CL,NYSE:ALZ/CL,NYSE:ALG,NYSE:ALK,NYSE:AIN,NYSE:ALB,NYSE:ALU,NYSE:AA,NYSE:ALR,NYSE:ALR^B,NYSE:ALEX,NYSE:ALX,NYSE:ARE,NYSE:ARE^C,NYSE:Y,NYSE:ATI,NYSE:AGN,NYSE:ALE,NYSE:AKP,NYSE:AB,NYSE:ADS,NYSE:AIQ,NYSE:AFB,NYSE:AYN,NYSE:AOI,NYSE:AWF,NYSE:ACG,NYSE:LNT,NYSE:ATK,NYSE:AFC,NYSE:AIB"
        securities = [GoogleSecurity.from_string(s)
                      for s in symbols.split(',')]

        fetcher = GoogleReportFetcher()

        def on_response(body):
            # Drain the parser; success means no exception was raised.
            for report in GoogleReport.parse(body):
                pass

        fetcher.fetch(*securities, callback=on_response)
179 |
180 |
class GoogleDayFetcherTest(unittest.TestCase):
    # Tests for GoogleDayFetcher defaults and historical-day fetching.

    def test_init(self):
        f = GoogleDayFetcher()
        self.assertEqual(f._base_url, 'http://www.google.com/finance/historical')
        self.assertEqual(f._time_out, 20)
        self.assertEqual(f._max_clients, 10)

    def test_init_with_wrong_arguments(self):
        # NOTE(review): this instantiates GoogleReportFetcher, not
        # GoogleDayFetcher -- looks like a copy-paste slip from
        # GoogleReportFetcherTest; confirm which class the max_clients
        # limit should be asserted on.
        self.assertRaises(AssertionError,
                          GoogleReportFetcher,
                          max_clients=20)

    def test_fetch(self):
        f = GoogleDayFetcher()
        s1 = GoogleSecurity(NASDAQ(), 'GOOG')
        s2 = GoogleSecurity(NASDAQ(), 'AAPL')

        def callback(security, body):
            iters = GoogleDay.parse(security, body)
            i = 0
            for ohlc in iters:
                self.assertTrue(ohlc.security in (s1, s2))
                if i == 0 and ohlc.security == s1:
                    # Expected first row for GOOG within the requested range.
                    self.assertEqual(str(ohlc.date), "2011-04-28")
                    self.assertEqual(ohlc.open, 538.06)
                    self.assertEqual(ohlc.high, 539.25)
                    self.assertEqual(ohlc.low, 534.08)
                    self.assertEqual(ohlc.close, 537.97)
                    self.assertEqual(ohlc.volume, 2037378.0)

                i += 1

        start_date = datetime.strptime("2011-04-01", "%Y-%m-%d").date()
        end_date = datetime.strptime("2011-04-28", "%Y-%m-%d").date()
        f.fetch(s1, s2,
                callback=callback,
                start_date=start_date,
                end_date=end_date)
220 |
221 |
# Allow running this test module directly.
if __name__ == '__main__':
    unittest.main()
224 |
--------------------------------------------------------------------------------
/datafeed/providers/dzh.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Copyright 2010 yinhm
5 |
6 | """大智慧日线数据抓换
7 |
8 | 大智慧数据格式分析:
9 | http://hi.baidu.com/wu988/blog/item/9321c4036917a30f728da55d.html
10 |
11 | 文件路径
12 | -------
13 | $DZH/data/sh/day.dat
14 |
15 | 文件头
16 | -----
17 | 起止地址 数据内容 数据含义 数据类型
18 | 00 - 03 F4 9B 13 FC 文件标志 int
19 | 04 - 07 00 06 00 00 未知 int
20 | 08 - 0B 00 00 00 00 保留 int
21 | 0C - 0F 97 04 00 00 证券总数 int
22 | 10 - 13 00 18 00 00 未知 int 需添加之起始块号
23 | 14 - 17 DB 17 00 00 未知 int 当前最后空块号
24 |
25 | 记录块号为FFFF表示未分配.从41000h开始的8KB为第0号数据存储块.
26 |
27 | """
28 |
29 | import os
30 | import ConfigParser
31 | import urllib2
32 |
33 | from collections import OrderedDict
34 | from cStringIO import StringIO
35 | from struct import *
36 |
37 | import h5py
38 | import numpy as np
39 |
40 |
41 | __all__ = ['DzhDay', 'DzhDividend',
42 | 'DzhMinute', 'DzhFiveMinute',
43 | 'DzhSector']
44 |
45 |
class EndOfIndexError(StandardError):
    # NOTE(review): presumably raised when the data file's index area is
    # exhausted; no raiser is visible in this chunk.
    pass
48 |
class FileNotFoundError(StandardError):
    # Raised by DzhFetcher when every download server has been exhausted.
    # NOTE: shadows the Python 3 builtin of the same name (this is Py2 code).
    pass
51 |
52 |
def gb2utf(value, ignore=True):
    """Transcode a GB18030-encoded byte string to UTF-8.

    With ``ignore`` (the default) undecodable or unencodable bytes are
    silently dropped; otherwise the usual codec errors propagate.
    """
    mode = 'ignore' if ignore else 'strict'
    return unicode(value, 'gb18030', mode).encode('utf-8', mode)
58 |
59 |
60 | class DzhDay(object):
61 | """大智慧日线数据"""
62 |
63 | _COUNT_SDTART = int('0x0c', 16)
64 | _INDEX_START = int('0x18', 16)
65 | _BLOCK_START = int('0x41000', 16) # OHLCs
66 | _BLOCK_SIZE = 256 * 32
67 |
68 | _DTYPE = [('time', '>> index = read_index(f)
153 | >>> index
154 | ('000001', 4767, [0, 1132, 1135])
155 |
156 | """
157 | symbol = unpack('10s', self.f.read(10))[0].replace('\x00', '')
158 |
159 | if symbol == '':
160 | raise EOFError
161 |
162 | count = unpack('i', self.f.read(4))[0]
163 |
164 | blocks = []
165 |
166 | for i in range(25):
167 | block_id = unpack('h', self.f.read(2))[0]
168 | if block_id != -1: # 0xff 0xff
169 | blocks.append(block_id)
170 |
171 | return (symbol, count, blocks)
172 |
    def read_block(self, block, timestamps, ohlcs):
        """read ohlc data rows for a symbol

        data length
        -----------
        8KB each symbol, 256 * 32bytes

        OHLC record layout (offsets shown for the first record of block 0)
        -----------------------------------------------------------------
        41000 - 41003  date (unix timestamp)   int
        41004 - 41007  open                    float
        41008 - 4100B  high                    float
        4100C - 4100F  low                     float
        41010 - 41013  close                   float
        41014 - 41017  volume                  float
        41018 - 4101B  amount (turnover)       float
        4101C - 4101D  advancing count         short
        4101E - 4101F  declining count         short

        Appends parsed timestamps/ohlc rows to the passed-in lists.

        Returns
        -------
        True on success or Error raised
        """
        try:
            self.f.seek(self._BLOCK_START + self._BLOCK_SIZE * block, 0) # reseek to block head
        except:
            # NOTE(review): bare except swallows *any* seek failure and then
            # keeps reading from the old position -- consider narrowing.
            print "wrong block size? %d" % block

        for i in range(256):
            rawdata = self.f.read(4)

            if rawdata == '':
                raise EOFError

            timestamp = unpack('i', rawdata)[0]
            if timestamp <= 0:
                # invalid: \x00 * 4 || \xff * 4
                self.f.seek(24, 1)
            else:
                # Six consecutive float32s: open, high, low, close,
                # volume, amount.
                ohlc = np.frombuffer(self.f.read(24), dtype=np.float32)

                timestamps.append(timestamp)
                ohlcs.append(ohlc)

                self.f.seek(4, 1) # skip the two shorts (advance/decline counts)

        return True
221 |
222 |
class DzhMinute(DzhDay):
    """Dazhihui 1-minute bar data.

    Same on-disk format as DzhDay, but each data block is 384 * 32 bytes.
    """
    _BLOCK_START = int('0x41000', 16)
    _BLOCK_SIZE = 384 * 32
227 |
228 |
class DzhFiveMinute(DzhDay):
    """Dazhihui 5-minute bar data.

    IMPORTANT:

    The vendor's five-minute feed mishandles time zones, leaving the
    ``time`` values off by 8 hours.  Reading makes no change to the raw
    data; callers must correct it manually, e.g.::

        for symbol, ohlcs in io.read('MIN1.DAT', 'SH'):
            for ohlc in ohlcs:
                ohlc['time'] = ohlc['time'] - 8 * 3600
    """
    #_BLOCK_START = int('0x41000', 16)
    #_BLOCK_SIZE = 384 * 32
243 |
244 |
class DzhFetcher(object):
    """Base class for downloading Dazhihui data files over HTTP.

    Tries each server IP in turn until a download succeeds; raises
    FileNotFoundError once every server has failed.
    """
    _IPS = ('222.73.103.181', '222.73.103.183')
    _PATH = None  # subclasses must set the remote file path

    def __init__(self):
        self.ips = list(self._IPS)
        self._fetched = False

    def fetch_next_server(self):
        """Drop the failing server and retry the fetch on the next one.

        Raises FileNotFoundError when no servers remain.
        """
        # BUGFIX: was `self.ips.pop` (attribute access, never called), so the
        # failing server was never removed and retries looped on the same IP.
        self.ips.pop()
        if len(self.ips) == 0:
            raise FileNotFoundError
        return self.fetch()

    def fetch(self):
        """Download the remote file into an in-memory buffer at ``self.f``."""
        try:
            r = urllib2.urlopen(self.data_url())
            data = r.read()
            self.f = StringIO(data)
            self._fetched = True
        except urllib2.URLError:
            # BUGFIX: was bare `URLError`, a NameError at runtime -- only the
            # `urllib2` module itself is imported by this file.
            return self.fetch_next_server()

    def data_url(self):
        """Return the download URL for the current server, or None if none left."""
        assert self._PATH, "No file path."

        if len(self.ips) == 0:
            return None

        return "http://" + self.ips[-1] + self._PATH
275 |
276 |
class DzhDividend(DzhFetcher):
    '''Dazhihui dividend / ex-right data.'''
    _PATH = '/platform/download/PWR/full.PWR'

    def read(self):
        """Generator of Dazhihui dividend records.

        Yields ``(symbol, records)`` pairs, e.g.:

        symbol: 'SZ000001'
        dividends: [{ :date_ex_dividend => '1992-03-23',
                      :split => 0.500,
                      :purchase => 0.000,
                      :purchase_price => 0.000,
                      :dividend => 0.200 }... ]
        """
        if not self._fetched:
            self.fetch()

        # Skip the 12-byte file header.
        self.f.seek(12, 0)

        try:
            while True:
                yield self._read_symbol()
        except EOFError:
            # End of data: simply finish the generator.  (Was `raise
            # StopIteration`, which also stays correct under PEP 479.)
            return
        finally:
            self.f.close()

    def _read_symbol(self):
        """Read one symbol's dividend list from the current file offset.

        Raises EOFError at end of file.
        """
        dividends = []

        rawsymbol = self.f.read(16)
        if rawsymbol == '':
            raise EOFError

        # 16-byte NUL-padded symbol, e.g. 'SZ000001'.
        symbol = unpack('16s', rawsymbol)[0].replace('\x00', '')

        rawdate = self.f.read(4)

        # One 20-byte record per dividend event; a date of 0xffffffff
        # terminates the symbol's list.
        dt = np.dtype([('time', np.int32),
                       ('split', np.float32),
                       ('purchase', np.float32),
                       ('purchase_price', np.float32),
                       ('dividend', np.float32)])
        while (rawdate) != "\xff" * 4:
            dividend = np.frombuffer(rawdate + self.f.read(16), dtype=dt)
            dividends.append(dividend)

            rawdate = self.f.read(4)
            if rawdate == '':
                break

        return (symbol, np.fromiter(dividends, dtype=dt))
334 |
335 |
336 |
# Sector category names used as section headers in the downloaded ABK file:
# industry, concept, region, CSRC industry, index blocks.
_SECTORS = ('行业', '概念', '地域',
            '证监会行业', '指数板块')
class DzhSector(DzhFetcher):
    '''Dazhihui sector/block data.'''

    _PATH = '/platform/download/ABK/full.ABK'

    def read(self):
        """Generator of Dazhihui sector data.

        Yields ``(sector, options)`` where options maps a block name to its
        list of member symbols.
        """
        if not self._fetched:
            self.fetch()

        # The file is GB-encoded INI-style text; transcode to UTF-8 first.
        content = self.f.read()
        buf = StringIO()  # renamed from `file`, which shadowed the builtin
        buf.write(gb2utf(content))
        buf.seek(0)

        config = ConfigParser.ConfigParser()
        config.readfp(buf)

        for sector in _SECTORS:
            options = OrderedDict()
            for name, value in config.items(sector):
                options[name] = value.split(' ')
            yield sector, options

        self.f.close()
        # BUGFIX: was `file.close` (attribute access) -- never actually closed.
        buf.close()
366 |
367 |
if __name__ == '__main__':
    # Ad-hoc import script: pull the full dividend file from Dazhihui and
    # push each symbol's records into the datafeed server.
    from cStringIO import StringIO
    from datafeed.client import Client

    client = Client()

    # Day-history import example (disabled): read $DZH/sh/DAY.DAT via
    # DzhDay.read() and client.put('DayHistory', symbol, <npy bytes>).

    feed = DzhDividend()
    for symbol, records in feed.read():
        buf = StringIO()
        np.save(buf, records)
        client.put('dividend', symbol, buf.getvalue())
390 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
203 |
--------------------------------------------------------------------------------
/datafeed/tests/test_imiguserver.py:
--------------------------------------------------------------------------------
1 | from __future__ import with_statement
2 |
3 | import datetime
4 | import re
5 | import time
6 | import unittest
7 |
8 | from datafeed.exchange import SH
9 | from datafeed.imiguserver import ImiguApplication, ImiguHandler, SnapshotIndexError
10 | from datafeed.server import Request
11 | from datafeed.tests import helper
12 |
13 | from mock import Mock, patch
14 |
15 |
class ImiguApplicationTest(unittest.TestCase):
    """Scheduling and archiving behaviour of ImiguApplication.

    Fixture instants (epoch seconds): 1291167000 is used as the market
    open and 1291186800 as the close of the trading day under test
    (the two are 5.5 hours apart).  Most tests patch ``time.time`` so
    the application's schedulers see a controlled "now".
    """

    def setUp(self):
        # Application backed by the shared test data dir and the
        # Shanghai exchange; seed the report store with one sample
        # quote stamped exactly at the open time.
        self.application = ImiguApplication(helper.datadir, SH())
        self.application.dbm._mtime = 1291167000
        self.open_time = 1291167000
        self.close_time = 1291186800

        key = helper.sample_key()
        sample = helper.sample()
        sample[key]['timestamp'] = 1291167000
        self.application.dbm.reportstore.update(sample)


    @patch.object(time, 'time')
    def test_archive_day_09_29(self, mock_time):
        # Before the open: day archiving must not be scheduled.
        mock_time.return_value = self.open_time - 1 # not open

        today = datetime.datetime.today()
        ret = self.application.scheduled_archive_day(today)
        self.assertFalse(ret)

    @patch.object(time, 'time')
    def test_archive_day_15_05_no_data(self, mock_time):
        # After close, but the store's mtime is a full day stale:
        # nothing fresh to archive.
        mock_time.return_value = self.close_time + 300

        self.application.dbm._mtime = self.close_time - 86400

        today = datetime.datetime.today()
        ret = self.application.scheduled_archive_day(today)
        self.assertFalse(ret)

    @patch.object(time, 'time')
    def test_archive_day_15_05_01(self, mock_time):
        # Fresh data arrived after close: day archiving should run.
        mock_time.return_value = self.close_time + 181 # closed more than 3 minutes

        self.application.dbm._mtime = self.close_time + 180 + 1

        today = datetime.datetime.today()
        ret = self.application.scheduled_archive_day(today)
        self.assertTrue(ret)

    @patch.object(time, 'time')
    def test_archive_day_15_05_01_archived_before(self, mock_time):
        # Already archived at close_time+180: must not archive twice.
        mock_time.return_value = self.close_time + 181 # closed more than 3 minutes

        self.application.archive_day_time = self.close_time + 180

        today = datetime.datetime.today()
        ret = self.application.scheduled_archive_day(today)
        self.assertFalse(ret)

    @patch.object(time, 'time')
    def test_archive_minute_09_29(self, mock_time):
        # Before the open: minute archiving must not be scheduled.
        mock_time.return_value = self.open_time - 1 # before open

        today = datetime.datetime.today()
        ret = self.application.scheduled_archive_minute(today)
        self.assertFalse(ret)

    @patch.object(time, 'time')
    def test_archive_minute_09_30(self, mock_time):
        # Exactly at the open: minute archiving starts.
        mock_time.return_value = self.open_time

        today = datetime.datetime.today()
        ret = self.application.scheduled_archive_minute(today)
        self.assertTrue(ret)

    @patch.object(time, 'time')
    def test_archive_minute_14_30(self, mock_time):
        mock_time.return_value = self.close_time - 1800 # in session

        today = datetime.datetime.today()
        ret = self.application.scheduled_archive_minute(today)
        self.assertTrue(ret)

    @patch.object(time, 'time')
    def test_archive_minute_14_30_05_if_not_archived(self, mock_time):
        # Last archive was a minute ago: due again even mid-minute.
        mock_time.return_value = self.close_time - 1795 # in session

        self.application.archive_minute_time = self.close_time - 1860

        today = datetime.datetime.today()
        ret = self.application.scheduled_archive_minute(today)
        self.assertTrue(ret)

    @patch.object(time, 'time')
    def test_archive_minute_14_30_05_if_archived(self, mock_time):
        # Current minute already archived: skip.
        mock_time.return_value = self.close_time - 1795 # in session

        self.application.archive_minute_time = self.close_time - 1800

        today = datetime.datetime.today()
        ret = self.application.scheduled_archive_minute(today)
        self.assertFalse(ret)

    @patch.object(time, 'time')
    def test_archive_minute_15_00(self, mock_time):
        # Exactly at close: still archiving.
        mock_time.return_value = self.close_time

        today = datetime.datetime.today()
        ret = self.application.scheduled_archive_minute(today)
        self.assertTrue(ret)

    @patch.object(time, 'time')
    def test_archive_minute_15_03(self, mock_time):
        # Grace period after close (within 5 minutes): keep archiving.
        mock_time.return_value = self.close_time + 180

        today = datetime.datetime.today()
        ret = self.application.scheduled_archive_minute(today)
        self.assertTrue(ret)

    @patch.object(time, 'time')
    def test_archive_minute_15_05_01(self, mock_time):
        # Past the 5-minute grace period: stop.
        mock_time.return_value = self.close_time + 300 + 1 # closed

        today = datetime.datetime.today()
        ret = self.application.scheduled_archive_minute(today)
        self.assertFalse(ret)

    @patch.object(time, 'time')
    def test_crontab_08_00_00(self, mock_time):
        # 1.5 hours before the open: the daily crontab should fire.
        mock_time.return_value = self.open_time - 3600 - 1800

        today = datetime.datetime.fromtimestamp(time.time())
        ret = self.application.scheduled_crontab_daily(today)
        self.assertTrue(ret)

    @patch.object(time, 'time')
    def test_crontab_08_00_01_if_not_running(self, mock_time):
        # Last run was yesterday: fire again today.
        mock_time.return_value = self.open_time - 3600 - 1799

        self.application.crontab_time = self.open_time - 86400 - 7200
        today = datetime.datetime.fromtimestamp(time.time())
        ret = self.application.scheduled_crontab_daily(today)
        self.assertTrue(ret)

    @patch.object(time, 'time')
    def test_crontab_09_30(self, mock_time):
        # At the open it is too late for the daily crontab.
        mock_time.return_value = self.open_time

        today = datetime.datetime.fromtimestamp(time.time())
        ret = self.application.scheduled_crontab_daily(today)
        self.assertFalse(ret)

    def test_archive_day(self):
        # End-to-end: put a report stamped at today's 15:00, then
        # archive_day; the day store should hold one record keyed by
        # today's midnight timestamp with the report's OHLC values.
        r = {
            'amount': 84596203520.0,
            'close': 2856.9899999999998,
            'high': 2880.5599999999999,
            'low': 2851.9499999999998,
            'name': u'\u4e0a\u8bc1\u6307\u6570',
            'open': 2868.73,
            'preclose': 2875.8600000000001,
            'price': 2856.9899999999998,
            'symbol': 'SH000001',
            'volume': 75147848.0
            }

        day = datetime.datetime.today()
        # ts: today at 15:00 (close); day_ts: today at midnight.
        ts = time.mktime((day.year, day.month, day.day,
                          15, 0, 0, 0, 0, 0))
        day_ts = time.mktime((day.year, day.month, day.day,
                              0, 0, 0, 0, 0, 0))
        r['timestamp'] = ts
        r['time'] = str(datetime.datetime.fromtimestamp(ts))

        data = {'SH000001': r}

        # Reports travel over the wire as zlib-compressed marshal data.
        import zlib
        import marshal
        data = zlib.compress(marshal.dumps(data))

        request = Request(None, 'put_reports', data)
        self.application(request)

        request = Request(None, 'archive_day')
        self.application(request)

        y = self.application.dbm.daystore.get('SH000001', 1)
        self.assertEqual(y[0]['time'], day_ts)
        self.assertTrue((y[0]['open'] - 2868.73) < 0.1 ** 6)

    @patch.object(ImiguHandler, 'get_snapshot_index')
    def test_fix_report_when_archive(self, mock_index):
        # set to after hours: 15:30 implicates error data
        # some datafeed still sending data even market was closed.
        # Expectation: archive_minute clamps the report timestamp back
        # to the 15:00 close.
        day = datetime.datetime.today()
        ts = time.mktime((day.year, day.month, day.day,
                          15, 30, 0, 0, 0, 0))
        mock_index.return_value = (ts, 360)

        r = {
            'amount': 84596203520.0,
            'close': 2856.9899999999998,
            'high': 2880.5599999999999,
            'low': 2851.9499999999998,
            'name': u'\u4e0a\u8bc1\u6307\u6570',
            'open': 2868.73,
            'preclose': 2875.8600000000001,
            'price': 2856.9899999999998,
            'symbol': 'SH000001',
            'time': '2010-12-08 14:02:57',
            'timestamp': 1291788177,
            'volume': 75147848.0
            }


        r['timestamp'] = ts
        r['time'] = str(datetime.datetime.fromtimestamp(ts))

        data = {'SH000001': r}

        import zlib
        import marshal
        data = zlib.compress(marshal.dumps(data))

        request = Request(None, 'put_reports', data)
        self.application(request)

        close_time = time.mktime((day.year, day.month, day.day,
                                  15, 0, 0, 0, 0, 0))

        request = Request(None, 'archive_minute', data)
        self.application(request)

        r = self.application.dbm.get_report('SH000001')
        self.assertEqual(r['timestamp'], close_time)
        self.assertEqual(r['open'], 2868.73)

    @patch.object(ImiguHandler, 'get_snapshot_index')
    def test_archive_minute_at_open_time(self, mock_index):
        # set data time to pre-market(centralized competitive pricing)
        # Expectation: the 09:26 quote is archived into minute slot 0
        # stamped with the 09:30 open time.
        day = datetime.datetime.today()
        t1 = time.mktime((day.year, day.month, day.day,
                          9, 26, 0, 0, 0, 0))
        open_time = time.mktime((day.year, day.month, day.day,
                                 9, 30, 0, 0, 0, 0))
        mock_index.return_value = (open_time, 0)

        r = {
            'amount': 10000.0,
            'close': 0.0,
            'high': 3000.0,
            'low': 3000.0,
            'name': u'\u4e0a\u8bc1\u6307\u6570',
            'open': 3000.0,
            'preclose': 2875.0,
            'price': 3000.0,
            'symbol': 'SH000001',
            'volume': 900000.0
            }

        r['timestamp'] = t1
        r['time'] = str(datetime.datetime.fromtimestamp(t1))

        data = {'SH000001': r}

        import zlib
        import marshal
        data = zlib.compress(marshal.dumps(data))

        request = Request(None, 'put_reports', data)
        self.application(request)

        self.assertEqual(self.application.dbm.mtime, t1)

        request = Request(None, 'archive_minute')
        self.application(request)

        y = self.application.dbm.minutestore.get('SH000001')
        self.assertEqual(y[0]['time'], open_time)
        self.assertEqual(y[0]['price'], 3000.0)

    @patch.object(ImiguHandler, 'get_snapshot_index')
    def test_archive_minute_raise_at_wrong_index(self, mock_index):
        # set data time to pre-market(centralized competitive pricing)
        # A negative snapshot index is invalid and must raise.
        day = datetime.datetime.today()
        t1 = time.mktime((day.year, day.month, day.day,
                          9, 26, 0, 0, 0, 0))
        mock_index.return_value = (t1, -4)

        request = Request(None, 'archive_minute')
        self.assertRaises(SnapshotIndexError,
                          self.application,
                          request)

    @patch.object(time, 'time')
    def test_get_snapshot_index(self, mock_time):
        # Report older than "now" still maps to the current minute
        # (index 0 at the very first snapshot).
        mock_time.return_value = 1309829400
        report_time = 1309829160

        mintime, index = ImiguHandler.get_snapshot_index(1309829400, report_time)

        self.assertEqual(mintime, 1309829400)
        self.assertEqual(index, 0)
313 |
# Allow running this test module directly: python test_imiguserver.py
if __name__ == '__main__':
    unittest.main()
316 |
--------------------------------------------------------------------------------
/datafeed/providers/tests/google_data.csv:
--------------------------------------------------------------------------------
1 | 日期、開市價、最高價、最低價、收市價、成交量
2 | 2011-04-28,538.06,539.25,534.08,537.97,2037378
3 | 2011-04-27,538.00,538.11,534.35,537.76,2297840
4 | 2011-04-26,526.52,537.44,525.21,532.82,3530591
5 | 2011-04-25,525.25,527.00,522.01,525.05,1629933
6 | 2011-04-21,525.10,525.10,525.10,525.10,0
7 | 2011-04-20,525.90,526.82,521.39,525.73,3059160
8 | 2011-04-19,529.95,530.88,520.90,521.53,2685395
9 | 2011-04-18,526.42,527.66,519.00,526.84,5041089
10 | 2011-04-15,545.29,545.75,530.06,530.70,14050013
11 | 2011-04-14,575.19,579.45,572.10,578.51,5456289
12 | 2011-04-13,575.51,577.60,571.75,576.28,2071646
13 | 2011-04-12,575.00,576.92,568.05,570.61,2085766
14 | 2011-04-11,576.20,578.10,573.00,577.37,1858382
15 | 2011-04-08,584.89,584.89,578.06,578.16,1902581
16 | 2011-04-07,575.73,580.64,574.19,580.00,2531975
17 | 2011-04-06,572.18,575.16,568.00,574.18,2668385
18 | 2011-04-05,581.08,581.49,565.68,569.09,6048187
19 | 2011-04-04,593.00,594.74,583.10,587.68,2054817
20 | 2011-04-01,588.76,595.19,588.76,591.80,2613266
21 | 2011-03-31,583.00,588.16,581.74,586.76,2031385
22 | 2011-03-30,584.38,585.50,580.58,581.84,1422475
23 | 2011-03-29,576.00,581.89,573.01,581.73,1605138
24 | 2011-03-28,582.07,584.99,574.71,575.36,2218910
25 | 2011-03-25,586.88,586.91,579.24,579.74,2859497
26 | 2011-03-24,585.43,588.39,578.80,586.89,2097978
27 | 2011-03-23,575.19,582.45,572.00,582.16,1816164
28 | 2011-03-22,577.27,579.23,572.51,577.32,1887185
29 | 2011-03-21,570.22,579.80,569.02,576.50,3022174
30 | 2011-03-18,564.64,567.99,559.74,561.06,3302850
31 | 2011-03-17,564.48,569.00,560.54,561.36,2899727
32 | 2011-03-16,568.01,569.79,551.28,557.10,3800712
33 | 2011-03-15,557.50,571.00,555.50,569.56,4006021
34 | 2011-03-14,572.80,578.29,568.02,569.99,2813969
35 | 2011-03-11,578.22,580.00,573.33,576.71,3029401
36 | 2011-03-10,585.44,586.62,579.45,580.30,3127211
37 | 2011-03-09,591.00,594.51,585.75,591.77,2150991
38 | 2011-03-08,592.93,597.98,590.20,592.31,2287771
39 | 2011-03-07,600.55,603.69,587.00,591.66,3461738
40 | 2011-03-04,608.33,608.98,600.20,600.62,3011542
41 | 2011-03-03,606.38,611.49,605.00,609.56,1949335
42 | 2011-03-02,599.80,606.00,595.19,600.79,2026933
43 | 2011-03-01,617.78,619.22,599.30,600.76,3323575
44 | 2011-02-28,610.00,616.49,608.01,613.40,2284336
45 | 2011-02-25,611.86,614.72,609.50,610.04,1935852
46 | 2011-02-24,611.39,613.09,601.35,608.82,2712959
47 | 2011-02-23,610.33,614.70,604.00,611.32,2891878
48 | 2011-02-22,620.03,624.93,607.77,610.21,3642208
49 | 2011-02-18,630.08,630.08,630.08,630.08,0
50 | 2011-02-17,621.25,627.25,620.28,625.26,1479175
51 | 2011-02-16,625.63,626.50,622.10,624.22,1686963
52 | 2011-02-15,627.32,630.09,623.10,624.15,2092614
53 | 2011-02-14,623.34,629.64,620.04,628.15,2131868
54 | 2011-02-11,613.79,625.00,613.00,624.50,2593390
55 | 2011-02-10,613.90,617.50,611.56,616.44,2336117
56 | 2011-02-09,616.87,619.45,612.34,616.50,1843394
57 | 2011-02-08,614.93,619.63,614.51,618.38,1696452
58 | 2011-02-07,610.16,618.39,609.21,614.30,1802017
59 | 2011-02-04,610.15,611.44,606.61,610.98,1552505
60 | 2011-02-03,609.48,611.45,606.13,610.15,1497474
61 | 2011-02-02,611.00,614.34,607.53,612.00,1760755
62 | 2011-02-01,604.49,613.36,603.11,611.04,2745862
63 | 2011-01-31,603.60,604.47,595.55,600.36,2810069
64 | 2011-01-28,619.07,620.36,599.76,600.99,4233962
65 | 2011-01-27,617.89,619.70,613.25,616.79,2019427
66 | 2011-01-26,620.33,622.49,615.28,616.50,2041746
67 | 2011-01-25,608.20,620.69,606.52,619.91,3647644
68 | 2011-01-24,607.57,612.49,601.23,611.08,4604997
69 | 2011-01-21,639.58,641.73,611.36,611.83,8904357
70 | 2011-01-20,632.21,634.08,623.29,626.77,5485733
71 | 2011-01-19,642.12,642.96,629.66,631.75,3412573
72 | 2011-01-18,626.06,641.99,625.27,639.63,3620063
73 | 2011-01-14,624.18,624.18,624.18,624.18,0
74 | 2011-01-13,616.97,619.67,614.16,616.69,1333947
75 | 2011-01-12,619.35,619.35,614.77,616.87,1632698
76 | 2011-01-11,617.71,618.80,614.50,616.01,1440991
77 | 2011-01-10,614.80,615.39,608.56,614.21,1581069
78 | 2011-01-07,615.91,618.25,610.13,616.44,2101270
79 | 2011-01-06,610.68,618.43,610.05,613.50,2058176
80 | 2011-01-05,600.07,610.33,600.05,609.07,2532225
81 | 2011-01-04,605.62,606.18,600.12,602.12,1825499
82 | 2011-01-03,596.48,605.59,596.48,604.35,2366286
83 | 2010-12-31,596.74,598.42,592.03,593.97,1541459
84 | 2010-12-30,598.00,601.33,597.39,598.86,989479
85 | 2010-12-29,602.00,602.41,598.92,601.00,1019823
86 | 2010-12-28,602.05,603.87,598.01,598.92,1064746
87 | 2010-12-27,602.74,603.78,599.50,602.38,1208128
88 | 2010-12-23,604.23,604.23,604.23,604.23,0
89 | 2010-12-22,604.00,607.00,603.28,605.49,1207929
90 | 2010-12-21,598.57,604.72,597.62,603.07,1881688
91 | 2010-12-20,594.65,597.88,588.66,595.06,1973768
92 | 2010-12-17,591.00,592.56,587.67,590.80,3089328
93 | 2010-12-16,592.85,593.77,588.07,591.71,1596820
94 | 2010-12-15,594.20,596.45,589.15,590.30,2168131
95 | 2010-12-14,597.09,598.29,592.48,594.91,1643253
96 | 2010-12-13,597.12,603.00,594.09,594.62,2403442
97 | 2010-12-10,593.14,593.99,590.29,592.21,1705121
98 | 2010-12-09,593.88,595.58,589.00,591.50,1868972
99 | 2010-12-08,591.97,592.52,583.69,590.54,1757126
100 | 2010-12-07,591.27,593.00,586.00,587.14,3047879
101 | 2010-12-06,580.57,582.00,576.61,578.36,2095525
102 | 2010-12-03,569.45,576.48,568.00,573.00,2633126
103 | 2010-12-02,568.66,573.33,565.35,571.82,2550572
104 | 2010-12-01,563.00,571.57,562.40,564.35,3757609
105 | 2010-11-30,574.32,574.32,553.31,555.71,7117385
106 | 2010-11-29,589.17,589.80,579.95,582.11,2859953
107 | 2010-11-26,590.46,592.98,587.00,590.00,1311038
108 | 2010-11-24,594.97,594.97,594.97,594.97,0
109 | 2010-11-23,587.01,589.01,578.20,583.01,2166947
110 | 2010-11-22,587.47,593.44,582.75,591.22,2186511
111 | 2010-11-19,597.00,597.89,590.34,590.83,2298978
112 | 2010-11-18,589.00,599.98,588.56,596.56,2590876
113 | 2010-11-17,585.00,589.50,581.37,583.55,2442412
114 | 2010-11-16,592.76,597.89,583.45,583.72,3308665
115 | 2010-11-15,603.08,604.00,594.05,595.47,3480120
116 | 2010-11-12,613.99,616.90,601.21,603.29,3395209
117 | 2010-11-11,619.70,619.85,614.21,617.19,2262019
118 | 2010-11-10,622.08,623.00,617.51,622.88,2499828
119 | 2010-11-09,630.00,630.85,620.51,624.82,2240293
120 | 2010-11-08,625.12,629.49,623.13,626.77,2102262
121 | 2010-11-05,623.18,625.49,621.11,625.08,1766546
122 | 2010-11-04,624.64,629.92,622.10,624.27,3571173
123 | 2010-11-03,617.50,621.83,613.50,620.18,3382183
124 | 2010-11-02,618.67,620.00,614.58,615.60,1999774
125 | 2010-11-01,615.73,620.66,611.21,615.00,3088417
126 | 2010-10-29,617.07,619.00,612.99,613.70,2281279
127 | 2010-10-28,620.05,621.00,613.30,618.58,2187396
128 | 2010-10-27,615.77,620.00,612.33,616.47,2242414
129 | 2010-10-26,613.10,621.23,611.03,618.60,2513633
130 | 2010-10-25,615.59,624.74,614.97,616.50,3158700
131 | 2010-10-22,611.92,614.82,610.05,612.53,2256598
132 | 2010-10-21,611.51,616.00,606.00,611.99,2923627
133 | 2010-10-20,608.14,617.38,607.50,607.98,3526976
134 | 2010-10-19,608.85,614.82,602.86,607.83,4591178
135 | 2010-10-18,600.55,619.69,600.55,617.71,7109313
136 | 2010-10-15,599.27,601.64,591.60,601.45,14824722
137 | 2010-10-14,544.18,545.25,537.11,540.93,6634018
138 | 2010-10-13,547.00,547.49,542.33,543.30,3060836
139 | 2010-10-12,540.12,545.99,537.79,541.39,3757178
140 | 2010-10-11,538.48,544.60,537.17,538.84,2631310
141 | 2010-10-08,532.77,537.60,527.62,536.35,2863473
142 | 2010-10-07,536.21,537.20,529.14,530.01,2398910
143 | 2010-10-06,539.26,539.95,529.94,534.35,2766690
144 | 2010-10-05,528.38,540.00,526.55,538.23,3507302
145 | 2010-10-04,524.95,528.25,518.85,522.35,1940494
146 | 2010-10-01,530.00,530.62,523.00,525.62,2226112
147 | 2010-09-30,529.16,531.87,518.92,525.79,3244626
148 | 2010-09-29,527.85,532.94,524.71,527.69,2173878
149 | 2010-09-28,533.48,533.59,518.45,527.17,3654832
150 | 2010-09-27,528.85,536.85,528.85,530.41,3107622
151 | 2010-09-24,521.74,527.83,518.26,527.29,3363247
152 | 2010-09-23,514.61,519.69,511.30,513.48,2317644
153 | 2010-09-22,512.86,517.78,511.68,516.00,2537779
154 | 2010-09-21,509.68,519.98,508.91,513.46,4467227
155 | 2010-09-20,492.50,510.41,492.06,508.28,4413416
156 | 2010-09-17,483.75,491.20,481.18,490.15,5649448
157 | 2010-09-16,479.95,482.45,479.41,481.06,1972569
158 | 2010-09-15,479.95,481.89,478.50,480.64,2403545
159 | 2010-09-14,482.01,484.75,480.08,480.43,2218739
160 | 2010-09-13,480.90,484.35,479.53,482.27,2245650
161 | 2010-09-10,479.02,479.79,475.08,476.14,1974340
162 | 2010-09-09,477.83,480.40,470.58,476.18,2435299
163 | 2010-09-08,465.19,472.50,464.51,470.58,2409028
164 | 2010-09-07,464.50,467.59,463.02,464.40,1711335
165 | 2010-09-06,470.30,470.30,470.30,470.30,0
166 | 2010-09-03,470.30,470.30,470.30,470.30,0
167 | 2010-09-02,462.84,464.43,460.31,463.18,1685384
168 | 2010-09-01,454.98,464.94,452.50,460.34,3237824
169 | 2010-08-31,450.11,454.87,448.00,450.02,1946998
170 | 2010-08-30,459.15,459.76,452.42,452.69,1236219
171 | 2010-08-27,452.56,459.99,448.31,458.83,2313147
172 | 2010-08-26,456.06,457.26,450.44,450.98,1781767
173 | 2010-08-25,450.00,457.81,450.00,454.62,2592104
174 | 2010-08-24,457.70,457.70,450.92,451.39,2763814
175 | 2010-08-23,461.50,468.25,457.73,464.07,2989159
176 | 2010-08-20,467.97,471.59,462.18,462.02,3923319
177 | 2010-08-19,481.01,482.51,467.25,467.97,3925513
178 | 2010-08-18,490.44,490.44,481.55,482.15,2686374
179 | 2010-08-17,488.53,494.70,486.10,490.52,1890646
180 | 2010-08-16,483.68,489.87,483.68,485.59,1306685
181 | 2010-08-13,489.00,491.19,486.01,486.35,1940047
182 | 2010-08-12,483.94,494.75,483.94,492.01,2205032
183 | 2010-08-11,497.73,498.00,491.50,491.74,2226003
184 | 2010-08-10,502.35,506.00,498.57,503.71,2075569
185 | 2010-08-09,502.25,505.50,501.36,505.35,1873209
186 | 2010-08-06,505.40,505.74,496.05,500.22,3321695
187 | 2010-08-05,505.89,508.60,503.56,508.10,2426460
188 | 2010-08-04,492.18,507.00,491.05,506.32,3814120
189 | 2010-08-03,490.50,492.46,486.76,489.83,1803185
190 | 2010-08-02,488.99,493.28,486.94,490.41,1867475
191 | 2010-07-30,479.65,487.36,479.14,484.85,2144492
192 | 2010-07-29,485.95,488.88,479.33,484.99,2675420
193 | 2010-07-28,494.94,495.25,482.67,484.35,2502447
194 | 2010-07-27,490.58,497.50,490.17,492.63,2451984
195 | 2010-07-26,489.09,490.75,484.88,488.97,1996691
196 | 2010-07-23,480.77,490.59,480.01,490.06,2267180
197 | 2010-07-22,483.23,488.98,482.48,484.81,2148487
198 | 2010-07-21,484.00,485.70,475.43,477.50,3285364
199 | 2010-07-20,461.03,482.99,460.60,481.59,4056832
200 | 2010-07-19,461.01,469.65,457.52,466.18,4549465
201 | 2010-07-16,469.12,470.56,459.52,459.60,7824721
202 | 2010-07-15,491.73,494.70,482.68,494.02,4858782
203 | 2010-07-14,489.88,493.83,486.46,491.34,3118534
204 | 2010-07-13,482.25,492.99,480.28,489.20,3976281
205 | 2010-07-12,472.37,479.44,471.08,475.83,3334411
206 | 2010-07-09,471.96,473.26,462.78,467.49,4332702
207 | 2010-07-08,453.55,457.33,449.66,456.56,2670490
208 | 2010-07-07,438.31,451.29,435.38,450.20,3131936
209 | 2010-07-06,444.00,447.67,433.63,436.07,2561426
210 | 2010-07-05,436.55,436.55,436.55,436.55,0
211 | 2010-07-02,436.55,436.55,436.55,436.55,0
212 | 2010-07-01,445.29,448.40,433.63,439.49,3513877
213 | 2010-06-30,454.96,457.83,444.72,444.95,3603698
214 | 2010-06-29,463.44,464.55,451.12,454.26,3502108
215 | 2010-06-28,472.59,477.55,469.01,472.08,1762594
216 | 2010-06-25,477.06,477.65,470.56,472.68,2245289
217 | 2010-06-24,479.66,482.75,473.26,475.10,1893761
218 | 2010-06-23,486.89,486.89,478.16,482.05,2029650
219 | 2010-06-22,489.90,496.60,485.73,486.25,2219930
220 | 2010-06-21,499.90,500.97,484.89,488.56,2989344
221 | 2010-06-18,502.51,503.47,498.13,500.03,2878739
222 | 2010-06-17,503.45,505.87,496.69,500.08,1977894
223 | 2010-06-16,496.17,504.00,496.11,501.27,2291967
224 | 2010-06-15,483.08,500.40,482.18,497.99,4261561
225 | 2010-06-14,494.48,494.50,483.19,483.19,2040942
226 | 2010-06-11,482.50,488.71,481.62,488.50,1784318
227 | 2010-06-10,480.37,488.50,475.84,487.01,2585930
228 | 2010-06-09,487.22,488.88,472.00,474.02,2732731
229 | 2010-06-08,487.85,488.84,477.54,484.78,2685052
230 | 2010-06-07,499.06,500.91,483.15,485.52,3637036
231 | 2010-06-04,499.72,509.25,496.70,498.72,3923904
232 | 2010-06-03,495.11,508.00,494.70,505.60,3651897
233 | 2010-06-02,486.68,493.87,481.46,493.37,2540721
234 | 2010-06-01,480.43,491.06,480.12,482.37,2666997
235 | 2010-05-31,485.63,485.63,485.63,485.63,0
236 | 2010-05-28,485.63,485.63,485.63,485.63,0
237 | 2010-05-27,484.86,492.31,481.05,490.46,2810047
238 | 2010-05-26,482.07,489.76,475.00,475.47,3484042
239 | 2010-05-25,468.16,477.45,464.01,477.07,3021908
240 | 2010-05-24,480.73,489.79,476.80,477.16,4350322
241 | 2010-05-21,469.06,485.00,464.40,472.05,9693537
242 | 2010-05-20,485.07,485.58,473.80,475.01,4917963
243 | 2010-05-19,496.26,499.44,487.74,494.43,3446369
244 | 2010-05-18,510.00,510.97,497.07,498.37,2830286
245 | 2010-05-17,506.78,508.36,498.35,507.97,2796534
246 | 2010-05-14,509.77,510.99,496.25,507.53,4123922
247 | 2010-05-13,516.50,522.00,510.37,510.88,3328146
248 | 2010-05-12,512.04,512.04,502.00,505.39,3852157
249 | 2010-05-11,515.67,519.88,508.22,509.05,3324317
250 | 2010-05-10,513.97,522.82,512.60,521.65,4127928
251 | 2010-05-07,499.97,505.32,481.33,493.14,5093752
252 | 2010-05-06,508.75,517.52,460.00,498.67,5000043
253 | 2010-05-05,500.98,515.72,500.47,509.76,4582120
254 | 2010-05-04,526.52,526.74,504.21,506.37,6078530
255 | 2010-05-03,526.50,532.92,525.08,530.60,1860317
256 | 2010-04-30,531.13,537.68,525.44,525.70,2440207
257 | 2010-04-29,533.37,536.50,526.67,532.00,3058815
258 | 2010-04-28,532.10,534.83,521.03,529.19,3409197
259 | 2010-04-27,528.94,538.33,527.24,529.06,3844727
260 | 2010-04-26,544.97,544.99,529.21,531.64,4373614
261 | 2010-04-23,547.25,549.32,542.27,544.99,2089882
262 | 2010-04-22,552.00,552.50,543.35,547.06,3283651
263 | 2010-04-21,556.46,560.25,552.16,554.30,2392748
264 | 2010-04-20,554.17,559.66,551.06,555.04,2979333
265 | 2010-04-19,548.75,553.99,545.00,550.10,3895772
266 | 2010-04-16,563.00,568.81,549.63,550.14,12239411
267 | 2010-04-15,592.17,597.84,588.29,595.30,6761708
268 | 2010-04-14,590.06,592.34,584.01,589.00,3403800
269 | 2010-04-13,572.53,588.88,571.13,586.77,3912262
270 | 2010-04-12,567.35,574.00,566.22,572.73,2353551
271 | 2010-04-09,568.00,568.77,564.00,566.22,2056835
272 | 2010-04-08,563.32,569.85,560.05,567.49,1951735
273 | 2010-04-07,567.30,568.75,561.86,563.54,2581016
274 | 2010-04-06,569.46,570.89,565.40,568.22,2060014
275 | 2010-04-05,570.90,574.88,569.00,571.01,1902063
276 | 2010-04-02,568.80,568.80,568.80,568.80,0
277 | 2010-04-01,568.80,568.80,568.80,568.80,0
278 | 2010-03-31,565.05,569.74,562.81,567.12,3030869
279 | 2010-03-30,562.83,567.63,560.28,566.71,1987571
280 | 2010-03-29,563.00,564.72,560.57,562.45,3105180
281 | 2010-03-26,565.27,567.39,560.02,562.69,2696461
282 | 2010-03-25,559.02,572.00,558.66,562.88,3931664
283 | 2010-03-24,545.51,559.85,539.70,557.33,6569358
284 | 2010-03-23,557.04,558.31,542.00,549.00,5501283
285 | 2010-03-22,556.11,566.85,554.28,557.50,4005472
286 | 2010-03-19,566.23,568.00,557.28,560.00,4794128
287 | 2010-03-18,564.72,568.44,562.96,566.40,1777117
288 | 2010-03-17,568.30,571.45,564.25,565.56,3322497
289 | 2010-03-16,561.83,568.42,560.76,565.20,3432853
290 | 2010-03-15,566.68,569.45,556.00,563.18,4656570
291 | 2010-03-12,588.14,588.28,579.16,579.54,2756215
292 | 2010-03-11,574.26,586.21,574.20,581.14,4235226
293 | 2010-03-10,563.76,578.50,562.21,576.45,5659067
294 | 2010-03-09,559.85,564.66,556.50,560.19,3177588
295 | 2010-03-08,564.78,565.18,561.01,562.48,2386506
296 | 2010-03-05,561.35,567.67,559.90,564.21,3913364
297 | 2010-03-04,546.50,556.13,546.20,554.59,3184077
298 | 2010-03-03,542.36,548.12,539.25,545.32,3090124
299 | 2010-03-02,535.48,545.66,535.01,541.06,4357461
300 | 2010-03-01,529.20,533.29,527.74,532.69,2238334
301 | 2010-02-26,527.42,531.75,523.48,526.80,2049372
302 | 2010-02-25,527.12,528.49,520.00,526.43,3309373
303 | 2010-02-24,534.39,538.44,530.51,531.47,2326901
304 | 2010-02-23,543.00,543.63,532.29,535.07,2874790
305 | 2010-02-22,547.35,547.50,541.00,542.80,2144741
306 |
--------------------------------------------------------------------------------
/datafeed/tests/test_datastore.py:
--------------------------------------------------------------------------------
1 | from __future__ import with_statement
2 |
3 | import h5py
4 | import os
5 | import re
6 | import time
7 | import unittest
8 |
9 | import numpy as np
10 |
11 | from datetime import datetime
12 |
13 | from mock import Mock, patch
14 |
15 | from datafeed.exchange import SH
16 | from datafeed.datastore import *
17 | from datafeed.tests import helper
18 |
19 |
class ManagerTest(unittest.TestCase):
    """Behaviour of the Manager facade over the report/day/minute stores."""

    def setUp(self):
        # A fresh Manager bound to the shared test data directory and
        # the Shanghai exchange.
        self.manager = Manager(helper.datadir, SH())

    def test_store_filename(self):
        # The backing HDF5 file lives at <datadir>/data.h5.
        backing = self.manager._store
        self.assertEqual(backing.filename, '%s/data.h5' % helper.datadir)
        self.assertTrue(isinstance(backing, h5py.File))

    def test_daystore(self):
        self.assertTrue(isinstance(self.manager.daystore, Day))

    def test_not_inited_minutestore(self):
        # No mtime set yet, so the minute store is not initialized.
        self.assertEqual(self.manager._minutestore, None)

    def test_init_manager_with_minute_store(self):
        # Setting an mtime materializes a cache-backed minute store.
        self.manager.set_mtime(1291341180)
        minstore = self.manager.minutestore
        self.assertTrue(isinstance(minstore, Minute))
        self.assertTrue(isinstance(minstore.handle, MinuteSnapshotCache))

    def test_minute_filename_market_not_open(self):
        # Timestamp before the session starts still maps to that
        # calendar day's snapshot group.
        ts = 1291312380
        self.manager.set_mtime(ts)
        expected = datetime.fromtimestamp(ts).date()
        self.assertEqual(expected, self.manager.minutestore.date)
        self.assertEqual('/minsnap/20101203', self.manager.minutestore.pathname)

    def test_minute_filename_opened(self):
        # In-session timestamp maps to the same day's snapshot group.
        ts = 1291341180
        expected = datetime.fromtimestamp(ts).date()
        self.manager.set_mtime(ts)
        self.assertEqual(expected, self.manager.minutestore.date)
        self.assertEqual('/minsnap/20101203', self.manager.minutestore.pathname)

    def test_rotate_minute_store(self):
        manager = self.manager
        manager.set_mtime(1291341180)
        self.assertTrue(isinstance(manager.minutestore.handle, MinuteSnapshotCache))

        # Advance one day, then rotate onto the next day's group.
        manager.set_mtime(1291341180 + 86400)
        manager.rotate_minute_store()
        self.assertEqual('/minsnap/20101204', manager.minutestore.pathname)

    def test_get_minutestore(self):
        minstore = self.manager.get_minutestore_at(1291341180)
        self.assertTrue(isinstance(minstore, Minute))
        self.assertEqual('/minsnap/20101203', minstore.pathname)

    def test_update_day_should_call_to_correctly_store(self):
        # update_minute must route data to the store matching the
        # tick's own timestamp.
        tick = {'time': int(time.time())}
        fake_store = Mock()

        self.manager.get_minutestore_at = Mock(return_value=fake_store)
        self.manager.update_minute("SH000001", [tick])
        self.manager.get_minutestore_at.assert_called_with(tick['time'])

    def test_get_minutestore_force_cache(self):
        cached = self.manager.get_minutestore_at(1291341180, memory=True)
        self.assertTrue(isinstance(cached.handle, MinuteSnapshotCache))

    def test_get_minutestore_force_no_cache(self):
        uncached = self.manager.get_minutestore_at(int(time.time()), memory=False)
        self.assertTrue(isinstance(uncached.handle, h5py.Group))

    def test_get_minutestore_default_cache(self):
        # Today's store defaults to the in-memory cache backend.
        default = self.manager.get_minutestore_at(int(time.time()))
        self.assertTrue(isinstance(default.handle, MinuteSnapshotCache))

    def test_5minstore(self):
        self.assertTrue(isinstance(self.manager.fiveminstore, FiveMinute))
99 |
100 |
class DictStoreTest(unittest.TestCase):
    """DictStore construction and reopen round-trip tests."""

    def test_init_store(self):
        filename = '%s/dstore_init.dump' % helper.datadir
        data = {'r1': 'v1'}
        ds = DictStore(filename, data)
        r1 = ds['r1']
        # Bug fix: assertTrue(r1, 'v1') passed 'v1' as the failure
        # message and only checked truthiness; assert real equality.
        self.assertEqual(r1, 'v1')

    def test_reopen_file(self):
        filename = '%s/dstore_reopen.dump' % helper.datadir

        data = {'r1': 'v1'}
        ds = DictStore(filename, data)
        ds.close()

        # Data must survive a close/reopen cycle.
        ds = DictStore.open(filename)
        r1 = ds['r1']
        self.assertEqual(r1, 'v1')
120 |
121 |
class DictStoreNamespaceTest(unittest.TestCase):
    """Item access through a DictStoreNamespace subclass."""

    def setUp(self):
        # Minimal concrete namespace; keep the class name `Impl`
        # because the namespace key is derived from it.
        class Impl(DictStoreNamespace):
            pass
        path = '%s/dsn_impl.dump' % helper.datadir
        self.store = DictStore(path, {})
        self.impl = Impl(self.store)

    def test_inited_impl(self):
        # Constructing the namespace registers it in the store.
        self.assertTrue(self.store.has_key('impl'))
        self.assertEqual(self.impl.keys(), [])

    def test_set_and_get_item(self):
        # __setitem__ followed by __getitem__.
        self.impl['k12'] = 'v21'
        self.assertEqual(self.impl['k12'], 'v21')

    def test_set_and_get_item2(self):
        # __setitem__ followed by .get().
        self.impl['k12'] = 'v21'
        self.assertEqual(self.impl.get('k12'), 'v21')
142 |
143 |
class ReportTest(unittest.TestCase):
    """Report store update, closed-store guards, and persistence."""

    def test_init_store(self):
        path = '%s/dstore.dump' % helper.datadir
        store = DictStore.open(path)
        reports = Report(store)
        sample = helper.sample()
        key = 'SH000001'

        reports.update(sample)
        self.assertEqual(reports[key], sample[key])

        # A closed store must refuse both reads and writes.
        store.close()
        self.assertRaises(AssertionError, reports.set, key, sample)
        self.assertRaises(AssertionError, reports.get, key)

        # Reopening the file brings the written data back.
        store = DictStore.open(path)
        reports = Report(store)
        self.assertEqual(reports[key], sample[key])
163 |
164 |
class DayTest(unittest.TestCase):
    """Tests for the Day OHLC store namespace."""

    def setUp(self):
        h5file = h5py.File('%s/data.h5' % helper.datadir)
        self.store = Day(h5file)

    def test_namespace(self):
        """The store lives under the /day HDF5 group."""
        handle = self.store.handle
        self.assertTrue(isinstance(handle, h5py.Group))
        self.assertEqual(handle.name, '/day')

    def test_get_from_not_exist_symbol(self):
        """Fetching an unknown symbol raises KeyError."""
        self.assertRaises(KeyError, self.store.get,
                          symbol='SH987654', length=1)
178 |
179 |
class MinuteTest(unittest.TestCase):
    """Tests for the per-day Minute snapshot namespace."""

    def setUp(self):
        # 2011-01-01 01:01:00 local time picks the snapshot day.
        timestamp = int(time.mktime((2011, 1, 1, 1, 1, 0, 0, 0, 0)))
        snapshot_date = datetime.fromtimestamp(timestamp).date()
        self.store = Minute(h5py.File('%s/data.h5' % helper.datadir),
                            snapshot_date,
                            SH().market_minutes)

    def test_namespace(self):
        """The store lives under a date-stamped /minsnap subgroup."""
        handle = self.store.handle
        self.assertTrue(isinstance(handle, h5py.Group))
        self.assertEqual(handle.name, '/minsnap/20110101')

    def test_get_from_not_exist_symbol(self):
        """Fetching an unknown symbol raises KeyError."""
        self.assertRaises(KeyError, self.store.get, symbol='SH987654')
197 |
198 |
class OneMinuteTest(unittest.TestCase):
    """Tests for the OneMinute (1-minute bar) HDF5 store."""

    def setUp(self):
        self.store = OneMinute(h5py.File('%s/data.h5' % helper.datadir))

    def test_namespace(self):
        # The store is rooted at the /1min HDF5 group.
        h = self.store.handle
        self.assertTrue(isinstance(h, h5py.Group))
        self.assertEqual(h.name, '/1min')

    def test_get_from_not_exist_symbol(self):
        # Unknown symbols raise KeyError rather than returning empty data.
        key = 'SH987654'
        self.assertRaises(KeyError, self.store.get, symbol=key, date=datetime.today())

    def test_get_after_update(self):
        """Bars written via update() are readable for the same day."""
        key = 'SH000001'
        date = datetime.fromtimestamp(1316588100)
        # NOTE(review): fixture reuses FiveMinute.DTYPE inside the
        # OneMinute tests — presumably the dtypes are identical; confirm
        # against the store definitions.
        x = np.array([
                (1316588100, 3210.860107421875, 3215.239990234375, 3208.43994140625,
                 3212.919921875, 62756.0, 49122656.0),
                (1316588400, 3213.43994140625, 3214.47998046875, 3206.800048828125,
                 3206.840087890625, 81252.0, 55866096.0)
                ], dtype=FiveMinute.DTYPE)
        self.store.update(key, x)

        y = self.store.get(key, date)
        np.testing.assert_array_equal(y, x)

    def test_update_multi_days(self):
        """One update() spanning two days is split per day on read."""
        key = 'SH000001'
        # First two rows fall on one trading day, last two on the next.
        x = np.array([
                (1316501700, 3130.8701171875, 3137.739990234375, 3128.81005859375,
                 3132.580078125, 30530.0, 20179424.0),
                (1316502000, 3132.68994140625, 3142.75, 3129.8798828125,
                 3141.5400390625, 57703.0, 41456768.0),
                (1316588100, 3210.860107421875, 3215.239990234375, 3208.43994140625,
                 3212.919921875, 62756.0, 49122656.0),
                (1316588400, 3213.43994140625, 3214.47998046875, 3206.800048828125,
                 3206.840087890625, 81252.0, 55866096.0)
                ], dtype=FiveMinute.DTYPE)
        self.store.update(key, x)

        date = datetime.fromtimestamp(1316501700).date()
        y = self.store.get(key, date)
        np.testing.assert_array_equal(y, x[:2])

        date = datetime.fromtimestamp(1316588400).date()
        y = self.store.get(key, date)
        np.testing.assert_array_equal(y, x[2:])

    def test_update_partial_data(self):
        """A partial day's bars land at their time-slot indexes."""
        market_minutes = 60 * 24 # assume 1min data
        store = OneMinute(h5py.File('%s/data.h5' % helper.datadir),
                          market_minutes)
        # 24h market at 1-minute resolution => 60s interval, 1440 slots/day.
        self.assertEqual(store.time_interval, 60)
        self.assertEqual(store.shape_x, 1440)

        key = '999'
        path = os.path.dirname(os.path.realpath(__file__))
        data = np.load(os.path.join(path, '001.npy'))

        store.update(key, data)

        date = datetime.fromtimestamp(1397621820).date()
        y = store.get(key, date)
        # First/last fixture rows map to minute-of-day slots 737 and 1036.
        row1, row2 = y[737], y[1036]
        np.testing.assert_array_equal(row1, data[0])
        np.testing.assert_array_equal(row2, data[-1])
267 |
268 |
class FiveMinuteTest(unittest.TestCase):
    """Tests for the FiveMinute (5-minute bar) HDF5 store."""

    def setUp(self):
        self.store = FiveMinute(h5py.File('%s/data.h5' % helper.datadir))

    def test_namespace(self):
        # The store is rooted at the /5min HDF5 group.
        h = self.store.handle
        self.assertTrue(isinstance(h, h5py.Group))
        self.assertEqual(h.name, '/5min')

    def test_get_from_not_exist_symbol(self):
        # Unknown symbols raise KeyError rather than returning empty data.
        key = 'SH987654'
        self.assertRaises(KeyError, self.store.get, symbol=key, date=datetime.today())

    def test_get_after_update(self):
        """Bars written via update() are readable for the same day."""
        key = 'SH000001'
        date = datetime.fromtimestamp(1316588100)
        x = np.array([
                (1316588100, 3210.860107421875, 3215.239990234375, 3208.43994140625,
                 3212.919921875, 62756.0, 49122656.0),
                (1316588400, 3213.43994140625, 3214.47998046875, 3206.800048828125,
                 3206.840087890625, 81252.0, 55866096.0)
                ], dtype=FiveMinute.DTYPE)
        self.store.update(key, x)

        y = self.store.get(key, date)
        np.testing.assert_array_equal(y, x)

    def test_update_multi_days(self):
        """One update() spanning two days is split per day on read."""
        key = 'SH000001'
        # First two rows fall on one trading day, last two on the next.
        x = np.array([
                (1316501700, 3130.8701171875, 3137.739990234375, 3128.81005859375,
                 3132.580078125, 30530.0, 20179424.0),
                (1316502000, 3132.68994140625, 3142.75, 3129.8798828125,
                 3141.5400390625, 57703.0, 41456768.0),
                (1316588100, 3210.860107421875, 3215.239990234375, 3208.43994140625,
                 3212.919921875, 62756.0, 49122656.0),
                (1316588400, 3213.43994140625, 3214.47998046875, 3206.800048828125,
                 3206.840087890625, 81252.0, 55866096.0)
                ], dtype=FiveMinute.DTYPE)
        self.store.update(key, x)

        date = datetime.fromtimestamp(1316501700).date()
        y = self.store.get(key, date)
        np.testing.assert_array_equal(y, x[:2])

        date = datetime.fromtimestamp(1316588400).date()
        y = self.store.get(key, date)
        np.testing.assert_array_equal(y, x[2:])

    def test_update_multi_partial_days_data(self):
        """Partial days land at the right per-day time-slot indexes."""
        market_minutes = 1440 # 5min data
        store = FiveMinute(h5py.File('%s/data.h5' % helper.datadir),
                           market_minutes)
        # 24h market at 5-minute resolution => 300s interval, 288 slots/day.
        self.assertEqual(store.time_interval, 300)
        self.assertEqual(store.shape_x, 288)

        key = '9991'
        path = os.path.dirname(os.path.realpath(__file__))
        data = np.load(os.path.join(path, '005.npy'))

        store.update(key, data)

        # First fixture row maps to slot 196 of its day...
        date = datetime.fromtimestamp(data[0]['time']).date()
        y1 = store.get(key, date)
        np.testing.assert_array_equal(y1[196], data[0])

        # ...and the last row to slot 206 of its (later) day.
        date = datetime.fromtimestamp(data[-1]['time']).date()
        y2 = store.get(key, date)
        np.testing.assert_array_equal(y2[206], data[-1])

    def test_update_multi_hold_data(self):
        """Data with gaps keeps rows aligned to their time slots."""
        market_minutes = 1440 # 5min data
        store = FiveMinute(h5py.File('%s/data.h5' % helper.datadir),
                           market_minutes)
        key = '9992'
        path = os.path.dirname(os.path.realpath(__file__))
        data = np.load(os.path.join(path, '005_na.npy'))

        store.update(key, data)

        date = datetime.fromtimestamp(data[-1]['time']).date()
        y2 = store.get(key, date)

        # Data has holes between index 171 and index 172.
        np.testing.assert_array_equal(y2[0], data[132])
        np.testing.assert_array_equal(y2[167], data[-1])
        np.testing.assert_array_equal(y2[39], data[171])
        np.testing.assert_array_equal(y2[43], data[172])
358 |
359 |
class MinuteSnapshotCacheTest(unittest.TestCase):
    """Tests for MinuteSnapshotCache: a DictStore-backed cache of
    today's minute snapshots that can be rotated into a persistent
    minute store."""

    def setUp(self):
        self.filename = '%s/dstore_mincache.dump' % helper.datadir
        self.date = datetime.today().date()
        self.store = DictStore.open(self.filename)
        self.mstore = MinuteSnapshotCache(self.store, self.date)

    def test_inited_date(self):
        # The cache remembers the date it was constructed for.
        self.assertEqual(self.mstore.date, datetime.today().date())

    def test_true_of_store(self):
        # A Minute namespace built on top of the cache is truthy.
        ms = Minute(self.mstore, datetime.today().date(), SH().market_minutes)
        self.assertTrue(ms)

    def test_set_get(self):
        """__setitem__/__getitem__ round-trip a minute array."""
        x = helper.sample_minutes()

        symbol = 'TS123456'
        self.mstore[symbol] = x
        y = self.mstore[symbol]
        np.testing.assert_array_equal(y, x)

    def test_reopen(self):
        """Cached data persists across a close/reopen of the backing store."""
        x = helper.sample_minutes()

        symbol = 'TS123456'
        self.mstore[symbol] = x

        # closed: reads through the old cache must now fail
        self.store.close()
        self.assertRaises(AssertionError, self.mstore.get, symbol)

        # reopen
        store = DictStore.open(self.filename)
        mstore = MinuteSnapshotCache(store, self.date)

        # testing reopen data
        y = mstore[symbol]
        np.testing.assert_array_equal(y, x)

    def test_rotate(self):
        """rotate() moves cached data into the target store and empties
        the cache (including its persisted backing)."""
        x = helper.sample_minutes()

        symbol = 'TS123456'
        self.mstore[symbol] = x

        dbm = Manager(helper.datadir, SH())
        tostore = dbm._minutestore_at(self.date, memory=False)

        # rewrite
        self.mstore.rotate(tostore)

        # cache cleaned after rotate
        self.assertRaises(KeyError, self.mstore.get, symbol)

        # testing persistent data
        y = tostore[symbol]
        np.testing.assert_array_equal(y, x)

        # reopen
        mstore = MinuteSnapshotCache(self.store, self.date)

        # testing reopen data: rotation also cleared the backing store
        self.assertRaises(KeyError, mstore.get, symbol)
425 |
426 |
if __name__ == '__main__':
    import shutil
    # unittest.main() raises SystemExit when the run finishes, so code
    # placed after it never executed and the test data dir was never
    # removed.  Run the cleanup from a finally block instead;
    # ignore_errors keeps a missing/locked dir from masking the exit.
    try:
        unittest.main()
    finally:
        shutil.rmtree(helper.datadir, ignore_errors=True)
431 |
--------------------------------------------------------------------------------
/datafeed/providers/tongshi.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Copyright 2010 yinhm
5 |
6 | '''网际风数据接口实现
7 |
8 | 接口
9 | ===
10 | 网际风接口兼容通视协议,通视协议是一个企业规范,因为出现的最早且使用广泛,逐渐
11 | 成为默认的行业标准。
12 |
13 | 通视协议是点播方式,网际风数据源与此不同,采用了全推方式。全推方式接更适合大量
14 | 数据更新。
15 |
16 | 网际风协议在通视基础上增加了盘口,除权,财务等数据。
17 |
18 | 接口调用方式参考文档:
19 | 分析家通视规范: http://www.51wjf.com/stkdrv.txt
20 | 网际风规范: http://www.51wjf.com/wjffun.txt
21 |
22 | 实现
23 | ===
24 | 网际风分客户端和stock.dll两部分,使用python ctypes加载stock.dll触发网际风客户端
25 | 自动运行,登陆服务器,接收数据。网际风数据采用推送方式返回给
26 | stock.dll,stock.dll接收到数据后使用windows message通知监听程序(如本脚本),监
27 | 听程序根据message中的信息不同处理相应数据。
28 | '''
29 |
30 | import os
31 | import sys
32 | import thread
33 | import time
34 |
35 | import win32api
36 | import win32con
37 | import win32gui
38 | import winerror
39 |
40 | from ctypes import *
41 | from ctypes.wintypes import *
42 |
43 | from datetime import datetime
44 |
45 | import numpy as np
46 |
47 | from datafeed.client import Client
48 |
49 |
# Window-message codes pushed by stock.dll to the listener window.
RCV_WORK_SENDMSG = 4

RCV_REPORT = 0x3f001234    # payload is real-time report structures
RCV_FILEDATA = 0x3f001235  # payload is backfill file data

STKLABEL_LEN = 10 # length of the symbol field; domestic symbol codes are QianLong-compatible
STKNAME_LEN = 32  # length of the stock-name field
MAX_PATH = 260    # http://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx#maxpath


FILE_HISTORY_EX = 2  # backfill: daily bars
FILE_MINUTE_EX = 4   # backfill: minute bars
FILE_POWER_EX = 6    # backfill: ex-rights (dividend/split) data


# The following two backfill types are WJF ("网际风") extensions; they are
# not part of the original TongShi protocol.  The payload layout is the
# same as daily bars — only the request parameter differs.
FILE_5MINUTE_EX=0x51 # backfill: 5-minute bars
FILE_1MINUTE_EX=0x52 # backfill: 1-minute bars

FILE_BASE_EX = 0x1000 # QianLong-compatible fundamentals; m_szFileName holds only the file name
FILE_NEWS_EX = 0x1002 # news; category determined by the subdirectory in m_szFileName
FILE_HTML_EX = 0x1004 # HTML document; m_szFileName is a URL

FILE_SOFTWARE_EX = 0x2000 # software-upgrade payload
# Market codes carried in the m_wMarket field of report structures.
# Shanghai market
MARKET_SH = 18515
# Shenzhen market
MARKET_SZ = 23123


def format_market(value):
    """Map a wire-format market code to its exchange prefix.

    Parameters
    ----------
    value : int
        ``m_wMarket`` value from a pushed report/head structure.

    Returns
    -------
    str
        ``'SH'`` for Shanghai, ``'SZ'`` for Shenzhen.

    Raises
    ------
    ValueError
        For any other code.  ValueError subclasses the generic
        Exception raised previously, so existing handlers still work,
        and the message now includes the offending value.
    """
    if value == MARKET_SH:
        return 'SH'
    elif value == MARKET_SZ:
        return 'SZ'
    else:
        raise ValueError('Unknown market: %r' % value)
88 |
89 |
class Report(Structure):
    '''tagRCV_REPORT_STRUCTExV3 data structure.

    Binary layout of one real-time quote pushed by stock.dll.  The
    struct is byte-packed (_pack_ = 1) to mirror the C definition, so
    the field order, types and string lengths below must not change.
    '''
    _pack_ = 1
    _fields_ = [('m_cbSize', WORD),
                ('m_time', c_int), # time_t
                ('m_wMarket', WORD),
                ('m_szLabel', c_char * STKLABEL_LEN), # stock symbol, NUL-terminated
                ('m_szName', c_char * STKNAME_LEN), # stock name, NUL-terminated

                ('m_fLastClose', c_float),
                ('m_fOpen', c_float),
                ('m_fHigh', c_float),
                ('m_fLow', c_float),
                ('m_fNewPrice', c_float),
                ('m_fVolume', c_float),
                ('m_fAmount', c_float),

                # Best three bid/ask levels as parallel float triples.
                ('m_fBuyPrice', c_float * 3),
                ('m_fBuyVolume', c_float * 3),
                ('m_fSellPrice', c_float * 3),
                ('m_fSellVolume', c_float * 3),

                # Levels 4 and 5 are flat scalar fields, not arrays.
                ('m_fBuyPrice4', c_float),
                ('m_fBuyVolume4', c_float),
                ('m_fSellPrice4', c_float),
                ('m_fSellVolume4', c_float),

                ('m_fBuyPrice5', c_float),
                ('m_fBuyVolume5', c_float),
                ('m_fSellPrice5', c_float),
                ('m_fSellVolume5', c_float)]


    @property
    def symbol(self):
        """Exchange-prefixed symbol, e.g. 'SH600000'."""
        return format_market(self.m_wMarket) + self.m_szLabel

    def is_valid(self):
        """Is this report data valid?

        We seems get data full of zero if stock got suspended.
        Use this method to detect is the data valid so you can filter it.
        """
        return self.m_fNewPrice > 0

    def to_dict(self):
        '''Convert to dict object.

        Timestamps are formatted with the local timezone; the stock
        name is decoded from GBK.
        '''
        t = datetime.fromtimestamp(self.m_time)
        t = t.strftime('%Y-%m-%d %H:%M:%S')

        quote = {
            'time' : t,
            'timestamp': self.m_time,
            'price' : self.m_fNewPrice,
            'amount' : self.m_fAmount,
            'volume' : self.m_fVolume,
            'symbol' : self.symbol,
            'name' : self.m_szName.decode('gbk'),
            'open' : self.m_fOpen,
            'high' : self.m_fHigh,
            'low' : self.m_fLow,
            'close' : self.m_fNewPrice, # last price doubles as close
            'preclose' : self.m_fLastClose
        }
        return quote
157 |
158 |
class Head(Structure):
    '''Header record: identifies the market and symbol that the
    following backfill payload belongs to.'''
    _fields_ = [('m_dwHeadTag', DWORD),
                ('m_wMarket', WORD),
                ('m_szLabel', c_char * STKLABEL_LEN)]
164 |
165 |
class History(Structure):
    '''Backfilled daily-bar record (one OHLCV row).'''

    _fields_ = [('m_time', c_int),            # bar timestamp (time_t)
                ('m_fOpen', c_float),
                ('m_fHigh', c_float),
                ('m_fLow', c_float),
                ('m_fClose', c_float),
                ('m_fVolume', c_float),
                ('m_fAmount', c_float),
                ('m_wAdvance', WORD),         # index-only field
                ('m_wDecline', WORD)]         # index-only field

    def to_tuple(self):
        """Convert ohlc to tuple.

        Drops the advance/decline counters and keeps the OHLCV fields.

        Returns
        -------
        tuple
            (time, open, high, low, close, volume, amount)
        """
        return (self.m_time,
                self.m_fOpen,
                self.m_fHigh,
                self.m_fLow,
                self.m_fClose,
                self.m_fVolume,
                self.m_fAmount)
193 |
194 |
195 | class HistoryUnion(Union):
196 | '''日线数据头 or 日线数据'''
197 |
198 | _fields_ = [('data', History),
199 | ('head', Head)]
200 |
201 | DTYPE = [('time', '= 0:
486 | return True
487 | return False
488 |
489 |
def run_tongshi_win(server_addr='localhost', server_password=None):
    """Start the TongShi/WJF listener message loop (Windows only).

    Refuses to start when another listener instance is already running,
    otherwise creates the listener window and pumps Win32 messages
    until the process is terminated.

    Parameters
    ----------
    server_addr : str
        Address of the datafeed server to forward quotes to.
    server_password : str or None
        Password for the datafeed server.
    """
    if program_running():
        print("already running")
        # sys.exit instead of the interactive-only `exit` builtin;
        # `sys` is already imported at module scope.
        sys.exit(0)

    # Keep a reference so the window object outlives the call.
    window = MainWindow(host=server_addr, password=server_password)
    win32gui.PumpMessages()
497 |
if __name__ == '__main__':
    # Script entry point: connect to the default local datafeed server.
    run_tongshi_win()
500 |
--------------------------------------------------------------------------------