├── .gitignore
├── .img
    ├── Screen Shot 2016-04-10 at 20.15.51.png
    └── Screen Shot 2016-04-10 at 20.25.15.png
├── .travis.yml
├── LICENSE
├── MANIFEST
├── Makefile
├── README.md
├── duka
    ├── __init__.py
    ├── app
    │   ├── __init__.py
    │   └── app.py
    ├── core
    │   ├── __init__.py
    │   ├── candle.py
    │   ├── csv_dumper.py
    │   ├── fetch.py
    │   ├── processor.py
    │   └── utils.py
    ├── main.py
    └── tests
    │   ├── __init__.py
    │   ├── test_dates_generator.py
    │   └── test_find_sunday.py
├── requirements.txt
├── setup.cfg
└── setup.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | #  Usually these files are written by a python script from a template
29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 | 
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 | 
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 | 
48 | # Translations
49 | *.mo
50 | *.pot
51 | 
52 | # Django stuff:
53 | *.log
54 | 
55 | # Sphinx documentation
56 | docs/_build/
57 | 
58 | # PyBuilder
59 | target/
60 | 
61 | #Ipython Notebook
62 | .ipynb_checkpoints
63 | atlassian-ide-plugin.xml
64 | .idea
65 | *.csv
66 | test_data/
67 | 


--------------------------------------------------------------------------------
/.img/Screen Shot 2016-04-10 at 20.15.51.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/giuse88/duka/76415dc6ea41141096c4ca4c50196cc6c077ac30/.img/Screen Shot 2016-04-10 at 20.15.51.png


--------------------------------------------------------------------------------
/.img/Screen Shot 2016-04-10 at 20.25.15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/giuse88/duka/76415dc6ea41141096c4ca4c50196cc6c077ac30/.img/Screen Shot 2016-04-10 at 20.25.15.png


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 |   - "3.5"
4 | install:
5 |   - pip install -r requirements.txt
6 | script: make test
7 | notifications:
8 |   email: false
9 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 Giuseppe
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MANIFEST:
--------------------------------------------------------------------------------
1 | # file GENERATED by distutils, do NOT edit
2 | setup.cfg
3 | setup.py
4 | duka/__init__.py
5 | duka/main.py
6 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: test test-register test-release register release
 2 | 
 3 | test:
 4 | 	@python3 -m unittest discover -s ./duka/tests -p "test_*"
 5 | 
 6 | test-register:
 7 | 	python setup.py register -r pypitest
 8 | 
 9 | test-release: test-register
10 | 	python setup.py sdist upload -r pypitest
11 | 
12 | register:
13 | 	python setup.py register -r pypi
14 | 
15 | release: register
16 | 	python setup.py sdist upload -r pypi
17 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # duka - Dukascopy data downloader [![Build Status](https://travis-ci.org/giuse88/duka.svg?branch=master)](https://travis-ci.org/giuse88/duka)
  2 | 
  3 | Finding good Forex data is difficult or expensive. Dukascopy has made available an excellent [web tool](https://www.dukascopy.com/swiss/english/marketwatch/historical/) to download tick data for a large variety of 
  4 | Forex, CFD and commodities. This is awesome and extremely useful for people, like me, trying to study the Forex market. 
  5 | However, it takes a lot of time to download a large data set from the website because you can download only one day at a time. In order to solve this issue, I created **duka**.  
  6 | 
  7 | **duka** is a small terminal application that can be used to download ticks for a given date range from the Dukascopy historical data feed for one or more symbols. **duka** takes advantage of Python threads and coroutines in order to speed up the download. It takes roughly 10 minutes to download tick data for one year for a given instrument. Not bad :)
  8 | 
  9 | Key features :
 10 |  - Ticks data with volumes
 11 |  - Candle formatting with different time-frames ( from 1 minute to 1 day )
 12 |  - CSV output
 13 |  - multi-thread support
 14 |  - Large variety of symbols
 15 | 
 16 | This is what **duka** looks like:
 17 | 
 18 | ![duka](.img/Screen Shot 2016-04-10 at 20.15.51.png)
 19 | 
 20 | As you can see, **duka** estimates the time left until the download is completed. This is extremely useful when downloading a large data set. 
 21 | 
 22 | 
 23 | I hope you enjoy it!! 
 24 | 
 25 | 
 26 | ## Installation
 27 | 
 28 | **duka** requires Python 3.5 and requests 2.0.1. It can be installed using `pip` as follows:
 29 | 
 30 | ```
 31 | pip install duka
 32 | ```
 33 | 
 34 | ## Usage
 35 | ```
 36 |  usage: duka [options]
 37 | 
 38 |  positional arguments:
 39 |     SYMBOLS               symbol list using format EURUSD EURGBP 
 40 | 
 41 |  optional arguments:
 42 |      -h           show help message and exit 
 43 |      -v           show program's version number and exit
 44 |      -d DAY       specific day format YYYY-MM-DD (default yesterday)
 45 |      -s STARTDATE start date format YYYY-MM-DD (default today)
 46 |      -e ENDDATE   end date format YYYY-MM-DD (default today)
 47 |      -c CANDLE    use candles instead of ticks. Accepted values M1 M2 M5 M10 M15 M30 H1 H4 D1
 48 |      -f FOLDER    the downloaded data will be saved in FOLDER (default '.')
 49 |      -t THREAD    number of threads (default 5)
 50 |      --header     include CSV header (default false)
 51 | ```
 52 | 
 53 | ## Examples
 54 | 
 55 | 
 56 | - Help
 57 | 
 58 |    ```
 59 |    duka -h
 60 |    ```
 61 | - Download last available tick set (i.e. yesterday if not Saturday ) for `EURUSD` and `GBPUSD` 
 62 | 
 63 |   ```
 64 |   duka EURUSD GBPUSD 
 65 |   ```
 66 | - Download ticks for the `EURUSD` for a specific day
 67 | 
 68 |   ``` 
 69 |   duka EURUSD -d 2016-02-02
 70 |   ```
 71 | - Download ticks for the `EURUSD` between two dates. For example:
 72 | 
 73 |   ```
 74 |   duka EURUSD -s 2015-01-01 -e 2015-12-31 
 75 |   ```
 76 |   downloads all ticks for the year 2015
 77 | 
 78 | - We can specify only the start date. The default end date will be today. For example:   
 79 |   
 80 |   ```
 81 |   duka EURUSD -s 2016-01-01
 82 |   ```
 83 |   downloads all ticks from the beginning of the year until now. 
 84 | 
 85 | All data is saved in the current folder. You can also specify the number of threads to be used by setting the `-t` option. 
 86 | I recommend not using too many threads because you might encounter problems opening too many connections to the server. 
 87 | 
 88 | ## Helping 
 89 | Found a bug? Missing a feature? Open an issue and I will try to fix it as soon as possible. Pull requests are also welcome. :) 
 90 | 
 91 | ## License
 92 | 
 93 | This software is licensed under the MIT License.
 94 | 
 95 | Copyright Giuseppe Pes, 2016.
 96 | 
 97 | Permission is hereby granted, free of charge, to any person obtaining a
 98 | copy of this software and associated documentation files (the
 99 | "Software"), to deal in the Software without restriction, including
100 | without limitation the rights to use, copy, modify, merge, publish,
101 | distribute, sublicense, and/or sell copies of the Software, and to permit
102 | persons to whom the Software is furnished to do so, subject to the
103 | following conditions:
104 | 
105 | The above copyright notice and this permission notice shall be included
106 | in all copies or substantial portions of the Software.
107 | 
108 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
109 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
110 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
111 | NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
112 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
113 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
114 | USE OR OTHER DEALINGS IN THE SOFTWARE.
115 | 
116 | 
117 | 
118 | 


--------------------------------------------------------------------------------
/duka/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/giuse88/duka/76415dc6ea41141096c4ca4c50196cc6c077ac30/duka/__init__.py


--------------------------------------------------------------------------------
/duka/app/__init__.py:
--------------------------------------------------------------------------------
1 | from .app import app
2 | 


--------------------------------------------------------------------------------
/duka/app/app.py:
--------------------------------------------------------------------------------
  1 | import concurrent
  2 | import threading
  3 | import time
  4 | from collections import deque
  5 | from datetime import timedelta, date
  6 | 
  7 | from ..core import decompress, fetch_day, Logger
  8 | from ..core.csv_dumper import CSVDumper
  9 | from ..core.utils import is_debug_mode, TimeFrame
 10 | 
 11 | SATURDAY = 5
 12 | day_counter = 0
 13 | 
 14 | 
 15 | def days(start, end):
 16 |     if start > end:
 17 |         return
 18 |     end = end + timedelta(days=1)
 19 |     today = date.today()
 20 |     while start != end:
 21 |         if start.weekday() != SATURDAY and start != today:
 22 |             yield start
 23 |         start = start + timedelta(days=1)
 24 | 
 25 | 
 26 | def format_left_time(seconds):
 27 |     if seconds < 0:
 28 |         return "--:--:--"
 29 |     m, s = divmod(seconds, 60)
 30 |     h, m = divmod(m, 60)
 31 |     return "%d:%02d:%02d" % (h, m, s)
 32 | 
 33 | 
 34 | def update_progress(done, total, avg_time_per_job, threads):
 35 |     progress = 1 if total == 0 else done / total
 36 |     progress = int((1.0 if progress > 1.0 else progress) * 100)
 37 |     remainder = 100 - progress
 38 |     estimation = (avg_time_per_job * (total - done) / threads)
 39 |     if not is_debug_mode():
 40 |         print('\r[{0}] {1}%  Left : {2}  '.format('#' * progress + '-' * remainder, progress,
 41 |                                                   format_left_time(estimation)), end='')
 42 | 
 43 | 
 44 | def how_many_days(start, end):
 45 |     return sum(1 for _ in days(start, end))
 46 | 
 47 | 
 48 | def avg(fetch_times):
 49 |     if len(fetch_times) != 0:
 50 |         return sum(fetch_times) / len(fetch_times)
 51 |     else:
 52 |         return -1
 53 | 
 54 | 
 55 | def name(symbol, timeframe, start, end):
 56 |     ext = ".csv"
 57 | 
 58 |     for x in dir(TimeFrame):
 59 |         if getattr(TimeFrame, x) == timeframe:
 60 |             ts_str = x
 61 | 
 62 |     name = symbol + "_" + ts_str + "_" + str(start)
 63 | 
 64 |     if start != end:
 65 |         name += "_" + str(end)
 66 | 
 67 |     return name + ext
 68 | 
 69 | 
 70 | def app(symbols, start, end, threads, timeframe, folder, header):
 71 |     if start > end:
 72 |         return
 73 |     lock = threading.Lock()
 74 |     global day_counter
 75 |     total_days = how_many_days(start, end)
 76 | 
 77 |     if total_days == 0:
 78 |         return
 79 | 
 80 |     last_fetch = deque([], maxlen=5)
 81 |     update_progress(day_counter, total_days, -1, threads)
 82 | 
 83 |     def do_work(symbol, day, csv):
 84 |         global day_counter
 85 |         star_time = time.time()
 86 |         Logger.info("Fetching day {0}".format(day))
 87 |         try:
 88 |             csv.append(day, decompress(symbol, day, fetch_day(symbol, day)))
 89 |         except Exception as e:
 90 |             print("ERROR for {0}, {1} Exception : {2}".format(day, symbol, str(e)))
 91 |         elapsed_time = time.time() - star_time
 92 |         last_fetch.append(elapsed_time)
 93 |         with lock:
 94 |             day_counter += 1
 95 |         Logger.info("Day {0} fetched in {1}s".format(day, elapsed_time))
 96 | 
 97 |     futures = []
 98 | 
 99 |     with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
100 | 
101 |         files = {symbol: CSVDumper(symbol, timeframe, start, end, folder, header) for symbol in symbols}
102 | 
103 |         for symbol in symbols:
104 |             for day in days(start, end):
105 |                 futures.append(executor.submit(do_work, symbol, day, files[symbol]))
106 | 
107 |         for future in concurrent.futures.as_completed(futures):
108 |             if future.exception() is None:
109 |                 update_progress(day_counter, total_days, avg(last_fetch), threads)
110 |             else:
111 |                 Logger.error("An error happen when fetching data : ", future.exception())
112 | 
113 |         Logger.info("Fetching data terminated")
114 |         for file in files.values():
115 |             file.dump()
116 | 
117 |     update_progress(day_counter, total_days, avg(last_fetch), threads)
118 | 


--------------------------------------------------------------------------------
/duka/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .fetch import fetch_day
2 | from .csv_dumper import CSVDumper
3 | from .utils import valid_date, Logger, set_up_signals
4 | from .processor import decompress
5 | 
6 | __all__ = ['fetch_day', 'decompress', 'CSVDumper', 'valid_date', 'Logger', 'set_up_signals']
7 | 
8 | 
9 | 


--------------------------------------------------------------------------------
/duka/core/candle.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime
 2 | 
 3 | 
 4 | class Candle:
 5 |     def __init__(self, symbol, timestamp, timeframe, sorted_values):
 6 |         self.symbol = symbol
 7 |         self.timestamp = timestamp
 8 |         self.timeframe = timeframe
 9 |         self.open_price = sorted_values[0]
10 |         self.close_price = sorted_values[len(sorted_values) - 1]
11 |         self.high = max(sorted_values)
12 |         self.low = min(sorted_values)
13 | 
14 |     def __str__(self):
15 |         return str(datetime.fromtimestamp(self.timestamp)) + " [" + str(self.timestamp) + "] " \
16 |                + "-- " + self.symbol + " -- " \
17 |                + "{ H:" + str(self.high) + " L:" + str(self.low) + " O: " \
18 |                + str(self.open_price) + " C: " + str(self.close_price) + " }"
19 | 
20 |     def __eq__(self, other):
21 |         return self.symbol == other.symbol \
22 |                and self.timestamp == other.timestamp \
23 |                and self.timeframe == other.timeframe \
24 |                and self.close_price == other.close_price \
25 |                and self.open_price == other.open_price \
26 |                and self.high == other.high \
27 |                and self.low == other.low
28 | 
29 |     def __repr__(self):
30 |         return self.__str__()


--------------------------------------------------------------------------------
/duka/core/csv_dumper.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | import time
 3 | from os.path import join
 4 | 
 5 | from .candle import Candle
 6 | from .utils import TimeFrame, stringify, Logger
 7 | 
 8 | TEMPLATE_FILE_NAME = "{}-{}_{:02d}_{:02d}-{}_{:02d}_{:02d}.csv"
 9 | 
10 | 
def format_float(number):
    """Return ``number`` as text with exactly five decimal places."""
    return "{:.5f}".format(number)
13 | 
14 | 
class CSVFormatter(object):
    # Positions of the fields inside a tick tuple, in order.
    (COLUMN_TIME,
     COLUMN_ASK,
     COLUMN_BID,
     COLUMN_ASK_VOLUME,
     COLUMN_BID_VOLUME) = range(5)
21 | 
22 | 
def write_tick(writer, tick):
    """Write one tick as a CSV row; ask and bid are formatted with ``format_float``."""
    row = {}
    row['time'] = tick[0]
    row['ask'] = format_float(tick[1])
    row['bid'] = format_float(tick[2])
    row['ask_volume'] = tick[3]
    row['bid_volume'] = tick[4]
    writer.writerow(row)
30 | 
31 | 
def write_candle(writer, candle):
    """Write one candle as a CSV row with its timestamp rendered by ``stringify``."""
    row = {}
    row['time'] = stringify(candle.timestamp)
    row['open'] = format_float(candle.open_price)
    row['close'] = format_float(candle.close_price)
    row['high'] = format_float(candle.high)
    row['low'] = format_float(candle.low)
    writer.writerow(row)
39 | 
40 | 
class CSVDumper:
    """Collects the data of one symbol day by day and writes it to a single CSV file.

    With ``TimeFrame.TICK`` the raw ticks are stored; with any other timeframe
    the ticks are aggregated into ``Candle`` objects of that width.
    """

    def __init__(self, symbol, timeframe, start, end, folder, header=False):
        self.symbol = symbol
        self.timeframe = timeframe
        self.start = start
        self.end = end
        self.folder = folder
        self.include_header = header
        self.buffer = {}  # day -> list of ticks or candles for that day

    def get_header(self):
        """Return the CSV column names for the configured timeframe."""
        if self.timeframe == TimeFrame.TICK:
            return ['time', 'ask', 'bid', 'ask_volume', 'bid_volume']
        return ['time', 'open', 'close', 'high', 'low']

    def append(self, day, ticks):
        """Store the ticks of ``day``, aggregated into candles unless in tick mode.

        Ticks are grouped by the start of their timeframe bucket, using the
        local-time timestamp from ``time.mktime``. When consecutive ticks fall
        in different buckets, one candle is emitted for the finished bucket
        and for each bucket skipped in between, all built from the finished
        bucket's prices. A day without ticks yields an empty list.
        """
        previous_key = None
        current_ticks = []
        self.buffer[day] = []
        for tick in ticks:
            if self.timeframe == TimeFrame.TICK:
                self.buffer[day].append(tick)
            else:
                ts = time.mktime(tick[0].timetuple())
                key = int(ts - (ts % self.timeframe))
                if previous_key != key and previous_key is not None:
                    n = int((key - previous_key) / self.timeframe)
                    for i in range(0, n):
                        self.buffer[day].append(
                            Candle(self.symbol, previous_key + i * self.timeframe, self.timeframe, current_ticks))
                    current_ticks = []
                current_ticks.append(tick[1])
                previous_key = key

        # Flush the last open bucket. Skip it when the day had no ticks at
        # all: Candle cannot be built from an empty price list.
        if self.timeframe != TimeFrame.TICK and current_ticks:
            self.buffer[day].append(Candle(self.symbol, previous_key, self.timeframe, current_ticks))

    def dump(self):
        """Write all buffered days, in chronological order, to the CSV file."""
        file_name = TEMPLATE_FILE_NAME.format(self.symbol,
                                              self.start.year, self.start.month, self.start.day,
                                              self.end.year, self.end.month, self.end.day)

        Logger.info("Writing {0}".format(file_name))

        # newline='' lets the csv module control line endings itself, as its
        # documentation requires; otherwise extra blank rows can appear on
        # platforms that translate '\n'.
        with open(join(self.folder, file_name), 'w', newline='') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=self.get_header())
            if self.include_header:
                writer.writeheader()
            for day in sorted(self.buffer.keys()):
                for value in self.buffer[day]:
                    if self.timeframe == TimeFrame.TICK:
                        write_tick(writer, value)
                    else:
                        write_candle(writer, value)

        Logger.info("{0} completed".format(file_name))
97 | 


--------------------------------------------------------------------------------
/duka/core/fetch.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import datetime
 3 | import threading
 4 | import time
 5 | from functools import reduce
 6 | from io import BytesIO, DEFAULT_BUFFER_SIZE
 7 | 
 8 | import requests
 9 | 
10 | from ..core.utils import Logger, is_dst
11 | 
12 | URL = "https://www.dukascopy.com/datafeed/{currency}/{year}/{month:02d}/{day:02d}/{hour:02d}h_ticks.bi5"
13 | ATTEMPTS = 5
14 | 
15 | 
async def get(url):
    """Download ``url`` and return its body as a memoryview, retrying on failure.

    The blocking ``requests.get`` call runs in the loop's default executor.
    Up to ``ATTEMPTS`` attempts are made. After an exception the coroutine
    waits ``0.5 * attempt`` seconds before retrying; a non-200 response is
    retried immediately.

    Raises:
        Exception: when every attempt has failed.
    """
    loop = asyncio.get_event_loop()
    # Short label for log messages: the URL without its first 35 characters.
    label = url[35:].replace('/', " ")
    start = time.time()
    Logger.info("Fetching {0}".format(label))
    for attempt in range(ATTEMPTS):
        # Fresh buffer per attempt, so a transfer that fails midway cannot
        # leave partial bytes in front of the retried download.
        buffer = BytesIO()
        try:
            res = await loop.run_in_executor(None, lambda: requests.get(url, stream=True))
            if res.status_code == 200:
                for chunk in res.iter_content(DEFAULT_BUFFER_SIZE):
                    buffer.write(chunk)
                Logger.info("Fetched {0} completed in {1}s".format(label, time.time() - start))
                if len(buffer.getbuffer()) <= 0:
                    Logger.info("Buffer for {0} is empty ".format(label))
                return buffer.getbuffer()
            else:
                Logger.warning("Request to {0} failed with error code : {1} ".format(url, str(res.status_code)))
        except Exception as e:
            Logger.warning("Request {0} failed with exception : {1}".format(label, str(e)))
            # Back off without blocking the event loop, so the other hourly
            # downloads scheduled on the same loop keep running.
            await asyncio.sleep(0.5 * attempt)

    raise Exception("Request failed for {0} after {1} attempts".format(url, ATTEMPTS))
39 | 
40 | 
def create_tasks(symbol, day):
    """Schedule one download task per hour (0-23) of ``day`` for ``symbol``.

    The tasks are created with ``asyncio.ensure_future``, so the event loop
    that should run them must already be set for the calling thread.
    Returns the list of 24 tasks in hour order.
    """
    url_info = {
        'currency': symbol,
        'year': day.year,
        # NOTE(review): presumably the data feed numbers months from zero - confirm.
        'month': day.month - 1,
        'day': day.day
    }
    tasks = [asyncio.ensure_future(get(URL.format(**url_info, hour=i))) for i in range(0, 24)]

    # if is_dst(day):
    #     next_day = day + datetime.timedelta(days=1)
    #     url_info = {
    #         'currency': symbol,
    #         'year': next_day.year,
    #         'month': next_day.month - 1,
    #         'day': next_day.day
    #     }
    #     tasks.append(asyncio.ensure_future(get(URL.format(**url_info, hour=0))))
    return tasks
66 | 
67 | 
def fetch_day(symbol, day):
    """Download all hourly files of ``day`` and return them concatenated.

    A private event loop is created for the call, used to run the tasks from
    ``create_tasks`` to completion, and then closed so its resources are
    released. The result is a memoryview over the hourly payloads joined in
    hour order. If any hourly download failed, its exception is re-raised
    here.
    """
    loop = asyncio.new_event_loop()
    # create_tasks() schedules with ensure_future(), which needs the loop to
    # be the current one for this thread.
    asyncio.set_event_loop(loop)
    try:
        tasks = create_tasks(symbol, day)
        loop.run_until_complete(asyncio.wait(tasks))
    finally:
        asyncio.set_event_loop(None)
        loop.close()

    merged = BytesIO()
    for task in tasks:
        merged.write(task.result())
    return merged.getbuffer()
81 | 


--------------------------------------------------------------------------------
/duka/core/processor.py:
--------------------------------------------------------------------------------
 1 | import struct
 2 | from datetime import timedelta, datetime
 3 | from lzma import LZMADecompressor, LZMAError, FORMAT_AUTO
 4 | from .utils import is_dst
 5 | 
 6 | 
 7 | def decompress_lzma(data):
 8 |     results = []
 9 |     len(data)
10 |     while True:
11 |         decomp = LZMADecompressor(FORMAT_AUTO, None, None)
12 |         try:
13 |             res = decomp.decompress(data)
14 |         except LZMAError:
15 |             if results:
16 |                 break  # Leftover data is not a valid LZMA/XZ stream; ignore it.
17 |             else:
18 |                 raise  # Error on the first iteration; bail out.
19 |         results.append(res)
20 |         data = decomp.unused_data
21 |         if not data:
22 |             break
23 |         if not decomp.eof:
24 |             raise LZMAError("Compressed data ended before the end-of-stream marker was reached")
25 |     return b"".join(results)
26 | 
27 | 
def tokenize(buffer):
    """Split ``buffer`` into 20-byte records and unpack each as ``'!IIIff'``.

    Returns a list of 5-tuples. Trailing bytes that do not fill a whole
    record are ignored.
    """
    record = struct.Struct('!IIIff')
    width = record.size  # 20 bytes
    whole_records = len(buffer) // width
    return [record.unpack(buffer[offset:offset + width])
            for offset in range(0, whole_records * width, width)]
35 | 
36 | 
def add_hour(ticks):
    """Shift each tick's timestamp to the hour it belongs to, in place.

    The hour is rebuilt from the tick order: a tick stays in the hour of the
    already-corrected previous tick, unless its minute is lower than that
    tick's minute, in which case it moves to the next hour. The first tick
    starts at hour 0, except on a Sunday or on 1 January, where it starts at
    hour 21 when ``is_dst`` is true for the day and at hour 22 otherwise.

    Returns the same list object, or the input unchanged when it is empty.
    """
    if len(ticks) == 0:
        return ticks

    hour_delta = 0

    if ticks[0][0].weekday() == 6 or (ticks[0][0].day == 1 and ticks[0][0].month == 1):
        if is_dst(ticks[0][0].date()):
            hour_delta = 21
        else:
            hour_delta = 22

    for index, v in enumerate(ticks):
        if index != 0:
            # ticks[index - 1] already carries its final hour.
            if ticks[index - 1][0].minute > ticks[index][0].minute:
                hour_delta = ticks[index - 1][0].hour + 1
            else:
                hour_delta = ticks[index - 1][0].hour
        ticks[index] = (v[0] + timedelta(hours=hour_delta), v[1], v[2], v[3], v[4])

    return ticks
58 | 
59 | 
def normalize(symbol, day, ticks):
    """Convert raw ``(time, ask, bid, volume_ask, volume_bid)`` records into ticks.

    The time field is added as milliseconds to midnight of ``day``. Prices
    are divided by the point value: 1000 for usdrub, xagusd and xauusd,
    100000 otherwise. Volumes are multiplied by 1000000 and rounded. The
    result is passed through ``add_hour``.
    """
    midnight = datetime(day.year, day.month, day.day)
    if symbol.lower() in ['usdrub', 'xagusd', 'xauusd']:
        point = 1000
    else:
        point = 100000

    converted = [
        (midnight + timedelta(milliseconds=offset),
         ask / point,
         bid / point,
         round(volume_ask * 1000000),
         round(volume_bid * 1000000))
        for offset, ask, bid, volume_ask, volume_bid in ticks
    ]
    return add_hour(converted)
70 | 
71 | 
def decompress(symbol, day, compressed_buffer):
    """Turn the compressed payload of ``day`` into a list of normalized ticks.

    ``compressed_buffer`` may be any bytes-like object. An empty buffer
    yields an empty list, so callers always receive a list of ticks.
    """
    if memoryview(compressed_buffer).nbytes == 0:
        return []
    return normalize(symbol, day, tokenize(decompress_lzma(compressed_buffer)))
76 | 


--------------------------------------------------------------------------------
/duka/core/utils.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import logging
  3 | import os
  4 | import signal
  5 | import sys
  6 | import time
  7 | from datetime import datetime, timedelta, date
  8 | 
  9 | TEMPLATE = '%(asctime)s - %(levelname)s - %(threadName)s [%(thread)d] -  %(message)s'
 10 | 
 11 | SUNDAY = 7
 12 | 
 13 | 
 14 | class TimeFrame(object):
 15 |     TICK = 0
 16 |     S_30 = 30
 17 |     M1 = 60
 18 |     M2 = 120
 19 |     M5 = 300
 20 |     M10 = 600
 21 |     M15 = 900
 22 |     M30 = 1800
 23 |     H1 = 3600
 24 |     H4 = 14400
 25 |     D1 = 86400
 26 | 
 27 | 
 28 | def valid_date(s):
 29 |     try:
 30 |         return datetime.strptime(s, "%Y-%m-%d").date()
 31 |     except ValueError:
 32 |         msg = "Not a valid date: '{0}'.".format(s)
 33 |         raise argparse.ArgumentTypeError(msg)
 34 | 
 35 | 
 36 | def valid_timeframe(s):
 37 |     try:
 38 |         return getattr(TimeFrame, s.upper())
 39 |     except AttributeError:
 40 |         msg = "Not a valid time frame: '{0}'.".format(s)
 41 |         raise argparse.ArgumentTypeError(msg)
 42 | 
 43 | 
 44 | def is_debug_mode():
 45 |     log_env = os.getenv('LOG', None)
 46 |     if log_env is not None:
 47 |         return log_env.upper() == 'DEBUG'
 48 |     else:
 49 |         return False
 50 | 
 51 | 
 52 | def get_logger():
 53 |     logger = logging.getLogger('duka')
 54 |     if is_debug_mode():
 55 |         out_hdlr = logging.StreamHandler(sys.stdout)
 56 |         out_hdlr.setFormatter(logging.Formatter(TEMPLATE))
 57 |         out_hdlr.setLevel(logging.INFO)
 58 |         logger.addHandler(out_hdlr)
 59 |         logger.setLevel(logging.INFO)
 60 |     else:
 61 |         logger.addHandler(logging.NullHandler())
 62 |     return logger
 63 | 
 64 | 
 65 | Logger = get_logger()
 66 | 
 67 | 
 68 | def set_up_signals():
 69 |     def signal_handler(signal, frame):
 70 |         sys.exit(0)
 71 | 
 72 |     signal.signal(signal.SIGINT, signal_handler)
 73 | 
 74 | 
 75 | DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S.%f'
 76 | 
 77 | 
 78 | def to_utc_timestamp(time_str):
 79 |     return time.mktime(from_time_string(time_str).timetuple())
 80 | 
 81 | 
 82 | def from_time_string(time_str):
 83 |     if '.' not in time_str:
 84 |         time_str += '.0'
 85 |     return datetime.strptime(time_str, DATETIME_FORMAT)
 86 | 
 87 | 
 88 | def stringify(timestamp):
 89 |     return str(datetime.fromtimestamp(timestamp))
 90 | 
 91 | 
 92 | def find_sunday(year, month, position):
 93 |     start = date(year, month, 1)
 94 |     day_delta = timedelta(days=1)
 95 |     counter = 0
 96 | 
 97 |     while True:
 98 |         if start.isoweekday() == SUNDAY:
 99 |             counter += 1
100 |         if counter == position:
101 |             return start
102 |         start += day_delta
103 | 
104 | 
def find_dst_begin(year):
    """Return the date DST begins in ``year``: the second Sunday of March."""
    march = 3
    return find_sunday(year, march, 2)
110 | 
111 | 
def find_dst_end(year):
    """Return the date DST ends in ``year``: the first Sunday of November."""
    november = 11
    return find_sunday(year, november, 1)
117 | 
118 | 
def is_dst(day):
    """Return True when ``day`` is on or after the DST start and before the DST end of its year."""
    if day < find_dst_begin(day.year):
        return False
    return day < find_dst_end(day.year)
121 | 


--------------------------------------------------------------------------------
/duka/main.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3.5
 2 | 
 3 | import argparse
 4 | from datetime import date, timedelta
 5 | 
 6 | from duka.app import app
 7 | from duka.core import valid_date, set_up_signals
 8 | from duka.core.utils import valid_timeframe, TimeFrame
 9 | 
10 | VERSION = '0.2.1'
11 | 
12 | 
def main():
    """Command-line entry point: parse the arguments and run the downloader.

    When no start or end date is given, the corresponding bound falls back to
    the ``--day`` value, which itself defaults to yesterday.
    """
    parser = argparse.ArgumentParser(prog='duka', usage='%(prog)s [options]')
    parser.add_argument('-v', '--version', action='version',
                        version='Version: %(prog)s-{version}'.format(version=VERSION))
    parser.add_argument('symbols', metavar='SYMBOLS', type=str, nargs='+',
                        help='symbol list using format EURUSD EURGBP')
    # Help texts below state the defaults the code actually applies.
    parser.add_argument('-d', '--day', type=valid_date, help='specific day format YYYY-MM-DD (default yesterday)',
                        default=date.today() - timedelta(1))
    parser.add_argument('-s', '--startdate', type=valid_date,
                        help='start date format YYYY-MM-DD (default: the --day value)')
    parser.add_argument('-e', '--enddate', type=valid_date,
                        help='end date format YYYY-MM-DD (default: the --day value)')
    parser.add_argument('-t', '--thread', type=int, help='number of threads (default 5)', default=5)
    parser.add_argument('-f', '--folder', type=str, help='destination folder (default .)', default='.')
    parser.add_argument('-c', '--candle', type=valid_timeframe,
                        help='use candles instead of ticks. Accepted values M1 M2 M5 M10 M15 M30 H1 H4 D1',
                        default=TimeFrame.TICK)
    parser.add_argument('--header', action='store_true', help='include CSV header (default false)', default=False)
    args = parser.parse_args()

    start = args.startdate if args.startdate is not None else args.day
    end = args.enddate if args.enddate is not None else args.day

    set_up_signals()
    app(args.symbols, start, end, args.thread, args.candle, args.folder, args.header)
43 | 
44 | 
# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
47 | 


--------------------------------------------------------------------------------
/duka/tests/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'giuseppe'
2 | 


--------------------------------------------------------------------------------
/duka/tests/test_dates_generator.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from datetime import date
 3 | from duka.app.app import days
 4 | 
 5 | 
 6 | class TestDateGenerator(unittest.TestCase):
 7 | 
 8 |     def test_two_dates(self):
 9 |         start = date(2016, 1, 20)
10 |         end = date(2016, 1, 21)
11 |         date_list = [d for d in days(start, end)]
12 |         self.assertEqual(len(date_list), 2)
13 |         self.assertEqual(date_list[0], start)
14 |         self.assertEqual(date_list[len(date_list)-1], end)
15 | 
16 |     def test_one_single_date(self):
17 |         start = date(2016, 1, 20)
18 |         end = date(2016, 1, 20)
19 |         date_list = [d for d in days(start, end)]
20 |         self.assertEqual(len(date_list), 1)
21 |         self.assertEqual(date_list[0], start)
22 |         self.assertEqual(date_list[len(date_list)-1], end)
23 | 
24 |     def test_skip_saturdays(self):
25 |         start = date(2016, 1, 2)
26 |         end = date(2016, 1, 2)
27 |         date_list = [d for d in days(start, end)]
28 |         self.assertEqual(len(date_list), 0)
29 | 
30 |     def test_empty_result_when_start_is_bigger_than_end(self):
31 |         start = date(2016, 9, 2)
32 |         end = date(2016, 1, 2)
33 |         date_list = [d for d in days(start, end)]
34 |         self.assertEqual(len(date_list), 0)
35 | 
36 |     def test_not_fetch_today(self):
37 |         start = date.today()
38 |         end = start
39 |         date_list = [d for d in days(start, end)]
40 |         self.assertEqual(len(date_list), 0)
41 | 


--------------------------------------------------------------------------------
/duka/tests/test_find_sunday.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | import unittest
 3 | 
 4 | from duka.core.utils import find_sunday, find_dst_begin, find_dst_end, is_dst
 5 | 
 6 | 
 7 | class TestFindSunday(unittest.TestCase):
 8 |     def test_find_8_march_2015(self):
 9 |         res = find_sunday(2015, 3, 2)
10 |         self.assertEqual(res.day, 8)
11 | 
12 |     def test_find_9_march_2014(self):
13 |         res = find_sunday(2014, 3, 2)
14 |         self.assertEqual(res.day, 9)
15 | 
16 |     def test_find_13_march_2016(self):
17 |         res = find_sunday(2016, 3, 2)
18 |         self.assertEqual(res.day, 13)
19 | 
20 |     def test_find_1_november_2015(self):
21 |         res = find_sunday(2015, 11, 1)
22 |         self.assertEqual(res.day, 1)
23 | 
24 |     def test_find_2_november_2014(self):
25 |         res = find_sunday(2014, 11, 1)
26 |         self.assertEqual(res.day, 2)
27 | 
28 |     def test_find_6_november_2016(self):
29 |         res = find_sunday(2016, 11, 1)
30 |         self.assertEqual(res.day, 6)
31 | 
32 |     def test_dst_2015(self):
33 |         start = find_dst_begin(2015)
34 |         end = find_dst_end(2015)
35 |         self.assertEqual(start.day, 8)
36 |         self.assertEqual(start.month, 3)
37 |         self.assertEqual(end.day, 1)
38 |         self.assertEqual(end.month, 11)
39 | 
40 |     def test_is_dst(self):
41 |         day = datetime.date(2015, 4, 5)
42 |         self.assertTrue(is_dst(day))
43 | 
44 |     def test_is_not_dst(self):
45 |         day = datetime.date(2015, 1, 1)
46 |         self.assertFalse(is_dst(day))
47 | 
48 |     def test_day_change_is_dst(self):
49 |         day = datetime.date(2015, 3, 8)
50 |         self.assertTrue(is_dst(day))
51 | 
52 |     def test_day_change_back_is_not_dst(self):
53 |         day = datetime.date(2015, 11, 1)
54 |         self.assertFalse(is_dst(day))
55 | 
56 |     def test_is_dst(self):
57 |         day = datetime.date(2013, 11, 3)
58 |         self.assertFalse(is_dst(day))
59 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests>=2.9.1
2 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | from setuptools import setup, find_packages
 4 | 
# Distribution name and version passed to setup() below.
NAME = "duka"
VERSION = '0.2.1'

# Package metadata and installation rules.
setup(
    name=NAME,
    packages=find_packages(),
    install_requires=['requests>=2.9.1'],
    version=VERSION,
    description='Dukascopy Bank SA historical data downloader',
    author='Giuseppe Pes',
    author_email='giuse88@gmail.com',
    url='https://github.com/giuse88/duka',
    # The download URL is built from VERSION.
    download_url='https://github.com/giuse88/duka/tarball/' + VERSION,
    keywords=['dukascopy', 'forex', 'finance', 'historical data', 'price', 'currency'],
    # Installs a `duka` console command that calls duka.main:main.
    entry_points={
        'console_scripts': [
            'duka = duka.main:main',
        ],
    },
    classifiers=[
        "Environment :: Console",
        "Programming Language :: Python :: 3.5",
        "Programming Language :: Python :: 3 :: Only",
        "Development Status :: 4 - Beta",
        "Environment :: Other Environment",
        "Intended Audience :: Developers",
        "Intended Audience :: Financial and Insurance Industry",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
)
36 | 
37 | 


--------------------------------------------------------------------------------