├── tests
│   ├── __init__.py
│   ├── conftest.py
│   ├── test_cli.py
│   ├── test_utils.py
│   └── test_api.py
├── .coveragerc
├── binance_history
│   ├── config.py
│   ├── exceptions.py
│   ├── constants.py
│   ├── __init__.py
│   ├── cli.py
│   ├── api.py
│   └── utils.py
├── .gitignore
├── Makefile
├── docs
│   ├── source
│   │   ├── index.rst
│   │   └── conf.py
│   ├── Makefile
│   └── make.bat
├── .github
│   └── workflows
│       ├── publish.yml
│       └── tests.yml
├── .readthedocs.yml
├── LICENSE
├── pyproject.toml
└── README.rst
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | concurrency=multiprocessing
--------------------------------------------------------------------------------
/binance_history/config.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | CACHE_DIR = Path.home() / ".binance-history"
4 |
--------------------------------------------------------------------------------
/binance_history/exceptions.py:
--------------------------------------------------------------------------------
1 | class NetworkError(Exception):
2 | pass
3 |
4 |
5 | class DataNotFound(Exception):
6 | pass
7 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | .vscode/
3 | __pycache__/
4 | dist/
5 |
6 | .DS_Store
7 |
8 | .coverage
9 | .coverage.*
10 | coverage.xml
11 |
12 | docs/build
13 |
14 | t/
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | coverage:
2 | pytest --cov && coverage html
3 |
4 | clean:
5 | rm .coverage && rm -rf htmlcov/ && rm -rf dist/ && rm -rf .pytest_cache/ && rm -rf tests/.pytest_cache
6 |
--------------------------------------------------------------------------------
/binance_history/constants.py:
--------------------------------------------------------------------------------
1 | TIMEFRAMES = [
2 | "1s",
3 | "1m",
4 | "3m",
5 | "5m",
6 | "15m",
7 | "30m",
8 | "1h",
9 | "2h",
10 | "4h",
11 | "6h",
12 | "8h",
13 | "12h",
14 | "1d",
15 | "3d",
16 | "1w",
17 | "1M",
18 | ]
19 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import coverage
2 | import pytest
3 |
4 | from binance_history import config
5 |
6 | coverage.process_startup()
7 |
8 |
9 | @pytest.fixture(scope="session", autouse=True)
10 | def set_cache_dir(tmp_path_factory):
11 | config.CACHE_DIR = tmp_path_factory.getbasetemp()
12 |
--------------------------------------------------------------------------------
/binance_history/__init__.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | from .api import fetch_klines, fetch_agg_trades, fetch_data
4 |
5 | from importlib import metadata
6 |
7 | __version__ = metadata.version(__package__)
8 |
9 | del metadata, sys
10 |
11 | __all__ = ["fetch_klines", "fetch_agg_trades", "fetch_data"]
12 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | Welcome to binance-history's documentation!
2 | ===========================================
3 |
4 | **binance-history** is a Python library for fetching cryptocurrency klines
5 | or trades data. It downloads data from `Binance Public Data
6 | <https://github.com/binance/binance-public-data>`_.
7 |
8 | API
9 | ---
10 |
11 | .. automodule:: binance_history
12 | :members:
13 | :member-order: bysource
14 |
15 |
16 |
--------------------------------------------------------------------------------
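For orientation, a minimal usage sketch of the API documented above (it mirrors the README quick-start; ``fetch_klines`` and its parameters come from ``binance_history.api``):

    import binance_history as bh

    # Fetch 1-minute spot klines; the index is the kline open datetime in the
    # requested timezone, with a close_datetime column for the close time.
    klines = bh.fetch_klines(
        symbol="BTCUSDT",
        timeframe="1m",
        start="2022-12-14",
        end="2022-12-24",
        tz="Asia/Shanghai",
    )
    print(klines[["open", "high", "low", "close", "volume"]].head())
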
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | name: Publish
2 |
3 | on:
4 | push:
5 | tags:
6 | - '*.*.*'
7 | jobs:
8 | Publish:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - name: Checkout code
12 | uses: actions/checkout@v3
13 | - name: Install Poetry
14 | run: pipx install poetry
15 | - name: Build
16 | run: poetry build
17 | - name: Check distributions
18 | run: ls -ahl dist
19 | - name: Publish to PyPI
20 | env:
21 | POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_TOKEN }}
22 | run: poetry publish
23 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | build:
4 | os: "ubuntu-22.04"
5 | tools:
6 | python: "3.11"
7 | jobs:
8 | post_create_environment:
9 | # Install poetry
10 | # https://python-poetry.org/docs/#installing-manually
11 | - pip install poetry
12 | # Tell poetry to not use a virtual environment
13 | - poetry config virtualenvs.create false
14 | # Install dependencies with 'docs' dependency group
15 | # https://python-poetry.org/docs/managing-dependencies/#dependency-groups
16 | - poetry install --only main --only docs
17 |
18 | sphinx:
19 | configuration: docs/source/conf.py
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 |
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | echo.
16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | echo.installed, then set the SPHINXBUILD environment variable to point
18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | echo.may add the Sphinx directory to PATH.
20 | echo.
21 | echo.If you don't have Sphinx installed, grab it from
22 | echo.https://www.sphinx-doc.org/
23 | exit /b 1
24 | )
25 |
26 | if "%1" == "" goto help
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: Tests
2 | on:
3 | push:
4 | paths-ignore:
5 | - 'README.rst'
6 | branches:
7 | - master
8 | pull_request:
9 | paths-ignore:
10 | - 'README.rst'
11 | branches:
12 | - '**'
13 | jobs:
14 | Tests:
15 | name: "Python ${{ matrix.python-version }}"
16 | runs-on: ubuntu-latest
17 |
18 | strategy:
19 | matrix:
20 | python-version: ["3.8", "3.9", "3.10", "3.11"]
21 |
22 | steps:
23 | - uses: actions/checkout@v3
24 | - name: Install Poetry
25 | run: pipx install poetry
26 | - name: Set up Python
27 | uses: actions/setup-python@v4
28 | with:
29 | python-version: "${{ matrix.python-version }}"
30 | cache: 'poetry'
31 | - name: Install dependencies
32 | run: poetry install --without dev --without docs -vvv
33 | - name: Run tests
34 | run: |
35 | poetry run pytest --cov
36 | poetry run coverage xml
37 | - name: Upload coverage
38 | uses: codecov/codecov-action@v3
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Meng Xiangzhuo
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "binance-history"
3 | version = "0.1.7"
4 | description = "Fetch binance historical klines or trades easily."
5 | authors = ["Meng Xiangzhuo "]
6 | readme = "README.rst"
7 | packages = [{ include = "binance_history" }]
8 | license = "MIT"
9 | homepage = "https://github.com/xzmeng/binance-history"
10 | repository = "https://github.com/xzmeng/binance-history"
11 | documentation = "https://binance-history.readthedocs.io/"
12 | keywords = [
13 | "crypto",
14 | "cryptocurrency",
15 | "backtest",
16 | "backtesting",
17 | "BTC",
18 | "binance",
19 | ]
20 |
21 | [tool.poetry.dependencies]
22 | python = "^3.8"
23 | httpx = "^0.23.1"
24 | pandas = "^1.5.2"
25 | loguru = "^0.6.0"
26 | pendulum = "^2.1.2"
27 |
28 |
29 | [tool.poetry.group.test.dependencies]
30 | pytest = "^7.2.0"
31 | coverage = "^7.0.1"
32 | pytest-cov = "^4.0.0"
33 |
34 |
35 | [tool.poetry.group.dev.dependencies]
36 | jupyter = "^1.0.0"
37 | black = { extras = ["d"], version = "^22.12.0" }
38 |
39 |
40 | [tool.poetry.group.cli.dependencies]
41 | click = "^8.1.3"
42 | openpyxl = "^3.0.10"
43 |
44 |
45 | [tool.poetry.group.docs.dependencies]
46 | sphinx = "^5.0.0"
47 | furo = "^2022.12.7"
48 |
49 | [tool.poetry.scripts]
50 | bh = { callable = "binance_history.cli:main", extras = ["cli"] }
51 |
52 | [build-system]
53 | requires = ["poetry-core"]
54 | build-backend = "poetry.core.masonry.api"
55 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # For the full list of built-in configuration values, see the documentation:
4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
5 |
6 | # -- Project information -----------------------------------------------------
7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
8 | import pathlib
9 | import sys
10 |
11 | sys.path.insert(0, pathlib.Path(__file__).parents[2].resolve().as_posix())
12 | from binance_history import __version__
13 |
14 | project = "binance-history"
15 | copyright = "2022, Meng Xiangzhuo"
16 | author = "Meng Xiangzhuo"
17 | release = __version__
18 |
19 | # -- General configuration ---------------------------------------------------
20 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
21 |
22 | extensions = [
23 | "sphinx.ext.autodoc",
24 | "sphinx.ext.autosummary",
25 | "sphinx.ext.intersphinx",
26 | ]
27 | autodoc_typehints = "description"
28 | intersphinx_mapping = {
29 | "python": ("https://docs.python.org/3/", None),
30 | "pandas": ("https://pandas.pydata.org/docs/", None),
31 | }
32 |
33 | templates_path = ["_templates"]
34 | exclude_patterns = []
35 |
36 | # -- Options for HTML output -------------------------------------------------
37 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
38 |
39 | html_theme = "furo"
40 | html_static_path = ["_static"]
41 |
--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 |
3 | import pandas as pd
4 |
5 |
6 | def test_cli_fetch_klines(tmp_path):
7 | csv_path = tmp_path / "a.csv"
8 | json_path = tmp_path / "a.json"
9 | excel_path = tmp_path / "a.xlsx"
10 | non_support_path = tmp_path / "a.sb"
11 |
12 | cmd = (
13 | "bh --data-type klines --asset-type spot --symbol BTCUSDT --start 2022-1-2"
14 | " --end 2022-1-10 --timeframe 15m --tz Asia/Shanghai --output-path {}"
15 | )
16 | subprocess.run(cmd.format(csv_path), shell=True, check=True)
17 | assert csv_path.exists()
18 |
19 | df = pd.read_csv(csv_path, parse_dates=True, index_col=0)
20 | assert df.index[0].day == 2
21 | assert df.index[-1].day == 10
22 |
23 | subprocess.run(cmd.format(json_path), shell=True, check=True)
24 | assert json_path.exists()
25 |
26 | subprocess.run(cmd.format(excel_path), shell=True, check=True)
27 | assert excel_path.exists()
28 |
29 | subprocess.run(cmd.format(excel_path), shell=True, check=True)
30 | assert excel_path.exists()
31 |
32 | process = subprocess.run(
33 | cmd.format(non_support_path),
34 | shell=True,
35 | capture_output=True,
36 | text=True,
37 | )
38 | assert process.returncode != 0
39 | assert "not support extension name: sb" in process.stderr
40 |
41 |
42 | def test_cli_fetch_agg_trades(tmp_path):
43 | csv_path = tmp_path / "a.csv"
44 |
45 | cmd = (
46 | "bh --data-type aggTrades --asset-type spot --symbol ETCBTC --start 2022-1-2"
47 | " --end '2022-1-4 12:00' --tz Asia/Shanghai --output-path {}"
48 | )
49 | subprocess.run(cmd.format(csv_path), shell=True, check=True)
50 | assert csv_path.exists()
51 |
52 | df = pd.read_csv(csv_path, parse_dates=True, index_col=0)
53 | assert df.index[0].day == 2
54 | assert df.index[-1].day == 4
55 |
--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from pandas import Timestamp
3 |
4 | from binance_history.utils import gen_data_url, gen_dates
5 |
6 |
7 | def test_gen_data_url():
8 | assert (
9 | gen_data_url(
10 | "klines", "spot", "monthly", "BTCUSDT", Timestamp("2022-11"), timeframe="1m"
11 | )
12 | == "https://data.binance.vision/data/spot/monthly/klines/BTCUSDT/1m/BTCUSDT-1m-2022-11.zip"
13 | )
14 | assert (
15 | gen_data_url(
16 | "klines",
17 | "spot",
18 | "daily",
19 | "BTCUSDT",
20 | Timestamp("2022-11-01"),
21 | timeframe="1m",
22 | )
23 | == "https://data.binance.vision/data/spot/daily/klines/BTCUSDT/1m/BTCUSDT-1m-2022-11-01.zip"
24 | )
25 | assert (
26 | gen_data_url("aggTrades", "spot", "monthly", "BTCUSDT", Timestamp("2022-11"))
27 | == "https://data.binance.vision/data/spot/monthly/aggTrades/BTCUSDT/BTCUSDT-aggTrades-2022-11.zip"
28 | )
29 | with pytest.raises(ValueError):
30 | gen_data_url("aggTrades", "spot", "annual", "BTCUSDT", Timestamp("2022-11"))
31 | with pytest.raises(ValueError):
32 | gen_data_url("klines", "spot", "daily", "BTCUSDT", Timestamp("2022-11"))
33 | with pytest.raises(ValueError):
34 | gen_data_url("trades", "spot", "daily", "BTCUSDT", Timestamp("2022-11"))
35 |
36 |
37 | def test_gen_dates():
38 | months, days = gen_dates(
39 | "klines", "spot", "BTCUSDT", Timestamp("2022-2-10"), Timestamp("2022-3-5"), "1m"
40 | )
41 | assert months == [Timestamp("2022-2"), Timestamp("2022-3")]
42 | assert days == []
43 |
44 | months, days = gen_dates(
45 | "klines", "spot", "BTCUSDT", Timestamp("2022-1-31"), Timestamp("2022-3-5"), "1m"
46 | )
47 | assert months == [Timestamp("2022-1"), Timestamp("2022-2"), Timestamp("2022-3")]
48 |
49 | with pytest.raises(ValueError):
50 | gen_dates(
51 | "klines",
52 | "spot",
53 | "BTCUSDT",
54 | Timestamp("2022-1-1"),
55 | Timestamp("2021-12-1"),
56 | "1m",
57 | )
58 |
--------------------------------------------------------------------------------
/binance_history/cli.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | import click
4 | from loguru import logger
5 |
6 | from .api import fetch_data
7 | from .constants import TIMEFRAMES
8 | from .utils import unify_datetime
9 |
10 |
11 | @click.command()
12 | @click.option(
13 | "--symbol", required=True, help="The binance market pair name, e.g. BTCUSDT"
14 | )
15 | @click.option("--start", required=True, help="The start datetime, e.g. '2022-1-2 1:10'")
16 | @click.option("--end", required=True, help="The end datetime, e.g. '2022-1-25 2:20")
17 | @click.option(
18 | "--data-type",
19 | type=click.Choice(["klines", "aggTrades"]),
20 | default="klines",
21 | help="choose klines or aggTrades to download, default to 'klines'",
22 | )
23 | @click.option(
24 | "--asset-type",
25 | type=click.Choice(["spot", "futures/um", "futures/cm"]),
26 | default="spot",
27 | help="choose spot or futures data, default to 'spot'",
28 | )
29 | @click.option(
30 | "--timeframe",
31 | default="15m",
32 | type=click.Choice(TIMEFRAMES),
33 | help="The timeframe of klines, default to '15m', can be omitted if --data-type is not 'klines'",
34 | )
35 | @click.option(
36 | "--tz",
37 | default=None,
38 | help="The tz database name of time zone, use your local time zone if omitted'",
39 | )
40 | @click.option(
41 | "--output-path",
42 | help="The path you want to save the downloaded data, support format: [csv, json, xlsx], e.g. a.xlsx",
43 | required=True,
44 | )
45 | @logger.catch(onerror=lambda _: sys.exit(1))
46 | def main(data_type, asset_type, symbol, timeframe, start, end, tz, output_path):
47 | df = fetch_data(
48 | data_type=data_type,
49 | asset_type=asset_type,
50 | symbol=symbol,
51 | timeframe=timeframe,
52 | start=unify_datetime(start),
53 | end=unify_datetime(end),
54 | tz=tz,
55 | )
56 | ext = output_path.split(".")[-1]
57 |
58 | if ext == "csv":
59 | df.to_csv(output_path)
60 | elif ext == "json":
61 | df.to_json(output_path, orient="records")
62 | elif ext == "xlsx":
63 | df.index = df.index.tz_convert(None)
64 | if "close_datetime" in df.columns:
65 | df["close_datetime"] = df.close_datetime.dt.tz_convert(None)
66 | df.to_excel(output_path)
67 | else:
68 | raise ValueError(f"not support extension name: {ext}")
69 |
--------------------------------------------------------------------------------
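The output writer above is dispatched on the ``--output-path`` extension (csv, json, xlsx). A small sketch of driving the CLI the same way ``tests/test_cli.py`` does, assuming the ``cli`` extra is installed:

    import subprocess

    # The extension of --output-path selects df.to_csv / df.to_json / df.to_excel.
    cmd = (
        "bh --data-type klines --asset-type spot --symbol BTCUSDT "
        "--start 2022-1-2 --end 2022-1-10 --timeframe 15m "
        "--tz Asia/Shanghai --output-path klines.csv"
    )
    subprocess.run(cmd, shell=True, check=True)
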
/binance_history/api.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 |
3 | import pandas as pd
4 | import pendulum
5 | from pandas import DataFrame
6 |
7 | from .utils import gen_dates, get_data, unify_datetime
8 | from typing import Optional, Union
9 |
10 |
11 | def fetch_klines(
12 | symbol: str,
13 | start: Union[str, datetime],
14 | end: Union[str, datetime],
15 | timeframe: str = "1m",
16 | asset_type: str = "spot",
17 | tz: Optional[str] = None,
18 | ) -> DataFrame:
19 | """convinience function by calling ``fetch_data``"""
20 |
21 | return fetch_data(
22 | data_type="klines",
23 | asset_type=asset_type,
24 | symbol=symbol,
25 | start=start,
26 | end=end,
27 | timeframe=timeframe,
28 | tz=tz,
29 | )
30 |
31 |
32 | def fetch_agg_trades(
33 | symbol: str,
34 | start: Union[str, datetime],
35 | end: Union[str, datetime],
36 | asset_type: str = "spot",
37 | tz: Optional[str] = None,
38 | ) -> DataFrame:
39 | """convinience function by calling ``fetch_data``"""
40 |
41 | return fetch_data(
42 | data_type="aggTrades",
43 | asset_type=asset_type,
44 | symbol=symbol,
45 | start=start,
46 | end=end,
47 | tz=tz,
48 | )
49 |
50 |
51 | def fetch_data(
52 | symbol: str,
53 | asset_type: str,
54 | data_type: str,
55 | start: datetime,
56 | end: datetime,
57 | tz: Optional[str] = None,
58 | timeframe: Optional[str] = None,
59 | ) -> DataFrame:
60 | """
61 | :param symbol: The binance market pair name, e.g. ``'BTCUSDT'``.
62 | :param asset_type: The asset type of the requested data. It must be one of ``'spot'``, ``'futures/um'``, ``'futures/cm'``.
63 | :param data_type: The type of the requested data. It must be one of ``'klines'``, ``'aggTrades'``.
64 | :param start: The start datetime of the requested data. If it is an instance of ``datetime.datetime``,
65 | its timezone is ignored. If it is a ``str``, it is parsed by
66 | `dateutil <https://github.com/dateutil/dateutil>`_, e.g. ``"2022-1-1 8:10"``.
67 | :param end: The end datetime of the requested data. If it is an instance of ``datetime.datetime``,
68 | its timezone is ignored. If it is a ``str``, it is parsed by
69 | `dateutil <https://github.com/dateutil/dateutil>`_, e.g. ``"2022-1-2 8:10"``.
70 | :param tz: Timezone of ``start``, ``end``, and the open/close datetimes of the returned dataframe.
71 | It should be a time zone name from the `tz database <https://en.wikipedia.org/wiki/Tz_database>`_, e.g. "Asia/Shanghai".
72 | You can find a full list of available time zone names in
73 | `List of tz database time zones <https://en.wikipedia.org/wiki/List_of_tz_database_time_zones>`_.
74 | :param timeframe: The kline interval, e.g. "1m". See ``binance_history.constants.TIMEFRAMES``
75 | for the full list of available intervals.
76 | :return: A pandas dataframe with columns ``open``, ``high``, ``low``, ``close``, ``volume``, ``quote_volume``, ``trades``, ``close_datetime``.
77 | The dataframe's index is the open datetime of the klines; its timezone is set by ``tz``,
78 | and if ``tz`` is None, your local timezone is used.
79 | """
80 | if tz is None:
81 | tz = pendulum.local_timezone().name
82 |
83 | start, end = unify_datetime(start), unify_datetime(end)
84 |
85 | start, end = pd.Timestamp(start, tz=tz), pd.Timestamp(end, tz=tz)
86 |
87 | symbol = symbol.upper().replace("/", "")
88 |
89 | months, days = gen_dates(
90 | data_type,
91 | asset_type,
92 | symbol,
93 | start.tz_convert(None),
94 | end.tz_convert(None),
95 | timeframe=timeframe,
96 | )
97 | monthly_dfs = [
98 | get_data(data_type, asset_type, "monthly", symbol, dt, tz, timeframe)
99 | for dt in months
100 | ]
101 | daily_dfs = [
102 | get_data(data_type, asset_type, "daily", symbol, dt, tz, timeframe)
103 | for dt in days
104 | ]
105 | df = pd.concat(monthly_dfs + daily_dfs)
106 | return df.loc[start:end]
107 |
--------------------------------------------------------------------------------
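For reference, a short sketch of calling ``fetch_data`` directly with the parameters described in the docstring; the ``fetch_klines``/``fetch_agg_trades`` wrappers above only fill in ``data_type``:

    from binance_history import fetch_data

    # Equivalent to fetch_agg_trades("ETCBTC", ...): data_type picks the dataset,
    # asset_type picks spot vs futures, tz sets the timezone of the index.
    trades = fetch_data(
        symbol="ETCBTC",
        asset_type="spot",
        data_type="aggTrades",
        start="2022-11-1",
        end="2022-11-25",
        tz="Europe/Paris",
    )
    print(trades.head())
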
/tests/test_api.py:
--------------------------------------------------------------------------------
1 | import datetime
2 |
3 | import pendulum
4 | import pytest
5 | from pandas import Timestamp, Timedelta
6 |
7 | from binance_history import fetch_klines, fetch_agg_trades
8 |
9 |
10 | @pytest.mark.parametrize(
11 | ("symbol", "asset_type"),
12 | [
13 | ("BTCUSDT", "spot"),
14 | ("BTCUSDT", "futures/um"),
15 | ("BTCUSD_PERP", "futures/cm"),
16 | ],
17 | )
18 | @pytest.mark.parametrize(
19 | ("start", "end", "tz"),
20 | [
21 | ("2022-1-2", "2022-1-20", "Asia/Shanghai"),
22 | (
23 | pendulum.datetime(2022, 1, 2, tz="Europe/Paris"),
24 | pendulum.datetime(2022, 1, 20, tz="Europe/Paris"),
25 | "Asia/Shanghai",
26 | ),
27 | (
28 | datetime.datetime(2022, 1, 2),
29 | datetime.datetime(2022, 1, 20),
30 | "Asia/Shanghai",
31 | ),
32 | ],
33 | )
34 | @pytest.mark.parametrize("timeframe", ["1m"])
35 | def test_fetch_klines_1m_one_month(symbol, start, end, timeframe, asset_type, tz):
36 | klines = fetch_klines(
37 | symbol=symbol,
38 | start=start,
39 | end=end,
40 | timeframe=timeframe,
41 | asset_type=asset_type,
42 | tz=tz,
43 | )
44 |
45 | first_opentime = Timestamp("2022-1-2", tz="Asia/Shanghai")
46 | first_closetime = Timestamp("2022-1-2 0:0:59.999", tz="Asia/Shanghai")
47 | last_opentime = Timestamp("2022-1-20", tz="Asia/Shanghai")
48 | last_closetime = Timestamp("2022-1-20 0:0:59.999", tz="Asia/Shanghai")
49 |
50 | assert klines.index[0] == first_opentime
51 | assert klines.close_datetime[0] == first_closetime
52 | assert klines.index[-1] == last_opentime
53 | assert klines.close_datetime[-1] == last_closetime
54 |
55 |
56 | def test_fetch_klines_1m_many_months():
57 | symbol = "BTCUSDT"
58 | start = "2022-1-1 5:29"
59 | end = "2022-2-3 11:31"
60 | tz = "Asia/Shanghai"
61 |
62 | klines = fetch_klines(
63 | symbol=symbol,
64 | start=start,
65 | end=end,
66 | tz=tz,
67 | )
68 |
69 | assert klines.index[0] == Timestamp(start, tz=tz)
70 | assert klines.close_datetime[0] == Timestamp("2022-1-1 5:29:59.999", tz=tz)
71 | assert klines.index[-1] == Timestamp(end, tz=tz)
72 | assert klines.close_datetime[-1] == Timestamp("2022-2-3 11:31:59.999", tz=tz)
73 |
74 |
75 | def test_fetch_klines_15m_many_months():
76 | symbol = "BTCUSDT"
77 | start = "2022-1-1 5:29"
78 | end = "2022-2-3 11:31"
79 | tz = "Asia/Shanghai"
80 |
81 | klines = fetch_klines(
82 | symbol=symbol,
83 | start=start,
84 | end=end,
85 | timeframe="15m",
86 | tz=tz,
87 | )
88 |
89 | assert klines.index[0] == Timestamp("2022-1-1 5:30", tz=tz)
90 | assert klines.close_datetime[0] == Timestamp("2022-1-1 5:44:59.999", tz=tz)
91 | assert klines.index[-1] == Timestamp("2022-2-3 11:30", tz=tz)
92 | assert klines.close_datetime[-1] == Timestamp("2022-2-3 11:44:59.999", tz=tz)
93 |
94 |
95 | def test_fetch_klines_1h_recent_days():
96 | symbol = "BTCUSDT"
97 | start = Timestamp("2022-11-2 5:29")
98 | end = Timestamp.now() - Timedelta(days=3)
99 | tz = "Asia/Shanghai"
100 |
101 | klines = fetch_klines(
102 | symbol=symbol,
103 | start=start,
104 | end=end,
105 | timeframe="1h",
106 | tz=tz,
107 | )
108 |
109 | assert klines.index[0] == Timestamp("2022-11-2 6:00", tz=tz)
110 | assert klines.close_datetime[0] == Timestamp("2022-11-2 6:59:59.999", tz=tz)
111 | assert klines.index[-1] == Timestamp(
112 | year=end.year, month=end.month, day=end.day, hour=end.hour, tz=tz
113 | )
114 | assert klines.close_datetime[-1] == Timestamp(
115 | year=end.year,
116 | month=end.month,
117 | day=end.day,
118 | hour=end.hour,
119 | minute=59,
120 | second=59,
121 | microsecond=999000,
122 | tz=tz,
123 | )
124 |
125 |
126 | @pytest.mark.parametrize(
127 | ("start", "end", "tz"),
128 | [
129 | ("2022-10-2", "2022-10-19 23:59:59", "Asia/Shanghai"),
130 | ],
131 | )
132 | @pytest.mark.parametrize(
133 | ("symbol", "asset_type"), [("ETCBTC", "spot"), ("LTCBUSD", "futures/um")]
134 | )
135 | def test_fetch_agg_trades_one_month(symbol, start, end, tz, asset_type):
136 | agg_trades = fetch_agg_trades(symbol, start, end, asset_type, tz)
137 | assert agg_trades.index[0].day == 2
138 | assert agg_trades.index[-1].day == 19
139 |
140 |
141 | def test_wrong_datetime_type():
142 | with pytest.raises(TypeError):
143 | fetch_klines("btcusdt", 3, 4)
144 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | ===============
2 | Binance History
3 | ===============
4 |
5 | .. image:: https://img.shields.io/pypi/v/binance-history
6 | :target: https://pypi.org/project/binance-history/
7 | :alt: pypi version
8 |
9 | .. image:: https://img.shields.io/github/license/xzmeng/binance-history
10 | :target: https://github.com/xzmeng/binance-history/blob/master/LICENSE
11 | :alt: License - MIT
12 |
13 | .. image:: https://img.shields.io/codecov/c/github/xzmeng/binance-history
14 | :target: https://codecov.io/github/xzmeng/binance-history
15 | :alt: Coverage
16 |
17 | .. image:: https://img.shields.io/github/actions/workflow/status/xzmeng/binance-history/tests.yml?label=tests
18 | :target: https://github.com/xzmeng/binance-history/actions
19 | :alt: Tests Status
20 |
21 | .. image:: https://readthedocs.org/projects/binance-history/badge/?version=latest
22 | :target: https://binance-history.readthedocs.io/en/latest/?badge=latest
23 | :alt: Documentation Status
24 |
25 | .. image:: https://img.shields.io/pypi/pyversions/binance-history
26 | :alt: PyPI - Python Version
27 |
28 | Fetch binance public data easily.
29 |
30 | Supports Python **3.8+**.
31 |
32 | Installation
33 | ============
34 |
35 | .. code-block:: bash
36 |
37 | $ pip install binance-history
38 |
39 | Usage
40 | =====
41 | `API docs <https://binance-history.readthedocs.io/>`_
42 |
43 | Klines
44 | ------
45 |
46 | .. code-block:: python
47 |
48 | >>> import binance_history as bh
49 | >>> klines = bh.fetch_klines(
50 | ... symbol="BTCUSDT",
51 | ... timeframe="1m",
52 | ... start="2022-12-14",
53 | ... end="2022-12-24",
54 | ... )
55 | open high low close volume quote_volume trades close_datetime
56 | open_datetime
57 | 2022-12-14 00:00:00+08:00 17753.54 17768.41 17752.78 17766.99 240.82918 4.277685e+06 5241 2022-12-14 00:00:59.999000+08:00
58 | 2022-12-14 00:01:00+08:00 17766.99 17786.40 17764.37 17781.81 311.47670 5.536668e+06 6278 2022-12-14 00:01:59.999000+08:00
59 | 2022-12-14 00:02:00+08:00 17781.81 17790.54 17771.44 17785.37 372.12992 6.616562e+06 6911 2022-12-14 00:02:59.999000+08:00
60 | 2022-12-14 00:03:00+08:00 17786.23 17800.18 17774.63 17777.35 401.52223 7.142210e+06 6926 2022-12-14 00:03:59.999000+08:00
61 | 2022-12-14 00:04:00+08:00 17777.35 17785.98 17769.15 17781.93 218.03837 3.876373e+06 5519 2022-12-14 00:04:59.999000+08:00
62 | ... ... ... ... ... ... ... ... ...
63 | 2022-12-23 23:56:00+08:00 16850.22 16850.22 16839.55 16842.59 146.38906 2.465894e+06 4229 2022-12-23 23:56:59.999000+08:00
64 | 2022-12-23 23:57:00+08:00 16842.59 16846.22 16839.00 16840.99 86.95440 1.464495e+06 3152 2022-12-23 23:57:59.999000+08:00
65 | 2022-12-23 23:58:00+08:00 16840.99 16843.61 16827.28 16830.27 208.41471 3.508642e+06 4918 2022-12-23 23:58:59.999000+08:00
66 | 2022-12-23 23:59:00+08:00 16830.27 16836.66 16824.41 16832.16 154.10833 2.593717e+06 4502 2022-12-23 23:59:59.999000+08:00
67 | 2022-12-24 00:00:00+08:00 16832.15 16833.62 16828.42 16830.52 119.28572 2.007721e+06 3725 2022-12-24 00:00:59.999000+08:00
68 |
69 | [14401 rows x 8 columns]
70 |
71 | AggTrades
72 | ---------
73 |
74 | .. code-block:: python
75 |
76 | >>> bh.fetch_agg_trades(
77 | ... symbol="ETCBTC",
78 | ... start="2022-11 01:05",
79 | ... end="2022-11-25 3:20",
80 | ... tz="Europe/Paris"
81 | ... )
82 | price quantity is_buyer_maker
83 | datetime
84 | 2022-11-01 01:05:09.435000+01:00 0.001187 1.60 True
85 | 2022-11-01 01:05:17.639000+01:00 0.001186 29.56 True
86 | 2022-11-01 01:05:18.616000+01:00 0.001186 8.43 True
87 | 2022-11-01 01:05:18.621000+01:00 0.001186 37.31 True
88 | 2022-11-01 01:05:18.748000+01:00 0.001186 0.17 True
89 | ... ... ... ...
90 | 2022-11-25 03:19:18.317000+01:00 0.001199 5.00 False
91 | 2022-11-25 03:19:19.482000+01:00 0.001199 10.69 False
92 | 2022-11-25 03:19:23.270000+01:00 0.001199 7.55 True
93 | 2022-11-25 03:19:26.082000+01:00 0.001199 2.56 True
94 | 2022-11-25 03:19:40.375000+01:00 0.001199 2.20 False
95 |
96 |
97 | Command Line
98 | ------------
99 | **binance-history** comes with a command-line interface;
100 | you need to install some extra dependencies to use it:
101 |
102 | .. code-block:: bash
103 |
104 | $ pip install 'binance-history[cli]'
105 |
106 |
107 | .. code-block:: bash
108 |
109 | $ bh --help
110 | Usage: bh [OPTIONS]
111 |
112 | Options:
113 | --symbol TEXT The binance market pair name, e.g. BTCUSDT
114 | [required]
115 | --start TEXT The start datetime, e.g. '2022-1-2 1:10'
116 | [required]
117 | --end TEXT The end datetime, e.g. '2022-1-25 2:20'
118 | [required]
119 | --data-type [klines|aggTrades] choose klines or aggTrades to download,
120 | default to 'klines'
121 | --asset-type [spot|futures/um|futures/cm]
122 | choose spot or futures data, default to
123 | 'spot'
124 | --timeframe [1s|1m|3m|5m|15m|30m|1h|2h|4h|6h|8h|12h|1d|3d|1w|1M]
125 | The timeframe of klines, default to '15m',
126 | can be omitted if --data-type is not
127 | 'klines'
128 | --tz TEXT The tz database name of time zone, use your
129 | local time zone if omitted
130 | --output-path TEXT The path you want to save the downloaded
131 | data, support format: [csv, json, xlsx],
132 | e.g. a.xlsx [required]
133 | --help Show this message and exit.
134 |
135 | $ bh --start 2022-1-5 --end 2022-1-7 --symbol ETCBTC --output-path a.xlsx
136 |
--------------------------------------------------------------------------------
/binance_history/utils.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import io
3 | import os
4 | import os.path
5 | import zipfile
6 | from pathlib import Path
7 | from typing import Optional, Union
8 | from urllib.parse import urlparse
9 |
10 | import httpx
11 | import pandas as pd
12 | import pendulum
13 | from pandas import Timestamp, DataFrame
14 |
15 | from . import config
16 | from .exceptions import NetworkError, DataNotFound
17 |
18 |
19 | def gen_data_url(
20 | data_type: str,
21 | asset_type: str,
22 | freq: str,
23 | symbol: str,
24 | dt: Timestamp,
25 | timeframe: Optional[str] = None,
26 | ):
27 | url: str
28 | date_str: str
29 |
30 | if freq == "monthly":
31 | date_str = dt.strftime("%Y-%m")
32 | elif freq == "daily":
33 | date_str = dt.strftime("%Y-%m-%d")
34 | else:
35 | raise ValueError(f"freq must be 'monthly' or 'daily', but got '{freq}'")
36 |
37 | if data_type == "klines":
38 | if timeframe is None:
39 | raise ValueError("'timeframe' must not be None when data_type is 'klines'")
40 | url = (
41 | f"https://data.binance.vision/data/{asset_type}/{freq}/{data_type}/{symbol}/{timeframe}"
42 | f"/{symbol}-{timeframe}-{date_str}.zip"
43 | )
44 | elif data_type == "aggTrades":
45 | url = (
46 | f"https://data.binance.vision/data/{asset_type}/{freq}/{data_type}/{symbol}"
47 | f"/{symbol}-{data_type}-{date_str}.zip"
48 | )
49 | else:
50 | raise ValueError(f"data_type must be 'klines', but got '{data_type}'")
51 | return url
52 |
53 |
54 | def unify_datetime(input: Union[str, datetime.datetime]) -> datetime.datetime:
55 | if isinstance(input, str):
56 | return pendulum.parse(input, strict=False).replace(tzinfo=None)
57 | elif isinstance(input, datetime.datetime):
58 | return input.replace(tzinfo=None)
59 | else:
60 | raise TypeError(input)
61 |
62 |
63 | def exists_month(month_url):
64 | try:
65 | resp = httpx.head(month_url)
66 | except (httpx.TimeoutException, httpx.NetworkError) as e:
67 | raise NetworkError(e)
68 |
69 | if resp.status_code == 200:
70 | return True
71 | elif resp.status_code == 404:
72 | return False
73 | else:
74 | raise NetworkError(resp.status_code)
75 |
76 |
77 | def gen_dates(
78 | data_type: str,
79 | asset_type: str,
80 | symbol: str,
81 | start: Timestamp,
82 | end: Timestamp,
83 | timeframe: Optional[str] = None,
84 | ):
85 | assert start.tz is None and end.tz is None
86 |
87 | if start > end:
88 | raise ValueError("start cannot be greater than end")
89 |
90 | months = pd.date_range(
91 | Timestamp(start.year, start.month, 1),
92 | end,
93 | freq="MS",
94 | ).to_list()
95 |
96 | assert len(months) > 0
97 |
98 | last_month_url = gen_data_url(
99 | data_type, asset_type, "monthly", symbol, months[-1], timeframe=timeframe
100 | )
101 |
102 | if not exists_month(last_month_url):
103 | daily_month = months.pop()
104 | if len(months) > 1:
105 | second_last_month_url = gen_data_url(
106 | data_type,
107 | asset_type,
108 | "monthly",
109 | symbol,
110 | months[-1],
111 | timeframe=timeframe,
112 | )
113 | if not exists_month(second_last_month_url):
114 | daily_month = months.pop()
115 |
116 | days = pd.date_range(
117 | Timestamp(daily_month.year, daily_month.month, 1),
118 | end,
119 | freq="D",
120 | ).to_list()
121 | else:
122 | days = []
123 |
124 | return months, days
125 |
126 |
127 | def get_data(
128 | data_type: str,
129 | asset_type: str,
130 | freq: str,
131 | symbol: str,
132 | dt: Timestamp,
133 | data_tz: str,
134 | timeframe: Optional[str] = None,
135 | ) -> DataFrame:
136 | if data_type == "klines":
137 | assert timeframe is not None
138 |
139 | url = gen_data_url(data_type, asset_type, freq, symbol, dt, timeframe)
140 |
141 | df = load_data_from_disk(url)
142 | if df is None:
143 | df = download_data(data_type, data_tz, url)
144 | save_data_to_disk(url, df)
145 | return df
146 |
147 |
148 | def download_data(data_type: str, data_tz: str, url: str) -> DataFrame:
149 | assert data_type in ["klines", "aggTrades"]
150 |
151 | try:
152 | resp = httpx.get(url)
153 | except (httpx.TimeoutException, httpx.NetworkError) as e:
154 | raise NetworkError(e)
155 |
156 | if resp.status_code == 200:
157 | pass
158 | elif resp.status_code == 404:
159 | raise DataNotFound(url)
160 | else:
161 | raise NetworkError(url)
162 |
163 | if data_type == "klines":
164 | return load_klines(data_tz, resp.content)
165 | elif data_type == "aggTrades":
166 | return load_agg_trades(data_tz, resp.content)
167 |
168 |
169 | def load_klines(data_tz: str, content: bytes) -> DataFrame:
170 | with zipfile.ZipFile(io.BytesIO(content)) as zipf:
171 | csv_name = zipf.namelist()[0]
172 | with zipf.open(csv_name, "r") as csvfile:
173 | df = pd.read_csv(
174 | csvfile,
175 | usecols=range(9),
176 | header=None,
177 | names=[
178 | "open_ms",
179 | "open",
180 | "high",
181 | "low",
182 | "close",
183 | "volume",
184 | "close_ms",
185 | "quote_volume",
186 | "trades",
187 | ],
188 | )
189 | df["open_datetime"] = pd.to_datetime(
190 | df.open_ms, unit="ms", utc=True
191 | ).dt.tz_convert(data_tz)
192 | df["close_datetime"] = pd.to_datetime(
193 | df.close_ms, unit="ms", utc=True
194 | ).dt.tz_convert(data_tz)
195 | del df["open_ms"]
196 | del df["close_ms"]
197 | df.set_index("open_datetime", inplace=True)
198 | return df
199 |
200 |
201 | def load_agg_trades(data_tz: str, content: bytes) -> DataFrame:
202 | with zipfile.ZipFile(io.BytesIO(content)) as zipf:
203 | csv_name = zipf.namelist()[0]
204 | with zipf.open(csv_name, "r") as csvfile:
205 | df = pd.read_csv(
206 | csvfile,
207 | header=0,
208 | usecols=[1, 2, 5, 6],
209 | names=["price", "quantity", "timestamp", "is_buyer_maker"],
210 | )
211 | df["datetime"] = pd.to_datetime(
212 | df.timestamp, unit="ms", utc=True
213 | ).dt.tz_convert(data_tz)
214 | del df["timestamp"]
215 | df.set_index("datetime", inplace=True)
216 | return df
217 |
218 |
219 | def get_local_data_path(url: str) -> Path:
220 | path = urlparse(url).path
221 | return config.CACHE_DIR / path[1:]
222 |
223 |
224 | def save_data_to_disk(url: str, df: DataFrame) -> None:
225 | path = get_local_data_path(url)
226 | path.parent.mkdir(parents=True, exist_ok=True)
227 | df.to_pickle(path)
228 |
229 |
230 | def load_data_from_disk(url: str) -> Union[DataFrame, None]:
231 | path = get_local_data_path(url)
232 | if os.path.exists(path):
233 | return pd.read_pickle(path)
234 | else:
235 | return None
236 |
--------------------------------------------------------------------------------