├── tests ├── __init__.py ├── conftest.py ├── test_cli.py ├── test_utils.py └── test_api.py ├── .coveragerc ├── binance_history ├── config.py ├── exceptions.py ├── constants.py ├── __init__.py ├── cli.py ├── api.py └── utils.py ├── .gitignore ├── Makefile ├── docs ├── source │ ├── index.rst │ └── conf.py ├── Makefile └── make.bat ├── .github └── workflows │ ├── publish.yml │ └── tests.yml ├── .readthedocs.yml ├── LICENSE ├── pyproject.toml └── README.rst /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | concurrency=multiprocessing -------------------------------------------------------------------------------- /binance_history/config.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | CACHE_DIR = Path.home() / ".binance-history" 4 | -------------------------------------------------------------------------------- /binance_history/exceptions.py: -------------------------------------------------------------------------------- 1 | class NetworkError(Exception): 2 | pass 3 | 4 | 5 | class DataNotFound(Exception): 6 | pass 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | .vscode/ 3 | __pycache__/ 4 | dist/ 5 | 6 | .DS_Store 7 | 8 | .coverage 9 | .coverage.* 10 | coverage.xml 11 | 12 | docs/build 13 | 14 | t/ -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | coverage: 2 | pytest --cov && coverage html 3 | 4 | clean: 5 | rm .coverage && rm -rf htmlcov/ && rm -rf dist/ && rm -rf .pytest_cache/ && rm -rf tests/.pytest_cache 6 | -------------------------------------------------------------------------------- /binance_history/constants.py: -------------------------------------------------------------------------------- 1 | TIMEFRAMES = [ 2 | "1s", 3 | "1m", 4 | "3m", 5 | "5m", 6 | "15m", 7 | "30m", 8 | "1h", 9 | "2h", 10 | "4h", 11 | "6h", 12 | "8h", 13 | "12h", 14 | "1d", 15 | "3d", 16 | "1w", 17 | "1M", 18 | ] 19 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import coverage 2 | import pytest 3 | 4 | from binance_history import config 5 | 6 | coverage.process_startup() 7 | 8 | 9 | @pytest.fixture(scope="session", autouse=True) 10 | def set_cache_dir(tmp_path_factory): 11 | config.CACHE_DIR = tmp_path_factory.getbasetemp() 12 | -------------------------------------------------------------------------------- /binance_history/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from .api import fetch_klines, fetch_agg_trades, fetch_data 4 | 5 | from importlib import metadata 6 | 7 | __version__ = metadata.version(__package__) 8 | 9 | del metadata, sys 10 | 11 | __all__ = ["fetch_klines", "fetch_agg_trades", "fetch_data"] 12 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to 
binance-history's documentation! 2 | =========================================== 3 | 4 | **binance-history** is a Python library for fetching cryptocurrency klines 5 | or trades data. It downloads data from the `Binance Public Data 6 | `_. 7 | 8 | API 9 | --- 10 | 11 | .. automodule:: binance_history 12 | :members: 13 | :member-order: bysource 14 | 15 | 16 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*.*.*' 7 | jobs: 8 | Publish: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout code 12 | uses: actions/checkout@v3 13 | - name: Install Poetry 14 | run: pipx install poetry 15 | - name: Build 16 | run: poetry build 17 | - name: Check distributions 18 | run: ls -ahl dist 19 | - name: Publish to PyPI 20 | env: 21 | POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_TOKEN }} 22 | run: poetry publish 23 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: "ubuntu-22.04" 5 | tools: 6 | python: "3.11" 7 | jobs: 8 | post_create_environment: 9 | # Install poetry 10 | # https://python-poetry.org/docs/#installing-manually 11 | - pip install poetry 12 | # Tell poetry to not use a virtual environment 13 | - poetry config virtualenvs.create false 14 | # Install dependencies with 'docs' dependency group 15 | # https://python-poetry.org/docs/managing-dependencies/#dependency-groups 16 | - poetry install --only main --only docs 17 | 18 | sphinx: 19 | configuration: docs/source/conf.py -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | on: 3 | push: 4 | paths-ignore: 5 | - 'README.rst' 6 | branches: 7 | - master 8 | pull_request: 9 | paths-ignore: 10 | - 'README.rst' 11 | branches: 12 | - '**' 13 | jobs: 14 | Tests: 15 | name: "Python ${{ matrix.python-version }}" 16 | runs-on: ubuntu-latest 17 | 18 | strategy: 19 | matrix: 20 | python-version: ["3.8", "3.9", "3.10", "3.11"] 21 | 22 | steps: 23 | - uses: actions/checkout@v3 24 | - name: Install Poetry 25 | run: pipx install poetry 26 | - name: Set up Python 27 | uses: actions/setup-python@v4 28 | with: 29 | python-version: "${{ matrix.python-version }}" 30 | cache: 'poetry' 31 | - name: Install dependencies 32 | run: poetry install --without dev --without docs -vvv 33 | - name: Run tests 34 | run: | 35 | poetry run pytest --cov 36 | poetry run coverage xml 37 | - name: Upload coverage 38 | uses: codecov/codecov-action@v3 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Meng Xiangzhuo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "binance-history" 3 | version = "0.1.7" 4 | description = "Fetch binance historical klines or trades easily." 
5 | authors = ["Meng Xiangzhuo "] 6 | readme = "README.rst" 7 | packages = [{ include = "binance_history" }] 8 | license = "MIT" 9 | homepage = "https://github.com/xzmeng/binance-history" 10 | repository = "https://github.com/xzmeng/binance-history" 11 | documentation = "https://binance-history.readthedocs.io/" 12 | keywords = [ 13 | "crypto", 14 | "cryptocurrency", 15 | "backtest", 16 | "backtesting", 17 | "BTC", 18 | "binance", 19 | ] 20 | 21 | [tool.poetry.dependencies] 22 | python = "^3.8" 23 | httpx = "^0.23.1" 24 | pandas = "^1.5.2" 25 | loguru = "^0.6.0" 26 | pendulum = "^2.1.2" 27 | 28 | 29 | [tool.poetry.group.test.dependencies] 30 | pytest = "^7.2.0" 31 | coverage = "^7.0.1" 32 | pytest-cov = "^4.0.0" 33 | 34 | 35 | [tool.poetry.group.dev.dependencies] 36 | jupyter = "^1.0.0" 37 | black = { extras = ["d"], version = "^22.12.0" } 38 | 39 | 40 | [tool.poetry.group.cli.dependencies] 41 | click = "^8.1.3" 42 | openpyxl = "^3.0.10" 43 | 44 | 45 | [tool.poetry.group.docs.dependencies] 46 | sphinx = "^5.0.0" 47 | furo = "^2022.12.7" 48 | 49 | [tool.poetry.scripts] 50 | bh = { callable = "binance_history.cli:main", extras = ["cli"] } 51 | 52 | [build-system] 53 | requires = ["poetry-core"] 54 | build-backend = "poetry.core.masonry.api" 55 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | import pathlib 9 | import sys 10 | 11 | sys.path.insert(0, pathlib.Path(__file__).parents[2].resolve().as_posix()) 12 | from binance_history import __version__ 13 | 14 | project = "binance-history" 15 | copyright = "2022, Meng Xiangzhuo" 16 | author = "Meng Xiangzhuo" 17 | release = __version__ 18 | 19 | # -- General configuration --------------------------------------------------- 20 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 21 | 22 | extensions = [ 23 | "sphinx.ext.autodoc", 24 | "sphinx.ext.autosummary", 25 | "sphinx.ext.intersphinx", 26 | ] 27 | autodoc_typehints = "description" 28 | intersphinx_mapping = { 29 | "python": ("https://docs.python.org/3/", None), 30 | "pandas": ("https://pandas.pydata.org/docs/", None), 31 | } 32 | 33 | templates_path = ["_templates"] 34 | exclude_patterns = [] 35 | 36 | # -- Options for HTML output ------------------------------------------------- 37 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 38 | 39 | html_theme = "furo" 40 | html_static_path = ["_static"] 41 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | import pandas as pd 4 | 5 | 6 | def test_cli_fetch_klines(tmp_path): 7 | csv_path = tmp_path / "a.csv" 8 | json_path = tmp_path / "a.json" 9 | excel_path = tmp_path / "a.xlsx" 10 | non_support_path = tmp_path / "a.sb" 11 | 12 | cmd = ( 13 | "bh --data-type klines --asset-type spot --symbol BTCUSDT --start 2022-1-2" 14 | " --end 2022-1-10 --timeframe 15m --tz Asia/Shanghai --output-path {}" 
15 | ) 16 | subprocess.run(cmd.format(csv_path), shell=True, check=True) 17 | assert csv_path.exists() 18 | 19 | df = pd.read_csv(csv_path, parse_dates=True, index_col=0) 20 | assert df.index[0].day == 2 21 | assert df.index[-1].day == 10 22 | 23 | subprocess.run(cmd.format(json_path), shell=True, check=True) 24 | assert json_path.exists() 25 | 26 | subprocess.run(cmd.format(excel_path), shell=True, check=True) 27 | assert excel_path.exists() 28 | 29 | subprocess.run(cmd.format(excel_path), shell=True, check=True) 30 | assert excel_path.exists() 31 | 32 | process = subprocess.run( 33 | cmd.format(non_support_path), 34 | shell=True, 35 | capture_output=True, 36 | text=True, 37 | ) 38 | assert process.returncode != 0 39 | assert "not support extension name: sb" in process.stderr 40 | 41 | 42 | def test_cli_fetch_agg_trades(tmp_path): 43 | csv_path = tmp_path / "a.csv" 44 | 45 | cmd = ( 46 | "bh --data-type aggTrades --asset-type spot --symbol ETCBTC --start 2022-1-2" 47 | " --end '2022-1-4 12:00' --tz Asia/Shanghai --output-path {}" 48 | ) 49 | subprocess.run(cmd.format(csv_path), shell=True, check=True) 50 | assert csv_path.exists() 51 | 52 | df = pd.read_csv(csv_path, parse_dates=True, index_col=0) 53 | assert df.index[0].day == 2 54 | assert df.index[-1].day == 4 55 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pandas import Timestamp, Timedelta 3 | 4 | from binance_history.utils import gen_data_url, gen_dates 5 | 6 | 7 | def test_gen_data_url(): 8 | assert ( 9 | gen_data_url( 10 | "klines", "spot", "monthly", "BTCUSDT", Timestamp("2022-11"), timeframe="1m" 11 | ) 12 | == "https://data.binance.vision/data/spot/monthly/klines/BTCUSDT/1m/BTCUSDT-1m-2022-11.zip" 13 | ) 14 | assert ( 15 | gen_data_url( 16 | "klines", 17 | "spot", 18 | "daily", 19 | "BTCUSDT", 20 | Timestamp("2022-11-01"), 21 | timeframe="1m", 22 | ) 23 | == "https://data.binance.vision/data/spot/daily/klines/BTCUSDT/1m/BTCUSDT-1m-2022-11-01.zip" 24 | ) 25 | assert ( 26 | gen_data_url("aggTrades", "spot", "monthly", "BTCUSDT", Timestamp("2022-11")) 27 | == "https://data.binance.vision/data/spot/monthly/aggTrades/BTCUSDT/BTCUSDT-aggTrades-2022-11.zip" 28 | ) 29 | with pytest.raises(ValueError): 30 | gen_data_url("aggTrades", "spot", "annual", "BTCUSDT", Timestamp("2022-11")) 31 | with pytest.raises(ValueError): 32 | gen_data_url("klines", "spot", "daily", "BTCUSDT", Timestamp("2022-11")) 33 | with pytest.raises(ValueError): 34 | gen_data_url("trades", "spot", "daily", "BTCUSDT", Timestamp("2022-11")) 35 | 36 | 37 | def test_gen_dates(): 38 | months, days = gen_dates( 39 | "klines", "spot", "BTCUSDT", Timestamp("2022-2-10"), Timestamp("2022-3-5"), "1m" 40 | ) 41 | assert months == [Timestamp("2022-2"), Timestamp("2022-3")] 42 | assert days == [] 43 | 44 | months, days = gen_dates( 45 | "klines", "spot", "BTCUSDT", Timestamp("2022-1-31"), Timestamp("2022-3-5"), "1m" 46 | ) 47 | assert months == [Timestamp("2022-1"), Timestamp("2022-2"), Timestamp("2022-3")] 48 | 49 | with pytest.raises(ValueError): 50 | gen_dates( 51 | "klines", 52 | "spot", 53 | "BTCUSDT", 54 | Timestamp("2022-1-1"), 55 | Timestamp("2021-12-1"), 56 | "1m", 57 | ) 58 | -------------------------------------------------------------------------------- /binance_history/cli.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import click 4 | from 
loguru import logger 5 | 6 | from .api import fetch_data 7 | from .constants import TIMEFRAMES 8 | from .utils import unify_datetime 9 | 10 | 11 | @click.command() 12 | @click.option( 13 | "--symbol", required=True, help="The binance market pair name, e.g. BTCUSDT" 14 | ) 15 | @click.option("--start", required=True, help="The start datetime, e.g. '2022-1-2 1:10'") 16 | @click.option("--end", required=True, help="The end datetime, e.g. '2022-1-25 2:20'") 17 | @click.option( 18 | "--data-type", 19 | type=click.Choice(["klines", "aggTrades"]), 20 | default="klines", 21 | help="choose klines or aggTrades to download, default to 'klines'", 22 | ) 23 | @click.option( 24 | "--asset-type", 25 | type=click.Choice(["spot", "futures/um", "futures/cm"]), 26 | default="spot", 27 | help="choose spot or futures data, default to 'spot'", 28 | ) 29 | @click.option( 30 | "--timeframe", 31 | default="15m", 32 | type=click.Choice(TIMEFRAMES), 33 | help="The timeframe of klines, default to '15m', can be omitted if --data-type is not 'klines'", 34 | ) 35 | @click.option( 36 | "--tz", 37 | default=None, 38 | help="The tz database name of time zone, use your local time zone if omitted", 39 | ) 40 | @click.option( 41 | "--output-path", 42 | help="The path you want to save the downloaded data, support format: [csv, json, xlsx], e.g. a.xlsx", 43 | required=True, 44 | ) 45 | @logger.catch(onerror=lambda _: sys.exit(1)) 46 | def main(data_type, asset_type, symbol, timeframe, start, end, tz, output_path): 47 | df = fetch_data( 48 | data_type=data_type, 49 | asset_type=asset_type, 50 | symbol=symbol, 51 | timeframe=timeframe, 52 | start=unify_datetime(start), 53 | end=unify_datetime(end), 54 | tz=tz, 55 | ) 56 | ext = output_path.split(".")[-1] 57 | 58 | if ext == "csv": 59 | df.to_csv(output_path) 60 | elif ext == "json": 61 | df.to_json(output_path, orient="records") 62 | elif ext == "xlsx": 63 | df.index = df.index.tz_convert(None) 64 | if "close_datetime" in df.columns: 65 | df["close_datetime"] = df.close_datetime.dt.tz_convert(None) 66 | df.to_excel(output_path) 67 | else: 68 | raise ValueError(f"not support extension name: {ext}") 69 | -------------------------------------------------------------------------------- /binance_history/api.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import pandas as pd 4 | import pendulum 5 | from pandas import DataFrame 6 | 7 | from .utils import gen_dates, get_data, unify_datetime 8 | from typing import Optional, Union 9 | 10 | 11 | def fetch_klines( 12 | symbol: str, 13 | start: Union[str, datetime], 14 | end: Union[str, datetime], 15 | timeframe: str = "1m", 16 | asset_type: str = "spot", 17 | tz: Optional[str] = None, 18 | ) -> DataFrame: 19 | """Convenience function that calls ``fetch_data`` with ``data_type="klines"``.""" 20 | 21 | return fetch_data( 22 | data_type="klines", 23 | asset_type=asset_type, 24 | symbol=symbol, 25 | start=start, 26 | end=end, 27 | timeframe=timeframe, 28 | tz=tz, 29 | ) 30 | 31 | 32 | def fetch_agg_trades( 33 | symbol: str, 34 | start: Union[str, datetime], 35 | end: Union[str, datetime], 36 | asset_type: str = "spot", 37 | tz: Optional[str] = None, 38 | ) -> DataFrame: 39 | """Convenience function that calls ``fetch_data`` with ``data_type="aggTrades"``.""" 40 | 41 | return fetch_data( 42 | data_type="aggTrades", 43 | asset_type=asset_type, 44 | symbol=symbol, 45 | start=start, 46 | end=end, 47 | tz=tz, 48 | ) 49 | 50 | 51 | def fetch_data( 52 | symbol: str, 53 | asset_type: str, 54 | data_type: str, 55 | start: datetime, 56 |
end: datetime, 57 | tz: Optional[str] = None, 58 | timeframe: Optional[str] = None, 59 | ) -> DataFrame: 60 | """ 61 | :param symbol: The binance market pair name, e.g. ``'BTCUSDT'``. 62 | :param asset_type: The asset type of requested data. It must be one of ``'spot'``, ``'futures/um'``, ``'futures/cm'``. 63 | :param data_type: The type of requested data. It must be one of ``'klines'``, ``'aggTrades'``. 64 | :param start: The start datetime of requested data. If it's an instance of ``datetime.datetime``, 65 | its timezone is ignored. If it's a ``str``, it should be parseable by 66 | `dateutil `_, e.g. ``"2022-1-1 8:10"``. 67 | :param end: The end datetime of requested data. If it's an instance of ``datetime.datetime``, 68 | its timezone is ignored. If it's a ``str``, it should be parseable by 69 | `dateutil `_, e.g. ``"2022-1-2 8:10"``. 70 | :param tz: Timezone of ``start``, ``end``, and the open/close datetime of the returned dataframe. 71 | It should be a time zone name from the `tz database `_, e.g. "Asia/Shanghai". 72 | You can find a full list of available time zone names in 73 | `List of tz database time zones `_. 74 | :param timeframe: The kline interval, e.g. "1m". See ``binance_history.constants.TIMEFRAMES`` 75 | for the full list of available intervals. 76 | :return: A pandas dataframe with columns `open`, `high`, `low`, `close`, `volume`, `quote_volume`, `trades`, `close_datetime`. 77 | The dataframe's index is the open datetime of the klines; the timezone of the datetime is set by ``tz``, 78 | and if it is None, your local timezone is used. 79 | """ 80 | if tz is None: 81 | tz = pendulum.local_timezone().name 82 | 83 | start, end = unify_datetime(start), unify_datetime(end) 84 | 85 | start, end = pd.Timestamp(start, tz=tz), pd.Timestamp(end, tz=tz) 86 | 87 | symbol = symbol.upper().replace("/", "") 88 | 89 | months, days = gen_dates( 90 | data_type, 91 | asset_type, 92 | symbol, 93 | start.tz_convert(None), 94 | end.tz_convert(None), 95 | timeframe=timeframe, 96 | ) 97 | monthly_dfs = [ 98 | get_data(data_type, asset_type, "monthly", symbol, dt, tz, timeframe) 99 | for dt in months 100 | ] 101 | daily_dfs = [ 102 | get_data(data_type, asset_type, "daily", symbol, dt, tz, timeframe) 103 | for dt in days 104 | ] 105 | df = pd.concat(monthly_dfs + daily_dfs) 106 | return df.loc[start:end] 107 | -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import pendulum 4 | import pytest 5 | from pandas import Timestamp, Timedelta 6 | 7 | from binance_history import fetch_klines, fetch_agg_trades 8 | 9 | 10 | @pytest.mark.parametrize( 11 | ("symbol", "asset_type"), 12 | [ 13 | ("BTCUSDT", "spot"), 14 | ("BTCUSDT", "futures/um"), 15 | ("BTCUSD_PERP", "futures/cm"), 16 | ], 17 | ) 18 | @pytest.mark.parametrize( 19 | ("start", "end", "tz"), 20 | [ 21 | ("2022-1-2", "2022-1-20", "Asia/Shanghai"), 22 | ( 23 | pendulum.datetime(2022, 1, 2, tz="Europe/Paris"), 24 | pendulum.datetime(2022, 1, 20, tz="Europe/Paris"), 25 | "Asia/Shanghai", 26 | ), 27 | ( 28 | datetime.datetime(2022, 1, 2), 29 | datetime.datetime(2022, 1, 20), 30 | "Asia/Shanghai", 31 | ), 32 | ], 33 | ) 34 | @pytest.mark.parametrize("timeframe", ["1m"]) 35 | def test_fetch_klines_1m_one_month(symbol, start, end, timeframe, asset_type, tz): 36 | klines = fetch_klines( 37 | symbol=symbol, 38 | start=start, 39 | end=end, 40 | timeframe=timeframe, 41 | asset_type=asset_type, 42 | tz=tz, 43 | ) 44 | 45 |
first_opentime = Timestamp("2022-1-2", tz="Asia/Shanghai") 46 | first_closetime = Timestamp("2022-1-2 0:0:59.999", tz="Asia/Shanghai") 47 | last_opentime = Timestamp("2022-1-20", tz="Asia/Shanghai") 48 | last_closetime = Timestamp("2022-1-20 0:0:59.999", tz="Asia/Shanghai") 49 | 50 | assert klines.index[0] == first_opentime 51 | assert klines.close_datetime[0] == first_closetime 52 | assert klines.index[-1] == last_opentime 53 | assert klines.close_datetime[-1] == last_closetime 54 | 55 | 56 | def test_fetch_klines_1m_many_months(): 57 | symbol = "BTCUSDT" 58 | start = "2022-1-1 5:29" 59 | end = "2022-2-3 11:31" 60 | tz = "Asia/Shanghai" 61 | 62 | klines = fetch_klines( 63 | symbol=symbol, 64 | start=start, 65 | end=end, 66 | tz=tz, 67 | ) 68 | 69 | assert klines.index[0] == Timestamp(start, tz=tz) 70 | assert klines.close_datetime[0] == Timestamp("2022-1-1 5:29:59.999", tz=tz) 71 | assert klines.index[-1] == Timestamp(end, tz=tz) 72 | assert klines.close_datetime[-1] == Timestamp("2022-2-3 11:31:59.999", tz=tz) 73 | 74 | 75 | def test_fetch_klines_15m_many_months(): 76 | symbol = "BTCUSDT" 77 | start = "2022-1-1 5:29" 78 | end = "2022-2-3 11:31" 79 | tz = "Asia/Shanghai" 80 | 81 | klines = fetch_klines( 82 | symbol=symbol, 83 | start=start, 84 | end=end, 85 | timeframe="15m", 86 | tz=tz, 87 | ) 88 | 89 | assert klines.index[0] == Timestamp("2022-1-1 5:30", tz=tz) 90 | assert klines.close_datetime[0] == Timestamp("2022-1-1 5:44:59.999", tz=tz) 91 | assert klines.index[-1] == Timestamp("2022-2-3 11:30", tz=tz) 92 | assert klines.close_datetime[-1] == Timestamp("2022-2-3 11:44:59.999", tz=tz) 93 | 94 | 95 | def test_fetch_klines_1h_recent_days(): 96 | symbol = "BTCUSDT" 97 | start = Timestamp("2022-11-2 5:29") 98 | end = Timestamp.now() - Timedelta(days=3) 99 | tz = "Asia/Shanghai" 100 | 101 | klines = fetch_klines( 102 | symbol=symbol, 103 | start=start, 104 | end=end, 105 | timeframe="1h", 106 | tz=tz, 107 | ) 108 | 109 | assert klines.index[0] == Timestamp("2022-11-2 6:00", tz=tz) 110 | assert klines.close_datetime[0] == Timestamp("2022-11-2 6:59:59.999", tz=tz) 111 | assert klines.index[-1] == Timestamp( 112 | year=end.year, month=end.month, day=end.day, hour=end.hour, tz=tz 113 | ) 114 | assert klines.close_datetime[-1] == Timestamp( 115 | year=end.year, 116 | month=end.month, 117 | day=end.day, 118 | hour=end.hour, 119 | minute=59, 120 | second=59, 121 | microsecond=999000, 122 | tz=tz, 123 | ) 124 | 125 | 126 | @pytest.mark.parametrize( 127 | ("start", "end", "tz"), 128 | [ 129 | ("2022-10-2", "2022-10-19 23:59:59", "Asia/Shanghai"), 130 | ], 131 | ) 132 | @pytest.mark.parametrize( 133 | ("symbol", "asset_type"), [("ETCBTC", "spot"), ("LTCBUSD", "futures/um")] 134 | ) 135 | def test_fetch_agg_trades_one_month(symbol, start, end, tz, asset_type): 136 | agg_trades = fetch_agg_trades(symbol, start, end, asset_type, tz) 137 | assert agg_trades.index[0].day == 2 138 | assert agg_trades.index[-1].day == 19 139 | 140 | 141 | def test_wrong_datetime_type(): 142 | with pytest.raises(TypeError): 143 | fetch_klines("btcusdt", 3, 4) 144 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Binance History 3 | =============== 4 | 5 | .. image:: https://img.shields.io/pypi/v/binance-history 6 | :target: https://pypi.org/project/binance-history/ 7 | :alt: pypi version 8 | 9 | .. 
image:: https://img.shields.io/github/license/xzmeng/binance-history 10 | :target: https://github.com/xzmeng/binance-history/blob/master/LICENSE 11 | :alt: License - MIT 12 | 13 | .. image:: https://img.shields.io/codecov/c/github/xzmeng/binance-history 14 | :target: https://codecov.io/github/xzmeng/binance-history 15 | :alt: Coverage 16 | 17 | .. image:: https://img.shields.io/github/actions/workflow/status/xzmeng/binance-history/tests.yml?label=tests 18 | :target: https://github.com/xzmeng/binance-history/actions 19 | :alt: Tests Status 20 | 21 | .. image:: https://readthedocs.org/projects/binance-history/badge/?version=latest 22 | :target: https://binance-history.readthedocs.io/en/latest/?badge=latest 23 | :alt: Documentation Status 24 | 25 | .. image:: https://img.shields.io/pypi/pyversions/binance-history 26 | :alt: PyPI - Python Version 27 | 28 | Fetch binance public data easily. 29 | 30 | Supports Python **3.8+**. 31 | 32 | Installation 33 | ============ 34 | 35 | .. code-block:: bash 36 | 37 | $ pip install binance-history 38 | 39 | Usage 40 | ===== 41 | `API docs `_ 42 | 43 | Klines 44 | ------ 45 | 46 | .. code-block:: python 47 | 48 | >>> import binance_history as bh 49 | >>> klines = bh.fetch_klines( 50 | ... symbol="BTCUSDT", 51 | ... timeframe="1m", 52 | ... start="2022-12-14", 53 | ... end="2022-12-24", 54 | ... ) 55 | open high low close volume quote_volume trades close_datetime 56 | open_datetime 57 | 2022-12-14 00:00:00+08:00 17753.54 17768.41 17752.78 17766.99 240.82918 4.277685e+06 5241 2022-12-14 00:00:59.999000+08:00 58 | 2022-12-14 00:01:00+08:00 17766.99 17786.40 17764.37 17781.81 311.47670 5.536668e+06 6278 2022-12-14 00:01:59.999000+08:00 59 | 2022-12-14 00:02:00+08:00 17781.81 17790.54 17771.44 17785.37 372.12992 6.616562e+06 6911 2022-12-14 00:02:59.999000+08:00 60 | 2022-12-14 00:03:00+08:00 17786.23 17800.18 17774.63 17777.35 401.52223 7.142210e+06 6926 2022-12-14 00:03:59.999000+08:00 61 | 2022-12-14 00:04:00+08:00 17777.35 17785.98 17769.15 17781.93 218.03837 3.876373e+06 5519 2022-12-14 00:04:59.999000+08:00 62 | ... ... ... ... ... ... ... ... ... 63 | 2022-12-23 23:56:00+08:00 16850.22 16850.22 16839.55 16842.59 146.38906 2.465894e+06 4229 2022-12-23 23:56:59.999000+08:00 64 | 2022-12-23 23:57:00+08:00 16842.59 16846.22 16839.00 16840.99 86.95440 1.464495e+06 3152 2022-12-23 23:57:59.999000+08:00 65 | 2022-12-23 23:58:00+08:00 16840.99 16843.61 16827.28 16830.27 208.41471 3.508642e+06 4918 2022-12-23 23:58:59.999000+08:00 66 | 2022-12-23 23:59:00+08:00 16830.27 16836.66 16824.41 16832.16 154.10833 2.593717e+06 4502 2022-12-23 23:59:59.999000+08:00 67 | 2022-12-24 00:00:00+08:00 16832.15 16833.62 16828.42 16830.52 119.28572 2.007721e+06 3725 2022-12-24 00:00:59.999000+08:00 68 | 69 | [14401 rows x 8 columns] 70 | 71 | AggTrades 72 | --------- 73 | 74 | .. code-block:: python 75 | 76 | >>> bh.fetch_agg_trades( 77 | ... symbol="ETCBTC", 78 | ... start="2022-11 01:05", 79 | ... end="2022-11-25 3:20", 80 | ... tz="Europe/Paris" 81 | ... ) 82 | price quantity is_buyer_maker 83 | datetime 84 | 2022-11-01 01:05:09.435000+01:00 0.001187 1.60 True 85 | 2022-11-01 01:05:17.639000+01:00 0.001186 29.56 True 86 | 2022-11-01 01:05:18.616000+01:00 0.001186 8.43 True 87 | 2022-11-01 01:05:18.621000+01:00 0.001186 37.31 True 88 | 2022-11-01 01:05:18.748000+01:00 0.001186 0.17 True 89 | ... ... ... ... 
90 | 2022-11-25 03:19:18.317000+01:00 0.001199 5.00 False 91 | 2022-11-25 03:19:19.482000+01:00 0.001199 10.69 False 92 | 2022-11-25 03:19:23.270000+01:00 0.001199 7.55 True 93 | 2022-11-25 03:19:26.082000+01:00 0.001199 2.56 True 94 | 2022-11-25 03:19:40.375000+01:00 0.001199 2.20 False 95 | 96 | 97 | Command Line 98 | ------------ 99 | **binance-history** comes with a command line interface, 100 | you need to install some extra dependencies to use it: 101 | 102 | .. code-block:: bash 103 | 104 | $ pip install 'binance-history[cli]' 105 | 106 | 107 | .. code-block:: bash 108 | 109 | $ bh --help 110 | Usage: bh [OPTIONS] 111 | 112 | Options: 113 | --symbol TEXT The binance market pair name, e.g. BTCUSDT 114 | [required] 115 | --start TEXT The start datetime, e.g. '2022-1-2 1:10' 116 | [required] 117 | --end TEXT The end datetime, e.g. '2022-1-25 2:20 118 | [required] 119 | --data-type [klines|aggTrades] choose klines or aggTrades to download, 120 | default to 'klines' 121 | --asset-type [spot|futures/um|futures/cm] 122 | choose spot or futures data, default to 123 | 'spot' 124 | --timeframe [1s|1m|3m|5m|15m|30m|1h|2h|4h|6h|8h|12h|1d|3d|1w|1M] 125 | The timeframe of klines, default to '15m', 126 | can be omitted if --data-type is not 127 | 'klines' 128 | --tz TEXT The tz database name of time zone, use your 129 | local time zone if omitted' 130 | --output-path TEXT The path you want to save the downloaded 131 | data, support format: [csv, json, xlsx], 132 | e.g. a.xlsx [required] 133 | --help Show this message and exit. 134 | 135 | $ bh --start 2022-1-5 --end 2022-1-7 --symbol ETCBTC --output-path a.xlsx 136 | -------------------------------------------------------------------------------- /binance_history/utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import io 3 | import os 4 | import os.path 5 | import zipfile 6 | from pathlib import Path 7 | from typing import Optional, Union 8 | from urllib.parse import urlparse 9 | 10 | import httpx 11 | import pandas as pd 12 | import pendulum 13 | from pandas import Timestamp, DataFrame 14 | 15 | from . 
import config 16 | from .exceptions import NetworkError, DataNotFound 17 | 18 | 19 | def gen_data_url( 20 | data_type: str, 21 | asset_type: str, 22 | freq: str, 23 | symbol: str, 24 | dt: Timestamp, 25 | timeframe: Optional[str] = None, 26 | ): 27 | url: str 28 | date_str: str 29 | 30 | if freq == "monthly": 31 | date_str = dt.strftime("%Y-%m") 32 | elif freq == "daily": 33 | date_str = dt.strftime("%Y-%m-%d") 34 | else: 35 | raise ValueError(f"freq must be 'monthly' or 'daily', but got '{freq}'") 36 | 37 | if data_type == "klines": 38 | if timeframe is None: 39 | raise ValueError("'timeframe' must not be None when data_type is 'klines'") 40 | url = ( 41 | f"https://data.binance.vision/data/{asset_type}/{freq}/{data_type}/{symbol}/{timeframe}" 42 | f"/{symbol}-{timeframe}-{date_str}.zip" 43 | ) 44 | elif data_type == "aggTrades": 45 | url = ( 46 | f"https://data.binance.vision/data/{asset_type}/{freq}/{data_type}/{symbol}" 47 | f"/{symbol}-{data_type}-{date_str}.zip" 48 | ) 49 | else: 50 | raise ValueError(f"data_type must be 'klines', but got '{data_type}'") 51 | return url 52 | 53 | 54 | def unify_datetime(input: Union[str, datetime.datetime]) -> datetime.datetime: 55 | if isinstance(input, str): 56 | return pendulum.parse(input, strict=False).replace(tzinfo=None) 57 | elif isinstance(input, datetime.datetime): 58 | return input.replace(tzinfo=None) 59 | else: 60 | raise TypeError(input) 61 | 62 | 63 | def exists_month(month_url): 64 | try: 65 | resp = httpx.head(month_url) 66 | except (httpx.TimeoutException, httpx.NetworkError) as e: 67 | raise NetworkError(e) 68 | 69 | if resp.status_code == 200: 70 | return True 71 | elif resp.status_code == 404: 72 | return False 73 | else: 74 | raise NetworkError(resp.status_code) 75 | 76 | 77 | def gen_dates( 78 | data_type: str, 79 | asset_type: str, 80 | symbol: str, 81 | start: Timestamp, 82 | end: Timestamp, 83 | timeframe: Optional[str] = None, 84 | ): 85 | assert start.tz is None and end.tz is None 86 | 87 | if start > end: 88 | raise ValueError("start cannot be greater than end") 89 | 90 | months = pd.date_range( 91 | Timestamp(start.year, start.month, 1), 92 | end, 93 | freq="MS", 94 | ).to_list() 95 | 96 | assert len(months) > 0 97 | 98 | last_month_url = gen_data_url( 99 | data_type, asset_type, "monthly", symbol, months[-1], timeframe=timeframe 100 | ) 101 | 102 | if not exists_month(last_month_url): 103 | daily_month = months.pop() 104 | if len(months) > 1: 105 | second_last_month_url = gen_data_url( 106 | data_type, 107 | asset_type, 108 | "monthly", 109 | symbol, 110 | months[-1], 111 | timeframe=timeframe, 112 | ) 113 | if not exists_month(second_last_month_url): 114 | daily_month = months.pop() 115 | 116 | days = pd.date_range( 117 | Timestamp(daily_month.year, daily_month.month, 1), 118 | end, 119 | freq="D", 120 | ).to_list() 121 | else: 122 | days = [] 123 | 124 | return months, days 125 | 126 | 127 | def get_data( 128 | data_type: str, 129 | asset_type: str, 130 | freq: str, 131 | symbol: str, 132 | dt: Timestamp, 133 | data_tz: str, 134 | timeframe: Optional[str] = None, 135 | ) -> DataFrame: 136 | if data_type == "klines": 137 | assert timeframe is not None 138 | 139 | url = gen_data_url(data_type, asset_type, freq, symbol, dt, timeframe) 140 | 141 | df = load_data_from_disk(url) 142 | if df is None: 143 | df = download_data(data_type, data_tz, url) 144 | save_data_to_disk(url, df) 145 | return df 146 | 147 | 148 | def download_data(data_type: str, data_tz: str, url: str) -> DataFrame: 149 | assert data_type in ["klines", 
"aggTrades"] 150 | 151 | try: 152 | resp = httpx.get(url) 153 | except (httpx.TimeoutException, httpx.NetworkError) as e: 154 | raise NetworkError(e) 155 | 156 | if resp.status_code == 200: 157 | pass 158 | elif resp.status_code == 404: 159 | raise DataNotFound(url) 160 | else: 161 | raise NetworkError(url) 162 | 163 | if data_type == "klines": 164 | return load_klines(data_tz, resp.content) 165 | elif data_type == "aggTrades": 166 | return load_agg_trades(data_tz, resp.content) 167 | 168 | 169 | def load_klines(data_tz: str, content: bytes) -> DataFrame: 170 | with zipfile.ZipFile(io.BytesIO(content)) as zipf: 171 | csv_name = zipf.namelist()[0] 172 | with zipf.open(csv_name, "r") as csvfile: 173 | df = pd.read_csv( 174 | csvfile, 175 | usecols=range(9), 176 | header=None, 177 | names=[ 178 | "open_ms", 179 | "open", 180 | "high", 181 | "low", 182 | "close", 183 | "volume", 184 | "close_ms", 185 | "quote_volume", 186 | "trades", 187 | ], 188 | ) 189 | df["open_datetime"] = pd.to_datetime( 190 | df.open_ms, unit="ms", utc=True 191 | ).dt.tz_convert(data_tz) 192 | df["close_datetime"] = pd.to_datetime( 193 | df.close_ms, unit="ms", utc=True 194 | ).dt.tz_convert(data_tz) 195 | del df["open_ms"] 196 | del df["close_ms"] 197 | df.set_index("open_datetime", inplace=True) 198 | return df 199 | 200 | 201 | def load_agg_trades(data_tz: str, content: bytes) -> DataFrame: 202 | with zipfile.ZipFile(io.BytesIO(content)) as zipf: 203 | csv_name = zipf.namelist()[0] 204 | with zipf.open(csv_name, "r") as csvfile: 205 | df = pd.read_csv( 206 | csvfile, 207 | header=0, 208 | usecols=[1, 2, 5, 6], 209 | names=["price", "quantity", "timestamp", "is_buyer_maker"], 210 | ) 211 | df["datetime"] = pd.to_datetime( 212 | df.timestamp, unit="ms", utc=True 213 | ).dt.tz_convert(data_tz) 214 | del df["timestamp"] 215 | df.set_index("datetime", inplace=True) 216 | return df 217 | 218 | 219 | def get_local_data_path(url: str) -> Path: 220 | path = urlparse(url).path 221 | return config.CACHE_DIR / path[1:] 222 | 223 | 224 | def save_data_to_disk(url: str, df: DataFrame) -> None: 225 | path = get_local_data_path(url) 226 | path.parent.mkdir(parents=True, exist_ok=True) 227 | df.to_pickle(path) 228 | 229 | 230 | def load_data_from_disk(url: str) -> Union[DataFrame, None]: 231 | path = get_local_data_path(url) 232 | if os.path.exists(path): 233 | return pd.read_pickle(path) 234 | else: 235 | return None 236 | --------------------------------------------------------------------------------