├── .github └── workflows │ ├── cron_export.yml │ ├── manual_export.yml │ └── refresh_token.yml ├── .gitignore ├── LICENSE ├── README.md ├── cli.py └── requirements.txt /.github/workflows/cron_export.yml: -------------------------------------------------------------------------------- 1 | name: Daily Export of Baidu Tongji Data 2 | 3 | on: 4 | schedule: 5 | - cron: '30 11 * * *' # UTC Time, Beijing Time: 19:30 6 | workflow_dispatch: 7 | 8 | jobs: 9 | export: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Add Mask 14 | run: | 15 | echo "::add-mask::${{ secrets.BAIDU_TONGJI_REFRESH_TOKEN }}" 16 | echo "::add-mask::${{ secrets.BAIDU_TONGJI_ACCESS_TOKEN }}" 17 | echo "::add-mask::${{ secrets.BAIDU_TONGJI_API_KEY }}" 18 | echo "::add-mask::${{ secrets.BAIDU_TONGJI_SECRET_KEY }}" 19 | echo "::add-mask::${{ secrets.REPO_ACCESS_TOKEN }}" 20 | echo "::add-mask::${{ secrets.GIT_EMAIL }}" 21 | echo "::add-mask::${{ secrets.GIT_USERNAME }}" 22 | - name: Clone Baidu Tongji Exporter 23 | uses: actions/checkout@v2 24 | with: 25 | repository: ${{ github.repository }} 26 | - name: Set up Python 27 | uses: actions/setup-python@v2 28 | with: 29 | python-version: '3.x' 30 | - name: Install dependencies 31 | run: | 32 | python -m pip install --upgrade pip 33 | pip install -r requirements.txt 34 | - name: Clone Data Repo 35 | env: 36 | BAIDU_TONGJI_ACCESS_TOKEN: ${{ secrets.BAIDU_TONGJI_ACCESS_TOKEN }} 37 | run: | 38 | git config --global user.email "${{ secrets.GIT_EMAIL }}" 39 | git config --global user.name "${{ secrets.GIT_USERNAME }}" 40 | git config --global credential.helper cache 41 | git clone https://${{secrets.REPO_ACCESS_TOKEN}}@github.com/${{secrets.REPO_NAME}} data 42 | python cli.py fetch -p ./data 43 | cd data 44 | git add . 
45 | git commit -m "$(TZ=Asia/Shanghai date)" 46 | git push 47 | -------------------------------------------------------------------------------- /.github/workflows/manual_export.yml: -------------------------------------------------------------------------------- 1 | name: Manual Export of Baidu Tongji Data 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | export_date: 7 | type: string 8 | description: Date to export, in form of YYYY-MM-DD of Beijing time 9 | required: true 10 | 11 | jobs: 12 | export: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: Add Mask 17 | run: | 18 | echo "::add-mask::${{ secrets.BAIDU_TONGJI_REFRESH_TOKEN }}" 19 | echo "::add-mask::${{ secrets.BAIDU_TONGJI_ACCESS_TOKEN }}" 20 | echo "::add-mask::${{ secrets.BAIDU_TONGJI_API_KEY }}" 21 | echo "::add-mask::${{ secrets.BAIDU_TONGJI_SECRET_KEY }}" 22 | echo "::add-mask::${{ secrets.REPO_ACCESS_TOKEN }}" 23 | echo "::add-mask::${{ secrets.GIT_EMAIL }}" 24 | echo "::add-mask::${{ secrets.GIT_USERNAME }}" 25 | - name: Clone Baidu Tongji Exporter 26 | uses: actions/checkout@v2 27 | with: 28 | repository: ${{ github.repository }} 29 | - name: Set up Python 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: '3.x' 33 | - name: Install dependencies 34 | run: | 35 | python -m pip install --upgrade pip 36 | pip install -r requirements.txt 37 | - name: Clone Data Repo 38 | env: 39 | BAIDU_TONGJI_ACCESS_TOKEN: ${{ secrets.BAIDU_TONGJI_ACCESS_TOKEN }} 40 | run: | 41 | git config --global user.email "${{ secrets.GIT_EMAIL }}" 42 | git config --global user.name "${{ secrets.GIT_USERNAME }}" 43 | git config --global credential.helper cache 44 | git clone https://${{secrets.REPO_ACCESS_TOKEN}}@github.com/${{secrets.REPO_NAME}} data 45 | python cli.py fetch -p ./data -d "${{ github.event.inputs.export_date }}" 46 | cd data 47 | git add . 
48 | git commit -m "$(TZ=Asia/Shanghai date)" 49 | git push 50 | -------------------------------------------------------------------------------- /.github/workflows/refresh_token.yml: -------------------------------------------------------------------------------- 1 | name: Refresh Baidu Tongji Tokens 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 * * 1' 6 | workflow_dispatch: 7 | 8 | jobs: 9 | refresh: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Add Mask 14 | run: | 15 | echo "::add-mask::${{ secrets.BAIDU_TONGJI_REFRESH_TOKEN }}" 16 | echo "::add-mask::${{ secrets.BAIDU_TONGJI_ACCESS_TOKEN }}" 17 | echo "::add-mask::${{ secrets.BAIDU_TONGJI_API_KEY }}" 18 | echo "::add-mask::${{ secrets.BAIDU_TONGJI_SECRET_KEY }}" 19 | echo "::add-mask::${{ secrets.REPO_ACCESS_TOKEN }}" 20 | - name: Clone Baidu Tongji Exporter 21 | uses: actions/checkout@v2 22 | with: 23 | repository: ${{ github.repository }} 24 | - name: Set up Python 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: '3.x' 28 | - name: Install dependencies 29 | run: | 30 | python -m pip install --upgrade pip 31 | pip install -r requirements.txt 32 | - name: Refresh Tokens 33 | id: refresh 34 | env: 35 | BAIDU_TONGJI_REFRESH_TOKEN: ${{ secrets.BAIDU_TONGJI_REFRESH_TOKEN }} 36 | BAIDU_TONGJI_API_KEY: ${{ secrets.BAIDU_TONGJI_API_KEY }} 37 | BAIDU_TONGJI_SECRET_KEY: ${{ secrets.BAIDU_TONGJI_SECRET_KEY }} 38 | run: | 39 | python cli.py refresh-token > tokens.key 40 | refresh_token=`sed '1q;d' tokens.key` 41 | access_token=`sed '2q;d' tokens.key` 42 | echo "::add-mask::$refresh_token" 43 | echo "::add-mask::$access_token" 44 | echo "Refresh token: $refresh_token" 45 | echo "Access token: $access_token" 46 | echo "::set-output name=REFRESH_TOKEN::$refresh_token" 47 | echo "::set-output name=ACCESS_TOKEN::$access_token" 48 | - name: Update Refresh Token 49 | uses: hmanzur/actions-set-secret@v2.0.0 50 | with: 51 | name: 'BAIDU_TONGJI_REFRESH_TOKEN' 52 | value: ${{ 
steps.refresh.outputs.REFRESH_TOKEN }} 53 | repository: ${{ github.repository }} 54 | token: ${{ secrets.REPO_ACCESS_TOKEN }} 55 | - name: Update Access Token 56 | uses: hmanzur/actions-set-secret@v2.0.0 57 | with: 58 | name: 'BAIDU_TONGJI_ACCESS_TOKEN' 59 | value: ${{ steps.refresh.outputs.ACCESS_TOKEN }} 60 | repository: ${{ github.repository }} 61 | token: ${{ secrets.REPO_ACCESS_TOKEN }} 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 gyro永不抽风 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BaiduTongjiExporter 2 | 3 | **本项目已适配 Github Actions,Fork 本项目即可使用,具体使用方法见下文。** 4 | 5 | 由于百度统计的如下政策修改,开发本工具用于数据定时导出. 6 | 7 | > 尊敬的百度统计用户您好,基础统计报告对于分析云站点最早查询时间将调整为2年,其余站点最早查询时间调整为1年,为避免数据丢失,建议您于2022.4.12前完成历史数据的下载或截图备份。如有疑问或更久的数据存储时长需求,您可发邮件至ext_tongji_reply@baidu.com 或 点击咨询 与我们联系,由此给您造成的不便请您谅解。 8 | 9 | 下面是定时同步策略: 10 | 11 | 每日获取【北京时间次日 12 时】: 12 | * 网站概况(趋势数据) 13 | * 网站概况(地域分布) 14 | * 网站概况(来源网站、搜索词、入口页面、受访页面) 15 | * 趋势分析【今日,昨日对比】 16 | * 实时访客 17 | * 全部来源【今日,昨日对比】 18 | * 搜索引擎【今日,昨日对比】 19 | * 搜索词【今日,昨日对比】 20 | * 外部链接【今日,昨日对比】 21 | * 受访页面【今日,昨日对比】 22 | * 入口页面【今日,昨日对比】 23 | * 受访域名【今日,昨日对比】 24 | * 地域分布【今日,昨日对比】 25 | * 地域分布(按国家)【今日,昨日对比】 26 | 27 | 每周获取【次周一 12 时】: 28 | * 网站概况(趋势数据)【周度】 29 | * 网站概况(地域分布)【周度】 30 | * 网站概况(来源网站、搜索词、入口页面、受访页面)【周度】 31 | * 趋势分析【本周,上周对比】 32 | * 全部来源【本周,上周对比】 33 | * 搜索引擎【本周,上周对比】 34 | * 搜索词【本周,上周对比】 35 | * 外部链接【本周,上周对比】 36 | * 受访页面【本周,上周对比】 37 | * 入口页面【本周,上周对比】 38 | * 受访域名【本周,上周对比】 39 | * 地域分布【本周,上周对比】 40 | * 地域分布(按国家)【本周,上周对比】 41 | 42 | 每月获取【北京时间次月 1 日 12 时】: 43 | * 网站概况(趋势数据)【月度】 44 | * 网站概况(地域分布)【月度】 45 | * 网站概况(来源网站、搜索词、入口页面、受访页面)【月度】 46 | * 趋势分析【本月,上月对比】 47 | * 全部来源【本月,上月对比】 48 | * 搜索引擎【本月,上月对比】 49 | * 搜索词【本月,上月对比】 50 | * 外部链接【本月,上月对比】 51 | * 受访页面【本月,上月对比】 52 | * 入口页面【本月,上月对比】 53 | * 受访域名【本月,上月对比】 54 | * 地域分布【本月,上月对比】 55 | * 地域分布(按国家)【本月,上月对比】 56 | 57 | 脚本还支持通过指定时间段下载除实时访客外的历史数据。 58 | 59 | ## CLI 使用方法 60 | 61 | **所有的 SECRETS / TOKEN 都可以通过环境变量传递** 62 | 63 | 处理北京时间昨天的数据: 64 | 65 | ``` 66 | > python .\cli.py fetch --help 67 | Usage: cli.py fetch [OPTIONS] 68 | 69 | Options: 70 | -t, --access-token TEXT Access token 71 | -p, --path TEXT Path to save report 72 | -d, --date TEXT Date to get report, in form of YYYY-MM-DD of 73 | Beijing time 74 | --help Show this message and exit. 
75 | ``` 76 | 77 | 处理指定时间的历史数据: 78 | 79 | ``` 80 | > python .\cli.py fetch-all --help 81 | Usage: cli.py fetch-all [OPTIONS] [%Y-%m-%d] [%Y-%m-%d] 82 | 83 | Fetch all history reports by indicating start date and end date in form of 84 | YYYY-MM-DD 85 | 86 | Options: 87 | -t, --access-token TEXT Access token 88 | -p, --path TEXT Path to save report 89 | --help Show this message and exit. 90 | ``` 91 | 92 | 用例:获取 `2020-12-20` 到 `2022-04-10` 的数据,指定路径为 `./reports` 93 | 94 | ```bash 95 | python cli.py fetch-all 2020-12-20 2022-04-10 -p ./reports -t <access_token> 96 | ``` 97 | 98 | 安装依赖: 99 | 100 | ```bash 101 | pip install -r requirements.txt 102 | ``` 103 | 104 | 刷新 Token (具体内容详见百度统计文档): 105 | 106 | ``` 107 | > python .\cli.py refresh-token --help 108 | Usage: cli.py refresh-token [OPTIONS] 109 | 110 | refresh access token: output refresh token in the first line, access token 111 | in the second line 112 | 113 | Options: 114 | -r, --refresh-token TEXT Refresh token 115 | -k, --api-key TEXT API key 116 | -s, --secret-key TEXT Secret key 117 | --help Show this message and exit. 118 | ``` 119 | 120 | ## GitHub Actions 使用方法 121 | 122 | GitHub Actions 有三个: 123 | 124 | * `refresh_token`: 用于自动刷新百度统计 `ACCESS_TOKEN` 和 `REFRESH_TOKEN`,UTC 时间每周一凌晨执行 125 | * `cron_export`: 用于每日自动导出数据,会自动检测是否为周初以及月初,并执行相应的自动导出任务 126 | * `manual_export`: 用于手动导出指定日期,用于补救某些日期 action failed 但是没有重试,或者 cron 因为系统错误未正确发起的情况 127 | 128 | 首先需要明确:导出的数据并非存储在本仓库,我自己是另行指定了一个 Private Repo。为了让自己的 GitHub 的 Activity History 不至于太难看,我还建了一个 GitHub 的小号,充当 bot。Action 可以指定 bot 的用户名与邮箱。 129 | 130 | 下面是仓库的 secrets 配置详情: 131 | 132 | * `BAIDU_TONGJI_ACCESS_TOKEN`: 百度统计 `ACCESS_TOKEN`.(新建即可,会自动刷新) 133 | * `BAIDU_TONGJI_REFRESH_TOKEN`: 百度统计 `REFRESH_TOKEN`. 见[文档](https://tongji.baidu.com/api/manual/) 134 | * `BAIDU_TONGJI_API_KEY`: 百度统计 `API_KEY`. 见[文档](https://tongji.baidu.com/api/manual/) 135 | * `BAIDU_TONGJI_SECRET_KEY`: 百度统计 `SECRET_KEY`. 
"""Command-line exporter for Baidu Tongji (Baidu Analytics) reports.

Sub-commands:

* ``fetch``          -- export one day's reports (plus the weekly / monthly
                        roll-ups when that day closes a week or a month).
* ``fetch-all``      -- run ``fetch`` for every day in a date range.
* ``refresh-token``  -- exchange the refresh token for a new token pair.

Every secret may be passed either as an option or through the matching
``BAIDU_TONGJI_*`` environment variable.
"""
import datetime
import json
import os
import time

import baidutongji
import click

# Fixed UTC+8 offset used for every "Beijing time" computation.
SHA_TZ = datetime.timezone(datetime.timedelta(hours=8), name='Asia/Shanghai')

# One row per exported report:
#   (API function, metrics class, takes comparison period?, file stem, log line)
# Rows with ``False`` are called with the primary period only; rows with
# ``True`` additionally receive the comparison period (start_date2/end_date2).
# NOTE(review): 'visit-district' and 'visit-world' are requested with
# ``SourceMetrics``, exactly as before -- confirm against the baidutongji
# package whether dedicated metrics classes exist for them.
_REPORTS = (
    (baidutongji.getTimeTrendRpt, baidutongji.TimeTrendRptMetrics, False,
     'overview-getTimeTrendRpt', 'Time Trend Report Finished'),
    (baidutongji.getDistrictRpt, baidutongji.DistrictRptMetrics, False,
     'overview-getDistrictRpt', 'District Report Finished'),
    (baidutongji.getCommonTrackRpt, baidutongji.CommonTrackRptMetrics, False,
     'overview-getCommonTrackRpt', 'Common Track Report Finished'),
    (baidutongji.getTrendTime, baidutongji.TrendTimeMetrics, True,
     'trend-time', 'Time Trend Finished'),
    (baidutongji.getSourceAll, baidutongji.SourceMetrics, True,
     'source-all', 'Source Report (All) Finished'),
    (baidutongji.getSourceEngine, baidutongji.SourceMetrics, True,
     'source-engine', 'Source Engine Report Finished'),
    (baidutongji.getSourceSearchword, baidutongji.SourceMetrics, True,
     'source-searchword', 'Source Searchword Report Finished'),
    (baidutongji.getSourceLink, baidutongji.SourceMetrics, True,
     'source-link', 'Source Link Report Finished'),
    (baidutongji.getVisitToppage, baidutongji.VisitToppageMetrics, True,
     'visit-toppage', 'Top Page Report Finished'),
    (baidutongji.getVisitLandingpage, baidutongji.VisitLandingpageMetrics, True,
     'visit-landingpage', 'Landing Page Report Finished'),
    (baidutongji.getVisitTopdomain, baidutongji.VisitTopdomainMetrics, True,
     'visit-topdomain', 'Top Domain Report Finished'),
    (baidutongji.getVisitDistrict, baidutongji.SourceMetrics, True,
     'visit-district', 'District Report Finished'),
    (baidutongji.getVisitWorld, baidutongji.SourceMetrics, True,
     'visit-world', 'World Report Finished'),
)


@click.group()
def cli():
    """Export Baidu Tongji reports to JSON files."""


def _require_secret(value, env_name, label):
    """Return *value*, falling back to the environment variable *env_name*.

    An empty string counts as missing: the GitHub workflows export secrets
    through ``env:``, and an unset secret arrives as ``""`` rather than being
    absent, which previously slipped past the ``is None`` check.

    Raises:
        click.ClickException: if neither source yields a non-empty value.
    """
    value = value or os.environ.get(env_name)
    if not value:
        raise click.ClickException(f'{label} is required')
    return value


def save_report(report, name, path):
    """Write *report* as indented UTF-8 JSON to ``<path>/<name>.json``.

    Non-ASCII text is written verbatim (``ensure_ascii=False``). An existing
    file of the same name is overwritten.
    """
    with open(os.path.join(path, f'{name}.json'), 'w', encoding='utf8') as f:
        json.dump(report, f, indent=4, ensure_ascii=False)


def save_all(access_token, site_id, start_date, end_date, start_date2, end_date2, path, delay):
    """Fetch every report listed in ``_REPORTS`` and save each under *path*.

    Args:
        access_token: Baidu Tongji access token.
        site_id: Site identifier as returned by ``baidutongji.getSiteList``.
        start_date, end_date: Primary reporting period (inclusive dates).
        start_date2, end_date2: Comparison period, forwarded only to the
            reports flagged as taking one.
        path: Existing directory receiving one ``<stem>.json`` per report.
        delay: Seconds to sleep before each API request.
    """
    for getter, metrics_cls, takes_comparison, stem, done_message in _REPORTS:
        time.sleep(delay)  # pause before every request, as the original did
        args = [access_token, site_id, start_date, end_date, metrics_cls().setAllTrue()]
        if takes_comparison:
            args += [start_date2, end_date2]
        save_report(getter(*args), stem, path)
        print(done_message)


def _rollup_periods(date):
    """Return the weekly / monthly roll-up windows that close on *date*.

    Each item is ``(kind, start, end, prev_start, prev_end)`` where *kind* is
    ``'week'`` or ``'month'``, ``start..end`` is the period ending on *date*
    and ``prev_start..prev_end`` is the period immediately before it.
    The list is empty on ordinary days; it holds both entries when a Sunday
    is also the last day of a month.
    """
    one_day = datetime.timedelta(days=1)
    periods = []

    if date.weekday() == 6:  # Sunday closes a Monday..Sunday week
        week_start = date - datetime.timedelta(days=6)
        periods.append((
            'week', week_start, date,
            week_start - datetime.timedelta(days=7), week_start - one_day,
        ))

    if (date + one_day).day == 1:  # tomorrow is the 1st => last day of month
        month_start = date.replace(day=1)
        prev_month_end = month_start - one_day
        periods.append((
            'month', month_start, date,
            prev_month_end.replace(day=1), prev_month_end,
        ))

    return periods


@cli.command()
@click.option('--access-token', '-t', default=None, help='Access token')
@click.option('--path', '-p', default='.', help='Path to save report')
@click.option('--date', '-d', default=None, help='Date to get report, in form of YYYY-MM-DD of Beijing time')
@click.option('--delay', '-D', default=15, help='Delay in seconds between each request')
def fetch(access_token, path, date, delay):
    """Export the reports of one day (default: yesterday, Beijing time)."""
    # Layout written below, per site domain:
    #   <path>/<domain>/day/<YYYYMMDD>/            daily reports
    #   <path>/<domain>/week/<start>-<end>/        when the date is a Sunday
    #   <path>/<domain>/month/<start>-<end>/       when the date ends a month
    access_token = _require_secret(access_token, 'BAIDU_TONGJI_ACCESS_TOKEN', 'Access token')

    os.makedirs(path, exist_ok=True)

    if date is None:
        # Live run: report on yesterday (Beijing time) and remember "now" so
        # the realtime-visitor snapshot can be stamped with it.
        # datetime.now(tz) replaces the deprecated datetime.utcnow().
        beijing_now = datetime.datetime.now(SHA_TZ)
        date = beijing_now.date() - datetime.timedelta(days=1)
    else:
        # Explicit historical date: no realtime snapshot is taken.
        beijing_now = None
        try:
            date = datetime.datetime.strptime(date, '%Y-%m-%d').date()
        except ValueError as err:
            # Report a usage error instead of an unhandled traceback.
            raise click.BadParameter(
                f'{date!r} is not a valid YYYY-MM-DD date', param_hint='--date'
            ) from err

    yesterday = date - datetime.timedelta(days=1)
    rollups = _rollup_periods(date)

    for site in baidutongji.getSiteList(access_token)['list']:
        domain = site['domain']
        site_id = site['site_id']
        click.echo('Fetching site: {}'.format(domain))

        domain_path = os.path.join(path, domain)
        os.makedirs(domain_path, exist_ok=True)

        # Daily reports, compared against the previous day.
        day_path = os.path.join(domain_path, f'day/{date:%Y%m%d}')
        print(f'Saving day report to {day_path}')
        os.makedirs(day_path, exist_ok=True)
        save_all(access_token, site_id, date, date, yesterday, yesterday, day_path, delay)

        if beijing_now is not None:
            # Realtime visitors describe *today*, so they are filed under the
            # day after the report date, stamped with the capture time.
            today = date + datetime.timedelta(days=1)
            day_path = os.path.join(domain_path, f'day/{today:%Y%m%d}')
            print(f'Saving realtime report to {day_path}')
            os.makedirs(day_path, exist_ok=True)
            time.sleep(delay)
            save_report(
                baidutongji.getTrendLatest(
                    access_token, site_id, baidutongji.TrendLatestMetrics().setAllTrue()
                ),
                f'trend-latest-{beijing_now.strftime("%H-%M-%S-UTC+8")}',
                day_path,
            )

        # Weekly / monthly roll-ups, compared against the preceding period.
        for kind, start, end, prev_start, prev_end in rollups:
            period_path = os.path.join(domain_path, f'{kind}/{start:%Y%m%d}-{end:%Y%m%d}')
            print(f'Saving {kind} report to {period_path}')
            os.makedirs(period_path, exist_ok=True)
            time.sleep(delay)
            save_all(access_token, site_id, start, end, prev_start, prev_end, period_path, delay)


@cli.command()
@click.pass_context
@click.option('--access-token', '-t', default=None, help='Access token')
@click.option('--path', '-p', default='.', help='Path to save report')
@click.argument('start_date', type=click.DateTime(formats=['%Y-%m-%d']))
@click.argument('end_date', type=click.DateTime(formats=['%Y-%m-%d']))
@click.option('--delay', '-D', default=15, help='Delay in seconds between each request')
def fetch_all(ctx, access_token, path, start_date: datetime.datetime, end_date: datetime.datetime, delay: int):
    '''Fetch all history reports by indicating start date and end date in form of YYYY-MM-DD'''
    # click.DateTime hands over datetime.datetime values; walk the inclusive
    # range one day at a time and delegate each day to ``fetch``. An explicit
    # date is always passed, so ``fetch`` never takes the realtime branch here.
    # Nothing runs when start_date is after end_date.
    day = start_date
    while day <= end_date:
        ctx.invoke(fetch, access_token=access_token, path=path, date=day.strftime('%Y-%m-%d'), delay=delay)
        day += datetime.timedelta(days=1)


@cli.command()
@click.option('--refresh-token', '-r', default=None, help='Refresh token')
@click.option('--api-key', '-k', default=None, help='API key')
@click.option('--secret-key', '-s', default=None, help='Secret key')
def refresh_token(refresh_token, api_key, secret_key):
    '''refresh access token: output refresh token in the first line, access token in the second line'''
    refresh_token = _require_secret(refresh_token, 'BAIDU_TONGJI_REFRESH_TOKEN', 'Refresh token')
    api_key = _require_secret(api_key, 'BAIDU_TONGJI_API_KEY', 'API key')
    secret_key = _require_secret(secret_key, 'BAIDU_TONGJI_SECRET_KEY', 'Secret key')

    res = baidutongji.refreshAccessToken(api_key, secret_key, refresh_token)

    try:
        new_refresh_token = res['refresh_token']
        new_access_token = res['access_token']
    except (KeyError, TypeError) as err:
        # The response body is deliberately not echoed: it may carry secrets.
        # ClickException is reported on stderr, so stdout (which the workflow
        # redirects into a file) stays empty on failure.
        raise click.ClickException(
            'Token refresh failed: response did not contain refresh_token/access_token'
        ) from err

    # Output contract relied on by the workflow: line 1 = refresh token,
    # line 2 = access token.
    click.echo(f'{new_refresh_token}\n{new_access_token}')


if __name__ == '__main__':
    cli()