├── .flake8 ├── .github ├── ISSUE_TEMPLATE │ └── feature_request.md └── workflows │ ├── geektime-dl-ci.yml │ └── publish.yml ├── .gitignore ├── Dockerfile ├── README.md ├── docs ├── .gitignore ├── .vuepress │ ├── components │ │ ├── PostCard.vue │ │ └── PostList.vue │ ├── config.js │ ├── enhanceApp.js │ └── public │ │ ├── Wechat.jpeg │ │ ├── conglingkaishixuejiagou.jpeg │ │ ├── favicon.jpg │ │ ├── geektime.gif │ │ ├── qiuyuedechanpinshouji.jpeg │ │ ├── rengongzhinengjichuke.jpeg │ │ ├── tuijianxitong36shi.jpeg │ │ ├── zhuyundejishuguanlike.jpeg │ │ └── zuoertingfeng.jpeg ├── README.md ├── bonus.md ├── faq.md ├── geektime_data.js ├── guide.md ├── intro.md ├── package-lock.json ├── package.json ├── recruit.md └── tldr.md ├── geektime.py ├── geektime_dl ├── __init__.py ├── cache.py ├── cli │ ├── __init__.py │ ├── command.py │ ├── ebook.py │ ├── login.py │ └── query.py ├── dal.py ├── ebook │ ├── __init__.py │ ├── ebook.py │ └── templates │ │ └── article.html ├── gt_apis.py ├── log.py └── utils.py ├── requirements ├── base.txt └── dev.txt ├── setup.py └── tests ├── conftest.py ├── test_cli ├── test_basic.py ├── test_ebook.py └── test_query.py ├── test_ebook_util.py ├── test_gt_apis.py └── test_utils.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | count = True 3 | max-complexity = 10 4 | max-line-length = 80 5 | statistics = True 6 | ignore = W391, W503, W504 7 | exclude = 8 | __pycache__ 9 | venv 10 | .venv -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | 5 | --- 6 | 7 | **Is your feature request related to a problem? Please describe.** 8 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 
9 | 10 | **Describe the solution you'd like** 11 | A clear and concise description of what you want to happen. 12 | 13 | **Describe alternatives you've considered** 14 | A clear and concise description of any alternative solutions or features you've considered. 15 | 16 | **Additional context** 17 | Add any other context or screenshots about the feature request here. 18 | -------------------------------------------------------------------------------- /.github/workflows/geektime-dl-ci.yml: -------------------------------------------------------------------------------- 1 | name: CI & CD 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | test: 10 | runs-on: ubuntu-latest 11 | strategy: 12 | max-parallel: 1 13 | matrix: 14 | python-version: [3.7] 15 | 16 | steps: 17 | - uses: actions/checkout@v1 18 | - name: Set up Python ${{ matrix.python-version }} 19 | uses: actions/setup-python@v1 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | - name: Install dependencies 23 | run: | 24 | python -m pip install --upgrade pip wheel 25 | pip install -r requirements/base.txt 26 | - name: Lint with flake8 27 | run: | 28 | pip install flake8 29 | flake8 30 | - name: Test with pytest 31 | env: 32 | account: ${{ secrets.account }} 33 | password: ${{ secrets.password }} 34 | run: | 35 | pip install -r requirements/dev.txt 36 | python -m pytest 37 | - name: Coverage 38 | run: | 39 | pip install coverage coveralls 40 | coverage run --source=geektime_dl -m pytest tests/ 41 | coveralls --service=github 42 | env: 43 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 44 | 45 | docs: 46 | runs-on: ubuntu-latest 47 | 48 | steps: 49 | - name: Checkout master 50 | uses: actions/checkout@v2 51 | with: 52 | ref: master 53 | 54 | - name: Setup node 55 | uses: actions/setup-node@v1 56 | with: 57 | node-version: "12.x" 58 | 59 | - name: Build project 60 | run: | 61 | cd docs 62 | npm install 63 | npm run docs:build 64 | 65 | - name: Upload gh-pages 66 | uses: 
peaceiris/actions-gh-pages@v3 67 | with: 68 | github_token: ${{ secrets.GITHUB_TOKEN }} 69 | publish_dir: ./docs/.vuepress/dist 70 | 71 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | # This workflows will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | release: 8 | types: [created] 9 | 10 | jobs: 11 | deploy: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Python 18 | uses: actions/setup-python@v1 19 | with: 20 | python-version: '3.x' 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install setuptools wheel twine 25 | - name: Build and publish 26 | env: 27 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 28 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 29 | run: | 30 | python setup.py sdist bdist_wheel 31 | twine upload dist/* 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | *.log 3 | .DS_Store 4 | 5 | .cache 6 | .pytest_cache 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Packages 12 | *.egg 13 | *.egg-info 14 | dist 15 | build 16 | eggs 17 | parts 18 | bin 19 | var 20 | sdist 21 | develop-eggs 22 | .installed.cfg 23 | lib 24 | lib64 25 | __pycache__ 26 | 27 | # Installer logs 28 | pip-log.txt 29 | 30 | # Unit test / coverage reports 31 | .coverage 32 | .tox 33 | nosetests.xml 34 | 35 | 36 | # Virtual environment 37 | .venv 38 | .venv3 39 | venv 40 | 41 | # Environment files 42 | .idea 43 | 44 | # tmp files 45 | geektime.cfg 46 | htmlcov 47 | 
-------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7-alpine 2 | 3 | RUN apk add --no-cache jpeg-dev zlib-dev 4 | RUN apk add --no-cache --virtual .build-deps build-base linux-headers \ 5 | && pip install Pillow 6 | RUN pip install -U geektime_dl 7 | 8 | WORKDIR /output 9 | 10 | ENTRYPOINT ["geektime"] 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | :sparkles: 重要 :sparkles: 3 | 4 | **查看 [使用文档](https://jachinlin.github.io/geektime_dl/) 获取最新使用信息。** 5 | 6 |

7 | 8 | 本 README.md 不再更新!:point_down: 9 | 10 |

11 | 左耳听风 12 |

13 | 14 | # 把极客时间装进 Kindle 15 | 16 | [![PyPI](https://img.shields.io/pypi/v/geektime-dl.svg)](https://pypi.org/project/geektime-dl/) 17 | [![CI & CD](https://github.com/jachinlin/geektime_dl/workflows/CI%20&%20CD/badge.svg)](https://github.com/jachinlin/geektime_dl/actions) 18 | [![Coverage Status](https://coveralls.io/repos/github/jachinlin/geektime_dl/badge.svg?branch=master)](https://coveralls.io/github/jachinlin/geektime_dl?branch=master) 19 | 20 | 极客时间专栏文章的质量都是非常高的,比如耗子哥的《左耳听风》、朱赟的《朱赟的技术管理课》和王天一的《人工智能基础课》,都是我非常喜欢的专栏。这些专栏深入浅出,将知识和经验传授于读者,都是值得多次阅读的。 21 | 22 | 然而,每当空闲时间时,都需要掏出手机才能阅读专栏文章,这在某种情况下是很不便的,尤其坐地铁且没有网络时。作为一个 kindle 党,最好的解决方案就是 kindle 电子书。于是有了这个项目 23 | 24 | >[把极客时间装进Kindle](https://github.com/jachinlin/geektime_dl) 25 | 26 | 27 | 28 | ## 安装 29 | 30 | ```bash 31 | pip install -U geektime_dl 32 | 33 | # 或者安装最新代码 34 | pip install -U git+https://github.com/jachinlin/geektime_dl.git 35 | ``` 36 | 37 | ## 使用 38 | 39 | 40 | **查看帮助信息** 41 | 42 | 43 | 1、查看 cli subcmd 44 | 45 | ```bash 46 | geektime help 47 | ``` 48 | 49 | 2、查看具体 cli subcmd 帮助信息 50 | 51 | ```bash 52 | geektime <subcmd> --help 53 | ``` 54 | 55 | `<subcmd>` 为具体的子命令名,可以从 help 子命令查看。 56 | 57 | 58 | **登录** 59 | 60 | ```bash 61 | geektime login [--account=<account>] [--password=<password>] [--area=<area>] 62 | ``` 63 | 64 | `[]`表示可选,`<>`表示相应变量值,下同 65 | 66 | - account: 手机账号,不提供可稍后手动输入 67 | - password: 账号密码,不提供可稍后手动输入 68 | - area: 注册手机号所属地区,默认86 69 | 70 | 71 | **查看课程列表** 72 | 73 | 74 | ```bash 75 | geektime query 76 | ``` 77 | 78 | 79 | 执行该命令后,我们可以看到专栏、视频、微课等课程的课程标题、订阅情况、更新频率还有课程ID,这个**课程ID**很重要,咱们下边的操作就是基于这个ID进行的。 80 | ``` 81 | 专栏 82 | 课程ID 已订阅 课程标题 更新频率/课时·时长 83 | 49 否 朱赟的技术管理课 (全集) 84 | 48 是 左耳听风 (全集) 85 | ...... 
86 | ``` 87 | 88 | 89 | **制作电子书** 90 | 91 | ```bash 92 | geektime ebook [--output-folder=] 93 | ``` 94 | 95 | - course_id: 课程ID,可以从 query subcmd 查看 96 | - output_folder: 电子书存放目录,默认`cwd` 97 | 98 | notice: 此 subcmd 需要先执行 login subcmd 99 | 100 | 101 | ## Todo list 102 | 103 | - [X] 评论 104 | - [X] 批量下载 105 | - [X] docker 106 | - [ ] 支持 mathjax 数学公式 107 | - [ ] ... 108 | 109 | 110 | ## 其他 111 | 112 | 1. 注意版权,勿传播电子书 113 | 2. pr or issue is welcome 114 | 115 | 116 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | dist -------------------------------------------------------------------------------- /docs/.vuepress/components/PostCard.vue: -------------------------------------------------------------------------------- 1 | 39 | 40 | 59 | 60 | 107 | 108 | -------------------------------------------------------------------------------- /docs/.vuepress/components/PostList.vue: -------------------------------------------------------------------------------- 1 | 12 | 13 | 25 | 26 | 31 | -------------------------------------------------------------------------------- /docs/.vuepress/config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | title: 'geektime-dl', 3 | base: '/geektime_dl/', 4 | description: '把极客时间装进 Kindle', 5 | head: [ 6 | ['link', { rel: "icon", type: "image/jpg", href: "/favicon.jpg"}] 7 | ], 8 | plugins: { 9 | 'baidu-tongji': { 10 | hm: '7972bc564f84e320d4f261fe1ada61da' 11 | } 12 | }, 13 | themeConfig: { 14 | lastUpdated: '上次更新', 15 | repo: 'jachinlin/geektime_dl', 16 | repoLabel: 'GitHub', 17 | docsDir: 'docs', 18 | editLinks: true, 19 | editLinkText: '帮助我们改善此页面!', 20 | nav: [ 21 | { text: '首页', link: '/' }, 22 | { text: '指南', link: '/guide' }, 23 | { text: '招聘', link: '/recruit' } 24 | ], 25 | displayAllHeaders: true, 26 | sidebar: { 27 | '/': [ 28 
| { 29 | collapsable: false, 30 | sidebarDepth: 1, 31 | children: [ 32 | '', 33 | 'tldr', 34 | 'intro', 35 | 'guide', 36 | 'faq', 37 | 'bonus' 38 | ] 39 | } 40 | ] 41 | } 42 | } 43 | } -------------------------------------------------------------------------------- /docs/.vuepress/enhanceApp.js: -------------------------------------------------------------------------------- 1 | import Element from 'element-ui' 2 | import 'element-ui/lib/theme-chalk/index.css' 3 | 4 | export default ({ 5 | Vue, // the version of Vue being used in the VuePress app 6 | options, // the options for the root Vue instance 7 | router, // the router instance for the app 8 | siteData // site metadata 9 | }) => { 10 | Vue.use(Element); 11 | } -------------------------------------------------------------------------------- /docs/.vuepress/public/Wechat.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jachinlin/geektime_dl/4c957004e3724a05d6b5913de7e6b49cf9a150d1/docs/.vuepress/public/Wechat.jpeg -------------------------------------------------------------------------------- /docs/.vuepress/public/conglingkaishixuejiagou.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jachinlin/geektime_dl/4c957004e3724a05d6b5913de7e6b49cf9a150d1/docs/.vuepress/public/conglingkaishixuejiagou.jpeg -------------------------------------------------------------------------------- /docs/.vuepress/public/favicon.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jachinlin/geektime_dl/4c957004e3724a05d6b5913de7e6b49cf9a150d1/docs/.vuepress/public/favicon.jpg -------------------------------------------------------------------------------- /docs/.vuepress/public/geektime.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jachinlin/geektime_dl/4c957004e3724a05d6b5913de7e6b49cf9a150d1/docs/.vuepress/public/geektime.gif -------------------------------------------------------------------------------- /docs/.vuepress/public/qiuyuedechanpinshouji.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jachinlin/geektime_dl/4c957004e3724a05d6b5913de7e6b49cf9a150d1/docs/.vuepress/public/qiuyuedechanpinshouji.jpeg -------------------------------------------------------------------------------- /docs/.vuepress/public/rengongzhinengjichuke.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jachinlin/geektime_dl/4c957004e3724a05d6b5913de7e6b49cf9a150d1/docs/.vuepress/public/rengongzhinengjichuke.jpeg -------------------------------------------------------------------------------- /docs/.vuepress/public/tuijianxitong36shi.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jachinlin/geektime_dl/4c957004e3724a05d6b5913de7e6b49cf9a150d1/docs/.vuepress/public/tuijianxitong36shi.jpeg -------------------------------------------------------------------------------- /docs/.vuepress/public/zhuyundejishuguanlike.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jachinlin/geektime_dl/4c957004e3724a05d6b5913de7e6b49cf9a150d1/docs/.vuepress/public/zhuyundejishuguanlike.jpeg -------------------------------------------------------------------------------- /docs/.vuepress/public/zuoertingfeng.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jachinlin/geektime_dl/4c957004e3724a05d6b5913de7e6b49cf9a150d1/docs/.vuepress/public/zuoertingfeng.jpeg -------------------------------------------------------------------------------- 
/docs/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | home: true 3 | heroImage: /geektime.gif 4 | actionText: 快速开始 → 5 | actionLink: /tldr 6 | footer: Copyright © 2018-present Jachin Lin 7 | --- 8 | 9 | ------ 10 | 11 | [![travis](https://travis-ci.org/jachinlin/geektime_dl.svg?branch=master)](https://travis-ci.org/jachinlin/geektime_dl) 12 | [![codecov](https://codecov.io/gh/jachinlin/geektime_dl/branch/master/graph/badge.svg)](https://codecov.io/gh/jachinlin/geektime_dl) 13 | [![Python versions](https://img.shields.io/pypi/pyversions/geektime-dl.svg)](https://pypi.org/project/geektime-dl/) 14 | [![PyPI](https://img.shields.io/pypi/v/geektime-dl.svg)](https://pypi.org/project/geektime-dl/) 15 | [![CI & CD](https://github.com/jachinlin/geektime_dl/workflows/CI%20&%20CD/badge.svg)](https://github.com/jachinlin/geektime_dl/actions) 16 | 17 | 这个工具只适用于具有以下两个特殊需求的用户,如果你不满足其中一个,那么可以点击右上角的叉叉,没必要浪费时间在这里。 18 | 19 | - [极客时间](https://time.geekbang.org/)忠实付费用户 20 | - Kindle 党 21 | 22 | 23 | 24 | 极客时间专栏文章的质量都是非常高的,比如耗子哥的《左耳听风》、朱赟的《朱赟的技术管理课》和王天一的《人工智能基础课》,都是我非常喜欢的专栏。这些专栏深入浅出,将知识和经验传授于读者,都是值得多次阅读的。 25 | 26 | 然而,每当空闲时间时,都需要掏出手机才能阅读专栏文章,这在某些情况下是很不便利的,尤其坐地铁且没有网络时。作为一个kindle党,最好的解决方案就是kindle电子书。于是,有了这个项目 —— [把极客时间装进 Kindle](https://github.com/jachinlin/geektime_dl)。 27 | -------------------------------------------------------------------------------- /docs/bonus.md: -------------------------------------------------------------------------------- 1 | # 福利 2 | 3 | ## 快手内推直达部门负责人 4 | 5 | [快手内推直达部门负责人!](/recruit.html) 6 | 7 | ## 极客时间扫码购买返利 8 | 9 | 从下方的二维码扫码购买课程者,添加微信( [二维码](/recruit.html#交流群)) 即可返回一半的「分享奖赏」。 10 | 11 | -------------------------------------------------------------------------------- /docs/faq.md: -------------------------------------------------------------------------------- 1 | # FAQ 2 | 3 | ## 是否存在法律风险? 
4 | 5 | [issue#15](https://github.com/jachinlin/geektime_dl/issues/15) 6 | 7 | 本项目的初衷是方便自己学习极客时间。它的确带来了便利,利己及人,所以我选择把它开源了。 8 | 9 | 在这里恳请大家不要随意散发电子书给其他人(包括亲朋好友),更不要用于商业用途或者从事盗版买卖。 10 | 11 | ## 我的账号会被盗吗? 12 | 13 | 代码都开源了,可以随时查看源码。或者,使用 [wireshark](https://www.wireshark.org/) 等软件进行抓包。 14 | 15 | 使用 geektime-dl 可能会出现的问题就是触发官方的限流措施。 16 | 17 | ## 我能查看没有购买的课程吗? 18 | 19 | 不可以。请先到[极客时间](https://time.geekbang.org/)购买课程,如有需要再使用 geektime-dl。 20 | 21 | ## 出现非法图形验证码怎么办? 22 | 23 | [issue#67](https://github.com/jachinlin/geektime_dl/issues/67) 24 | 25 | 当使用 geektime 触发图形验证码时,请稍等一会儿再重新执行 [geektime login](/guide.html#登录),就能登上了。 26 | 27 | 如果哪位同学对图形验证这一块比较擅长或者感兴趣,欢迎 [pr](https://github.com/jachinlin/geektime_dl/compare)。 28 | 29 | ## 其他疑问 30 | 31 | 如果你还有其他疑问的话,欢迎[提 issue](https://github.com/jachinlin/geektime_dl/issues/new) 或者加入[交流群](/recruit.html#交流群)讨论。 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /docs/geektime_data.js: -------------------------------------------------------------------------------- 1 | export const columnShareData = [ 2 | { 3 | title: '左耳听风', 4 | summary: '陈皓,人称耗子叔,有 20 年软件开发及相关工作经验,先后在阿里巴巴、亚马逊、汤森路透等知名公司任职,' + 5 | '对 IT 底层技术平台有深入的了解,尤其在大规模分布式系统的基础架构方面颇有研究。' + 6 | '此外,他在团队管理、项目管理,以及程序员个人成长等方面也有自己一套独特的见解和方法。' + 7 | '在“左耳听风”专栏中的每篇文章都是陈皓对自己多年“堵过的枪眼儿”“填过的坑儿”的深入思考和凝练,' + 8 | '是一些与个人或企业切身利益相关的内容,或者说是更具指导性、更为商业化的内容。用他自己的话说,是一些非常来之不易的宝贵经验 ... ', 9 | path: `/geektime_dl/zuoertingfeng.jpeg`, 10 | image: '/zuoertingfeng.jpeg', 11 | date: '2017-10-01', 12 | location: 'Beijing', 13 | readMoreText: '扫码购买返现 18 元' 14 | }, 15 | { 16 | title: '朱赟的技术管理课', 17 | summary: '在本专栏中,Angela 会更加聚焦在技术管理、技术实践、硅谷文化和个人成长领域,继续以女工程师和技术领导的视角,为大家讲解技术和认知的故事。' + 18 | '她愿意把自己在技术和管理上的领悟及忠告、在硅谷工作的体会与见识,通过这个专栏分享给大家。把自己觉得好的东西第一时间分享给你,这就是 Angela 推出这个专栏的初衷。' + 19 | 'Angela 的专栏主要聚焦在技术管理、技术实践、硅谷文化和个人成长四个领域 ... 
', 20 | path: `/geektime_dl/zhuyundejishuguanlike.jpeg`, 21 | image: '/zhuyundejishuguanlike.jpeg', 22 | date: '2017-11-01', 23 | location: 'American', 24 | readMoreText: '扫码购买返现 9 元' 25 | }, 26 | { 27 | title: '邱岳的产品手记', 28 | summary: '邱岳,人称“二爷”,是公众号“二爷鉴书”的作者。邱岳常年宅在家中弹琴读书写作,作品多以书评为主。写得多了大家以为他是个专业写书评的,其实他的真实身份是一位从业近十年的产品经理。' + 29 | '在这个专栏中,邱岳将首次以产品经理为主题,将自己的所见所闻、所思所想毫无保留地写出来,他希望自己的经验和思考能为读者提供启发,' + 30 | '促使大家能跳出自身固有的思维框架,重新审视自己的工作,从而获得成长 ... ', 31 | path: `/geektime_dl/qiuyuedechanpinshouji.jpeg`, 32 | image: '/qiuyuedechanpinshouji.jpeg', 33 | date: '2017-12-01', 34 | location: 'Beijing', 35 | readMoreText: '扫码购买返现 12 元' 36 | }, 37 | { 38 | title: '人工智能基础课', 39 | summary: '人工智能的重要性已无需赘述,但作为一个跨学科产物,它包含的内容浩如烟海,各种复杂的模型和算法更是让人望而生畏。' + 40 | '对于大多数的新手来说,如何入手人工智能其实都是一头雾水,比如到底需要哪些数学基础、是否要有工程经验、对于深度学习框架应该关注什么等等。' + 41 | '在“人工智能基础课”专栏里,王天一教授将结合自己的积累与思考,和你分享他对人工智能的理解,' + 42 | '用通俗易懂的语言从零开始教你掌握人工智能的基础知识,梳理出人工智能学习路径,为今后深耕人工智能相关领域打下坚实的基础 ... ', 43 | path: `/geektime_dl/rengongzhinengjichuke.jpeg`, 44 | image: '/rengongzhinengjichuke.jpeg', 45 | date: '2017-12-01', 46 | location: 'Beijing', 47 | readMoreText: '扫码购买返现 9 元' 48 | }, 49 | { 50 | title: '推荐系统36式', 51 | summary: '刑无刀(本名陈开江),现为“贝壳找房”资深算法专家,从事算法类产品的研发。' + 52 | '曾任新浪微博资深算法工程师,考拉 FM 算法主管。 刑无刀有 8 年的推荐系统方向从业经历,他在算法、架构、产品方面均有丰富的实践经验。' + 53 | '本专栏将为推荐系统学习者架构起整体的知识脉络,并在此基础上补充实践案例与经验,力图解决你系统起步阶段 80% 的问题 ... ', 54 | path: `/geektime_dl/tuijianxitong36shi.jpeg`, 55 | image: '/tuijianxitong36shi.jpeg', 56 | date: '2018-02-01', 57 | location: 'Beijing', 58 | readMoreText: '扫码购买返现 9 元' 59 | }, 60 | { 61 | title: '从0开始学架构', 62 | summary: '李运华,资深技术专家。目前带领多个研发团队,承担架构设计、架构重构、技术团队管理、技术培训等职责,曾就职于华为和 UCWeb,写过《面向对象葵花宝典》一书。' + 63 | '在本专栏中,华仔会从架构基础、三大架构模式和实战的角度分享他一整套的架构设计方法论,希望你学习后不仅能够快速理解陌生的架构设计,' + 64 | '自己也能对架构设计游刃有余,并且可以给身边正在迷惘的同学指点迷津,实践所学,分享所学 ... 
', 65 | path: `/geektime_dl/conglingkaishixuejiagou.jpeg`, 66 | image: '/conglingkaishixuejiagou.jpeg', 67 | date: '2018-04-01', 68 | location: 'Beijing', 69 | readMoreText: '扫码购买返现 12 元' 70 | } 71 | ] -------------------------------------------------------------------------------- /docs/guide.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # 使用说明 4 | 5 | 阅读下文前,请先确保已[安装](/intro.html#安装) geektime-dl。 6 | 7 | ## 登录 8 | 9 | ```bash 10 | geektime login [--account=<account>] [--password=<password>] [--area=<area>] 11 | ``` 12 | 13 | `[]` 表示可选,`<>` 表示相应变量值。 14 | 15 | > 下边其他命令中的 `[]` 和 `<>` 也表示这个意思,就不一一说明了。 16 | 17 | 这个命令有三个参数, 18 | 19 | - account: 手机账号,不提供可稍后手动输入 20 | - password: 账号密码,不提供可稍后手动输入 21 | - area: 注册手机号所属地区,默认 `86`。当您是美国手机号注册时,area 需要设置为 `1`。 22 | 23 | 您也可以通过以下命令获取详细帮助信息。 24 | 25 | ```bash 26 | geektime login --help 27 | ``` 28 | 29 | 登录成功后,您的账号密码将会保存在 `$(pwd)/geektime.cfg`。执行其他操作时,geektime 将从这个配置文件读取账号密码。 30 | 31 | ## 查看课程列表 32 | 33 | > 执行该命令前,请确保账号密码已经保存在 `$(pwd)/geektime.cfg`。 34 | > 35 | > 如果没有,请执行 `geektime login` 进行账号密码验证和保存。 36 | 37 | ```bash 38 | geektime query 39 | ``` 40 | 41 | 执行该命令后,我们可以看到专栏、视频、微课等课程的课程标题、订阅情况、更新频率还有课程ID,这个 **课程ID** 很重要,我们下边的操作就是基于这个ID进行的。 42 | 43 | 这里,我截取部分输出结果: 44 | 45 | ```bash 46 | (venv3) ➜ geektime query 47 | 专栏 48 | 课程ID 已订阅 已完结 课程标题 49 | 301 否 否 数据中台实战课 50 | 298 否 否 检索技术核心20讲 51 | 297 否 否 SRE实战手册 52 | 296 否 否 图解 Google V8 53 | ``` 54 | 55 | 56 | ## 制作电子书 57 | 58 | > 执行该命令前,请确保账号密码已经保存在 `$(pwd)/geektime.cfg`。 59 | > 60 | > 如果没有,请执行 `geektime login` 进行账号密码验证和保存。 61 | 62 | 63 | ```bash 64 | geektime ebook <course_id> [--comments-count=<comments_count>] 65 | ``` 66 | 67 | 参数 `course_id` 表示课程ID,可以从 `geektime query` 查看获取到; 68 | `comments_count` 表示评论条数,不设置的话则默认为 0条,您可以根据专栏评论的含金量来调整该参数大小。 69 | 70 | 示例: 71 | ```bash 72 | geektime ebook 49 --comments-count=10 73 | ``` 74 | 75 | ### 推送到 Kindle 设备 76 | 77 | 78 | 如果您想把制作完成的电子书自动推送到心爱的 Kindle 设备的话,需要提供以下 smtp 配置和 Kindle 推送邮箱: 79 | 80 | 81 | - --smtp-encryption 82 | - --smtp-host 83 | - 
--smtp-port 84 | - --smtp-user 85 | - --smtp-password 86 | - --email-to:Kindle 推送邮箱 87 | 88 | 89 | 然后在[制作电子书基础命令](/guide.html#制作电子书)后添加 `--push` 以及上面参数即可。 90 | 91 | 例如, 92 | 93 | ```bash 94 | geektime ebook 49 --push --smtp-host=smtp.qq.com --smtp-port=465 --smtp-encryption=ssl --smtp-user=your_qq_number@qq.com --smtp-password=your_password --email-to=your_kindle_email@kindle.cn 95 | ``` 96 | 97 | 执行该命令后,smtp 配置和 Kindle 推送邮箱就会保存在 `$(pwd)/geektime.cfg`,下次推送电子书时就不用添加这些参数了,只要 `geektime ebook 49 --push` 即可。打开 `$(pwd)/geektime.cfg` 验证一下吧。 98 | 99 | 至于邮箱 smtp 配置和 Kindle邮箱配置就自行 google 吧。 100 | 101 | ### 压缩电子书大小 102 | 103 | 直接使用 `geektime ebook <course_id>` 生成的电子书大于 50M(因为含有大量图片),超过邮箱附件的大小限制,所以我们需要对图片进行压缩,这时候参数 `--image-ratio` 就发挥作用了。 104 | 105 | 试试这么操作吧 106 | 107 | ```bash 108 | geektime ebook 49 --image-ratio=0.2 109 | ``` 110 | 111 | ### 批量制作电子书 112 | 113 | ```bash 114 | geektime ebook <course_ids> 115 | ``` 116 | 上述命令可以批量制作电子书,参数 `course_ids` 表示课程ID 集合,课程ID 集合使用半角逗号 `,` 和 `-` 进行拼接,`all` 则表示全部已购买课程ID 集合 117 | 。例如: 118 | 119 | - 制作48、49号课程电子书,可以执行 `geektime ebook 48,49` 120 | - 制作48到50号课程电子书,可以执行 `geektime ebook 48-50` 121 | - 制作所有已购买课程电子书,可以执行 `geektime ebook all` 122 | 123 | 124 | ### 更多用法 125 | 126 | 您也可以通过下边命令发现更多用法 127 | 128 | ```bash 129 | geektime ebook --help 130 | ``` 131 | 132 | 133 | 134 | ## 下载音频 135 | 136 | `geektime-dl` 除了可以制作 Kindle 电子书,把极客时间装进 Kindle,还提供了下载课程音频的附加功能,来看一下怎么使用吧。 137 | 138 | 139 | > 执行该命令前,请确保账号密码已经保存在 `$(pwd)/geektime.cfg`。 140 | > 141 | > 如果没有,请执行 `geektime login` 进行账号密码验证和保存。 142 | 143 | ```bash 144 | geektime mp3 <course_id> [--url-only] 145 | ``` 146 | 147 | 这条命令会下载课程音频到 `$(pwd)/mp3/<课程名称>/` 中。 148 | 如果开启 `--url-only`,则只会保存音频链接到 `$(pwd)/mp3/<课程名称>/<课程名称>.mp3.txt`,不会下载音频文件。 149 | 150 | 您也可以通过以下命令获取更多帮助信息。 151 | 152 | ```bash 153 | geektime mp3 --help 154 | ``` 155 | 156 | -------------------------------------------------------------------------------- /docs/intro.md: -------------------------------------------------------------------------------- 1 | 2 | # 简介 3 | 4 | ## 
项目结构 5 | 6 | 这个项目主要包括下边这几个部分: 7 | 8 | - kindle_maker: 一个 mobi 电子书制作工具。用户使用 kindle_maker 就可以轻松制作出一本精美的 kindle 电子书。这部分已拎出来放在单独的项目里,具体使用方式见该项目文档: [kindle_maker](https://github.com/jachinlin/kindle_maker); 9 | 10 | - utils: 提供了 mp3/mp4 下载、邮件发送、html 文件生成等功能; 11 | 12 | - gt_apis: 封装了极客时间 api; 13 | 14 | - store_client: 缓存极客时间专栏数据至本地 json 文件; 15 | 16 | - cli: 提供若干cmd 命令,将上面这几个部分连接在一起,最后使用 kindle_maker 制作电子书,或者使用下载器下载相关音视频。 17 | 18 | ## 主要依赖 19 | 20 | - [Python](https://docs.python.org/3.6/): 支持的 Python 版本为 3.6 及以上 21 | 22 | - [requests](http://www.python-requests.org/en/master/): 网络请求 23 | 24 | - [Jinja2](http://jinja.pocoo.org/): html 模板引擎 25 | 26 | - [kindle_maker](https://github.com/jachinlin/kindle_maker): 制作 kindle 电子书 27 | 28 | ## 安装 29 | 30 | ### 安装 Python 解释器 31 | 32 | 目前仅支持 Python3.6+(包含),请在 [Python 官网](https://www.python.org/downloads/)下载并安装您熟悉的版本对应的 Python 解释器。 33 | 34 | ### 虚拟环境 35 | 36 | ```bash 37 | mkdir geektime && cd geektime 38 | python3 -m venv venv3 && source venv3/bin/activate 39 | ``` 40 | 41 | ### 安装 geektime-dl 42 | 43 | ```bash 44 | pip install -U geektime_dl 45 | ``` 46 | 47 | 或者源码安装,这样可以获取最新的特性 48 | 49 | ```bash 50 | pip install -U git+https://github.com/jachinlin/geektime_dl.git 51 | ``` 52 | 53 | ### 检验是否正确安装 54 | 55 | ```bash 56 | geektime help 57 | ``` 58 | 59 | 执行上述命令,如果出现 `command not found: geektime`,则说明没有正确安装,请按照上面步骤重新安装,如果还有困难的话,可以[提 issue](https://github.com/jachinlin/geektime_dl/issues/new)获取帮助;如果 terminal 显示的是其他信息,则说明您已经正确安装该软件了,恭喜您,咱们可以进行下一步了。 60 | ## 查看帮助信息 61 | 62 | ```bash 63 | geektime help 64 | ``` 65 | 66 | 该命令会显示所有支持的命令(command),以及所支持的命令的简要说明,具体输出如下: 67 | 68 | ```bash 69 | Available commands: 70 | daily 保存每日一课视频 71 | ebook 将专栏文章制作成电子书 72 | help Display the list of available commands 73 | login 登录极客时间,保存账号密码至配置文件 74 | mp3 保存专栏音频 75 | query 查看课程列表 76 | 77 | Use 'geektime --help' for individual command help. 
78 | ``` 79 | 80 | 通过下边的操作可以查看具体命令(command)的帮助信息 81 | 82 | ```bash 83 | geektime --help 84 | ``` 85 | 86 | 例如, 87 | 88 | ```bash 89 | geektime ebook --help 90 | ``` 91 | 92 | 这条命令就可以显示出 `ebook` 命令(制作 mobi 电子书命令)的使用说明和所有的参数说明 93 | 94 | ```bash 95 | usage: geektime ebook 96 | [-h] [-a ACCOUNT] [-p PASSWORD] [--area AREA] [--config CONFIG] 97 | [-o OUTPUT_FOLDER] [--no-login] [--image-ratio IMAGE_RATIO] 98 | [--image-min-height IMAGE_MIN_HEIGHT] 99 | [--image-min-width IMAGE_MIN_WIDTH] [--email-to EMAIL_TO] 100 | [--smtp-password SMTP_PASSWORD] [--smtp-user SMTP_USER] 101 | [--smtp-encryption SMTP_ENCRYPTION] [--smtp-port SMTP_PORT] 102 | [--smtp-host SMTP_HOST] [--push] [--comments-count COMMENTS_COUNT] 103 | [--force] 104 | course_ids 105 | 106 | 将专栏文章制作成电子书 107 | 108 | positional arguments: 109 | course_ids specify the target course ids 110 | 111 | optional arguments: 112 | -h, --help show this help message and exit 113 | -a ACCOUNT, --account ACCOUNT 114 | specify the account phone number (default: None) 115 | -p PASSWORD, --password PASSWORD 116 | specify the account password (default: None) 117 | --area AREA specify the account country code (default: 86) 118 | --config CONFIG specify alternate config file (default: 119 | /Users/linjiaxian/dev/geektime_dl/geektime.cfg) 120 | -o OUTPUT_FOLDER, --output-folder OUTPUT_FOLDER 121 | specify the output folder (default: 122 | /Users/linjiaxian/dev/geektime_dl) 123 | --no-login no login, just for test (default: False) 124 | --image-ratio IMAGE_RATIO 125 | image ratio (default: None) 126 | --image-min-height IMAGE_MIN_HEIGHT 127 | image min height (default: None) 128 | --image-min-width IMAGE_MIN_WIDTH 129 | image min width (default: None) 130 | --email-to EMAIL_TO specify the kindle receiver email (default: None) 131 | --smtp-password SMTP_PASSWORD 132 | specify the smtp password (default: None) 133 | --smtp-user SMTP_USER 134 | specify the smtp user (default: None) 135 | --smtp-encryption SMTP_ENCRYPTION 136 | specify 
the a smtp encryption (default: None) 137 | --smtp-port SMTP_PORT 138 | specify the a smtp port (default: None) 139 | --smtp-host SMTP_HOST 140 | specify the smtp host (default: None) 141 | --push push to kindle (default: False) 142 | --comments-count COMMENTS_COUNT 143 | the count of comments to fetch each post (default: 0) 144 | --force do not use the cache data (default: False) 145 | ``` 146 | 147 | 具体命令的参数说明是使用 `argparse` 生成的,如果你对于上面的参数说明感到迷惑的话,可以先阅读 [argparse 的文档](https://docs.python.org/3.8/howto/argparse.html)。 148 | 149 | 下一步,请阅读[使用说明](/guide.html)。 150 | -------------------------------------------------------------------------------- /docs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "geektime-dl", 3 | "version": "1.1.2", 4 | "description": "把极客时间装进 Kindle", 5 | "scripts": { 6 | "docs:dev": "vuepress dev .", 7 | "docs:build": "vuepress build ." 8 | }, 9 | "author": "linjx1000+github@gmail.com", 10 | "license": "MIT", 11 | "dependencies": { 12 | "core-js": "^2.6.11", 13 | "element-ui": "^2.13.1", 14 | "vue-feather-icons": "^5.0.0" 15 | }, 16 | "devDependencies": { 17 | "vuepress": "^1.4.1", 18 | "vuepress-plugin-baidu-tongji": "^1.0.1" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /docs/recruit.md: -------------------------------------------------------------------------------- 1 | # 快手招聘(长期有效,海量 HC,急缺) 2 | 3 | 本人目前在[快手](https://www.kuaishou.com/)担任 Python 研发工程师 4 | 5 | 坐标:北京 - 平台研发部 - 基础架构组 6 | 7 | ## 基础架构团队招聘 8 | 9 | 基础架构组是做什么的 → [快手基础架构演进实录](https://mp.weixin.qq.com/s/C2nZTJrUjnt_sVE8oaoBRg) 10 | 11 | 团队目前接近 40 人,急缺各路人才: 12 | 13 | - Java 工程师/专家/架构师 [JD1](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/2705) [JD2](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/3297) 14 | - Cpp 工程师/专家/架构师 [JD1](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/2949) 
[JD2](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/5201) 15 | - Python 工程师/专家/架构师 [JD1](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/2913) 16 | 17 | 涉及 18 | 19 | - 服务治理 [JD1](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/5200) [JD2](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/5206) 20 | - 消息队列 [JD1](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/5198) 21 | - 实时监控 [JD1](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/5207) 22 | - 高可用平台 [JD1](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/5197) 23 | - 编译系统 [JD1](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/5208) 24 | - 网络接入 [JD1](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/5476) 25 | 26 | 等多个方向。 27 | 28 | 29 | 上述链接没有囊括全部 JD, 你可以到 [基础架构招聘](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/?name=%E5%9F%BA%E7%A1%80%E6%9E%B6%E6%9E%84) 上发现更多 JD 信息。 30 | 31 | 欢迎有兴趣的同学找我内推,发送简历至 linjx1000+2018@gmail.com 32 | 33 | 邮件要求: 34 | 35 | 1. 邮件标题:社招 +【岗位】+ 【姓名】 36 | 2. 简历:请附于附件,简历名称同邮件标题 37 | 3. 邮件内容:包含 JD 链接即可 38 | 39 | 内推服务承诺: 40 | 41 | 1. 邮件必回复 42 | 2. 直推部门负责人 43 | 3. 及时反馈面试进度和面试评价 44 | 4. 
各类问题可加微信联系 45 | 46 | ## 其他部门内推 47 | 48 | 招聘岗位请见 [快手招聘](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/),所有岗位接受内推 49 | 50 | 内推方式和要求同上 51 | 52 | 内推服务承诺除了「直推部门老大」,其他同上 53 | 54 | ## 交流群 55 | 56 | 请加个人微信,备注「GitHub 内推」,通过后拉您入群,获取最新招聘信息和内推反馈信息。 57 | 58 | 交流群 -------------------------------------------------------------------------------- /docs/tldr.md: -------------------------------------------------------------------------------- 1 | # TL;DR; 2 | 3 | 安装 geektime-dl 4 | 5 | ```bash 6 | mkdir geektime && cd geektime 7 | python3 -m venv venv3 && source venv3/bin/activate 8 | pip install -U geektime_dl 9 | ``` 10 | 11 | 12 | 13 | 查看课程列表,获取课程 ID 14 | 15 | ```bash 16 | geektime query 17 | ``` 18 | 19 | 制作 Kindle 电子书 20 | 21 | ```bash 22 | geektime ebook 49 23 | ``` 24 | 25 | 26 | 打开电子书 27 | 28 | ```bash 29 | open ebook/朱赟的技术管理课\[更新完毕\].mobi 30 | ``` 31 | 32 | 这就足够了。 33 | 34 | 如果您还想了解更多用法,可以进行[下一步](/intro.html)阅读。 -------------------------------------------------------------------------------- /geektime.py: -------------------------------------------------------------------------------- 1 | from geektime_dl import main 2 | 3 | if __name__ == '__main__': 4 | main() 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /geektime_dl/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | 3 | 4 | def main(): 5 | from geektime_dl import cli 6 | cli.main() 7 | 8 | 9 | geektime = main 10 | 11 | 12 | -------------------------------------------------------------------------------- /geektime_dl/cache.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | 3 | import datetime 4 | import json 5 | import traceback 6 | from abc import ABC, abstractmethod 7 | 8 | from peewee import ( 9 | SqliteDatabase, 10 | Model, 11 | DoesNotExist, 12 | IntegerField, 13 | CharField, 14 | TextField, 15 | BooleanField, 16 | DateTimeField 17 | ) 18 
def is_expired(self) -> bool:
    """Tell whether this cached entry has outlived its time-to-live.

    ``expire`` is a lifetime in seconds counted from ``modified``, which
    ``save`` refreshes on every write. A non-positive ``expire`` means the
    entry never expires.

    Returns:
        True when more than ``expire`` seconds have elapsed since the
        entry was last modified, False otherwise.
    """
    if self.expire <= 0:
        return False
    age = datetime.datetime.now() - self.modified
    # total_seconds() includes whole days; timedelta.seconds is only the
    # 0..86399 remainder, so a lifetime of one day or more could never be
    # exceeded and such entries would live forever.
    return age.total_seconds() > self.expire
def save_column_intro(self, course_intro: dict) -> None:
    """Insert or update the cached row describing one column.

    The row is looked up by ``course_intro['id']``; a new row is created
    when none exists. ``articles`` is stored as a JSON string. Any
    failure is logged with its traceback and otherwise ignored.

    Args:
        course_intro: column description as a dict; the keys read here
            are ``id``, the fields listed in ``copied_fields`` and
            ``articles``.
    """
    # fields copied verbatim from the dict onto the row, in this order
    copied_fields = (
        'column_title',
        'author_name',
        'column_intro',
        'column_cover',
        'column_type',
        'update_frequency',
        'is_finish',
        'had_sub',
    )
    try:
        column_id = course_intro['id']
        try:
            row = ColumnIntro.get(ColumnIntro.column_id == column_id)
        except DoesNotExist:
            row = ColumnIntro()

        row.column_id = column_id
        for field in copied_fields:
            setattr(row, field, course_intro[field])
        row.articles = json.dumps(course_intro['articles'])
        row.save()

        logger.info("save column intro to cache, column_title={}".format(
            course_intro['column_title']
        ))
    except Exception:
        # caching is best effort: log and carry on
        logger.error('ERROR: {}'.format(traceback.format_exc()))
def get(self, key: str) -> dict:
    """Fetch a JSON value from the key/value table.

    Args:
        key: the unique key of the entry.

    Returns:
        The decoded value, or an empty dict when the key is unknown, the
        entry has expired, or anything goes wrong (the error is logged).
    """
    try:
        try:
            entry: TempKV = TempKV.get(TempKV.key == key)
        except DoesNotExist:
            return {}

        if not entry.is_expired():
            decoded = json.loads(str(entry.value))
            # only the first 100 characters of the raw value are logged
            logger.info("get kv, key={}, value= {}".format(
                key, entry.value[:100]
            ))
            return decoded

        logger.info("get kv expired, key={}".format(key))
        return {}
    except Exception:
        logger.error('ERROR: {}'.format(traceback.format_exc()))
        return {}
def add_argument(*args, **kwargs):
    """Declare a command-line option for the decorated function.

    ``args`` and ``kwargs`` are recorded as an ``(args, kwargs)`` pair on
    the wrapper's ``arguments`` list so they can later be replayed through
    ``ArgumentParser.add_argument``. The extra keyword ``save`` is
    consumed here and never recorded: when it is truthy and ``dest`` is
    given, ``dest`` is also appended to the wrapper's ``save_cfg_keys``.

    Decorators may be stacked; entries accumulate innermost first.

    Returns:
        A decorator producing a wrapper that calls the original function
        unchanged and carries ``arguments`` and ``save_cfg_keys``.
    """
    # 'save' is our own flag. Remove it unconditionally (save=False used to
    # leak into the recorded kwargs) and do it once, up front, so the
    # decorator behaves the same however many functions it is applied to.
    save = kwargs.pop('save', False)

    def decorator(func):
        @functools.wraps(func)
        def wrap(*a, **kw):
            return func(*a, **kw)

        # start from what inner decorators collected, on fresh lists so
        # the wrapped function's own lists are left untouched
        wrap.save_cfg_keys = list(getattr(func, 'save_cfg_keys', []))
        wrap.arguments = list(getattr(func, 'arguments', []))
        if save and 'dest' in kwargs:
            wrap.save_cfg_keys.append(kwargs['dest'])
        wrap.arguments.append((args, kwargs))
        return wrap
    return decorator
@classmethod
def save_cfg(cls, cfg: dict, cfg_file: str) -> None:
    """Merge ``cfg`` into the config file and write it back.

    Options already stored in ``cfg_file`` are kept unless ``cfg``
    overrides them. Everything is written to the ``default`` section,
    options sorted by name. Failing to create the directory or to write
    the file is reported on stderr instead of being raised.

    Args:
        cfg: option name -> value pairs to store.
        cfg_file: path of the config file.
    """
    merged = cls.load_cfg(cfg_file)
    merged.update(cfg)

    p = configparser.RawConfigParser()
    p.add_section('default')
    for opt in sorted(merged):
        p.set('default', opt, merged[opt])

    # Create the parent directory when needed. A bare file name has an
    # empty dirname: there is nothing to create, and os.makedirs('')
    # would fail and print a misleading error.
    cfg_dir = os.path.dirname(cfg_file)
    if cfg_dir and not os.path.exists(cfg_dir):
        try:
            os.makedirs(cfg_dir, exist_ok=True)
        except OSError:
            sys.stderr.write(
                "ERROR: couldn't create the config directory\n")
    try:
        with open(cfg_file, 'w') as f:
            p.write(f)
    except IOError:
        sys.stderr.write("ERROR: couldn't write the config file\n")
def _generate_source_files(self, course_intro: dict, articles: list,
                           source_folder: str, no_cache: bool = False,
                           **kwargs) -> None:
    """Render the intermediate files the ebook is built from.

    Writes the introduction page, the cover image, the table of contents
    and one HTML file per article into ``source_folder``. Unless
    ``no_cache`` is set, an introduction, cover or article file that
    already exists is left alone; the table of contents is always
    regenerated.

    Args:
        course_intro: column description; ``column_title``,
            ``column_intro`` and ``column_cover`` are read here.
        articles: article dicts with ``article_title`` and
            ``article_content``.
        source_folder: directory receiving the generated files.
        no_cache: regenerate files even when they already exist.
        **kwargs: forwarded to ``Render.render_article_html``.
    """
    title = course_intro['column_title']
    renderer = Render(str(source_folder))

    def reusable(file_name: str) -> bool:
        # an existing file may only be kept when caching is allowed
        return not no_cache and os.path.isfile(
            os.path.join(source_folder, file_name))

    # introduction page
    if reusable('简介.html'):
        sys.stdout.write('{}简介 exists\n'.format(title))
    else:
        renderer.render_article_html(
            '简介', course_intro['column_intro'], **kwargs)
        sys.stdout.write('下载{}简介 done\n'.format(title))

    # cover image
    if reusable('cover.jpg'):
        sys.stdout.write('{}封面 exists\n'.format(title))
    else:
        renderer.generate_cover_img(course_intro['column_cover'])
        sys.stdout.write('下载{}封面 done\n'.format(title))

    # table of contents: introduction first, then every article
    ebook_name = self._format_title(course_intro)
    chapter_names = ['简介']
    chapter_names.extend(
        [format_file_name(item['article_title']) for item in articles])
    renderer.render_toc_md(ebook_name, chapter_names)
    sys.stdout.write('下载{}目录 done\n'.format(title))

    # one HTML file per article, with a progress bar
    progress = tqdm(articles)
    for article in progress:
        progress.set_description('HTML 文件下载中:{}'.format(
            article['article_title'][:10]))
        base_name = format_file_name(article['article_title'])
        if reusable('{}.html'.format(base_name)):
            continue
        renderer.render_article_html(
            base_name, article['article_content'], **kwargs)
help="the count of comments to fetch each post") 90 | @add_argument("--image-min-width", dest="image_min_width", type=int, 91 | save=True, help="image min width") 92 | @add_argument("--image-min-height", dest="image_min_height", type=int, 93 | save=True, help="image min height") 94 | @add_argument("--image-ratio", dest="image_ratio", type=float, save=True, 95 | help="image ratio") 96 | @add_argument("--format", dest="format", type=str, save=True, 97 | default='mobi', help="ebook format") 98 | def run(self, cfg: dict) -> None: 99 | course_ids = parse_column_ids(cfg['course_ids']) 100 | 101 | for course_id in course_ids: 102 | self._run_once(course_id, cfg) 103 | 104 | def _run_once(self, course_id: int, cfg: dict): 105 | dc = self.get_data_client(cfg) 106 | output_folder = self._make_output_folder(cfg['output_folder']) 107 | no_cache = cfg['no_cache'] 108 | wf = get_working_folder() 109 | try: 110 | course_intro = dc.get_column_intro(course_id, no_cache=no_cache) 111 | except GkApiError as e: 112 | sys.stderr.write('{}\n\n'.format(e)) 113 | return 114 | if int(course_intro['column_type']) not in (1, 2): 115 | sys.stderr.write("ERROR: 该课程不提供文本:{}".format( 116 | course_intro['column_title'])) 117 | return 118 | 119 | # fetch raw data 120 | print(colored('开始制作电子书:{}-{}'.format( 121 | course_id, course_intro['column_title']), 'green')) 122 | pbar_desc = '数据爬取中:{}'.format(course_intro['column_title'][:10]) 123 | article_ids = course_intro['articles'] 124 | article_ids = tqdm(article_ids) 125 | article_ids.set_description(pbar_desc) 126 | articles = list() 127 | for a in article_ids: 128 | aid = a['id'] 129 | article = dc.get_article_content(aid, no_cache=no_cache) 130 | if cfg['comments_count'] > 0: 131 | article['article_content'] += self._render_comment_html( 132 | article['comments'], 133 | cfg['comments_count'] 134 | ) 135 | articles.append(article) 136 | 137 | if cfg.get('dont_ebook', False): 138 | return 139 | 140 | # source file 141 | source_folder = wf / 
@staticmethod
def _make_output_folder(output_folder: str):
    """Expand ``~`` in ``output_folder``, make sure it exists, return it.

    Args:
        output_folder: directory path, possibly starting with ``~``.

    Returns:
        The expanded path of the (now existing) directory.

    Raises:
        OSError: when the directory cannot be created, e.g. because the
            path already exists and is not a directory.
    """
    output_folder = os.path.expanduser(output_folder)
    # exist_ok replaces the former isdir() check followed by makedirs(),
    # which could fail if the directory appeared between the two calls
    os.makedirs(output_folder, exist_ok=True)
    return output_folder
182 |
183 |
{}{}
185 |
{}
187 |
188 | """.format( 189 | reply.get('user_name'), 190 | self._timestamp2str(reply.get('ctime')), 191 | reply.get('content') 192 | ) if reply else '' 193 | 194 | likes = "[{}赞]".format(c['like_count']) if c['like_count'] else '' 195 | c_html = """ 196 |
  • 197 |
    198 |
    200 | {user_name} {comment_time} 201 |
    202 |
    204 | {comment_content} {like_count} 205 |
    206 | {replies} 207 |
    208 |
  • 209 | """.format( 210 | user_name=c['user_name'], 211 | like_count=likes, 212 | comment_content=c['comment_content'], 213 | comment_time=self._timestamp2str(c['comment_ctime']), 214 | replies=replies_html 215 | ) 216 | return c_html 217 | 218 | def _render_comment_html(self, comments, comment_count): 219 | """ 220 | 生成评论的 html 文本 221 | """ 222 | if not comments: 223 | return '' 224 | 225 | count = min(len(comments), int(comment_count)) 226 | comments = comments[:count] 227 | 228 | html = '\n
    \n'.join([ 229 | self._render(c) 230 | for c in comments 231 | ]) 232 | h = """

    精选留言:

    233 |
      234 | """ 235 | f = '
    ' 236 | return h + html + f 237 | -------------------------------------------------------------------------------- /geektime_dl/cli/login.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | 3 | import sys 4 | 5 | from geektime_dl.gt_apis import GkApiClient, GkApiError 6 | from geektime_dl.cli import Command 7 | 8 | 9 | class Login(Command): 10 | """登录极客时间,保存账号密码至配置文件""" 11 | 12 | def run(self, args: dict): 13 | area = args['area'] 14 | account = args['account'] 15 | password = args['password'] 16 | need_save = not (area and account and password) 17 | 18 | if not account: 19 | account = input("enter your registered account(phone): ") 20 | if not area: 21 | area = input("enter country code: enter for 86 ") or '86' 22 | if not password: 23 | password = input("account: +{} {}\n" 24 | "enter password: ".format(area, account)) 25 | 26 | try: 27 | GkApiClient(account=account, password=password, area=area) 28 | if need_save: 29 | new_cfg = { 30 | 'account': account, 31 | 'password': password, 32 | 'area': area 33 | } 34 | Command.save_cfg(new_cfg, args['config']) 35 | 36 | except GkApiError as e: 37 | sys.stdout.write( 38 | "login fail, error message:{}\nEnter again\n".format(e) 39 | ) 40 | area = input("enter country code: enter for 86 ") or '86' 41 | account = input("enter your registered account(phone): ") 42 | password = input("account: +{} {}\n" 43 | "enter password: ".format(area, account)) 44 | 45 | GkApiClient(account=account, password=password, area=area) 46 | 47 | new_cfg = { 48 | 'account': account, 49 | 'password': password, 50 | 'area': area 51 | } 52 | Command.save_cfg(new_cfg, args['config']) 53 | 54 | sys.stdout.write("Login succeed\n") 55 | 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /geektime_dl/cli/query.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | 3 | import sys 4 | 5 | 
def get_column_list(self, **kwargs) -> dict:
    """Return the list of all columns.

    Unless ``no_cache=True`` is passed, a non-empty value cached under
    the key ``column_all`` is returned as is; otherwise the list is
    fetched from the API and cached for one day. With ``no_cache=True``
    the cache is neither read nor written.

    Returns:
        The column list as returned by the API client (or the cache).
    """
    cache_key = "column_all"
    one_day = 24 * 3600  # cache lifetime in seconds

    if kwargs.get("no_cache", False):
        return self._gt.get_course_list()

    cached = self._cache.get(cache_key)
    if cached:
        return cached

    fresh = self._gt.get_course_list()
    self._cache.set(cache_key, fresh, one_day)
    return fresh
def get_column_intro(self, column_id: int, **kwargs) -> dict:
    """
    Return the introduction of a course together with its article list.

    Unless ``no_cache=True`` is passed, the cache is consulted first; a
    cached entry is only used when both its ``is_finish`` and ``had_sub``
    fields are truthy. Otherwise the intro and the post list are fetched
    from the API client, merged and (when caching) stored.
    """
    caching = not kwargs.get("no_cache", False)

    if caching:
        hit = self._cache.get_column_intro(column_id)
        if hit and hit['is_finish'] and hit['had_sub']:
            return hit

    intro = self._gt.get_course_intro(column_id)
    # Mirror the API's `id` under `column_id` before attaching articles.
    intro['column_id'] = intro['id']
    intro['articles'] = self._gt.get_post_list_of(column_id)

    if caching:
        self._cache.save_column_intro(intro)
    return intro
def get_video_collection_content(self, collection_id: int,
                                 force: bool = False,
                                 pbar=True, pbar_desc='') -> list:
    """
    Return the content of every video in the daily-lesson collection
    identified by ``collection_id``.

    The video list comes from the API client; each entry's ``article_id``
    is resolved through ``get_daily_content``, with ``force`` forwarded
    as a keyword argument. When ``pbar`` is truthy the iteration is
    wrapped in a tqdm progress bar labelled ``pbar_desc``.
    """
    videos = self._gt.get_video_list_of(collection_id)
    if pbar:
        videos = tqdm(videos)
        videos.set_description(pbar_desc)
    return [
        self.get_daily_content(video['article_id'], force=force)
        for video in videos
    ]
def render_toc_md(self, title: str, headers: list) -> None:
    """
    Write the table-of-contents file ``toc.md`` into the output folder.

    The first line is ``title``; every entry of ``headers`` follows on
    its own line, prefixed with ``# ``. No trailing newline is written.
    """
    lines = [title] + ['# {}'.format(h) for h in headers]
    toc_path = os.path.join(self._output_folder, 'toc.md')
    # Pin UTF-8: titles may contain non-ASCII text and the platform
    # default encoding is not guaranteed to be able to encode it.
    with open(toc_path, 'w', encoding='utf-8') as f:
        # A single write; writelines() on a str would iterate per character.
        f.write('\n'.join(lines))
@staticmethod
def _save_img(content: bytes, filename: str,
              min_width: int = None, min_height: int = None,
              ratio: float = None) -> None:
    """
    Decode ``content`` as an image and save it to ``filename``,
    downscaling it first when it is larger than the minimum size.

    Falsy ``min_width`` / ``min_height`` / ``ratio`` fall back to
    500 / 500 / 0.5. An image whose width or height is not above its
    minimum is saved unchanged. Otherwise the target size is the original
    scaled by ``ratio``, bumped back up so neither side falls below its
    minimum, and applied with ``thumbnail``.
    """
    width_floor = min_width or 500
    height_floor = min_height or 500
    scale = ratio or 0.5

    image = Image.open(io.BytesIO(content))
    width, height = image.size

    if width > width_floor and height > height_floor:
        target_w = int(width * scale)
        target_h = int(height * scale)
        if target_w < width_floor:
            # Rescale the height with the pre-adjustment width.
            target_h = int(target_h * width_floor / target_w)
            target_w = width_floor
        if target_h < height_floor:
            # Rescale the width with the pre-adjustment height.
            target_w = int(target_w * height_floor / target_h)
            target_h = height_floor
        image.thumbnail((target_w, target_h))

    image.save(filename, image.format)

    {{title}}

    9 | 10 | {{content | safe}} 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /geektime_dl/gt_apis.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | 3 | 4 | import threading 5 | import functools 6 | import time 7 | import contextlib 8 | from typing import Optional 9 | 10 | import requests 11 | 12 | from geektime_dl.utils import ( 13 | synchronized, 14 | Singleton, 15 | get_random_user_agent 16 | ) 17 | from geektime_dl.log import logger 18 | 19 | 20 | class GkApiError(Exception): 21 | """""" 22 | 23 | 24 | def _retry(func): 25 | """ 26 | 0.1s 后重试 27 | """ 28 | @functools.wraps(func) 29 | def wrap(gk_api: 'GkApiClient', *args, **kwargs): 30 | try: 31 | res = func(gk_api, *args, **kwargs) 32 | return res 33 | except requests.RequestException: 34 | time.sleep(0.1) 35 | gk_api.reset_session() 36 | return func(gk_api, *args, **kwargs) 37 | except GkApiError: 38 | raise 39 | except Exception as e: 40 | raise GkApiError("geektime api error") from e 41 | 42 | return wrap 43 | 44 | 45 | class GkApiClient(metaclass=Singleton): 46 | """ 47 | 一个课程,包括专栏、视频、微课等,称作 `course` 或者 `column` 48 | 课程下的章节,包括文章、者视频等,称作 `post` 或者 `article` 49 | """ 50 | 51 | def __init__(self, account: str, password: str, area: str = '86', 52 | no_login: bool = False, lazy_login: bool = True, 53 | cookies: Optional[dict] = None): 54 | self._cookies = None 55 | self._lock = threading.Lock() 56 | self._account = account 57 | self._password = password 58 | self._area = area 59 | self._no_login = no_login 60 | self._ua = get_random_user_agent() 61 | 62 | if cookies: 63 | self._cookies = cookies 64 | return 65 | 66 | if lazy_login or no_login: 67 | return 68 | self.reset_session() 69 | 70 | def _post(self, url: str, data: dict = None, **kwargs) -> requests.Response: 71 | with contextlib.suppress(Exception): 72 | for k in ['cellphone', 'password']: 73 | if data and k in data: 74 | data[k] = 'xxx' 75 | 
logger.info("request geektime api, {}, {}".format(url, data)) 76 | 77 | headers = kwargs.setdefault('headers', {}) 78 | headers.update({ 79 | 'Content-Type': 'application/json', 80 | 'User-Agent': self._ua 81 | }) 82 | resp = requests.post(url, json=data, timeout=10, **kwargs) 83 | resp.raise_for_status() 84 | 85 | if resp.json().get('code') != 0: 86 | raise GkApiError('geektime api fail:' + resp.json()['error']['msg']) 87 | 88 | return resp 89 | 90 | @synchronized() 91 | def reset_session(self) -> None: 92 | """登录""" 93 | url = 'https://account.geekbang.org/account/ticket/login' 94 | 95 | self._ua = get_random_user_agent() 96 | headers = { 97 | 'Accept': 'application/json, text/plain, */*', 98 | 'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2', # noqa: E501 99 | 'Host': 'account.geekbang.org', 100 | 'Referer': 'https://account.geekbang.org/signin?redirect=https%3A%2F%2Fwww.geekbang.org%2F', # noqa: E501 101 | } 102 | 103 | data = { 104 | "country": self._area, 105 | "cellphone": self._account, 106 | "password": self._password, 107 | "captcha": "", 108 | "remember": 1, 109 | "platform": 3, 110 | "appid": 1 111 | } 112 | 113 | resp = self._post(url, data, headers=headers) 114 | 115 | self._cookies = resp.cookies 116 | 117 | @_retry 118 | def get_course_list(self) -> dict: 119 | """ 120 | 获取课程列表 121 | :return: 122 | key: value 123 | '1' 124 | '2' 125 | '3' 126 | '4': 127 | """ 128 | url = 'https://time.geekbang.org/serv/v1/column/all' 129 | headers = { 130 | 'Referer': 'https://time.geekbang.org/paid-content', 131 | } 132 | if not self._cookies and not self._no_login: 133 | self.reset_session() 134 | 135 | resp = self._post(url, headers=headers, cookies=self._cookies) 136 | return resp.json()['data'] 137 | 138 | @_retry 139 | def get_post_list_of(self, course_id: int) -> list: 140 | """获取课程所有章节列表""" 141 | url = 'https://time.geekbang.org/serv/v1/column/articles' 142 | data = { 143 | "cid": str(course_id), "size": 1000, "prev": 0, 
"order": "newest" 144 | } 145 | headers = { 146 | 'Referer': 'https://time.geekbang.org/column/{}'.format(course_id), 147 | } 148 | 149 | if not self._cookies and not self._no_login: 150 | self.reset_session() 151 | 152 | resp = self._post(url, data, headers=headers, cookies=self._cookies) 153 | 154 | if not resp.json()['data']: 155 | raise Exception('course not exists:%s' % course_id) 156 | 157 | return resp.json()['data']['list'][::-1] 158 | 159 | @_retry 160 | def get_course_intro(self, course_id: int) -> dict: 161 | """课程简介""" 162 | url = 'https://time.geekbang.org/serv/v1/column/intro' 163 | headers = { 164 | 'Referer': 'https://time.geekbang.org/column/{}'.format(course_id), 165 | } 166 | 167 | if not self._cookies and not self._no_login: 168 | self.reset_session() 169 | 170 | resp = self._post( 171 | url, {'cid': str(course_id)}, headers=headers, cookies=self._cookies 172 | ) 173 | 174 | data = resp.json()['data'] 175 | if not data: 176 | raise GkApiError('无效的课程 ID: {}'.format(course_id)) 177 | return data 178 | 179 | @_retry 180 | def get_post_content(self, post_id: int) -> dict: 181 | """课程章节详情""" 182 | url = 'https://time.geekbang.org/serv/v1/article' 183 | headers = { 184 | 'Referer': 'https://time.geekbang.org/column/article/{}'.format( 185 | post_id) 186 | } 187 | 188 | if not self._cookies and not self._no_login: 189 | self.reset_session() 190 | 191 | resp = self._post( 192 | url, {'id': post_id}, headers=headers, cookies=self._cookies 193 | ) 194 | 195 | return resp.json()['data'] 196 | 197 | @_retry 198 | def get_post_comments(self, post_id: int) -> list: 199 | """课程章节评论""" 200 | url = 'https://time.geekbang.org/serv/v1/comments' 201 | headers = { 202 | 'Referer': 'https://time.geekbang.org/column/article/{}'.format( 203 | post_id) 204 | } 205 | 206 | if not self._cookies and not self._no_login: 207 | self.reset_session() 208 | 209 | resp = self._post( 210 | url, {"aid": str(post_id), "prev": 0}, 211 | headers=headers, cookies=self._cookies 212 | ) 
@_retry
def get_video_collection_list(self) -> list:
    """
    Return the daily-lesson collections as ``{'collection_id': n}`` dicts.
    """
    # No listing endpoint has been worked out, so the ids are hard-coded:
    # 3..81 followed by 104..140.
    known_ranges = (range(3, 82), range(104, 141))
    return [
        {'collection_id': collection_id}
        for id_range in known_ranges
        for collection_id in id_range
    ]
def read_cookies_from_file(file_path: pathlib.Path) -> dict:
    """
    Parse a cookie string stored in ``file_path`` into a dict.

    The file is expected to hold ``name=value`` pairs separated by ``;``.
    Names and values are stripped of surrounding whitespace; a value may
    itself contain ``=``. Segments that are blank, lack ``=`` or have an
    empty name are skipped, so a trailing ``;`` or newline no longer
    raises ``ValueError``. Values that cannot be encoded as latin-1 are
    skipped as well.
    """
    cookies = {}
    with open(file_path, 'r') as f:
        raw = f.read()

    for pair in raw.split(';'):
        name, sep, value = pair.partition('=')
        name, value = name.strip(), value.strip()
        if not sep or not name:
            continue
        try:
            # NOTE(review): presumably guards against values that cannot
            # be sent in an HTTP header; confirm against the caller.
            value.encode('latin-1')
        except UnicodeEncodeError:
            continue
        cookies[name] = value
    return cookies
def parse_column_ids(ids_str: str) -> List[int]:
    """
    Parse a comma-separated id specification into a sorted, unique list.

    Each segment is either a single integer (``"3"``) or an inclusive
    range (``"6-8"``). A range whose start exceeds its end contributes
    nothing.

    >>> parse_column_ids('3,6-8')
    [3, 6, 7, 8]

    :raises ValueError: when any segment is not an integer or a range.
    """
    def _int(num):
        try:
            return int(num)
        except (TypeError, ValueError) as e:
            # Report the whole input, keeping the parse error as cause.
            raise ValueError(
                'illegal column ids: {}'.format(ids_str)) from e

    ids = set()
    for seg in ids_str.split(','):
        if '-' in seg:
            start, end = seg.split('-', 1)
            ids.update(range(_int(start), _int(end) + 1))
        else:
            ids.add(_int(seg))
    return sorted(ids)
def read(fname):
    """Return the text of *fname*, resolved relative to this file's directory."""
    path = os.path.join(os.path.dirname(__file__), fname)
    # Decode explicitly as UTF-8 so the build does not depend on the
    # platform's default encoding (assumes the README is UTF-8; the
    # package description in this file is non-ASCII).
    with open(path, encoding='utf-8') as f:
        return f.read()
class FakeGk:
    """In-memory stand-in for the API client, returning canned data."""

    def __init__(self):
        # Number of get_course_intro calls served so far.
        self._access_count = 0

    def get_course_intro(self, course_id: int):
        """Return a minimal intro that also reports the running call count."""
        self._access_count += 1
        intro = {'id': course_id}
        intro['access_count'] = self._access_count
        return intro

    def get_course_list(self):
        """Return an empty course list for each of the keys '1' to '4'."""
        return {key: {'list': []} for key in ('1', '2', '3', '4')}

    def get_post_content(self, post_id: int):
        """Return a post holding only its id."""
        return dict(id=post_id)

    def get_post_list_of(self, course_id: int):
        """Return two fixed posts regardless of the course."""
        return [{'id': post_id} for post_id in (123, 456)]

    def get_post_comments(self, post_id: int):
        """Return no comments."""
        return list()
def test_add_argument_required():
    """A command with a required argument exits when it is not supplied."""
    class ArgsParse(command.Command):
        @command.add_argument("-n", "--name", dest="name", required=True)
        def run(self, cfg):
            return cfg

    no_args = []
    with pytest.raises(SystemExit):
        ArgsParse().work(no_args)
def test_query(tmp_path, mocker):
    """The query command returns a non-empty result in no-login mode."""
    mocker.stub(name='sys.stdout.write')

    # Credentials come from the environment.
    account_arg = '-a={}'.format(os.getenv('account'))
    password_arg = '-p={}'.format(os.getenv('password'))
    cli_args = [account_arg, password_arg, '--no-login']

    outcome = query.Query().work(args=cli_args)

    assert outcome

    hello world

    ' 12 | render.render_article_html(title, content) 13 | fn = os.path.join(output_folder, title + '.html') 14 | 15 | assert os.path.isfile(fn) 16 | with open(fn) as f: 17 | assert content in f.read() 18 | 19 | os.remove(fn) 20 | 21 | 22 | def test_render_toc_md(render: Render, output_folder: str): 23 | title = 'hello' 24 | headers = ['标题1', '标题2'] 25 | render.render_toc_md(title, headers) 26 | fn = os.path.join(output_folder, 'toc.md') 27 | 28 | assert os.path.isfile(fn) 29 | with open(fn) as f: 30 | ls = f.readlines() 31 | assert len(ls) == 3 32 | assert ls[0].rstrip('\n') == title 33 | assert ls[1].rstrip('\n') == '# {}'.format(headers[0]) 34 | assert ls[2].rstrip('\n') == '# {}'.format(headers[1]) 35 | 36 | os.remove(fn) 37 | 38 | -------------------------------------------------------------------------------- /tests/test_gt_apis.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | 3 | from geektime_dl.gt_apis import GkApiClient 4 | 5 | course_keys_needed = { 6 | 'id', 'column_title', 'had_sub', 'is_finish', 'update_frequency' 7 | } 8 | post_keys_needed = { 9 | 'id', 'article_title', 'article_content', 'column_id' 10 | } 11 | comment_keys_needed = { 12 | 'user_name', 'like_count', 'comment_content', 'comment_ctime' 13 | } 14 | daily_video_keys_needed = { 15 | 'id', 'article_title', 'column_had_sub', 'video_media_map' 16 | } 17 | 18 | video_id = 2184 19 | collection_id = 141 20 | daily_id = 113850 21 | 22 | 23 | # def test_api_get_course_list(gk: GkApiClient): 24 | # res = gk.get_course_list() 25 | # 26 | # assert isinstance(res, dict) 27 | # assert {'1', '2', '3', '4'} & set(res.keys()) 28 | # for type_ in {'1', '2', '3', '4'}: 29 | # course_list = res[type_]['list'] 30 | # course = course_list[0] 31 | # assert isinstance(course, dict) 32 | # for key in course_keys_needed: 33 | # assert course.get(key) is not None, '{} 不存在'.format(key) 34 | 35 | 36 | def test_api_get_course_intro(gk: GkApiClient, 
def test_api_get_course_post_list(gk: GkApiClient, column_id):
    """The post list of a course is a non-empty list whose first item has an id."""
    posts = gk.get_post_list_of(column_id)
    assert posts and isinstance(posts, list)

    first_post = posts[0]
    required_keys = {'id'}
    for key in required_keys:
        assert first_post.get(key) is not None, '{} 不存在'.format(key)
def test_parse_column_ids():
    """Single ids, ranges and mixed specifications expand to sorted id lists."""
    cases = [
        ('1', [1]),
        ('1-3', [1, 2, 3]),
        ('3,6-8', [3, 6, 7, 8]),
    ]
    for spec, expected in cases:
        assert parse_column_ids(spec) == expected