├── .flake8
├── .github
    ├── ISSUE_TEMPLATE
    │   └── feature_request.md
    └── workflows
    │   ├── geektime-dl-ci.yml
    │   └── publish.yml
├── .gitignore
├── Dockerfile
├── README.md
├── docs
    ├── .gitignore
    ├── .vuepress
    │   ├── components
    │   │   ├── PostCard.vue
    │   │   └── PostList.vue
    │   ├── config.js
    │   ├── enhanceApp.js
    │   └── public
    │   │   ├── Wechat.jpeg
    │   │   ├── conglingkaishixuejiagou.jpeg
    │   │   ├── favicon.jpg
    │   │   ├── geektime.gif
    │   │   ├── qiuyuedechanpinshouji.jpeg
    │   │   ├── rengongzhinengjichuke.jpeg
    │   │   ├── tuijianxitong36shi.jpeg
    │   │   ├── zhuyundejishuguanlike.jpeg
    │   │   └── zuoertingfeng.jpeg
    ├── README.md
    ├── bonus.md
    ├── faq.md
    ├── geektime_data.js
    ├── guide.md
    ├── intro.md
    ├── package-lock.json
    ├── package.json
    ├── recruit.md
    └── tldr.md
├── geektime.py
├── geektime_dl
    ├── __init__.py
    ├── cache.py
    ├── cli
    │   ├── __init__.py
    │   ├── command.py
    │   ├── ebook.py
    │   ├── login.py
    │   └── query.py
    ├── dal.py
    ├── ebook
    │   ├── __init__.py
    │   ├── ebook.py
    │   └── templates
    │   │   └── article.html
    ├── gt_apis.py
    ├── log.py
    └── utils.py
├── requirements
    ├── base.txt
    └── dev.txt
├── setup.py
└── tests
    ├── conftest.py
    ├── test_cli
        ├── test_basic.py
        ├── test_ebook.py
        └── test_query.py
    ├── test_ebook_util.py
    ├── test_gt_apis.py
    └── test_utils.py


/.flake8:
--------------------------------------------------------------------------------
 1 | [flake8]
 2 | count = True
 3 | max-complexity = 10
 4 | max-line-length = 80
 5 | statistics = True
 6 | ignore = W391, W503, W504
 7 | exclude =
 8 |     __pycache__
 9 |     venv
10 |     .venv


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | 
 5 | ---
 6 | 
 7 | **Is your feature request related to a problem? Please describe.**
 8 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
 9 | 
10 | **Describe the solution you'd like**
11 | A clear and concise description of what you want to happen.
12 | 
13 | **Describe alternatives you've considered**
14 | A clear and concise description of any alternative solutions or features you've considered.
15 | 
16 | **Additional context**
17 | Add any other context or screenshots about the feature request here.
18 | 


--------------------------------------------------------------------------------
/.github/workflows/geektime-dl-ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI & CD
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |     - master
 7 | 
 8 | jobs:
 9 |   test:
10 |     runs-on: ubuntu-latest
11 |     strategy:
12 |       max-parallel: 1
13 |       matrix:
14 |         python-version: ['3.7']  # quoted: YAML would read a bare version such as 3.10 as the float 3.1
15 | 
16 |     steps:
17 |     - uses: actions/checkout@v1
18 |     - name: Set up Python ${{ matrix.python-version }}
19 |       uses: actions/setup-python@v1
20 |       with:
21 |         python-version: ${{ matrix.python-version }}
22 |     - name: Install dependencies
23 |       run: |
24 |         python -m pip install --upgrade pip wheel
25 |         pip install -r requirements/base.txt
26 |     - name: Lint with flake8
27 |       run: |
28 |         pip install flake8
29 |         flake8
30 |     - name: Test with pytest
31 |       env:
32 |         account: ${{ secrets.account }}
33 |         password: ${{ secrets.password }}
34 |       run: |
35 |         pip install -r requirements/dev.txt
36 |         python -m pytest
37 |     - name: Coverage
38 |       run: |
39 |         pip install coverage coveralls
40 |         coverage run --source=geektime_dl -m pytest tests/
41 |         coveralls --service=github
42 |       env:
43 |         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
44 | 
45 |   docs:
46 |     runs-on: ubuntu-latest
47 | 
48 |     steps:
49 |       - name: Checkout master
50 |         uses: actions/checkout@v2
51 |         with:
52 |           ref: master
53 | 
54 |       - name: Setup node
55 |         uses: actions/setup-node@v1
56 |         with:
57 |           node-version: "12.x"
58 | 
59 |       - name: Build project
60 |         run: |
61 |           cd docs
62 |           npm install
63 |           npm run docs:build
64 | 
65 |       - name: Upload gh-pages
66 |         uses: peaceiris/actions-gh-pages@v3
67 |         with:
68 |           github_token: ${{ secrets.GITHUB_TOKEN }}
69 |           publish_dir: ./docs/.vuepress/dist
70 | 
71 | 


--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is created
 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
 3 | 
 4 | name: Upload Python Package
 5 | 
 6 | on:
 7 |   release:
 8 |     types: [created]
 9 | 
10 | jobs:
11 |   deploy:
12 | 
13 |     runs-on: ubuntu-latest
14 | 
15 |     steps:
16 |     - uses: actions/checkout@v2
17 |     - name: Set up Python
18 |       uses: actions/setup-python@v1
19 |       with:
20 |         python-version: '3.x'
21 |     - name: Install dependencies
22 |       run: |
23 |         python -m pip install --upgrade pip
24 |         pip install setuptools wheel twine
25 |     - name: Build and publish
26 |       env:
27 |         TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
28 |         TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
29 |       run: |
30 |         python setup.py sdist bdist_wheel
31 |         twine upload dist/*
32 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.py[cod]
 2 | *.log
 3 | .DS_Store
 4 | 
 5 | .cache
 6 | .pytest_cache
 7 | 
 8 | # C extensions
 9 | *.so
10 | 
11 | # Packages
12 | *.egg
13 | *.egg-info
14 | dist
15 | build
16 | eggs
17 | parts
18 | bin
19 | var
20 | sdist
21 | develop-eggs
22 | .installed.cfg
23 | lib
24 | lib64
25 | __pycache__
26 | 
27 | # Installer logs
28 | pip-log.txt
29 | 
30 | # Unit test / coverage reports
31 | .coverage
32 | .tox
33 | nosetests.xml
34 | 
35 | 
36 | # Virtual environment
37 | .venv
38 | .venv3
39 | venv
40 | 
41 | # Environment files
42 | .idea
43 | 
44 | # tmp files
45 | geektime.cfg
46 | htmlcov
47 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.7-alpine
 2 | 
 3 | RUN apk add --no-cache jpeg-dev zlib-dev
 4 | RUN apk add --no-cache --virtual .build-deps build-base linux-headers \
 5 |     && pip install Pillow
 6 | RUN pip install -U geektime_dl
 7 | 
 8 | WORKDIR /output
 9 | 
10 | ENTRYPOINT ["geektime"]
11 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | :sparkles: 重要 :sparkles:
  3 | 
  4 | **查看 [使用文档](https://jachinlin.github.io/geektime_dl/) 获取最新使用信息。**
  5 | 
  6 | <br/><br/>
  7 | 
  8 | 本 README.md 不再更新！:point_down:
  9 | 
 10 | <p align="center">
 11 |     <img  width="80%" src="https://raw.githubusercontent.com/jachinlin/geektime_dl/master/docs/.vuepress/public/geektime.gif" alt="左耳听风">
 12 | </p>
 13 | 
 14 | # 把极客时间装进 Kindle
 15 | 
 16 | [![PyPI](https://img.shields.io/pypi/v/geektime-dl.svg)](https://pypi.org/project/geektime-dl/)
 17 | [![CI & CD](https://github.com/jachinlin/geektime_dl/workflows/CI%20&%20CD/badge.svg)](https://github.com/jachinlin/geektime_dl/actions)
 18 | [![Coverage Status](https://coveralls.io/repos/github/jachinlin/geektime_dl/badge.svg?branch=master)](https://coveralls.io/github/jachinlin/geektime_dl?branch=master)
 19 | 
 20 | 极客时间专栏文章的质量都是非常高的，比如耗子哥的《左耳听风》、朱赟的《朱赟的技术管理课》和王天一的《人工智能基础课》，都是我非常喜欢的专栏。这些专栏深入浅出，将知识和经验传授于读者，都是值得多次阅读的。
 21 | 
 22 | 然而，每当空闲时间时，都需要掏出手机才能阅读专栏文章，这在某种情况下是很不便的，尤其坐地铁且没有网络时。作为一个 kindle 党，最好的解决方案就是 kindle 电子书。于是有了这个项目
 23 | 
 24 | >[把极客时间装进Kindle](https://github.com/jachinlin/geektime_dl)
 25 | 
 26 | 
 27 | 
 28 | ## 安装
 29 | 
 30 | ```bash
 31 | pip install -U geektime_dl
 32 | 
 33 | # 或者安装最新代码
 34 | pip install -U git+https://github.com/jachinlin/geektime_dl.git
 35 | ```
 36 | 
 37 | ## 使用
 38 | 
 39 | 
 40 | **查看帮助信息**
 41 | 
 42 | 
 43 | 1、查看 cli subcmd
 44 | 
 45 | ```bash
 46 | geektime help
 47 | ```
 48 | 
 49 | 2、查看具体 cli subcmd 帮助信息
 50 | 
 51 | ```bash
 52 | geektime <subcmd> --help
 53 | ```
 54 | 
 55 | `<subcmd>` 为具体的子命令名，可以从 help 子命令查看。
 56 | 
 57 | 
 58 | **登录**
 59 | 
 60 | ```bash
 61 | geektime login  [--account=<account>] [--password=<password>] [--area=<area>]
 62 | ```
 63 | 
 64 | `[]`表示可选，`<>`表示相应变量值，下同
 65 | 
 66 | - account: 手机账号，不提供可稍后手动输入
 67 | - password: 账号密码，不提供可稍后手动输入
 68 | - area: 注册手机号所属地区，默认86
 69 | 
 70 | 
 71 | **查看课程列表**
 72 | 
 73 | 
 74 | ```bash
 75 | geektime query
 76 | ```
 77 | 
 78 | 
 79 | 执行该命令后，我们可以看到专栏、视频、微课等课程的课程标题、订阅情况、更新频率还有课程ID，这个**课程ID**很重要，咱们下边的操作就是基于这个ID进行的。
 80 | ```
 81 | 专栏
 82 |         课程ID        已订阅       课程标题             更新频率/课时·时长
 83 |         49             否         朱赟的技术管理课      (全集)
 84 |         48             是         左耳听风      (全集)
 85 |         ......
 86 | ```
 87 | 
 88 | 
 89 | **制作电子书**
 90 | 
 91 | ```bash
 92 | geektime ebook <course_id> [--output-folder=<output_folder>]
 93 | ```
 94 | 
 95 | - course_id: 课程ID，可以从 query subcmd 查看
 96 | - output_folder: 电子书存放目录，默认`cwd`
 97 | 
 98 | notice: 此 subcmd 需要先执行 login subcmd
 99 | 
100 | 
101 | ## Todo list
102 | 
103 | - [X] 评论
104 | - [X] 批量下载
105 | - [X] docker
106 | - [ ] 支持 mathjax 数学公式
107 | - [ ] ...
108 | 
109 | 
110 | ## 其他
111 | 
112 | 1. 注意版权，勿传播电子书
113 | 2. PRs and issues are welcome
114 | 
115 | 
116 | 


--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | dist


--------------------------------------------------------------------------------
/docs/.vuepress/components/PostCard.vue:
--------------------------------------------------------------------------------
  1 | <template>
  2 |   <el-card shadow="hover" class="post-card">
  3 |     <el-row @click="go(post.path)">
  4 |       <el-col :lg="10" :md="12" :sm="12" v-if="post.image" class="post-cover">
  5 |         <img :src="$withBase(post.image)" :alt="post.title" >
  6 |       </el-col>
  7 |       <el-col :lg="14" :md="12" :sm="12" class="post-content">
  8 |         <div>
  9 |           <h2 class="post-title">
 10 |             <a :href="post.path" target="_blank">{{ post.title }}</a>
 11 |           </h2>
 12 |         </div>
 13 |         <div class="post-summary" v-if="post.summary">
 14 |           <p>
 15 |             {{ post.summary }}
 16 |             <a :href="post.path" target="_blank" class="post-read-more">
 17 |               {{ post.readMoreText || 'Read more'}}
 18 |             </a>
 19 |           </p>
 20 |         </div>
 21 |       </el-col>
 22 |     </el-row>
 23 |     <div class="post-footer">
 24 |       <div class="post-footer-item">
 25 |         <CalendarIcon class="post-footer-item-icon"/>
 26 |         {{ new Date(post.date.trim()).toDateString() }}
 27 |       </div>
 28 |       <div class="post-footer-item" v-if="post.readingTime">
 29 |         <ClockIcon class="post-footer-item-icon"/>
 30 |         {{ post.readingTime }}
 31 |       </div>
 32 |       <div class="post-footer-item" v-if="post.location">
 33 |         <NavigationIcon class="post-footer-item-icon"/>
 34 |         {{ post.location }}
 35 |       </div>
 36 |     </div>
 37 |   </el-card>
 38 | </template>
 39 | 
 40 | <script>
 41 | import { NavigationIcon, ClockIcon, CalendarIcon } from "vue-feather-icons";
 42 | 
 43 | export default {
 44 |   name: 'post-card',
 45 |   props: {
 46 |     post: {
 47 |       type: Object,
 48 |       required: true
 49 |     }
 50 |   },
 51 |   components: { NavigationIcon, ClockIcon, CalendarIcon },
 52 |   methods: {
 53 |     go (path) {
 54 |       this.$router.push(path)
 55 |     }
 56 |   }
 57 | }
 58 | </script>
 59 | 
 60 | <style>
 61 |   .post-card {
 62 |   }
 63 |   .post-cover {
 64 |     padding: 16px;
 65 |   }
 66 |   .post-cover img {
 67 |     border-radius: 16px;
 68 |     width: 100%;
 69 |   }
 70 |   .post-content {
 71 |     padding: 0 20px;
 72 |   }
 73 |   .post-title {
 74 |     margin-bottom: 10px;
 75 |   }
 76 |   .post-title a {
 77 |     color: #6c5b7b;
 78 |   }
 79 |   .post-summary {
 80 |     margin: 10px 0;
 81 |     color: #6c757d;
 82 |     word-wrap: break-word;
 83 |   }
 84 |   .post-read-more {
 85 |     color: #6c5b7b;
 86 |   }
 87 |   .post-footer {
 88 |     font-size: 13px;
 89 |     text-align: left;
 90 |     border-top: 1px solid #dee2e6;
 91 |     border-color: #f8f9fa;
 92 |     display: flex;
 93 |     justify-content: flex-end;
 94 |     margin-top: 10px;
 95 |     padding: 20px;
 96 |     color: #6c757d;
 97 |   }
 98 |   .post-footer-item {
 99 |     margin-right: 1rem;
100 | 
101 |   }
102 |   .post-footer-item-icon {
103 |     vertical-align: middle;
104 |     width: 17px;
105 |   }
106 | </style>
107 | 
108 | 


--------------------------------------------------------------------------------
/docs/.vuepress/components/PostList.vue:
--------------------------------------------------------------------------------
 1 | <template>
 2 |   <div>
 3 |     <post-card
 4 |       v-for="post in posts"
 5 |       :key="post.key"
 6 |       :post="post"
 7 |       class="post-card"
 8 |       shadow="hover"
 9 |     />
10 |   </div>
11 | </template>
12 | 
13 | <script>
14 | import { columnShareData } from '../../geektime_data.js'
15 | export default {
16 |   data: function () {
17 |     return {
18 |       posts: columnShareData,
19 |     }
20 |   },
21 |   created() {
22 |   }
23 | };
24 | </script>
25 | 
26 | <style>
27 | .post-card {
28 |   margin-bottom: 20px;
29 | }
30 | </style>
31 | 


--------------------------------------------------------------------------------
/docs/.vuepress/config.js:
--------------------------------------------------------------------------------
 1 | module.exports = {
 2 |   title: 'geektime-dl',
 3 |   base: '/geektime_dl/',
 4 |   description: '把极客时间装进 Kindle',
 5 |   head: [
 6 |     ['link', { rel: "icon", type: "image/jpg", href: "/favicon.jpg"}]
 7 |   ],
 8 |   plugins: {
 9 |     'baidu-tongji': {
10 |       hm: '7972bc564f84e320d4f261fe1ada61da'
11 |     }
12 |   },
13 |   themeConfig: {
14 |     lastUpdated: '上次更新',
15 |     repo: 'jachinlin/geektime_dl',
16 |     repoLabel: 'GitHub',
17 |     docsDir: 'docs',
18 |     editLinks: true,
19 |     editLinkText: '帮助我们改善此页面！',
20 |     nav: [
21 |       { text: '首页', link: '/' },
22 |       { text: '指南', link: '/guide' },
23 |       { text: '招聘', link: '/recruit' }
24 |     ],
25 |     displayAllHeaders: true,
26 |     sidebar: {
27 |       '/': [
28 |         {
29 |           collapsable: false,
30 |           sidebarDepth: 1,
31 |           children: [
32 |             '',
33 |             'tldr',
34 |             'intro',
35 |             'guide',
36 |             'faq',
37 |             'bonus'
38 |           ]
39 |         }
40 |       ]
41 |     }
42 |   }
43 | }


--------------------------------------------------------------------------------
/docs/.vuepress/enhanceApp.js:
--------------------------------------------------------------------------------
 1 | import Element from 'element-ui'
 2 | import 'element-ui/lib/theme-chalk/index.css'
 3 | 
 4 | export default ({
 5 |     Vue, // the version of Vue being used in the VuePress app
 6 |     options, // the options for the root Vue instance
 7 |     router, // the router instance for the app
 8 |     siteData // site metadata
 9 | }) => {
10 |     Vue.use(Element);
11 | }


--------------------------------------------------------------------------------
/docs/.vuepress/public/Wechat.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jachinlin/geektime_dl/4c957004e3724a05d6b5913de7e6b49cf9a150d1/docs/.vuepress/public/Wechat.jpeg


--------------------------------------------------------------------------------
/docs/.vuepress/public/conglingkaishixuejiagou.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jachinlin/geektime_dl/4c957004e3724a05d6b5913de7e6b49cf9a150d1/docs/.vuepress/public/conglingkaishixuejiagou.jpeg


--------------------------------------------------------------------------------
/docs/.vuepress/public/favicon.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jachinlin/geektime_dl/4c957004e3724a05d6b5913de7e6b49cf9a150d1/docs/.vuepress/public/favicon.jpg


--------------------------------------------------------------------------------
/docs/.vuepress/public/geektime.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jachinlin/geektime_dl/4c957004e3724a05d6b5913de7e6b49cf9a150d1/docs/.vuepress/public/geektime.gif


--------------------------------------------------------------------------------
/docs/.vuepress/public/qiuyuedechanpinshouji.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jachinlin/geektime_dl/4c957004e3724a05d6b5913de7e6b49cf9a150d1/docs/.vuepress/public/qiuyuedechanpinshouji.jpeg


--------------------------------------------------------------------------------
/docs/.vuepress/public/rengongzhinengjichuke.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jachinlin/geektime_dl/4c957004e3724a05d6b5913de7e6b49cf9a150d1/docs/.vuepress/public/rengongzhinengjichuke.jpeg


--------------------------------------------------------------------------------
/docs/.vuepress/public/tuijianxitong36shi.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jachinlin/geektime_dl/4c957004e3724a05d6b5913de7e6b49cf9a150d1/docs/.vuepress/public/tuijianxitong36shi.jpeg


--------------------------------------------------------------------------------
/docs/.vuepress/public/zhuyundejishuguanlike.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jachinlin/geektime_dl/4c957004e3724a05d6b5913de7e6b49cf9a150d1/docs/.vuepress/public/zhuyundejishuguanlike.jpeg


--------------------------------------------------------------------------------
/docs/.vuepress/public/zuoertingfeng.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jachinlin/geektime_dl/4c957004e3724a05d6b5913de7e6b49cf9a150d1/docs/.vuepress/public/zuoertingfeng.jpeg


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | home: true
 3 | heroImage: /geektime.gif
 4 | actionText: 快速开始 →
 5 | actionLink: /tldr
 6 | footer:  Copyright © 2018-present Jachin Lin
 7 | ---
 8 | 
 9 | ------
10 | 
11 | [![travis](https://travis-ci.org/jachinlin/geektime_dl.svg?branch=master)](https://travis-ci.org/jachinlin/geektime_dl)
12 | [![codecov](https://codecov.io/gh/jachinlin/geektime_dl/branch/master/graph/badge.svg)](https://codecov.io/gh/jachinlin/geektime_dl)
13 | [![Python versions](https://img.shields.io/pypi/pyversions/geektime-dl.svg)](https://pypi.org/project/geektime-dl/)
14 | [![PyPI](https://img.shields.io/pypi/v/geektime-dl.svg)](https://pypi.org/project/geektime-dl/)
15 | [![CI & CD](https://github.com/jachinlin/geektime_dl/workflows/CI%20&%20CD/badge.svg)](https://github.com/jachinlin/geektime_dl/actions)
16 | 
17 | 这个工具只适用于具有以下两个特殊需求的用户，如果你不满足其中一个，那么可以点击右上角的叉叉，没必要浪费时间在这里。
18 | 
19 | - [极客时间](https://time.geekbang.org/)忠实付费用户
20 | - Kindle 党
21 | 
22 | 
23 | 
24 | 极客时间专栏文章的质量都是非常高的，比如耗子哥的《左耳听风》、朱赟的《朱赟的技术管理课》和王天一的《人工智能基础课》，都是我非常喜欢的专栏。这些专栏深入浅出，将知识和经验传授于读者，都是值得多次阅读的。
25 | 
26 | 然而，每当空闲时间时，都需要掏出手机才能阅读专栏文章，这在某些情况下是很不便利的，尤其坐地铁且没有网络时。作为一个kindle党，最好的解决方案就是kindle电子书。于是，有了这个项目 —— [把极客时间装进 Kindle](https://github.com/jachinlin/geektime_dl)。
27 | 


--------------------------------------------------------------------------------
/docs/bonus.md:
--------------------------------------------------------------------------------
 1 | # 福利
 2 | 
 3 | ## 快手内推直达部门负责人
 4 | 
 5 | [快手内推直达部门负责人！](/recruit.html)
 6 | 
 7 | ## 极客时间扫码购买返利
 8 | 
 9 | 从下方的二维码扫码购买课程者，添加微信（ [二维码](/recruit.html#交流群)） 即可返回一半的「分享奖赏」。
10 | 
11 | <post-list/>


--------------------------------------------------------------------------------
/docs/faq.md:
--------------------------------------------------------------------------------
 1 | # FAQ
 2 | 
 3 | ## 是否存在法律风险？
 4 | 
 5 | [issue#15](https://github.com/jachinlin/geektime_dl/issues/15)
 6 | 
 7 | 本项目的初衷是方便自己学习极客时间。它的确带来了便利，利己及人，所以我选择把它开源了。
 8 | 
 9 | 在这里恳请大家不要随意散发电子书给其他人（包括亲朋好友），更不要用于商业用途或者从事盗版买卖。
10 | 
11 | ## 我的账号会被盗吗？
12 | 
13 | 代码都开源了，可以随时查看源码。或者，使用 [wireshark](https://www.wireshark.org/) 等软件进行抓包。
14 | 
15 | 使用 geektime-dl 可能会出现的问题就是触发官方的限流措施。
16 | 
17 | ## 我能查看没有购买的课程吗？
18 | 
19 | 不可以。请先到[极客时间](https://time.geekbang.org/)购买课程，如有需要再使用 geektime-dl。
20 | 
21 | ## 出现非法图形验证码怎么办？
22 | 
23 | [issue#67](https://github.com/jachinlin/geektime_dl/issues/67)
24 | 
25 | 当使用 geektime 触发图形验证码时，请稍等会再重新 [geektime login](/guide.html#登录) 就能登上了。
26 | 
27 | 如果哪位同学对图形验证这一块比较擅长或者感兴趣，欢迎 [pr](https://github.com/jachinlin/geektime_dl/compare)。
28 | 
29 | ## 其他疑问
30 | 
31 | 如果你还有其他疑问的话，欢迎[提 issue](https://github.com/jachinlin/geektime_dl/issues/new) 或者加入[交流群](/recruit.html#交流群)讨论。
32 | 
33 | 
34 | 
35 | 


--------------------------------------------------------------------------------
/docs/geektime_data.js:
--------------------------------------------------------------------------------
 1 | export const columnShareData = [
 2 |   {
 3 |     title: '左耳听风',
 4 |     summary: '陈皓，人称耗子叔，有 20 年软件开发及相关工作经验，先后在阿里巴巴、亚马逊、汤森路透等知名公司任职，' +
 5 |         '对 IT 底层技术平台有深入的了解，尤其在大规模分布式系统的基础架构方面颇有研究。' +
 6 |         '此外，他在团队管理、项目管理，以及程序员个人成长等方面也有自己一套独特的见解和方法。' +
 7 |         '在“左耳听风”专栏中的每篇文章都是陈皓对自己多年“堵过的枪眼儿”“填过的坑儿”的深入思考和凝练，' +
 8 |         '是一些与个人或企业切身利益相关的内容，或者说是更具指导性、更为商业化的内容。用他自己的话说，是一些非常来之不易的宝贵经验 ... ',
 9 |     path: `/geektime_dl/zuoertingfeng.jpeg`,
10 |     image: '/zuoertingfeng.jpeg',
11 |     date: '2017-10-01',
12 |     location: 'Beijing',
13 |     readMoreText: '扫码购买返现 18 元'
14 |   },
15 |   {
16 |     title: '朱赟的技术管理课',
17 |     summary: '在本专栏中，Angela 会更加聚焦在技术管理、技术实践、硅谷文化和个人成长领域，继续以女工程师和技术领导的视角，为大家讲解技术和认知的故事。' +
18 |         '她愿意把自己在技术和管理上的领悟及忠告、在硅谷工作的体会与见识，通过这个专栏分享给大家。把自己觉得好的东西第一时间分享给你，这就是 Angela 推出这个专栏的初衷。' +
19 |         'Angela 的专栏主要聚焦在技术管理、技术实践、硅谷文化和个人成长四个领域 ... ',
20 |     path: `/geektime_dl/zhuyundejishuguanlike.jpeg`,
21 |     image: '/zhuyundejishuguanlike.jpeg',
22 |     date: '2017-11-01',
23 |     location: 'American',
24 |     readMoreText: '扫码购买返现 9 元'
25 |   },
26 |   {
27 |     title: '邱岳的产品手记',
28 |     summary: '邱岳，人称“二爷”，是公众号“二爷鉴书”的作者。邱岳常年宅在家中弹琴读书写作，作品多以书评为主。写得多了大家以为他是个专业写书评的，其实他的真实身份是一位从业近十年的产品经理。' +
29 |         '在这个专栏中，邱岳将首次以产品经理为主题，将自己的所见所闻、所思所想毫无保留地写出来，他希望自己的经验和思考能为读者提供启发，' +
30 |         '促使大家能跳出自身固有的思维框架，重新审视自己的工作，从而获得成长 ... ',
31 |     path: `/geektime_dl/qiuyuedechanpinshouji.jpeg`,
32 |     image: '/qiuyuedechanpinshouji.jpeg',
33 |     date: '2017-12-01',
34 |     location: 'Beijing',
35 |     readMoreText: '扫码购买返现 12 元'
36 |   },
37 |   {
38 |     title: '人工智能基础课',
39 |     summary: '人工智能的重要性已无需赘述，但作为一个跨学科产物，它包含的内容浩如烟海，各种复杂的模型和算法更是让人望而生畏。' +
40 |         '对于大多数的新手来说，如何入手人工智能其实都是一头雾水，比如到底需要哪些数学基础、是否要有工程经验、对于深度学习框架应该关注什么等等。' +
41 |         '在“人工智能基础课”专栏里，王天一教授将结合自己的积累与思考，和你分享他对人工智能的理解，' +
42 |         '用通俗易懂的语言从零开始教你掌握人工智能的基础知识，梳理出人工智能学习路径，为今后深耕人工智能相关领域打下坚实的基础 ... ',
43 |     path: `/geektime_dl/rengongzhinengjichuke.jpeg`,
44 |     image: '/rengongzhinengjichuke.jpeg',
45 |     date: '2017-12-01',
46 |     location: 'Beijing',
47 |     readMoreText: '扫码购买返现 9 元'
48 |   },
49 |   {
50 |     title: '推荐系统36式',
51 |     summary: '刑无刀（本名陈开江），现为“贝壳找房”资深算法专家，从事算法类产品的研发。' +
52 |         '曾任新浪微博资深算法工程师，考拉 FM 算法主管。 刑无刀有 8 年的推荐系统方向从业经历，他在算法、架构、产品方面均有丰富的实践经验。' +
53 |         '本专栏将为推荐系统学习者架构起整体的知识脉络，并在此基础上补充实践案例与经验，力图解决你系统起步阶段 80% 的问题 ... ',
54 |     path: `/geektime_dl/tuijianxitong36shi.jpeg`,
55 |     image: '/tuijianxitong36shi.jpeg',
56 |     date: '2018-02-01',
57 |     location: 'Beijing',
58 |     readMoreText: '扫码购买返现 9 元'
59 |   },
60 |   {
61 |     title: '从0开始学架构',
62 |     summary: '李运华，资深技术专家。目前带领多个研发团队，承担架构设计、架构重构、技术团队管理、技术培训等职责，曾就职于华为和 UCWeb，写过《面向对象葵花宝典》一书。' +
63 |         '在本专栏中，华仔会从架构基础、三大架构模式和实战的角度分享他一整套的架构设计方法论，希望你学习后不仅能够快速理解陌生的架构设计，' +
64 |         '自己也能对架构设计游刃有余，并且可以给身边正在迷惘的同学指点迷津，实践所学，分享所学 ... ',
65 |     path: `/geektime_dl/conglingkaishixuejiagou.jpeg`,
66 |     image: '/conglingkaishixuejiagou.jpeg',
67 |     date: '2018-04-01',
68 |     location: 'Beijing',
69 |     readMoreText: '扫码购买返现 12 元'
70 |   }
71 | ]


--------------------------------------------------------------------------------
/docs/guide.md:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | # 使用说明
  4 | 
  5 | 阅读下文前，请先确保已[安装](/intro.html#安装) geektime-dl。
  6 | 
  7 | ## 登录
  8 | 
  9 | ```bash
 10 | geektime login  [--account=<account>] [--password=<password>] [--area=<area>]
 11 | ```
 12 | 
 13 | `[]` 表示可选，`<>` 表示相应变量值。
 14 | 
 15 | > 下边其他命令中的 `[]` 和 `<>` 也表示这个意思，就不一一说明了。
 16 | 
 17 | 这个命令有三个参数，
 18 | 
 19 | - account: 手机账号，不提供可稍后手动输入
 20 | - password: 账号密码，不提供可稍后手动输入
 21 | - area: 注册手机号所属地区，默认 `86`。当您是美国手机号注册时，area 需要设置为 `1`。
 22 | 
 23 | 您也可以通过以下命令获取详细帮助信息。
 24 | 
 25 | ```bash
 26 | geektime login --help
 27 | ```
 28 | 
 29 | 登录成功后，您的账号密码将会保存在 `$(pwd)/geektime.cfg`。执行其他操作时，geektime 将从这个配置文件读取账号密码。
 30 | 
 31 | ## 查看课程列表
 32 | 
 33 | > 执行该命令前，请确保账号密码已经保存在 `$(pwd)/geektime.cfg`。
 34 | >
 35 | > 如果没有，请执行 `geektime login` 进行账号密码验证和保存。
 36 | 
 37 | ```bash
 38 | geektime query
 39 | ```
 40 | 
 41 | 执行该命令后，我们可以看到专栏、视频、微课等课程的课程标题、订阅情况、更新频率还有课程ID，这个 **课程ID** 很重要，我们下边的操作就是基于这个ID进行的。
 42 | 
 43 | 这里，我截取部分输出结果：
 44 | 
 45 | ```bash
 46 | (venv3) ➜ geektime query
 47 | 专栏
 48 | 	课程ID        已订阅	已完结	课程标题
 49 | 	301            否	否	数据中台实战课
 50 | 	298            否	否	检索技术核心20讲
 51 | 	297            否	否	SRE实战手册
 52 | 	296            否	否	图解 Google V8
 53 | ```
 54 | 
 55 | 
 56 | ## 制作电子书
 57 | 
 58 | > 执行该命令前，请确保账号密码已经保存在 `$(pwd)/geektime.cfg`。
 59 | >
 60 | > 如果没有，请执行 `geektime login` 进行账号密码验证和保存。
 61 | 
 62 | 
 63 | ```bash
 64 | geektime ebook <course_id>  [--comments-count=<comments_count>]
 65 | ```
 66 | 
 67 | 参数 `course_id` 表示课程ID，可以从 `geektime query` 查看获取到；
 68 | `comments_count` 表示评论条数，不设置的话则默认为 0条，您可以根据专栏评论的含金量来调整该参数大小。
 69 | 
 70 | 示例：
 71 | ```bash
 72 | geektime ebook 49 --comments-count=10
 73 | ```
 74 | 
 75 | ### 推送到 Kindle 设备
 76 | 
 77 | 
 78 | 如果您想把制作完成的电子书自动推送到心爱的 Kindle 设备的话，需要提供以下 smtp 配置和 Kindle 推送邮箱：
 79 | 
 80 | 
 81 | - --smtp-encryption
 82 | - --smtp-host
 83 | - --smtp-port
 84 | - --smtp-user
 85 | - --smtp-password
 86 | - --email-to：Kindle 推送邮箱
 87 | 
 88 | 
 89 | 然后在[制作电子书基础命令](/guide.html#制作电子书)后添加 `--push` 以及上面参数即可。
 90 | 
 91 | 例如，
 92 | 
 93 | ```bash
 94 | geektime ebook 49 --push --smtp-host=smtp.qq.com --smtp-port=465 --smtp-encryption=ssl --smtp-user=your_qq_number@qq.com --smtp-password=your_password --email-to=your_kindle_email@kindle.cn
 95 | ```
 96 | 
 97 | 执行该命令后，smtp 配置和 Kindle 推送邮箱就会保存在 `$(pwd)/geektime.cfg`，下次推送电子书时就不用添加这些参数了，只要 `geektime ebook 49 --push` 即可。打开 `$(pwd)/geektime.cfg` 验证一下吧。
 98 | 
 99 | 至于邮箱 smtp 配置和 Kindle邮箱配置就自行 google 吧。
100 | 
101 | ### 压缩电子书大小
102 | 
103 | 直接使用 `geektime ebook <course_id>` 生成的电子书大于 50M（因为含有大量图片），超过邮箱附件的大小限制，所以我们需要对图片进行压缩，这时候参数 `--image-ratio` 就发挥作用了。
104 | 
105 | 试试这么操作吧
106 | 
107 | ```bash
108 | geektime ebook 49 --image-ratio=0.2
109 | ```
110 | 
111 | ### 批量制作电子书
112 | 
113 | ```bash
114 | geektime ebook <course_ids>
115 | ```
116 | 上述命令可以批量制作电子书，参数 `course_ids` 表示课程ID 集合，课程ID 集合使用半角逗号 `,` 和 `-` 进行拼接，`all` 则表示全部已购买课程ID 集合。
117 | 例如：
118 | 
119 | - 制作48、49号课程电子书，可以执行 `geektime ebook 48,49`
120 | - 制作48到50号课程电子书，可以执行 `geektime ebook 48-50`
121 | - 制作所有已购买课程电子书，可以执行 `geektime ebook all`
122 | 
123 | 
124 | ### 更多用法
125 | 
126 | 您也可以通过下边命令发现更多用法
127 | 
128 | ```bash
129 | geektime ebook --help
130 | ```
131 | 
132 | 
133 | 
134 | ## 下载音频
135 | 
136 | `geektime-dl` 除了可以制作 Kindle 电子书，把极客时间装进 Kindle，还提供了下载课程音频的附加功能，来看一下怎么使用吧。
137 | 
138 | 
139 | > 执行该命令前，请确保账号密码已经保存在 `$(pwd)/geektime.cfg`。
140 | >
141 | > 如果没有，请执行 `geektime login` 进行账号密码验证和保存。
142 | 
143 | ```bash
144 | geektime mp3 <course_id> [--url-only]
145 | ```
146 | 
147 | 这条命令会下载课程音频到 `$(pwd)/mp3/<课程名称>/` 中。
148 | 如果开启 `--url-only`，则只会保存音频链接到 `$(pwd)/mp3/<课程名称>/<课程名称>.mp3.txt`，不会下载音频文件。
149 | 
150 | 您也可以通过以下命令获取更多帮助信息。
151 | 
152 | ```bash
153 | geektime mp3 --help
154 | ```
155 | 
156 | 


--------------------------------------------------------------------------------
/docs/intro.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # 简介
  3 | 
  4 | ## 项目结构
  5 | 
  6 | 这个项目主要包括下边这几个部分：
  7 | 
  8 | - kindle_maker: 一个 mobi 电子书制作工具。用户使用 kindle_maker 就可以轻松制作出一本精美的 kindle 电子书。这部分已拎出来放在单独的项目里，具体使用方式见该项目文档： [kindle_maker](https://github.com/jachinlin/kindle_maker)；
  9 | 
 10 | - utils: 提供了 mp3/mp4 下载、邮件发送、html 文件生成等功能；
 11 | 
 12 | - gt_apis: 封装了极客时间 api；
 13 | 
 14 | - store_client: 缓存极客时间专栏数据至本地 json 文件；
 15 | 
 16 | - cli: 提供若干cmd 命令，将上面这几个部分连接在一起，最后使用 kindle_maker 制作电子书，或者使用下载器下载相关音视频。
 17 | 
 18 | ## 主要依赖
 19 | 
 20 | - [Python](https://docs.python.org/3.6/): 支持的 Python 版本为 3.6 及以上
 21 | 
 22 | - [requests](http://www.python-requests.org/en/master/): 网络请求
 23 | 
 24 | - [Jinja2](http://jinja.pocoo.org/): html 模板引擎
 25 | 
 26 | - [kindle_maker](https://github.com/jachinlin/kindle_maker): 制作 kindle 电子书
 27 | 
 28 | ## 安装
 29 | 
 30 | ### 安装 Python 解释器
 31 | 
 32 | 目前仅支持 Python3.6+（包含），请在 [Python 官网](https://www.python.org/downloads/)下载并安装您熟悉的版本对应的 Python 解释器。
 33 | 
 34 | ### 虚拟环境
 35 | 
 36 | ```bash
 37 | mkdir geektime && cd geektime
 38 | python3 -m venv venv3 && source venv3/bin/activate
 39 | ```
 40 | 
 41 | ### 安装 geektime-dl
 42 | 
 43 | ```bash
 44 | pip install -U geektime_dl
 45 | ```
 46 | 
 47 | 或者源码安装，这样可以获取最新的特性
 48 | 
 49 | ```bash
 50 | pip install -U git+https://github.com/jachinlin/geektime_dl.git
 51 | ```
 52 | 
 53 | ### 检验是否正确安装
 54 | 
 55 | ```bash
 56 | geektime help
 57 | ```
 58 | 
 59 | 执行上述命令，如果出现 `command not found: geektime`，则说明没有正确安装，请按照上面步骤重新安装，如果还有困难的话，可以[提 issue](https://github.com/jachinlin/geektime_dl/issues/new)获取帮助；如果 terminal 显示的是其他信息，则说明您已经正确安装该软件了，恭喜您，咱们可以进行下一步了。
 60 | ## 查看帮助信息
 61 | 
 62 | ```bash
 63 | geektime help
 64 | ```
 65 | 
 66 | 该命令会显示所有支持的命令（command），以及所支持的命令的简要说明，具体输出如下：
 67 | 
 68 | ```bash
 69 | Available commands:
 70 |     daily  保存每日一课视频
 71 |     ebook  将专栏文章制作成电子书
 72 |     help   Display the list of available commands
 73 |     login  登录极客时间，保存账号密码至配置文件
 74 |     mp3    保存专栏音频
 75 |     query  查看课程列表
 76 | 
 77 | Use 'geektime <command> --help' for individual command help.
 78 | ```
 79 | 
 80 | 通过下边的操作可以查看具体命令（command）的帮助信息
 81 | 
 82 | ```bash
 83 | geektime <command> --help
 84 | ```
 85 | 
 86 | 例如，
 87 | 
 88 | ```bash
 89 | geektime ebook --help
 90 | ```
 91 | 
 92 | 这条命令就可以显示出 `ebook` 命令（制作 mobi 电子书命令）的使用说明和所有的参数说明
 93 | 
 94 | ```bash
 95 | usage: geektime ebook
 96 |        [-h] [-a ACCOUNT] [-p PASSWORD] [--area AREA] [--config CONFIG]
 97 |        [-o OUTPUT_FOLDER] [--no-login] [--image-ratio IMAGE_RATIO]
 98 |        [--image-min-height IMAGE_MIN_HEIGHT]
 99 |        [--image-min-width IMAGE_MIN_WIDTH] [--email-to EMAIL_TO]
100 |        [--smtp-password SMTP_PASSWORD] [--smtp-user SMTP_USER]
101 |        [--smtp-encryption SMTP_ENCRYPTION] [--smtp-port SMTP_PORT]
102 |        [--smtp-host SMTP_HOST] [--push] [--comments-count COMMENTS_COUNT]
103 |        [--force]
104 |        course_ids
105 | 
106 | 将专栏文章制作成电子书
107 | 
108 | positional arguments:
109 |   course_ids            specify the target course ids
110 | 
111 | optional arguments:
112 |   -h, --help            show this help message and exit
113 |   -a ACCOUNT, --account ACCOUNT
114 |                         specify the account phone number (default: None)
115 |   -p PASSWORD, --password PASSWORD
116 |                         specify the account password (default: None)
117 |   --area AREA           specify the account country code (default: 86)
118 |   --config CONFIG       specify alternate config file (default:
119 |                         /Users/linjiaxian/dev/geektime_dl/geektime.cfg)
120 |   -o OUTPUT_FOLDER, --output-folder OUTPUT_FOLDER
121 |                         specify the output folder (default:
122 |                         /Users/linjiaxian/dev/geektime_dl)
123 |   --no-login            no login, just for test (default: False)
124 |   --image-ratio IMAGE_RATIO
125 |                         image ratio (default: None)
126 |   --image-min-height IMAGE_MIN_HEIGHT
127 |                         image min height (default: None)
128 |   --image-min-width IMAGE_MIN_WIDTH
129 |                         image min width (default: None)
130 |   --email-to EMAIL_TO   specify the kindle receiver email (default: None)
131 |   --smtp-password SMTP_PASSWORD
132 |                         specify the smtp password (default: None)
133 |   --smtp-user SMTP_USER
134 |                         specify the smtp user (default: None)
135 |   --smtp-encryption SMTP_ENCRYPTION
136 |                         specify the a smtp encryption (default: None)
137 |   --smtp-port SMTP_PORT
138 |                         specify the a smtp port (default: None)
139 |   --smtp-host SMTP_HOST
140 |                         specify the smtp host (default: None)
141 |   --push                push to kindle (default: False)
142 |   --comments-count COMMENTS_COUNT
143 |                         the count of comments to fetch each post (default: 0)
144 |   --force               do not use the cache data (default: False)
145 | ```
146 | 
147 | 具体命令的参数说明是使用 `argparse` 生成的，如果你对于上面的参数说明感到迷惑的话，可以先阅读 [argparse 的文档](https://docs.python.org/3.8/howto/argparse.html)。
148 | 
149 | 下一步，请阅读[使用说明](/guide.html)。
150 | 


--------------------------------------------------------------------------------
/docs/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "geektime-dl",
 3 |   "version": "1.1.2",
 4 |   "description": "把极客时间装进 Kindle",
 5 |   "scripts": {
 6 |     "docs:dev": "vuepress dev .",
 7 |     "docs:build": "vuepress build ."
 8 |   },
 9 |   "author": "linjx1000+github@gmail.com",
10 |   "license": "MIT",
11 |   "dependencies": {
12 |     "core-js": "^2.6.11",
13 |     "element-ui": "^2.13.1",
14 |     "vue-feather-icons": "^5.0.0"
15 |   },
16 |   "devDependencies": {
17 |     "vuepress": "^1.4.1",
18 |     "vuepress-plugin-baidu-tongji": "^1.0.1"
19 |   }
20 | }
21 | 


--------------------------------------------------------------------------------
/docs/recruit.md:
--------------------------------------------------------------------------------
 1 | # 快手招聘（长期有效，海量 HC，急缺）
 2 | 
 3 | 本人目前在[快手](https://www.kuaishou.com/)担任 Python 研发工程师
 4 | 
 5 | 坐标：北京 - 平台研发部 - 基础架构组
 6 | 
 7 | ## 基础架构团队招聘
 8 | 
 9 | 基础架构组是做什么的 → [快手基础架构演进实录](https://mp.weixin.qq.com/s/C2nZTJrUjnt_sVE8oaoBRg)
10 | 
11 | 团队目前接近 40 人，急缺各路人才： 
12 | 
13 | - Java 工程师/专家/架构师 [JD1](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/2705) [JD2](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/3297)
14 | - Cpp 工程师/专家/架构师 [JD1](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/2949) [JD2](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/5201)
15 | - Python 工程师/专家/架构师 [JD1](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/2913)
16 |     
17 | 涉及
18 | 
19 | - 服务治理 [JD1](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/5200) [JD2](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/5206)
20 | - 消息队列 [JD1](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/5198)
21 | - 实时监控 [JD1](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/5207)
22 | - 高可用平台 [JD1](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/5197)
23 | - 编译系统 [JD1](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/5208)
24 | - 网络接入 [JD1](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/job-info/5476)
25 | 
26 | 等多个方向。
27 | 
28 | 
29 | 上述链接没有囊括全部 JD, 你可以到 [基础架构招聘](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/?name=%E5%9F%BA%E7%A1%80%E6%9E%B6%E6%9E%84) 上发现更多 JD 信息。
30 | 
31 | 欢迎有兴趣的同学找我内推，发送简历至 linjx1000+2018@gmail.com
32 | 
33 | 邮件要求：
34 | 
35 | 1. 邮件标题：社招 +【岗位】+ 【姓名】
36 | 2. 简历：请附于附件，简历名称同邮件标题
37 | 3. 邮件内容：包含 JD 链接即可
38 | 
39 | 内推服务承诺：
40 | 
41 | 1. 邮件必回复
42 | 2. 直推部门负责人
43 | 3. 及时反馈面试进度和面试评价
44 | 4. 各类问题可加微信联系
45 | 
46 | ## 其他部门内推
47 | 
48 | 招聘岗位请见 [快手招聘](https://zhaopin.kuaishou.cn/recruit/e/#/official/social/)，所有岗位接受内推
49 | 
50 | 内推方式和要求同上
51 | 
52 | 内推服务承诺除了「直推部门老大」，其他同上
53 | 
54 | ## 交流群
55 | 
56 | 请加个人微信，备注「GitHub 内推」，通过后拉您入群，获取最新招聘信息和内推反馈信息。
57 | 
58 | <img :src="$withBase('/Wechat.jpeg')" alt="交流群">


--------------------------------------------------------------------------------
/docs/tldr.md:
--------------------------------------------------------------------------------
 1 | # TL;DR;
 2 | 
 3 | 安装 geektime-dl
 4 | 
 5 | ```bash
 6 | mkdir geektime && cd geektime
 7 | python3 -m venv venv3 && source venv3/bin/activate
 8 | pip install -U geektime_dl
 9 | ```
10 | 
11 | 
12 | 
13 | 查看课程列表，获取课程 ID
14 | 
15 | ```bash
16 | geektime query
17 | ```
18 | 
19 | 制作 Kindle 电子书
20 | 
21 | ```bash
22 | geektime ebook 49
23 | ```
24 | 
25 | 
26 | 打开电子书
27 | 
28 | ```bash
29 | open ebook/朱赟的技术管理课\[更新完毕\].mobi
30 | ```
31 | 
32 | 这就足够了。
33 | 
34 | 如果您还想了解更多用法，可以进行[下一步](/intro.html)阅读。


--------------------------------------------------------------------------------
/geektime.py:
--------------------------------------------------------------------------------
1 | from geektime_dl import main
2 | 
# Script entry point: run the geektime command line interface when this
# file is executed directly.
if __name__ == '__main__':
    main()
5 | 
6 | 
7 | 
8 | 


--------------------------------------------------------------------------------
/geektime_dl/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf8
 2 | 
 3 | 
def main():
    """Run the command line interface implemented in ``geektime_dl.cli``."""
    # imported inside the function, i.e. only when main() is actually called
    from geektime_dl import cli
    cli.main()


# ``geektime`` is an alias of ``main``
geektime = main
10 | 
11 | 
12 | 


--------------------------------------------------------------------------------
/geektime_dl/cache.py:
--------------------------------------------------------------------------------
  1 | # coding=utf8
  2 | 
  3 | import datetime
  4 | import json
  5 | import traceback
  6 | from abc import ABC, abstractmethod
  7 | 
  8 | from peewee import (
  9 |     SqliteDatabase,
 10 |     Model,
 11 |     DoesNotExist,
 12 |     IntegerField,
 13 |     CharField,
 14 |     TextField,
 15 |     BooleanField,
 16 |     DateTimeField
 17 | )
 18 | 
 19 | from geektime_dl.utils import get_working_folder
 20 | from geektime_dl.log import logger
 21 | 
 22 | 
# SQLite database file 'gt.sqlite' inside the folder returned by
# get_working_folder(); every model below is bound to this database.
db_file = get_working_folder() / 'gt.sqlite'
db = SqliteDatabase(str(db_file))
 25 | 
 26 | 
class BaseModel(Model):
    """Common base of the cache models; binds them to the module-level db."""

    class Meta:
        database = db
 30 | 
 31 | 
class ColumnIntro(BaseModel):
    """Cached introduction of one column (course).

    Rows are read and written by ``SqliteCache.get_column_intro`` and
    ``SqliteCache.save_column_intro``.
    """

    id = IntegerField(primary_key=True)
    # lookup key used by SqliteCache; one row per column
    column_id = IntegerField(unique=True)
    column_title = CharField()
    author_name = CharField()
    column_intro = TextField()
    column_cover = CharField()
    column_type = IntegerField()
    update_frequency = CharField()
    is_finish = BooleanField()
    had_sub = BooleanField()
    # JSON text: SqliteCache stores json.dumps(course_intro['articles']) here
    articles = TextField()

    # both timestamps default to the creation time
    created = DateTimeField(default=datetime.datetime.now)
    modified = DateTimeField(default=datetime.datetime.now)

    def save(self, *args, **kwargs):
        """Set ``modified`` to the current time, then save the row."""
        self.modified = datetime.datetime.now()
        return super(ColumnIntro, self).save(*args, **kwargs)
 51 | 
 52 | 
class Article(BaseModel):
    """Cached content of one article.

    Rows are read and written by ``SqliteCache.get_article`` and
    ``SqliteCache.save_article``.
    """

    id = IntegerField(primary_key=True)
    # lookup key used by SqliteCache; one row per article
    article_id = IntegerField(unique=True)
    article_title = CharField()
    article_cover = CharField()
    article_content = TextField()
    audio_download_url = CharField()
    # JSON text: SqliteCache stores json.dumps(article_info['comments']) here
    comments = TextField()

    # both timestamps default to the creation time
    created = DateTimeField(default=datetime.datetime.now)
    modified = DateTimeField(default=datetime.datetime.now)

    def save(self, *args, **kwargs):
        """Set ``modified`` to the current time, then save the row."""
        self.modified = datetime.datetime.now()
        return super(Article, self).save(*args, **kwargs)
 68 | 
 69 | 
 70 | class TempKV(BaseModel):
 71 |     id = IntegerField(primary_key=True)
 72 |     key = CharField(unique=True)
 73 |     value = TextField()
 74 |     expire = IntegerField()  # seconds
 75 | 
 76 |     created = DateTimeField(default=datetime.datetime.now)
 77 |     modified = DateTimeField(default=datetime.datetime.now)
 78 | 
 79 |     def save(self, *args, **kwargs):
 80 |         self.modified = datetime.datetime.now()
 81 |         return super(TempKV, self).save(*args, **kwargs)
 82 | 
 83 |     def is_expired(self) -> bool:
 84 |         if self.expire <= 0:
 85 |             return False
 86 |         now = datetime.datetime.now()
 87 |         return (now - self.modified).seconds > self.expire
 88 | 
 89 | 
def init_cache():
    """Connect to the database and create the three cache tables."""
    db.connect()
    # safe=True: creating tables that already exist is not an error
    db.create_tables([ColumnIntro, Article, TempKV], safe=True)


# executed once, when this module is imported
init_cache()
 96 | 
 97 | 
 98 | class GeektimeCache(ABC):
 99 | 
100 |     @abstractmethod
101 |     def get_column_intro(self, column_id: int) -> dict:
102 |         """"""
103 | 
104 |     @abstractmethod
105 |     def save_column_intro(self, course_intro: dict) -> None:
106 |         """"""
107 | 
108 |     @abstractmethod
109 |     def get_article(self, article_id: int) -> dict:
110 |         """"""
111 | 
112 |     @abstractmethod
113 |     def save_article(self, article_info: dict) -> None:
114 |         """"""
115 | 
116 |     @abstractmethod
117 |     def get(self, key: str) -> dict:
118 |         """"""
119 | 
120 |     @abstractmethod
121 |     def set(self, key: str, value: dict, expire: int) -> None:
122 |         """"""
123 | 
124 | 
class EmptyCache(GeektimeCache):
    """No-op backend: every lookup misses and every write is discarded."""

    def get_column_intro(self, column_id: int) -> dict:
        """Always a miss."""
        return dict()

    def save_column_intro(self, course_intro: dict) -> None:
        """Discard ``course_intro``."""
        return None

    def get_article(self, article_id: int) -> dict:
        """Always a miss."""
        return dict()

    def save_article(self, article_info: dict) -> None:
        """Discard ``article_info``."""
        return None

    def get(self, key: str) -> dict:
        """Always a miss."""
        return dict()

    def set(self, key: str, value: dict, expire: int) -> None:
        """Discard ``value``."""
        return None
144 | 
145 | 
class SqliteCache(GeektimeCache):
    """Backend that keeps the cached data in the models of this module.

    No method lets an exception escape: unexpected errors are logged with
    their traceback; getters then return an empty dict and setters simply
    return.
    """

    def get_column_intro(self, column_id: int) -> dict:
        """Return the cached introduction of ``column_id`` or ``{}``."""
        try:
            row = ColumnIntro.get(ColumnIntro.column_id == column_id)

            # "id" mirrors column_id, not the primary key of the row
            intro = {"id": row.column_id}
            for field in ("column_id", "column_title", "author_name",
                          "column_intro", "column_cover", "column_type",
                          "update_frequency", "is_finish", "had_sub"):
                intro[field] = getattr(row, field)
            # the article list is stored as JSON text
            intro["articles"] = json.loads(row.articles)

            message = "get column intro from cache, column_title={}"
            logger.info(message.format(intro['column_title']))
            return intro
        except DoesNotExist:
            return {}
        except Exception:
            logger.error('ERROR: {}'.format(traceback.format_exc()))
            return {}

    def save_column_intro(self, course_intro: dict) -> None:
        """Insert or update the row of column ``course_intro['id']``."""
        try:
            column_id = course_intro['id']
            try:
                row = ColumnIntro.get(ColumnIntro.column_id == column_id)
            except DoesNotExist:
                # first time this column is cached
                row = ColumnIntro()

            row.column_id = column_id
            for field in ("column_title", "author_name", "column_intro",
                          "column_cover", "column_type", "update_frequency",
                          "is_finish", "had_sub"):
                setattr(row, field, course_intro[field])
            row.articles = json.dumps(course_intro['articles'])
            row.save()

            message = "save column intro to cache, column_title={}"
            logger.info(message.format(course_intro['column_title']))
        except Exception:
            logger.error('ERROR: {}'.format(traceback.format_exc()))

    def get_article(self, article_id: int) -> dict:
        """Return the cached article ``article_id`` or ``{}``."""
        try:
            row = Article.get(Article.article_id == article_id)

            # "id" mirrors article_id, not the primary key of the row
            content = {"id": row.article_id}
            for field in ("article_id", "article_title", "article_cover",
                          "article_content", "audio_download_url"):
                content[field] = getattr(row, field)
            # the comments are stored as JSON text
            content["comments"] = json.loads(row.comments)

            message = "get article from cache, article_title={}"
            logger.info(message.format(content['article_title']))
            return content
        except DoesNotExist:
            return {}
        except Exception:
            logger.error('ERROR: {}'.format(traceback.format_exc()))
            return {}

    def save_article(self, article_info: dict) -> None:
        """Insert or update the row of ``article_info['article_id']``."""
        try:
            article_id = article_info['article_id']
            try:
                row = Article.get(Article.article_id == article_id)
            except DoesNotExist:
                # first time this article is cached
                row = Article()

            row.article_id = article_id
            for field in ("article_title", "article_cover",
                          "article_content", "audio_download_url"):
                setattr(row, field, article_info[field])
            row.comments = json.dumps(article_info['comments'])
            row.save()

            message = "save article to cache, article_title={}"
            logger.info(message.format(article_info['article_title']))
        except Exception:
            logger.error('ERROR: {}'.format(traceback.format_exc()))

    def get(self, key: str) -> dict:
        """Return the dict stored under ``key``.

        ``{}`` is returned when the key is unknown or its entry has expired.
        """
        try:
            try:
                entry: TempKV = TempKV.get(TempKV.key == key)
            except DoesNotExist:
                return {}

            if entry.is_expired():
                logger.info("get kv expired, key={}".format(key))
                return {}

            loaded = json.loads(str(entry.value))
            # only the first 100 characters of the raw value are logged
            preview = entry.value[:100]
            logger.info("get kv, key={}, value= {}".format(key, preview))
            return loaded
        except Exception:
            logger.error('ERROR: {}'.format(traceback.format_exc()))
            return {}

    def set(self, key: str, value: dict, expire: int) -> None:
        """Store ``value`` as JSON under ``key`` with ``expire`` seconds."""
        try:
            try:
                entry: TempKV = TempKV.get(TempKV.key == key)
            except DoesNotExist:
                # first time this key is stored
                entry = TempKV()

            serialized = json.dumps(value)
            entry.key = key
            entry.value = serialized
            entry.expire = expire
            entry.save()

            # only the first 100 characters of the value are logged
            message = "set kv, key={}, value= {}, expire={}"
            logger.info(message.format(key, serialized[:100], expire))
        except Exception:
            logger.error('ERROR: {}'.format(traceback.format_exc()))
284 | 


--------------------------------------------------------------------------------
/geektime_dl/cli/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf8
2 | # flake8: noqa
3 | 
4 | from geektime_dl.cli.command import Command, main, add_argument
5 | from geektime_dl.cli import (
6 |     login, query, ebook
7 | )
8 | 


--------------------------------------------------------------------------------
/geektime_dl/cli/command.py:
--------------------------------------------------------------------------------
  1 | # coding=utf8
  2 | 
  3 | import sys
  4 | import os
  5 | import traceback
  6 | import configparser
  7 | import argparse
  8 | import functools
  9 | 
 10 | from geektime_dl.log import logger
 11 | from geektime_dl.dal import get_data_client, DataClient
 12 | from geektime_dl.utils import get_working_folder
 13 | 
# registry of sub-commands, filled by the CommandType metaclass:
# command name -> command class
commands = {}

# absolute path of the current directory; default of --output-folder
cwd = os.path.abspath('.')
# 'geektime.cfg' inside the working folder; default of --config
geektime_cfg = str(get_working_folder() / 'geektime.cfg')
 18 | 
 19 | 
 20 | class CommandType(type):
 21 |     def __init__(cls, name, bases, attrs):
 22 |         super(CommandType, cls).__init__(name, bases, attrs)
 23 |         name = getattr(cls, name, cls.__name__.lower())
 24 |         cls.name = name
 25 |         if name != 'command':
 26 |             commands[name] = cls
 27 | 
 28 | 
 29 | class Help(metaclass=CommandType):
 30 |     """Display the list of available commands"""
 31 | 
 32 |     def work(self, args: list):
 33 |         result = ["Available commands:"]
 34 |         names = list(commands)
 35 |         padding = max([len(k) for k in names]) + 2
 36 |         for k in sorted(names):
 37 |             name = k.ljust(padding, ' ')
 38 |             doc = (commands[k].__doc__ or '').split('\n')[0]
 39 |             result.append("    %s%s" % (name, doc))
 40 |         result.append(
 41 |             "\nUse '{} <command> --help' for individual command help.".format(
 42 |                 sys.argv[0].split(os.path.sep)[-1]))
 43 | 
 44 |         result = '\n'.join(result) + '\n'
 45 |         sys.stdout.write(result)
 46 |         return result
 47 | 
 48 | 
 49 | def add_argument(*args, **kwargs):
 50 |     def decorator(func):
 51 |         @functools.wraps(func)
 52 |         def wrap(*a, **kw):
 53 |             return func(*a, **kw)
 54 | 
 55 |         if not hasattr(wrap, 'save_cfg_keys'):
 56 |             wrap.save_cfg_keys = []
 57 |         if not hasattr(wrap, 'arguments'):
 58 |             wrap.arguments = []
 59 |         if kwargs.get('save'):
 60 |             kwargs.pop('save')
 61 |             if 'dest' in kwargs:
 62 |                 wrap.save_cfg_keys.append(kwargs['dest'])
 63 |         wrap.arguments.append((args, kwargs))
 64 |         return wrap
 65 |     return decorator
 66 | 
 67 | 
 68 | class Command(metaclass=CommandType):
 69 |     _default_save_cfg_keys = ['area', 'account', 'password', 'output_folder']
 70 | 
 71 |     def __init__(self):
 72 |         self._parser = None
 73 | 
 74 |     @staticmethod
 75 |     def is_course_finished(course_info: dict):
 76 |         return course_info['update_frequency'] in ['全集', '已完结'] or \
 77 |             course_info['is_finish']
 78 | 
 79 |     @staticmethod
 80 |     def get_data_client(cfg: dict) -> DataClient:
 81 |         try:
 82 |             dc = get_data_client(cfg)
 83 |             return dc
 84 |         except Exception:
 85 |             raise ValueError(
 86 |                 "invalid geektime account or password\n"
 87 |                 "Use '{} login --help' for  help.\n".format(
 88 |                     sys.argv[0].split(os.path.sep)[-1]))
 89 | 
 90 |     @property
 91 |     def parser(self) -> argparse.ArgumentParser:
 92 |         if self._parser:
 93 |             return self._parser
 94 |         parser = argparse.ArgumentParser(
 95 |             prog='{} {}'.format(sys.argv[0], self.name),
 96 |             description=self.__doc__,
 97 |             formatter_class=argparse.ArgumentDefaultsHelpFormatter
 98 |         )
 99 |         parser.add_argument("-a", "--account", dest="account",
100 |                             help="specify the account phone number")
101 |         parser.add_argument("-p", "--password", dest="password",
102 |                             help="specify the account password")
103 |         parser.add_argument("--area", dest="area", default='86',
104 |                             help="specify the account country code")
105 |         parser.add_argument("--config", dest="config", default=geektime_cfg,
106 |                             help="specify alternate config file")
107 |         parser.add_argument("-o", "--output-folder", dest="output_folder",
108 |                             default=cwd, help="specify the output folder")
109 | 
110 |         parser.add_argument("--no-login", dest="no_login", action='store_true',
111 |                             default=False, help="no login, just for test")
112 |         for args, kwargs in getattr(self.run, 'arguments', []):
113 |             parser.add_argument(*args, **kwargs)
114 |         self._parser = parser
115 |         return parser
116 | 
117 |     @staticmethod
118 |     def load_cfg(cfg_file: str) -> dict:
119 |         p = configparser.RawConfigParser()
120 |         cfg = dict()
121 |         try:
122 |             p.read([cfg_file])
123 |             for (name, value) in p.items('default'):
124 |                 cfg[name] = value
125 |         except IOError:
126 |             pass
127 |         except configparser.NoSectionError:
128 |             pass
129 | 
130 |         return cfg
131 | 
132 |     def _parse_config(self, args: list):
133 | 
134 |         cfg_file = geektime_cfg
135 |         if '--config' in args:
136 |             index = args.index('--config') + 1
137 |             if index < len(args):
138 |                 cfg_file = args[index]
139 |         saved_cfg = self.load_cfg(cfg_file)
140 | 
141 |         save_cfg_keys = (getattr(self.run, 'save_cfg_keys', []) +
142 |                          self._default_save_cfg_keys)
143 |         for key in save_cfg_keys:
144 |             if key in saved_cfg:
145 |                 _ = ['--{}'.format(key.replace('_', '-')), saved_cfg[key]]
146 |                 # add saved configs in front so that
147 |                 # it has the chance to be overridden
148 |                 args = _ + args
149 |         opt = self.parser.parse_args(args)
150 |         cfg = vars(opt)
151 | 
152 |         saved_cfg.update({k: cfg[k] for k in save_cfg_keys if cfg.get(k)})
153 |         self.save_cfg(saved_cfg, cfg_file)
154 |         return cfg
155 | 
156 |     @classmethod
157 |     def save_cfg(cls, cfg: dict, cfg_file: str) -> None:
158 | 
159 |         old_cfg = cls.load_cfg(cfg_file)
160 |         old_cfg.update(cfg)
161 |         cfg = old_cfg
162 |         p = configparser.RawConfigParser()
163 |         p.add_section('default')
164 |         for opt in sorted(cfg):
165 |             p.set('default', opt, cfg[opt])
166 | 
167 |         # try to create the directories and write the file
168 |         cfg_exist = os.path.exists(cfg_file)
169 |         if not cfg_exist and not os.path.exists(os.path.dirname(cfg_file)):
170 |             try:
171 |                 os.makedirs(os.path.dirname(cfg_file))
172 |             except OSError:
173 |                 sys.stderr.write(
174 |                     "ERROR: couldn't create the config directory\n")
175 |         try:
176 |             with open(cfg_file, 'w') as f:
177 |                 p.write(f)
178 |         except IOError:
179 |             sys.stderr.write("ERROR: couldn't write the config file\n")
180 | 
181 |     def work(self, args: list):
182 |         if '--help' in args or '-h' in args:
183 |             self.parser.parse_args(args)
184 |             return
185 |         cfg = self._parse_config(args)
186 |         return self.run(cfg)
187 | 
188 |     def run(self, args: dict):
189 |         raise NotImplementedError
190 | 
191 | 
def main():
    """Dispatch ``sys.argv`` to the matching sub-command.

    The first argument selects the command and the rest is passed to its
    ``work`` method; without arguments the help command runs. An unknown
    command is reported on stderr, followed by the help output. Exceptions
    raised by a command are reported on stderr and logged with their
    traceback.
    """
    argv = sys.argv[1:]
    if len(argv):
        command, rest = argv[0], argv[1:]
    else:
        # default subcommand
        command, rest = Help.name, argv

    if command not in commands:
        sys.stderr.write('Unknown command %r\n\n' % (command,))
        Help().work(rest)
        return

    handler = commands[command]()
    try:
        handler.work(rest)
    except Exception as e:
        sys.stderr.write("ERROR: {}\n".format(e))
        logger.error('ERROR: {}'.format(traceback.format_exc()))
213 | 


--------------------------------------------------------------------------------
/geektime_dl/cli/ebook.py:
--------------------------------------------------------------------------------
  1 | # coding=utf8
  2 | 
  3 | import os
  4 | import pathlib
  5 | import sys
  6 | import json
  7 | import datetime
  8 | 
  9 | from termcolor import colored
 10 | from ebook import make_ebook
 11 | from ebook.ebooklib import format_file_name
 12 | from tqdm import tqdm
 13 | 
 14 | from geektime_dl.cli import Command, add_argument
 15 | from geektime_dl.ebook.ebook import Render
 16 | from geektime_dl.gt_apis import GkApiError
 17 | from geektime_dl.utils import (
 18 |     get_working_folder,
 19 |     parse_column_ids
 20 | )
 21 | 
 22 | 
 23 | class EBook(Command):
 24 |     """将专栏文章制作成电子书"""
 25 | 
 26 |     def _format_title(self, c):
 27 |         """
 28 |         课程文件名
 29 |         """
 30 | 
 31 |         t = format_file_name(c['column_title'])
 32 |         if not c['had_sub']:
 33 |             t += '[免费试读]'
 34 |         elif self.is_course_finished(c):
 35 |             pass
 36 |         else:
 37 |             t += '[未完待续{}]'.format(datetime.date.today())
 38 |         return t
 39 | 
 40 |     def _generate_source_files(self, course_intro: dict, articles: list,
 41 |                                source_folder: str, no_cache: bool = False,
 42 |                                **kwargs) -> None:
 43 |         """
 44 |         下载课程源文件
 45 |         """
 46 |         column_title = course_intro['column_title']
 47 |         _out_dir = source_folder
 48 | 
 49 |         render = Render(str(_out_dir))
 50 |         # introduction
 51 |         if not no_cache and os.path.isfile(os.path.join(_out_dir, '简介.html')):
 52 |             sys.stdout.write('{}简介 exists\n'.format(column_title))
 53 |         else:
 54 |             render.render_article_html(
 55 |                 '简介', course_intro['column_intro'], **kwargs)
 56 |             sys.stdout.write('下载{}简介 done\n'.format(column_title))
 57 |         # cover
 58 |         if not no_cache and os.path.isfile(os.path.join(_out_dir, 'cover.jpg')):
 59 |             sys.stdout.write('{}封面 exists\n'.format(column_title))
 60 |         else:
 61 |             render.generate_cover_img(course_intro['column_cover'])
 62 |             sys.stdout.write('下载{}封面 done\n'.format(column_title))
 63 |         # toc
 64 |         ebook_name = self._format_title(course_intro)
 65 |         render.render_toc_md(
 66 |             ebook_name,
 67 |             ['简介']
 68 |             + [format_file_name(t['article_title']) for t in articles]
 69 |         )
 70 |         sys.stdout.write('下载{}目录 done\n'.format(column_title))
 71 |         # articles
 72 |         articles = tqdm(articles)
 73 |         for article in articles:
 74 |             articles.set_description('HTML 文件下载中:{}'.format(
 75 |                 article['article_title'][:10]))
 76 |             file_basename = format_file_name(article['article_title'])
 77 |             fn = os.path.join(_out_dir, '{}.html'.format(file_basename))
 78 |             if not no_cache and os.path.isfile(fn):
 79 |                 continue
 80 |             render.render_article_html(
 81 |                 file_basename, article['article_content'], **kwargs)
 82 | 
 83 |     @add_argument("course_ids", type=str,
 84 |                   help="specify the target course ids")
 85 |     @add_argument("--no-cache", dest="no_cache", action='store_true',
 86 |                   default=False, help="do not use the cache data")
 87 |     @add_argument("--comments-count", dest="comments_count", type=int,
 88 |                   default=0, save=True,
 89 |                   help="the count of comments to fetch each post")
 90 |     @add_argument("--image-min-width", dest="image_min_width", type=int,
 91 |                   save=True, help="image min width")
 92 |     @add_argument("--image-min-height", dest="image_min_height", type=int,
 93 |                   save=True, help="image min height")
 94 |     @add_argument("--image-ratio", dest="image_ratio", type=float, save=True,
 95 |                   help="image ratio")
 96 |     @add_argument("--format", dest="format", type=str, save=True,
 97 |                   default='mobi', help="ebook format")
 98 |     def run(self, cfg: dict) -> None:
 99 |         course_ids = parse_column_ids(cfg['course_ids'])
100 | 
101 |         for course_id in course_ids:
102 |             self._run_once(course_id, cfg)
103 | 
104 |     def _run_once(self, course_id: int, cfg: dict):
105 |         dc = self.get_data_client(cfg)
106 |         output_folder = self._make_output_folder(cfg['output_folder'])
107 |         no_cache = cfg['no_cache']
108 |         wf = get_working_folder()
109 |         try:
110 |             course_intro = dc.get_column_intro(course_id, no_cache=no_cache)
111 |         except GkApiError as e:
112 |             sys.stderr.write('{}\n\n'.format(e))
113 |             return
114 |         if int(course_intro['column_type']) not in (1, 2):
115 |             sys.stderr.write("ERROR: 该课程不提供文本:{}".format(
116 |                 course_intro['column_title']))
117 |             return
118 | 
119 |         # fetch raw data
120 |         print(colored('开始制作电子书:{}-{}'.format(
121 |             course_id, course_intro['column_title']), 'green'))
122 |         pbar_desc = '数据爬取中:{}'.format(course_intro['column_title'][:10])
123 |         article_ids = course_intro['articles']
124 |         article_ids = tqdm(article_ids)
125 |         article_ids.set_description(pbar_desc)
126 |         articles = list()
127 |         for a in article_ids:
128 |             aid = a['id']
129 |             article = dc.get_article_content(aid, no_cache=no_cache)
130 |             if cfg['comments_count'] > 0:
131 |                 article['article_content'] += self._render_comment_html(
132 |                     article['comments'],
133 |                     cfg['comments_count']
134 |                 )
135 |             articles.append(article)
136 | 
137 |         if cfg.get('dont_ebook', False):
138 |             return
139 | 
140 |         # source file
141 |         source_folder = wf / format_file_name(course_intro['column_title'])
142 |         source_folder.mkdir(exist_ok=True)
143 |         self._generate_source_files(
144 |             course_intro, articles, str(source_folder), **cfg
145 |         )
146 | 
147 |         # ebook 未完结或者 no_cache 都会重新制作电子书
148 |         ebook_name = '{}.{}'.format(
149 |             self._format_title(course_intro), cfg['format'])
150 |         fp = pathlib.Path(output_folder) / ebook_name
151 |         if (not no_cache and self.is_course_finished(course_intro)
152 |                 and fp.exists()):
153 |             print(colored("{} exists\n".format(ebook_name), 'green'))
154 |         else:
155 |             make_ebook(
156 |                 source_dir=str(source_folder),
157 |                 output_dir=output_folder,
158 |                 format=cfg['format']
159 |             )
160 |             print(colored('制作电子书完成:{}-{}'.format(
161 |                 course_id, course_intro['column_title']), 'green'))
162 | 
163 |     @staticmethod
164 |     def _make_output_folder(output_folder: str):
165 |         output_folder = os.path.expanduser(output_folder)
166 |         if not os.path.isdir(output_folder):
167 |             os.makedirs(output_folder)
168 |         return output_folder
169 | 
170 |     @staticmethod
171 |     def _timestamp2str(timestamp: int) -> str:
172 |         if not timestamp:
173 |             return ''
174 |         return datetime.datetime.fromtimestamp(
175 |             int(timestamp)).strftime("%Y-%m-%d %H:%M:%S")
176 | 
177 |     def _render(self, c):
178 |         replies = json.loads(c.get('replies'))
179 | 
180 |         reply = replies[0] if replies else {}
181 |         replies_html = """<br/>
182 | <div>
183 |     <div style="color:#888;font-size:15.25px;font-weight:400;\
184 |         line-height:1.2">{}{}</div>
185 |     <div style="color:#353535;font-weight:400;white-space:normal;\
186 |         word-break:break-all;line-height:1.6">{}</div>
187 | </div>
188 |             """.format(
189 |             reply.get('user_name'),
190 |             self._timestamp2str(reply.get('ctime')),
191 |             reply.get('content')
192 |         ) if reply else ''
193 | 
194 |         likes = "[{}赞]".format(c['like_count']) if c['like_count'] else ''
195 |         c_html = """
196 | <li>
197 |     <div>
198 |         <div style="color: #888;font-size:15.25px;font-weight:400;\
199 |             line-height:1.2">
200 |             {user_name}  {comment_time}
201 |         </div>
202 |         <div style="color:#353535;font-weight:400;white-space:normal;\
203 |             word-break:break-all;line-height:1.6">
204 |             {comment_content} {like_count}
205 |         </div>
206 |         {replies}
207 |     </div>
208 | </li>
209 |             """.format(
210 |             user_name=c['user_name'],
211 |             like_count=likes,
212 |             comment_content=c['comment_content'],
213 |             comment_time=self._timestamp2str(c['comment_ctime']),
214 |             replies=replies_html
215 |         )
216 |         return c_html
217 | 
218 |     def _render_comment_html(self, comments, comment_count):
219 |         """
220 |         生成评论的 html 文本
221 |         """
222 |         if not comments:
223 |             return ''
224 | 
225 |         count = min(len(comments), int(comment_count))
226 |         comments = comments[:count]
227 | 
228 |         html = '\n<br/>\n'.join([
229 |             self._render(c)
230 |             for c in comments
231 |         ])
232 |         h = """<h2>精选留言：</h2>
233 |         <ul>
234 |         """
235 |         f = '</ul>'
236 |         return h + html + f
237 | 


--------------------------------------------------------------------------------
/geektime_dl/cli/login.py:
--------------------------------------------------------------------------------
 1 | # coding=utf8
 2 | 
 3 | import sys
 4 | 
 5 | from geektime_dl.gt_apis import GkApiClient, GkApiError
 6 | from geektime_dl.cli import Command
 7 | 
 8 | 
 9 | class Login(Command):
10 |     """登录极客时间，保存账号密码至配置文件"""
11 | 
12 |     def run(self, args: dict):
13 |         area = args['area']
14 |         account = args['account']
15 |         password = args['password']
16 |         need_save = not (area and account and password)
17 | 
18 |         if not account:
19 |             account = input("enter your registered account(phone): ")
20 |         if not area:
21 |             area = input("enter country code: enter for 86 ") or '86'
22 |         if not password:
23 |             password = input("account: +{} {}\n"
24 |                              "enter password: ".format(area, account))
25 | 
26 |         try:
27 |             GkApiClient(account=account, password=password, area=area)
28 |             if need_save:
29 |                 new_cfg = {
30 |                     'account': account,
31 |                     'password': password,
32 |                     'area': area
33 |                 }
34 |                 Command.save_cfg(new_cfg, args['config'])
35 | 
36 |         except GkApiError as e:
37 |             sys.stdout.write(
38 |                 "login fail, error message:{}\nEnter again\n".format(e)
39 |             )
40 |             area = input("enter country code: enter for 86 ") or '86'
41 |             account = input("enter your registered account(phone): ")
42 |             password = input("account: +{} {}\n"
43 |                              "enter password: ".format(area, account))
44 | 
45 |             GkApiClient(account=account, password=password, area=area)
46 | 
47 |             new_cfg = {
48 |                 'account': account,
49 |                 'password': password,
50 |                 'area': area
51 |             }
52 |             Command.save_cfg(new_cfg, args['config'])
53 | 
54 |         sys.stdout.write("Login succeed\n")
55 | 
56 | 
57 | 
58 | 
59 | 
60 | 


--------------------------------------------------------------------------------
/geektime_dl/cli/query.py:
--------------------------------------------------------------------------------
 1 | # coding=utf8
 2 | 
 3 | import sys
 4 | 
 5 | from geektime_dl.cli import Command, add_argument
 6 | 
# Key of the entry in the course-list payload whose 'list' holds the columns
# printed by Query.run.
_COLUMN_INDEX = "1"
 8 | 
 9 | 
class Query(Command):
    """查看专栏列表"""

    # English: show the list of columns. The class docstring is kept
    # verbatim because it may be surfaced as command help text -- TODO
    # confirm against Command.

    @add_argument("--no-cache", dest="no_cache", action='store_true',
                  default=False, help="do not use the cache data")
    def run(self, cfg: dict):
        """
        Print a table of columns: id, subscribed, finished and title.

        :param cfg: command configuration; reads ``no_cache``.
        :return: the text that was written to stdout.
        """
        dc = self.get_data_client(cfg)
        data = dc.get_column_list(no_cache=cfg['no_cache'])
        columns = data[_COLUMN_INDEX]['list']

        # Collect the pieces and join once at the end.
        pieces = [
            '专栏\n',
            "\t{:<12}{}\t{}\t{:<10}\n".format(
                '课程ID', '已订阅', '已完结', '课程标题'),
        ]
        for c in columns:
            finished = '是' if self.is_course_finished(c) else '否'
            course_id = str(c['id'])
            subscribed = '是' if c['had_sub'] else '否'
            pieces.append("\t{:<15}{}\t{}\t{:<10}\n".format(
                course_id, subscribed, finished, c['column_title']))

        result_str = ''.join(pieces)
        sys.stdout.write(result_str)
        return result_str
38 | 
39 | 


--------------------------------------------------------------------------------
/geektime_dl/dal.py:
--------------------------------------------------------------------------------
  1 | # coding=utf8
  2 | 
  3 | import json
  4 | import threading
  5 | 
  6 | from tqdm import tqdm
  7 | 
  8 | from geektime_dl.gt_apis import GkApiClient
  9 | from geektime_dl.utils import synchronized, read_local_cookies
 10 | from geektime_dl.cache import GeektimeCache, EmptyCache, SqliteCache
 11 | 
 12 | 
 13 | class DataClient:
 14 | 
 15 |     def __init__(self, gk: GkApiClient, cache: GeektimeCache):
 16 |         self._gt = gk
 17 |         self._cache: GeektimeCache = cache
 18 |         self._lock = threading.Lock()  # 限制并发
 19 | 
 20 |     def get_column_list(self, **kwargs) -> dict:
 21 |         """
 22 |         获取专栏列表
 23 |         """
 24 |         use_cache = not kwargs.get("no_cache", False)
 25 |         key = "column_all"
 26 |         expire = 1 * 24 * 3600  # 1 day
 27 |         if use_cache:
 28 |             value = self._cache.get(key)
 29 |             if value:
 30 |                 return value
 31 |         data = self._gt.get_course_list()
 32 |         if use_cache:
 33 |             self._cache.set(key, data, expire)
 34 | 
 35 |         return data
 36 | 
 37 |     @synchronized()
 38 |     def get_column_intro(self, column_id: int, **kwargs) -> dict:
 39 |         """
 40 |         获取专栏简介
 41 |         """
 42 |         use_cache = not kwargs.get("no_cache", False)
 43 |         if use_cache:
 44 |             cache = self._cache.get_column_intro(column_id)
 45 |             if cache and cache['is_finish'] and cache['had_sub']:
 46 |                 return cache
 47 | 
 48 |         course_intro = self._gt.get_course_intro(column_id)
 49 |         course_intro['column_id'] = course_intro['id']
 50 |         articles = self._gt.get_post_list_of(column_id)
 51 |         course_intro['articles'] = articles
 52 | 
 53 |         if use_cache:
 54 |             self._cache.save_column_intro(course_intro)
 55 | 
 56 |         return course_intro
 57 | 
 58 |     @synchronized()
 59 |     def get_article_content(self, article_id: int, **kwargs) -> dict:
 60 |         """
 61 |         获取 article 的所有内容，包括评论
 62 |         """
 63 |         use_cache = not kwargs.get("no_cache", False)
 64 |         if use_cache:
 65 |             cache = self._cache.get_article(article_id)
 66 |             if cache:
 67 |                 return cache
 68 | 
 69 |         article_info = self._gt.get_post_content(article_id)
 70 |         article_info['article_id'] = article_info['id']
 71 |         article_info['comments'] = self._get_article_comments(article_id)
 72 | 
 73 |         if use_cache:
 74 |             self._cache.save_article(article_info)
 75 | 
 76 |         return article_info
 77 | 
 78 |     def _get_article_comments(self, article_id: int) -> list:
 79 |         """
 80 |         获取 article 的评论
 81 |         """
 82 |         data = self._gt.get_post_comments(article_id)
 83 |         for c in data:
 84 |             c['replies'] = json.dumps(c.get('replies', []))
 85 |         return data
 86 | 
 87 |     def get_video_collection_list(self, **kwargs) -> list:
 88 |         """
 89 |         获取每日一课合辑列表
 90 |         """
 91 |         return self._gt.get_video_collection_list()
 92 | 
 93 |     @synchronized()
 94 |     def get_video_collection_intro(self, collection_id: int, **kwargs) -> dict:
 95 |         """
 96 |         获取每日一课合辑简介
 97 |         """
 98 |         data = self._gt.get_video_collection_intro(collection_id)
 99 |         return data
100 | 
101 |     @synchronized()
102 |     def get_daily_content(self, video_id: int, **kwargs) -> dict:
103 |         """
104 |         获取每日一课内容
105 |         """
106 |         data = self._gt.get_post_content(video_id)
107 |         return data
108 | 
109 |     def get_video_collection_content(self, collection_id: int,
110 |                                      force: bool = False,
111 |                                      pbar=True, pbar_desc='') -> list:
112 |         """
113 |         获取每日一课合辑ID 为 collection_id 的所有视频内容
114 |         """
115 |         data = []
116 |         v_ids = self._gt.get_video_list_of(collection_id)
117 |         if pbar:
118 |             v_ids = tqdm(v_ids)
119 |             v_ids.set_description(pbar_desc)
120 |         for v_id in v_ids:
121 |             v = self.get_daily_content(v_id['article_id'], force=force)
122 |             data.append(v)
123 |         return data
124 | 
125 | 
# Module-wide DataClient instance; stays None until get_data_client() builds
# it on its first call.
dc_global = None
# Held by get_data_client() while it checks and creates dc_global.
_dc_global_lock = threading.Lock()
128 | 
129 | 
def get_data_client(cfg: dict) -> DataClient:
    """
    Return the shared DataClient, creating it on the first call.

    Later calls return the existing instance; their ``cfg`` is not used.

    :param cfg: reads ``account``, ``password``, ``area``, ``no_login`` and
        the optional ``no_cache`` (selects EmptyCache over SqliteCache).
    """
    global dc_global
    with _dc_global_lock:
        if dc_global is None:
            api_client = GkApiClient(
                account=cfg['account'],
                password=cfg['password'],
                area=cfg['area'],
                no_login=cfg['no_login'],
                lazy_login=True,
                cookies=read_local_cookies()
            )
            no_cache = cfg.get('no_cache', False)
            cache = EmptyCache() if no_cache else SqliteCache()
            dc_global = DataClient(api_client, cache=cache)
        return dc_global
154 | 


--------------------------------------------------------------------------------
/geektime_dl/ebook/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf8
2 | 


--------------------------------------------------------------------------------
/geektime_dl/ebook/ebook.py:
--------------------------------------------------------------------------------
  1 | # coding=utf8
  2 | 
  3 | import os
  4 | import re
  5 | import time
  6 | import contextlib
  7 | import pathlib
  8 | from urllib.parse import urlparse
  9 | import io
 10 | 
 11 | import requests
 12 | from jinja2 import Environment, FileSystemLoader
 13 | from PIL import Image
 14 | 
 15 | 
 16 | class Render:
 17 | 
 18 |     def __init__(self, output_folder: str):
 19 | 
 20 |         self._output_folder = output_folder
 21 |         self._jinja_env = Environment(loader=FileSystemLoader(
 22 |             '{}/templates/'.format(os.path.dirname(__file__))
 23 |         ))
 24 | 
 25 |     def _render_file(
 26 |             self, template_name: str, context: dict, filename: str) -> None:
 27 |         """
 28 |         生成 html 文件
 29 |         """
 30 |         template = self._jinja_env.get_template(template_name)
 31 |         with open(os.path.join(self._output_folder, filename), "w") as f:
 32 |             f.write(template.render(**context))
 33 | 
 34 |     def render_toc_md(self, title: str, headers: list) -> None:
 35 |         """
 36 |         生成目录文件 toc.mc
 37 |         """
 38 |         with open(os.path.join(self._output_folder, 'toc.md'), "w") as f:
 39 |             headers = ['# {}'.format(h) for h in headers]
 40 |             f.writelines('\n'.join([title] + headers))
 41 | 
 42 |     def render_article_html(self, title: str, content: str, **kwargs) -> None:
 43 |         """
 44 |         生成 html 文件
 45 |         """
 46 |         content = self._parse_image(content, **kwargs)
 47 |         self._render_file(
 48 |             'article.html',
 49 |             {'title': title, 'content': content},
 50 |             '{}.html'.format(title)
 51 |         )
 52 | 
 53 |     def generate_cover_img(self, url: str) -> None:
 54 |         """
 55 |         下载 url 作为封面
 56 |         """
 57 |         with contextlib.suppress(Exception):
 58 |             r = requests.get(url, timeout=20)
 59 |             cover = os.path.join(self._output_folder, 'cover.jpg')
 60 |             with open(cover, 'wb') as f:
 61 |                 f.write(r.content)
 62 | 
 63 |     def _parse_image(self, content: str, **kwargs) -> str:
 64 |         """
 65 |         下载 content(html text) 中的 image
 66 |         """
 67 |         # remove the xxx `style=xxx`
 68 |         p = r'img (.{1,15}=".*?") src=".*?"'
 69 |         fucking_styles = re.findall(p, content)
 70 |         for style in fucking_styles:
 71 |             content = content.replace(style, '')
 72 | 
 73 |         p = r'</?img>'
 74 |         empty_imgs = re.findall(p, content)
 75 |         for empty_img in empty_imgs:
 76 |             content = content.replace(empty_img, '')
 77 | 
 78 |         p = r'img\s+src="(.*?)"'
 79 |         img_url_list = re.findall(p, content)
 80 | 
 81 |         for url in img_url_list:
 82 |             with contextlib.suppress(Exception):
 83 |                 url_local = self._format_url_path(url)
 84 |                 r = requests.get(url, timeout=20)
 85 |                 img_fn = os.path.join(self._output_folder, url_local)
 86 |                 self._save_img(
 87 |                     r.content, img_fn,
 88 |                     min_width=kwargs.get('image_min_width'),
 89 |                     min_height=kwargs.get('image_min_height'),
 90 |                     ratio=kwargs.get('image_ratio')
 91 |                 )
 92 |                 content = content.replace(url, url_local)
 93 | 
 94 |         return content
 95 | 
 96 |     @staticmethod
 97 |     def _save_img(content: bytes, filename: str,
 98 |                   min_width: int = None, min_height: int = None,
 99 |                   ratio: float = None) -> None:
100 |         min_width = min_width or 500
101 |         min_height = min_height or 500
102 |         ratio = ratio or 0.5
103 | 
104 |         img = Image.open(io.BytesIO(content))
105 |         w, h = img.size
106 |         if w <= min_width or h <= min_height:
107 |             img.save(filename, img.format)
108 |             return
109 | 
110 |         rw, rh = int(w * ratio), int(h * ratio)
111 |         if rw < min_width:
112 |             rw, rh = min_width, int(rh * min_width / rw)
113 |         if rh < min_height:
114 |             rw, rh = int(rw * min_height / rh), min_height
115 |         img.thumbnail((rw, rh))
116 |         img.save(filename, img.format)
117 | 
118 |     @staticmethod
119 |     def _format_url_path(url: str) -> str:
120 |         o = urlparse(url)
121 |         u = pathlib.Path(o.path)
122 |         stem, suffix = u.stem, u.suffix
123 |         return '{}-{}{}'.format(stem, int(time.time()), suffix)
124 | 


--------------------------------------------------------------------------------
/geektime_dl/ebook/templates/article.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
 2 | <html xmlns="http://www.w3.org/1999/xhtml">
 3 | <head>
 4 |     <title>{{title}}</title>
 5 |     <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
 6 | </head>
 7 | <body>
 8 | <h1>{{title}}</h1>
 9 | 
10 | {{content | safe}}
11 | 
12 | </body>
13 | </html>
14 | 


--------------------------------------------------------------------------------
/geektime_dl/gt_apis.py:
--------------------------------------------------------------------------------
  1 | # coding=utf8
  2 | 
  3 | 
  4 | import threading
  5 | import functools
  6 | import time
  7 | import contextlib
  8 | from typing import Optional
  9 | 
 10 | import requests
 11 | 
 12 | from geektime_dl.utils import (
 13 |     synchronized,
 14 |     Singleton,
 15 |     get_random_user_agent
 16 | )
 17 | from geektime_dl.log import logger
 18 | 
 19 | 
class GkApiError(Exception):
    """Raised when a Geektime API call fails or returns an error payload."""
 22 | 
 23 | 
 24 | def _retry(func):
 25 |     """
 26 |     0.1s 后重试
 27 |     """
 28 |     @functools.wraps(func)
 29 |     def wrap(gk_api: 'GkApiClient', *args, **kwargs):
 30 |         try:
 31 |             res = func(gk_api, *args, **kwargs)
 32 |             return res
 33 |         except requests.RequestException:
 34 |             time.sleep(0.1)
 35 |             gk_api.reset_session()
 36 |             return func(gk_api, *args, **kwargs)
 37 |         except GkApiError:
 38 |             raise
 39 |         except Exception as e:
 40 |             raise GkApiError("geektime api error") from e
 41 | 
 42 |     return wrap
 43 | 
 44 | 
 45 | class GkApiClient(metaclass=Singleton):
 46 |     """
 47 |     一个课程，包括专栏、视频、微课等，称作 `course` 或者 `column`
 48 |     课程下的章节，包括文章、者视频等，称作 `post` 或者 `article`
 49 |     """
 50 | 
 51 |     def __init__(self, account: str, password: str, area: str = '86',
 52 |                  no_login: bool = False, lazy_login: bool = True,
 53 |                  cookies: Optional[dict] = None):
 54 |         self._cookies = None
 55 |         self._lock = threading.Lock()
 56 |         self._account = account
 57 |         self._password = password
 58 |         self._area = area
 59 |         self._no_login = no_login
 60 |         self._ua = get_random_user_agent()
 61 | 
 62 |         if cookies:
 63 |             self._cookies = cookies
 64 |             return
 65 | 
 66 |         if lazy_login or no_login:
 67 |             return
 68 |         self.reset_session()
 69 | 
 70 |     def _post(self, url: str, data: dict = None, **kwargs) -> requests.Response:
 71 |         with contextlib.suppress(Exception):
 72 |             for k in ['cellphone', 'password']:
 73 |                 if data and k in data:
 74 |                     data[k] = 'xxx'
 75 |             logger.info("request geektime api, {}, {}".format(url, data))
 76 | 
 77 |         headers = kwargs.setdefault('headers', {})
 78 |         headers.update({
 79 |             'Content-Type': 'application/json',
 80 |             'User-Agent': self._ua
 81 |         })
 82 |         resp = requests.post(url, json=data, timeout=10, **kwargs)
 83 |         resp.raise_for_status()
 84 | 
 85 |         if resp.json().get('code') != 0:
 86 |             raise GkApiError('geektime api fail:' + resp.json()['error']['msg'])
 87 | 
 88 |         return resp
 89 | 
 90 |     @synchronized()
 91 |     def reset_session(self) -> None:
 92 |         """登录"""
 93 |         url = 'https://account.geekbang.org/account/ticket/login'
 94 | 
 95 |         self._ua = get_random_user_agent()
 96 |         headers = {
 97 |             'Accept': 'application/json, text/plain, */*',
 98 |             'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',  # noqa: E501
 99 |             'Host': 'account.geekbang.org',
100 |             'Referer': 'https://account.geekbang.org/signin?redirect=https%3A%2F%2Fwww.geekbang.org%2F',  # noqa: E501
101 |         }
102 | 
103 |         data = {
104 |             "country": self._area,
105 |             "cellphone": self._account,
106 |             "password": self._password,
107 |             "captcha": "",
108 |             "remember": 1,
109 |             "platform": 3,
110 |             "appid": 1
111 |         }
112 | 
113 |         resp = self._post(url, data, headers=headers)
114 | 
115 |         self._cookies = resp.cookies
116 | 
117 |     @_retry
118 |     def get_course_list(self) -> dict:
119 |         """
120 |         获取课程列表
121 |         :return:
122 |             key: value
123 |             '1'
124 |             '2'
125 |             '3'
126 |             '4':
127 |         """
128 |         url = 'https://time.geekbang.org/serv/v1/column/all'
129 |         headers = {
130 |             'Referer': 'https://time.geekbang.org/paid-content',
131 |         }
132 |         if not self._cookies and not self._no_login:
133 |             self.reset_session()
134 | 
135 |         resp = self._post(url, headers=headers, cookies=self._cookies)
136 |         return resp.json()['data']
137 | 
138 |     @_retry
139 |     def get_post_list_of(self, course_id: int) -> list:
140 |         """获取课程所有章节列表"""
141 |         url = 'https://time.geekbang.org/serv/v1/column/articles'
142 |         data = {
143 |             "cid": str(course_id), "size": 1000, "prev": 0, "order": "newest"
144 |         }
145 |         headers = {
146 |             'Referer': 'https://time.geekbang.org/column/{}'.format(course_id),
147 |         }
148 | 
149 |         if not self._cookies and not self._no_login:
150 |             self.reset_session()
151 | 
152 |         resp = self._post(url, data, headers=headers, cookies=self._cookies)
153 | 
154 |         if not resp.json()['data']:
155 |             raise Exception('course not exists:%s' % course_id)
156 | 
157 |         return resp.json()['data']['list'][::-1]
158 | 
159 |     @_retry
160 |     def get_course_intro(self, course_id: int) -> dict:
161 |         """课程简介"""
162 |         url = 'https://time.geekbang.org/serv/v1/column/intro'
163 |         headers = {
164 |             'Referer': 'https://time.geekbang.org/column/{}'.format(course_id),
165 |         }
166 | 
167 |         if not self._cookies and not self._no_login:
168 |             self.reset_session()
169 | 
170 |         resp = self._post(
171 |             url, {'cid': str(course_id)}, headers=headers, cookies=self._cookies
172 |         )
173 | 
174 |         data = resp.json()['data']
175 |         if not data:
176 |             raise GkApiError('无效的课程 ID: {}'.format(course_id))
177 |         return data
178 | 
179 |     @_retry
180 |     def get_post_content(self, post_id: int) -> dict:
181 |         """课程章节详情"""
182 |         url = 'https://time.geekbang.org/serv/v1/article'
183 |         headers = {
184 |             'Referer': 'https://time.geekbang.org/column/article/{}'.format(
185 |                 post_id)
186 |         }
187 | 
188 |         if not self._cookies and not self._no_login:
189 |             self.reset_session()
190 | 
191 |         resp = self._post(
192 |             url, {'id': post_id}, headers=headers, cookies=self._cookies
193 |         )
194 | 
195 |         return resp.json()['data']
196 | 
197 |     @_retry
198 |     def get_post_comments(self, post_id: int) -> list:
199 |         """课程章节评论"""
200 |         url = 'https://time.geekbang.org/serv/v1/comments'
201 |         headers = {
202 |             'Referer': 'https://time.geekbang.org/column/article/{}'.format(
203 |                 post_id)
204 |         }
205 | 
206 |         if not self._cookies and not self._no_login:
207 |             self.reset_session()
208 | 
209 |         resp = self._post(
210 |             url, {"aid": str(post_id), "prev": 0},
211 |             headers=headers, cookies=self._cookies
212 |         )
213 | 
214 |         return resp.json()['data']['list']
215 | 
216 |     @_retry
217 |     def get_video_collection_intro(self, collection_id: int) -> dict:
218 |         """每日一课合辑简介"""
219 |         url = 'https://time.geekbang.org/serv/v2/video/GetCollectById'
220 |         headers = {
221 |             'Referer': 'https://time.geekbang.org/dailylesson/collection/{}'.format(  # noqa: E501
222 |                 collection_id)
223 |         }
224 | 
225 |         if not self._cookies and not self._no_login:
226 |             self.reset_session()
227 | 
228 |         resp = self._post(
229 |             url, {'id': str(collection_id)},
230 |             headers=headers, cookies=self._cookies
231 |         )
232 | 
233 |         data = resp.json()['data']
234 |         return data
235 | 
236 |     @_retry
237 |     def get_video_collection_list(self) -> list:
238 |         """每日一课合辑列表"""
239 |         # 没分析出接口
240 |         ids = list(range(3, 82)) + list(range(104, 141))
241 |         return [{'collection_id': id_} for id_ in ids]
242 | 
243 |     @_retry
244 |     def get_video_list_of(self, collection_id: int) -> list:
245 |         """每日一课合辑视频列表"""
246 | 
247 |         url = 'https://time.geekbang.org/serv/v2/video/GetListByType'
248 |         headers = {
249 |             'Referer': 'https://time.geekbang.org/dailylesson/collection/{}'.format(  # noqa: E501
250 |                 collection_id)
251 |         }
252 | 
253 |         if not self._cookies and not self._no_login:
254 |             self.reset_session()
255 | 
256 |         resp = self._post(
257 |             url, {"id": str(collection_id), "size": 50},
258 |             headers=headers, cookies=self._cookies
259 |         )
260 | 
261 |         return resp.json()['data']['list']
262 | 


--------------------------------------------------------------------------------
/geektime_dl/log.py:
--------------------------------------------------------------------------------
 1 | # coding=utf8
 2 | 
 3 | import os
 4 | import logging
 5 | 
 6 | from geektime_dl.utils import get_working_folder
 7 | 
 8 | LOG_PATH = str(get_working_folder() / 'geektime.log')
 9 | LOG_FORMAT = '\t'.join([
10 |     'log_time=%(asctime)s',
11 |     'levelname=%(levelname)s',
12 |     '%(message)s',
13 |     'location=%(pathname)s:%(lineno)d'])
14 | 
15 | level = logging.DEBUG if os.getenv('DEBUG') == '1' else logging.INFO
16 | logger = logging.getLogger('geektime')
17 | file_handler = logging.FileHandler(filename=LOG_PATH)
18 | 
19 | file_handler.setLevel(level)
20 | file_handler.setFormatter(logging.Formatter(LOG_FORMAT))
21 | logger.setLevel(level)
22 | logger.addHandler(file_handler)
23 | 
24 | 


--------------------------------------------------------------------------------
/geektime_dl/utils.py:
--------------------------------------------------------------------------------
  1 | # coding=utf8
  2 | import contextlib
  3 | import random
  4 | import threading
  5 | import pathlib
  6 | from functools import wraps
  7 | from typing import List
  8 | 
  9 | _working_folder = pathlib.Path.home() / '.geektime_dl'
 10 | _working_folder.mkdir(exist_ok=True)
 11 | 
 12 | 
 13 | def get_working_folder():
 14 |     return _working_folder
 15 | 
 16 | 
 17 | def synchronized(lock_attr='_lock'):
 18 |     def decorator(func):
 19 |         @wraps(func)
 20 |         def wrapper(self, *args, **kwargs):
 21 |             lock = getattr(self, lock_attr)
 22 |             try:
 23 |                 lock.acquire()
 24 |                 return func(self, *args, **kwargs)
 25 |             finally:
 26 |                 lock.release()
 27 |         return wrapper
 28 |     return decorator
 29 | 
 30 | 
 31 | class Singleton(type):
 32 |     _instances = {}
 33 |     _lock = threading.Lock()
 34 | 
 35 |     @synchronized()
 36 |     def __call__(cls, *args, **kwargs):
 37 |         if cls not in cls._instances:
 38 |             cls._instances[cls] = super().__call__(*args, **kwargs)
 39 |         return cls._instances[cls]
 40 | 
 41 |     @classmethod
 42 |     @synchronized()
 43 |     def clear_singletons(cls):
 44 |         return cls._instances.clear()
 45 | 
 46 | 
 47 | def read_cookies_from_file(file_path: pathlib.Path) -> dict:
 48 |     cookies = {}
 49 |     with open(file_path, 'r') as f:
 50 |         for line in f.read().split(';'):
 51 |             n, v = line.split('=', 1)
 52 |             with contextlib.suppress(Exception):
 53 |                 _ = v.strip().encode('latin-1')
 54 |                 cookies[n.strip()] = v.strip()
 55 |     return cookies
 56 | 
 57 | 
 58 | def read_local_cookies() -> dict:
 59 |     fn = get_working_folder() / 'cookies'
 60 |     if not fn.exists():
 61 |         return {}
 62 |     return read_cookies_from_file(fn)
 63 | 
 64 | 
 65 | def parse_column_ids(ids_str: str) -> List[int]:
 66 |     def _int(num):
 67 |         try:
 68 |             return int(num)
 69 |         except Exception:
 70 |             raise ValueError('illegal column ids: {}'.format(ids_str))
 71 |     res = list()
 72 |     segments = ids_str.split(',')
 73 |     for seg in segments:
 74 |         if '-' in seg:
 75 |             s, e = seg.split('-', 1)
 76 |             res.extend(range(_int(s), _int(e) + 1))
 77 |         else:
 78 |             res.append(_int(seg))
 79 |     res = list(set(res))
 80 |     res.sort()
 81 |     return res
 82 | 
 83 | 
# Fallback pool of User-Agent strings; get_user_agents() uses it when the
# working folder has no `user-agents.txt`.
_default_ua_list = [
    "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Mobile Safari/537.36",  # noqa: E501
]

# Module-level cache of the User-Agent pool; empty until get_user_agents()
# fills it.
_ua_list = list()
# Held by get_user_agents() while it initialises `_ua_list`.
_ua_list_lock = threading.Lock()
 90 | 
 91 | 
 92 | def get_user_agents() -> list:
 93 |     global _ua_list
 94 |     if _ua_list:
 95 |         return _ua_list
 96 | 
 97 |     with _ua_list_lock:
 98 |         if _ua_list:
 99 |             return _ua_list
100 |         fp = get_working_folder() / 'user-agents.txt'
101 |         if not fp.exists():
102 |             _ua_list = _default_ua_list
103 |             return _ua_list
104 |         with open(fp) as f:
105 |             uas = list()
106 |             for ua in f.readlines():
107 |                 uas.append(ua.strip())
108 |             _ua_list = uas
109 |     return _ua_list
110 | 
111 | 
def get_random_user_agent() -> str:
    """Pick one entry at random from the pool given by get_user_agents()."""
    candidates = get_user_agents()
    return random.choice(candidates)
114 | 


--------------------------------------------------------------------------------
/requirements/base.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/jachinlin/ebook-py.git
2 | requests
3 | termcolor
4 | tqdm
5 | pillow
6 | jinja2
7 | peewee


--------------------------------------------------------------------------------
/requirements/dev.txt:
--------------------------------------------------------------------------------
1 | -r base.txt
2 | pytest
3 | flake8
4 | codecov
5 | pytest-cov
6 | pytest-mock


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding: utf-8
 3 | 
 4 | import os
 5 | from setuptools import setup, find_packages
 6 | 
# Release version of the package; passed to setup() as `version` below.
version = '1.2.0'
 8 | 
 9 | 
def read(fname):
    """Return the text of ``fname``, resolved against this file's directory.

    The file is decoded as UTF-8 explicitly rather than with the locale's
    default encoding, so the result does not depend on the platform the
    package is built on.

    :param fname: file name relative to the directory containing this file.
    :return: the file content as a string.
    """
    path = os.path.join(os.path.dirname(__file__), fname)
    with open(path, encoding='utf-8') as f:
        return f.read()
13 | 
14 | 
# Distribution metadata and build configuration for the `geektime_dl` package.
setup(
    name='geektime_dl',
    version=version,
    author='jachinlin',
    author_email='linjx1000+github@gmail.com',
    url='https://jachinlin.github.io/geektime_dl',
    description='把极客时间装进 Kindle',
    # README.md (read relative to this file) is the long description and is
    # declared as Markdown.
    long_description=read('README.md'),
    long_description_content_type='text/markdown',
    license='MIT',
    classifiers=[
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3 :: Only'
    ],
    keywords='kindle ebook mobi geektime',
    packages=find_packages(exclude=['examples', 'tests']),
    # Include everything under geektime_dl/ebook/templates in the package.
    package_data={'geektime_dl': ['ebook/templates/*']},
    install_requires=[
        'wheel',
        # NOTE(review): a bare VCS URL does not look like a valid PEP 508
        # requirement; direct references are normally spelled
        # `<dist-name> @ git+https://...`. Confirm the distribution name of
        # ebook-py and verify that installation works with current setuptools.
        'git+https://github.com/jachinlin/ebook-py.git',
        'requests',
        'termcolor',
        'tqdm',
        'pillow',
        'jinja2',
        'peewee'
    ],
    # Expose a `geektime` console command bound to `geektime_dl:geektime`.
    entry_points={
        'console_scripts': [
            'geektime = geektime_dl:geektime',
        ],
    }
)
52 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | # coding=utf8
 2 | 
 3 | import os
 4 | 
 5 | import pytest
 6 | 
 7 | from geektime_dl.gt_apis import GkApiClient
 8 | from geektime_dl.ebook.ebook import Render
 9 | 
10 | 
@pytest.fixture
def gk() -> GkApiClient:
    """Provide a GkApiClient built with two empty strings and no_login=True.

    NOTE(review): the two positional arguments are presumably account and
    password -- confirm against GkApiClient's signature.
    """
    return GkApiClient('', '', no_login=True)
14 | 
15 | 
class FakeGk:
    """Stand-in object with a few ``get_*`` methods that return canned data.

    Nothing here performs I/O; the only state is a counter of how many times
    ``get_course_intro`` has been called.
    """

    def __init__(self):
        # Number of get_course_intro() calls served so far.
        self._access_count = 0

    def get_course_intro(self, course_id: int):
        """Return a dict echoing ``course_id`` plus the running call count."""
        self._access_count = self._access_count + 1
        intro = {'id': course_id}
        intro['access_count'] = self._access_count
        return intro

    def get_course_list(self):
        """Return a mapping with keys '1'..'4', each holding an empty list."""
        return {kind: {'list': []} for kind in ('1', '2', '3', '4')}

    def get_post_content(self, post_id: int):
        """Return a dict that only echoes ``post_id``."""
        return dict(id=post_id)

    def get_post_list_of(self, course_id: int):
        """Return two fixed posts regardless of ``course_id``."""
        return [{'id': post_id} for post_id in (123, 456)]

    def get_post_comments(self, post_id: int):
        """Return an empty comment list."""
        return list()
38 | 
39 | 
@pytest.fixture
def output_folder() -> str:
    """Folder that tests write rendered files into (hard-coded to /tmp)."""
    return '/tmp'
43 | 
44 | 
@pytest.fixture
def render(output_folder) -> Render:
    """Provide a Render constructed with the ``output_folder`` fixture value."""
    return Render(output_folder)
49 | 
50 | 
@pytest.fixture
def db_file() -> str:
    """Yield the path ``/tmp/test.json``, guaranteed not to exist beforehand.

    Any file left at that path is removed before the test runs and again
    afterwards. The teardown only removes the file if the test actually
    created it, so a test that never writes the file does not fail in
    teardown with FileNotFoundError.
    """
    path = '/tmp/test.json'
    if os.path.exists(path):
        os.remove(path)
    yield path
    # The test may not have created the file; only clean up what exists.
    if os.path.exists(path):
        os.remove(path)
58 | 
59 | 
@pytest.fixture(scope='session')
def column_id():
    """Fixed column id (49) shared by tests for the whole session."""
    return 49
63 | 
64 | 
@pytest.fixture(scope='session')
def article_id():
    """Fixed article id (780) shared by tests for the whole session."""
    return 780
68 | 
69 | 
@pytest.fixture(scope='session')
def video_course_id():
    """Fixed video course id (66) shared by tests for the whole session."""
    return 66
73 | 


--------------------------------------------------------------------------------
/tests/test_cli/test_basic.py:
--------------------------------------------------------------------------------
 1 | # coding=utf8
 2 | 
 3 | import pytest
 4 | 
 5 | from geektime_dl.cli import command
 6 | 
 7 | 
 8 | def test_command_type():
 9 |     class MyCMD(metaclass=command.CommandType):
10 |         pass
11 | 
12 |     assert MyCMD.name == 'mycmd'
13 |     assert MyCMD.name in command.commands
14 |     assert command.commands[MyCMD.name] is MyCMD
15 | 
16 | 
def test_command_of_help():
    """The registered help command returns text listing available commands."""
    help_cls = command.commands[command.Help.name]
    output = help_cls().work(args=[])
    assert "Available commands:" in output
21 | 
22 | 
23 | # test command base class
def test_add_argument_basic():
    """An optional argument is None when omitted and set when supplied."""
    class ArgsParse(command.Command):
        @command.add_argument("-n", "--name", dest="name")
        def run(self, cfg):
            return cfg

    cmd = ArgsParse()

    def parsed_name(argv):
        # work() hands back what run() returned: the parsed config mapping
        cfg = cmd.work(argv)
        assert isinstance(cfg, dict)
        assert 'name' in cfg
        return cfg['name']

    assert parsed_name([]) is None
    assert parsed_name(['--name', 'geektime']) == 'geektime'
38 | 
39 | 
def test_add_argument_required():
    """Omitting an argument declared required=True ends in SystemExit."""
    class ArgsParse(command.Command):
        @command.add_argument("-n", "--name", dest="name", required=True)
        def run(self, cfg):
            return cfg

    required_cmd = ArgsParse()
    no_arguments = []
    with pytest.raises(SystemExit):
        required_cmd.work(no_arguments)
49 | 
50 | 
def test_add_argument_save(tmp_path):
    """An argument declared with save=True is written to the config file.

    The three steps below share one config file and must run in this order.
    """
    class ArgsParse(command.Command):
        @command.add_argument("-n", "--name", dest="name", save=True)
        def run(self, cfg):
            return cfg

    # Step 1: no -n given; the config file is expected to hold only the
    # 'area' and 'output_folder' keys.
    cfg_file = tmp_path / 'test.cfg'
    cmd = ArgsParse()
    cmd.work(['--config', str(cfg_file)])
    args = command.Command.load_cfg(str(cfg_file))
    assert set(args.keys()) == {'area', 'output_folder'}

    # Step 2: passing -n=geektime stores name=geektime in the config file.
    cmd.work(['--config', str(cfg_file), '-n=geektime'])
    args = command.Command.load_cfg(str(cfg_file))
    assert 'name' in args and args['name'] == 'geektime'

    # Step 3: running again without -n keeps name=geektime in the file.
    cmd.work(['--config', str(cfg_file)])
    args = command.Command.load_cfg(str(cfg_file))
    assert 'name' in args and args['name'] == 'geektime'

    cfg_file.unlink()
75 | 
76 | 


--------------------------------------------------------------------------------
/tests/test_cli/test_ebook.py:
--------------------------------------------------------------------------------
 1 | # coding=utf8
 2 | 
 3 | from geektime_dl.cli import ebook
 4 | from geektime_dl.utils import Singleton
 5 | 
 6 | 
def setup_function(func):
    """pytest per-test setup hook: forget all stored Singleton instances."""
    Singleton.clear_singletons()
 9 | 
10 | 
def test_ebook(tmp_path, mocker, column_id):
    """Run the ebook command for ``column_id`` without logging in.

    Output goes to pytest's temporary folder; the test currently only checks
    that the command completes without raising.
    """
    # NOTE(review): mocker.stub() creates a named stub object; it does not
    # appear to patch sys.stdout.write -- confirm the intent.
    mocker.stub(name='sys.stdout.write')

    argv = [
        str(column_id),
        '-a=0',
        '-p=0',
        '--output-folder', str(tmp_path),
        '--no-login',
    ]
    ebook.EBook().work(args=argv)

    # todo
    # mobi = tmp_path / '朱赟的技术管理课[免费试读].mobi'
    # assert mobi.exists()
26 | 


--------------------------------------------------------------------------------
/tests/test_cli/test_query.py:
--------------------------------------------------------------------------------
 1 | # coding=utf8
 2 | 
 3 | import os
 4 | from geektime_dl.cli import query
 5 | 
 6 | 
 7 | def test_query(tmp_path, mocker):
 8 |     mocker.stub(name='sys.stdout.write')
 9 |     qr = query.Query()
10 | 
11 |     res = qr.work(args=[
12 |         '-a={}'.format(os.getenv('account')),
13 |         '-p={}'.format(os.getenv('password')),
14 |         '--no-login'
15 |     ])
16 | 
17 |     assert res
18 | 


--------------------------------------------------------------------------------
/tests/test_ebook_util.py:
--------------------------------------------------------------------------------
 1 | # coding=utf8
 2 | 
 3 | import os
 4 | 
 5 | from geektime_dl.ebook.ebook import Render
 6 | 
 7 | 
 8 | def test_render_article_html(render: Render, output_folder: str):
 9 | 
10 |     title = 'hello'
11 |     content = '<p>hello world</p>'
12 |     render.render_article_html(title, content)
13 |     fn = os.path.join(output_folder, title + '.html')
14 | 
15 |     assert os.path.isfile(fn)
16 |     with open(fn) as f:
17 |         assert content in f.read()
18 | 
19 |     os.remove(fn)
20 | 
21 | 
def test_render_toc_md(render: Render, output_folder: str):
    """render_toc_md writes ``toc.md``: the title line, then one ``# header``
    line per header."""
    title = 'hello'
    headers = ['标题1', '标题2']
    render.render_toc_md(title, headers)

    toc_path = os.path.join(output_folder, 'toc.md')
    assert os.path.isfile(toc_path)

    with open(toc_path) as fh:
        lines = [raw.rstrip('\n') for raw in fh.readlines()]
    assert len(lines) == 3
    assert lines[0] == title
    for line, header in zip(lines[1:], headers):
        assert line == '# {}'.format(header)

    os.remove(toc_path)
37 | 
38 | 


--------------------------------------------------------------------------------
/tests/test_gt_apis.py:
--------------------------------------------------------------------------------
  1 | # coding=utf8
  2 | 
  3 | from geektime_dl.gt_apis import GkApiClient
  4 | 
# Keys the tests require to be present (non-None) in a course intro.
course_keys_needed = {
    'id', 'column_title', 'had_sub', 'is_finish', 'update_frequency'
}
# Keys the tests require to be present (non-None) in a post.
post_keys_needed = {
    'id', 'article_title', 'article_content', 'column_id'
}
# Keys the tests require to be present (non-None) in a comment.
comment_keys_needed = {
    'user_name', 'like_count', 'comment_content', 'comment_ctime'
}
# Keys the tests require to be present (non-None) in the post fetched
# with `daily_id`.
daily_video_keys_needed = {
    'id', 'article_title', 'column_had_sub', 'video_media_map'
}

# Fixed ids passed to the API client by the tests below.
video_id = 2184
collection_id = 141
daily_id = 113850
 21 | 
 22 | 
 23 | # def test_api_get_course_list(gk: GkApiClient):
 24 | #     res = gk.get_course_list()
 25 | #
 26 | #     assert isinstance(res, dict)
 27 | #     assert {'1', '2', '3', '4'} & set(res.keys())
 28 | #     for type_ in {'1', '2', '3', '4'}:
 29 | #         course_list = res[type_]['list']
 30 | #         course = course_list[0]
 31 | #         assert isinstance(course, dict)
 32 | #         for key in course_keys_needed:
 33 | #             assert course.get(key) is not None, '{} 不存在'.format(key)
 34 | 
 35 | 
 36 | def test_api_get_course_intro(gk: GkApiClient, column_id):
 37 |     course = gk.get_course_intro(column_id)
 38 |     assert isinstance(course, dict)
 39 |     for key in course_keys_needed:
 40 |         assert course.get(key) is not None, '{} 不存在'.format(key)
 41 | 
 42 | 
 43 | def test_api_get_course_post_list(gk: GkApiClient, column_id):
 44 |     course = gk.get_post_list_of(column_id)
 45 |     assert course and isinstance(course, list)
 46 |     article = course[0]
 47 |     for key in {'id'}:
 48 |         assert article.get(key) is not None, '{} 不存在'.format(key)
 49 | 
 50 | 
 51 | def test_api_get_post_content(gk: GkApiClient, article_id):
 52 |     article = gk.get_post_content(article_id)
 53 |     assert article and isinstance(article, dict)
 54 |     for key in post_keys_needed:
 55 |         assert article.get(key) is not None, '{} 不存在'.format(key)
 56 | 
 57 |     # mp3
 58 |     assert article.get('audio_download_url')
 59 |     # mp4
 60 |     article = gk.get_post_content(video_id)
 61 |     vm = article.get('video_media_map')
 62 |     assert vm, 'video_media_map 不存在'
 63 |     assert vm['sd']['url']
 64 |     assert vm['hd']['url']
 65 | 
 66 | 
 67 | def test_api_get_post_comments(gk: GkApiClient, article_id):
 68 |     res = gk.get_post_comments(article_id)
 69 |     assert res and isinstance(res, list)
 70 |     comment = res[0]
 71 |     for key in comment_keys_needed:
 72 |         assert comment.get(key) is not None, '{} 不存在'.format(key)
 73 | 
 74 | 
 75 | def test_api_get_video_collection_intro(gk: GkApiClient):
 76 |     course = gk.get_video_collection_intro(collection_id)
 77 |     assert isinstance(course, dict)
 78 |     for key in {'cid', 'title'}:
 79 |         assert course.get(key) is not None, '{} 不存在'.format(key)
 80 | 
 81 | 
 82 | def test_api_get_video_collection_list(gk: GkApiClient):
 83 |     col_list = gk.get_video_collection_list()
 84 |     assert col_list and isinstance(col_list, list)
 85 |     col = col_list[0]
 86 |     for key in {'collection_id'}:
 87 |         assert col.get(key) is not None, '{} 不存在'.format(key)
 88 | 
 89 | 
 90 | # def test_api_get_collection_video_list(gk: GkApiClient):
 91 | #     v_list = gk.get_video_list_of(collection_id)
 92 | #     assert v_list and isinstance(v_list, list)
 93 | #     video = v_list[0]
 94 | #     for key in {'article_id', 'is_sub'}:
 95 | #         assert video.get(key) is not None, '{} 不存在'.format(key)
 96 | 
 97 | 
 98 | def test_api_get_vedio_content(gk: GkApiClient):
 99 |     video = gk.get_post_content(daily_id)
100 |     assert video and isinstance(video, dict)
101 |     for key in daily_video_keys_needed:
102 |         assert video.get(key) is not None, '{} 不存在'.format(key)
103 | 
104 |     # video_url
105 |     assert 'video_media_map' in video
106 |     # assert vm['sd']['url']
107 |     # assert vm['hd']['url']
108 | 


--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
 1 | # coding=utf8
 2 | 
 3 | import threading
 4 | import time
 5 | 
 6 | from geektime_dl.utils import (
 7 |     get_working_folder,
 8 |     Singleton,
 9 |     synchronized,
10 |     parse_column_ids
11 | )
12 | from geektime_dl import log
13 | 
14 | 
def test_logging():
    """An INFO message sent to the package logger lands in ``geektime.log``
    inside the working folder."""
    message = 'guess where i will be '
    log.logger.info(message)

    log_path = get_working_folder() / 'geektime.log'
    with open(log_path) as fh:
        content = fh.read()
    assert message in content
    assert 'INFO' in content
23 | 
24 | 
def test_singleton():
    """Two constructions of a Singleton-backed class give the same object."""
    class S(metaclass=Singleton):
        pass

    first, second = S(), S()
    assert first is second
32 | 
33 | 
def test_synchronized():
    """Check by wall-clock time that @synchronized serialises method calls.

    Two threads each run a method that sleeps 0.2 s. With the decorator the
    calls should run one after the other (at least 0.4 s in total); without
    it they should overlap (less than 0.4 s in total).
    """

    class A(object):
        def __init__(self):
            # Lock that @synchronized() looks up under its default name.
            self._lock = threading.Lock()

        def func(self):
            time.sleep(0.2)

        @synchronized()
        def synchronized_func(self):
            time.sleep(0.2)

    a = A()

    def time_cost(func) -> float:
        # Run `func` in two threads at once and return the elapsed seconds.
        start = time.time()
        t_list = []
        for i in range(2):
            t = threading.Thread(target=func)
            t_list.append(t)
            t.start()
        for t in t_list:
            t.join()
        return time.time() - start

    assert time_cost(a.synchronized_func) >= 0.2 * 2
    assert time_cost(a.func) < 0.2 * 2
62 | 
63 | 
def test_parse_column_ids():
    """Single ids, ranges and mixed specs expand to the expected id lists."""
    cases = (
        ('1', [1]),
        ('1-3', [1, 2, 3]),
        ('3,6-8', [3, 6, 7, 8]),
    )
    for spec, expected in cases:
        assert parse_column_ids(spec) == expected
71 | 


--------------------------------------------------------------------------------