├── .gitignore
├── .travis.yml
├── CHANGELOG.md
├── LICENSE.txt
├── README.chs.md
├── README.cht.md
├── README.md
├── icon3.ico
├── requirements.txt
├── setup.py
├── util
    ├── make_release_config.py
    └── make_verinfo.py
├── xeH
├── xeH.py
└── xeHentai
    ├── __init__.py
    ├── cli.py
    ├── config.py
    ├── const.py
    ├── core.py
    ├── filters.py
    ├── i18n
        ├── __init__.py
        ├── en_us.py
        ├── zh_hans.py
        └── zh_hant.py
    ├── proxy.py
    ├── rpc.py
    ├── task.py
    ├── updater
        ├── __init__.py
        ├── github.py
        └── updater.py
    ├── util
        ├── __init__.py
        └── logger.py
    └── worker.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.py[cod]
 2 | 
 3 | *.pyc
 4 | *.json
 5 | *.sh
 6 | *.log
 7 | .ehentai.cookie
 8 | release
 9 | desktop.ini
10 | verinfo.txt
11 | .atomignore
12 | make.bat
13 | config.py
14 | README.html
15 | CHANGELOG.html
16 | webui.gz
17 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | 
 2 | sudo: required
 3 | dist: xenial
 4 | 
 5 | matrix:
 6 |   include:
 7 |     - os: linux
 8 |       language: python
 9 |       python: "2.7"
10 |     - os: linux
11 |       language: python
12 |       python: "3.5"
13 |     - os: linux
14 |       language: python
15 |       python: "3.6"
16 |     - os: linux
17 |       language: python
18 |       python: "3.7"
19 |     # https://chocolatey.org/packages/python2
20 |     # - os: windows
21 |     #   language: sh
22 |     #   python: "2.7"
23 |     #   before_install:
24 |     #     - choco install python2
25 |     #     - export PATH="/c/Python26:/c/Python27/Scripts:$PATH"
26 |     #     - wget https://bootstrap.pypa.io/get-pip.py
27 |     #     - python ./get-pip.py
28 |     # https://chocolatey.org/packages/python/3.7.4
29 |     - os: windows
30 |       language: sh
31 |       python: "3.5"
32 |       before_install:
33 |         - choco install python --version 3.5.4
34 |         - export PATH="/c/Python35:/c/Python35/Scripts:$PATH"
35 |     - os: windows
36 |       language: sh
37 |       python: "3.6"
38 |       before_install:
39 |         - choco install python --version 3.6.8
40 |         - export PATH="/c/Python36:/c/Python36/Scripts:$PATH"
41 |     - os: windows
42 |       language: sh
43 |       python: "3.7"
44 |       before_install:
45 |         - choco install python --version 3.7.4
46 |         - export PATH="/c/Python37:/c/Python37/Scripts:$PATH"
47 | 
48 | install:
49 |     - python setup.py install
50 | 
51 | script:
52 |     - xeH --help
53 |     - xeH $TEST_URL_E --dir test1
54 |     # nested env currently have no effect on windows
55 |     - LANG=zh_CN.utf-8 LC_ALL=zh_CN.utf-8 xeH $TEST_URL_E --dir test1
56 |     - LANG=zh_TW.utf-8 LC_ALL=zh_TW.utf-8 xeH $TEST_URL_E --dir test1
57 |     - xeH $TEST_URL_E --dir test1 --archive true 
58 | 
59 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | # Changelog
  2 | 
  3 | ## 2.023
  4 | - 自动更新
  5 | - 修复保存任务时logger无法序列化的bug (感谢 @9chu)
  6 | 
  7 | ## 2.022
  8 | - 增加下载速度显示
  9 | - 增加低速自动重试 `low_speed_threshold`, 默认为`10KB/s`以下重试
 10 | - 增加本地缓存WebUI, 并在打开RPC时自动开启浏览器，可通过`rpc_open_browser`关闭
 11 | - 支持制作大于2G的压缩包
 12 | - 修复同内容文件处理, 优化重试表的键
 13 | - 修复RPC读图和制作压缩包间的竞争问题
 14 | - 修复flt_quota_check的返回参数为3个
 15 | - 修复配额检查中的路径和大小匹配仅在和期望大小不同时触发
 16 | - 修复没有开启rpc_secret但发送了token时的RPC参数列表
 17 | 
 18 | ## 2.021
 19 | - 支持 `multi page viewer`
 20 | - 不再支持Python 3.3
 21 | - 修复里站和表站CDN的IP
 22 | - 优化509错误判断
 23 | - 修复重命名时的编码问题
 24 | - 修复某些Python版本下跨磁盘移动时的问题
 25 | 
 26 | ## 2.020
 27 | - 增加RPC帮助函数：`get_info`，`get_config`，`update_config`，`get_image`
 28 | - 增加通过RPC看图和下载压缩包功能
 29 | - 增加`delete_task_files`选项，设置是否删除任务时同时删除下载的文件
 30 | - 修复Windows文件夹不能以空格结尾以及文件不能以`.`结尾
 31 | - 修复下载时的临时文件在Windows下报错Error 32的问题
 32 | 
 33 | ## 2.019
 34 | - 增加图片地址解析时的错误处理
 35 | - 增加使用流模式下载图片，优化判断丧尸线程
 36 | - 修复无法从环境变量中获得LOCALE时的问题
 37 | - 修复unichr被当成局部变量的问题
 38 | - 修复交互模式在Python3中的一个问题
 39 | 
 40 | ## 2.018
 41 | - 增加`jpn_title`选项，选择是否使用日语标题
 42 | - 增加`download_range`选项，选择下载范围
 43 | - 增加`timeout`选项，设置下载图片的超时
 44 | - 增加`proxy_image_only`选项，设置仅使用代理下载图片
 45 | - 命令行增加`--force`选项，设置忽略配额继续下载; 配置增加`ignored_errors`选项，设置忽略的错误码
 46 | - 检查下载的图片是否完整
 47 | - 识别`socks5h`代理字符串
 48 | 
 49 | ## 2.017
 50 | - 修复匹配网址的正则表达式
 51 | - 修复表站自动转换里站逻辑
 52 | - 修复下载图片重试后重命名失败的问题
 53 | - 修复原始文件名与自动编号冲突时重命名异常的问题
 54 | 
 55 | ## 2.016
 56 | - 修复超出配额的判断
 57 | - 修复可能会下到评论中的图的问题
 58 | - 修复无法匹配安装在根目录glype的代理问题
 59 | - 某些错误现在会显示详细信息
 60 | - 增加`proxy_image`选项，选择是否使用代理下载图片
 61 | 
 62 | ## 2.015
 63 | - 显示重命名时的错误
 64 | - 修复扩展名中多余的`.`
 65 | - 修复Windows下文件名的保留字符`<`, `>`
 66 | 
 67 | ## 2.014
 68 | - 修复cookie中只有nw判断为已登录的bug
 69 | - 登录失败时显示网页上的错误信息
 70 | - 交互支持逗号分割多个任务，命令行模式支持同时添加多个任务
 71 | - 修复重命名的bug
 72 | 
 73 | ## 2.013
 74 | - 修复页数>=1000页时抽风的bug
 75 | - 原始文件名冲突时自动+1
 76 | 
 77 | ## 2.012
 78 | - 修复Windows下中文路径的问题
 79 | 
 80 | ## 2.011
 81 | - 修复每页缩略图数量不是40时下载不完整的bug
 82 | - 90秒没有新下载图片则自动结束任务
 83 | - 本子包含重复图片时直接复制
 84 | 
 85 | ## 2.010
 86 | - 图片404时重试
 87 | 
 88 | ## 2.009
 89 | - 交互模式默认值改为配置中设置的值
 90 | 
 91 | ## 2.008
 92 | - 跳过Content Warning
 93 | 
 94 | ## 2.007
 95 | - 修复本子中有重复图时无法自动退出的bug
 96 | - 其他稳定性修复
 97 | 
 98 | ## 2.006
 99 | - 增加`make_archive`, 下载完成后生成zip压缩包并删除下载目录
100 | - 完善reload机制
101 | - 检测IP是否被ban并自动更换代理IP
102 | 
103 | ## 2.005
104 | - 增加`rpc_secret`
105 | - `i18n/zh_cn`更名为`i18n/zh_hans`
106 | 
107 | ## 2.004
108 | - 支持Python3
109 | 
110 | ## 2.003
111 | - 读取 .ehentai.cookie
112 | - 交互模式不保存任务
113 | - 添加 `--rename-ori` 参数和配置
114 | - 如果用户配置有问题，从内置配置读取默认值
115 | - 其他更新
116 | 
117 | ## 2.002
118 | - 支持`glype`代理类型
119 | 
120 | ## 2.001
121 | - 初始发布
122 | 


--------------------------------------------------------------------------------
/README.chs.md:
--------------------------------------------------------------------------------
  1 | ﻿# 绅♂士♂站♂小♂爬♂虫
  2 | 
  3 | [![Build Status](https://travis-ci.org/fffonion/xeHentai.svg?branch=master)](https://travis-ci.org/fffonion/xeHentai)
  4 | 
  5 | [English](README.md) [繁體中文](README.cht.md)
  6 | 
  7 | [xeHentai Web界面](https://github.com/fffonion/xeHentai-webui)
  8 | 
  9 | ## 快速入门
 10 | 
 11 | windows用户可以下载可执行文件 [这里](https://github.com/fffonion/xeHentai/releases) [或这里](http://dl.yooooo.us/share/xeHentai/)
 12 | 
 13 | 或者可以运行源码
 14 | 
 15 | ```shell
 16 | pip install -U requests[socks]
 17 | git clone https://github.com/fffonion/xeHentai.git
 18 | cd xeHentai
 19 | python ./setup.py install
 20 | xeH
 21 | ```
 22 | 
 23 | 新版本默认为命令行模式，如果需要使用交互模式，请运行`xeH.py -i`
 24 | 
 25 | ## 详细说明
 26 | 
 27 | ### 配置文件
 28 | 
 29 | 使用源码运行的用户请先将`xeHentai/config.py`复制到当前目录。
 30 | 
 31 | 配置的优先级为 交互模式 > 命令行参数 > 用户config.py > 内置config.py。
 32 | 
 33 | 常用参数: 
 34 | 
 35 |  - **daemon** 后台模式，仅支持posix兼容的系统，参见[运行模式](#运行模式)，默认为否
 36 |  - **dir** 下载目录，默认为当前目录
 37 |  - **download_ori** 是否下载原图，默认为否
 38 |  - **jpn_title** 是否使用日语标题，如果关闭则使用英文或罗马字标题，默认为是
 39 |  - **rename_ori** 将图片重命名为原始名称，如果关闭则使用序号，默认为否
 40 |  - **make_archive** 是否下载完成后生成zip压缩包，并删除下载目录，默认为否
 41 | 
 42 | 高级参数: 
 43 | 
 44 |  - **proxy** 代理列表，参见[代理](#代理)。
 45 |  - **proxy_image** 是否同时使用代理来下载图片和扫描网页，默认为是
 46 |  - **proxy_image_only** 是否仅使用代理来下载图片，不用于扫描网页，默认为否
 47 |  - **rpc_interface** RPC绑定的IP，参见[JSON-RPC](#json-rpc)，默认为`localhost`
 48 |  - **rpc_port** RPC绑定的端口，默认为`None`
 49 |  - **rpc_secret** RPC密钥，默认为`None` (不开启RPC服务器)
 50 |  - **rpc_open_browser** RPC服务端启动后自动打开浏览器页面，默认为是
 51 |  - **delete_task_files** 是否删除任务时同时删除下载的文件，默认为否
 52 |  - **download_range** 设置下载的图片范围，参见[下载范围](#下载范围)
 53 |  - **scan_thread_cnt** 扫描线程数，默认为`1`
 54 |  - **download_thread_cnt** 下载线程数，默认为`5`
 55 |  - **download_timeout** 设置下载图片的超时，默认为`10`秒
 56 |  - **low_speed_threshold** 设置最低下载速度，低于此值将换源重新下载，单位为KB/s，默认为`10`
 57 |  - **ignored_errors** 设置忽略的错误码，默认为空，错误码可以从`const.py`中获得
 58 |  - **auto_update** 自动检查更新，`check` 仅检查更新，`download` 下载更新，`off` 关闭检查；默认为`download`
 59 |  - **update_beta_channel** 设置是否更新到测试版，默认为否
 60 |  - **log_path** 日志路径，默认为`eh.log`
 61 |  - **log_verbose** 日志等级，可选1-3，值越大输出越详细，默认为`2`
 62 |  - **save_tasks** 是否保存任务到`h.json`，可用于断点续传，默认为否
 63 |  
 64 | 
 65 | ### 命令行模式
 66 | ```
 67 | 用法: xeH [-u USERNAME] [-k KEY] [-c COOKIE] [-i] [--daemon] [-d DIR] [-o]
 68 |            [-j BOOL] [-r BOOL] [-p PROXY] [--proxy-image | --proxy-image-only]
 69 |            [--rpc-interface ADDR] [--rpc-port PORT] [--rpc-secret ...]
 70 |            [--rpc-open-browser BOOL] [--delete-task-files BOOL] [-a BOOL]
 71 |            [--download-range a-b,c-d,e] [-t N] [--timeout N]
 72 |            [--low-speed-threshold N] [-f] [--auto-update {check,download,off}]
 73 |            [--update-beta-channel BOOL] [-l /path/to/eh.log] [-v] [-h]
 74 |            [--version]
 75 |            [url [url ...]]
 76 | 
 77 | 绅♂士下载器
 78 | 
 79 | 必选参数:
 80 |   url                   下载页的网址
 81 | 
 82 | 可选参数:
 83 |   -u USERNAME, --username USERNAME
 84 |                         用户名
 85 |   -k KEY, --key KEY     密码
 86 |   -c COOKIE, --cookie COOKIE
 87 |                         Cookie字符串, 如果指定了用户名和密码, 此项会被忽略
 88 |   -i, --interactive     交互模式, 如果开启后台模式, 此项会被忽略 (默认: False)
 89 |   --daemon              后台模式 (默认: False)
 90 |   -d DIR, --dir DIR     设置下载目录 (默认: 当前目录)
 91 |   -o, --download-ori    是否下载原始图片（如果存在）, 需要登录 (默认: False)
 92 |   -j BOOL, --jpn-title BOOL
 93 |                         使用日语标题, 如果关闭则使用英文或罗马字标题 (默认: True)
 94 |   -r BOOL, --rename-ori BOOL
 95 |                         将图片重命名为原始名称, 如果关闭则使用序号 (默认: False)
 96 |   -p PROXY, --proxy PROXY
 97 |                         设置代理, 可以指定多次, 当前支持的类型: socks5/4a, http(s), glype.
 98 |                         代理默认只用于扫描网页 (默认: 空)
 99 |   --proxy-image         同时使用代理来下载图片和扫描网页（默认: True)
100 |   --proxy-image-only    仅使用代理来下载图片, 不用于扫描网页 (默认: False)
101 |   --rpc-interface ADDR  设置JSON-RPC监听IP (默认: localhost)
102 |   --rpc-port PORT       设置JSON-RPC监听端口 (默认: None)
103 |   --rpc-secret ...      设置JSON-RPC密钥 (默认: None)
104 |   --rpc-open-browser BOOL
105 |                         RPC服务端启动后自动打开浏览器页面 (默认: True)
106 |   --delete-task-files BOOL
107 |                         删除任务时同时删除下载的文件 (默认: False)
108 |   -a BOOL, --archive BOOL
109 |                         下载完成后生成zip压缩包并删除下载目录 (默认: False)
110 |   --download-range a-b,c-d,e
111 |                         设置下载的图片范围, 格式为 开始位置-结束位置, 或者单张图片的位置, 使用逗号来分隔多个范围, 例如
112 |                         5-10,15,20-25, 默认为下载所有
113 |   --low-speed-threshold N
114 |                         设置最低下载速度，低于此值将换源重新下载 (默认: 10 KB/s)
115 |   -t N, --thread N      下载线程数 (默认: 5)
116 |   --timeout N           设置下载图片的超时 (默认: 10秒)
117 |   -f, --force           忽略配额判断, 继续下载 (默认: False)
118 |   --auto-update {check,download,off}
119 |                         检查并自动下载更新 (默认: download)
120 |   --update-beta-channel BOOL
121 |                         是否更新到测试分支 (默认: True)
122 |   -l /path/to/eh.log, --logpath /path/to/eh.log
123 |                         保存日志的路径 (默认: eh.log)
124 |   -v, --verbose         设置日志装逼等级 (默认: 2)
125 |   -h, --help            显示本帮助信息
126 |   --version             显示版本信息
127 | 
128 | ```
129 | 
130 | 如果参数未指定, 则使用config.py中的默认值；否则将覆盖config.py设置的值。
131 | 
132 | ### JSON-RPC
133 | 
134 | 在指定`rpc_interface`和`rpc_port`后, xeHentai会启动RPC服务器。使用[JSON-RPC 2.0](http://www.jsonrpc.org/specification)标准。典型的请求如下：
135 | 
136 | ```
137 | $ curl localhost:8010/jsonrpc -d '{"jsonrpc": "2.0", "id": 1, "method":"xeH.addTask", "params":[[args],{kwargs}]}'
138 | {"jsonrpc": "2.0", "id": 1, "result": "36df423e"}
139 | ```
140 | 
141 | `rpc_secret`可用于提高安全性。如果`rpc_secret`设置为**hentai**, 则需在params中带上这个值：
142 | ```
143 | $ curl localhost:8010/jsonrpc -d '{"jsonrpc": "2.0", "id": 1, "method":"xeH.addTask", "params":["token:hentai",[args],{kwargs}]}'
144 | {"jsonrpc": "2.0", "id": 1, "result": "36df423e"}
145 | ```
146 | 
147 | 其中`method`为调用的方法，必须以**xeH.** 开头。在[core.py](xeHentai/core.py)的xeHentai类中，所有不以下划线`_`开头的方法均可以通过RPC调用，但需将方法名的下划线命名法改为驼峰命名法。如`add_task`需改为`addTask`。
148 | 
149 | 参数列表请参阅xeHentai类。
150 | 
151 | 如果浏览器安装了用户脚本插件，可以[下载xeHentaiHelper.user.js](http://dl.yooooo.us/userscripts/xeHentaiHelper.user.js)，将会在页面上添加`Add to xeHentai`链接，以支持将当前页面添加到xeHentai中。Chrome用户需要安装[Tampermonkey](https://chrome.google.com/webstore/detail/tampermonkey/dhdgffkkebhmkfjojejmpbldmpobfkfo)，
152 | Firefox用户需要安装[Greasemonkey](https://addons.mozilla.org/en-US/firefox/addon/greasemonkey/)，Opera和傲游用户需要安装暴力猴。
153 | 
154 | **由于绅士站启用了https，而rpc走的是http，所以chrome用户需要点击地址栏右侧盾牌，选择“加载不安全的脚本”**
155 | 
156 | ### 运行模式
157 | 
158 | 如果通过命令行或交互模式指定了下载url，xeHentai会在下载完成`h.json`中存储的任务（如果存在）及指定的url后退出。
159 | 
160 | 如果命令行没有指定url，xeHentai将会在完成存档`h.json`中的队列（如果存在）后继续等待。
161 | 
162 | 如果指定了后台模式（`-d`或设置`daemon`为`True`），xeHentai将会在保持后台运行。
163 | 
164 | ### 代理
165 | 
166 | 目前支持三种模式的代理: 
167 | 
168 |  - socks代理，如`socks5h://127.0.0.1:1080`；如果需要在客户端解析DNS，请使用`socks5://127.0.0.1:1080`。
169 |  - http(s)代理，如`http://127.0.0.1:8080`。
170 |  - glype代理，如`http://example.com/browse.php?u=a&b=4`。请根据实际情况修改`b`的名称。glype是目前使用最广的php在线代理，使用时请取消勾选“加密url(Encrypt URL)”、取消勾选“移除脚本 (Remove Scripts)”、勾选“允许cookies (Allow Cookies)”后随意打开一个网页，然后把网址粘贴进来
171 | 
172 | 可以指定多个代理，格式如`['socks5h://127.0.0.1:1080', 'http://127.0.0.1:8080']`。
173 | 
174 | 默认情况下代理会被用于扫描网页和下载图片。如果不需要使用代理下载图片，请在配置文件中设置`proxy_image`为**False**。
175 | 
176 | 如果使用代理仅用于突破封锁的目的，则此项可以设置为`False`；如果需要保证隐私，请将此项设置为`True`。使用glype代理的用户建议将此项设为`False`。
177 | 
178 | 如果仅需要使用代理下载图片，不需要扫描网页，请在配置文件中设置`proxy_image_only`为**True**，或者在运行时加上`--proxy-image-only`参数。如果在配置中的`proxy_image`和`proxy_image_only`均为**True**，则`proxy_image`将被忽略。
179 | 
180 | ### 下载范围
181 | 
182 | 下载范围的格式为使用`开始位置-结束位置`，例如`5-10`表示下载第5到第10张图片，包括第5和第10张；或者单个位置，例如`15`表示下载第15张图片。
183 | 
184 | 可以通过逗号来分割多个范围，例如`5-10,15`表示下载第5到第10张图片以及第15张图片。
185 | 
186 | 如果不输入下载范围，则默认下载所有图片。
187 | 
188 | 
189 | ## 其他说明
190 | 
191 | ### 配额
192 | 
193 | 直接从服务器及镜像途径下载的图片计入配额，从H@H下载的不计算；下载新发布的、冷门的漫画以及原图更有可能消耗配额，下载热门漫画基本不消耗配额
194 | 
195 | ## License
196 | 
197 | GPLv3
198 | ***
199 | ![@fffonion](http://img.t.sinajs.cn/t5/style/images/register/logo.png)[@fffonion](http://weibo.com/376463435)&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;![Blog](https://s.w.org/about/images/logos/wordpress-logo-32-blue.png)&nbsp;&nbsp;[博客](https://yooooo.us)
200 | 


--------------------------------------------------------------------------------
/README.cht.md:
--------------------------------------------------------------------------------
  1 | ﻿# 紳♂士♂站♂小♂爬♂蟲
  2 | 
  3 | [![Build Status](https://travis-ci.org/fffonion/xeHentai.svg?branch=master)](https://travis-ci.org/fffonion/xeHentai)
  4 | 
  5 | [English](README.md) [繁體中文](README.cht.md)
  6 | 
  7 | [xeHentai Web界面](https://github.com/fffonion/xeHentai-webui)
  8 | 
  9 | ## 快速入門
 10 | 
 11 | windows用戶可以下載可執行文件 [這裡](https://github.com/fffonion/xeHentai/releases) [或這裡](http://dl.yooooo.us/share/xeHentai/)
 12 | 
 13 | 或者可以運行源碼
 14 | 
 15 | ```shell
 16 | pip install -U requests[socks]
 17 | git clone https://github.com/fffonion/xeHentai.git
 18 | cd xeHentai
 19 | python ./setup.py install
 20 | xeH
 21 | ```
 22 | 
 23 | 新版本默認為命令行模式，如果需要使用交互模式，請運行`xeH.py -i`
 24 | 
 25 | ## 詳細說明
 26 | 
 27 | ### 配置文件
 28 | 
 29 | 使用源碼運行的用戶請先將`xeHentai/config.py`複製到當前目錄。
 30 | 
 31 | 配置的優先級為 交互模式 > 命令行參數 > 用戶config.py > 內置config.py。
 32 | 
 33 | 常用參數: 
 34 | 
 35 |  - **daemon** 後台模式，僅支持posix兼容的系統，參見[運行模式](#運行模式)，默認為否
 36 |  - **dir** 下載目錄，默認為當前目錄
 37 |  - **download_ori** 是否下載原圖，默認為否
 38 |  - **jpn_title** 是否使用日語標題，如果關閉則使用英文或羅馬字標題，默認為是
 39 |  - **rename_ori** 將圖片重命名為原始名稱，如果關閉則使用序號，默認為否
 40 |  - **make_archive** 是否下載完成後生成zip壓縮包，並刪除下載目錄，默認為否
 41 | 
 42 | 高級參數: 
 43 | 
 44 |  - **proxy** 代理列表，參見[代理](#代理)。
 45 |  - **proxy_image** 是否同時使用代理來下載圖片和掃描網頁，默認為是
 46 |  - **proxy_image_only** 是否僅使用代理來下載圖片，不用於掃描網頁，默認為否
 47 |  - **rpc_interface** RPC綁定的IP，參見[JSON-RPC](#json-rpc)，默認為`localhost`
 48 |  - **rpc_port** RPC綁定的埠，默認為`None`
 49 |  - **rpc_secret** RPC密鑰，默認為`None` (不開啟RPC伺服器)
 50 |  - **rpc_open_browser** RPC服務端啟動後自動打開瀏覽器頁面，默認為是
 51 |  - **delete_task_files** 是否刪除任務時同時刪除下載的文件，默認為否
 52 |  - **download_range** 設置下載的圖片範圍，參見[下載範圍](#下載範圍)
 53 |  - **scan_thread_cnt** 掃描線程數，默認為`1`
 54 |  - **download_thread_cnt** 下載線程數，默認為`5`
 55 |  - **download_timeout** 設置下載圖片的超時，默認為`10`秒
 56 |  - **low_speed_threshold** 設置最低下載速度，低於此值將換源重新下載，單位為KB/s，默認為`10`
 57 |  - **ignored_errors** 設置忽略的錯誤碼，默認為空，錯誤碼可以從`const.py`中獲得
 58 |  - **log_path** 日誌路徑，默認為`eh.log`
 59 |  - **log_verbose** 日誌等級，可選1-3，值越大輸出越詳細，默認為`2`
 60 |  - **save_tasks** 是否保存任務到`h.json`，可用於斷點續傳，默認為否
 61 |  
 62 | 
 63 | ### 命令行模式
 64 | ```
 65 | 用法: xeH [-u USERNAME] [-k KEY] [-c COOKIE] [-i] [--daemon] [-d DIR] [-o]
 66 |            [-j BOOL] [-r BOOL] [-p PROXY] [--proxy-image | --proxy-image-only]
 67 |            [--rpc-interface ADDR] [--rpc-port PORT] [--rpc-secret ...]
 68 |            [--rpc-open-browser BOOL] [--delete-task-files BOOL] [-a BOOL]
 69 |            [--download-range a-b,c-d,e] [-t N] [--timeout N]
 70 |            [--low-speed-threshold N] [-f] [-l /path/to/eh.log] [-v] [-h]
 71 |            [--version]
 72 |            [url [url ...]]
 73 | 
 74 | 紳♂士下載器
 75 | 
 76 | 必選參數:
 77 |   url                   下載頁的網址
 78 | 
 79 | 可選參數:
 80 |   -u USERNAME, --username USERNAME
 81 |                         用戶名
 82 |   -k KEY, --key KEY     密碼
 83 |   -c COOKIE, --cookie COOKIE
 84 |                         Cookie字符串, 如果指定了用戶名和密碼, 此項會被忽略
 85 |   -i, --interactive     交互模式, 如果開啟後台模式, 此項會被忽略 (默認: False)
 86 |   --daemon              後台模式 (默認: False)
 87 |   -d DIR, --dir DIR     設置下載目錄 (默認: 當前目錄)
 88 |   -o, --download-ori    是否下載原始圖片（如果存在）, 需要登錄 (默認: False)
 89 |   -j BOOL, --jpn-title BOOL
 90 |                         使用日語標題, 如果關閉則使用英文或羅馬字標題 (默認: True)
 91 |   -r BOOL, --rename-ori BOOL
 92 |                         將圖片重命名為原始名稱, 如果關閉則使用序號 (默認: False)
 93 |   -p PROXY, --proxy PROXY
 94 |                         設置代理, 可以指定多次, 當前支持的類型: socks5/4a, http(s), glype.
 95 |                         代理默認只用於掃描網頁 (默認: 空)
 96 |   --proxy-image         同時使用代理來下載圖片和掃描網頁（默認: True)
 97 |   --proxy-image-only    僅使用代理來下載圖片, 不用於掃描網頁 (默認: False)
 98 |   --rpc-interface ADDR  設置JSON-RPC監聽IP (默認: localhost)
 99 |   --rpc-port PORT       設置JSON-RPC監聽埠 (默認: None)
100 |   --rpc-secret ...      設置JSON-RPC密鑰 (默認: None)
101 |   --rpc-open-browser BOOL
102 |                         RPC服務端啟動後自動打開瀏覽器頁面 (默認: True)
103 |   --delete-task-files BOOL
104 |                         刪除任務時同時刪除下載的文件 (默認: False)
105 |   -a BOOL, --archive BOOL
106 |                         下載完成後生成zip壓縮包並刪除下載目錄 (默認: False)
107 |   --download-range a-b,c-d,e
108 |                         設置下載的圖片範圍, 格式為 開始位置-結束位置, 或者單張圖片的位置, 使用逗號來分隔多個範圍, 例如
109 |                         5-10,15,20-25, 默認為下載所有
110 |   --low-speed-threshold N
111 |                         設置最低下載速度，低於此值將換源重新下載 (默認: 10 KB/s)
112 |   -t N, --thread N      下載線程數 (默認: 5)
113 |   --timeout N           設置下載圖片的超時 (默認: 10秒)
114 |   -f, --force           忽略配額判斷, 繼續下載 (默認: False)
115 |   -l /path/to/eh.log, --logpath /path/to/eh.log
116 |                         保存日誌的路徑 (默認: eh.log)
117 |   -v, --verbose         設置日誌裝逼等級 (默認: 2)
118 |   -h, --help            顯示本幫助信息
119 |   --version             顯示版本信息
120 | 
121 | ```
122 | 
123 | 如果參數未指定, 則使用config.py中的默認值；否則將覆蓋config.py設置的值。
124 | 
125 | ### JSON-RPC
126 | 
127 | 在指定`rpc_interface`和`rpc_port`後, xeHentai會啟動RPC伺服器。使用[JSON-RPC 2.0](http://www.jsonrpc.org/specification)標准。典型的請求如下：
128 | 
129 | ```
130 | $ curl localhost:8010/jsonrpc -d '{"jsonrpc": "2.0", "id": 1, "method":"xeH.addTask", "params":[[args],{kwargs}]}'
131 | {"jsonrpc": "2.0", "id": 1, "result": "36df423e"}
132 | ```
133 | 
134 | `rpc_secret`可用於提高安全性。如果`rpc_secret`設置為**hentai**, 則需在params中帶上這個值：
135 | ```
136 | $ curl localhost:8010/jsonrpc -d '{"jsonrpc": "2.0", "id": 1, "method":"xeH.addTask", "params":["token:hentai",[args],{kwargs}]}'
137 | {"jsonrpc": "2.0", "id": 1, "result": "36df423e"}
138 | ```
139 | 
140 | 其中`method`為調用的方法，必須以**xeH.** 開頭。在[core.py](xeHentai/core.py)的xeHentai類中，所有不以下劃線`_`開頭的方法均可以通過RPC調用，但需將方法名的下劃線命名法改為駝峰命名法。如`add_task`需改為`addTask`。
141 | 
142 | 參數列表請參閱xeHentai類。
143 | 
144 | 如果瀏覽器安裝了用戶腳本插件，可以[下載xeHentaiHelper.user.js](http://dl.yooooo.us/userscripts/xeHentaiHelper.user.js)，將會在頁面上添加`Add to xeHentai`鏈接，以支持將當前頁面添加到xeHentai中。Chrome用戶需要安裝[Tampermonkey](https://chrome.google.com/webstore/detail/tampermonkey/dhdgffkkebhmkfjojejmpbldmpobfkfo)，
145 | Firefox用戶需要安裝[Greasemonkey](https://addons.mozilla.org/en-US/firefox/addon/greasemonkey/)，Opera和傲遊用戶需要安裝暴力猴。
146 | 
147 | **由於紳士站啟用了https，而rpc走的是http，所以chrome用戶需要點擊地址欄右側盾牌，選擇「加載不安全的腳本」**
148 | 
149 | ### 運行模式
150 | 
151 | 如果通過命令行或交互模式指定了下載url，xeHentai會在下載完成`h.json`中存儲的任務（如果存在）及指定的url後退出。
152 | 
153 | 如果命令行沒有指定url，xeHentai將會在完成存檔`h.json`中的隊列（如果存在）後繼續等待。
154 | 
155 | 如果指定了後台模式（`-d`或設置`daemon`為`True`），xeHentai將會在保持後台運行。
156 | 
157 | ### 代理
158 | 
159 | 目前支持三種模式的代理: 
160 | 
161 |  - socks代理，如`socks5h://127.0.0.1:1080`；如果需要在客戶端解析DNS，請使用`socks5://127.0.0.1:1080`。
162 |  - http(s)代理，如`http://127.0.0.1:8080`。
163 |  - glype代理，如`http://example.com/browse.php?u=a&b=4`。請根據實際情況修改`b`的名稱。glype是目前使用最廣的php在線代理，使用時請取消勾選「加密url(Encrypt URL)」、取消勾選「移除腳本 (Remove Scripts)」、勾選「允許cookies (Allow Cookies)」後隨意打開一個網頁，然後把網址粘貼進來
164 | 
165 | 可以指定多個代理，格式如`['socks5h://127.0.0.1:1080', 'http://127.0.0.1:8080']`。
166 | 
167 | 默認情況下代理會被用於掃描網頁和下載圖片。如果不需要使用代理下載圖片，請在配置文件中設置`proxy_image`為**False**。
168 | 
169 | 如果使用代理僅用於突破封鎖的目的，則此項可以設置為`False`；如果需要保證隱私，請將此項設置為`True`。使用glype代理的用戶建議將此項設為`False`。
170 | 
171 | 如果僅需要使用代理下載圖片，不需要掃描網頁，請在配置文件中設置`proxy_image_only`為**True**，或者在運行時加上`--proxy-image-only`參數。如果在配置中的`proxy_image`和`proxy_image_only`均為**True**，則`proxy_image`將被忽略。
172 | 
173 | ### 下載範圍
174 | 
175 | 下載範圍的格式為使用`開始位置-結束位置`，例如`5-10`表示下載第5到第10張圖片，包括第5和第10張；或者單個位置，例如`15`表示下載第15張圖片。
176 | 
177 | 可以通過逗號來分割多個範圍，例如`5-10,15`表示下載第5到第10張圖片以及第15張圖片。
178 | 
179 | 如果不輸入下載範圍，則默認下載所有圖片。
180 | 
181 | 
182 | ## 其他說明
183 | 
184 | ### 配額
185 | 
186 | 直接從伺服器及鏡像途徑下載的圖片計入配額，從H@H下載的不計算；下載新發布的、冷門的漫畫以及原圖更有可能消耗配額，下載熱門漫畫基本不消耗配額
187 | 
188 | ## License
189 | 
190 | GPLv3
191 | ***
192 | ![@fffonion](http://img.t.sinajs.cn/t5/style/images/register/logo.png)[@fffonion](http://weibo.com/376463435)&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;![Blog](https://s.w.org/about/images/logos/wordpress-logo-32-blue.png)&nbsp;&nbsp;[博客](https://yooooo.us)
193 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ﻿# E-Hentai Dōjinshi Downloader
  2 | 
  3 | [![Build Status](https://travis-ci.org/fffonion/xeHentai.svg?branch=master)](https://travis-ci.org/fffonion/xeHentai)
  4 | 
  5 | [简体中文](README.chs.md) [繁體中文](README.cht.md)
  6 | 
  7 | [xeHentai WebUI](https://github.com/fffonion/xeHentai-webui)
  8 | 
  9 | ## TL;DR
 10 | 
 11 | Windows users can download packed binaries from [here](https://github.com/fffonion/xeHentai/releases) or [here](http://dl.yooooo.us/share/xeHentai/). The package is built using [PyInstaller](http://www.pyinstaller.org/).
 12 | 
 13 | Or run directly from source code:
 14 | 
 15 | ```shell
 16 | pip install -U requests[socks]
 17 | git clone https://github.com/fffonion/xeHentai.git
 18 | cd xeHentai
 19 | python ./setup.py install
 20 | xeH
 21 | ```
 22 | 
 23 | The program is running in non-interactive mode by default. To run interactively, use `xeH.py -i`.
 24 | 
 25 | ## For pros
 26 | 
 27 | ### Configuration file
 28 | 
 29 | If you are running from source code, please copy `xeHentai/config.py` to your current directory first. Use that file as your config file.
 30 | 
 31 | The priority of configuration is: Interactive inputs > Command line options > User config.py > Internal config.py.
 32 | 
 33 | Configuration keys：
 34 | 
 35 |  - **daemon** Set to run in default mode, can only use on posix-compatible systems. Refer to [Running Modes](#running-modes). Default to `False`.
 36 |  - **dir** Download directory. Default to current directory.
 37 |  - **download_ori** Set to download original images or not. Default to `False`.
 38 |  - **jpn_title** Set to select Japanese title or not. If set to `False`, English or Romaji title will be used. Default to `True`.
 39 |  - **rename_ori** Set to rename images to their orginal names. If set to `False`, image will be named in sequence numbers. Default to `False`.
 40 |  - **make_archive** Set to make a ZIP archive after download and delete downloaded directory. Default to `False`.
 41 | 
 42 |  - **proxy** Proxy list. Refer to [Proxies](#proxies).
 43 |  - **proxy_image** Set to use proxy both on downloading images and scanning webpages. Default to `True`.
 44 |  - **proxy_image_only** Set to use proxy only on downloading images. Default to `False`.
 45 |  - **rpc_interface** RPC server binding IP. Refer to [JSON-RPC](#json-rpc). Default to `localhost`.
 46 |  - **rpc_port** RPC server binding port. Default to `none` (not serving).
 47 |  - **rpc_secret** RPC secret key. Default to `None`.
 48 |  - **rpc_open_browser** automatically open browser after RPC server starts. Default to `True`.
 49 |  - **delete_task_files** Set to delete downloaded files when deleting a task. Default to `False`.
 50 |  - **download_range** Set image download range. Refer to [Download range](#download-range). Default to download all images.
 51 |  - **scan_thread_cnt** Thread count for scanning webpages. Default to `1`.
 52 |  - **download_thread_cnt** Thread count for downloading images. Default to `5`.
 53 |  - **download_timeout** Timeout of download images. Default to `10`s.
 54 |  - **low_speed_threshold** Retry download if speed is lower than specified value. Default to `10` KB/s.
 55 |  - **ignored_errors** Set the error codes to ignore and continue downloading. Default to *empty*. Error codes can be obtained from [const.py](xeHentai/const.py).
 56 |  - **auto_update** turn on auto update of program `check` for check only and `download` for download; `off` to turn off. Default to `download`.
 57 |  - **update_beta_channel** set to true to update to dev branch
 58 |  - **log_path** Set log file path. Default to `eh.log`.
 59 |  - **log_verbose** Set log level with integer from 1 to 3. Bigger value means more verbose output. Default to `2`.
 60 |  - **save_tasks** Set to save uncompleted tasks in `h.json`. Default to `False`.
 61 |  
 62 | 
 63 | ### Command line options
 64 | ```
 65 | Usage: xeh [-u USERNAME] [-k KEY] [-c COOKIE] [-i] [--daemon] [-d DIR] [-o]
 66 |            [-j BOOL] [-r BOOL] [-p PROXY] [--proxy-image | --proxy-image-only]
 67 |            [--rpc-interface ADDR] [--rpc-port PORT] [--rpc-secret ...]
 68 |            [--rpc-open-browser BOOL] [--delete-task-files BOOL] [-a BOOL]
 69 |            [--download-range a-b,c-d,e] [-t N] [--timeout N]
 70 |            [--low-speed-threshold N] [-f] [--auto-update {check,download,off}]
 71 |            [--update-beta-channel BOOL] [-l /path/to/eh.log] [-v] [-h]
 72 |            [--version]
 73 |            [url [url ...]]
 74 | 
 75 | xeHentai Downloader NG
 76 | 
 77 | positional arguments:
 78 |   url                   gallery url(s) to download
 79 | 
 80 | optional arguments:
 81 |   -u USERNAME, --username USERNAME
 82 |                         username
 83 |   -k KEY, --key KEY     password
 84 |   -c COOKIE, --cookie COOKIE
 85 |                         cookie string, will be overriden if given -u and -k
 86 |   -i, --interactive     interactive mode, will be ignored in daemon mode
 87 |                         (default: False)
 88 |   --daemon              daemon mode, can't use with -i (default: False)
 89 |   -d DIR, --dir DIR     set download directory (current:
 90 |                         /Users/fffonion/Dev/Python/xeHentai)
 91 |   -o, --download-ori    download original images, needs to login (current:
 92 |                         True)
 93 |   -j BOOL, --jpn-title BOOL
 94 |                         use Japanese title, use English/Romaji title if turned
 95 |                         off (default: True)
 96 |   -r BOOL, --rename-ori BOOL
 97 |                         rename gallery image to original name, use sequence
 98 |                         name if turned off (default: False)
 99 |   -p PROXY, --proxy PROXY
100 |                         set download proxies, can be used multiple times,
101 |                         currenlty supported: socks5/4a, http(s), glype.
102 |                         Proxies are only used on webpages by default (current:
103 |                         ['socks5h://127.0.0.1:16963'])
104 |   --proxy-image         use proxies on images and webpages (default: True)
105 |   --proxy-image-only    only use proxies on images, not webpages (current:
106 |                         False)
107 |   --rpc-interface ADDR  bind jsonrpc server to this address (current:
108 |                         localhost)
109 |   --rpc-port PORT       bind jsonrpc server to this port (default: 8010)
110 |   --rpc-secret ...      jsonrpc secret string (default: None)
111 |   --rpc-open-browser BOOL
112 |                         automatically open browser after RPC server starts
113 |                         (default: True)
114 |   --delete-task-files BOOL
115 |                         delete downloaded files when deleting a task (default:
116 |                         True)
117 |   -a BOOL, --archive BOOL
118 |                         make an archive (.zip) after download and delete
119 |                         directory (default: False)
120 |   --download-range a-b,c-d,e
121 |                         specify ranges of images to be downloaded, in format
122 |                         start-end, or single index, use comma to concat
123 |                         multiple ranges, e.g.: 5-10,15,20-25, default to
124 |                         download all images
125 |   -t N, --thread N      download threads count (default: 5)
126 |   --timeout N           set image download timeout (default: 10s)
127 |   --low-speed-threshold N
128 |                         retry download if speed is lower than specified value
129 |                         (default: 10 KB/s)
130 |   -f, --force           download regardless of quota exceeded warning
131 |                         (default: False)
132 |   --auto-update {check,download,off}
133 |                         check or download update automatically
134 |                         (default: download)
135 |   --update-beta-channel BOOL
136 |                         check update upon beta channel
137 |                         (default: True)
138 |   -l /path/to/eh.log, --logpath /path/to/eh.log
139 |                         define log path (default: eh.log)
140 |   -v, --verbose         show more detailed log (default: 3)
141 |   -h, --help            show this help message and exit
142 |   --version             show program's version number and exit
143 | 
144 | ```
145 | 
146 | If options are not defined, values from `config.py` will be used.
147 | 
148 | ### JSON-RPC
149 | 
150 | If `rpc_interface` and `rpc_port` are set, xeHentai will start a RPC server. The request and response follows the [JSON-RPC 2.0](http://www.jsonrpc.org/specification) standard.
151 | 
152 | ```
153 | $ curl localhost:8010/jsonrpc -d '{"jsonrpc": "2.0", "id": 1, "method":"xeH.addTask", "params":[[args],{kwargs}]}'
154 | {"jsonrpc": "2.0", "id": 1, "result": "36df423e"}
155 | ```
156 | 
157 | `rpc_secret` is a secret key to your RPC server. If it's set, client should include this value in the request. For example when `rpc_secret` is set to **hentai**: 
158 | ```
159 | $ curl localhost:8010/jsonrpc -d '{"jsonrpc": "2.0", "id": 1, "method":"xeH.addTask", "params":["token:hentai",[args],{kwargs}]}'
160 | {"jsonrpc": "2.0", "id": 1, "result": "36df423e"}
161 | ```
162 | 
163 | The method filed should start with **xeH.** and should be a public class method of **xeHentai** from [core.py](xeHentai/core.py). And change the name from *lower_case_with_underscores* notation to *lowerCamelCase* notation. For example, `add_task` becomes `addTask`.
164 | 
165 | Refer to **xeHentai** class from [core.py](xeHentai/core.py) for parameters list.
166 | 
167 | If your browser has a Userscript plugin, you can use [xeHentaiHelper.user.js](http://dl.yooooo.us/userscripts/xeHentaiHelper.user.js) to create tasks directly on e-hentai website. Chrome user will need to install [Tampermonkey](https://chrome.google.com/webstore/detail/tampermonkey/dhdgffkkebhmkfjojejmpbldmpobfkfo), for firefox [Greasemonkey](https://addons.mozilla.org/en-US/firefox/addon/greasemonkey/), and ViolentMonkey for Opera and Maxthon users.
168 | 
169 | **Because e-hentai has enabled https, Chrome user will needs to click on the shield icon in the far right of the address bar and click "Load anyway" or "Load unsafe scripts"**
170 | 
171 | ### Running modes
172 | 
173 | If xeHentai is ran from command line interface or interative mode, the program will exit after it finishes the tasks in `h.json` (if exists) and given URL.
174 | 
175 | If there's no URL given from command line, the program will exit after it finishes the tasks in `h.json`(if exists).
176 | 
177 | If program is running on daemon mode (`-d` is set or `daemon` is set to `True`), the program will keep running in background.
178 | 
179 | ### Proxies
180 | 
181 | xeHentai supports three types of proxies:
182 | 
183 |  - socks proxy: `socks5h://127.0.0.1:1080`. If you want to resolve DNS on client side, use `socks5://127.0.0.1:1080`.
184 |  - http(s) proxy: `http://127.0.0.1:8080`.
185 |  - glype proxy: `http://example.com/browse.php?u=a&b=4`. Please set value of `b` accordingly. glype is a widely used PHP proxy script. When using, uncheck **Encrypt URL**, **Remove Scripts** and check **Allow Cookies** and open a random URL. The paste the address into configuration.
186 | 
187 | Multiple proxies can be specified at the same time. The format can be like : `['socks5h://127.0.0.1:1080', 'http://127.0.0.1:8080']`. 
188 | 
189 | By default proxies are used to download images and scan webpages. If you don't want to use proxy on downloading images, set `proxy_image` to `False`.
190 | 
191 | glype users are encouraged to set `proxy_image` to `False`。
192 | 
193 | If you only want to use proxy to download image, set `proxy_image_only` to **True** in `config.py` or use the `--proxy-image-only` CLI option. If both `proxy_image` and `proxy_image_only` are set to **True**, `proxy_image` will be ignored.
194 | 
195 | ### Download range
196 | 
197 | Download ranges are set in format `start_positoin-end_positoin`. For example, `5-10` means number download first 5 to 10 images, including 5 and 10. Or use `15` to download number 15 only.
198 | 
199 | Multiple ranges can be seperated with comma. For example,`5-10,15`.
200 | 
201 | If no range is given, xeHentai will download all images.
202 | 
203 | 
204 | ## Misc
205 | 
206 | ### Image limit
207 | 
208 | Downloading images will be count towards image limit. This is calculated regarding the popularity of gallery, the server load and/or Hentai@Home bandwidth by e-hentai server.
209 | 
210 | ## License
211 | 
212 | GPLv3
213 | ***
214 | ![Blog](https://s.w.org/about/images/logos/wordpress-logo-32-blue.png)&nbsp;&nbsp;[Blog](https://yooooo.us)
215 | 


--------------------------------------------------------------------------------
/icon3.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fffonion/xeHentai/43fc55c0c662f195d048becf6276a26ce06b09d9/icon3.ico


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests[socks]
2 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | PROJ_NAME = 'xeHentai'
 4 | PACKAGE_NAME = 'xeHentai'
 5 | 
 6 | PROJ_METADATA = '%s.json' % PROJ_NAME
 7 | 
 8 | import os, json, imp
 9 | here = os.path.abspath(os.path.dirname(__file__))
10 | 
11 | try:
12 |     README = open(os.path.join(here, 'README.md')).read()
13 | except:
14 |     README = ""
15 | try:
16 |     CHANGELOG = open(os.path.join(here, 'CHANGELOG.md')).read()
17 | except:
18 |     CHANGELOG = ""
19 | VERSION = imp.load_source('version', os.path.join(here, '%s/const.py' % PACKAGE_NAME)).__version__
20 | 
21 | packages = [
22 |     'xeHentai',
23 |     'xeHentai.util',
24 |     'xeHentai.i18n',
25 |     'xeHentai.updater',
26 | ]
27 | requires = ['requests']
28 | 
29 | from setuptools import setup
30 | 
31 | setup(
32 |     name=PACKAGE_NAME,
33 |     version=VERSION,
34 |     description='xeHentai Downloader',
35 |     long_description=README + '\n\n' + CHANGELOG,
36 |     author='fffonion',
37 |     author_email='fffonion@gmail.com',
38 |     url='https://yooooo.us/2013/xehentai',
39 |     packages=packages,
40 |     package_dir={'requests': 'requests'},
41 |     include_package_data=True,
42 |     install_requires=requires,
43 |     license='GPLv3',
44 |     zip_safe=False,
45 |     classifiers=(
46 |         'Development Status :: 4 - Beta',
47 |         'Intended Audience :: End Users/Desktop',
48 |         'Natural Language :: English',
49 |         'OSI Approved :: GNU General Public License v3 (GPLv3)',
50 |         'Programming Language :: Python',
51 |         'Programming Language :: Python :: 2.6',
52 |         'Programming Language :: Python :: 2.7',
53 |         'Programming Language :: Python :: 3',
54 |         'Programming Language :: Python :: 3.3',
55 |         'Programming Language :: Python :: 3.4',
56 |         'Programming Language :: Python :: 3.5',
57 |         'Programming Language :: Python :: Implementation :: CPython',
58 |         'Programming Language :: Python :: Implementation :: PyPy'
59 |     ),
60 |     requires=requires,
61 |     entry_points = {'console_scripts': ["xeH = xeHentai.cli:start"]},
62 | )
63 | 


--------------------------------------------------------------------------------
/util/make_release_config.py:
--------------------------------------------------------------------------------
 1 | #coding: utf-8
 2 | 
 3 | import os
 4 | import sys
 5 | import re
 6 | FILEPATH = os.path.join(sys.path[0], "..")
 7 | sys.path.insert(0, os.path.join(FILEPATH, "xeHentai"))
 8 | import config
 9 | 
10 | target = os.path.join(FILEPATH, "release")
11 | if not os.path.exists(target):
12 |     os.mkdir(target)
13 | 
14 | target = os.path.join(target, "config.py")
15 | cli = open(os.path.join(FILEPATH, "xeHentai", "cli.py"), "r", encoding="utf-8").read()
16 | zh_hans = open(os.path.join(FILEPATH, "xeHentai", "i18n", "zh_hans.py"), "r", encoding="utf-8").read()
17 | 
18 | f = open(target, "w", encoding="utf-8")
19 | f.write('''# coding:utf-8
20 | # --UTF8补丁-- #
21 | 
22 | ''')
23 | 
24 | other_mappings = {
25 |         "save_tasks": "是否保存任务到h.json，可用于断点续传",
26 |         "scan_thread_cnt": "扫描线程数",
27 | #        "download_range": "设置下载的图片范围, 格式为 开始位置-结束位置, 或者单张图片的位置\n" + \
28 | #                        "# 使用逗号来分隔多个范围, 例如 5-10,15,20-25, 默认为下载所有"
29 | }
30 | 
31 | for k in sorted(config.__dict__):
32 |     if k.startswith("__"):
33 |         continue
34 |     if k not in other_mappings:
35 |         if k == "download_range":
36 |             i18n = "XEH_OPT_download_range"
37 |         else:
38 |             i18n = re.findall(r"_def\[['\"]%s['\"]\].*?help\s*=\s*i18n.([^\)]+)\)" % k, cli, re.DOTALL)[0]
39 |         txt = re.findall(r"%s\s*=\s*['\"](.*?)\s*\(当前.+['\"]" % i18n, zh_hans, re.DOTALL)[0]
40 |         # multiline fix
41 |         txt = txt.replace('"', '').replace('\\\n', '\n# ')
42 |         txt = re.sub(r"\nXEH_.+", "", txt, re.DOTALL)
43 |     else:
44 |         txt = other_mappings[k]
45 |     f.write("# %s\n" % txt)
46 |     v = getattr(config, k)
47 |     if isinstance(v, str):
48 |         v = '"%s"' % v
49 |     f.write("%s = %s\n\n" % (k, v))
50 | 
51 | f.close()
52 | 


--------------------------------------------------------------------------------
/util/make_verinfo.py:
--------------------------------------------------------------------------------
 1 | # coding:utf-8
 2 | 
 3 | import os
 4 | import sys
 5 | sys.path.insert(0, os.path.join(sys.path[0], ".."))
 6 | from xeHentai import const
 7 | 
 8 | version = const.__version__
 9 | v = list(str(int(round(version * 1000))))
10 | print(version, v)
11 | tmpl='''# UTF-8
12 | #
13 | # For more details about fixed file info 'ffi' see:
14 | # http://msdn.microsoft.com/en-us/library/ms646997.aspx
15 | VSVersionInfo(
16 |   ffi=FixedFileInfo(
17 |     # filevers and prodvers should be always a tuple with four items: (1, 2, 3, 4)
18 |     # Set not needed items to zero 0.
19 |     filevers=(%s),
20 |     prodvers=(%s),
21 |     # Contains a bitmask that specifies the valid bits 'flags'r
22 |     mask=0x3f,
23 |     # Contains a bitmask that specifies the Boolean attributes of the file.
24 |     flags=0x0,
25 |     # The operating system for which this file was designed.
26 |     # 0x4 - NT and there is no need to change it.
27 |     OS=0x40004,
28 |     # The general type of file.
29 |     # 0x1 - the file is an application.
30 |     fileType=0x1,
31 |     # The function of the file.
32 |     # 0x0 - the function is not defined for this fileType
33 |     subtype=0x0,
34 |     # Creation date and time stamp.
35 |     date=(0, 0)
36 |     ),
37 |   kids=[
38 |     StringFileInfo(
39 |       [
40 |       StringTable(
41 |         u'080404B0',
42 |         [StringStruct(u'FileVersion', u'%s'),
43 |         StringStruct(u'ProductVersion', u'%s'),
44 |         StringStruct(u'OriginalFilename', u'xeHentai-%s.exe'),
45 |         StringStruct(u'InternalName', u'xeHentai'),
46 |         StringStruct(u'FileDescription', u'绅♂士漫画下载器'),
47 |         StringStruct(u'CompanyName', u'fffonion@gmail.com'),
48 |         StringStruct(u'LegalCopyright', u'GPLv3'),
49 |         StringStruct(u'ProductName', u'xeHentai')])
50 |       ]), 
51 |     VarFileInfo([VarStruct(u'Translation', [2052, 1200])])
52 |   ]
53 | )''' % (
54 | ", ".join(v), ", ".join(v), 
55 | ".".join(v), ".".join(v), version
56 | )
57 | 
58 | open("verinfo.txt", "w", encoding="utf-8").write(tmpl)
59 | 


--------------------------------------------------------------------------------
/xeH:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import os
 4 | import sys
 5 | import json
 6 | import zipfile
 7 | from threading import Thread
 8 | import xeHentai.const as const
 9 | 
10 | SRC_UPDATE_FILE = const.SRC_UPDATE_FILE
11 | if const.PY3K:
12 |     from importlib import reload
13 | 
14 | def load_update():
15 |     if os.path.exists(SRC_UPDATE_FILE):
16 |         try:
17 |             need_remove = False
18 |             update_id = ""
19 |             with zipfile.ZipFile(SRC_UPDATE_FILE, 'r') as z:
20 |                 try:
21 |                    r = json.loads(z.read("info.json"))
22 |                 except:
23 |                     need_remove = True
24 |                 else:
25 |                     if 'v' not in r and r['v'] != SRC_UPDATE_VERSION:
26 |                         # ignoring legacy file
27 |                         need_remove = True
28 |                     else:
29 |                         update_id = r["update_id"]
30 |             if need_remove:
31 |                 os.remove(SRC_UPDATE_FILE)
32 |                 return
33 |             v = const.__version__
34 |             sys.path.insert(0, SRC_UPDATE_FILE)
35 |             import xeHentai
36 |             reload(xeHentai)
37 |             xeHentai.const.VERSION_UPDATE = update_id
38 |             xeHentai.const.VERSION_UPDATE_LOADER = v
39 |         except:
40 |             if sys.path[0] == SRC_UPDATE_FILE:
41 |                 sys.path.pop(0)
42 |                 os.remove(SRC_UPDATE_FILE)
43 | 
44 | 
45 | if __name__ == "__main__":
46 |     load_update()
47 | 
48 |     from xeHentai import cli, i18n
49 |     cli.start()
50 | 


--------------------------------------------------------------------------------
/xeH.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import os
 4 | import sys
 5 | import json
 6 | import zipfile
 7 | from threading import Thread
 8 | import xeHentai.const as const
 9 | 
10 | SRC_UPDATE_FILE = const.SRC_UPDATE_FILE
11 | if const.PY3K:
12 |     from importlib import reload
13 | 
14 | def load_update():
15 |     if os.path.exists(SRC_UPDATE_FILE):
16 |         try:
17 |             need_remove = False
18 |             update_id = ""
19 |             with zipfile.ZipFile(SRC_UPDATE_FILE, 'r') as z:
20 |                 try:
21 |                    r = json.loads(z.read("info.json"))
22 |                 except:
23 |                     need_remove = True
24 |                 else:
25 |                     if 'v' not in r and r['v'] != SRC_UPDATE_VERSION:
26 |                         # ignoring legacy file
27 |                         need_remove = True
28 |                     else:
29 |                         update_id = r["update_id"]
30 |             if need_remove:
31 |                 os.remove(SRC_UPDATE_FILE)
32 |                 return
33 |             v = const.__version__
34 |             sys.path.insert(0, SRC_UPDATE_FILE)
35 |             import xeHentai
36 |             reload(xeHentai)
37 |             xeHentai.const.VERSION_UPDATE = update_id
38 |             xeHentai.const.VERSION_UPDATE_LOADER = v
39 |         except:
40 |             if sys.path[0] == SRC_UPDATE_FILE:
41 |                 sys.path.pop(0)
42 |                 os.remove(SRC_UPDATE_FILE)
43 | 
44 | 
45 | if __name__ == "__main__":
46 |     load_update()
47 | 
48 |     from xeHentai import cli, i18n
49 |     cli.start()
50 | 


--------------------------------------------------------------------------------
/xeHentai/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 


--------------------------------------------------------------------------------
/xeHentai/cli.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding:utf-8
  3 | # Contributor:
  4 | #      fffonion        <fffonion@gmail.com>
  5 | 
  6 | from __future__ import absolute_import
  7 | import os
  8 | import time
  9 | import argparse
 10 | import traceback
 11 | from threading import Thread
 12 | from .i18n import i18n
 13 | from .core import xeHentai
 14 | from .const import *
 15 | from .const import __version__
 16 | from .util import logger
 17 | 
 18 | from . import config as default_config
 19 | sys.path.insert(1, FILEPATH)
 20 | try:
 21 |     import config
 22 | except ImportError:
 23 |     config = default_config
 24 | sys.path.pop(1)
 25 | 
 26 | def start():
 27 |     opt = parse_opt()
 28 |     xeH = xeHentai()
 29 |     if opt.auto_update != "off":
 30 |         check_update(xeH.logger, {
 31 |             "auto_update": opt.auto_update,
 32 |             "update_beta_channel": opt.update_beta_channel,
 33 |         })
 34 |     if opt.daemon:
 35 |         if opt.interactive:
 36 |             xeH.logger.warning(i18n.XEH_OPT_IGNORING_I)
 37 |         if os.name == "posix":
 38 |             pid = os.fork()
 39 |             if pid == 0:
 40 |                 sys.stdin.close()
 41 |                 sys.stdout = open("/dev/null", "w")
 42 |                 sys.stderr = open("/dev/null", "w")
 43 |                 return main(xeH, opt)
 44 |         elif os.name == "nt":
 45 |             return xeH.logger.error(i18n.XEH_PLATFORM_NO_DAEMON % os.name)
 46 |         else:
 47 |             return xeH.logger.error(i18n.XEH_PLATFORM_NO_DAEMON % os.name)
 48 |         xeH.logger.info(i18n.XEH_DAEMON_START % pid)
 49 |     else:
 50 |         main(xeH, opt)
 51 | 
 52 | def check_update(l, cfg):
 53 |     from .updater.updater import check_update
 54 |     t = Thread(name="updater", target=check_update, args=(l, cfg))
 55 |     t.setDaemon(True)
 56 |     t.start()
 57 |     return t
 58 | 
 59 | def main(xeH, opt):
 60 |     xeH.update_config(**vars(opt))
 61 |     log = xeH.logger
 62 |     log.info(i18n.XEH_STARTED % xeH.verstr)
 63 |     if opt.cookie:
 64 |         xeH.set_cookie(opt.cookie)
 65 |     if opt.username and opt.key and not xeH.has_login:
 66 |         xeH.login_exhentai(opt.username, opt.key)
 67 |     if opt.interactive and not opt.daemon:
 68 |         try:
 69 |             r = interactive(xeH)
 70 |             opt.__dict__.update(r)
 71 |             xeH.update_config(**r)
 72 |         except (KeyboardInterrupt, SystemExit):
 73 |             log.info(i18n.XEH_CLEANUP)
 74 |             xeH._cleanup()
 75 |             return
 76 | 
 77 |     try:
 78 |         if opt.urls:
 79 |             for u in opt.urls:
 80 |                 xeH.add_task(u.strip())
 81 |             # Thread(target = lambda:(time.sleep(0.618), setattr(xeH, "_exit", XEH_STATE_SOFT_EXIT))).start()
 82 |         Thread(target = xeH._task_loop, name = "main" ).start()
 83 |         while xeH._exit < XEH_STATE_CLEAN:
 84 |             # if specify urls, finished this task and exit xeHentai
 85 |             if opt.urls and not [k for k, v in xeH._all_tasks.items() if TASK_STATE_WAITING <= v.state < TASK_STATE_FINISHED]:
 86 |                 xeH._exit = XEH_STATE_SOFT_EXIT
 87 |             time.sleep(1)
 88 |     except KeyboardInterrupt:
 89 |         log.info(i18n.XEH_CLEANUP)
 90 |         xeH._term_threads()
 91 |     except Exception as ex:
 92 |         log.error(i18n.XEH_CRITICAL_ERROR % traceback.format_exc())
 93 |         xeH._term_threads()
 94 |     else:
 95 |         sys.exit(0) # this is mandatory for single task auto exit
 96 |     try:
 97 |         # we should call cleanup ourself because we break out of task_loop
 98 |         xeH._cleanup()
 99 |     except KeyboardInterrupt:
100 |         pass
101 |     # this is mandatory for ctrl+c kill
102 |     os._exit(0)
103 | 
104 | ''' -ro --redirect-norm   是否应用在线代理到已解析到的非原图，默认不启用
105 |     -f  --force           即使超出配额也下载，默认为否
106 |     -j  --no-jp-name      是否不使用日语命名，默认为否'''
107 | 
108 | def _parse_range(s):
109 |     rg = []
110 |     s = s.replace("，", ",")
111 |     for r in s.split(','):
112 |         r = r.strip()
113 |         m = re.match(r'(\d+)(?:-(\d+))?$', r)
114 |         if not m:
115 |             raise argparse.ArgumentTypeError(logger.safestr(i18n.c(ERR_NOT_RANGE_FORMAT) % r))
116 |         start = int(m.group(1))
117 |         end = int(m.group(2) or start)
118 |         rg.append((start, end))
119 |     return sorted(rg)
120 | 
121 | class _AddToListAction(argparse.Action):
122 |     ''' This action add a value 'add_value' to the list 'dest' '''
123 |     def __init__(self, option_strings, dest, add_value=None, current=None, nargs=None, **kwargs):
124 |         super(_AddToListAction, self).__init__(option_strings, dest, default=None, nargs=0, **kwargs)
125 |         self.add_value = add_value
126 |         # to use in formatting output
127 |         self.current = current
128 |     
129 |     def __call__(self, parser, namespace, values, option_string=None):
130 |         if getattr(namespace, self.dest, None) is None:
131 |             setattr(namespace, self.dest, [])
132 |         items = list(getattr(namespace, self.dest))
133 |         items.append(self.add_value)
134 |         setattr(namespace, self.dest, items)
135 | 
136 | def parse_opt():
137 |     _def = {k:v for k,v in default_config.__dict__.items() if not k.startswith("_")}
138 |     _def.update({k:v for k,v in config.__dict__.items() if not k.startswith("_")})
139 |     if not PY3K:
140 |         for k in ('dir', 'log_path'):
141 |             _def[k] = _def[k].decode('utf-8')
142 |     parser = argparse.ArgumentParser(description = i18n.XEH_OPT_DESC, epilog = i18n.XEH_OPT_EPILOG, add_help = False)
143 |     # the followings are handled in cli
144 |     parser.add_argument('-u', '--username', help = i18n.XEH_OPT_u)
145 |     parser.add_argument('-k', '--key', help = i18n.XEH_OPT_k)
146 |     parser.add_argument('-c', '--cookie', help = i18n.XEH_OPT_c)
147 |     parser.add_argument('-i', '--interactive', action = 'store_true', default = False,
148 |                         help = i18n.XEH_OPT_i)
149 |     parser.add_argument('--daemon', action = 'store_true', default = _def['daemon'],
150 |                         help = i18n.XEH_OPT_daemon)
151 |     # the followings are passed to xeHentai
152 |     parser.add_argument('urls', metavar = 'url', type = str, nargs = '*',
153 |                         help = i18n.XEH_OPT_URLS)
154 |     parser.add_argument('-d', '--dir', default = os.path.abspath(_def['dir']),
155 |                         help = i18n.XEH_OPT_d)
156 |     parser.add_argument('-o', '--download-ori',
157 |                         action = 'store_true', default = _def['download_ori'],
158 |                         help = i18n.XEH_OPT_o)
159 |     parser.add_argument('-j', '--jpn-title', type = bool, metavar = "BOOL", default = _def['jpn_title'],
160 |                         dest = 'jpn_title', help = i18n.XEH_OPT_j)
161 |     parser.add_argument('-r', '--rename-ori', type = bool, metavar = "BOOL", default = _def['rename_ori'],
162 |                         help = i18n.XEH_OPT_r)
163 | 
164 |     parser.add_argument('-p', '--proxy', action = 'append', default = _def['proxy'],
165 |                         help = i18n.XEH_OPT_p)
166 |     group = parser.add_mutually_exclusive_group()
167 |     group.add_argument('--proxy-image', action = 'store_true', default = _def['proxy_image'],
168 |                         help = i18n.XEH_OPT_proxy_image)
169 |     group.add_argument('--proxy-image-only', action = 'store_true', default = _def['proxy_image_only'],
170 |                         help = i18n.XEH_OPT_proxy_image_only)
171 |     parser.add_argument('--rpc-interface', metavar = "ADDR", default = _def['rpc_interface'],
172 |                         help = i18n.XEH_OPT_rpc_interface)
173 |     parser.add_argument('--rpc-port', type = int, metavar = "PORT", default = _def['rpc_port'],
174 |                         help = i18n.XEH_OPT_rpc_port)
175 |     parser.add_argument('--rpc-secret', metavar = "...", default = _def['rpc_secret'],
176 |                         help = i18n.XEH_OPT_rpc_secret)
177 |     parser.add_argument('--rpc-open-browser', type = bool, metavar = "BOOL", default = _def['rpc_open_browser'],
178 |                         help = i18n.XEH_OPT_rpc_open_browser)
179 |     parser.add_argument('--delete-task-files', type = bool, metavar = "BOOL", default = _def['delete_task_files'],
180 |                         dest = 'delete_task_files', help = i18n.XEH_OPT_delete_task_files)
181 |     parser.add_argument('-a', '--archive', type = bool, metavar = "BOOL", default = _def['make_archive'],
182 |                         dest = 'make_archive', help = i18n.XEH_OPT_a)
183 |     parser.add_argument('--download-range', type = _parse_range, metavar = "a-b,c-d,e", default = None,
184 |                         dest = 'download_range', help = i18n.XEH_OPT_download_range)
185 |     parser.add_argument('-t', '--thread', type = int, metavar = 'N',
186 |                         default = _def['download_thread_cnt'], dest = 'download_thread_cnt',
187 |                         help = i18n.XEH_OPT_t)
188 |     parser.add_argument('--timeout', type = int, metavar = "N", default = _def['download_timeout'],
189 |                         dest = 'download_timeout', help = i18n.XEH_OPT_timeout)
190 |     parser.add_argument('--low-speed-threshold', type = int, metavar = "N", default = _def['low_speed_threshold'],
191 |                         dest = 'low_speed_threshold', help = i18n.XEH_OPT_low_speed)
192 |     parser.add_argument('-f', '--force', action = _AddToListAction,
193 |                         current = ERR_QUOTA_EXCEEDED in _def['ignored_errors'],
194 |                         add_value = ERR_QUOTA_EXCEEDED, dest='ignored_errors',
195 |                         help = i18n.XEH_OPT_f)
196 |     parser.add_argument('--auto-update', default = _def['auto_update'], choices = ("check", "download", "off"),
197 |                         dest = 'auto_update', help = i18n.XEH_OPT_auto_update)
198 |     parser.add_argument('--update-beta-channel', type = bool, metavar = "BOOL", default = _def['update_beta_channel'],
199 |                         dest = 'update_beta_channel', help = i18n.XEH_OPT_update_beta_channel)
200 | 
201 |     parser.add_argument('-l', '--logpath', metavar = '/path/to/eh.log',
202 |                         dest = 'log_path', default = os.path.abspath(_def['log_path']), help = i18n.XEH_OPT_l)
203 | 
204 |     parser.add_argument('-v', '--verbose', action = 'count', default = _def['log_verbose'],
205 |                         help = i18n.XEH_OPT_v)
206 |     parser.add_argument('-h','--help', action = 'help', help = i18n.XEH_OPT_h)
207 |     parser.add_argument('--version', action = 'version',
208 |                         version = '%s v%.3f%s' % (SCRIPT_NAME, __version__, '-dev' if DEVELOPMENT else ""),
209 |                         help = i18n.XEH_OPT_version)
210 |     args = parser.parse_args()
211 | 
212 |     return args
213 | 
214 | def interactive(xeH):
215 |     def _readline(x, default = ""):
216 |         if default:
217 |             x = x % default
218 |         _ = input(logger.safestr(x)) if PY3K else raw_input(logger.safestr(x))
219 |         _ = _ or default
220 |         return _ if PY3K else _.decode(locale.getdefaultlocale()[1] or 'utf-8')
221 | 
222 |     if not xeH.has_login and _readline(i18n.PS_LOGIN) == 'y':
223 |         uname = pwd = ""
224 |         while not uname:
225 |             uname = _readline(i18n.PS_USERNAME)
226 |         while not pwd:
227 |             pwd = _readline(i18n.PS_PASSWD)
228 |         xeH.login_exhentai(uname, pwd)
229 |     url = proxy = download_range = ""
230 |     while not url:
231 |         url = _readline(i18n.PS_URL)
232 |     url = url.split(",")
233 |     download_ori = _readline(i18n.PS_DOWNLOAD_ORI, 'y' if xeH.cfg['download_ori'] else 'n') == 'y'
234 |     proxy = _readline(i18n.PS_PROXY).strip()
235 |     proxy = [proxy] if proxy else xeH.cfg['proxy']
236 |     __def_dir = os.path.abspath(xeH.cfg['dir'])
237 |     # if not PY3K:
238 |     #    __def_dir = __def_dir.decode(sys.getfilesystemencoding())
239 |     _dir = _readline(i18n.PS_DOWNLOAD_DIR % __def_dir) or xeH.cfg['dir']
240 |     rename_ori = _readline(i18n.PS_RENAME_ORI, 'y' if xeH.cfg['rename_ori'] else 'n') == 'y'
241 |     make_archive = _readline(i18n.PS_MAKE_ARCHIVE, 'y' if xeH.cfg['make_archive'] else 'n') == 'y'
242 |     jpn_title = _readline(i18n.PS_JPN_TITLE, 'y' if xeH.cfg['jpn_title'] else 'n') == 'y'
243 |     while not download_range:
244 |         _ = _readline(i18n.PS_DOWNLOAD_RANGE)
245 |         if not _:
246 |             download_range = []
247 |             break
248 |         try:
249 |             download_range = _parse_range(logger.safestr(_))
250 |         except argparse.ArgumentTypeError as ex:
251 |             print(ex)
252 |         else:
253 |             break
254 |     return {'urls': url, 'proxy': proxy, 'download_ori': download_ori, 'dir': _dir, 'rename_ori':rename_ori,
255 |             'make_archive': make_archive, 'jpn_title': jpn_title, 'save_tasks': False,
256 |             'download_range': download_range}
257 | 


--------------------------------------------------------------------------------
/xeHentai/config.py:
--------------------------------------------------------------------------------
 1 | # coding:utf-8
 2 | # DO NOT EDIT THIS FILE
 3 | # make a copy to your working directory
 4 | # and edit that file
 5 | 
 6 | # Daemon mode
 7 | daemon = False
 8 | 
 9 | # set download directory
10 | dir = "."
11 | # download original images, needs to login
12 | download_ori = False
13 | # Set if use Japanese title if available
14 | jpn_title = True
15 | # rename gallery image to original name, use sequence name if turned off
16 | rename_ori = False
17 | 
18 | # set download proxies
19 | # currenlty supported: socks5/4a, http(s), glype
20 | # by default, proxy is only used on webpages
21 | proxy = []
22 | # also use proxy to download images
23 | proxy_image = True
24 | # only use proxy on images, not webpages
25 | # if set to True, the value of proxy_image will be ignored
26 | proxy_image_only = False
27 | 
28 | # bind jsonrpc server to this address
29 | rpc_interface = 'localhost'
30 | # bind jsonrpc server to this port
31 | rpc_port = None
32 | # jsonrpc secret string
33 | rpc_secret = None
34 | # auto open browser on rpc start
35 | rpc_open_browser = True
36 | 
37 | # make an archive (.zip) after download and delete directory
38 | make_archive = False
39 | # specify ranges of images to be downloaded, in format
40 | # start-end, or single index, use comma to concat
41 | # multiple ranges, e.g.: 5-10,15,20-25, default to
42 | # download all images
43 | download_range = None
44 | # scan threads count
45 | scan_thread_cnt = 1
46 | # download threads count
47 | download_thread_cnt = 5
48 | # set image download timeout
49 | download_timeout = 10
50 | 
51 | # ignore these error codes, continue download
52 | # to use predefined error codes, use:
53 | # import const as __c
54 | # ignored_errors = [__c.ERR_QUOTA_EXCEEDED]
55 | ignored_errors = []
56 | 
57 | # define log path
58 | log_path = "eh.log"
59 | # set log level
60 | log_verbose = 2
61 | 
62 | # save tasks to h.json
63 | save_tasks = False
64 | 
65 | # delete files when deleting a task
66 | delete_task_files = False
67 | 
68 | # retry a connection if per thread speed is lower than this value, unit is KB per second
69 | low_speed_threshold = 10
70 | 
71 | # turn on auto update of program "check" for check only and "download" for download
72 | auto_update = "download"
73 | # set to true to update to dev branch
74 | update_beta_channel = False


--------------------------------------------------------------------------------
/xeHentai/const.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding:utf-8
  3 | # constants module
  4 | # Contributor:
  5 | #      fffonion        <fffonion@gmail.com>
  6 | 
  7 | import os
  8 | import re
  9 | import sys
 10 | import locale
 11 | 
 12 | PY3K = sys.version_info.major == 3
 13 | IRONPYTHON = sys.platform == 'cli'
 14 | EXEBUNDLE = getattr(sys, 'frozen', False)
 15 | LOCALE = locale.getdefaultlocale()[0]
 16 | CODEPAGE = locale.getdefaultlocale()[1] or 'ascii'
 17 | ANDROID = 'ANDROID_ARGUMENT' in os.environ
 18 | 
 19 | __version__ = 2.023
 20 | VERSION_UPDATE = ""
 21 | VERSION_UPDATE_LOADER = __version__
 22 | DEVELOPMENT = False
 23 | 
 24 | SCRIPT_NAME = "xeHentai"
 25 | 
 26 | # https://github.com/soimort/you-get/you-get
 27 | if getattr(sys, 'frozen', False):
 28 |     # The application is frozen
 29 |     FILEPATH = os.path.dirname(os.path.realpath(sys.executable))
 30 | else:
 31 |     # The application is not frozen
 32 |     # Change this bit to match where you store your data files:
 33 |     FILEPATH = sys.path[0]
 34 |     # if update is being injected
 35 |     if FILEPATH.endswith(".zip"):
 36 |         FILEPATH = sys.path[1]
 37 | 
 38 | DUMMY_FILENAME = "-dummy-"
 39 | RENAME_TMPDIR = "-xeh-conflict-"
 40 | STATIC_CACHE_FILE = os.path.join(FILEPATH, "webui.gz")
 41 | # cache for 1 hour
 42 | STATIC_CACHE_TTL = 3600
 43 | STATIC_CACHE_VERSION = 1
 44 | 
 45 | SRC_UPDATE_FILE = os.path.join(FILEPATH, "src.zip")
 46 | SRC_UPDATE_VERSION = 1
 47 | 
 48 | RE_INDEX = re.compile('.+/(\d+)/([^\/]+)/*')
 49 | RE_GALLERY = re.compile('/([a-f0-9]{10})/[^\-]+\-(\d+)')
 50 | RE_IMGHASH = re.compile('/([a-f0-9]{40})-(\d+)-(\d+)-(\d+)-([a-z]{,4})')
 51 | RE_FULLIMG = re.compile('fullimg.php\?gid=([a-z0-9]+)&page=(\d+)&key=')
 52 | 
 53 | __restr_webpage = '^https*://([^\.]+\.)*(?:[g\.]*e-|ex)hentai.org'
 54 | RE_URL_WEBPAGE = re.compile(__restr_webpage)
 55 | RE_URL_IMAGE = re.compile('(?!%s)' % __restr_webpage)
 56 | # matches all
 57 | RE_URL_ALL = re.compile('.')
 58 | 
 59 | RE_LOCAL_ADDR = re.compile('(^localhost)|(^127\.)|(^192\.168\.)|(^10\.)|(^172\.1[6-9]\.)|(^172\.2[0-9]\.)|(^172\.3[0-1]\.)|(^::1$)|(^[fF][cCdD])')
 60 | 
 61 | RESTR_SITE = "https*://(?:[g\.]*e\-|ex)hentai\.org"
 62 | 
 63 | FALLBACK_CF_IP = ("104.20.134.21", "104.20.135.21", "172.67.0.127")
 64 | FALLBACK_IP_MAP = {
 65 |     'e-hentai.org': FALLBACK_CF_IP,
 66 |     'forums.e-hentai.org': ("94.100.18.243", ) + FALLBACK_CF_IP,
 67 |     'exhentai.org': ("178.175.129.254", "178.175.128.252", "178.175.132.20", "178.175.129.252", "178.175.128.254", "178.175.132.22")
 68 | }
 69 | 
 70 | QUOTA_EXCEEDED_CONTENT_LENGTHS = (925, 28658, 144, 210, 1009)
 71 | 
 72 | DEFAULT_MAX_REDIRECTS = 30
 73 | 
 74 | XEH_STATE_RUNNING = 0
 75 | XEH_STATE_SOFT_EXIT = 1 # wait until current task finish and exit
 76 | XEH_STATE_FULL_EXIT = 2 # finish current task stage and exit
 77 | XEH_STATE_CLEAN = 3
 78 | 
 79 | TASK_STATE_PAUSED = 0
 80 | TASK_STATE_WAITING = 1
 81 | TASK_STATE_GET_META = 2
 82 | # TASK_STATE_GET_HATHDL = 3
 83 | TASK_STATE_SCAN_PAGE = 3
 84 | TASK_STATE_SCAN_IMG = 4
 85 | TASK_STATE_SCAN_ARCHIVE = 5
 86 | TASK_STATE_DOWNLOAD = 10
 87 | TASK_STATE_MAKE_ARCHIVE = 19
 88 | TASK_STATE_FINISHED = 20
 89 | TASK_STATE_FAILED = -1
 90 | 
 91 | ERR_NO_ERROR = 0
 92 | ERR_URL_NOT_RECOGNIZED = 1000
 93 | ERR_CANT_DOWNLOAD_EXH = 1001
 94 | ERR_ONLY_VISIBLE_EXH = 1002
 95 | ERR_MALFORMED_HATHDL = 1003
 96 | ERR_GALLERY_REMOVED = 1004
 97 | ERR_IMAGE_RESAMPLED = 1005
 98 | ERR_QUOTA_EXCEEDED = 1006
 99 | ERR_KEY_EXPIRED = 1007
100 | ERR_NO_PAGEURL_FOUND = 1008
101 | ERR_CONNECTION_ERROR = 1009
102 | ERR_IP_BANNED = 1010
103 | ERR_HATH_NOT_FOUND = 1011
104 | ERR_IMAGE_BROKEN = 1012
105 | ERR_SCAN_REGEX_FAILED = 1013
106 | ERR_STREAM_NOT_IMPLEMENTED = 1014
107 | ERR_TASK_NOT_FOUND = 1101
108 | ERR_SAVE_SESSION_FAILED = 1103
109 | ERR_TASK_LEVEL_UNDEF = 1104
110 | ERR_DELETE_RUNNING_TASK = 1105
111 | ERR_TASK_CANNOT_PAUSE = 1106
112 | ERR_TASK_CANNOT_RESUME = 1107
113 | # ERR_HATHDL_NOTFOUND = 1108
114 | ERR_RPC_UNAUTHORIZED = 1200
115 | ERR_CANNOT_CREATE_DIR = 1300
116 | ERR_CANNOT_MAKE_ARCHIVE = 1301
117 | ERR_NOT_RANGE_FORMAT = 1302
118 | ERR_RPC_PARSE_ERROR = -32700
119 | ERR_RPC_INVALID_REQUEST = -32600
120 | ERR_RPC_METHOD_NOT_FOUND = -32601
121 | ERR_RPC_INVALID_PARAMS = -32602
122 | ERR_RPC_EXEC_ERROR = -32603
123 | 
124 | 
125 | class DownloadAbortedException(Exception):
126 |     pass
127 | 


--------------------------------------------------------------------------------
/xeHentai/core.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding:utf-8
  3 | # Contributor:
  4 | #      fffonion        <fffonion@gmail.com>
  5 | 
  6 | from __future__ import absolute_import
  7 | import os
  8 | import re
  9 | import sys
 10 | import math
 11 | import json
 12 | import time
 13 | import traceback
 14 | from .task import Task
 15 | from . import util
 16 | from . import proxy
 17 | from . import filters
 18 | from .rpc import RPCServer
 19 | from .i18n import i18n
 20 | from .util import logger
 21 | from .const import *
 22 | from .const import __version__
 23 | from .worker import *
 24 | if PY3K:
 25 |     from queue import Queue, Empty
 26 | else:
 27 |     from Queue import Queue, Empty
 28 | 
 29 | from . import config as default_config
 30 | sys.path.insert(1, FILEPATH)
 31 | try:
 32 |     import config
 33 | except ImportError:
 34 |     config = default_config
 35 | sys.path.pop(1)
 36 | 
 37 | class xeHentai(object):
 38 |     def __init__(self):
 39 |         self.verstr = "%.3f%s" % (__version__, '-dev' if DEVELOPMENT else "")
 40 |         if VERSION_UPDATE:
 41 |             self.verstr = "%s-%s(%s)" % (self.verstr, VERSION_UPDATE[:7], VERSION_UPDATE_LOADER)
 42 |         self.verstr = "%s-py%d%d" % (self.verstr, sys.version_info[0], sys.version_info[1])
 43 |         self.logger = logger.Logger()
 44 |         self._exit = False
 45 |         self.tasks = Queue() # for queueing, stores gid only
 46 |         self.last_task_guid = None
 47 |         self._all_tasks = {} # for saving states
 48 |         self._all_threads = [[] for i in range(20)]
 49 |         self.cfg = {k:v for k,v in default_config.__dict__.items() if not k.startswith("_")}
 50 |         # note that ignored_errors are overwritten using val from custom config
 51 |         self.cfg.update({k:v for k,v in config.__dict__.items() if not k.startswith("_")})
 52 |         self.proxy = None
 53 |         self.cookies = {"nw": "1"}
 54 |         self.headers = {
 55 |             'User-Agent': util.make_ua(),
 56 |             'Accept-Charset': 'utf-8;q=0.7,*;q=0.7',
 57 |             'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
 58 |             'Connection': 'keep-alive'
 59 |         }
 60 |         self.has_login = False
 61 |         self.load_session()
 62 |         self.rpc = None
 63 | 
 64 |     def update_config(self, **cfg_dict):
 65 |         self.cfg.update({k:v for k, v in cfg_dict.items() if k in cfg_dict and k not in ('ignored_errors',)})
 66 |         # merge ignored errors list
 67 |         if 'ignored_errors' in cfg_dict and cfg_dict['ignored_errors']:
 68 |             self.cfg['ignored_errors'] = list(set(self.cfg['ignored_errors'] + cfg_dict['ignored_errors']))
 69 |         self.logger.set_level(logger.Logger.WARNING - self.cfg['log_verbose'])
 70 |         self.logger.verbose("cfg %s" % self.cfg)
 71 |         if cfg_dict['proxy']:
 72 |             if not self.proxy: # else we keep it None
 73 |                 self.proxy = proxy.Pool()
 74 |             for p in self.cfg['proxy']:
 75 |                 try:
 76 |                     self.proxy.add_proxy(p)
 77 |                 except Exception as ex:
 78 |                     self.logger.warning(traceback.format_exc())
 79 |             self.logger.debug(i18n.PROXY_CANDIDATE_CNT % len(self.proxy.proxies))
 80 |         if cfg_dict['dir'] and not os.path.exists(cfg_dict['dir']):
 81 |             try:
 82 |                 os.makedirs(cfg_dict['dir'])
 83 |             except OSError as ex:  # Python >2.5
 84 |                 self.logger.error(i18n.ERR_CANNOT_CREATE_DIR % cfg_dict['dir'])
 85 |         if not self.rpc and self.cfg['rpc_port'] and self.cfg['rpc_interface']:
 86 |             self.rpc = RPCServer(self, (self.cfg['rpc_interface'], int(self.cfg['rpc_port'])),
 87 |                 secret = None if 'rpc_secret' not in self.cfg else self.cfg['rpc_secret'],
 88 |                 open_browser = False if 'rpc_open_browser' not in self.cfg else self.cfg['rpc_open_browser'],
 89 |                 logger = self.logger)
 90 |             if not RE_LOCAL_ADDR.match(self.cfg['rpc_interface']) and \
 91 |                 not self.cfg['rpc_secret']:
 92 |                 self.logger.warning(i18n.RPC_TOO_OPEN % self.cfg['rpc_interface'])
 93 |             self.rpc.start()
 94 |         self.logger.set_logfile(self.cfg['log_path'])
 95 |         return ERR_NO_ERROR, ""
 96 | 
 97 |     def _get_httpreq(self, proxy_policy):
 98 |         return HttpReq(self.headers, logger = self.logger, proxy = self.proxy, proxy_policy = proxy_policy)
 99 | 
100 |     def _get_httpworker(self, tid, task_q, flt, suc, fail, keep_alive, proxy_policy, timeout, stream_mode, lowspeed_threshold):
101 |         return HttpWorker(tid, task_q, flt, suc, fail,
102 |             headers = self.headers, proxy = self.proxy, logger = self.logger,
103 |             keep_alive = keep_alive, proxy_policy = proxy_policy, timeout = timeout, stream_mode = stream_mode,
104 |             lowspeed_threshold = lowspeed_threshold)
105 | 
106 |     def add_task(self, url, **cfg_dict):
107 |         url = url.strip()
108 |         cfg = {k:v for k, v in self.cfg.items() if k in (
109 |             "dir", "download_ori", "download_thread_cnt", "scan_thread_cnt",
110 |             "proxy_image", "proxy_image_only", "ignored_errors", "low_speed_threshold",
111 |             "rename_ori", "make_archive", "delete_task_files", "jpn_title", "download_range", "download_timeout")}
112 |         cfg.update(cfg_dict)
113 |         if cfg['download_ori'] and not self.has_login:
114 |             self.logger.warning(i18n.XEH_DOWNLOAD_ORI_NEED_LOGIN)
115 |         t = Task(url, cfg, self.logger)
116 |         if t.guid in self._all_tasks:
117 |             if self._all_tasks[t.guid].state in (TASK_STATE_FINISHED, TASK_STATE_FAILED):
118 |                 self.logger.debug(i18n.TASK_PUT_INTO_WAIT % t.guid)
119 |                 self._all_tasks[t.guid].state = TASK_STATE_WAITING
120 |                 self._all_tasks[t.guid].cleanup()
121 |             return 0, t.guid
122 |         self._all_tasks[t.guid] = t
123 |         if not re.match("^%s/[^/]+/\d+/[^/]+/*#*$" % RESTR_SITE, url):
124 |             t.set_fail(ERR_URL_NOT_RECOGNIZED)
125 |         elif not self.has_login and re.match("^https*://exhentai\.org", url):
126 |             t.set_fail(ERR_CANT_DOWNLOAD_EXH)
127 |         else:
128 |             self.tasks.put(t.guid)
129 |             return 0, t.guid
130 |         self.logger.error(i18n.TASK_ERROR % (t.guid, i18n.c(t.failcode)))
131 |         return t.failcode, None
132 | 
133 |     def del_task(self, guid):
134 |         if guid not in self._all_tasks:
135 |             return ERR_TASK_NOT_FOUND, None
136 |         if TASK_STATE_PAUSED< self._all_tasks[guid].state < TASK_STATE_FINISHED:
137 |             return ERR_DELETE_RUNNING_TASK, None
138 |         self._all_tasks[guid].cleanup(before_delete=True)
139 |         del self._all_tasks[guid]
140 |         return ERR_NO_ERROR, ""
141 | 
142 |     def pause_task(self, guid):
143 |         if guid not in self._all_tasks:
144 |             return ERR_TASK_NOT_FOUND, None
145 |         t = self._all_tasks[guid]
146 |         if t.state in (TASK_STATE_PAUSED, TASK_STATE_FINISHED, TASK_STATE_FAILED):
147 |             return ERR_TASK_CANNOT_PAUSE, None
148 |         if t._monitor:
149 |             t._monitor._exit = lambda x: True
150 |         t.state = TASK_STATE_PAUSED
151 |         return ERR_NO_ERROR, ""
152 | 
153 |     def resume_task(self, guid):
154 |         if guid not in self._all_tasks:
155 |             return ERR_TASK_NOT_FOUND, None
156 |         t = self._all_tasks[guid]
157 |         if TASK_STATE_PAUSED< t.state < TASK_STATE_FINISHED:
158 |             return ERR_TASK_CANNOT_RESUME, None
159 |         t.state = max(t.state, TASK_STATE_WAITING)
160 | 
161 |         self.tasks.put(guid)
162 |         return ERR_NO_ERROR, ""
163 | 
164 |     def _do_task(self, task_guid):
165 |         task = self._all_tasks[task_guid]
166 |         if task.state == TASK_STATE_WAITING:
167 |             task.state = TASK_STATE_GET_META
168 |         req = self._get_httpreq(util.get_proxy_policy(task.config))
169 |         if not task.page_q:
170 |             task.page_q = Queue() # per image page queue
171 |         if not task.img_q:
172 |             task.img_q = Queue() # (image url, savepath) queue
173 |         monitor_started = False
174 |         while self._exit < XEH_STATE_FULL_EXIT:
175 |             # wait for threads from former task to stop
176 |             if self._all_threads[task.state]:
177 |                 self.logger.verbose("wait %d threads in state %s" % (
178 |                     len(self._all_threads[task.state]), task.state))
179 |                 for t in self._all_threads[task.state]:
180 |                     t.join()
181 |                 self._all_threads[task.state] = []
182 |                 # check again before we bring up new threads
183 |                 continue
184 |             if task.state >= TASK_STATE_SCAN_IMG and not monitor_started:
185 |                 self.logger.verbose("state %d >= %d, bring up montior" % (task.state, TASK_STATE_SCAN_IMG))
186 |                 # bring up the monitor here, ahead of workers
187 |                 mon = Monitor(req, self.proxy, self.logger, task, ignored_errors=task.config['ignored_errors'])
188 |                 _ = ['down-%d' % (i + 1) for i in range(task.config['download_thread_cnt'])]
189 |                 # if we jumpstart from a saved session to DOQNLOAD
190 |                 # there will be no scan_thread
191 |                 # if task.state >= TASK_STATE_SCAN_PAGE:
192 |                 #    _ += ['list-1']
193 |                 if task.state >= TASK_STATE_SCAN_IMG:
194 |                     _ += ['scan-%d' % (i + 1) for i in range(task.config['scan_thread_cnt'])]
195 |                 mon.set_vote_ns(_)
196 |                 self._monitor = mon
197 |                 task._monitor = mon
198 |                 mon.start()
199 |                 # put in the lowest state
200 |                 self._all_threads[TASK_STATE_SCAN_IMG].append(mon)
201 |                 monitor_started = True
202 | 
203 |             if task.state == TASK_STATE_GET_META: # grab meta data
204 |                 try:
205 |                     r = req.request("GET", task.url,
206 |                         filters.flt_metadata,
207 |                         lambda x:task.update_meta(x),
208 |                         lambda x:task.set_fail(x))
209 |                 except Exception as ex:
210 |                     self.logger.error(i18n.TASK_ERROR % (task.guid, traceback.format_exc()))
211 |                     task.state = TASK_STATE_FAILED
212 |                     break
213 |                 if task.failcode in (ERR_ONLY_VISIBLE_EXH, ERR_GALLERY_REMOVED) and self.has_login and \
214 |                         task.migrate_exhentai():
215 |                     self.logger.info(i18n.TASK_MIGRATE_EXH % task_guid)
216 |                     self.tasks.put(task_guid)
217 |                     break
218 |                 elif task.failcode == ERR_IP_BANNED:
219 |                     self.logger.error(i18n.c(ERR_IP_BANNED) % r)
220 |                     task.state = TASK_STATE_FAILED
221 |                     break
222 | 
223 |             # elif task.state == TASK_STATE_GET_HATHDL: # download hathdl
224 |             #     r = req.request("GET",
225 |             #         "%s/hathdler.php?gid=%s&t=%s" % (task.base_url(), task.gid, task.sethash),
226 |             #         filters.flt_hathdl,
227 |             #         lambda x:(task.meta.update(x),
228 |             #             task.guess_ori(),
229 |             #             task.scan_downloaded()),
230 |             #                 #task.meta['has_ori'] and task.config['download_ori'])),
231 |             #         lambda x:task.set_fail(x),)
232 |             #     self.logger.info(i18n.TASK_WILL_DOWNLOAD_CNT % (
233 |             #         task_guid, task.meta['total'] - len(task._flist_done),
234 |             #         task.meta['total']))
235 |             elif task.state == TASK_STATE_SCAN_PAGE:
236 |                 # if task.config['fast_scan'] and not task.has_ori:
237 |                 #     self.logger.info(i18n.TASK_FAST_SCAN % task.guid)
238 |                 #     for p in task.meta['filelist']:
239 |                 #         task.queue_wrapper(task.page_q.put, pichash = p)
240 |                 # else:
241 |                 # scan by our own, should not be here currently
242 |                 # start backup thread
243 |                 task.scan_downloaded()
244 |                 if task.state == TASK_STATE_FINISHED:
245 |                     continue
246 |                 if not task.meta['use_multipage_viewer']:
247 |                     for x in range(0,
248 |                         int(math.ceil(1.0 * task.meta['total'] / int(task.meta['thumbnail_cnt'])))):
249 |                         r = req.request("GET",
250 |                             "%s/?p=%d" % (task.url, x),
251 |                             filters.flt_pageurl,
252 |                             lambda x: task.put_page_queue(x),
253 |                             lambda x: task.set_fail(x))
254 |                         if task.failcode:
255 |                             break
256 |                 elif task.meta['finished'] < task.meta['total']:
257 |                     # use multipage viewer
258 |                     r = req.request("GET",
259 |                         task.mpv_url(),
260 |                         filters.flt_pageurl_mpv,
261 |                         lambda x: task.put_page_queue(x),
262 |                         lambda x: task.set_fail(x))
263 |             elif task.state == TASK_STATE_SCAN_IMG:
264 |                 # print here so that see it after we can join former threads
265 |                 self.logger.info(i18n.TASK_TITLE % (
266 |                     task_guid, task.meta['title']))
267 |                 self.logger.info(i18n.TASK_WILL_DOWNLOAD_CNT % (
268 |                     task_guid, task.meta['total'] - task.meta['finished'],
269 |                     task.meta['total']))
270 |                 # spawn thread to scan images
271 |                 for i in range(task.config['scan_thread_cnt']):
272 |                     tid = 'scan-%d' % (i + 1)
273 |                     _ = self._get_httpworker(tid, task.page_q,
274 |                         filters.flt_imgurl_wrapper(task.config['download_ori'] and self.has_login),
275 |                         lambda x, tid = tid: (task.put_img_queue(*x),
276 |                             mon.vote(tid, 0)),
277 |                         lambda x, tid = tid: (
278 |                             mon.vote(tid, x[0]),
279 |                             self.logger.warn(i18n.XEH_SCAN_FAILED % (tid, x[1], i18n.c(x[0]))),
280 |                         ),
281 |                         mon.wrk_keepalive,
282 |                         util.get_proxy_policy(task.config),
283 |                         10,
284 |                         False,
285 |                         None)
286 |                         # we don't need proxy_image in the scan thread
287 |                         # we use default timeout in the scan thread
288 |                     # _._exit = lambda t: t._finish_queue()
289 |                     self._all_threads[TASK_STATE_SCAN_IMG].append(_)
290 |                     _.start()
291 |                 task.state = TASK_STATE_DOWNLOAD - 1
292 |             elif task.state == TASK_STATE_SCAN_ARCHIVE:
293 |                 task.state = TASK_STATE_DOWNLOAD - 1
294 |             elif task.state == TASK_STATE_DOWNLOAD:
295 |                 # spawn thread to download all urls
296 |                 for i in range(task.config['download_thread_cnt']):
297 |                     tid = 'down-%d' % (i + 1)
298 |                     _ = self._get_httpworker(tid, task.img_q,
299 |                         filters.download_file_wrapper(task.config['dir']),
300 |                         lambda x, tid = tid: (task.save_file(x[1], x[2], x[0]) and \
301 |                             (self.logger.debug(i18n.XEH_FILE_DOWNLOADED.format(tid, *task.get_fname(task.get_imghash(x[2])))),
302 |                                 mon.vote(tid, 0))),
303 |                         lambda x, tid = tid: (
304 |                             self.logger.debug(i18n.XEH_DOWNLOAD_HAS_ERROR % (
305 |                                 tid, task.get_imgfid(x[2]), i18n.c(x[0]),
306 |                             )),
307 |                             task.put_page_queue_retry(x[2]),
308 |                             mon.vote(tid, x[0])),
309 |                         mon.wrk_keepalive,
310 |                         util.get_proxy_policy(task.config),
311 |                         task.config['download_timeout'],
312 |                         True,
313 |                         task.config['low_speed_threshold'] * 1024
314 |                         )
315 |                     self._all_threads[TASK_STATE_DOWNLOAD].append(_)
316 |                     _.start()
317 |                 # spawn archiver if we need
318 |                 if task.config['make_archive']:
319 |                     if self._all_threads[TASK_STATE_MAKE_ARCHIVE]:
320 |                         self._all_threads[TASK_STATE_MAKE_ARCHIVE][0].join()
321 |                         self._all_threads[TASK_STATE_MAKE_ARCHIVE] = []
322 |                     _a = ArchiveWorker(self.logger, task)
323 |                     self._all_threads[TASK_STATE_MAKE_ARCHIVE].append(_a)
324 |                     _a.start()
325 |                 # break current task loop
326 |                 break
327 | 
328 |             if task.failcode:
329 |                 self.logger.error(i18n.TASK_ERROR % (task_guid, i18n.c(task.failcode)))
330 |                 # wait all threads to finish
331 |                 break
332 |             else:
333 |                 task.state += 1
334 | 
335 |     def _task_loop(self):
336 |         task_guid = None
337 |         cnt = 0
338 |         while not self._exit:
339 |             # get a new task
340 |             if cnt == 10:
341 |                 self.save_session()
342 |                 cnt = 0
343 |             try:
344 |                 _ = self.tasks.get(False)
345 |                 self.last_task_guid = task_guid
346 |                 task_guid = _
347 |             except Empty:
348 |                 time.sleep(1)
349 |                 cnt += 1
350 |                 continue
351 |             else:
352 |                 task = self._all_tasks[task_guid]
353 |                 if TASK_STATE_PAUSED < task.state < TASK_STATE_FINISHED:
354 |                     self.logger.info(i18n.TASK_START % task_guid)
355 |                     self.save_session()
356 |                     cnt = 0
357 |                     self._do_task(task_guid)
358 |         self.logger.info(i18n.XEH_LOOP_FINISHED)
359 |         self._cleanup()
360 | 
361 |     def _term_threads(self):
362 |         self._exit = XEH_STATE_FULL_EXIT
363 |         for l in self._all_threads:
364 |             for p in l:
365 |                 p._exit = lambda x:True
366 | 
367 |     def _cleanup(self):
368 |         self._exit = self._exit if self._exit > 0 else XEH_STATE_SOFT_EXIT
369 |         self.save_session()
370 |         self._join_all()
371 |         self.logger.cleanup()
372 |         # let's send a request to rpc server to unblock it
373 |         if self.rpc:
374 |             self.rpc._exit = lambda x:True
375 |             import requests
376 |             try:
377 |                 requests.get("http://%s:%s/" % (self.cfg['rpc_interface'], self.cfg['rpc_port']))
378 |             except:
379 |                 pass
380 |             self.rpc.join()
381 |         # save it again in case we miss something
382 |         self.save_session()
383 |         self._exit = XEH_STATE_CLEAN
384 | 
385 |     def _join_all(self):
386 |         for l in self._all_threads:
387 |             for p in l:
388 |                 p.join()
389 | 
390 |     def save_session(self):
391 |         with open("h.json", "w") as f:
392 |             try:
393 |                 f.write(json.dumps({
394 |                     'tasks':{} if not self.cfg['save_tasks'] else
395 |                         {k: v.to_dict() for k,v in self._all_tasks.items()},
396 |                     'cookies':self.cookies}))
397 |             except Exception as ex:
398 |                 self.logger.warning(i18n.SESSION_WRITE_EXCEPTION % traceback.format_exc())
399 |                 return ERR_SAVE_SESSION_FAILED, str(ex)
400 |         return ERR_NO_ERROR, None
401 | 
402 |     def load_session(self):
403 |         if os.path.exists("h.json"):
404 |             with open("h.json") as f:
405 |                 try:
406 |                     j = json.loads(f.read())
407 |                 except Exception as ex:
408 |                     self.logger.warning(i18n.SESSION_LOAD_EXCEPTION % traceback.format_exc())
409 |                     return ERR_SAVE_SESSION_FAILED, str(ex)
410 |                 else:
411 |                     for _ in j['tasks'].values():
412 |                         _t = Task("", {}, self.logger).from_dict(_)
413 |                         if 'filelist' in _t.meta:
414 |                             _t.scan_downloaded()
415 |                                 #_t.meta['has_ori'] and task.config['download_ori'])
416 |                         # since we don't block on scan_img state, an unempty page_q
417 |                         # indicates we should start from scan_img state,
418 |                         if _t.state == TASK_STATE_DOWNLOAD and _t.page_q:
419 |                             _t.state = TASK_STATE_SCAN_IMG
420 |                         self._all_tasks[_['guid']] = _t
421 |                         self.tasks.put(_['guid'])
422 |                     if self._all_tasks:
423 |                         self.logger.info(i18n.XEH_LOAD_TASKS_CNT % len(self._all_tasks))
424 |                     self.cookies.update(j['cookies'])
425 |                     if self.cookies:
426 |                         self.headers.update({'Cookie':util.make_cookie(self.cookies)})
427 |                         self.has_login = 'ipb_member_id' in self.cookies and 'ipb_pass_hash' in self.cookies
428 |         _1xcookie = os.path.join(FILEPATH, ".ehentai.cookie")# 1.x cookie file
429 |         if not self.has_login and os.path.exists(_1xcookie):
430 |             with open(_1xcookie) as f:
431 |                 try:
432 |                     cid, cpw = f.read().strip().split(",")
433 |                     self.cookies.update({'ipb_member_id':cid, 'ipb_pass_hash':cpw})
434 |                     self.headers.update({'Cookie':util.make_cookie(self.cookies)})
435 |                     self.has_login = True
436 |                     self.logger.info(i18n.XEH_LOAD_OLD_COOKIE)
437 |                 except:
438 |                     pass
439 | 
440 |         return ERR_NO_ERROR, None
441 | 
442 |     def login_exhentai(self, name, pwd):
443 |         if 'ipb_member_id' in self.cookies and 'ipb_pass_hash' in self.cookies:
444 |             return
445 |         self.logger.debug(i18n.XEH_LOGIN_EXHENTAI)
446 |         logindata = {
447 |             'UserName':name,
448 |             'returntype':'8',
449 |             'CookieDate':'1',
450 |             'b':'d',
451 |             'bt':'pone',
452 |             'PassWord':pwd
453 |         }
454 |         req = self._get_httpreq(util.get_proxy_policy(self.cfg))
455 |         req.request("POST", "https://forums.e-hentai.org/index.php?act=Login&CODE=01",
456 |             filters.login_exhentai,
457 |             lambda x:(
458 |                 setattr(self, 'cookies', x),
459 |                 setattr(self, 'has_login', True),
460 |                 self.headers.update({'Cookie':util.make_cookie(self.cookies)}),
461 |                 self.save_session(),
462 |                 self.logger.info(i18n.XEH_LOGIN_OK)),
463 |             lambda x:(self.logger.warning(str(x)),
464 |                 self.logger.info(i18n.XEH_LOGIN_FAILED)),
465 |             logindata)
466 |         return ERR_NO_ERROR, self.has_login
467 | 
468 |     def set_cookie(self, cookie):
469 |         self.cookies.update(util.parse_cookie(cookie))
470 |         self.headers.update({'Cookie':util.make_cookie(self.cookies)})
471 |         if 'ipb_member_id' in self.cookies and 'ipb_pass_hash' in self.cookies:
472 |             self.has_login = True
473 |         return ERR_NO_ERROR, None
474 | 
475 | 
476 | if __name__ == '__main__':
477 |     pass
478 | 


--------------------------------------------------------------------------------
/xeHentai/filters.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding:utf-8
  3 | # Contributor:
  4 | #      fffonion        <fffonion@gmail.com>
  5 | 
  6 | import os
  7 | import re
  8 | import json
  9 | from . import util
 10 | from .const import *
 11 | 
 12 | SUC = 0
 13 | FAIL = 1
 14 | 
 15 | def login_exhentai(r, suc, fail):
 16 |     # input login response
 17 |     # add cookies if suc; log error fail
 18 |     try:
 19 |         coo = r.headers.get('set-cookie')
 20 |         cooid = re.findall('ipb_member_id=(.*?);', coo)[0]
 21 |         coopw = re.findall('ipb_pass_hash=(.*?);', coo)[0]
 22 |     except (IndexError, ) as ex:
 23 |         errmsg = re.findall('<span class="postcolor">([^<]+)</span>', r.text)
 24 |         if errmsg:
 25 |             fail(errmsg[0])
 26 |         else:
 27 |             fail("ex: %s" % ex)
 28 |         return FAIL
 29 |     else:
 30 |         suc({'ipb_member_id':cooid, 'ipb_pass_hash':coopw})
 31 |         return SUC
 32 | 
 33 | 
 34 | def flt_metadata(r, suc, fail):
 35 |     # input index response
 36 |     # add gallery meta if suc; return errorcode if fail
 37 |     # TODO: catch re exceptions
 38 |     if r.status_code == 600:
 39 |         return fail(ERR_CONNECTION_ERROR)
 40 |     if r.status_code == 404:
 41 |         return fail(ERR_GALLERY_REMOVED)
 42 |     if re.match("This gallery is pining for the fjords", r.text):
 43 |         return fail(ERR_ONLY_VISIBLE_EXH)
 44 |     elif re.match("Your IP address has been temporarily banned", r.text):
 45 |         fail(ERR_IP_BANNED)
 46 |         return re.findall("The ban expires in (.+)", r.text)[0]
 47 |     meta = {}
 48 |     # sample_hash = re.findall('<a href="%s/./([a-f0-9]{10})/\d+\-\d+"><img' % RESTR_SITE, r.text)
 49 |     # meta['sample_hash'] = sample_hash
 50 |     # meta['resampled'] = {}
 51 |     meta['gjname'] = util.htmlescape(re.findall('="gj">(.*?)</h1>', r.text)[0])
 52 |     meta['gnname']= util.htmlescape(re.findall('="gn">(.*?)</h1>', r.text)[0])
 53 |     # don't assign title now, select gj/gn based on cfg['jpn_title']
 54 |     # meta['title'] = meta['gjname'] if meta['gjname'] else meta['gnname']
 55 |     meta['total'] = int(re.findall('Length:</td><td class="gdt2">(\d+)\s+page', r.text)[0])
 56 |     meta['finished'] = 0
 57 |     meta['tags'] = re.findall("toggle_tagmenu\('([^']+)'", r.text)
 58 | 
 59 |     # TODO: parse cookie to calc thumbnail_cnt (tr_2, ts_m)
 60 |     _ = re.findall("Showing (\d+) \- (\d+) of ([\d,]+) images", r.text)[0]
 61 |     meta['thumbnail_cnt'] = int(_[1]) - int(_[0]) + 1
 62 | 
 63 |     # check multi page viewer status in order to call proper flt_pageurl
 64 |     # TODO: remove <img alt="\d+" once e-hentai is updated to align with exhentai
 65 |     mpv_urls = re.findall(
 66 |             '<a href="(%s/mpv/(\d+)/[a-f0-9]{10})/#page\d+">(?:<div|<img alt="\d+") title="Page' % RESTR_SITE,
 67 |             r.text)
 68 |     if mpv_urls:
 69 |         meta['use_multipage_viewer'] = True
 70 |     else:
 71 |         meta['use_multipage_viewer'] = False
 72 | 
 73 |     suc(meta)
 74 |     # _ = re.findall(
 75 |     #    '%s/[^/]+/(\d+)/[^/]+/\?p=\d*" onclick="return false"(.*?)</a>' % RESTR_SITE,
 76 |     #    r.text)
 77 |     # meta['pagecount'] = 1 if len(_) <= 1 else int(pagecount[-2])
 78 | 
 79 | 
 80 | # def flt_hathdl(r, suc, fail):
 81 | #     # input hathdl response
 82 | #     # add gallery meta if suc; return errorcode if fail
 83 | #     if r.status_code == 404:
 84 | #         fail(ERR_HATHDL_NOTFOUND)
 85 | #         return ERR_HATHDL_NOTFOUND
 86 | #     try:
 87 | #         meta = {
 88 | #             'name': util.htmlescape(re.findall('TITLE (.+)', r.text)[0]),
 89 | #             #'gid': int(re.findall('GID (.+)', r.text)[0]),
 90 | #             'total': int(re.findall('FILES (.+)', r.text)[0]),
 91 | #             'finished': 0,
 92 | #             'title': re.findall('Title:\s+(.+)', r.text)[0],
 93 | #             #'upload_time': re.findall('Upload Time:\s+(.+)', r.text)[0], # invisible
 94 | #             #'upload_by': re.findall('Uploaded By:\s+(.+)', r.text)[0], # invisible
 95 | #             #'downloaded': re.findall('Downloaded:\s+(.+)', r.text)[0], # invisible
 96 | #             'tags': re.findall('Tags:\s+(.+)', r.text)[0].split(', '),
 97 | #         }
 98 | #         listtmp = re.findall('FILELIST\n(.+)\n+\nINFORMATION', r.text, re.DOTALL)[0].split('\n')
 99 | #         meta['filelist'] = {}
100 | #         for l in listtmp:
101 | #             # hash(full): id, hash_10, length, width, height, format, name
102 | #             _ = re.findall('(\d+) ([a-z0-9]+)-(\d+)-(\d+)-(\d+)-([a-z]+) (.+)', l)[0]
103 | #             meta['filelist'][_[1][:10]] = list(_)
104 | #     except (IndexError, ValueError) as ex:
105 | #         fail(ERR_MALFORMED_HATHDL)
106 | #         return ERR_MALFORMED_HATHDL
107 | #     suc(meta)
108 | 
109 | def flt_pageurl(r, suc, fail):
110 |     # input gallery response
111 |     # add per image urls if suc; finish task if fail
112 |     picpage = re.findall(
113 |         '<a href="(%s/./[a-f0-9]{10}/\d+\-\d+)">(?:<div>)?(?:<div|<img alt="\d+") title="Page' % RESTR_SITE,
114 |         r.text)
115 |     if not picpage:
116 |         try:
117 |             with open("debug.htm", "w") as f:
118 |                 f.write(r.text)
119 |         except Exception:
120 |             pass
121 |         fail(ERR_NO_PAGEURL_FOUND)
122 |     for p in picpage:
123 |         suc(p)
124 | 
125 | def flt_pageurl_mpv(r, suc, fail):
126 |     # input mpv gallaery response
127 |     # add per image urls if suc; finish task if fail
128 |     # construct single page gallery view
129 |     imagelist = re.findall("imagelist\s=\s([^;]+)", r.text)
130 |     if not imagelist:
131 |         fail(ERR_NO_PAGEURL_FOUND)
132 |     try:
133 |         imagelist = json.loads(imagelist[0])
134 |     except:
135 |         fail(ERR_NO_PAGEURL_FOUND)
136 |     baseurl = re.findall("https?://[^/]+", r._real_url)[0]
137 |     gid, _ = RE_INDEX.findall(r._real_url)[0]
138 |     for i in range(len(imagelist)):
139 |         suc("%s/s/%s/%s-%d" % (baseurl, imagelist[i]['k'], gid, i+1))
140 | 
141 | 
142 | def flt_quota_check(func):
143 |     def _(r, suc, fail):
144 |         # double compare with actual image size
145 |         sz = None
146 |         p = RE_IMGHASH.findall(r.url)
147 |         if p and len(p) > 0 and len(p[-1]) > 1:
148 |             sz = int(p[-1][1])
149 |         if r.status_code == 600:# tcp layer error
150 |             fail((ERR_CONNECTION_ERROR, r._real_url, r.url))
151 |         elif r.status_code == 403:
152 |             fail((ERR_KEY_EXPIRED, r._real_url, r.url))
153 |         elif r.status_code == 509 or \
154 |             (sz != r.content_length and (
155 |                 r.content_length in QUOTA_EXCEEDED_CONTENT_LENGTHS or \
156 |                 '/509.gif' in r.url or '/509.gif' in r._real_url)):
157 |             # TODO: /509.gif detection is still not accturate, there might be a file
158 |             # that happened to be this name
159 |             fail((ERR_QUOTA_EXCEEDED, r._real_url, r.url))
160 |             # will not call the decorated filter
161 |         elif r.content_length < 200 and \
162 |                 r.headers.get('content-type') and r.headers.get('content-type').startswith('text') and \
163 |                 re.findall("exceeded your image viewing limits", r.text):
164 |             fail((ERR_QUOTA_EXCEEDED, r._real_url, r.url))
165 |             # will not call the decorated filter
166 |         else:
167 |             func(r, suc, fail)
168 |     return _
169 | 
170 | def flt_imgurl_wrapper(ori):
171 |     @flt_quota_check
172 |     def flt_imgurl(r, suc, fail, ori = ori):
173 |         # input per image page response
174 |         # add (image url, reload url, filename) to queue if suc
175 |         # return (errorcode, page_url) if fail
176 |         if re.match('Invalid page', r.text):
177 |             return fail((ERR_IMAGE_RESAMPLED, r._real_url, r.url))
178 |         while True:
179 |             _ = re.findall('src="([^"]+keystamp[^"]+)"', r.text)
180 |             if not _:
181 |                 _ = re.findall('src="([^"]+)"\s+style="', r.text)
182 |             if not _:
183 |                 break
184 |             picurl = util.htmlescape(_[0])
185 | 
186 |             _ = re.findall('</a></div><div>(.*?) ::.+::.+</di', r.text)
187 |             if not _:
188 |                 break
189 |             filename = _[0]
190 |             # XXX: is this still valid?
191 |             if 'image.php' in filename:
192 |                 _ = re.findall('n=(.+)', picurl)
193 |                 if _:
194 |                     filename = _[0]
195 |             _ = re.findall('.*?\.([a-zA-Z]+)', filename)
196 |             if not _:
197 |                 break
198 |             fmt = _[0]
199 |             # http://exhentai.org/fullimg.php?gid=577354&page=2&key=af594b7cf3
200 |             _ = re.findall('.+/(\d+)-(\d*)', r._real_url)
201 |             if not _:
202 |                 break
203 |             index = _[0]
204 |             fullurl = re.findall('class="mr".+<a href="(.+)"\s*>Download original', r.text)
205 |             fullsize = re.findall('Download\soriginal\s[0-9]+\sx\s[0-9]+\s(.*)\ssource', r.text)  # like 2.20MB
206 |             if fullurl:
207 |                 fullurl = util.htmlescape(fullurl[0])
208 |             else:
209 |                 fullurl = picurl
210 |             _ = re.findall("return nl\('([a-zA-Z\d\-]+)'\)", r.text)
211 |             if not _:
212 |                 break
213 |             js_nl = _[0]
214 |             reload_url = "%s%snl=%s" % (r._real_url, "&" if "?" in r._real_url else "?", js_nl)
215 |             if ori:
216 |                 fullurl = "%s%sredirect=%s" % (fullurl, "&" if "?" in fullurl else "?", r.url)
217 |                 reload_url = "%s%sredirect=%s" % (reload_url, "&" if "?" in reload_url else "?", r.url)
218 |                 # we will parse the 302 url to get original filename
219 |                 return suc((fullurl, reload_url, filename))
220 |             else:
221 |                 return suc((picurl, reload_url, filename))
222 | 
223 |         return fail((ERR_SCAN_REGEX_FAILED, r._real_url, r.url))
224 | 
225 |     return flt_imgurl
226 | 
227 | def download_file_wrapper(dirpath):
228 |     @flt_quota_check
229 |     def download_file(r, suc, fail, dirpath = dirpath):
230 |         # input image/archive response
231 |         # return (binary, url) if suc; return (errocode, url) if fail
232 |         if r.status_code == 404:
233 |             return fail((ERR_HATH_NOT_FOUND, r._real_url, r.url))
234 |         p = RE_IMGHASH.findall(r.url)
235 |         # if multiple hash-size-h-w-type is found, use the last one
236 |         # the first is original and the last is scaled
237 |         # _FakeReponse will be filtered in flt_quota_check
238 |         if not r.content_length or \
239 |             p and p[-1] and int(p[-1][1]) != r.content_length:
240 |             return fail((ERR_IMAGE_BROKEN, r._real_url, r.url))
241 |         if not hasattr(r, 'iter_content_cb'):
242 |             return fail((ERR_STREAM_NOT_IMPLEMENTED, r._real_url, r.url))
243 | 
244 |         # merge the iter_content iterator with our custom stream_cb
245 |         def _yield(chunk_size=16384, _r=r):
246 |             from requests.exceptions import ConnectionError
247 |             length_read = 0
248 |             try:
249 |                 for _ in _r.iter_content(chunk_size):
250 |                     length_read += len(_)
251 |                     _r.iter_content_cb(_)
252 |                     yield _
253 |             except ConnectionError: # read timeout
254 |                 fail((ERR_IMAGE_BROKEN, r._real_url, r.url))
255 |                 raise DownloadAbortedException()
256 |             if length_read != r.content_length:
257 |                 fail((ERR_IMAGE_BROKEN, r._real_url, r.url))
258 |                 raise DownloadAbortedException()
259 |             
260 |         suc((_yield, r._real_url, r.url))
261 | 
262 |     return download_file
263 | 
264 | 
265 | def reset_quota(r, suc, fail):
266 |     # reset quota response
267 |     # reset quota if suc; finish task if fail
268 |     pass
269 | 


--------------------------------------------------------------------------------
/xeHentai/i18n/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding:utf-8
 3 | # Contributor:
 4 | #      fffonion        <fffonion@gmail.com>
 5 | 
 6 | import importlib
 7 | from ..const import *
 8 | from . import en_us as lng_fallback
 9 | 
10 | try:
11 |     _locale = LOCALE.lower() if LOCALE else 'en_us'
12 |     if _locale in ('zh_cn', 'zh_sg'):
13 |         _locale = 'zh_hans'
14 |     elif _locale in ('zh_tw', 'zh_hk', 'zh_mo'):
15 |         _locale = 'zh_hant'
16 |     lng = importlib.import_module("%s.i18n.%s" % (SCRIPT_NAME, _locale))
17 | except (ImportError, ValueError):
18 |     lng = lng_fallback
19 | 
20 | 
21 | class _(object):
22 |     def c(cls, code):
23 |         _ = code not in lng.err_msg and \
24 |             (code not in lng_fallback.err_msg and \
25 |                 (cls.ERR_NOMSG % code) or \
26 |                     lng_fallback.err_msg[code] ) or \
27 |             lng.err_msg[code]
28 |         return _ if PY3K else (
29 |             _ if isinstance(_, unicode) else _.decode('utf-8')) # cls.ERR_NOMSG % code is unicode
30 | 
31 |     def __getattr__(cls, idx):
32 |         _ = not hasattr(lng, idx) and \
33 |             getattr(lng_fallback, idx) or \
34 |             getattr(lng, idx)
35 |         return _ if PY3K else _.decode('utf-8')
36 | 
37 | i18n = _()
38 | 


--------------------------------------------------------------------------------
/xeHentai/i18n/en_us.py:
--------------------------------------------------------------------------------
  1 | # coding:utf-8
  2 | 
  3 | from ..const import *
  4 | 
  5 | err_msg = {
  6 |     ERR_URL_NOT_RECOGNIZED: "url not recognized",
  7 |     ERR_CANT_DOWNLOAD_EXH: "can't download exhentai.org without login",
  8 |     ERR_ONLY_VISIBLE_EXH: "this gallery is only visible in exhentai.org",
  9 |     ERR_MALFORMED_HATHDL: "malformed .hathdl, can't parse",
 10 |     ERR_GALLERY_REMOVED: "this gallery has been removed, may be visible in exhentai",
 11 |     ERR_KEY_EXPIRED: "image url is expired",
 12 |     ERR_NO_PAGEURL_FOUND: "no page url found, change of site structure?",
 13 |     ERR_CONNECTION_ERROR: "a connection problem occurs",
 14 |     ERR_IP_BANNED: "IP has been banned, retry in %s",
 15 |     ERR_IMAGE_BROKEN: "downloaded image is broken",
 16 |     ERR_SCAN_REGEX_FAILED: "page parsing failed",
 17 |     ERR_QUOTA_EXCEEDED: "quota exceeded",
 18 |     ERR_TASK_NOT_FOUND: "no such task guid",
 19 |     ERR_TASK_LEVEL_UNDEF: "task filter level unknown",
 20 |     ERR_DELETE_RUNNING_TASK: "can't delete a running task",
 21 |     ERR_TASK_CANNOT_PAUSE: "this task can't be paused",
 22 |     ERR_TASK_CANNOT_RESUME: "this task can't be resumed",
 23 |     ERR_CANNOT_CREATE_DIR: "can't create directory %s",
 24 |     ERR_CANNOT_MAKE_ARCHIVE: "can't make archive %s",
 25 |     ERR_NOT_RANGE_FORMAT: "'%s' is not a range format, expecting '1-2' or '3'",
 26 | #    ERR_HATHDL_NOTFOUND: "hathdl not found",
 27 |     ERR_RPC_PARSE_ERROR: "Parse error.",
 28 |     ERR_RPC_INVALID_REQUEST: "Invalid request.",
 29 |     ERR_RPC_METHOD_NOT_FOUND: "Method not found.",
 30 |     ERR_RPC_INVALID_PARAMS: "Invalid method parameter(s).",
 31 |     ERR_RPC_UNAUTHORIZED: "Unauthorized",
 32 |     ERR_RPC_EXEC_ERROR: "",
 33 |     ERR_SAVE_SESSION_FAILED: "",
 34 | }
 35 | 
 36 | ERR_NOMSG = "undefined error message with code %d"
 37 | 
 38 | XEH_OPT_DESC = "xeHentai Downloader NG"
 39 | XEH_OPT_EPILOG = "Values shown as current is read from config.py " \
 40 |                 "and can be overriden by command line options. " \
 41 |                 "Discuss and bug reporting at https://yooooo.us/2013/xehentai"
 42 | XEH_OPT_URLS = "gallery url(s) to download"
 43 | XEH_OPT_u = "username"
 44 | XEH_OPT_k = "password"
 45 | XEH_OPT_c = "cookie string, will be overriden if given -u and -k"
 46 | XEH_OPT_o = "download original images, needs to login (current: %(default)s)"
 47 | XEH_OPT_t = "download threads count (current: %(default)d)"
 48 | # XEH_OPT_f = "fast scan, guess page url from .hathdl file, not working everytime (current: %(default)s)"
 49 | XEH_OPT_l = "define log path (current: %(default)s)"
 50 | XEH_OPT_p = "set download proxies, can be used multiple times, currenlty supported: socks5/4a, http(s), glype. " \
 51 | "Proxies are only used on webpages by default (current: %(default)s)"
 52 | XEH_OPT_proxy_image = "use proxies on images and webpages (current: %(default)s)"
 53 | XEH_OPT_proxy_image_only = "only use proxies on images, not webpages (current: %(default)s)"
 54 | XEH_OPT_d = "set download directory (current: %(default)s)"
 55 | XEH_OPT_v = "show more detailed log (current: %(default)s)"
 56 | XEH_OPT_i = "interactive mode, will be ignored in daemon mode (current: %(default)s)"
 57 | XEH_OPT_r = "rename gallery image to original name, use sequence name if turned off  (current: %(default)s)"
 58 | XEH_OPT_daemon = "daemon mode, can't use with -i (current: %(default)s)"
 59 | XEH_OPT_rpc_interface = "bind jsonrpc server to this address (current: %(default)s)"
 60 | XEH_OPT_rpc_port = "bind jsonrpc server to this port (current: %(default)s)"
 61 | XEH_OPT_rpc_secret = "jsonrpc secret string (current: %(default)s)"
 62 | XEH_OPT_rpc_open_browser = "automatically open browser after RPC server starts (current: %(default)s)"
 63 | XEH_OPT_a = "make an archive (.zip) after download and delete directory (current: %(default)s)"
 64 | XEH_OPT_delete_task_files = "delete downloaded files when deleting a task (current: %(default)s)"
 65 | XEH_OPT_j = "use Japanese title, use English/Romaji title if turned off (current: %(default)s)"
 66 | XEH_OPT_download_range = "specify ranges of images to be downloaded, in format start-end, or single index, " \
 67 | "use comma to concat multiple ranges, e.g.: 5-10,15,20-25, default to download all images"
 68 | XEH_OPT_timeout = "set image download timeout (current: %(default)ss)"
 69 | XEH_OPT_low_speed = "retry download if speed is lower than specified value (current: %(default)s KB/s)"
 70 | XEH_OPT_f = "download regardless of quota exceeded warning (current: %(default)s)"
 71 | XEH_OPT_auto_update = "check or download update automatically (current: %(default)s)"
 72 | XEH_OPT_update_beta_channel = "check update upon beta channel (current: %(default)s)"
 73 | XEH_OPT_h = "show this help message and exit"
 74 | XEH_OPT_version = "show program's version number and exit"
 75 | XEH_OPT_IGNORING_I = "ignoring -i option in daemon mode"
 76 | 
 77 | PS_LOGIN = "login to exhentai (y/n)? > "
 78 | PS_USERNAME = "Username > "
 79 | PS_PASSWD = "Password > "
 80 | PS_URL = "URL (seperate with ,)> "
 81 | PS_PROXY = "Proxy (optional) > "
 82 | PS_DOWNLOAD_ORI = "Download original (y/n, default:%s)? > "
 83 | PS_RENAME_ORI = "Rename to original name (y/n, default:%s)? > "
 84 | PS_MAKE_ARCHIVE = "Make archive (y/n, default:%s)? > "
 85 | PS_JPN_TITLE = "Use Japanese title (y/n, default:%s)? > "
 86 | PS_DOWNLOAD_RANGE = "Download range, press enter to download all > "
 87 | PS_DOWNLOAD_DIR = "Download to (default: %s)\npress enter or enter new > "
 88 | 
 89 | PROXY_CANDIDATE_CNT = "proxy pool has %d candidates"
 90 | 
 91 | TASK_PUT_INTO_WAIT = "task #%s already exists, put into waiting state"
 92 | TASK_ERROR = "task #%s error: %s"
 93 | TASK_MIGRATE_EXH = "task #%s migrate to exhentai.org"
 94 | TASK_TITLE = "task #%s title %s"
 95 | TASK_WILL_DOWNLOAD_CNT = "task #%s will download %d/%d files"
 96 | TASK_START = "task #%s start"
 97 | TASK_FINISHED = "task #%s download finished"
 98 | TASK_START_PAGE_RESCAN = "task #%s resample detected, start full scan"
 99 | # TASK_FAST_SCAN = "task #%s uses fast scan"
100 | TASK_START_MAKE_ARCHIVE = "task #%s start making archive"
101 | TASK_MAKE_ARCHIVE_FINISHED = "task #%s archive saved at: %s, use %.1fs"
102 | TASK_STOP_QUOTA_EXCEEDED = "task #%s quota exceeded"
103 | TASK_STUCK = "task #%s is stuck, there may be some bugs in xeHentai, or the connection is too slow"
104 | TASK_SLOW = "task #%s is slow, maybe image is too large or connection is too slow, consider use a proxy"
105 | TASK_UNFINISHED = "task #%s remaining these files undownloaded: %s"
106 | 
107 | XEH_STARTED = "xeHentai %s started."
108 | XEH_LOOP_FINISHED = "application task loop finished"
109 | XEH_LOGIN_EXHENTAI = "login exhentai"
110 | XEH_LOGIN_OK = "login exhentai successfully"
111 | XEH_LOGIN_FAILED = "can't login exhentai, check your credentials or try another account.\nIt's recommended to login in browser and use RPC to transfer cookie to xeHentai (see http://t.cn/Rctr4Pf)"
112 | XEH_LOAD_TASKS_CNT = "load %d tasks from saved session"
113 | XEH_LOAD_OLD_COOKIE = "load cookie from legacy cookie file"
114 | XEH_DAEMON_START = "daemon start at PID %d"
115 | XEH_PLATFORM_NO_DAEMON = "daemon mode is not supported on platform: %s"
116 | XEH_CLEANUP = "cleaning up..."
117 | XEH_CRITICAL_ERROR = "xeHentai throws critical error:\n%s"
118 | XEH_DOWNLOAD_ORI_NEED_LOGIN = "haven't login, so I won't download original images"
119 | XEH_FILE_DOWNLOADED = "file downloaded by thread-{} #{} {}"
120 | XEH_RENAME_HAS_ERRORS = "some files are not renamed:\n%s"
121 | XEH_DOWNLOAD_HAS_ERROR = "thread-%s retry #%s because of error: %s"
122 | XEH_SCAN_FAILED = "%s scan page %s failed: %s"
123 | 
124 | RPC_STARTED = "RPC server listening on %s:%d"
125 | RPC_TOO_OPEN = "RPC server is listening on public interface (%s) but no rpc_secret defined, which is not safe"
126 | RPC_CANNOT_BIND = "RPC server can't listen on requested address: %s"
127 | RPC_WEBUI_PATH = "WebUI is accessible at %s or https://xehentai.yooooo.us"
128 | 
129 | SESSION_LOAD_EXCEPTION = "exception occurs when loading saved session: %s"
130 | SESSION_WRITE_EXCEPTION = "exception occurs when writing saved session: %s"
131 | 
132 | THREAD = "thread"
133 | THREAD_UNCAUGHT_EXCEPTION = "thread-%s uncaught exception\n%s"
134 | THREAD_MAY_BECOME_ZOMBIE = "thread-%s may became zombie"
135 | THREAD_SWEEP_OUT = "thread-%s is dead, deref it"
136 | THREAD_SPEED_TOO_LOW = "thread-%s retry because of low download speed: %s/s less than threshold %s/s"
137 | 
138 | QUEUE = "queue"
139 | 
140 | PROXY_DISABLE_BANNED = "disable a banned proxy, expire in about %ss"
141 | 
142 | UPDATE_CHANNEL = "Update channel is: %s"
143 | UPDATE_DEV_CHANNEL = "dev"
144 | UPDATE_RELEASE_CHANNEL = "release"
145 | UPDATE_FAILED = "Failure when updating program: %s"
146 | UPDATE_COMPLETE = "Update is complete, it will take effect on next run"
147 | UPDATE_NO_UPDATE = "Program is up-to-date"
148 | UPDATE_AVAILABLE = "Update available: %s \"%s\" (%s)"
149 | UPDATE_DOWNLOAD_MANUALLY = "You can download update from https://dl.yooooo.us/share/xeHentai/"
150 | 
151 | 


--------------------------------------------------------------------------------
/xeHentai/i18n/zh_hans.py:
--------------------------------------------------------------------------------
  1 | # coding:utf-8
  2 | 
  3 | from ..const import *
  4 | 
  5 | err_msg = {
  6 |     ERR_URL_NOT_RECOGNIZED: "网址不够绅士",
  7 |     ERR_CANT_DOWNLOAD_EXH: "需要登录后才能下载里站",
  8 |     ERR_ONLY_VISIBLE_EXH: "这个本子只有里站能看到",
  9 |     ERR_MALFORMED_HATHDL: "hathdl文件有猫饼，解析失败",
 10 |     ERR_GALLERY_REMOVED: "这个本子被移除了，大概里站能看到",
 11 |     ERR_KEY_EXPIRED: "下载链接不太正常",
 12 |     ERR_NO_PAGEURL_FOUND: "没有找到页面链接，网站改版了嘛？",
 13 |     ERR_CONNECTION_ERROR: "连接有问题？",
 14 |     ERR_IP_BANNED: "IP被ban了, 恢复时间: %s",
 15 |     ERR_IMAGE_BROKEN: "下载的图片有猫饼",
 16 |     ERR_SCAN_REGEX_FAILED: "网页解析失败",
 17 |     ERR_QUOTA_EXCEEDED: "配额超限",
 18 |     ERR_TASK_NOT_FOUND: "没有该GUID对应的任务",
 19 |     ERR_TASK_LEVEL_UNDEF: "任务过滤等级不存在",
 20 |     ERR_DELETE_RUNNING_TASK: "无法删除运行中的任务",
 21 |     ERR_TASK_CANNOT_PAUSE: "这个任务无法被暂停",
 22 |     ERR_TASK_CANNOT_RESUME: "这个任务无法被恢复",
 23 |     ERR_CANNOT_CREATE_DIR: "无法创建文件夹 %s",
 24 |     ERR_CANNOT_MAKE_ARCHIVE: "无法制作压缩包 %s",
 25 |     ERR_NOT_RANGE_FORMAT: "'%s'不符合范围的格式, 正确的格式为 1-3 或者 5",
 26 | #    ERR_HATHDL_NOTFOUND: "hathdl文件未找到"
 27 |     ERR_RPC_PARSE_ERROR: "Parse error.",
 28 |     ERR_RPC_INVALID_REQUEST: "Invalid request.",
 29 |     ERR_RPC_METHOD_NOT_FOUND: "Method not found.",
 30 |     ERR_RPC_INVALID_PARAMS: "Invalid method parameter(s).",
 31 |     ERR_RPC_UNAUTHORIZED: "Unauthorized",
 32 |     ERR_RPC_EXEC_ERROR: "",
 33 |     ERR_SAVE_SESSION_FAILED: "",
 34 | }
 35 | 
 36 | ERR_NOMSG = "未指定的错误，错误号 %d"
 37 | 
 38 | XEH_OPT_DESC = "绅♂士下载器"
 39 | XEH_OPT_EPILOG = "如果参数未指定，则使用config.py中的默认值; " \
 40 |         "讨论和反馈问题：https://yooooo.us/2013/xehentai"
 41 | XEH_OPT_URLS = "下载页的网址"
 42 | XEH_OPT_u = "用户名"
 43 | XEH_OPT_k = "密码"
 44 | XEH_OPT_c = "Cookie字符串，如果指定了用户名和密码，此项会被忽略"
 45 | XEH_OPT_o = "是否下载原始图片（如果存在），需要登录 (当前: %(default)s)"
 46 | XEH_OPT_t = "下载线程数 (当前: %(default)d)"
 47 | XEH_OPT_l = "保存日志的路径 (当前: %(default)s)"
 48 | XEH_OPT_p = "设置代理, 可以指定多次, 当前支持的类型: socks5/4a, http(s), glype. 代理默认只用于扫描网页 (当前: %(default)s)"
 49 | XEH_OPT_proxy_image = "同时使用代理来下载图片和扫描网页 (当前: %(default)s)"
 50 | XEH_OPT_proxy_image_only = "仅使用代理来下载图片, 不用于扫描网页 (当前: %(default)s)"
 51 | XEH_OPT_d = "设置下载目录 (当前: %(default)s)"
 52 | XEH_OPT_v = "设置日志装逼等级 (当前: %(default)s)"
 53 | XEH_OPT_i = "交互模式，如果开启后台模式，此项会被忽略 (当前: %(default)s)"
 54 | XEH_OPT_r = "将图片重命名为原始名称，如果关闭则使用序号 (当前: %(default)s)"
 55 | XEH_OPT_daemon = "后台模式 (当前: %(default)s)"
 56 | XEH_OPT_rpc_interface = "设置JSON-RPC监听IP (当前: %(default)s)"
 57 | XEH_OPT_rpc_port = "设置JSON-RPC监听端口 (当前: %(default)s)"
 58 | XEH_OPT_rpc_secret = "设置JSON-RPC密钥 (当前: %(default)s)"
 59 | XEH_OPT_rpc_open_browser = "RPC服务端启动后自动打开浏览器页面 (当前: %(default)s)"
 60 | XEH_OPT_a = "下载完成后生成zip压缩包并删除下载目录 (当前: %(default)s)"
 61 | XEH_OPT_delete_task_files = "删除任务时同时删除下载的文件 (当前: %(default)s)"
 62 | XEH_OPT_j = "使用日语标题, 如果关闭则使用英文或罗马字标题 (当前: %(default)s)"
 63 | XEH_OPT_download_range = "设置下载的图片范围, 格式为 开始位置-结束位置, 或者单张图片的位置, " \
 64 | "使用逗号来分隔多个范围, 例如 5-10,15,20-25, 默认为下载所有"
 65 | XEH_OPT_timeout = "设置下载图片的超时 (当前: %(default)s秒)"
 66 | XEH_OPT_low_speed = "设置最低下载速度，低于此值将换源重新下载 (当前: %(default)s KB/s)"
 67 | XEH_OPT_f = "忽略配额判断，继续下载 (当前: %(default)s)"
 68 | XEH_OPT_auto_update = "检查并自动下载更新 (当前: %(default)s)"
 69 | XEH_OPT_update_beta_channel = "是否更新到测试分支 (当前: %(default)s)"
 70 | XEH_OPT_h = "显示本帮助信息"
 71 | XEH_OPT_version = "显示版本信息"
 72 | XEH_OPT_IGNORING_I = "后台模式已忽略 -i 参数"
 73 | 
 74 | 
 75 | PS_LOGIN = "当前没有登陆，要登陆吗 (y/n)? > "
 76 | PS_USERNAME = "输入用户名 > "
 77 | PS_PASSWD = "输入密码   > "
 78 | PS_URL = "输入地址（使用,分割下载多个）> "
 79 | PS_PROXY = "输入代理地址 (可选) > "
 80 | PS_DOWNLOAD_ORI = "是否下载原图（默认%s） (y/n)? > "
 81 | PS_RENAME_ORI  = "是否自动重命名（默认%s） (y/n)? > "
 82 | PS_MAKE_ARCHIVE = "是否制作zip压缩包（默认%s） (y/n)? > "
 83 | PS_JPN_TITLE = "是否使用日语标题（默认%s） (y/n)? > "
 84 | PS_DOWNLOAD_RANGE = "下载范围, 使用逗号分割多个范围, 回车下载全部 > "
 85 | PS_DOWNLOAD_DIR = "下载目录 (当前: %s)\n回车确认或输入新路径 > "
 86 | 
 87 | PROXY_CANDIDATE_CNT = "代理池中有%d个代理"
 88 | 
 89 | TASK_PUT_INTO_WAIT = "任务 #%s 已存在, 加入等待队列"
 90 | TASK_ERROR = "任务 #%s 发生错误: %s"
 91 | TASK_MIGRATE_EXH = "任务 #%s 使用里站地址重新下载"
 92 | TASK_TITLE = "任务 #%s 标题 %s"
 93 | TASK_WILL_DOWNLOAD_CNT = "任务 #%s 将下载%d个文件，共%d个 "
 94 | TASK_START = "任务 #%s 开始"
 95 | TASK_FINISHED = "任务 #%s 下载完成"
 96 | TASK_START_PAGE_RESCAN = "任务 #%s 图片被缩放，进行完整扫描"
 97 | # TASK_FAST_SCAN = "任务 #%s 使用快速扫描"
 98 | TASK_START_MAKE_ARCHIVE = "任务 #%s 开始打包"
 99 | TASK_MAKE_ARCHIVE_FINISHED = "任务 #%s 打包完成，保存在: %s, 用时%.1f秒"
100 | TASK_STOP_QUOTA_EXCEEDED = "任务 #%s 配额超限"
101 | TASK_STUCK = "任务 #%s 卡住了, 可能是脚本有bug, 或者网络连接太慢了"
102 | TASK_SLOW = "任务 #%s 有点慢, 可能是图片太大了，或者网络连接太慢了; 可以考虑使用代理"
103 | TASK_UNFINISHED = "任务 #%s 剩余以下图片未下载: %s"
104 | 
105 | XEH_STARTED = "xeHentai %s 已启动"
106 | XEH_LOOP_FINISHED = "程序循环已完成"
107 | XEH_LOGIN_EXHENTAI = "登录绅士"
108 | XEH_LOGIN_OK = "已成为绅士"
109 | XEH_LOGIN_FAILED = "无法登录绅士；检查输入是否有误或者换一个帐号。\n推荐在浏览器登录后使用RPC复制cookie到xeHentai (教程: http://t.cn/Rctr4Pf)"
110 | XEH_LOAD_TASKS_CNT = "从存档中读取了%d个任务"
111 | XEH_LOAD_OLD_COOKIE = "从1.x版cookie文件从读取了登录信息"
112 | XEH_DAEMON_START = "后台进程已启动，PID为%d"
113 | XEH_PLATFORM_NO_DAEMON = "后台模式不支持您的系统: %s"
114 | XEH_CLEANUP = "擦干净..."
115 | XEH_CRITICAL_ERROR = "xeHentai 抽风啦:\n%s"
116 | XEH_DOWNLOAD_ORI_NEED_LOGIN = "下载原图需要登录"
117 | XEH_FILE_DOWNLOADED = "绅士-{} 已下载图片 #{} {}"
118 | XEH_RENAME_HAS_ERRORS = "部分图片重命名失败:\n%s"
119 | XEH_DOWNLOAD_HAS_ERROR = "绅士-%s 下载图片 #%s 时出错: %s, 将在稍后重试"
120 | XEH_SCAN_FAILED = "%s 扫描页面 %s 失败: %s"
121 | 
122 | RPC_STARTED = "RPC服务器监听在 %s:%d"
123 | RPC_TOO_OPEN = "RPC服务器监听在公网IP (%s)，为了安全起见应该设置rpc_secret"
124 | RPC_CANNOT_BIND = "RPC服务器无法启动：%s"
125 | RPC_WEBUI_PATH = "WebUI 地址为 %s 或者 https://xehentai.yooooo.us"
126 | 
127 | SESSION_LOAD_EXCEPTION = "读取存档时遇到错误: %s"
128 | SESSION_WRITE_EXCEPTION = "写入存档时遇到错误: %s"
129 | 
130 | THREAD = "绅士"
131 | THREAD_UNCAUGHT_EXCEPTION = "绅士-%s 未捕获的异常\n%s"
132 | THREAD_MAY_BECOME_ZOMBIE = "绅士-%s 可能变成了丧尸"
133 | THREAD_SWEEP_OUT = "绅士-%s 挂了, 不再理它"
134 | THREAD_SPEED_TOO_LOW = "绅士-%s 下载速度只有 %s/s, 低于 %s/s, 将在稍后重试"
135 | 
136 | QUEUE = "队列"
137 | 
138 | PROXY_DISABLE_BANNED = "禁用了一个被ban的代理，将在约%s秒后恢复"
139 | 
140 | UPDATE_CHANNEL = "更新渠道为: %s"
141 | UPDATE_DEV_CHANNEL = "测试版"
142 | UPDATE_RELEASE_CHANNEL = "正式版"
143 | UPDATE_FAILED = "更新时遇到错误: %s"
144 | UPDATE_COMPLETE = "更新完成，请重新启动程序应用更新"
145 | UPDATE_NO_UPDATE = "没有可用更新"
146 | UPDATE_AVAILABLE = "发现可用的更新: 发布于 %s \"%s\" (%s)"
147 | UPDATE_DOWNLOAD_MANUALLY = "可以从 https://dl.yooooo.us/share/xeHentai/ 下载更新"
148 | 


--------------------------------------------------------------------------------
/xeHentai/i18n/zh_hant.py:
--------------------------------------------------------------------------------
  1 | # coding:utf-8
  2 | 
  3 | from ..const import *
  4 | 
  5 | err_msg = {
  6 |     ERR_URL_NOT_RECOGNIZED: "網址不夠紳士",
  7 |     ERR_CANT_DOWNLOAD_EXH: "需要登錄後才能下載里站",
  8 |     ERR_ONLY_VISIBLE_EXH: "這個本子只有里站能看到",
  9 |     ERR_MALFORMED_HATHDL: "hathdl文件有貓餅，解析失敗",
 10 |     ERR_GALLERY_REMOVED: "這個本子被移除了，大概里站能看到",
 11 |     ERR_KEY_EXPIRED: "下載鏈接不太正常",
 12 |     ERR_NO_PAGEURL_FOUND: "沒有找到頁面鏈接，網站改版了嘛？",
 13 |     ERR_CONNECTION_ERROR: "連接有問題？",
 14 |     ERR_IP_BANNED: "IP被ban了, 恢復時間: %s",
 15 |     ERR_IMAGE_BROKEN: "下載的圖片有貓餅",
 16 |     ERR_SCAN_REGEX_FAILED: "網頁解析失敗",
 17 |     ERR_QUOTA_EXCEEDED: "配額超限",
 18 |     ERR_TASK_NOT_FOUND: "沒有該GUID對應的任務",
 19 |     ERR_TASK_LEVEL_UNDEF: "任務過濾等級不存在",
 20 |     ERR_DELETE_RUNNING_TASK: "無法刪除運行中的任務",
 21 |     ERR_TASK_CANNOT_PAUSE: "這個任務無法被暫停",
 22 |     ERR_TASK_CANNOT_RESUME: "這個任務無法被恢復",
 23 |     ERR_CANNOT_CREATE_DIR: "無法創建文件夾 %s",
 24 |     ERR_CANNOT_MAKE_ARCHIVE: "無法製作壓縮包 %s",
 25 |     ERR_NOT_RANGE_FORMAT: "'%s'不符合範圍的格式, 正確的格式為 1-3 或者 5",
 26 | #    ERR_HATHDL_NOTFOUND: "hathdl文件未找到"
 27 |     ERR_RPC_PARSE_ERROR: "Parse error.",
 28 |     ERR_RPC_INVALID_REQUEST: "Invalid request.",
 29 |     ERR_RPC_METHOD_NOT_FOUND: "Method not found.",
 30 |     ERR_RPC_INVALID_PARAMS: "Invalid method parameter(s).",
 31 |     ERR_RPC_UNAUTHORIZED: "Unauthorized",
 32 |     ERR_RPC_EXEC_ERROR: "",
 33 |     ERR_SAVE_SESSION_FAILED: "",
 34 | }
 35 | 
 36 | ERR_NOMSG = "未指定的錯誤，錯誤號 %d"
 37 | 
 38 | XEH_OPT_DESC = "紳♂士下載器"
 39 | XEH_OPT_EPILOG = "如果參數未指定，則使用config.py中的默認值; " \
 40 |         "討論和反饋問題：https://yooooo.us/2013/xehentai"
 41 | XEH_OPT_URLS = "下載頁的網址"
 42 | XEH_OPT_u = "用戶名"
 43 | XEH_OPT_k = "密碼"
 44 | XEH_OPT_c = "Cookie字符串，如果指定了用戶名和密碼，此項會被忽略"
 45 | XEH_OPT_o = "是否下載原始圖片（如果存在），需要登錄 (當前: %(default)s)"
 46 | XEH_OPT_t = "下載線程數 (當前: %(default)d)"
 47 | XEH_OPT_l = "保存日誌的路徑 (當前: %(default)s)"
 48 | XEH_OPT_p = "設置代理, 可以指定多次, 當前支持的類型: socks5/4a, http(s), glype. 代理默認只用於掃描網頁 (當前: %(default)s)"
 49 | XEH_OPT_proxy_image = "同時使用代理來下載圖片和掃描網頁 (當前: %(default)s)"
 50 | XEH_OPT_proxy_image_only = "僅使用代理來下載圖片, 不用於掃描網頁 (當前: %(default)s)"
 51 | XEH_OPT_d = "設置下載目錄 (當前: %(default)s)"
 52 | XEH_OPT_v = "設置日誌裝逼等級 (當前: %(default)s)"
 53 | XEH_OPT_i = "交互模式，如果開啟後台模式，此項會被忽略 (當前: %(default)s)"
 54 | XEH_OPT_r = "將圖片重命名為原始名稱，如果關閉則使用序號 (當前: %(default)s)"
 55 | XEH_OPT_daemon = "後台模式 (當前: %(default)s)"
 56 | XEH_OPT_rpc_interface = "設置JSON-RPC監聽IP (當前: %(default)s)"
 57 | XEH_OPT_rpc_port = "設置JSON-RPC監聽埠 (當前: %(default)s)"
 58 | XEH_OPT_rpc_secret = "設置JSON-RPC密鑰 (當前: %(default)s)"
 59 | XEH_OPT_rpc_open_browser = "RPC服務端啟動後自動打開瀏覽器頁面 (當前: %(default)s)"
 60 | XEH_OPT_a = "下載完成後生成zip壓縮包並刪除下載目錄 (當前: %(default)s)"
 61 | XEH_OPT_delete_task_files = "刪除任務時同時刪除下載的文件 (當前: %(default)s)"
 62 | XEH_OPT_j = "使用日語標題, 如果關閉則使用英文或羅馬字標題 (當前: %(default)s)"
 63 | XEH_OPT_download_range = "設置下載的圖片範圍, 格式為 開始位置-結束位置, 或者單張圖片的位置, " \
 64 | "使用逗號來分隔多個範圍, 例如 5-10,15,20-25, 默認為下載所有"
 65 | XEH_OPT_timeout = "設置下載圖片的超時 (當前: %(default)s秒)"
 66 | XEH_OPT_low_speed = "設置最低下載速度，低於此值將換源重新下載 (當前: %(default)s KB/s)"
 67 | XEH_OPT_f = "忽略配額判斷，繼續下載 (當前: %(default)s)"
 68 | XEH_OPT_auto_update = "檢查並自動下載更新 (當前: %(default)s)"
 69 | XEH_OPT_update_beta_channel = "是否更新到測試分支 (當前: %(default)s)"
 70 | XEH_OPT_h = "顯示本幫助信息"
 71 | XEH_OPT_version = "顯示版本信息"
 72 | XEH_OPT_IGNORING_I = "後台模式已忽略 -i 參數"
 73 | 
 74 | 
 75 | PS_LOGIN = "當前沒有登陸，要登陸嗎 (y/n)? > "
 76 | PS_USERNAME = "輸入用戶名 > "
 77 | PS_PASSWD = "輸入密碼   > "
 78 | PS_URL = "輸入地址（使用,分割下載多個）> "
 79 | PS_PROXY = "輸入代理地址 (可選) > "
 80 | PS_DOWNLOAD_ORI = "是否下載原圖（默認%s） (y/n)? > "
 81 | PS_RENAME_ORI  = "是否自動重命名（默認%s） (y/n)? > "
 82 | PS_MAKE_ARCHIVE = "是否製作zip壓縮包（默認%s） (y/n)? > "
 83 | PS_JPN_TITLE = "是否使用日語標題（默認%s） (y/n)? > "
 84 | PS_DOWNLOAD_RANGE = "下載範圍, 使用逗號分割多個範圍, 回車下載全部 > "
 85 | PS_DOWNLOAD_DIR = "下載目錄 (當前: %s)\n回車確認或輸入新路徑 > "
 86 | 
 87 | PROXY_CANDIDATE_CNT = "代理池中有%d個代理"
 88 | 
 89 | TASK_PUT_INTO_WAIT = "任務 #%s 已存在, 加入等待隊列"
 90 | TASK_ERROR = "任務 #%s 發生錯誤: %s"
 91 | TASK_MIGRATE_EXH = "任務 #%s 使用里站地址重新下載"
 92 | TASK_TITLE = "任務 #%s 標題 %s"
 93 | TASK_WILL_DOWNLOAD_CNT = "任務 #%s 將下載%d個文件，共%d個 "
 94 | TASK_START = "任務 #%s 開始"
 95 | TASK_FINISHED = "任務 #%s 下載完成"
 96 | TASK_START_PAGE_RESCAN = "任務 #%s 圖片被縮放，進行完整掃描"
 97 | # TASK_FAST_SCAN = "任務 #%s 使用快速掃描"
 98 | TASK_START_MAKE_ARCHIVE = "任務 #%s 開始打包"
 99 | TASK_MAKE_ARCHIVE_FINISHED = "任務 #%s 打包完成，保存在: %s, 用時%.1f秒"
100 | TASK_STOP_QUOTA_EXCEEDED = "任務 #%s 配額超限"
101 | TASK_STUCK = "任務 #%s 卡住了, 可能是腳本有bug, 或者網絡連接太慢了"
102 | TASK_SLOW = "任務 #%s 有點慢, 可能是圖片太大了，或者網絡連接太慢了; 可以考慮使用代理"
103 | TASK_UNFINISHED = "任務 #%s 剩餘以下圖片未下載: %s"
104 | 
105 | XEH_STARTED = "xeHentai %s 已啟動"
106 | XEH_LOOP_FINISHED = "程序循環已完成"
107 | XEH_LOGIN_EXHENTAI = "登錄紳士"
108 | XEH_LOGIN_OK = "已成為紳士"
109 | XEH_LOGIN_FAILED = "無法登錄紳士；檢查輸入是否有誤或者換一個帳號。\n推薦在瀏覽器登錄後使用RPC複製cookie到xeHentai (教程: http://t.cn/Rctr4Pf)"
110 | XEH_LOAD_TASKS_CNT = "從存檔中讀取了%d個任務"
111 | XEH_LOAD_OLD_COOKIE = "從1.x版cookie文件從讀取了登錄信息"
112 | XEH_DAEMON_START = "後台進程已啟動，PID為%d"
113 | XEH_PLATFORM_NO_DAEMON = "後台模式不支持您的系統: %s"
114 | XEH_CLEANUP = "擦乾淨..."
115 | XEH_CRITICAL_ERROR = "xeHentai 抽風啦:\n%s"
116 | XEH_DOWNLOAD_ORI_NEED_LOGIN = "下載原圖需要登錄"
117 | XEH_FILE_DOWNLOADED = "紳士-{} 已下載圖片 #{} {}"
118 | XEH_RENAME_HAS_ERRORS = "部分圖片重命名失敗:\n%s"
119 | XEH_DOWNLOAD_HAS_ERROR = "紳士-%s 下載圖片 #%s 時出錯: %s, 將在稍後重試"
120 | XEH_SCAN_FAILED = "%s 掃描頁面 %s 失敗: %s"
121 | 
122 | RPC_STARTED = "RPC伺服器監聽在 %s:%d"
123 | RPC_TOO_OPEN = "RPC伺服器監聽在公網IP (%s)，為了安全起見應該設置rpc_secret"
124 | RPC_CANNOT_BIND = "RPC伺服器無法啟動：%s"
125 | RPC_WEBUI_PATH = "WebUI 地址為 %s 或者 https://xehentai.yooooo.us"
126 | 
127 | SESSION_LOAD_EXCEPTION = "讀取存檔時遇到錯誤: %s"
128 | SESSION_WRITE_EXCEPTION = "寫入存檔時遇到錯誤: %s"
129 | 
130 | THREAD = "紳士"
131 | THREAD_UNCAUGHT_EXCEPTION = "紳士-%s 未捕獲的異常\n%s"
132 | THREAD_MAY_BECOME_ZOMBIE = "紳士-%s 可能變成了喪屍"
133 | THREAD_SWEEP_OUT = "紳士-%s 掛了, 不再理它"
134 | THREAD_SPEED_TOO_LOW = "紳士-%s 下載速度只有 %s/s, 低於 %s/s, 將在稍後重試"
135 | 
136 | QUEUE = "隊列"
137 | 
138 | PROXY_DISABLE_BANNED = "禁用了一個被ban的代理，將在約%s秒後恢復"
139 | 
140 | UPDATE_CHANNEL = "更新渠道為: %s"
141 | UPDATE_DEV_CHANNEL = "測試版"
142 | UPDATE_RELEASE_CHANNEL = "正式版"
143 | UPDATE_FAILED = "更新時遇到錯誤: %s"
144 | UPDATE_COMPLETE = "更新完成，請重新啟動程序應用更新"
145 | UPDATE_NO_UPDATE = "沒有可用更新"
146 | UPDATE_AVAILABLE = "發現可用的更新: 發布於 %s \"%s\" (%s)"
147 | UPDATE_DOWNLOAD_MANUALLY = "可以從 https://dl.yooooo.us/share/xeHentai/ 下載更新"
148 | 


--------------------------------------------------------------------------------
/xeHentai/proxy.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding:utf-8
  3 | # Contributor:
  4 | #      fffonion        <fffonion@gmail.com>
  5 | 
  6 | import re
  7 | import time
  8 | import random
  9 | from requests.exceptions import ConnectTimeout, ConnectionError, ProxyError, InvalidSchema
 10 | from requests.packages.urllib3.exceptions import ProxySchemeUnknown
 11 | from . import util
 12 | from .const import *
 13 | 
 14 | MAX_FAIL = 5
 15 | 
 16 | class PoolException(Exception):
 17 |     pass
 18 | 
 19 | class LowSpeedException(Exception):
 20 |     pass
 21 | 
 22 | class Pool(object):
 23 |     def __init__(self, disable_policy = None):
 24 |         self.proxies = {}
 25 |         self.errors = {}
 26 |         if not disable_policy:
 27 |             self.disable_policy = lambda x, y: y >= MAX_FAIL
 28 |         else:
 29 |             self.disable_policy = disable_policy
 30 |         self.disabled = {} # key: expire
 31 | 
 32 |     def proxied_request(self, session):
 33 |         for d in self.disabled:
 34 |             if 0 < self.disabled[d] < time.time():
 35 |                 try:
 36 |                     del self.disabled[d]
 37 |                 except:
 38 |                     pass
 39 |         l = [i for i in self.proxies.keys() if i not in self.disabled]
 40 |         if not l:
 41 |             raise PoolException("try to use proxy but no proxies avaliable")
 42 |         # _ = self.proxies[random.choice(l)]
 43 |         _ = self.proxies[l[0]]
 44 |         return _[0](session), self.not_good(l[0])
 45 | 
 46 |     def has_available_proxies(self):
 47 |         return len([i for i in self.proxies.keys() if i not in self.disabled]) == 0
 48 | 
 49 |     def not_good(self, addr):
 50 |         def n(weight = MAX_FAIL, expire = 0):
 51 |             self.proxies[addr][2] += weight
 52 |             if self.disable_policy(*self.proxies[addr][1:]):
 53 |                 # add to disabled set
 54 |                 self.disabled[addr] = expire + time.time()
 55 |         return n
 56 | 
 57 |     def trace_proxy(self, addr, weight = 1, check_func = None, exceptions = []):
 58 |         def _(func):
 59 |             def __(*args, **kwargs):
 60 |                 ex = None
 61 |                 try:
 62 |                     r = func(*args, **kwargs)
 63 |                 except Exception as _ex:
 64 |                     ex = _ex
 65 |                     for e in [ConnectTimeout, ConnectionError, ProxyError] + exceptions:
 66 |                         if isinstance(ex, e):
 67 |                             # ignore BadStatusLine, this doesn't mean the proxy is bad
 68 |                             if e == ConnectionError and 'BadStatusLine' in str(e):
 69 |                                 continue
 70 |                             self.proxies[addr][2] += weight
 71 |                             break
 72 |                 else:
 73 |                     if check_func and not check_func(r):
 74 |                         self.proxies[addr][2] += weight
 75 |                     else:
 76 |                         # suc count + 1
 77 |                         self.proxies[addr][1] += weight
 78 |                 if self.disable_policy(*self.proxies[addr][1:]):
 79 |                     # add to disabled set and never expire
 80 |                     self.disabled[addr] = 0
 81 |                 # print(self.proxies[addr])
 82 |                 if ex:
 83 |                     # import traceback
 84 |                     # traceback.print_exc()
 85 |                     raise ex
 86 |                 return r
 87 |             return __
 88 |         return _
 89 | 
 90 |     def add_proxy(self, addr):
 91 |         if re.match("socks[45][ah]*://([^:^/]+)(\:\d{1,5})*/*$", addr):
 92 |             p = socks_proxy(addr, self.trace_proxy)
 93 |         elif re.match("https*://([^:^/]+)(\:\d{1,5})*/*$", addr):
 94 |             p = http_proxy(addr, self.trace_proxy)
 95 |         elif re.match("https*://([^:^/]+)(\:\d{1,5})*/.+\.php\?.*b=.+", addr):
 96 |             p = glype_proxy(addr, self.trace_proxy)
 97 |         else:
 98 |             raise ValueError("%s is not an acceptable proxy address" % addr)
 99 |         self.proxies[addr] = [p, 0, 0]
100 | 
101 | def socks_proxy(addr, trace_proxy):
102 |     proxy_info = {
103 |         'http':addr,
104 |         'https':addr
105 |     }
106 |     def handle(session):
107 |         @trace_proxy(addr, exceptions = [ProxySchemeUnknown, InvalidSchema])
108 |         def f(*args, **kwargs):
109 |             kwargs.update({'proxies': proxy_info})
110 |             return session.request(*args, **kwargs)
111 |         return f
112 |     return handle
113 | 
114 | def http_proxy(addr, trace_proxy):
115 |     proxy_info = {
116 |         'http':addr,
117 |         'https':addr
118 |     }
119 |     def handle(session):
120 |         @trace_proxy(addr)
121 |         def f(*args, **kwargs):
122 |             kwargs.update({'proxies': proxy_info})
123 |             return session.request(*args, **kwargs)
124 |         return f
125 |     return handle
126 | 
127 | def glype_proxy(addr, trace_proxy):
128 |     g_session = {"s":""}
129 |     def handle(session, g_session = g_session):
130 |         import urllib
131 |         argname = re.findall('[&\?]([a-zA-Z\._]+)=[^\d]*', addr)[0]
132 |         bval = re.findall('[&\?]b=(\d*)', addr)
133 |         bval = bval[0] if bval else '4'
134 |         server, inst_loc, script = re.findall('(https*://[^/]+)/(.*?)([^/]+\.php)', addr)[0]
135 |         urlre = re.compile('/%s%s\?u=([^&"\']+)&[^"\']+' % (inst_loc, script))
136 |         def mkurl(url):
137 |             return "%s/%s%s?%s=%s&b=%s&f=norefer" % (
138 |                 server, inst_loc, script, argname,
139 |                 (urllib.parse if PY3K else urllib).quote_plus(url), bval)
140 |         @trace_proxy(addr)
141 |         def f(*args, **kwargs):
142 |             # change url
143 |             url = args[1]
144 |             args = (args[0], mkurl(url),)
145 |             kwargs['headers'] = dict(session.headers)
146 |             # anti hotlinking
147 |             kwargs['headers'].update({'Referer':"%s/%s%s" % (server, inst_loc, script)})
148 |             _coo_new = dict(g_session) if g_session['s'] else {}
149 |             if 'Cookie' in kwargs['headers']:
150 |                 site = re.findall('https*://([^/]+)/*', url)[0]
151 |                 _coo_old = util.parse_cookie(kwargs['headers']['Cookie'])
152 |                 for k in _coo_old:
153 |                     _coo_new["c[%s][/][%s]" % (site, k)] = _coo_old[k]
154 |                 kwargs['headers']['Cookie'] = util.make_cookie(_coo_new)
155 |             tried = 0
156 |             while True:
157 |                 if tried == 2:
158 |                     raise PoolException("can't bypass glype https warning")
159 |                 rt = session.request(*args, **kwargs)
160 |                 if '<input type="hidden" name="action" value="sslagree">' not in rt.text:
161 |                     break
162 |                 rt = session.request("GET", "%s/%sincludes/process.php?action=sslagree" % (server, inst_loc),
163 |                     allow_redirects = False, **kwargs)
164 |                 if rt.headers.get('set-cookie'):
165 |                     _coo_new.update(util.parse_cookie(rt.headers.get('set-cookie').replace(",", ";")))
166 |                     kwargs['headers']['Cookie'] = util.make_cookie(_coo_new)
167 |                     if 's' in _coo_new:
168 |                         g_session["s"] = _coo_new['s']
169 |                         # print(g_session)
170 |                 tried += 1
171 | 
172 |             if rt.headers.get('set-cookie'):
173 |                 coo = util.parse_cookie(rt.headers.get('set-cookie').replace(",", ";"))
174 |                 for k in list(coo.keys()):
175 |                     _ = re.findall('c\[[^]]+\]\[[^]]+\]\[([^]]+)\]', k)
176 |                     if _:
177 |                         coo[_[0]] = coo[k]
178 |                 rt.headers['set-cookie'] = util.make_cookie(coo)
179 |             # change url back, only change on text/* mime types
180 |             rt.url = url
181 |             if rt.headers.get('content-type').startswith("text"):
182 |                 if PY3K:
183 |                     rt._content = rt._content.decode('utf-8')
184 |                 _ = re.match('<div id="error">(.*?)</div>', rt.content)
185 |                 if _:
186 |                     raise PoolException("glype returns: %s" % _[0])
187 |                 # change transformed url back
188 |                 rt._content = urlre.sub(lambda x:(urllib.parse if PY3K else urllib).unquote(x.group(1)), rt._content)
189 |                 if PY3K:
190 |                     rt._content = rt._content.encode('utf-8')
191 |             return rt
192 | 
193 |         return f
194 |     return handle
195 | 
196 | if __name__ == '__main__':
197 |     import requests
198 |     p = Pool()
199 |     p.add_proxy("sock5://127.0.0.1:16961")
200 |     print(p.proxied_request(requests.Session())("GET", "http://ipip.tk", headers = {}, timeout = 2).headers)
201 | 


--------------------------------------------------------------------------------
/xeHentai/rpc.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding:utf-8
  3 | # Contributor:
  4 | #      fffonion        <fffonion@gmail.com>
  5 | 
  6 | import re
  7 | import time
  8 | import json
  9 | import zipfile
 10 | import traceback
 11 | from hashlib import md5
 12 | from threading import Thread
 13 | import zlib
 14 | import requests
 15 | import pickle
 16 | from .const import *
 17 | from .const import __version__
 18 | from .i18n import i18n
 19 | if PY3K:
 20 |     from socketserver import ThreadingMixIn
 21 |     from http.server import HTTPServer, BaseHTTPRequestHandler
 22 |     from io import BytesIO as StringIO
 23 |     from urllib.parse import urlparse
 24 | else:
 25 |     from SocketServer import ThreadingMixIn
 26 |     from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
 27 |     from cStringIO import StringIO 
 28 |     from urlparse import urlparse
 29 | 
 30 | cmdre = re.compile("([a-z])([A-Z])")
 31 | pathre = re.compile("/(?:jsonrpc|img/|zip/|static/|ui/$)")
 32 | staticre = re.compile("/static/")
 33 | imgpathre = re.compile("/img/")
 34 | zippathre = re.compile("/zip/")
 35 | 
 36 | version_str = "xeHentai/%s" % __version__
 37 | 
 38 | class RPCServer(Thread):
 39 |     def __init__(self, xeH, bind_addr, secret = None, open_browser = True, logger = None, exit_check = None):
 40 |         Thread.__init__(self, name = "rpc")
 41 |         Thread.setDaemon(self, True)
 42 |         self.xeH = xeH
 43 |         self.bind_addr = bind_addr
 44 |         self.secret = secret
 45 |         self.logger = logger
 46 |         self.server = None
 47 |         self.open_browser = open_browser
 48 |         self._exit = exit_check if exit_check else lambda x:False
 49 | 
 50 |     def run(self):
 51 |         try:
 52 |             self.server = ThreadedHTTPServer(self.bind_addr, lambda *x: Handler(self.xeH, self.secret, *x))
 53 |         except Exception as ex:
 54 |             self.logger.error(i18n.RPC_CANNOT_BIND % traceback.format_exc())
 55 |         else:
 56 |             self.logger.info(i18n.RPC_STARTED % (self.bind_addr[0], self.bind_addr[1]))
 57 |             url = "http://%s:%s/ui/#host=%s,port=%s,https=no" % (
 58 |                 self.bind_addr[0], self.bind_addr[1],
 59 |                 self.bind_addr[0], self.bind_addr[1]
 60 |             )
 61 |             if self.secret:
 62 |                 url = url + ",token=" + self.secret
 63 |             if self.open_browser:
 64 |                 import webbrowser
 65 |                 webbrowser.open(url)
 66 |             else:
 67 |                 self.logger.info(i18n.RPC_WEBUI_PATH % url)
 68 |             while not self._exit("rpc"):
 69 |                 self.server.handle_request()
 70 | 
 71 | def is_readable_obj(obj):
 72 |     return hasattr(obj, "read")
 73 | 
 74 | def is_str_obj(obj):
 75 |     if PY3K:
 76 |         return isinstance(obj, str)
 77 |     return isinstance(obj, basestring)
 78 | 
 79 | def hash_link(secret, url):
 80 |     _ = "%s-xehentai-%s" % (secret if secret else "", url)
 81 |     if PY3K:
 82 |         _ = _.encode('utf-8')
 83 |     return md5(_).hexdigest()[:8]
 84 | 
 85 | def gen_thumbnail(fh, args):
 86 |     # returns a new file handler if resized
 87 |     # and a boolean indicates there'e error
 88 |     try:
 89 |         import PIL.Image as Image
 90 |     except:
 91 |         return fh, True
 92 |     if 'w' not in args and 'h' not in args:
 93 |         return fh, False
 94 |     size = (int(args['w']) if 'w' in args else int(args['h']),
 95 |             int(args['h']) if 'h' in args else int(args['w']))
 96 |     if not is_readable_obj(fh):
 97 |         fh = StringIO(fh)
 98 |     with Image.open(fh) as img:
 99 |         img.thumbnail(size)
100 |         ret_fh = StringIO()
101 |         img.save(ret_fh, format=img.format)
102 |         ret = ret_fh.getvalue()
103 |         ret_fh.close()
104 |         fh.close()
105 |         return ret, False
106 |     
107 | def jsonrpc_resp(request, ret = None, error_code = None, error_msg = None):
108 |     r = {
109 |         "id":None if not request["id"] else request["id"],
110 |         "jsonrpc":"2.0",
111 |     }
112 |     if error_code:
113 |         r['error'] = {
114 |             'code':error_code,
115 |             "message":i18n.c(error_code) if not error_msg else error_msg
116 |         }
117 |     else:
118 |         r['result'] = ret
119 |     return json.dumps(r)
120 | 
121 | def path_filter(func):
122 |     def f(self):
123 |         if not pathre.match(self.path):
124 |             self.send_response(404)
125 |             self.send_header("Access-Control-Allow-Origin", "*")
126 |             self.end_headers()
127 |             self.wfile.write(b'\n')
128 |             return
129 |         func(self)
130 |     return f
131 | 
132 | def load_cache():
133 |     if os.path.exists(STATIC_CACHE_FILE):
134 |         try:
135 |             with open(STATIC_CACHE_FILE, "rb") as f:
136 |                 r = zlib.decompress(f.read())
137 |                 r = pickle.loads(r)
138 |                 if 'v' in r or r['v'] == STATIC_CACHE_VERSION:
139 |                     return r
140 |         except:
141 |             pass
142 |     return { "v": STATIC_CACHE_VERSION }
143 | 
144 | def save_cache(static_cache):
145 |     r = pickle.dumps(static_cache)
146 |     r = zlib.compress(r)
147 |     with open(STATIC_CACHE_FILE, "wb") as f:
148 |         f.write(r)
149 | 
150 | static_cache = load_cache()
151 | class Handler(BaseHTTPRequestHandler):
152 | 
153 |     def __init__(self, xeH, secret, *args):
154 |         self.secret = secret
155 |         self.args = args
156 |         self.xeH = xeHentaiRPCExtended(xeH, secret)
157 |         self.http = requests.Session()
158 |         BaseHTTPRequestHandler.__init__(self, *args)
159 | 
160 |     def version_string(self):
161 |         return version_str
162 |     
163 |     def serve_file(self, f):
164 |         if hasattr(self.xeH, "_monitor"):
165 |             _task = self.xeH._monitor.task
166 |             # needed to lock between archiver
167 |             _task._f_lock.acquire()
168 |         f.seek(0, os.SEEK_END)
169 |         size = f.tell()
170 |         self.xeH.logger.verbose("GET %s 200 %d %s" % (self.path, size, self.client_address[0]))
171 |         self.send_header("Content-Length", size)
172 |         f.seek(0, os.SEEK_SET)
173 |         self.end_headers()
174 |         while True:
175 |             buf = f.read(51200)
176 |             if not buf:
177 |                 break
178 |             self.wfile.write(buf)
179 |         if hasattr(self.xeH, "_monitor"):
180 |             _task._f_lock.release()
181 |         return size
182 | 
183 |     def do_OPTIONS(self):
184 |         self.send_response(200)
185 |         self.send_header("Access-Control-Allow-Origin", "*")
186 |         self.send_header("Access-Control-Allow-Headers", "Content-Type")
187 |         self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
188 |         self.send_header("Access-Control-Max-Age", "1728000")
189 |         self.end_headers()
190 |         self.wfile.write(b'\n')
191 | 
192 |     @path_filter
193 |     def do_GET(self):
194 |         code = 200
195 |         rt = b''
196 |         mime = "text/html"
197 |         path = self.path
198 |         while True:
199 |             if imgpathre.match(path):
200 |                 args = dict(q.split("=") for q in urlparse(path).query.split("&") if q)
201 |                 _ = urlparse(path).path.split("/")
202 |                 if len(_) < 5:
203 |                     code = 400
204 |                     break
205 |                 _, _, _hash, guid, fid = _[:5]
206 |                 right_hash = hash_link(self.secret, "%s/%s" % (guid, fid))
207 |                 if right_hash != _hash:
208 |                     self.xeH.logger.warning("RPC: hash mismatch %s != %s" % (right_hash, _hash))
209 |                     code = 403
210 |                     break
211 |                 path, f, mime = self.xeH._get_image_path(guid, fid)
212 |                 if not f or not os.path.exists(os.path.join(path, f)):
213 |                     zipf = "%s.zip" % path
214 |                     if not os.path.exists(zipf):
215 |                         self.xeH.logger.warning("RPC: can't find %s" % f)
216 |                         code = 404
217 |                         break
218 |                     else:
219 |                         z = zipfile.ZipFile(zipf)
220 |                         try:
221 |                             rt = z.read(f)
222 |                         except Exception as ex:
223 |                             self.xeH.logger.warning("RPC: can't find %s in zipfile: %s" % (f, ex))
224 |                             code = 404
225 |                             break
226 |                         z.close()
227 |                 else:
228 |                     rt = open(os.path.join(path, f), 'rb')
229 |                 rt, _error = gen_thumbnail(rt, args)
230 |                 if _error:
231 |                     self.xeH.logger.warning("RPC: PIL needed for generating thumbnail")
232 |             elif zippathre.match(path):
233 |                 # args = urlparse(_).query
234 |                 _ = urlparse(path).path.split("/")
235 |                 if len(_) < 5:
236 |                     code = 400
237 |                     break
238 |                 _, _, _hash, guid, fname = _[:5]
239 |                 fname = fname.split('?')[0]
240 |                 right_hash = hash_link(self.secret, "%s" % guid)
241 |                 if right_hash != _hash:
242 |                     self.xeH.logger.warning("RPC: hash mismatch %s != %s" % (right_hash, _hash))
243 |                     code = 403
244 |                     break
245 |                 f = self.xeH._get_archive_path(guid)
246 |                 mime = 'application/zip'
247 |                 if not f or not os.path.exists(f):
248 |                     self.xeH.logger.warning("RPC: can't find %s" % f)
249 |                     code = 404
250 |                     break
251 |                 rt = open(f, 'rb')
252 |             elif path == "/ui/" or staticre.match(path):
253 |                 if path == "/ui/":
254 |                     path = "/"
255 |                 while True:
256 |                     cache_rt = None
257 |                     should_clear_cache = False
258 |                     headers = { "User-Agent":  version_str }
259 |                     if path in static_cache:
260 |                         cache_rt, mime, tm, lms = static_cache[path]
261 |                         if PY3K and not isinstance(cache_rt, bytes):
262 |                             cache_rt = bytes(cache_rt, 'ascii')
263 |                         if time.time() - STATIC_CACHE_TTL < tm:
264 |                             rt = StringIO(cache_rt)
265 |                             break
266 |                         should_clear_cache = True
267 |                         headers['If-Modified-Since'] = lms
268 | 
269 |                     req_start_tm = time.time()
270 |                     r = None
271 |                     try:
272 |                         r = self.http.get("http://xehentai.yooooo.us%s?_=%d" %(path, time.time()),
273 |                             headers=headers, timeout=10)
274 |                     except Exception as ex:
275 |                         self.xeH.logger.warn("error pulling %s from remote server: %s" % (path, ex))
276 |                     self.xeH.logger.verbose("%.2fs taken to pull %s from remote server %s bytes" % (
277 |                                             time.time() - req_start_tm, path, r and len(r.content) or 0))
278 |                     if r and r.status_code == 200:
279 |                         rt = StringIO(r.content)
280 |                         mime = r.headers['Content-type']
281 |                         if should_clear_cache:
282 |                             # clear all keys, since the js/css hash may change
283 |                             static_cache.clear()
284 |                         static_cache[path] = [r.content, mime, time.time(), r.headers['Last-Modified']]
285 |                         save_cache(static_cache)
286 |                     elif r and r.status_code == 304:
287 |                         # so this is tricky: if we hit /ui/ first and it's not expired
288 |                         # then all other assets should not expire
289 |                         if path == "/":
290 |                             for k in static_cache:
291 |                                 if k != "v":
292 |                                     static_cache[k][2] = time.time()
293 |                             save_cache(static_cache)
294 |                         rt = StringIO(cache_rt)
295 |                     elif cache_rt:
296 |                         self.xeH.logger.warn("serving stale cache %s" % (path))
297 |                         rt = StringIO(cache_rt)
298 |                     else:
299 |                         rt = jsonrpc_resp({"id":None}, error_code = ERR_RPC_INVALID_REQUEST)
300 |                     break
301 |             else:
302 |                 # fallback to rpc request
303 |                 rt = jsonrpc_resp({"id":None}, error_code = ERR_RPC_INVALID_REQUEST)
304 |                 mime = "application/json-rpc"
305 |             break
306 | 
307 |         self.send_response(code)
308 |         self.send_header("Access-Control-Allow-Origin", "*")
309 |         self.send_header("Content-Type", mime)
310 |         
311 |         if is_readable_obj(rt):
312 |             size = self.serve_file(rt)
313 |             rt.close()
314 |         else:
315 |             self.xeH.logger.verbose("GET %s 200 %d %s" % (self.path, len(rt), self.client_address[0]))
316 |             self.send_header("Content-Length", len(rt))
317 |             self.end_headers()
318 |             self.wfile.write(rt)
319 |         self.wfile.write(b'\n')
320 |         return
321 | 
322 |     @path_filter
323 |     def do_POST(self):
324 |         _get_header = lambda h: self.headers.get_all(h)[0] if PY3K else \
325 |             self.headers.getheader(h)
326 |         d = self.rfile.read(int(_get_header('Content-Length')))
327 |         code = 200
328 |         rt = b''
329 |         while True:
330 |             try:
331 |                 if PY3K:
332 |                     d = d.decode('utf-8')
333 |                 j = json.loads(d)
334 |                 assert('method' in j and j['method'] != None and 'id' in j)
335 |             except ValueError:
336 |                 code = 400
337 |                 rte = jsonrpc_resp({"id":None}, error_code = ERR_RPC_PARSE_ERROR)
338 |                 break
339 |             except AssertionError:
340 |                 code = 400
341 |                 rt = jsonrpc_resp({"id":None}, error_code = ERR_RPC_INVALID_REQUEST)
342 |                 break
343 |             cmd = re.findall("xeH\.(.+)", j['method'])
344 |             if not cmd:
345 |                 code = 404
346 |                 rt = jsonrpc_resp({"id":j['id']}, error_code = ERR_RPC_METHOD_NOT_FOUND)
347 |                 break
348 |             # let's make fooBar to foo_bar
349 |             cmd_r = cmdre.sub(lambda m: "%s_%s" % (m.group(1), m.group(2).lower()), cmd[0])
350 |             if not hasattr(self.xeH, cmd_r) or cmd_r.startswith("_"):
351 |                 code = 404
352 |                 rt = jsonrpc_resp({"id":j['id']}, error_code = ERR_RPC_METHOD_NOT_FOUND)
353 |                 break
354 |             params = ([], {}) if 'params' not in j else j['params']
355 |             if self.secret:
356 |                 authorized = False
357 |                 while True:
358 |                     if len(params[0]) == 0:
359 |                         break
360 |                     secret = params[0][0]
361 |                     if not PY3K and isinstance(secret, unicode):
362 |                         secret = secret.encode('utf-8')
363 |                     if is_str_obj(secret) and re.findall("token:%s" % self.secret, secret):
364 |                         params[0].pop(0)
365 |                         authorized = True
366 |                     break
367 |                 if not authorized:
368 |                     code = 403
369 |                     rt = jsonrpc_resp({"id":j['id']}, error_code = ERR_RPC_UNAUTHORIZED)
370 |                     break
371 |             self.xeH.logger.verbose("RPC from: %s, cmd: %s, params: %s" % (self.client_address[0], cmd, params))
372 |             try:
373 |                 # pop out token if extra token is found
374 |                 if len(params[0]) > 0 and 'token:' in params[0][0]:
375 |                     del params[0][0]
376 |                 cmd_rt = getattr(self.xeH, cmd_r)(*params[0], **params[1])
377 |             except (ValueError, TypeError) as ex:
378 |                 self.xeH.logger.verbose("RPC exec error:\n%s" % traceback.format_exc())
379 |                 code = 500
380 |                 rt = jsonrpc_resp({"id":j['id']}, error_code = ERR_RPC_EXEC_ERROR,
381 |                 error_msg = str(ex))
382 |                 break
383 |             if cmd_rt[0] > 0:
384 |                 rt = jsonrpc_resp({"id":j['id']}, error_code = cmd_rt[0], error_msg = cmd_rt[1])
385 |             else:
386 |                 rt = jsonrpc_resp({"id":j['id']}, ret = cmd_rt[1])
387 |             break
388 |         self.send_response(code)
389 |         self.send_header("Access-Control-Allow-Origin", "*")
390 |         self.send_header("Content-Type", "application/json-rpc")
391 |         self.send_header("Content-Length", len(rt))
392 |         self.end_headers()
393 |         if PY3K:
394 |             rt = rt.encode('utf-8')
395 |         self.wfile.write(rt)
396 |         self.wfile.write(b'\n')
397 |         return
398 | 
399 | 
400 |     def log_message(self, format, *args):
401 |         return
402 | 
403 | # extend xeHentai class for rpc commands
404 | class xeHentaiRPCExtended(object):
405 |     def __init__(self, xeH, secret):
406 |         self.xeH = xeH
407 |         self.secret = secret
408 |     
409 |     def get_info(self):
410 |         ret = {"version": self.verstr,
411 |             "threads_zombie": 0, "threads_running": 0,
412 |             "queue_pending": 0, "queue_finished": 0,
413 |             "download_speed": 0,
414 |         }
415 |         if hasattr(self, '_monitor'):
416 |             ret['threads_running'] = len(self._monitor.thread_last_seen)
417 |             ret['threads_zombie'] = len(self._monitor.thread_zombie)
418 |             if self._monitor.task.state > TASK_STATE_PAUSED and self._monitor.task.img_q:
419 |                 ret['queue_pending'] = self._monitor.task.img_q.qsize()
420 |                 ret['queue_finished'] = self._monitor.task.meta['finished']
421 |                 ret['download_speed'] = self._monitor.download_speed
422 |             else:
423 |                 ret['queue_pending'] = 0
424 |                 ret['queue_finished'] = 0
425 |         return ERR_NO_ERROR, ret
426 |     
427 |     def get_config(self):
428 |         rt = {k: v for k, v in self.cfg.items() if not k.startswith('rpc_') and k not in ('urls',)}
429 |         return ERR_NO_ERROR, rt
430 |     
431 |     def update_config(self, **cfg_dict):
432 |         cfg_dict = {k: v for k, v in cfg_dict.items() if not k.startswith('rpc_') and k not in ('urls',)}
433 |         if 'proxy' in cfg_dict:
434 |             self.xeH.update_config(**cfg_dict)
435 |         return self.get_config()
436 |            
437 |     def list_tasks(self, level = "download"):
438 |         reverse_mode = False
439 |         if level.startswith('!'):
440 |             reverse_mode = True
441 |             level = level[1:]
442 |         level = "TASK_STATE_%s" % level.upper()
443 |         if level not in globals():
444 |             return ERR_TASK_LEVEL_UNDEF, None
445 |         lv = globals()[level]
446 |         rt = [{_k:_v for _k, _v in v.to_dict().items() if _k not in
447 |             ('reload_map', 'duplicate_map', 'renamed_map', 'logger', 'img_q', 'page_q')}
448 |                  for _, v in self._all_tasks.items() if 
449 |                     (reverse_mode and v.state != lv) or (not reverse_mode and v.state == lv)]
450 |         return ERR_NO_ERROR, rt
451 |     
452 |     def _get_image_path(self, guid, fid):
453 |         mime_map = {
454 |             "jpg": "image/jpeg",
455 |             "jepg": "image/jpeg",
456 |             "png": "image/png",
457 |             "gif": "image/gif",
458 |             "bmp": "image/bmp",
459 |             "webp": "image/webp"
460 |         }
461 |         if guid not in self._all_tasks:
462 |             return None, None, None
463 |         t = self._all_tasks[guid]
464 |         fid = int(fid)
465 |         if fid in t.renamed_map:
466 |             f = t.renamed_map[fid]
467 |         else:
468 |             f = t.get_fidpad(fid)
469 | 
470 |         ext = os.path.splitext(f)[1].lower()[1:]
471 |         if ext not in mime_map:
472 |             mime = "application/octet-stream"
473 |         else:
474 |             mime = mime_map[ext]
475 |         return t.get_fpath(), f, mime
476 |     
477 |     def _get_archive_path(self, guid):
478 |         if guid not in self._all_tasks:
479 |             return None, None
480 |         t = self._all_tasks[guid]
481 |         st = time.time()
482 |         pth = t.make_archive(False)
483 |         et = time.time()
484 |         if et - st > 0.1:
485 |             self.logger.warning('RPC: %.2fs taken to get archive' % (et - st))
486 |         return pth
487 |     
488 |     def get_image(self, guid, request_range=None):
489 |         if guid not in self._all_tasks:
490 |             return ERR_TASK_NOT_FOUND, None
491 |         t = self._all_tasks[guid]
492 |         start = 1
493 |         end = t.meta['total'] + 1
494 |         if request_range:
495 |             request_range = str(request_range)
496 |             _ = request_range.split(',')
497 |             if len(_) == 1:
498 |                 start = int(request_range)
499 |             else:
500 |                 start = int(_[0])
501 |             end = int(_[0]) + 1
502 |         rt = []
503 |         for fid in range(start, end):
504 |             if fid in t.renamed_map:
505 |                 f = t.renamed_map[fid]
506 |             else:
507 |                 f = t.get_fidpad(fid)
508 |             uri = "%s/%s" % (t.guid, fid)
509 |             rt.append('/img/%s/%s/%s' % (hash_link(self.secret, uri), uri, f))
510 |         return ERR_NO_ERROR, rt
511 | 
512 |     
513 |     def __getattr__(self, k):
514 |         # fallback attribute handler
515 |         return getattr(self.xeH, k)
516 | 
517 | class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
518 |     """Handle requests in a separate thread."""
519 |     pass
520 | 


--------------------------------------------------------------------------------
/xeHentai/task.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding:utf-8
  3 | # Contributor:
  4 | #      fffonion        <fffonion@gmail.com>
  5 | 
  6 | import os
  7 | import re
  8 | import copy
  9 | import json
 10 | import uuid
 11 | import shutil
 12 | import zipfile
 13 | import tempfile
 14 | from threading import RLock
 15 | from . import util
 16 | from .const import *
 17 | from .const import __version__
 18 | from .util.logger import safestr
 19 | if PY3K:
 20 |     from queue import Queue, Empty
 21 | else:
 22 |     from Queue import Queue, Empty
 23 | 
 24 | class Task(object):
 25 |     def __init__(self, url, cfgdict, logger):
 26 |         self.url = url
 27 |         if url:
 28 |             _ = RE_INDEX.findall(url)
 29 |             if _:
 30 |                 self.gid, self.sethash = _[0]
 31 |         self.failcode = 0
 32 |         self.state = TASK_STATE_WAITING
 33 |         self.guid = str(uuid.uuid4())[:8]
 34 |         self.config = cfgdict
 35 |         self.meta = {}
 36 |         self.reload_map = {} # {img_hash:reload_url}
 37 |         self.duplicate_map = {} # map fid to duplicate file ids, {id:(id1, id2, )}
 38 |         self.renamed_map = {} # map fid to renamed file name, used in finding a file by id in RPC
 39 |         self.img_q = None
 40 |         self.page_q = None
 41 |         self._flist_done = set() # store id, don't save, will generate when scan
 42 |         self._monitor = None
 43 |         self._cnt_lock = RLock()
 44 |         self._f_lock = RLock()
 45 | 
 46 |         self.logger = logger
 47 | 
 48 |     def cleanup(self, before_delete=False):
 49 |         if before_delete:
 50 |             if 'delete_task_files' in self.config and self.config['delete_task_files'] and \
 51 |                 'title' in self.meta: # maybe it's a error task and meta is empty
 52 |                 fpath = self.get_fpath()
 53 |                 # TODO: ascii can't decode? locale not enus, also check save_file
 54 |                 if os.path.exists(fpath):
 55 |                     shutil.rmtree(fpath)
 56 |                 zippath = "%s.zip" % fpath
 57 |                 if os.path.exists(zippath):
 58 |                     os.remove(zippath)
 59 |         elif self.state in (TASK_STATE_FINISHED, TASK_STATE_FAILED):
 60 |             self.img_q = None
 61 |             self.page_q = None
 62 |             self.reload_map = {}
 63 |         
 64 |             # if 'filelist' in self.meta:
 65 |             #     del self.meta['filelist']
 66 |             # if 'resampled' in self.meta:
 67 |             #     del self.meta['resampled']
 68 | 
 69 |     def set_fail(self, code):
 70 |         self.state = TASK_STATE_FAILED
 71 |         self.failcode = code
 72 |         # cleanup all we cached
 73 |         self.meta = {}
 74 | 
 75 |     def migrate_exhentai(self):
 76 |         _ = re.findall("(?:https*://[g\.]*e\-hentai\.org)(.+)", self.url)
 77 |         if not _:
 78 |             return False
 79 |         self.url = "https://exhentai.org%s" % _[0]
 80 |         self.state = TASK_STATE_WAITING if self.state == TASK_STATE_FAILED else self.state
 81 |         self.failcode = 0
 82 |         return True
 83 | 
 84 |     def mpv_url(self):
 85 |         return re.sub(
 86 |             "/./%s/%s" % (self.gid, self.sethash),
 87 |             "/mpv/%s/%s" % (self.gid, self.sethash),
 88 |             self.url
 89 |         )
 90 | 
 91 |     def update_meta(self, meta):
 92 |         self.meta.update(meta)
 93 |         if self.config['jpn_title'] and self.meta['gjname']:
 94 |             self.meta['title'] = self.meta['gjname']
 95 |         else:
 96 |             self.meta['title'] = self.meta['gnname']
 97 | 
 98 |     # def guess_ori(self):
 99 |     #     # guess if this gallery has resampled files depending on some sample hashes
100 |     #     # return True if it's ori
101 |     #     if 'sample_hash' not in self.meta:
102 |     #         return
103 |     #     all_keys = map(lambda x:x[:10], self.meta['filelist'].keys())
104 |     #     for h in self.meta['sample_hash']:
105 |     #         if h not in all_keys:
106 |     #             self.has_ori = True
107 |     #             break
108 |     #     del self.meta['sample_hash']
109 | 
110 |     def base_url(self):
111 |         return re.findall(RESTR_SITE, self.url)[0]
112 | 
113 |     # def get_picpage_url(self, pichash):
114 |     #     # if file resized, this url not works
115 |     #     # http://%s.org/s/hash_s/gid-picid'
116 |     #     return "%s/s/%s/%s-%s" % (
117 |     #         self.base_url(), pichash[:10], self.gid, self.meta['filelist'][pichash][0]
118 |     #     )
119 | 
120 |     def put_img_queue(self, imgurl, reload_url, fname):
121 |         if self.config['download_ori']:
122 |             # fullimg.php doesn't have hash in imgurl
123 |             img_hash = RE_GALLERY.findall(reload_url)[0][0]
124 |         else:
125 |             img_hash = self.get_imghash(imgurl)
126 |         this_fid = int(RE_GALLERY.findall(reload_url)[0][1])
127 |         self.renamed_map[this_fid] = fname
128 |         # if same file occurs severl times in a gallery
129 |         while img_hash in self.reload_map:
130 |             fpath = self.get_fpath()
131 |             old_fid = self.get_fname(img_hash)[0]
132 |             old_f = os.path.join(fpath, self.get_fidpad(old_fid))
133 |             this_f = os.path.join(fpath, self.get_fidpad(this_fid))
134 |             self._f_lock.acquire()
135 |             # if we are equal to ourself, download as usual
136 |             if this_fid == old_fid:
137 |                 break
138 |             self.logger.debug("#%s is a duplicate of #%s" % (this_fid, old_fid))
139 |             if os.path.exists(old_f):
140 |                 # we can just copy old file if already downloaded
141 |                 try:
142 |                     shutil.copyfile(old_f, this_f)
143 |                 except Exception as ex:
144 |                     self._f_lock.release()
145 |                     raise ex
146 |                 else:
147 |                     self._f_lock.release()
148 |                     self.set_fid_finished(this_fid)
149 |                 self.logger.debug("#%s is copied from #%s" % (this_fid, old_fid))
150 |             else:
151 |                 # if not downloaded, we will copy them in save_file
152 |                 if old_fid not in self.duplicate_map:
153 |                     self.duplicate_map[old_fid] = set()
154 |                 self.duplicate_map[old_fid].add(this_fid)
155 |                 self._f_lock.release()
156 |                 self.logger.debug("#%s is pending copy from #%s" % (this_fid, old_fid))
157 |             return
158 | 
159 |         self.reload_map[img_hash] = [reload_url, fname]
160 |         self.img_q.put(imgurl)
161 | 
162 |     def put_page_queue_retry(self, redirect_url):
163 |         if not redirect_url:
164 |             return
165 |         if "redirect=" in redirect_url:
166 |            page_url = re.findall("redirect=(.+)", redirect_url)[0]
167 |            img_hash = RE_GALLERY.findall(page_url)[0][0]
168 |         else:
169 |             img_hash = self.get_imghash(redirect_url)
170 |         url = self.reload_map.pop(img_hash)[0]
171 |         self.page_q.put(url)
172 | 
173 |     def scan_downloaded(self, scaled = True):
174 |         fpath = self.get_fpath()
175 |         donefile = False
176 |         if os.path.exists(os.path.join(fpath, ".xehdone")) or os.path.exists("%s.zip" % fpath):
177 |             donefile = True
178 |         _range_idx = 0
179 |         for fid in range(1, self.meta['total'] + 1):
180 |             # check download range
181 |             if self.config['download_range']:
182 |                 _found = False
183 |                 # download_range is sorted asc
184 |                 for start, end in self.config['download_range'][_range_idx:]:
185 |                     if fid > end: # out of range right bound move to next range
186 |                         _range_idx += 1
187 |                     elif start <= fid <= end: # in range
188 |                         _found = True
189 |                         break
190 |                     elif fid < start: # out of range left bound
191 |                         break
192 |                 if not _found:
193 |                     self._flist_done.add(int(fid))
194 |                     continue
195 |             # can only check un-renamed files
196 |             fname = os.path.join(fpath, self.get_fidpad(fid)) # id
197 |             if donefile:
198 |                 self._flist_done.add(int(fid))
199 |             elif os.path.exists(fname):
200 |                 if os.stat(fname).st_size == 0:
201 |                     os.remove(fname)
202 |                 else:
203 |                     self._flist_done.add(int(fid))
204 |         self.meta['finished'] = len(self._flist_done)
205 |         if self.meta['finished'] == self.meta['total']:
206 |             self.state == TASK_STATE_FINISHED
207 | 
208 |     def put_page_queue(self, url):
209 |         # if url is not finished, call callback to put into queue
210 |         # type 1: normal file; type 2: resampled url
211 |         # if pichash:
212 |         #     fid = int(self.meta['filelist'][pichash][0])
213 |         #     if fid not in self._flist_done:
214 |         #         callback(self.get_picpage_url(pichash))
215 |         # elif url:
216 |         fhash, fid = RE_GALLERY.findall(url)[0]
217 |         # if fhash not in self.meta['filelist']:
218 |         #     self.meta['resampled'][fhash] = int(fid)
219 |         #     self.has_ori = True]
220 |         if int(fid) not in self._flist_done:
221 |             self.page_q.put(url)
222 | 
223 |     def save_file(self, imgurl, redirect_url, binary_iter):
224 |         # TODO: Rlock for finished += 1
225 |         fpath = self.get_fpath()
226 |         self._f_lock.acquire()
227 |         if not os.path.exists(fpath):
228 |             os.mkdir(fpath)
229 |         # use redirect_url, fullimg.php doen't have hash in imgurl
230 |         img_hash = self.get_imghash(redirect_url)
231 |         self._f_lock.release()
232 |         fid, fname = self.get_fname(img_hash)
233 |         _ = re.findall("/([^/\?]+)(?:\?|$)", redirect_url)
234 |         if _: # change it if it's a full image
235 |             fname = _[0]
236 |             self.reload_map[img_hash][1] = fname
237 | 
238 |         fn = os.path.join(fpath, self.get_fidpad(int(fid)))
239 |         if os.path.exists(fn) and os.stat(fn).st_size > 0:
240 |             return fn
241 |         # create a femp file first
242 |         # we don't need _f_lock because this will not be in a sequence
243 |         # and we can't do that otherwise we are breaking the multi threading
244 |         fd_tmp, fn_tmp = tempfile.mkstemp(prefix="xehentai-")
245 |         os.close(fd_tmp)
246 |         try:
247 |             with open(fn_tmp, "wb") as f:
248 |                 for binary in binary_iter():
249 |                     if self._monitor._exit(None):
250 |                         raise DownloadAbortedException()
251 |                     f.write(binary)
252 |         except DownloadAbortedException as ex:
253 |             try:
254 |                 os.unlink(fn_tmp)
255 |             except:
256 |                 pass
257 |             return
258 | 
259 |         self._f_lock.acquire()
260 |         try:
261 |             try:
262 |                 shutil.move(fn_tmp, fn)
263 |             except WindowsError as ex:
264 |                 # file is used by another process
265 |                 # do a copy and delete, WindowsError[32]
266 |                 if ex.errno == 13:
267 |                     shutil.copy(fn_tmp, fn)
268 |                     try:
269 |                         os.unlink(fn_tmp)
270 |                     except:
271 |                         pass
272 |                 else:
273 |                     raise ex
274 |             self.set_fid_finished(fid)
275 |             if fid in self.duplicate_map:
276 |                 for fid_rep in self.duplicate_map[fid]:
277 |                     # if a file download is interrupted, it will appear in self.filehash_map as well
278 |                     if fid_rep == fid:
279 |                         continue
280 |                     fn_rep = os.path.join(fpath, self.get_fidpad(fid_rep))
281 |                     shutil.copyfile(fn, fn_rep)
282 |                     self.set_fid_finished(fid_rep)
283 |                 self.logger.debug("#%s is copied from #%s in save_file" % (fid_rep, fid))
284 |                 del self.duplicate_map[fid]
285 |         except Exception as ex:
286 |             self._f_lock.release()
287 |             raise ex
288 |         self._f_lock.release()
289 |         return True
290 | 
291 |     def set_fid_finished(self, fid):
292 |         self._cnt_lock.acquire()
293 |         self._flist_done.add(fid)
294 |         self.meta['finished'] = len(self._flist_done)
295 |         self._cnt_lock.release()
296 | 
297 |     def get_fid_unfinished(self):
298 |         unfinished = []
299 |         for i in range(1, self.meta['total']):
300 |             if i not in self._flist_done:
301 |                 unfinished.append(i)
302 |         return unfinished
303 | 
304 |     def get_imghash(self, imgurl_with_hash):
305 |         # only get first 10 bytes of hash
306 |         # so we can use same key in both normal image (from imgurl, full hash)
307 |         # and original image (from gallery url/redirect url, short hash)
308 |         return RE_IMGHASH.findall(imgurl_with_hash)[0][0][:10]
309 | 
310 |     def get_imgfid(self, imgurl):
311 |         if RE_IMGHASH.findall(imgurl):
312 |             return self.get_fname(self.get_imghash(imgurl))[0]
313 |         # else is fullimg url
314 |         return int(re.findall("fullimg/\d+/(\d+)", imgurl)[0])
315 | 
316 |     def get_fname(self, img_hash):
317 |         pageurl, fname = self.reload_map[img_hash]
318 |         _, fid = RE_GALLERY.findall(pageurl)[0]
319 |         return int(fid), fname
320 | 
321 |     def get_fpath(self):
322 |         return os.path.join(self.config['dir'], util.legalpath(self.meta['title']))
323 | 
324 |     def get_fidpad(self, fid, ext = 'jpg'):
325 |         fid = int(fid)
326 |         _ = "%%0%dd.%%s" % (len(str(self.meta['total'])))
327 |         return _ % (fid, ext)
328 | 
329 |     def rename_fname(self):
330 |         fpath = self.get_fpath()
331 |         tmppath = os.path.join(fpath, RENAME_TMPDIR)
332 |         cnt = 0
333 |         error_list = []
334 |         # we need to track renamed fid's to decide 
335 |         # whether to rename into a temp filename or add (1)
336 |         # only need it when rename_ori = True
337 |         done_list = set()
338 |         for fid in list(self.renamed_map.keys()):
339 |             fname = self.renamed_map[fid]
340 |             original_ext = os.path.splitext(fname)[1]
341 |             if original_ext== "":
342 |                 original_ext = os.path.splitext(fname)[0]
343 |             # if we don't need to rename to original name and file type matches
344 |             if not self.config['rename_ori'] and original_ext.lower() == '.jpg':
345 |                 continue
346 |             fname_ori = os.path.join(fpath, self.get_fidpad(fid)) # id   
347 |             if self.config['rename_ori']:
348 |                 if os.path.exists(os.path.join(tmppath, self.get_fidpad(fid))):
349 |                     # if we previously put it into a temporary folder, we need to change fname_ori
350 |                     fname_ori = os.path.join(tmppath, self.get_fidpad(fid))
351 |                 fname_to = os.path.join(fpath, util.legalpath(fname))
352 |             else:
353 |                 # Q: Why we don't just use id.ext when saving files instead of using
354 |                 #   id.jpg?
355 |                 # A: If former task doesn't download all files, a new task with same gallery
356 |                 #   will have zero knowledge about file type before scanning all per page,
357 |                 #   thus can't determine if this id is downloaded, because file type is not
358 |                 #   necessarily .jpg
359 |                 fname_to = os.path.join(fpath, self.get_fidpad(fid, original_ext[1:]))
360 |             while fname_ori != fname_to:
361 |                 if os.path.exists(fname_ori):
362 |                     while os.path.exists(fname_to):
363 |                         _base, _ext = os.path.splitext(fname_to)
364 |                         _ = re.findall("\((\d+)\)$", _base)
365 |                         if self.config['rename_ori'] and fname_to not in done_list:
366 |                             # if our auto numbering conflicts with original naming
367 |                             # we move it into a temporary folder
368 |                             # It's safe since this file is same with one of our auto numbering filename,
369 |                             # it could never be conflicted with other files in tmppath
370 |                             if not os.path.exists(tmppath):
371 |                                 os.mkdir(tmppath)
372 |                             os.rename(fname_to, os.path.join(tmppath, os.path.split(fname_to)[1]))
373 |                             break
374 |                         if _ :# if ...(1) exists, use ...(2)
375 |                             print(safestr(_base))
376 |                             _base = re.sub("\((\d+)\)$", _base, lambda x:"(%d)" % (int(x.group(1)) + 1))
377 |                         else:
378 |                             _base = "%s(1)" % _base
379 |                         fname_to = "".join((_base, _ext))
380 |                 try:
381 |                     os.rename(fname_ori, fname_to)
382 |                     self.renamed_map[str(fid)] = os.path.split(fname_to)[1]
383 |                 except Exception as ex:
384 |                     error_list.append((os.path.split(fname_ori)[1], os.path.split(fname_to)[1], str(ex)))
385 |                     break
386 |                 if self.config['rename_ori']:
387 |                     done_list.add(fname_to)
388 |                 break
389 |             cnt += 1
390 |         if cnt == self.meta['total']:
391 |             with open(os.path.join(fpath, ".xehdone"), "w"):
392 |                 pass
393 |         try:
394 |             os.rmdir(tmppath)
395 |         except: # we will leave it undeleted if it's not empty
396 |             pass
397 |         return error_list
398 | 
399 |     def make_archive(self, remove=True):
400 |         # needed to lock between RPC get_img
401 |         self._f_lock.acquire()
402 |         dpath = self.get_fpath()
403 |         arc = "%s.zip" % dpath
404 |         if os.path.exists(arc):
405 |             return arc
406 |         with zipfile.ZipFile(arc, 'w', allowZip64=True) as zipFile:
407 |             zipFile.comment = ("xeHentai Archiver v%s\nTitle:%s\nOriginal URL:%s" % (
408 |                 __version__, self.meta['title'], self.url)).encode('utf-8')
409 |             for f in sorted(os.listdir(dpath)):
410 |                 fullpath = os.path.join(dpath, f)
411 |                 zipFile.write(fullpath, f, zipfile.ZIP_STORED)
412 |         if remove:
413 |             shutil.rmtree(dpath)
414 |         self._f_lock.release()
415 |         return arc
416 | 
417 |     def from_dict(self, j):
418 |         for k in self.__dict__:
419 |             if k not in j:
420 |                 continue
421 |             if k == "logger":
422 |                 continue
423 |             if k.endswith('_q') and j[k]:
424 |                 setattr(self, k, Queue())
425 |                 [getattr(self, k).put(e, False) for e in j[k]]
426 |             else:
427 |                 setattr(self, k, j[k])
428 |         _ = RE_INDEX.findall(self.url)
429 |         if _:
430 |             self.gid, self.sethash = _[0]
431 |         return self
432 | 
433 | 
434 |     def to_dict(self):
435 |         d = dict({k:v for k, v in self.__dict__.items()
436 |             if not k.endswith('_q') and not k.startswith("_") and k != "logger"})
437 |         for k in ['img_q', 'page_q']:
438 |             if getattr(self, k):
439 |                 d[k] = [e for e in getattr(self, k).queue]
440 |         return d
441 | 


--------------------------------------------------------------------------------
/xeHentai/updater/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding:utf-8
 2 | # Contributor:
 3 | #      fffonion        <fffonion@gmail.com>
 4 | 
 5 | class Updater(object):
 6 |     def get_latest_release(self, dev=False):
 7 |         raise NotImplementedError("get_latest_release not implemented")
 8 | 
 9 |     def get_src_path_in_archive(self, info):
10 |         raise NotImplementedError("get_src_path_in_archive not implemented")
11 | 
12 | class UpdateInfo(object):
13 |     def __init__(self, update_id, download_link, ts, message):
14 |         self.update_id = update_id
15 |         self.download_link = download_link
16 |         self.message = message
17 |         self.ts = ts
18 | 
19 | 


--------------------------------------------------------------------------------
/xeHentai/updater/github.py:
--------------------------------------------------------------------------------
 1 | # coding:utf-8
 2 | # Contributor:
 3 | #      fffonion        <fffonion@gmail.com>
 4 | 
 5 | import requests
 6 | import time
 7 | 
 8 | from . import Updater, UpdateInfo
 9 | 
10 | class GithubUpdaterException(Exception):
11 |     pass
12 | 
13 | class GithubUpdater(Updater):
14 |     def __init__(self, session):
15 |         self.session = session
16 | 
17 |     def get_latest_release(self, dev=False):
18 |         param = dev and "dev" or "master"
19 |         r = self.session.get("https://api.github.com/repos/fffonion/xeHentai/commits?sha=%s" % param)
20 |         commit = r.json()
21 |         if r.status_code != 200 or not commit:
22 |             raise GithubUpdaterException("Failed to get latest release info: %s" % r.text)
23 |         commit = commit[0]
24 |         sha = commit["sha"]
25 |         url = "https://github.com/fffonion/xeHentai/archive/%s.zip" % sha
26 | 
27 |         return UpdateInfo(
28 |             sha,
29 |             url,
30 |             commit["commit"]["author"]["date"],
31 |             commit["commit"]["message"].replace("\r", " ").replace("\n", " "),
32 |         )
33 | 
34 |     def get_src_path_in_archive(self, info):
35 |         return "xeHentai-%s/xeHentai" % info.update_id
36 | 


--------------------------------------------------------------------------------
/xeHentai/updater/updater.py:
--------------------------------------------------------------------------------
 1 | # coding:utf-8
 2 | # Contributor:
 3 | #      fffonion        <fffonion@gmail.com>
 4 | 
 5 | import os
 6 | import requests
 7 | import zipfile
 8 | import json
 9 | from ..i18n import i18n
10 | from ..util import logger
11 | from ..const import *
12 | from .. import const
13 | from .github import GithubUpdater
14 | from . import UpdateInfo
15 | 
16 | if PY3K:
17 |     from io import BytesIO as StringIO
18 | else:
19 |     from cStringIO import StringIO 
20 | 
21 | def check_update(l=None, config={}):
22 |     if not l:
23 |         l = logger.Logger()
24 |     dev = "update_beta_channel" in config and config["update_beta_channel"]
25 |     download_update = "auto_update" in config and config["auto_update"] == "download"
26 |     l.debug(i18n.UPDATE_CHANNEL % (dev and i18n.UPDATE_DEV_CHANNEL or i18n.UPDATE_RELEASE_CHANNEL))
27 |     s = requests.Session()
28 |     g = GithubUpdater(s)
29 |     try:
30 |         info = g.get_latest_release(dev)
31 |         if hasattr(const, "VERSION_UPDATE") and VERSION_UPDATE == info.update_id:
32 |             l.debug(i18n.UPDATE_NO_UPDATE)
33 |             return
34 |         l.info(i18n.UPDATE_AVAILABLE % (info.ts, info.message, info.update_id))
35 |         if not download_update:
36 |             l.info(i18n.UPDATE_DOWNLOAD_MANUALLY)
37 |             return
38 |         resp = s.get(info.download_link)
39 |         z = resp.content
40 |         with zipfile.ZipFile(StringIO(z)) as zf:
41 |             make_src_update_file(zf, g.get_src_path_in_archive(info), info)
42 |         l.info(i18n.UPDATE_COMPLETE)
43 |     except Exception as ex:
44 |         l.warn(i18n.UPDATE_FAILED % str(ex))
45 | 
46 | 
47 | def make_src_update_file(infile, path, info):
48 |     if not path.endswith("/"):
49 |         path += "/"
50 |     
51 |     with zipfile.ZipFile(SRC_UPDATE_FILE, "w") as z:
52 |         z.writestr(
53 |             "info.json",
54 |             json.dumps({
55 |                 "v": SRC_UPDATE_VERSION,
56 |                 "update_id": info.update_id,
57 |             }),
58 |             zipfile.ZIP_STORED,
59 |         )
60 | 
61 |         for f in infile.namelist():
62 |             if f.startswith(path) and not f.endswith("/"):
63 |                 z.writestr("xeHentai/%s" % f[len(path):], infile.read(f), zipfile.ZIP_STORED)


--------------------------------------------------------------------------------
/xeHentai/util/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding:utf-8
 3 | # Contributor:
 4 | #      fffonion        <fffonion@gmail.com>
 5 | 
 6 | import os
 7 | import re
 8 | import sys
 9 | import uuid
10 | import random
11 | 
12 | from ..const import *
13 | 
14 | if os.name == 'nt':
15 |     filename_filter = re.compile("[|:?\\/*'\"<>]|\.+(?:$)")
16 | else:# assume posix
17 |     filename_filter = re.compile("[\/:]")
18 | 
19 | if PY3K:
20 |     unichr = chr
21 | 
22 | def parse_cookie(coostr):
23 |     ret = {}
24 |     for coo in coostr.split(";"):
25 |         coo = coo.strip()
26 |         if coo.lower() in ('secure', 'httponly'):
27 |             continue
28 |         _ = coo.split("=")
29 |         k = _[0]
30 |         v = "=".join(_[1:])
31 |         if k.lower() in ('path', 'expires', 'domain', 'max-age', 'comment'):
32 |             continue
33 |         ret[k] = v
34 |     return ret
35 | 
36 | def make_cookie(coodict):
37 |     return ";".join(map("=".join, coodict.items()))
38 | 
39 | def make_ua():
40 |     rrange = lambda a, b, c = 1: c == 1 and random.randrange(a, b) or int(1.0 * random.randrange(a * c, b * c) / c)
41 |     ua = 'Mozilla/%d.0 (Windows NT %d.%d) AppleWebKit/%d (KHTML, like Gecko) Chrome/%d.%d Safari/%d' % (
42 |         rrange(4, 7, 10), rrange(5, 7), rrange(0, 3), rrange(535, 538, 10),
43 |         rrange(21, 27, 10), rrange(0, 9999, 10), rrange(535, 538, 10)
44 |     )
45 | 
46 | def get_proxy_policy(cfg):
47 |     if cfg['proxy_image_only']:
48 |         return RE_URL_IMAGE
49 |     if cfg['proxy_image']:
50 |         return RE_URL_ALL
51 |     return RE_URL_WEBPAGE
52 | 
53 | def parse_human_time(s):
54 |     rt = 0
55 |     day = re.findall('(\d+)\sdays*', s)
56 |     if day:
57 |         rt += 86400 * int(day[0])
58 |     hour = re.findall('(\d+)\shours*', s)
59 |     if hour:
60 |         rt += 3600 * int(hour[0])
61 |     minute = re.findall('(\d+)\sminutes*', s)
62 |     if minute:
63 |         rt += 60 * int(minute[0])
64 |     else:
65 |         rt += 60
66 |     return rt
67 | 
68 | def htmlescape(s):
69 |     def replc(match):
70 |         #print match.group(0),match.group(1),match.group(2)
71 |         dict={'amp':'&','nbsp':' ','quot':'"','lt':'<','gt':'>','copy':'©','reg':'®'}
72 |         #dict+={'∀':'forall','∂':'part','∃':'exist','∅':'empty','∇':'nabla','∈':'isin','∉':'notin','∋':'ni','∏':'prod','∑':'sum','−':'minus','∗':'lowast','√':'radic','∝':'prop','∞':'infin','∠':'ang','∧':'and','∨':'or','∩':'cap','∪':'cup','∫':'int','∴':'there4','∼':'sim','≅':'cong','≈':'asymp','≠':'ne','≡':'equiv','≤':'le','≥':'ge','⊂':'sub','⊃':'sup','⊄':'nsub','⊆':'sube','⊇':'supe','⊕':'oplus','⊗':'otimes','⊥':'perp','⋅':'sdot','Α':'Alpha','Β':'Beta','Γ':'Gamma','Δ':'Delta','Ε':'Epsilon','Ζ':'Zeta','Η':'Eta','Θ':'Theta','Ι':'Iota','Κ':'Kappa','Λ':'Lambda','Μ':'Mu','Ν':'Nu','Ξ':'Xi','Ο':'Omicron','Π':'Pi','Ρ':'Rho','Σ':'Sigma','Τ':'Tau','Υ':'Upsilon','Φ':'Phi','Χ':'Chi','Ψ':'Psi','Ω':'Omega','α':'alpha','β':'beta','γ':'gamma','δ':'delta','ε':'epsilon','ζ':'zeta','η':'eta','θ':'theta','ι':'iota','κ':'kappa','λ':'lambda','μ':'mu','ν':'nu','ξ':'xi','ο':'omicron','π':'pi','ρ':'rho','ς':'sigmaf','σ':'sigma','τ':'tau','υ':'upsilon','φ':'phi','χ':'chi','ψ':'psi','ω':'omega','ϑ':'thetasym','ϒ':'upsih','ϖ':'piv','Œ':'OElig','œ':'oelig','Š':'Scaron','š':'scaron','Ÿ':'Yuml','ƒ':'fnof','ˆ':'circ','˜':'tilde',' ':'ensp',' ':'emsp',' ':'thinsp','‌':'zwnj','‍':'zwj','‎':'lrm','‏':'rlm','–':'ndash','—':'mdash','‘':'lsquo','’':'rsquo','‚':'sbquo','“':'ldquo','”':'rdquo','„':'bdquo','†':'dagger','‡':'Dagger','•':'bull','…':'hellip','‰':'permil','′':'prime','″':'Prime','‹':'lsaquo','›':'rsaquo','‾':'oline','€':'euro','™':'trade','←':'larr','↑':'uarr','→':'rarr','↓':'darr','↔':'harr','↵':'crarr','⌈':'lceil','⌉':'rceil','⌊':'lfloor','⌋':'rfloor','◊':'loz','♠':'spades','♣':'clubs','♥':'hearts','♦':'diams'}
73 |         if len(match.groups()) >= 2:
74 |             if match.group(1) == '#':
75 |                 return unichr(int(match.group(2)))
76 |             else:
77 |                 return dict.get(match.group(2), '?')
78 |     htmlre = re.compile("&(#?)(\d{1,5}|\w{1,8}|[a-z]+);")
79 |     return htmlre.sub(replc, s)
80 | 
81 | def legalpath(s):
82 |     sanitized = filename_filter.sub(lambda x:"", s)
83 |     # windows doesn't like trailing while spaces
84 |     if os.name == 'nt':
85 |         sanitized = sanitized.rstrip()
86 |     return sanitized
87 | 
88 | MAXINT = 9223372036854775807
89 | def human_size(t):
90 |     if t >= MAXINT:
91 |         return "UNL."
92 |     for prefix in ("B", "KB", "MB", "GB", "TB"):
93 |         if t <= 1000:
94 |             return "%s %s" % (("%.2f" % t).rstrip("0").rstrip("."), prefix)
95 |         t /= 1024.0
96 |     return "%.2f TB" % t
97 | 


--------------------------------------------------------------------------------
/xeHentai/util/logger.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding:utf-8
  3 | # Contributor:
  4 | #      fffonion        <fffonion@gmail.com>
  5 | 
  6 | import os
  7 | import sys
  8 | import datetime
  9 | import locale
 10 | import logging
 11 | from threading import RLock
 12 | #import logging.handlers
 13 | from ..const import *
 14 | 
 15 | class tz_GMT8(datetime.tzinfo):
 16 |     def utcoffset(self, dt):
 17 |         return datetime.timedelta(hours = 8)
 18 |     def dst(self, dt):
 19 |         return datetime.timedelta(0)
 20 | 
 21 | 
 22 | def safestr(s):
 23 |     if (PY3K and isinstance(s, bytes)) or (not PY3K and not isinstance(s, unicode)):
 24 |         s = s.decode("utf-8")
 25 |     if PY3K:
 26 |         # python<=3.5 hack
 27 |         if sys.version_info.minor <= 5:
 28 |             return s \
 29 |                 .encode(locale.getdefaultlocale()[1] or 'utf-8', 'replace') \
 30 |                 .decode(locale.getdefaultlocale()[1] or 'utf-8', 'replace')
 31 |         return s
 32 |     return s.encode(locale.getdefaultlocale()[1] or 'utf-8', 'replace')
 33 |     #return _.decode('utf-8') if PY3K else _
 34 | 
 35 | if os.name == 'nt':
 36 |     endl = '\r\n'
 37 | else:# assume posix
 38 |     endl = '\n'
 39 | 
 40 | class Logger(object):
 41 |     # paste from goagent
 42 |     CRITICAL = 5
 43 |     FATAL = CRITICAL
 44 |     ERROR = 4
 45 |     WARNING = 3
 46 |     WARN = WARNING
 47 |     INFO = 2
 48 |     DEBUG = 1
 49 |     VERBOSE = 0
 50 |     def __init__(self, *args, **kwargs):
 51 |         # self.level = self.__class__.INFO
 52 |         self.logf = None
 53 |         self.__write = __write = lambda x: sys.stdout.write(safestr(x))
 54 |         self.isatty = getattr(sys.stdout, 'isatty', lambda: False)()
 55 |         self.__set_error_color = lambda: None
 56 |         self.__set_warning_color = lambda: None
 57 |         self.__set_debug_color = lambda: None
 58 |         self.__set_verbose_color = lambda: None
 59 |         self.__reset_color = lambda: None
 60 |         if self.isatty:
 61 |             if os.name == 'nt':
 62 |                 self._nt_color_lock = RLock()
 63 |                 import ctypes
 64 |                 SetConsoleTextAttribute = ctypes.windll.kernel32.SetConsoleTextAttribute
 65 |                 GetStdHandle = ctypes.windll.kernel32.GetStdHandle
 66 |                 self.__set_error_color = lambda: (self._nt_color_lock.acquire(), SetConsoleTextAttribute(GetStdHandle(-11), 0x0C))
 67 |                 self.__set_warning_color = lambda: (self._nt_color_lock.acquire(), SetConsoleTextAttribute(GetStdHandle(-11), 0x06))
 68 |                 self.__set_debug_color = lambda: (self._nt_color_lock.acquire(), SetConsoleTextAttribute(GetStdHandle(-11), 0x02))
 69 |                 self.__set_verbose_color = lambda: (self._nt_color_lock.acquire(), SetConsoleTextAttribute(GetStdHandle(-11), 0x08))
 70 |                 self.__set_bright_color = lambda: (self._nt_color_lock.acquire(), SetConsoleTextAttribute(GetStdHandle(-11), 0x0F))
 71 |                 self.__reset_color = lambda: (SetConsoleTextAttribute(GetStdHandle(-11), 0x07), self._nt_color_lock.release())
 72 |             elif os.name == 'posix':
 73 |                 self.__set_error_color = lambda: __write('\033[31m')
 74 |                 self.__set_warning_color = lambda: __write('\033[33m')
 75 |                 self.__set_debug_color = lambda: __write('\033[32m')
 76 |                 self.__set_verbose_color = lambda: __write('\033[36m')
 77 |                 self.__set_bright_color = lambda: __write('\033[32m')
 78 |                 self.__reset_color = lambda: __write('\033[0m')
 79 | 
 80 | 
 81 |     @classmethod
 82 |     def getLogger(cls, *args, **kwargs):
 83 |         return cls(*args, **kwargs)
 84 | 
 85 |     def cleanup(self):
 86 |         if self.logf:
 87 |             _ = self.logf
 88 |             self.logf = None
 89 |             _.close()
 90 | 
 91 |     def set_logfile(self, fpath):
 92 |         if self.logf:
 93 |             self.logf.close()
 94 |         self.logf = open(fpath, "ab")
 95 | 
 96 |     def set_level(self, level):
 97 |         f = ('verbose', 'debug', 'info')
 98 |         lv = min(max(level, 0), 3)
 99 |         for p in range(lv):
100 |             setattr(self, f[p], self.dummy)
101 | 
102 |     def log(self, level, fmt, *args, **kwargs):
103 |         # fmt=du8(fmt)
104 |         try:
105 |             try:
106 |                 self.__write('%-4s - [%s] %s\n' % (level, datetime.datetime.now(tz_GMT8()).strftime('%X'), fmt % args))
107 |             except (ValueError, TypeError):
108 |                 fmt = fmt.replace('%','%%')
109 |                 self.__write('%-4s - [%s] %s\n' % (level, datetime.datetime.now(tz_GMT8()).strftime('%X'), fmt % args))
110 |         except IOError: # fix for Windows console
111 |             pass
112 |         sys.stdout.flush()
113 |         if self.logf:
114 |             _ = ('[%s] %s%s' % (datetime.datetime.now(tz_GMT8()).strftime('%b %d %X'), fmt % args, endl))
115 |             self.logf.write(_.encode("utf-8", 'replace'))
116 | 
117 |     def dummy(self, *args, **kwargs):
118 |         pass
119 | 
120 |     def debug(self, fmt, *args, **kwargs):
121 |         self.__set_debug_color()
122 |         self.log('DEBG', fmt, *args, **kwargs)
123 |         self.__reset_color()
124 | 
125 |     def info(self, fmt, *args, **kwargs):
126 |         puretext = self.log('INFO', fmt, *args)
127 |         # if self.logfile:
128 |         #    self.logfile.write(puretext)
129 | 
130 |     def verbose(self, fmt, *args, **kwargs):
131 |         self.__set_verbose_color()
132 |         self.log('VERB', fmt, *args, **kwargs)
133 |         self.__reset_color()
134 | 
135 |     def warning(self, fmt, *args, **kwargs):
136 |         self.__set_warning_color()
137 |         self.log('WARN', fmt, *args, **kwargs)
138 |         self.__reset_color()
139 | 
140 |     def warn(self, fmt, *args, **kwargs):
141 |         self.warning(fmt, *args, **kwargs)
142 | 
143 |     def error(self, fmt, *args, **kwargs):
144 |         self.__set_error_color()
145 |         self.log('ERROR', fmt, *args, **kwargs)
146 |         self.__reset_color()
147 | 
148 |     def exception(self, fmt, *args, **kwargs):
149 |         self.error(fmt, *args, **kwargs)
150 |         traceback.print_exc(file = sys.stderr)
151 | 
152 |     def critical(self, fmt, *args, **kwargs):
153 |         self.__set_error_color()
154 |         self.log('CRITICAL', fmt, *args, **kwargs)
155 |         self.__reset_color()
156 | 


--------------------------------------------------------------------------------
/xeHentai/worker.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding:utf-8
  3 | # Contributor:
  4 | #      fffonion        <fffonion@gmail.com>
  5 | 
  6 | import re
  7 | import math
  8 | import time
  9 | import random
 10 | import requests
 11 | from requests.adapters import HTTPAdapter
 12 | import traceback
 13 | from threading import Thread, RLock
 14 | from . import util
 15 | from .const import *
 16 | from .i18n import i18n
 17 | from .proxy import PoolException, LowSpeedException
 18 | if PY3K:
 19 |     from queue import Queue, Empty
 20 |     from urllib.parse import urlparse, urlunparse
 21 | else:
 22 |     from Queue import Queue, Empty
 23 |     from urlparse import urlparse, urlunparse
 24 | 
 25 | # pinfo = {'http':'socks5://127.0.0.1:16963', 'https':'socks5://127.0.0.1:16963'}
 26 | 
 27 | class _FakeResponse(object):
 28 |     def __init__(self, url):
 29 |         self.status_code = 600
 30 |         self.content = None
 31 |         self.url = self._real_url = url
 32 |         self.headers = {}
 33 | 
 34 | class FallbackIpAdapter(HTTPAdapter):
 35 |     def __init__(self, ip_map=FALLBACK_IP_MAP, **kwargs):
 36 |         self.ip_map = ip_map
 37 |         kwargs.update({'max_retries': 1})
 38 |         requests.adapters.HTTPAdapter.__init__(self, **kwargs)
 39 | 
 40 |     # override
 41 |     def get_connection(self, url, proxies=None):
 42 |         if not proxies:
 43 |             parsed = urlparse(url)
 44 |             _hostname = parsed.hostname
 45 |             _scheme = parsed.scheme
 46 |             if _hostname in self.ip_map:
 47 |                 _parsed = list(parsed)
 48 |                 # alter the hostname
 49 |                 _hostname = '%s%s' % (random.choice(self.ip_map[_hostname]),
 50 |                                         (":%d" % parsed.port) if parsed.port else "")
 51 |                 _scheme = 'https'
 52 |             return self.poolmanager.connection_from_host(_hostname, parsed.port, scheme=_scheme,
 53 |                                                             pool_kwargs={'assert_hostname': parsed.hostname})
 54 |         else:
 55 |             # fallback
 56 |             return requests.adapters.HTTPAdapter.get_connection(self, url, proxies)
 57 |     
 58 |     def add_headers(self, request, **kwargs):
 59 |         if not request.headers.get('Host'):
 60 |             parsed = urlparse(request.url)
 61 |             request.headers['Host'] = parsed.hostname
 62 |     
 63 |     def cert_verify(self, conn, url, verify, cert):
 64 |         # let the super run verify process
 65 |         if url.startswith('http://'):
 66 |             url = "https://%s" % url[7:]
 67 |         return requests.adapters.HTTPAdapter.cert_verify(self, conn, url, verify, cert)
 68 | 
 69 | class HttpReq(object):
 70 |     def __init__(self, headers = {}, proxy = None, proxy_policy = None, retry = 10, timeout = 20, logger = None, tname = "main"):
 71 |         self.session = requests.Session()
 72 |         self.session.headers = headers
 73 |         for u in ('forums.e-hentai.org', 'e-hentai.org', 'exhentai.org'):
 74 |             self.session.mount('http://%s' % u, FallbackIpAdapter())
 75 |             self.session.mount('https://%s' % u, FallbackIpAdapter())
 76 |         self.session.mount('http://', HTTPAdapter(max_retries=0))
 77 |         self.retry = retry
 78 |         self.timeout = timeout
 79 |         self.proxy = proxy
 80 |         self.proxy_policy = proxy_policy
 81 |         self.logger = logger
 82 |         self.tname = tname
 83 | 
 84 |     def request(self, method, url, _filter, suc, fail, data=None, stream_cb=None):
 85 |         retry = 0
 86 |         url_history = [url]
 87 |         while retry < self.retry:
 88 |             try:
 89 |                 headers = {}
 90 |                 # if proxy_policy is set and match current url, use proxy
 91 |                 if self.proxy and self.proxy_policy and self.proxy_policy.match(url):
 92 |                     f, __not_good = self.proxy.proxied_request(self.session)
 93 |                 else:
 94 |                     f = self.session.request
 95 |                 r = f(method, url,
 96 |                     allow_redirects=False,
 97 |                     data=data,
 98 |                     timeout=self.timeout,
 99 |                     stream=stream_cb != None)
100 |             except requests.RequestException as ex:
101 |                 self.logger.warning("%s-%s %s %s: %s" % (i18n.THREAD, self.tname, method, url, ex))
102 |                 time.sleep(random.random() + 0.618)
103 |             else:
104 |                 if r.headers.get('content-length'):
105 |                     r.content_length = int(r.headers.get('content-length'))
106 |                 elif not stream_cb:
107 |                     r.content_length = len(r.content)
108 |                 else:
109 |                     r.content_length = 0
110 |                 self.logger.verbose("%s-%s %s %s %d %d" % (i18n.THREAD, self.tname, method, url, r.status_code, r.content_length))
111 |                 # if it's a redirect, 3xx
112 |                 if r.status_code > 300 and r.status_code < 400:
113 |                     _new_url = r.headers.get("location")
114 |                     if _new_url:
115 |                         url_history.append(url)
116 |                         if len(url_history) > DEFAULT_MAX_REDIRECTS:
117 |                             self.logger.warning("%s-%s %s %s: too many redirects" % (i18n.THREAD, self.tname, method, url))
118 |                             return _filter(_FakeResponse(url_history[0]), suc, fail)
119 |                         url = _new_url
120 |                         continue
121 |                 # intercept some error to see if we can change IP
122 |                 if self.proxy and r.content_length < 1024 and \
123 |                     re.match("Your IP address has been temporarily banned", r.text):
124 |                     _t = util.parse_human_time(r.text)
125 |                     self.logger.warn(i18n.PROXY_DISABLE_BANNED % _t)
126 |                     # fail this proxy immediately and set expire time
127 |                     __not_good(expire = _t)
128 |                     continue
129 |                 
130 |                 r.encoding = "utf-8"
131 |                 # r._text_bytes = r.text.encode("utf-8")
132 |                 r._real_url = url_history[-1]
133 | 
134 |                 r.iter_content_cb = stream_cb
135 | 
136 |                 return _filter(r, suc, fail)
137 |             retry += 1
138 |         return _filter(_FakeResponse(url_history[0]), suc, fail)
139 | 
140 | # speed statistics with ring buffer
141 | class speed_checker(object):
142 |     def __init__(self, cnt=5):
143 |         self.cnt = cnt
144 |         self.speed_buffer = []
145 |         self.reset()
146 | 
147 |     def check(self, l):
148 |         self.current_bytes += l
149 |         self.current_tm = time.time()
150 |         if self.current_tm - self.last_tm > 1:
151 |             self.speed_buffer.append((self.current_bytes-self.last_bytes)/(self.current_tm-self.last_tm))
152 |             while len(self.speed_buffer) > self.cnt:
153 |                 self.speed_buffer.pop(0)
154 |             self.last_tm = self.current_tm
155 |             self.last_bytes = self.current_bytes
156 |         return
157 | 
158 |     def calc(self, full=False):
159 |         if len(self.speed_buffer) == 0:
160 |             return 0
161 |         elif full and len(self.speed_buffer) < self.cnt:
162 |             return 0
163 |         return sum(self.speed_buffer)/len(self.speed_buffer)
164 | 
165 |     def reset(self):
166 |         self.last_tm = time.time()
167 |         self.last_bytes = 0
168 |         self.current_bytes = 0
169 |         self.current_tm = 0
170 |         if self.speed_buffer:
171 |             self.speed_buffer = []
172 | 
173 | class HttpWorker(Thread, HttpReq):
174 |     def __init__(self, tname, task_queue, flt, suc, fail, headers={}, proxy=None, proxy_policy=None,
175 |             retry=3, timeout=10, logger=None, keep_alive=None, stream_mode=False, lowspeed_threshold=None):
176 |         """
177 |         Construct a new 'HttpWorker' obkect
178 | 
179 |         :param tname: The name of this http worker
180 |         :param task_queue: The task Queue instance
181 |         :param flt: the filter function
182 |         :param suc: the function to call when succeeded
183 |         :param fail: the function to call when failed
184 |         :param headers: custom HTTP headers
185 |         :param proxy: proxy dict
186 |         :param proxy_policy: a function to determine whether proxy should be used
187 |         :param retry: retry count
188 |         :param timeout: timeout in seconds
189 |         :param logger: the Logger instance
190 |         :param keep_alive: the callback to send keep alive
191 |         :param stream_mode: set the request to use stream mode, keep_alive will be called every iteration
192 |         :return: returns nothing
193 |         """
194 |         HttpReq.__init__(self, headers, proxy, proxy_policy, retry, timeout, logger, tname = tname)
195 |         Thread.__init__(self, name = tname)
196 |         Thread.setDaemon(self, True)
197 |         self.task_queue = task_queue
198 |         self.logger = logger
199 |         self._keepalive = keep_alive
200 |         self._exit = lambda x: False
201 |         self.flt = flt
202 |         self.f_suc = suc
203 |         self.f_fail = fail
204 |         self.stream_mode = stream_mode
205 |         self.stream_speed = None
206 |         self.lowspeed_threshold = lowspeed_threshold
207 |         # if we don't checkin in this zombie_threshold time, monitor will regard us as zombie
208 |         self.zombie_threshold = timeout * (retry + 1) 
209 |         self.run_once = False
210 | 
211 |     def _finish_queue(self, *args):
212 |         # exit if current queue is finished
213 |         return self.run_once and self.task_queue.empty()
214 | 
215 |     def run(self):
216 |         self.logger.verbose("t-%s start" % self.name)
217 |         _stream_cb = None
218 |         if self.stream_mode:
219 |             self.stream_speed = speed_checker()
220 |             def f(d):
221 |                 self.stream_speed.check(len(d))
222 |                 if self.lowspeed_threshold:
223 |                     speed = self.stream_speed.calc(full=True)
224 |                     if 0 < speed < self.lowspeed_threshold:
225 |                         raise LowSpeedException("")
226 |                 self._keepalive(self)
227 |             _stream_cb = f
228 |         while not self._keepalive(self) and not self._exit(self):
229 |             try:
230 |                 url = self.task_queue.get(False)
231 |             except Empty:
232 |                 # set back to 0 when waiting
233 |                 if self.stream_speed:
234 |                     self.stream_speed.reset()
235 |                 time.sleep(1)
236 |                 continue
237 |             self.run_once = True
238 |             try:
239 |                 self.request("GET", url, self.flt, self.f_suc, self.f_fail, stream_cb=_stream_cb)
240 |             except PoolException as ex:
241 |                 self.logger.warning("%s-%s %s" % (i18n.THREAD, self.tname, str(ex)))
242 |                 break
243 |             except LowSpeedException as ex:
244 |                 self.logger.warning(i18n.THREAD_SPEED_TOO_LOW % (
245 |                     self.tname,
246 |                     util.human_size(self.stream_speed.calc(full=True)),
247 |                     util.human_size(self.lowspeed_threshold),
248 |                 ))
249 |                 self.flt(_FakeResponse(url), self.f_suc, self.f_fail)
250 |             except Exception as ex:
251 |                 self.logger.warning(i18n.THREAD_UNCAUGHT_EXCEPTION % (self.tname, traceback.format_exc()))
252 |                 self.flt(_FakeResponse(url), self.f_suc, self.f_fail)
253 |         # notify monitor the last time
254 |         self.logger.verbose("t-%s exit" % self.name)
255 |         self._keepalive(self, _exit = True)
256 | 
257 | class ArchiveWorker(Thread):
258 |     # this worker is not managed by monitor
259 |     def __init__(self, logger, task, exit_check = None):
260 |         Thread.__init__(self, name = "archiver%s" % task.guid)
261 |         Thread.setDaemon(self, True)
262 |         self.logger = logger
263 |         self.task = task
264 |         self._exit = lambda x: False
265 | 
266 |     def run(self):
267 |         while self.task.state < TASK_STATE_FINISHED:
268 |             if self._exit(self) or self.task.state in (TASK_STATE_PAUSED, TASK_STATE_FAILED):
269 |                 return
270 |             time.sleep(1)
271 |         self.logger.info(i18n.TASK_START_MAKE_ARCHIVE % self.task.guid)
272 |         self.task.state = TASK_STATE_MAKE_ARCHIVE
273 |         t = time.time()
274 |         try:
275 |             pth = self.task.make_archive()
276 |         except Exception as ex:
277 |             self.task.state = TASK_STATE_FAILED
278 |             self.logger.error(i18n.TASK_ERROR % (self.task.guid, i18n.c(ERR_CANNOT_MAKE_ARCHIVE) % traceback.format_exc()))
279 |         else:
280 |             self.task.state = TASK_STATE_FINISHED
281 |             self.logger.info(i18n.TASK_MAKE_ARCHIVE_FINISHED % (self.task.guid, pth, time.time() - t))
282 | 
283 | 
284 | class Monitor(Thread):
285 |     def __init__(self, req, proxy, logger, task, exit_check=None, ignored_errors=[]):
286 |         Thread.__init__(self, name = "monitor%s" % task.guid)
287 |         Thread.setDaemon(self, True)
288 |         # the count of votes per error code
289 |         self.vote_result = {}
290 |         # the error code to be ignored
291 |         self.vote_cleared = set().union(ignored_errors)
292 |         self.thread_last_seen = {}
293 |         self.dctlock = RLock()
294 |         self.votelock = RLock()
295 |         self.thread_ref = {}
296 |         self.thread_zombie = set()
297 |         # HttpReq instance
298 |         self.req = req
299 |         # proxy.Pool instance
300 |         self.proxy = proxy
301 |         self.logger = logger
302 |         self.task = task
303 |         self._exit = exit_check if exit_check else lambda x: False
304 |         self._cleaning_up = False
305 |         self.download_speed = 0
306 |         if os.name == "nt":
307 |             self.set_title = lambda s:os.system("TITLE %s" % (
308 |                 s if PY3K else s.encode(CODEPAGE, 'replace')))
309 |         elif os.name == 'posix':
310 |             import sys
311 |             self.set_title = lambda s:sys.stdout.write("\033]2;%s\007" % (
312 |                 s if PY3K else s.encode(CODEPAGE, 'replace')))
313 | 
314 |     def set_vote_ns(self, tnames):
315 |         t = time.time()
316 |         self.thread_last_seen = {k:t for k in tnames}
317 | 
318 |     def vote(self, tname, code):
319 |         # thread_id, result_code
320 |         self.votelock.acquire()
321 |         if code != ERR_NO_ERROR:
322 |             self.logger.verbose("t-%s vote:%s" % (tname, code))
323 |         if code not in self.vote_result:
324 |             self.vote_result[code] = 1
325 |         else:
326 |             self.vote_result[code] += 1
327 |         self.votelock.release()
328 | 
329 |     def wrk_keepalive(self, wrk_thread, _exit = False):
330 |         tname = wrk_thread.name
331 |         if tname in self.thread_zombie:
332 |             self.thread_zombie.remove(tname)
333 |         # all image downloaded
334 |         # task is finished or failed
335 |         # monitor is exiting or worker notify its exit
336 |         _ = self.task.meta['finished'] == self.task.meta['total'] or \
337 |             self.task.state in (TASK_STATE_FINISHED, TASK_STATE_FAILED) or \
338 |             self._exit("mon") or _exit
339 |         # self.logger.verbose("mon#%s %s ask, %s, %s" % (self.task.guid, tname, _,
340 |         #    self.thread_last_seen))
341 |         if _ or not wrk_thread.is_alive():
342 |             self.dctlock.acquire()
343 |             if tname in self.thread_last_seen:
344 |                 del self.thread_last_seen[tname]
345 |             if tname in self.thread_ref:
346 |                 del self.thread_ref[tname]
347 |             self.dctlock.release()
348 |         else:
349 |             self.thread_last_seen[tname] = time.time()
350 |             if tname not in self.thread_ref:
351 |                 self.thread_ref[tname] = wrk_thread
352 |         return _
353 | 
354 |     # def _rescan_pages(self):
355 |     #     # not using
356 |     #     # throw away existing page urls
357 |     #     while True:
358 |     #         try:
359 |     #             self.task.page_q.get(False)
360 |     #         except Empty:
361 |     #             break
362 |     #     # put page into task.list_q
363 |     #     [self.task.list_q.put("%s/?p=%d" % (self.task.url, x)
364 |     #         for x in range(1, 1 + int(math.ceil(self.task.meta['total']/20.0))))
365 |     #     ]
366 |     #     print(self.task.list_q.qsize())
367 | 
368 |     def _check_vote(self):
369 |         # if False and ERR_IMAGE_RESAMPLED in self.vote_result and ERR_IMAGE_RESAMPLED not in self.vote_cleared:
370 |         #     self.logger.warning(i18n.TASK_START_PAGE_RESCAN % self.task.guid)
371 |         #     self._rescan_pages()
372 |         #     self.task.meta['has_ori'] = True
373 |         #     self.vote_cleared.add(ERR_IMAGE_RESAMPLED)
374 |         if ERR_QUOTA_EXCEEDED in self.vote_result and \
375 |             ERR_QUOTA_EXCEEDED not in self.vote_cleared and \
376 |             self.vote_result[ERR_QUOTA_EXCEEDED] >= len(self.thread_last_seen):
377 |             self.logger.error(i18n.TASK_STOP_QUOTA_EXCEEDED % self.task.guid)
378 |             self.task.state = TASK_STATE_FAILED
379 | 
380 |     def run(self):
381 |         CHECK_INTERVAL = 10
382 |         STUCK_INTERVAL = 90
383 |         intv = 0
384 |         self.set_title(i18n.TASK_START % self.task.guid)
385 |         last_change = time.time()
386 |         last_finished = -1
387 |         while len(self.thread_last_seen) > 0:
388 |             intv += 1
389 |             self._check_vote()
390 |             total_speed = 0
391 |             for k, last_seen in list(self.thread_last_seen.items()):
392 |                 _zombie_threshold = self.thread_ref[k].zombie_threshold if k in self.thread_ref else 30
393 |                 if time.time() - last_seen > _zombie_threshold:
394 |                     if k in self.thread_ref and self.thread_ref[k].is_alive():
395 |                         self.logger.warning(i18n.THREAD_MAY_BECOME_ZOMBIE % k)
396 |                         self.thread_zombie.add(k)
397 |                     else:
398 |                         self.logger.warning(i18n.THREAD_SWEEP_OUT % k)
399 |                     del self.thread_last_seen[k]
400 |                 # if thread is not a zombie, add to speed sum
401 |                 elif k in self.thread_ref and self.thread_ref[k].stream_speed:
402 |                     total_speed += self.thread_ref[k].stream_speed.calc()
403 |             self.download_speed = total_speed
404 |             if intv == CHECK_INTERVAL:
405 |                 _ = "%s %dR/%dZ, %s %dR/%dD, %s/s" % (
406 |                     i18n.THREAD,
407 |                     len(self.thread_last_seen), len(self.thread_zombie),
408 |                     i18n.QUEUE,
409 |                     self.task.img_q.qsize(),
410 |                     self.task.meta['finished'],
411 |                     util.human_size(total_speed))
412 |                 self.logger.info(_)
413 |                 self.set_title(_)
414 |                 intv = 0
415 |                 # if not downloading any new images in 1.5 min, exit
416 |                 if last_finished != self.task.meta['finished']:
417 |                     last_change = time.time()
418 |                     last_finished = self.task.meta['finished']
419 |                 elif time.time() - last_change > STUCK_INTERVAL:
420 |                     self.logger.info(i18n.TASK_UNFINISHED % (self.task.guid, self.task.get_fid_unfinished()))
421 |                     if total_speed > 0:
422 |                         # reset last_change
423 |                         last_change = time.time()
424 |                         self.logger.warning(i18n.TASK_SLOW % self.task.guid)
425 |                     else:
426 |                         self.logger.warning(i18n.TASK_STUCK % self.task.guid)
427 |                         break
428 |             time.sleep(0.5)
429 |         if self.task.meta['finished'] == self.task.meta['total']:
430 |             _err = self.task.rename_fname()
431 |             if _err:
432 |                 self.logger.warning(i18n.XEH_RENAME_HAS_ERRORS % (
433 |                     "\n".join(map(lambda x:"%s => %s : %s" % x, _err))
434 |                 ))
435 |             self.set_title(i18n.TASK_FINISHED % self.task.guid)
436 |             self.logger.info(i18n.TASK_FINISHED % self.task.guid)
437 |             self.task.state = TASK_STATE_FINISHED
438 |         self.task.cleanup()
439 | 
440 | if __name__ == '__main__':
441 |     print(HttpReq().request("GET", "https://ipip.tk", lambda x:x, None, None))
442 | 


--------------------------------------------------------------------------------