├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── LICENSE.txt ├── README.chs.md ├── README.cht.md ├── README.md ├── icon3.ico ├── requirements.txt ├── setup.py ├── util ├── make_release_config.py └── make_verinfo.py ├── xeH ├── xeH.py └── xeHentai ├── __init__.py ├── cli.py ├── config.py ├── const.py ├── core.py ├── filters.py ├── i18n ├── __init__.py ├── en_us.py ├── zh_hans.py └── zh_hant.py ├── proxy.py ├── rpc.py ├── task.py ├── updater ├── __init__.py ├── github.py └── updater.py ├── util ├── __init__.py └── logger.py └── worker.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | *.pyc 4 | *.json 5 | *.sh 6 | *.log 7 | .ehentai.cookie 8 | release 9 | desktop.ini 10 | verinfo.txt 11 | .atomignore 12 | make.bat 13 | config.py 14 | README.html 15 | CHANGELOG.html 16 | webui.gz 17 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | 2 | sudo: required 3 | dist: xenial 4 | 5 | matrix: 6 | include: 7 | - os: linux 8 | language: python 9 | python: "2.7" 10 | - os: linux 11 | language: python 12 | python: "3.5" 13 | - os: linux 14 | language: python 15 | python: "3.6" 16 | - os: linux 17 | language: python 18 | python: "3.7" 19 | # https://chocolatey.org/packages/python2 20 | # - os: windows 21 | # language: sh 22 | # python: "2.7" 23 | # before_install: 24 | # - choco install python2 25 | # - export PATH="/c/Python26:/c/Python27/Scripts:$PATH" 26 | # - wget https://bootstrap.pypa.io/get-pip.py 27 | # - python ./get-pip.py 28 | # https://chocolatey.org/packages/python/3.7.4 29 | - os: windows 30 | language: sh 31 | python: "3.5" 32 | before_install: 33 | - choco install python --version 3.5.4 34 | - export PATH="/c/Python35:/c/Python35/Scripts:$PATH" 35 | - os: windows 36 | language: sh 37 | python: "3.6" 38 | before_install: 39 | - choco install python --version 3.6.8 40 | - export PATH="/c/Python36:/c/Python36/Scripts:$PATH" 41 | - os: windows 42 | language: sh 43 | python: "3.7" 44 | before_install: 45 | - choco install python --version 3.7.4 46 | - export PATH="/c/Python37:/c/Python37/Scripts:$PATH" 47 | 48 | install: 49 | - python setup.py install 50 | 51 | script: 52 | - xeH --help 53 | - xeH $TEST_URL_E --dir test1 54 | # nested env currently have no effect on windows 55 | - LANG=zh_CN.utf-8 LC_ALL=zh_CN.utf-8 xeH $TEST_URL_E --dir test1 56 | - LANG=zh_TW.utf-8 LC_ALL=zh_TW.utf-8 xeH $TEST_URL_E --dir test1 57 | - xeH $TEST_URL_E --dir test1 --archive true 58 | 59 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 2.023 4 | - 自动更新 5 | - 修复保存任务时logger无法序列化的bug (感谢 @9chu) 6 | 7 | ## 2.022 8 | - 增加下载速度显示 9 | - 增加低速自动重试 `low_speed_threshold`, 默认为`10KB/s`以下重试 10 | - 增加本地缓存WebUI, 并在打开RPC时自动开启浏览器,可通过`rpc_open_browser`关闭 11 | - 支持制作大于2G的压缩包 12 | - 修复同内容文件处理, 优化重试表的键 13 | - 修复RPC读图和制作压缩包间的竞争问题 14 | - 修复flt_quota_check的返回参数为3个 15 | - 修复配额检查中的路径和大小匹配仅在和期望大小不同时触发 16 | - 修复没有开启rpc_secret但发送了token时的RPC参数列表 17 | 18 | ## 2.021 19 | - 支持 `multi page viewer` 20 | - 不再支持Python 3.3 21 | - 修复里站和表站CDN的IP 22 | - 优化509错误判断 23 | - 修复重命名时的编码问题 24 | - 修复某些Python版本下跨磁盘移动时的问题 25 | 26 | ## 2.020 27 | - 增加RPC帮助函数:`get_info`,`get_config`,`update_config`,`get_image` 28 | - 增加通过RPC看图和下载压缩包功能 29 | - 增加`delete_task_files`选项,设置是否删除任务时同时删除下载的文件 30 | - 
修复Windows文件夹不能以空格结尾以及文件不能以`.`结尾 31 | - 修复下载时的临时文件在Windows下报错Error 32的问题 32 | 33 | ## 2.019 34 | - 增加图片地址解析时的错误处理 35 | - 增加使用流模式下载图片,优化判断丧尸线程 36 | - 修复无法从环境变量中获得LOCALE时的问题 37 | - 修复unichr被当成局部变量的问题 38 | - 修复交互模式在Python3中的一个问题 39 | 40 | ## 2.018 41 | - 增加`jpn_title`选项,选择是否使用日语标题 42 | - 增加`download_range`选项,选择下载范围 43 | - 增加`timeout`选项,设置下载图片的超时 44 | - 增加`proxy_image_only`选项,设置仅使用代理下载图片 45 | - 命令行增加`--force`选项,设置忽略配额继续下载; 配置增加`ignored_errors`选项,设置忽略的错误码 46 | - 检查下载的图片是否完整 47 | - 识别`socks5h`代理字符串 48 | 49 | ## 2.017 50 | - 修复匹配网址的正则表达式 51 | - 修复表站自动转换里站逻辑 52 | - 修复下载图片重试后重命名失败的问题 53 | - 修复原始文件名与自动编号冲突时重命名异常的问题 54 | 55 | ## 2.016 56 | - 修复超出配额的判断 57 | - 修复可能会下到评论中的图的问题 58 | - 修复无法匹配安装在根目录glype的代理问题 59 | - 某些错误现在会显示详细信息 60 | - 增加`proxy_image`选项,选择是否使用代理下载图片 61 | 62 | ## 2.015 63 | - 显示重命名时的错误 64 | - 修复扩展名中多余的`.` 65 | - 修复Windows下文件名的保留字符`<`, `>` 66 | 67 | ## 2.014 68 | - 修复cookie中只有nw判断为已登录的bug 69 | - 登录失败时显示网页上的错误信息 70 | - 交互支持逗号分割多个任务,命令行模式支持同时添加多个任务 71 | - 修复重命名的bug 72 | 73 | ## 2.013 74 | - 修复页数>=1000页时抽风的bug 75 | - 原始文件名冲突时自动+1 76 | 77 | ## 2.012 78 | - 修复Windows下中文路径的问题 79 | 80 | ## 2.011 81 | - 修复每页缩略图数量不是40时下载不完整的bug 82 | - 90秒没有新下载图片则自动结束任务 83 | - 本子包含重复图片时直接复制 84 | 85 | ## 2.010 86 | - 图片404时重试 87 | 88 | ## 2.009 89 | - 交互模式默认值改为配置中设置的值 90 | 91 | ## 2.008 92 | - 跳过Content Warning 93 | 94 | ## 2.007 95 | - 修复本子中有重复图时无法自动退出的bug 96 | - 其他稳定性修复 97 | 98 | ## 2.006 99 | - 增加`make_archive`, 下载完成后生成zip压缩包并删除下载目录 100 | - 完善reload机制 101 | - 检测IP是否被ban并自动更换代理IP 102 | 103 | ## 2.005 104 | - 增加`rpc_secret` 105 | - `i18n/zh_cn`更名为`i18n/zh_hans` 106 | 107 | ## 2.004 108 | - 支持Python3 109 | 110 | ## 2.003 111 | - 读取 .ehentai.cookie 112 | - 交互模式不保存任务 113 | - 添加 `--rename-ori` 参数和配置 114 | - 如果用户配置有问题,从内置配置读取默认值 115 | - 其他更新 116 | 117 | ## 2.002 118 | - 支持`glype`代理类型 119 | 120 | ## 2.001 121 | - 初始发布 122 | -------------------------------------------------------------------------------- /README.chs.md: -------------------------------------------------------------------------------- 1 | # 绅♂士♂站♂小♂爬♂虫 2 | 3 | [![Build Status](https://travis-ci.org/fffonion/xeHentai.svg?branch=master)](https://travis-ci.org/fffonion/xeHentai) 4 | 5 | [English](README.md) [繁體中文](README.cht.md) 6 | 7 | [xeHentai Web界面](https://github.com/fffonion/xeHentai-webui) 8 | 9 | ## 快速入门 10 | 11 | windows用户可以下载可执行文件 [这里](https://github.com/fffonion/xeHentai/releases) [或这里](http://dl.yooooo.us/share/xeHentai/) 12 | 13 | 或者可以运行源码 14 | 15 | ```shell 16 | pip install -U requests[socks] 17 | git clone https://github.com/fffonion/xeHentai.git 18 | cd xeHentai 19 | python ./setup.py install 20 | xeH 21 | ``` 22 | 23 | 新版本默认为命令行模式,如果需要使用交互模式,请运行`xeH.py -i` 24 | 25 | ## 详细说明 26 | 27 | ### 配置文件 28 | 29 | 使用源码运行的用户请先将`xeHentai/config.py`复制到当前目录。 30 | 31 | 配置的优先级为 交互模式 > 命令行参数 > 用户config.py > 内置config.py。 32 | 33 | 常用参数: 34 | 35 | - **daemon** 后台模式,仅支持posix兼容的系统,参见[运行模式](#运行模式),默认为否 36 | - **dir** 下载目录,默认为当前目录 37 | - **download_ori** 是否下载原图,默认为否 38 | - **jpn_title** 是否使用日语标题,如果关闭则使用英文或罗马字标题,默认为是 39 | - **rename_ori** 将图片重命名为原始名称,如果关闭则使用序号,默认为否 40 | - **make_archive** 是否下载完成后生成zip压缩包,并删除下载目录,默认为否 41 | 42 | 高级参数: 43 | 44 | - **proxy** 代理列表,参见[代理](#代理)。 45 | - **proxy_image** 是否同时使用代理来下载图片和扫描网页,默认为是 46 | - **proxy_image_only** 是否仅使用代理来下载图片,不用于扫描网页,默认为否 47 | - **rpc_interface** RPC绑定的IP,参见[JSON-RPC](#json-rpc),默认为`localhost` 48 | - **rpc_port** RPC绑定的端口,默认为`None` 49 | - **rpc_secret** RPC密钥,默认为`None` (不开启RPC服务器) 50 | - **rpc_open_browser** RPC服务端启动后自动打开浏览器页面,默认为是 51 | - **delete_task_files** 是否删除任务时同时删除下载的文件,默认为否 52 | - **download_range** 
设置下载的图片范围,参见[下载范围](#下载范围) 53 | - **scan_thread_cnt** 扫描线程数,默认为`1` 54 | - **download_thread_cnt** 下载线程数,默认为`5` 55 | - **download_timeout** 设置下载图片的超时,默认为`10`秒 56 | - **low_speed_threshold** 设置最低下载速度,低于此值将换源重新下载,单位为KB/s,默认为`10` 57 | - **ignored_errors** 设置忽略的错误码,默认为空,错误码可以从`const.py`中获得 58 | - **auto_update** 自动检查更新,`check` 仅检查更新,`download` 下载更新,`off` 关闭检查;默认为`download` 59 | - **update_beta_channel** 设置是否更新到测试版,默认为否 60 | - **log_path** 日志路径,默认为`eh.log` 61 | - **log_verbose** 日志等级,可选1-3,值越大输出越详细,默认为`2` 62 | - **save_tasks** 是否保存任务到`h.json`,可用于断点续传,默认为否 63 | 64 | 65 | ### 命令行模式 66 | ``` 67 | 用法: xeH [-u USERNAME] [-k KEY] [-c COOKIE] [-i] [--daemon] [-d DIR] [-o] 68 | [-j BOOL] [-r BOOL] [-p PROXY] [--proxy-image | --proxy-image-only] 69 | [--rpc-interface ADDR] [--rpc-port PORT] [--rpc-secret ...] 70 | [--rpc-open-browser BOOL] [--delete-task-files BOOL] [-a BOOL] 71 | [--download-range a-b,c-d,e] [-t N] [--timeout N] 72 | [--low-speed-threshold N] [-f] [--auto-update {check,download,off}] 73 | [--update-beta-channel BOOL] [-l /path/to/eh.log] [-v] [-h] 74 | [--version] 75 | [url [url ...]] 76 | 77 | 绅♂士下载器 78 | 79 | 必选参数: 80 | url 下载页的网址 81 | 82 | 可选参数: 83 | -u USERNAME, --username USERNAME 84 | 用户名 85 | -k KEY, --key KEY 密码 86 | -c COOKIE, --cookie COOKIE 87 | Cookie字符串, 如果指定了用户名和密码, 此项会被忽略 88 | -i, --interactive 交互模式, 如果开启后台模式, 此项会被忽略 (默认: False) 89 | --daemon 后台模式 (默认: False) 90 | -d DIR, --dir DIR 设置下载目录 (默认: 当前目录) 91 | -o, --download-ori 是否下载原始图片(如果存在), 需要登录 (默认: False) 92 | -j BOOL, --jpn-title BOOL 93 | 使用日语标题, 如果关闭则使用英文或罗马字标题 (默认: True) 94 | -r BOOL, --rename-ori BOOL 95 | 将图片重命名为原始名称, 如果关闭则使用序号 (默认: False) 96 | -p PROXY, --proxy PROXY 97 | 设置代理, 可以指定多次, 当前支持的类型: socks5/4a, http(s), glype. 98 | 代理默认只用于扫描网页 (默认: 空) 99 | --proxy-image 同时使用代理来下载图片和扫描网页(默认: True) 100 | --proxy-image-only 仅使用代理来下载图片, 不用于扫描网页 (默认: False) 101 | --rpc-interface ADDR 设置JSON-RPC监听IP (默认: localhost) 102 | --rpc-port PORT 设置JSON-RPC监听端口 (默认: None) 103 | --rpc-secret ... 
设置JSON-RPC密钥 (默认: None) 104 | --rpc-open-browser BOOL 105 | RPC服务端启动后自动打开浏览器页面 (默认: True) 106 | --delete-task-files BOOL 107 | 删除任务时同时删除下载的文件 (默认: False) 108 | -a BOOL, --archive BOOL 109 | 下载完成后生成zip压缩包并删除下载目录 (默认: False) 110 | --download-range a-b,c-d,e 111 | 设置下载的图片范围, 格式为 开始位置-结束位置, 或者单张图片的位置, 使用逗号来分隔多个范围, 例如 112 | 5-10,15,20-25, 默认为下载所有 113 | --low-speed-threshold N 114 | 设置最低下载速度,低于此值将换源重新下载 (默认: 10 KB/s) 115 | -t N, --thread N 下载线程数 (默认: 5) 116 | --timeout N 设置下载图片的超时 (默认: 10秒) 117 | -f, --force 忽略配额判断, 继续下载 (默认: False) 118 | --auto-update {check,download,off} 119 | 检查并自动下载更新 (默认: download) 120 | --update-beta-channel BOOL 121 | 是否更新到测试分支 (默认: True) 122 | -l /path/to/eh.log, --logpath /path/to/eh.log 123 | 保存日志的路径 (默认: eh.log) 124 | -v, --verbose 设置日志装逼等级 (默认: 2) 125 | -h, --help 显示本帮助信息 126 | --version 显示版本信息 127 | 128 | ``` 129 | 130 | 如果参数未指定, 则使用config.py中的默认值;否则将覆盖config.py设置的值。 131 | 132 | ### JSON-RPC 133 | 134 | 在指定`rpc_interface`和`rpc_port`后, xeHentai会启动RPC服务器。使用[JSON-RPC 2.0](http://www.jsonrpc.org/specification)标准。典型的请求如下: 135 | 136 | ``` 137 | $ curl localhost:8010/jsonrpc -d '{"jsonrpc": "2.0", "id": 1, "method":"xeH.addTask", "params":[[args],{kwargs}]}' 138 | {"jsonrpc": "2.0", "id": 1, "result": "36df423e"} 139 | ``` 140 | 141 | `rpc_secret`可用于提高安全性。如果`rpc_secret`设置为**hentai**, 则需在params中带上这个值: 142 | ``` 143 | $ curl localhost:8010/jsonrpc -d '{"jsonrpc": "2.0", "id": 1, "method":"xeH.addTask", "params":["token:hentai",[args],{kwargs}]}' 144 | {"jsonrpc": "2.0", "id": 1, "result": "36df423e"} 145 | ``` 146 | 147 | 其中`method`为调用的方法,必须以**xeH.** 开头。在[core.py](xeHentai/core.py)的xeHentai类中,所有不以下划线`_`开头的方法均可以通过RPC调用,但需将方法名的下划线命名法改为驼峰命名法。如`add_task`需改为`addTask`。 148 | 149 | 参数列表请参阅xeHentai类。 150 | 151 | 如果浏览器安装了用户脚本插件,可以[下载xeHentaiHelper.user.js](http://dl.yooooo.us/userscripts/xeHentaiHelper.user.js),将会在页面上添加`Add to xeHentai`链接,以支持将当前页面添加到xeHentai中。Chrome用户需要安装[Tampermonkey](https://chrome.google.com/webstore/detail/tampermonkey/dhdgffkkebhmkfjojejmpbldmpobfkfo), 152 | Firefox用户需要安装[Greasemonkey](https://addons.mozilla.org/en-US/firefox/addon/greasemonkey/),Opera和傲游用户需要安装暴力猴。 153 | 154 | **由于绅士站启用了https,而rpc走的是http,所以chrome用户需要点击地址栏右侧盾牌,选择“加载不安全的脚本”** 155 | 156 | ### 运行模式 157 | 158 | 如果通过命令行或交互模式指定了下载url,xeHentai会在下载完成`h.json`中存储的任务(如果存在)及指定的url后退出。 159 | 160 | 如果命令行没有指定url,xeHentai将会在完成存档`h.json`中的队列(如果存在)后继续等待。 161 | 162 | 如果指定了后台模式(`-d`或设置`daemon`为`True`),xeHentai将会在保持后台运行。 163 | 164 | ### 代理 165 | 166 | 目前支持三种模式的代理: 167 | 168 | - socks代理,如`socks5h://127.0.0.1:1080`;如果需要在客户端解析DNS,请使用`socks5://127.0.0.1:1080`。 169 | - http(s)代理,如`http://127.0.0.1:8080`。 170 | - glype代理,如`http://example.com/browse.php?u=a&b=4`。请根据实际情况修改`b`的名称。glype是目前使用最广的php在线代理,使用时请取消勾选“加密url(Encrypt URL)”、取消勾选“移除脚本 (Remove Scripts)”、勾选“允许cookies (Allow Cookies)”后随意打开一个网页,然后把网址粘贴进来 171 | 172 | 可以指定多个代理,格式如`['socks5h://127.0.0.1:1080', 'http://127.0.0.1:8080']`。 173 | 174 | 默认情况下代理会被用于扫描网页和下载图片。如果不需要使用代理下载图片,请在配置文件中设置`proxy_image`为**False**。 175 | 176 | 如果使用代理仅用于突破封锁的目的,则此项可以设置为`False`;如果需要保证隐私,请将此项设置为`True`。使用glype代理的用户建议将此项设为`False`。 177 | 178 | 如果仅需要使用代理下载图片,不需要扫描网页,请在配置文件中设置`proxy_image_only`为**True**,或者在运行时加上`--proxy-image-only`参数。如果在配置中的`proxy_image`和`proxy_image_only`均为**True**,则`proxy_image`将被忽略。 179 | 180 | ### 下载范围 181 | 182 | 下载范围的格式为使用`开始位置-结束位置`,例如`5-10`表示下载第5到第10张图片,包括第5和第10张;或者单个位置,例如`15`表示下载第15张图片。 183 | 184 | 可以通过逗号来分割多个范围,例如`5-10,15`表示下载第5到第10张图片以及第15张图片。 185 | 186 | 如果不输入下载范围,则默认下载所有图片。 187 | 188 | 189 | ## 其他说明 190 | 191 | ### 配额 192 | 193 | 
直接从服务器及镜像途径下载的图片计入配额,从H@H下载的不计算;下载新发布的、冷门的漫画以及原图更有可能消耗配额,下载热门漫画基本不消耗配额 194 | 195 | ## License 196 | 197 | GPLv3 198 | *** 199 | ![@fffonion](http://img.t.sinajs.cn/t5/style/images/register/logo.png)[@fffonion](http://weibo.com/376463435)                         ![Blog](https://s.w.org/about/images/logos/wordpress-logo-32-blue.png)  [博客](https://yooooo.us) 200 | -------------------------------------------------------------------------------- /README.cht.md: -------------------------------------------------------------------------------- 1 | # 紳♂士♂站♂小♂爬♂蟲 2 | 3 | [![Build Status](https://travis-ci.org/fffonion/xeHentai.svg?branch=master)](https://travis-ci.org/fffonion/xeHentai) 4 | 5 | [English](README.md) [繁體中文](README.cht.md) 6 | 7 | [xeHentai Web界面](https://github.com/fffonion/xeHentai-webui) 8 | 9 | ## 快速入門 10 | 11 | windows用戶可以下載可執行文件 [這裡](https://github.com/fffonion/xeHentai/releases) [或這裡](http://dl.yooooo.us/share/xeHentai/) 12 | 13 | 或者可以運行源碼 14 | 15 | ```shell 16 | pip install -U requests[socks] 17 | git clone https://github.com/fffonion/xeHentai.git 18 | cd xeHentai 19 | python ./setup.py install 20 | xeH 21 | ``` 22 | 23 | 新版本默認為命令行模式,如果需要使用交互模式,請運行`xeH.py -i` 24 | 25 | ## 詳細說明 26 | 27 | ### 配置文件 28 | 29 | 使用源碼運行的用戶請先將`xeHentai/config.py`複製到當前目錄。 30 | 31 | 配置的優先級為 交互模式 > 命令行參數 > 用戶config.py > 內置config.py。 32 | 33 | 常用參數: 34 | 35 | - **daemon** 後台模式,僅支持posix兼容的系統,參見[運行模式](#運行模式),默認為否 36 | - **dir** 下載目錄,默認為當前目錄 37 | - **download_ori** 是否下載原圖,默認為否 38 | - **jpn_title** 是否使用日語標題,如果關閉則使用英文或羅馬字標題,默認為是 39 | - **rename_ori** 將圖片重命名為原始名稱,如果關閉則使用序號,默認為否 40 | - **make_archive** 是否下載完成後生成zip壓縮包,並刪除下載目錄,默認為否 41 | 42 | 高級參數: 43 | 44 | - **proxy** 代理列表,參見[代理](#代理)。 45 | - **proxy_image** 是否同時使用代理來下載圖片和掃描網頁,默認為是 46 | - **proxy_image_only** 是否僅使用代理來下載圖片,不用於掃描網頁,默認為否 47 | - **rpc_interface** RPC綁定的IP,參見[JSON-RPC](#json-rpc),默認為`localhost` 48 | - **rpc_port** RPC綁定的埠,默認為`None` 49 | - **rpc_secret** RPC密鑰,默認為`None` (不開啟RPC伺服器) 50 | - **rpc_open_browser** RPC服務端啟動後自動打開瀏覽器頁面,默認為是 51 | - **delete_task_files** 是否刪除任務時同時刪除下載的文件,默認為否 52 | - **download_range** 設置下載的圖片範圍,參見[下載範圍](#下載範圍) 53 | - **scan_thread_cnt** 掃描線程數,默認為`1` 54 | - **download_thread_cnt** 下載線程數,默認為`5` 55 | - **download_timeout** 設置下載圖片的超時,默認為`10`秒 56 | - **low_speed_threshold** 設置最低下載速度,低於此值將換源重新下載,單位為KB/s,默認為`10` 57 | - **ignored_errors** 設置忽略的錯誤碼,默認為空,錯誤碼可以從`const.py`中獲得 58 | - **log_path** 日誌路徑,默認為`eh.log` 59 | - **log_verbose** 日誌等級,可選1-3,值越大輸出越詳細,默認為`2` 60 | - **save_tasks** 是否保存任務到`h.json`,可用於斷點續傳,默認為否 61 | 62 | 63 | ### 命令行模式 64 | ``` 65 | 用法: xeH [-u USERNAME] [-k KEY] [-c COOKIE] [-i] [--daemon] [-d DIR] [-o] 66 | [-j BOOL] [-r BOOL] [-p PROXY] [--proxy-image | --proxy-image-only] 67 | [--rpc-interface ADDR] [--rpc-port PORT] [--rpc-secret ...] 
68 | [--rpc-open-browser BOOL] [--delete-task-files BOOL] [-a BOOL] 69 | [--download-range a-b,c-d,e] [-t N] [--timeout N] 70 | [--low-speed-threshold N] [-f] [-l /path/to/eh.log] [-v] [-h] 71 | [--version] 72 | [url [url ...]] 73 | 74 | 紳♂士下載器 75 | 76 | 必選參數: 77 | url 下載頁的網址 78 | 79 | 可選參數: 80 | -u USERNAME, --username USERNAME 81 | 用戶名 82 | -k KEY, --key KEY 密碼 83 | -c COOKIE, --cookie COOKIE 84 | Cookie字符串, 如果指定了用戶名和密碼, 此項會被忽略 85 | -i, --interactive 交互模式, 如果開啟後台模式, 此項會被忽略 (默認: False) 86 | --daemon 後台模式 (默認: False) 87 | -d DIR, --dir DIR 設置下載目錄 (默認: 當前目錄) 88 | -o, --download-ori 是否下載原始圖片(如果存在), 需要登錄 (默認: False) 89 | -j BOOL, --jpn-title BOOL 90 | 使用日語標題, 如果關閉則使用英文或羅馬字標題 (默認: True) 91 | -r BOOL, --rename-ori BOOL 92 | 將圖片重命名為原始名稱, 如果關閉則使用序號 (默認: False) 93 | -p PROXY, --proxy PROXY 94 | 設置代理, 可以指定多次, 當前支持的類型: socks5/4a, http(s), glype. 95 | 代理默認只用於掃描網頁 (默認: 空) 96 | --proxy-image 同時使用代理來下載圖片和掃描網頁(默認: True) 97 | --proxy-image-only 僅使用代理來下載圖片, 不用於掃描網頁 (默認: False) 98 | --rpc-interface ADDR 設置JSON-RPC監聽IP (默認: localhost) 99 | --rpc-port PORT 設置JSON-RPC監聽埠 (默認: None) 100 | --rpc-secret ... 設置JSON-RPC密鑰 (默認: None) 101 | --rpc-open-browser BOOL 102 | RPC服務端啟動後自動打開瀏覽器頁面 (默認: True) 103 | --delete-task-files BOOL 104 | 刪除任務時同時刪除下載的文件 (默認: False) 105 | -a BOOL, --archive BOOL 106 | 下載完成後生成zip壓縮包並刪除下載目錄 (默認: False) 107 | --download-range a-b,c-d,e 108 | 設置下載的圖片範圍, 格式為 開始位置-結束位置, 或者單張圖片的位置, 使用逗號來分隔多個範圍, 例如 109 | 5-10,15,20-25, 默認為下載所有 110 | --low-speed-threshold N 111 | 設置最低下載速度,低於此值將換源重新下載 (默認: 10 KB/s) 112 | -t N, --thread N 下載線程數 (默認: 5) 113 | --timeout N 設置下載圖片的超時 (默認: 10秒) 114 | -f, --force 忽略配額判斷, 繼續下載 (默認: False) 115 | -l /path/to/eh.log, --logpath /path/to/eh.log 116 | 保存日誌的路徑 (默認: eh.log) 117 | -v, --verbose 設置日誌裝逼等級 (默認: 2) 118 | -h, --help 顯示本幫助信息 119 | --version 顯示版本信息 120 | 121 | ``` 122 | 123 | 如果參數未指定, 則使用config.py中的默認值;否則將覆蓋config.py設置的值。 124 | 125 | ### JSON-RPC 126 | 127 | 在指定`rpc_interface`和`rpc_port`後, xeHentai會啟動RPC伺服器。使用[JSON-RPC 2.0](http://www.jsonrpc.org/specification)標准。典型的請求如下: 128 | 129 | ``` 130 | $ curl localhost:8010/jsonrpc -d '{"jsonrpc": "2.0", "id": 1, "method":"xeH.addTask", "params":[[args],{kwargs}]}' 131 | {"jsonrpc": "2.0", "id": 1, "result": "36df423e"} 132 | ``` 133 | 134 | `rpc_secret`可用於提高安全性。如果`rpc_secret`設置為**hentai**, 則需在params中帶上這個值: 135 | ``` 136 | $ curl localhost:8010/jsonrpc -d '{"jsonrpc": "2.0", "id": 1, "method":"xeH.addTask", "params":["token:hentai",[args],{kwargs}]}' 137 | {"jsonrpc": "2.0", "id": 1, "result": "36df423e"} 138 | ``` 139 | 140 | 其中`method`為調用的方法,必須以**xeH.** 開頭。在[core.py](xeHentai/core.py)的xeHentai類中,所有不以下劃線`_`開頭的方法均可以通過RPC調用,但需將方法名的下劃線命名法改為駝峰命名法。如`add_task`需改為`addTask`。 141 | 142 | 參數列表請參閱xeHentai類。 143 | 144 | 如果瀏覽器安裝了用戶腳本插件,可以[下載xeHentaiHelper.user.js](http://dl.yooooo.us/userscripts/xeHentaiHelper.user.js),將會在頁面上添加`Add to xeHentai`鏈接,以支持將當前頁面添加到xeHentai中。Chrome用戶需要安裝[Tampermonkey](https://chrome.google.com/webstore/detail/tampermonkey/dhdgffkkebhmkfjojejmpbldmpobfkfo), 145 | Firefox用戶需要安裝[Greasemonkey](https://addons.mozilla.org/en-US/firefox/addon/greasemonkey/),Opera和傲遊用戶需要安裝暴力猴。 146 | 147 | **由於紳士站啟用了https,而rpc走的是http,所以chrome用戶需要點擊地址欄右側盾牌,選擇「加載不安全的腳本」** 148 | 149 | ### 運行模式 150 | 151 | 如果通過命令行或交互模式指定了下載url,xeHentai會在下載完成`h.json`中存儲的任務(如果存在)及指定的url後退出。 152 | 153 | 如果命令行沒有指定url,xeHentai將會在完成存檔`h.json`中的隊列(如果存在)後繼續等待。 154 | 155 | 如果指定了後台模式(`-d`或設置`daemon`為`True`),xeHentai將會在保持後台運行。 156 | 157 | ### 代理 158 | 159 | 目前支持三種模式的代理: 160 | 161 | - socks代理,如`socks5h://127.0.0.1:1080`;如果需要在客戶端解析DNS,請使用`socks5://127.0.0.1:1080`。 162 | - 
http(s)代理,如`http://127.0.0.1:8080`。
163 | - glype代理,如`http://example.com/browse.php?u=a&b=4`。請根據實際情況修改`b`的名稱。glype是目前使用最廣的php在線代理,使用時請取消勾選「加密url(Encrypt URL)」、取消勾選「移除腳本 (Remove Scripts)」、勾選「允許cookies (Allow Cookies)」後隨意打開一個網頁,然後把網址粘貼進來
164 |
165 | 可以指定多個代理,格式如`['socks5h://127.0.0.1:1080', 'http://127.0.0.1:8080']`。
166 |
167 | 默認情況下代理會被用於掃描網頁和下載圖片。如果不需要使用代理下載圖片,請在配置文件中設置`proxy_image`為**False**。
168 |
169 | 如果使用代理僅用於突破封鎖的目的,則此項可以設置為`False`;如果需要保證隱私,請將此項設置為`True`。使用glype代理的用戶建議將此項設為`False`。
170 |
171 | 如果僅需要使用代理下載圖片,不需要掃描網頁,請在配置文件中設置`proxy_image_only`為**True**,或者在運行時加上`--proxy-image-only`參數。如果在配置中的`proxy_image`和`proxy_image_only`均為**True**,則`proxy_image`將被忽略。
172 |
173 | ### 下載範圍
174 |
175 | 下載範圍的格式為使用`開始位置-結束位置`,例如`5-10`表示下載第5到第10張圖片,包括第5和第10張;或者單個位置,例如`15`表示下載第15張圖片。
176 |
177 | 可以通過逗號來分割多個範圍,例如`5-10,15`表示下載第5到第10張圖片以及第15張圖片。
178 |
179 | 如果不輸入下載範圍,則默認下載所有圖片。
180 |
181 |
182 | ## 其他說明
183 |
184 | ### 配額
185 |
186 | 直接從伺服器及鏡像途徑下載的圖片計入配額,從H@H下載的不計算;下載新發布的、冷門的漫畫以及原圖更有可能消耗配額,下載熱門漫畫基本不消耗配額
187 |
188 | ## License
189 |
190 | GPLv3
191 | ***
192 | ![@fffonion](http://img.t.sinajs.cn/t5/style/images/register/logo.png)[@fffonion](http://weibo.com/376463435)                         ![Blog](https://s.w.org/about/images/logos/wordpress-logo-32-blue.png)  [博客](https://yooooo.us)
193 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # E-Hentai Dōjinshi Downloader
 2 |
 3 | [![Build Status](https://travis-ci.org/fffonion/xeHentai.svg?branch=master)](https://travis-ci.org/fffonion/xeHentai)
 4 |
 5 | [简体中文](README.chs.md) [繁體中文](README.cht.md)
 6 |
 7 | [xeHentai WebUI](https://github.com/fffonion/xeHentai-webui)
 8 |
 9 | ## TL;DR
10 |
11 | Windows users can download packed binaries from [here](https://github.com/fffonion/xeHentai/releases) or [here](http://dl.yooooo.us/share/xeHentai/). The package is built using [PyInstaller](http://www.pyinstaller.org/).
12 |
13 | Or run directly from source code:
14 |
15 | ```shell
16 | pip install -U requests[socks]
17 | git clone https://github.com/fffonion/xeHentai.git
18 | cd xeHentai
19 | python ./setup.py install
20 | xeH
21 | ```
22 |
23 | The program runs in non-interactive mode by default. To run interactively, use `xeH.py -i`.
24 |
25 | ## For pros
26 |
27 | ### Configuration file
28 |
29 | If you are running from source code, please copy `xeHentai/config.py` to your current directory first and use that copy as your config file.
30 |
31 | The priority of configuration is: Interactive inputs > Command line options > User config.py > Internal config.py.
32 |
33 | Configuration keys:
34 |
35 | - **daemon** Set to run in daemon mode; only available on POSIX-compatible systems. Refer to [Running Modes](#running-modes). Default to `False`.
36 | - **dir** Download directory. Default to current directory.
37 | - **download_ori** Set to download original images or not. Default to `False`.
38 | - **jpn_title** Set to prefer the Japanese title. If set to `False`, the English or Romaji title will be used. Default to `True`.
39 | - **rename_ori** Set to rename images to their original names. If set to `False`, images will be named by sequence number. Default to `False`.
40 | - **make_archive** Set to make a ZIP archive after download and delete the download directory. Default to `False`.
41 |
42 | - **proxy** Proxy list. Refer to [Proxies](#proxies).
43 | - **proxy_image** Set to use proxy both on downloading images and scanning webpages. Default to `True`.
44 | - **proxy_image_only** Set to use proxy only on downloading images. Default to `False`.
45 | - **rpc_interface** RPC server binding IP. Refer to [JSON-RPC](#json-rpc). Default to `localhost`.
46 | - **rpc_port** RPC server binding port. Default to `None` (not serving).
47 | - **rpc_secret** RPC secret key. Default to `None`.
48 | - **rpc_open_browser** Automatically open the browser after the RPC server starts. Default to `True`.
49 | - **delete_task_files** Set to delete downloaded files when deleting a task. Default to `False`.
50 | - **download_range** Set image download range. Refer to [Download range](#download-range). Default to download all images.
51 | - **scan_thread_cnt** Thread count for scanning webpages. Default to `1`.
52 | - **download_thread_cnt** Thread count for downloading images. Default to `5`.
53 | - **download_timeout** Timeout for downloading images. Default to `10`s.
54 | - **low_speed_threshold** Retry a download if its speed is lower than this value. Default to `10` KB/s.
55 | - **ignored_errors** Set the error codes to ignore and continue downloading. Default to *empty*. Error codes can be obtained from [const.py](xeHentai/const.py).
56 | - **auto_update** Auto-update mode: `check` to check only, `download` to check and download, `off` to disable. Default to `download`.
57 | - **update_beta_channel** Set to `True` to update from the dev (beta) branch. Default to `False`.
58 | - **log_path** Set log file path. Default to `eh.log`.
59 | - **log_verbose** Set log level with an integer from 1 to 3. A bigger value means more verbose output. Default to `2`.
60 | - **save_tasks** Set to save uncompleted tasks in `h.json`. Default to `False`.
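As an illustration, a user `config.py` that overrides a few of these keys might look like the sketch below; every value shown is an example, not a recommendation, and any key you omit keeps its built-in default:

```python
# config.py -- placed in your working directory to override the built-in defaults
dir = "/path/to/downloads"            # download directory (path is a placeholder)
download_ori = True                   # fetch original images, needs login
make_archive = True                   # zip each gallery after download
proxy = ["socks5h://127.0.0.1:1080"]  # example local socks proxy
proxy_image = False                   # use the proxy for webpage scans only
download_thread_cnt = 10              # more parallel image downloads
save_tasks = True                     # keep resumable task state in h.json
```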
61 |
62 |
63 | ### Command line options
64 | ```
65 | Usage: xeh [-u USERNAME] [-k KEY] [-c COOKIE] [-i] [--daemon] [-d DIR] [-o]
66 |            [-j BOOL] [-r BOOL] [-p PROXY] [--proxy-image | --proxy-image-only]
67 |            [--rpc-interface ADDR] [--rpc-port PORT] [--rpc-secret ...]
68 |            [--rpc-open-browser BOOL] [--delete-task-files BOOL] [-a BOOL]
69 |            [--download-range a-b,c-d,e] [-t N] [--timeout N]
70 |            [--low-speed-threshold N] [-f] [--auto-update {check,download,off}]
71 |            [--update-beta-channel BOOL] [-l /path/to/eh.log] [-v] [-h]
72 |            [--version]
73 |            [url [url ...]]
74 |
75 | xeHentai Downloader NG
76 |
77 | positional arguments:
78 |   url                   gallery url(s) to download
79 |
80 | optional arguments:
81 |   -u USERNAME, --username USERNAME
82 |                         username
83 |   -k KEY, --key KEY     password
84 |   -c COOKIE, --cookie COOKIE
85 |                         cookie string, will be overridden if given -u and -k
86 |   -i, --interactive     interactive mode, will be ignored in daemon mode
87 |                         (default: False)
88 |   --daemon              daemon mode, can't use with -i (default: False)
89 |   -d DIR, --dir DIR     set download directory (current:
90 |                         /Users/fffonion/Dev/Python/xeHentai)
91 |   -o, --download-ori    download original images, needs to login (current:
92 |                         True)
93 |   -j BOOL, --jpn-title BOOL
94 |                         use Japanese title, use English/Romaji title if turned
95 |                         off (default: True)
96 |   -r BOOL, --rename-ori BOOL
97 |                         rename gallery image to original name, use sequence
98 |                         name if turned off (default: False)
99 |   -p PROXY, --proxy PROXY
100 |                         set download proxies, can be used multiple times,
101 |                         currently supported: socks5/4a, http(s), glype.
102 |                         Proxies are only used on webpages by default (current:
103 |                         ['socks5h://127.0.0.1:16963'])
104 |   --proxy-image         use proxies on images and webpages (default: True)
105 |   --proxy-image-only    only use proxies on images, not webpages (current:
106 |                         False)
107 |   --rpc-interface ADDR  bind jsonrpc server to this address (current:
108 |                         localhost)
109 |   --rpc-port PORT       bind jsonrpc server to this port (default: 8010)
110 |   --rpc-secret ...      jsonrpc secret string (default: None)
111 |   --rpc-open-browser BOOL
112 |                         automatically open browser after RPC server starts
113 |                         (default: True)
114 |   --delete-task-files BOOL
115 |                         delete downloaded files when deleting a task (default:
116 |                         True)
117 |   -a BOOL, --archive BOOL
118 |                         make an archive (.zip) after download and delete
119 |                         directory (default: False)
120 |   --download-range a-b,c-d,e
121 |                         specify ranges of images to be downloaded, in format
122 |                         start-end, or single index, use comma to concat
123 |                         multiple ranges, e.g.: 5-10,15,20-25, default to
124 |                         download all images
125 |   -t N, --thread N      download threads count (default: 5)
126 |   --timeout N           set image download timeout (default: 10s)
127 |   --low-speed-threshold N
128 |                         retry download if speed is lower than specified value
129 |                         (default: 10 KB/s)
130 |   -f, --force           download regardless of quota exceeded warning
131 |                         (default: False)
132 |   --auto-update {check,download,off}
133 |                         check or download update automatically
134 |                         (default: download)
135 |   --update-beta-channel BOOL
136 |                         check update upon beta channel
137 |                         (default: True)
138 |   -l /path/to/eh.log, --logpath /path/to/eh.log
139 |                         define log path (default: eh.log)
140 |   -v, --verbose         show more detailed log (default: 3)
141 |   -h, --help            show this help message and exit
142 |   --version             show program's version number and exit
143 |
144 | ```
145 |
146 | If options are not defined, values from `config.py` will be used; otherwise they override the values set in `config.py`.
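As an illustration, a typical invocation combining several of these options could look like the following (the gallery URL is a placeholder):

```shell
# 10 download threads, a local socks proxy for webpage scans, zip the gallery when done
xeH -t 10 -p socks5h://127.0.0.1:1080 -a true https://e-hentai.org/g/1234567/abcdef1234/
```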
147 |
148 | ### JSON-RPC
149 |
150 | If `rpc_interface` and `rpc_port` are set, xeHentai will start an RPC server. Requests and responses follow the [JSON-RPC 2.0](http://www.jsonrpc.org/specification) standard.
151 |
152 | ```
153 | $ curl localhost:8010/jsonrpc -d '{"jsonrpc": "2.0", "id": 1, "method":"xeH.addTask", "params":[[args],{kwargs}]}'
154 | {"jsonrpc": "2.0", "id": 1, "result": "36df423e"}
155 | ```
156 |
157 | `rpc_secret` is a secret key for your RPC server. If it is set, clients must include this value in the request. For example, when `rpc_secret` is set to **hentai**:
158 | ```
159 | $ curl localhost:8010/jsonrpc -d '{"jsonrpc": "2.0", "id": 1, "method":"xeH.addTask", "params":["token:hentai",[args],{kwargs}]}'
160 | {"jsonrpc": "2.0", "id": 1, "result": "36df423e"}
161 | ```
162 |
163 | The method field must start with **xeH.**, followed by a public method of the **xeHentai** class from [core.py](xeHentai/core.py), with the name converted from *lower_case_with_underscores* notation to *lowerCamelCase* notation. For example, `add_task` becomes `addTask`.
164 |
165 | Refer to the **xeHentai** class in [core.py](xeHentai/core.py) for the parameter list.
166 |
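Besides `curl`, any JSON-RPC 2.0 client can drive the server. Below is a minimal Python sketch built on `requests` (already a dependency of xeHentai); it assumes the default `localhost:8010` binding, `rpc_secret` set to **hentai** as in the example above, and a placeholder gallery URL:

```python
# rpc_client.py -- minimal JSON-RPC 2.0 client sketch for xeHentai
import requests

RPC_URL = "http://localhost:8010/jsonrpc"  # rpc_interface:rpc_port
SECRET = "hentai"                          # your rpc_secret, or None if unset

def xeh_call(method, args=None, kwargs=None):
    # public xeHentai methods map to xeH.<lowerCamelCase>, e.g. add_task -> addTask
    params = (["token:%s" % SECRET] if SECRET else []) + [args or [], kwargs or {}]
    payload = {"jsonrpc": "2.0", "id": 1, "method": "xeH.%s" % method, "params": params}
    resp = requests.post(RPC_URL, json=payload).json()
    if "error" in resp:
        raise RuntimeError(resp["error"])
    return resp["result"]

# the gallery URL below is a placeholder
guid = xeh_call("addTask", args=["https://e-hentai.org/g/1234567/abcdef1234/"])
print(guid)  # task guid, e.g. "36df423e"
```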
167 | If your browser has a Userscript plugin, you can use [xeHentaiHelper.user.js](http://dl.yooooo.us/userscripts/xeHentaiHelper.user.js) to create tasks directly on the e-hentai website. Chrome users need [Tampermonkey](https://chrome.google.com/webstore/detail/tampermonkey/dhdgffkkebhmkfjojejmpbldmpobfkfo), Firefox users need [Greasemonkey](https://addons.mozilla.org/en-US/firefox/addon/greasemonkey/), and Opera and Maxthon users need ViolentMonkey.
168 |
169 | **Because e-hentai has enabled https while the RPC server speaks plain http, Chrome users need to click the shield icon at the far right of the address bar and choose "Load anyway" or "Load unsafe scripts".**
170 |
171 | ### Running modes
172 |
173 | If xeHentai is run from the command line or in interactive mode, the program will exit after it finishes the tasks in `h.json` (if it exists) and the given URLs.
174 |
175 | If no URL is given on the command line, the program will keep waiting after it finishes the tasks in `h.json` (if it exists).
176 |
177 | If the program is running in daemon mode (`--daemon` is set or `daemon` is set to `True`), it will keep running in the background.
178 |
179 | ### Proxies
180 |
181 | xeHentai supports three types of proxies:
182 |
183 | - socks proxy: `socks5h://127.0.0.1:1080`. If you want to resolve DNS on the client side, use `socks5://127.0.0.1:1080`.
184 | - http(s) proxy: `http://127.0.0.1:8080`.
185 | - glype proxy: `http://example.com/browse.php?u=a&b=4`. Please set the name of `b` accordingly. glype is a widely used PHP proxy script. When using one, uncheck **Encrypt URL** and **Remove Scripts**, check **Allow Cookies**, open a random URL, then paste the resulting address into the configuration.
186 |
187 | Multiple proxies can be specified at the same time, e.g. `['socks5h://127.0.0.1:1080', 'http://127.0.0.1:8080']`.
188 |
189 | By default proxies are used both to download images and to scan webpages. If you don't want to use a proxy for downloading images, set `proxy_image` to `False`.
190 |
191 | glype users are encouraged to set `proxy_image` to `False`.
192 |
193 | If you only want to use a proxy to download images, not to scan webpages, set `proxy_image_only` to **True** in `config.py` or use the `--proxy-image-only` CLI option. If both `proxy_image` and `proxy_image_only` are set to **True**, `proxy_image` will be ignored.
194 |
195 | ### Download range
196 |
197 | Download ranges are set in the format `start_position-end_position`. For example, `5-10` downloads images 5 through 10, inclusive; a single index such as `15` downloads image 15 only.
198 |
199 | Multiple ranges can be separated with commas, for example `5-10,15`.
200 |
201 | If no range is given, xeHentai will download all images.
202 |
203 |
204 | ## Misc
205 |
206 | ### Image limit
207 |
208 | Downloaded images count towards your image limit, except those fetched from Hentai@Home. The cost is calculated by the e-hentai server based on the popularity of the gallery, the server load and/or Hentai@Home bandwidth.
209 | 210 | ## License 211 | 212 | GPLv3 213 | *** 214 | ![Blog](https://s.w.org/about/images/logos/wordpress-logo-32-blue.png)  [Blog](https://yooooo.us) 215 | -------------------------------------------------------------------------------- /icon3.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fffonion/xeHentai/43fc55c0c662f195d048becf6276a26ce06b09d9/icon3.ico -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests[socks] 2 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | PROJ_NAME = 'xeHentai' 4 | PACKAGE_NAME = 'xeHentai' 5 | 6 | PROJ_METADATA = '%s.json' % PROJ_NAME 7 | 8 | import os, json, imp 9 | here = os.path.abspath(os.path.dirname(__file__)) 10 | 11 | try: 12 | README = open(os.path.join(here, 'README.md')).read() 13 | except: 14 | README = "" 15 | try: 16 | CHANGELOG = open(os.path.join(here, 'CHANGELOG.md')).read() 17 | except: 18 | CHANGELOG = "" 19 | VERSION = imp.load_source('version', os.path.join(here, '%s/const.py' % PACKAGE_NAME)).__version__ 20 | 21 | packages = [ 22 | 'xeHentai', 23 | 'xeHentai.util', 24 | 'xeHentai.i18n', 25 | 'xeHentai.updater', 26 | ] 27 | requires = ['requests'] 28 | 29 | from setuptools import setup 30 | 31 | setup( 32 | name=PACKAGE_NAME, 33 | version=VERSION, 34 | description='xeHentai Downloader', 35 | long_description=README + '\n\n' + CHANGELOG, 36 | author='fffonion', 37 | author_email='fffonion@gmail.com', 38 | url='https://yooooo.us/2013/xehentai', 39 | packages=packages, 40 | package_dir={'requests': 'requests'}, 41 | include_package_data=True, 42 | install_requires=requires, 43 | license='GPLv3', 44 | zip_safe=False, 45 | classifiers=( 46 | 'Development Status :: 4 - Beta', 47 | 'Intended Audience :: End Users/Desktop', 48 | 'Natural Language :: English', 49 | 'OSI Approved :: GNU General Public License v3 (GPLv3)', 50 | 'Programming Language :: Python', 51 | 'Programming Language :: Python :: 2.6', 52 | 'Programming Language :: Python :: 2.7', 53 | 'Programming Language :: Python :: 3', 54 | 'Programming Language :: Python :: 3.3', 55 | 'Programming Language :: Python :: 3.4', 56 | 'Programming Language :: Python :: 3.5', 57 | 'Programming Language :: Python :: Implementation :: CPython', 58 | 'Programming Language :: Python :: Implementation :: PyPy' 59 | ), 60 | requires=requires, 61 | entry_points = {'console_scripts': ["xeH = xeHentai.cli:start"]}, 62 | ) 63 | -------------------------------------------------------------------------------- /util/make_release_config.py: -------------------------------------------------------------------------------- 1 | #coding: utf-8 2 | 3 | import os 4 | import sys 5 | import re 6 | FILEPATH = os.path.join(sys.path[0], "..") 7 | sys.path.insert(0, os.path.join(FILEPATH, "xeHentai")) 8 | import config 9 | 10 | target = os.path.join(FILEPATH, "release") 11 | if not os.path.exists(target): 12 | os.mkdir(target) 13 | 14 | target = os.path.join(target, "config.py") 15 | cli = open(os.path.join(FILEPATH, "xeHentai", "cli.py"), "r", encoding="utf-8").read() 16 | zh_hans = open(os.path.join(FILEPATH, "xeHentai", "i18n", "zh_hans.py"), "r", encoding="utf-8").read() 17 | 18 | f = open(target, "w", encoding="utf-8") 19 | f.write('''# 
coding:utf-8
20 | # --UTF8补丁-- #
21 |
22 | ''')
23 |
24 | other_mappings = {
25 |     "save_tasks": "是否保存任务到h.json,可用于断点续传",
26 |     "scan_thread_cnt": "扫描线程数",
27 |     # "download_range": "设置下载的图片范围, 格式为 开始位置-结束位置, 或者单张图片的位置\n" + \
28 |     # "# 使用逗号来分隔多个范围, 例如 5-10,15,20-25, 默认为下载所有"
29 | }
30 |
31 | for k in sorted(config.__dict__):
32 |     if k.startswith("__"):
33 |         continue
34 |     if k not in other_mappings:
35 |         if k == "download_range":
36 |             i18n = "XEH_OPT_download_range"
37 |         else:
38 |             i18n = re.findall(r"_def\[['\"]%s['\"]\].*?help\s*=\s*i18n.([^\)]+)\)" % k, cli, re.DOTALL)[0]
39 |         txt = re.findall(r"%s\s*=\s*['\"](.*?)\s*\(当前.+['\"]" % i18n, zh_hans, re.DOTALL)[0]
40 |         # multiline fix
41 |         txt = txt.replace('"', '').replace('\\\n', '\n# ')
42 |         txt = re.sub(r"\nXEH_.+", "", txt, flags=re.DOTALL)
43 |     else:
44 |         txt = other_mappings[k]
45 |     f.write("# %s\n" % txt)
46 |     v = getattr(config, k)
47 |     if isinstance(v, str):
48 |         v = '"%s"' % v
49 |     f.write("%s = %s\n\n" % (k, v))
50 |
51 | f.close()
52 |
--------------------------------------------------------------------------------
/util/make_verinfo.py:
--------------------------------------------------------------------------------
 1 | # coding:utf-8
 2 |
 3 | import os
 4 | import sys
 5 | sys.path.insert(0, os.path.join(sys.path[0], ".."))
 6 | from xeHentai import const
 7 |
 8 | version = const.__version__
 9 | v = list(str(int(round(version * 1000))))
10 | print(version, v)
11 | tmpl='''# UTF-8
12 | #
13 | # For more details about fixed file info 'ffi' see:
14 | # http://msdn.microsoft.com/en-us/library/ms646997.aspx
15 | VSVersionInfo(
16 |   ffi=FixedFileInfo(
17 |     # filevers and prodvers should always be a tuple with four items: (1, 2, 3, 4)
18 |     # Set not needed items to zero 0.
19 |     filevers=(%s),
20 |     prodvers=(%s),
21 |     # Contains a bitmask that specifies the valid bits 'flags'.
22 |     mask=0x3f,
23 |     # Contains a bitmask that specifies the Boolean attributes of the file.
24 |     flags=0x0,
25 |     # The operating system for which this file was designed.
26 |     # 0x4 - NT and there is no need to change it.
27 |     OS=0x40004,
28 |     # The general type of file.
29 |     # 0x1 - the file is an application.
30 |     fileType=0x1,
31 |     # The function of the file.
32 |     # 0x0 - the function is not defined for this fileType
33 |     subtype=0x0,
34 |     # Creation date and time stamp.
35 |     date=(0, 0)
36 |     ),
37 |   kids=[
38 |     StringFileInfo(
39 |       [
40 |       StringTable(
41 |         u'080404B0',
42 |         [StringStruct(u'FileVersion', u'%s'),
43 |         StringStruct(u'ProductVersion', u'%s'),
44 |         StringStruct(u'OriginalFilename', u'xeHentai-%s.exe'),
45 |         StringStruct(u'InternalName', u'xeHentai'),
46 |         StringStruct(u'FileDescription', u'绅♂士漫画下载器'),
47 |         StringStruct(u'CompanyName', u'fffonion@gmail.com'),
48 |         StringStruct(u'LegalCopyright', u'GPLv3'),
49 |         StringStruct(u'ProductName', u'xeHentai')])
50 |       ]),
51 |     VarFileInfo([VarStruct(u'Translation', [2052, 1200])])
52 |   ]
53 | )''' % (
54 |     ", ".join(v), ", ".join(v),
55 |     ".".join(v), ".".join(v), version
56 | )
57 |
58 | open("verinfo.txt", "w", encoding="utf-8").write(tmpl)
59 |
--------------------------------------------------------------------------------
/xeH:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 |
 3 | import os
 4 | import sys
 5 | import json
 6 | import zipfile
 7 | from threading import Thread
 8 | import xeHentai.const as const
 9 |
10 | SRC_UPDATE_FILE = const.SRC_UPDATE_FILE
11 | SRC_UPDATE_VERSION = const.SRC_UPDATE_VERSION
12 | if const.PY3K:
13 |     from importlib import reload
14 |
15 | def load_update():
16 |     if os.path.exists(SRC_UPDATE_FILE):
17 |         try:
18 |             need_remove = False
19 |             update_id = ""
20 |             with zipfile.ZipFile(SRC_UPDATE_FILE, 'r') as z:
21 |                 try:
22 |                     r = json.loads(z.read("info.json"))
23 |                 except:
24 |                     need_remove = True
25 |                 else:
26 |                     if 'v' not in r or r['v'] != SRC_UPDATE_VERSION:
27 |                         # version info missing or stale, ignoring legacy file
28 |                         need_remove = True
29 |                     else:
30 |                         update_id = r["update_id"]
31 |             if need_remove:
32 |                 os.remove(SRC_UPDATE_FILE)
33 |                 return
34 |             v = const.__version__
35 |             sys.path.insert(0, SRC_UPDATE_FILE)
36 |             import xeHentai
37 |             reload(xeHentai)
38 |             xeHentai.const.VERSION_UPDATE = update_id
39 |             xeHentai.const.VERSION_UPDATE_LOADER = v
40 |         except:
41 |             if sys.path[0] == SRC_UPDATE_FILE:
42 |                 sys.path.pop(0)
43 |             os.remove(SRC_UPDATE_FILE)
44 |
45 |
46 | if __name__ == "__main__":
47 |     load_update()
48 |
49 |     from xeHentai import cli, i18n
50 |     cli.start()
51 |
--------------------------------------------------------------------------------
/xeH.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 |
 3 | import os
 4 | import sys
 5 | import json
 6 | import zipfile
 7 | from threading import Thread
 8 | import xeHentai.const as const
 9 |
10 | SRC_UPDATE_FILE = const.SRC_UPDATE_FILE
11 | SRC_UPDATE_VERSION = const.SRC_UPDATE_VERSION
12 | if const.PY3K:
13 |     from importlib import reload
14 |
15 | def load_update():
16 |     if os.path.exists(SRC_UPDATE_FILE):
17 |         try:
18 |             need_remove = False
19 |             update_id = ""
20 |             with zipfile.ZipFile(SRC_UPDATE_FILE, 'r') as z:
21 |                 try:
22 |                     r = json.loads(z.read("info.json"))
23 |                 except:
24 |                     need_remove = True
25 |                 else:
26 |                     if 'v' not in r or r['v'] != SRC_UPDATE_VERSION:
27 |                         # version info missing or stale, ignoring legacy file
28 |                         need_remove = True
29 |                     else:
30 |                         update_id = r["update_id"]
31 |             if need_remove:
32 |                 os.remove(SRC_UPDATE_FILE)
33 |                 return
34 |             v = const.__version__
35 |             sys.path.insert(0, SRC_UPDATE_FILE)
36 |             import xeHentai
37 |             reload(xeHentai)
38 |             xeHentai.const.VERSION_UPDATE = update_id
39 |             xeHentai.const.VERSION_UPDATE_LOADER = v
40 |         except:
41 |             if sys.path[0] == SRC_UPDATE_FILE:
42 |                 sys.path.pop(0)
43 |             os.remove(SRC_UPDATE_FILE)
44 |
45 |
46 | if __name__ == "__main__":
47 |     load_update()
48 |
49 |     from xeHentai import cli, i18n
50 |     cli.start()
51 |
--------------------------------------------------------------------------------
/xeHentai/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | -------------------------------------------------------------------------------- /xeHentai/cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | # Contributor: 4 | # fffonion 5 | 6 | from __future__ import absolute_import 7 | import os 8 | import time 9 | import argparse 10 | import traceback 11 | from threading import Thread 12 | from .i18n import i18n 13 | from .core import xeHentai 14 | from .const import * 15 | from .const import __version__ 16 | from .util import logger 17 | 18 | from . import config as default_config 19 | sys.path.insert(1, FILEPATH) 20 | try: 21 | import config 22 | except ImportError: 23 | config = default_config 24 | sys.path.pop(1) 25 | 26 | def start(): 27 | opt = parse_opt() 28 | xeH = xeHentai() 29 | if opt.auto_update != "off": 30 | check_update(xeH.logger, { 31 | "auto_update": opt.auto_update, 32 | "update_beta_channel": opt.update_beta_channel, 33 | }) 34 | if opt.daemon: 35 | if opt.interactive: 36 | xeH.logger.warning(i18n.XEH_OPT_IGNORING_I) 37 | if os.name == "posix": 38 | pid = os.fork() 39 | if pid == 0: 40 | sys.stdin.close() 41 | sys.stdout = open("/dev/null", "w") 42 | sys.stderr = open("/dev/null", "w") 43 | return main(xeH, opt) 44 | elif os.name == "nt": 45 | return xeH.logger.error(i18n.XEH_PLATFORM_NO_DAEMON % os.name) 46 | else: 47 | return xeH.logger.error(i18n.XEH_PLATFORM_NO_DAEMON % os.name) 48 | xeH.logger.info(i18n.XEH_DAEMON_START % pid) 49 | else: 50 | main(xeH, opt) 51 | 52 | def check_update(l, cfg): 53 | from .updater.updater import check_update 54 | t = Thread(name="updater", target=check_update, args=(l, cfg)) 55 | t.setDaemon(True) 56 | t.start() 57 | return t 58 | 59 | def main(xeH, opt): 60 | xeH.update_config(**vars(opt)) 61 | log = xeH.logger 62 | log.info(i18n.XEH_STARTED % xeH.verstr) 63 | if opt.cookie: 64 | xeH.set_cookie(opt.cookie) 65 | if opt.username and opt.key and not xeH.has_login: 66 | xeH.login_exhentai(opt.username, opt.key) 67 | if opt.interactive and not opt.daemon: 68 | try: 69 | r = interactive(xeH) 70 | opt.__dict__.update(r) 71 | xeH.update_config(**r) 72 | except (KeyboardInterrupt, SystemExit): 73 | log.info(i18n.XEH_CLEANUP) 74 | xeH._cleanup() 75 | return 76 | 77 | try: 78 | if opt.urls: 79 | for u in opt.urls: 80 | xeH.add_task(u.strip()) 81 | # Thread(target = lambda:(time.sleep(0.618), setattr(xeH, "_exit", XEH_STATE_SOFT_EXIT))).start() 82 | Thread(target = xeH._task_loop, name = "main" ).start() 83 | while xeH._exit < XEH_STATE_CLEAN: 84 | # if specify urls, finished this task and exit xeHentai 85 | if opt.urls and not [k for k, v in xeH._all_tasks.items() if TASK_STATE_WAITING <= v.state < TASK_STATE_FINISHED]: 86 | xeH._exit = XEH_STATE_SOFT_EXIT 87 | time.sleep(1) 88 | except KeyboardInterrupt: 89 | log.info(i18n.XEH_CLEANUP) 90 | xeH._term_threads() 91 | except Exception as ex: 92 | log.error(i18n.XEH_CRITICAL_ERROR % traceback.format_exc()) 93 | xeH._term_threads() 94 | else: 95 | sys.exit(0) # this is mandatory for single task auto exit 96 | try: 97 | # we should call cleanup ourself because we break out of task_loop 98 | xeH._cleanup() 99 | except KeyboardInterrupt: 100 | pass 101 | # this is mandatory for ctrl+c kill 102 | os._exit(0) 103 | 104 | ''' -ro --redirect-norm 是否应用在线代理到已解析到的非原图,默认不启用 105 | -f --force 即使超出配额也下载,默认为否 106 | -j --no-jp-name 是否不使用日语命名,默认为否''' 107 | 108 
| def _parse_range(s): 109 | rg = [] 110 | s = s.replace(",", ",") 111 | for r in s.split(','): 112 | r = r.strip() 113 | m = re.match(r'(\d+)(?:-(\d+))?$', r) 114 | if not m: 115 | raise argparse.ArgumentTypeError(logger.safestr(i18n.c(ERR_NOT_RANGE_FORMAT) % r)) 116 | start = int(m.group(1)) 117 | end = int(m.group(2) or start) 118 | rg.append((start, end)) 119 | return sorted(rg) 120 | 121 | class _AddToListAction(argparse.Action): 122 | ''' This action add a value 'add_value' to the list 'dest' ''' 123 | def __init__(self, option_strings, dest, add_value=None, current=None, nargs=None, **kwargs): 124 | super(_AddToListAction, self).__init__(option_strings, dest, default=None, nargs=0, **kwargs) 125 | self.add_value = add_value 126 | # to use in formatting output 127 | self.current = current 128 | 129 | def __call__(self, parser, namespace, values, option_string=None): 130 | if getattr(namespace, self.dest, None) is None: 131 | setattr(namespace, self.dest, []) 132 | items = list(getattr(namespace, self.dest)) 133 | items.append(self.add_value) 134 | setattr(namespace, self.dest, items) 135 | 136 | def parse_opt(): 137 | _def = {k:v for k,v in default_config.__dict__.items() if not k.startswith("_")} 138 | _def.update({k:v for k,v in config.__dict__.items() if not k.startswith("_")}) 139 | if not PY3K: 140 | for k in ('dir', 'log_path'): 141 | _def[k] = _def[k].decode('utf-8') 142 | parser = argparse.ArgumentParser(description = i18n.XEH_OPT_DESC, epilog = i18n.XEH_OPT_EPILOG, add_help = False) 143 | # the followings are handled in cli 144 | parser.add_argument('-u', '--username', help = i18n.XEH_OPT_u) 145 | parser.add_argument('-k', '--key', help = i18n.XEH_OPT_k) 146 | parser.add_argument('-c', '--cookie', help = i18n.XEH_OPT_c) 147 | parser.add_argument('-i', '--interactive', action = 'store_true', default = False, 148 | help = i18n.XEH_OPT_i) 149 | parser.add_argument('--daemon', action = 'store_true', default = _def['daemon'], 150 | help = i18n.XEH_OPT_daemon) 151 | # the followings are passed to xeHentai 152 | parser.add_argument('urls', metavar = 'url', type = str, nargs = '*', 153 | help = i18n.XEH_OPT_URLS) 154 | parser.add_argument('-d', '--dir', default = os.path.abspath(_def['dir']), 155 | help = i18n.XEH_OPT_d) 156 | parser.add_argument('-o', '--download-ori', 157 | action = 'store_true', default = _def['download_ori'], 158 | help = i18n.XEH_OPT_o) 159 | parser.add_argument('-j', '--jpn-title', type = bool, metavar = "BOOL", default = _def['jpn_title'], 160 | dest = 'jpn_title', help = i18n.XEH_OPT_j) 161 | parser.add_argument('-r', '--rename-ori', type = bool, metavar = "BOOL", default = _def['rename_ori'], 162 | help = i18n.XEH_OPT_r) 163 | 164 | parser.add_argument('-p', '--proxy', action = 'append', default = _def['proxy'], 165 | help = i18n.XEH_OPT_p) 166 | group = parser.add_mutually_exclusive_group() 167 | group.add_argument('--proxy-image', action = 'store_true', default = _def['proxy_image'], 168 | help = i18n.XEH_OPT_proxy_image) 169 | group.add_argument('--proxy-image-only', action = 'store_true', default = _def['proxy_image_only'], 170 | help = i18n.XEH_OPT_proxy_image_only) 171 | parser.add_argument('--rpc-interface', metavar = "ADDR", default = _def['rpc_interface'], 172 | help = i18n.XEH_OPT_rpc_interface) 173 | parser.add_argument('--rpc-port', type = int, metavar = "PORT", default = _def['rpc_port'], 174 | help = i18n.XEH_OPT_rpc_port) 175 | parser.add_argument('--rpc-secret', metavar = "...", default = _def['rpc_secret'], 176 | help = 
i18n.XEH_OPT_rpc_secret) 177 | parser.add_argument('--rpc-open-browser', type = bool, metavar = "BOOL", default = _def['rpc_open_browser'], 178 | help = i18n.XEH_OPT_rpc_open_browser) 179 | parser.add_argument('--delete-task-files', type = bool, metavar = "BOOL", default = _def['delete_task_files'], 180 | dest = 'delete_task_files', help = i18n.XEH_OPT_delete_task_files) 181 | parser.add_argument('-a', '--archive', type = bool, metavar = "BOOL", default = _def['make_archive'], 182 | dest = 'make_archive', help = i18n.XEH_OPT_a) 183 | parser.add_argument('--download-range', type = _parse_range, metavar = "a-b,c-d,e", default = None, 184 | dest = 'download_range', help = i18n.XEH_OPT_download_range) 185 | parser.add_argument('-t', '--thread', type = int, metavar = 'N', 186 | default = _def['download_thread_cnt'], dest = 'download_thread_cnt', 187 | help = i18n.XEH_OPT_t) 188 | parser.add_argument('--timeout', type = int, metavar = "N", default = _def['download_timeout'], 189 | dest = 'download_timeout', help = i18n.XEH_OPT_timeout) 190 | parser.add_argument('--low-speed-threshold', type = int, metavar = "N", default = _def['low_speed_threshold'], 191 | dest = 'low_speed_threshold', help = i18n.XEH_OPT_low_speed) 192 | parser.add_argument('-f', '--force', action = _AddToListAction, 193 | current = ERR_QUOTA_EXCEEDED in _def['ignored_errors'], 194 | add_value = ERR_QUOTA_EXCEEDED, dest='ignored_errors', 195 | help = i18n.XEH_OPT_f) 196 | parser.add_argument('--auto-update', default = _def['auto_update'], choices = ("check", "download", "off"), 197 | dest = 'auto_update', help = i18n.XEH_OPT_auto_update) 198 | parser.add_argument('--update-beta-channel', type = bool, metavar = "BOOL", default = _def['update_beta_channel'], 199 | dest = 'update_beta_channel', help = i18n.XEH_OPT_update_beta_channel) 200 | 201 | parser.add_argument('-l', '--logpath', metavar = '/path/to/eh.log', 202 | dest = 'log_path', default = os.path.abspath(_def['log_path']), help = i18n.XEH_OPT_l) 203 | 204 | parser.add_argument('-v', '--verbose', action = 'count', default = _def['log_verbose'], 205 | help = i18n.XEH_OPT_v) 206 | parser.add_argument('-h','--help', action = 'help', help = i18n.XEH_OPT_h) 207 | parser.add_argument('--version', action = 'version', 208 | version = '%s v%.3f%s' % (SCRIPT_NAME, __version__, '-dev' if DEVELOPMENT else ""), 209 | help = i18n.XEH_OPT_version) 210 | args = parser.parse_args() 211 | 212 | return args 213 | 214 | def interactive(xeH): 215 | def _readline(x, default = ""): 216 | if default: 217 | x = x % default 218 | _ = input(logger.safestr(x)) if PY3K else raw_input(logger.safestr(x)) 219 | _ = _ or default 220 | return _ if PY3K else _.decode(locale.getdefaultlocale()[1] or 'utf-8') 221 | 222 | if not xeH.has_login and _readline(i18n.PS_LOGIN) == 'y': 223 | uname = pwd = "" 224 | while not uname: 225 | uname = _readline(i18n.PS_USERNAME) 226 | while not pwd: 227 | pwd = _readline(i18n.PS_PASSWD) 228 | xeH.login_exhentai(uname, pwd) 229 | url = proxy = download_range = "" 230 | while not url: 231 | url = _readline(i18n.PS_URL) 232 | url = url.split(",") 233 | download_ori = _readline(i18n.PS_DOWNLOAD_ORI, 'y' if xeH.cfg['download_ori'] else 'n') == 'y' 234 | proxy = _readline(i18n.PS_PROXY).strip() 235 | proxy = [proxy] if proxy else xeH.cfg['proxy'] 236 | __def_dir = os.path.abspath(xeH.cfg['dir']) 237 | # if not PY3K: 238 | # __def_dir = __def_dir.decode(sys.getfilesystemencoding()) 239 | _dir = _readline(i18n.PS_DOWNLOAD_DIR % __def_dir) or xeH.cfg['dir'] 240 | rename_ori = 
_readline(i18n.PS_RENAME_ORI, 'y' if xeH.cfg['rename_ori'] else 'n') == 'y' 241 | make_archive = _readline(i18n.PS_MAKE_ARCHIVE, 'y' if xeH.cfg['make_archive'] else 'n') == 'y' 242 | jpn_title = _readline(i18n.PS_JPN_TITLE, 'y' if xeH.cfg['jpn_title'] else 'n') == 'y' 243 | while not download_range: 244 | _ = _readline(i18n.PS_DOWNLOAD_RANGE) 245 | if not _: 246 | download_range = [] 247 | break 248 | try: 249 | download_range = _parse_range(logger.safestr(_)) 250 | except argparse.ArgumentTypeError as ex: 251 | print(ex) 252 | else: 253 | break 254 | return {'urls': url, 'proxy': proxy, 'download_ori': download_ori, 'dir': _dir, 'rename_ori':rename_ori, 255 | 'make_archive': make_archive, 'jpn_title': jpn_title, 'save_tasks': False, 256 | 'download_range': download_range} 257 | -------------------------------------------------------------------------------- /xeHentai/config.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | # DO NOT EDIT THIS FILE 3 | # make a copy to your working directory 4 | # and edit that file 5 | 6 | # Daemon mode 7 | daemon = False 8 | 9 | # set download directory 10 | dir = "." 11 | # download original images, needs to login 12 | download_ori = False 13 | # Set if use Japanese title if available 14 | jpn_title = True 15 | # rename gallery image to original name, use sequence name if turned off 16 | rename_ori = False 17 | 18 | # set download proxies 19 | # currenlty supported: socks5/4a, http(s), glype 20 | # by default, proxy is only used on webpages 21 | proxy = [] 22 | # also use proxy to download images 23 | proxy_image = True 24 | # only use proxy on images, not webpages 25 | # if set to True, the value of proxy_image will be ignored 26 | proxy_image_only = False 27 | 28 | # bind jsonrpc server to this address 29 | rpc_interface = 'localhost' 30 | # bind jsonrpc server to this port 31 | rpc_port = None 32 | # jsonrpc secret string 33 | rpc_secret = None 34 | # auto open browser on rpc start 35 | rpc_open_browser = True 36 | 37 | # make an archive (.zip) after download and delete directory 38 | make_archive = False 39 | # specify ranges of images to be downloaded, in format 40 | # start-end, or single index, use comma to concat 41 | # multiple ranges, e.g.: 5-10,15,20-25, default to 42 | # download all images 43 | download_range = None 44 | # scan threads count 45 | scan_thread_cnt = 1 46 | # download threads count 47 | download_thread_cnt = 5 48 | # set image download timeout 49 | download_timeout = 10 50 | 51 | # ignore these error codes, continue download 52 | # to use predefined error codes, use: 53 | # import const as __c 54 | # ignored_errors = [__c.ERR_QUOTA_EXCEEDED] 55 | ignored_errors = [] 56 | 57 | # define log path 58 | log_path = "eh.log" 59 | # set log level 60 | log_verbose = 2 61 | 62 | # save tasks to h.json 63 | save_tasks = False 64 | 65 | # delete files when deleting a task 66 | delete_task_files = False 67 | 68 | # retry a connection if per thread speed is lower than this value, unit is KB per second 69 | low_speed_threshold = 10 70 | 71 | # turn on auto update of program "check" for check only and "download" for download 72 | auto_update = "download" 73 | # set to true to update to dev branch 74 | update_beta_channel = False -------------------------------------------------------------------------------- /xeHentai/const.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | # constants module 4 | # 
Contributor: 5 | # fffonion 6 | 7 | import os 8 | import re 9 | import sys 10 | import locale 11 | 12 | PY3K = sys.version_info.major == 3 13 | IRONPYTHON = sys.platform == 'cli' 14 | EXEBUNDLE = getattr(sys, 'frozen', False) 15 | LOCALE = locale.getdefaultlocale()[0] 16 | CODEPAGE = locale.getdefaultlocale()[1] or 'ascii' 17 | ANDROID = 'ANDROID_ARGUMENT' in os.environ 18 | 19 | __version__ = 2.023 20 | VERSION_UPDATE = "" 21 | VERSION_UPDATE_LOADER = __version__ 22 | DEVELOPMENT = False 23 | 24 | SCRIPT_NAME = "xeHentai" 25 | 26 | # https://github.com/soimort/you-get/you-get 27 | if getattr(sys, 'frozen', False): 28 | # The application is frozen 29 | FILEPATH = os.path.dirname(os.path.realpath(sys.executable)) 30 | else: 31 | # The application is not frozen 32 | # Change this bit to match where you store your data files: 33 | FILEPATH = sys.path[0] 34 | # if update is being injected 35 | if FILEPATH.endswith(".zip"): 36 | FILEPATH = sys.path[1] 37 | 38 | DUMMY_FILENAME = "-dummy-" 39 | RENAME_TMPDIR = "-xeh-conflict-" 40 | STATIC_CACHE_FILE = os.path.join(FILEPATH, "webui.gz") 41 | # cache for 1 hour 42 | STATIC_CACHE_TTL = 3600 43 | STATIC_CACHE_VERSION = 1 44 | 45 | SRC_UPDATE_FILE = os.path.join(FILEPATH, "src.zip") 46 | SRC_UPDATE_VERSION = 1 47 | 48 | RE_INDEX = re.compile('.+/(\d+)/([^\/]+)/*') 49 | RE_GALLERY = re.compile('/([a-f0-9]{10})/[^\-]+\-(\d+)') 50 | RE_IMGHASH = re.compile('/([a-f0-9]{40})-(\d+)-(\d+)-(\d+)-([a-z]{,4})') 51 | RE_FULLIMG = re.compile('fullimg.php\?gid=([a-z0-9]+)&page=(\d+)&key=') 52 | 53 | __restr_webpage = '^https*://([^\.]+\.)*(?:[g\.]*e-|ex)hentai.org' 54 | RE_URL_WEBPAGE = re.compile(__restr_webpage) 55 | RE_URL_IMAGE = re.compile('(?!%s)' % __restr_webpage) 56 | # matches all 57 | RE_URL_ALL = re.compile('.') 58 | 59 | RE_LOCAL_ADDR = re.compile('(^localhost)|(^127\.)|(^192\.168\.)|(^10\.)|(^172\.1[6-9]\.)|(^172\.2[0-9]\.)|(^172\.3[0-1]\.)|(^::1$)|(^[fF][cCdD])') 60 | 61 | RESTR_SITE = "https*://(?:[g\.]*e\-|ex)hentai\.org" 62 | 63 | FALLBACK_CF_IP = ("104.20.134.21", "104.20.135.21", "172.67.0.127") 64 | FALLBACK_IP_MAP = { 65 | 'e-hentai.org': FALLBACK_CF_IP, 66 | 'forums.e-hentai.org': ("94.100.18.243", ) + FALLBACK_CF_IP, 67 | 'exhentai.org': ("178.175.129.254", "178.175.128.252", "178.175.132.20", "178.175.129.252", "178.175.128.254", "178.175.132.22") 68 | } 69 | 70 | QUOTA_EXCEEDED_CONTENT_LENGTHS = (925, 28658, 144, 210, 1009) 71 | 72 | DEFAULT_MAX_REDIRECTS = 30 73 | 74 | XEH_STATE_RUNNING = 0 75 | XEH_STATE_SOFT_EXIT = 1 # wait until current task finish and exit 76 | XEH_STATE_FULL_EXIT = 2 # finish current task stage and exit 77 | XEH_STATE_CLEAN = 3 78 | 79 | TASK_STATE_PAUSED = 0 80 | TASK_STATE_WAITING = 1 81 | TASK_STATE_GET_META = 2 82 | # TASK_STATE_GET_HATHDL = 3 83 | TASK_STATE_SCAN_PAGE = 3 84 | TASK_STATE_SCAN_IMG = 4 85 | TASK_STATE_SCAN_ARCHIVE = 5 86 | TASK_STATE_DOWNLOAD = 10 87 | TASK_STATE_MAKE_ARCHIVE = 19 88 | TASK_STATE_FINISHED = 20 89 | TASK_STATE_FAILED = -1 90 | 91 | ERR_NO_ERROR = 0 92 | ERR_URL_NOT_RECOGNIZED = 1000 93 | ERR_CANT_DOWNLOAD_EXH = 1001 94 | ERR_ONLY_VISIBLE_EXH = 1002 95 | ERR_MALFORMED_HATHDL = 1003 96 | ERR_GALLERY_REMOVED = 1004 97 | ERR_IMAGE_RESAMPLED = 1005 98 | ERR_QUOTA_EXCEEDED = 1006 99 | ERR_KEY_EXPIRED = 1007 100 | ERR_NO_PAGEURL_FOUND = 1008 101 | ERR_CONNECTION_ERROR = 1009 102 | ERR_IP_BANNED = 1010 103 | ERR_HATH_NOT_FOUND = 1011 104 | ERR_IMAGE_BROKEN = 1012 105 | ERR_SCAN_REGEX_FAILED = 1013 106 | ERR_STREAM_NOT_IMPLEMENTED = 1014 107 | ERR_TASK_NOT_FOUND = 1101 108 | 
ERR_SAVE_SESSION_FAILED = 1103 109 | ERR_TASK_LEVEL_UNDEF = 1104 110 | ERR_DELETE_RUNNING_TASK = 1105 111 | ERR_TASK_CANNOT_PAUSE = 1106 112 | ERR_TASK_CANNOT_RESUME = 1107 113 | # ERR_HATHDL_NOTFOUND = 1108 114 | ERR_RPC_UNAUTHORIZED = 1200 115 | ERR_CANNOT_CREATE_DIR = 1300 116 | ERR_CANNOT_MAKE_ARCHIVE = 1301 117 | ERR_NOT_RANGE_FORMAT = 1302 118 | ERR_RPC_PARSE_ERROR = -32700 119 | ERR_RPC_INVALID_REQUEST = -32600 120 | ERR_RPC_METHOD_NOT_FOUND = -32601 121 | ERR_RPC_INVALID_PARAMS = -32602 122 | ERR_RPC_EXEC_ERROR = -32603 123 | 124 | 125 | class DownloadAbortedException(Exception): 126 | pass 127 | -------------------------------------------------------------------------------- /xeHentai/core.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | # Contributor: 4 | # fffonion 5 | 6 | from __future__ import absolute_import 7 | import os 8 | import re 9 | import sys 10 | import math 11 | import json 12 | import time 13 | import traceback 14 | from .task import Task 15 | from . import util 16 | from . import proxy 17 | from . import filters 18 | from .rpc import RPCServer 19 | from .i18n import i18n 20 | from .util import logger 21 | from .const import * 22 | from .const import __version__ 23 | from .worker import * 24 | if PY3K: 25 | from queue import Queue, Empty 26 | else: 27 | from Queue import Queue, Empty 28 | 29 | from . import config as default_config 30 | sys.path.insert(1, FILEPATH) 31 | try: 32 | import config 33 | except ImportError: 34 | config = default_config 35 | sys.path.pop(1) 36 | 37 | class xeHentai(object): 38 | def __init__(self): 39 | self.verstr = "%.3f%s" % (__version__, '-dev' if DEVELOPMENT else "") 40 | if VERSION_UPDATE: 41 | self.verstr = "%s-%s(%s)" % (self.verstr, VERSION_UPDATE[:7], VERSION_UPDATE_LOADER) 42 | self.verstr = "%s-py%d%d" % (self.verstr, sys.version_info[0], sys.version_info[1]) 43 | self.logger = logger.Logger() 44 | self._exit = False 45 | self.tasks = Queue() # for queueing, stores gid only 46 | self.last_task_guid = None 47 | self._all_tasks = {} # for saving states 48 | self._all_threads = [[] for i in range(20)] 49 | self.cfg = {k:v for k,v in default_config.__dict__.items() if not k.startswith("_")} 50 | # note that ignored_errors are overwritten using val from custom config 51 | self.cfg.update({k:v for k,v in config.__dict__.items() if not k.startswith("_")}) 52 | self.proxy = None 53 | self.cookies = {"nw": "1"} 54 | self.headers = { 55 | 'User-Agent': util.make_ua(), 56 | 'Accept-Charset': 'utf-8;q=0.7,*;q=0.7', 57 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 58 | 'Connection': 'keep-alive' 59 | } 60 | self.has_login = False 61 | self.load_session() 62 | self.rpc = None 63 | 64 | def update_config(self, **cfg_dict): 65 | self.cfg.update({k:v for k, v in cfg_dict.items() if k in cfg_dict and k not in ('ignored_errors',)}) 66 | # merge ignored errors list 67 | if 'ignored_errors' in cfg_dict and cfg_dict['ignored_errors']: 68 | self.cfg['ignored_errors'] = list(set(self.cfg['ignored_errors'] + cfg_dict['ignored_errors'])) 69 | self.logger.set_level(logger.Logger.WARNING - self.cfg['log_verbose']) 70 | self.logger.verbose("cfg %s" % self.cfg) 71 | if cfg_dict['proxy']: 72 | if not self.proxy: # else we keep it None 73 | self.proxy = proxy.Pool() 74 | for p in self.cfg['proxy']: 75 | try: 76 | self.proxy.add_proxy(p) 77 | except Exception as ex: 78 | self.logger.warning(traceback.format_exc()) 79 | 
self.logger.debug(i18n.PROXY_CANDIDATE_CNT % len(self.proxy.proxies)) 80 | if cfg_dict['dir'] and not os.path.exists(cfg_dict['dir']): 81 | try: 82 | os.makedirs(cfg_dict['dir']) 83 | except OSError as ex: # Python >2.5 84 | self.logger.error(i18n.ERR_CANNOT_CREATE_DIR % cfg_dict['dir']) 85 | if not self.rpc and self.cfg['rpc_port'] and self.cfg['rpc_interface']: 86 | self.rpc = RPCServer(self, (self.cfg['rpc_interface'], int(self.cfg['rpc_port'])), 87 | secret = None if 'rpc_secret' not in self.cfg else self.cfg['rpc_secret'], 88 | open_browser = False if 'rpc_open_browser' not in self.cfg else self.cfg['rpc_open_browser'], 89 | logger = self.logger) 90 | if not RE_LOCAL_ADDR.match(self.cfg['rpc_interface']) and \ 91 | not self.cfg['rpc_secret']: 92 | self.logger.warning(i18n.RPC_TOO_OPEN % self.cfg['rpc_interface']) 93 | self.rpc.start() 94 | self.logger.set_logfile(self.cfg['log_path']) 95 | return ERR_NO_ERROR, "" 96 | 97 | def _get_httpreq(self, proxy_policy): 98 | return HttpReq(self.headers, logger = self.logger, proxy = self.proxy, proxy_policy = proxy_policy) 99 | 100 | def _get_httpworker(self, tid, task_q, flt, suc, fail, keep_alive, proxy_policy, timeout, stream_mode, lowspeed_threshold): 101 | return HttpWorker(tid, task_q, flt, suc, fail, 102 | headers = self.headers, proxy = self.proxy, logger = self.logger, 103 | keep_alive = keep_alive, proxy_policy = proxy_policy, timeout = timeout, stream_mode = stream_mode, 104 | lowspeed_threshold = lowspeed_threshold) 105 | 106 | def add_task(self, url, **cfg_dict): 107 | url = url.strip() 108 | cfg = {k:v for k, v in self.cfg.items() if k in ( 109 | "dir", "download_ori", "download_thread_cnt", "scan_thread_cnt", 110 | "proxy_image", "proxy_image_only", "ignored_errors", "low_speed_threshold", 111 | "rename_ori", "make_archive", "delete_task_files", "jpn_title", "download_range", "download_timeout")} 112 | cfg.update(cfg_dict) 113 | if cfg['download_ori'] and not self.has_login: 114 | self.logger.warning(i18n.XEH_DOWNLOAD_ORI_NEED_LOGIN) 115 | t = Task(url, cfg, self.logger) 116 | if t.guid in self._all_tasks: 117 | if self._all_tasks[t.guid].state in (TASK_STATE_FINISHED, TASK_STATE_FAILED): 118 | self.logger.debug(i18n.TASK_PUT_INTO_WAIT % t.guid) 119 | self._all_tasks[t.guid].state = TASK_STATE_WAITING 120 | self._all_tasks[t.guid].cleanup() 121 | return 0, t.guid 122 | self._all_tasks[t.guid] = t 123 | if not re.match("^%s/[^/]+/\d+/[^/]+/*#*$" % RESTR_SITE, url): 124 | t.set_fail(ERR_URL_NOT_RECOGNIZED) 125 | elif not self.has_login and re.match("^https*://exhentai\.org", url): 126 | t.set_fail(ERR_CANT_DOWNLOAD_EXH) 127 | else: 128 | self.tasks.put(t.guid) 129 | return 0, t.guid 130 | self.logger.error(i18n.TASK_ERROR % (t.guid, i18n.c(t.failcode))) 131 | return t.failcode, None 132 | 133 | def del_task(self, guid): 134 | if guid not in self._all_tasks: 135 | return ERR_TASK_NOT_FOUND, None 136 | if TASK_STATE_PAUSED< self._all_tasks[guid].state < TASK_STATE_FINISHED: 137 | return ERR_DELETE_RUNNING_TASK, None 138 | self._all_tasks[guid].cleanup(before_delete=True) 139 | del self._all_tasks[guid] 140 | return ERR_NO_ERROR, "" 141 | 142 | def pause_task(self, guid): 143 | if guid not in self._all_tasks: 144 | return ERR_TASK_NOT_FOUND, None 145 | t = self._all_tasks[guid] 146 | if t.state in (TASK_STATE_PAUSED, TASK_STATE_FINISHED, TASK_STATE_FAILED): 147 | return ERR_TASK_CANNOT_PAUSE, None 148 | if t._monitor: 149 | t._monitor._exit = lambda x: True 150 | t.state = TASK_STATE_PAUSED 151 | return ERR_NO_ERROR, "" 152 | 153 | 
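# ---------------------------------------------------------------------
# Added commentary (not part of the original source): a minimal, hypothetical
# sketch of the task lifecycle driven by add_task/pause_task/resume_task/
# del_task around this point; <gid>/<token> stand in for a real gallery id
# and token.
#
#   xeh = xeHentai()
#   err, guid = xeh.add_task("https://e-hentai.org/g/<gid>/<token>/")
#   xeh.pause_task(guid)   # tells the task monitor to exit; state -> TASK_STATE_PAUSED
#   xeh.resume_task(guid)  # re-queues the guid; state -> at least TASK_STATE_WAITING
#   xeh.del_task(guid)     # refused while TASK_STATE_PAUSED < state < TASK_STATE_FINISHED
# ---------------------------------------------------------------------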
def resume_task(self, guid): 154 | if guid not in self._all_tasks: 155 | return ERR_TASK_NOT_FOUND, None 156 | t = self._all_tasks[guid] 157 | if TASK_STATE_PAUSED< t.state < TASK_STATE_FINISHED: 158 | return ERR_TASK_CANNOT_RESUME, None 159 | t.state = max(t.state, TASK_STATE_WAITING) 160 | 161 | self.tasks.put(guid) 162 | return ERR_NO_ERROR, "" 163 | 164 | def _do_task(self, task_guid): 165 | task = self._all_tasks[task_guid] 166 | if task.state == TASK_STATE_WAITING: 167 | task.state = TASK_STATE_GET_META 168 | req = self._get_httpreq(util.get_proxy_policy(task.config)) 169 | if not task.page_q: 170 | task.page_q = Queue() # per image page queue 171 | if not task.img_q: 172 | task.img_q = Queue() # (image url, savepath) queue 173 | monitor_started = False 174 | while self._exit < XEH_STATE_FULL_EXIT: 175 | # wait for threads from former task to stop 176 | if self._all_threads[task.state]: 177 | self.logger.verbose("wait %d threads in state %s" % ( 178 | len(self._all_threads[task.state]), task.state)) 179 | for t in self._all_threads[task.state]: 180 | t.join() 181 | self._all_threads[task.state] = [] 182 | # check again before we bring up new threads 183 | continue 184 | if task.state >= TASK_STATE_SCAN_IMG and not monitor_started: 185 | self.logger.verbose("state %d >= %d, bring up montior" % (task.state, TASK_STATE_SCAN_IMG)) 186 | # bring up the monitor here, ahead of workers 187 | mon = Monitor(req, self.proxy, self.logger, task, ignored_errors=task.config['ignored_errors']) 188 | _ = ['down-%d' % (i + 1) for i in range(task.config['download_thread_cnt'])] 189 | # if we jumpstart from a saved session to DOQNLOAD 190 | # there will be no scan_thread 191 | # if task.state >= TASK_STATE_SCAN_PAGE: 192 | # _ += ['list-1'] 193 | if task.state >= TASK_STATE_SCAN_IMG: 194 | _ += ['scan-%d' % (i + 1) for i in range(task.config['scan_thread_cnt'])] 195 | mon.set_vote_ns(_) 196 | self._monitor = mon 197 | task._monitor = mon 198 | mon.start() 199 | # put in the lowest state 200 | self._all_threads[TASK_STATE_SCAN_IMG].append(mon) 201 | monitor_started = True 202 | 203 | if task.state == TASK_STATE_GET_META: # grab meta data 204 | try: 205 | r = req.request("GET", task.url, 206 | filters.flt_metadata, 207 | lambda x:task.update_meta(x), 208 | lambda x:task.set_fail(x)) 209 | except Exception as ex: 210 | self.logger.error(i18n.TASK_ERROR % (task.guid, traceback.format_exc())) 211 | task.state = TASK_STATE_FAILED 212 | break 213 | if task.failcode in (ERR_ONLY_VISIBLE_EXH, ERR_GALLERY_REMOVED) and self.has_login and \ 214 | task.migrate_exhentai(): 215 | self.logger.info(i18n.TASK_MIGRATE_EXH % task_guid) 216 | self.tasks.put(task_guid) 217 | break 218 | elif task.failcode == ERR_IP_BANNED: 219 | self.logger.error(i18n.c(ERR_IP_BANNED) % r) 220 | task.state = TASK_STATE_FAILED 221 | break 222 | 223 | # elif task.state == TASK_STATE_GET_HATHDL: # download hathdl 224 | # r = req.request("GET", 225 | # "%s/hathdler.php?gid=%s&t=%s" % (task.base_url(), task.gid, task.sethash), 226 | # filters.flt_hathdl, 227 | # lambda x:(task.meta.update(x), 228 | # task.guess_ori(), 229 | # task.scan_downloaded()), 230 | # #task.meta['has_ori'] and task.config['download_ori'])), 231 | # lambda x:task.set_fail(x),) 232 | # self.logger.info(i18n.TASK_WILL_DOWNLOAD_CNT % ( 233 | # task_guid, task.meta['total'] - len(task._flist_done), 234 | # task.meta['total'])) 235 | elif task.state == TASK_STATE_SCAN_PAGE: 236 | # if task.config['fast_scan'] and not task.has_ori: 237 | # self.logger.info(i18n.TASK_FAST_SCAN % 
task.guid) 238 | # for p in task.meta['filelist']: 239 | # task.queue_wrapper(task.page_q.put, pichash = p) 240 | # else: 241 | # scan by our own, should not be here currently 242 | # start backup thread 243 | task.scan_downloaded() 244 | if task.state == TASK_STATE_FINISHED: 245 | continue 246 | if not task.meta['use_multipage_viewer']: 247 | for x in range(0, 248 | int(math.ceil(1.0 * task.meta['total'] / int(task.meta['thumbnail_cnt'])))): 249 | r = req.request("GET", 250 | "%s/?p=%d" % (task.url, x), 251 | filters.flt_pageurl, 252 | lambda x: task.put_page_queue(x), 253 | lambda x: task.set_fail(x)) 254 | if task.failcode: 255 | break 256 | elif task.meta['finished'] < task.meta['total']: 257 | # use multipage viewer 258 | r = req.request("GET", 259 | task.mpv_url(), 260 | filters.flt_pageurl_mpv, 261 | lambda x: task.put_page_queue(x), 262 | lambda x: task.set_fail(x)) 263 | elif task.state == TASK_STATE_SCAN_IMG: 264 | # print here so that see it after we can join former threads 265 | self.logger.info(i18n.TASK_TITLE % ( 266 | task_guid, task.meta['title'])) 267 | self.logger.info(i18n.TASK_WILL_DOWNLOAD_CNT % ( 268 | task_guid, task.meta['total'] - task.meta['finished'], 269 | task.meta['total'])) 270 | # spawn thread to scan images 271 | for i in range(task.config['scan_thread_cnt']): 272 | tid = 'scan-%d' % (i + 1) 273 | _ = self._get_httpworker(tid, task.page_q, 274 | filters.flt_imgurl_wrapper(task.config['download_ori'] and self.has_login), 275 | lambda x, tid = tid: (task.put_img_queue(*x), 276 | mon.vote(tid, 0)), 277 | lambda x, tid = tid: ( 278 | mon.vote(tid, x[0]), 279 | self.logger.warn(i18n.XEH_SCAN_FAILED % (tid, x[1], i18n.c(x[0]))), 280 | ), 281 | mon.wrk_keepalive, 282 | util.get_proxy_policy(task.config), 283 | 10, 284 | False, 285 | None) 286 | # we don't need proxy_image in the scan thread 287 | # we use default timeout in the scan thread 288 | # _._exit = lambda t: t._finish_queue() 289 | self._all_threads[TASK_STATE_SCAN_IMG].append(_) 290 | _.start() 291 | task.state = TASK_STATE_DOWNLOAD - 1 292 | elif task.state == TASK_STATE_SCAN_ARCHIVE: 293 | task.state = TASK_STATE_DOWNLOAD - 1 294 | elif task.state == TASK_STATE_DOWNLOAD: 295 | # spawn thread to download all urls 296 | for i in range(task.config['download_thread_cnt']): 297 | tid = 'down-%d' % (i + 1) 298 | _ = self._get_httpworker(tid, task.img_q, 299 | filters.download_file_wrapper(task.config['dir']), 300 | lambda x, tid = tid: (task.save_file(x[1], x[2], x[0]) and \ 301 | (self.logger.debug(i18n.XEH_FILE_DOWNLOADED.format(tid, *task.get_fname(task.get_imghash(x[2])))), 302 | mon.vote(tid, 0))), 303 | lambda x, tid = tid: ( 304 | self.logger.debug(i18n.XEH_DOWNLOAD_HAS_ERROR % ( 305 | tid, task.get_imgfid(x[2]), i18n.c(x[0]), 306 | )), 307 | task.put_page_queue_retry(x[2]), 308 | mon.vote(tid, x[0])), 309 | mon.wrk_keepalive, 310 | util.get_proxy_policy(task.config), 311 | task.config['download_timeout'], 312 | True, 313 | task.config['low_speed_threshold'] * 1024 314 | ) 315 | self._all_threads[TASK_STATE_DOWNLOAD].append(_) 316 | _.start() 317 | # spawn archiver if we need 318 | if task.config['make_archive']: 319 | if self._all_threads[TASK_STATE_MAKE_ARCHIVE]: 320 | self._all_threads[TASK_STATE_MAKE_ARCHIVE][0].join() 321 | self._all_threads[TASK_STATE_MAKE_ARCHIVE] = [] 322 | _a = ArchiveWorker(self.logger, task) 323 | self._all_threads[TASK_STATE_MAKE_ARCHIVE].append(_a) 324 | _a.start() 325 | # break current task loop 326 | break 327 | 328 | if task.failcode: 329 | 
self.logger.error(i18n.TASK_ERROR % (task_guid, i18n.c(task.failcode))) 330 | # wait all threads to finish 331 | break 332 | else: 333 | task.state += 1 334 | 335 | def _task_loop(self): 336 | task_guid = None 337 | cnt = 0 338 | while not self._exit: 339 | # get a new task 340 | if cnt == 10: 341 | self.save_session() 342 | cnt = 0 343 | try: 344 | _ = self.tasks.get(False) 345 | self.last_task_guid = task_guid 346 | task_guid = _ 347 | except Empty: 348 | time.sleep(1) 349 | cnt += 1 350 | continue 351 | else: 352 | task = self._all_tasks[task_guid] 353 | if TASK_STATE_PAUSED < task.state < TASK_STATE_FINISHED: 354 | self.logger.info(i18n.TASK_START % task_guid) 355 | self.save_session() 356 | cnt = 0 357 | self._do_task(task_guid) 358 | self.logger.info(i18n.XEH_LOOP_FINISHED) 359 | self._cleanup() 360 | 361 | def _term_threads(self): 362 | self._exit = XEH_STATE_FULL_EXIT 363 | for l in self._all_threads: 364 | for p in l: 365 | p._exit = lambda x:True 366 | 367 | def _cleanup(self): 368 | self._exit = self._exit if self._exit > 0 else XEH_STATE_SOFT_EXIT 369 | self.save_session() 370 | self._join_all() 371 | self.logger.cleanup() 372 | # let's send a request to rpc server to unblock it 373 | if self.rpc: 374 | self.rpc._exit = lambda x:True 375 | import requests 376 | try: 377 | requests.get("http://%s:%s/" % (self.cfg['rpc_interface'], self.cfg['rpc_port'])) 378 | except: 379 | pass 380 | self.rpc.join() 381 | # save it again in case we miss something 382 | self.save_session() 383 | self._exit = XEH_STATE_CLEAN 384 | 385 | def _join_all(self): 386 | for l in self._all_threads: 387 | for p in l: 388 | p.join() 389 | 390 | def save_session(self): 391 | with open("h.json", "w") as f: 392 | try: 393 | f.write(json.dumps({ 394 | 'tasks':{} if not self.cfg['save_tasks'] else 395 | {k: v.to_dict() for k,v in self._all_tasks.items()}, 396 | 'cookies':self.cookies})) 397 | except Exception as ex: 398 | self.logger.warning(i18n.SESSION_WRITE_EXCEPTION % traceback.format_exc()) 399 | return ERR_SAVE_SESSION_FAILED, str(ex) 400 | return ERR_NO_ERROR, None 401 | 402 | def load_session(self): 403 | if os.path.exists("h.json"): 404 | with open("h.json") as f: 405 | try: 406 | j = json.loads(f.read()) 407 | except Exception as ex: 408 | self.logger.warning(i18n.SESSION_LOAD_EXCEPTION % traceback.format_exc()) 409 | return ERR_SAVE_SESSION_FAILED, str(ex) 410 | else: 411 | for _ in j['tasks'].values(): 412 | _t = Task("", {}, self.logger).from_dict(_) 413 | if 'filelist' in _t.meta: 414 | _t.scan_downloaded() 415 | #_t.meta['has_ori'] and task.config['download_ori']) 416 | # since we don't block on scan_img state, an unempty page_q 417 | # indicates we should start from scan_img state, 418 | if _t.state == TASK_STATE_DOWNLOAD and _t.page_q: 419 | _t.state = TASK_STATE_SCAN_IMG 420 | self._all_tasks[_['guid']] = _t 421 | self.tasks.put(_['guid']) 422 | if self._all_tasks: 423 | self.logger.info(i18n.XEH_LOAD_TASKS_CNT % len(self._all_tasks)) 424 | self.cookies.update(j['cookies']) 425 | if self.cookies: 426 | self.headers.update({'Cookie':util.make_cookie(self.cookies)}) 427 | self.has_login = 'ipb_member_id' in self.cookies and 'ipb_pass_hash' in self.cookies 428 | _1xcookie = os.path.join(FILEPATH, ".ehentai.cookie")# 1.x cookie file 429 | if not self.has_login and os.path.exists(_1xcookie): 430 | with open(_1xcookie) as f: 431 | try: 432 | cid, cpw = f.read().strip().split(",") 433 | self.cookies.update({'ipb_member_id':cid, 'ipb_pass_hash':cpw}) 434 | 
self.headers.update({'Cookie':util.make_cookie(self.cookies)}) 435 | self.has_login = True 436 | self.logger.info(i18n.XEH_LOAD_OLD_COOKIE) 437 | except: 438 | pass 439 | 440 | return ERR_NO_ERROR, None 441 | 442 | def login_exhentai(self, name, pwd): 443 | if 'ipb_member_id' in self.cookies and 'ipb_pass_hash' in self.cookies: 444 | return 445 | self.logger.debug(i18n.XEH_LOGIN_EXHENTAI) 446 | logindata = { 447 | 'UserName':name, 448 | 'returntype':'8', 449 | 'CookieDate':'1', 450 | 'b':'d', 451 | 'bt':'pone', 452 | 'PassWord':pwd 453 | } 454 | req = self._get_httpreq(util.get_proxy_policy(self.cfg)) 455 | req.request("POST", "https://forums.e-hentai.org/index.php?act=Login&CODE=01", 456 | filters.login_exhentai, 457 | lambda x:( 458 | setattr(self, 'cookies', x), 459 | setattr(self, 'has_login', True), 460 | self.headers.update({'Cookie':util.make_cookie(self.cookies)}), 461 | self.save_session(), 462 | self.logger.info(i18n.XEH_LOGIN_OK)), 463 | lambda x:(self.logger.warning(str(x)), 464 | self.logger.info(i18n.XEH_LOGIN_FAILED)), 465 | logindata) 466 | return ERR_NO_ERROR, self.has_login 467 | 468 | def set_cookie(self, cookie): 469 | self.cookies.update(util.parse_cookie(cookie)) 470 | self.headers.update({'Cookie':util.make_cookie(self.cookies)}) 471 | if 'ipb_member_id' in self.cookies and 'ipb_pass_hash' in self.cookies: 472 | self.has_login = True 473 | return ERR_NO_ERROR, None 474 | 475 | 476 | if __name__ == '__main__': 477 | pass 478 | -------------------------------------------------------------------------------- /xeHentai/filters.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | # Contributor: 4 | # fffonion 5 | 6 | import os 7 | import re 8 | import json 9 | from . 
import util 10 | from .const import * 11 | 12 | SUC = 0 13 | FAIL = 1 14 | 15 | def login_exhentai(r, suc, fail): 16 | # input login response 17 | # add cookies if suc; log error fail 18 | try: 19 | coo = r.headers.get('set-cookie') 20 | cooid = re.findall('ipb_member_id=(.*?);', coo)[0] 21 | coopw = re.findall('ipb_pass_hash=(.*?);', coo)[0] 22 | except (IndexError, ) as ex: 23 | errmsg = re.findall('([^<]+)', r.text) 24 | if errmsg: 25 | fail(errmsg[0]) 26 | else: 27 | fail("ex: %s" % ex) 28 | return FAIL 29 | else: 30 | suc({'ipb_member_id':cooid, 'ipb_pass_hash':coopw}) 31 | return SUC 32 | 33 | 34 | def flt_metadata(r, suc, fail): 35 | # input index response 36 | # add gallery meta if suc; return errorcode if fail 37 | # TODO: catch re exceptions 38 | if r.status_code == 600: 39 | return fail(ERR_CONNECTION_ERROR) 40 | if r.status_code == 404: 41 | return fail(ERR_GALLERY_REMOVED) 42 | if re.match("This gallery is pining for the fjords", r.text): 43 | return fail(ERR_ONLY_VISIBLE_EXH) 44 | elif re.match("Your IP address has been temporarily banned", r.text): 45 | fail(ERR_IP_BANNED) 46 | return re.findall("The ban expires in (.+)", r.text)[0] 47 | meta = {} 48 | # sample_hash = re.findall('(.*?)', r.text)[0]) 52 | meta['gnname']= util.htmlescape(re.findall('="gn">(.*?)', r.text)[0]) 53 | # don't assign title now, select gj/gn based on cfg['jpn_title'] 54 | # meta['title'] = meta['gjname'] if meta['gjname'] else meta['gnname'] 55 | meta['total'] = int(re.findall('Length:(\d+)\s+page', r.text)[0]) 56 | meta['finished'] = 0 57 | meta['tags'] = re.findall("toggle_tagmenu\('([^']+)'", r.text) 58 | 59 | # TODO: parse cookie to calc thumbnail_cnt (tr_2, ts_m) 60 | _ = re.findall("Showing (\d+) \- (\d+) of ([\d,]+) images", r.text)[0] 61 | meta['thumbnail_cnt'] = int(_[1]) - int(_[0]) + 1 62 | 63 | # check multi page viewer status in order to call proper flt_pageurl 64 | # TODO: remove \d+(?:' % RESTR_SITE, 76 | # r.text) 77 | # meta['pagecount'] = 1 if len(_) <= 1 else int(pagecount[-2]) 78 | 79 | 80 | # def flt_hathdl(r, suc, fail): 81 | # # input hathdl response 82 | # # add gallery meta if suc; return errorcode if fail 83 | # if r.status_code == 404: 84 | # fail(ERR_HATHDL_NOTFOUND) 85 | # return ERR_HATHDL_NOTFOUND 86 | # try: 87 | # meta = { 88 | # 'name': util.htmlescape(re.findall('TITLE (.+)', r.text)[0]), 89 | # #'gid': int(re.findall('GID (.+)', r.text)[0]), 90 | # 'total': int(re.findall('FILES (.+)', r.text)[0]), 91 | # 'finished': 0, 92 | # 'title': re.findall('Title:\s+(.+)', r.text)[0], 93 | # #'upload_time': re.findall('Upload Time:\s+(.+)', r.text)[0], # invisible 94 | # #'upload_by': re.findall('Uploaded By:\s+(.+)', r.text)[0], # invisible 95 | # #'downloaded': re.findall('Downloaded:\s+(.+)', r.text)[0], # invisible 96 | # 'tags': re.findall('Tags:\s+(.+)', r.text)[0].split(', '), 97 | # } 98 | # listtmp = re.findall('FILELIST\n(.+)\n+\nINFORMATION', r.text, re.DOTALL)[0].split('\n') 99 | # meta['filelist'] = {} 100 | # for l in listtmp: 101 | # # hash(full): id, hash_10, length, width, height, format, name 102 | # _ = re.findall('(\d+) ([a-z0-9]+)-(\d+)-(\d+)-(\d+)-([a-z]+) (.+)', l)[0] 103 | # meta['filelist'][_[1][:10]] = list(_) 104 | # except (IndexError, ValueError) as ex: 105 | # fail(ERR_MALFORMED_HATHDL) 106 | # return ERR_MALFORMED_HATHDL 107 | # suc(meta) 108 | 109 | def flt_pageurl(r, suc, fail): 110 | # input gallery response 111 | # add per image urls if suc; finish task if fail 112 | picpage = re.findall( 113 | '(?:
)?(?: 0 and len(p[-1]) > 1: 148 | sz = int(p[-1][1]) 149 | if r.status_code == 600:# tcp layer error 150 | fail((ERR_CONNECTION_ERROR, r._real_url, r.url)) 151 | elif r.status_code == 403: 152 | fail((ERR_KEY_EXPIRED, r._real_url, r.url)) 153 | elif r.status_code == 509 or \ 154 | (sz != r.content_length and ( 155 | r.content_length in QUOTA_EXCEEDED_CONTENT_LENGTHS or \ 156 | '/509.gif' in r.url or '/509.gif' in r._real_url)): 157 | # TODO: /509.gif detection is still not accturate, there might be a file 158 | # that happened to be this name 159 | fail((ERR_QUOTA_EXCEEDED, r._real_url, r.url)) 160 | # will not call the decorated filter 161 | elif r.content_length < 200 and \ 162 | r.headers.get('content-type') and r.headers.get('content-type').startswith('text') and \ 163 | re.findall("exceeded your image viewing limits", r.text): 164 | fail((ERR_QUOTA_EXCEEDED, r._real_url, r.url)) 165 | # will not call the decorated filter 166 | else: 167 | func(r, suc, fail) 168 | return _ 169 | 170 | def flt_imgurl_wrapper(ori): 171 | @flt_quota_check 172 | def flt_imgurl(r, suc, fail, ori = ori): 173 | # input per image page response 174 | # add (image url, reload url, filename) to queue if suc 175 | # return (errorcode, page_url) if fail 176 | if re.match('Invalid page', r.text): 177 | return fail((ERR_IMAGE_RESAMPLED, r._real_url, r.url)) 178 | while True: 179 | _ = re.findall('src="([^"]+keystamp[^"]+)"', r.text) 180 | if not _: 181 | _ = re.findall('src="([^"]+)"\s+style="', r.text) 182 | if not _: 183 | break 184 | picurl = util.htmlescape(_[0]) 185 | 186 | _ = re.findall('
(.*?) ::.+::.+Download original', r.text) 205 | fullsize = re.findall('Download\soriginal\s[0-9]+\sx\s[0-9]+\s(.*)\ssource', r.text) # like 2.20MB 206 | if fullurl: 207 | fullurl = util.htmlescape(fullurl[0]) 208 | else: 209 | fullurl = picurl 210 | _ = re.findall("return nl\('([a-zA-Z\d\-]+)'\)", r.text) 211 | if not _: 212 | break 213 | js_nl = _[0] 214 | reload_url = "%s%snl=%s" % (r._real_url, "&" if "?" in r._real_url else "?", js_nl) 215 | if ori: 216 | fullurl = "%s%sredirect=%s" % (fullurl, "&" if "?" in fullurl else "?", r.url) 217 | reload_url = "%s%sredirect=%s" % (reload_url, "&" if "?" in reload_url else "?", r.url) 218 | # we will parse the 302 url to get original filename 219 | return suc((fullurl, reload_url, filename)) 220 | else: 221 | return suc((picurl, reload_url, filename)) 222 | 223 | return fail((ERR_SCAN_REGEX_FAILED, r._real_url, r.url)) 224 | 225 | return flt_imgurl 226 | 227 | def download_file_wrapper(dirpath): 228 | @flt_quota_check 229 | def download_file(r, suc, fail, dirpath = dirpath): 230 | # input image/archive response 231 | # return (binary, url) if suc; return (errocode, url) if fail 232 | if r.status_code == 404: 233 | return fail((ERR_HATH_NOT_FOUND, r._real_url, r.url)) 234 | p = RE_IMGHASH.findall(r.url) 235 | # if multiple hash-size-h-w-type is found, use the last one 236 | # the first is original and the last is scaled 237 | # _FakeReponse will be filtered in flt_quota_check 238 | if not r.content_length or \ 239 | p and p[-1] and int(p[-1][1]) != r.content_length: 240 | return fail((ERR_IMAGE_BROKEN, r._real_url, r.url)) 241 | if not hasattr(r, 'iter_content_cb'): 242 | return fail((ERR_STREAM_NOT_IMPLEMENTED, r._real_url, r.url)) 243 | 244 | # merge the iter_content iterator with our custom stream_cb 245 | def _yield(chunk_size=16384, _r=r): 246 | from requests.exceptions import ConnectionError 247 | length_read = 0 248 | try: 249 | for _ in _r.iter_content(chunk_size): 250 | length_read += len(_) 251 | _r.iter_content_cb(_) 252 | yield _ 253 | except ConnectionError: # read timeout 254 | fail((ERR_IMAGE_BROKEN, r._real_url, r.url)) 255 | raise DownloadAbortedException() 256 | if length_read != r.content_length: 257 | fail((ERR_IMAGE_BROKEN, r._real_url, r.url)) 258 | raise DownloadAbortedException() 259 | 260 | suc((_yield, r._real_url, r.url)) 261 | 262 | return download_file 263 | 264 | 265 | def reset_quota(r, suc, fail): 266 | # reset quota response 267 | # reset quota if suc; finish task if fail 268 | pass 269 | -------------------------------------------------------------------------------- /xeHentai/i18n/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | # Contributor: 4 | # fffonion 5 | 6 | import importlib 7 | from ..const import * 8 | from . 
import en_us as lng_fallback 9 | 10 | try: 11 | _locale = LOCALE.lower() if LOCALE else 'en_us' 12 | if _locale in ('zh_cn', 'zh_sg'): 13 | _locale = 'zh_hans' 14 | elif _locale in ('zh_tw', 'zh_hk', 'zh_mo'): 15 | _locale = 'zh_hant' 16 | lng = importlib.import_module("%s.i18n.%s" % (SCRIPT_NAME, _locale)) 17 | except (ImportError, ValueError): 18 | lng = lng_fallback 19 | 20 | 21 | class _(object): 22 | def c(cls, code): 23 | _ = code not in lng.err_msg and \ 24 | (code not in lng_fallback.err_msg and \ 25 | (cls.ERR_NOMSG % code) or \ 26 | lng_fallback.err_msg[code] ) or \ 27 | lng.err_msg[code] 28 | return _ if PY3K else ( 29 | _ if isinstance(_, unicode) else _.decode('utf-8')) # cls.ERR_NOMSG % code is unicode 30 | 31 | def __getattr__(cls, idx): 32 | _ = not hasattr(lng, idx) and \ 33 | getattr(lng_fallback, idx) or \ 34 | getattr(lng, idx) 35 | return _ if PY3K else _.decode('utf-8') 36 | 37 | i18n = _() 38 | -------------------------------------------------------------------------------- /xeHentai/i18n/en_us.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | from ..const import * 4 | 5 | err_msg = { 6 | ERR_URL_NOT_RECOGNIZED: "url not recognized", 7 | ERR_CANT_DOWNLOAD_EXH: "can't download exhentai.org without login", 8 | ERR_ONLY_VISIBLE_EXH: "this gallery is only visible in exhentai.org", 9 | ERR_MALFORMED_HATHDL: "malformed .hathdl, can't parse", 10 | ERR_GALLERY_REMOVED: "this gallery has been removed, may be visible in exhentai", 11 | ERR_KEY_EXPIRED: "image url is expired", 12 | ERR_NO_PAGEURL_FOUND: "no page url found, change of site structure?", 13 | ERR_CONNECTION_ERROR: "a connection problem occurs", 14 | ERR_IP_BANNED: "IP has been banned, retry in %s", 15 | ERR_IMAGE_BROKEN: "downloaded image is broken", 16 | ERR_SCAN_REGEX_FAILED: "page parsing failed", 17 | ERR_QUOTA_EXCEEDED: "quota exceeded", 18 | ERR_TASK_NOT_FOUND: "no such task guid", 19 | ERR_TASK_LEVEL_UNDEF: "task filter level unknown", 20 | ERR_DELETE_RUNNING_TASK: "can't delete a running task", 21 | ERR_TASK_CANNOT_PAUSE: "this task can't be paused", 22 | ERR_TASK_CANNOT_RESUME: "this task can't be resumed", 23 | ERR_CANNOT_CREATE_DIR: "can't create directory %s", 24 | ERR_CANNOT_MAKE_ARCHIVE: "can't make archive %s", 25 | ERR_NOT_RANGE_FORMAT: "'%s' is not a range format, expecting '1-2' or '3'", 26 | # ERR_HATHDL_NOTFOUND: "hathdl not found", 27 | ERR_RPC_PARSE_ERROR: "Parse error.", 28 | ERR_RPC_INVALID_REQUEST: "Invalid request.", 29 | ERR_RPC_METHOD_NOT_FOUND: "Method not found.", 30 | ERR_RPC_INVALID_PARAMS: "Invalid method parameter(s).", 31 | ERR_RPC_UNAUTHORIZED: "Unauthorized", 32 | ERR_RPC_EXEC_ERROR: "", 33 | ERR_SAVE_SESSION_FAILED: "", 34 | } 35 | 36 | ERR_NOMSG = "undefined error message with code %d" 37 | 38 | XEH_OPT_DESC = "xeHentai Downloader NG" 39 | XEH_OPT_EPILOG = "Values shown as current is read from config.py " \ 40 | "and can be overriden by command line options. 
" \ 41 | "Discuss and bug reporting at https://yooooo.us/2013/xehentai" 42 | XEH_OPT_URLS = "gallery url(s) to download" 43 | XEH_OPT_u = "username" 44 | XEH_OPT_k = "password" 45 | XEH_OPT_c = "cookie string, will be overriden if given -u and -k" 46 | XEH_OPT_o = "download original images, needs to login (current: %(default)s)" 47 | XEH_OPT_t = "download threads count (current: %(default)d)" 48 | # XEH_OPT_f = "fast scan, guess page url from .hathdl file, not working everytime (current: %(default)s)" 49 | XEH_OPT_l = "define log path (current: %(default)s)" 50 | XEH_OPT_p = "set download proxies, can be used multiple times, currenlty supported: socks5/4a, http(s), glype. " \ 51 | "Proxies are only used on webpages by default (current: %(default)s)" 52 | XEH_OPT_proxy_image = "use proxies on images and webpages (current: %(default)s)" 53 | XEH_OPT_proxy_image_only = "only use proxies on images, not webpages (current: %(default)s)" 54 | XEH_OPT_d = "set download directory (current: %(default)s)" 55 | XEH_OPT_v = "show more detailed log (current: %(default)s)" 56 | XEH_OPT_i = "interactive mode, will be ignored in daemon mode (current: %(default)s)" 57 | XEH_OPT_r = "rename gallery image to original name, use sequence name if turned off (current: %(default)s)" 58 | XEH_OPT_daemon = "daemon mode, can't use with -i (current: %(default)s)" 59 | XEH_OPT_rpc_interface = "bind jsonrpc server to this address (current: %(default)s)" 60 | XEH_OPT_rpc_port = "bind jsonrpc server to this port (current: %(default)s)" 61 | XEH_OPT_rpc_secret = "jsonrpc secret string (current: %(default)s)" 62 | XEH_OPT_rpc_open_browser = "automatically open browser after RPC server starts (current: %(default)s)" 63 | XEH_OPT_a = "make an archive (.zip) after download and delete directory (current: %(default)s)" 64 | XEH_OPT_delete_task_files = "delete downloaded files when deleting a task (current: %(default)s)" 65 | XEH_OPT_j = "use Japanese title, use English/Romaji title if turned off (current: %(default)s)" 66 | XEH_OPT_download_range = "specify ranges of images to be downloaded, in format start-end, or single index, " \ 67 | "use comma to concat multiple ranges, e.g.: 5-10,15,20-25, default to download all images" 68 | XEH_OPT_timeout = "set image download timeout (current: %(default)ss)" 69 | XEH_OPT_low_speed = "retry download if speed is lower than specified value (current: %(default)s KB/s)" 70 | XEH_OPT_f = "download regardless of quota exceeded warning (current: %(default)s)" 71 | XEH_OPT_auto_update = "check or download update automatically (current: %(default)s)" 72 | XEH_OPT_update_beta_channel = "check update upon beta channel (current: %(default)s)" 73 | XEH_OPT_h = "show this help message and exit" 74 | XEH_OPT_version = "show program's version number and exit" 75 | XEH_OPT_IGNORING_I = "ignoring -i option in daemon mode" 76 | 77 | PS_LOGIN = "login to exhentai (y/n)? > " 78 | PS_USERNAME = "Username > " 79 | PS_PASSWD = "Password > " 80 | PS_URL = "URL (seperate with ,)> " 81 | PS_PROXY = "Proxy (optional) > " 82 | PS_DOWNLOAD_ORI = "Download original (y/n, default:%s)? > " 83 | PS_RENAME_ORI = "Rename to original name (y/n, default:%s)? > " 84 | PS_MAKE_ARCHIVE = "Make archive (y/n, default:%s)? > " 85 | PS_JPN_TITLE = "Use Japanese title (y/n, default:%s)? 
> " 86 | PS_DOWNLOAD_RANGE = "Download range, press enter to download all > " 87 | PS_DOWNLOAD_DIR = "Download to (default: %s)\npress enter or enter new > " 88 | 89 | PROXY_CANDIDATE_CNT = "proxy pool has %d candidates" 90 | 91 | TASK_PUT_INTO_WAIT = "task #%s already exists, put into waiting state" 92 | TASK_ERROR = "task #%s error: %s" 93 | TASK_MIGRATE_EXH = "task #%s migrate to exhentai.org" 94 | TASK_TITLE = "task #%s title %s" 95 | TASK_WILL_DOWNLOAD_CNT = "task #%s will download %d/%d files" 96 | TASK_START = "task #%s start" 97 | TASK_FINISHED = "task #%s download finished" 98 | TASK_START_PAGE_RESCAN = "task #%s resample detected, start full scan" 99 | # TASK_FAST_SCAN = "task #%s uses fast scan" 100 | TASK_START_MAKE_ARCHIVE = "task #%s start making archive" 101 | TASK_MAKE_ARCHIVE_FINISHED = "task #%s archive saved at: %s, use %.1fs" 102 | TASK_STOP_QUOTA_EXCEEDED = "task #%s quota exceeded" 103 | TASK_STUCK = "task #%s is stuck, there may be some bugs in xeHentai, or the connection is too slow" 104 | TASK_SLOW = "task #%s is slow, maybe image is too large or connection is too slow, consider use a proxy" 105 | TASK_UNFINISHED = "task #%s remaining these files undownloaded: %s" 106 | 107 | XEH_STARTED = "xeHentai %s started." 108 | XEH_LOOP_FINISHED = "application task loop finished" 109 | XEH_LOGIN_EXHENTAI = "login exhentai" 110 | XEH_LOGIN_OK = "login exhentai successfully" 111 | XEH_LOGIN_FAILED = "can't login exhentai, check your credentials or try another account.\nIt's recommended to login in browser and use RPC to transfer cookie to xeHentai (see http://t.cn/Rctr4Pf)" 112 | XEH_LOAD_TASKS_CNT = "load %d tasks from saved session" 113 | XEH_LOAD_OLD_COOKIE = "load cookie from legacy cookie file" 114 | XEH_DAEMON_START = "daemon start at PID %d" 115 | XEH_PLATFORM_NO_DAEMON = "daemon mode is not supported on platform: %s" 116 | XEH_CLEANUP = "cleaning up..." 
117 | XEH_CRITICAL_ERROR = "xeHentai throws critical error:\n%s" 118 | XEH_DOWNLOAD_ORI_NEED_LOGIN = "haven't login, so I won't download original images" 119 | XEH_FILE_DOWNLOADED = "file downloaded by thread-{} #{} {}" 120 | XEH_RENAME_HAS_ERRORS = "some files are not renamed:\n%s" 121 | XEH_DOWNLOAD_HAS_ERROR = "thread-%s retry #%s because of error: %s" 122 | XEH_SCAN_FAILED = "%s scan page %s failed: %s" 123 | 124 | RPC_STARTED = "RPC server listening on %s:%d" 125 | RPC_TOO_OPEN = "RPC server is listening on public interface (%s) but no rpc_secret defined, which is not safe" 126 | RPC_CANNOT_BIND = "RPC server can't listen on requested address: %s" 127 | RPC_WEBUI_PATH = "WebUI is accessible at %s or https://xehentai.yooooo.us" 128 | 129 | SESSION_LOAD_EXCEPTION = "exception occurs when loading saved session: %s" 130 | SESSION_WRITE_EXCEPTION = "exception occurs when writing saved session: %s" 131 | 132 | THREAD = "thread" 133 | THREAD_UNCAUGHT_EXCEPTION = "thread-%s uncaught exception\n%s" 134 | THREAD_MAY_BECOME_ZOMBIE = "thread-%s may became zombie" 135 | THREAD_SWEEP_OUT = "thread-%s is dead, deref it" 136 | THREAD_SPEED_TOO_LOW = "thread-%s retry because of low download speed: %s/s less than threshold %s/s" 137 | 138 | QUEUE = "queue" 139 | 140 | PROXY_DISABLE_BANNED = "disable a banned proxy, expire in about %ss" 141 | 142 | UPDATE_CHANNEL = "Update channel is: %s" 143 | UPDATE_DEV_CHANNEL = "dev" 144 | UPDATE_RELEASE_CHANNEL = "release" 145 | UPDATE_FAILED = "Failure when updating program: %s" 146 | UPDATE_COMPLETE = "Update is complete, it will take effect on next run" 147 | UPDATE_NO_UPDATE = "Program is up-to-date" 148 | UPDATE_AVAILABLE = "Update available: %s \"%s\" (%s)" 149 | UPDATE_DOWNLOAD_MANUALLY = "You can download update from https://dl.yooooo.us/share/xeHentai/" 150 | 151 | -------------------------------------------------------------------------------- /xeHentai/i18n/zh_hans.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | from ..const import * 4 | 5 | err_msg = { 6 | ERR_URL_NOT_RECOGNIZED: "网址不够绅士", 7 | ERR_CANT_DOWNLOAD_EXH: "需要登录后才能下载里站", 8 | ERR_ONLY_VISIBLE_EXH: "这个本子只有里站能看到", 9 | ERR_MALFORMED_HATHDL: "hathdl文件有猫饼,解析失败", 10 | ERR_GALLERY_REMOVED: "这个本子被移除了,大概里站能看到", 11 | ERR_KEY_EXPIRED: "下载链接不太正常", 12 | ERR_NO_PAGEURL_FOUND: "没有找到页面链接,网站改版了嘛?", 13 | ERR_CONNECTION_ERROR: "连接有问题?", 14 | ERR_IP_BANNED: "IP被ban了, 恢复时间: %s", 15 | ERR_IMAGE_BROKEN: "下载的图片有猫饼", 16 | ERR_SCAN_REGEX_FAILED: "网页解析失败", 17 | ERR_QUOTA_EXCEEDED: "配额超限", 18 | ERR_TASK_NOT_FOUND: "没有该GUID对应的任务", 19 | ERR_TASK_LEVEL_UNDEF: "任务过滤等级不存在", 20 | ERR_DELETE_RUNNING_TASK: "无法删除运行中的任务", 21 | ERR_TASK_CANNOT_PAUSE: "这个任务无法被暂停", 22 | ERR_TASK_CANNOT_RESUME: "这个任务无法被恢复", 23 | ERR_CANNOT_CREATE_DIR: "无法创建文件夹 %s", 24 | ERR_CANNOT_MAKE_ARCHIVE: "无法制作压缩包 %s", 25 | ERR_NOT_RANGE_FORMAT: "'%s'不符合范围的格式, 正确的格式为 1-3 或者 5", 26 | # ERR_HATHDL_NOTFOUND: "hathdl文件未找到" 27 | ERR_RPC_PARSE_ERROR: "Parse error.", 28 | ERR_RPC_INVALID_REQUEST: "Invalid request.", 29 | ERR_RPC_METHOD_NOT_FOUND: "Method not found.", 30 | ERR_RPC_INVALID_PARAMS: "Invalid method parameter(s).", 31 | ERR_RPC_UNAUTHORIZED: "Unauthorized", 32 | ERR_RPC_EXEC_ERROR: "", 33 | ERR_SAVE_SESSION_FAILED: "", 34 | } 35 | 36 | ERR_NOMSG = "未指定的错误,错误号 %d" 37 | 38 | XEH_OPT_DESC = "绅♂士下载器" 39 | XEH_OPT_EPILOG = "如果参数未指定,则使用config.py中的默认值; " \ 40 | "讨论和反馈问题:https://yooooo.us/2013/xehentai" 41 | XEH_OPT_URLS = "下载页的网址" 42 | XEH_OPT_u = "用户名" 43 | XEH_OPT_k = "密码" 44 | XEH_OPT_c = 
"Cookie字符串,如果指定了用户名和密码,此项会被忽略" 45 | XEH_OPT_o = "是否下载原始图片(如果存在),需要登录 (当前: %(default)s)" 46 | XEH_OPT_t = "下载线程数 (当前: %(default)d)" 47 | XEH_OPT_l = "保存日志的路径 (当前: %(default)s)" 48 | XEH_OPT_p = "设置代理, 可以指定多次, 当前支持的类型: socks5/4a, http(s), glype. 代理默认只用于扫描网页 (当前: %(default)s)" 49 | XEH_OPT_proxy_image = "同时使用代理来下载图片和扫描网页 (当前: %(default)s)" 50 | XEH_OPT_proxy_image_only = "仅使用代理来下载图片, 不用于扫描网页 (当前: %(default)s)" 51 | XEH_OPT_d = "设置下载目录 (当前: %(default)s)" 52 | XEH_OPT_v = "设置日志装逼等级 (当前: %(default)s)" 53 | XEH_OPT_i = "交互模式,如果开启后台模式,此项会被忽略 (当前: %(default)s)" 54 | XEH_OPT_r = "将图片重命名为原始名称,如果关闭则使用序号 (当前: %(default)s)" 55 | XEH_OPT_daemon = "后台模式 (当前: %(default)s)" 56 | XEH_OPT_rpc_interface = "设置JSON-RPC监听IP (当前: %(default)s)" 57 | XEH_OPT_rpc_port = "设置JSON-RPC监听端口 (当前: %(default)s)" 58 | XEH_OPT_rpc_secret = "设置JSON-RPC密钥 (当前: %(default)s)" 59 | XEH_OPT_rpc_open_browser = "RPC服务端启动后自动打开浏览器页面 (当前: %(default)s)" 60 | XEH_OPT_a = "下载完成后生成zip压缩包并删除下载目录 (当前: %(default)s)" 61 | XEH_OPT_delete_task_files = "删除任务时同时删除下载的文件 (当前: %(default)s)" 62 | XEH_OPT_j = "使用日语标题, 如果关闭则使用英文或罗马字标题 (当前: %(default)s)" 63 | XEH_OPT_download_range = "设置下载的图片范围, 格式为 开始位置-结束位置, 或者单张图片的位置, " \ 64 | "使用逗号来分隔多个范围, 例如 5-10,15,20-25, 默认为下载所有" 65 | XEH_OPT_timeout = "设置下载图片的超时 (当前: %(default)s秒)" 66 | XEH_OPT_low_speed = "设置最低下载速度,低于此值将换源重新下载 (当前: %(default)s KB/s)" 67 | XEH_OPT_f = "忽略配额判断,继续下载 (当前: %(default)s)" 68 | XEH_OPT_auto_update = "检查并自动下载更新 (当前: %(default)s)" 69 | XEH_OPT_update_beta_channel = "是否更新到测试分支 (当前: %(default)s)" 70 | XEH_OPT_h = "显示本帮助信息" 71 | XEH_OPT_version = "显示版本信息" 72 | XEH_OPT_IGNORING_I = "后台模式已忽略 -i 参数" 73 | 74 | 75 | PS_LOGIN = "当前没有登陆,要登陆吗 (y/n)? > " 76 | PS_USERNAME = "输入用户名 > " 77 | PS_PASSWD = "输入密码 > " 78 | PS_URL = "输入地址(使用,分割下载多个)> " 79 | PS_PROXY = "输入代理地址 (可选) > " 80 | PS_DOWNLOAD_ORI = "是否下载原图(默认%s) (y/n)? > " 81 | PS_RENAME_ORI = "是否自动重命名(默认%s) (y/n)? > " 82 | PS_MAKE_ARCHIVE = "是否制作zip压缩包(默认%s) (y/n)? > " 83 | PS_JPN_TITLE = "是否使用日语标题(默认%s) (y/n)? > " 84 | PS_DOWNLOAD_RANGE = "下载范围, 使用逗号分割多个范围, 回车下载全部 > " 85 | PS_DOWNLOAD_DIR = "下载目录 (当前: %s)\n回车确认或输入新路径 > " 86 | 87 | PROXY_CANDIDATE_CNT = "代理池中有%d个代理" 88 | 89 | TASK_PUT_INTO_WAIT = "任务 #%s 已存在, 加入等待队列" 90 | TASK_ERROR = "任务 #%s 发生错误: %s" 91 | TASK_MIGRATE_EXH = "任务 #%s 使用里站地址重新下载" 92 | TASK_TITLE = "任务 #%s 标题 %s" 93 | TASK_WILL_DOWNLOAD_CNT = "任务 #%s 将下载%d个文件,共%d个 " 94 | TASK_START = "任务 #%s 开始" 95 | TASK_FINISHED = "任务 #%s 下载完成" 96 | TASK_START_PAGE_RESCAN = "任务 #%s 图片被缩放,进行完整扫描" 97 | # TASK_FAST_SCAN = "任务 #%s 使用快速扫描" 98 | TASK_START_MAKE_ARCHIVE = "任务 #%s 开始打包" 99 | TASK_MAKE_ARCHIVE_FINISHED = "任务 #%s 打包完成,保存在: %s, 用时%.1f秒" 100 | TASK_STOP_QUOTA_EXCEEDED = "任务 #%s 配额超限" 101 | TASK_STUCK = "任务 #%s 卡住了, 可能是脚本有bug, 或者网络连接太慢了" 102 | TASK_SLOW = "任务 #%s 有点慢, 可能是图片太大了,或者网络连接太慢了; 可以考虑使用代理" 103 | TASK_UNFINISHED = "任务 #%s 剩余以下图片未下载: %s" 104 | 105 | XEH_STARTED = "xeHentai %s 已启动" 106 | XEH_LOOP_FINISHED = "程序循环已完成" 107 | XEH_LOGIN_EXHENTAI = "登录绅士" 108 | XEH_LOGIN_OK = "已成为绅士" 109 | XEH_LOGIN_FAILED = "无法登录绅士;检查输入是否有误或者换一个帐号。\n推荐在浏览器登录后使用RPC复制cookie到xeHentai (教程: http://t.cn/Rctr4Pf)" 110 | XEH_LOAD_TASKS_CNT = "从存档中读取了%d个任务" 111 | XEH_LOAD_OLD_COOKIE = "从1.x版cookie文件从读取了登录信息" 112 | XEH_DAEMON_START = "后台进程已启动,PID为%d" 113 | XEH_PLATFORM_NO_DAEMON = "后台模式不支持您的系统: %s" 114 | XEH_CLEANUP = "擦干净..." 
115 | XEH_CRITICAL_ERROR = "xeHentai 抽风啦:\n%s" 116 | XEH_DOWNLOAD_ORI_NEED_LOGIN = "下载原图需要登录" 117 | XEH_FILE_DOWNLOADED = "绅士-{} 已下载图片 #{} {}" 118 | XEH_RENAME_HAS_ERRORS = "部分图片重命名失败:\n%s" 119 | XEH_DOWNLOAD_HAS_ERROR = "绅士-%s 下载图片 #%s 时出错: %s, 将在稍后重试" 120 | XEH_SCAN_FAILED = "%s 扫描页面 %s 失败: %s" 121 | 122 | RPC_STARTED = "RPC服务器监听在 %s:%d" 123 | RPC_TOO_OPEN = "RPC服务器监听在公网IP (%s),为了安全起见应该设置rpc_secret" 124 | RPC_CANNOT_BIND = "RPC服务器无法启动:%s" 125 | RPC_WEBUI_PATH = "WebUI 地址为 %s 或者 https://xehentai.yooooo.us" 126 | 127 | SESSION_LOAD_EXCEPTION = "读取存档时遇到错误: %s" 128 | SESSION_WRITE_EXCEPTION = "写入存档时遇到错误: %s" 129 | 130 | THREAD = "绅士" 131 | THREAD_UNCAUGHT_EXCEPTION = "绅士-%s 未捕获的异常\n%s" 132 | THREAD_MAY_BECOME_ZOMBIE = "绅士-%s 可能变成了丧尸" 133 | THREAD_SWEEP_OUT = "绅士-%s 挂了, 不再理它" 134 | THREAD_SPEED_TOO_LOW = "绅士-%s 下载速度只有 %s/s, 低于 %s/s, 将在稍后重试" 135 | 136 | QUEUE = "队列" 137 | 138 | PROXY_DISABLE_BANNED = "禁用了一个被ban的代理,将在约%s秒后恢复" 139 | 140 | UPDATE_CHANNEL = "更新渠道为: %s" 141 | UPDATE_DEV_CHANNEL = "测试版" 142 | UPDATE_RELEASE_CHANNEL = "正式版" 143 | UPDATE_FAILED = "更新时遇到错误: %s" 144 | UPDATE_COMPLETE = "更新完成,请重新启动程序应用更新" 145 | UPDATE_NO_UPDATE = "没有可用更新" 146 | UPDATE_AVAILABLE = "发现可用的更新: 发布于 %s \"%s\" (%s)" 147 | UPDATE_DOWNLOAD_MANUALLY = "可以从 https://dl.yooooo.us/share/xeHentai/ 下载更新" 148 | -------------------------------------------------------------------------------- /xeHentai/i18n/zh_hant.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | from ..const import * 4 | 5 | err_msg = { 6 | ERR_URL_NOT_RECOGNIZED: "網址不夠紳士", 7 | ERR_CANT_DOWNLOAD_EXH: "需要登錄後才能下載里站", 8 | ERR_ONLY_VISIBLE_EXH: "這個本子只有里站能看到", 9 | ERR_MALFORMED_HATHDL: "hathdl文件有貓餅,解析失敗", 10 | ERR_GALLERY_REMOVED: "這個本子被移除了,大概里站能看到", 11 | ERR_KEY_EXPIRED: "下載鏈接不太正常", 12 | ERR_NO_PAGEURL_FOUND: "沒有找到頁面鏈接,網站改版了嘛?", 13 | ERR_CONNECTION_ERROR: "連接有問題?", 14 | ERR_IP_BANNED: "IP被ban了, 恢復時間: %s", 15 | ERR_IMAGE_BROKEN: "下載的圖片有貓餅", 16 | ERR_SCAN_REGEX_FAILED: "網頁解析失敗", 17 | ERR_QUOTA_EXCEEDED: "配額超限", 18 | ERR_TASK_NOT_FOUND: "沒有該GUID對應的任務", 19 | ERR_TASK_LEVEL_UNDEF: "任務過濾等級不存在", 20 | ERR_DELETE_RUNNING_TASK: "無法刪除運行中的任務", 21 | ERR_TASK_CANNOT_PAUSE: "這個任務無法被暫停", 22 | ERR_TASK_CANNOT_RESUME: "這個任務無法被恢復", 23 | ERR_CANNOT_CREATE_DIR: "無法創建文件夾 %s", 24 | ERR_CANNOT_MAKE_ARCHIVE: "無法製作壓縮包 %s", 25 | ERR_NOT_RANGE_FORMAT: "'%s'不符合範圍的格式, 正確的格式為 1-3 或者 5", 26 | # ERR_HATHDL_NOTFOUND: "hathdl文件未找到" 27 | ERR_RPC_PARSE_ERROR: "Parse error.", 28 | ERR_RPC_INVALID_REQUEST: "Invalid request.", 29 | ERR_RPC_METHOD_NOT_FOUND: "Method not found.", 30 | ERR_RPC_INVALID_PARAMS: "Invalid method parameter(s).", 31 | ERR_RPC_UNAUTHORIZED: "Unauthorized", 32 | ERR_RPC_EXEC_ERROR: "", 33 | ERR_SAVE_SESSION_FAILED: "", 34 | } 35 | 36 | ERR_NOMSG = "未指定的錯誤,錯誤號 %d" 37 | 38 | XEH_OPT_DESC = "紳♂士下載器" 39 | XEH_OPT_EPILOG = "如果參數未指定,則使用config.py中的默認值; " \ 40 | "討論和反饋問題:https://yooooo.us/2013/xehentai" 41 | XEH_OPT_URLS = "下載頁的網址" 42 | XEH_OPT_u = "用戶名" 43 | XEH_OPT_k = "密碼" 44 | XEH_OPT_c = "Cookie字符串,如果指定了用戶名和密碼,此項會被忽略" 45 | XEH_OPT_o = "是否下載原始圖片(如果存在),需要登錄 (當前: %(default)s)" 46 | XEH_OPT_t = "下載線程數 (當前: %(default)d)" 47 | XEH_OPT_l = "保存日誌的路徑 (當前: %(default)s)" 48 | XEH_OPT_p = "設置代理, 可以指定多次, 當前支持的類型: socks5/4a, http(s), glype. 
代理默認只用於掃描網頁 (當前: %(default)s)" 49 | XEH_OPT_proxy_image = "同時使用代理來下載圖片和掃描網頁 (當前: %(default)s)" 50 | XEH_OPT_proxy_image_only = "僅使用代理來下載圖片, 不用於掃描網頁 (當前: %(default)s)" 51 | XEH_OPT_d = "設置下載目錄 (當前: %(default)s)" 52 | XEH_OPT_v = "設置日誌裝逼等級 (當前: %(default)s)" 53 | XEH_OPT_i = "交互模式,如果開啟後台模式,此項會被忽略 (當前: %(default)s)" 54 | XEH_OPT_r = "將圖片重命名為原始名稱,如果關閉則使用序號 (當前: %(default)s)" 55 | XEH_OPT_daemon = "後台模式 (當前: %(default)s)" 56 | XEH_OPT_rpc_interface = "設置JSON-RPC監聽IP (當前: %(default)s)" 57 | XEH_OPT_rpc_port = "設置JSON-RPC監聽埠 (當前: %(default)s)" 58 | XEH_OPT_rpc_secret = "設置JSON-RPC密鑰 (當前: %(default)s)" 59 | XEH_OPT_rpc_open_browser = "RPC服務端啟動後自動打開瀏覽器頁面 (當前: %(default)s)" 60 | XEH_OPT_a = "下載完成後生成zip壓縮包並刪除下載目錄 (當前: %(default)s)" 61 | XEH_OPT_delete_task_files = "刪除任務時同時刪除下載的文件 (當前: %(default)s)" 62 | XEH_OPT_j = "使用日語標題, 如果關閉則使用英文或羅馬字標題 (當前: %(default)s)" 63 | XEH_OPT_download_range = "設置下載的圖片範圍, 格式為 開始位置-結束位置, 或者單張圖片的位置, " \ 64 | "使用逗號來分隔多個範圍, 例如 5-10,15,20-25, 默認為下載所有" 65 | XEH_OPT_timeout = "設置下載圖片的超時 (當前: %(default)s秒)" 66 | XEH_OPT_low_speed = "設置最低下載速度,低於此值將換源重新下載 (當前: %(default)s KB/s)" 67 | XEH_OPT_f = "忽略配額判斷,繼續下載 (當前: %(default)s)" 68 | XEH_OPT_auto_update = "檢查並自動下載更新 (當前: %(default)s)" 69 | XEH_OPT_update_beta_channel = "是否更新到測試分支 (當前: %(default)s)" 70 | XEH_OPT_h = "顯示本幫助信息" 71 | XEH_OPT_version = "顯示版本信息" 72 | XEH_OPT_IGNORING_I = "後台模式已忽略 -i 參數" 73 | 74 | 75 | PS_LOGIN = "當前沒有登陸,要登陸嗎 (y/n)? > " 76 | PS_USERNAME = "輸入用戶名 > " 77 | PS_PASSWD = "輸入密碼 > " 78 | PS_URL = "輸入地址(使用,分割下載多個)> " 79 | PS_PROXY = "輸入代理地址 (可選) > " 80 | PS_DOWNLOAD_ORI = "是否下載原圖(默認%s) (y/n)? > " 81 | PS_RENAME_ORI = "是否自動重命名(默認%s) (y/n)? > " 82 | PS_MAKE_ARCHIVE = "是否製作zip壓縮包(默認%s) (y/n)? > " 83 | PS_JPN_TITLE = "是否使用日語標題(默認%s) (y/n)? > " 84 | PS_DOWNLOAD_RANGE = "下載範圍, 使用逗號分割多個範圍, 回車下載全部 > " 85 | PS_DOWNLOAD_DIR = "下載目錄 (當前: %s)\n回車確認或輸入新路徑 > " 86 | 87 | PROXY_CANDIDATE_CNT = "代理池中有%d個代理" 88 | 89 | TASK_PUT_INTO_WAIT = "任務 #%s 已存在, 加入等待隊列" 90 | TASK_ERROR = "任務 #%s 發生錯誤: %s" 91 | TASK_MIGRATE_EXH = "任務 #%s 使用里站地址重新下載" 92 | TASK_TITLE = "任務 #%s 標題 %s" 93 | TASK_WILL_DOWNLOAD_CNT = "任務 #%s 將下載%d個文件,共%d個 " 94 | TASK_START = "任務 #%s 開始" 95 | TASK_FINISHED = "任務 #%s 下載完成" 96 | TASK_START_PAGE_RESCAN = "任務 #%s 圖片被縮放,進行完整掃描" 97 | # TASK_FAST_SCAN = "任務 #%s 使用快速掃描" 98 | TASK_START_MAKE_ARCHIVE = "任務 #%s 開始打包" 99 | TASK_MAKE_ARCHIVE_FINISHED = "任務 #%s 打包完成,保存在: %s, 用時%.1f秒" 100 | TASK_STOP_QUOTA_EXCEEDED = "任務 #%s 配額超限" 101 | TASK_STUCK = "任務 #%s 卡住了, 可能是腳本有bug, 或者網絡連接太慢了" 102 | TASK_SLOW = "任務 #%s 有點慢, 可能是圖片太大了,或者網絡連接太慢了; 可以考慮使用代理" 103 | TASK_UNFINISHED = "任務 #%s 剩餘以下圖片未下載: %s" 104 | 105 | XEH_STARTED = "xeHentai %s 已啟動" 106 | XEH_LOOP_FINISHED = "程序循環已完成" 107 | XEH_LOGIN_EXHENTAI = "登錄紳士" 108 | XEH_LOGIN_OK = "已成為紳士" 109 | XEH_LOGIN_FAILED = "無法登錄紳士;檢查輸入是否有誤或者換一個帳號。\n推薦在瀏覽器登錄後使用RPC複製cookie到xeHentai (教程: http://t.cn/Rctr4Pf)" 110 | XEH_LOAD_TASKS_CNT = "從存檔中讀取了%d個任務" 111 | XEH_LOAD_OLD_COOKIE = "從1.x版cookie文件從讀取了登錄信息" 112 | XEH_DAEMON_START = "後台進程已啟動,PID為%d" 113 | XEH_PLATFORM_NO_DAEMON = "後台模式不支持您的系統: %s" 114 | XEH_CLEANUP = "擦乾淨..." 
115 | XEH_CRITICAL_ERROR = "xeHentai 抽風啦:\n%s" 116 | XEH_DOWNLOAD_ORI_NEED_LOGIN = "下載原圖需要登錄" 117 | XEH_FILE_DOWNLOADED = "紳士-{} 已下載圖片 #{} {}" 118 | XEH_RENAME_HAS_ERRORS = "部分圖片重命名失敗:\n%s" 119 | XEH_DOWNLOAD_HAS_ERROR = "紳士-%s 下載圖片 #%s 時出錯: %s, 將在稍後重試" 120 | XEH_SCAN_FAILED = "%s 掃描頁面 %s 失敗: %s" 121 | 122 | RPC_STARTED = "RPC伺服器監聽在 %s:%d" 123 | RPC_TOO_OPEN = "RPC伺服器監聽在公網IP (%s),為了安全起見應該設置rpc_secret" 124 | RPC_CANNOT_BIND = "RPC伺服器無法啟動:%s" 125 | RPC_WEBUI_PATH = "WebUI 地址為 %s 或者 https://xehentai.yooooo.us" 126 | 127 | SESSION_LOAD_EXCEPTION = "讀取存檔時遇到錯誤: %s" 128 | SESSION_WRITE_EXCEPTION = "寫入存檔時遇到錯誤: %s" 129 | 130 | THREAD = "紳士" 131 | THREAD_UNCAUGHT_EXCEPTION = "紳士-%s 未捕獲的異常\n%s" 132 | THREAD_MAY_BECOME_ZOMBIE = "紳士-%s 可能變成了喪屍" 133 | THREAD_SWEEP_OUT = "紳士-%s 掛了, 不再理它" 134 | THREAD_SPEED_TOO_LOW = "紳士-%s 下載速度只有 %s/s, 低於 %s/s, 將在稍後重試" 135 | 136 | QUEUE = "隊列" 137 | 138 | PROXY_DISABLE_BANNED = "禁用了一個被ban的代理,將在約%s秒後恢復" 139 | 140 | UPDATE_CHANNEL = "更新渠道為: %s" 141 | UPDATE_DEV_CHANNEL = "測試版" 142 | UPDATE_RELEASE_CHANNEL = "正式版" 143 | UPDATE_FAILED = "更新時遇到錯誤: %s" 144 | UPDATE_COMPLETE = "更新完成,請重新啟動程序應用更新" 145 | UPDATE_NO_UPDATE = "沒有可用更新" 146 | UPDATE_AVAILABLE = "發現可用的更新: 發布於 %s \"%s\" (%s)" 147 | UPDATE_DOWNLOAD_MANUALLY = "可以從 https://dl.yooooo.us/share/xeHentai/ 下載更新" 148 | -------------------------------------------------------------------------------- /xeHentai/proxy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | # Contributor: 4 | # fffonion 5 | 6 | import re 7 | import time 8 | import random 9 | from requests.exceptions import ConnectTimeout, ConnectionError, ProxyError, InvalidSchema 10 | from requests.packages.urllib3.exceptions import ProxySchemeUnknown 11 | from . 
import util 12 | from .const import * 13 | 14 | MAX_FAIL = 5 15 | 16 | class PoolException(Exception): 17 | pass 18 | 19 | class LowSpeedException(Exception): 20 | pass 21 | 22 | class Pool(object): 23 | def __init__(self, disable_policy = None): 24 | self.proxies = {} 25 | self.errors = {} 26 | if not disable_policy: 27 | self.disable_policy = lambda x, y: y >= MAX_FAIL 28 | else: 29 | self.disable_policy = disable_policy 30 | self.disabled = {} # key: expire 31 | 32 | def proxied_request(self, session): 33 | for d in self.disabled: 34 | if 0 < self.disabled[d] < time.time(): 35 | try: 36 | del self.disabled[d] 37 | except: 38 | pass 39 | l = [i for i in self.proxies.keys() if i not in self.disabled] 40 | if not l: 41 | raise PoolException("try to use proxy but no proxies avaliable") 42 | # _ = self.proxies[random.choice(l)] 43 | _ = self.proxies[l[0]] 44 | return _[0](session), self.not_good(l[0]) 45 | 46 | def has_available_proxies(self): 47 | return len([i for i in self.proxies.keys() if i not in self.disabled]) == 0 48 | 49 | def not_good(self, addr): 50 | def n(weight = MAX_FAIL, expire = 0): 51 | self.proxies[addr][2] += weight 52 | if self.disable_policy(*self.proxies[addr][1:]): 53 | # add to disabled set 54 | self.disabled[addr] = expire + time.time() 55 | return n 56 | 57 | def trace_proxy(self, addr, weight = 1, check_func = None, exceptions = []): 58 | def _(func): 59 | def __(*args, **kwargs): 60 | ex = None 61 | try: 62 | r = func(*args, **kwargs) 63 | except Exception as _ex: 64 | ex = _ex 65 | for e in [ConnectTimeout, ConnectionError, ProxyError] + exceptions: 66 | if isinstance(ex, e): 67 | # ignore BadStatusLine, this doesn't mean the proxy is bad 68 | if e == ConnectionError and 'BadStatusLine' in str(e): 69 | continue 70 | self.proxies[addr][2] += weight 71 | break 72 | else: 73 | if check_func and not check_func(r): 74 | self.proxies[addr][2] += weight 75 | else: 76 | # suc count + 1 77 | self.proxies[addr][1] += weight 78 | if self.disable_policy(*self.proxies[addr][1:]): 79 | # add to disabled set and never expire 80 | self.disabled[addr] = 0 81 | # print(self.proxies[addr]) 82 | if ex: 83 | # import traceback 84 | # traceback.print_exc() 85 | raise ex 86 | return r 87 | return __ 88 | return _ 89 | 90 | def add_proxy(self, addr): 91 | if re.match("socks[45][ah]*://([^:^/]+)(\:\d{1,5})*/*$", addr): 92 | p = socks_proxy(addr, self.trace_proxy) 93 | elif re.match("https*://([^:^/]+)(\:\d{1,5})*/*$", addr): 94 | p = http_proxy(addr, self.trace_proxy) 95 | elif re.match("https*://([^:^/]+)(\:\d{1,5})*/.+\.php\?.*b=.+", addr): 96 | p = glype_proxy(addr, self.trace_proxy) 97 | else: 98 | raise ValueError("%s is not an acceptable proxy address" % addr) 99 | self.proxies[addr] = [p, 0, 0] 100 | 101 | def socks_proxy(addr, trace_proxy): 102 | proxy_info = { 103 | 'http':addr, 104 | 'https':addr 105 | } 106 | def handle(session): 107 | @trace_proxy(addr, exceptions = [ProxySchemeUnknown, InvalidSchema]) 108 | def f(*args, **kwargs): 109 | kwargs.update({'proxies': proxy_info}) 110 | return session.request(*args, **kwargs) 111 | return f 112 | return handle 113 | 114 | def http_proxy(addr, trace_proxy): 115 | proxy_info = { 116 | 'http':addr, 117 | 'https':addr 118 | } 119 | def handle(session): 120 | @trace_proxy(addr) 121 | def f(*args, **kwargs): 122 | kwargs.update({'proxies': proxy_info}) 123 | return session.request(*args, **kwargs) 124 | return f 125 | return handle 126 | 127 | def glype_proxy(addr, trace_proxy): 128 | g_session = {"s":""} 129 | def handle(session, 
g_session = g_session): 130 | import urllib 131 | argname = re.findall('[&\?]([a-zA-Z\._]+)=[^\d]*', addr)[0] 132 | bval = re.findall('[&\?]b=(\d*)', addr) 133 | bval = bval[0] if bval else '4' 134 | server, inst_loc, script = re.findall('(https*://[^/]+)/(.*?)([^/]+\.php)', addr)[0] 135 | urlre = re.compile('/%s%s\?u=([^&"\']+)&[^"\']+' % (inst_loc, script)) 136 | def mkurl(url): 137 | return "%s/%s%s?%s=%s&b=%s&f=norefer" % ( 138 | server, inst_loc, script, argname, 139 | (urllib.parse if PY3K else urllib).quote_plus(url), bval) 140 | @trace_proxy(addr) 141 | def f(*args, **kwargs): 142 | # change url 143 | url = args[1] 144 | args = (args[0], mkurl(url),) 145 | kwargs['headers'] = dict(session.headers) 146 | # anti hotlinking 147 | kwargs['headers'].update({'Referer':"%s/%s%s" % (server, inst_loc, script)}) 148 | _coo_new = dict(g_session) if g_session['s'] else {} 149 | if 'Cookie' in kwargs['headers']: 150 | site = re.findall('https*://([^/]+)/*', url)[0] 151 | _coo_old = util.parse_cookie(kwargs['headers']['Cookie']) 152 | for k in _coo_old: 153 | _coo_new["c[%s][/][%s]" % (site, k)] = _coo_old[k] 154 | kwargs['headers']['Cookie'] = util.make_cookie(_coo_new) 155 | tried = 0 156 | while True: 157 | if tried == 2: 158 | raise PoolException("can't bypass glype https warning") 159 | rt = session.request(*args, **kwargs) 160 | if '' not in rt.text: 161 | break 162 | rt = session.request("GET", "%s/%sincludes/process.php?action=sslagree" % (server, inst_loc), 163 | allow_redirects = False, **kwargs) 164 | if rt.headers.get('set-cookie'): 165 | _coo_new.update(util.parse_cookie(rt.headers.get('set-cookie').replace(",", ";"))) 166 | kwargs['headers']['Cookie'] = util.make_cookie(_coo_new) 167 | if 's' in _coo_new: 168 | g_session["s"] = _coo_new['s'] 169 | # print(g_session) 170 | tried += 1 171 | 172 | if rt.headers.get('set-cookie'): 173 | coo = util.parse_cookie(rt.headers.get('set-cookie').replace(",", ";")) 174 | for k in list(coo.keys()): 175 | _ = re.findall('c\[[^]]+\]\[[^]]+\]\[([^]]+)\]', k) 176 | if _: 177 | coo[_[0]] = coo[k] 178 | rt.headers['set-cookie'] = util.make_cookie(coo) 179 | # change url back, only change on text/* mime types 180 | rt.url = url 181 | if rt.headers.get('content-type').startswith("text"): 182 | if PY3K: 183 | rt._content = rt._content.decode('utf-8') 184 | _ = re.match('
(.*?)
', rt.content) 185 | if _: 186 | raise PoolException("glype returns: %s" % _[0]) 187 | # change transformed url back 188 | rt._content = urlre.sub(lambda x:(urllib.parse if PY3K else urllib).unquote(x.group(1)), rt._content) 189 | if PY3K: 190 | rt._content = rt._content.encode('utf-8') 191 | return rt 192 | 193 | return f 194 | return handle 195 | 196 | if __name__ == '__main__': 197 | import requests 198 | p = Pool() 199 | p.add_proxy("sock5://127.0.0.1:16961") 200 | print(p.proxied_request(requests.Session())("GET", "http://ipip.tk", headers = {}, timeout = 2).headers) 201 | -------------------------------------------------------------------------------- /xeHentai/rpc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | # Contributor: 4 | # fffonion 5 | 6 | import re 7 | import time 8 | import json 9 | import zipfile 10 | import traceback 11 | from hashlib import md5 12 | from threading import Thread 13 | import zlib 14 | import requests 15 | import pickle 16 | from .const import * 17 | from .const import __version__ 18 | from .i18n import i18n 19 | if PY3K: 20 | from socketserver import ThreadingMixIn 21 | from http.server import HTTPServer, BaseHTTPRequestHandler 22 | from io import BytesIO as StringIO 23 | from urllib.parse import urlparse 24 | else: 25 | from SocketServer import ThreadingMixIn 26 | from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler 27 | from cStringIO import StringIO 28 | from urlparse import urlparse 29 | 30 | cmdre = re.compile("([a-z])([A-Z])") 31 | pathre = re.compile("/(?:jsonrpc|img/|zip/|static/|ui/$)") 32 | staticre = re.compile("/static/") 33 | imgpathre = re.compile("/img/") 34 | zippathre = re.compile("/zip/") 35 | 36 | version_str = "xeHentai/%s" % __version__ 37 | 38 | class RPCServer(Thread): 39 | def __init__(self, xeH, bind_addr, secret = None, open_browser = True, logger = None, exit_check = None): 40 | Thread.__init__(self, name = "rpc") 41 | Thread.setDaemon(self, True) 42 | self.xeH = xeH 43 | self.bind_addr = bind_addr 44 | self.secret = secret 45 | self.logger = logger 46 | self.server = None 47 | self.open_browser = open_browser 48 | self._exit = exit_check if exit_check else lambda x:False 49 | 50 | def run(self): 51 | try: 52 | self.server = ThreadedHTTPServer(self.bind_addr, lambda *x: Handler(self.xeH, self.secret, *x)) 53 | except Exception as ex: 54 | self.logger.error(i18n.RPC_CANNOT_BIND % traceback.format_exc()) 55 | else: 56 | self.logger.info(i18n.RPC_STARTED % (self.bind_addr[0], self.bind_addr[1])) 57 | url = "http://%s:%s/ui/#host=%s,port=%s,https=no" % ( 58 | self.bind_addr[0], self.bind_addr[1], 59 | self.bind_addr[0], self.bind_addr[1] 60 | ) 61 | if self.secret: 62 | url = url + ",token=" + self.secret 63 | if self.open_browser: 64 | import webbrowser 65 | webbrowser.open(url) 66 | else: 67 | self.logger.info(i18n.RPC_WEBUI_PATH % url) 68 | while not self._exit("rpc"): 69 | self.server.handle_request() 70 | 71 | def is_readable_obj(obj): 72 | return hasattr(obj, "read") 73 | 74 | def is_str_obj(obj): 75 | if PY3K: 76 | return isinstance(obj, str) 77 | return isinstance(obj, basestring) 78 | 79 | def hash_link(secret, url): 80 | _ = "%s-xehentai-%s" % (secret if secret else "", url) 81 | if PY3K: 82 | _ = _.encode('utf-8') 83 | return md5(_).hexdigest()[:8] 84 | 85 | def gen_thumbnail(fh, args): 86 | # returns a new file handler if resized 87 | # and a boolean indicates there'e error 88 | try: 89 | import PIL.Image as Image 90 | except: 
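        # (added note) Pillow is an optional dependency here: when the import
        # fails, the untouched file handle is returned and the second value
        # (True) signals that no thumbnail could be generated.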
 91 |         return fh, True
 92 |     if 'w' not in args and 'h' not in args:
 93 |         return fh, False
 94 |     size = (int(args['w']) if 'w' in args else int(args['h']),
 95 |             int(args['h']) if 'h' in args else int(args['w']))
 96 |     if not is_readable_obj(fh):
 97 |         fh = StringIO(fh)
 98 |     with Image.open(fh) as img:
 99 |         img.thumbnail(size)
100 |         ret_fh = StringIO()
101 |         img.save(ret_fh, format=img.format)
102 |     ret = ret_fh.getvalue()
103 |     ret_fh.close()
104 |     fh.close()
105 |     return ret, False
106 | 
107 | def jsonrpc_resp(request, ret = None, error_code = None, error_msg = None):
108 |     r = {
109 |         "id":None if not request["id"] else request["id"],
110 |         "jsonrpc":"2.0",
111 |     }
112 |     if error_code:
113 |         r['error'] = {
114 |             'code':error_code,
115 |             "message":i18n.c(error_code) if not error_msg else error_msg
116 |         }
117 |     else:
118 |         r['result'] = ret
119 |     return json.dumps(r)
120 | 
121 | def path_filter(func):
122 |     def f(self):
123 |         if not pathre.match(self.path):
124 |             self.send_response(404)
125 |             self.send_header("Access-Control-Allow-Origin", "*")
126 |             self.end_headers()
127 |             self.wfile.write(b'\n')
128 |             return
129 |         func(self)
130 |     return f
131 | 
132 | def load_cache():
133 |     if os.path.exists(STATIC_CACHE_FILE):
134 |         try:
135 |             with open(STATIC_CACHE_FILE, "rb") as f:
136 |                 r = zlib.decompress(f.read())
137 |                 r = pickle.loads(r)
138 |                 if 'v' in r and r['v'] == STATIC_CACHE_VERSION:
139 |                     return r
140 |         except:
141 |             pass
142 |     return { "v": STATIC_CACHE_VERSION }
143 | 
144 | def save_cache(static_cache):
145 |     r = pickle.dumps(static_cache)
146 |     r = zlib.compress(r)
147 |     with open(STATIC_CACHE_FILE, "wb") as f:
148 |         f.write(r)
149 | 
150 | static_cache = load_cache()
151 | class Handler(BaseHTTPRequestHandler):
152 | 
153 |     def __init__(self, xeH, secret, *args):
154 |         self.secret = secret
155 |         self.args = args
156 |         self.xeH = xeHentaiRPCExtended(xeH, secret)
157 |         self.http = requests.Session()
158 |         BaseHTTPRequestHandler.__init__(self, *args)
159 | 
160 |     def version_string(self):
161 |         return version_str
162 | 
163 |     def serve_file(self, f):
164 |         if hasattr(self.xeH, "_monitor"):
165 |             _task = self.xeH._monitor.task
166 |             # needed to lock between archiver
167 |             _task._f_lock.acquire()
168 |         f.seek(0, os.SEEK_END)
169 |         size = f.tell()
170 |         self.xeH.logger.verbose("GET %s 200 %d %s" % (self.path, size, self.client_address[0]))
171 |         self.send_header("Content-Length", size)
172 |         f.seek(0, os.SEEK_SET)
173 |         self.end_headers()
174 |         while True:
175 |             buf = f.read(51200)
176 |             if not buf:
177 |                 break
178 |             self.wfile.write(buf)
179 |         if hasattr(self.xeH, "_monitor"):
180 |             _task._f_lock.release()
181 |         return size
182 | 
183 |     def do_OPTIONS(self):
184 |         self.send_response(200)
185 |         self.send_header("Access-Control-Allow-Origin", "*")
186 |         self.send_header("Access-Control-Allow-Headers", "Content-Type")
187 |         self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
188 |         self.send_header("Access-Control-Max-Age", "1728000")
189 |         self.end_headers()
190 |         self.wfile.write(b'\n')
191 | 
192 |     @path_filter
193 |     def do_GET(self):
194 |         code = 200
195 |         rt = b''
196 |         mime = "text/html"
197 |         path = self.path
198 |         while True:
199 |             if imgpathre.match(path):
200 |                 args = dict(q.split("=") for q in urlparse(path).query.split("&") if q)
201 |                 _ = urlparse(path).path.split("/")
202 |                 if len(_) < 5:
203 |                     code = 400
204 |                     break
205 |                 _, _, _hash, guid, fid = _[:5]
206 |                 right_hash = hash_link(self.secret, "%s/%s" % (guid, fid))
207 |                 if right_hash != _hash:
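                    # image links look like /img/<hash>/<guid>/<fid>, where <hash>
                    # is hash_link(secret, "<guid>/<fid>"); a link that does not
                    # re-hash to the same value is rejected with 403 below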
self.xeH.logger.warning("RPC: hash mismatch %s != %s" % (right_hash, _hash)) 209 | code = 403 210 | break 211 | path, f, mime = self.xeH._get_image_path(guid, fid) 212 | if not f or not os.path.exists(os.path.join(path, f)): 213 | zipf = "%s.zip" % path 214 | if not os.path.exists(zipf): 215 | self.xeH.logger.warning("RPC: can't find %s" % f) 216 | code = 404 217 | break 218 | else: 219 | z = zipfile.ZipFile(zipf) 220 | try: 221 | rt = z.read(f) 222 | except Exception as ex: 223 | self.xeH.logger.warning("RPC: can't find %s in zipfile: %s" % (f, ex)) 224 | code = 404 225 | break 226 | z.close() 227 | else: 228 | rt = open(os.path.join(path, f), 'rb') 229 | rt, _error = gen_thumbnail(rt, args) 230 | if _error: 231 | self.xeH.logger.warning("RPC: PIL needed for generating thumbnail") 232 | elif zippathre.match(path): 233 | # args = urlparse(_).query 234 | _ = urlparse(path).path.split("/") 235 | if len(_) < 5: 236 | code = 400 237 | break 238 | _, _, _hash, guid, fname = _[:5] 239 | fname = fname.split('?')[0] 240 | right_hash = hash_link(self.secret, "%s" % guid) 241 | if right_hash != _hash: 242 | self.xeH.logger.warning("RPC: hash mismatch %s != %s" % (right_hash, _hash)) 243 | code = 403 244 | break 245 | f = self.xeH._get_archive_path(guid) 246 | mime = 'application/zip' 247 | if not f or not os.path.exists(f): 248 | self.xeH.logger.warning("RPC: can't find %s" % f) 249 | code = 404 250 | break 251 | rt = open(f, 'rb') 252 | elif path == "/ui/" or staticre.match(path): 253 | if path == "/ui/": 254 | path = "/" 255 | while True: 256 | cache_rt = None 257 | should_clear_cache = False 258 | headers = { "User-Agent": version_str } 259 | if path in static_cache: 260 | cache_rt, mime, tm, lms = static_cache[path] 261 | if PY3K and not isinstance(cache_rt, bytes): 262 | cache_rt = bytes(cache_rt, 'ascii') 263 | if time.time() - STATIC_CACHE_TTL < tm: 264 | rt = StringIO(cache_rt) 265 | break 266 | should_clear_cache = True 267 | headers['If-Modified-Since'] = lms 268 | 269 | req_start_tm = time.time() 270 | r = None 271 | try: 272 | r = self.http.get("http://xehentai.yooooo.us%s?_=%d" %(path, time.time()), 273 | headers=headers, timeout=10) 274 | except Exception as ex: 275 | self.xeH.logger.warn("error pulling %s from remote server: %s" % (path, ex)) 276 | self.xeH.logger.verbose("%.2fs taken to pull %s from remote server %s bytes" % ( 277 | time.time() - req_start_tm, path, r and len(r.content) or 0)) 278 | if r and r.status_code == 200: 279 | rt = StringIO(r.content) 280 | mime = r.headers['Content-type'] 281 | if should_clear_cache: 282 | # clear all keys, since the js/css hash may change 283 | static_cache.clear() 284 | static_cache[path] = [r.content, mime, time.time(), r.headers['Last-Modified']] 285 | save_cache(static_cache) 286 | elif r and r.status_code == 304: 287 | # so this is tricky: if we hit /ui/ first and it's not expired 288 | # then all other assets should not expire 289 | if path == "/": 290 | for k in static_cache: 291 | if k != "v": 292 | static_cache[k][2] = time.time() 293 | save_cache(static_cache) 294 | rt = StringIO(cache_rt) 295 | elif cache_rt: 296 | self.xeH.logger.warn("serving stale cache %s" % (path)) 297 | rt = StringIO(cache_rt) 298 | else: 299 | rt = jsonrpc_resp({"id":None}, error_code = ERR_RPC_INVALID_REQUEST) 300 | break 301 | else: 302 | # fallback to rpc request 303 | rt = jsonrpc_resp({"id":None}, error_code = ERR_RPC_INVALID_REQUEST) 304 | mime = "application/json-rpc" 305 | break 306 | 307 | self.send_response(code) 308 | 
self.send_header("Access-Control-Allow-Origin", "*") 309 | self.send_header("Content-Type", mime) 310 | 311 | if is_readable_obj(rt): 312 | size = self.serve_file(rt) 313 | rt.close() 314 | else: 315 | self.xeH.logger.verbose("GET %s 200 %d %s" % (self.path, len(rt), self.client_address[0])) 316 | self.send_header("Content-Length", len(rt)) 317 | self.end_headers() 318 | self.wfile.write(rt) 319 | self.wfile.write(b'\n') 320 | return 321 | 322 | @path_filter 323 | def do_POST(self): 324 | _get_header = lambda h: self.headers.get_all(h)[0] if PY3K else \ 325 | self.headers.getheader(h) 326 | d = self.rfile.read(int(_get_header('Content-Length'))) 327 | code = 200 328 | rt = b'' 329 | while True: 330 | try: 331 | if PY3K: 332 | d = d.decode('utf-8') 333 | j = json.loads(d) 334 | assert('method' in j and j['method'] != None and 'id' in j) 335 | except ValueError: 336 | code = 400 337 | rte = jsonrpc_resp({"id":None}, error_code = ERR_RPC_PARSE_ERROR) 338 | break 339 | except AssertionError: 340 | code = 400 341 | rt = jsonrpc_resp({"id":None}, error_code = ERR_RPC_INVALID_REQUEST) 342 | break 343 | cmd = re.findall("xeH\.(.+)", j['method']) 344 | if not cmd: 345 | code = 404 346 | rt = jsonrpc_resp({"id":j['id']}, error_code = ERR_RPC_METHOD_NOT_FOUND) 347 | break 348 | # let's make fooBar to foo_bar 349 | cmd_r = cmdre.sub(lambda m: "%s_%s" % (m.group(1), m.group(2).lower()), cmd[0]) 350 | if not hasattr(self.xeH, cmd_r) or cmd_r.startswith("_"): 351 | code = 404 352 | rt = jsonrpc_resp({"id":j['id']}, error_code = ERR_RPC_METHOD_NOT_FOUND) 353 | break 354 | params = ([], {}) if 'params' not in j else j['params'] 355 | if self.secret: 356 | authorized = False 357 | while True: 358 | if len(params[0]) == 0: 359 | break 360 | secret = params[0][0] 361 | if not PY3K and isinstance(secret, unicode): 362 | secret = secret.encode('utf-8') 363 | if is_str_obj(secret) and re.findall("token:%s" % self.secret, secret): 364 | params[0].pop(0) 365 | authorized = True 366 | break 367 | if not authorized: 368 | code = 403 369 | rt = jsonrpc_resp({"id":j['id']}, error_code = ERR_RPC_UNAUTHORIZED) 370 | break 371 | self.xeH.logger.verbose("RPC from: %s, cmd: %s, params: %s" % (self.client_address[0], cmd, params)) 372 | try: 373 | # pop out token if extra token is found 374 | if len(params[0]) > 0 and 'token:' in params[0][0]: 375 | del params[0][0] 376 | cmd_rt = getattr(self.xeH, cmd_r)(*params[0], **params[1]) 377 | except (ValueError, TypeError) as ex: 378 | self.xeH.logger.verbose("RPC exec error:\n%s" % traceback.format_exc()) 379 | code = 500 380 | rt = jsonrpc_resp({"id":j['id']}, error_code = ERR_RPC_EXEC_ERROR, 381 | error_msg = str(ex)) 382 | break 383 | if cmd_rt[0] > 0: 384 | rt = jsonrpc_resp({"id":j['id']}, error_code = cmd_rt[0], error_msg = cmd_rt[1]) 385 | else: 386 | rt = jsonrpc_resp({"id":j['id']}, ret = cmd_rt[1]) 387 | break 388 | self.send_response(code) 389 | self.send_header("Access-Control-Allow-Origin", "*") 390 | self.send_header("Content-Type", "application/json-rpc") 391 | self.send_header("Content-Length", len(rt)) 392 | self.end_headers() 393 | if PY3K: 394 | rt = rt.encode('utf-8') 395 | self.wfile.write(rt) 396 | self.wfile.write(b'\n') 397 | return 398 | 399 | 400 | def log_message(self, format, *args): 401 | return 402 | 403 | # extend xeHentai class for rpc commands 404 | class xeHentaiRPCExtended(object): 405 | def __init__(self, xeH, secret): 406 | self.xeH = xeH 407 | self.secret = secret 408 | 409 | def get_info(self): 410 | ret = {"version": self.verstr, 411 | 
"threads_zombie": 0, "threads_running": 0, 412 | "queue_pending": 0, "queue_finished": 0, 413 | "download_speed": 0, 414 | } 415 | if hasattr(self, '_monitor'): 416 | ret['threads_running'] = len(self._monitor.thread_last_seen) 417 | ret['threads_zombie'] = len(self._monitor.thread_zombie) 418 | if self._monitor.task.state > TASK_STATE_PAUSED and self._monitor.task.img_q: 419 | ret['queue_pending'] = self._monitor.task.img_q.qsize() 420 | ret['queue_finished'] = self._monitor.task.meta['finished'] 421 | ret['download_speed'] = self._monitor.download_speed 422 | else: 423 | ret['queue_pending'] = 0 424 | ret['queue_finished'] = 0 425 | return ERR_NO_ERROR, ret 426 | 427 | def get_config(self): 428 | rt = {k: v for k, v in self.cfg.items() if not k.startswith('rpc_') and k not in ('urls',)} 429 | return ERR_NO_ERROR, rt 430 | 431 | def update_config(self, **cfg_dict): 432 | cfg_dict = {k: v for k, v in cfg_dict.items() if not k.startswith('rpc_') and k not in ('urls',)} 433 | if 'proxy' in cfg_dict: 434 | self.xeH.update_config(**cfg_dict) 435 | return self.get_config() 436 | 437 | def list_tasks(self, level = "download"): 438 | reverse_mode = False 439 | if level.startswith('!'): 440 | reverse_mode = True 441 | level = level[1:] 442 | level = "TASK_STATE_%s" % level.upper() 443 | if level not in globals(): 444 | return ERR_TASK_LEVEL_UNDEF, None 445 | lv = globals()[level] 446 | rt = [{_k:_v for _k, _v in v.to_dict().items() if _k not in 447 | ('reload_map', 'duplicate_map', 'renamed_map', 'logger', 'img_q', 'page_q')} 448 | for _, v in self._all_tasks.items() if 449 | (reverse_mode and v.state != lv) or (not reverse_mode and v.state == lv)] 450 | return ERR_NO_ERROR, rt 451 | 452 | def _get_image_path(self, guid, fid): 453 | mime_map = { 454 | "jpg": "image/jpeg", 455 | "jepg": "image/jpeg", 456 | "png": "image/png", 457 | "gif": "image/gif", 458 | "bmp": "image/bmp", 459 | "webp": "image/webp" 460 | } 461 | if guid not in self._all_tasks: 462 | return None, None, None 463 | t = self._all_tasks[guid] 464 | fid = int(fid) 465 | if fid in t.renamed_map: 466 | f = t.renamed_map[fid] 467 | else: 468 | f = t.get_fidpad(fid) 469 | 470 | ext = os.path.splitext(f)[1].lower()[1:] 471 | if ext not in mime_map: 472 | mime = "application/octet-stream" 473 | else: 474 | mime = mime_map[ext] 475 | return t.get_fpath(), f, mime 476 | 477 | def _get_archive_path(self, guid): 478 | if guid not in self._all_tasks: 479 | return None, None 480 | t = self._all_tasks[guid] 481 | st = time.time() 482 | pth = t.make_archive(False) 483 | et = time.time() 484 | if et - st > 0.1: 485 | self.logger.warning('RPC: %.2fs taken to get archive' % (et - st)) 486 | return pth 487 | 488 | def get_image(self, guid, request_range=None): 489 | if guid not in self._all_tasks: 490 | return ERR_TASK_NOT_FOUND, None 491 | t = self._all_tasks[guid] 492 | start = 1 493 | end = t.meta['total'] + 1 494 | if request_range: 495 | request_range = str(request_range) 496 | _ = request_range.split(',') 497 | if len(_) == 1: 498 | start = int(request_range) 499 | else: 500 | start = int(_[0]) 501 | end = int(_[0]) + 1 502 | rt = [] 503 | for fid in range(start, end): 504 | if fid in t.renamed_map: 505 | f = t.renamed_map[fid] 506 | else: 507 | f = t.get_fidpad(fid) 508 | uri = "%s/%s" % (t.guid, fid) 509 | rt.append('/img/%s/%s/%s' % (hash_link(self.secret, uri), uri, f)) 510 | return ERR_NO_ERROR, rt 511 | 512 | 513 | def __getattr__(self, k): 514 | # fallback attribute handler 515 | return getattr(self.xeH, k) 516 | 517 | class 
ThreadedHTTPServer(ThreadingMixIn, HTTPServer): 518 | """Handle requests in a separate thread.""" 519 | pass 520 | -------------------------------------------------------------------------------- /xeHentai/task.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | # Contributor: 4 | # fffonion 5 | 6 | import os 7 | import re 8 | import copy 9 | import json 10 | import uuid 11 | import shutil 12 | import zipfile 13 | import tempfile 14 | from threading import RLock 15 | from . import util 16 | from .const import * 17 | from .const import __version__ 18 | from .util.logger import safestr 19 | if PY3K: 20 | from queue import Queue, Empty 21 | else: 22 | from Queue import Queue, Empty 23 | 24 | class Task(object): 25 | def __init__(self, url, cfgdict, logger): 26 | self.url = url 27 | if url: 28 | _ = RE_INDEX.findall(url) 29 | if _: 30 | self.gid, self.sethash = _[0] 31 | self.failcode = 0 32 | self.state = TASK_STATE_WAITING 33 | self.guid = str(uuid.uuid4())[:8] 34 | self.config = cfgdict 35 | self.meta = {} 36 | self.reload_map = {} # {img_hash:reload_url} 37 | self.duplicate_map = {} # map fid to duplicate file ids, {id:(id1, id2, )} 38 | self.renamed_map = {} # map fid to renamed file name, used in finding a file by id in RPC 39 | self.img_q = None 40 | self.page_q = None 41 | self._flist_done = set() # store id, don't save, will generate when scan 42 | self._monitor = None 43 | self._cnt_lock = RLock() 44 | self._f_lock = RLock() 45 | 46 | self.logger = logger 47 | 48 | def cleanup(self, before_delete=False): 49 | if before_delete: 50 | if 'delete_task_files' in self.config and self.config['delete_task_files'] and \ 51 | 'title' in self.meta: # maybe it's a error task and meta is empty 52 | fpath = self.get_fpath() 53 | # TODO: ascii can't decode? 
locale not enus, also check save_file 54 | if os.path.exists(fpath): 55 | shutil.rmtree(fpath) 56 | zippath = "%s.zip" % fpath 57 | if os.path.exists(zippath): 58 | os.remove(zippath) 59 | elif self.state in (TASK_STATE_FINISHED, TASK_STATE_FAILED): 60 | self.img_q = None 61 | self.page_q = None 62 | self.reload_map = {} 63 | 64 | # if 'filelist' in self.meta: 65 | # del self.meta['filelist'] 66 | # if 'resampled' in self.meta: 67 | # del self.meta['resampled'] 68 | 69 | def set_fail(self, code): 70 | self.state = TASK_STATE_FAILED 71 | self.failcode = code 72 | # cleanup all we cached 73 | self.meta = {} 74 | 75 | def migrate_exhentai(self): 76 | _ = re.findall("(?:https*://[g\.]*e\-hentai\.org)(.+)", self.url) 77 | if not _: 78 | return False 79 | self.url = "https://exhentai.org%s" % _[0] 80 | self.state = TASK_STATE_WAITING if self.state == TASK_STATE_FAILED else self.state 81 | self.failcode = 0 82 | return True 83 | 84 | def mpv_url(self): 85 | return re.sub( 86 | "/./%s/%s" % (self.gid, self.sethash), 87 | "/mpv/%s/%s" % (self.gid, self.sethash), 88 | self.url 89 | ) 90 | 91 | def update_meta(self, meta): 92 | self.meta.update(meta) 93 | if self.config['jpn_title'] and self.meta['gjname']: 94 | self.meta['title'] = self.meta['gjname'] 95 | else: 96 | self.meta['title'] = self.meta['gnname'] 97 | 98 | # def guess_ori(self): 99 | # # guess if this gallery has resampled files depending on some sample hashes 100 | # # return True if it's ori 101 | # if 'sample_hash' not in self.meta: 102 | # return 103 | # all_keys = map(lambda x:x[:10], self.meta['filelist'].keys()) 104 | # for h in self.meta['sample_hash']: 105 | # if h not in all_keys: 106 | # self.has_ori = True 107 | # break 108 | # del self.meta['sample_hash'] 109 | 110 | def base_url(self): 111 | return re.findall(RESTR_SITE, self.url)[0] 112 | 113 | # def get_picpage_url(self, pichash): 114 | # # if file resized, this url not works 115 | # # http://%s.org/s/hash_s/gid-picid' 116 | # return "%s/s/%s/%s-%s" % ( 117 | # self.base_url(), pichash[:10], self.gid, self.meta['filelist'][pichash][0] 118 | # ) 119 | 120 | def put_img_queue(self, imgurl, reload_url, fname): 121 | if self.config['download_ori']: 122 | # fullimg.php doesn't have hash in imgurl 123 | img_hash = RE_GALLERY.findall(reload_url)[0][0] 124 | else: 125 | img_hash = self.get_imghash(imgurl) 126 | this_fid = int(RE_GALLERY.findall(reload_url)[0][1]) 127 | self.renamed_map[this_fid] = fname 128 | # if same file occurs severl times in a gallery 129 | while img_hash in self.reload_map: 130 | fpath = self.get_fpath() 131 | old_fid = self.get_fname(img_hash)[0] 132 | old_f = os.path.join(fpath, self.get_fidpad(old_fid)) 133 | this_f = os.path.join(fpath, self.get_fidpad(this_fid)) 134 | self._f_lock.acquire() 135 | # if we are equal to ourself, download as usual 136 | if this_fid == old_fid: 137 | break 138 | self.logger.debug("#%s is a duplicate of #%s" % (this_fid, old_fid)) 139 | if os.path.exists(old_f): 140 | # we can just copy old file if already downloaded 141 | try: 142 | shutil.copyfile(old_f, this_f) 143 | except Exception as ex: 144 | self._f_lock.release() 145 | raise ex 146 | else: 147 | self._f_lock.release() 148 | self.set_fid_finished(this_fid) 149 | self.logger.debug("#%s is copied from #%s" % (this_fid, old_fid)) 150 | else: 151 | # if not downloaded, we will copy them in save_file 152 | if old_fid not in self.duplicate_map: 153 | self.duplicate_map[old_fid] = set() 154 | self.duplicate_map[old_fid].add(this_fid) 155 | self._f_lock.release() 156 | 
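                # deferred copy: once the source fid finishes downloading,
                # save_file() walks duplicate_map and clones the file for
                # every fid recorded here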
156 |                 self.logger.debug("#%s is pending copy from #%s" % (this_fid, old_fid))
157 |             return
158 | 
159 |         self.reload_map[img_hash] = [reload_url, fname]
160 |         self.img_q.put(imgurl)
161 | 
162 |     def put_page_queue_retry(self, redirect_url):
163 |         if not redirect_url:
164 |             return
165 |         if "redirect=" in redirect_url:
166 |             page_url = re.findall("redirect=(.+)", redirect_url)[0]
167 |             img_hash = RE_GALLERY.findall(page_url)[0][0]
168 |         else:
169 |             img_hash = self.get_imghash(redirect_url)
170 |         url = self.reload_map.pop(img_hash)[0]
171 |         self.page_q.put(url)
172 | 
173 |     def scan_downloaded(self, scaled = True):
174 |         fpath = self.get_fpath()
175 |         donefile = False
176 |         if os.path.exists(os.path.join(fpath, ".xehdone")) or os.path.exists("%s.zip" % fpath):
177 |             donefile = True
178 |         _range_idx = 0
179 |         for fid in range(1, self.meta['total'] + 1):
180 |             # check download range
181 |             if self.config['download_range']:
182 |                 _found = False
183 |                 # download_range is sorted asc
184 |                 for start, end in self.config['download_range'][_range_idx:]:
185 |                     if fid > end: # out of range right bound, move to next range
186 |                         _range_idx += 1
187 |                     elif start <= fid <= end: # in range
188 |                         _found = True
189 |                         break
190 |                     elif fid < start: # out of range left bound
191 |                         break
192 |                 if not _found:
193 |                     self._flist_done.add(int(fid))
194 |                     continue
195 |             # can only check un-renamed files
196 |             fname = os.path.join(fpath, self.get_fidpad(fid)) # id
197 |             if donefile:
198 |                 self._flist_done.add(int(fid))
199 |             elif os.path.exists(fname):
200 |                 if os.stat(fname).st_size == 0:
201 |                     os.remove(fname)
202 |                 else:
203 |                     self._flist_done.add(int(fid))
204 |         self.meta['finished'] = len(self._flist_done)
205 |         if self.meta['finished'] == self.meta['total']:
206 |             self.state = TASK_STATE_FINISHED
207 | 
208 |     def put_page_queue(self, url):
209 |         # if url is not finished, call callback to put into queue
210 |         # type 1: normal file; type 2: resampled url
211 |         # if pichash:
212 |         #     fid = int(self.meta['filelist'][pichash][0])
213 |         #     if fid not in self._flist_done:
214 |         #         callback(self.get_picpage_url(pichash))
215 |         # elif url:
216 |         fhash, fid = RE_GALLERY.findall(url)[0]
217 |         # if fhash not in self.meta['filelist']:
218 |         #     self.meta['resampled'][fhash] = int(fid)
219 |         #     self.has_ori = True
220 |         if int(fid) not in self._flist_done:
221 |             self.page_q.put(url)
222 | 
223 |     def save_file(self, imgurl, redirect_url, binary_iter):
224 |         # TODO: RLock for finished += 1
225 |         fpath = self.get_fpath()
226 |         self._f_lock.acquire()
227 |         if not os.path.exists(fpath):
228 |             os.mkdir(fpath)
229 |         # use redirect_url, fullimg.php doesn't have hash in imgurl
230 |         img_hash = self.get_imghash(redirect_url)
231 |         self._f_lock.release()
232 |         fid, fname = self.get_fname(img_hash)
233 |         _ = re.findall("/([^/\?]+)(?:\?|$)", redirect_url)
234 |         if _: # change it if it's a full image
235 |             fname = _[0]
236 |             self.reload_map[img_hash][1] = fname
237 | 
238 |         fn = os.path.join(fpath, self.get_fidpad(int(fid)))
239 |         if os.path.exists(fn) and os.stat(fn).st_size > 0:
240 |             return fn
241 |         # create a temp file first
242 |         # we don't need _f_lock because this will not be in a sequence,
243 |         # and we can't do that, otherwise we are breaking the multi threading
244 |         fd_tmp, fn_tmp = tempfile.mkstemp(prefix="xehentai-")
245 |         os.close(fd_tmp)
246 |         try:
247 |             with open(fn_tmp, "wb") as f:
248 |                 for binary in binary_iter():
249 |                     if self._monitor._exit(None):
250 |                         raise DownloadAbortedException()
251 |                     f.write(binary)
252 |         except DownloadAbortedException as ex:
253 |             try:
254 |                 os.unlink(fn_tmp)
255 |             except:
256 |                 pass
257 |             return
258 | 
259 |         self._f_lock.acquire()
260 |         try:
261 |             try:
262 |                 shutil.move(fn_tmp, fn)
263 |             except WindowsError as ex:
264 |                 # file is used by another process
265 |                 # do a copy and delete, WindowsError[32] maps to errno 13
266 |                 if ex.errno == 13:
267 |                     shutil.copy(fn_tmp, fn)
268 |                     try:
269 |                         os.unlink(fn_tmp)
270 |                     except:
271 |                         pass
272 |                 else:
273 |                     raise ex
274 |             self.set_fid_finished(fid)
275 |             if fid in self.duplicate_map:
276 |                 for fid_rep in self.duplicate_map[fid]:
277 |                     # if a file download is interrupted, it will appear in self.duplicate_map as well
278 |                     if fid_rep == fid:
279 |                         continue
280 |                     fn_rep = os.path.join(fpath, self.get_fidpad(fid_rep))
281 |                     shutil.copyfile(fn, fn_rep)
282 |                     self.set_fid_finished(fid_rep)
283 |                     self.logger.debug("#%s is copied from #%s in save_file" % (fid_rep, fid))
284 |                 del self.duplicate_map[fid]
285 |         except Exception as ex:
286 |             self._f_lock.release()
287 |             raise ex
288 |         self._f_lock.release()
289 |         return True
290 | 
291 |     def set_fid_finished(self, fid):
292 |         self._cnt_lock.acquire()
293 |         self._flist_done.add(fid)
294 |         self.meta['finished'] = len(self._flist_done)
295 |         self._cnt_lock.release()
296 | 
297 |     def get_fid_unfinished(self):
298 |         unfinished = []
299 |         for i in range(1, self.meta['total'] + 1):
300 |             if i not in self._flist_done:
301 |                 unfinished.append(i)
302 |         return unfinished
303 | 
304 |     def get_imghash(self, imgurl_with_hash):
305 |         # only get first 10 bytes of hash
306 |         # so we can use same key in both normal image (from imgurl, full hash)
307 |         # and original image (from gallery url/redirect url, short hash)
308 |         return RE_IMGHASH.findall(imgurl_with_hash)[0][0][:10]
309 | 
310 |     def get_imgfid(self, imgurl):
311 |         if RE_IMGHASH.findall(imgurl):
312 |             return self.get_fname(self.get_imghash(imgurl))[0]
313 |         # else is fullimg url
314 |         return int(re.findall("fullimg/\d+/(\d+)", imgurl)[0])
315 | 
316 |     def get_fname(self, img_hash):
317 |         pageurl, fname = self.reload_map[img_hash]
318 |         _, fid = RE_GALLERY.findall(pageurl)[0]
319 |         return int(fid), fname
320 | 
321 |     def get_fpath(self):
322 |         return os.path.join(self.config['dir'], util.legalpath(self.meta['title']))
323 | 
324 |     def get_fidpad(self, fid, ext = 'jpg'):
325 |         fid = int(fid)
326 |         _ = "%%0%dd.%%s" % (len(str(self.meta['total'])))
327 |         return _ % (fid, ext)
328 | 
329 |     def rename_fname(self):
330 |         fpath = self.get_fpath()
331 |         tmppath = os.path.join(fpath, RENAME_TMPDIR)
332 |         cnt = 0
333 |         error_list = []
334 |         # we need to track renamed fids to decide
335 |         # whether to rename into a temp filename or add (1)
336 |         # only needed when rename_ori = True
337 |         done_list = set()
338 |         for fid in list(self.renamed_map.keys()):
339 |             fname = self.renamed_map[fid]
340 |             original_ext = os.path.splitext(fname)[1]
341 |             if original_ext == "":
342 |                 original_ext = os.path.splitext(fname)[0]
343 |             # if we don't need to rename to original name and file type matches
344 |             if not self.config['rename_ori'] and original_ext.lower() == '.jpg':
345 |                 continue
346 |             fname_ori = os.path.join(fpath, self.get_fidpad(fid)) # id
347 |             if self.config['rename_ori']:
348 |                 if os.path.exists(os.path.join(tmppath, self.get_fidpad(fid))):
349 |                     # if we previously put it into a temporary folder, we need to change fname_ori
350 |                     fname_ori = os.path.join(tmppath, self.get_fidpad(fid))
351 |                 fname_to = os.path.join(fpath, util.legalpath(fname))
352 |             else:
353 |                 # Q: Why don't we just use id.ext when saving files instead of using
354 |                 #    id.jpg?
355 |                 # A: If a former task doesn't download all files, a new task with the same gallery
356 |                 #    will have zero knowledge about file type before scanning all per-page urls,
357 |                 #    thus can't determine if this id is downloaded, because file type is not
358 |                 #    necessarily .jpg
359 |                 fname_to = os.path.join(fpath, self.get_fidpad(fid, original_ext[1:]))
360 |             while fname_ori != fname_to:
361 |                 if os.path.exists(fname_ori):
362 |                     while os.path.exists(fname_to):
363 |                         _base, _ext = os.path.splitext(fname_to)
364 |                         _ = re.findall("\((\d+)\)$", _base)
365 |                         if self.config['rename_ori'] and fname_to not in done_list:
366 |                             # if our auto numbering conflicts with original naming
367 |                             # we move it into a temporary folder
368 |                             # It's safe since this file is the same as one of our auto-numbered filenames,
369 |                             # it could never conflict with other files in tmppath
370 |                             if not os.path.exists(tmppath):
371 |                                 os.mkdir(tmppath)
372 |                             os.rename(fname_to, os.path.join(tmppath, os.path.split(fname_to)[1]))
373 |                             break
374 |                         if _: # if ...(1) exists, use ...(2)
375 |                             _base = re.sub("\((\d+)\)$", lambda x: "(%d)" % (int(x.group(1)) + 1), _base)
376 |                         else:
377 |                             _base = "%s(1)" % _base
378 |                         fname_to = "".join((_base, _ext))
379 |                     try:
380 |                         os.rename(fname_ori, fname_to)
381 |                         self.renamed_map[fid] = os.path.split(fname_to)[1]
382 |                     except Exception as ex:
383 |                         error_list.append((os.path.split(fname_ori)[1], os.path.split(fname_to)[1], str(ex)))
384 |                         break
385 |                 if self.config['rename_ori']:
386 |                     done_list.add(fname_to)
387 |                 break
388 |             cnt += 1
389 |         if cnt == self.meta['total']:
390 |             with open(os.path.join(fpath, ".xehdone"), "w"):
391 |                 pass
392 |             try:
393 |                 os.rmdir(tmppath)
394 |             except: # we will leave it undeleted if it's not empty
395 |                 pass
396 |         return error_list
397 | 
398 |     def make_archive(self, remove=True):
399 |         # needed to lock between RPC get_img
400 |         self._f_lock.acquire()
401 |         dpath = self.get_fpath()
402 |         arc = "%s.zip" % dpath
403 |         if os.path.exists(arc):
404 |             self._f_lock.release()
405 |             return arc
406 |         with zipfile.ZipFile(arc, 'w', allowZip64=True) as zipFile:
407 |             zipFile.comment = ("xeHentai Archiver v%s\nTitle:%s\nOriginal URL:%s" % (
408 |                 __version__, self.meta['title'], self.url)).encode('utf-8')
409 |             for f in sorted(os.listdir(dpath)):
410 |                 fullpath = os.path.join(dpath, f)
411 |                 zipFile.write(fullpath, f, zipfile.ZIP_STORED)
412 |         if remove:
413 |             shutil.rmtree(dpath)
414 |         self._f_lock.release()
415 |         return arc
416 | 
417 |     def from_dict(self, j):
418 |         for k in self.__dict__:
419 |             if k not in j:
420 |                 continue
421 |             if k == "logger":
422 |                 continue
423 |             if k.endswith('_q') and j[k]:
424 |                 setattr(self, k, Queue())
425 |                 [getattr(self, k).put(e, False) for e in j[k]]
426 |             else:
427 |                 setattr(self, k, j[k])
428 |         _ = RE_INDEX.findall(self.url)
429 |         if _:
430 |             self.gid, self.sethash = _[0]
431 |         return self
432 | 
433 | 
434 |     def to_dict(self):
435 |         d = dict({k:v for k, v in self.__dict__.items()
436 |             if not k.endswith('_q') and not k.startswith("_") and k != "logger"})
437 |         for k in ['img_q', 'page_q']:
438 |             if getattr(self, k):
439 |                 d[k] = [e for e in getattr(self, k).queue]
440 |         return d
441 | 
--------------------------------------------------------------------------------
/xeHentai/updater/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding:utf-8
 2 | # Contributor:
 3 | #     fffonion
 4 | 
 5 | class Updater(object):
 6 |     def get_latest_release(self, dev=False):
 7 |         raise NotImplementedError("get_latest_release not implemented")
 8 | 
 9 |     def get_src_path_in_archive(self, info):
10 |         raise NotImplementedError("get_src_path_in_archive not implemented")
11 | 
12 | class UpdateInfo(object):
13 |     def __init__(self, update_id, download_link, ts, message):
14 |         self.update_id = update_id
15 |         self.download_link = download_link
16 |         self.message = message
17 |         self.ts = ts
18 | 
19 | 
--------------------------------------------------------------------------------
/xeHentai/updater/github.py:
--------------------------------------------------------------------------------
 1 | # coding:utf-8
 2 | # Contributor:
 3 | #     fffonion
 4 | 
 5 | import requests
 6 | import time
 7 | 
 8 | from . import Updater, UpdateInfo
 9 | 
10 | class GithubUpdaterException(Exception):
11 |     pass
12 | 
13 | class GithubUpdater(Updater):
14 |     def __init__(self, session):
15 |         self.session = session
16 | 
17 |     def get_latest_release(self, dev=False):
18 |         param = dev and "dev" or "master"
19 |         r = self.session.get("https://api.github.com/repos/fffonion/xeHentai/commits?sha=%s" % param)
20 |         commit = r.json()
21 |         if r.status_code != 200 or not commit:
22 |             raise GithubUpdaterException("Failed to get latest release info: %s" % r.text)
23 |         commit = commit[0]
24 |         sha = commit["sha"]
25 |         url = "https://github.com/fffonion/xeHentai/archive/%s.zip" % sha
26 | 
27 |         return UpdateInfo(
28 |             sha,
29 |             url,
30 |             commit["commit"]["author"]["date"],
31 |             commit["commit"]["message"].replace("\r", " ").replace("\n", " "),
32 |         )
33 | 
34 |     def get_src_path_in_archive(self, info):
35 |         return "xeHentai-%s/xeHentai" % info.update_id
36 | 
--------------------------------------------------------------------------------
/xeHentai/updater/updater.py:
--------------------------------------------------------------------------------
 1 | # coding:utf-8
 2 | # Contributor:
 3 | #     fffonion
 4 | 
 5 | import os
 6 | import requests
 7 | import zipfile
 8 | import json
 9 | from ..i18n import i18n
10 | from ..util import logger
11 | from ..const import *
12 | from .. import const
13 | from .github import GithubUpdater
14 | from . import UpdateInfo
15 | 
16 | if PY3K:
17 |     from io import BytesIO as StringIO
18 | else:
19 |     from cStringIO import StringIO
20 | 
21 | def check_update(l=None, config={}):
22 |     if not l:
23 |         l = logger.Logger()
24 |     dev = "update_beta_channel" in config and config["update_beta_channel"]
25 |     download_update = "auto_update" in config and config["auto_update"] == "download"
26 |     l.debug(i18n.UPDATE_CHANNEL % (dev and i18n.UPDATE_DEV_CHANNEL or i18n.UPDATE_RELEASE_CHANNEL))
27 |     s = requests.Session()
28 |     g = GithubUpdater(s)
29 |     try:
30 |         info = g.get_latest_release(dev)
31 |         if hasattr(const, "VERSION_UPDATE") and VERSION_UPDATE == info.update_id:
32 |             l.debug(i18n.UPDATE_NO_UPDATE)
33 |             return
34 |         l.info(i18n.UPDATE_AVAILABLE % (info.ts, info.message, info.update_id))
35 |         if not download_update:
36 |             l.info(i18n.UPDATE_DOWNLOAD_MANUALLY)
37 |             return
38 |         resp = s.get(info.download_link)
39 |         z = resp.content
40 |         with zipfile.ZipFile(StringIO(z)) as zf:
41 |             make_src_update_file(zf, g.get_src_path_in_archive(info), info)
42 |         l.info(i18n.UPDATE_COMPLETE)
43 |     except Exception as ex:
44 |         l.warn(i18n.UPDATE_FAILED % str(ex))
45 | 
46 | 
47 | def make_src_update_file(infile, path, info):
48 |     if not path.endswith("/"):
49 |         path += "/"
50 | 
51 |     with zipfile.ZipFile(SRC_UPDATE_FILE, "w") as z:
52 |         z.writestr(
53 |             "info.json",
54 |             json.dumps({
55 |                 "v": SRC_UPDATE_VERSION,
56 |                 "update_id": info.update_id,
57 |             }),
58 |             zipfile.ZIP_STORED,
59 |         )
60 | 
61 |         for f in infile.namelist():
62 |             if f.startswith(path) and not f.endswith("/"):
63 |                 z.writestr("xeHentai/%s" % f[len(path):], infile.read(f), zipfile.ZIP_STORED)
--------------------------------------------------------------------------------
/xeHentai/util/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding:utf-8
 3 | # Contributor:
 4 | #     fffonion
 5 | 
 6 | import os
 7 | import re
 8 | import sys
 9 | import uuid
10 | import random
11 | 
12 | from ..const import *
13 | 
14 | if os.name == 'nt':
15 |     filename_filter = re.compile("[|:?\\/*'\"<>]|\.+(?:$)")
16 | else: # assume posix
17 |     filename_filter = re.compile("[\/:]")
18 | 
19 | if PY3K:
20 |     unichr = chr
21 | 
22 | def parse_cookie(coostr):
23 |     ret = {}
24 |     for coo in coostr.split(";"):
25 |         coo = coo.strip()
26 |         if coo.lower() in ('secure', 'httponly'):
27 |             continue
28 |         _ = coo.split("=")
29 |         k = _[0]
30 |         v = "=".join(_[1:])
31 |         if k.lower() in ('path', 'expires', 'domain', 'max-age', 'comment'):
32 |             continue
33 |         ret[k] = v
34 |     return ret
35 | 
36 | def make_cookie(coodict):
37 |     return ";".join(map("=".join, coodict.items()))
38 | 
39 | def make_ua():
40 |     rrange = lambda a, b, c = 1: c == 1 and random.randrange(a, b) or int(1.0 * random.randrange(a * c, b * c) / c)
41 |     ua = 'Mozilla/%d.0 (Windows NT %d.%d) AppleWebKit/%d (KHTML, like Gecko) Chrome/%d.%d Safari/%d' % (
42 |         rrange(4, 7, 10), rrange(5, 7), rrange(0, 3), rrange(535, 538, 10),
43 |         rrange(21, 27, 10), rrange(0, 9999, 10), rrange(535, 538, 10)
44 |     )
45 |     return ua
46 | 
47 | def get_proxy_policy(cfg):
48 |     if cfg['proxy_image_only']:
49 |         return RE_URL_IMAGE
50 |     if cfg['proxy_image']:
51 |         return RE_URL_ALL
52 |     return RE_URL_WEBPAGE
53 | 
54 | def parse_human_time(s):
55 |     rt = 0
56 |     day = re.findall('(\d+)\sdays*', s)
57 |     if day:
58 |         rt += 86400 * int(day[0])
59 |     hour = re.findall('(\d+)\shours*', s)
60 |     if hour:
61 |         rt += 3600 * int(hour[0])
62 |     minute = re.findall('(\d+)\sminutes*', s)
63 |     if minute:
64 |         rt += 60 * int(minute[0])
65 |     else:
66 |         rt += 60
67 |     return rt
68 | 
69 | def htmlescape(s):
70 |     def replc(match):
71 |         #print match.group(0),match.group(1),match.group(2)
72 |         dict={'amp':'&','nbsp':' ','quot':'"','lt':'<','gt':'>','copy':'©','reg':'®'}
73 |         #dict+={'∀':'forall','∂':'part','∃':'exist','∅':'empty','∇':'nabla','∈':'isin','∉':'notin','∋':'ni','∏':'prod','∑':'sum','−':'minus','∗':'lowast','√':'radic','∝':'prop','∞':'infin','∠':'ang','∧':'and','∨':'or','∩':'cap','∪':'cup','∫':'int','∴':'there4','∼':'sim','≅':'cong','≈':'asymp','≠':'ne','≡':'equiv','≤':'le','≥':'ge','⊂':'sub','⊃':'sup','⊄':'nsub','⊆':'sube','⊇':'supe','⊕':'oplus','⊗':'otimes','⊥':'perp','⋅':'sdot','Α':'Alpha','Β':'Beta','Γ':'Gamma','Δ':'Delta','Ε':'Epsilon','Ζ':'Zeta','Η':'Eta','Θ':'Theta','Ι':'Iota','Κ':'Kappa','Λ':'Lambda','Μ':'Mu','Ν':'Nu','Ξ':'Xi','Ο':'Omicron','Π':'Pi','Ρ':'Rho','Σ':'Sigma','Τ':'Tau','Υ':'Upsilon','Φ':'Phi','Χ':'Chi','Ψ':'Psi','Ω':'Omega','α':'alpha','β':'beta','γ':'gamma','δ':'delta','ε':'epsilon','ζ':'zeta','η':'eta','θ':'theta','ι':'iota','κ':'kappa','λ':'lambda','μ':'mu','ν':'nu','ξ':'xi','ο':'omicron','π':'pi','ρ':'rho','ς':'sigmaf','σ':'sigma','τ':'tau','υ':'upsilon','φ':'phi','χ':'chi','ψ':'psi','ω':'omega','ϑ':'thetasym','ϒ':'upsih','ϖ':'piv','Œ':'OElig','œ':'oelig','Š':'Scaron','š':'scaron','Ÿ':'Yuml','ƒ':'fnof','ˆ':'circ','˜':'tilde',' ':'ensp',' ':'emsp',' ':'thinsp','‌':'zwnj','‍':'zwj','‎':'lrm','‏':'rlm','–':'ndash','—':'mdash','‘':'lsquo','’':'rsquo','‚':'sbquo','“':'ldquo','”':'rdquo','„':'bdquo','†':'dagger','‡':'Dagger','•':'bull','…':'hellip','‰':'permil','′':'prime','″':'Prime','‹':'lsaquo','›':'rsaquo','‾':'oline','€':'euro','™':'trade','←':'larr','↑':'uarr','→':'rarr','↓':'darr','↔':'harr','↵':'crarr','⌈':'lceil','⌉':'rceil','⌊':'lfloor','⌋':'rfloor','◊':'loz','♠':'spades','♣':'clubs','♥':'hearts','♦':'diams'}
74 |         if len(match.groups()) >= 2:
75 |             if match.group(1) == '#':
76 |                 return unichr(int(match.group(2)))
77 |             else:
78 |                 return dict.get(match.group(2), '?')
79 |     htmlre = re.compile("&(#?)(\d{1,5}|\w{1,8}|[a-z]+);")
80 |     return htmlre.sub(replc, s)
81 | 
82 | def legalpath(s):
83 |     sanitized = filename_filter.sub(lambda x:"", s)
84 |     # windows doesn't like trailing white spaces
85 |     if os.name == 'nt':
86 |         sanitized = sanitized.rstrip()
87 |     return sanitized
88 | 
89 | MAXINT = 9223372036854775807
90 | def human_size(t):
91 |     if t >= MAXINT:
92 |         return "UNL."
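    # for illustration: human_size(2048) walks B -> KB and returns "2 KB";
    # only values past the TB bucket fall through to the final return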
 93 |     for prefix in ("B", "KB", "MB", "GB", "TB"):
 94 |         if t <= 1000:
 95 |             return "%s %s" % (("%.2f" % t).rstrip("0").rstrip("."), prefix)
 96 |         t /= 1024.0
 97 |     return "%.2f TB" % t
 98 | 
--------------------------------------------------------------------------------
/xeHentai/util/logger.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding:utf-8
  3 | # Contributor:
  4 | #     fffonion
  5 | 
  6 | import os
  7 | import sys
  8 | import datetime
  9 | import locale
 10 | import logging
 11 | import traceback
 12 | from threading import RLock
 13 | #import logging.handlers
 14 | from ..const import *
 15 | 
 16 | class tz_GMT8(datetime.tzinfo):
 17 |     def utcoffset(self, dt):
 18 |         return datetime.timedelta(hours = 8)
 19 |     def dst(self, dt):
 20 |         return datetime.timedelta(0)
 21 | 
 22 | 
 23 | def safestr(s):
 24 |     if (PY3K and isinstance(s, bytes)) or (not PY3K and not isinstance(s, unicode)):
 25 |         s = s.decode("utf-8")
 26 |     if PY3K:
 27 |         # python<=3.5 hack
 28 |         if sys.version_info.minor <= 5:
 29 |             return s \
 30 |                 .encode(locale.getdefaultlocale()[1] or 'utf-8', 'replace') \
 31 |                 .decode(locale.getdefaultlocale()[1] or 'utf-8', 'replace')
 32 |         return s
 33 |     return s.encode(locale.getdefaultlocale()[1] or 'utf-8', 'replace')
 34 |     #return _.decode('utf-8') if PY3K else _
 35 | 
 36 | if os.name == 'nt':
 37 |     endl = '\r\n'
 38 | else: # assume posix
 39 |     endl = '\n'
 40 | 
 41 | class Logger(object):
 42 |     # paste from goagent
 43 |     CRITICAL = 5
 44 |     FATAL = CRITICAL
 45 |     ERROR = 4
 46 |     WARNING = 3
 47 |     WARN = WARNING
 48 |     INFO = 2
 49 |     DEBUG = 1
 50 |     VERBOSE = 0
 51 |     def __init__(self, *args, **kwargs):
 52 |         # self.level = self.__class__.INFO
 53 |         self.logf = None
 54 |         self.__write = __write = lambda x: sys.stdout.write(safestr(x))
 55 |         self.isatty = getattr(sys.stdout, 'isatty', lambda: False)()
 56 |         self.__set_error_color = lambda: None
 57 |         self.__set_warning_color = lambda: None
 58 |         self.__set_debug_color = lambda: None
 59 |         self.__set_verbose_color = lambda: None
 60 |         self.__reset_color = lambda: None
 61 |         if self.isatty:
 62 |             if os.name == 'nt':
 63 |                 self._nt_color_lock = RLock()
 64 |                 import ctypes
 65 |                 SetConsoleTextAttribute = ctypes.windll.kernel32.SetConsoleTextAttribute
 66 |                 GetStdHandle = ctypes.windll.kernel32.GetStdHandle
 67 |                 self.__set_error_color = lambda: (self._nt_color_lock.acquire(), SetConsoleTextAttribute(GetStdHandle(-11), 0x0C))
 68 |                 self.__set_warning_color = lambda: (self._nt_color_lock.acquire(), SetConsoleTextAttribute(GetStdHandle(-11), 0x06))
 69 |                 self.__set_debug_color = lambda: (self._nt_color_lock.acquire(), SetConsoleTextAttribute(GetStdHandle(-11), 0x02))
 70 |                 self.__set_verbose_color = lambda: (self._nt_color_lock.acquire(), SetConsoleTextAttribute(GetStdHandle(-11), 0x08))
 71 |                 self.__set_bright_color = lambda: (self._nt_color_lock.acquire(), SetConsoleTextAttribute(GetStdHandle(-11), 0x0F))
 72 |                 self.__reset_color = lambda: (SetConsoleTextAttribute(GetStdHandle(-11), 0x07), self._nt_color_lock.release())
 73 |             elif os.name == 'posix':
 74 |                 self.__set_error_color = lambda: __write('\033[31m')
 75 |                 self.__set_warning_color = lambda: __write('\033[33m')
 76 |                 self.__set_debug_color = lambda: __write('\033[32m')
 77 |                 self.__set_verbose_color = lambda: __write('\033[36m')
 78 |                 self.__set_bright_color = lambda: __write('\033[32m')
 79 |                 self.__reset_color = lambda: __write('\033[0m')
 80 | 
 81 | 
 82 |     @classmethod
 83 |     def getLogger(cls, *args, **kwargs):
 84 |         return cls(*args, **kwargs)
 85 | 
 86 |     def cleanup(self):
 87 |         if self.logf:
 88 |             _ = self.logf
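            # swap the handle out before closing so a concurrent log() call
            # sees self.logf as None rather than a closed file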
 89 |             self.logf = None
 90 |             _.close()
 91 | 
 92 |     def set_logfile(self, fpath):
 93 |         if self.logf:
 94 |             self.logf.close()
 95 |         self.logf = open(fpath, "ab")
 96 | 
 97 |     def set_level(self, level):
 98 |         f = ('verbose', 'debug', 'info')
 99 |         lv = min(max(level, 0), 3)
100 |         for p in range(lv):
101 |             setattr(self, f[p], self.dummy)
102 | 
103 |     def log(self, level, fmt, *args, **kwargs):
104 |         # fmt=du8(fmt)
105 |         try:
106 |             try:
107 |                 self.__write('%-4s - [%s] %s\n' % (level, datetime.datetime.now(tz_GMT8()).strftime('%X'), fmt % args))
108 |             except (ValueError, TypeError):
109 |                 fmt = fmt.replace('%','%%')
110 |                 self.__write('%-4s - [%s] %s\n' % (level, datetime.datetime.now(tz_GMT8()).strftime('%X'), fmt % args))
111 |         except IOError: # fix for Windows console
112 |             pass
113 |         sys.stdout.flush()
114 |         if self.logf:
115 |             _ = ('[%s] %s%s' % (datetime.datetime.now(tz_GMT8()).strftime('%b %d %X'), fmt % args, endl))
116 |             self.logf.write(_.encode("utf-8", 'replace'))
117 | 
118 |     def dummy(self, *args, **kwargs):
119 |         pass
120 | 
121 |     def debug(self, fmt, *args, **kwargs):
122 |         self.__set_debug_color()
123 |         self.log('DEBG', fmt, *args, **kwargs)
124 |         self.__reset_color()
125 | 
126 |     def info(self, fmt, *args, **kwargs):
127 |         self.log('INFO', fmt, *args, **kwargs)
128 | 
129 |     def verbose(self, fmt, *args, **kwargs):
130 |         self.__set_verbose_color()
131 |         self.log('VERB', fmt, *args, **kwargs)
132 |         self.__reset_color()
133 | 
134 |     def warning(self, fmt, *args, **kwargs):
135 |         self.__set_warning_color()
136 |         self.log('WARN', fmt, *args, **kwargs)
137 |         self.__reset_color()
138 | 
139 |     def warn(self, fmt, *args, **kwargs):
140 |         self.warning(fmt, *args, **kwargs)
141 | 
142 |     def error(self, fmt, *args, **kwargs):
143 |         self.__set_error_color()
144 |         self.log('ERROR', fmt, *args, **kwargs)
145 |         self.__reset_color()
146 | 
147 |     def exception(self, fmt, *args, **kwargs):
148 |         self.error(fmt, *args, **kwargs)
149 |         traceback.print_exc(file = sys.stderr)
150 | 
151 |     def critical(self, fmt, *args, **kwargs):
152 |         self.__set_error_color()
153 |         self.log('CRITICAL', fmt, *args, **kwargs)
154 |         self.__reset_color()
155 | 
--------------------------------------------------------------------------------
/xeHentai/worker.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding:utf-8
  3 | # Contributor:
  4 | #     fffonion
  5 | 
  6 | import re
  7 | import math
  8 | import time
  9 | import random
 10 | import requests
 11 | from requests.adapters import HTTPAdapter
 12 | import traceback
 13 | from threading import Thread, RLock
 14 | from . import util
 15 | from .const import *
 16 | from .i18n import i18n
 17 | from .proxy import PoolException, LowSpeedException
 18 | if PY3K:
 19 |     from queue import Queue, Empty
 20 |     from urllib.parse import urlparse, urlunparse
 21 | else:
 22 |     from Queue import Queue, Empty
 23 |     from urlparse import urlparse, urlunparse
 24 | 
 25 | # pinfo = {'http':'socks5://127.0.0.1:16963', 'https':'socks5://127.0.0.1:16963'}
 26 | 
 27 | class _FakeResponse(object):
 28 |     def __init__(self, url):
 29 |         self.status_code = 600
 30 |         self.content = None
 31 |         self.url = self._real_url = url
 32 |         self.headers = {}
 33 | 
 34 | class FallbackIpAdapter(HTTPAdapter):
 35 |     def __init__(self, ip_map=FALLBACK_IP_MAP, **kwargs):
 36 |         self.ip_map = ip_map
 37 |         kwargs.update({'max_retries': 1})
 38 |         requests.adapters.HTTPAdapter.__init__(self, **kwargs)
 39 | 
 40 |     # override
 41 |     def get_connection(self, url, proxies=None):
 42 |         if not proxies:
 43 |             parsed = urlparse(url)
 44 |             _hostname = parsed.hostname
 45 |             _scheme = parsed.scheme
 46 |             if _hostname in self.ip_map:
 47 |                 _parsed = list(parsed)
 48 |                 # alter the hostname
 49 |                 _hostname = '%s%s' % (random.choice(self.ip_map[_hostname]),
 50 |                     (":%d" % parsed.port) if parsed.port else "")
 51 |                 _scheme = 'https'
 52 |             return self.poolmanager.connection_from_host(_hostname, parsed.port, scheme=_scheme,
 53 |                 pool_kwargs={'assert_hostname': parsed.hostname})
 54 |         else:
 55 |             # fallback
 56 |             return requests.adapters.HTTPAdapter.get_connection(self, url, proxies)
 57 | 
 58 |     def add_headers(self, request, **kwargs):
 59 |         if not request.headers.get('Host'):
 60 |             parsed = urlparse(request.url)
 61 |             request.headers['Host'] = parsed.hostname
 62 | 
 63 |     def cert_verify(self, conn, url, verify, cert):
 64 |         # let the super run verify process
 65 |         if url.startswith('http://'):
 66 |             url = "https://%s" % url[7:]
 67 |         return requests.adapters.HTTPAdapter.cert_verify(self, conn, url, verify, cert)
 68 | 
 69 | class HttpReq(object):
 70 |     def __init__(self, headers = {}, proxy = None, proxy_policy = None, retry = 10, timeout = 20, logger = None, tname = "main"):
 71 |         self.session = requests.Session()
 72 |         self.session.headers = headers
 73 |         for u in ('forums.e-hentai.org', 'e-hentai.org', 'exhentai.org'):
 74 |             self.session.mount('http://%s' % u, FallbackIpAdapter())
 75 |             self.session.mount('https://%s' % u, FallbackIpAdapter())
 76 |         self.session.mount('http://', HTTPAdapter(max_retries=0))
 77 |         self.retry = retry
 78 |         self.timeout = timeout
 79 |         self.proxy = proxy
 80 |         self.proxy_policy = proxy_policy
 81 |         self.logger = logger
 82 |         self.tname = tname
 83 | 
 84 |     def request(self, method, url, _filter, suc, fail, data=None, stream_cb=None):
 85 |         retry = 0
 86 |         url_history = [url]
 87 |         while retry < self.retry:
 88 |             try:
 89 |                 headers = {}
 90 |                 # if proxy_policy is set and matches current url, use proxy
 91 |                 if self.proxy and self.proxy_policy and self.proxy_policy.match(url):
 92 |                     f, __not_good = self.proxy.proxied_request(self.session)
 93 |                 else:
 94 |                     f = self.session.request
 95 |                 r = f(method, url,
 96 |                     allow_redirects=False,
 97 |                     data=data,
 98 |                     timeout=self.timeout,
 99 |                     stream=stream_cb != None)
100 |             except requests.RequestException as ex:
101 |                 self.logger.warning("%s-%s %s %s: %s" % (i18n.THREAD, self.tname, method, url, ex))
102 |                 time.sleep(random.random() + 0.618)
103 |             else:
104 |                 if r.headers.get('content-length'):
105 |                     r.content_length = int(r.headers.get('content-length'))
106 |                 elif not stream_cb:
107 |                     r.content_length = len(r.content)
108 |                 else:
109 |                     r.content_length = 0
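                # content_length: prefer the response header; for non-streamed
                # bodies fall back to the actual body length, and leave 0 for
                # streams since the body has not been read yet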
110 |                 self.logger.verbose("%s-%s %s %s %d %d" % (i18n.THREAD, self.tname, method, url, r.status_code, r.content_length))
111 |                 # if it's a redirect, 3xx
112 |                 if r.status_code > 300 and r.status_code < 400:
113 |                     _new_url = r.headers.get("location")
114 |                     if _new_url:
115 |                         url_history.append(_new_url)
116 |                         if len(url_history) > DEFAULT_MAX_REDIRECTS:
117 |                             self.logger.warning("%s-%s %s %s: too many redirects" % (i18n.THREAD, self.tname, method, url))
118 |                             return _filter(_FakeResponse(url_history[0]), suc, fail)
119 |                         url = _new_url
120 |                         continue
121 |                 # intercept some error to see if we can change IP
122 |                 if self.proxy and r.content_length < 1024 and \
123 |                     re.match("Your IP address has been temporarily banned", r.text):
124 |                     _t = util.parse_human_time(r.text)
125 |                     self.logger.warn(i18n.PROXY_DISABLE_BANNED % _t)
126 |                     # fail this proxy immediately and set expire time
127 |                     __not_good(expire = _t)
128 |                     continue
129 | 
130 |                 r.encoding = "utf-8"
131 |                 # r._text_bytes = r.text.encode("utf-8")
132 |                 r._real_url = url_history[-1]
133 | 
134 |                 r.iter_content_cb = stream_cb
135 | 
136 |                 return _filter(r, suc, fail)
137 |             retry += 1
138 |         return _filter(_FakeResponse(url_history[0]), suc, fail)
139 | 
140 | # speed statistics with ring buffer
141 | class speed_checker(object):
142 |     def __init__(self, cnt=5):
143 |         self.cnt = cnt
144 |         self.speed_buffer = []
145 |         self.reset()
146 | 
147 |     def check(self, l):
148 |         self.current_bytes += l
149 |         self.current_tm = time.time()
150 |         if self.current_tm - self.last_tm > 1:
151 |             self.speed_buffer.append((self.current_bytes-self.last_bytes)/(self.current_tm-self.last_tm))
152 |             while len(self.speed_buffer) > self.cnt:
153 |                 self.speed_buffer.pop(0)
154 |             self.last_tm = self.current_tm
155 |             self.last_bytes = self.current_bytes
156 |         return
157 | 
158 |     def calc(self, full=False):
159 |         if len(self.speed_buffer) == 0:
160 |             return 0
161 |         elif full and len(self.speed_buffer) < self.cnt:
162 |             return 0
163 |         return sum(self.speed_buffer)/len(self.speed_buffer)
164 | 
165 |     def reset(self):
166 |         self.last_tm = time.time()
167 |         self.last_bytes = 0
168 |         self.current_bytes = 0
169 |         self.current_tm = 0
170 |         if self.speed_buffer:
171 |             self.speed_buffer = []
172 | 
173 | class HttpWorker(Thread, HttpReq):
174 |     def __init__(self, tname, task_queue, flt, suc, fail, headers={}, proxy=None, proxy_policy=None,
175 |         retry=3, timeout=10, logger=None, keep_alive=None, stream_mode=False, lowspeed_threshold=None):
176 |         """
177 |         Construct a new 'HttpWorker' object
178 | 
179 |         :param tname: The name of this http worker
180 |         :param task_queue: The task Queue instance
181 |         :param flt: the filter function
182 |         :param suc: the function to call when succeeded
183 |         :param fail: the function to call when failed
184 |         :param headers: custom HTTP headers
185 |         :param proxy: proxy dict
186 |         :param proxy_policy: a function to determine whether proxy should be used
187 |         :param retry: retry count
188 |         :param timeout: timeout in seconds
189 |         :param logger: the Logger instance
190 |         :param keep_alive: the callback to send keep alive
191 |         :param stream_mode: set the request to use stream mode, keep_alive will be called every iteration
192 |         :return: returns nothing
193 |         """
194 |         HttpReq.__init__(self, headers, proxy, proxy_policy, retry, timeout, logger, tname = tname)
195 |         Thread.__init__(self, name = tname)
196 |         Thread.setDaemon(self, True)
197 |         self.task_queue = task_queue
198 |         self.logger = logger
199 |         self._keepalive = keep_alive
200 |         self._exit = lambda x: False
201 |         self.flt = flt
202 |         self.f_suc = suc
203 |         self.f_fail = fail
204 |         self.stream_mode = stream_mode
205 |         self.stream_speed = None
206 |         self.lowspeed_threshold = lowspeed_threshold
207 |         # if we don't check in within zombie_threshold seconds, monitor will regard us as a zombie
208 |         self.zombie_threshold = timeout * (retry + 1)
209 |         self.run_once = False
210 | 
211 |     def _finish_queue(self, *args):
212 |         # exit if current queue is finished
213 |         return self.run_once and self.task_queue.empty()
214 | 
215 |     def run(self):
216 |         self.logger.verbose("t-%s start" % self.name)
217 |         _stream_cb = None
218 |         if self.stream_mode:
219 |             self.stream_speed = speed_checker()
220 |             def f(d):
221 |                 self.stream_speed.check(len(d))
222 |                 if self.lowspeed_threshold:
223 |                     speed = self.stream_speed.calc(full=True)
224 |                     if 0 < speed < self.lowspeed_threshold:
225 |                         raise LowSpeedException("")
226 |                 self._keepalive(self)
227 |             _stream_cb = f
228 |         while not self._keepalive(self) and not self._exit(self):
229 |             try:
230 |                 url = self.task_queue.get(False)
231 |             except Empty:
232 |                 # set back to 0 when waiting
233 |                 if self.stream_speed:
234 |                     self.stream_speed.reset()
235 |                 time.sleep(1)
236 |                 continue
237 |             self.run_once = True
238 |             try:
239 |                 self.request("GET", url, self.flt, self.f_suc, self.f_fail, stream_cb=_stream_cb)
240 |             except PoolException as ex:
241 |                 self.logger.warning("%s-%s %s" % (i18n.THREAD, self.tname, str(ex)))
242 |                 break
243 |             except LowSpeedException as ex:
244 |                 self.logger.warning(i18n.THREAD_SPEED_TOO_LOW % (
245 |                     self.tname,
246 |                     util.human_size(self.stream_speed.calc(full=True)),
247 |                     util.human_size(self.lowspeed_threshold),
248 |                 ))
249 |                 self.flt(_FakeResponse(url), self.f_suc, self.f_fail)
250 |             except Exception as ex:
251 |                 self.logger.warning(i18n.THREAD_UNCAUGHT_EXCEPTION % (self.tname, traceback.format_exc()))
252 |                 self.flt(_FakeResponse(url), self.f_suc, self.f_fail)
253 |         # notify monitor the last time
254 |         self.logger.verbose("t-%s exit" % self.name)
255 |         self._keepalive(self, _exit = True)
256 | 
257 | class ArchiveWorker(Thread):
258 |     # this worker is not managed by monitor
259 |     def __init__(self, logger, task, exit_check = None):
260 |         Thread.__init__(self, name = "archiver%s" % task.guid)
261 |         Thread.setDaemon(self, True)
262 |         self.logger = logger
263 |         self.task = task
264 |         self._exit = exit_check if exit_check else lambda x: False
265 | 
266 |     def run(self):
267 |         while self.task.state < TASK_STATE_FINISHED:
268 |             if self._exit(self) or self.task.state in (TASK_STATE_PAUSED, TASK_STATE_FAILED):
269 |                 return
270 |             time.sleep(1)
271 |         self.logger.info(i18n.TASK_START_MAKE_ARCHIVE % self.task.guid)
272 |         self.task.state = TASK_STATE_MAKE_ARCHIVE
273 |         t = time.time()
274 |         try:
275 |             pth = self.task.make_archive()
276 |         except Exception as ex:
277 |             self.task.state = TASK_STATE_FAILED
278 |             self.logger.error(i18n.TASK_ERROR % (self.task.guid, i18n.c(ERR_CANNOT_MAKE_ARCHIVE) % traceback.format_exc()))
279 |         else:
280 |             self.task.state = TASK_STATE_FINISHED
281 |             self.logger.info(i18n.TASK_MAKE_ARCHIVE_FINISHED % (self.task.guid, pth, time.time() - t))
282 | 
283 | 
284 | class Monitor(Thread):
285 |     def __init__(self, req, proxy, logger, task, exit_check=None, ignored_errors=[]):
286 |         Thread.__init__(self, name = "monitor%s" % task.guid)
287 |         Thread.setDaemon(self, True)
288 |         # the count of votes per error code
289 |         self.vote_result = {}
290 |         # the error code to be ignored
291 |         self.vote_cleared = set().union(ignored_errors)
292 |         self.thread_last_seen = {}
293 |         self.dctlock = RLock()
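        # dctlock guards the thread bookkeeping dicts (thread_last_seen,
        # thread_ref); votelock serializes error-code voting from workers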
294 |         self.votelock = RLock()
295 |         self.thread_ref = {}
296 |         self.thread_zombie = set()
297 |         # HttpReq instance
298 |         self.req = req
299 |         # proxy.Pool instance
300 |         self.proxy = proxy
301 |         self.logger = logger
302 |         self.task = task
303 |         self._exit = exit_check if exit_check else lambda x: False
304 |         self._cleaning_up = False
305 |         self.download_speed = 0
306 |         if os.name == "nt":
307 |             self.set_title = lambda s: os.system("TITLE %s" % (
308 |                 s if PY3K else s.encode(CODEPAGE, 'replace')))
309 |         elif os.name == 'posix':
310 |             import sys
311 |             self.set_title = lambda s: sys.stdout.write("\033]2;%s\007" % (
312 |                 s if PY3K else s.encode(CODEPAGE, 'replace')))
313 | 
314 |     def set_vote_ns(self, tnames):
315 |         t = time.time()
316 |         self.thread_last_seen = {k:t for k in tnames}
317 | 
318 |     def vote(self, tname, code):
319 |         # thread_id, result_code
320 |         self.votelock.acquire()
321 |         if code != ERR_NO_ERROR:
322 |             self.logger.verbose("t-%s vote:%s" % (tname, code))
323 |         if code not in self.vote_result:
324 |             self.vote_result[code] = 1
325 |         else:
326 |             self.vote_result[code] += 1
327 |         self.votelock.release()
328 | 
329 |     def wrk_keepalive(self, wrk_thread, _exit = False):
330 |         tname = wrk_thread.name
331 |         if tname in self.thread_zombie:
332 |             self.thread_zombie.remove(tname)
333 |         # all image downloaded
334 |         # task is finished or failed
335 |         # monitor is exiting or worker notifies its exit
336 |         _ = self.task.meta['finished'] == self.task.meta['total'] or \
337 |             self.task.state in (TASK_STATE_FINISHED, TASK_STATE_FAILED) or \
338 |             self._exit("mon") or _exit
339 |         # self.logger.verbose("mon#%s %s ask, %s, %s" % (self.task.guid, tname, _,
340 |         #     self.thread_last_seen))
341 |         if _ or not wrk_thread.is_alive():
342 |             self.dctlock.acquire()
343 |             if tname in self.thread_last_seen:
344 |                 del self.thread_last_seen[tname]
345 |             if tname in self.thread_ref:
346 |                 del self.thread_ref[tname]
347 |             self.dctlock.release()
348 |         else:
349 |             self.thread_last_seen[tname] = time.time()
350 |             if tname not in self.thread_ref:
351 |                 self.thread_ref[tname] = wrk_thread
352 |         return _
353 | 
354 |     # def _rescan_pages(self):
355 |     #     # not using
356 |     #     # throw away existing page urls
357 |     #     while True:
358 |     #         try:
359 |     #             self.task.page_q.get(False)
360 |     #         except Empty:
361 |     #             break
362 |     #     # put page into task.list_q
363 |     #     [self.task.list_q.put("%s/?p=%d" % (self.task.url, x)
364 |     #         for x in range(1, 1 + int(math.ceil(self.task.meta['total']/20.0))))
365 |     #     ]
366 |     #     print(self.task.list_q.qsize())
367 | 
368 |     def _check_vote(self):
369 |         # if False and ERR_IMAGE_RESAMPLED in self.vote_result and ERR_IMAGE_RESAMPLED not in self.vote_cleared:
370 |         #     self.logger.warning(i18n.TASK_START_PAGE_RESCAN % self.task.guid)
371 |         #     self._rescan_pages()
372 |         #     self.task.meta['has_ori'] = True
373 |         #     self.vote_cleared.add(ERR_IMAGE_RESAMPLED)
374 |         if ERR_QUOTA_EXCEEDED in self.vote_result and \
375 |             ERR_QUOTA_EXCEEDED not in self.vote_cleared and \
376 |             self.vote_result[ERR_QUOTA_EXCEEDED] >= len(self.thread_last_seen):
377 |             self.logger.error(i18n.TASK_STOP_QUOTA_EXCEEDED % self.task.guid)
378 |             self.task.state = TASK_STATE_FAILED
379 | 
380 |     def run(self):
381 |         CHECK_INTERVAL = 10
382 |         STUCK_INTERVAL = 90
383 |         intv = 0
384 |         self.set_title(i18n.TASK_START % self.task.guid)
385 |         last_change = time.time()
386 |         last_finished = -1
387 |         while len(self.thread_last_seen) > 0:
388 |             intv += 1
389 |             self._check_vote()
390 |             total_speed = 0
391 |             for k, last_seen in list(self.thread_last_seen.items()):
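                # sweep pass: a thread silent past its zombie_threshold is either
                # flagged as zombie (still alive) or swept out (dead); healthy
                # streaming threads contribute to the aggregate download speed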
392 |                 _zombie_threshold = self.thread_ref[k].zombie_threshold if k in self.thread_ref else 30
393 |                 if time.time() - last_seen > _zombie_threshold:
394 |                     if k in self.thread_ref and self.thread_ref[k].is_alive():
395 |                         self.logger.warning(i18n.THREAD_MAY_BECOME_ZOMBIE % k)
396 |                         self.thread_zombie.add(k)
397 |                     else:
398 |                         self.logger.warning(i18n.THREAD_SWEEP_OUT % k)
399 |                         del self.thread_last_seen[k]
400 |                 # if thread is not a zombie, add to speed sum
401 |                 elif k in self.thread_ref and self.thread_ref[k].stream_speed:
402 |                     total_speed += self.thread_ref[k].stream_speed.calc()
403 |             self.download_speed = total_speed
404 |             if intv == CHECK_INTERVAL:
405 |                 _ = "%s %dR/%dZ, %s %dR/%dD, %s/s" % (
406 |                     i18n.THREAD,
407 |                     len(self.thread_last_seen), len(self.thread_zombie),
408 |                     i18n.QUEUE,
409 |                     self.task.img_q.qsize(),
410 |                     self.task.meta['finished'],
411 |                     util.human_size(total_speed))
412 |                 self.logger.info(_)
413 |                 self.set_title(_)
414 |                 intv = 0
415 |             # if not downloading any new images in 1.5 min, exit
416 |             if last_finished != self.task.meta['finished']:
417 |                 last_change = time.time()
418 |                 last_finished = self.task.meta['finished']
419 |             elif time.time() - last_change > STUCK_INTERVAL:
420 |                 self.logger.info(i18n.TASK_UNFINISHED % (self.task.guid, self.task.get_fid_unfinished()))
421 |                 if total_speed > 0:
422 |                     # reset last_change
423 |                     last_change = time.time()
424 |                     self.logger.warning(i18n.TASK_SLOW % self.task.guid)
425 |                 else:
426 |                     self.logger.warning(i18n.TASK_STUCK % self.task.guid)
427 |                     break
428 |             time.sleep(0.5)
429 |         if self.task.meta['finished'] == self.task.meta['total']:
430 |             _err = self.task.rename_fname()
431 |             if _err:
432 |                 self.logger.warning(i18n.XEH_RENAME_HAS_ERRORS % (
433 |                     "\n".join(map(lambda x: "%s => %s : %s" % x, _err))
434 |                 ))
435 |             self.set_title(i18n.TASK_FINISHED % self.task.guid)
436 |             self.logger.info(i18n.TASK_FINISHED % self.task.guid)
437 |             self.task.state = TASK_STATE_FINISHED
438 |             self.task.cleanup()
439 | 
440 | if __name__ == '__main__':
441 |     print(HttpReq().request("GET", "https://ipip.tk", lambda r, suc, fail: r, None, None))
442 | 
--------------------------------------------------------------------------------