├── .gitignore ├── LICENSE ├── README.md ├── doc └── import-to-pinry.md ├── picktrue.user.js ├── res ├── artstation-downloaded.jpg ├── huaban-downloaded.jpg ├── pixiv.jpg └── usage.gif └── src ├── Makefile ├── build-on-windows-metmuseum.bat ├── build-on-windows.bat ├── dev-requirements.txt ├── files ├── icon.icns ├── icon.ico └── icon.png ├── picktrue ├── __init__.py ├── __main__.py ├── engine.py ├── gui │ ├── __init__.py │ ├── __main__.py │ ├── config.py │ ├── downloader.py │ ├── entry.py │ ├── pinry_importer.py │ └── toolkit.py ├── logger.py ├── meta.py ├── pinry │ ├── __init__.py │ ├── ds.py │ ├── importer.py │ └── uploader.py ├── rpc │ ├── __init__.py │ ├── channel.py │ └── taskserver.py ├── sites │ ├── __init__.py │ ├── abstract.py │ ├── artstation.py │ ├── douban.py │ ├── huaban.py │ ├── metmuseum.py │ ├── pixiv.py │ └── utils.py ├── utils.py └── version.py ├── setup.py └── tests └── test_sites └── test_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # log file 2 | *.log 3 | 4 | # fucking MacOS 5 | .DS_Store 6 | 7 | # I have no idea 8 | .idea 9 | 10 | # DB files 11 | *.sqlite 12 | *.sqlite3 13 | 14 | # Byte-compiled / optimized / DLL files 15 | __pycache__/ 16 | *.py[cod] 17 | *$py.class 18 | 19 | # C extensions 20 | *.so 21 | 22 | # Distribution / packaging 23 | .Python 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | parts/ 33 | sdist/ 34 | var/ 35 | wheels/ 36 | *.egg-info/ 37 | .installed.cfg 38 | *.egg 39 | MANIFEST 40 | 41 | # PyInstaller 42 | # Usually these files are written by a python script from a template 43 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
44 | *.manifest 45 | *.spec 46 | 47 | # Installer logs 48 | pip-log.txt 49 | pip-delete-this-directory.txt 50 | 51 | # Unit test / coverage reports 52 | htmlcov/ 53 | .tox/ 54 | .coverage 55 | .coverage.* 56 | .cache 57 | nosetests.xml 58 | coverage.xml 59 | *.cover 60 | .hypothesis/ 61 | .pytest_cache/ 62 | 63 | # Translations 64 | *.mo 65 | *.pot 66 | 67 | # Django stuff: 68 | *.log 69 | local_settings.py 70 | db.sqlite3 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | target/ 84 | 85 | # Jupyter Notebook 86 | .ipynb_checkpoints 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # celery beat schedule file 92 | celerybeat-schedule 93 | 94 | # SageMath parsed files 95 | *.sage.py 96 | 97 | # Environments 98 | .env 99 | .venv 100 | env/ 101 | venv/ 102 | ENV/ 103 | env.bak/ 104 | venv.bak/ 105 | 106 | # Spyder project settings 107 | .spyderproject 108 | .spyproject 109 | 110 | # Rope project settings 111 | .ropeproject 112 | 113 | # mkdocs documentation 114 | /site 115 | 116 | # mypy 117 | .mypy_cache/ 118 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Ji Qu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | PickTrue 2 | ------------ 3 | 4 | ![logo](src/files/icon.png) 5 | 6 | 初衷是帮你备份自己的花瓣采集,以免哪天被和谐。 7 | 8 | 强迫症可以收集喜爱画师的图。 9 | 10 | 画师/设计师们也可以用来构建自己的Visual Library 11 | 12 | **划重点**:现在支持导出画板的画板和Tag信息,并且导入另一个个人画板项目[Pinry](https://github.com/pinry/pinry)啦! 13 | 14 | # 下载 15 | 16 | + [windows-x64](https://github.com/winkidney/PickTrue/releases) 17 | + [macOS](https://github.com/winkidney/PickTrue/releases) 18 | 19 | Linux请直接下载并安装Python项目即可,有打包需求再提 20 | 21 | + [适用于Artstation的油猴脚本](https://greasyfork.org/zh-CN/scripts/390597-picktruebrowser) 22 | + 推荐使用的油猴脚本插件(其他的不推荐,理由很多,不详细叙述啦),不推荐的插件可能也能运行,但我没有测试 23 | + [火狐的ViolentMonkey](https://addons.mozilla.org/en-US/firefox/addon/violentmonkey/?src=search) 24 | + [Chrome的ViolentMonkey](https://chrome.google.com/webstore/detail/violentmonkey/jinjaccalgkegednnccohejagnlnfdag) 25 | 26 | # 用法 27 | 28 | ## 从花瓣迁移到Pinry 29 | 参见:[导入到Pinry](./doc/import-to-pinry.md) 30 | 31 | ## 支持列表和网址范例 32 | 33 | + ArtStation 34 | + 个人页(按艺术家): https://www.artstation.com/braveking 35 | + 花瓣网 36 | + 个人页 (按收藏者): http://huaban.com/wmtzyzw1fl/ 37 | + 按画板 :http://huaban.com/boards/18720569/ 38 | + Pixiv 39 | + 个人页(按作者,如果是漫画则会自动建立子文件夹): https://www.pixiv.net/en/users/212801 40 | + 豆瓣 41 | + 相册:https://www.douban.com/photos/album/145972492/ 42 | 43 | ## 通用 44 | 45 | ![用法](res/usage.gif) 46 | 47 
| ## Pixiv用法 48 | 49 | 如果本身已经全局翻墙或者路由翻墙,可以不填写单独的代理地址 50 | 51 | ![pixiv](res/pixiv.jpg) 52 | 53 | 54 | ## 需要配合油猴脚本的Artstation下载 55 | 56 | 1. 安装油猴脚本: [适用于Artstation的油猴脚本](https://greasyfork.org/zh-CN/scripts/390597-picktruebrowser) 57 | 2. 下载最新的[客户端](https://github.com/winkidney/PickTrue/releases) 58 | 3. 打开浏览器,访问Artstation你想要下载的指定用户的主页,例如 [https://www.artstation.com/braveking](https://www.artstation.com/braveking) 59 | 4. 启动PickTrue客户端,切换到Artstation的Tab,粘贴第三步的主页地址到地址填写处,设置好代理(没有代理似乎无法下载了)和下载文件夹, 60 | 点击"开始下载"(注意)这时候下载进度不会更新,将不会有下载进度产生。 61 | 5. (火狐浏览器+ViolentMonkey)在浏览器页面中,右键,选择"发送相册到PickTrue并下载",注意这一步不要重复操作,操作一次就行了。 62 | 5. (Chrome内核浏览器+ViolentMonkey)在浏览器页面中,按住Ctrl+鼠标右键,就会开始下载(注意这一步不要重复操作,操作一次就行了)。 63 | 6. (可选步骤)打开浏览器控制台,观察控制台的日志。 64 | 7. 此时,下载器的下载进度(底部状态栏)应该开始更新了,耐心等待即可:) 65 | 66 | # 已知问题 67 | 68 | + 花瓣下载会丢失部分不一致的数据(比较罕见),原因是花瓣的网页能看到的图,在花瓣的API里面不存在,属于花瓣网的Bug,以后有精力会修复这个问题,基本不影响使用 69 | + Pixiv下载需要代理,或者本地hosts,或者是全局翻墙 70 | 71 | # 其他图站 72 | 73 | 欢迎提出建议:) 74 | 75 | + Pinterest (暂无计划) 76 | + NHentai (暂无计划) 77 | 78 | # 其他功能 79 | 80 | + 自动记录上次选择的保存路径 81 | 82 | # 更新日志 83 | 84 | 参见 `release` 页,https://github.com/winkidney/PickTrue/releases 85 | 86 | # Bug反馈和使用交流 87 | 88 | + QQ群:863404640 89 | 90 | # 附图 91 | 92 | ## ArtStation 93 | ![Artstation](res/artstation-downloaded.jpg) 94 | 95 | ## 花瓣 96 | ![Huaban](res/huaban-downloaded.jpg) 97 | -------------------------------------------------------------------------------- /doc/import-to-pinry.md: -------------------------------------------------------------------------------- 1 | 将你的画板系统迁移到Pinry 2 | ---------------------- 3 | 4 | 现在仅提供从花瓣迁移的方法,其他画板,你只需要生成和花瓣导出的CSV格式一样的文件,也能直接导入! 5 | 6 | # 将花瓣导入到Pinry 7 | 8 | 1. 使用下载器的下载功能,重新下载花瓣(个人页) 9 | 2. 下载文件夹里会多一个pins2import.csv文件 10 | 3. 切换到下载器的"Pinry导入"功能: 11 | + 填写你部署的Pinry的地址,例如 `http://pin.37soloist.com` 12 | + 填写登录账号和密码 13 | + 点击“测试登录”可以看你的账号密码是否配置正确 14 | + 选取刚才提到的CSV文件路径 15 | + **可选**: 如果有必要,过滤掉花瓣自动处理为小锁的那些图片,设定图片上传最小尺寸为20(kb)即可。 16 | 4. 
/**
 * Factory for the in-page HTTP client.
 *
 * The returned object exposes a single method, `fetchUrl(url, callback)`,
 * which logs the URL through `logger.info` and then issues the request with
 * jQuery's `$.get`, returning whatever `$.get` returns.
 */
let BrowserClient = function () {
    const fetchUrl = function (url, callback) {
        logger.info("Fetching url:", url);
        return $.get(url, callback);
    };

    return {
        fetchUrl: fetchUrl,
    };
};
/**
 * Install the trigger that runs `entryFn`, choosing the set-up routine by
 * browser: the Firefox variant when `utils.isFirefox()` is true, otherwise the
 * Chrome variant when `utils.isChrome()` is true. Any other browser gets an
 * alert naming its user agent and nothing is installed.
 */
function setUpContextMenu(entryFn) {
    if (utils.isFirefox()) {
        _setUpContextMenuFirefox(entryFn);
        return;
    }
    if (utils.isChrome()) {
        _setUpContextMenuChrome(entryFn);
        return;
    }
    alert("Unsupported browser " + navigator.userAgent);
}
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/winkidney/PickTrue/772b105e4de3852bba41369221f47b8480bf1070/res/artstation-downloaded.jpg -------------------------------------------------------------------------------- /res/huaban-downloaded.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/winkidney/PickTrue/772b105e4de3852bba41369221f47b8480bf1070/res/huaban-downloaded.jpg -------------------------------------------------------------------------------- /res/pixiv.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/winkidney/PickTrue/772b105e4de3852bba41369221f47b8480bf1070/res/pixiv.jpg -------------------------------------------------------------------------------- /res/usage.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/winkidney/PickTrue/772b105e4de3852bba41369221f47b8480bf1070/res/usage.gif -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | build-osx: 2 | pyinstaller --name picktrue --onefile --icon=files/icon.icns --windowed picktrue/gui/__main__.py 3 | clean: 4 | rm -fr ./build 5 | rm -fr ./dist 6 | gui: 7 | python -m picktrue.gui 8 | -------------------------------------------------------------------------------- /src/build-on-windows-metmuseum.bat: -------------------------------------------------------------------------------- 1 | rm -fr dist build 2 | pyinstaller --name picktrue-metmuseum --onefile --icon=files/icon.ico picktrue/sites/metmuseum.py 3 | -------------------------------------------------------------------------------- /src/build-on-windows.bat: -------------------------------------------------------------------------------- 1 | pyinstaller 
--name picktrue-windows --onefile --icon=files/icon.ico --windowed picktrue/gui/__main__.py 2 | -------------------------------------------------------------------------------- /src/dev-requirements.txt: -------------------------------------------------------------------------------- 1 | pyinstaller 2 | pytest 3 | -------------------------------------------------------------------------------- /src/files/icon.icns: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/winkidney/PickTrue/772b105e4de3852bba41369221f47b8480bf1070/src/files/icon.icns -------------------------------------------------------------------------------- /src/files/icon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/winkidney/PickTrue/772b105e4de3852bba41369221f47b8480bf1070/src/files/icon.ico -------------------------------------------------------------------------------- /src/files/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/winkidney/PickTrue/772b105e4de3852bba41369221f47b8480bf1070/src/files/icon.png -------------------------------------------------------------------------------- /src/picktrue/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/winkidney/PickTrue/772b105e4de3852bba41369221f47b8480bf1070/src/picktrue/__init__.py -------------------------------------------------------------------------------- /src/picktrue/__main__.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from picktrue.sites.douban import DoubanPersonalAlbum 4 | from picktrue.sites.pixiv import Pixiv 5 | 6 | from picktrue.logger import pk_logger 7 | from picktrue.sites.artstation import ArtStation 8 | from picktrue.sites.huaban import HuaBan, HuaBanBoard 9 | from 
@click.argument("url")
@click.option("--proxy", default=None, type=click.STRING)
@entry.command(
    "artstation-user",
    help='download from artstation user home page',
)
def artstation_user(url, proxy):
    """Queue every task of an ``ArtStation`` site and wait for completion.

    :param url: value passed as the first argument to ``ArtStation``.
    :param proxy: optional proxy string forwarded to ``ArtStation``.
    :raises SystemExit: with code 0 when the wait is interrupted by Ctrl+C.
    """
    site = ArtStation(url, proxy=proxy)
    downloader = Downloader(fetcher=site.fetcher, save_dir=site.dir_name)
    downloader.add_task(
        site.tasks
    )
    pk_logger.info("All task add...waiting for execution...")
    try:
        downloader.join()
    except KeyboardInterrupt:
        pk_logger.warn("Exiting...Press crtl+c again to force quit")
        downloader.stop()
        # The bare ``exit`` helper is injected by the ``site`` module and is
        # not guaranteed to exist (``python -S``, frozen builds), so raise
        # SystemExit directly instead of calling it.
        raise SystemExit(0)
    else:
        pk_logger.info("All task done...Enjoy!")
@click.argument("album-url")
@entry.command(
    "douban-personal-album",
    help='download from douban personal album',
)
def douban_personal_album(album_url):
    """Queue every task of a ``DoubanPersonalAlbum`` site and wait for completion.

    :param album_url: value passed to ``DoubanPersonalAlbum``.
    :raises SystemExit: with code 0 when the wait is interrupted by Ctrl+C.
    """
    site = DoubanPersonalAlbum(album_url)
    downloader = Downloader(fetcher=site.fetcher, save_dir=site.dir_name)
    downloader.add_task(
        site.tasks
    )
    pk_logger.info("All task add...waiting for execution...")
    try:
        downloader.join()
    except KeyboardInterrupt:
        pk_logger.warn("Exiting...Press crtl+c again to force quit")
        downloader.stop()
        # The bare ``exit`` helper is injected by the ``site`` module and is
        # not guaranteed to exist (``python -S``, frozen builds), so raise
        # SystemExit directly instead of calling it.
        raise SystemExit(0)
    else:
        pk_logger.info("All task done...Enjoy!")
class StoppableThread(Thread):
    """Daemon worker that runs ``target`` for every task taken from ``queue``.

    Each task must expose ``args`` and ``kwargs`` attributes (either may be
    ``None``). The loop polls the queue with a short timeout so that
    :meth:`stop` takes effect promptly.
    """

    def __init__(
            self, queue, target
    ):
        """
        :type queue: queue.Queue
        :param target: callable invoked as ``target(*task.args, **task.kwargs)``
        """
        super(StoppableThread, self).__init__()
        self.task_func = target
        self.queue = queue
        self.daemon = True
        self._stopped = False

    def run(self):
        """Process tasks until :meth:`stop` is called.

        A failing task is logged and skipped. ``task_done()`` is always
        called, otherwise one exception would kill the worker and leave
        ``queue.join()`` blocked forever.
        """
        while not self._stopped:
            try:
                task = self.queue.get(timeout=0.2)
            except Empty:
                continue
            args = task.args or ()
            kwargs = task.kwargs or {}
            try:
                self.task_func(*args, **kwargs)
            except Exception as exc:
                pk_logger.error("Worker task failed: %s" % exc)
            finally:
                self.queue.task_done()

    def stop(self):
        """Ask the loop in :meth:`run` to exit after the current iteration."""
        self._stopped = True
def ensure_dir(self):
    """Create ``self.save_dir`` if it does not exist yet.

    Missing parent directories are created as well, and a directory that
    appears between the existence check and the creation is tolerated.
    """
    if not os.path.exists(self.save_dir):
        # makedirs handles nested targets (os.mkdir would raise
        # FileNotFoundError); exist_ok avoids a race with a concurrent creator.
        os.makedirs(self.save_dir, exist_ok=True)
def describe(self):
    """Return a one-line summary of finished versus total tasks."""
    counter = self.counter
    finished, expected = counter.done, counter.total
    return "%s of %s downloaded" % (finished, expected)
class ConfigStore(AttrDict):
    """Attribute-style dict that is persisted as JSON in ``_save_file``.

    Every attribute assignment and every :meth:`op_store_path` call rewrites
    the whole file.
    """

    _save_file = Path(os.path.expanduser("~/.picktrue-config.json"))

    @classmethod
    def from_config_file(cls):
        """Load the store from ``_save_file``.

        Returns an empty store when the file is missing, is not valid JSON,
        or does not contain a JSON object. The module creates the store at
        import time, so a damaged file must not raise here.
        """
        path = Path(cls._save_file)
        if not os.path.exists(path):
            return cls()
        try:
            with open(path, "rb") as f:
                data = json.load(f)
        except ValueError:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            return cls()
        if not isinstance(data, dict):
            return cls()
        return cls(**data)

    def __setattr__(self, key, value):
        """Store ``value`` under ``key`` and write the file immediately."""
        super(ConfigStore, self).__setattr__(key, value)
        self._save()

    def _save(self):
        """Serialise the whole mapping to ``_save_file`` as JSON."""
        path = Path(self._save_file)
        with open(path, "w") as f:
            json.dump(self, f)

    def op_store_path(self, name, path):
        """Remember ``path`` (stored as a string) under ``name`` and save."""
        path = Path(path)
        self[name] = str(path)
        self._save()

    def op_read_path(self, name):
        """Return the stored path for ``name`` as a ``Path``, or ``None``."""
        path = self.get(name, None)
        return Path(path) if path is not None else None
def mk_pixiv_inputs(master=None):
    """Build the five input widgets for the Pixiv tab.

    The widgets are created in the order they are returned:
    ``(url, username, password, proxy, save_path)``.

    :param master: parent widget handed to every input.
    """
    # A tuple display evaluates its items left to right, which keeps the
    # original construction order of the widgets.
    return (
        NamedInput(master, name="用户主页地址"),
        NamedInput(master, name="Pixiv账户名(需要登录才能下载)"),
        PasswordInput(master, name="登录密码"),
        ProxyInput(master, name="代理地址(支持http/https/socks5, 可不填)"),
        FileBrowse(master, store_name="pixiv_save_path"),
    )
def update_progress(self):
    """Refresh the progress bar and the status line.

    With no downloader the bar is reset to 0/100 and the status is cleared.
    Otherwise both reflect the downloader's counter, and a completion note
    is appended once the downloader reports ``done``.
    """
    if self.downloader is None:
        self.progress.update_progress(0, 100)
        self.status.set("")
        return

    self.progress.update_progress(
        self.downloader.counter.done,
        self.downloader.counter.total,
    )
    summary = self.downloader.counter.format()
    finished_note = " 全部下载完毕,可以开始新的下载了:)" if self.downloader.done else ""
    self.status.set(summary + finished_note)
def start_download(self):
    """Read the Pixiv form and start a new download through ``pixiv_run``.

    Every input's ``assert_no_error()`` is called first. The download is
    refused with an ``info`` message when the target folder is not writable
    or when a previous downloader has not finished yet.
    """
    checked_fields = (
        self.url,
        self.username,
        self.password,
        self.proxy,
        self.save_path,
    )
    for field in checked_fields:
        field.assert_no_error()

    url = self.url.get_input()
    # An empty proxy field is passed on as None.
    proxy = self.proxy.get_input() or None
    username = self.username.get_input()
    password = self.password.get_input()
    path_prefix = self.save_path.get_path()

    if not os.access(path_prefix, os.W_OK):
        return info("对下载文件夹没有写权限,请重新选择")
    if self.downloader is not None and not self.downloader.done:
        return info("请停止后再重新点击下载...")

    self.downloader = pixiv_run(
        url=url,
        username=username,
        password=password,
        proxy=proxy,
        path_prefix=path_prefix,
    )
def start_download(self):
    """Validate the ArtStation form and launch a download.

    The url, save path and proxy inputs are validated first.  The download
    is refused with an info dialog when the target folder is not writable or
    when a previous download has not finished; otherwise the downloader
    returned by ``self.run`` is stored on ``self.downloader``.
    """
    self.url.assert_no_error()
    self.save_path.assert_no_error()
    self.proxy.assert_no_error()

    url = self.url.get_input()
    path_prefix = self.save_path.get_path()
    # The proxy field is optional.  Normalise an empty entry to ``None`` (the
    # default of ``art_station_run``), matching the Pixiv tab, instead of
    # handing an empty string down to the site.
    proxy = self.proxy.get_input() or None

    if not os.access(path_prefix, os.W_OK):
        return info("对下载文件夹没有写权限,请重新选择")
    if self.downloader is not None:
        if not self.downloader.done:
            return info("请停止后再重新点击下载...")
    self.downloader = self.run(
        url=url,
        path_prefix=path_prefix,
        proxy=proxy,
    )
def hua_ban_run(url, path_prefix=None, return_site=False):
    """Start a HuaBan download for *url* via ``_user_home_run``.

    :param url: address passed to the ``HuaBan`` site constructor
    :param path_prefix: optional directory forwarded to ``_user_home_run``
    :param return_site: when truthy, return ``(downloader, site)`` instead
        of just the downloader
    """
    huaban_site = HuaBan(url)
    downloader = _user_home_run(site=huaban_site, path_prefix=path_prefix)
    return (downloader, huaban_site) if return_site else downloader
def _test_login(self):
    """Try to log in to Pinry with the entered values and report the result.

    Builds an importer from the current form values and shows an info dialog
    saying whether ``test_login`` succeeded.
    """
    importer = self._get_importer()
    if importer.test_login() is True:
        info("登录成功")
    else:
        # Typo fix in the user-facing hint: it previously started with the
        # wrong character ("情" instead of "请", i.e. "please check ...").
        info("请检查用户名密码以及部署路径是否可访问")
def open_sys_explorer(path):
    """Open *path* in the platform's file manager.

    Uses ``open`` on macOS, ``explorer.exe`` on Windows and ``xdg-open`` on
    Linux; on any other platform an info dialog is shown instead.

    :param path: folder to reveal
    :return: the opener's exit status, ``127`` when the opener could not be
        started, or the result of ``info`` on unsupported platforms
    """
    # Local import: nothing else in this module needs subprocess.
    import subprocess

    ptf = platform.system().lower()
    target = str(Path(path))
    if "darwin" in ptf:
        opener = "open"
    elif "windows" in ptf:
        opener = "explorer.exe"
    elif "linux" in ptf:
        opener = "xdg-open"
    else:
        return info('平台不支持')
    try:
        # Pass an argument list instead of a formatted shell string so that
        # paths containing spaces or shell metacharacters are handed to the
        # opener verbatim and are never interpreted by a shell.
        return subprocess.call([opener, target])
    except OSError:
        # Opener missing or not executable: report failure through the return
        # value (as ``os.system`` did) rather than raising into the GUI.
        return 127
def assert_no_error(self):
    """Validate the optional proxy address entered by the user.

    An empty entry is accepted (the proxy is optional).  Otherwise the
    address must carry one of the supported schemes: ``http``, ``https``,
    ``socks5`` or ``socks5h``.

    :raises ValueError: when the scheme is missing or unsupported (an info
        dialog is shown first)
    """
    # Local import: only this validator needs URL parsing.
    from urllib.parse import urlparse

    value = self.get_input()
    if not value:
        return
    # Check the actual URL scheme.  The previous substring test accepted any
    # text that merely contained "http" somewhere (e.g. inside a host name).
    scheme = urlparse(value.strip()).scheme.lower()
    if scheme not in ('http', 'https', 'socks5', 'socks5h'):
        info("代理地址错误")
        raise ValueError("Proxy address error")
def __get_logger(name):
    """Create and configure the logger called *name*.

    The logger gets a stream handler and a file handler writing to
    ``./picktrue.all.log``; both use the same record format.  The level is
    INFO unless ``--debug-<name>`` is present in ``sys.argv``, in which case
    it is DEBUG.

    :param name: logger name, also used to build the debug command-line flag
    :return: the configured ``logging.Logger``
    """
    level = logging.INFO

    if "--debug-%s" % name in sys.argv:
        level = logging.DEBUG

    fmt = "%(levelname)s - %(asctime)-15s - %(filename)s - line %(lineno)d --> %(message)s"
    date_fmt = "%a %d %b %Y %H:%M:%S"
    formatter = logging.Formatter(fmt, date_fmt)

    handler = logging.StreamHandler()
    # Explicit UTF-8 so non-ASCII text in log records does not depend on the
    # platform's default encoding.
    file_handler = logging.FileHandler(
        "./picktrue.all.log",
        encoding="utf-8",
    )

    logger = logging.getLogger(name)
    # Attach the formatter to BOTH handlers; previously only the stream
    # handler was formatted, so the log file contained bare messages without
    # level, timestamp or source location.
    for log_handler in (handler, file_handler):
        log_handler.setFormatter(formatter)
        logger.addHandler(log_handler)
    logger.setLevel(level=level)
    return logger


pk_logger = __get_logger('picktrue')


__all__ = (
    'pk_logger',
)
@dataclass
class Pin2Import:
    """One pin row of the ``|``-delimited Pinry import CSV."""

    referer: str
    tags: list
    description: str
    board: str

    # only one of following item should exist and another one should be None
    file_abs_path: str
    image_url2download: str

    @classmethod
    def get_fields(cls) -> List[str]:
        """Return the field names in declaration order (used as CSV header)."""
        return list(cls.__annotations__.keys())

    def as_dict(self) -> dict:
        """Return the pin as a ``{field: str}`` mapping; falsy values become ``''``."""
        out = {}
        for field in self.get_fields():
            value = getattr(self, field)
            out[field] = str(value) if value else ''
        return out


def _parse_tags(raw):
    """Convert the raw ``tags`` CSV cell into a list of tags.

    An empty cell gives ``[]``.  A cell containing ``[`` is parsed as a Python
    list literal (the form ``Pin2Import.as_dict`` writes); anything else, or a
    cell that is not a valid list literal, is treated as one single tag.
    """
    # Local import: only tag parsing needs it.
    import ast

    tags = raw.strip() if raw else raw
    if not tags:
        return []
    if "[" in tags:
        # SECURITY: this cell comes from a user-selected file.  It used to be
        # passed to ``eval``, which executes arbitrary code; ``literal_eval``
        # only accepts plain literals.
        try:
            parsed = ast.literal_eval(tags)
        except (ValueError, SyntaxError):
            return [tags, ]
        if isinstance(parsed, (list, tuple, set)):
            return list(parsed)
    return [tags, ]


def from_csv(path='pins2import.csv') -> List[Pin2Import]:
    """Load pins from the ``|``-delimited CSV file at *path*.

    Empty ``file_abs_path`` / ``image_url2download`` cells become ``None``.
    """
    # newline='' is what the csv module asks for on files it reads or writes.
    with open(path, 'r', encoding="utf-8", newline='') as csv_file:
        reader = csv.DictReader(csv_file, delimiter="|")
        rows = list(reader)
    for row in rows:
        row['tags'] = _parse_tags(row['tags'])
        row['file_abs_path'] = row['file_abs_path'] or None
        row['image_url2download'] = row['image_url2download'] or None
    return [Pin2Import(**row) for row in rows]


def to_csv(pins2export: List[Pin2Import], base_path, filename='pins2import.csv'):
    """Write *pins2export* (with a header row) to ``base_path/filename``, overwriting it."""
    fields_names = Pin2Import.get_fields()
    path = os.path.join(base_path, filename)
    # newline='' stops the platform from translating the writer's own line
    # terminator (which yields blank rows between records on Windows).
    with open(path, 'w', encoding="utf-8", newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fields_names, delimiter="|")
        writer.writeheader()
        for row in pins2export:
            writer.writerow(
                row.as_dict(),
            )


def write_to_csv(pin2export: Pin2Import, base_path, filename='pins2import.csv'):
    """Append one pin to ``base_path/filename``; the header is written only when the file is created."""
    fields_names = Pin2Import.get_fields()
    path = os.path.join(base_path, filename)
    mode = "a" if os.path.exists(path) else "w"
    with open(path, mode, encoding="utf-8", newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fields_names, delimiter="|")
        if mode == "w":
            writer.writeheader()
        writer.writerow(
            pin2export.as_dict(),
        )
        csv_file.flush()
def create_single_pin(self, uploader, pin):
    """Create one pin through *uploader* and update the progress counters.

    A pin with ``image_url2download`` set is created from that URL; otherwise
    a pin with ``file_abs_path`` set is created by uploading the file.  A pin
    with neither set performs no upload and is still counted as done.

    :param uploader: object providing ``create`` and ``create_with_file_upload``
    :param pin: the pin to import
    """
    try:
        if pin.image_url2download is not None:
            uploader.create(
                pin.description,
                pin.referer,
                pin.image_url2download,
                board_name=pin.board,
                tags=pin.tags,
            )
        elif pin.file_abs_path is not None:
            uploader.create_with_file_upload(
                pin.description,
                pin.referer,
                file_path=pin.file_abs_path,
                board_name=pin.board,
                tags=pin.tags,
            )
    except Exception:
        # ``do_import`` submits this method to the executor and never inspects
        # the resulting future, so an exception escaping from here would vanish
        # and the pin would be counted neither as done nor as failed, leaving
        # ``is_done`` false forever.  Count every failure, not only ValueError.
        pk_logger.exception(
            "Failed to do pin creation:",
        )
        with self._counter_lock:
            self.error_pins += 1
    else:
        with self._counter_lock:
            self.done_pins += 1
class Uploader:
    """Client for the Pinry ``/api/v2/`` endpoints used by the importer."""

    def __init__(self, pinry_url, username, password, login=False, min_upload_size_kb=None):
        """
        :param pinry_url: base URL of the deployment, like https://pin.xxx.com/
        :param username: account used by ``login``
        :param password: password used by ``login``
        :param login: when true, log in immediately
        :param min_upload_size_kb: files smaller than this are rejected by
            ``_upload_image``; ``None`` disables the check
        """
        self.pinry_url = pinry_url
        self._api_prefix = urljoin(pinry_url, '/api/v2/')
        self._login_url = urljoin(self._api_prefix, 'profile/login/')
        self._pin_creation_url = urljoin(self._api_prefix, 'pins/')
        self._image_creation_url = urljoin(self._api_prefix, 'images/')
        self._board_add_url = urljoin(self._api_prefix, 'boards/')
        self._board_list_url = urljoin(self._api_prefix, 'boards-auto-complete/')
        self._min_upload_size_kb = min_upload_size_kb
        self._cached_boards = None

        self.session = requests.session()
        self._username = username
        self._password = password
        if login:
            self.login()

    def _get_board_url(self, board_name):
        """Return the detail URL of the board called *board_name*."""
        board_id = self._get_board_id(board_name)
        return f'{self._board_add_url}{board_id}/'

    def _get_board_id(self, board_name):
        """Return the id of *board_name*; raises ``KeyError`` for an unknown board."""
        return self.boards[board_name]

    def create_boards(self, board_names: set):
        """POST one board-creation request per name (responses are not checked)."""
        for name in board_names:
            self.post(self._board_add_url, json={"name": name})

    @property
    def boards(self):
        """Mapping of board name to board id, fetched once and then cached."""
        if self._cached_boards is not None:
            return self._cached_boards
        data = self.session.get(self._board_list_url).json()
        self._cached_boards = {}
        for board in data:
            self._cached_boards[board['name']] = board['id']
        return self._cached_boards

    def _get_csrf_token(self):
        """Return the ``X-CSRFToken`` header dict for the session.

        When the session has no ``csrftoken`` cookie yet, the API prefix is
        requested once to obtain it.
        """
        csrf_token = self.session.cookies.get('csrftoken')
        if not csrf_token:
            self.session.get(self._api_prefix)
            csrf_token = self.session.cookies.get('csrftoken')
        headers = {
            'X-CSRFToken': csrf_token,
        }
        return headers

    def patch(self, url, json=None):
        """PATCH *url* with a JSON body and the CSRF header; returns the response."""
        headers = self._get_csrf_token()
        return self.session.patch(
            url=url,
            json=json,
            headers=headers,
        )

    def post(self, url, json=None, files=None):
        """POST *url* with the CSRF header.

        Sends *files* as a multipart upload when given, otherwise *json*.
        """
        headers = self._get_csrf_token()
        if files is None:
            return self.session.post(
                url=url,
                json=json,
                headers=headers,
            )
        else:
            return self.session.post(
                url=url,
                headers=headers,
                files=files,
            )

    def login(self):
        """Log in with the stored credentials; returns ``True`` on HTTP 200."""
        data = {
            'username': self._username,
            'password': self._password,
        }
        resp = self.post(url=self._login_url, json=data)
        return resp.status_code == 200

    def _upload_image(self, file_path):
        """Upload the image at *file_path* and return the id of the created image.

        :raises ValueError: when the file is missing, smaller than the
            configured minimum size, or the server does not answer 201
        """
        if not os.path.exists(file_path):
            raise ValueError(
                "Failed to upload image [%s]: not found" % file_path
            )
        if self._min_upload_size_kb is not None:
            if get_file_size_kb(file_path) < self._min_upload_size_kb:
                raise ValueError(
                    "Failed to upload image[%s]: size too small" % file_path
                )
        # Close the file as soon as the request is done; the handle used to be
        # opened inline and never closed, leaking one descriptor per upload.
        with open(file_path, "rb") as image_file:
            resp = self.post(
                self._image_creation_url,
                files={"image": image_file},
            )
        if resp.status_code != 201:
            raise ValueError(
                "Failed to upload image [%s]: %s" % (
                    file_path,
                    resp.json(),
                )
            )
        return resp.json()['id']

    def _create_pin(self, data, board_name):
        """Create a pin from *data*, then add it to the board *board_name*.

        :raises ValueError: when pin creation does not answer 201.  A failure
            to attach the pin to the board is only logged.
        """
        board_url = self._get_board_url(board_name)
        resp = self.post(
            url=self._pin_creation_url,
            json=data,
        )
        if resp.status_code != 201:
            raise ValueError("Failed to create pin %s, %s" % (data, resp.content))
        pin = resp.json()
        pin_id = pin['id']
        resp = self.patch(
            url=board_url,
            json={'pins_to_add': [pin_id, ]}
        )
        if resp.status_code != 200:
            pk_logger.error(
                "Failed to add pin to board: %s, %s" % (board_name, pin)
            )

    def create_with_file_upload(self, description, referer, file_path, board_name, tags):
        """Upload a local image file and create a pin for it on *board_name*."""
        image_id = self._upload_image(file_path)
        data = dict(
            description=description,
            referer=referer,
            tags=tags,
            image_by_id=image_id,
        )
        return self._create_pin(
            data,
            board_name,
        )

    def create(self, description, referer, url, board_name, tags):
        """Create a pin on *board_name* from the remote image *url*."""
        data = dict(
            description=description,
            referer=referer,
            url=url,
            tags=tags,
        )
        return self._create_pin(
            data,
            board_name,
        )
def send_and_wait(self, url, timeout=None, max_retry=0, retry_delay=5):
    """Queue *url* for fetching and wait for its response, retrying on failure.

    :param url: url to request
    :param timeout: forwarded to ``get_response`` for each attempt
    :param max_retry: number of extra attempts after the first one
    :param retry_delay: seconds to sleep between attempts (default 5, the
        previously hard-coded value)
    :return: the response returned by ``get_response``
    :raises ValueError: when every attempt returned ``None``
    """
    retried = 0
    while True:
        self.send_request(url)
        ret = self.get_response(url, timeout=timeout)
        if ret is not None:
            return ret
        retried += 1
        # Give up before sleeping: the delay used to be paid even after the
        # final failed attempt, just to raise afterwards.
        if retried > max_retry:
            raise ValueError("Failed to get url: %s" % url)
        time.sleep(retry_delay)
class TaskServer:
    """Runs the Flask task app in a background thread and owns the requester.

    The ``requester`` is the channel through which URLs are handed out and
    responses are collected.
    """

    def __init__(self):
        self.requester = BrowserRequester()
        self._thread = None

    def request(self, url):
        """Fetch ``url`` through the requester and block until it answers."""
        return self.requester.send_and_wait(url)

    def log_received(self):
        """Debug helper: request a fixed URL forever and print each reply."""
        while True:
            resp = self.request("https://www.artstation.com/users/braveking/projects.json?page=1")
            print("resp received", resp)

    def start_debug_task(self):
        """Run ``log_received`` in a daemon thread."""
        t = Thread(target=self.log_received, daemon=True)
        t.start()

    def is_running(self):
        """Return True while the server thread exists and is alive."""
        return self._thread is not None and self._thread.is_alive()

    def start(self):
        """Start the HTTP server thread once.

        :return: False if the server is already running, True if it was
            started by this call.
        """
        if self.is_running():
            return False

        def run():
            # Configure logging first: app.run() blocks until the server
            # stops, so anything placed after it never applies while serving.
            app.logger.setLevel(logging.WARNING)
            # NOTE(review): debug=True enables the Werkzeug debugger on this
            # port; confirm that is intended outside of development.
            app.run(debug=True, port=2333, use_reloader=False)

        # daemon=True replaces the deprecated Thread.setDaemon().
        self._thread = Thread(target=run, daemon=True)
        self._thread.start()
        return True
def normalize_proxy_string(proxy):
    """Rewrite a ``socks5://`` proxy URL to ``socks5h://``.

    Only the URL scheme is inspected and rewritten, so a host name or
    credential that happens to contain "socks5" is left untouched, and
    non-SOCKS proxies are returned unchanged.

    :param proxy: proxy URL such as ``socks5://127.0.0.1:1080``.
    :return: the proxy URL, with the scheme upgraded when it was ``socks5``.
    """
    scheme, sep, rest = proxy.partition('://')
    if sep and scheme.lower() == 'socks5':
        return 'socks5h' + sep + rest
    return proxy


def get_proxy(proxy_string=None):
    """Build keyword arguments carrying the proxy mapping for a fetcher.

    :param proxy_string: proxy URL, or None/empty for "no proxy".
    :return: ``{}`` when no proxy is given, otherwise
        ``{'proxies': {'http': <proxy>, 'https': <proxy>}}``.
    """
    if not proxy_string:
        return {}
    proxy = normalize_proxy_string(proxy_string)
    return {
        'proxies': {
            'http': proxy,
            'https': proxy,
        }
    }
73 | 74 | def get_save_path(self, base_path, image_name, image: ImageItem): 75 | save_path = os.path.join( 76 | base_path, 77 | image_name, 78 | ) 79 | return save_path 80 | 81 | def save(self, content, task_item): 82 | """ 83 | :type content: bytearray 84 | :type task_item: picktrue.meta.TaskItem 85 | """ 86 | image = task_item.image 87 | image_name = image.name 88 | if callable(image.name): 89 | image_name = image.name(image.url, content) 90 | 91 | save_path = self.get_save_path( 92 | task_item.base_save_path, 93 | image_name, 94 | image, 95 | ) 96 | save_path = self._safe_path(save_path) 97 | if os.path.exists(save_path): 98 | return 99 | with open(save_path, "wb") as f: 100 | f.write(content) 101 | f.flush() 102 | -------------------------------------------------------------------------------- /src/picktrue/sites/artstation.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple, Counter 2 | import hashlib 3 | import json 4 | import os 5 | import re 6 | import time 7 | from urllib.parse import urljoin 8 | 9 | import requests 10 | 11 | from picktrue.meta import ImageItem, UA 12 | from picktrue.rpc.taskserver import server 13 | from picktrue.sites.abstract import DummySite, DummyFetcher, get_proxy 14 | from picktrue.sites.utils import get_name_with_hash_from_url 15 | 16 | BASE_URL = "https://www.artstation.com/" 17 | PROJECT_URL_TPL = '/users/{username}/projects.json?page={page}' 18 | ALBUMS_URL_TPL = 'https://www.artstation.com/albums.json?' 
\ 19 | 'include_total_count=true&page={page}' \ 20 | '&per_page=25&user_id={user_id}' 21 | ALBUM_CONTENT_URL_TPL = 'https://www.artstation.com/users/{username}' \ 22 | '/projects.json?album_id={album_id}&page={page}' 23 | DETAIL_URL_TPL = '/projects/{hash_id}.json' 24 | 25 | Album = namedtuple( 26 | "Album", 27 | ( 28 | "name", 29 | "id", 30 | ) 31 | ) 32 | 33 | 34 | def parse_single_artwork(artwork_dict: dict): 35 | """ 36 | { 37 | "liked":false, 38 | "tags":[ 39 | 40 | ], 41 | "hide_as_adult":false, 42 | "visible_on_artstation":true, 43 | "assets":[ 44 | { 45 | "has_image":true, 46 | "has_embedded_player":false, 47 | "player_embedded":null, 48 | "oembed":null, 49 | "id":12260469, 50 | "title_formatted":"", 51 | "image_url":"https://cdnb.artstation.com/p/assets/images/images/012/260/469/large/ham-sung-choul-braveking-180809-1-mini.jpg?1533864344", 52 | "width":1300, 53 | "height":2434, 54 | "position":0, 55 | "asset_type":"image", 56 | "viewport_constraint_type":"constrained" 57 | }, 58 | { 59 | "has_image":false, 60 | "has_embedded_player":false, 61 | "player_embedded":null, 62 | "oembed":null, 63 | "id":12260473, 64 | "title_formatted":"", 65 | "image_url":"https://cdnb.artstation.com/p/assets/covers/images/012/260/473/large/ham-sung-choul-braveking-180809-1-mini-2.jpg?1533864353", 66 | "width":822, 67 | "height":822, 68 | "position":1, 69 | "asset_type":"cover", 70 | "viewport_constraint_type":"constrained" 71 | } 72 | ], 73 | "collections":[ 74 | 75 | ], 76 | "user":{ 77 | "followed":true, 78 | "following_back":false, 79 | "blocked":false, 80 | "is_staff":false, 81 | "id":199106, 82 | "username":"braveking", 83 | "headline":"freelance artist", 84 | "full_name":"Ham Sung-Choul(braveking)", 85 | "permalink":"https://www.artstation.com/braveking", 86 | "medium_avatar_url":"https://cdna.artstation.com/p/users/avatars/000/199/106/medium/ab27ac7f48de117074c14963a3371914.jpg?1461412259", 87 | 
"large_avatar_url":"https://cdna.artstation.com/p/users/avatars/000/199/106/large/ab27ac7f48de117074c14963a3371914.jpg?1461412259", 88 | "small_cover_url":"https://cdn.artstation.com/static_media/placeholders/user/cover/default.jpg", 89 | "pro_member":false 90 | }, 91 | "medium":null, 92 | "categories":[ 93 | { 94 | "name":"Characters", 95 | "id":1 96 | }, 97 | { 98 | "name":"Fantasy", 99 | "id":2 100 | }, 101 | { 102 | "name":"Concept Art", 103 | "id":3 104 | } 105 | ], 106 | "software_items":[ 107 | 108 | ], 109 | "id":3513664, 110 | "user_id":199106, 111 | "title":"doodle", 112 | "description":"

", 113 | "description_html":"

", 114 | "created_at":"2018-08-09T07:50:11.347-05:00", 115 | "updated_at":"2018-08-10T01:55:50.964-05:00", 116 | "views_count":3257, 117 | "likes_count":699, 118 | "comments_count":1, 119 | "permalink":"https://www.artstation.com/artwork/mr5aZ", 120 | "cover_url":"https://cdnb.artstation.com/p/assets/covers/images/012/260/473/medium/ham-sung-choul-braveking-180809-1-mini-2.jpg?1533864353", 121 | "published_at":"2018-08-09T07:50:19.308-05:00", 122 | "editor_pick":true, 123 | "adult_content":false, 124 | "admin_adult_content":false, 125 | "slug":"doodle-184-a5ea10f5-e98e-46e2-866e-63ae54fd443a", 126 | "suppressed":false, 127 | "hash_id":"mr5aZ", 128 | "visible":true 129 | } 130 | :rtype: list[ImageItem] 131 | """ 132 | assets = artwork_dict['assets'] 133 | assets = [ 134 | asset for asset in assets 135 | if asset['has_image'] 136 | ] 137 | images = ( 138 | ImageItem( 139 | url=asset['image_url'], 140 | name=get_name_with_hash_from_url, 141 | ) 142 | for asset in assets 143 | ) 144 | return images 145 | 146 | 147 | def parse_artwork_url(item_dict): 148 | """ 149 | { 150 | "data": 151 | [ 152 | { 153 | "id":3497866, 154 | "user_id":199106, 155 | "title":"doodle", 156 | "description":"", 157 | "created_at":"2018-08-06T04:23:20.695-05:00", 158 | "updated_at":"2018-08-10T01:39:27.162-05:00", 159 | "likes_count":340, 160 | "slug":"doodle-184-669828ca-6a1b-4fc7-986d-e4eeaa4b5d55", 161 | "published_at":"2018-08-06T04:24:58.518-05:00", 162 | "adult_content":false, 163 | "cover_asset_id":12192935, 164 | "admin_adult_content":false, 165 | "hash_id":"KnrbX", 166 | "permalink":"https://www.artstation.com/artwork/KnrbX", 167 | "hide_as_adult":false, 168 | "cover":{ 169 | "id":12192935, 170 | "small_image_url":"https://cdnb.artstation.com/p/assets/covers/images/012/192/935/small/ham-sung-choul-braveking-180806-1-b-mini-3.jpg?1533547474", 171 | 
"medium_image_url":"https://cdnb.artstation.com/p/assets/covers/images/012/192/935/medium/ham-sung-choul-braveking-180806-1-b-mini-3.jpg?1533547474", 172 | "small_square_url":"https://cdnb.artstation.com/p/assets/covers/images/012/192/935/small_square/ham-sung-choul-braveking-180806-1-b-mini-3.jpg?1533547474", 173 | "thumb_url":"https://cdnb.artstation.com/p/assets/covers/images/012/192/935/smaller_square/ham-sung-choul-braveking-180806-1-b-mini-3.jpg?1533547474", 174 | "micro_square_image_url":"https://cdnb.artstation.com/p/assets/covers/images/012/192/935/micro_square/ham-sung-choul-braveking-180806-1-b-mini-3.jpg?1533547474", 175 | "aspect":1 176 | }, 177 | "icons":{ 178 | "image":false, 179 | "video":false, 180 | "model3d":false, 181 | "marmoset":false, 182 | "pano":false 183 | }, 184 | "assets_count":1 185 | }, 186 | ], 187 | "total_count":38 188 | } 189 | """ 190 | url = urljoin( 191 | BASE_URL, 192 | DETAIL_URL_TPL.format( 193 | hash_id=item_dict['hash_id'] 194 | ) 195 | ) 196 | return url 197 | 198 | 199 | def get_project_page_url(username, page=1): 200 | path = PROJECT_URL_TPL.format( 201 | username=username, 202 | page=page, 203 | ) 204 | url = urljoin(BASE_URL, path) 205 | return url 206 | 207 | 208 | def get_project_albums_page_url(user_id, page=1): 209 | path = ALBUMS_URL_TPL.format( 210 | user_id=user_id, 211 | page=page, 212 | ) 213 | url = urljoin(BASE_URL, path) 214 | return url 215 | 216 | 217 | def get_project_albums_details_page_url(username, album_id, page=1): 218 | path = ALBUM_CONTENT_URL_TPL.format( 219 | username=username, 220 | album_id=album_id, 221 | page=page, 222 | ) 223 | url = urljoin(BASE_URL, path) 224 | return url 225 | 226 | 227 | def has_next_page(current_count, total_count): 228 | return current_count < total_count 229 | 230 | 231 | class BaseMetaFetcher: 232 | def request_url(self, url): 233 | raise NotImplementedError 234 | 235 | def get_artwork_summery(self, summary_url): 236 | return self.request_url(summary_url) 237 | 238 
def get_albums_index_page(self, user_id):
    """Yield an ``Album`` for every album listed for ``user_id``.

    Pages of the album index are requested one after another until the
    number of albums seen reaches the ``total_count`` reported by the
    response, or a page comes back empty.

    :param user_id: user id placed into the album index URL.
    :return: generator of ``Album(name, id)`` tuples.
    """
    page = 1
    current_count = 0
    # Placeholder so the first page is always requested; replaced by the
    # real total from the first response.
    total_count = 1
    while current_count < total_count:
        url = get_project_albums_page_url(
            user_id=user_id,
            page=page,
        )
        resp = self.request_url(url)
        assert 'total_count' in resp
        total_count = resp['total_count']
        albums = resp['data']
        if not albums:
            # An empty page means there is nothing left to read even if the
            # reported total was never reached; stop instead of looping.
            break
        for album_detail in albums:
            yield Album(
                id=album_detail['id'],
                name=album_detail['title'],
            )
        # Accumulate across pages; overwriting with the size of the last
        # page would never reach total_count for multi-page listings.
        current_count += len(albums)
        page += 1
BaseMetaFetcher): 307 | self.user_url = user_url 308 | self.username = username 309 | self.meta = meta_fetcher 310 | self.user_id = None 311 | 312 | @staticmethod 313 | def _get_repeated_uid(user_ids): 314 | counter = Counter(user_ids) 315 | top_uid = counter.most_common(1) 316 | return top_uid[0][0] 317 | 318 | def get_user_id(self, user_url): 319 | resp = self.meta.request_url(user_url) 320 | user_ids = re.findall(r"user_id.*?(\d+)", resp) 321 | return self._get_repeated_uid(user_ids) 322 | 323 | def __call__(self, *args, **kwargs): 324 | self.user_id = self.get_user_id(user_url=self.user_url) 325 | yield from self._gen_tasks() 326 | 327 | def _get_image_item_from_detail(self, artwork_summary): 328 | summary_url = parse_artwork_url(artwork_summary) 329 | resp = self.meta.get_artwork_summery(summary_url) 330 | return parse_single_artwork(resp) 331 | 332 | def _yield_image_items(self, data, album_name=None): 333 | for summary in data: 334 | for image_item in self._get_image_item_from_detail( 335 | summary, 336 | ): 337 | if album_name is not None: 338 | image_item = ImageItem( 339 | url=image_item.url, 340 | name=image_item.name, 341 | meta={"album_name": album_name}, 342 | ) 343 | yield image_item 344 | 345 | def _gen_tasks_from_root(self): 346 | page = 1 347 | total_count, current_count, data = self.meta.get_projects_single_page( 348 | self.username, 349 | page, 350 | ) 351 | yield from self._yield_image_items(data) 352 | while has_next_page(current_count, total_count): 353 | page += 1 354 | _, count_delta, data = self.meta.get_projects_single_page( 355 | self.username, 356 | page=page, 357 | ) 358 | current_count += count_delta 359 | yield from self._yield_image_items(data) 360 | time.sleep(0.2) 361 | 362 | def _gen_tasks_from_albums(self): 363 | for index, album in enumerate(self.meta.get_albums_index_page(user_id=self.user_id)): 364 | page = 1 365 | current_count = 0 366 | total_count = 1 367 | while has_next_page(current_count, total_count): 368 | 
class ArtStationFetcher(DummyFetcher):
    """
    Fetcher that stores images belonging to an album in a per-album
    sub-directory.

    New url to test album download: https://www.artstation.com/bvdhorst
    """

    def save(self, content, task_item):
        """Write ``content`` to disk for ``task_item``.

        Images without ``meta`` are delegated to the base class.  Images with
        ``meta`` are written to ``<base_save_path>/<album name>/<image name>``;
        the album directory is created on demand.
        """
        image = task_item.image
        if image.meta is None:
            return super(ArtStationFetcher, self).save(content, task_item)

        album_dir = os.path.join(
            task_item.base_save_path,
            self._safe_name(image.meta['album_name']),
        )
        os.makedirs(album_dir, exist_ok=True)
        album_dir = self._safe_path(album_dir)

        # ``name`` is either a plain file name or a callable that derives one
        # from the url and the downloaded bytes.
        if callable(image.name):
            image_name = image.name(image.url, content)
        else:
            image_name = image.name
        save_path = os.path.join(album_dir, image_name)
        # Same policy as the base class: never rewrite an existing file.
        if os.path.exists(save_path):
            return
        with open(save_path, "wb") as f:
            f.write(content)
436 | return self._fetcher 437 | 438 | @property 439 | def dir_name(self): 440 | return self.username 441 | 442 | @property 443 | def tasks(self): 444 | yield from self._task_maker() 445 | 446 | 447 | if __name__ == "__main__": 448 | import doctest 449 | doctest.testmod() 450 | -------------------------------------------------------------------------------- /src/picktrue/sites/douban.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from pyquery import PyQuery as PQ 3 | 4 | from picktrue.meta import ImageItem, UA 5 | from picktrue.sites.abstract import DummySite, DummyFetcher 6 | from picktrue.sites.utils import get_filename_fom_url 7 | 8 | ALBUM_URL_TPL = "https://www.douban.com/photos/album/{album_id}/" 9 | 10 | 11 | def _get_large_img_url(small_img_url): 12 | return small_img_url.replace("/m/", "/l/") 13 | 14 | 15 | def _get_album_url(album_id, m_start=None): 16 | url = ALBUM_URL_TPL.format(album_id=album_id) 17 | if m_start is not None: 18 | url = url + "?m_start=%s" % m_start 19 | return url 20 | 21 | 22 | def _parse_page(page_html): 23 | pq = PQ(page_html) 24 | images = pq(".photolst_photo img") 25 | images = [PQ(img).attr("src") for img in images] 26 | images = [_get_large_img_url(img_url) for img_url in images] 27 | return images 28 | 29 | 30 | def _get_album_id_form_init_url(url): 31 | return url.split("/")[-2] 32 | 33 | 34 | def parse_page(page_html, previous_m_start=None): 35 | new_m_start = previous_m_start or 0 36 | images = _parse_page(page_html) 37 | new_m_start += len(images) 38 | has_next = len(images) >= 18 39 | return images, has_next, new_m_start 40 | 41 | 42 | def get_images(album_home_url, album_id): 43 | has_next = True 44 | session = requests.Session() 45 | session.headers.update(UA) 46 | album_url = album_home_url 47 | m_start = None 48 | 49 | while has_next: 50 | resp = session.get( 51 | url=album_url, 52 | ) 53 | if resp.status_code != 200: 54 | raise ValueError( 55 | "Failed to 
fetch douban meta info, code: %s" % resp.status_code 56 | ) 57 | images, has_next, m_start = parse_page( 58 | resp.text, 59 | m_start, 60 | ) 61 | album_url = _get_album_url(album_id, m_start) 62 | for image_url in images: 63 | yield image_url 64 | 65 | 66 | class DoubanPersonalAlbum(DummySite): 67 | 68 | fetcher = DummyFetcher() 69 | 70 | def __init__(self, album_url): 71 | self.base_url = album_url 72 | self.album_id = _get_album_id_form_init_url(album_url) 73 | 74 | @property 75 | def dir_name(self): 76 | return self.album_id 77 | 78 | @property 79 | def tasks(self): 80 | for image_url in get_images(self.base_url, self.album_id): 81 | yield ImageItem( 82 | url=image_url, 83 | name=get_filename_fom_url(image_url), 84 | ) 85 | -------------------------------------------------------------------------------- /src/picktrue/sites/huaban.py: -------------------------------------------------------------------------------- 1 | import re 2 | from json import JSONDecodeError 3 | from pprint import pformat, pprint 4 | 5 | import os 6 | 7 | import random 8 | import string 9 | from collections import namedtuple 10 | from urllib.parse import urljoin 11 | 12 | from picktrue.logger import pk_logger 13 | from picktrue.meta import ImageItem, DownloadTaskItem 14 | from picktrue.pinry.ds import Pin2Import, write_to_csv 15 | from picktrue.sites.abstract import DummySite, DummyFetcher 16 | from picktrue.sites.utils import safe_file_name 17 | from picktrue.utils import retry 18 | 19 | IMAGE_URL_TPL = "http://img.hb.aicdn.com/{file_key}" 20 | BASE_URL = "https://huaban.com" 21 | 22 | XHR_HEADERS = { 23 | "X-Requested-With": "XMLHttpRequest", 24 | "User-Agent": 25 | "Mozilla/5.0 (Windows NT 10.0; WOW64) " 26 | "AppleWebKit/537.36 (KHTML, like Gecko) " 27 | "Chrome/56.0.2924.87 Safari/537.36", 28 | } 29 | 30 | 31 | Pin = namedtuple( 32 | 'Pin', 33 | ( 34 | 'url', 35 | 'filename', 36 | ) 37 | ) 38 | 39 | 40 | class HuaBanFetcher(DummyFetcher): 41 | 42 | def __init__(self): 43 | 
super(HuaBanFetcher, self).__init__() 44 | self.session.headers.update( 45 | XHR_HEADERS, 46 | ) 47 | 48 | @classmethod 49 | def get_huaban_save_path(cls, task_item): 50 | board_name = cls._safe_name(task_item.image.meta['board_name']) 51 | save_path = os.path.join( 52 | task_item.base_save_path, 53 | board_name, 54 | ) 55 | cls.ensure_dir(dir_path=save_path) 56 | save_path = os.path.join( 57 | save_path, 58 | task_item.image.name, 59 | ) 60 | save_path = cls._safe_path(save_path) 61 | return save_path 62 | 63 | @retry() 64 | def get(self, url, require_json=False, **kwargs): 65 | """ 66 | :param require_json: If require_json is True and 67 | the result is not json-encoded, will raise error 68 | then have a retry. 69 | :rtype: requests.Response 70 | """ 71 | if 'timeout' in kwargs: 72 | kwargs.pop('timeout') 73 | resp = self.session.get(url, timeout=(2, 30), **kwargs) 74 | if require_json: 75 | try: 76 | resp.json() 77 | except JSONDecodeError: 78 | pk_logger.error( 79 | "Failed to convert resp to json for url {}: {}".format( 80 | url, 81 | resp.text, 82 | ) 83 | ) 84 | raise 85 | return resp 86 | 87 | @staticmethod 88 | def ensure_dir(dir_path): 89 | return os.makedirs(dir_path, exist_ok=True) 90 | 91 | def save(self, content, task_item: DownloadTaskItem): 92 | """ 93 | :type content: bytearray 94 | :type task_item: picktrue.meta.TaskItem 95 | """ 96 | if task_item.image.meta is None: 97 | return super(HuaBanFetcher, self).save(content, task_item) 98 | save_path = self.get_huaban_save_path(task_item) 99 | with open(save_path, "wb") as f: 100 | f.write(content) 101 | pin2import = mk_pin2import(task_item) 102 | if pin2import: 103 | write_to_csv(pin2import, base_path=task_item.base_save_path) 104 | 105 | 106 | def _random_string(length): 107 | return ''.join( 108 | random.choice(string.ascii_lowercase + string.digits) 109 | for _ in range(length) 110 | ) 111 | 112 | 113 | def _get_file_ext(mime_type): 114 | return mime_type.split("/")[-1] 115 | 116 | 117 | def 
get_pins(pins_meta): 118 | pins = [] 119 | for info in pins_meta: 120 | ext = _get_file_ext(info['file']['type']) 121 | file_name = "%s.%s" % (info['pin_id'], ext) 122 | meta = { 123 | "pin_id": info['pin_id'], 124 | "url": IMAGE_URL_TPL.format(file_key=info['file']['key']), 125 | 'type': info['file']['type'], 126 | 'ext': ext, 127 | "title": info['raw_text'], 128 | "link": info['link'], 129 | "source": info['source'], 130 | "file_name": file_name, 131 | "tags": info['tags'], 132 | } 133 | pins.append(meta) 134 | return pins 135 | 136 | 137 | def get_boards(boards_meta): 138 | boards = [] 139 | for board in boards_meta: 140 | meta = { 141 | "board_id": board['board_id'], 142 | "title": board['title'], 143 | "pins": None, 144 | "pin_count": board['pin_count'], 145 | "dir_name": safe_file_name(board['title']), 146 | } 147 | boards.append(meta) 148 | return boards 149 | 150 | 151 | def mk_pin2import(task_item: DownloadTaskItem) -> Pin2Import or None: 152 | if task_item.image.meta is None: 153 | return None 154 | meta = task_item.image.pin_meta 155 | return Pin2Import( 156 | referer=meta['link'], 157 | tags=meta['tags'], 158 | description=meta['title'], 159 | board=task_item.image.meta['board_name'], 160 | file_abs_path=HuaBanFetcher.get_huaban_save_path(task_item), 161 | image_url2download="", 162 | ) 163 | 164 | 165 | class Board(object): 166 | 167 | BOARD_API_BASE = "https://api.huaban.com/boards/{board_id}/pins?limit=20" 168 | 169 | def __init__(self, board_url_or_id): 170 | board_id = str(board_url_or_id) 171 | self.fetcher = HuaBanFetcher() 172 | if "http" in board_id: 173 | board_id = re.findall(r'boards/(\d+)', board_id)[0] 174 | self.id = board_id 175 | self.base_url = self.BOARD_API_BASE.format(board_id=board_id) 176 | self.further_pin_url_tpl = self.base_url + "&max={pin_id}" 177 | 178 | # uninitialized properties 179 | self.pin_count = None 180 | self.title = None 181 | self.description = None 182 | self._pins = [] 183 | self._init_board() 184 | 185 | def 
class User(object):
    """A huaban user: profile fields plus a lazy, paginated board iterator."""

    BOARDS_URL_TPL = "https://api.huaban.com/{user_id}/boards?limit=30&urlname={user_id}"

    def __init__(self, user_url: str):
        """Derive the API URLs from ``user_url`` and load the profile.

        The constructor performs one request (``_init_profile``) to fill
        ``username``, ``board_count`` and ``pin_count``.
        """
        self.fetcher = HuaBanFetcher()
        user_uid = self._parse_user_id(user_url)
        self.base_url = self.BOARDS_URL_TPL.format(
            user_id=user_uid
        )
        self.further_url_tpl = self.base_url + "&max={board_id}"

        self.username = None
        self.board_count = None
        self.pin_count = None
        self._boards_metas = []
        self._init_profile()

    @staticmethod
    def _parse_user_id(user_url):
        """Return the last path segment of ``user_url``.

        Query string, fragment and trailing slashes are ignored so that
        ``https://huaban.com/abc/`` yields ``"abc"`` rather than ``""``.
        """
        path = user_url.split("?", 1)[0].split("#", 1)[0]
        return path.rstrip("/").split("/")[-1]

    def _fetch_home(self):
        """Request the first page, store profile fields, return its boards."""
        resp = self.fetcher.get(self.base_url, require_json=True)
        meta = resp.json()
        user_meta = meta['user']
        self.username = user_meta['username']
        self.board_count = user_meta['board_count']
        self.pin_count = user_meta['pin_count']
        return get_boards(meta['boards'])

    _init_profile = _fetch_home

    def _fetch_further(self, prev_boards):
        """Request the page following the last board in ``prev_boards``."""
        max_id = prev_boards[-1]['board_id']
        further_url = self.further_url_tpl.format(
            board_id=max_id,
        )
        resp = self.fetcher.get(
            further_url,
            require_json=True,
        )
        content = resp.json()
        return get_boards(content['boards'])

    def _fetch_boards(self):
        """Yield a ``Board`` per board, fetching further pages on demand.

        Iteration stops once ``board_count`` boards were seen or when a page
        comes back empty, so a count that can never be reached does not cause
        an endless request loop.
        """
        # Start from a clean slate so the boards can be iterated again.
        self._boards_metas = []
        page = self._fetch_home()
        while page:
            self._boards_metas.extend(page)
            for meta in page:
                yield Board(meta['board_id'])
            if self.board_count <= len(self._boards_metas):
                break
            page = self._fetch_further(self._boards_metas)

    @property
    def boards(self):
        """
        :rtype: iter[Board]
        """
        yield from self._fetch_boards()

    def as_dict(self):
        """Return the profile as a dict; ``boards`` is the lazy iterator."""
        return {
            "username": self.username,
            "board_count": self.board_count,
            "boards": self.boards,
        }
HuaBanFetcher() 338 | 339 | def __init__(self, user_url): 340 | self.meta = None 341 | self.base_url = user_url 342 | self.user = User(user_url) 343 | self._boards = [] 344 | 345 | @property 346 | def dir_name(self): 347 | return self.user.username 348 | 349 | @property 350 | def tasks(self): 351 | for board, pin_meta in self._boards_pins: 352 | pin_item = mk_pin( 353 | pin_meta 354 | ) 355 | yield ImageItem( 356 | url=pin_item.url, 357 | name=pin_item.filename, 358 | meta={ 359 | 'board_name': board.title, 360 | }, 361 | pin_meta=pin_meta, 362 | ) 363 | 364 | @property 365 | def _boards_pins(self): 366 | for board in self.user.boards: 367 | self._boards.append(board) 368 | for pin in board.pins: 369 | yield board, pin 370 | 371 | def as_dict(self): 372 | meta = self.user.as_dict() 373 | meta['boards'] = [ 374 | board.as_dict() for board in self._boards 375 | ] 376 | return meta 377 | 378 | 379 | class HuaBanBoard(DummySite): 380 | 381 | fetcher = HuaBanFetcher() 382 | 383 | def __init__(self, board_url): 384 | self.base_url = board_url 385 | self._board = Board(self.base_url) 386 | 387 | @property 388 | def dir_name(self): 389 | return safe_file_name( 390 | "%s-%s" % (self._board.title, self._board.id) 391 | ) 392 | 393 | @property 394 | def tasks(self): 395 | for pin_meta in self._board.pins: 396 | pin_item = mk_pin( 397 | pin_meta 398 | ) 399 | yield ImageItem( 400 | url=pin_item.url, 401 | name=pin_item.filename, 402 | pin_meta=pin_meta, 403 | ) 404 | -------------------------------------------------------------------------------- /src/picktrue/sites/metmuseum.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from urllib.parse import urlparse, parse_qs 4 | 5 | from pyquery import PyQuery 6 | 7 | from picktrue.engine import Downloader 8 | from picktrue.meta import ImageItem 9 | from picktrue.rpc.taskserver import BrowserMetaFetcher 10 | from picktrue.sites.abstract import DummySite, DummyFetcher 
def _get_params(query_parts):
    """
    Parse a bare query string into a ``{name: [values]}`` mapping.

    :param query_parts: just like "material=Archery"
        or "material=Archery&offset=0&perPage=20&sortBy=Relevance&sortOrder=asc&searchField=All&pageSize=0"
    :return: the ``parse_qs`` result, so every value is a list of strings
    """
    # urlparse wants a full URL, so the query is grafted onto a dummy host.
    path = urlparse('http://test.com/?' + query_parts)
    return parse_qs(path.query)


class SearchPage:
    """Walk a metmuseum collection search and yield the images of every hit."""

    def __init__(self, page_url, meta_fetcher: "BrowserMetaFetcher"):
        """
        html page link:
            https://www.metmuseum.org/art/collection/search#!/search?material=Archery
            https://www.metmuseum.org/art/collection/search#!?material=Archery&offset=0&perPage=20&sortBy=Relevance&sortOrder=asc&searchField=All&pageSize=0
            https://www.metmuseum.org/art/collection/search#!?material=Archery&offset=20&perPage=20&sortBy=Relevance&sortOrder=asc&searchField=All&pageSize=0
        json link:
            https://www.metmuseum.org/mothra/collectionlisting/search?material=Archery&offset=0&pageSize=0&perPage=20&searchField=All&showOnly=&sortBy=Relevance

        :param page_url: search page URL; must carry a ``material`` parameter
        :param meta_fetcher: object whose ``request_url`` performs the requests
        :raises ValueError: if ``page_url`` has no ``material`` parameter
        """
        # The query sits after the last "?" (the page URLs use "#!?...").
        query_parts = page_url.split("?")[-1]
        self._params = _get_params(query_parts)
        if "material" not in self._params:
            raise ValueError("Failed to parse url: %s" % page_url)
        self._fetcher = meta_fetcher

    @property
    def dir_name(self):
        """Directory name for this search: the sanitised search keyword."""
        return self.safe_search_keyword

    @property
    def safe_search_keyword(self):
        """First ``material`` value, passed through ``safe_file_name``."""
        return safe_file_name(self._params["material"][0])

    def get_search_request(self, offset, page_size, per_page):
        """
        Build the JSON listing URL for one page of results.

        :param offset: index of the first result to request
        :param page_size: value for the ``pageSize`` parameter
        :param per_page: number of results per page
        :return: the request URL as a string
        """
        tpl = (
            "https://www.metmuseum.org/mothra/collectionlisting/search"
            "?material={keyword}"
            "&offset={offset}"
            "&pageSize={page_size}"
            "&perPage={per_page}"
            "&searchField=All"
            "&showOnly="
            "&sortBy=Relevance"
        )
        return tpl.format(
            keyword=self.safe_search_keyword,
            offset=offset,
            page_size=page_size,
            per_page=per_page,
        )

    def get_image_items(self):
        """
        Yield the image items of every search result, page after page.

        The listing response is read as a dict with ``results`` (a list of
        entries, each with a ``url`` to its item page), ``request`` (echoing
        the requested ``offset``) and ``totalResults``, e.g.::

            {
                "results": [
                    {
                        "title": "Archer's Ring",
                        "url": "https://www.metmuseum.org/art/collection/search/30142?searchField=All&sortBy=Relevance&what=Archery&ft=*&offset=0&rpp=20&pos=1",
                        "image": "https://images.metmuseum.org/CRDImages/aa/mobile-large/LC-36_25_2814-001.jpg",
                        "accessionNumber": "36.25.2814"
                    }
                ]
            }
        """
        offset = int(self._params.get('offset', [0])[0])
        page_size = int(self._params.get('pageSize', [0])[0])
        per_page = int(self._params.get('perPage', [20])[0])
        while True:
            r = self._fetcher.request_url(
                url=self.get_search_request(
                    offset=offset,
                    page_size=page_size,
                    per_page=per_page,
                ),
            )
            for image_meta in r['results']:
                items = ItemPage(
                    page_url=image_meta['url'],
                    meta_fetcher=self._fetcher,
                    search_keyword=self.safe_search_keyword,
                ).get_image_items()
                if items:
                    yield from items
            offset = r['request']['offset'] + per_page
            print(offset, r['totalResults'])
            # ``offset`` is now the index of the first result not yet seen;
            # once it reaches the total there is nothing left, so stop here
            # instead of requesting one more (empty) page.
            if offset >= r['totalResults']:
                break
130 | searchField=All&sortBy=Relevance&what=Archery&ft=*&offset=0&rpp=20&pos=13 131 | image: 132 | https://collectionapi.metmuseum.org/api/collection/v1/iiif/23603/1642473/main-image 133 | """ 134 | path = urlparse(page_url) 135 | self._item_id = path.path.split("/")[-1] 136 | self._fetcher = meta_fetcher 137 | self._page_url = page_url 138 | self._search_keyword = search_keyword 139 | 140 | @property 141 | def dir_name(self): 142 | if self._search_keyword is not None: 143 | return safe_file_name( 144 | self._search_keyword 145 | ) 146 | else: 147 | return "" 148 | 149 | def _mk_item(self, image_url, title, has_many=False): 150 | if image_url.endswith("restricted") or image_url.endswith("main-image"): 151 | name = "_".join(image_url.split("/")[-3:]) + ".jpg" 152 | else: 153 | name = get_filename_fom_url(image_url) 154 | meta = dict(title=title, has_many=has_many) 155 | meta['search_keyword'] = self._search_keyword 156 | return ImageItem( 157 | image_url, 158 | name=name, 159 | meta=meta, 160 | ) 161 | 162 | def get_image_items(self): 163 | resp = self._fetcher.request_url( 164 | self._page_url, 165 | ) 166 | query = PyQuery(resp) 167 | title = query("#artwork__title").text() 168 | extra_images = query("img.gtm__carousel__thumbnail") 169 | main_image = query(".artwork__interaction--download a") 170 | 171 | has_original_image = True 172 | if "Due to rights restrictions" in resp: 173 | has_original_image = False 174 | 175 | def getter(target): 176 | if has_original_image: 177 | return PyQuery(target).attr("data-superjumboimage") 178 | else: 179 | return PyQuery(target).attr("data-largeimage") 180 | 181 | if len(extra_images) > 0: 182 | return [ 183 | self._mk_item( 184 | getter(img), 185 | title=title, 186 | has_many=True, 187 | ) 188 | for img in extra_images 189 | ] 190 | else: 191 | image_url = main_image.attr("href") 192 | if not image_url: 193 | pk_logger.warning("No image available for: %s" % title) 194 | return [] 195 | return [ 196 | self._mk_item( 197 | 
class Fetcher(DummyFetcher):
    """Fetcher that files metmuseum images by search keyword and artwork title."""

    def get_save_path(self, base_path, image_name, image: ImageItem):
        """
        Work out where ``image`` is stored and create that directory.

        :param base_path: root directory of the download
        :param image_name: file name proposed for the image
        :param image: item whose ``meta`` holds ``search_keyword``,
            ``has_many`` and ``title``
        :return: full path of the file to write
        """
        project_dir = base_path
        keyword = image.meta['search_keyword']
        if keyword:
            # Keep the keyword directory under base_path; joining the keyword
            # alone made the path relative to the working directory.
            project_dir = os.path.join(base_path, keyword)
        if image.meta['has_many']:
            # Artworks with several images get a directory of their own.
            project_dir = os.path.join(
                project_dir,
                safe_file_name(image.meta['title']),
            )
        else:
            # A single image is stored flat, prefixed with the artwork title.
            name, _, ext = image_name.rpartition(".")
            image_name = safe_file_name(image.meta['title'] + name + "." + ext)
        os.makedirs(project_dir, exist_ok=True)
        return os.path.join(project_dir, image_name)


class MetMuseum(DummySite):
    """Site entry point: a search URL or a single item URL of metmuseum.org."""

    fetcher = Fetcher()

    def __init__(self, url):
        """
        :param url: item page URL (contains ``search/``) or search page URL
        """
        self._base_url = url
        # Item pages look like ".../search/<id>?..."; anything else is
        # treated as a search listing.
        page_cls = ItemPage if "search/" in url else SearchPage
        self._iter = page_cls(
            page_url=self._base_url,
            meta_fetcher=BrowserMetaFetcher(),
        )

    @property
    def dir_name(self):
        """Directory name reported by the underlying page object."""
        return self._iter.dir_name

    @property
    def tasks(self):
        """Yield every image item produced by the underlying page object."""
        print("Task generator begin: ", self._base_url)
        yield from self._iter.get_image_items()
def guess_extension(image_url):
    """Return the text after the last ``.`` of ``image_url``."""
    return image_url.rsplit('.', 1)[-1]


def normalize_filename(filename):
    """
    Make ``filename`` safe to use as a single path component.

    Parent-directory prefixes and both kinds of path separator are replaced
    by ``_``, so the result can neither create nested directories nor, when
    joined to a base directory, escape from it.

    :param filename: raw name, e.g. an illustration title
    :return: the sanitised name
    """
    # Order matters: "../" and "..\\" collapse to one "_" before the
    # remaining lone separators are handled.
    for token in ("../", "..\\", "\\", "/"):
        filename = filename.replace(token, "_")
    # A bare "." or ".." would still point at the current/parent directory.
    if filename in (".", ".."):
        filename = "_" * len(filename)
    return filename
class PixivFetcher(DummyFetcher):
    """Fetcher that sends a pixiv Referer and groups multi-page works in a directory."""

    def __init__(self, **kwargs):
        """Forward ``kwargs`` to the base fetcher, then set the Referer header."""
        super().__init__(**kwargs)
        referer_header = {'Referer': 'http://www.pixiv.net/'}
        self.session.headers.update(referer_header)

    def save(self, content, task_item):
        """
        Write ``content`` to disk for ``task_item``.

        Items without ``meta`` are delegated to the base class; items with
        ``meta`` are written into a sub-directory named by ``meta['dir_name']``.
        """
        image = task_item.image
        if image.meta is None:
            return super().save(content, task_item)
        target_dir = os.path.join(task_item.base_save_path, image.meta['dir_name'])
        os.makedirs(target_dir, exist_ok=True)
        target_dir = self._safe_path(target_dir)
        target_file = os.path.join(target_dir, image.name)
        with open(target_file, "wb") as out:
            out.write(content)
def _get_file_hash(file_content):
    """Return the hexadecimal MD5 digest of ``file_content`` (bytes)."""
    return hashlib.md5(file_content).hexdigest()


def _get_name_ext_from_url(img_url):
    """
    Split the last path segment of ``img_url`` into ``(name, ext)``.

    The query string and fragment are dropped first, so a ``/`` or ``.``
    inside them cannot be mistaken for part of the file name. A segment
    without any dot yields ``(segment, "")``.

    :param img_url: URL of the image
    :return: tuple ``(name, ext)``; ``ext`` carries no leading dot
    """
    path = img_url.split('?', 1)[0].split('#', 1)[0]
    file_name = path.rsplit('/', 1)[-1]
    name, dot, ext = file_name.rpartition('.')
    if not dot:
        # No extension at all: the whole segment is the name.
        return file_name, ''
    return name, ext


def get_filename_fom_url(img_url):
    """Return the file name (``name.ext``, or just ``name``) found in ``img_url``."""
    name, ext = _get_name_ext_from_url(img_url)
    return ".".join((name, ext)) if ext else name


def get_name_with_hash_from_url(img_url: str, file_content):
    """
    Return ``<name>-<md5 of content>.<ext>`` for ``img_url``.

    :param img_url: URL of the image
    :param file_content: raw bytes of the downloaded file
    :return: file name with the content hash inserted before the extension;
        the extension part is omitted when the URL has none
    """
    name, ext = _get_name_ext_from_url(img_url)
    stem = "-".join([name, _get_file_hash(file_content)])
    return ".".join([stem, ext]) if ext else stem


# "/" -> "_", "?" -> "__", ":" -> "___", applied in a single pass.
_UNSAFE_CHARS = str.maketrans({"/": "_", "?": "__", ":": "___"})


def safe_file_name(file_name):
    """Replace ``/``, ``?`` and ``:`` in ``file_name`` with underscores."""
    return file_name.translate(_UNSAFE_CHARS)
def get_version(ver_path=None):
    """
    Read ``__version__`` out of the package's version module.

    :param ver_path: path of the version file; defaults to
        ``picktrue/version.py`` next to this setup script, so the lookup
        does not depend on the current working directory
    :return: the version string
    :raises KeyError: if the file does not define ``__version__``
    """
    if ver_path is None:
        ver_path = os.path.join(HERE, 'picktrue', 'version.py')
    main_ns = {}
    with open(ver_path, encoding='utf-8') as ver_file:
        # The version file is part of this project, so executing it is safe.
        exec(ver_file.read(), main_ns)
    return main_ns['__version__']
== "p2208623414.jpg" 8 | 9 | assert utils.get_filename_fom_url( 10 | "https://img9.doubanio.com/view/photo/l/public/p2208623414.jpg?hello=world" 11 | ) == "p2208623414.jpg" 12 | --------------------------------------------------------------------------------