├── .gitignore
├── LICENSE
├── README.md
├── doc
    └── import-to-pinry.md
├── picktrue.user.js
├── res
    ├── artstation-downloaded.jpg
    ├── huaban-downloaded.jpg
    ├── pixiv.jpg
    └── usage.gif
└── src
    ├── Makefile
    ├── build-on-windows-metmuseum.bat
    ├── build-on-windows.bat
    ├── dev-requirements.txt
    ├── files
        ├── icon.icns
        ├── icon.ico
        └── icon.png
    ├── picktrue
        ├── __init__.py
        ├── __main__.py
        ├── engine.py
        ├── gui
        │   ├── __init__.py
        │   ├── __main__.py
        │   ├── config.py
        │   ├── downloader.py
        │   ├── entry.py
        │   ├── pinry_importer.py
        │   └── toolkit.py
        ├── logger.py
        ├── meta.py
        ├── pinry
        │   ├── __init__.py
        │   ├── ds.py
        │   ├── importer.py
        │   └── uploader.py
        ├── rpc
        │   ├── __init__.py
        │   ├── channel.py
        │   └── taskserver.py
        ├── sites
        │   ├── __init__.py
        │   ├── abstract.py
        │   ├── artstation.py
        │   ├── douban.py
        │   ├── huaban.py
        │   ├── metmuseum.py
        │   ├── pixiv.py
        │   └── utils.py
        ├── utils.py
        └── version.py
    ├── setup.py
    └── tests
        └── test_sites
            └── test_utils.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # log file
  2 | *.log
  3 | 
  4 | # fucking MacOS
  5 | .DS_Store
  6 | 
  7 | # I have no idea
  8 | .idea
  9 | 
 10 | # DB files
 11 | *.sqlite
 12 | *.sqlite3
 13 | 
 14 | # Byte-compiled / optimized / DLL files
 15 | __pycache__/
 16 | *.py[cod]
 17 | *$py.class
 18 | 
 19 | # C extensions
 20 | *.so
 21 | 
 22 | # Distribution / packaging
 23 | .Python
 24 | build/
 25 | develop-eggs/
 26 | dist/
 27 | downloads/
 28 | eggs/
 29 | .eggs/
 30 | lib/
 31 | lib64/
 32 | parts/
 33 | sdist/
 34 | var/
 35 | wheels/
 36 | *.egg-info/
 37 | .installed.cfg
 38 | *.egg
 39 | MANIFEST
 40 | 
 41 | # PyInstaller
 42 | #  Usually these files are written by a python script from a template
 43 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 44 | *.manifest
 45 | *.spec
 46 | 
 47 | # Installer logs
 48 | pip-log.txt
 49 | pip-delete-this-directory.txt
 50 | 
 51 | # Unit test / coverage reports
 52 | htmlcov/
 53 | .tox/
 54 | .coverage
 55 | .coverage.*
 56 | .cache
 57 | nosetests.xml
 58 | coverage.xml
 59 | *.cover
 60 | .hypothesis/
 61 | .pytest_cache/
 62 | 
 63 | # Translations
 64 | *.mo
 65 | *.pot
 66 | 
 67 | # Django stuff:
 68 | *.log
 69 | local_settings.py
 70 | db.sqlite3
 71 | 
 72 | # Flask stuff:
 73 | instance/
 74 | .webassets-cache
 75 | 
 76 | # Scrapy stuff:
 77 | .scrapy
 78 | 
 79 | # Sphinx documentation
 80 | docs/_build/
 81 | 
 82 | # PyBuilder
 83 | target/
 84 | 
 85 | # Jupyter Notebook
 86 | .ipynb_checkpoints
 87 | 
 88 | # pyenv
 89 | .python-version
 90 | 
 91 | # celery beat schedule file
 92 | celerybeat-schedule
 93 | 
 94 | # SageMath parsed files
 95 | *.sage.py
 96 | 
 97 | # Environments
 98 | .env
 99 | .venv
100 | env/
101 | venv/
102 | ENV/
103 | env.bak/
104 | venv.bak/
105 | 
106 | # Spyder project settings
107 | .spyderproject
108 | .spyproject
109 | 
110 | # Rope project settings
111 | .ropeproject
112 | 
113 | # mkdocs documentation
114 | /site
115 | 
116 | # mypy
117 | .mypy_cache/
118 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Ji Qu
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | PickTrue
 2 | ------------
 3 | 
 4 | ![logo](src/files/icon.png)
 5 | 
 6 | 初衷是帮你备份自己的花瓣采集，以免哪天被和谐。
 7 | 
 8 | 强迫症可以收集喜爱画师的图。
 9 | 
10 | 画师/设计师们也可以用来构建自己的Visual Library
11 | 
12 | **划重点**：现在支持导出画板的画板和Tag信息，并且导入另一个个人画板项目[Pinry](https://github.com/pinry/pinry)啦！
13 | 
14 | # 下载
15 | 
16 | + [windows-x64](https://github.com/winkidney/PickTrue/releases)
17 | + [macOS](https://github.com/winkidney/PickTrue/releases)
18 | 
19 | Linux请直接下载并安装Python项目即可，有打包需求再提
20 | 
21 | + [适用于Artstation的油猴脚本](https://greasyfork.org/zh-CN/scripts/390597-picktruebrowser)
22 | + 推荐使用的油猴脚本插件（其他的不推荐，理由很多，不详细叙述啦），不推荐的插件可能也能运行，但我没有测试
23 |   + [火狐的ViolentMonkey](https://addons.mozilla.org/en-US/firefox/addon/violentmonkey/?src=search)
24 |   + [Chrome的ViolentMonkey](https://chrome.google.com/webstore/detail/violentmonkey/jinjaccalgkegednnccohejagnlnfdag)
25 | 
26 | # 用法
27 | 
28 | ## 从花瓣迁移到Pinry
29 | 参见：[导入到Pinry](./doc/import-to-pinry.md)
30 | 
31 | ## 支持列表和网址范例
32 | 
33 | + ArtStation
34 |   + 个人页（按艺术家）： https://www.artstation.com/braveking
35 | + 花瓣网
36 |   + 个人页 (按收藏者)： http://huaban.com/wmtzyzw1fl/
37 |   + 按画板 ：http://huaban.com/boards/18720569/ 
38 | + Pixiv
39 |   + 个人页（按作者，如果是漫画则会自动建立子文件夹）： https://www.pixiv.net/en/users/212801
40 | + 豆瓣
41 |   + 相册：https://www.douban.com/photos/album/145972492/
42 | 
43 | ## 通用
44 | 
45 | ![用法](res/usage.gif)
46 | 
47 | ## Pixiv用法
48 | 
49 | 如果本身已经全局翻墙或者路由翻墙，可以不填写单独的代理地址
50 | 
51 | ![pixiv](res/pixiv.jpg)
52 | 
53 | 
54 | ## 需要配合油猴脚本的Artstation下载
55 | 
56 | 1. 安装油猴脚本: [适用于Artstation的油猴脚本](https://greasyfork.org/zh-CN/scripts/390597-picktruebrowser)
57 | 2. 下载最新的[客户端](https://github.com/winkidney/PickTrue/releases)
58 | 3. 打开浏览器，访问Artstation你想要下载的指定用户的主页，例如 [https://www.artstation.com/braveking](https://www.artstation.com/braveking)
59 | 4. 启动PickTrue客户端，切换到Artstation的Tab，粘贴第三步的主页地址到地址填写处，设置好代理（没有代理似乎无法下载了）和下载文件夹，
60 |    点击"开始下载"。注意：这时候下载进度不会更新，不会有下载进度产生。
61 | 5. (火狐浏览器+ViolentMonkey)在浏览器页面中，右键，选择"发送相册到PickTrue并下载"，注意这一步不要重复操作，操作一次就行了。
62 | 5. (Chrome内核浏览器+ViolentMonkey)在浏览器页面中，按住Ctrl+鼠标右键，就会开始下载（注意这一步不要重复操作，操作一次就行了）。
63 | 6. （可选步骤）打开浏览器控制台，观察控制台的日志。
64 | 7. 此时，下载器的下载进度（底部状态栏）应该开始更新了，耐心等待即可：）
65 | 
66 | # 已知问题
67 | 
68 | + 花瓣下载会丢失部分不一致的数据（比较罕见），原因是花瓣的网页能看到的图，在花瓣的API里面不存在，属于花瓣网的Bug，以后有精力会修复这个问题，基本不影响使用
69 | + Pixiv下载需要代理，或者本地hosts，或者是全局翻墙
70 | 
71 | # 其他图站
72 | 
73 | 欢迎提出建议：）
74 | 
75 | + Pinterest (暂无计划)
76 | + NHentai (暂无计划)
77 | 
78 | # 其他功能
79 | 
80 | + 自动记录上次选择的保存路径
81 | 
82 | # 更新日志
83 | 
84 | 参见 `release` 页，https://github.com/winkidney/PickTrue/releases
85 | 
86 | # Bug反馈和使用交流
87 | 
88 | + QQ群：863404640
89 | 
90 | # 附图
91 | 
92 | ## ArtStation
93 | ![Artstation](res/artstation-downloaded.jpg)
94 | 
95 | ## 花瓣
96 | ![Huaban](res/huaban-downloaded.jpg)
97 | 


--------------------------------------------------------------------------------
/doc/import-to-pinry.md:
--------------------------------------------------------------------------------
 1 | 将你的画板系统迁移到Pinry
 2 | ----------------------
 3 | 
 4 | 现在仅提供从花瓣迁移的方法，其他画板，你只需要生成和花瓣导出的CSV格式一样的文件，也能直接导入！
 5 | 
 6 | # 将花瓣导入到Pinry
 7 | 
 8 | 1. 使用下载器的下载功能，重新下载花瓣（个人页）
 9 | 2. 下载文件夹里会多一个pins2import.csv文件
10 | 3. 切换到下载器的"Pinry导入"功能：
11 |   + 填写你部署的Pinry的地址，例如 `http://pin.37soloist.com`
12 |   + 填写登录账号和密码
13 |   + 点击“测试登录”可以看你的账号密码是否配置正确
14 |   + 选取刚才提到的CSV文件路径
15 |   + **可选**： 如果有必要，过滤掉花瓣自动处理为小锁的那些图片，设定图片上传最小尺寸为20（kb）即可。
16 | 4. 点击“开始导入”，等待完成即可。小文件也会被标记为“错误”，“已完成”都是上传的文件。
17 | 
18 | 


--------------------------------------------------------------------------------
/picktrue.user.js:
--------------------------------------------------------------------------------
  1 | // ==UserScript==
  2 | // @name PickTrueBrowser
  3 | // @author winkidney@gmail.com
  4 | // @version 0.0.4
  5 | // @namespace tools
  6 | // @description A tool to get meta info from ArtStation within browser to provide downloading service.
  7 | // @match        *://*/*
  8 | // @grant GM_xmlhttpRequest
  9 | // @require https://code.jquery.com/jquery-1.12.4.min.js
 10 | // @run-at context-menu
 11 | // ==/UserScript==
 12 | let utils = {
 13 |   isFirefox: function () {
 14 |     return (navigator.userAgent.indexOf("Firefox") !== -1)
 15 |   },
 16 |   isChrome: function () {
 17 |     return (navigator.userAgent.indexOf("Chrome") !== -1)
 18 |   }
 19 | };
 20 | 
 21 | let logger = {
 22 |   info: function(...args) {
 23 |     console.log("[PickTrue]: ", ...args);
 24 |   }
 25 | };
 26 | 
 27 | let BrowserClient = function () {
 28 |   function fetchUrl(url, callback) {
 29 |     logger.info("Fetching url:", url);
 30 |     return $.get(url, callback);
 31 |   }
 32 |   return {
 33 |     fetchUrl: fetchUrl,
 34 |   }
 35 | };
 36 | 
 37 | let RequestProxy = function () {
 38 |   let client = BrowserClient();
 39 | 
 40 |   function submitTask(request_url, respData, callback) {
 41 |     logger.info("Submit response:", respData);
 42 |     let request_data = JSON.stringify(respData);
 43 |     let data = JSON.stringify(
 44 |         {
 45 |           request_url: request_url,
 46 |           response: request_data
 47 |         }
 48 |     )
 49 | 
 50 |     let details = {
 51 |       url: "http://localhost:2333/tasks/submit/",
 52 |       data: data,
 53 |       method: "POST",
 54 |       onloadend: function (data) {
 55 |         logger.info("Submit response done: ", data);
 56 |         callback()
 57 |       },
 58 |     };
 59 |     return GM_xmlhttpRequest(details);
 60 |   }
 61 |   function getTask() {
 62 |     let details = {
 63 |       url: "http://localhost:2333/tasks/",
 64 |       method: "GET",
 65 |       onloadend: function (resp) {
 66 |         logger.info("Get task: ", resp);
 67 |         let data = JSON.parse(resp.responseText);
 68 |         if (data.length <= 0){
 69 |           return getTask()
 70 |         } else {
 71 |           client.fetchUrl(
 72 |               data[0],
 73 |               function (respData) {
 74 |                 submitTask(data[0], respData, getTask)
 75 |               },
 76 |           )
 77 |         }
 78 |       },
 79 |     };
 80 |     return GM_xmlhttpRequest(details);
 81 |   }
 82 |   return {
 83 |     getTask: getTask,
 84 |     submitTask: submitTask,
 85 |   };
 86 | };
 87 | 
 88 | function entry() {
 89 |   alert("请确保已经启动了PickTrue客户端。将要解析当前用户的所有图集并将下载地址发送PickTrue下载器，确认后将立即开始。");
 90 |   let proxy = RequestProxy();
 91 |   proxy.getTask();
 92 | }
 93 | 
/**
 * Register `entryFn` behind an HTML context-menu item.
 *
 * A <menu> element is appended to document.body. If <html> already names a
 * context menu, the new element is moved into that existing menu; otherwise
 * <html>'s "contextmenu" attribute is pointed at this script's menu id. The
 * click listener is attached only when the browser exposes `contextMenu` on
 * <html> and `HTMLMenuItemElement` on `window`.
 *
 * @param {Function} entryFn - handler bound to the menu item's "click" event.
 */
function _setUpContextMenuFirefox(entryFn) {
  var menu = document.body.appendChild(document.createElement("menu"));
  var html = document.documentElement;
  if (html.hasAttribute("contextmenu")) {
    // We don't want to override web page context menu if any
    var contextmenu = $("#" + html.getAttribute("contextmenu"));
    contextmenu[0].appendChild(menu); // Append to web page context menu
  } else {
    html.setAttribute("contextmenu", "userscript-picktrue-context-menu");
  }

  // Replace the placeholder element with the real menu markup.
  menu.outerHTML = '<menu id="userscript-picktrue-context-menu"\
                          type="context">\
                      <menuitem id="userscript-picktrue-menuitem"\
                                label="发送相册到PickTrue并下载">\
                      </menuitem>\
                    </menu>';

  // Feature check: skip the listener where these APIs are not exposed.
  if ("contextMenu" in html && "HTMLMenuItemElement" in window) {
    var menuitem = $("#userscript-picktrue-menuitem")[0];
    menuitem.addEventListener("click", entryFn, false);
  }
}
117 | 
// Run `entryFn` whenever the context menu is opened while Ctrl is held.
function _setUpContextMenuChrome(entryFn) {
  const onContextMenu = (event) => {
    if (!event.ctrlKey) {
      return;
    }
    entryFn();
  };
  $(document).on("contextmenu", onContextMenu);
}
125 | 
// Pick the context-menu wiring that matches the detected browser; alert the
// user when the browser is neither Firefox nor Chrome.
function setUpContextMenu(entryFn) {
  if (utils.isFirefox()) {
    _setUpContextMenuFirefox(entryFn);
    return;
  }
  if (utils.isChrome()) {
    _setUpContextMenuChrome(entryFn);
    return;
  }
  alert("Unsupported browser " + navigator.userAgent);
}
135 | 
136 | setUpContextMenu(entry);
137 | 


--------------------------------------------------------------------------------
/res/artstation-downloaded.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/winkidney/PickTrue/772b105e4de3852bba41369221f47b8480bf1070/res/artstation-downloaded.jpg


--------------------------------------------------------------------------------
/res/huaban-downloaded.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/winkidney/PickTrue/772b105e4de3852bba41369221f47b8480bf1070/res/huaban-downloaded.jpg


--------------------------------------------------------------------------------
/res/pixiv.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/winkidney/PickTrue/772b105e4de3852bba41369221f47b8480bf1070/res/pixiv.jpg


--------------------------------------------------------------------------------
/res/usage.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/winkidney/PickTrue/772b105e4de3852bba41369221f47b8480bf1070/res/usage.gif


--------------------------------------------------------------------------------
/src/Makefile:
--------------------------------------------------------------------------------
1 | build-osx:
2 | 	pyinstaller --name picktrue --onefile --icon=files/icon.icns --windowed picktrue/gui/__main__.py
3 | clean:
4 | 	rm -fr ./build
5 | 	rm -fr ./dist
6 | gui:
7 | 	python -m picktrue.gui
8 | 


--------------------------------------------------------------------------------
/src/build-on-windows-metmuseum.bat:
--------------------------------------------------------------------------------
1 | rm -fr dist build
2 | pyinstaller --name picktrue-metmuseum --onefile --icon=files/icon.ico picktrue/sites/metmuseum.py
3 | 


--------------------------------------------------------------------------------
/src/build-on-windows.bat:
--------------------------------------------------------------------------------
1 | pyinstaller --name picktrue-windows --onefile --icon=files/icon.ico --windowed picktrue/gui/__main__.py
2 | 


--------------------------------------------------------------------------------
/src/dev-requirements.txt:
--------------------------------------------------------------------------------
1 | pyinstaller
2 | pytest
3 | 


--------------------------------------------------------------------------------
/src/files/icon.icns:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/winkidney/PickTrue/772b105e4de3852bba41369221f47b8480bf1070/src/files/icon.icns


--------------------------------------------------------------------------------
/src/files/icon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/winkidney/PickTrue/772b105e4de3852bba41369221f47b8480bf1070/src/files/icon.ico


--------------------------------------------------------------------------------
/src/files/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/winkidney/PickTrue/772b105e4de3852bba41369221f47b8480bf1070/src/files/icon.png


--------------------------------------------------------------------------------
/src/picktrue/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/winkidney/PickTrue/772b105e4de3852bba41369221f47b8480bf1070/src/picktrue/__init__.py


--------------------------------------------------------------------------------
/src/picktrue/__main__.py:
--------------------------------------------------------------------------------
  1 | import click
  2 | 
  3 | from picktrue.sites.douban import DoubanPersonalAlbum
  4 | from picktrue.sites.pixiv import Pixiv
  5 | 
  6 | from picktrue.logger import pk_logger
  7 | from picktrue.sites.artstation import ArtStation
  8 | from picktrue.sites.huaban import HuaBan, HuaBanBoard
  9 | from picktrue.engine import Downloader
 10 | 
 11 | 
# Root click command group named "downloader"; the sub-commands in this
# module attach themselves to it through ``@entry.command``.
# NOTE: kept docstring-free on purpose, since click would show a docstring
# as the group's help text.
@click.group('downloader')
def entry():
    pass
 15 | 
 16 | 
 17 | @click.argument("url")
 18 | @click.option("--proxy", default=None, type=click.STRING)
 19 | @entry.command(
 20 |     "artstation-user",
 21 |     help='download from artstation user home page',
 22 | )
 23 | def artstation_user(url, proxy):
 24 |     site = ArtStation(url, proxy=proxy)
 25 |     downloader = Downloader(fetcher=site.fetcher, save_dir=site.dir_name)
 26 |     downloader.add_task(
 27 |         site.tasks
 28 |     )
 29 |     pk_logger.info("All task add...waiting for execution...")
 30 |     try:
 31 |         downloader.join()
 32 |     except KeyboardInterrupt:
 33 |         pk_logger.warn("Exiting...Press crtl+c again to force quit")
 34 |         downloader.stop()
 35 |         exit(0)
 36 |     else:
 37 |         pk_logger.info("All task done...Enjoy!")
 38 | 
 39 | 
 40 | @click.argument("url")
 41 | @entry.command(
 42 |     "huaban-user",
 43 |     help='download from huaban.com user home page',
 44 | )
 45 | def huban_user(url):
 46 |     site = HuaBan(url)
 47 |     downloader = Downloader(fetcher=site.fetcher, save_dir=site.dir_name)
 48 |     downloader.add_task(
 49 |         site.tasks
 50 |     )
 51 |     pk_logger.info("All task add...waiting for execution...")
 52 |     try:
 53 |         downloader.join()
 54 |     except KeyboardInterrupt:
 55 |         pk_logger.warn("Exiting...Press crtl+c again to force quit")
 56 |         downloader.stop()
 57 |         exit(0)
 58 |     else:
 59 |         pk_logger.info("All task done...Enjoy!")
 60 | 
 61 | 
 62 | @click.argument("url")
 63 | @entry.command(
 64 |     "huaban-board",
 65 |     help='download from huaban.com specified board page',
 66 | )
 67 | def huban_board(url):
 68 |     site = HuaBanBoard(url)
 69 |     downloader = Downloader(fetcher=site.fetcher, save_dir=site.dir_name)
 70 |     downloader.add_task(
 71 |         site.tasks
 72 |     )
 73 |     pk_logger.info("All task add...waiting for execution...")
 74 |     try:
 75 |         downloader.join()
 76 |     except KeyboardInterrupt:
 77 |         pk_logger.warn("Exiting...Press crtl+c again to force quit")
 78 |         downloader.stop()
 79 |         exit(0)
 80 |     else:
 81 |         pk_logger.info("All task done...Enjoy!")
 82 | 
 83 | 
 84 | @click.option(
 85 |     '--proxy',
 86 |     help="http/https/socks5 is supported",
 87 |     default=None,
 88 | )
 89 | @click.argument("member-id")
 90 | @click.argument("password")
 91 | @click.argument("username")
 92 | @entry.command(
 93 |     "pixiv-member",
 94 |     help='download from pixiv.net user home page',
 95 | )
 96 | def huban_user(member_id, username, password, proxy):
 97 |     site = Pixiv(member_id, username, password, proxy=proxy)
 98 |     downloader = Downloader(fetcher=site.fetcher, save_dir=site.dir_name)
 99 |     downloader.add_task(
100 |         site.tasks
101 |     )
102 |     pk_logger.info("All task add...waiting for execution...")
103 |     try:
104 |         downloader.join()
105 |     except KeyboardInterrupt:
106 |         pk_logger.warn("Exiting...Press crtl+c again to force quit")
107 |         downloader.stop()
108 |         exit(0)
109 |     else:
110 |         pk_logger.info("All task done...Enjoy!")
111 | 
112 | 
113 | @click.argument("album-url")
114 | @entry.command(
115 |     "douban-personal-album",
116 |     help='download from douban personal album',
117 | )
118 | def douban_personal_album(album_url):
119 |     site = DoubanPersonalAlbum(album_url)
120 |     downloader = Downloader(fetcher=site.fetcher, save_dir=site.dir_name)
121 |     downloader.add_task(
122 |         site.tasks
123 |     )
124 |     pk_logger.info("All task add...waiting for execution...")
125 |     try:
126 |         downloader.join()
127 |     except KeyboardInterrupt:
128 |         pk_logger.warn("Exiting...Press crtl+c again to force quit")
129 |         downloader.stop()
130 |         exit(0)
131 |     else:
132 |         pk_logger.info("All task done...Enjoy!")
133 | 
134 | 
def main():
    """Run the ``entry`` click command group."""
    entry()
137 | 
138 | 
139 | if __name__ == "__main__":
140 |     main()
141 | 


--------------------------------------------------------------------------------
/src/picktrue/engine.py:
--------------------------------------------------------------------------------
  1 | from typing import NamedTuple
  2 | 
  3 | import os
  4 | from queue import Queue, Empty
  5 | from threading import Thread
  6 | import time
  7 | from functools import wraps
  8 | 
  9 | from picktrue.logger import pk_logger
 10 | from picktrue.meta import DownloadTaskItem
 11 | from picktrue.utils import run_as_thread
 12 | 
 13 | 
class WorkerTask(NamedTuple):
    """One unit of work queued for a worker thread.

    Both fields default to ``None``; ``StoppableThread.run`` substitutes an
    empty dict / tuple for them before calling the task function.
    """
    # Keyword arguments passed to the worker's task function.
    kwargs: dict = None
    # Positional arguments passed to the worker's task function.
    args: tuple = None
 17 | 
 18 | 
 19 | class StoppableThread(Thread):
 20 | 
 21 |     def __init__(
 22 |             self, queue, target
 23 |     ):
 24 |         """
 25 |         :type queue: queue.Queue
 26 |         """
 27 |         super(StoppableThread, self).__init__()
 28 |         self.task_func = target
 29 |         self.queue = queue
 30 |         self.daemon = True
 31 |         self._stopped = False
 32 | 
 33 |     def run(self):
 34 |         while not self._stopped:
 35 |             try:
 36 |                 task = self.queue.get(timeout=0.2)
 37 |             except Empty:
 38 |                 continue
 39 |             else:
 40 |                 args = task.args or ()
 41 |                 kwargs = task.kwargs or {}
 42 |                 self.task_func(*args, **kwargs)
 43 |                 self.queue.task_done()
 44 | 
 45 |     def stop(self):
 46 |         self._stopped = True
 47 | 
 48 | 
 49 | def mk_download_save_function(fetcher):
 50 |     """
 51 |     :type fetcher: picktrue.sites.abstract.DummyFetcher
 52 |     """
 53 | 
 54 |     def download_then_save(task_item: DownloadTaskItem):
 55 |         """
 56 |         :return True if download ok
 57 |         :type task_item: picktrue.meta.DownloadTaskItem
 58 |         """
 59 |         response = fetcher.get(task_item.image.url)
 60 |         if response is None:
 61 |             pk_logger.error("Failed to download image: %s" % task_item.image.url)
 62 |             return
 63 |         fetcher.save(response.content, task_item)
 64 |         return True
 65 | 
 66 |     return download_then_save
 67 | 
 68 | 
 69 | class Counter:
 70 | 
 71 |     def __init__(self, total=0):
 72 |         self.total = total
 73 |         self.done = 0
 74 | 
 75 |     def on_change(self):
 76 |         print(self.format(), end='\r', flush=True)
 77 | 
 78 |     def increment_done(self):
 79 |         self.done += 1
 80 |         self.on_change()
 81 | 
 82 |     def increment_total(self):
 83 |         self.total += 1
 84 |         self.on_change()
 85 | 
 86 |     def format(self):
 87 |         return "total: %s, done: %s" % (self.total, self.done)
 88 | 
 89 | 
 90 | class Downloader:
 91 | 
 92 |     def __init__(self, fetcher, num_workers=5, save_dir='.'):
 93 |         self.save_dir = save_dir
 94 |         self.num_workers = num_workers
 95 |         self._download_queue = Queue()
 96 |         self.counter = Counter()
 97 |         self.done = False
 98 |         self._stop = False
 99 |         self._all_task_add = False
100 |         self.ensure_dir()
101 | 
102 |         def counter_wrapper(func):
103 | 
104 |             @wraps(func)
105 |             def wrapped(task_item):
106 |                 ret = func(task_item=task_item)
107 |                 self.counter.increment_done()
108 |                 return ret
109 | 
110 |             return wrapped
111 | 
112 |         download_then_save = mk_download_save_function(
113 |             fetcher
114 |         )
115 | 
116 |         _dts = counter_wrapper(download_then_save)
117 | 
118 |         self._download_workers = [
119 |             StoppableThread(
120 |                 self._download_queue,
121 |                 _dts,
122 |             ) for _ in range(num_workers)
123 |         ]
124 |         self._start_daemons()
125 | 
126 |     def ensure_dir(self):
127 |         if not os.path.exists(self.save_dir):
128 |             os.mkdir(self.save_dir)
129 | 
130 |     def add_task(self, task_iter, background=False):
131 |         if background:
132 |             run_as_thread(self._add_task, task_iter)
133 |         else:
134 |             self._add_task(task_iter)
135 | 
136 |     def _add_task(self, image_iter):
137 |         for image in image_iter:
138 |             if self._stop:
139 |                 break
140 |             dti = DownloadTaskItem(
141 |                 image=image,
142 |                 base_save_path=self.save_dir,
143 |             )
144 |             self.counter.increment_total()
145 |             self._download_queue.put(
146 |                 WorkerTask(
147 |                     kwargs={
148 |                         'task_item': dti,
149 |                     }
150 |                 )
151 |             )
152 |         self._all_task_add = True
153 | 
154 |     def _start_daemons(self):
155 |         for worker in self._download_workers:
156 |             worker.start()
157 | 
158 |     def join(self, background=False):
159 | 
160 |         def run():
161 |             self._download_queue.join()
162 |             while not self._all_task_add:
163 |                 time.sleep(0.2)
164 |                 self._download_queue.join()
165 |             self.done = True
166 | 
167 |         if background:
168 |             run_as_thread(run)
169 |         else:
170 |             run()
171 | 
172 |     def stop(self):
173 |         self._stop = True
174 |         for worker in self._download_workers:
175 |             worker.stop()
176 | 
177 |         for worker in self._download_workers:
178 |             worker.join()
179 | 
180 |     @property
181 |     def task_add_done(self):
182 |         return self._all_task_add
183 | 
184 |     @property
185 |     def stopped(self):
186 |         return self._stop
187 | 
188 |     def describe(self):
189 |         return "%s of %s downloaded" % (
190 |             self.counter.done,
191 |             self.counter.total,
192 |         )
193 | 


--------------------------------------------------------------------------------
/src/picktrue/gui/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/winkidney/PickTrue/772b105e4de3852bba41369221f47b8480bf1070/src/picktrue/gui/__init__.py


--------------------------------------------------------------------------------
/src/picktrue/gui/__main__.py:
--------------------------------------------------------------------------------
 1 | import tkinter as tk
 2 | import webbrowser
 3 | from tkinter import ttk
 4 | 
 5 | from picktrue import version
 6 | from picktrue.gui.downloader import downloaders
 7 | from picktrue.gui.toolkit import info
 8 | 
 9 | 
class App(tk.Tk):
    """Main window: one notebook tab per downloader plus a help menu."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.tabs = ttk.Notebook(self)
        self.title("PickTrue - 相册下载器 v%s" % version.__version__)
        self.build_menu()

        for tab_cls in downloaders:
            tab = tab_cls(self)
            self.tabs.add(tab, text=tab_cls.title)
        self.tabs.pack(side=tk.LEFT)

    @staticmethod
    def open_online_help():
        """Open the project page in a new browser tab."""
        webbrowser.open_new_tab('https://github.com/winkidney/PickTrue')

    @staticmethod
    def show_about():
        """Open the author's site in a new browser tab."""
        about_url = 'https://winkidney.com'
        webbrowser.open_new_tab(about_url)

    @staticmethod
    def contact():
        """Show the contact information through ``info``."""
        message = (
            "任何问题或者建议请联系作者\n"
            "用户QQ群： 863404640\n"
        )
        info(message)

    def build_menu(self):
        """Attach a menu bar with a single help cascade to the window."""
        menu_bar = tk.Menu(self)
        help_menu = tk.Menu(menu_bar)
        entries = (
            ("在线帮助", self.open_online_help),
            ("关于", self.show_about),
            ("联系作者/用户群", self.contact),
        )
        for label, command in entries:
            help_menu.add_command(label=label, command=command)
        menu_bar.add_cascade(label="帮助", menu=help_menu)
        self.config(menu=menu_bar)
50 | 
51 | 
def main():
    """Create the application window and enter the Tk main loop."""
    App().mainloop()
55 | 
56 | 
57 | if __name__ == "__main__":
58 |     main()
59 | 


--------------------------------------------------------------------------------
/src/picktrue/gui/config.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | from pathlib import Path
 4 | 
 5 | 
 6 | class AttrDict(dict):
 7 |     """Allows attributes to be bound to and also behaves like a dict"""
 8 | 
 9 |     def __getattr__(self, attr):
10 |         try:
11 |             return self[attr]
12 |         except KeyError:
13 |             raise AttributeError(r"'AttrDict' object has no attribute '%s'" % attr)
14 | 
15 |     def __setattr__(self, attr, value):
16 |         self[attr] = value
17 | 
18 | 
class ConfigStore(AttrDict):
    """Attribute-style settings dict persisted as JSON in the user's home."""

    _save_file = Path(os.path.expanduser("~/.picktrue-config.json"))

    @classmethod
    def from_config_file(cls):
        """Load the saved settings, or return an empty store if none exist."""
        config_path = Path(cls._save_file)
        if os.path.exists(config_path):
            with open(config_path, "rb") as fh:
                stored = json.load(fh)
            return cls(**stored)
        return cls()

    def __setattr__(self, key, value):
        # Every attribute write is stored in the dict and flushed to disk.
        super().__setattr__(key, value)
        self._save()

    def _save(self):
        # Rewrite the whole config file with the current contents.
        target = Path(self._save_file)
        with open(target, "w") as out:
            json.dump(self, out)

    def op_store_path(self, name, path):
        """Store ``path`` under ``name`` as a string and save."""
        self[name] = str(Path(path))
        self._save()

    def op_read_path(self, name):
        """Return the path stored under ``name`` as a ``Path``, or ``None``."""
        stored = self.get(name, None)
        if stored is None:
            return None
        return Path(stored)
48 | 
49 | 
50 | config_store = ConfigStore.from_config_file()
51 | 


--------------------------------------------------------------------------------
/src/picktrue/gui/downloader.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | import os
  3 | import time
  4 | import tkinter as tk
  5 | 
  6 | from picktrue.gui.entry import art_station_run, hua_ban_run, pixiv_run, hua_ban_board_run, \
  7 |     douban_personal_album_board_run
  8 | from picktrue.gui.pinry_importer import PinryImporterGUI
  9 | from picktrue.gui.toolkit import (
 10 |     NamedInput, FileBrowse, StatusBar, info, ProgressBar, open_sys_explorer, PasswordInput,
 11 |     ProxyInput
 12 | )
 13 | from picktrue.utils import run_as_thread
 14 | 
 15 | 
 16 | def mk_normal_inputs(master=None, store_name=None, user_home_name=None):
 17 |     url = NamedInput(master, name=user_home_name or "用户主页地址 ")
 18 |     save_path = FileBrowse(master, store_name=store_name)
 19 |     return url, save_path
 20 | 
 21 | 
 22 | def mk_pixiv_inputs(master=None):
 23 |     url = NamedInput(master, name="用户主页地址")
 24 |     username = NamedInput(master, name="Pixiv账户名（需要登录才能下载）")
 25 |     password = PasswordInput(master, name="登录密码")
 26 |     proxy = ProxyInput(master, name="代理地址(支持http/https/socks5， 可不填)")
 27 |     save_path = FileBrowse(master, store_name="pixiv_save_path")
 28 |     return url, username, password, proxy, save_path
 29 | 
 30 | 
 31 | class UserHomeDownloader(tk.Frame):
 32 | 
 33 |     title = "请更改此名字"
 34 | 
 35 |     def __init__(self, *args, store_name=None, user_home_name=None, **kwargs):
 36 |         super(UserHomeDownloader, self).__init__(*args, **kwargs)
 37 |         self.downloader = None
 38 |         self.url, self.save_path = mk_normal_inputs(
 39 |             self, store_name=store_name,
 40 |             user_home_name=user_home_name,
 41 |         )
 42 |         for attr_name, value in self.user_inputs().items():
 43 |             setattr(self, attr_name, value)
 44 |         self.btn_group = self.build_buttons()
 45 |         self.progress = ProgressBar(self)
 46 |         self.status = StatusBar(self)
 47 |         self.start_update()
 48 | 
 49 |     def user_inputs(self):
 50 |         return {}
 51 | 
 52 |     def run(self, url, path_prefix):
 53 |         raise NotImplementedError()
 54 | 
 55 |     def build_buttons(self):
 56 |         btn_args = dict(
 57 |             height=1,
 58 |         )
 59 |         btn_group = tk.Frame(self)
 60 | 
 61 |         buttons = [
 62 |             tk.Button(
 63 |                 btn_group,
 64 |                 text=text,
 65 |                 command=command,
 66 |                 **btn_args
 67 |             )
 68 |             for text, command in (
 69 |                 ("开始下载", self.start_download),
 70 |                 ("停止下载", self.stop_download),
 71 |                 ("打开下载文件夹", self.open_download_folder),
 72 |             )
 73 |         ]
 74 | 
 75 |         for index, btn in enumerate(buttons):
 76 |             btn.grid(column=index, row=0, sticky=tk.N)
 77 | 
 78 |         btn_group.pack(fill=tk.BOTH, expand=1)
 79 |         return btn_group
 80 | 
 81 |     def open_download_folder(self):
 82 |         path = self.save_path.get_path()
 83 |         open_sys_explorer(path)
 84 | 
 85 |     def start_download(self):
 86 |         self.url.assert_no_error()
 87 |         self.save_path.assert_no_error()
 88 |         url = self.url.get_input()
 89 |         path_prefix = self.save_path.get_path()
 90 |         if not os.access(path_prefix, os.W_OK):
 91 |             return info("对下载文件夹没有写权限，请重新选择")
 92 |         if self.downloader is not None:
 93 |             if not self.downloader.done:
 94 |                 return info("请停止后再重新点击下载...")
 95 |         self.downloader = self.run(
 96 |             url=url,
 97 |             path_prefix=path_prefix,
 98 |         )
 99 | 
100 |     def stop_download(self):
101 |         if self.downloader is not None:
102 |             self.downloader.stop()
103 |             self.downloader = None
104 | 
105 |     def start_update(self):
106 |         run_as_thread(self._update_loop)
107 | 
108 |     def _update_loop(self):
109 |         while True:
110 |             time.sleep(0.1)
111 |             try:
112 |                 self.update_progress()
113 |             except AttributeError:
114 |                 pass
115 | 
116 |     def update_progress(self):
117 |         if self.downloader is None:
118 |             self.progress.update_progress(
119 |                 0, 100
120 |             )
121 |             self.status.set("")
122 |         else:
123 |             self.progress.update_progress(
124 |                 self.downloader.counter.done,
125 |                 self.downloader.counter.total,
126 |             )
127 |             msg = self.downloader.counter.format()
128 |             if self.downloader.done:
129 |                 msg = msg + "  全部下载完毕，可以开始新的下载了：）"
130 |             self.status.set(msg)
131 | 
132 | 
class Pixiv(tk.Frame):
    """Download tab for Pixiv; collects address, account, password, optional
    proxy and save folder, and drives ``pixiv_run``."""

    title = "Pixiv(按画师)"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.downloader = None
        (
            self.url,
            self.username,
            self.password,
            self.proxy,
            self.save_path,
        ) = mk_pixiv_inputs(self)
        self.btn_group = self.build_buttons()
        self.progress = ProgressBar(self)
        self.status = StatusBar(self)
        self.start_update()

    def build_buttons(self):
        """Create the start / stop / open-folder buttons and return their frame."""
        btn_group = tk.Frame(self)
        actions = (
            ("开始下载", self.start_download),
            ("停止下载", self.stop_download),
            ("打开下载文件夹", self.open_download_folder),
        )
        for column, (text, command) in enumerate(actions):
            button = tk.Button(btn_group, text=text, command=command, height=1)
            button.grid(column=column, row=0, sticky=tk.N)

        btn_group.pack(fill=tk.BOTH, expand=1)
        return btn_group

    def open_download_folder(self):
        """Open the currently selected save folder in the system file manager."""
        open_sys_explorer(self.save_path.get_path())

    def start_download(self):
        """Validate every field and start a Pixiv download unless one is running."""
        fields = (self.url, self.username, self.password, self.proxy, self.save_path)
        for field in fields:
            field.assert_no_error()

        options = dict(
            url=self.url.get_input(),
            # An empty proxy field means "no proxy".
            proxy=self.proxy.get_input() or None,
            username=self.username.get_input(),
            password=self.password.get_input(),
            path_prefix=self.save_path.get_path(),
        )

        if not os.access(options["path_prefix"], os.W_OK):
            return info("对下载文件夹没有写权限，请重新选择")
        if self.downloader is not None and not self.downloader.done:
            return info("请停止后再重新点击下载...")
        self.downloader = pixiv_run(**options)

    def stop_download(self):
        """Stop the current downloader, if any, and forget it."""
        if self.downloader is None:
            return
        self.downloader.stop()
        self.downloader = None

    def start_update(self):
        """Launch the refresh loop through ``run_as_thread``."""
        run_as_thread(self._update_loop)

    def _update_loop(self):
        """Refresh progress every 0.1 s, forever; ``AttributeError`` is ignored."""
        while True:
            time.sleep(0.1)
            try:
                self.update_progress()
            except AttributeError:
                pass

    def update_progress(self):
        """Mirror the downloader's counters onto the progress bar and status line."""
        if self.downloader is None:
            self.progress.update_progress(0, 100)
            self.status.set("")
            return
        self.progress.update_progress(
            self.downloader.counter.done,
            self.downloader.counter.total,
        )
        message = self.downloader.counter.format()
        if self.downloader.done:
            message = message + "  全部下载完毕，可以开始新的下载了：）"
        self.status.set(message)
235 | 
236 | 
class HuaBan(UserHomeDownloader):
    """Download tab backed by ``hua_ban_run``; the save folder is remembered
    under ``huaban_save_path``."""

    title = "花瓣(按作者)"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, store_name='huaban_save_path', **kwargs)

    def run(self, url, path_prefix):
        """Start the download and return the downloader (the site object that
        ``hua_ban_run`` also returns is discarded)."""
        downloader, _site = hua_ban_run(
            url=url, path_prefix=path_prefix, return_site=True,
        )
        return downloader
251 | 
252 | 
class HuaBanBoard(UserHomeDownloader):
    """Download tab backed by ``hua_ban_board_run``; the save folder is
    remembered under ``huaban_board_save_path``."""

    title = "花瓣(按画板)"

    def __init__(self, *args, **kwargs):
        super().__init__(
            *args,
            store_name='huaban_board_save_path',
            user_home_name="画板地址",
            **kwargs
        )

    def run(self, url, path_prefix):
        """Start the board download and return its downloader."""
        downloader = hua_ban_board_run(url=url, path_prefix=path_prefix)
        return downloader
270 | 
271 | 
class DoubanPsersonalAlbum(UserHomeDownloader):
    """Download tab backed by ``douban_personal_album_board_run``; the save
    folder is remembered under ``douban_personal_album_save_path``."""

    title = "豆瓣(按相册)"

    def __init__(self, *args, **kwargs):
        super().__init__(
            *args,
            store_name='douban_personal_album_save_path',
            user_home_name="相册地址",
            **kwargs
        )

    def run(self, url, path_prefix):
        """Start the album download and return its downloader."""
        downloader = douban_personal_album_board_run(url=url, path_prefix=path_prefix)
        return downloader
289 | 
290 | 
class ArtStation(UserHomeDownloader):
    """Download tab backed by ``art_station_run``, with an optional proxy field."""

    title = "ArtStation(按作者)"

    def user_inputs(self):
        """Contribute the proxy entry; the base class binds it as ``self.proxy``."""
        return {
            'proxy': ProxyInput(master=self, name="代理地址(支持http/https/socks5， 可不填)"),
        }

    def start_download(self):
        """Validate the inputs and start a download unless one is still running."""
        self.url.assert_no_error()
        self.save_path.assert_no_error()
        self.proxy.assert_no_error()

        url = self.url.get_input()
        path_prefix = self.save_path.get_path()
        # A blank proxy field means "no proxy": pass None rather than '' so the
        # value matches art_station_run's default and what the Pixiv tab sends.
        proxy = self.proxy.get_input() or None

        if not os.access(path_prefix, os.W_OK):
            return info("对下载文件夹没有写权限，请重新选择")
        if self.downloader is not None:
            if not self.downloader.done:
                return info("请停止后再重新点击下载...")
        self.downloader = self.run(
            url=url,
            path_prefix=path_prefix,
            proxy=proxy,
        )

    def run(self, url, path_prefix, proxy):
        """Start the ArtStation download and return its downloader."""
        return art_station_run(
            url=url,
            path_prefix=path_prefix,
            proxy=proxy,
        )
326 | 
327 | 
# Frame classes exposed by this module, in list order; each defines a `title`.
downloaders = [
    ArtStation,
    HuaBan,
    HuaBanBoard,
    Pixiv,
    DoubanPsersonalAlbum,
    PinryImporterGUI,
]


# Names exported by `from picktrue.gui.downloader import *`.
__all__ = (
    "downloaders",
    "mk_normal_inputs",
)
342 | 


--------------------------------------------------------------------------------
/src/picktrue/gui/entry.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from picktrue.engine import Downloader
 4 | 
 5 | from picktrue.sites.artstation import ArtStation
 6 | from picktrue.sites.douban import DoubanPersonalAlbum
 7 | from picktrue.sites.huaban import HuaBan, HuaBanBoard
 8 | from picktrue.sites.pixiv import Pixiv
 9 | 
10 | 
def _user_home_run(site, path_prefix=None):
    """
    Create a Downloader for ``site``, queue the site's tasks and return it.

    The save directory is ``site.dir_name``, placed under ``path_prefix`` when
    one is given. Tasks are added and joined with ``background=True``.

    :type site: picktrue.sites.abstract.DummySite
    :type path_prefix: str or None
    """
    if path_prefix is None:
        save_dir = site.dir_name
    else:
        save_dir = os.path.join(path_prefix, site.dir_name)
    downloader = Downloader(save_dir=save_dir, fetcher=site.fetcher)
    downloader.add_task(site.tasks, background=True)
    downloader.join(background=True)
    return downloader
26 | 
27 | 
def art_station_run(url, path_prefix=None, proxy=None):
    """Start a download for the ArtStation site built from ``url`` and
    ``proxy``; return the downloader."""
    return _user_home_run(ArtStation(url, proxy=proxy), path_prefix=path_prefix)
31 | 
32 | 
def hua_ban_run(url, path_prefix=None, return_site=False):
    """Start a download for the HuaBan site built from ``url``.

    :return: the downloader, or ``(downloader, site)`` when ``return_site``
        is true.
    """
    site = HuaBan(url)
    downloader = _user_home_run(site=site, path_prefix=path_prefix)
    return (downloader, site) if return_site else downloader
39 | 
40 | 
def hua_ban_board_run(url, path_prefix=None):
    """Start a download for the HuaBanBoard site built from ``url``; return
    the downloader."""
    return _user_home_run(site=HuaBanBoard(url), path_prefix=path_prefix)
44 | 
45 | 
def douban_personal_album_board_run(url, path_prefix=None):
    """Start a download for the DoubanPersonalAlbum site built from ``url``;
    return the downloader."""
    return _user_home_run(site=DoubanPersonalAlbum(url), path_prefix=path_prefix)
49 | 
50 | 
def pixiv_run(url, username, password, proxy=None, path_prefix=None):
    """Start a download for the Pixiv site built from the given address,
    credentials and proxy; return the downloader."""
    credentials = dict(username=username, password=password)
    site = Pixiv(url=url, proxy=proxy, **credentials)
    return _user_home_run(site, path_prefix)
59 | 


--------------------------------------------------------------------------------
/src/picktrue/gui/pinry_importer.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | import tkinter as tk
  3 | 
  4 | from picktrue.gui.toolkit import ProgressBar, StatusBar, NamedInput, FileBrowse, info, FilePathBrowse, PasswordInput
  5 | from picktrue.pinry.importer import PinryImporter
  6 | from picktrue.utils import run_as_thread
  7 | 
  8 | 
  9 | class PinryImporterGUI(tk.Frame):
 10 | 
 11 |     title = "导入到Pinry"
 12 | 
 13 |     def __init__(self, *args, **kwargs):
 14 |         super(PinryImporterGUI, self).__init__(*args, **kwargs)
 15 | 
 16 |         self._url = NamedInput(self, name="Pinry部署地址")
 17 |         self._min_size = NamedInput(self, name="最小上传大小(KB)（低于此值的文件不上传，不限制请留空）")
 18 |         self._username = NamedInput(self, name="用户名")
 19 |         self._password = PasswordInput(self, name="密码")
 20 |         self._csv_file = FilePathBrowse(self, store_name="import_csv", text_label="CSV文件文件路径")
 21 |         self.btn_group = self.build_buttons()
 22 |         self._importer = None
 23 |         self.progress = ProgressBar(self)
 24 |         self.status = StatusBar(self)
 25 |         self.start_update()
 26 | 
 27 |     def _get_importer(self):
 28 |         min_size = self._min_size.get_input()
 29 |         if min_size:
 30 |             try:
 31 |                 min_size = int(min_size)
 32 |             except Exception:
 33 |                 info("最小文件上传大小应该是整数")
 34 |         else:
 35 |             min_size = None
 36 |         return PinryImporter(
 37 |             base_url=self._url.get_input(),
 38 |             username=self._username.get_input(),
 39 |             password=self._password.get_input(),
 40 |             min_upload_size_kb=min_size,
 41 |         )
 42 | 
 43 |     def build_buttons(self):
 44 |         btn_args = dict(
 45 |             height=1,
 46 |         )
 47 |         btn_group = tk.Frame(self)
 48 | 
 49 |         buttons = [
 50 |             tk.Button(
 51 |                 btn_group,
 52 |                 text=text,
 53 |                 command=command,
 54 |                 **btn_args
 55 |             )
 56 |             for text, command in (
 57 |                 ("测试登录", self._test_login),
 58 |                 ("开始导入", self._start_import),
 59 |             )
 60 |         ]
 61 | 
 62 |         for index, btn in enumerate(buttons):
 63 |             btn.grid(column=index, row=0, sticky=tk.N)
 64 | 
 65 |         btn_group.pack(fill=tk.BOTH, expand=1)
 66 |         return btn_group
 67 | 
 68 |     def _test_login(self):
 69 |         importer = self._get_importer()
 70 |         if importer.test_login() is True:
 71 |             info("登录成功")
 72 |         else:
 73 |             info("情检查用户名密码以及部署路径是否可访问")
 74 | 
 75 |     def _start_import(self):
 76 |         self._importer = self._get_importer()
 77 |         run_as_thread(
 78 |             self._importer.do_import,
 79 |             self._csv_file.get_path(),
 80 |             name="import2pinry"
 81 |         )
 82 | 
 83 |     def start_update(self):
 84 |         run_as_thread(self._update_loop)
 85 | 
 86 |     def _update_loop(self):
 87 |         while True:
 88 |             time.sleep(0.1)
 89 |             self.update_progress()
 90 | 
 91 |     def update_progress(self):
 92 |         if self._importer is not None:
 93 |             self.progress.update_progress(
 94 |                 self._importer.done_pins,
 95 |                 self._importer.total_pins,
 96 |             )
 97 |             self.status.set(self._importer.status_text())
 98 |         else:
 99 |             self.progress.update_progress(0, 0)
100 |             self.status.set("待机...")
101 | 


--------------------------------------------------------------------------------
/src/picktrue/gui/toolkit.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | import platform
  3 | 
  4 | import os
  5 | import tkinter as tk
  6 | from pathlib import Path
  7 | from tkinter import filedialog, messagebox as msgbox, ttk
  8 | 
  9 | from picktrue.gui.config import ConfigStore, config_store
 10 | 
 11 | 
 12 | def info(message, title="信息"):
 13 |     msgbox.showinfo(title=title, message=message)
 14 | 
 15 | 
 16 | def open_sys_explorer(path):
 17 |     ptf = platform.system().lower()
 18 |     path = Path(path)
 19 |     if "darwin" in ptf:
 20 |         return os.system('open %s' % path)
 21 |     elif 'windows' in ptf:
 22 |         return os.system('explorer.exe "%s"' % path)
 23 |     elif 'linux' in ptf:
 24 |         return os.system('xdg-open %s' % path)
 25 |     return info('平台不支持')
 26 | 
 27 | 
 28 | def get_working_dir():
 29 |     return os.getcwd()
 30 | 
 31 | 
 32 | class StatusBar(tk.Frame):
 33 |     def __init__(self, master):
 34 |         tk.Frame.__init__(self, master)
 35 |         self.variable=tk.StringVar()
 36 |         self.label=tk.Label(
 37 |             self, bd=1, relief=tk.SUNKEN, anchor=tk.W,
 38 |             textvariable=self.variable,
 39 |             font=('arial', 16, 'normal')
 40 |         )
 41 |         self.variable.set('')
 42 |         self.label.pack(fill=tk.X)
 43 |         self.pack(fill=tk.BOTH)
 44 | 
 45 |     def set(self, value):
 46 |         self.variable.set(value)
 47 | 
 48 | 
 49 | class NamedInput(tk.Frame):
 50 |     def __init__(self, master=None, name=None, **kwargs):
 51 |         super(NamedInput, self).__init__(master=master, **kwargs)
 52 |         assert name is not None
 53 |         self._name = name
 54 |         label = tk.Label(self, text=name)
 55 |         label.pack(side=tk.LEFT)
 56 | 
 57 |         self.entry = tk.Entry(self)
 58 |         self.entry.pack(side=tk.LEFT, fill=tk.X, expand=1)
 59 |         self.pack(fill=tk.X)
 60 | 
 61 |     def get_input(self):
 62 |         return self.entry.get()
 63 | 
 64 |     def assert_no_error(self):
 65 |         text = self.get_input()
 66 |         if not text:
 67 |             info(
 68 |                 "%s 不能为空" % self._name
 69 |             )
 70 |             raise ValueError("value error, can't be null")
 71 | 
 72 | 
 73 | class PasswordInput(tk.Frame):
 74 |     def __init__(self, master=None, name=None, **kwargs):
 75 |         super(PasswordInput, self).__init__(master=master, **kwargs)
 76 |         assert name is not None
 77 |         self._name = name
 78 |         label = tk.Label(self, text=name)
 79 |         label.pack(side=tk.LEFT)
 80 | 
 81 |         self.entry = tk.Entry(self, show="*")
 82 |         self.entry.pack(side=tk.LEFT, fill=tk.X, expand=1)
 83 |         self.pack(fill=tk.X)
 84 | 
 85 |     def get_input(self):
 86 |         return self.entry.get()
 87 | 
 88 |     def assert_no_error(self):
 89 |         text = self.get_input()
 90 |         if not text:
 91 |             info(
 92 |                 "%s 不能为空" % self._name
 93 |             )
 94 |             raise ValueError("value error, can't be null")
 95 | 
 96 | 
 97 | class ProxyInput(NamedInput):
 98 |     def assert_no_error(self):
 99 |         value = self.get_input()
100 |         if not value:
101 |             return
102 |         results = [kw in value for kw in ('http', 'https', 'socks5')]
103 |         if not any(results):
104 |             info("代理地址错误")
105 |             raise ValueError("Proxy address error")
106 | 
107 | 
class FileBrowse(tk.Frame):
    """Button plus label for choosing a directory.

    The label shows the current choice. When ``store_name`` is given, the
    choice is read from and written to ``config_store`` under that name;
    otherwise (or when nothing is stored) the working directory is the
    initial value.
    """

    def __init__(self, master=None, store_name=None, text_label=None, **kwargs):
        super(FileBrowse, self).__init__(master=master, **kwargs)
        # Button caption; also names this field in the validation dialog.
        self._label_name = text_label or "下载到"
        self.label_text = tk.StringVar()
        btn = tk.Button(self, text=self._label_name, command=self.choose_file)
        btn.pack(
            side=tk.LEFT,
        )

        tk.Label(self, textvariable=self.label_text).pack(
            side=tk.LEFT,
            fill=tk.X,
        )
        self.pack(fill=tk.X)

        self._store_name = store_name
        if store_name is not None:
            self._config = config_store
            save_path = self._config.op_read_path(store_name) or get_working_dir()
        else:
            self._config = None
            save_path = get_working_dir()

        self.label_text.set(
            save_path
        )

    def ask_path(self):
        """Ask the user for a directory; subclasses may override the dialog."""
        return filedialog.askdirectory(
            title="选择下载文件夹",
        )

    def choose_file(self):
        """Run the dialog; on a non-empty answer update the label and, when a
        store is configured, persist the choice."""
        path = self.ask_path()
        if not path:
            return
        path = Path(path)
        self.label_text.set(str(path))
        if self._config is not None:
            self._config.op_store_path(self._store_name, path)

    def get_path(self):
        """Return the currently displayed path as a string."""
        return self.label_text.get()

    def assert_no_error(self):
        """Show a dialog and raise ``ValueError`` when no path is set."""
        text = self.get_path()
        if not text:
            # The original format string had no argument, so the dialog
            # displayed a literal "%s".
            info(
                "%s 不能为空" % self._label_name
            )
            raise ValueError("Value should not be null")
160 | 
161 | 
class FilePathBrowse(FileBrowse):
    """``FileBrowse`` variant whose dialog selects a file instead of a directory."""

    def ask_path(self):
        """Ask the user for a file and return the dialog's answer."""
        chosen = filedialog.askopenfilename(title="选择csv文件")
        return chosen
167 | 
168 | 
class ProgressBar(ttk.Progressbar):
    """Horizontal determinate progress bar, 600 units long, that packs itself
    on construction."""

    def __init__(self, master=None):
        options = dict(orient="horizontal", length=600, mode="determinate")
        super().__init__(master=master, **options)
        self.pack(expand=1)

    def update_progress(self, current, maximum=None):
        """Set the bar's value and, when given, its maximum."""
        self.configure(value=current)
        if maximum is None:
            return
        self.configure(maximum=maximum)

    def reset_progress(self):
        """Set both value and maximum to zero."""
        self.update_progress(current=0, maximum=0)
187 | 


--------------------------------------------------------------------------------
/src/picktrue/logger.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import sys
 3 | 
 4 | 
 5 | def __get_logger(name):
 6 |     __log_level = logging.INFO
 7 | 
 8 |     if "--debug-%s" % name in sys.argv:
 9 |         __log_level = logging.DEBUG
10 | 
11 |     fmt = "%(levelname)s - %(asctime)-15s - %(filename)s - line %(lineno)d --> %(message)s"
12 |     date_fmt = "%a %d %b %Y %H:%M:%S"
13 |     formatter = logging.Formatter(fmt, date_fmt)
14 | 
15 |     handler = logging.StreamHandler()
16 |     file_handler = logging.FileHandler(
17 |         "./picktrue.all.log",
18 |     )
19 |     handler.setFormatter(formatter)
20 | 
21 |     logger = logging.getLogger(name)
22 |     logger.addHandler(
23 |         handler
24 |     )
25 |     logger.addHandler(
26 |         file_handler
27 |     )
28 |     logger.setLevel(level=__log_level)
29 |     return logger
30 | 
31 | 
# Logger named 'picktrue', configured once when this module is imported.
pk_logger = __get_logger('picktrue')


# Only the configured logger is exported by `from picktrue.logger import *`.
__all__ = (
    'pk_logger',
)
38 | 


--------------------------------------------------------------------------------
/src/picktrue/meta.py:
--------------------------------------------------------------------------------
 1 | from collections import namedtuple
 2 | from typing import NamedTuple
 3 | 
 4 | 
# requires python >= 3.6.1
class ImageItem(NamedTuple):
    """Immutable record for one image: its URL, a name, and two optional dicts."""
    url: str
    # NOTE(review): the expression `str or callable` evaluates to `str` at
    # runtime; the apparent intent is "a string or a callable" -- confirm
    # against the code that consumes `name`.
    name: str or callable
    # Optional dict, defaults to None.
    meta: dict = None
    # Optional dict, defaults to None; presumably data for the Pinry import -- verify against callers.
    pin_meta: dict = None
11 | 
12 | 
# Record with the fields `image` and `base_save_path`.
# Note the generated class is named 'TaskItem', not 'DownloadTaskItem'.
DownloadTaskItem = namedtuple(
    'TaskItem',
    (
        'image',
        'base_save_path',
    )
)
20 | 
21 | 
# Header dict carrying a desktop Chrome User-Agent string; presumably sent with site requests -- verify against callers.
UA = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
23 | 


--------------------------------------------------------------------------------
/src/picktrue/pinry/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/winkidney/PickTrue/772b105e4de3852bba41369221f47b8480bf1070/src/picktrue/pinry/__init__.py


--------------------------------------------------------------------------------
/src/picktrue/pinry/ds.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | import os
 3 | from collections import namedtuple
 4 | from dataclasses import dataclass
 5 | from typing import List
 6 | 
 7 | 
 8 | @dataclass
 9 | class Pin2Import:
10 |     referer: str
11 |     tags: list
12 |     description: str
13 |     board: str
14 | 
15 |     # only one of following item should exist and another one should be None
16 |     file_abs_path: str
17 |     image_url2download: str
18 | 
19 |     @classmethod
20 |     def get_fields(cls) -> List[str]:
21 |         return list(cls.__annotations__.keys())
22 | 
23 |     def as_dict(self) -> dict:
24 |         out = {}
25 |         fields = self.get_fields()
26 |         for field in fields:
27 |             value = getattr(self, field)
28 |             if not value:
29 |                 value = ''
30 |             else:
31 |                 value = str(value)
32 |             out[field] = value
33 |         return out
34 | 
35 | 
def from_csv(path='pins2import.csv') -> List[Pin2Import]:
    """Load pins from a ``|``-delimited UTF-8 CSV file with a header row.

    The ``tags`` column is either blank (no tags), a Python list literal such
    as ``['a', 'b']``, or a single bare tag. Blank ``file_abs_path`` and
    ``image_url2download`` cells become ``None``.

    :param path: CSV file to read.
    :return: one ``Pin2Import`` per data row.
    """
    # Local import: the module header does not import ast.
    import ast

    # newline='' is what the csv module expects for file objects, so that
    # newlines embedded in quoted fields are handled by the reader itself.
    with open(path, 'r', encoding="utf-8", newline='') as csv_file:
        rows = list(csv.DictReader(csv_file, delimiter="|"))

    for row in rows:
        raw_tags = (row['tags'] or '').strip()
        if not raw_tags:
            row['tags'] = []
        elif "[" in raw_tags:
            # SECURITY: this cell comes from a user-supplied file. It used to
            # be passed to eval(), which executes arbitrary expressions;
            # literal_eval only accepts Python literals.
            try:
                parsed = ast.literal_eval(raw_tags)
            except (ValueError, SyntaxError):
                parsed = None
            if isinstance(parsed, (list, tuple)):
                row['tags'] = list(parsed)
            else:
                # Not a list literal after all: keep the text as one tag.
                row['tags'] = [raw_tags]
        else:
            row['tags'] = [raw_tags]
        row['file_abs_path'] = row['file_abs_path'] or None
        row['image_url2download'] = row['image_url2download'] or None
    return [Pin2Import(**row) for row in rows]
52 | 
53 | 
def to_csv(pins2export: List[Pin2Import], base_path, filename='pins2import.csv'):
    """Write ``pins2export`` to ``base_path/filename`` as a ``|``-delimited
    UTF-8 CSV with a header row, replacing any existing file.

    :param pins2export: pins to serialise, one row each (via ``as_dict``).
    :param base_path: directory that receives the file.
    :param filename: name of the CSV file inside ``base_path``.
    """
    fields_names = Pin2Import.get_fields()
    path = os.path.join(base_path, filename)
    # newline='' lets the csv writer emit its own line terminator; without it
    # Windows text mode turns "\r\n" into "\r\r\n", i.e. a blank row after
    # every record.
    with open(path, 'w', encoding="utf-8", newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fields_names, delimiter="|")
        writer.writeheader()
        writer.writerows(pin.as_dict() for pin in pins2export)
64 | 
65 | 
def write_to_csv(pin2export: Pin2Import, base_path, filename='pins2import.csv'):
    """Append one pin to ``base_path/filename`` (``|``-delimited UTF-8 CSV).

    The header row is written first when the file does not exist yet or is
    empty.

    :param pin2export: pin to serialise (via ``as_dict``).
    :param base_path: directory that holds the file.
    :param filename: name of the CSV file inside ``base_path``.
    """
    fields_names = Pin2Import.get_fields()
    path = os.path.join(base_path, filename)
    # An existing but empty file still needs its header, otherwise the reader
    # would take the first record for the column names.
    needs_header = not os.path.exists(path) or os.path.getsize(path) == 0
    # Append mode creates the file when missing. newline='' lets the csv
    # writer emit its own line terminator (no "\r\r\n" blank rows on Windows).
    with open(path, "a", encoding="utf-8", newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fields_names, delimiter="|")
        if needs_header:
            writer.writeheader()
        writer.writerow(
            pin2export.as_dict(),
        )
        csv_file.flush()
81 | 


--------------------------------------------------------------------------------
/src/picktrue/pinry/importer.py:
--------------------------------------------------------------------------------
  1 | from concurrent.futures.thread import ThreadPoolExecutor
  2 | from threading import Lock
  3 | 
  4 | from picktrue.logger import pk_logger
  5 | from picktrue.pinry.ds import from_csv
  6 | from picktrue.pinry.uploader import Uploader
  7 | 
  8 | 
  9 | class PinryImporter:
 10 |     _counter_lock = Lock()
 11 | 
 12 |     def __init__(self, base_url, username, password, min_upload_size_kb=None):
 13 |         self._base_url = base_url
 14 |         self._username = username
 15 |         self._password = password
 16 |         self.total_pins = 999
 17 |         self.done_pins = 0
 18 |         self.error_pins = 0
 19 |         self._started = False
 20 |         self._creating_boards = False
 21 |         self._min_upload_size_kb = None
 22 |         if min_upload_size_kb is not None:
 23 |             if int(min_upload_size_kb) != 0:
 24 |                 self._min_upload_size_kb = min_upload_size_kb
 25 | 
 26 |         self._executor = ThreadPoolExecutor(
 27 |             max_workers=1,
 28 |         )
 29 | 
 30 |     def test_login(self):
 31 |         uploader = Uploader(
 32 |             self._base_url,
 33 |             self._username,
 34 |             self._password,
 35 |         )
 36 |         return uploader.login()
 37 | 
 38 |     def is_done(self):
 39 |         return self.done_pins + self.error_pins == self.total_pins
 40 | 
 41 |     def status_text(self):
 42 |         if not self._started:
 43 |             return "待命..."
 44 |         if self.is_done():
 45 |             return "导入完毕，可以开始新的导入; 总量: %s,出错: %s, 已完成: %s" % (
 46 |                 self.total_pins, self.error_pins, self.done_pins,
 47 |             )
 48 |         else:
 49 |             if self._creating_boards:
 50 |                 return "创建画板..."
 51 |             else:
 52 |                 return "执行中，等待更新；总量: %s,出错: %s, 已完成: %s" % (
 53 |                     self.total_pins, self.error_pins, self.done_pins,
 54 |                 )
 55 | 
 56 |     def create_single_pin(self, uploader, pin):
 57 |         try:
 58 |             if pin.image_url2download is not None:
 59 |                 uploader.create(
 60 |                     pin.description,
 61 |                     pin.referer,
 62 |                     pin.image_url2download,
 63 |                     board_name=pin.board,
 64 |                     tags=pin.tags,
 65 |                 )
 66 |             elif pin.file_abs_path is not None:
 67 |                 uploader.create_with_file_upload(
 68 |                     pin.description,
 69 |                     pin.referer,
 70 |                     file_path=pin.file_abs_path,
 71 |                     board_name=pin.board,
 72 |                     tags=pin.tags,
 73 |                 )
 74 |         except ValueError:
 75 |             pk_logger.exception(
 76 |                 "Failed to to pin creation:",
 77 |             )
 78 |             with self._counter_lock:
 79 |                 self.error_pins += 1
 80 |         else:
 81 |             with self._counter_lock:
 82 |                 self.done_pins += 1
 83 | 
 84 |     def do_import(self, file_path):
 85 |         uploader = Uploader(
 86 |             self._base_url,
 87 |             self._username,
 88 |             self._password,
 89 |             login=True,
 90 |             min_upload_size_kb=self._min_upload_size_kb,
 91 |         )
 92 |         pins = from_csv(file_path)
 93 |         self._started = True
 94 |         self._creating_boards = True
 95 |         uploader.create_boards(
 96 |             set([pin.board for pin in pins])
 97 |         )
 98 |         self._creating_boards = False
 99 |         self.total_pins = len(pins)
100 |         jobs = []
101 |         for pin in pins:
102 |             job = self._executor.submit(
103 |                 self.create_single_pin,
104 |                 uploader,
105 |                 pin,
106 |             )
107 |             jobs.append(job)
108 |         self._executor.shutdown(wait=True)
109 | 
110 | 
111 | 


--------------------------------------------------------------------------------
/src/picktrue/pinry/uploader.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | import os
  3 | from urllib.parse import urljoin
  4 | 
  5 | import requests
  6 | 
  7 | from picktrue.logger import pk_logger
  8 | from picktrue.utils import get_file_size_kb
  9 | 
 10 | 
 11 | class Uploader:
 12 |     def __init__(self, pinry_url, username, password, login=False, min_upload_size_kb=None):
 13 |         """
 14 |         @:param: pinry_url, like https://pin.xxx.com/
 15 |         """
 16 |         self.pinry_url = pinry_url
 17 |         self._api_prefix = urljoin(pinry_url, '/api/v2/')
 18 |         self._login_url = urljoin(self._api_prefix, 'profile/login/')
 19 |         self._pin_creation_url = urljoin(self._api_prefix, 'pins/')
 20 |         self._image_creation_url = urljoin(self._api_prefix, 'images/')
 21 |         self._board_add_url = urljoin(self._api_prefix, 'boards/')
 22 |         self._board_list_url = urljoin(self._api_prefix, 'boards-auto-complete/')
 23 |         self._min_upload_size_kb = min_upload_size_kb
 24 |         self._cached_boards = None
 25 | 
 26 |         self.session = requests.session()
 27 |         self._username = username
 28 |         self._password = password
 29 |         if login:
 30 |             self.login()
 31 | 
 32 |     def _get_board_url(self, board_name):
 33 |         board_id = self._get_board_id(board_name)
 34 |         return f'{self._board_add_url}{board_id}/'
 35 | 
 36 |     def _get_board_id(self, board_name):
 37 |         return self.boards[board_name]
 38 | 
 39 |     def create_boards(self, board_names: set):
 40 |         for name in board_names:
 41 |             self.post(self._board_add_url, json={"name": name})
 42 | 
 43 |     @property
 44 |     def boards(self):
 45 |         if self._cached_boards is not None:
 46 |             return self._cached_boards
 47 |         data = self.session.get(self._board_list_url).json()
 48 |         self._cached_boards = {}
 49 |         for board in data:
 50 |             self._cached_boards[board['name']] = board['id']
 51 |         return self._cached_boards
 52 | 
 53 |     def _get_csrf_token(self):
 54 |         csrf_token = self.session.cookies.get('csrftoken')
 55 |         if not csrf_token:
 56 |             self.session.get(self._api_prefix)
 57 |         csrf_token = self.session.cookies.get('csrftoken')
 58 |         headers = {
 59 |             'X-CSRFToken': csrf_token,
 60 |         }
 61 |         return headers
 62 | 
 63 |     def patch(self, url, json=None):
 64 |         headers = self._get_csrf_token()
 65 |         return self.session.patch(
 66 |             url=url,
 67 |             json=json,
 68 |             headers=headers,
 69 |         )
 70 | 
 71 |     def post(self, url, json=None, files=None):
 72 |         headers = self._get_csrf_token()
 73 |         if files is None:
 74 |             return self.session.post(
 75 |                 url=url,
 76 |                 json=json,
 77 |                 headers=headers,
 78 |             )
 79 |         else:
 80 |             return self.session.post(
 81 |                 url=url,
 82 |                 headers=headers,
 83 |                 files=files,
 84 |             )
 85 | 
 86 |     def login(self):
 87 |         data = {
 88 |             'username': self._username,
 89 |             'password': self._password,
 90 |         }
 91 |         resp = self.post(url=self._login_url, json=data)
 92 |         return resp.status_code == 200
 93 | 
 94 |     def _upload_image(self, file_path):
 95 |         if not os.path.exists(file_path):
 96 |             raise ValueError(
 97 |                 "Failed to upload image [%s]: not found" % file_path
 98 |             )
 99 |         if self._min_upload_size_kb is not None:
100 |             if get_file_size_kb(file_path) < self._min_upload_size_kb:
101 |                 raise ValueError(
102 |                     "Failed to upload image[%s]: size too small" % file_path
103 |                 )
104 |         resp = self.post(
105 |             self._image_creation_url,
106 |             files={"image": open(file_path, "rb")},
107 |         )
108 |         if resp.status_code != 201:
109 |             raise ValueError(
110 |                 "Failed to upload image [%s]: %s" % (
111 |                     file_path,
112 |                     resp.json(),
113 |                 )
114 |             )
115 |         return resp.json()['id']
116 | 
117 |     def _create_pin(self, data, board_name):
118 |         board_url = self._get_board_url(board_name)
119 |         resp = self.post(
120 |             url=self._pin_creation_url,
121 |             json=data,
122 |         )
123 |         if resp.status_code != 201:
124 |             raise ValueError("Failed to create pin %s, %s" % (data, resp.content))
125 |         pin = resp.json()
126 |         pin_id = pin['id']
127 |         resp = self.patch(
128 |             url=board_url,
129 |             json={'pins_to_add': [pin_id, ]}
130 |         )
131 |         if resp.status_code != 200:
132 |             pk_logger.error(
133 |                 "Failed to add pin to board: %s, %s" % (board_name, pin)
134 |             )
135 | 
136 |     def create_with_file_upload(self, description, referer, file_path, board_name, tags):
137 |         image_id = self._upload_image(file_path)
138 |         data = dict(
139 |             description=description,
140 |             referer=referer,
141 |             tags=tags,
142 |             image_by_id=image_id,
143 |         )
144 |         return self._create_pin(
145 |             data,
146 |             board_name,
147 |         )
148 | 
149 |     def create(self, description, referer, url, board_name, tags):
150 |         data = dict(
151 |             description=description,
152 |             referer=referer,
153 |             url=url,
154 |             tags=tags,
155 |         )
156 |         return self._create_pin(
157 |             data,
158 |             board_name,
159 |         )
160 | 


--------------------------------------------------------------------------------
/src/picktrue/rpc/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/winkidney/PickTrue/772b105e4de3852bba41369221f47b8480bf1070/src/picktrue/rpc/__init__.py


--------------------------------------------------------------------------------
/src/picktrue/rpc/channel.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import queue
 3 | import time
 4 | from queue import Queue
 5 | from threading import Lock
 6 | 
 7 | from picktrue.utils import run_as_thread
 8 | 
 9 | 
class BrowserRequester:
    """
    Request/response channel between the downloader and a browser script.

    URLs to fetch are put on ``send_queue``; raw answers arrive on
    ``recv_queue`` and are matched back to the waiting caller by URL. Each
    pending URL owns a lock that is held until its answer arrives.
    """

    def __init__(self):
        self.recv_queue = Queue()
        self.send_queue = Queue()
        # Create the registries before the receiver starts so that the
        # receiver can never observe a half-initialised object.
        self._lock_registry = {}
        self._ret_registry = {}
        self._t = run_as_thread(self.start_recv)

    def start_recv(self):
        """Receiver loop: dispatch every submitted response, forever."""
        while True:
            self._handle_response(self.recv_queue.get())

    def _handle_response(self, raw):
        """
        Store one raw response and wake up the caller waiting for it.

        Malformed payloads and answers nobody is waiting for are dropped;
        raising here would end the receiver loop and leave every later
        request waiting forever.

        :param raw: JSON text with ``request_url`` and ``response`` keys
        """
        import logging

        try:
            ret_meta = json.loads(raw)
            url = ret_meta['request_url']
            data = ret_meta['response']
            lock = self._lock_registry.get(url)
        except (ValueError, TypeError, KeyError):
            logging.getLogger(__name__).warning(
                "Dropping malformed browser response: %r", raw
            )
            return
        if lock is None:
            # Late or duplicate answer: the waiter has already gone.
            return
        self._ret_registry[url] = data
        try:
            lock.release()
        except RuntimeError:
            # Already released by an earlier answer that has not been
            # consumed yet; the stored result was simply refreshed.
            pass

    def get_request(self, timeout=None):
        """
        Return the next URL to fetch.

        :param timeout: seconds to wait; ``None`` blocks until a URL arrives
        :return: the URL, or ``None`` if ``timeout`` expired
        """
        if timeout is not None:
            try:
                return self.send_queue.get(
                    timeout=timeout
                )
            except queue.Empty:
                return None
        return self.send_queue.get()

    def send_and_wait(self, url, timeout=None, max_retry=0):
        """
        Request ``url`` and wait for its answer, retrying on timeout.

        :param timeout: seconds to wait per attempt; ``None`` waits forever
        :param max_retry: number of extra attempts after the first one
        :raises ValueError: if no attempt produced an answer
        """
        retried = 0
        while True:
            self.send_request(url)
            ret = self.get_response(url, timeout=timeout)
            if ret is not None:
                return ret
            retried += 1
            if retried > max_retry:
                raise ValueError("Failed to get url: %s" % url)
            # Pause only when another attempt follows.
            time.sleep(5)

    def send_request(self, url):
        """Queue ``url`` for the browser and arm the lock its answer releases."""
        pending = Lock()
        pending.acquire()
        self._lock_registry[url] = pending
        self.send_queue.put(url)

    def submit_response(self, resp):
        """Hand a raw response over to the receiver loop."""
        self.recv_queue.put(resp)

    def get_response(self, url, timeout=None):
        """
        Wait for the answer to a previously sent ``url``.

        :param timeout: seconds to wait; ``None`` blocks until answered
        :return: the response payload, or ``None`` if ``timeout`` expired
        """
        lock = self._lock_registry[url]
        if timeout is None:
            got = lock.acquire()
        else:
            got = lock.acquire(timeout=timeout)
        if not got:
            return None
        self._lock_registry.pop(url, None)
        return self._ret_registry.pop(url)
70 | 


--------------------------------------------------------------------------------
/src/picktrue/rpc/taskserver.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import logging
  3 | from threading import Thread
  4 | 
  5 | from flask import Flask, jsonify
  6 | from flask import request
  7 | 
  8 | from picktrue.rpc.channel import BrowserRequester
  9 | 
# Flask application on which the task endpoints of this module are registered.
app = Flask(__name__)


# Only the shared ``server`` instance is exported by a star-import.
__all__ = [
    "server",
]
 16 | 
 17 | 
 18 | class TaskServer:
 19 |     def __init__(self):
 20 |         self.requester = BrowserRequester()
 21 |         self._thread = None
 22 | 
 23 |     def request(self, url):
 24 |         return self.requester.send_and_wait(url)
 25 | 
 26 |     def log_received(self):
 27 |         while True:
 28 |             resp = self.request("https://www.artstation.com/users/braveking/projects.json?page=1")
 29 |             print("resp received", resp)
 30 | 
 31 |     def start_debug_task(self):
 32 |         t = Thread(target=self.log_received)
 33 |         t.setDaemon(True)
 34 |         t.start()
 35 | 
 36 |     def is_running(self):
 37 |         if self._thread is None:
 38 |             return False
 39 |         if not self._thread.is_alive():
 40 |             return False
 41 |         return True
 42 | 
 43 |     def start(self):
 44 |         if self.is_running():
 45 |             return False
 46 | 
 47 |         def run():
 48 |             app.run(debug=True, port=2333, use_reloader=False)
 49 |             app.logger.setLevel(logging.WARNING)
 50 | 
 51 |         self._thread = Thread(target=run)
 52 |         self._thread.setDaemon(True)
 53 |         self._thread.start()
 54 | 
 55 | 
# Shared TaskServer instance used by the route handlers and BrowserMetaFetcher below.
server = TaskServer()
 57 | 
 58 | 
 59 | @app.route("/tasks/")
 60 | def get_task():
 61 |     task = server.requester.get_request(10)
 62 |     if task is None:
 63 |         return jsonify([])
 64 |     else:
 65 |         return jsonify([task, ])
 66 | 
 67 | 
 68 | @app.route("/tasks/submit/", methods=["POST", "GET"])
 69 | def task_submit():
 70 |     """
 71 |     :return:
 72 |     """
 73 |     resp = request.data
 74 |     server.requester.submit_response(
 75 |         resp
 76 |     )
 77 |     return jsonify({})
 78 | 
 79 | 
 80 | class BrowserMetaFetcher:
 81 |     server = server
 82 | 
 83 |     def __init__(self):
 84 |         log = logging.getLogger('werkzeug')
 85 |         log.setLevel(logging.ERROR)
 86 |         self.server.start()
 87 | 
 88 |     def request_url(self, url):
 89 |         text = self.server.requester.send_and_wait(url, timeout=10, max_retry=3)
 90 |         try:
 91 |             return json.loads(text)
 92 |         except json.JSONDecodeError:
 93 |             return text
 94 | 
 95 | 
if __name__ == '__main__':
    # Manual debugging entry point: start the HTTP server thread.
    server.start()
    # server.start_debug_task()
    # The server runs in a daemon thread, so the main thread has to stay
    # busy; presumably the breakpoint is what keeps the process alive here.
    import pdb;pdb.set_trace()
100 | 


--------------------------------------------------------------------------------
/src/picktrue/sites/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/winkidney/PickTrue/772b105e4de3852bba41369221f47b8480bf1070/src/picktrue/sites/__init__.py


--------------------------------------------------------------------------------
/src/picktrue/sites/abstract.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from pathlib import Path
  3 | 
  4 | import requests
  5 | 
  6 | from picktrue.meta import UA, ImageItem
  7 | from picktrue.utils import retry
  8 | 
  9 | 
 10 | def normalize_proxy_string(proxy):
 11 |     if 'socks5' in proxy:
 12 |         if 'socks5h' not in proxy:
 13 |             proxy = proxy.replace('socks5', 'socks5h')
 14 |     return proxy
 15 | 
 16 | 
 17 | def get_proxy(proxy_string=None):
 18 |     if proxy_string is None:
 19 |         return {}
 20 |     proxy = normalize_proxy_string(proxy_string)
 21 |     proxies = {
 22 |         'proxies': {
 23 |             'http': proxy,
 24 |             'https': proxy,
 25 |         }
 26 |     }
 27 |     return proxies
 28 | 
 29 | 
 30 | class DummySite:
 31 | 
 32 |     @property
 33 |     def dir_name(self):
 34 |         raise NotImplementedError()
 35 | 
 36 |     @property
 37 |     def fetcher(self):
 38 |         raise NotImplementedError()
 39 | 
 40 |     @property
 41 |     def tasks(self):
 42 |         raise NotImplementedError()
 43 | 
 44 | 
 45 | class DummyFetcher:
 46 | 
 47 |     def __init__(self, proxies=None):
 48 |         self.session = requests.session()
 49 |         if proxies is not None:
 50 |             self.session.proxies = proxies
 51 |         self.session.headers.update(UA)
 52 | 
 53 |     @staticmethod
 54 |     def _safe_name(name):
 55 |         name = name.replace("/", " ")
 56 |         name = name.replace("\\", " ")
 57 |         name = name.strip()
 58 |         name = name.replace(" ", '-')
 59 |         return name
 60 | 
 61 |     @staticmethod
 62 |     def _safe_path(path):
 63 |         return Path(path).absolute()
 64 | 
 65 |     @retry()
 66 |     def get(self, url, **kwargs):
 67 |         """
 68 |         :rtype: requests.Response
 69 |         """
 70 |         if 'timeout' in kwargs:
 71 |             kwargs.pop('timeout')
 72 |         return self.session.get(url, timeout=(2, 30), **kwargs)
 73 | 
 74 |     def get_save_path(self, base_path, image_name, image: ImageItem):
 75 |         save_path = os.path.join(
 76 |             base_path,
 77 |             image_name,
 78 |         )
 79 |         return save_path
 80 | 
 81 |     def save(self, content, task_item):
 82 |         """
 83 |         :type content: bytearray
 84 |         :type task_item: picktrue.meta.TaskItem
 85 |         """
 86 |         image = task_item.image
 87 |         image_name = image.name
 88 |         if callable(image.name):
 89 |             image_name = image.name(image.url, content)
 90 | 
 91 |         save_path = self.get_save_path(
 92 |             task_item.base_save_path,
 93 |             image_name,
 94 |             image,
 95 |         )
 96 |         save_path = self._safe_path(save_path)
 97 |         if os.path.exists(save_path):
 98 |             return
 99 |         with open(save_path, "wb") as f:
100 |             f.write(content)
101 |             f.flush()
102 | 


--------------------------------------------------------------------------------
/src/picktrue/sites/artstation.py:
--------------------------------------------------------------------------------
  1 | from collections import namedtuple, Counter
  2 | import hashlib
  3 | import json
  4 | import os
  5 | import re
  6 | import time
  7 | from urllib.parse import urljoin
  8 | 
  9 | import requests
 10 | 
 11 | from picktrue.meta import ImageItem, UA
 12 | from picktrue.rpc.taskserver import server
 13 | from picktrue.sites.abstract import DummySite, DummyFetcher, get_proxy
 14 | from picktrue.sites.utils import get_name_with_hash_from_url
 15 | 
# Site root; the relative API paths below are joined onto it.
BASE_URL = "https://www.artstation.com/"
# Paged list of a user's projects.
PROJECT_URL_TPL = '/users/{username}/projects.json?page={page}'
# Paged list of a user's albums (25 per page, total count included).
ALBUMS_URL_TPL = 'https://www.artstation.com/albums.json?' \
                 'include_total_count=true&page={page}' \
                 '&per_page=25&user_id={user_id}'
# Paged list of the projects contained in one album.
ALBUM_CONTENT_URL_TPL = 'https://www.artstation.com/users/{username}' \
                        '/projects.json?album_id={album_id}&page={page}'
# Detail document of a single project, addressed by its hash id.
DETAIL_URL_TPL = '/projects/{hash_id}.json'

# One album as listed by the albums endpoint: its title and its id.
Album = namedtuple(
    "Album",
    (
        "name",
        "id",
    )
)
 32 | 
 33 | 
 34 | def parse_single_artwork(artwork_dict: dict):
 35 |     """
 36 |     {
 37 |         "liked":false,
 38 |         "tags":[
 39 | 
 40 |         ],
 41 |         "hide_as_adult":false,
 42 |         "visible_on_artstation":true,
 43 |         "assets":[
 44 |             {
 45 |                 "has_image":true,
 46 |                 "has_embedded_player":false,
 47 |                 "player_embedded":null,
 48 |                 "oembed":null,
 49 |                 "id":12260469,
 50 |                 "title_formatted":"",
 51 |                 "image_url":"https://cdnb.artstation.com/p/assets/images/images/012/260/469/large/ham-sung-choul-braveking-180809-1-mini.jpg?1533864344",
 52 |                 "width":1300,
 53 |                 "height":2434,
 54 |                 "position":0,
 55 |                 "asset_type":"image",
 56 |                 "viewport_constraint_type":"constrained"
 57 |             },
 58 |             {
 59 |                 "has_image":false,
 60 |                 "has_embedded_player":false,
 61 |                 "player_embedded":null,
 62 |                 "oembed":null,
 63 |                 "id":12260473,
 64 |                 "title_formatted":"",
 65 |                 "image_url":"https://cdnb.artstation.com/p/assets/covers/images/012/260/473/large/ham-sung-choul-braveking-180809-1-mini-2.jpg?1533864353",
 66 |                 "width":822,
 67 |                 "height":822,
 68 |                 "position":1,
 69 |                 "asset_type":"cover",
 70 |                 "viewport_constraint_type":"constrained"
 71 |             }
 72 |         ],
 73 |         "collections":[
 74 | 
 75 |         ],
 76 |         "user":{
 77 |             "followed":true,
 78 |             "following_back":false,
 79 |             "blocked":false,
 80 |             "is_staff":false,
 81 |             "id":199106,
 82 |             "username":"braveking",
 83 |             "headline":"freelance artist",
 84 |             "full_name":"Ham Sung-Choul(braveking)",
 85 |             "permalink":"https://www.artstation.com/braveking",
 86 |             "medium_avatar_url":"https://cdna.artstation.com/p/users/avatars/000/199/106/medium/ab27ac7f48de117074c14963a3371914.jpg?1461412259",
 87 |             "large_avatar_url":"https://cdna.artstation.com/p/users/avatars/000/199/106/large/ab27ac7f48de117074c14963a3371914.jpg?1461412259",
 88 |             "small_cover_url":"https://cdn.artstation.com/static_media/placeholders/user/cover/default.jpg",
 89 |             "pro_member":false
 90 |         },
 91 |         "medium":null,
 92 |         "categories":[
 93 |             {
 94 |                 "name":"Characters",
 95 |                 "id":1
 96 |             },
 97 |             {
 98 |                 "name":"Fantasy",
 99 |                 "id":2
100 |             },
101 |             {
102 |                 "name":"Concept Art",
103 |                 "id":3
104 |             }
105 |         ],
106 |         "software_items":[
107 | 
108 |         ],
109 |         "id":3513664,
110 |         "user_id":199106,
111 |         "title":"doodle",
112 |         "description":"<p></p>",
113 |         "description_html":"<p></p>",
114 |         "created_at":"2018-08-09T07:50:11.347-05:00",
115 |         "updated_at":"2018-08-10T01:55:50.964-05:00",
116 |         "views_count":3257,
117 |         "likes_count":699,
118 |         "comments_count":1,
119 |         "permalink":"https://www.artstation.com/artwork/mr5aZ",
120 |         "cover_url":"https://cdnb.artstation.com/p/assets/covers/images/012/260/473/medium/ham-sung-choul-braveking-180809-1-mini-2.jpg?1533864353",
121 |         "published_at":"2018-08-09T07:50:19.308-05:00",
122 |         "editor_pick":true,
123 |         "adult_content":false,
124 |         "admin_adult_content":false,
125 |         "slug":"doodle-184-a5ea10f5-e98e-46e2-866e-63ae54fd443a",
126 |         "suppressed":false,
127 |         "hash_id":"mr5aZ",
128 |         "visible":true
129 |     }
130 |     :rtype: list[ImageItem]
131 |     """
132 |     assets = artwork_dict['assets']
133 |     assets = [
134 |         asset for asset in assets
135 |         if asset['has_image']
136 |     ]
137 |     images = (
138 |         ImageItem(
139 |             url=asset['image_url'],
140 |             name=get_name_with_hash_from_url,
141 |         )
142 |         for asset in assets
143 |     )
144 |     return images
145 | 
146 | 
def parse_artwork_url(item_dict):
    """
    Return the detail URL of one project summary.

    ``item_dict`` is a single entry of the ``data`` list of a projects
    page, which looks like this (abridged)::

        {
        "data":
            [
                {
                    "id":3497866,
                    "user_id":199106,
                    "title":"doodle",
                    "slug":"doodle-184-669828ca-6a1b-4fc7-986d-e4eeaa4b5d55",
                    "cover_asset_id":12192935,
                    "hash_id":"KnrbX",
                    "permalink":"https://www.artstation.com/artwork/KnrbX",
                    "assets_count":1
                },
            ],
            "total_count":38
        }

    :param item_dict: project summary containing a ``hash_id``
    :return: absolute URL of the project's detail JSON
    """
    detail_path = DETAIL_URL_TPL.format(hash_id=item_dict['hash_id'])
    return urljoin(BASE_URL, detail_path)
197 | 
198 | 
def get_project_page_url(username, page=1):
    """Return the URL of page ``page`` of ``username``'s project list."""
    relative = PROJECT_URL_TPL.format(username=username, page=page)
    return urljoin(BASE_URL, relative)
206 | 
207 | 
def get_project_albums_page_url(user_id, page=1):
    """Return the URL of page ``page`` of the album list of ``user_id``."""
    location = ALBUMS_URL_TPL.format(user_id=user_id, page=page)
    return urljoin(BASE_URL, location)
215 | 
216 | 
def get_project_albums_details_page_url(username, album_id, page=1):
    """Return the URL of page ``page`` of the projects in one album."""
    location = ALBUM_CONTENT_URL_TPL.format(
        username=username, album_id=album_id, page=page,
    )
    return urljoin(BASE_URL, location)
225 | 
226 | 
def has_next_page(current_count, total_count):
    """Return ``True`` while fewer items were seen than the total reports."""
    return total_count > current_count
229 | 
230 | 
class BaseMetaFetcher:
    """
    Reads ArtStation metadata documents.

    Subclasses supply :meth:`request_url`; the other methods build the URLs
    and unpack the paged responses.
    """

    def request_url(self, url):
        """Fetch ``url`` and return the decoded document (abstract)."""
        raise NotImplementedError

    def get_artwork_summery(self, summary_url):
        """Return the project detail document found at ``summary_url``."""
        return self.request_url(summary_url)

    def get_albums_index_page(self, user_id):
        """
        Yield an ``Album`` for every album of ``user_id``, across all pages.

        Paging stops once as many albums were seen as ``total_count``
        reports, or as soon as a page comes back empty.
        """
        page = 1
        current_count = 0
        total_count = 1
        while current_count < total_count:
            url = get_project_albums_page_url(
                user_id=user_id,
                page=page,
            )
            resp = self.request_url(url)
            assert 'total_count' in resp
            total_count = resp['total_count']
            albums = resp['data']
            if not albums:
                # Nothing more to read even though total_count claims more;
                # stop instead of requesting empty pages forever.
                break
            for album_detail in albums:
                yield Album(
                    id=album_detail['id'],
                    name=album_detail['title'],
                )
            # Accumulate across pages: the count must grow towards the
            # total, otherwise a list longer than one page never ends.
            current_count += len(albums)
            page += 1

    def get_album_projects_single_page(self, username, album_id, page):
        """
        Read one page of the projects inside an album.

        :return: ``(total_count, number of items on this page, items)``
        """
        initial_url = get_project_albums_details_page_url(
            username=username,
            album_id=album_id,
            page=page,
        )
        resp = self.request_url(initial_url)
        assert 'total_count' in resp
        total_count = resp['total_count']
        data_count = len(resp['data'])
        return total_count, data_count, resp['data']

    def get_projects_single_page(self, username, page):
        """
        Read one page of a user's project list.

        :return: ``(total_count, number of items on this page, items)``
        """
        initial_url = get_project_page_url(
            username=username,
            page=page,
        )
        resp = self.request_url(initial_url)
        assert 'total_count' in resp
        total_count = resp['total_count']
        data_count = len(resp['data'])
        return total_count, data_count, resp['data']
280 | 
281 | 
class LocalMetaFetcher(BaseMetaFetcher):
    """Fetches metadata directly with ``requests``, optionally via proxies."""

    def __init__(self, proxies):
        """
        :param proxies: proxy mapping passed to ``requests.get``
        """
        self._proxies = proxies

    def request_url(self, url):
        """
        GET ``url`` and return its decoded JSON body.

        Uses the same (connect, read) timeout as ``DummyFetcher.get`` so a
        stalled connection cannot block the task generator forever.
        """
        resp = requests.get(
            url,
            headers=UA,
            proxies=self._proxies,
            timeout=(2, 30),
        )
        return resp.json()
289 | 
290 | 
class BrowserMetaFetcher(BaseMetaFetcher):
    """Fetches metadata through the browser via the shared task server."""

    server = server

    def __init__(self):
        self.server.start()

    def request_url(self, url):
        """
        Fetch ``url`` through the browser, waiting until it answers.

        :return: the decoded JSON document, or the raw text when the answer
            is not valid JSON
        """
        raw = self.server.requester.send_and_wait(url)
        try:
            decoded = json.loads(raw)
        except json.JSONDecodeError:
            return raw
        return decoded
303 | 
304 | 
class TaskMaker:
    """
    Generates the ``ImageItem`` stream for one ArtStation user: first the
    images of the user's project list, then those of every album.
    """

    def __init__(self, user_url, username, meta_fetcher: BaseMetaFetcher):
        """
        :param user_url: profile URL, scanned for the numeric user id
        :param username: user name used in the API URLs
        :param meta_fetcher: object used to read metadata documents
        """
        self.user_url = user_url
        self.username = username
        self.meta = meta_fetcher
        # Resolved lazily by __call__.
        self.user_id = None

    @staticmethod
    def _get_repeated_uid(user_ids):
        """
        Return the id that occurs most often in ``user_ids``.

        :raises ValueError: if ``user_ids`` is empty
        """
        if not user_ids:
            raise ValueError("No user id found on the user page")
        counter = Counter(user_ids)
        top_uid = counter.most_common(1)
        return top_uid[0][0]

    def get_user_id(self, user_url):
        """Extract the user id from the page at ``user_url``."""
        resp = self.meta.request_url(user_url)
        user_ids = re.findall(r"user_id.*?(\d+)", resp)
        return self._get_repeated_uid(user_ids)

    def __call__(self, *args, **kwargs):
        """Resolve the user id, then yield every image item of the user."""
        self.user_id = self.get_user_id(user_url=self.user_url)
        yield from self._gen_tasks()

    def _get_image_item_from_detail(self, artwork_summary):
        """Fetch a project's detail document and parse its image items."""
        summary_url = parse_artwork_url(artwork_summary)
        resp = self.meta.get_artwork_summery(summary_url)
        return parse_single_artwork(resp)

    def _yield_image_items(self, data, album_name=None):
        """
        Yield the image items of every project summary in ``data``.

        With ``album_name`` given, each item is rebuilt so that its ``meta``
        carries the album name.
        """
        for summary in data:
            for image_item in self._get_image_item_from_detail(
                summary,
            ):
                if album_name is not None:
                    image_item = ImageItem(
                        url=image_item.url,
                        name=image_item.name,
                        meta={"album_name": album_name},
                    )
                yield image_item

    def _gen_tasks_from_root(self):
        """Yield the image items of the user's paged project list."""
        page = 1
        total_count, current_count, data = self.meta.get_projects_single_page(
            self.username,
            page,
        )
        yield from self._yield_image_items(data)
        while has_next_page(current_count, total_count):
            page += 1
            _, count_delta, data = self.meta.get_projects_single_page(
                self.username,
                page=page,
            )
            if count_delta == 0:
                # An empty page means there is nothing left, even if
                # total_count claims more; avoid looping forever.
                break
            current_count += count_delta
            yield from self._yield_image_items(data)
            time.sleep(0.2)

    def _gen_tasks_from_albums(self):
        """Yield the image items of every album, tagged with the album name."""
        for album in self.meta.get_albums_index_page(user_id=self.user_id):
            page = 1
            current_count = 0
            total_count = 1
            while has_next_page(current_count, total_count):
                total_count, count_delta, data = self.meta.get_album_projects_single_page(
                    self.username,
                    album.id,
                    page,
                )
                if count_delta == 0:
                    # Empty page: stop paging this album (see above).
                    break
                current_count += count_delta
                yield from self._yield_image_items(data, album_name=album.name)
                time.sleep(0.2)
                page += 1

    def _gen_tasks(self):
        """Yield project-list items first, then album items."""
        yield from self._gen_tasks_from_root()
        yield from self._gen_tasks_from_albums()
381 | 
382 | 
class ArtStationFetcher(DummyFetcher):
    """
    Fetcher that stores album images in a sub-directory per album.

    New url to test album download: https://www.artstation.com/bvdhorst
    """

    def save(self, content, task_item):
        """
        Save ``content``; images carrying ``meta`` go into a directory named
        after their album, all others are handled by the base class.
        """
        image = task_item.image
        if image.meta is None:
            return super().save(content, task_item)
        album_dir = os.path.join(
            task_item.base_save_path,
            self._safe_name(image.meta['album_name']),
        )
        os.makedirs(album_dir, exist_ok=True)
        album_dir = self._safe_path(album_dir)
        # The name may be a factory computing it from URL and content.
        file_name = image.name(image.url, content) if callable(image.name) else image.name
        with open(os.path.join(album_dir, file_name), "wb") as out:
            out.write(content)
412 | 
413 | 
class ArtStation(DummySite):
    """
    Site object producing download tasks for one ArtStation user page.

    >>> art = ArtStation("https://www.artstation.com/braveking")
    >>> len(list(art.tasks)) > 0
    True
    """

    def __init__(self, user_url: str, proxy=None):
        """
        :param user_url: url of the user page, must start with ``BASE_URL``
        :param proxy: optional proxy setting, passed through ``get_proxy``
        """
        self._tasks = None
        self.url = user_url
        assert user_url.startswith(BASE_URL)
        # What is left after stripping the base url is used as the user name.
        self.username = user_url.replace(BASE_URL, '')
        proxies = get_proxy(proxy)
        self._proxies = proxies
        self._fetcher = ArtStationFetcher(**proxies)
        self._task_maker = TaskMaker(
            user_url=user_url,
            username=self.username,
            meta_fetcher=BrowserMetaFetcher(),
        )

    @property
    def fetcher(self):
        """The fetcher instance used to download and save the images."""
        return self._fetcher

    @property
    def dir_name(self):
        """Directory name for this site's downloads: the user name."""
        return self.username

    @property
    def tasks(self):
        """Lazily yield every task produced by the task maker."""
        for task in self._task_maker():
            yield task
445 | 
446 | 
# When executed as a script, run the doctests embedded in this module.
if __name__ == "__main__":
    import doctest
    doctest.testmod()
450 | 


--------------------------------------------------------------------------------
/src/picktrue/sites/douban.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | from pyquery import PyQuery as PQ
 3 | 
 4 | from picktrue.meta import ImageItem, UA
 5 | from picktrue.sites.abstract import DummySite, DummyFetcher
 6 | from picktrue.sites.utils import get_filename_fom_url
 7 | 
# URL template of an album page; ``album_id`` is filled in by _get_album_url().
ALBUM_URL_TPL = "https://www.douban.com/photos/album/{album_id}/"
 9 | 
10 | 
11 | def _get_large_img_url(small_img_url):
12 |     return small_img_url.replace("/m/", "/l/")
13 | 
14 | 
def _get_album_url(album_id, m_start=None):
    """
    Build the url of an album page.

    :param album_id: id inserted into ``ALBUM_URL_TPL``
    :param m_start: when given, appended as the ``m_start`` query parameter
    """
    album_url = ALBUM_URL_TPL.format(album_id=album_id)
    if m_start is None:
        return album_url
    return album_url + "?m_start=%s" % m_start
20 | 
21 | 
def _parse_page(page_html):
    """
    Return the large-image urls of all ``.photolst_photo img`` elements
    found in ``page_html``.
    """
    document = PQ(page_html)
    thumbnail_urls = [
        PQ(node).attr("src")
        for node in document(".photolst_photo img")
    ]
    return list(map(_get_large_img_url, thumbnail_urls))
28 | 
29 | 
30 | def _get_album_id_form_init_url(url):
31 |     return url.split("/")[-2]
32 | 
33 | 
def parse_page(page_html, previous_m_start=None):
    """
    Parse one album page.

    :param page_html: html text of the album page
    :param previous_m_start: offset reached before this page, ``None`` or 0
        for the first page
    :return: tuple ``(images, has_next, new_m_start)`` where ``has_next`` is
        True when the page holds at least 18 images and ``new_m_start`` is
        the previous offset plus the number of images on this page
    """
    base_offset = previous_m_start if previous_m_start else 0
    images = _parse_page(page_html)
    found = len(images)
    return images, found >= 18, base_offset + found
40 | 
41 | 
def get_images(album_home_url, album_id):
    """
    Yield the image urls of an album, following the paging until a page
    reports that there is no next one.

    :param album_home_url: url of the first page to request
    :param album_id: id used to build the urls of the following pages
    :raises ValueError: when a page request does not answer with status 200
    """
    session = requests.Session()
    session.headers.update(UA)

    next_url = album_home_url
    offset = None
    more_pages = True
    while more_pages:
        resp = session.get(url=next_url)
        if resp.status_code != 200:
            raise ValueError(
                "Failed to fetch douban meta info, code: %s" % resp.status_code
            )
        images, more_pages, offset = parse_page(resp.text, offset)
        # Prepare the url of the following page before handing out results.
        next_url = _get_album_url(album_id, offset)
        yield from images
64 | 
65 | 
class DoubanPersonalAlbum(DummySite):
    """
    Site object producing download tasks for one douban personal album.
    """

    fetcher = DummyFetcher()

    def __init__(self, album_url):
        """
        :param album_url: url of the album's first page
        """
        self.base_url = album_url
        self.album_id = _get_album_id_form_init_url(album_url)

    @property
    def dir_name(self):
        """Directory name for the downloads: the album id."""
        return self.album_id

    @property
    def tasks(self):
        """Lazily yield one ImageItem per image of the album."""
        image_urls = get_images(self.base_url, self.album_id)
        for url in image_urls:
            yield ImageItem(
                url=url,
                name=get_filename_fom_url(url),
            )
85 | 


--------------------------------------------------------------------------------
/src/picktrue/sites/huaban.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | from json import JSONDecodeError
  3 | from pprint import pformat, pprint
  4 | 
  5 | import os
  6 | 
  7 | import random
  8 | import string
  9 | from collections import namedtuple
 10 | from urllib.parse import urljoin
 11 | 
 12 | from picktrue.logger import pk_logger
 13 | from picktrue.meta import ImageItem, DownloadTaskItem
 14 | from picktrue.pinry.ds import Pin2Import, write_to_csv
 15 | from picktrue.sites.abstract import DummySite, DummyFetcher
 16 | from picktrue.sites.utils import safe_file_name
 17 | from picktrue.utils import retry
 18 | 
# URL template of a pin's image file; get_pins() fills ``file_key`` with the
# pin's file key.
IMAGE_URL_TPL = "http://img.hb.aicdn.com/{file_key}"
# Root url of the site.
BASE_URL = "https://huaban.com"

# Headers added to the session of every HuaBanFetcher.
XHR_HEADERS = {
    "X-Requested-With": "XMLHttpRequest",
    "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/56.0.2924.87 Safari/537.36",
}
 29 | 
 30 | 
 31 | Pin = namedtuple(
 32 |     'Pin',
 33 |     (
 34 |         'url',
 35 |         'filename',
 36 |     )
 37 | )
 38 | 
 39 | 
 40 | class HuaBanFetcher(DummyFetcher):
 41 | 
 42 |     def __init__(self):
 43 |         super(HuaBanFetcher, self).__init__()
 44 |         self.session.headers.update(
 45 |             XHR_HEADERS,
 46 |         )
 47 | 
 48 |     @classmethod
 49 |     def get_huaban_save_path(cls, task_item):
 50 |         board_name = cls._safe_name(task_item.image.meta['board_name'])
 51 |         save_path = os.path.join(
 52 |             task_item.base_save_path,
 53 |             board_name,
 54 |         )
 55 |         cls.ensure_dir(dir_path=save_path)
 56 |         save_path = os.path.join(
 57 |             save_path,
 58 |             task_item.image.name,
 59 |         )
 60 |         save_path = cls._safe_path(save_path)
 61 |         return save_path
 62 | 
 63 |     @retry()
 64 |     def get(self, url, require_json=False, **kwargs):
 65 |         """
 66 |         :param require_json: If require_json is True and
 67 |         the result is not json-encoded, will raise error
 68 |         then have a retry.
 69 |         :rtype: requests.Response
 70 |         """
 71 |         if 'timeout' in kwargs:
 72 |             kwargs.pop('timeout')
 73 |         resp = self.session.get(url, timeout=(2, 30), **kwargs)
 74 |         if require_json:
 75 |             try:
 76 |                 resp.json()
 77 |             except JSONDecodeError:
 78 |                 pk_logger.error(
 79 |                     "Failed to convert resp to json for url {}: {}".format(
 80 |                         url,
 81 |                         resp.text,
 82 |                     )
 83 |                 )
 84 |                 raise
 85 |         return resp
 86 | 
 87 |     @staticmethod
 88 |     def ensure_dir(dir_path):
 89 |         return os.makedirs(dir_path, exist_ok=True)
 90 | 
 91 |     def save(self, content, task_item: DownloadTaskItem):
 92 |         """
 93 |         :type content: bytearray
 94 |         :type task_item: picktrue.meta.TaskItem
 95 |         """
 96 |         if task_item.image.meta is None:
 97 |             return super(HuaBanFetcher, self).save(content, task_item)
 98 |         save_path = self.get_huaban_save_path(task_item)
 99 |         with open(save_path, "wb") as f:
100 |             f.write(content)
101 |         pin2import = mk_pin2import(task_item)
102 |         if pin2import:
103 |             write_to_csv(pin2import, base_path=task_item.base_save_path)
104 | 
105 | 
106 | def _random_string(length):
107 |     return ''.join(
108 |         random.choice(string.ascii_lowercase + string.digits)
109 |         for _ in range(length)
110 |     )
111 | 
112 | 
113 | def _get_file_ext(mime_type):
114 |     return mime_type.split("/")[-1]
115 | 
116 | 
def get_pins(pins_meta):
    """
    Convert the raw pin entries of an api response into flat pin dicts.

    :param pins_meta: iterable of raw pin dicts; each must provide
        ``pin_id``, ``file`` (with ``type`` and ``key``), ``raw_text``,
        ``link``, ``source`` and ``tags``
    :return: list with one dict per pin
    """

    def _flatten(info):
        ext = _get_file_ext(info['file']['type'])
        file_name = "%s.%s" % (info['pin_id'], ext)
        return {
            "pin_id": info['pin_id'],
            "url": IMAGE_URL_TPL.format(file_key=info['file']['key']),
            'type': info['file']['type'],
            'ext': ext,
            "title": info['raw_text'],
            "link": info['link'],
            "source": info['source'],
            "file_name": file_name,
            "tags": info['tags'],
        }

    return [_flatten(info) for info in pins_meta]
135 | 
136 | 
def get_boards(boards_meta):
    """
    Convert the raw board entries of an api response into flat board dicts.

    :param boards_meta: iterable of raw board dicts; each must provide
        ``board_id``, ``title`` and ``pin_count``
    :return: list with one dict per board; ``pins`` is always ``None`` here
    """
    return [
        {
            "board_id": raw['board_id'],
            "title": raw['title'],
            "pins": None,
            "pin_count": raw['pin_count'],
            "dir_name": safe_file_name(raw['title']),
        }
        for raw in boards_meta
    ]
149 | 
150 | 
def mk_pin2import(task_item: DownloadTaskItem) -> Pin2Import or None:
    """
    Build the import record of a downloaded pin.

    :param task_item: the finished download task
    :return: a ``Pin2Import`` for images carrying meta data, else ``None``
    """
    image = task_item.image
    if image.meta is None:
        return None
    pin_meta = image.pin_meta
    return Pin2Import(
        referer=pin_meta['link'],
        tags=pin_meta['tags'],
        description=pin_meta['title'],
        board=image.meta['board_name'],
        file_abs_path=HuaBanFetcher.get_huaban_save_path(task_item),
        image_url2download="",
    )
163 | 
164 | 
class Board(object):
    """
    One huaban board: its profile plus a lazily fetched list of its pins.

    The profile is requested once when the object is created. The ``pins``
    property can be consumed only once per instance, because fetching
    asserts that the pin cache is still empty.
    """

    BOARD_API_BASE = "https://api.huaban.com/boards/{board_id}/pins?limit=20"

    def __init__(self, board_url_or_id):
        """
        :param board_url_or_id: a board id, or a url containing
            ``boards/<digits>``
        """
        raw = str(board_url_or_id)
        self.fetcher = HuaBanFetcher()
        # Anything containing "http" is treated as a url to pull the id from.
        self.id = re.findall(r'boards/(\d+)', raw)[0] if "http" in raw else raw
        self.base_url = self.BOARD_API_BASE.format(board_id=self.id)
        self.further_pin_url_tpl = self.base_url + "&max={pin_id}"

        # Filled in by _init_board() below.
        self.pin_count = None
        self.title = None
        self.description = None
        self._pins = []
        self._init_board()

    def _fetch_home(self):
        """
        Request the first page of the board, store the board profile on the
        instance and return the pins of that page.
        """
        payload = self.fetcher.get(
            self.base_url,
            require_json=True,
        ).json()
        profile = payload['board']
        self.pin_count = profile['pin_count']
        self.title = profile['title']
        self.description = profile['description']
        return get_pins(payload['pins'])

    # Initialising the board is the very same request as fetching page one.
    _init_board = _fetch_home

    def _fetch_further(self, prev_pins):
        """
        Request the page following ``prev_pins``.

        :param prev_pins: pins fetched so far; the id of the last one is
            sent as the ``max`` parameter
        :return: the pins of the next page, ``[]`` (after logging an error)
            when ``prev_pins`` is empty
        """
        if len(prev_pins) == 0:
            info = (
                "prev_pins should not be [], "
                "board: %s, "
                "url: %s, "
                "pin_count: %s, "
                "current_pins: %s, "
            )
            details = (
                self.title,
                self.base_url,
                self.pin_count,
                pformat(self._pins),
            )
            pk_logger.error(info % details)
            return []

        last_pin_id = prev_pins[-1]['pin_id']
        next_url = self.further_pin_url_tpl.format(pin_id=last_pin_id)
        payload = self.fetcher.get(
            next_url,
            require_json=True,
        ).json()
        return get_pins(payload['pins'])

    def _fetch_pins(self):
        """
        Yield all pins of the board, requesting pages as needed, until the
        announced pin count is reached or a page comes back empty.
        """
        assert len(self._pins) == 0
        self._pins.extend(self._fetch_home())
        yield from self._pins
        while self.pin_count > len(self._pins):
            batch = self._fetch_further(self._pins)
            if len(batch) <= 0:
                break
            self._pins.extend(batch)
            yield from batch

    @property
    def pins(self):
        """Generator over the pin dicts of the board."""
        for pin in self._fetch_pins():
            yield pin

    def as_dict(self):
        """Return the profile and the pins fetched so far as a dict."""
        return dict(
            pins=self._pins,
            title=self.title,
            description=self.description,
            pin_count=self.pin_count,
        )
253 | 
254 | 
def mk_pin(pin_meta):
    """
    Build a ``Pin`` from a flat pin dict: the url is taken over as is, the
    file name is ``<pin_id>.<ext>``.
    """
    pin_url = pin_meta["url"]
    name_fields = dict(title=pin_meta['pin_id'], ext=pin_meta['ext'])
    return Pin(
        url=pin_url,
        filename=u"{title}.{ext}".format(**name_fields),
    )
265 | 
266 | 
class User(object):
    """
    One huaban user: the profile plus a lazily fetched list of boards.

    The profile is requested once when the object is created.
    """
    BOARDS_URL_TPL = "https://api.huaban.com/{user_id}/boards?limit=30&urlname={user_id}"

    def __init__(self, user_url: str):
        """
        :param user_url: url of the user page; its last path segment is
            used as the user id
        """
        self.fetcher = HuaBanFetcher()
        # Ignore a trailing slash, otherwise the last segment would be empty.
        user_uid = user_url.rstrip("/").split("/")[-1]
        self.base_url = self.BOARDS_URL_TPL.format(
            user_id=user_uid
        )
        self.further_url_tpl = self.base_url + "&max={board_id}"

        # Filled in by _init_profile() below.
        self.username = None
        self.board_count = None
        self.pin_count = None
        self._boards_metas = []
        self._init_profile()

    def _fetch_home(self):
        """
        Request the first page of boards, store the user profile on the
        instance and return the boards of that page.
        """
        resp = self.fetcher.get(self.base_url, require_json=True)
        meta = resp.json()
        user_meta = meta['user']
        self.username = user_meta['username']
        self.board_count = user_meta['board_count']
        self.pin_count = user_meta['pin_count']
        return get_boards(meta['boards'])

    # Initialising the profile is the very same request as fetching page one.
    _init_profile = _fetch_home

    def _fetch_further(self, prev_boards):
        """
        Request the page following ``prev_boards``.

        :param prev_boards: boards fetched so far; the id of the last one
            is sent as the ``max`` parameter
        :return: the boards of the next page, ``[]`` when ``prev_boards`` is
            empty (there is no id to continue from)
        """
        if not prev_boards:
            pk_logger.error(
                "prev_boards should not be [], url: %s, board_count: %s" % (
                    self.base_url,
                    self.board_count,
                )
            )
            return []
        max_id = prev_boards[-1]['board_id']
        further_url = self.further_url_tpl.format(
            board_id=max_id,
        )
        resp = self.fetcher.get(
            further_url,
            require_json=True,
        )
        content = resp.json()
        return get_boards(content['boards'])

    def _fetch_boards(self):
        """
        Yield a ``Board`` for every board of the user, requesting pages as
        needed, until the announced board count is reached or a page comes
        back empty.
        """
        assert len(self._boards_metas) == 0
        self._boards_metas.extend(self._fetch_home())
        further_boards = self._boards_metas
        while True:
            for meta in further_boards:
                yield Board(meta['board_id'])
            if self.board_count <= len(self._boards_metas):
                break
            further_boards = self._fetch_further(self._boards_metas)
            if not further_boards:
                # The announced count can not be reached; without this stop
                # the same request would be repeated forever.
                break
            self._boards_metas.extend(further_boards)

    @property
    def boards(self):
        """
        :rtype: iter[Board]
        """
        yield from self._fetch_boards()

    def as_dict(self):
        """
        Return the profile as a dict. ``boards`` holds the (unconsumed)
        generator returned by the ``boards`` property.
        """
        return {
            "username": self.username,
            "board_count": self.board_count,
            "boards": self.boards,
        }
333 | 
334 | 
class HuaBan(DummySite):
    """
    Site object producing download tasks for all boards of a huaban user.
    """

    fetcher = HuaBanFetcher()

    def __init__(self, user_url):
        """
        :param user_url: url of the user page
        """
        self.meta = None
        self.base_url = user_url
        self.user = User(user_url)
        # Boards seen so far while iterating, kept for as_dict().
        self._boards = []

    @property
    def dir_name(self):
        """Directory name for the downloads: the user name."""
        return self.user.username

    @property
    def tasks(self):
        """Lazily yield one ImageItem per pin, tagged with its board title."""
        for board, pin_meta in self._boards_pins:
            pin = mk_pin(pin_meta)
            board_info = {
                'board_name': board.title,
            }
            yield ImageItem(
                url=pin.url,
                name=pin.filename,
                meta=board_info,
                pin_meta=pin_meta,
            )

    @property
    def _boards_pins(self):
        """Yield ``(board, pin)`` pairs, board after board."""
        for board in self.user.boards:
            self._boards.append(board)
            yield from ((board, pin) for pin in board.pins)

    def as_dict(self):
        """
        Return the user profile as a dict with ``boards`` replaced by the
        dicts of the boards iterated so far.
        """
        profile = self.user.as_dict()
        profile['boards'] = list(map(Board.as_dict, self._boards))
        return profile
377 | 
378 | 
class HuaBanBoard(DummySite):
    """
    Site object producing download tasks for a single huaban board.
    """

    fetcher = HuaBanFetcher()

    def __init__(self, board_url):
        """
        :param board_url: url (or id) of the board
        """
        self.base_url = board_url
        self._board = Board(self.base_url)

    @property
    def dir_name(self):
        """Directory name for the downloads: ``<title>-<id>`` made file-safe."""
        board = self._board
        return safe_file_name("%s-%s" % (board.title, board.id))

    @property
    def tasks(self):
        """Lazily yield one ImageItem per pin of the board."""
        for pin_meta in self._board.pins:
            pin = mk_pin(pin_meta)
            yield ImageItem(
                url=pin.url,
                name=pin.filename,
                pin_meta=pin_meta,
            )
404 | 


--------------------------------------------------------------------------------
/src/picktrue/sites/metmuseum.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import re
  3 | from urllib.parse import urlparse, parse_qs
  4 | 
  5 | from pyquery import PyQuery
  6 | 
  7 | from picktrue.engine import Downloader
  8 | from picktrue.meta import ImageItem
  9 | from picktrue.rpc.taskserver import BrowserMetaFetcher
 10 | from picktrue.sites.abstract import DummySite, DummyFetcher
 11 | from picktrue.logger import pk_logger
 12 | from picktrue.sites.utils import safe_file_name, get_filename_fom_url
 13 | 
# NOTE(review): none of the three constants below is referenced by the code
# of this module; they look like leftovers copied from the huaban site
# module -- confirm nothing imports them before removing.
IMAGE_URL_TPL = "http://img.hb.aicdn.com/{file_key}"
BASE_URL = "http://huaban.com"

XHR_HEADERS = {
    "X-Requested-With": "XMLHttpRequest",
    "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/56.0.2924.87 Safari/537.36",
}
 24 | 
 25 | 
 26 | def _get_params(query_parts):
 27 |     """
 28 |     :param query_parts: just like "material=Archery"
 29 |         or "material=Archery&offset=0&perPage=20&sortBy=Relevance&sortOrder=asc&searchField=All&pageSize=0"
 30 |     """
 31 |     path = urlparse('http://test.com/?' + query_parts)
 32 |     return parse_qs(path.query)
 33 | 
 34 | 
 35 | class SearchPage:
 36 |     def __init__(self, page_url, meta_fetcher: BrowserMetaFetcher):
 37 |         """
 38 |         html page link:
 39 |         https://www.metmuseum.org/art/collection/search#!/search?material=Archery
 40 |         https://www.metmuseum.org/art/collection/search#!?material=Archery&offset=0&perPage=20&sortBy=Relevance&sortOrder=asc&searchField=All&pageSize=0
 41 |         https://www.metmuseum.org/art/collection/search#!?material=Archery&offset=20&perPage=20&sortBy=Relevance&sortOrder=asc&searchField=All&pageSize=0
 42 |         json link:
 43 |         https://www.metmuseum.org/mothra/collectionlisting/search?material=Archery&offset=0&pageSize=0&perPage=20&searchField=All&showOnly=&sortBy=Relevance
 44 |         """
 45 |         query_parts = "?".join(page_url.split("?")[-1:])
 46 |         self._params = _get_params(query_parts)
 47 |         if "material" not in self._params:
 48 |             raise ValueError("Failed to parse url: %s" % page_url)
 49 |         self._fetcher = meta_fetcher
 50 | 
 51 |     @property
 52 |     def dir_name(self):
 53 |         return self.safe_search_keyword
 54 | 
 55 |     @property
 56 |     def safe_search_keyword(self):
 57 |         return safe_file_name(self._params["material"][0])
 58 | 
 59 |     def get_search_request(self, offset, page_size, per_page):
 60 |         tpl = (
 61 |             "https://www.metmuseum.org/mothra/collectionlisting/search"
 62 |             "?material={keyword}"
 63 |             "&offset={offset}"
 64 |             "&pageSize={page_size}"
 65 |             "&perPage={per_page}"
 66 |             "&searchField=All"
 67 |             "&showOnly="
 68 |             "&sortBy=Relevance"
 69 |         )
 70 |         return tpl.format(
 71 |             keyword=self.safe_search_keyword,
 72 |             offset=offset,
 73 |             page_size=page_size,
 74 |             per_page=per_page,
 75 |         )
 76 | 
 77 |     def get_image_items(self, ):
 78 |         """
 79 |         {
 80 |           "results": [
 81 |             {
 82 |               "title": "Archer&#39;s Ring",
 83 |               "description": " ",
 84 |               "artist": "",
 85 |               "culture": "Turkish",
 86 |               "teaserText": "<p>Date: 16th–17th century<br/>Accession Number: 36.25.2814</p>",
 87 |               "url": "https://www.metmuseum.org/art/collection/search/30142?searchField=All&amp;sortBy=Relevance&amp;what=Archery&amp;ft=*&amp;offset=0&amp;rpp=20&amp;pos=1",
 88 |               "image": "https://images.metmuseum.org/CRDImages/aa/mobile-large/LC-36_25_2814-001.jpg",
 89 |               "regularImage": "aa/web-additional/LC-36_25_2814-001.jpg",
 90 |               "largeImage": "aa/web-large/LC-36_25_2814-001.jpg",
 91 |               "date": "16th–17th century",
 92 |               "medium": "Bronze",
 93 |               "accessionNumber": "36.25.2814",
 94 |               "galleryInformation": "Not on view"
 95 |             },
 96 |         }
 97 |         """
 98 |         offset = int(self._params.get('offset', [0])[0])
 99 |         page_size = int(self._params.get('pageSize', [0])[0])
100 |         per_page = int(self._params.get('perPage', [20])[0])
101 |         while True:
102 |             r = self._fetcher.request_url(
103 |                 url=self.get_search_request(
104 |                     offset=offset,
105 |                     page_size=page_size,
106 |                     per_page=per_page,
107 |                 ),
108 |             )
109 |             for image_meta in r['results']:
110 |                 page_url = image_meta['url']
111 |                 items = ItemPage(
112 |                     page_url=page_url,
113 |                     meta_fetcher=self._fetcher,
114 |                     search_keyword=self.safe_search_keyword,
115 |                 ).get_image_items()
116 |                 if items:
117 |                     for item in items:
118 |                         yield item
119 |             req = r['request']
120 |             offset = req['offset'] + per_page
121 |             print(offset, r['totalResults'])
122 |             if offset > r['totalResults']:
123 |                 break
124 | 
125 | 
class ItemPage:
    """
    The page of a single metmuseum artwork; produces the image items of
    that artwork.
    """

    def __init__(self, page_url, meta_fetcher: BrowserMetaFetcher, search_keyword=None):
        """
        https://www.metmuseum.org/art/collection/search/35684?
        searchField=All&sortBy=Relevance&what=Archery&ft=*&offset=0&rpp=20&pos=13
        image:
        https://collectionapi.metmuseum.org/api/collection/v1/iiif/23603/1642473/main-image

        :param page_url: url of the artwork page
        :param meta_fetcher: object whose ``request_url`` performs the requests
        :param search_keyword: keyword of the search that led to this page,
            ``None`` when the page was requested directly
        """
        path = urlparse(page_url)
        # The last path segment of the url is kept as the item id.
        self._item_id = path.path.split("/")[-1]
        self._fetcher = meta_fetcher
        self._page_url = page_url
        self._search_keyword = search_keyword

    @property
    def dir_name(self):
        """The file-safe search keyword, or "" when there is none."""
        if self._search_keyword is None:
            return ""
        return safe_file_name(
            self._search_keyword
        )

    def _mk_item(self, image_url, title, has_many=False):
        """
        Build the ImageItem of one image.

        :param image_url: url of the image file
        :param title: title of the artwork
        :param has_many: True when the artwork has several images
        """
        if image_url.endswith(("restricted", "main-image")):
            # Such urls do not end in a file name; build one from the last
            # three path segments.
            name = "_".join(image_url.split("/")[-3:]) + ".jpg"
        else:
            name = get_filename_fom_url(image_url)
        meta = dict(title=title, has_many=has_many)
        meta['search_keyword'] = self._search_keyword
        return ImageItem(
            image_url,
            name=name,
            meta=meta,
        )

    def get_image_items(self):
        """
        Request the artwork page and return the list of its image items.

        When the page has carousel thumbnails, one item per thumbnail is
        returned; thumbnails without an image url are skipped with a
        warning. Otherwise the single download link is used; when that is
        missing too, a warning is logged and ``[]`` is returned.
        """
        resp = self._fetcher.request_url(
            self._page_url,
        )
        query = PyQuery(resp)
        title = query("#artwork__title").text()
        extra_images = query("img.gtm__carousel__thumbnail")
        main_image = query(".artwork__interaction--download a")

        # Pages showing the rights notice are read from the large-image
        # attribute instead of the superjumbo one.
        has_original_image = "Due to rights restrictions" not in resp
        if has_original_image:
            url_attr = "data-superjumboimage"
        else:
            url_attr = "data-largeimage"

        if len(extra_images) > 0:
            items = []
            for img in extra_images:
                image_url = PyQuery(img).attr(url_attr)
                if not image_url:
                    # attr() gives None for a missing attribute; building an
                    # item from it would raise and abort every remaining task.
                    pk_logger.warning(
                        "No image url on a thumbnail of: %s" % title
                    )
                    continue
                items.append(
                    self._mk_item(
                        image_url,
                        title=title,
                        has_many=True,
                    )
                )
            return items

        image_url = main_image.attr("href")
        if not image_url:
            pk_logger.warning("No image available for: %s" % title)
            return []
        return [
            self._mk_item(
                image_url=image_url,
                title=title,
                has_many=False,
            )
        ]
202 | 
203 | 
class Fetcher(DummyFetcher):
    """
    Fetcher for metmuseum images; decides where each image is stored.
    """

    def get_save_path(self, base_path, image_name, image: ImageItem):
        """
        Return the file path for ``image`` and make sure its directory exists.

        Layout below ``base_path``:

        * ``<search keyword>/`` when the image came from a search,
        * then ``<title>/<image_name>`` for artworks with several images,
        * or ``<title><image_name>`` (made file-safe) for a single image.

        :param base_path: directory all downloads are stored below
        :param image_name: file name of the image
        :param image: the item; its meta must hold ``search_keyword``,
            ``has_many`` and ``title``
        """
        project_dir = base_path
        if image.meta['search_keyword']:
            # Keep base_path as the parent: joining the keyword alone made
            # the path relative to the current working directory.
            # NOTE(review): assumes base_path does not already end with the
            # keyword directory -- confirm against the downloader.
            project_dir = os.path.join(base_path, image.meta['search_keyword'])
        if image.meta['has_many']:
            project_dir = os.path.join(
                project_dir,
                safe_file_name(image.meta['title']),
            )
        else:
            # Prefix the file name with the title, keeping the extension.
            splited = image_name.split(".")
            name, ext = ".".join(splited[:-1]), splited[-1]
            image_name = safe_file_name(image.meta['title'] + name + "." + ext)
        if not os.path.exists(project_dir):
            os.makedirs(project_dir, exist_ok=True)
        return os.path.join(project_dir, image_name)
222 | 
223 | 
class MetMuseum(DummySite):
    """
    Site object producing download tasks for a metmuseum url, which is
    either a collection search or the page of a single artwork.
    """

    fetcher = Fetcher()

    def __init__(self, url):
        """
        :param url: urls containing "search/" are handled as a single
            artwork page, all others as a search page
        """
        self._base_url = url
        page_cls = ItemPage if "search/" in url else SearchPage
        self._iter = page_cls(
            page_url=self._base_url,
            meta_fetcher=BrowserMetaFetcher(),
        )

    @property
    def dir_name(self):
        """Directory name for the downloads, as given by the page object."""
        return self._iter.dir_name

    @property
    def tasks(self):
        """Lazily yield the image items of the page object."""
        print("Task generator begin: ", self._base_url)
        yield from self._iter.get_image_items()
250 | 
251 | 
def main():
    """
    Command line entry point: download the images of one or more urls.

    The single argument is either a url or the path of a text file holding
    one url per line. With a file, images are saved next to it, otherwise
    in the current directory. Progress is printed every 5 seconds until all
    downloads are done.

    Sample urls:
    https://www.metmuseum.org/art/collection/search#!?material=Archery&offset=0&perPage=20&sortBy=Relevance&sortOrder=asc&searchField=All&pageSize=0
    https://www.metmuseum.org/art/collection/search/35684?searchField=All&sortBy=Relevance&what=Archery&ft=*&offset=0&rpp=20&pos=13
    """
    import sys
    import time
    if len(sys.argv) <= 1:
        print("Error, please add argument like: picktrue-metmuseum.exe <url_or_path>")
        # Nothing to do without an argument.
        return
    url = sys.argv[1]
    if os.path.exists(url):
        with open(url) as url_file:
            # Drop the line breaks and skip blank lines.
            urls = [line.strip() for line in url_file if line.strip()]
        save_dir = os.path.dirname(os.path.abspath(url))
    else:
        save_dir = "."
        urls = [url, ]
    if not urls:
        print("Error, no url found in: %s" % url)
        return

    sites = [MetMuseum(target) for target in urls]

    def task_iter():
        for site in sites:
            for task in site.tasks:
                yield task
    # All sites share the class level fetcher, so the first one's is used.
    downloader = Downloader(save_dir=save_dir, fetcher=sites[0].fetcher)
    downloader.add_task(
        task_iter=task_iter(),
        background=True,
    )
    downloader.join(background=True)

    while not downloader.done:
        time.sleep(5)
        print(downloader.describe())
288 | 
289 | 
# Run the command line entry point when executed as a script.
if __name__ == '__main__':
    main()
292 | 


--------------------------------------------------------------------------------
/src/picktrue/sites/pixiv.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import re
  3 | 
  4 | from picktrue.meta import ImageItem
  5 | from picktrue.sites.abstract import DummySite, DummyFetcher, get_proxy
  6 | 
  7 | from pixivpy3 import (
  8 |     AppPixivAPI
  9 | )
 10 | 
 11 | 
 12 | def guess_extension(image_url):
 13 |     return image_url.split('.')[-1]
 14 | 
 15 | 
 16 | def normalize_filename(filename):
 17 |     filename = filename.replace("../", "_")
 18 |     filename = filename.replace("..\\", "_")
 19 |     filename = filename.replace("\\", "_")
 20 |     return filename
 21 | 
 22 | 
 23 | def parse_image_urls(illustration):
 24 |     if 'original_image_url' in illustration['meta_single_page']:
 25 |         url = illustration['meta_single_page']['original_image_url']
 26 |         if illustration['type'] == 'ugoira':
 27 |             url = url.replace("img-original", 'img-zip-ugoira')
 28 |             url = re.findall('(.*)_ugoira0\..*', url)[0]
 29 |             url = "%s%s" % (url, '_ugoira1920x1080.zip')
 30 |         file_name = '%s.%s' % (
 31 |             illustration['id'],
 32 |             guess_extension(url)
 33 |         )
 34 |         yield ImageItem(
 35 |             name=file_name,
 36 |             url=url,
 37 |         )
 38 |     else:
 39 |         dir_name = normalize_filename(illustration['title'])
 40 |         images = illustration['meta_pages']
 41 |         for index, image in enumerate(images):
 42 |             url = image['image_urls']['original']
 43 |             name = "%s.%s" % (index, guess_extension(url))
 44 |             yield ImageItem(
 45 |                 name=name,
 46 |                 url=url,
 47 |                 meta={
 48 |                     'is_comic': True,
 49 |                     'dir_name': dir_name,
 50 |                 }
 51 |             )
 52 | 
 53 | 
 54 | class PixivFetcher(DummyFetcher):
 55 | 
 56 |     def __init__(self, **kwargs):
 57 |         super(PixivFetcher, self).__init__(**kwargs)
 58 |         self.session.headers.update(
 59 |             {'Referer': 'http://www.pixiv.net/'}
 60 |         )
 61 | 
 62 |     def save(self, content, task_item):
 63 |         if task_item.image.meta is None:
 64 |             return super(PixivFetcher, self).save(content, task_item)
 65 |         image = task_item.image
 66 |         save_path = os.path.join(
 67 |             task_item.base_save_path,
 68 |             image.meta['dir_name'],
 69 |         )
 70 |         os.makedirs(save_path, exist_ok=True)
 71 |         save_path = self._safe_path(save_path)
 72 |         save_path = os.path.join(
 73 |             save_path,
 74 |             image.name,
 75 |         )
 76 |         with open(save_path, "wb") as f:
 77 |             f.write(content)
 78 | 
 79 | 
 80 | class Pixiv(DummySite):
 81 | 
 82 |     def __init__(self, url, username, password, proxy=None):
 83 |         proxies = get_proxy(proxy)
 84 |         requests_kwargs = {
 85 |             "timeout": (3, 10),
 86 |         }
 87 |         requests_kwargs.update(proxies)
 88 |         self.api = AppPixivAPI(
 89 |             **requests_kwargs
 90 |         )
 91 |         self._fetcher = PixivFetcher(**proxies)
 92 |         self.api.login(username, password)
 93 |         self._user_id = int(url.split("/")[-1])
 94 |         self._dir_name = None
 95 |         self._total_illustrations = 0
 96 |         self._fetch_user_detail()
 97 | 
 98 |     @property
 99 |     def fetcher(self):
100 |         return self._fetcher
101 | 
102 |     @property
103 |     def dir_name(self):
104 |         assert self._dir_name is not None
105 |         return self._dir_name
106 | 
107 |     def _fetch_user_detail(self):
108 |         assert self._user_id is not None
109 |         profile = self.api.user_detail(self._user_id)
110 |         user = profile['user']
111 |         self._dir_name = "-".join(
112 |             [
113 |                 user['name'],
114 |                 user['account'],
115 |                 str(user['id']),
116 |             ]
117 |         )
118 |         self._dir_name = normalize_filename(self._dir_name)
119 |         self._total_illustrations = profile['profile']['total_illusts']
120 |         return self.dir_name
121 | 
122 |     def _fetch_image_list(self, ):
123 |         ret = self.api.user_illusts(self._user_id)
124 |         while True:
125 |             for illustration in ret.illusts:
126 |                 yield from parse_image_urls(illustration)
127 |             if ret.next_url is None:
128 |                 break
129 |             ret = self.api.user_illusts(
130 |                 **self.api.parse_qs(ret.next_url)
131 |             )
132 | 
133 |     def _fetch_single_image_url(self, illustration_id):
134 |         json_result = self.api.illust_detail(illustration_id)
135 |         illustration_info = json_result.illust
136 |         return illustration_info.image_urls['large']
137 | 
138 |     @property
139 |     def tasks(self):
140 |         yield from self._fetch_image_list()
141 | 


--------------------------------------------------------------------------------
/src/picktrue/sites/utils.py:
--------------------------------------------------------------------------------
 1 | import hashlib
 2 | 
 3 | 
 4 | def _get_file_hash(file_content):
 5 |     m = hashlib.md5()
 6 |     m.update(file_content)
 7 |     return m.digest().hex()
 8 | 
 9 | 
10 | def _get_name_ext_from_url(img_url):
11 |     file_name = img_url.split(
12 |         '/'
13 |     )[-1]
14 |     if "?" in file_name:
15 |         file_name = file_name.split('?')[:-1]
16 |         file_name = '?'.join(file_name)
17 |     name = file_name.split('.')[:-1]
18 |     name = ".".join(name)
19 |     ext = file_name.split('.')[-1]
20 |     return name, ext
21 | 
22 | 
def get_filename_fom_url(img_url):
    """Return the file name at the end of ``img_url`` as ``<name>.<ext>``.

    The name and extension come from ``_get_name_ext_from_url``.
    """
    stem, extension = _get_name_ext_from_url(img_url)
    return "%s.%s" % (stem, extension)
26 | 
27 | 
def get_name_with_hash_from_url(img_url: str, file_content):
    """Return ``<name>-<hash>.<ext>`` for the file at the end of ``img_url``.

    ``<name>`` and ``<ext>`` come from ``_get_name_ext_from_url``; ``<hash>``
    is the result of ``_get_file_hash(file_content)``.
    """
    stem, extension = _get_name_ext_from_url(img_url)
    digest = _get_file_hash(file_content)
    return "%s-%s.%s" % (stem, digest, extension)
34 | 
35 | 
def safe_file_name(file_name):
    """Replace ``/``, ``?`` and ``:`` in ``file_name`` with underscores.

    Each character maps to a different number of underscores (one, two and
    three respectively).
    """
    substitutions = (
        ("/", "_"),
        ("?", "__"),
        (":", "___"),
    )
    for unsafe, replacement in substitutions:
        file_name = file_name.replace(unsafe, replacement)
    return file_name


--------------------------------------------------------------------------------
/src/picktrue/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import time
 3 | 
 4 | from functools import wraps
 5 | from picktrue.logger import pk_logger
 6 | 
 7 | from threading import Thread
 8 | 
 9 | 
def run_as_thread(func, *args, name=None, **kwargs):
    """Start ``func(*args, **kwargs)`` in a daemon thread and return it.

    :param func: callable to run in the new thread
    :param args: positional arguments passed to ``func``
    :param name: thread name; defaults to ``func.__name__`` when available,
        otherwise the name is left for ``Thread`` to choose
    :param kwargs: keyword arguments passed to ``func``
    :return: the already started ``Thread``
    """
    if name is None:
        # Callables such as functools.partial objects have no __name__.
        name = getattr(func, "__name__", None)
    # Pass daemon=True to the constructor; Thread.setDaemon() is deprecated.
    t = Thread(target=func, args=args, kwargs=kwargs, name=name, daemon=True)
    t.start()
    return t
17 | 
18 | 
def retry(max_retries=3):
    """Build a decorator that re-runs a function when it raises.

    The decorated function is called once and, on ``Exception``, up to
    ``max_retries`` further times with a one second pause between attempts.
    When the last attempt fails too, the exception is logged through
    ``pk_logger`` and ``None`` is returned instead of raising.
    """

    def decorator(func):
        @wraps(func)
        def guarded(*args, **kwargs):
            attempt = 1
            while attempt <= max_retries + 1:
                try:
                    return func(*args, **kwargs)
                except Exception:
                    # Out of attempts: log the failure and fall through to
                    # the ``None`` result.
                    if attempt > max_retries:
                        pk_logger.exception("Error occurs while execute function\n")
                        break
                    time.sleep(1)
                attempt += 1
            return None
        return guarded

    return decorator
38 | 
39 | 
def convert2kb(size_in_bytes):
    """Return ``size_in_bytes`` expressed in kilobytes (1 KB = 1024 bytes)."""
    bytes_per_kb = 1024
    return size_in_bytes / bytes_per_kb
43 | 
44 | 
def get_file_size_kb(file_name):
    """Return the size of the file at ``file_name`` in kilobytes."""
    return convert2kb(os.path.getsize(file_name))
49 | 


--------------------------------------------------------------------------------
/src/picktrue/version.py:
--------------------------------------------------------------------------------
# Package version string; setup.py obtains it by exec-ing this file.
__version__ = "0.5.7"
2 | 


--------------------------------------------------------------------------------
/src/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from setuptools import setup, find_packages, convert_path
 4 | 
 5 | HERE = os.path.abspath(os.path.dirname(__file__))
 6 | 
 7 | 
 8 | def get_version():
 9 |     ver_path = convert_path('picktrue/version.py')
10 |     main_ns = {}
11 |     with open(ver_path) as ver_file:
12 |         content = ver_file.read()
13 |         exec(content, main_ns)
14 |     return main_ns['__version__']
15 | 
16 | 
# Runtime dependencies handed to setuptools through ``install_requires``.
install_requires = (
    "requests",
    'click',
    'pixivpy',
    'PySocks',
    'flask',
    'pyquery',
)

# Package definition: the version comes from get_version() and two console
# commands are registered, one for the CLI and one for the GUI entry point.
setup(
    name='picktrue',
    version=get_version(),
    packages=find_packages(HERE),
    install_requires=install_requires,
    url='https://github.com/winkidney/picktrue',
    license='MIT',
    author='winkidney',
    author_email='winkidney@gmail.com',
    description='tools to download pictures you want',
    entry_points = {
        'console_scripts': [
            'picktrue-cli=picktrue.__main__:main',
            'picktrue-gui=picktrue.gui.__main__:main',
        ]
    },
)
43 | 


--------------------------------------------------------------------------------
/src/tests/test_sites/test_utils.py:
--------------------------------------------------------------------------------
 1 | from picktrue.sites import utils
 2 | 
 3 | 
 4 | def test_get_name_ext_from_url():
 5 |     assert utils.get_filename_fom_url(
 6 |         "https://img9.doubanio.com/view/photo/l/public/p2208623414.jpg"
 7 |     ) == "p2208623414.jpg"
 8 | 
 9 |     assert utils.get_filename_fom_url(
10 |         "https://img9.doubanio.com/view/photo/l/public/p2208623414.jpg?hello=world"
11 |     ) == "p2208623414.jpg"
12 | 


--------------------------------------------------------------------------------