├── .gitignore
├── .idea
    ├── Wenku8ToEpub-Online.iml
    ├── misc.xml
    ├── modules.xml
    ├── vcs.xml
    └── workspace.xml
├── LICENSE
├── NovelList.txt
├── Procfile
├── Procfile.windows
├── README.md
├── __pycache__
    ├── base_logger.cpython-36.pyc
    ├── database.cpython-36.pyc
    ├── error_report.cpython-36.pyc
    ├── manager.cpython-36.pyc
    └── wenku8toepub.cpython-36.pyc
├── app.json
├── async_test.py
├── base_logger.py
├── database.py
├── dmzj2epub.py
├── dmzj_novel_data.json
├── dmzj_novel_data_full.json
├── ebooklib
    ├── __init__.py
    ├── __pycache__
    │   ├── __init__.cpython-36.pyc
    │   ├── epub.cpython-36.pyc
    │   └── utils.cpython-36.pyc
    ├── epub.py
    ├── plugins
    │   ├── __init__.py
    │   ├── base.py
    │   ├── booktype.py
    │   ├── sourcecode.py
    │   ├── standard.py
    │   └── tidyhtml.py
    └── utils.py
├── error_report.py
├── errors.txt
├── headers.txt
├── images
    ├── 1.jpg
    ├── 2.jpg
    └── 3.png
├── make.bat
├── manage.py
├── manager.py
├── opds
    ├── .gitignore
    ├── Config.py
    ├── Const.py
    ├── Procfile
    ├── Procfile.windows
    ├── README.MD
    ├── app.json
    ├── config.yaml
    ├── filesystem.py
    ├── generate.py
    ├── index.wsgi
    ├── metadata.json
    ├── opdscore.py
    ├── opdsserver.py
    ├── requirements.txt
    ├── runtime.txt
    ├── static
    │   ├── book.png
    │   ├── bookdetail.xsl
    │   ├── booklist.xsl
    │   ├── bootstrap-responsive.min.css
    │   ├── bootstrap.min.css
    │   ├── jquery.min.js
    │   ├── logo.png
    │   └── test.html
    ├── test_mine.py
    └── utils.py
├── progress.txt
├── refresh.py
├── requirements.txt
├── restart.sh
├── runtime.txt
├── server.py
├── static
    ├── board.json
    ├── extra.js
    ├── favicon.ico
    ├── theme.js
    └── wenku8.js
├── templates
    ├── forms.html
    └── index.html
├── wenku8toepub.bkp.py
├── wenku8toepub.py
├── wk8local.py
└── xiaoice.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # test module 个人习惯，测试目录去掉
 10 | # /test/
 11 | 
 12 | # Distribution / packaging
 13 | .Python
 14 | build/
 15 | develop-eggs/
 16 | dist/
 17 | downloads/
 18 | eggs/
 19 | .eggs/
 20 | lib/
 21 | lib64/
 22 | parts/
 23 | sdist/
 24 | var/
 25 | wheels/
 26 | pip-wheel-metadata/
 27 | share/python-wheels/
 28 | *.egg-info/
 29 | .installed.cfg
 30 | *.egg
 31 | MANIFEST
 32 | 
 33 | # PyInstaller
 34 | #  Usually these files are written by a python script from a template
 35 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 36 | *.manifest
 37 | *.spec
 38 | .idea/
 39 | 
 40 | # Installer logs
 41 | pip-log.txt
 42 | pip-delete-this-directory.txt
 43 | 
 44 | # Unit test / coverage reports
 45 | htmlcov/
 46 | .tox/
 47 | .nox/
 48 | .coverage
 49 | .coverage.*
 50 | .cache
 51 | nosetests.xml
 52 | coverage.xml
 53 | *.cover
 54 | .hypothesis/
 55 | .pytest_cache/
 56 | 
 57 | # Translations
 58 | *.mo
 59 | *.pot
 60 | 
 61 | # Django stuff:
 62 | *.log
 63 | local_settings.py
 64 | db.sqlite3
 65 | 
 66 | # Flask stuff:
 67 | instance/
 68 | .webassets-cache
 69 | 
 70 | # Scrapy stuff:
 71 | .scrapy
 72 | 
 73 | # Sphinx documentation
 74 | docs/_build/
 75 | 
 76 | # PyBuilder
 77 | target/
 78 | 
 79 | # Jupyter Notebook
 80 | .ipynb_checkpoints
 81 | 
 82 | # IPython
 83 | profile_default/
 84 | ipython_config.py
 85 | 
 86 | # pyenv
 87 | .python-version
 88 | 
 89 | # celery beat schedule file
 90 | celerybeat-schedule
 91 | 
 92 | # SageMath parsed files
 93 | *.sage.py
 94 | 
 95 | # Environments
 96 | .env
 97 | .venv
 98 | env/
 99 | venv/
100 | ENV/
101 | env.bak/
102 | venv.bak/
103 | 
104 | # Spyder project settings
105 | .spyderproject
106 | .spyproject
107 | 
108 | # Rope project settings
109 | .ropeproject
110 | 
111 | # mkdocs documentation
112 | /site
113 | 
114 | # mypy
115 | .mypy_cache/
116 | .dmypy.json
117 | dmypy.json
118 | 
119 | # Pyre type checker
120 | .pyre/
121 | 
122 | # downloaded
123 | static/*.epub


--------------------------------------------------------------------------------
/.idea/Wenku8ToEpub-Online.iml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <module type="PYTHON_MODULE" version="4">
 3 |   <component name="NewModuleRootManager">
 4 |     <content url="file://$MODULE_DIR$" />
 5 |     <orderEntry type="jdk" jdkName="Python 3.7" jdkType="Python SDK" />
 6 |     <orderEntry type="sourceFolder" forTests="false" />
 7 |   </component>
 8 |   <component name="TestRunnerService">
 9 |     <option name="PROJECT_TEST_RUNNER" value="Unittests" />
10 |   </component>
11 | </module>


--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" />
4 |   <component name="PyCharmProfessionalAdvertiser">
5 |     <option name="shown" value="true" />
6 |   </component>
7 | </project>


--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/.idea/Wenku8ToEpub-Online.iml" filepath="$PROJECT_DIR$/.idea/Wenku8ToEpub-Online.iml" />
6 |     </modules>
7 |   </component>
8 | </project>


--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="VcsDirectoryMappings">
4 |     <mapping directory="$PROJECT_DIR$" vcs="Git" />
5 |   </component>
6 | </project>


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 LanceLiang2018
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Procfile:
--------------------------------------------------------------------------------
1 | web: python server.py
2 | 


--------------------------------------------------------------------------------
/Procfile.windows:
--------------------------------------------------------------------------------
1 | web: python server.py
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ### 使用的模块的说明
 2 | 
 3 | ```
 4 | 把www.wenku8.net的轻小说在线转换成epub格式。wenku8.net没有版权的小说则下载TXT文件然后转换为epub文件。
 5 | 
 6 | wk2epub [-h] [-t] [-m] [-i] [-b] [list]
 7 | 
 8 |     list            一个数字列表，中间用空格隔开
 9 | 
10 |     -t              只获取文字，忽略图片。
11 |                     但是图像远程连接仍然保留在文中。
12 |                     此开关默认关闭，即默认获取图片。
13 | 
14 |     -m              多线程模式。
15 |                     该开关已默认打开。
16 | 
17 |     -i              显示该书信息。
18 | 
19 |     -b              把生成的epub文件直接输出到stdout。
20 |                     此时list长度应为1。
21 |                     调试用。
22 | 
23 |     -h              显示本帮助。
24 | 
25 | 调用示例:
26 |     wk2epub -t 1 1213
27 | 
28 | 关于:
29 |     https://github.com/LanceLiang2018/Wenku8ToEpub
30 | 
31 | 版本:
32 |     2020/3/8 1:45 AM
33 | ```
34 | 
35 | ### 文件下载方式
36 | 
37 | #### 方式1
38 | 
39 | [书名形式](https://light-novel-1254016670.cos.ap-guangzhou.myqcloud.com/小说标题.epub)
40 | 
41 | https://light-novel-1254016670.cos.ap-guangzhou.myqcloud.com/{{小说标题}}.epub
42 | 
43 | 小说标题以显示在wenku8网站上的为准，例如
44 | 
45 |     TIGER×DRAGON！(龙与虎)
46 | 
47 | 示例:
48 | 
49 | [文学少女](https://light-novel-1254016670.cos.ap-guangzhou.myqcloud.com/文学少女.epub)
50 | 
51 | #### 方式2
52 | 
53 | ~~[ID形式](https://light-novel-1254016670.cos.ap-guangzhou.myqcloud.com/小说ID.html)~~(废弃。)
54 | 
55 |     注意等待静态HTML跳转
56 | 
57 | ## 更新：服务器版
58 | 
59 | - 从缓存中获取。存在此书则直接重定向到下载链接。
60 | 
61 |     https://wenku8.herokuapp.com/get/书本id
62 | 
63 | - 更新CDN缓存。更新完成后就会重定向到下载链接，请耐心等候。小书5s，大书30s以上。(Heroku在30s没有响应时会报错。)
64 | 
65 |     https://wenku8.herokuapp.com/cache/书本id
66 |  
67 | - 直接获取下载。获取最新的章节，但是不更新CDN。这个链接可以获取图片。好吧这个速度够慢的...
68 | 
69 |     https://wenku8.herokuapp.com/no_cache/书本id
70 | 
71 | ## 更新：美化UI和功能
72 | 
73 | [主站](http://wenku8.herokuapp.com)：http://wenku8.herokuapp.com
74 | 
75 | 免费托管于heroku。可以https，但是会造成iframe引用源站的图片没法显示。
76 | 
77 | 在这里提出issues或者在网站内反馈。
78 | 
79 | **TODO**:
80 | 
81 | - [x] MDUI配置
82 | - [x] 书籍信息显示
83 | - [x] 下载过程实时反馈
84 | - [x] 防止内存泄露
85 | 
86 | **效果展示**
87 | 
88 | ![图片1](https://github.com/LanceLiang2018/Wenku8ToEpub-Online/raw/master/images/1.jpg)
89 | 
90 | ![图片2](https://github.com/LanceLiang2018/Wenku8ToEpub-Online/raw/master/images/2.jpg)
91 | 
92 | ![图片3](https://github.com/LanceLiang2018/Wenku8ToEpub-Online/raw/master/images/3.png)


--------------------------------------------------------------------------------
/__pycache__/base_logger.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chiro2001/Wenku8ToEpub-Online/53fe93f9093df3722bf336efcf8e9aa6e136b6b4/__pycache__/base_logger.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/database.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chiro2001/Wenku8ToEpub-Online/53fe93f9093df3722bf336efcf8e9aa6e136b6b4/__pycache__/database.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/error_report.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chiro2001/Wenku8ToEpub-Online/53fe93f9093df3722bf336efcf8e9aa6e136b6b4/__pycache__/error_report.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/manager.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chiro2001/Wenku8ToEpub-Online/53fe93f9093df3722bf336efcf8e9aa6e136b6b4/__pycache__/manager.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/wenku8toepub.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chiro2001/Wenku8ToEpub-Online/53fe93f9093df3722bf336efcf8e9aa6e136b6b4/__pycache__/wenku8toepub.cpython-36.pyc


--------------------------------------------------------------------------------
/app.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "Start on Heroku: Python",
 3 |   "description": "A barebones Python app, which can easily be deployed to Heroku.",
 4 |   "image": "heroku/python",
 5 |   "repository": "https://github.com/heroku/python-getting-started",
 6 |   "keywords": ["python", "django" ],
 7 |   "addons": [ "heroku-postgresql" ],
 8 |   "env": {
 9 |     "SECRET_KEY": {
10 |       "description": "The secret key for the Django application.",
11 |       "generator": "secret"
12 |     }
13 |   },
14 |   "environments": {
15 |     "test": {
16 |       "scripts": {
17 |         "test-setup": "python manage.py collectstatic --noinput",
18 |         "test": "python manage.py test"
19 |       }
20 |     }
21 |   },
22 |   "stack": "heroku-22"
23 | }
24 | 


--------------------------------------------------------------------------------
/async_test.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import random
 3 | import json
 4 | 
 5 | 
 6 | async def chapter(bid, vid, sem):
 7 |     async with sem:
 8 |         t = 1 + random.random()
 9 |         await asyncio.sleep(t)
10 |         print('done', t)
11 |         return ('%s %s' % (bid, vid)).encode()
12 | 
13 | 
async def volume(vid, chapters, sem_limit=10):
    """Fetch every chapter of one volume concurrently.

    Args:
        vid: Volume identifier forwarded to ``chapter``.
        chapters: Sequence of mappings, each providing a ``'chapter_id'`` key.
        sem_limit: Maximum number of chapters fetched at the same time.

    Returns:
        The chapter payloads in the same order as *chapters*; an empty list
        when *chapters* is empty.
    """
    if not chapters:
        # Nothing to schedule; the previous asyncio.wait() call raised here.
        return []
    sem = asyncio.Semaphore(sem_limit)
    # gather() accepts bare coroutines on every supported Python version
    # (asyncio.wait() rejects them since 3.11) and keeps the input order,
    # so the returned list holds results rather than spent coroutine objects.
    chapter_data = await asyncio.gather(
        *(chapter(item['chapter_id'], vid, sem) for item in chapters)
    )
    print(chapter_data[0])
    return chapter_data
22 | 
23 | 
24 | if __name__ == '__main__':
25 |     test_js = '[{"volume_id":10728,"id":10728,"volume_name":"\u7b2c\u4e00\u5377","volume_order":10,"chapters":[{"chapter_id":104897,"chapter_name":"\u8f6c\u8f7d\u4fe1\u606f","chapter_order":1},{"chapter_id":104892,"chapter_name":"\u7b2c1\u8bdd \u4e24\u540dJK","chapter_order":10},{"chapter_id":104893,"chapter_name":"\u7b2c2\u8bdd \u5c31\u5bdd\u524d\u7684JK","chapter_order":20},{"chapter_id":104894,"chapter_name":"\u7b2c3\u8bdd \u5bb6\u52a1\u4e0eJK","chapter_order":30},{"chapter_id":104895,"chapter_name":"\u7b2c4\u8bdd \u8d2d\u7269\u4e0eJK","chapter_order":40},{"chapter_id":104896,"chapter_name":"\u7b2c5\u8bdd \u7535\u8111\u4e0eJK","chapter_order":50},{"chapter_id":104898,"chapter_name":"\u7b2c6\u8bdd \u610f\u5916\u4e0eJK","chapter_order":60},{"chapter_id":104899,"chapter_name":"\u7b2c7\u8bdd \u8840\u7f18\u4e0eJK","chapter_order":70},{"chapter_id":104900,"chapter_name":"\u7b2c8\u8bdd \u4f11\u606f\u65f6\u95f4\u4e0eJK","chapter_order":80},{"chapter_id":104901,"chapter_name":"\u7b2c9\u8bdd \u540d\u5b57\u4e0eJK","chapter_order":90},{"chapter_id":104902,"chapter_name":"\u7b2c10\u8bdd \u98df\u5802\u4e0e\u6211","chapter_order":100},{"chapter_id":104903,"chapter_name":"\u7b2c11\u8bdd \u517c\u804c\u4e0eJK","chapter_order":110},{"chapter_id":104904,"chapter_name":"\u7b2c12\u8bdd \u9752\u6885\u7af9\u9a6c\u4e0e\u6211","chapter_order":120},{"chapter_id":104905,"chapter_name":"\u7b2c13\u8bdd \u4f11\u606f\u65f6\u95f4\u4e0eJK\u2461","chapter_order":130},{"chapter_id":104906,"chapter_name":"\u7b2c14\u8bdd \u611f\u5192\u4e0eJK","chapter_order":140},{"chapter_id":104907,"chapter_name":"\u7b2c15\u8bdd \u88ad\u51fb\u4e0eJK","chapter_order":150},{"chapter_id":104908,"chapter_name":"\u7b2c16\u8bdd \u714e\u86cb\u4e0eJK","chapter_order":160},{"chapter_id":104909,"chapter_name":"\u540e\u8bb0","chapter_order":170},{"chapter_id":104910,"chapter_name":"\u63d2\u753b","chapter_order":180}]}]'
26 |     test_data = json.loads(test_js)
27 |     # volume_data = [None for _ in range(len(test_data))]
28 |     # sem = None
29 |     for i in range(len(test_data)):
30 |         v = test_data[i]
31 |         # volume_data[i] = volume(v['volume_id'], v['chapters'], sem)
32 |         # asyncio.run(volume_data[i])
33 |         asyncio.run(volume(v['volume_id'], v['chapters']))
34 | 


--------------------------------------------------------------------------------
/base_logger.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from colorlog import ColoredFormatter
 3 | 
 4 | 
 5 | def getLogger(name=__name__):
 6 |     logger_base = logging.getLogger(name)
 7 |     logger_base.setLevel(logging.DEBUG)
 8 |     stream_handler = logging.StreamHandler()
 9 | 
10 |     color_formatter = ColoredFormatter('%(log_color)s[%(module)-15s][%(funcName)-20s][%(levelname)-8s] %(message)s')
11 | 
12 |     # formatter = logging.Formatter('[%(module)-15s][%(funcName)-7s][%(levelname)-8s] %(message)s')
13 |     stream_handler.setFormatter(color_formatter)
14 | 
15 |     logger_base.addHandler(stream_handler)
16 | 
17 |     return logger_base
18 | 
19 | 
if __name__ == '__main__':
    # Manual smoke test: emit one record per level to check the formatting.
    logger = getLogger(__name__)
    logger.debug('debug message')
    logger.info('info message')
    # Logger.warn() is a deprecated alias; warning() is the supported spelling.
    logger.warning('warn message')
    logger.error('error message')
    logger.critical('critical message')


--------------------------------------------------------------------------------
/database.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import time
 3 | import pymongo
 4 | 
 5 | 
 6 | '''
 7 | DATA:
 8 | {
 9 |     username: ...,
10 |     email: ...,
11 |     message: ...
12 | }
13 | '''
14 | 
15 | 
class DataBase:
    """Thin wrapper around the MongoDB collections for comments and bug reports.

    Comment documents are stored as
    ``{'username': ..., 'email': ..., 'message': ..., 'head': ...}``.
    """

    # Connection string used when the WENKU8_MONGODB_URI environment variable
    # is not set.
    # NOTE(review): this embeds credentials in source control -- rotate them
    # and provide the URI through the environment instead.
    DEFAULT_URI = ("mongodb+srv://lanceliang:1352040930database@lanceliang-9kkx3.azure."
                   "mongodb.net/test?retryWrites=true&w=majority")

    def __init__(self):
        """Create the wrapper and connect immediately via :meth:`connect_init`."""
        self.client = None
        self.db = None
        self.col = None
        self.connect_init()

    def connect_init(self):
        """Create the client and bind ``self.db`` / ``self.col``.

        The URI is taken from ``WENKU8_MONGODB_URI`` when that variable is
        set, otherwise from :attr:`DEFAULT_URI`.
        """
        import os  # local import: this module does not import os at file level

        uri = os.environ.get('WENKU8_MONGODB_URI', self.DEFAULT_URI)
        self.client = pymongo.MongoClient(uri)
        self.db = self.client.wenku8_comments
        self.col = self.db.wenku8_comments

    def db_init(self):
        """Reset the comment store: drop ``wenku8_comments`` if it exists.

        Destructive -- every stored comment is deleted.
        """
        collection_names = self.db.list_collection_names()
        if 'wenku8_comments' in collection_names:
            self.db.drop_collection('wenku8_comments')
        self.col = self.db.wenku8_comments

    def put_comment(self, username: str, email: str, message: str, head: str = ''):
        """Insert one comment document.

        Args:
            username: Value stored under ``'username'``.
            email: Value stored under ``'email'``.
            message: Value stored under ``'message'``.
            head: Value stored under ``'head'``; optional so that callers
                passing only three arguments keep working.
        """
        self.col.insert_one({'username': username, 'email': email, 'message': message, 'head': head})

    def get_comments(self, count=5000, show_email=True):
        """Return up to *count* comments as a list of dicts (without ``_id``).

        Args:
            count: Maximum number of documents to fetch.
            show_email: When false, every returned ``'email'`` is blanked.
        """
        projection = {'username': 1, 'email': 1, 'message': 1, 'head': 1, '_id': 0}
        result = list(self.col.find({}, projection).limit(count))
        if not show_email:
            for comment in result:
                comment['email'] = ''
        return result

    def find_email(self, username: str):
        """Return the email of the last comment found for *username*.

        Returns an empty string when the user has no comments or the matching
        document carries no email.
        """
        projection = {'username': 1, 'email': 1, 'message': 1, '_id': 0}
        data = list(self.col.find({'username': username}, projection))
        if not data:
            return ''
        return data[-1].get('email', '')

    def error_report(self, error):
        """Store *error* with the current ``time.asctime()`` in ``wenku8_bugs``."""
        self.db.wenku8_bugs.insert_one({'time': time.asctime(), 'error': error})
57 | 
58 | 
if __name__ == '__main__':
    # WARNING: db_init() drops the wenku8_comments collection of whatever
    # database the class connects to -- run this only against scratch data.
    _db = DataBase()
    _db.db_init()
    # put_comment() takes four values; the 'head' argument was missing here,
    # which made this script fail with a TypeError.
    _db.put_comment('lance', 'lanceliang2018@163.com', 'messagefsiafjaiso', '')
    print(_db.get_comments(show_email=True))
64 | 


--------------------------------------------------------------------------------
/dmzj2epub.py:
--------------------------------------------------------------------------------
  1 | import requests
  2 | import bs4
  3 | from bs4 import BeautifulSoup as Soup
  4 | from ebooklib import epub
  5 | import os
  6 | import json
  7 | import sys
  8 | import getopt
  9 | from base_logger import getLogger
 10 | import threading
 11 | import io
 12 | import copy
 13 | import re
 14 | import asyncio
 15 | 
 16 | 
 17 | class MLogger:
 18 |     def __init__(self):
 19 |         self.data = io.StringIO()
 20 | 
 21 |     def write(self, content: str):
 22 |         self.data.write(content + '\n')
 23 |         print(content)
 24 | 
 25 |     def read_all(self):
 26 |         data2 = copy.deepcopy(self.data)
 27 |         data2.seek(0)
 28 |         d = data2.read()
 29 |         return d
 30 | 
 31 |     def info(self, message):
 32 |         self.write(message)
 33 | 
 34 |     def error(self, message):
 35 |         self.write(message)
 36 | 
 37 |     def warning(self, message):
 38 |         self.write(message)
 39 | 
 40 |     def warn(self, message):
 41 |         self.write(message)
 42 | 
 43 |     def critical(self, message):
 44 |         self.write(message)
 45 | 
 46 |     def debug(self, message):
 47 |         self.write(message)
 48 | 
 49 | 
 50 | class Dmzj2Epub:
 51 |     def __init__(self, logger=None):
 52 |         self.novel_data_file = 'dmzj_novel_data_full.json'
 53 |         # self.novel_data_file = 'dmzj_novel_data.json'
 54 |         self.api_novel = 'http://v2.api.dmzj.com/novel/%d.json'
 55 |         self.api_chapter = 'http://v2.api.dmzj.com/novel/chapter/%d.json'
 56 |         # 'http://v2.api.dmzj.com/novel/download/%d_%d_%d.txt'%(BookId,volume_id,chapter_id)
 57 |         self.api_download = 'http://v2.api.dmzj.com/novel/download/%d_%d_%d.txt'
 58 | 
 59 |         self.limit_sem_img = 10
 60 |         self.limit_sem_chapter = 10
 61 |         self.limit_sem_volume = 10
 62 | 
 63 |         self.sumi = 0
 64 |         self.book = None
 65 | 
 66 |         if logger is None:
 67 |             self.logger = getLogger()
 68 |         else:
 69 |             self.logger = logger
 70 | 
 71 |         if not os.path.exists(self.novel_data_file):
 72 |             raise FileNotFoundError('Can not find ' + self.novel_data_file)
 73 |         with open(self.novel_data_file, 'r', encoding='utf8') as f:
 74 |             self.novel_data = json.load(f)
 75 | 
 76 |     def search(self, key: str):
 77 |         results = []
 78 |         if len(key) == 0:
 79 |             return None
 80 |         for d in self.novel_data:
 81 |             if key in d['name'] or key in d['authors']:
 82 |             # if key in d['title'] or key in d['author']:
 83 |                 results.append(d)
 84 |         return results
 85 | 
 86 |     def info(self, bid: int):
 87 |         # for d in self.novel_data:
 88 |         #     if bid == d['id']:
 89 |         #         return d
 90 |         # return None
 91 |         response = requests.get(self.api_novel % bid).content
 92 |         info = json.loads(response)
 93 |         if type(info) is list:
 94 |             return None
 95 |         return info
 96 | 
 97 |     def get_volumes_chapters(self, bid: int):
 98 |         response = json.loads(requests.get(self.api_chapter % bid).content)
 99 |         return response
100 | 
101 |     async def download_img(self, url, sem):
102 |         async with sem:
103 |             filename = os.path.basename(url)
104 |             data = requests.get(url).content
105 |             file_type = filename.split('.')[-1]
106 |             item_img = epub.EpubItem(file_name="images/%s" % filename,
107 |                                      media_type="image/%s" % file_type, content=data)
108 |             self.book.add_item(item_img)
109 |             self.logger.info('<-Done image: ' + url)
110 | 
111 |     async def download_chapter(self, bid: int, volume_id: int, chapter_id: int, sem, fetch_image: bool = False):
112 |         async with sem:
113 |             content = requests.get(self.api_download % (bid, volume_id, chapter_id)).content
114 |             if fetch_image:
115 |                 text = content.decode('utf8', errors='ignore')
116 |                 imgs = re.findall('https://xs.dmzj.com/img/[0-9]+/[0-9]+/[a-fA-F0-9]{32,32}.jpg', text)
117 |                 # self.logger.debug(str(imgs))
118 |                 tasks_imgs = []
119 |                 msem = asyncio.Semaphore(self.limit_sem_img)
120 |                 for img in imgs:
121 |                     tasks_imgs.append(self.download_img(img, msem))
122 |                     filename = os.path.basename(img)
123 |                     text = text.replace(img, 'images/%s' % filename)
124 |                 if len(tasks_imgs) > 0:
125 |                     await asyncio.wait(tasks_imgs)
126 |                 content = text.encode()
127 |             return {
128 |                 'chapter_id': chapter_id,
129 |                 'content': content
130 |             }
131 | 
132 |     async def download_book(self,
133 |                       bid: int,
134 |                       fetch_image: bool = False):
135 |         self.book = epub.EpubBook()
136 |         self.sumi = 0
137 |         book_info = self.info(bid)
138 |         if book_info is None:
139 |             return None
140 |         title = book_info['name']
141 |         author = book_info['authors']
142 |         cover_url = book_info['cover']
143 |         self.logger.info('#' * 15 + '开始下载' + '#' * 15)
144 |         self.logger.info('标题: ' + title + " 作者: " + author)
145 |         self.book.set_identifier("%s, %s" % (title, author))
146 |         self.book.set_title(title)
147 |         self.book.add_author(author)
148 |         data_cover = requests.get(cover_url).content
149 |         self.book.set_cover('cover.jpg', data_cover)
150 | 
151 |         toc = []
152 |         spine = []
153 | 
154 |         volume_chapters = self.get_volumes_chapters(bid)
155 |         for volume in volume_chapters:
156 |             self.logger.info('volume: ' + volume['volume_name'])
157 |             # 先增加卷
158 |             toc.append((epub.Section(volume['volume_name']), []))
159 |             page_volume = epub.EpubHtml(title=volume['volume_name'], file_name='%s.html' % self.sumi)
160 |             self.sumi = self.sumi + 1
161 |             page_volume.set_content(("<h1>%s</h1><br>" % volume['volume_name']).encode())
162 |             self.book.add_item(page_volume)
163 |             tasks_chapters = []
164 |             sem = asyncio.Semaphore(self.limit_sem_chapter)
165 |             for chapter in volume['chapters']:
166 |                 tasks_chapters.append(self.download_chapter(bid, volume['volume_id'], chapter['chapter_id'], sem, fetch_image=fetch_image))
167 |             result_chapters = []
168 |             result_tasks = list((await asyncio.wait(tasks_chapters))[0])
169 |             for task in result_tasks:
170 |                 result_chapters.append(task.result())
171 |             result_chapters.sort(key=lambda x: x['chapter_id'], reverse=False)
172 |             # print(result_chapters)
173 |             for i in range(len(result_chapters)):
174 |                 chapter = volume['chapters'][i]
175 |                 self.logger.info('  chapter: ' + chapter['chapter_name'])
176 |                 chapter_content = result_chapters[i]['content']
177 |                 page = epub.EpubHtml(title=chapter['chapter_name'], file_name='%s.xhtml' % self.sumi)
178 |                 self.sumi = self.sumi + 1
179 |                 page.set_content(chapter_content)
180 |                 self.book.add_item(page)
181 |                 toc[-1][1].append(page)
182 |                 spine.append(page)
183 | 
184 |         self.book.toc = toc
185 |         self.book.spine = spine
186 |         self.book.add_item(epub.EpubNcx())
187 |         self.book.add_item(epub.EpubNav())
188 | 
189 |         stream = io.BytesIO()
190 |         epub.write_epub(stream, self.book)
191 |         return stream.getvalue()
192 | 
193 | 
if __name__ == '__main__':
    # Manual run: download book 1 with images and save it next to the script.
    _de = Dmzj2Epub()
    _info = _de.info(1)
    print(_info)
    if _info is None:
        # info() returns None for unknown books; subscripting it below would
        # otherwise fail with a TypeError.
        sys.exit('Book 1 was not found')
    _data = asyncio.run(_de.download_book(1, fetch_image=True))
    if _data is None:
        sys.exit('Book 1 could not be downloaded')
    with open('%s - %s.epub' % (_info['name'], _info['authors']), 'wb') as f:
        f.write(_data)


--------------------------------------------------------------------------------
/ebooklib/__init__.py:
--------------------------------------------------------------------------------
 1 | # This file is part of EbookLib.
 2 | # Copyright (c) 2013 Aleksandar Erkalovic <aerkalov@gmail.com>
 3 | #
 4 | # EbookLib is free software: you can redistribute it and/or modify
 5 | # it under the terms of the GNU Affero General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | #
 9 | # EbookLib is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | # GNU Affero General Public License for more details.
13 | #
14 | # You should have received a copy of the GNU Affero General Public License
15 | # along with EbookLib.  If not, see <http://www.gnu.org/licenses/>.
16 | 
# Library version tuple.
VERSION = (0, 17, 1)

# Item-type codes: consecutive integers, ITEM_UNKNOWN = 0 ... ITEM_SMIL = 11.
(
    ITEM_UNKNOWN,
    ITEM_IMAGE,
    ITEM_STYLE,
    ITEM_SCRIPT,
    ITEM_NAVIGATION,
    ITEM_VECTOR,
    ITEM_FONT,
    ITEM_VIDEO,
    ITEM_AUDIO,
    ITEM_DOCUMENT,
    ITEM_COVER,
    ITEM_SMIL,
) = range(12)

# File extensions associated with each item type.
EXTENSIONS = {
    ITEM_IMAGE: ['.jpg', '.jpeg', '.gif', '.tiff', '.tif', '.png'],
    ITEM_STYLE: ['.css'],
    ITEM_VECTOR: ['.svg'],
    ITEM_FONT: ['.otf', '.woff', '.ttf'],
    ITEM_SCRIPT: ['.js'],
    ITEM_NAVIGATION: ['.ncx'],
    ITEM_VIDEO: ['.mov', '.mp4', '.avi'],
    ITEM_AUDIO: ['.mp3', '.ogg'],
    ITEM_COVER: ['.jpg', '.jpeg', '.png'],
    ITEM_SMIL: ['.smil'],
}
47 | 


--------------------------------------------------------------------------------
/ebooklib/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chiro2001/Wenku8ToEpub-Online/53fe93f9093df3722bf336efcf8e9aa6e136b6b4/ebooklib/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/ebooklib/__pycache__/epub.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chiro2001/Wenku8ToEpub-Online/53fe93f9093df3722bf336efcf8e9aa6e136b6b4/ebooklib/__pycache__/epub.cpython-36.pyc


--------------------------------------------------------------------------------
/ebooklib/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chiro2001/Wenku8ToEpub-Online/53fe93f9093df3722bf336efcf8e9aa6e136b6b4/ebooklib/__pycache__/utils.cpython-36.pyc


--------------------------------------------------------------------------------
/ebooklib/plugins/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chiro2001/Wenku8ToEpub-Online/53fe93f9093df3722bf336efcf8e9aa6e136b6b4/ebooklib/plugins/__init__.py


--------------------------------------------------------------------------------
/ebooklib/plugins/base.py:
--------------------------------------------------------------------------------
 1 | # This file is part of EbookLib.
 2 | # Copyright (c) 2013 Aleksandar Erkalovic <aerkalov@gmail.com>
 3 | #
 4 | # EbookLib is free software: you can redistribute it and/or modify
 5 | # it under the terms of the GNU Affero General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | #
 9 | # EbookLib is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | # GNU Affero General Public License for more details.
13 | #
14 | # You should have received a copy of the GNU Affero General Public License
15 | # along with EbookLib.  If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | 
class BasePlugin(object):
    """Base class for plugins.

    Every hook defined here does nothing and returns ``True``; a subclass
    overrides only the hooks it is interested in.
    """

    def before_write(self, book):
        "Processing before save."
        return True

    def after_write(self, book):
        "Processing after save."
        return True

    def before_read(self, book):
        "Processing before read."
        return True

    def after_read(self, book):
        "Processing after read."
        return True

    def item_after_read(self, book, item):
        "Process general item after read."
        return True

    def item_before_write(self, book, item):
        "Process general item before write."
        return True

    def html_after_read(self, book, chapter):
        "Processing HTML after read."
        return True

    def html_before_write(self, book, chapter):
        "Processing HTML before save."
        return True
50 | 


--------------------------------------------------------------------------------
/ebooklib/plugins/booktype.py:
--------------------------------------------------------------------------------
  1 | # This file is part of EbookLib.
  2 | # Copyright (c) 2013 Aleksandar Erkalovic <aerkalov@gmail.com>
  3 | #
  4 | # EbookLib is free software: you can redistribute it and/or modify
  5 | # it under the terms of the GNU Affero General Public License as published by
  6 | # the Free Software Foundation, either version 3 of the License, or
  7 | # (at your option) any later version.
  8 | #
  9 | # EbookLib is distributed in the hope that it will be useful,
 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | # GNU Affero General Public License for more details.
 13 | #
 14 | # You should have received a copy of the GNU Affero General Public License
 15 | # along with EbookLib.  If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | from ebooklib.plugins.base import BasePlugin
 18 | from ebooklib.utils import parse_html_string
 19 | 
 20 | class BooktypeLinks(BasePlugin):
 21 |     NAME = 'Booktype Links'
 22 | 
 23 |     def __init__(self, booktype_book):
 24 |         self.booktype_book = booktype_book
 25 | 
 26 |     def html_before_write(self, book, chapter):
 27 |         from lxml import  etree
 28 | 
 29 |         try:
 30 |             from urlparse import urlparse, urljoin
 31 |         except ImportError:
 32 |             from urllib.parse import urlparse, urljoin
 33 | 
 34 |         try:
 35 |             tree = parse_html_string(chapter.content)
 36 |         except:
 37 |             return
 38 | 
 39 |         root = tree.getroottree()
 40 | 
 41 |         if len(root.find('body')) != 0:
 42 |             body = tree.find('body')
 43 | 
 44 |             # should also be aware to handle
 45 |             # ../chapter/
 46 |             # ../chapter/#reference
 47 |             # ../chapter#reference
 48 | 
 49 |             for _link in body.xpath('//a'):
 50 |                 # This is just temporary for the footnotes
 51 |                 if _link.get('href', '').find('InsertNoteID') != -1:
 52 |                     _ln = _link.get('href', '')
 53 |                     i = _ln.find('#')
 54 |                     _link.set('href', _ln[i:])
 55 | 
 56 |                     continue
 57 | 
 58 |                 _u = urlparse(_link.get('href', ''))
 59 | 
 60 |                 # Let us care only for internal links at the moment
 61 |                 if _u.scheme == '':
 62 |                     if _u.path != '':
 63 |                         _link.set('href', '%s.xhtml' % _u.path)
 64 | 
 65 |                     if _u.fragment != '':
 66 |                         _link.set('href', urljoin(_link.get('href'), '#%s' % _u.fragment))
 67 | 
 68 |                     if _link.get('name') != None:
 69 |                         _link.set('id', _link.get('name'))
 70 |                         etree.strip_attributes(_link, 'name')
 71 | 
 72 |         chapter.content = etree.tostring(tree, pretty_print=True, encoding='utf-8')
 73 | 
 74 | 
 75 | 
 76 | 
 77 | class BooktypeFootnotes(BasePlugin):
 78 |     NAME = 'Booktype Footnotes'
 79 | 
 80 |     def __init__(self, booktype_book):
 81 |         self.booktype_book = booktype_book
 82 | 
 83 |     def html_before_write(self, book, chapter):
 84 |         from lxml import etree
 85 | 
 86 |         from ebooklib import epub
 87 | 
 88 |         try:
 89 |             tree = parse_html_string(chapter.content)
 90 |         except:
 91 |             return
 92 | 
 93 |         root = tree.getroottree()
 94 | 
 95 |         if len(root.find('body')) != 0:
 96 |             body = tree.find('body')
 97 | 
 98 |             # <span id="InsertNoteID_1_marker1" class="InsertNoteMarker"><sup><a href="#InsertNoteID_1">1</a></sup><span>
 99 |             # <ol id="InsertNote_NoteList"><li id="InsertNoteID_1">prvi footnote <span id="InsertNoteID_1_LinkBacks"><sup><a href="#InsertNoteID_1_marker1">^</a></sup></span></li>
100 | 
101 |             # <a epub:type="noteref" href="#n1">1</a></p>
102 |             # <aside epub:type="footnote" id="n1"><p>These have been corrected in this EPUB3 edition.</p></aside>
103 |             for footnote in body.xpath('//span[@class="InsertNoteMarker"]'):
104 |                 footnote_id = footnote.get('id')[:-8]
105 |                 a = footnote.getchildren()[0].getchildren()[0]
106 | 
107 |                 footnote_text = body.xpath('//li[@id="%s"]' % footnote_id)[0]
108 | 
109 |                 a.attrib['{%s}type' % epub.NAMESPACES['EPUB']] = 'noteref'
110 |                 ftn = etree.SubElement(body, 'aside', {'id': footnote_id})
111 |                 ftn.attrib['{%s}type' % epub.NAMESPACES['EPUB']] = 'footnote'
112 |                 ftn_p = etree.SubElement(ftn, 'p')
113 |                 ftn_p.text = footnote_text.text
114 | 
115 |             old_footnote = body.xpath('//ol[@id="InsertNote_NoteList"]')
116 |             if len(old_footnote) > 0:
117 |                 body.remove(old_footnote[0])
118 | 
119 |         chapter.content = etree.tostring(tree, pretty_print=True, encoding='utf-8')
120 | 


--------------------------------------------------------------------------------
/ebooklib/plugins/sourcecode.py:
--------------------------------------------------------------------------------
 1 | # This file is part of EbookLib.
 2 | # Copyright (c) 2013 Aleksandar Erkalovic <aerkalov@gmail.com>
 3 | #
 4 | # EbookLib is free software: you can redistribute it and/or modify
 5 | # it under the terms of the GNU Affero General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | #
 9 | # EbookLib is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | # GNU Affero General Public License for more details.
13 | #
14 | # You should have received a copy of the GNU Affero General Public License
15 | # along with EbookLib.  If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | from ebooklib.plugins.base import BasePlugin
18 | from ebooklib.utils import parse_html_string
19 | 
class SourceHighlighter(BasePlugin):
    """Plugin that syntax-highlights embedded source listings with Pygments."""

    def __init__(self):
        pass

    def html_before_write(self, book, chapter):
        """Replace ``<pre class="source-...">`` blocks with highlighted markup.

        Blocks whose class contains ``source-python`` or ``source-css`` are
        highlighted; blocks with any other ``source-`` class are left as they
        are.  Only when at least one block was replaced is the
        ``style/code.css`` link added and ``chapter.content`` rewritten.
        Content that cannot be parsed is left untouched.  Returns ``None``.
        """
        from lxml import etree, html

        from pygments import highlight
        from pygments.formatters import HtmlFormatter

        try:
            tree = parse_html_string(chapter.content)
        except Exception:
            # Best effort: a chapter that cannot be parsed is not modified.
            return

        body = tree.find('body')

        had_source = False

        # ``find`` returns None when there is no <body>; guard before len().
        if body is not None and len(body) != 0:
            # check for embedded source
            for source in body.xpath('//pre[contains(@class,"source-")]'):
                css_class = source.get('class')

                # CSS is tested first so that a block carrying both classes
                # is still treated as CSS, as before.
                if 'source-css' in css_class:
                    from pygments.lexers import CssLexer

                    lexer = CssLexer()
                elif 'source-python' in css_class:
                    from pygments.lexers import PythonLexer

                    lexer = PythonLexer()
                else:
                    # No lexer for this language: keep the block unchanged
                    # instead of reusing output from a previous block.
                    continue

                # encoding='unicode' makes tostring() return text, so the
                # pieces can be joined with source.text.
                source_text = (source.text or '') + ''.join(
                    html.tostring(child, encoding='unicode') for child in source.iterchildren())

                _text = highlight(source_text, lexer, HtmlFormatter())

                _parent = source.getparent()
                _parent.replace(source, etree.XML(_text))

                had_source = True

        if had_source:
            chapter.add_link(href="style/code.css", rel="stylesheet", type="text/css")
            chapter.content = etree.tostring(tree, pretty_print=True, encoding='utf-8')
68 | 
69 | 


--------------------------------------------------------------------------------
/ebooklib/plugins/standard.py:
--------------------------------------------------------------------------------
  1 | # This file is part of EbookLib.
  2 | # Copyright (c) 2013 Aleksandar Erkalovic <aerkalov@gmail.com>
  3 | #
  4 | # EbookLib is free software: you can redistribute it and/or modify
  5 | # it under the terms of the GNU Affero General Public License as published by
  6 | # the Free Software Foundation, either version 3 of the License, or
  7 | # (at your option) any later version.
  8 | #
  9 | # EbookLib is distributed in the hope that it will be useful,
 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | # GNU Affero General Public License for more details.
 13 | #
 14 | # You should have received a copy of the GNU Affero General Public License
 15 | # along with EbookLib.  If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | import six
 18 | 
 19 | from ebooklib.plugins.base import BasePlugin
 20 | from ebooklib.utils import parse_html_string
 21 | 
 22 | # TODO:
 23 | #   - should also look for the _required_ elements
 24 | # http://www.w3.org/html/wg/drafts/html/master/tabular-data.html#the-table-element
 25 | 
# Attribute names that are kept on every element; the per-tag whitelists
# passed to leave_only() are built by extending this list.
ATTRIBUTES_GLOBAL = ['accesskey', 'class', 'contenteditable', 'contextmenu', 'dir', 'draggable',
                     'dropzone', 'hidden',  'id', 'inert', 'itemid', 'itemprop', 'itemref',
                     'itemscope', 'itemtype', 'lang', 'spellcheck', 'style', 'tabindex',
                     'title', 'translate', 'epub:type']

# Tags that SyntaxPlugin strips from the document with etree.strip_tags().
# Remove <u> for now from here
DEPRECATED_TAGS = ['acronym', 'applet', 'basefont', 'big', 'center', 'dir', 'font', 'frame',
                   'frameset', 'isindex', 'noframes', 's', 'strike', 'tt']
 34 | 
 35 | 
 36 | def leave_only(item, tag_list):
 37 |     for _attr in six.iterkeys(item.attrib):
 38 |         if _attr not in tag_list:
 39 |             del item.attrib[_attr]
 40 | 
 41 | 
 42 | class SyntaxPlugin(BasePlugin):
 43 |     NAME = 'Check HTML syntax'
 44 | 
 45 |     def html_before_write(self, book, chapter):
 46 |         from lxml import etree
 47 | 
 48 |         try:
 49 |             tree = parse_html_string(chapter.content)
 50 |         except:
 51 |             return
 52 | 
 53 |         root = tree.getroottree()
 54 | 
 55 |         # delete deprecated tags
 56 |         # i should really have a list of allowed tags
 57 |         for tag in DEPRECATED_TAGS:
 58 |             etree.strip_tags(root, tag)
 59 | 
 60 |         head = tree.find('head')
 61 |         
 62 |         if head is not None and len(head) != 0:
 63 |             
 64 |             for _item in head:
 65 |                 if _item.tag == 'base':
 66 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['href', 'target'])
 67 |                 elif _item.tag == 'link':
 68 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['href', 'crossorigin', 'rel', 'media', 'hreflang', 'type', 'sizes'])
 69 |                 elif _item.tag == 'title':
 70 |                     if _item.text == '':
 71 |                         head.remove(_item)
 72 |                 elif _item.tag == 'meta':
 73 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['name', 'http-equiv', 'content', 'charset'])
 74 |                     # just remove for now, but really should not be like this
 75 |                     head.remove(_item) 
 76 |                 elif _item.tag == 'script':
 77 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['src', 'type', 'charset', 'async', 'defer', 'crossorigin'])
 78 |                 elif _item.tag == 'source':
 79 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['src', 'type', 'media'])
 80 |                 elif _item.tag == 'style':
 81 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['media', 'type', 'scoped'])
 82 |                 else:
 83 |                     leave_only(_item, ATTRIBUTES_GLOBAL)
 84 | 
 85 | 
 86 |         if len(root.find('body')) != 0:
 87 |             body = tree.find('body')
 88 | 
 89 |             for _item in body.iter():
 90 |                 # it is not
 91 |                 # <a class="indexterm" href="ch05.html#ix_epub:trigger_element">
 92 |                 
 93 |                 if _item.tag == 'a':
 94 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['href', 'target', 'download', 'rel', 'hreflang', 'type'])
 95 |                 elif _item.tag == 'area':
 96 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['alt', 'coords', 'shape', 'href', 'target', 'download', 'rel', 'hreflang', 'type'])
 97 |                 elif _item.tag == 'audio':
 98 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['src', 'crossorigin', 'preload', 'autoplay', 'mediagroup', 'loop', 'muted', 'controls'])
 99 |                 elif _item.tag == 'blockquote':
100 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['cite'])
101 |                 elif _item.tag == 'button':
102 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['autofocus', 'disabled', 'form', 'formaction', 'formenctype', 'formmethod', 'formnovalidate',
103 |                                                            'formtarget', 'name', 'type', 'value', 'menu'])
104 |                 elif _item.tag == 'canvas':
105 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['width', 'height'])
106 |                 elif _item.tag == 'canvas':
107 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['width', 'height'])
108 |                 elif _item.tag == 'del':
109 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['cite', 'datetime'])
110 |                 elif _item.tag == 'details':
111 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['open'])
112 |                 elif _item.tag == 'embed':
113 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['src', 'type', 'width', 'height'])
114 |                 elif _item.tag == 'fieldset':
115 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['disable', 'form', 'name'])
116 |                 elif _item.tag == 'details':
117 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['accept-charset', 'action', 'autocomplete', 'enctype', 'method', 'name', 'novalidate', 'target'])
118 |                 elif _item.tag == 'iframe':
119 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['src', 'srcdoc', 'name', 'sandbox', 'seamless', 'allowfullscreen', 'width', 'height'])
120 |                 elif _item.tag == 'img':
121 |                     _src =  _item.get('src', '').lower()
122 |                     if _src.startswith('http://') or _src.startswith('https://'):
123 |                         if 'remote-resources' not in chapter.properties:
124 |                             chapter.properties.append('remote-resources')
125 |                             # THIS DOES NOT WORK, ONLY VIDEO AND AUDIO FILES CAN BE REMOTE RESOURCES
126 |                             # THAT MEANS I SHOULD ALSO CATCH <SOURCE TAG
127 |                             from ebooklib import epub
128 |                             _img = epub.EpubImage(file_name = _item.get('src'))
129 |                             book.add_item(_img)
130 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['alt', 'src', 'crossorigin', 'usemap', 'ismap', 'width', 'height'])
131 |                 elif _item.tag == 'input':
132 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['accept', 'alt', 'autocomplete', 'autofocus', 'checked', 'dirname',
133 |                                                            'disabled', 'form', 'formaction', 'formenctype', 'formmethod', 'formnovalidate',
134 |                                                            'formtarget', 'height', 'inputmode', 'list', 'max', 'maxlength', 'min', 'multiple',
135 |                                                            'name', 'pattern', 'placeholder', 'readonly', 'required', 'size', 'src', 'step'
136 |                                                            'type', 'value', 'width'])
137 |                 elif _item.tag == 'ins':
138 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['cite', 'datetime'])
139 |                 elif _item.tag == 'keygen':
140 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['autofocus', 'challenge', 'disabled', 'form', 'keytype', 'name'])
141 |                 elif _item.tag == 'label':
142 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['form', 'for'])
143 |                 elif _item.tag == 'label':
144 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['form', 'for'])
145 |                 elif _item.tag == 'map':
146 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['name'])
147 |                 elif _item.tag == 'menu':
148 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['type', 'label'])
149 |                 elif _item.tag == 'object':
150 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['data', 'type', 'typemustmatch', 'name', 'usemap', 'form', 'width', 'height'])
151 |                 elif _item.tag == 'ol':
152 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['reversed', 'start', 'type'])
153 |                 elif _item.tag == 'optgroup':
154 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['disabled', 'label'])
155 |                 elif _item.tag == 'option':
156 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['disabled', 'label', 'selected', 'value'])
157 |                 elif _item.tag == 'output':
158 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['for', 'form', 'name'])
159 |                 elif _item.tag == 'param':
160 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['name', 'value'])
161 |                 elif _item.tag == 'progress':
162 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['value', 'max'])
163 |                 elif _item.tag == 'q':
164 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['cite'])
165 |                 elif _item.tag == 'select':
166 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['autofocus', 'disabled', 'form', 'multiple', 'name', 'required', 'size'])
167 | 
168 |                 elif _item.tag == 'table':
169 |                     if _item.get('border', None):
170 |                         if _item.get('border') == '0':
171 |                             _item.set('border', '')
172 | 
173 |                     if _item.get('summary', None):
174 |                         _caption = etree.Element('caption', {})
175 |                         _caption.text = _item.get('summary')
176 |                         _item.insert(0, _caption)
177 | 
178 |                         # add it as caption
179 |                         del _item.attrib['summary']
180 | 
181 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['border', 'sortable'])
182 |                 elif _item.tag == 'dl':
183 |                     _d = _item.find('dd')
184 |                     if _d is not None and len(_d) == 0:
185 |                         pass
186 | 
187 |                         # http://html5doctor.com/the-dl-element/
188 |                         # should be like this really
189 |                         # some of the elements can be missing
190 |                         # dl
191 |                         #   dt
192 |                         #   dd
193 |                         #   dt
194 |                         #   dd
195 |                 elif _item.tag == 'td':
196 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['colspan', 'rowspan', 'headers'])
197 |                 elif _item.tag == 'textarea':
198 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['autocomplete', 'autofocus', 'cols', 'dirname', 'disabled', 'form',
199 |                                                            'inputmode', 'maxlength', 'name', 'placeholder', 'readonly', 'required',
200 |                                                            'rows', 'wrap'])
201 | 
202 |                 elif _item.tag in ['col', 'colgroup']:
203 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['span'])
204 |                 elif _item.tag == 'th':
205 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['colspan', 'rowspan', 'headers', 'scope', 'abbr', 'sorted'])
206 |                 elif _item.tag in ['time']:
207 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['datetime'])
208 |                 elif _item.tag in ['track']:
209 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['kind', 'src', 'srclang', 'label', 'default'])
210 |                 elif _item.tag == 'video':
211 |                     leave_only(_item, ATTRIBUTES_GLOBAL + ['src', 'crossorigin', 'poster', 'preload', 'autoplay', 'mediagroup',
212 |                                                            'loop', 'muted', 'controls', 'width', 'height'])
213 |                 elif _item.tag == 'svg':
214 |                     # We need to add property "svg" in case we have embeded svg file
215 |                     if 'svg' not in chapter.properties:
216 |                         chapter.properties.append('svg')
217 |                         
218 |                     if _item.get('viewbox', None):
219 |                         del _item.attrib['viewbox']
220 | 
221 |                     if _item.get('preserveaspectratio', None):
222 |                         del _item.attrib['preserveaspectratio']
223 |                 else:
224 |                     for _attr in six.iterkeys(_item.attrib):
225 |                         if _attr not in ATTRIBUTES_GLOBAL:
226 |                             del _item.attrib[_attr]
227 | 
228 |         chapter.content = etree.tostring(tree, pretty_print=True, encoding='utf-8', xml_declaration=True)
229 |         
230 |         return chapter.content
231 | 


--------------------------------------------------------------------------------
/ebooklib/plugins/tidyhtml.py:
--------------------------------------------------------------------------------
 1 | # This file is part of EbookLib.
 2 | # Copyright (c) 2013 Aleksandar Erkalovic <aerkalov@gmail.com>
 3 | #
 4 | # EbookLib is free software: you can redistribute it and/or modify
 5 | # it under the terms of the GNU Affero General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | #
 9 | # EbookLib is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | # GNU Affero General Public License for more details.
13 | #
14 | # You should have received a copy of the GNU Affero General Public License
15 | # along with EbookLib.  If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | import six
18 | import subprocess
19 | 
20 | from ebooklib.plugins.base import BasePlugin
21 | from ebooklib.utils import parse_html_string
22 | 
23 | # Recommend usage of
24 | # - https://github.com/w3c/tidy-html5
25 | 
def tidy_cleanup(content, **extra):
    """Run ``content`` through the external ``tidy`` program.

    Each keyword argument becomes a command-line option: a truthy value is
    passed as ``--name value``, a falsy one as the bare flag ``-name``.
    Text content is encoded as UTF-8 before it is sent to the process.

    Returns a ``(status, output)`` tuple, where ``output`` is what tidy wrote
    to its standard output and ``status`` is:

    * 0 - all ok
    * 1 - there were warnings
    * 2 - there were errors
    * 3 - ``tidy`` could not be started (``output`` is ``None``)
    """
    cmd = []

    for k, v in extra.items():
        if v:
            cmd.append('--%s' % k)
            cmd.append(v)
        else:
            cmd.append('-%s' % k)

    # The pipe is binary, so text has to be encoded before it is written.
    if not isinstance(content, bytes):
        content = content.encode('utf-8')

    # must parse all other extra arguments
    try:
        p = subprocess.Popen(['tidy'] + cmd, shell=False,
                             stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE, close_fds=True)
    except OSError:
        return (3, None)

    # Let communicate() feed stdin: it writes the input, closes the pipe and
    # drains stdout/stderr together, so large documents cannot block on a
    # full pipe buffer.
    (cont, p_err) = p.communicate(content)

    return (p.returncode, cont)
55 | 
56 | 
class TidyPlugin(BasePlugin):
    """Plugin that passes chapter content through ``tidy_cleanup``."""

    NAME = 'Tidy HTML'
    OPTIONS = {'char-encoding': 'utf8',
               'tidy-mark': 'no'
              }

    def __init__(self, extra=None):
        """Start from OPTIONS and overlay the optional ``extra`` mapping."""
        self.options = dict(self.OPTIONS)
        if extra:
            self.options.update(extra)

    def _tidy_chapter(self, chapter):
        """Tidy ``chapter.content`` in place and return the resulting content.

        Returns ``None`` when the chapter has no content.  When tidy produces
        no output (for example because it is not installed) the existing
        content is kept instead of being replaced by ``None``.
        """
        if not chapter.content:
            return None

        (_, cleaned) = tidy_cleanup(chapter.content, **self.options)

        if cleaned:
            chapter.content = cleaned

        return chapter.content

    def html_before_write(self, book, chapter):
        "Tidy the chapter HTML before it is written."
        return self._tidy_chapter(chapter)

    def html_after_read(self, book, chapter):
        "Tidy the chapter HTML after it has been read."
        return self._tidy_chapter(chapter)
82 | 
83 | 


--------------------------------------------------------------------------------
/ebooklib/utils.py:
--------------------------------------------------------------------------------
  1 | # This file is part of EbookLib.
  2 | # Copyright (c) 2013 Aleksandar Erkalovic <aerkalov@gmail.com>
  3 | #
  4 | # EbookLib is free software: you can redistribute it and/or modify
  5 | # it under the terms of the GNU Affero General Public License as published by
  6 | # the Free Software Foundation, either version 3 of the License, or
  7 | # (at your option) any later version.
  8 | #
  9 | # EbookLib is distributed in the hope that it will be useful,
 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | # GNU Affero General Public License for more details.
 13 | #
 14 | # You should have received a copy of the GNU Affero General Public License
 15 | # along with EbookLib.  If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | import io
 18 | import mimetypes
 19 | 
 20 | from lxml import etree
 21 | 
 22 | 
# Set to True by guess_type() once the mimetypes module has been initialised
# and the extra '.xhtml' type has been registered.
mimetype_initialised = False
 24 | 
 25 | 
 26 | def debug(obj):
 27 |     import pprint
 28 | 
 29 |     pp = pprint.PrettyPrinter(indent=4)
 30 |     pp.pprint(obj)
 31 | 
 32 | 
 33 | def parse_string(s):
 34 |     try:
 35 |         tree = etree.parse(io.BytesIO(s.encode('utf-8')))
 36 |     except:
 37 |         tree = etree.parse(io.BytesIO(s))
 38 | 
 39 |     return tree
 40 | 
 41 | 
 42 | def parse_html_string(s):
 43 |     from lxml import html
 44 | 
 45 |     utf8_parser = html.HTMLParser(encoding='utf-8')
 46 | 
 47 |     html_tree = html.document_fromstring(s, parser=utf8_parser)
 48 | 
 49 |     return html_tree
 50 | 
 51 | 
 52 | def guess_type(extenstion):
 53 |     global mimetype_initialised
 54 | 
 55 |     if not mimetype_initialised:
 56 |         mimetypes.init()
 57 |         mimetypes.add_type('application/xhtml+xml', '.xhtml')
 58 |         mimetype_initialised = True
 59 | 
 60 |     return mimetypes.guess_type(extenstion)
 61 | 
 62 | 
 63 | def create_pagebreak(pageref, label=None, html=True):
 64 |     from ebooklib.epub import NAMESPACES
 65 | 
 66 |     pageref_attributes = {
 67 |         '{%s}type' % NAMESPACES['EPUB']: 'pagebreak',
 68 |         'title': u'{}'.format(pageref),
 69 |         'id': u'{}'.format(pageref),
 70 |      }
 71 | 
 72 |     pageref_elem = etree.Element('span', pageref_attributes, nsmap={'epub': NAMESPACES['EPUB']})
 73 | 
 74 |     if label:
 75 |         pageref_elem.text = label
 76 | 
 77 |     if html:
 78 |         return etree.tostring(pageref_elem, encoding='unicode')
 79 | 
 80 |     return pageref_elem
 81 | 
 82 | 
 83 | def get_headers(elem):
 84 |     for n in range(1, 7):
 85 |         headers = elem.xpath('./h{}'.format(n))
 86 | 
 87 |         if len(headers) > 0:
 88 |             text = headers[0].text_content().strip()
 89 |             if len(text) > 0:
 90 |                 return text
 91 |     return None
 92 | 
 93 | 
 94 | def get_pages(item):
 95 |     body = parse_html_string(item.get_body_content())
 96 |     pages = []
 97 | 
 98 |     for elem in body.iter():
 99 |         if 'epub:type' in elem.attrib:
100 |             if elem.get('id') is not None:
101 |                 _text = None
102 |                 
103 |                 if elem.text is not None and elem.text.strip() != '':
104 |                     _text = elem.text.strip()
105 | 
106 |                 if _text is None:
107 |                     _text = elem.get('aria-label')
108 | 
109 |                 if _text is None:
110 |                     _text = get_headers(elem)
111 | 
112 |                 pages.append((item.get_name(), elem.get('id'), _text or elem.get('id')))
113 | 
114 |     return pages
115 | 
116 | 
def get_pages_for_items(items):
    """Return the page tuples of all ``items`` as one flat list, in order."""
    collected = []

    for doc in items:
        collected.extend(get_pages(doc))

    return collected
121 | 


--------------------------------------------------------------------------------
/error_report.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import json
 3 | import smtplib
 4 | from email.mime.text import MIMEText
 5 | from email.utils import formataddr
 6 | 
 7 | 
 8 | def send_report(report):
 9 |     my_sender = 'LanceLiang2018@163.com'  # 发件人邮箱账号
10 |     my_pass = '1352040930smtp'  # 发件人邮箱密码
11 |     # my_user = '1352040930@qq.com'  # 收件人邮箱账号
12 |     try:
13 |         if type(report) is dict:
14 |             report = json.dumps(report)
15 |         msg = MIMEText(str(report), 'plain', 'utf-8')
16 |         msg['From'] = formataddr(["Programe errors", my_sender])  # 括号里的对应发件人邮箱昵称、发件人邮箱账号
17 |         msg['To'] = formataddr(['Lance Liang', my_sender])  # 括号里的对应收件人邮箱昵称、收件人邮箱账号
18 |         msg['Subject'] = "wk8local程序的新bug report"  # 邮件的主题，也可以说是标题
19 | 
20 |         server = smtplib.SMTP_SSL("smtp.163.com", 465)  # 发件人邮箱中的SMTP服务器，端口是465
21 |         server.login(my_sender, my_pass)  # 括号中对应的是发件人邮箱账号、邮箱密码
22 |         server.sendmail(my_sender, [my_sender, ], msg.as_string())  # 括号中对应的是发件人邮箱账号、收件人邮箱账号、发送邮件
23 |         server.quit()  # 关闭连接
24 |     except Exception as e:
25 |         print('错误信息邮件发送失败！', e)
26 |         print('请将程序窗口截图手动发送到 LanceLiang2018@163.com 以协助程序开发。')
27 |         print('...如果您不想发也没关系QAQ...')
28 |         if os.environ.get('WENKU8_LOCAL', 'False') == 'True':
29 |             input()
30 |             exit(1)
31 | 
32 | 
def form_report(e):
    """Summarise an exception as a small JSON-serialisable dict.

    Returns a dict with the keys ``'string'`` (``str(e)``), ``'file'`` and
    ``'line'``.  File and line are taken from the first entry of the
    exception's traceback; both are ``None`` when the exception carries no
    traceback (for example because it was constructed but never raised).
    """
    tb = e.__traceback__
    if tb is None:
        return {'string': str(e), 'file': None, 'line': None}
    frame = tb.tb_frame
    return {
        'string': str(e),
        # Some frames (e.g. interactive sessions) have no __file__ global;
        # fall back to the code object's filename instead of raising.
        'file': frame.f_globals.get('__file__', frame.f_code.co_filename),
        'line': tb.tb_lineno,
    }
40 | 
# Connect to the remote database at import time.  A failure is printed and
# mailed as a bug report; the process only stops in local mode.
try:
    from database import DataBase
    _db = DataBase()
except Exception as _e:
    print("产生了无法预知的错误", "错误内容如下:", '初始化远程数据库时出现错误(wk8local.py)', sep='\n')
    _error = form_report(_e)
    print(_error['string'])
    print('文件', _error['file'])
    print('行号', _error['line'])
    print('尝试发送bug报告邮件...')
    send_report(_error)
    print('发送bug报告邮件完成，请关闭窗口。')
    if os.environ.get('WENKU8_LOCAL', 'False') == 'True':
        input()
        exit(1)
58 | 
59 | 
def report_it(e, _exit=False):
    """Print an unexpected exception, mail it and record it remotely.

    The exception is summarised with ``form_report``, mailed through
    ``send_report`` and then passed to ``_db.error_report``; a failure of
    the database step is itself mailed.  When ``_exit`` is true and
    ``WENKU8_LOCAL`` is ``'True'`` the function waits for Enter and exits
    with status 1.
    """
    print("产生了无法预知的错误", "错误内容如下:", sep='\n')
    error = form_report(e)
    print(error['string'])
    print('文件', error['file'])
    print('行号', error['line'])
    print('正在尝试反馈错误...', '尝试发送bug报告邮件...', sep='\n')
    send_report(error)
    print('发送bug报告邮件成功')
    try:
        print('尝试把bug发送到远程数据库...')
        _db.error_report(error)
    except Exception as db_failure:
        print('把bug发送到远程数据库失败')
        send_report(db_failure)
    print('发送bug报告完成，请关闭窗口。')
    running_locally = os.environ.get('WENKU8_LOCAL', 'False') == 'True'
    if running_locally and _exit:
        input()
        exit(1)


--------------------------------------------------------------------------------
/errors.txt:
--------------------------------------------------------------------------------
1 | 11
2 | 14
3 | 2
4 | 


--------------------------------------------------------------------------------
/headers.txt:
--------------------------------------------------------------------------------
 1 | Host: m.weibo.cn
 2 | User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0
 3 | Accept: application/json, text/plain, */*
 4 | Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2
 5 | Accept-Encoding: gzip, deflate, br
 6 | Content-Type: application/x-www-form-urlencoded
 7 | X-Requested-With: XMLHttpRequest
 8 | MWeibo-Pwa: 1
 9 | X-XSRF-TOKEN: f572a8
10 | Origin: https://m.weibo.cn
11 | Referer: https://m.weibo.cn/message/chat?uid=5175429989&name=msgbox
12 | Connection: keep-alive
13 | Cookie: ALF=1587999867; _T_WM=58616718015; SCF=Ahy1D6if_emkN6pc6So_HAY-k_9h-LMxAeGhK7JS3utF_LHmzXNOh78wNJcWP5mbaZ_rVMF1VS1W298GHOAOFjA.; SUHB=0nl-HcWaEtaWnm; SUB=_2A25ze5xeDeRhGeNG7lIU-SjPwj2IHXVQhyQWrDV6PUJbkdAKLWH9kW1NS14NzQhs_rKjFfwH3KlvjD3w4V-NYQ4N; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9Wh2-Tz-6ovrrTzGYGhUmmqO5JpX5K-hUgL.Fo-RSK5f1Kq01K22dJLoI7v-dc44PXpDqg4rP0z7eK-t; SSOLoginState=1585441806; XSRF-TOKEN=f572a8; WEIBOCN_FROM=1110006030; MLOGIN=1; M_WEIBOCN_PARAMS=uicode%3D20000174
14 | Pragma: no-cache
15 | Cache-Control: no-cache
16 | TE: Trailers


--------------------------------------------------------------------------------
/images/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chiro2001/Wenku8ToEpub-Online/53fe93f9093df3722bf336efcf8e9aa6e136b6b4/images/1.jpg


--------------------------------------------------------------------------------
/images/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chiro2001/Wenku8ToEpub-Online/53fe93f9093df3722bf336efcf8e9aa6e136b6b4/images/2.jpg


--------------------------------------------------------------------------------
/images/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chiro2001/Wenku8ToEpub-Online/53fe93f9093df3722bf336efcf8e9aa6e136b6b4/images/3.png


--------------------------------------------------------------------------------
/make.bat:
--------------------------------------------------------------------------------
@echo off
rem Build wk8local with PyInstaller, then copy the runtime assets into dist\wk8local.
pyinstaller wk8local.py
rem Static files and HTML templates.
md dist\wk8local\static
copy /Y static\* dist\wk8local\static\
md dist\wk8local\templates
copy /Y templates\* dist\wk8local\templates\
rem DMZJ novel data files.
copy dmzj_novel_data_full.json dist\wk8local\
copy dmzj_novel_data.json dist\wk8local\
9 | 


--------------------------------------------------------------------------------
/manage.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from flask import *
  3 | from manager import *
  4 | import io
  5 | # import urllib.parse
  6 | import threading
  7 | import re
  8 | import time
  9 | import requests
 10 | import hashlib
 11 | import smtplib
 12 | from email.mime.text import MIMEText
 13 | from email.utils import formataddr
 14 | from database import DataBase
 15 | import error_report
 16 | import xiaoice
 17 | 
 18 | 
db = DataBase()
app = Flask(__name__)
# Background download jobs; the cache routes append dicts with the keys
# 'bid', 'th' (the thread) and 'messages' (its MLogger).
threads = []
my_email = 'LanceLiang2018@163.com'
# NOTE(review): the admin password is hard-coded in source; consider
# loading it from configuration instead.
my_password = '1352040930wenku8'
 24 | 
 25 | 
 26 | def get_icon(email):
 27 |     return'https://s.gravatar.com/avatar/' + hashlib.md5(email.lower().encode()).hexdigest() + '?s=34'
 28 | 
 29 | 
 30 | def has_file(target):
 31 |     r = requests.get(target, stream=True)
 32 |     if int(r.status_code) == 200:
 33 |         return True
 34 |     return False
 35 | 
 36 | 
 37 | def file_size(target):
 38 |     r = requests.get(target, stream=True)
 39 |     if int(r.status_code) == 200:
 40 |         return int(r.headers['Content-Length'])
 41 |     return 0
 42 | 
 43 | 
 44 | def local_check(book_id):
 45 |     wk = Wenku8ToEpub()
 46 |     filename_ = wk.id2name(book_id) + '.epub'
 47 |     info = wk.book_info(book_id)
 48 |     if info is None:
 49 |         return '1'  # 需要更新
 50 |     # 检查上次上传时间
 51 |     last_time = v2_check_time(filename_)
 52 |     if last_time is None:
 53 |         return '1'
 54 |     last_time = last_time[:10]
 55 |     if last_time > info['update']:
 56 |         return '0'
 57 |     return '1'  # 需要更新
 58 | 
 59 | 
 60 | def send_email(user, email, message):
 61 |     # print(user, message)
 62 |     my_sender = 'LanceLiang2018@163.com'  # 发件人邮箱账号
 63 |     my_pass = '1352040930smtp'  # 发件人邮箱密码
 64 |     # my_user = '1352040930@qq.com'  # 收件人邮箱账号
 65 |     try:
 66 |         # print('try to send:', user)
 67 |         msg = MIMEText(message, 'plain', 'utf-8')
 68 |         msg['From'] = formataddr(["USER:%s" % user, my_sender])  # 括号里的对应发件人邮箱昵称、发件人邮箱账号
 69 |         msg['To'] = formataddr(['Lance Liang', my_sender])  # 括号里的对应收件人邮箱昵称、收件人邮箱账号
 70 |         msg['Subject'] = "来自 %s(%s) 的新消息" % (user, email)  # 邮件的主题，也可以说是标题
 71 | 
 72 |         server = smtplib.SMTP_SSL("smtp.163.com", 465)  # 发件人邮箱中的SMTP服务器，端口是465
 73 |         server.login(my_sender, my_pass)  # 括号中对应的是发件人邮箱账号、邮箱密码
 74 |         server.sendmail(my_sender, [my_sender, ], msg.as_string())  # 括号中对应的是发件人邮箱账号、收件人邮箱账号、发送邮件
 75 |         server.quit()  # 关闭连接
 76 |     except Exception as e:
 77 |         print(e)
 78 |         error_report.report_it(e)
 79 | 
 80 | 
 81 | def send_email_2(user, email, message):
 82 |     my_sender = 'LanceLiang2018@163.com'  # 发件人邮箱账号
 83 |     my_pass = '1352040930smtp'  # 发件人邮箱密码
 84 |     # my_user = '1352040930@qq.com'  # 收件人邮箱账号
 85 |     try:
 86 |         # print('try to send:', user)
 87 |         msg = MIMEText(message, 'plain', 'utf-8')
 88 |         msg['From'] = formataddr(["Lance Liang", my_sender])  # 括号里的对应发件人邮箱昵称、发件人邮箱账号
 89 |         msg['To'] = formataddr(['%s' % user, email])  # 括号里的对应收件人邮箱昵称、收件人邮箱账号
 90 |         msg['Subject'] = "Re:您在wenku8.herokuapp.com的反馈"  # 邮件的主题，也可以说是标题
 91 | 
 92 |         server = smtplib.SMTP_SSL("smtp.163.com", 465)  # 发件人邮箱中的SMTP服务器，端口是465
 93 |         server.login(my_sender, my_pass)  # 括号中对应的是发件人邮箱账号、邮箱密码
 94 |         server.sendmail(my_sender, [email, ], msg.as_string())  # 括号中对应的是发件人邮箱账号、收件人邮箱账号、发送邮件
 95 |         server.quit()  # 关闭连接
 96 |     except Exception as e:
 97 |         print(e)
 98 |         error_report.report_it(e)
 99 | 
100 | 
@app.route('/', methods=['GET'])
def index():
    """Render the landing page.

    The template receives ``local`` (whether ``WENKU8_LOCAL`` is
    ``'True'``) and ``urls`` (the result of ``make_urls``).
    """
    is_local = os.environ.get('WENKU8_LOCAL', 'False') == 'True'
    download_urls = make_urls()
    return render_template('index.html', local=is_local, urls=download_urls)
110 | 
111 | 
@app.route('/bookinfo/<int:book_id>', methods=['GET'])
def get_bookinfo(book_id: int):
    """Return a wenku8 book's info as JSON.

    The info dict gains an ``update_time`` entry holding the stored
    EPUB's last upload time; ``{}`` is returned when no info is found.
    """
    converter = Wenku8ToEpub()
    epub_name = converter.id2name(book_id) + '.epub'
    details = converter.book_info(book_id)
    if details is None:
        return json.dumps({})
    # Look up when the file was last uploaded.
    details['update_time'] = v2_check_time(epub_name)
    return json.dumps(details)
123 | 
124 | 
@app.route('/bookinfo_dmzj/<int:book_id>', methods=['GET'])
def get_dmzj_bookinfo(book_id: int):
    """Return a DMZJ book's info as JSON.

    The info dict gains an ``update_time`` entry holding the stored
    EPUB's last upload time; ``{}`` is returned when no info is found.
    """
    de = Dmzj2Epub()
    info = de.info(book_id)
    # Check for a missing book before reading info['name']; the previous
    # order raised TypeError for unknown ids.
    if info is None:
        return json.dumps({})
    filename_ = 'dmzj_%s.epub' % info['name']
    # Look up when the file was last uploaded.
    last_time = v2_check_time(filename_)
    info['update_time'] = last_time
    return json.dumps(info)
136 | 
137 | 
@app.route('/v2/check/<book_id>', methods=['GET'])
def v2_check(book_id):
    """Tell the client whether the cached EPUB for ``book_id`` is stale.

    ``book_id`` is either a wenku8 numeric id or ``dmzj_<id>``.  Returns
    ``'1'`` when the book should be (re)generated — including for ids
    that cannot be parsed or looked up — and ``'0'`` when the stored file
    is newer than the source's last update.
    """
    book_id = str(book_id)
    if not book_id.startswith('dmzj_'):
        try:
            book_id = int(book_id)
        except ValueError:
            return '1'
        # The wenku8 check is exactly what local_check implements.
        return local_check(book_id)

    de = Dmzj2Epub()
    try:
        book_id = int(book_id.split('dmzj_')[-1])
    except ValueError:
        return '1'
    info = de.info(book_id)
    if info is None:
        return '1'
    # Look up when the file was last uploaded.
    filename_ = 'dmzj_%s.epub' % info['name']
    last_time = v2_check_time(filename_)
    if last_time is None:
        return '1'
    last_time = last_time[:10]
    # Zero-padded YYYY-MM-DD so the string comparison below orders dates
    # correctly; an unpadded '2020-3-5' would compare wrongly against
    # '2020-03-29'.
    update_time = time.strftime('%Y-%m-%d', time.localtime(info['last_update_time']))
    if last_time > update_time:
        return '0'
    return '1'  # needs updating
179 | 
180 | 
@app.route('/v2/search/<string:key>', methods=['GET'])
def v2_search(key: str):
    """Search wenku8 for ``key`` and return the results as JSON."""
    return json.dumps(Wenku8ToEpub().search(key))
186 | 
187 | 
@app.route('/v2_dmzj/search/<string:key>', methods=['GET'])
def v2_dmzj_search(key: str):
    """Search DMZJ for ``key`` and return the results as JSON."""
    return json.dumps(Dmzj2Epub().search(key))
193 | 
194 | 
@app.route('/v2/name/<string:book_name>')
def v2_jump_by_name(book_name):
    """Return the bucket URL of ``<book_name>.epub`` if it exists, else ''."""
    target = 'https://light-novel-1254016670.cos.ap-guangzhou.myqcloud.com/%s' % ("%s.epub" % book_name)
    return target if has_file(target) else ''
202 | 
203 | 
@app.route('/v2/cache/<int:book_id>')
def v2_cache(book_id: int, image=False):
    """Start a background job that builds the EPUB of a wenku8 book.

    Returns ``'1'`` when the id has no name, ``'2'`` when a job for this
    id is already registered and ``'0'`` once a new job has been started.
    ``image`` is passed on to ``v2_work``.
    """
    if Wenku8ToEpub().id2name(book_id) == '':
        return '1'
    if any(job['bid'] == book_id for job in threads):
        return '2'
    mlogger = MLogger()
    worker = threading.Thread(target=v2_work, args=(book_id, None, mlogger, image), daemon=True)
    worker.start()
    threads.append({'bid': book_id, 'th': worker, 'messages': mlogger})
    return '0'
227 | 
228 | 
@app.route('/v2_dmzj/cache/<int:book_id>')
def v2_dmzj_cache(book_id: int, image=False):
    """Start a background job that builds the EPUB of a DMZJ book.

    Returns ``'1'`` when the book info cannot be found, ``'2'`` when a job
    for this book is already registered and ``'0'`` once a new job has
    been started.  ``image`` is passed on to ``v2_dmzj_work``.
    """
    de = Dmzj2Epub()
    info = de.info(book_id)
    if info is None:
        return '1'
    # Jobs are registered under 'dmzj_<id>', so the duplicate check has to
    # use the same key; comparing the bare int never matched a DMZJ job.
    job_id = 'dmzj_' + str(book_id)
    for t in threads:
        if t['bid'] == job_id:
            return '2'
    mlogger = MLogger()
    th = threading.Thread(target=v2_dmzj_work, args=(book_id, None, mlogger, image))
    th.setDaemon(True)
    th.start()
    threads.append({
        'bid': job_id,
        'th': th,
        'messages': mlogger,
    })
    return '0'
252 | 
253 | 
@app.route('/v2/cache_img/<int:book_id>')
def v2_cache_img(book_id: int):
    """Same as ``v2_cache`` but with ``image`` set to True."""
    status = v2_cache(book_id, image=True)
    return status
257 | 
258 | 
@app.route('/v2_dmzj/cache_img/<int:book_id>')
def v2_dmzj_cache_img(book_id: int):
    """Same as ``v2_dmzj_cache`` but with ``image`` set to True."""
    status = v2_dmzj_cache(book_id, image=True)
    return status
262 | 
263 | 
@app.route('/v2/cache_status/<book_id>')
def v2_cache_status(book_id):
    """Report the state of the background job registered for ``book_id``.

    Returns ``'0'`` while the job's thread is still running.  Once it has
    finished, the job is removed from ``threads`` and the URL stored in
    ``th_results`` is returned.  ``'1'`` is returned when no job is
    registered or the finished job left no result.
    """
    book_id = str(book_id)
    for t in threads:
        # wenku8 jobs are registered with an int id and DMZJ jobs with a
        # 'dmzj_<id>' string; compare as strings so both kinds match.
        if str(t['bid']) != book_id:
            continue
        # Thread.isAlive() was removed in Python 3.9; is_alive() works on
        # every Python 3 version.
        if t['th'].is_alive():
            return '0'
        # Removing while iterating is safe here because we return at once.
        threads.remove(t)
        url = th_results.get(book_id)
        if url is None:
            return '1'
        return url
    return '1'
279 | 
280 | 
@app.route('/v2/cache_logs/<book_id>')
def v2_cache_logs(book_id):
    """Return the log messages of the job registered for ``book_id``.

    Returns ``''`` when no such job is registered.
    """
    book_id = str(book_id)
    for t in threads:
        # wenku8 jobs are registered with an int id and DMZJ jobs with a
        # 'dmzj_<id>' string; compare as strings so both kinds match.
        if str(t['bid']) == book_id:
            return t['messages'].read_all()
    return ''
289 | 
290 | 
@app.route('/v2/get/<int:book_id>')
def v2_get(book_id: int):
    """Return the bucket URL of a wenku8 book's EPUB, or '' if unavailable.

    ``''`` is returned both when the id has no name and when the file
    does not exist in the bucket.
    """
    book_name = Wenku8ToEpub().id2name(book_id)
    if book_name == '':
        return ''
    quoted = urllib.parse.quote("%s.epub" % book_name)
    target = 'https://light-novel-1254016670.cos.ap-guangzhou.myqcloud.com/%s' % quoted
    return target if has_file(target) else ''
303 | 
304 | 
@app.route('/v2_dmzj/get/<int:book_id>')
def v2_dmzj_get(book_id: int):
    """Return the bucket URL of a DMZJ book's EPUB, or '' if unavailable.

    ``''`` is returned both when the book info cannot be found and when
    the file does not exist in the bucket.
    """
    details = Dmzj2Epub().info(book_id)
    if details is None:
        return ''
    epub_name = "%s.epub" % ('dmzj_%s' % details['name'])
    quoted = urllib.parse.quote(epub_name)
    target = 'https://light-novel-1254016670.cos.ap-guangzhou.myqcloud.com/%s' % quoted
    return target if has_file(target) else ''
318 | 
319 | 
@app.route('/v2/comments', methods=['GET'])
def v2_comments():
    """Return the stored comments as JSON, requested with show_email=False."""
    return json.dumps(db.get_comments(show_email=False))
324 | 
325 | 
@app.route('/v2/feedback', methods=['POST'])
def v2_feedback():
    """Handle a feedback form post.

    Without a password the message is mailed to the maintainer and stored
    as a comment, and ``''`` is returned.  With the admin password the
    message is treated as the maintainer's reply to ``user``: it is
    mailed to that user's stored address and recorded as a comment.  A
    wrong password or an unknown user yields an error string.
    """
    # Read fields through request.form.get so each value is a single
    # string on every Werkzeug version (dict(request.form) gives lists on
    # older releases).
    form = request.form
    message = form.get('message', '')
    user = form.get('user', '')
    email = form.get('email', '')
    password = form.get('password', '')
    head = get_icon(email)
    # The submitted password is deliberately left out of the log.
    logger.info(str((user, email, message)))
    if len(password) == 0:
        send_email(user, email, message)
        db.put_comment(user, email, message, head)
        return ''
    if password != my_password:
        return '管理员密码错误'
    # Admin mode: `user` names the visitor being replied to.
    target_email = db.find_email(user)
    if '' == target_email:
        return '邮箱查找失败'
    send_email_2(user, target_email, message)
    db.put_comment('Lance->@%s' % user, my_email, message, head)
    return '管理员操作成功'
351 | 
352 | 
@app.route('/v2/visitors')
def v2_visitors():
    """POST to the Baidu Tongji report API and return '0'.

    The API response is not used.
    """
    requests.post('https://api.baidu.com/json/tongji/v1/ReportService/getData')
    return '0'
358 | 
359 | 
@app.route('/cache/<int:book_id>')
def cache(book_id: int):
    """Build and upload a wenku8 book via ``work`` and redirect to its URL.

    An id without a name gets an error message instead.
    """
    if Wenku8ToEpub().id2name(book_id) == '':
        return '没有这个小说！'
    return redirect(work(book_id))
368 | 
369 | 
@app.route('/cache_img/<int:book_id>')
def cache_img(book_id: int):
    """Build and upload a wenku8 book via ``work4`` and redirect to its URL.

    An id without a name gets an error message instead.
    """
    if Wenku8ToEpub().id2name(book_id) == '':
        return '没有这个小说！'
    return redirect(work4(book_id))
378 | 
379 | 
@app.route('/no_cache/<int:book_id>')
def no_cache(book_id: int):
    """Build a wenku8 book with ``work3`` and send it as an attachment.

    An id without a name gets an error message instead.
    """
    book_name = Wenku8ToEpub().id2name(book_id)
    if book_name == '':
        return '没有这个小说！'

    payload = io.BytesIO(work3(book_id))

    # latin-1 approach: the UTF-8 bytes of the file name are re-read as
    # latin-1 characters before being placed in the header.
    download_name = ("%s.epub" % book_name).encode().decode('latin-1')
    response = make_response(send_file(payload, attachment_filename="%s" % download_name))
    response.headers["Content-Disposition"] = "attachment; filename=%s;" % download_name
    return response
401 | 
402 | 
@app.route('/get/<int:book_id>')
def get(book_id: int):
    """Redirect to the bucket URL of a wenku8 book's EPUB.

    An id without a name gets an error message instead.
    """
    book_name = Wenku8ToEpub().id2name(book_id)
    if book_name == '':
        return '没有这个小说！'
    epub_name = "%s.epub" % book_name
    return redirect('https://light-novel-1254016670.cos.ap-guangzhou.myqcloud.com/%s' % epub_name)
411 | 
412 | 
@app.route('/name/<string:book_name>')
def jump_by_name(book_name: str):
    """Redirect to the bucket URL of ``<book_name>.epub``."""
    epub_name = "%s.epub" % book_name
    return redirect('https://light-novel-1254016670.cos.ap-guangzhou.myqcloud.com/%s' % epub_name)
417 | 
418 | 
@app.route('/search')
def search():
    """Dispatch the legacy search form to the matching route.

    Reads the query parameters ``method`` (``id``, ``name``, ``cache`` or
    ``cache_img``) and ``search_key`` and redirects accordingly.  A
    missing or unknown ``method`` returns an error message instead of
    raising, and a missing ``search_key`` is treated as an empty string.
    """
    # request.args.get returns the first value of a parameter, or the
    # default when the parameter is absent.
    method = request.args.get('method')
    search_key = request.args.get('search_key', '')
    if method == 'id':
        try:
            bid = int(search_key)
        except ValueError:
            return "ID输入错误！"
        return redirect('/get/%s' % bid)
    elif method == 'name':
        return redirect('/name/%s' % search_key)
    elif method == 'cache':
        return redirect('/cache/%s' % search_key)
    elif method == 'cache_img':
        return redirect('/cache_img/%s' % search_key)
    else:
        return '参数不正确'
447 | 
448 | 
@app.route('/favicon.ico', methods=['GET'])
def favicon():
    """Redirect to the favicon under /static."""
    icon_path = '/static/favicon.ico'
    return redirect(icon_path)
452 | 
453 | 
@app.route('/baidu_verify_kBBfcDGnTX.html', methods=['GET'])
def baidu_verify():
    """Serve the fixed verification token for this URL."""
    token = 'kBBfcDGnTX'
    return token
457 | 
458 | 
@app.route('/chat/<string:text>', methods=['GET', 'POST'])
def server_chat(text):
    """Pass ``text`` to ``xiaoice.chat`` and return the reply wrapped in ``foo(...)``.

    ``headers.txt`` is downloaded from the CDN bucket before the chat call
    and uploaded back afterwards.  The JSON payload has the keys ``data``,
    ``other`` and ``code``; ``code`` is 1 and ``other`` holds the error
    text when any step raised.
    """
    try:
        # Download first.
        headers_data = requests.get('https://cdn-1254016670.cos.ap-chengdu.myqcloud.com/headers.txt').content
        with open('headers.txt', 'wb') as f:
            f.write(headers_data)
        response = xiaoice.chat(text)
        # Then upload.
        with open('headers.txt', 'rb') as f:
            response1 = client2.put_object(
                Bucket=bucket2,
                Body=f.read(),
                Key="headers.txt",
                StorageClass='STANDARD',
                EnableMD5=False
            )
            logger.info(str(response1))
    except Exception as e:
        failure = {
            'data': '',
            'other': str(e),
            'code': 1
        }
        return 'foo(' + json.dumps(failure) + ')'
    success = {
        'data': response,
        'other': response1,
        'code': 0
    }
    return 'foo(' + json.dumps(success) + ')'
491 | 
492 | 
if __name__ == '__main__':
    # Listen on all interfaces; the port comes from $PORT (default 8000).
    # os.environ['WENKU8_LOCAL'] = "True"
    port = int(os.environ.get('PORT', '8000'))
    app.run("0.0.0.0", port=port, debug=False)
496 | 
497 | 


--------------------------------------------------------------------------------
/manager.py:
--------------------------------------------------------------------------------
  1 | from qcloud_cos import CosConfig
  2 | from qcloud_cos import CosS3Client
  3 | from qcloud_cos import CosClientError
  4 | import sys
  5 | import getopt
  6 | import json
  7 | import base_logger
  8 | from tqdm import *
  9 | from wenku8toepub import Wenku8ToEpub, lock, MLogger, logger
 10 | from dmzj2epub import Dmzj2Epub
 11 | import requests
 12 | import threading
 13 | import urllib.parse
 14 | import os
 15 | import io
 16 | import asyncio
 17 | 
# logger = base_logger.getLogger()
# URLs produced by the v2 worker functions, keyed by book id string.
th_results = {}

# "Request the password from the server" (the value below is hard-coded).
logger.info('正在获取密码...')
# NOTE(review): hard-coded secret; consider loading it from configuration.
password = '1352040930'
 24 | 
 25 | import base64
 26 | password_data = json.loads(base64.b64decode("ewogICAgImNvZGUiOiAwLAogICAgImlkIjogIkFLSUQyc1RxenZYN05QQ3JIUlAxUmVjS24wMG1KYmZVT01RRSIsCiAgICAia2V5IjogImlCT001WW1rNUM1anZzWjBEQXJJVE85ZXV1ZkNhbWtUIgp9").decode())
 27 | if not password_data['code'] == 0:
 28 |     logger.error('密码无效！进入只读模式！')
 29 | logger.info('密码正确！')
 30 | 
# COS credentials come from the decoded password_data blob.
secret_id = password_data['id']
secret_key = password_data['key']
region = 'ap-guangzhou'
region2 = 'ap-chengdu'

# (not applied) raise the timeout
# config = CosConfig(Region=region, SecretId=secret_id, SecretKey=secret_key, Timeout=120)
config = CosConfig(Region=region, SecretId=secret_id, SecretKey=secret_key)
config2 = CosConfig(Region=region2, SecretId=secret_id, SecretKey=secret_key)
# 2. Create the client objects
# (not applied) increase the retry count
# client = CosS3Client(config, retry=5)
client = CosS3Client(config)
client2 = CosS3Client(config2)

bucket = 'light-novel-1254016670'
bucket2 = 'cdn-1254016670'


# HTML stub that refreshes to the given URL after 5 seconds.
str_jump = '''<head><meta http-equiv="refresh" content="5;url=%s"></head>'''
 51 | 
 52 | 
 53 | def work2(book_id: int, filename: str = None):
 54 |     wk = Wenku8ToEpub()
 55 |     if filename is None:
 56 |         filename_ = wk.id2name(book_id)
 57 |         if filename == '':
 58 |             return
 59 |         filename = "%s.epub" % filename_
 60 |     response = client.put_object(
 61 |         Bucket=bucket,
 62 |         Body=(str_jump % filename).encode('gbk'),
 63 |         # Body=(str_jump % ("https://light-novel-1254016670.cos.ap-guangzhou.myqcloud.com/" + urllib.quote(filename))).encode('utf-8'),
 64 |         # Key=filename_md5,
 65 |         Key="%s.html" % (book_id, ),
 66 |         StorageClass='STANDARD',
 67 |         EnableMD5=False
 68 |     )
 69 |     logger.info("%s OK." % filename)
 70 | 
 71 | 
 72 | def work(book_id: int, filename: str = None):
 73 |     wk = Wenku8ToEpub()
 74 |     if filename is None:
 75 |         filename_ = wk.id2name(book_id)
 76 |         if filename == '':
 77 |             return
 78 |         filename = "%s.epub" % filename_
 79 |     data = wk.get_book(book_id, bin_mode=True, fetch_image=False)
 80 |     response1 = client.put_object(
 81 |         Bucket=bucket,
 82 |         Body=data,
 83 |         # Key=filename_md5,
 84 |         Key="%s" % (filename, ),
 85 |         StorageClass='STANDARD',
 86 |         EnableMD5=False
 87 |     )
 88 |     # response2 = client.put_object(
 89 |     #     Bucket=bucket,
 90 |     #     Body=(str_jump % filename).encode('gbk'),
 91 |     #     # Key=filename_md5,
 92 |     #     Key="%s.html" % (book_id, ),
 93 |     #     StorageClass='STANDARD',
 94 |     #     EnableMD5=False
 95 |     # )
 96 |     # logger.info("%s OK. %s %s" % (filename, str(response1), str(response2)))
 97 |     logger.info("%s OK. %s" % (filename, str(response1)))
 98 |     return 'https://light-novel-1254016670.cos.ap-guangzhou.myqcloud.com/%s' % filename
 99 | 
100 | 
def work3(book_id: int, filename: str = None):
    """Build a book's EPUB with images and return the data without uploading.

    When ``filename`` is omitted it is derived from the book's name;
    nothing is built (and None is returned) if that name is empty.  The
    file name is only used in the log message.
    """
    wk = Wenku8ToEpub()
    if filename is None:
        filename_ = wk.id2name(book_id)
        # Test the looked-up name; the previous check compared `filename`,
        # which is always None at this point, so the guard never fired.
        if filename_ == '':
            return
        filename = "%s.epub" % filename_
    data = wk.get_book(book_id, bin_mode=True, fetch_image=True)
    logger.info("%s OK。(No Cache.)" % (filename,))
    return data
129 | 
130 | 
def work4(book_id: int, filename: str = None):
    """Build a book's EPUB with images, upload it plus a redirect stub, return its URL.

    When ``filename`` is omitted it is derived from the book's name;
    nothing is built (and None is returned) if that name is empty.  Two
    objects are uploaded: the EPUB itself and ``<book_id>.html``.
    """
    wk = Wenku8ToEpub()
    if filename is None:
        filename_ = wk.id2name(book_id)
        # Test the looked-up name; the previous check compared `filename`,
        # which is always None at this point, so the guard never fired.
        if filename_ == '':
            return
        filename = "%s.epub" % filename_
    data = wk.get_book(book_id, bin_mode=True, fetch_image=True)
    response1 = client.put_object(
        Bucket=bucket,
        Body=data,
        Key="%s" % (filename, ),
        StorageClass='STANDARD',
        EnableMD5=False
    )
    response2 = client.put_object(
        Bucket=bucket,
        Body=(str_jump % filename).encode('gbk'),
        Key="%s.html" % (book_id, ),
        StorageClass='STANDARD',
        EnableMD5=False
    )
    logger.info("%s OK. %s %s" % (filename, str(response1), str(response2)))
    return 'https://light-novel-1254016670.cos.ap-guangzhou.myqcloud.com/%s' % filename
159 | 
160 | 
def my_upload_file(key, data):
    """Upload the buffer ``data`` to the bucket under ``key``.

    The buffer is rewound to its start before the upload.
    """
    # Last attempt.
    data.seek(0)
    upload_options = dict(
        Bucket=bucket,
        Body=data,
        Key=key,
        StorageClass='STANDARD',
    )
    client.upload_file_from_buffer(**upload_options)
173 | 
174 | 
def v2_work(book_id: int, filename: str = None, mlogger=None, image=False):
    """Build a wenku8 EPUB, store it and publish its URL in ``th_results``.

    When ``filename`` is omitted it is derived from the book's name;
    nothing is built (and None is returned) if that name is empty.
    Progress is written to ``mlogger``.  If ``WENKU8_LOCAL`` is ``'True'``
    the file is uploaded to the bucket; otherwise the upload is skipped on
    purpose.  Whenever the upload does not happen or fails, the file is
    written under ``static/`` and a ``/static/...`` URL is used.  The URL
    is stored under ``str(book_id)`` and returned.
    """
    wk = Wenku8ToEpub()
    if filename is None:
        filename_ = wk.id2name(book_id)
        # Test the looked-up name; the previous check compared `filename`,
        # which is always None at this point, so the guard never fired.
        if filename_ == '':
            return
        filename = "%s.epub" % filename_
    # Cap image size at 3 MB unless running locally.
    if os.environ.get('WENKU8_LOCAL', 'False') == 'True':
        image_size = None
    else:
        image_size = 3 * 1024 * 1024
    data = wk.get_book(book_id, bin_mode=True, fetch_image=image, mlogger=mlogger, image_size=image_size)
    mlogger.info('小说获取完毕，准备上传到腾讯云...')
    try:
        if os.environ.get('WENKU8_LOCAL', 'False') == 'True':
            response1 = client.put_object(
                Bucket=bucket,
                Body=data,
                Key="%s" % (filename,),
                StorageClass='STANDARD',
                EnableMD5=False
            )
        else:
            # Deliberately routes non-local runs to the fallback below.
            raise CosClientError("腾讯云上传取消。")
    except Exception as e:
        mlogger.warn("%s 腾讯云上传错误，准备直接返回临时下载链接..." % str(e))
        # Save the file locally and hand out a temporary link.
        with open('static/%s' % filename, 'wb') as f:
            f.write(data)
        filename = urllib.parse.quote(filename)
        url = '/static/%s' % filename
        lock.acquire()
        th_results[str(book_id)] = url
        lock.release()
        return url
    mlogger.info("%s OK. %s" % (filename, str(response1)))
    if os.environ.get('WENKU8_LOCAL', 'False') == 'True':
        with open('static/%s' % filename, 'wb') as f:
            f.write(data)
        filename = urllib.parse.quote(filename)
        url = '/static/%s' % filename
    else:
        filename = urllib.parse.quote(filename)
        url = 'https://light-novel-1254016670.cos.ap-guangzhou.myqcloud.com/%s' % filename
    lock.acquire()
    th_results[str(book_id)] = url
    lock.release()
    return url
228 | 
229 | 
def v2_dmzj_work(book_id: int, filename: str = None, mlogger=None, image=False):
    """Build a DMZJ EPUB, store it and publish its URL in ``th_results``.

    When ``filename`` is omitted it becomes ``dmzj_<name>.epub``; None is
    returned if the book info cannot be found.  Images are only fetched
    when ``WENKU8_LOCAL`` is ``'True'``, which is also the only case in
    which the file is uploaded to the bucket.  Whenever the upload does
    not happen or fails, the file is written under ``static/`` and a
    ``/static/...`` URL is used.  The URL is stored under
    ``'dmzj_<book_id>'`` and returned.
    """
    de = Dmzj2Epub(logger=mlogger)
    if filename is None:
        info = de.info(book_id)
        if info is None:
            return
        filename = "dmzj_%s.epub" % info['name']
    local_mode = os.environ.get('WENKU8_LOCAL', 'False') == 'True'
    # Images are disabled unless running locally.
    if not local_mode:
        image = False
    data = asyncio.run(de.download_book(book_id, fetch_image=image))
    mlogger.info('小说获取完毕，准备上传到腾讯云...')
    result_key = 'dmzj_' + str(book_id)
    try:
        if not local_mode:
            # Deliberately routes non-local runs to the fallback below.
            raise CosClientError("腾讯云上传取消。")
        response1 = client.put_object(
            Bucket=bucket,
            Body=data,
            Key="%s" % (filename,),
            StorageClass='STANDARD',
            EnableMD5=False
        )
    except Exception as e:
        mlogger.warn("%s 腾讯云上传错误，准备直接返回临时下载链接..." % str(e))
        # Save the file locally and hand out a temporary link.
        with open('static/%s' % filename, 'wb') as f:
            f.write(data)
        url = '/static/%s' % urllib.parse.quote(filename)
        lock.acquire()
        th_results[result_key] = url
        lock.release()
        return url
    mlogger.info("%s OK. %s" % (filename, str(response1)))
    if local_mode:
        with open('static/%s' % filename, 'wb') as f:
            f.write(data)
        url = '/static/%s' % urllib.parse.quote(filename)
    else:
        url = 'https://light-novel-1254016670.cos.ap-guangzhou.myqcloud.com/%s' % urllib.parse.quote(filename)
    lock.acquire()
    th_results[result_key] = url
    lock.release()
    return url
282 | 
283 | 
def v2_check_time(key):
    """Return the ``LastModified`` value of the first object listed for *key*.

    The bucket is listed with *key* as prefix; ``None`` is returned when the
    listing contains no objects.
    """
    listing = client.list_objects(Bucket=bucket, Prefix=key)
    if 'Contents' in listing and len(listing['Contents']) != 0:
        return listing['Contents'][0]['LastModified']
    return None
292 | 
293 | 
def make_urls():
    """Return download links for the published client binaries.

    A presigned URL valid for 30 minutes is generated for every listed
    object; the first two entries are then replaced by a Baidu Pan link.
    """
    ttl_seconds = 30 * 60
    downloads = {
        'static-1254016670': ['wk8local.exe', 'wenku8toepub.exe', 'Wenku8下载_1.1.apk',
                              '网易云音乐下载器_1.2.apk', '方寸之间_2.31.apk'],
    }
    links = [
        client2.get_presigned_download_url(
            Bucket=bucket_name,
            Key=object_key,
            Expired=ttl_seconds
        )
        for bucket_name, object_keys in downloads.items()
        for object_key in object_keys
    ]
    # The two Windows executables are served from Baidu Pan instead.
    baidu_pan = 'https://pan.baidu.com/s/1FljnyZQK2VdeZIl-kd90lw'
    links[0] = baidu_pan
    links[1] = baidu_pan
    return links
314 | 
315 | 
if __name__ == '__main__':
    # Command line:
    #   -s N   first book id to process (default 1)
    #   -e N   last book id to process, inclusive (default 3000)
    #   -b     process every book with work2() instead of work()
    # All options are parsed before any work starts, so their order on the
    # command line no longer matters (previously -b ran immediately and
    # ignored any -s/-e given after it).
    opts, args = getopt.getopt(sys.argv[1:], 's:e:b', [])
    start = 1
    end = 3000
    _runner = work
    for name, val in opts:
        if name in ('-s', '-e'):
            try:
                _bound = int(val)
            except ValueError as e:
                logger.error(str(e))
                # Non-zero status so callers can detect the bad argument.
                sys.exit(1)
            if name == '-s':
                start = _bound
            else:
                end = _bound
        elif name == '-b':
            _runner = work2

    for _book_id in trange(start, end + 1, 1):
        try:
            _runner(_book_id)
        except Exception as e:
            # Keep going: one failing book must not abort the whole batch.
            logger.critical(str(e))


--------------------------------------------------------------------------------
/opds/.gitignore:
--------------------------------------------------------------------------------
1 | /.idea
2 | .pyc


--------------------------------------------------------------------------------
/opds/Config.py:
--------------------------------------------------------------------------------
# coding: UTF-8
# Configuration constants for the OPDS server.

__author__ = 'lei'

# #############################
# Root URL of the OPDS server website. The commented-out values are
# alternative deployments kept for reference.
# SITE_URL = "http://10.10.113.237:5000"
# SITE_URL = "http://opds.cockybook.com"
SITE_URL = '/opds'
# SITE_URL = 'http://192.168.43.203:10086'
# SITE_URL = 'https://light-opds.herokuapp.com'
SITE_TITLE = "Light Novels OPDS Site"
SITE_EMAIL = "LanceLiang2018@163.com"
# URL prefix of the book listing, derived from SITE_URL.
SITE_BOOK_LIST = SITE_URL + "/list"

# Base directory used by the local file system backend.
base = "/home/lance/Books"

# Storage backend selector, read by opdscore.py to pick the FileSystem class.
# filesyste_type = 'LocalFileSystem'
# filesyste_type = 'QiniuFileSystem'
filesyste_type = 'TencentFileSystem'
# filesyste_type = 'LocalMetadataFileSystem'

# Download URLs have the form SITE_BOOK_DONWLOAD/$path/$filename.$postfix
# SITE_BOOK_DONWLOAD = 'http://7sbqcs.com1.z0.glb.clouddn.com'
if filesyste_type == 'TencentFileSystem':
    SITE_BOOK_DONWLOAD = 'https://light-novel-1254016670.cos.ap-guangzhou.myqcloud.com'
else:
    SITE_BOOK_DONWLOAD = 'http://192.168.43.203:10086/static/Books'


# Site description text; SITE_URL is substituted into the "%s" placeholder.
description = u"""
     OPDS 标准核心功能是支持 EPUB 标准和基于 Atom XML 的目录格式.
可以使用阅读器进行在线书库添加，比如FBReader、静读天下（Moon+ Reader）、Aldiko、Stanza等等.
添加地址为:   %s
（轻小说书源提供&修改代码by LanceLiang2018@163.com）
""" % SITE_URL
39 | 


--------------------------------------------------------------------------------
/opds/Const.py:
--------------------------------------------------------------------------------
# coding: UTF-8
# String constants shared by the OPDS feed code.
__author__ = 'lei'

# Feed-level names.
# NOTE(review): ``id`` shadows the Python builtin inside this module; it is
# kept because other modules may reference ``Const.id``.
id = "id"
title = "title"
updated = "updated"
icon = "icon"
author = "author"
link = "link"
description = "description"
search = 'search'

#####################
# Entry-level names.

entry = "entry"
entry_title = "title"
entry_link = "link"
entry_updated = "updated"
entry_id = "id"
entry_type = "type"
entry_content = "content"

##############
# Media types of downloadable book formats.
book_type_pdf = "application/pdf"
book_type_epub = "application/epub+zip"
book_type_mobi = "application/x-mobipocket-ebook"
# Book cover / page images.
book_type_picture = "image/jpeg;image/png"
# Content that can be opened as HTML or plain text.
book_type_html = "text/html"
book_type_text = "text/plain"
book_type_content = "text"
# Link type used for entries that carry no file suffix (see opdscore.py).
book_type_entry_catalog = "application/atom+xml;type=entry;profile=opds-catalog"

# Link relation ("rel") values.
book_link_rel_subsection = "subsection"
book_link_rel_image = "http://opds-spec.org/image"
book_link_rel__image_thumbnail = "http://opds-spec.org/image/thumbnail"
book_link_rel__acquisition = "http://opds-spec.org/acquisition"
40 | 


--------------------------------------------------------------------------------
/opds/Procfile:
--------------------------------------------------------------------------------
1 | web: python opdsserver.py
2 | 


--------------------------------------------------------------------------------
/opds/Procfile.windows:
--------------------------------------------------------------------------------
1 | web: python opdsserver.py
2 | 


--------------------------------------------------------------------------------
/opds/README.MD:
--------------------------------------------------------------------------------
 1 | # cockybook
 2 | 演示地址: [http://opds.cockybook.com/][2]
 3 | 
 4 | OPDS 标准核心功能是支持 EPUB 标准和基于 Atom XML 的目录格式. 可以使用阅读器进行在线书库添加，比如FBReader、静读天下（Moon+ Reader）、Aldiko、Stanza等等
 5 | 
 6 | ## cockybook简介
 7 | cockybook是python开发的一个opds server 的简易书籍共享服务。
 8 | 使用python Flask实现。
 9 | 他的数据源可以是本地存储，也可以是云存储如百度网盘、七牛云存储。当然你可以很方便的自定义实现自己的数据源接口。
10 | 
11 | ## What is OPDS？
12 | OPDS全称是Open Publication Distribution System开放式出版发布系统，使用 Atom 格式，意在为电子书在线目录建立一个公开标准。OPDS 将 RSS 信息源，替换为电子书目录，包括链接到书籍封面和简短摘要的可选链接。使用 OPDS，用户无需到处点击链接，通过电子书应用程序，只需订阅并搜索这些目录，然后就可以将电子书下载到 ebook 阅读器中，不需要再使用浏览器或其他应用程序。
13 | OPDS 标准核心功能是支持 EPUB 标准和基于 Atom XML 的目录格式。
14 | 
15 | ## OPDS协议
16 | 
17 | 可以参考博客：[http://www.cockybook.com/?p=159][3]
18 | 
19 | ## 入口
20 | `opdsserver.py`
21 | 可以直接启动`opdsserver.py`
22 | 这里你可以找到:
23 | 
24 | ```
25 | if __name__ == "__main__":
26 | 
27 |     logging.basicConfig(level=logging.DEBUG,
28 |                 format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
29 |                 )
30 |     app.debug = True
31 |     app.run(host='0.0.0.0')
32 | ```
33 | 
34 | ## 程序结构
35 | 文件系统接口，你可以自由扩展。
36 | `opdscore.py` 中主要类：
37 | 这个类描述需要实现的主要接口。
38 | ```
39 | class OpdsProtocol:
40 |     def listBooks(self, path):
41 |         return ("No Realized")
42 |         pass
43 |     def dowloadBook(self, path):
44 |         return ("No Realized")
45 |         pass
46 |     def showhtml(self):
47 |         return ("No Realized")
48 |         pass
49 | ```
50 | 
51 | `filesystem.py`
52 | 你可以在这个类中实现你的数据源文件操作。
53 | 目前实现的有LocalFileSystem、QiniuFileSystem。
54 | 
55 | ```
56 | class LocalMetadataFileSystem(FileSystem):
57 | class QiniuFileSystem(FileSystem):
58 | ```
59 | 
60 | 
61 | ## 部署
62 | 1. 目前可以在[sinaapp][1]中直接部署。index.wsgi 已经写好。
63 | 2. 可以直接运行`opdsserver.py`进行发布。
64 | 
65 | 
66 | [1]: http://sinaapp.com/
67 | [2]: http://opds.cockybook.com/
68 | [3]: http://www.cockybook.com/?p=159


--------------------------------------------------------------------------------
/opds/app.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "Start on Heroku: Python",
 3 |   "description": "A barebones Python app, which can easily be deployed to Heroku.",
 4 |   "image": "heroku/python",
 5 |   "repository": "https://github.com/heroku/python-getting-started",
 6 |   "keywords": ["python", "django" ],
 7 |   "addons": [ "heroku-postgresql" ],
 8 |   "env": {
 9 |     "SECRET_KEY": {
10 |       "description": "The secret key for the Django application.",
11 |       "generator": "secret"
12 |     }
13 |   },
14 |   "environments": {
15 |     "test": {
16 |       "scripts": {
17 |         "test-setup": "python manage.py collectstatic --noinput",
18 |         "test": "python manage.py test"
19 |       }
20 |     }
21 |   }
22 | }
23 | 


--------------------------------------------------------------------------------
/opds/config.yaml:
--------------------------------------------------------------------------------
1 | name: opdscockybook
2 | version: 1


--------------------------------------------------------------------------------
/opds/filesystem.py:
--------------------------------------------------------------------------------
  1 | # coding: UTF-8
  2 | import logging
  3 | 
  4 | import os
  5 | # import urllib2
  6 | import requests
  7 | import opds.Config as Config
  8 | import json
  9 | 
 10 | from opds.utils import connect_path, getFile
 11 | bookdata = {}
 12 | 
 13 | from qcloud_cos import CosConfig
 14 | from qcloud_cos import CosS3Client
 15 | 
 16 | 
 17 | __author__ = 'lei'
 18 | __author2__ = 'Lance'
 19 | 
 20 | 
 21 | # base="/home/cocky"
 22 | class FileSystem:
 23 |     def outErr(self):
 24 |         logging.error("No Realyzed")
 25 | 
 26 |     def exists(self, path):
 27 |         self.outErr()
 28 |         pass
 29 | 
 30 |     def isfile(self, path):
 31 |         self.outErr()
 32 |         pass
 33 | 
 34 |     def listdir(self, path):
 35 |         self.outErr()
 36 |         return []
 37 |         pass
 38 | 
 39 |     def getdownloadurl(self, path, name):
 40 |         self.outErr()
 41 |         return ""
 42 | 
 43 | 
 44 | class LocalFileSystem(FileSystem):
 45 |     """
 46 |     config the #Config.base
 47 |     """
 48 | 
 49 |     def __init__(self):
 50 |         self.path = ''
 51 | 
 52 |     def exists(self, path):
 53 |         if path is None:
 54 |             path = self.path
 55 |         return os.path.exists(connect_path(Config.base, path))
 56 | 
 57 |     def isfile(self, path):
 58 |         if path is None:
 59 |             path = self.path
 60 |         # print('isf', connect_path(Config.base, path))
 61 |         return os.path.isfile(connect_path(Config.base, path))
 62 | 
 63 |     def listdir(self, path):
 64 |         if path is None:
 65 |             path = self.path
 66 |         # print('listdir', os.listdir(connect_path(Config.base, path)))
 67 |         return os.listdir(connect_path(Config.base, path))
 68 | 
 69 |     def getdownloadurl(self, path, name):
 70 |         # print('down url:', connect_path(connect_path(Config.SITE_BOOK_DONWLOAD, path), name))
 71 |         # 这里有问题。已经修改
 72 |         return [connect_path(connect_path(Config.SITE_BOOK_DONWLOAD, path), name), ]
 73 | 
 74 | 
 75 | class LocalMetadataFileSystem(FileSystem):
 76 |     # q = Auth(Config.access_key, Config.secret_key)
 77 | 
 78 |     # bucket = BucketManager(q)
 79 |     def __init__(self):
 80 |         ff = open('metadata.json', 'r')
 81 | 
 82 |         self.book_trees = json.load(ff)
 83 | 
 84 |     def exists(self, path):
 85 |         files = getFile(self.book_trees, self.getTruePaths(path))
 86 |         return files != None
 87 | 
 88 |     def isfile(self, path):
 89 |         if path is None:
 90 |             return False
 91 |         # ???为啥放_-_
 92 |         if path.find('_-_') == -1:
 93 |             return False
 94 |         else:
 95 |             return True
 96 | 
 97 |     def listdir(self, path):
 98 |         paths = self.getTruePaths(path)
 99 | 
100 |         if len(paths) != 0:
101 |             return getFile(self.book_trees, paths)
102 |         else:
103 |             return self.book_trees
104 | 
105 |     def getTruePaths(self, tmp):
106 |         """
107 |         :param tmp:
108 |         :return:
109 |         """
110 |         paths = tmp.split('/')
111 |         paths = [p for p in paths if p != '']
112 |         return paths
113 | 
114 |     def getdownloadurl(self, path, name):
115 |         tmp = connect_path(path, name)
116 | 
117 |         files = getFile(self.book_trees, self.getTruePaths(tmp))
118 | 
119 |         return [connect_path(Config.SITE_BOOK_DONWLOAD, connect_path(path, ee)) for ee in files]
120 | 
121 | 
class QiniuFileSystem(FileSystem):
    """Backend whose book tree is a ``metadata.json`` fetched over HTTP.

    The file is downloaded from ``Config.SITE_BOOK_DONWLOAD`` when the
    instance is created.
    """

    def __init__(self):
        # Start with an empty tree so every method keeps working when the
        # download fails (the attribute used to stay undefined in that case).
        self.book_trees = {}
        resp = requests.get(connect_path(Config.SITE_BOOK_DONWLOAD, 'metadata.json'))
        if resp.status_code == 200:
            self.book_trees = json.loads(resp.text)
        else:
            logging.error("Could not load metadata.json: HTTP %s", resp.status_code)

    def outErr(self):
        """Log that the called operation is not implemented."""
        logging.error("No Realyzed")

    def exists(self, path):
        """Return True when *path* resolves to a non-empty node of the tree."""
        files = getFile(self.book_trees, self.getTruePaths(path))
        # ``files`` may be None for an unknown path; treat that as "missing".
        return bool(files)

    def isfile(self, path):
        """Return True when *path* contains the ``_-_`` marker."""
        return path is not None and '_-_' in path

    def listdir(self, path):
        """Return the subtree at *path*, or the whole tree for the root."""
        paths = self.getTruePaths(path)
        if paths:
            return getFile(self.book_trees, paths)
        return self.book_trees

    def getTruePaths(self, tmp):
        """Split *tmp* on ``/`` and drop the empty components.

        :param tmp: slash-separated catalogue path
        :return: list of non-empty path components
        """
        return [part for part in tmp.split('/') if part != '']

    def getdownloadurl(self, path, name):
        """Return the download URLs of every file stored for book *name*.

        An empty list is returned when the book is not in the tree.
        """
        files = getFile(self.book_trees, self.getTruePaths(connect_path(path, name)))
        if files is None:
            return []
        return [connect_path(Config.SITE_BOOK_DONWLOAD, connect_path(path, ee)) for ee in files]
169 | 
170 | 
class TencentFileSystem(FileSystem):
    """Backend that lists books stored in a Tencent Cloud COS bucket."""

    def __init__(self):
        logging.info('正在获取密码...')

        import base64
        # SECURITY NOTE(review): the COS SecretId/SecretKey are embedded in
        # this base64 blob and therefore readable by anyone with access to
        # the source. They should be rotated and loaded from configuration
        # or the environment instead.
        password_data = json.loads(base64.b64decode("ewogICAgImNvZGUiOiAwLAogICAgImlkIjogIkFLSUQyc1RxenZYN05QQ3JIUlAxUmVjS24wMG1KYmZVT01RRSIsCiAgICAia2V5IjogImlCT001WW1rNUM1anZzWjBEQXJJVE85ZXV1ZkNhbWtUIgp9").decode())
        if password_data['code'] != 0:
            logging.error('密码无效！进入只读模式！')
        else:
            # Only report success when the code is valid; the success line
            # used to be logged unconditionally.
            logging.info('密码正确！')

        secret_id = password_data['id']
        secret_key = password_data['key']
        region = 'ap-guangzhou'
        config = CosConfig(Region=region, SecretId=secret_id, SecretKey=secret_key)
        self.client = CosS3Client(config)
        self.bucket = 'light-novel-1254016670'
        # Keys found by the most recent listdir() call ...
        self.booklist = []
        # ... and the per-object details for the same call.
        self.bookdata = []

    def outErr(self):
        """Log a generic backend error."""
        logging.error("Tencent File System Error...")

    def exists(self, path):
        """Always report True; no request is made to the bucket."""
        return True

    def isfile(self, path):
        """Treat every path except the root ``/`` as a file."""
        if path == '/':
            return False
        return True

    def listdir(self, path, page=4):
        """List object keys whose name starts with *path*.

        :param path: key prefix; a single leading ``/`` is stripped and
            ``None``/empty means the whole bucket.
        :param page: maximum number of result pages (1000 keys each) to fetch.
        :return: list of object keys. Per-object details are kept in
            ``self.bookdata`` and mirrored into the module-level ``bookdata``
            dict keyed by object key.
        """
        global bookdata
        if path is None or len(path) == 0:
            path = ''
        elif path[0] == '/':
            path = path[1:]
        last_marker = ''
        data = None
        self.booklist = []
        self.bookdata = []

        while page > 0:
            resp = self.client.list_objects(Bucket=self.bucket,
                                            Prefix=path,
                                            MaxKeys=1000,
                                            Marker=last_marker,
                                            )
            data = dict(resp)
            # Collect this page BEFORE looking at the continuation marker:
            # the final (or only) page carries no NextMarker and used to be
            # dropped entirely.
            for book in data.get('Contents', []):
                key, last_modified, e_tag, size = book['Key'], book['LastModified'], book['ETag'], book['Size']
                self.booklist.append(key)
                self.bookdata.append({
                    'key': key,
                    'last_modified': last_modified,
                    'e_tag': e_tag,
                    'size': size
                })
            # No (or empty) marker means this was the last page.
            if not data.get('NextMarker'):
                break
            last_marker = data['NextMarker']
            page -= 1

        if data is None:
            # page <= 0: nothing was requested.
            return []
        bookdata = {d['key']: d for d in self.bookdata}
        return self.booklist

    def getTruePaths(self, tmp):
        """Unused for this backend; always returns an empty string."""
        return ''

    def getdownloadurl(self, path, name):
        """Return a one-element list with the public URL of ``path/name``."""
        return [Config.SITE_BOOK_DONWLOAD + path + '/' + name]
271 | 
272 | 
if __name__ == '__main__':
    # Manual smoke test: print the keys listed for the bucket root.
    print(TencentFileSystem().listdir('/'))
276 | 


--------------------------------------------------------------------------------
/opds/generate.py:
--------------------------------------------------------------------------------
 1 | # coding: UTF-8
 2 | import os, json, sys
 3 | 
 4 | 
 5 | def getTree(path):
 6 |     rs = {}
 7 |     for filename in os.listdir(path):
 8 | 
 9 |         print("filename :", filename)
10 |         tmpname = os.path.join(path, filename)
11 |         if os.path.isdir(tmpname):
12 |             rs[filename] = getTree(tmpname)
13 |         else:
14 |             justname = filename[:filename.rfind('.'):]
15 | 
16 |             # if rs.has_key(justname):
17 |             if justname in rs:
18 |                 rs[justname].append(filename)
19 |             else:
20 |                 rs[justname] = [filename]
21 |     return rs
22 | 
23 | 
def writeMetadata(rsjson):
    """Write *rsjson* as indented JSON to ``metadata.json`` in the cwd.

    The previous implementation passed the Python 2-only ``encoding``
    argument to ``json.dumps`` and wrote bytes to a text-mode file, both of
    which raise ``TypeError`` on Python 3.

    :param rsjson: JSON-serialisable book tree
    """
    # Non-ASCII characters are written as \uXXXX escapes (json default).
    with open("metadata.json", mode='w', encoding='utf8') as ff:
        ff.write(json.dumps(rsjson, indent=4))
29 | 
30 | 
def generateMetadataXml():
    """Scan the current directory and write its book tree to metadata.json.

    The ``generate`` and ``metadata`` entries are removed from the tree
    before it is written.
    """
    tree = getTree('.')
    for own_entry in ('generate', 'metadata'):
        tree.pop(own_entry, None)
    writeMetadata(tree)
42 | 
43 | 
def getFile(jjson, paths):
    """Look up the node addressed by *paths* inside the book tree *jjson*.

    :param jjson: nested dict as produced by ``getTree``
    :param paths: list of keys to follow from the root; a final ``''``
        component returns the node reached so far
    :return: the addressed node, or ``None`` when *paths* is empty or any
        component is missing. A missing intermediate component used to
        raise ``KeyError`` while a missing last one returned ``None``.
    """
    if not paths:
        return None

    node = jjson
    last = len(paths) - 1
    for depth, key in enumerate(paths):
        if depth == last and key == '':
            return node
        if key not in node:
            print('Jjson', json.dumps(node))
            print('No this Key:', key)
            return None
        node = node[key]
    return node
58 | 
59 | 
if __name__ == '__main__':
    # Run as a script: regenerate metadata.json for the current directory.
    generateMetadataXml()
68 | 


--------------------------------------------------------------------------------
/opds/index.wsgi:
--------------------------------------------------------------------------------
# WSGI entry point: wraps the Flask app from opdsserver with the ``sae``
# helper. NOTE(review): presumably used for the sinaapp deployment mentioned
# in the README - confirm before removing.
import sae
from opdsserver import app

application = sae.create_wsgi_app(app)
5 | 


--------------------------------------------------------------------------------
/opds/metadata.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "\u4f5b\u5b66": {
 3 |         "\u836f\u5e08\u7ecf\u7684\u6d4e\u4e16\u89c2_-_\u5357\u6000\u747e": [
 4 |             "\u836f\u5e08\u7ecf\u7684\u6d4e\u4e16\u89c2_-_\u5357\u6000\u747e.jpg", 
 5 |             "\u836f\u5e08\u7ecf\u7684\u6d4e\u4e16\u89c2_-_\u5357\u6000\u747e.mobi", 
 6 |             "\u836f\u5e08\u7ecf\u7684\u6d4e\u4e16\u89c2_-_\u5357\u6000\u747e.opf", 
 7 |             "\u836f\u5e08\u7ecf\u7684\u6d4e\u4e16\u89c2_-_\u5357\u6000\u747e.pdf", 
 8 |             "\u836f\u5e08\u7ecf\u7684\u6d4e\u4e16\u89c2_-_\u5357\u6000\u747e.txt"
 9 |         ], 
10 |         "\u4e00\u4e2a\u5b66\u4f5b\u8005\u7684\u57fa\u672c\u4fe1\u5ff5\u2014\u2014\u534e\u4e25\u7ecf\u666e\u8d24\u884c\u613f\u54c1\u8bb2\u5f55_-_\u5357\u6000\u747e": [
11 |             "\u4e00\u4e2a\u5b66\u4f5b\u8005\u7684\u57fa\u672c\u4fe1\u5ff5\u2014\u2014\u534e\u4e25\u7ecf\u666e\u8d24\u884c\u613f\u54c1\u8bb2\u5f55_-_\u5357\u6000\u747e.jpg", 
12 |             "\u4e00\u4e2a\u5b66\u4f5b\u8005\u7684\u57fa\u672c\u4fe1\u5ff5\u2014\u2014\u534e\u4e25\u7ecf\u666e\u8d24\u884c\u613f\u54c1\u8bb2\u5f55_-_\u5357\u6000\u747e.mobi", 
13 |             "\u4e00\u4e2a\u5b66\u4f5b\u8005\u7684\u57fa\u672c\u4fe1\u5ff5\u2014\u2014\u534e\u4e25\u7ecf\u666e\u8d24\u884c\u613f\u54c1\u8bb2\u5f55_-_\u5357\u6000\u747e.opf", 
14 |             "\u4e00\u4e2a\u5b66\u4f5b\u8005\u7684\u57fa\u672c\u4fe1\u5ff5\u2014\u2014\u534e\u4e25\u7ecf\u666e\u8d24\u884c\u613f\u54c1\u8bb2\u5f55_-_\u5357\u6000\u747e.pdf", 
15 |             "\u4e00\u4e2a\u5b66\u4f5b\u8005\u7684\u57fa\u672c\u4fe1\u5ff5\u2014\u2014\u534e\u4e25\u7ecf\u666e\u8d24\u884c\u613f\u54c1\u8bb2\u5f55_-_\u5357\u6000\u747e.txt"
16 |         ]
17 |     }, 
18 |     "\u6570\u5b66": {
19 |         "\u7edf\u8ba1\u5b66\u4e60\u65b9\u6cd5_-_\u674e\u822a": [
20 |             "\u7edf\u8ba1\u5b66\u4e60\u65b9\u6cd5_-_\u674e\u822a.epub", 
21 |             "\u7edf\u8ba1\u5b66\u4e60\u65b9\u6cd5_-_\u674e\u822a.jpg", 
22 |             "\u7edf\u8ba1\u5b66\u4e60\u65b9\u6cd5_-_\u674e\u822a.mobi", 
23 |             "\u7edf\u8ba1\u5b66\u4e60\u65b9\u6cd5_-_\u674e\u822a.opf"
24 |         ]
25 |     }, 
26 |     "\u5fc3\u7406\u5b66": {
27 |         "\u4e4c\u5408\u4e4b\u4f17\u2014\u5927\u4f17\u5fc3\u7406\u7814\u7a76_-_\u53e4\u65af\u5854\u592b\u00b7\u52d2\u5e9e": [
28 |             "\u4e4c\u5408\u4e4b\u4f17\u2014\u5927\u4f17\u5fc3\u7406\u7814\u7a76_-_\u53e4\u65af\u5854\u592b\u00b7\u52d2\u5e9e.jpg", 
29 |             "\u4e4c\u5408\u4e4b\u4f17\u2014\u5927\u4f17\u5fc3\u7406\u7814\u7a76_-_\u53e4\u65af\u5854\u592b\u00b7\u52d2\u5e9e.mobi", 
30 |             "\u4e4c\u5408\u4e4b\u4f17\u2014\u5927\u4f17\u5fc3\u7406\u7814\u7a76_-_\u53e4\u65af\u5854\u592b\u00b7\u52d2\u5e9e.opf"
31 |         ]
32 |     }, 
33 |     "IT\u8bfb\u7269": {
34 |         "\u9ed1\u5ba2\u4e0e\u753b\u5bb6_-_\u4fdd\u7f57\u00b7\u683c\u96f7\u5384\u59c6": [
35 |             "\u9ed1\u5ba2\u4e0e\u753b\u5bb6_-_\u4fdd\u7f57\u00b7\u683c\u96f7\u5384\u59c6.epub", 
36 |             "\u9ed1\u5ba2\u4e0e\u753b\u5bb6_-_\u4fdd\u7f57\u00b7\u683c\u96f7\u5384\u59c6.jpg", 
37 |             "\u9ed1\u5ba2\u4e0e\u753b\u5bb6_-_\u4fdd\u7f57\u00b7\u683c\u96f7\u5384\u59c6.mobi", 
38 |             "\u9ed1\u5ba2\u4e0e\u753b\u5bb6_-_\u4fdd\u7f57\u00b7\u683c\u96f7\u5384\u59c6.opf"
39 |         ], 
40 |         "\u5f02\u7c7b-\u8001\u7f57\u63a8\u8350_-_\u9a6c\u5c14\u79d1\u59c6\u00b7\u683c\u62c9\u5fb7\u5a01\u5c14": [
41 |             "\u5f02\u7c7b-\u8001\u7f57\u63a8\u8350_-_\u9a6c\u5c14\u79d1\u59c6\u00b7\u683c\u62c9\u5fb7\u5a01\u5c14.epub", 
42 |             "\u5f02\u7c7b-\u8001\u7f57\u63a8\u8350_-_\u9a6c\u5c14\u79d1\u59c6\u00b7\u683c\u62c9\u5fb7\u5a01\u5c14.jpg", 
43 |             "\u5f02\u7c7b-\u8001\u7f57\u63a8\u8350_-_\u9a6c\u5c14\u79d1\u59c6\u00b7\u683c\u62c9\u5fb7\u5a01\u5c14.mobi", 
44 |             "\u5f02\u7c7b-\u8001\u7f57\u63a8\u8350_-_\u9a6c\u5c14\u79d1\u59c6\u00b7\u683c\u62c9\u5fb7\u5a01\u5c14.opf", 
45 |             "\u5f02\u7c7b-\u8001\u7f57\u63a8\u8350_-_\u9a6c\u5c14\u79d1\u59c6\u00b7\u683c\u62c9\u5fb7\u5a01\u5c14.pdf"
46 |         ], 
47 |         "\u7985\u4e0e\u6469\u6258\u8f66\u7ef4\u4fee\u827a\u672f_-_\u7f57\u4f2f\u7279\u00b7M\u00b7\u6ce2\u897f\u683c": [
48 |             "\u7985\u4e0e\u6469\u6258\u8f66\u7ef4\u4fee\u827a\u672f_-_\u7f57\u4f2f\u7279\u00b7M\u00b7\u6ce2\u897f\u683c.jpg", 
49 |             "\u7985\u4e0e\u6469\u6258\u8f66\u7ef4\u4fee\u827a\u672f_-_\u7f57\u4f2f\u7279\u00b7M\u00b7\u6ce2\u897f\u683c.mobi", 
50 |             "\u7985\u4e0e\u6469\u6258\u8f66\u7ef4\u4fee\u827a\u672f_-_\u7f57\u4f2f\u7279\u00b7M\u00b7\u6ce2\u897f\u683c.opf"
51 |         ]
52 |     }, 
53 |     "\u8f6f\u8003": {
54 |         "\u7cfb\u7edf\u96c6\u6210\u9879\u76ee\u7ba1\u7406\u5de5\u7a0b\u5e08_-_wwww.cockybook.com": [
55 |             "\u7cfb\u7edf\u96c6\u6210\u9879\u76ee\u7ba1\u7406\u5de5\u7a0b\u5e08_-_wwww.cockybook.com.epub", 
56 |             "\u7cfb\u7edf\u96c6\u6210\u9879\u76ee\u7ba1\u7406\u5de5\u7a0b\u5e08_-_wwww.cockybook.com.jpg", 
57 |             "\u7cfb\u7edf\u96c6\u6210\u9879\u76ee\u7ba1\u7406\u5de5\u7a0b\u5e08_-_wwww.cockybook.com.mobi", 
58 |             "\u7cfb\u7edf\u96c6\u6210\u9879\u76ee\u7ba1\u7406\u5de5\u7a0b\u5e08_-_wwww.cockybook.com.opf", 
59 |             "\u7cfb\u7edf\u96c6\u6210\u9879\u76ee\u7ba1\u7406\u5de5\u7a0b\u5e08_-_wwww.cockybook.com.pdf"
60 |         ]
61 |     }, 
62 |     "\u79d1\u6280": {
63 |         "Protocol_\u534f\u8bae\u68ee\u6797_-_Tengfei_Zhang": [
64 |             "Protocol_\u534f\u8bae\u68ee\u6797_-_Tengfei_Zhang.epub", 
65 |             "Protocol_\u534f\u8bae\u68ee\u6797_-_Tengfei_Zhang.jpg", 
66 |             "Protocol_\u534f\u8bae\u68ee\u6797_-_Tengfei_Zhang.mobi", 
67 |             "Protocol_\u534f\u8bae\u68ee\u6797_-_Tengfei_Zhang.opf", 
68 |             "Protocol_\u534f\u8bae\u68ee\u6797_-_Tengfei_Zhang.pdf"
69 |         ], 
70 |         "Flask_0.10.1_\u6587\u6863_-_Armin_Ronacher": [
71 |             "Flask_0.10.1_\u6587\u6863_-_Armin_Ronacher.jpg", 
72 |             "Flask_0.10.1_\u6587\u6863_-_Armin_Ronacher.mobi", 
73 |             "Flask_0.10.1_\u6587\u6863_-_Armin_Ronacher.opf"
74 |         ], 
75 |         "\u4e16\u754c\u662f\u6570\u5b57\u7684_-_\u7a00\u9177\u5ba2(www.ckook.com)": [
76 |             "\u4e16\u754c\u662f\u6570\u5b57\u7684_-_\u7a00\u9177\u5ba2(www.ckook.com).jpg", 
77 |             "\u4e16\u754c\u662f\u6570\u5b57\u7684_-_\u7a00\u9177\u5ba2(www.ckook.com).mobi", 
78 |             "\u4e16\u754c\u662f\u6570\u5b57\u7684_-_\u7a00\u9177\u5ba2(www.ckook.com).opf", 
79 |             "\u4e16\u754c\u662f\u6570\u5b57\u7684_-_\u7a00\u9177\u5ba2(www.ckook.com).pdf"
80 |         ]
81 |     }
82 | }


--------------------------------------------------------------------------------
/opds/opdscore.py:
--------------------------------------------------------------------------------
  1 | # coding: UTF-8
  2 | import logging
  3 | import os
  4 | from xml.dom.minidom import Document, Text, Element
  5 | import datetime
  6 | from flask import g
  7 | import opds.Config as Config
  8 | import opds.Const as Const
  9 | from opds.filesystem import LocalFileSystem, QiniuFileSystem, LocalMetadataFileSystem, TencentFileSystem
 10 | import opds.utils as utils
 11 | 
 12 | __author__ = 'lei'
 13 | if Config.filesyste_type == 'LocalFileSystem':
 14 |     fs = LocalFileSystem()
 15 | elif Config.filesyste_type == 'LocalMetadataFileSystem':
 16 |     fs = LocalMetadataFileSystem()
 17 | elif Config.filesyste_type == 'TencentFileSystem':
 18 |     fs = TencentFileSystem()
 19 | else:
 20 |     fs = QiniuFileSystem()
 21 | 
 22 | 
 23 | def setfeedNS(feed):
 24 |     feed.setAttribute("xmlns:app", "http://www.w3.org/2007/app")
 25 |     feed.setAttribute("xmlns:opds", "http://opds-spec.org/2010/catalog")
 26 |     feed.setAttribute("xmlns:opds", Config.SITE_URL)
 27 |     feed.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
 28 |     # feed.setAttribute("xmlns", "http://www.w3.org/2005/Atom")
 29 |     feed.setAttribute("xmlns:dcterms", "http://purl.org/dc/terms/")
 30 |     feed.setAttribute("xmlns:thr", "http://purl.org/syndication/thread/1.0")
 31 |     feed.setAttribute("xmlns:opensearch", "http://a9.com/-/spec/opensearch/1.1/")
 32 | 
 33 | 
 34 | def getCreateDate(file_path):
 35 |     # return datetime.datetime.now(os.path.getctime(file_path)).strftime("%Y-%m-%dT%I:%M:%SZ")
 36 |     return datetime.datetime.now().strftime("%Y-%m-%dT%I:%M:%SZ")
 37 | 
 38 | 
 39 | def create_entry(isFile, path, name):
 40 |     '''
 41 |     create filesystem return object
 42 |     :param isFile:
 43 |     :param path:
 44 |     :param name:
 45 |     :return:
 46 |     '''
 47 |     entry = Entry()
 48 |     if not isFile:
 49 |         entry.id = utils.connect_path(utils.connect_path(Config.SITE_BOOK_LIST, path), name)
 50 |         entry.links = []
 51 |         entry.links.append(Link(entry.id, _get_book_entry_rel(name), name, _get_book_entry_type(name)))
 52 |     else:
 53 |         entry.id = utils.connect_path(utils.connect_path(Config.SITE_BOOK_LIST, path), name)
 54 |         # TODO add Another Links
 55 |         links = fs.getdownloadurl(path, name)
 56 |         # name=os.path.basename(path)
 57 |         entry.links = []
 58 |         if links != None:
 59 |             for link in links:
 60 |                 entry.links.append(Link(link, _get_book_entry_rel(link), name, _get_book_entry_type(link)))
 61 |     entry.content = name
 62 |     entry.title = name
 63 |     entry.updated = utils.getUpdateTime(name)
 64 |     return entry
 65 | 
 66 | 
 67 | def create__single_entry(isFile, path, name):
 68 |     '''
 69 |     create filesystem return object for file request
 70 |     :param isFile:
 71 |     :param path:
 72 |     :param name:
 73 |     :return:
 74 |     '''
 75 |     entry = Entry()
 76 |     if not isFile:
 77 |         entry.id = utils.connect_path(utils.connect_path(Config.SITE_BOOK_LIST, path), name)
 78 |         entry.links = []
 79 |         entry.links.append(Link(entry.id, _get_book_entry_rel(name), name, _get_book_entry_type(name)))
 80 |     else:
 81 |         entry.id = utils.connect_path(utils.connect_path(Config.SITE_BOOK_LIST, path), name)
 82 |         # TODO add Another Links
 83 |         links = fs.getdownloadurl(os.path.dirname(path), name)
 84 |         entry.links = []
 85 |         if links != None:
 86 |             for link in links:
 87 |                 entry.links.append(Link(link, _get_book_entry_rel(link), name, _get_book_entry_type(link)))
 88 |     entry.content = name
 89 |     entry.title = name
 90 |     entry.updated = utils.getNow()
 91 |     return entry
 92 | 
 93 | 
 94 | def _get_book_entry_type(name):
 95 |     """
 96 |     get link type
 97 |     """
 98 |     if name.endswith(".pdf"):
 99 |         return Const.book_type_pdf
100 |     elif name.endswith(".epub"):
101 |         return Const.book_type_epub
102 |     elif name.endswith(".jpg"):
103 |         return Const.book_type_picture
104 |     elif name.endswith(".mobi"):
105 |         return Const.book_type_mobi
106 |     elif name.endswith(".txt"):
107 |         return Const.book_type_text
108 |     elif name.find('.') != -1:
109 |         return Const.book_type_content
110 |     else:
111 |         # No subifx
112 |         return Const.book_type_entry_catalog
113 | 
114 | 
def _get_book_entry_rel(name):
    """
    Return the link ``rel`` constant for a file name or URL.

    The match is a case-sensitive test on the end of ``name``.
    """
    if name.endswith(".jpg"):
        return Const.book_link_rel_image
    if name.endswith((".pdf", ".epub", ".mobi", ".txt")):
        return Const.book_link_rel__acquisition
    # Any other name, with or without a suffix, is a subsection.
    return Const.book_link_rel_subsection
134 | 
135 | 
class FeedDoc:
    """
    Builds the feed XML on top of a minidom Document: an xml-stylesheet
    processing instruction followed by a single ``feed`` root element.
    """

    def __init__(self, doc, path=None):
        """
        Attach the stylesheet instruction and the ``feed`` element to doc.

        :param doc:  Document()
        :param path: requested path; when ``fs.isfile(path)`` is true the
                     detail stylesheet is referenced, otherwise the list one
        :return:
        """
        self.doc = doc
        # xml-stylesheet
        if fs.isfile(path):
            stylesheet = "type=\"text/xsl\" href=\"%s/static/bookdetail.xsl\"" % Config.SITE_URL
        else:
            stylesheet = "type=\"text/xsl\" href=\"%s/static/booklist.xsl\"" % Config.SITE_URL
        self.doc.appendChild(self.doc.createProcessingInstruction("xml-stylesheet", stylesheet))

        # feed
        self.feed = self.doc.createElement("feed")
        setfeedNS(self.feed)
        feed_fields = (
            (Const.id, Config.SITE_URL),
            (Const.author, Config.SITE_EMAIL),
            (Const.title, Config.SITE_TITLE),
            (Const.updated, utils.getNow()),
            (Const.description, Config.description),
        )
        for key, value in feed_fields:
            self.addNode(self.feed, key, value)
        # createLink(entry, href, rel, title, type)
        self.createLink(self.feed, Config.SITE_URL, "Home", "Home",
                        "application/atom+xml; profile=opds-catalog; kind=navigation")
        # self.createLink(self.feed, 'search.xml', Const.search, "Search",
        #                 "application/opensearchdescription+xml")

        self.doc.appendChild(self.feed)

    def addNode(self, element, key, value, link=None):
        """
        Append a child to element.

        :param element: parent element
        :param key:     tag name of the new child; ignored when value is an Element
        :param value:   an Element, appended as-is, or the text for a new
                        ``key`` element
        :param link:    not used by this method
        :return:
        """
        if isinstance(value, Element):
            element.appendChild(value)
            return
        child = self.doc.createElement(key)
        child.appendChild(self.doc.createTextNode(value))
        element.appendChild(child)

    def toString(self):
        """Return the document pretty-printed and encoded as UTF-8."""
        # return self.doc.toxml("utf-8")
        return self.doc.toprettyxml(encoding='utf-8')

    def createEntry(self, entry):
        """
        Append an entry element for entry (title, updated, id, content and
        one link element per item of ``entry.links``) to the feed.
        """
        entry_node = self.doc.createElement(Const.entry)
        entry_fields = (
            (Const.entry_title, entry.title),
            (Const.entry_updated, entry.updated),
            (Const.entry_id, entry.id),
            (Const.entry_content, entry.content),
        )
        for key, value in entry_fields:
            self.addNode(entry_node, key, value)
        for item in entry.links:
            self.createLink(entry_node, item.href, item.rel, item.title, item.type)
        self.feed.appendChild(entry_node)

    def createLink(self, entry, href, rel, title, type):
        """
        Append a link element to entry and return it.

        href is written both as an attribute and as the element's text.
        """
        link = self.doc.createElement(Const.link)
        for attr_name, attr_value in (("href", href), ("rel", rel), ("title", title), ("type", type)):
            link.setAttribute(attr_name, attr_value)
        link.appendChild(self.doc.createTextNode(href))
        entry.appendChild(link)
        return link
214 | 
215 | 
class Entry:
    """
    One feed entry: title, updated timestamp, id, content and its links.
    """

    def __init__(self, title=None, updated=None, id=None, content=None, links=None):
        """
        :param title:   entry title
        :param updated: timestamp text
        :param id:      entry id
        :param content: entry content text
        :param links:   list of Link objects; a new empty list is created
                        when omitted
        """
        # A mutable default ([]) is created once and shared by every instance
        # built without ``links``, so appending to one entry's links would
        # leak into all the others.
        self.links = [] if links is None else links
        self.content = content
        self.id = id
        self.updated = updated
        self.title = title
223 | 
224 | 
class Link:
    """
    Plain holder for the four values of a feed link:
    href, rel, title and type.
    """

    def __init__(self, href, rel, title, type):
        self.href, self.rel = href, rel
        self.title, self.type = title, type
235 | 
236 | 
class OpdsProtocol:
    """
    Interface every OPDS file system must implement.

    This implementation works on the module-level ``fs`` object.
    """

    @staticmethod
    def _decode_name(name):
        """Return name as text: bytes are tried as UTF-8, then as GBK."""
        if not isinstance(name, (bytes, bytearray)):
            # Already text, nothing to decode.
            return name
        for encoding in ("utf-8", "gbk"):
            try:
                return name.decode(encoding)
            except UnicodeDecodeError:
                continue
        # Best effort: keep the undecodable name unchanged.
        return name

    def listBooks(self, path):
        """
        List the entries for path.

        :param path: requested path; '/' is listed without an existence check
        :return: list of Entry objects; empty when path does not exist, one
                 detail entry when path is a file, otherwise one entry per
                 name returned by ``fs.listdir(path)``
        """
        # not exist!
        if path != '/' and not fs.exists(path):
            logging.info("dest Path [%s] is Not Exist.", path)
            return []

        if fs.isfile(path):
            logging.info("dest Path [%s] is a File Not Right.", path)
            g.book_process = "detail"
            return [create__single_entry(True, path, os.path.basename(path))]

        rslist = []
        for name in fs.listdir(path):
            name = self._decode_name(name)
            file_path = utils.connect_path(path, name)
            rslist.append(create_entry(fs.isfile(file_path), path, name))
        return rslist

    def dowloadBook(self, path):
        """
        Return the location of the book to send.

        :param path: requested book path
        :return: ``utils.connect_path(Config.base, path)``
        """
        return utils.connect_path(Config.base, path)

    def showhtml(self):
        """Placeholder: returns the fixed text "No Realized"."""
        return ("No Realized")
290 | 


--------------------------------------------------------------------------------
/opds/opdsserver.py:
--------------------------------------------------------------------------------
 1 | # coding: UTF-8
 2 | 
 3 | from xml.dom.minidom import Document
 4 | from flask import Flask, send_file, make_response, g
 5 | import opds.Const as Const
 6 | from opds.opdscore import FeedDoc, Link, OpdsProtocol, Entry
 7 | import os
 8 | 
 9 | import opds.Config as Config
10 | 
11 | import opds.utils as utils
12 | 
13 | import logging
14 | 
15 | __author__ = 'lei'
16 | 
17 | app = Flask(__name__)
18 | 
19 | 
@app.route("/static/<path:stcpath>")
def css(stcpath):
    """
    Serve a file from the application's static folder.

    :param stcpath: path of the static file, taken from the URL
    :return: the response produced by ``app.send_static_file``
    """
    static_response = app.send_static_file(stcpath)
    return static_response
23 | 
24 | 
@app.route("/")
def root():
    """
    Serve the top-level feed: a single "Book List" entry whose link points
    at ``Config.SITE_BOOK_LIST``.
    """
    catalog = FeedDoc(Document())
    book_list_url = Config.SITE_BOOK_LIST
    # TODO add Another Links
    book_list = Entry(
        title="Book List",
        updated=utils.getNow(),
        id=book_list_url,
        content="all Books List By Type",
        links=[Link(book_list_url, Const.book_link_rel_subsection, "Book List", Const.book_type_entry_catalog)],
    )
    catalog.createEntry(book_list)

    resp = make_response(catalog.toString())
    resp.headers['Content-Type'] = 'application/xml; profile=opds-catalog; kind=navigation'
    return resp
43 | 
44 | 
# NOTE(review): the ``string`` converter does not match '/', so only
# single-segment paths reach this view - confirm nested folders are not
# expected here (the download route uses ``path``).
@app.route('/list')
@app.route('/list/<string:path>')
def listbooks(path="/"):
    """
    Serve the feed for path: one entry per item returned by
    ``OpdsProtocol.listBooks``.

    :param path: path to list; defaults to "/" for the bare ``/list`` URL
    :return: XML response
    """
    feed = FeedDoc(Document(), path)

    # TODO add *** to feed.toString()
    for book_entry in getOpdsProtocol().listBooks(path):
        feed.createEntry(book_entry)

    resp = make_response(feed.toString())
    resp.headers['Content-Type'] = 'text/xml; profile=opds-catalog; kind=navigation'
    return resp
61 | 
62 | 
@app.route('/download/<path:path>')
def download(path):
    """
    Send the book file that path resolves to.

    :param path: book path taken from the URL
    :return: the ``send_file`` response
    """
    # NOTE(review): ``path`` comes straight from the URL and the resolved
    # location is handed to send_file; confirm that utils.connect_path /
    # Config.base reject '..' segments, or switch to
    # flask.send_from_directory.
    return send_file(getOpdsProtocol().dowloadBook(path))
70 | 
71 | 
@app.route('/show/<path:path>')
def showhtml(path):
    """
    Placeholder view that echoes the requested path.

    :param path: path taken from the URL
    :return: the text "show file:" followed by the HTML-escaped path
    """
    import html

    # The string is returned as the response body, so the URL-supplied path
    # must be escaped before it is echoed back; otherwise markup placed in
    # the URL would be rendered by the browser.
    return "show file:" + html.escape(path)
75 | 
76 | 
def getOpdsProtocol():
    """Return a new OpdsProtocol instance; the views obtain theirs through here."""
    protocol = OpdsProtocol()
    return protocol
79 | 
80 | 
if __name__ == "__main__":
    # Script entry point: DEBUG-level logging, then the built-in Flask server.
    log_format = '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s'
    logging.basicConfig(level=logging.DEBUG, format=log_format)
    # The port comes from the PORT environment variable, falling back to 5000.
    listen_port = int(os.environ.get('PORT', '5000'))
    app.run("0.0.0.0", port=listen_port, debug=False)


--------------------------------------------------------------------------------
/opds/requirements.txt:
--------------------------------------------------------------------------------
1 | cos-python-sdk-v5
2 | flask
3 | requests
4 | 


--------------------------------------------------------------------------------
/opds/runtime.txt:
--------------------------------------------------------------------------------
1 | python-3.7.0


--------------------------------------------------------------------------------
/opds/static/book.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chiro2001/Wenku8ToEpub-Online/53fe93f9093df3722bf336efcf8e9aa6e136b6b4/opds/static/book.png


--------------------------------------------------------------------------------
/opds/static/bookdetail.xsl:
--------------------------------------------------------------------------------
  1 | ﻿﻿<?xml version="1.0" encoding="UTF-8"?>
  2 | <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  3 | 
  4 | 
  5 |     <xsl:template match="/">
  6 |         <html>
  7 |             <head>
  8 |                 <title>
  9 |                     <xsl:value-of select="feed/title"/>
 10 |                 </title>
 11 |                 <meta charset="UTF-8"/>
 12 |                 <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
 13 |                 <link rel="stylesheet" href="/static/bootstrap.min.css"/>
 14 |                 <link rel="stylesheet" href="/static/bootstrap-responsive.min.css"/>
 15 |                 <style type="text/css">
 16 |                     body { font-family: "Trebuchet MS", verdana, sans-serif; font-size: 76%;}
 17 | 
 18 |                     img { border: 0; }
 19 | 
 20 |                     .logo { float: left; margin-bottom: 20px;}
 21 | 
 22 |                     .accred { float: left; color: #444; }
 23 |                     .accred strong { display: block; margin-bottom: 5px; }
 24 | 
 25 |                     div.intro { clear: both; }
 26 | 
 27 |                     h2 { color: #0B0F4C; }
 28 | 
 29 |                     .pubdate{
 30 |                     font-size:10px;
 31 |                     font-style:italic;
 32 |                     }
 33 | 
 34 |                     .description{
 35 | 
 36 |                     }
 37 | 
 38 |                     .header {font-size:16px; margin: 0 0 0 0; color: #4F7FC9; }
 39 |                     .header a {font-size:20px; color: #4F7FC9 ; }
 40 |                     .header a:hover { font-size:23px;color: #4F7FC9; }
 41 |                     .header a.visited { font-size:16px;color: #4F7FC9; }
 42 | 
 43 |                 </style>
 44 |             </head>
 45 | 
 46 |             <body style="font-family:helvetica,arial;">
 47 | 
 48 |                 <div class="container">
 49 |                     <div class="row">
 50 |                         <div class="span1">
 51 |                             <div class="accred">
 52 |                                 <a href="/">
 53 |                                     <img src="/static/logo.png" align="left"
 54 |                                          alt="opds logo" border="0" width="50px"/>
 55 |                                 </a>
 56 |                             </div>
 57 |                         </div>
 58 |                         <div class="span4">
 59 |                             <h1>
 60 |                                 <xsl:value-of select="/feed/title"/>
 61 |                             </h1>
 62 |                         </div>
 63 |                     </div>
 64 |                     <div class="description row">
 65 |                         <p class="span8">
 66 |                             <xsl:value-of select="/feed/description"/>
 67 |                         </p>
 68 | 
 69 |                     </div>
 70 |                     <div class="container header" style="margin-bottom:10px">
 71 |                         <a href="javascript:history.go(-1);">
 72 |                             <strong>&lt; 返回</strong>
 73 |                         </a>
 74 |                     </div>
 75 |                     <div class="container">
 76 |                         <xsl:for-each select="/feed/entry">
 77 |                             <div class="item row-fluid">
 78 |                                 <div class="span2">
 79 |                                     <xsl:for-each select="link">
 80 |                                         <xsl:if test="contains(.,'jpg')">
 81 |                                             <img src="{.}"></img>
 82 |                                         </xsl:if>
 83 |                                     </xsl:for-each>
 84 |                                 </div>
 85 |                                 <div class="span10">
 86 |                                     <h3 class="header">
 87 |                                         <a
 88 |                                                 href="{id}">
 89 |                                             <xsl:value-of select="title"></xsl:value-of>
 90 |                                         </a>
 91 |                                     </h3>
 92 |                                     <p class="pubdate">
 93 |                                         <xsl:value-of select="updated"/>
 94 |                                     </p>
 95 |                                     <span style="font-size:13pt" id="content">
 96 |                                         <xsl:value-of select="content" disable-output-escaping="yes"/>
 97 |                                     </span>
 98 |                                     <div>
 99 |                                         下载链接：
100 |                                         <ul>
101 |                                             <xsl:for-each select="link">
102 |                                                 <xsl:if test="contains(.,'pdf')">
103 |                                                     <li>
104 |                                                         <a href="{.}">
105 |                                                             PDF
106 |                                                         </a>
107 |                                                     </li>
108 |                                                 </xsl:if>
109 |                                                 <xsl:if test="contains(.,'mobi')">
110 |                                                     <li>
111 |                                                         <a href="{.}">
112 |                                                             MOBI
113 |                                                         </a>
114 |                                                     </li>
115 |                                                 </xsl:if>
116 |                                                 <xsl:if test="contains(.,'epub')">
117 |                                                     <li>
118 |                                                         <a href="{.}">
119 |                                                             EPUB
120 |                                                         </a>
121 |                                                     </li>
122 |                                                 </xsl:if>
123 |                                                 <xsl:if test="contains(.,'opf')">
124 | 
125 |                                                     <span style="visibility:hidden;position:absolute">
126 |                                                         <input type="text" id="opflink" value="{.}">
127 | 
128 |                                                         </input>
129 |                                                     </span>
130 |                                                 </xsl:if>
131 |                                             </xsl:for-each>
132 |                                         </ul>
133 |                                     </div>
134 |                                 </div>
135 |                             </div>
136 |                             <hr/>
137 |                         </xsl:for-each>
138 |                     </div>
139 |                 </div>
140 |             </body>
141 |             <script src="/static/jquery.min.js"></script>
142 |             <script type="text/javascript">
143 | 
144 |                 $.ajax({
145 |                 url: $("#opflink").val(),
146 |                 dataType: 'xml',
147 |                 success: function(data){
148 |                 var tt=$(data).find("package").find("metadata").find("description").text();
149 | 
150 |                 $("#content").html(tt);
151 |                 }
152 |                 });
153 |             </script>
154 |         </html>
155 |     </xsl:template>
156 | 
157 |     <xsl:output method="html"
158 |                 encoding="UTF-8"
159 |                 indent="no"/>
160 | 
161 | 
162 | </xsl:stylesheet>


--------------------------------------------------------------------------------
/opds/static/booklist.xsl:
--------------------------------------------------------------------------------
  1 | ﻿﻿<?xml version="1.0" encoding="UTF-8"?>
  2 | <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  3 | 
  4 | 
  5 |     <xsl:template match="/">
  6 |         <html>
  7 |             <head>
  8 |                 <title>
  9 |                     <xsl:value-of select="feed/title"/>
 10 |                 </title>
 11 |                 <meta charset="UTF-8"/>
 12 |                 <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
 13 |                 <link rel="stylesheet" href="/static/bootstrap.min.css"/>
 14 |                 <link rel="stylesheet" href="/static/bootstrap-responsive.min.css"/>
 15 |                 <style type="text/css">
 16 |                     body { font-family: "Trebuchet MS", verdana, sans-serif; font-size: 76%;}
 17 | 
 18 |                     img { border: 0; }
 19 | 
 20 |                     .logo { float: left; margin-bottom: 20px;}
 21 | 
 22 |                     .accred { float: left; color: #444; }
 23 |                     .accred strong { display: block; margin-bottom: 5px; }
 24 | 
 25 |                     div.intro { clear: both; }
 26 | 
 27 |                     h2 { color: #0B0F4C; }
 28 | 
 29 |                     .pubdate{
 30 |                     font-size:10px;
 31 |                     font-style:italic;
 32 |                     }
 33 | 
 34 |                     .description{
 35 | 
 36 |                     }
 37 | 
 38 |                     .header {font-size:16px; margin: 0 0 0 0; color: #4F7FC9; }
 39 |                     .header a {font-size:20px; color: #4F7FC9 ; }
 40 |                     .header a:hover { font-size:23px;color: #4F7FC9; }
 41 |                     .header a.visited { font-size:16px;color: #4F7FC9; }
 42 | 
 43 |                 </style>
 44 |             </head>
 45 | 
 46 |             <body style="font-family:helvetica,arial;">
 47 | 
 48 |                 <div class="container">
 49 |                     <div class="row">
 50 |                         <div class="span1">
 51 |                             <div class="accred">
 52 |                                 <a href="/">
 53 |                                     <img src="/static/logo.png" align="left"
 54 |                                          alt="opds logo" border="0" width="50px"/>
 55 |                                 </a>
 56 |                             </div>
 57 |                         </div>
 58 |                         <div class="span4">
 59 |                             <h1>
 60 |                                 <xsl:value-of select="/feed/title"/>
 61 |                             </h1>
 62 |                         </div>
 63 |                     </div>
 64 |                     <div class="description row">
 65 |                         <p class="span8">
 66 |                             <xsl:value-of select="/feed/description"/>
 67 |                         </p>
 68 | 
 69 |                     </div>
 70 |                     <div class="container header">
 71 |                         <a href="javascript:history.go(-1);">
 72 |                             <strong>&lt; 返回</strong>
 73 |                         </a>
 74 |                     </div>
 75 |                     <div class="container">
 76 |                         <xsl:for-each select="/feed/entry">
 77 |                             <div class="item row-fluid">
 78 |                                 <div class="span2">
 79 |                                     <xsl:for-each select="link">
 80 |                                         <xsl:if test="contains(.,'jpg')">
 81 |                                             <img src="{.}"></img>
 82 |                                         </xsl:if>
 83 |                                     </xsl:for-each>
 84 |                                 </div>
 85 |                                 <div class="span10">
 86 |                                     <h3 class="header">
 87 |                                         <a
 88 |                                                 href="{id}">
 89 |                                             <xsl:value-of select="title"></xsl:value-of>
 90 |                                         </a>
 91 |                                     </h3>
 92 |                                     <p class="pubdate">
 93 |                                         <xsl:value-of select="updated"/>
 94 |                                     </p>
 95 |                                     <span style="font-size:13pt" id="content">
 96 |                                         <xsl:value-of select="content" disable-output-escaping="yes"/>
 97 |                                     </span>
 98 |                                     <div>
 99 |                                         <ul>
100 |                                             <xsl:for-each select="link">
101 |                                                 <xsl:if test="contains(.,'pdf')">
102 |                                                     <li>
103 |                                                         <a href="{.}">
104 |                                                             PDF
105 |                                                         </a>
106 |                                                     </li>
107 |                                                 </xsl:if>
108 |                                                 <xsl:if test="contains(.,'mobi')">
109 |                                                     <li>
110 |                                                         <a href="{.}">
111 |                                                             MOBI
112 |                                                         </a>
113 |                                                     </li>
114 |                                                 </xsl:if>
115 |                                                 <xsl:if test="contains(.,'epub')">
116 |                                                     <li>
117 |                                                         <a href="{.}">
118 |                                                             EPUB
119 |                                                         </a>
120 |                                                     </li>
121 |                                                 </xsl:if>
122 |                                             </xsl:for-each>
123 |                                         </ul>
124 |                                     </div>
125 |                                 </div>
126 |                             </div>
127 |                             <hr/>
128 |                         </xsl:for-each>
129 |                     </div>
130 |                 </div>
131 |             </body>
132 | 
133 |         </html>
134 |     </xsl:template>
135 | 
136 |     <xsl:output method="html"
137 |                 encoding="UTF-8"
138 |                 indent="no"/>
139 | 
140 | 
141 | </xsl:stylesheet>


--------------------------------------------------------------------------------
/opds/static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chiro2001/Wenku8ToEpub-Online/53fe93f9093df3722bf336efcf8e9aa6e136b6b4/opds/static/logo.png


--------------------------------------------------------------------------------
/opds/static/test.html:
--------------------------------------------------------------------------------
  1 | <html>
  2 | <head>
  3 |     <title>
  4 |         <xsl:value-of select="feed/title"/>
  5 |     </title>
  6 |     <meta charset="UTF-8"/>
  7 |     <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
  8 |     <link rel="stylesheet" href="/opds/static/bootstrap.min.css"/>
  9 |     <link rel="stylesheet" href="/opds/static/bootstrap-responsive.min.css"/>
 10 |     <style type="text/css">
 11 |                     body { font-family: "Trebuchet MS", verdana, sans-serif; font-size: 76%;}
 12 | 
 13 |                     img { border: 0; }
 14 | 
 15 |                     .logo { float: left; margin-bottom: 20px;}
 16 | 
 17 |                     .accred { float: left; color: #444; }
 18 |                     .accred strong { display: block; margin-bottom: 5px; }
 19 | 
 20 |                     div.intro { clear: both; }
 21 | 
 22 |                     h2 { color: #0B0F4C; }
 23 | 
 24 |                     .pubdate{
 25 |                     font-size:10px;
 26 |                     font-style:italic;
 27 |                     }
 28 | 
 29 |                     .description{
 30 | 
 31 |                     }
 32 | 
 33 |                     .header {font-size:16px; margin: 0 0 0 0; color: #4F7FC9; }
 34 |                     .header a {font-size:20px; color: #4F7FC9 ; }
 35 |                     .header a:hover { font-size:23px;color: #4F7FC9; }
 36 |                     .header a.visited { font-size:16px;color: #4F7FC9; }
 37 | 
 38 | 
 39 |     </style>
 40 | </head>
 41 | 
 42 | <body style="font-family:helvetica,arial;">
 43 | 
 44 | <div class="container">
 45 |     <div class="row">
 46 |         <div class="span1">
 47 |             <div class="accred">
 48 |                 <a href="/">
 49 |                     <img src="/opds/static/logo.png" align="left"
 50 |                          alt="opds logo" border="0" width="50px"/>
 51 |                 </a>
 52 |             </div>
 53 |         </div>
 54 |         <div class="span4">
 55 |             <h1>
 56 |                 <xsl:value-of select="/feed/title"/>
 57 |             </h1>
 58 |         </div>
 59 |     </div>
 60 |     <div class="description row">
 61 |         <p class="span8">
 62 |             <xsl:value-of select="/feed/description"/>
 63 |         </p>
 64 | 
 65 |     </div>
 66 |     <div class="container header">
 67 |         <a href="javascript:history.go(-1);">
 68 |             <strong>&lt; 返回</strong>
 69 |         </a>
 70 |     </div>
 71 |     <div class="container">
 72 |         <xsl:for-each select="/feed/entry">
 73 |             <div class="item row-fluid">
 74 |                 <div class="span2">
 75 |                     <xsl:for-each select="link">
 76 |                         <xsl:if test="contains(.,'jpg')">
 77 |                             <img src="{.}"></img>
 78 |                         </xsl:if>
 79 |                     </xsl:for-each>
 80 |                 </div>
 81 |                 <div class="span10">
 82 |                     <h3 class="header">
 83 |                         <a
 84 |                                 href="{id}">
 85 |                             <xsl:value-of select="title"></xsl:value-of>
 86 |                         </a>
 87 |                     </h3>
 88 |                     <p class="pubdate">
 89 |                         <xsl:value-of select="updated"/>
 90 |                     </p>
 91 |                     <span style="font-size:13pt" id="content">
 92 |                                         <xsl:value-of select="content" disable-output-escaping="yes"/>
 93 |                                     </span>
 94 |                     <div>
 95 |                         <ul>
 96 |                             <xsl:for-each select="link">
 97 |                                 <xsl:if test="contains(.,'pdf')">
 98 |                                     <li>
 99 |                                         <a href="{.}">
100 |                                             PDF
101 |                                         </a>
102 |                                     </li>
103 |                                 </xsl:if>
104 |                                 <xsl:if test="contains(.,'mobi')">
105 |                                     <li>
106 |                                         <a href="{.}">
107 |                                             MOBI
108 |                                         </a>
109 |                                     </li>
110 |                                 </xsl:if>
111 |                                 <xsl:if test="contains(.,'epub')">
112 |                                     <li>
113 |                                         <a href="{.}">
114 |                                             EPUB
115 |                                         </a>
116 |                                     </li>
117 |                                 </xsl:if>
118 |                             </xsl:for-each>
119 |                         </ul>
120 |                     </div>
121 |                 </div>
122 |             </div>
123 |             <hr/>
124 |         </xsl:for-each>
125 |     </div>
126 | </div>
127 | </body>
128 | 
129 | </html>


--------------------------------------------------------------------------------
/opds/test_mine.py:
--------------------------------------------------------------------------------
 1 | from filesystem import LocalMetadataFileSystem
 2 | import unittest
 3 | 
 4 | __author__ = 'lei'
 5 | 
 6 | fs = LocalMetadataFileSystem()
 7 | 
 8 | 
 9 | class MineTest(unittest.TestCase):
10 | 
11 |     def test_exists(self):
12 |         self.assertTrue(fs.exists(u'\u79d1\u6280'))
13 | 
14 | 
# Run the test case through unittest when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
17 | 


--------------------------------------------------------------------------------
/opds/utils.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | import json
 3 | import logging
 4 | import opds.filesystem as filesystem
 5 | 
 6 | __author__ = 'lei'
 7 | 
 8 | 
 9 | # #connect path
10 | # 不是你这啥用处
11 | def connect_path(base, name):
12 |     if base is None or name is None:
13 |         # print(base, name)
14 |         return None
15 |     # if name.startswith('/'):
16 |     if len(name) == 0:
17 |         return base
18 |     if name[0] == '/':
19 |         name = name[1:]
20 | 
21 |     if base[-1] == '/':
22 |         return base + name
23 |     else:
24 |         return base + '/' + name
25 | 
26 | 
def getNow():
    """Return the current local time formatted as ``YYYY-MM-DDTHH:MM:SSZ``.

    NOTE(review): the trailing ``Z`` conventionally denotes UTC, but the value
    comes from ``datetime.now()`` (local time) -- confirm what consumers expect.
    """
    # %H is the 24-hour clock. The previous %I (12-hour clock, no AM/PM
    # marker) rendered 15:00 as "03:00".
    return datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
29 | 
30 | 
def getUpdateTime(name, default=None):
    """Look up the update time recorded for ``name`` in ``filesystem.bookdata``.

    :param name:    key to look up in ``filesystem.bookdata``
    :param default: value used when ``name`` is absent; when None, the
                    current time from ``getNow()`` is used instead
    :return:        the entry's ``'last_modified'`` field when the entry is a
                    dict, otherwise the entry (or the fallback) itself
    """
    fallback = getNow() if default is None else default
    entry = filesystem.bookdata.get(name, fallback)
    return entry['last_modified'] if type(entry) is dict else entry
38 | 
39 | 
def getFile(jjson, paths):
    '''
    Walk a nested JSON-like object along a list of keys.

    :param jjson:   json object (nested dicts)
    :param paths:   list of keys to follow; the single path ``['']`` selects
                    ``jjson`` itself
    :return:        the object found at the end of the path, or None when the
                    path is empty, a key is missing, or a level cannot be
                    indexed
    '''
    try:
        if len(paths) == 1:
            key = paths[0]
            if key == '':
                return jjson
            if key in jjson:
                return jjson[key]
            # Use %s placeholders: passing extra positional arguments without
            # them makes the logging module fail to format the record.
            logging.warning('Jjson %s', json.dumps(jjson))
            logging.warning('No this Key: %s', key)
            return None
        if len(paths) > 1:
            return getFile(jjson[paths[0]], paths[1:])
    except (AttributeError, KeyError, TypeError) as e:
        # A missing intermediate key (KeyError) or a non-container level
        # (TypeError) is reported the same way as a missing leaf key.
        logging.error(e)
    return None
64 | 


--------------------------------------------------------------------------------
/progress.txt:
--------------------------------------------------------------------------------
1 | 2


--------------------------------------------------------------------------------
/refresh.py:
--------------------------------------------------------------------------------
 1 | import io
 2 | from wenku8toepub import *
 3 | from tqdm import trange
 4 | from manage import *
 5 | 
 6 | 
# Refresh the whole cache once.
path = 'books/'        # directory the generated epub files are written to
file = 'progress.txt'  # holds the book id that main() resumes from
errors = 'errors.txt'  # main() appends the ids whose primary download failed
# First run: create the output directory and seed the bookkeeping files.
if not os.path.exists(path):
    os.mkdir(path)
if not os.path.exists(file):
    with open(file, 'w') as f:
        f.write('1')  # start from book id 1
if not os.path.exists(errors):
    with open(errors, 'w') as f:
        f.write('')
19 | 
20 | 
def max_bid():
    """Return the upper bound of the book-id range that ``main`` walks.

    The value is hard-coded. A lookup against the wenku8 index page
    (``https://www.wenku8.net/index.php``) exists only as disabled code:
        # soup = Soup(requests.get(url_main).content, 'html.parser')
        # print(soup.find_all(attrs={'class': ''}))
    """
    upper_bound = 2706
    return upper_bound
26 | 
27 | 
def _save_and_upload(data, filename):
    """Write ``data`` to ``path``/``filename`` and upload it under the key ``filename``."""
    with open(os.path.join(path, filename), 'wb') as f:
        f.write(data)
    client.put_object(
        Bucket=bucket,
        Body=data,
        Key=filename,
    )


def main():
    """Rebuild the cached epub of every book from the saved progress up to ``max_bid()``.

    For each book id the epub is produced with ``get_book``; if anything in
    that path raises, the id is appended to the errors file and the
    ``txt2epub`` fallback is tried. The current id is written to the progress
    file after every book (also on failure or skip), so a rerun resumes from
    there.
    """
    with open(file, 'r') as f:
        now = int(f.read())
    # Upper bound of the id range.
    mbid = max_bid()
    for bid in trange(now, mbid):
        wk = Wenku8ToEpub()
        wk.login()
        title = wk.id2name(bid)
        filename = "%s.epub" % title
        try:
            # Decide first whether a download is needed at all:
            # books without copyright are always refreshed,
            has_copyright = wk.copyright(bid)
            # while an up-to-date copyrighted book is skipped.
            if local_check(bid) == '0' and has_copyright:
                logger.debug('BID %s最新版本而且有版权，跳过。' % bid)
                continue
            data = wk.get_book(bid, savepath=path, fetch_image=False, bin_mode=True)
            if data is None:
                continue
            _save_and_upload(data, filename)
        except Exception as primary_error:
            try:
                # %s placeholders are required: the old call passed bare
                # positional arguments, which logging cannot format. The
                # primary error is included so it is no longer lost.
                logger.warning('错误: %s 尝试备用方案 (%s)', bid, primary_error)
                with open(errors, 'a') as p:
                    p.write(str(bid) + '\n')
                data = wk.txt2epub(bid)
                if data is None:
                    continue
                _save_and_upload(data, filename)
            except Exception as e:
                logger.error(e)
        finally:
            # Runs on success, failure and ``continue`` alike.
            with open(file, 'w') as f:
                f.write(str(bid))
86 | 
87 | 
# Start the refresh loop only when this file is executed as a script.
if __name__ == '__main__':
    main()


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | flask
 2 | pymongo
 3 | requests
 4 | cos-python-sdk-v5
 5 | colorlog
 6 | tqdm
 7 | bs4
 8 | lxml
 9 | six
10 | dnspython
11 | flask_cors
12 | 


--------------------------------------------------------------------------------
/restart.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | PID=$(ps aux | grep "server.py" | head -n 1 | awk '{print $2}')
 3 | echo $PID
 4 | for id in $PID
 5 | do
 6 |   kill -9 $id
 7 |   echo "process $id killed"
 8 | done
 9 | WENKU8_LOCAL=True python server.py &
10 | 


--------------------------------------------------------------------------------
/runtime.txt:
--------------------------------------------------------------------------------
1 | python-3.10.10


--------------------------------------------------------------------------------
/server.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from flask import *
 3 | from flask_cors import *
 4 | from werkzeug.middleware.dispatcher import DispatcherMiddleware
 5 | from werkzeug.serving import run_simple
 6 | 
 7 | from manage import app
 8 | from opds.opdsserver import app as app_opds
 9 | 
# ``app`` is imported from manage instead of being created here.

# Enable CORS, including credentialed requests, on the main application.
CORS(app, supports_credentials=True)

# Main app at the root, OPDS app under the /opds prefix.
dm = DispatcherMiddleware(app, {'/opds': app_opds})


if __name__ == '__main__':
    # Listen on all interfaces; the port comes from $PORT (8000 if unset).
    port = int(os.environ.get('PORT', '8000'))
    run_simple('0.0.0.0', port, dm)


--------------------------------------------------------------------------------
/static/board.json:
--------------------------------------------------------------------------------
1 | ﻿{
2 |     "notice": "更新：迁移了服务器。",
3 |     "instructions": "<b>[2023-03-19] 暂时迁移到：<a href=\"https://wenku8.chiro.work\">https://wenku8.chiro.work</a>。服务可能更加稳定，不过暂时无法返回下载状态，如果点击按钮之后没有反应可以过一会直接点击下载按钮。</b><br/><b>使用流程</b>：<br>先搜索书名，然后到出现的书中选择“下载”。<br>暂时因为程序和资金原因在在线版删除了上传缓存功能，直接从服务器在线获取下载链接。请尽量使用迅雷等下载器进行多线程下载。<br>本地程序用户能够上传文件到腾讯云储存，能加速本站下载以及减轻主站的访问压力。在本地版(wk8local)下载过的带图小说可以直接从网页端下载，推荐下载wk8local使用。<br>本网站运行需要资金，如果你觉得本网站好用，不妨捐助(先打开侧边栏)个几块钱，让这个网站能够长久为大家服务。<br><b>2020/3/16更新：</b><br>把主站小说全部缓存了一遍，包括已经被标记为“无版权”的小说。“无版权”小说由主站txt文件转换而来，不提供图片下载。<br>增加了一些说明，简化了一些操作。<br>使用腾讯云临时链接对静态内容防止盗链，减少流量。<br>感谢捐赠。目前运营成本大约15元/月。<br><b>2020/3/17更新：</b><br>修复了特殊文件名造成的无法下载的问题。<br><b>2020/3/22更新：</b><br>添加了留言和反馈功能；增加了相关链接。<br>本地版本ver5004发布。<br><b>2020/3/28更新：</b><br>可以和小冰聊天啦！<br>本地版本ver5005发布。<br><b>2020/4/10更新：</b><br>增加了TXT文件下载。<br>本地版本ver5006发布。<br><b>2020/4/15更新：</b><br>增加了动漫之家的书源，同样支持下载图片和缓存到腾讯云。书总量x2.5。<br>本地版本ver5007发布。<br>动漫之家书源异步下载√<br>本地版本ver5008发布，使用py3.7.7，主程序体积减小到11MB。<br><b>2020/5/1更新：</b><br>紧急修复一个wenku8书没法下载的BUG。<br><script>if (window.location.toString().includes(\"herokuapp\")) window.location = \"https://wenku8.chiro.work\"</script>",
4 |     "local_latest": 5009
5 | }


--------------------------------------------------------------------------------
/static/extra.js:
--------------------------------------------------------------------------------
1 | console.log('EXTRA JAVASCRIPT LOADED!')
2 | 
3 | 


--------------------------------------------------------------------------------
/static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chiro2001/Wenku8ToEpub-Online/53fe93f9093df3722bf336efcf8e9aa6e136b6b4/static/favicon.ico


--------------------------------------------------------------------------------
/static/theme.js:
--------------------------------------------------------------------------------
  1 | function getCookieByArray(name){
  2 |  var cookies = document.cookie.split(';');
  3 |  var c;
  4 |  for(var i=0; i<cookies.length ; i++){
  5 |   c = cookies[i].split('=');
  6 |   if (c[0].replace(' ', '') == name) {
  7 |    return c[1];
  8 |   }
  9 |  }
 10 | }
/**
 * Document theme settings.
 *
 * Default theme values; they are applied when the '#dialog-docs-theme'
 * dialog fires 'cancel.mdui.dialog'.
 */
var DEFAULT_PRIMARY = 'amber';
var DEFAULT_ACCENT = 'pink';
var DEFAULT_LAYOUT = 'dark';
 17 | 
 18 | // 设置 cookie
 19 | var setCookie = function (key, value) {
 20 |   // cookie 有效期为 1 年
 21 |   var date = new Date();
 22 |   date.setTime(date.getTime() + 365*24*3600*1000);
 23 |   document.cookie = key + '=' + value + '; expires=' + date.toGMTString() + '; path=/';
 24 | };
 25 | 
 26 | var setDocsTheme = function (theme) {
 27 |   if (typeof theme.primary === 'undefined') {
 28 |     theme.primary = false;
 29 |   }
 30 |   if (typeof theme.accent === 'undefined') {
 31 |     theme.accent = false;
 32 |   }
 33 |   if (typeof theme.layout === 'undefined') {
 34 |     theme.layout = false;
 35 |   }
 36 | 
 37 |   var i, len;
 38 |   var $body = $$('body');
 39 | 
 40 |   var classStr = $body.attr('class');
 41 |   var classs = classStr.split(' ');
 42 | 
 43 |   // 设置主色
 44 |   if (theme.primary !== false) {
 45 |     for (i = 0, len = classs.length; i < len; i++) {
 46 |       if (classs[i].indexOf('mdui-theme-primary-') === 0) {
 47 |         $body.removeClass(classs[i])
 48 |       }
 49 |     }
 50 |     $body.addClass('mdui-theme-primary-' + theme.primary);
 51 |     setCookie('docs-theme-primary', theme.primary);
 52 |     $$('input[name="doc-theme-primary"][value="' + theme.primary + '"]').prop('checked', true);
 53 |   }
 54 | 
 55 |   // 设置强调色
 56 |   if (theme.accent !== false) {
 57 |     for (i = 0, len = classs.length; i < len; i++) {
 58 |       if (classs[i].indexOf('mdui-theme-accent-') === 0) {
 59 |         $body.removeClass(classs[i]);
 60 |       }
 61 |     }
 62 |     $body.addClass('mdui-theme-accent-' + theme.accent);
 63 |     setCookie('docs-theme-accent', theme.accent);
 64 |     $$('input[name="doc-theme-accent"][value="' + theme.accent + '"]').prop('checked', true);
 65 |   }
 66 | 
 67 |   // 设置主题色
 68 |   if (theme.layout !== false) {
 69 |     for (i = 0, len = classs.length; i < len; i++) {
 70 |       if (classs[i].indexOf('mdui-theme-layout-') === 0) {
 71 |         $body.removeClass(classs[i]);
 72 |       }
 73 |     }
 74 |     if (theme.layout !== '') {
 75 |       $body.addClass('mdui-theme-layout-' + theme.layout);
 76 |     }
 77 |     setCookie('docs-theme-layout', theme.layout);
 78 |     $$('input[name="doc-theme-layout"][value="' + theme.layout + '"]').prop('checked', true);
 79 |   }
 80 | };
 81 | 
 82 | // 切换主色
 83 | $$(document).on('change', 'input[name="doc-theme-primary"]', function () {
 84 |   setDocsTheme({
 85 |     primary: $$(this).val()
 86 |   });
 87 | });
 88 | 
 89 | // 切换强调色
 90 | $$(document).on('change', 'input[name="doc-theme-accent"]', function () {
 91 |   setDocsTheme({
 92 |     accent: $$(this).val()
 93 |   });
 94 | });
 95 | 
 96 | // 切换主题色
 97 | $$(document).on('change', 'input[name="doc-theme-layout"]', function () {
 98 |   setDocsTheme({
 99 |     layout: $$(this).val()
100 |   });
101 | });
102 | 
103 | // 恢复默认主题
104 | $$(document).on('cancel.mdui.dialog', '#dialog-docs-theme', function () {
105 |   setDocsTheme({
106 |     primary: DEFAULT_PRIMARY,
107 |     accent: DEFAULT_ACCENT,
108 |     layout: DEFAULT_LAYOUT
109 |   });
110 | });
111 | 
/**
 * Read the three theme cookies and apply them through setDocsTheme.
 * Cookies that are not set come back as undefined.
 */
function getTheme() {
  setDocsTheme({
    accent: getCookieByArray('docs-theme-accent'),
    primary: getCookieByArray('docs-theme-primary'),
    layout: getCookieByArray('docs-theme-layout')
  });
}

// Apply the stored theme as soon as this script runs.
getTheme();


--------------------------------------------------------------------------------
/static/wenku8.js:
--------------------------------------------------------------------------------
// Shared handle to the '#wenku8-progress' element; the functions below show
// it while a request is pending and hide it afterwards.
wenku8_progress = $('#wenku8-progress');
  2 | 
  3 | function myIsNaN(value) {
  4 |     return !isNaN(value);
  5 | }
  6 | 
  7 | // const api_prefix = "http://wenku8.chiro.work:8000";
  8 | const api_prefix = "";
  9 | async function ajax(url) {
 10 |     return new Promise(function (resolve, reject) {
 11 |         var ajaxSetting = {
 12 |             url: api_prefix + url,
 13 |             success: function (response) {
 14 |                 resolve(response);
 15 |             },
 16 |             error: function () {
 17 |                 reject("请求失败");
 18 |             }
 19 |         }
 20 |         $.ajax(ajaxSetting);
 21 |     });
 22 | }
 23 | 
 24 | function showBoard() {
 25 |     // $.ajax({url:'https://cdn-1254016670.cos.ap-chengdu.myqcloud.com/board/board.json'}).then(d => {
 26 |     $.ajax({url: '/static/board.json'}).then(d => {
 27 |         console.log("news:", d);
 28 |         $('#wenku8-board').text(d.notice);
 29 |         $('#wenku8-instructions').html(d.instructions);
 30 |     });
 31 | }
 32 | 
 33 | function wenku8Fun1() {
 34 |     var text = $('#wenku8-fun1-text').val();
 35 |     if (text.startsWith('dmzj_')) {
 36 |         var bid = text.slice(5, text.length);
 37 |         if (!(myIsNaN(bid) && bid.length <= 5)) {
 38 |             // 不是id
 39 |             mdui.snackbar('输入错误！请输入ID号！');
 40 |             return false;
 41 |         }
 42 |         wenku8_progress.show();
 43 |         $.ajax({
 44 |             url: '/bookinfo_dmzj/' + bid
 45 |         }).then((d) => {
 46 |             wenku8_progress.hide();
 47 |             d = JSON.parse(d);
 48 |             $('#wenku8-book-card').fadeIn('slow');
 49 |             $('#wenku8-bookinfo-name').text(d.name);
 50 |             $('#wenku8-bookinfo-id').text('dmzj_' + d.id);
 51 |             $('#wenku8-bookinfo-author').text(d.authors);
 52 |             $('#wenku8-bookinfo-brief').text(d.introduction);
 53 |             $('#wenku8-bookinfo-time').text(d.update_time);
 54 |             $('#wenku8-bookinfo-copyright').text('√');
 55 |             $('#wenku8-bookinfo-cover').empty();
 56 |             $('#wenku8-bookinfo-cover').append($('<img src="' + d.cover + '" height=205px>'));
 57 |             $('#wenku8-bookinfo-cover').append($('<br>'));
 58 |             $('#wenku8-bookinfo-cover').append($('<a rel="noreferrer" target="_blank" href="' + d.cover + '">封面链接</a>'));
 59 |         })
 60 |         return;
 61 |     }
 62 |     if (!(myIsNaN(text) && text.length <= 5)) {
 63 |         // 不是id
 64 |         mdui.snackbar('输入错误！请输入ID号！');
 65 |         return false;
 66 |     }
 67 |     var bid = text;
 68 |     wenku8_progress.show();
 69 |     $.ajax({
 70 |         url: '/bookinfo/' + bid
 71 |     }).then((d) => {
 72 |         wenku8_progress.hide();
 73 | //        console.log(d);
 74 | //        console.log('ajax: bid:', bid, d);
 75 |         d = JSON.parse(d);
 76 |         $('#wenku8-book-card').fadeIn('slow');
 77 |         $('#wenku8-bookinfo-name').text(d.name);
 78 |         $('#wenku8-bookinfo-id').text(d.id);
 79 |         $('#wenku8-bookinfo-author').text(d.author);
 80 |         $('#wenku8-bookinfo-brief').text(d.brief);
 81 |         $('#wenku8-bookinfo-time').text(d.update_time);
 82 |         if (d.copyright == false) {
 83 |             $('#wenku8-bookinfo-copyright').text('无版权，可下载');
 84 |         } else {
 85 |             $('#wenku8-bookinfo-copyright').text('有版权');
 86 |         }
 87 | //        $('#wenku8-bookinfo-cover').attr('src', d.cover);
 88 |         $('#wenku8-bookinfo-cover').empty();
 89 |         $('#wenku8-bookinfo-cover').append($('<iframe scrolling="no" frameborder=0 src="' + d.cover + '">'));
 90 |         $('#wenku8-bookinfo-cover').append($('<br>'));
 91 |         $('#wenku8-bookinfo-cover').append($('<a rel="noreferrer" target="_blank" href="' + d.cover + '">封面链接</a>'));
 92 |         $('#wenku8-bookinfo-cover').append($('<a rel="noreferrer" target="_blank" href="' + 'http://dl.wenku8.com/down.php?type=txt&id=' + d.id + '&fname=' + d.name + '"> 下载TXT(GBK)</a>'));
 93 |         $('#wenku8-bookinfo-cover').append($('<a rel="noreferrer" target="_blank" href="' + 'http://dl.wenku8.com/down.php?type=utf8&id=' + d.id + '&fname=' + d.name + '"> 下载TXT(UTF8)</a>'));
 94 |     })
 95 | }
 96 | 
 97 | function wenku8Fun1_1(val=undefined) {
 98 |     if (val == undefined)
 99 |         val = $('#wenku8-fun1-text').val()
100 |     $('#wenku8-fun2-text').val(val);
101 |     wenku8Fun2();
102 | }
/**
 * Copy an id into '#wenku8-fun2-text' and run wenku8Fun6.
 * @param {*} [val] Id to use; defaults to the value of '#wenku8-fun1-text'.
 */
function wenku8Fun1_4(val=undefined) {
    var chosen = (val == undefined) ? $('#wenku8-fun1-text').val() : val;
    $('#wenku8-fun2-text').val(chosen);
    wenku8Fun6();
}
/**
 * Copy an id into '#wenku8-fun3-text' and pass it on to wenku8Fun3.
 * @param {*} [val] Id to use; defaults to the value of '#wenku8-fun1-text'.
 */
function wenku8Fun1_2(val=undefined) {
    var chosen = (val == undefined) ? $('#wenku8-fun1-text').val() : val;
    $('#wenku8-fun3-text').val(chosen);
    console.log('wenku8Fun1_2:', chosen);
    wenku8Fun3(chosen);
}
/**
 * Copy an id into '#wenku8-fun4-text' and run wenku8Fun4.
 * @param {*} [val] Id to use; defaults to the value of '#wenku8-fun1-text'.
 */
function wenku8Fun1_3(val=undefined) {
    var chosen = (val == undefined) ? $('#wenku8-fun1-text').val() : val;
    $('#wenku8-fun4-text').val(chosen);
    wenku8Fun4();
}
122 | 
/**
 * Resolve the text in '#wenku8-fun2-text' to a download URL and navigate to
 * it. 'dmzj_<id>' goes to '/v2_dmzj/get/', a numeric id of at most five
 * characters to '/v2/get/', anything else is treated as a book name and sent
 * to '/v2/name/'.
 */
function wenku8Fun2() {
    var text = $('#wenku8-fun2-text').val();
    var target = '/v2/get/';
    if (text.startsWith('dmzj_')) {
        target = '/v2_dmzj/get/';
        text = text.slice(5, text.length);
    } else if (!(myIsNaN(text) && text.length <= 5)) {
        // Not an id: search by name. Encode it so that '?' or '#' in a
        // title cannot cut the request path short.
        target = '/v2/name/';
        text = encodeURIComponent(text);
    }
    wenku8_progress.show();
    $.ajax({url: target + text}).then(function (d) {
        wenku8_progress.hide();
        if (d.length <= 1) {
            mdui.snackbar('没有这个小说');
            return;
        }
        $(location).attr('href', d);
    }, function () {
        // Do not leave the progress bar up when the request fails.
        wenku8_progress.hide();
        mdui.snackbar('请求失败');
    });
}
144 | 
/**
 * Start remote caching (without images) for a book id.
 *
 * @param {string|number} [bid] Book id; defaults to the value of
 *     '#wenku8-fun3-text'. Numeric ids are accepted and converted to
 *     strings.
 */
function wenku8Fun3(bid=undefined) {
    if (bid == undefined)
        bid = $('#wenku8-fun3-text').val();
    // The search-result buttons pass a number; startsWith() below needs a string.
    bid = String(bid);
    console.log('wenku8Fun3()', bid);
    if ((!(myIsNaN(bid) && bid.length <= 5)) && !bid.startsWith('dmzj_')) {
        // Not an id: report it and stop instead of requesting anyway.
        mdui.snackbar('ID号输入错误');
        return;
    }
    remoteDownload(bid);
}
155 | 
// true while a remote caching job started from this page is being polled.
downloading = false;
refreshLock = false;

/**
 * Poll the server for the log and status of the caching job of `bid`.
 *
 * Shows the log newest-line-first in '#wenku8-fun3-logs'. As read from the
 * checks below: status 0 keeps polling every 3 seconds while `downloading`
 * is set, status 1 stops polling, and any other value is used as the
 * download URL, which is opened after 5 seconds.
 *
 * @param {string} bid Book id.
 */
async function refreshDownloadLogs(bid) {
    console.log('refreshDownloadLogs(bid)', bid);
    $('#wenku8-progress').show();

    function pollAgain() {
        if (downloading) {
            setTimeout(function() {
                refreshDownloadLogs(bid);
            }, 3000);
        }
    }

    try {
        var messages = await ajax('/v2/cache_logs/' + bid);
        // Newest line first, one line per row, with a trailing newline.
        var text = String(messages).split('\n').reverse().join('\n') + '\n';
        $('#wenku8-fun3-logs').val(text);
    } catch(e) {
        // A failed log request must not abort the status check below.
        mdui.snackbar(e);
    }

    console.log('refresh:', 'update')
    var status;
    try {
        status = await ajax('/v2/cache_status/' + bid);
    } catch(e) {
        mdui.snackbar(e);
        pollAgain();
        return;
    }
    if (status == 1) {
        downloading = false;
        $('#wenku8-progress').hide();
        return;
    }
    if (status != 0) {
        $('#wenku8-fun3-url').attr('href', status)
        $('#wenku8-fun3-url').fadeIn('slow');
        $('#wenku8-progress').hide();
        mdui.snackbar("下载已经完成，5秒后开始下载");
        downloading = false;
        setTimeout(function() {
            $(location).attr('href', status);
        }, 5000);
        return;
    }
    // Schedule the next poll only now that the job is known to be still
    // running. Scheduling before the status check made the completion
    // branch run a second time (second snackbar, second redirect).
    pollAgain();
}
205 | 
// When true, remoteDownload skips the "cache is already up to date" check.
always_download = false;

/**
 * Ask the server to cache a book and start polling its progress.
 *
 * @param {string|number} bid Book id, optionally prefixed with 'dmzj_'.
 * @param {boolean} [img=false] Use the '.../cache_img/' endpoint instead of
 *     '.../cache/'.
 */
async function remoteDownload(bid, img=false) {
    // Callers may pass a number; startsWith()/slice() below need a string.
    bid = String(bid);
    console.log('remoteDownload', bid)
    if (downloading == true) {
        mdui.snackbar("下载已经开始");
        return;
    }

    try {
        if (!always_download) {
            var should = await ajax('/v2/check/' + bid);
            console.log('should:', should);
            if (should == 0) {
                // Not needed: let the user choose between re-caching and
                // downloading the existing cache.
                mdui.confirm("该小说在网盘缓存已经为最新版本，是否仍然开始离线缓存？选择取消则开始下载离线缓存内容，确定则开始离线缓存。", function() {
                    always_download = true;
                    remoteDownload(bid, img);
                }, function() {
                    wenku8Fun1_1(bid);
                });
                return;
            }
        }

        var will_request = true;

        // Ask for the current status first.
        var status = await ajax('/v2/cache_status/' + bid);
        if (status == 0) {
            // A job is already running, so do not request another one.
            mdui.snackbar("下载已经开始");
            will_request = false;
        }

        // Show the log area and scroll to it.
        $("#wenku8-fun3-logs-outline").show();
        $('body').animate({scrollTop:$("#wenku8-fun3-logs-outline").offset().top},1000);

        if (will_request) {
            var isDmzj = bid.startsWith('dmzj_');
            // Local variable: this used to leak as an implicit global.
            var target;
            if (isDmzj) {
                target = (img == true) ? '/v2_dmzj/cache_img/' : '/v2_dmzj/cache/';
            } else {
                target = (img == true) ? '/v2/cache_img/' : '/v2/cache/';
            }
            var starting = await ajax(target + (isDmzj ? bid.slice(5, bid.length) : bid));

            console.log('starting', starting)
            if (starting != 0) {
                if (starting == 1)
                    mdui.snackbar("下载已经开始");
                return;
            }
        }
    } catch (e) {
        // A rejected request previously surfaced as an unhandled rejection.
        mdui.snackbar(e);
        return;
    }
    downloading = true;
    mdui.snackbar("开始下载");
    refreshDownloadLogs(bid);
}
272 | 
/**
 * Start remote caching WITH images for the id typed into
 * '#wenku8-fun4-text'.
 */
function wenku8Fun4() {
    var bid = $('#wenku8-fun4-text').val();
    if ((!(myIsNaN(bid) && bid.length <= 5)) && !bid.startsWith('dmzj_')) {
        // Not an id: report it and stop instead of requesting anyway.
        mdui.snackbar('ID号输入错误');
        return;
    }
    remoteDownload(bid, true);
}
281 | 
/**
 * Clone the '#wenku8-book-card-tmp' template, fill it with one search hit
 * and append it to '#wenku8-search'.
 *
 * @param {{id: *, title: string, status: string, brief: string,
 *          coverNodes: Array}} hit Display data; `coverNodes` are appended
 *     to the card's cover area in order.
 */
function _wenku8AppendSearchCard(hit) {
    var tmp = $('#wenku8-book-card-tmp').clone(true);
    tmp.show();
    tmp.addClass('wenku8-search-' + hit.id);

    $('.wenku8-search-title', tmp).text(hit.title);
    $('.wenku8-search-id', tmp).text(hit.id);
    $('.wenku8-search-status', tmp).text(hit.status);
    $('.wenku8-search-brief', tmp).text(hit.brief);

    var cover = $('.wenku8-search-cover', tmp);
    cover.empty();
    hit.coverNodes.forEach(function (node) {
        cover.append(node);
    });

    // Always pass the id as a quoted string literal: the handlers call
    // string methods on it, and JSON.stringify escapes it safely.
    var arg = JSON.stringify(String(hit.id));
    $('.wenku8-btn-1', tmp).attr('onclick', 'wenku8Fun1_4(' + arg + ')');
    $('.wenku8-btn-2', tmp).attr('onclick', 'wenku8Fun1_2(' + arg + ')');
    $('.wenku8-btn-3', tmp).attr('onclick', 'wenku8Fun1_3(' + arg + ')');

    $('#wenku8-search').append(tmp);
    $('#wenku8-search').append($('<br>'));
}

/** Build an external link with attr()/text(), so values are never parsed as HTML. */
function _wenku8Link(href, label) {
    return $('<a rel="noreferrer" target="_blank"></a>').attr('href', href).text(label);
}

/**
 * Search both sources ('/v2/search/' and '/v2_dmzj/search/') for `key` and
 * render the hits into '#wenku8-search'.
 *
 * @param {string} key Search text.
 */
async function search(key) {
    wenku8_progress.show();
    try {
        // Encode once so that '?', '#' or '%' in the key cannot alter the path.
        var query = encodeURIComponent(key);

        var results = JSON.parse(await ajax('/v2/search/' + query));
        $('#wenku8-search').empty();
        for (var book of results) {
            console.log(book);
            var txtQuery = '&id=' + encodeURIComponent(book.bid) + '&fname=' + encodeURIComponent(book.title);
            _wenku8AppendSearchCard({
                id: book.bid,
                title: book.title,
                status: book.status,
                brief: book.brief,
                coverNodes: [
                    $('<iframe scrolling="no" frameborder="0"></iframe>').attr('src', book.cover),
                    $('<br>'),
                    _wenku8Link(book.cover, '封面链接'),
                    _wenku8Link('http://dl.wenku8.com/down.php?type=txt' + txtQuery, ' 下载TXT(GBK)'),
                    _wenku8Link('http://dl.wenku8.com/down.php?type=utf8' + txtQuery, ' 下载TXT(UTF8)')
                ]
            });
        }

        var results2 = JSON.parse(await ajax('/v2_dmzj/search/' + query));
        wenku8_progress.hide();
        if (results.length == 0 && results2.length == 0) {
            $('#wenku8-search').append($('<p>抱歉，没有搜索到相关内容。</p>'));
        }
        for (var book2 of results2) {
            // Ids from this source are shown and passed on with a 'dmzj_' prefix.
            book2['id'] = 'dmzj_' + book2['id'];
            console.log(book2);
            _wenku8AppendSearchCard({
                id: book2.id,
                title: book2.name,
                status: book2.status + ' 作者:' + book2.authors,
                brief: book2.introduction,
                coverNodes: [
                    $('<img height="205px">').attr('src', book2.cover),
                    $('<br>'),
                    _wenku8Link(book2.cover, '封面链接')
                ]
            });
        }
    } catch (e) {
        mdui.snackbar(String(e));
    } finally {
        // Never leave the progress bar up, whatever happened above.
        wenku8_progress.hide();
    }
}
343 | 
/** Run a search for the text typed into '#wenku8-fun5-text'. */
function wenku8Fun5() {
    search($('#wenku8-fun5-text').val());
}
348 | 
/**
 * Post the feedback form to '/v2/feedback' and reload the comment list.
 * A name and a message are required. An empty response body is shown as a
 * thank-you, anything else is shown verbatim.
 */
function wenku8Feedback() {
    var user = $('#wenku8-feedback-user').val();
    var email = $('#wenku8-feedback-email').val();
    var password = $('#wenku8-feedback-password').val();
    var message = $('#wenku8-feedback-message').val();
    // Test for undefined BEFORE reading .length.
    if (user == undefined || user.length == 0) {
        mdui.snackbar("请至少输入您的名字");
        return;
    }
    if (message == undefined || message.length == 0) {
        mdui.snackbar("请输入消息内容");
        return;
    }
    wenku8_progress.show();
    $.post('/v2/feedback', {user:user, message:message, email:email, password:password}).then(function (d) {
        wenku8_progress.hide();
        if (d == '')
            mdui.snackbar("感谢您的反馈");
        else
            mdui.snackbar(d);
        commentLoad();
    }, function () {
        // Do not leave the progress bar up when the request fails.
        wenku8_progress.hide();
        mdui.snackbar('请求失败');
    });
}
372 | 
/**
 * Decide between a direct download and remote caching for the id in
 * '#wenku8-fun2-text': when '/v2/check/' answers 0, run wenku8Fun2;
 * otherwise set always_download and start caching through wenku8Fun1_2.
 */
function wenku8Fun6() {
    var bid = $('#wenku8-fun2-text').val();
    ajax('/v2/check/' + bid).then(function (should) {
        if (should == 0) {
            wenku8Fun2();
        } else {
            always_download = true;
            console.log('wenku8Fun1_2(bid)', bid);
            wenku8Fun1_2(bid);
        }
    }, function (e) {
        // A failed check previously ended as an unhandled rejection.
        mdui.snackbar(e);
    });
}
385 | 
/** Fade in every element carrying the 'wenku8-feedback-admin' class. */
function wenku8ShowAdmin() {
    var adminFields = $('.wenku8-feedback-admin');
    adminFields.fadeIn('slow');
}
389 | 
/**
 * Fetch '/v2/comments' and rebuild the comment list inside '#wenku8-chat',
 * in reverse order of the response. Each comment is rendered by cloning the
 * '.wenku8-chat-box' element found inside '#wenku8-chat-box-outline'.
 */
function commentLoad() {
    ajax('/v2/comments').then(function (d) {
        // Local variables: `data` and `c` used to leak as implicit globals.
        var data = JSON.parse(d).reverse();
        $('.wenku8-chat-spinner').fadeOut('fast');
        var outline = $('#wenku8-chat-box-outline');
        var box = $('.wenku8-chat-box', outline);
        var chat = $('#wenku8-chat');
        // Empty the list, then put the outline element back in first.
        chat.empty();
        chat.append(outline);
        if (data.length == 0) {
            chat.append($('<p>暂时没有评论。</p>'));
            return;
        }
        for (var c of data) {
            var tmp = box.clone(true);
            $('.wenku8-chat-head', tmp).attr('src', c.head);
            $('.wenku8-chat-user', tmp).text(c.username);
            $('.wenku8-chat-message', tmp).text(c.message);
            chat.append(tmp);
        }
    }, function (e) {
        // A failed request previously ended as an unhandled rejection.
        mdui.snackbar(e);
    });
}
414 | 
/**
 * Append one chat bubble to '#wenku8-chat2' and scroll to the bottom.
 *
 * @param {{user: string, message: string}} c Message to show; user 'me'
 *     uses the "me" bubble template, anything else the "xb" template.
 */
function chatLoad(c) {
    var chat = $('#wenku8-chat2');
    var template = (c['user'] == 'me')
        ? {outline: '#wenku8-chat2-me-box-outline', message: '.wenku8-chat2-me-message'}
        : {outline: '#wenku8-chat2-xb-box-outline', message: '.wenku8-chat2-xb-message'};

    var bubble = $(template.outline).clone(true);
    bubble.fadeIn('slow');
    $(template.message, bubble).text(c.message);
    chat.append(bubble);

    // Keep the newest message in view.
    var pane = document.getElementById('wenku8-chat2');
    pane.scrollTop = pane.scrollHeight;
}
431 | 
/**
 * JSONP callback for the '/chat/' request: shows the reply as a chat bubble
 * from user 'XiaoIce'.
 *
 * @param {{code: number, data: string, other: string}} d Reply; `data` is
 *     shown when `code` is 0, otherwise `other`.
 */
function foo(d) {
    console.log(d);
    var reply = (d.code == 0) ? d.data : d.other;
    chatLoad({
        'user': 'XiaoIce',
        'message': reply
    });
}
447 | 
// Send the text typed into #wenku8-chat-me-message to the /chat/ endpoint
// via JSONP. The user's own bubble is rendered immediately; the reply is
// rendered by the global JSONP callback `foo`.
function wenku8Chat() {
    var input = $('#wenku8-chat-me-message');
    var message = input.val();
    input.val('');
    // Check for undefined first: reading .length of undefined would throw.
    if (message == undefined || message.length == 0) {
        mdui.snackbar("请输入消息内容");
        return;
    }
    chatLoad({
        'user': 'me',
        'message': message
    });
    wenku8_progress.show();
    $.ajax({
        // The message is user input placed in the URL path; encode it so
        // characters such as '?', '#', '/' or '%' cannot truncate or alter
        // the request.
        url: '/chat/' + encodeURIComponent(message) + '?callback=foo',
        dataType: 'JSONP',
        jsonp: "foo",
        jsonpCallback: "foo",
        contentType: "application/json;charset=utf-8",
        // `complete` runs after success and after error, so the progress
        // indicator is not left spinning when the request fails.
        complete: function () {
            wenku8_progress.hide();
        }
    })
}


--------------------------------------------------------------------------------
/templates/forms.html:
--------------------------------------------------------------------------------
 1 | <form action="/search" method="get">
 2 |     <input type="hidden" name="method" value="name">
 3 |     <input class="button" type="submit" value="按名字搜书"><input name="search_key" type="text"><br>
 4 | </form>
 5 | <form action="/search" method="get">
 6 |     <input type="hidden" name="method" value="id">
 7 |     <input class="button" type="submit" value="按ID找书"><input name="search_key" type="text"><br>
 8 | </form>
 9 | <form action="/search" method="get">
10 |     <input type="hidden" name="method" value="cache">
11 |     <input class="button" type="submit" value="按ID缓存"><input name="search_key" type="text"><br>
12 | </form>
13 | <form action="/search" method="get">
14 |     <input type="hidden" name="method" value="cache_img">
15 |     <input class="button" type="submit" value="按ID缓存(包含图片)"><input name="search_key" type="text"><br>
16 | </form>


--------------------------------------------------------------------------------
/wenku8toepub.bkp.py:
--------------------------------------------------------------------------------
  1 | import requests
  2 | import bs4
  3 | from bs4 import BeautifulSoup as Soup
  4 | from ebooklib import epub
  5 | import os
  6 | import sys
  7 | import getopt
  8 | from base_logger import getLogger
  9 | import threading
 10 | import io
 11 | import copy
 12 | import re
 13 | 
 14 | 
 15 | class MLogger:
 16 |     def __init__(self):
 17 |         self.data = io.StringIO()
 18 | 
 19 |     def write(self, content: str):
 20 |         self.data.write(content + '\n')
 21 |         print(content)
 22 | 
 23 |     def read_all(self):
 24 |         lock.acquire()
 25 |         data2 = copy.deepcopy(self.data)
 26 |         data2.seek(0)
 27 |         d = data2.read()
 28 |         lock.release()
 29 |         return d
 30 | 
 31 |     def info(self, message):
 32 |         self.write(message)
 33 | 
 34 |     def error(self, message):
 35 |         self.write(message)
 36 | 
 37 |     def warning(self, message):
 38 |         self.write(message)
 39 | 
 40 |     def warn(self, message):
 41 |         self.write(message)
 42 | 
 43 |     def critical(self, message):
 44 |         self.write(message)
 45 | 
 46 |     def debug(self, message):
 47 |         self.write(message)
 48 | 
 49 | 
 50 | class Wenku8ToEpub:
 51 |     def __init__(self):
 52 |         # api格式
 53 |         # 参数1：id千位开头
 54 |         # 参数2：id
 55 |         self.api = "https://www.wenku8.net/novel/%s/%d/"
 56 |         self.api_info = "https://www.wenku8.net/book/%d.htm"
 57 |         self.api_img = "http://img.wkcdn.com/image/%s/%d/%ds.jpg"
 58 |         self.img_splits = ['http://pic.wenku8.com/pictures/',
 59 |                            'http://pic.wkcdn.com/pictures/',
 60 |                            'http://picture.wenku8.com/pictures/']
 61 |         self.api_login = 'http://www.wenku8.net/login.php?do=submit"'
 62 |         self.api_serach1 = 'http://www.wenku8.net/modules/article/search.php?searchtype=articlename&searchkey=%s'
 63 |         self.api_serach2 = 'http://www.wenku8.net/modules/article/search.php?searchtype=author&searchkey=%s'
 64 |         self.api_txt = 'http://dl.wenku8.com/down.php?type=txt&id=%d'
 65 |         self.cookies = ''
 66 |         self.cookie_jar = None
 67 |         self.book = epub.EpubBook()
 68 |         self.thread_img_pool = []
 69 |         self.thread_pool = []
 70 |         # 用于章节排序的文件名
 71 |         self.sumi = 0
 72 |         # 目录管理
 73 |         self.toc = []
 74 |         # 主线
 75 |         self.spine = ['cover', 'nav']
 76 |         # 当前章节
 77 |         self.chapters = []
 78 |         self.book_id = 0
 79 |         self.logger = logger
 80 |         self.image_size = None
 81 |         self.image_count = 0
 82 | 
 83 |     # 登录，能够使用搜索功能。
 84 |     def login(self, username='lanceliang', password='1352040930lxr'):
 85 |         payload = {'action': 'login',
 86 |                    'jumpurl': '',
 87 |                    'username': username,
 88 |                    'password': password}
 89 |         headers = {
 90 |             'Content-Type': 'application/x-www-form-urlencoded'
 91 |         }
 92 |         response = requests.request("POST", self.api_login, headers=headers, data=payload)
 93 |         html = response.content.decode('gbk')
 94 |         if '登录成功' not in html:
 95 |             self.logger.error("登录失败")
 96 |             return
 97 |         cookie_value = ''
 98 |         for key, value in response.cookies.items():
 99 |             cookie_value += key + '=' + value + ';'
100 |         self.cookies = cookie_value
101 |         self.cookie_jar = response.cookies
102 | 
103 |     # 搜索，应该先登录
104 |     def search(self, key: str):
105 |         books = self.search_one(self.api_serach1, key)
106 |         books.extend(self.search_one(self.api_serach2, key))
107 |         return books
108 | 
109 |     def search_one(self, selected_api: str, key: str):
110 |         self.login()
111 |         if len(self.cookies) == 0 or self.cookie_jar is None:
112 |             # 还没有登录
113 |             self.logger.error("请先登录再使用搜索功能")
114 |             return []
115 |         headers = {
116 |             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0',
117 |             'Content-Type': 'multipart/form-data; boundary=--------------------------607040101744888865545920',
118 |             'Cookie': self.cookies
119 |         }
120 |         # 注意编码问题
121 |         # 云 -> %D4%C6
122 |         encodings = key.encode('gbk').hex().upper()
123 |         key_arg = ''
124 |         for i in range(0, len(encodings), 2):
125 |             key_arg = key_arg + '%' + encodings[i] + encodings[i+1]
126 |         response = requests.request("GET", selected_api % key_arg, headers=headers, cookies=self.cookie_jar)
127 |         html = response.content.decode("gbk", errors='ignore')
128 |         soup = Soup(html, 'html.parser')
129 | 
130 |         if '推一下' in html:
131 |             # 直接进入了单本状态
132 |             # print(soup)
133 |             # print(title, bid, cover, status, brief)
134 |             title = soup.find_all('b')[1].get_text()
135 |             bid = ''
136 |             for n in re.findall('\d', response.url)[1:]:
137 |                 bid = bid + n
138 |             bid = int(bid)
139 |             try:
140 |                 cover = soup.find_all('img')[1].get_attribute_list('src')[0]
141 |             except IndexError:
142 |                 cover = None
143 |             try:
144 |                 status = soup.find_all('table')[0].find_all('tr')[2].get_text().replace('\n', ' ')
145 |             except IndexError:
146 |                 status = None
147 |             try:
148 |                 brief = soup.find_all('table')[2].find_all('td')[1].find_all('span')[4].get_text()
149 |             except IndexError:
150 |                 spans = soup.find_all('span')
151 |                 for i in range(len(spans)):
152 |                     if '内容简介' in spans[i].get_text():
153 |                         brief = spans[i+1].get_text()
154 |             book = {
155 |                 'title': title, 'bid': bid, 'cover': cover, 'status': status, 'brief': brief
156 |             }
157 |             return [book, ]
158 | 
159 |         '''
160 |         # 暂时只搜索一页内容
161 |         links = soup.find_all('a')
162 |         books = []
163 |         for a in links:
164 |             if a.has_attr('href') and len(a.get_attribute_list('href')) != 0:
165 |                 href = a.get_attribute_list('href')[0]
166 |                 if '//www.wenku8.net/book/' in href and href not in books:
167 |                     books.append(href)
168 |         # print(books)
169 |         bids = []
170 |         for book in books:
171 |             numbers = re.findall('\d', book)[1:]
172 |             bid = ''
173 |             for n in numbers:
174 |                 bid = bid + n
175 |             bids.append(int(bid))
176 |         print(bids)
177 |         '''
178 |         td = soup.find('td')
179 |         books = []
180 |         for content in td.children:
181 |             if not isinstance(content, bs4.element.Tag):
182 |                 continue
183 |             # print(content)
184 |             # print('#' * 64)
185 |             title = content.find_all('a')[1].get_text()
186 |             url = content.find_all('a')[1].get_attribute_list('href')[0]
187 |             numbers = re.findall('\d', url)[1:]
188 |             bid = ''
189 |             for n in numbers:
190 |                 bid = bid + n
191 |             bid = int(bid)
192 |             cover = content.find_all('img')[0].get_attribute_list('src')[0]
193 |             status = content.find_all('p')[0].get_text()
194 |             brief = content.find_all('p')[1].get_text()[3:]
195 |             # print(title, bid, cover, status, brief)
196 |             book = {
197 |                 'title': title, 'bid': bid, 'cover': cover, 'status': status, 'brief': brief
198 |             }
199 |             books.append(book)
200 | 
201 |         return books
202 | 
203 |     # 获取书籍信息。
204 |     # {
205 |     #   id, name, author, brief, cover, copyright
206 |     # }
207 |     def bookinfo(self, book_id: int):
208 |         url_cat = "%s%s" % (self.api % (("%04d" % book_id)[0], book_id), "index.htm")
209 |         soup_cat = Soup(requests.get(url_cat).content, 'html.parser')
210 |         table = soup_cat.select('table')
211 |         if len(table) == 0:
212 |             self.logger.error("遇到错误")
213 |             return None
214 |         table = table[0]
215 | 
216 |         if len(soup_cat.select("#title")) == 0:
217 |             self.logger.error('该小说不存在！id = ' + str(book_id))
218 |             return None
219 |         title = soup_cat.select("#title")[0].get_text()
220 |         author = soup_cat.select("#info")[0].get_text().split('作者：')[-1]
221 |         url_cover = self.api_img % (("%04d" % book_id)[0], book_id, book_id)
222 |         # print(title, author, url_cover)
223 | 
224 |         brief = ''
225 |         url_cat2 = self.api_info % (book_id)
226 |         soup_cat2 = Soup(requests.get(url_cat2).content, 'html.parser')
227 |         update = ''
228 |         for td in soup_cat2.find_all('td'):
229 |             if '最后更新' in td.get_text():
230 |                 update = td.get_text()[5:]
231 |         iscopyright = True
232 |         if '因版权问题，文库不再提供' in soup_cat2.get_text():
233 |             iscopyright = False
234 |         spans = soup_cat2.select('span')
235 |         for i in range(len(spans)):
236 |             span = spans[i]
237 |             if '内容简介' in span.get_text():
238 |                 brief = spans[i + 1].get_text()
239 |         return {
240 |             "id": book_id,
241 |             "name": title,
242 |             "author": author,
243 |             "brief": brief,
244 |             "cover": url_cover,
245 |             'copyright': iscopyright,
246 |             'update': update
247 |         }
248 | 
249 |     # 获取版权状态
250 |     def copyright(self, book_id=None):
251 |         if book_id is None:
252 |             book_id = self.book_id
253 |         data = requests.get(self.api_info % book_id).content
254 |         soup = Soup(data, 'html.parser')
255 |         if '因版权问题，文库不再提供该' in soup.get_text():
256 |             return False
257 |         return True
258 | 
259 |     def id2name(self, book_id: int):
260 |         url_cat = "%s%s" % (self.api % (("%04d" % book_id)[0], book_id), "index.htm")
261 |         soup_cat = Soup(requests.get(url_cat).content, 'html.parser')
262 |         table = soup_cat.select('table')
263 |         if len(table) == 0:
264 |             self.logger.error("遇到错误")
265 |             return ''
266 |         table = table[0]
267 | 
268 |         if len(soup_cat.select("#title")) == 0:
269 |             self.logger.error('该小说不存在！id = ' + str(book_id))
270 |             return ''
271 |         title = soup_cat.select("#title")[0].get_text()
272 |         # author = soup_cat.select("#info")[0].get_text().split('作者：')[-1]
273 |         # url_cover = self.api_img % (("%04d" % self.book_id)[0], self.book_id, self.book_id)
274 |         return title
275 | 
276 |     def get_page(self, url_page: str, title: str = ''):
277 |         data = requests.get(url_page).content
278 |         soup = Soup(data, 'html.parser')
279 |         content = soup.select('#content')[0]
280 |         # 去除ul属性
281 |         [s.extract() for s in content("ul")]
282 |         return ("<h1>%s</h1>%s" % (title, content.prettify())).encode()
283 | 
284 |     def fetch_img(self, url_img):
285 |         if self.image_size is not None and self.image_size < self.image_count:
286 |             self.logger.warn('达到最大图像总计大小，取消图像下载')
287 |             return
288 |         self.logger.info('->Fetching image: ' + url_img + '...')
289 |         data_img = requests.get(url_img).content
290 |         lock.acquire()
291 |         self.image_count = self.image_count + len(data_img)
292 |         lock.release()
293 |         filename = url_img
294 |         for sp in self.img_splits:
295 |             filename = url_img.split(sp)[-1]
296 |         filetype = url_img.split('.')[-1]
297 |         # print('done. filename:', filename, "filetype", filetype)
298 |         img = epub.EpubItem(file_name="images/%s" % filename,
299 |                             media_type="image/%s" % filetype, content=data_img)
300 |         lock.acquire()
301 |         self.book.add_item(img)
302 |         lock.release()
303 |         self.logger.info('<-Done image: ' + url_img)
304 | 
305 |     def fetch_chapter(self, a, order: int, fetch_image: bool):
306 |         if a.get_text() == '插图':
307 |             self.logger.info('Images: ' + a.get_text())
308 |         else:
309 |             self.logger.info('chapter: ' + a.get_text())
310 | 
311 |         title_page = a.get_text()
312 | 
313 |         url_page = "%s%s" % (self.api % (("%04d" % self.book_id)[0], self.book_id), a.get('href'))
314 | 
315 |         data_page = self.get_page(url_page, title=title_page)
316 |         page = epub.EpubHtml(title=title_page, file_name='%s.xhtml' % self.sumi)
317 |         # 多线程模式下文件名会不按照顺序...
318 |         self.sumi = self.sumi + 1
319 | 
320 |         if fetch_image is True:
321 |             soup_tmp = Soup(data_page, 'html.parser')
322 |             imgcontent = soup_tmp.select(".imagecontent")
323 |             self.thread_img_pool = []
324 |             for img in imgcontent:
325 |                 url_img = img.get("src")
326 |                 # 排除其他站点的图片，防止访问超时
327 |                 origin = False
328 |                 for wenku8_img in self.img_splits:
329 |                     if wenku8_img in url_img:
330 |                         origin = True
331 |                 if not origin:
332 |                     continue
333 |                 th = threading.Thread(target=self.fetch_img, args=(url_img,))
334 |                 self.thread_img_pool.append(th)
335 |                 th.setDaemon(True)
336 |                 th.start()
337 | 
338 |             for it in self.thread_img_pool:
339 |                 it.join()
340 | 
341 |             # 在应该下载图片的时候进行替换
342 |             if self.image_size is None \
343 |                     or (self.image_size is not None and self.image_size > self.image_count):
344 |                 for url in self.img_splits:
345 |                     data_page = (data_page.decode().replace(url, 'images/')).encode()
346 | 
347 |         page.set_content(data_page)
348 |         lock.acquire()
349 |         self.book.add_item(page)
350 |         lock.release()
351 | 
352 |         # self.toc[-1][1].append(page)
353 |         # self.spine.append(page)
354 |         self.chapters[order] = page
355 | 
356 |     # 紧急使用。
357 |     def txt2epub(self, bid, txt: str=None):
358 |         url_cat = "%s%s" % (self.api % (("%04d" % bid)[0], bid), "index.htm")
359 |         soup_cat = Soup(requests.get(url_cat).content, 'html.parser')
360 |         table = soup_cat.select('table')
361 |         if len(table) == 0:
362 |             self.logger.error("遇到错误")
363 |             return False
364 |         table = table[0]
365 | 
366 |         if len(soup_cat.select("#title")) == 0:
367 |             self.logger.error('该小说不存在！id = ' + str(bid))
368 |             return
369 |         title = soup_cat.select("#title")[0].get_text()
370 |         author = soup_cat.select("#info")[0].get_text().split('作者：')[-1]
371 |         url_cover = self.api_img % (("%04d" % bid)[0], bid, bid)
372 |         data_cover = requests.get(url_cover).content
373 |         # print(title, author, url_cover)
374 |         self.logger.info('#' * 15 + '开始下载' + '#' * 15)
375 |         self.logger.info('标题: ' + title + " 作者: " + author)
376 | 
377 |         response = requests.get(self.api_txt % bid, stream=True)
378 |         chunk_size = 1024 * 100  # 单次请求最大值
379 |         # print(response.headers)
380 |         content_size = 0  # 内容体总大小
381 |         self.logger.info('该书没有版权，开始下载TXT文件转化为EPUB')
382 |         data_download = io.BytesIO()
383 |         for data in response.iter_content(chunk_size=chunk_size):
384 |             data_download.write(data)
385 |             content_size = int(content_size + len(data))
386 |             self.logger.info('已经下载 %s KB' % (content_size // 1024))
387 |         txt = data_download.getvalue().decode('gbk', errors='ignore')
388 |         self.logger.info('TXT下载完成')
389 | 
390 |         book = epub.EpubBook()
391 |         book.set_identifier("%s, %s" % (title, author))
392 |         book.set_title(title)
393 |         book.add_author(author)
394 |         book.set_cover('cover.jpg', data_cover)
395 | 
396 |         toc = []
397 |         spine = []
398 | 
399 |         content = ''
400 |         for li in txt.splitlines():
401 |             content = content + '<p>%s</p>\n' % li
402 | 
403 |         page = epub.EpubHtml(title=title, file_name='all.html')
404 |         page.set_content(content.encode())
405 |         toc.append(page)
406 |         spine.append(page)
407 |         book.add_item(page)
408 | 
409 |         book.toc = toc
410 | 
411 |         # add navigation files
412 |         book.add_item(epub.EpubNcx())
413 |         book.add_item(epub.EpubNav())
414 | 
415 |         # create spine
416 |         book.spine = spine
417 |         stream = io.BytesIO()
418 |         epub.write_epub(stream, book)
419 |         return stream.getvalue()
420 | 
    def get_book_no_copyright(self, targets,
                              bin_mode: bool = False,
                              savepath: str = '',
                              author: str = 'undefind'):
        """Build the EPUB from the site's TXT download, split by the catalogue.

        Used by get_book() when the book is no longer served online. The TXT
        file of ``self.book_id`` is downloaded, then cut into chapters by
        searching for "<volume title> <chapter title>" anchors taken from the
        catalogue cells, and the pieces are added to ``self.book``.

        :param targets: catalogue ``<td>`` elements; class ``vcss`` marks a
            volume title, class ``ccss`` a chapter.
        :param bin_mode: when true the EPUB is returned as bytes instead of
            being written to disk.
        :param savepath: directory the ``<title>.epub`` file is written to
            when ``bin_mode`` is false.
        :param author: not used in this method.
        :return: EPUB bytes in ``bin_mode``; otherwise ``None``.
        """
        # txt = requests.get(self.api_txt % self.book_id).content.decode('gbk', errors='ignore')
        response = requests.get(self.api_txt % self.book_id, stream=True)
        chunk_size = 1024 * 100  # maximum size of one streamed chunk
        # print(response.headers)
        content_size = 0  # total number of bytes received so far
        self.logger.info('该书没有版权，开始下载TXT文件转化为EPUB')
        data_download = io.BytesIO()
        for data in response.iter_content(chunk_size=chunk_size):
            data_download.write(data)
            content_size = int(content_size + len(data))
            self.logger.info('已经下载 %s KB' % (content_size // 1024))
        # with open('%s.txt' % self.book_id, 'w', encoding='gbk') as f:
        #     f.write(txt)
        # with open('%s.txt' % self.book_id, 'r', encoding='gbk') as f:
        #     txt = f.read()
        data_download.seek(0)
        txt = data_download.read().decode('gbk', errors='ignore')
        self.logger.info('TXT下载完成')
        # The title is whatever sits between '<' and '>' in the first 81
        # characters; raises IndexError if no such pair exists.
        title = re.findall('<.+>', txt[:81])[0][1:-1]
        # Drop a fixed-size head (40 characters plus the title) and the last
        # 76 characters — presumably the site's header/footer banner; TODO confirm.
        txt = txt[40 + len(title):-76]
        # print(txt)
        # print(title)

        # Rebuild the volume -> chapter-title structure from the catalogue cells.
        volumes = []  # collected but not read again in this method
        chapters = []
        for tar in targets:
            if tar.get_attribute_list('class')[0] == 'vcss':
                volumes.append(tar.get_text())
                chapters.append({
                    'volume': tar.get_text(),
                    'chapters': []
                })
                continue
            # Chapter cell; cells holding only a non-breaking space are padding.
            if tar.get_attribute_list('class')[0] == 'ccss' \
                and tar.get_text().encode() != b'\xc2\xa0':
                chapters[-1]['chapters'].append(tar.get_text())
                continue

        # `last_end` is the position in `txt` up to which text has been consumed.
        last_end = 0
        length = len(txt)
        # for v in chapters:
        for i in range(len(chapters)):
            v = chapters[i]
            txts = []
            volume_text = v['volume']
            self.logger.info('volume: ' + volume_text)
            for c in v['chapters']:
                # Each chapter is located by the string "<volume> <chapter>".
                anchor = "%s %s" % (volume_text, c)
                next_end = txt.find(anchor, last_end, length)
                # print('next_end', next_end)
                # Skips anchors that were not found (-1) or that sit within
                # the first 7 characters.
                if next_end <= 6:
                    continue
                # The slice is the text *before* this anchor, i.e. what
                # precedes the current chapter heading.
                txt_slice = txt[last_end: next_end]
                last_end = next_end
                txt2 = ''
                for line in txt_slice.splitlines():
                    txt2 = txt2 + '<p>%s</p>' % line
                txt_slice = txt2
                txts.append(txt_slice)
            # Remainder of the volume: up to the end of the text for the last
            # volume, otherwise up to the next volume's title. Unlike the
            # slices above it is appended without <p> wrapping.
            if i + 1 == len(chapters):
                txts.append(txt[last_end:])
            else:
                point = txt.find(chapters[i+1]['volume'], last_end, length)
                # print('point', point)
                # NOTE(review): `point` is -1 when the next volume title is
                # not found, which makes this slice end one character before
                # the end and sets last_end to -2 — confirm this is intended.
                txts.append(txt[last_end:point])
                last_end = point-1

            if len(txts) != len(v['chapters']):
                # print('err')
                # Original author's note: "no idea why, but writing it this
                # way works". Presumably the first slice is the text before
                # the first chapter heading and is dropped to realign slices
                # with chapter titles — TODO confirm.
                txts = txts[1:]

            # Add the volume first: a TOC section plus a heading-only page.
            # The volume page is added to the book but not to the spine.
            self.toc.append((epub.Section(volume_text), []))
            volume = epub.EpubHtml(title=volume_text, file_name='%s.html' % self.sumi)
            self.sumi = self.sumi + 1
            volume.set_content(("<h1>%s</h1><br>" % volume_text).encode())
            self.book.add_item(volume)

            # Add the chapters. This inner loop reuses the name `i`; the outer
            # loop is unaffected because range() rebinds it on the next pass.
            for i in range(len(v['chapters'])):
                chapter_title = v['chapters'][i]
                self.logger.info('chapter: ' + chapter_title)
                page = epub.EpubHtml(title=chapter_title, file_name='%s.xhtml' % self.sumi)
                self.sumi = self.sumi + 1
                # Raises IndexError if fewer slices than chapter titles remain.
                page.set_content('\n' + txts[i])
                lock.acquire()
                self.book.add_item(page)
                lock.release()
                self.toc[-1][1].append(page)
                self.spine.append(page)

        # print('de')
        # exit()

        self.book.toc = self.toc

        # add navigation files
        self.book.add_item(epub.EpubNcx())
        self.book.add_item(epub.EpubNav())

        # create spine
        self.book.spine = self.spine
        if bin_mode is True:
            # Serialize into memory and hand the bytes back to the caller.
            stream = io.BytesIO()
            epub.write_epub(stream, self.book)
            stream.seek(0)
            return stream.read()
        else:
            # epub.write_epub(os.path.join(savepath, '%s - %s.epub' % (title, author)), self.book)
            epub.write_epub(os.path.join(savepath, '%s.epub' % (title, )), self.book)
536 | 
537 |     def get_book(self, book_id: int, savepath: str = '',
538 |                  fetch_image: bool = True,
539 |                  multiple: bool = True, bin_mode: bool = False,
540 |                  mlogger=None, image_size=None):
541 |         # :param image_size 图像总计最大大小（字节数）
542 |         if mlogger is not None:
543 |             self.logger = mlogger
544 |         self.image_size = image_size
545 |         self.book_id = book_id
546 |         self.book = epub.EpubBook()
547 | 
548 |         url_cat = "%s%s" % (self.api % (("%04d" % self.book_id)[0], self.book_id), "index.htm")
549 |         soup_cat = Soup(requests.get(url_cat).content, 'html.parser')
550 |         table = soup_cat.select('table')
551 |         if len(table) == 0:
552 |             self.logger.error("遇到错误")
553 |             return False
554 |         table = table[0]
555 | 
556 |         if len(soup_cat.select("#title")) == 0:
557 |             self.logger.error('该小说不存在！id = ' + str(self.book_id))
558 |             return
559 |         title = soup_cat.select("#title")[0].get_text()
560 |         author = soup_cat.select("#info")[0].get_text().split('作者：')[-1]
561 |         url_cover = self.api_img % (("%04d" % self.book_id)[0], self.book_id, self.book_id)
562 |         data_cover = requests.get(url_cover).content
563 |         # print(title, author, url_cover)
564 |         self.logger.info('#' * 15 + '开始下载' + '#' * 15)
565 |         self.logger.info('标题: ' + title + " 作者: " + author)
566 |         self.book.set_identifier("%s, %s" % (title, author))
567 |         self.book.set_title(title)
568 |         self.book.add_author(author)
569 |         self.book.set_cover('cover.jpg', data_cover)
570 | 
571 |         targets = table.select('td')
572 |         iscopyright = self.copyright()
573 |         if not iscopyright:
574 |         # if iscopyright:
575 |             # 没有版权的时候
576 |             return self.get_book_no_copyright(targets, bin_mode=bin_mode, author=author)
577 | 
578 |         order = 0
579 |         for tar in targets:
580 |             a = tar.select('a')
581 |             # 这是本卷的标题
582 |             text = tar.get_text()
583 |             # 排除空白表格
584 |             if text.encode() == b'\xc2\xa0':
585 |                 # print('排除了', text, text.encode() == b'\xc2\xa0')
586 |                 continue
587 |             if len(a) == 0:
588 |                 volume_text = tar.get_text()
589 |                 self.logger.info('volume: ' + volume_text)
590 | 
591 |                 # 上一章节的chapter
592 |                 for th in self.thread_pool:
593 |                     th.join()
594 |                 # 已经全部结束
595 |                 if len(self.thread_pool) != 0:
596 |                     self.thread_pool = []
597 |                     for chapter in self.chapters:
598 |                         if chapter is None:
599 |                             continue
600 |                         self.toc[-1][1].append(chapter)
601 |                         self.spine.append(chapter)
602 | 
603 |                 self.chapters = [None for i in range(len(targets))]
604 |                 order = 0
605 |                 self.toc.append((epub.Section(volume_text), []))
606 |                 volume = epub.EpubHtml(title=volume_text, file_name='%s.html' % self.sumi)
607 |                 self.sumi = self.sumi + 1
608 |                 volume.set_content(("<h1>%s</h1><br>" % volume_text).encode())
609 |                 self.book.add_item(volume)
610 |                 continue
611 |             # 是单章
612 |             a = a[0]
613 | 
614 |             th = threading.Thread(target=self.fetch_chapter, args=(a, order, fetch_image))
615 |             order = order + 1
616 |             self.thread_pool.append(th)
617 |             th.setDaemon(True)
618 |             th.start()
619 | 
620 |         # 最后一个章节的chapter
621 |         for th in self.thread_pool:
622 |             th.join()
623 |         # 已经全部结束
624 |         if len(self.thread_pool) != 0:
625 |             self.thread_pool = []
626 |             for chapter in self.chapters:
627 |                 if chapter is None:
628 |                     continue
629 |                 self.toc[-1][1].append(chapter)
630 |                 self.spine.append(chapter)
631 | 
632 |         self.book.toc = self.toc
633 | 
634 |         # add navigation files
635 |         self.book.add_item(epub.EpubNcx())
636 |         self.book.add_item(epub.EpubNav())
637 | 
638 |         # create spine
639 |         self.book.spine = self.spine
640 |         if bin_mode is True:
641 |             stream = io.BytesIO()
642 |             epub.write_epub(stream, self.book)
643 |             stream.seek(0)
644 |             return stream.read()
645 |         else:
646 |             # epub.write_epub(os.path.join(savepath, '%s - %s.epub' % (title, author)), self.book)
647 |             epub.write_epub(os.path.join(savepath, '%s.epub' % (title, )), self.book)
648 | 
649 | 
# Usage text printed by the command-line entry point below (for -h, when no
# book id is given, or when an id is not a number).
help_str = '''
把www.wenku8.net的轻小说在线转换成epub格式。wenku8.net没有版权的小说则下载TXT文件然后转换为epub文件。

wk2epub [-h] [-t] [-m] [-b] [list]

    list            一个数字列表，中间用空格隔开

    -t              只获取文字，忽略图片。
                    但是图像远程连接仍然保留在文中。
                    此开关默认关闭，即默认获取图片。

    -m              多线程模式。
                    该开关已默认打开。

    -i              显示该书信息。

    -b              把生成的epub文件直接从stdio返回。
                    此时list长度应为1。
                    调试用。

    -h              显示本帮助。

调用示例:
    wk2epub -t 1 1213

关于:
    https://github.com/LanceLiang2018/Wenku8ToEpub

版本:
    2020/3/8 1:45 AM
'''

# Default logger; Wenku8ToEpub.__init__ stores it as self.logger.
logger = getLogger()
# Module-wide lock used by the classes above to serialize access to shared
# state (book items, the image byte counter, the MLogger buffer).
lock = threading.Lock()
684 | 
if __name__ == '__main__':
    # Command-line entry point: wk2epub [-h] [-t] [-m] [-b] [-i] id [id ...]
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'htmbi', [])
    except getopt.GetoptError as err:
        # Unknown option: show the problem and the usage text instead of a
        # traceback.
        logger.error(str(err))
        print(help_str)
        sys.exit(2)
    _fetch_image = True
    _multiple = True
    _bin_mode = False
    # Parsed for compatibility; book information is always printed below.
    _show_info = False
    if len(args) == 0:
        print(help_str)
        sys.exit()
    for name, val in opts:
        if '-h' == name:
            print(help_str)
            sys.exit()
        if '-t' == name:
            _fetch_image = False
        if '-m' == name:
            _multiple = True
        if '-b' == name:
            _bin_mode = True
        if '-i' == name:
            _show_info = True
    try:
        args = list(map(int, args))
    except ValueError:
        logger.error("错误: 参数只接受数字。")
        print(help_str)
        sys.exit()

    for _id in args:
        # A fresh converter per book keeps TOC/spine state from leaking
        # between books.
        wk = Wenku8ToEpub()
        _bookinfo = wk.bookinfo(_id)
        if _bookinfo is None:
            # bookinfo() has already logged why the book is unavailable;
            # continue with the next id instead of crashing on None.
            continue
        print('信息：ID:%s\t书名:%s\t作者:%s' % (_bookinfo['id'], _bookinfo['name'], _bookinfo['author']))
        print('简介：\n%s' % _bookinfo['brief'])
        res = wk.get_book(_id, fetch_image=_fetch_image, multiple=_multiple, bin_mode=_bin_mode)
        if _bin_mode is True:
            print(res)
733 | 
734 | 
735 | 


--------------------------------------------------------------------------------
/wk8local.py:
--------------------------------------------------------------------------------
import os
# Set before `server` / `manage` are imported further down; presumably those
# modules read WENKU8_LOCAL at import time to enable local mode -- TODO confirm.
os.environ['WENKU8_LOCAL'] = "True"

import time
import webbrowser
import threading

from error_report import *


# Import the web application. Any exception raised while importing is passed
# to report_it(..., _exit=True); report_it is not imported by name here, so it
# presumably comes from the error_report star import above.
try:
    from server import *
    from manage import logger
except Exception as e:
    report_it(e, _exit=True)


# Despite its name, this value is used below as the default TCP port of the
# local server and as the port in the URL opened in the browser.
local_version = 5009
19 | 
20 | 
def open_browser(url, sleep_time=3):
    """Sleep for ``sleep_time`` seconds, then open ``url`` via ``webbrowser.open``.

    The call blocks for the whole delay, which is why the script entry point
    runs it on a separate thread.

    :param url: address handed to ``webbrowser.open``
    :param sleep_time: delay in seconds before the browser is opened
    """
    time.sleep(sleep_time)
    webbrowser.open(url)
24 | 
25 | 
if __name__ == '__main__':
    # Resolve the port once so the URL opened in the browser always matches
    # the port the server binds to (the PORT environment variable overrides
    # the built-in default).
    port = int(os.environ.get('PORT', local_version))
    local_url = 'http://localhost:%s/' % port
    # Seconds to wait before opening the browser; also shown in the first
    # log message so the two can no longer drift apart.
    browser_delay = 5

    logger.info('%s秒钟后将自动打开浏览器。' % browser_delay)
    logger.info('使用完毕请关闭本窗口。')
    logger.info('如果打开失败请刷新浏览器或者重新输入“%s”。' % local_url)

    # Open the browser from a helper thread after the delay. The thread is a
    # daemon so that a server which fails to start does not keep the process
    # alive just to open a dead URL.
    threading.Thread(target=open_browser, args=(local_url, browser_delay), daemon=True).start()

    # run_simple and dm are not imported by name in this file; presumably they
    # come from the `server` star import above.
    run_simple("0.0.0.0", port, dm)
36 | 


--------------------------------------------------------------------------------
/xiaoice.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # coding: utf-8
  3 | 
  4 | # realXiaoice - xiaoice.py
  5 | # 2019/8/11 13:27
  6 | #
  7 | 
  8 | __author__ = "Benny <benny.think@gmail.com>"
  9 | 
 10 | import base64
 11 | import logging
 12 | import random
 13 | import re
 14 | import time
 15 | 
 16 | import requests
 17 | 
 18 | # from config import cookies
 19 | 
# Log at INFO level; every record carries timestamp, file name and level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(filename)s [%(levelname)s]: %(message)s')
# Chat endpoints on m.weibo.cn: chat() POSTs outgoing messages to SEND and
# polls RECV with GET for replies. The uid in RECV is the same id chat() uses
# as recipient -- presumably the XiaoIce account; confirm.
SEND = 'https://m.weibo.cn/api/chat/send'
RECV = 'https://m.weibo.cn/api/chat/list?uid=5175429989&count=2&unfollowing=0'

# Single requests session shared by every HTTP call in this module.
s = requests.Session()
 25 | 
 26 | 
 27 | def __read_headers():
 28 |     logging.info('Reading headers...')
 29 |     real = {}
 30 |     f = open('headers.txt', encoding='utf-8')
 31 | 
 32 |     line = f.readline().strip()
 33 |     while line:
 34 |         key = line.split(":")[0]
 35 |         # firefox里的原始头冒号后面会多出一个空格，需除去
 36 |         real[key] = line[len(key) + 1:].strip()
 37 |         line = f.readline().strip()
 38 |     f.close()
 39 |     return real
 40 | 
 41 | 
 42 | def __realtime_csrf():
 43 |     # get realtime csrf token every 30 min
 44 |     logging.info('Get realtime csrf token')
 45 |     cookie_line = __read_headers().get('Cookie').split(':')[-1].strip()
 46 |     r = s.get(RECV, headers={"Cookie": cookie_line})
 47 |     return r.cookies.get('XSRF-TOKEN')
 48 | 
 49 | 
 50 | def __renew_headers():
 51 |     logging.info('Renewwing headers.txt')
 52 |     old_headers = __read_headers()
 53 |     old_csrf = old_headers.get('X-XSRF-TOKEN')
 54 |     new_csrf = __realtime_csrf()
 55 |     old_headers['X-XSRF-TOKEN'] = new_csrf
 56 |     old_headers['Cookie'] = old_headers['Cookie'].replace(old_csrf, new_csrf)
 57 |     # write files
 58 |     with open('headers.txt', 'r')as f:
 59 |         text = f.read()
 60 |         text = text.replace(old_csrf, new_csrf)
 61 |     with open('headers.txt', 'w') as f:
 62 |         f.write(text)
 63 |     return old_headers
 64 | 
 65 | 
 66 | def chat(msg: str) -> str:
 67 |     """
 68 |     chat program
 69 |     :param msg: message send to xiaoice
 70 |     :return: her response
 71 |     """
 72 |     logging.info('Getting headers from headers.txt')
 73 |     cur_headers = __read_headers()
 74 |     data = dict(uid=5175429989,
 75 |                 content=msg,
 76 |                 st=cur_headers.get('X-XSRF-TOKEN'))
 77 | 
 78 |     logging.info('Sending messages...')
 79 |     r = s.post(SEND, headers=cur_headers, data=data).json()
 80 |     logging.info('Server response: {}'.format(r))
 81 | 
 82 |     if r.get('ok') != 1:
 83 |         logging.warning('Headers are invalid, renewing now...')
 84 |         new = __renew_headers()
 85 |         data = dict(uid=5175429989,
 86 |                     content=msg,
 87 |                     st=new.get('X-XSRF-TOKEN'))
 88 | 
 89 |         sub = s.post(SEND, headers=new, data=data).json()
 90 |         logging.warning(sub)
 91 | 
 92 |     # get response
 93 |     time.sleep(random.random())
 94 |     polling_count = 0
 95 |     last_message = {}
 96 |     while 1:
 97 |         if polling_count >= 20:
 98 |             last_message['text'] = ''
 99 |             logging.warning('Last answer message fetch failed')
100 |             break
101 |         logging.info('Getting responses by polling...')
102 |         r = s.get(RECV, headers=cur_headers).json()
103 |         last_message = r.get('data', {}).get('msgs', {})[0]
104 |         if last_message['sender_id'] == 5175429989:
105 |             logging.info('Fetch last message: {}'.format(last_message))
106 |             break
107 |         polling_count += 1
108 |         time.sleep(random.random())
109 | 
110 |     # if the answer is an image file
111 |     if 'attachment' in last_message:
112 |         attachment_uri = last_message['attachment']['original_image']['url']
113 |         attachment_ext = last_message['attachment']['extension']
114 |         base64_image = base64.b64encode(s.get(attachment_uri, headers=cur_headers).content)
115 |         last_message['text'] = 'data:image/' + attachment_ext + ';base64,' + str(base64_image, encoding='utf-8')
116 | 
117 |     return __remove_bad_html(last_message['text'])
118 | 
119 | 
def __remove_bad_html(msg: str) -> str:
    """Reduce a chat message containing an ``<a`` tag to ``text + url``.

    Escaped slashes (``\\/``) are unescaped first. If the message has
    non-empty text before an ``<a`` tag and also contains an http/https/ftp
    URL, that text followed by the first URL is returned. In every other case
    the unescaped message is returned unchanged.

    :param msg: raw chat message
    :return: simplified message, or the unescaped original
    """
    logging.info("removing bad urls in chat message")
    unescaped = msg.replace(r'\/', '/')

    # The URL is only looked up when an anchor tag is present at all.
    anchor_hit = re.search(r'(.*)<a.*', unescaped)
    url_hit = None
    if anchor_hit is not None:
        url_hit = re.search(r'(?:(?:https?|ftp)://)+[\w/\-?=%.]+\.[\w/\-?=%.]+', unescaped)

    if anchor_hit is None or url_hit is None:
        logging.info('It seems like a normal text without any html codes.')
        return unescaped

    logging.info("All right, you are so 'funny' weibo:-(")
    leading_text = anchor_hit.group(1)
    first_url = url_hit.group(0)
    # An empty leading text means there is nothing to keep besides the tag.
    if not (first_url and leading_text):
        return unescaped
    return leading_text + first_url
138 | 
139 | 
if __name__ == '__main__':
    # Manual smoke test: send one fixed message and print the reply.
    print(chat('好好好我错了'))
143 | 


--------------------------------------------------------------------------------