├── README.md
├── config.yaml
├── core
    ├── process.py
    └── reload.py
├── js
    ├── dsign.py
    ├── md5.js
    └── runjs.py
├── lightnovel.py
├── lightnovel_exe.spec
├── models
    ├── book.py
    ├── chapter.py
    ├── cookie.py
    └── pic.py
├── requirements.txt
├── sites
    ├── abstract.py
    ├── esj.py
    ├── lk.py
    ├── masiro.py
    └── yuri.py
├── sqlite
    ├── book.py
    ├── chapter.py
    ├── cookie.py
    ├── database.py
    ├── engine.py
    ├── pic.py
    └── script.py
├── utils
    ├── common.py
    ├── config.py
    ├── epub.py
    ├── image.py
    ├── log.py
    ├── push.py
    └── request.py
└── zhconv
    └── zhcdict.json


/README.md:
--------------------------------------------------------------------------------
 1 | # lightnovel-pydownloader
 2 | 
 3 | 轻小说网站爬虫工具，内置sqlite保存数据，最终生成epub，支持推送到calibre-web服务  
 4 | 目前已适配真白萌、esj zone、轻国、百合会  
 5 | 本项目基于pycharm开发，开发环境为python 3.9  
 6 | 
 7 | ## 使用说明
 8 | ### Windows
 9 | 解压release最新版本的压缩包，修改config.yaml配置文件，双击lightnovel.exe运行  
10 | 或使用Pyinstaller从源代码中打包，环境准备见下方Linux说明  
11 | ~~~bash
12 | Pyinstaller lightnovel_exe.spec
13 | ~~~
14 | ### Linux
15 | Linux下暂时需要从源代码运行  
16 | 安装python3环境，建议3.9及以上  
17 | 安装requirements.txt下依赖  
18 | ~~~bash
19 | pip install -r requirements.txt
20 | ~~~
21 | 下载源代码  
22 | ~~~bash
23 | git clone https://github.com/ilusrdbb/lightnovel-pydownloader.git
24 | ~~~
25 | 修改config.yaml配置文件，运行
26 | ~~~bash
27 | python3 lightnovel.py &
28 | ~~~
29 | 
30 | ## 支持站点说明
31 | |站点|配置名称|特殊情况说明|
32 | |:-:|:-:|:-:|
33 | |esj|esj|外链(X)、密码章节(X)|
34 | |轻之国度|lk|轻币购买(√)、勇者权限(X)、app权限(√)|
35 | |真白萌|masiro|cf盾(√)、cookie登录(√)、等级权限(X)、金币购买(√)|
36 | |百合会|yuri|账号密码登录(X)、cookie登录(√)、等级权限(X)|
37 | 
38 | ## 配置说明
39 | 配置文件位于程序目录下的config.yaml，运行程序请根据自身需要修改配置
40 | |配置项|说明|
41 | |:-:|:-:|
42 | |site|配置需要爬取的站点，默认值esj，masiro 真白萌、esj esj、lk 轻国、yuri 百合会，输入all为爬取全部支持的站点（站点登录账号密码未配置时会跳过）|
43 | |white_list|白名单，数组，esj真白萌百合会填入书籍的地址，轻国填入合集的id（数字不是字符串），不支持轻国单本id|
44 | |black_list|黑名单，数组，esj真白萌百合会填入书籍的地址，轻国填入单本或合集的id（数字不是字符串）|
45 | |max_thread|最大线程数，默认值1，不建议设置过大，程序限制esj最大值8、轻国百合会最大值4、真白萌最大值1|
46 | |login_info|登录账号密码或cookie，必填，目前支持的站点都必须登录才可爬取，百合会填写cookie（建议在chrome无痕窗口中抓取），真白萌支持两种登录方式账号密码优先|
47 | |flaresolverr_url|flaresolverr服务地址，用于绕过真白萌cf盾，例：`http://127.0.0.1:8191/v1`，目前只有此镜像可以完美绕过`alexfozor/flaresolverr:pr-1300-experimental`|
48 | |get_collection|是否爬取收藏页，默认值false，如选否则爬取网站日轻列表|
49 | |start_page|爬取范围（包含），收藏或列表开始页数，默认值1|
50 | |end_page|爬取范围（包含），收藏或列表结束页数，默认值1|
51 | |proxy_url|代理地址，仅支持http代理，例：`http://127.0.0.1:1081`，esj只能使用非日韩的代理，真白萌代理只对下载图片生效|
52 | |is_purchase|是否使用轻币或真白萌金币购买付费章节，默认值false|
53 | |max_purchase|消费上限，超过此值的章节不购买，默认值20|
54 | |time_out|请求超时时间（秒），默认值15|
55 | |sleep_time|每次网络请求睡眠时间（秒），默认值1，设置为0时不限制，例：设置2为随机睡0~2秒，注意真白萌此配置项无效程序会强制睡10秒防止频繁请求报错|
56 | |least_words|html字节数小于此值且不存在图片的章节不生成epub，默认值0，设置为0时不限制|
57 | |convert_hans|生成epub是否将标题和内容的繁体转为简体，默认值true|
58 | |convert_txt|生成epub后转换为txt，默认值false|
59 | |scheduler_config|配置每天定时执行爬虫任务，注意如果爬真白萌很可能一天爬不完，此时不建议开启定时执行|
60 | |push_calibre|配置docker版calibre-web推送，此项开启时程序将强制单线程执行，注意此功能仅支持在linux环境且docker与本程序在同一台机器中执行|
61 | |epub_dir|epub保存目录，默认值./epub，不建议更改|
62 | |image_dir|图片保存目录，默认值./images，不建议更改|
63 | |txt_dir|txt保存目录，默认值./txt，不建议更改|
64 | |download_fail_again|是否统一下载之前爬取失败的图片（优先级1），默认值false，定时开启时此项无效，此项开启时正常爬虫任务会停止|
65 | |delete_pic_table|是否清空数据库中的图片信息（优先级2），默认值false，此配置只应该在误删图片保存目录的时候开启，定时开启时此项无效，此项开启时正常爬虫任务会停止|
66 | |purchase_again|是否统一支付之前未支付的章节（优先级3），默认值false，定时开启时此项无效，此项开启时正常爬虫任务会停止|
67 | |export_epub_again|是否导出数据库数据到epub（优先级4），默认值false，定时开启时此项无效，此项开启时正常爬虫任务会停止|
68 | |url_config|网站地址配置，请勿更改|
69 | |xpath_config|xpath配置，请勿更改|
70 | 
71 | ## 文件结构说明
72 | 如果使用默认配置，程序运行后会在程序目录下生成一系列的文件，正常情况不要删除这些文件  
73 | |文件或文件夹名称|说明|
74 | |:-:|:-:|
75 | |logs|日志文件夹，可以删除|
76 | |images|下载插图的文件夹，删除会导致epub无插图，勿删|
77 | |epub|epub保存目录|
78 | |txt|txt保存目录，配置项convert_txt开启时才会出现|
79 | |lightnovel.db|数据库文件，删除会导致爬取数据丢失，勿删|
80 | |config.yaml|配置文件，勿删|
81 | 
82 | ## 须知
83 | 由于本人未系统的学习过python，基本都是照猫画虎抄来的代码，代码非常的屎请见谅  
84 | 本项目未经过大量测试，如发现bug欢迎提issue，~~至于需求有空再说~~  
85 | **本项目仅供个人学习交流，爬取数据不得对外传播，不得用于商业用途**   
86 | 
87 | 


--------------------------------------------------------------------------------
/config.yaml:
--------------------------------------------------------------------------------
  1 | # 抓取站点 all 全部 masiro 真白萌，esj esj，lk 轻国，yuri 百合会
  2 | site: 'esj'
  3 | # 白名单，填入书籍地址，轻国填入合集id
  4 | white_list: []
  5 | # 黑名单，填入书籍地址，轻国填入合集或单本id
  6 | black_list: []
  7 | # 最大线程数 不要设置的过大 小心被ban 建议设置为1单线程执行
  8 | # 真白萌由于并发会ban号在程序中强制单线程执行
  9 | max_thread: 1
 10 | # 登录账号密码或cookie 请确保自己账号的权限充足
 11 | # 真白萌支持账密和cookie两种登录方式，如果是cookie登录还需要抓取User-Agent
 12 | login_info:
 13 |   esj:
 14 |     username: ''
 15 |     password: ''
 16 |   masiro:
 17 |     username: ''
 18 |     password: ''
 19 |     cookie: ''
 20 |     useragent: ''
 21 |   lk:
 22 |     username: ''
 23 |     password: ''
 24 |   yuri:
 25 |     cookie: ''
 26 | # 由于真白萌的cf盾，使用账密登录需要搭建flaresolverr绕过，例http://127.0.0.1:8191/v1
 27 | # 请拉取此镜像alexfozor/flaresolverr:pr-1300-experimental，官方docker镜像无法绕过
 28 | flaresolverr_url: ''
 29 | # 是否只爬取收藏页
 30 | get_collection: false
 31 | # 开始页
 32 | start_page: 1
 33 | # 结束页 不建议一次抓取过多
 34 | end_page: 1
 35 | # 代理地址 仅支持http代理 例'http://127.0.0.1:1081' 注意esj只能挂非中日韩节点的代理
 36 | proxy_url: ''
 37 | # 轻国或真白萌花金币购买 注意剩余金币
 38 | is_purchase: false
 39 | # 消费上限，高于此值的章节不消费，请根据自身账号情况修改
 40 | max_purchase: 20
 41 | # 请求超时时间 单位秒
 42 | time_out: 15
 43 | # 每次请求睡眠时间 比如设置2则每次请求前睡随机0~2秒 设置0不生效
 44 | # 真白萌由于频繁请求会报错，每次请求强制睡10秒
 45 | sleep_time: 1
 46 | # 字数小于此值且无图片的章节不生成epub章节 设置0无限制
 47 | least_words: 0
 48 | # 生成epub时是否全局繁转简
 49 | convert_hans: true
 50 | # 是否同时将epub转为txt
 51 | convert_txt: false
 52 | # 定时执行设置
 53 | scheduler_config:
 54 |   enabled: false
 55 |   hour: 9
 56 |   minute: 0
 57 | # 推送calibre-web设置 下方默认配置仅做示例
 58 | push_calibre:
 59 |   enabled: false
 60 |   # docker容器id
 61 |   container_id: ''
 62 |   # docker映射爬虫的epub目录
 63 |   absolute_path: '/epub'
 64 |   # docker映射书籍目录
 65 |   library_path: '/library'
 66 | 
 67 | # 下面的设置请谨慎更改
 68 | # epub保存目录 目录/站点/书名.epub
 69 | epub_dir: './epub'
 70 | # 图片保存目录 目录/站点/书id/章节id/图片名
 71 | image_dir: './images'
 72 | # txt保存目录 目录/站点/书名.txt
 73 | txt_dir: './txt'
 74 | # 是否统一下载全部之前爬取失败的图片，定时开启时此项无效
 75 | download_fail_again: false
 76 | # 危险！清空数据库中的图片信息，此选项只应该在误删图片保存目录的时候开启，定时开启时此项无效
 77 | delete_pic_table: false
 78 | # 是否统一支付之前未支付的章节，定时开启时此项无效
 79 | purchase_again: false
 80 | # 是否导出数据库数据到epub，定时开启时此项无效
 81 | export_epub_again: false
 82 | # 地址配置
 83 | url_config:
 84 |   esj:
 85 |     user: 'https://www.esjzone.one/my/profile.html'
 86 |     login: 'https://www.esjzone.one/inc/mem_login.php'
 87 |     page: 'https://www.esjzone.one/list-11/%d.html'
 88 |     book: 'https://www.esjzone.one%s'
 89 |     collection: 'https://www.esjzone.one/my/favorite/%d'
 90 |   masiro:
 91 |     user: 'https://masiro.me/admin/userCenterShow'
 92 |     login: 'https://masiro.me/admin/auth/login'
 93 |     page: 'https://masiro.me/admin/loadMoreNovels?ori=0&page=%d'
 94 |     book: 'https://masiro.me/admin/novelView?novel_id=%d'
 95 |     chapter: 'https://masiro.me/admin/novelReading?cid=%s'
 96 |     pic: 'https://masiro.me%s'
 97 |     collection: 'https://masiro.me/admin/loadMoreNovels?page=%d&collection=1'
 98 |     referer: 'https://masiro.me'
 99 |     cost: 'https://masiro.me/admin/pay'
100 |   lk:
101 |     user: 'https://api.lightnovel.fun/api/user/info'
102 |     login: 'https://api.lightnovel.fun/api/user/login'
103 |     page: 'https://api.lightnovel.fun/api/category/get-article-by-cate'
104 |     book: 'https://api.lightnovel.fun/api/series/get-info'
105 |     chapter: 'https://api.lightnovel.fun/api/article/get-detail'
106 |     collection: 'https://api.lightnovel.fun/api/history/get-collections'
107 |     cost: 'https://api.lightnovel.fun/api/coin/use'
108 |   yuri:
109 |     user: 'https://bbs.yamibo.com/home.php?mod=spacecp&ac=usergroup'
110 |     page: 'https://bbs.yamibo.com/forum-55-%d.html'
111 |     book: 'https://bbs.yamibo.com/%s'
112 |     dsign: 'https://bbs.yamibo.com%s'
113 |     chapter: 'https://bbs.yamibo.com/forum.php?mod=viewthread&tid=%s&page=%s&authorid=%s'
114 |     collection: 'https://bbs.yamibo.com/home.php?mod=space&do=favorite&type=thread&page=%d'
115 | # xpath配置
116 | xpath_config:
117 |   esj:
118 |     page: '//a[@class=''card-img-tiles'']/@href'
119 |     title: '//div[contains(@class,''book-detail'')]/h2/text()'
120 |     author: '//div[contains(@class,''book-detail'')]/ul/li[2]/a/text()'
121 |     tags: '//section[contains(@class,''widget-tags'')]/a/text()'
122 |     describe: '//div[@class=''description'']//text()'
123 |     cover: '//div[contains(@class,''product-gallery'')]/a/@href'
124 |     chapter: '//div[@id=''chapterList'']//a'
125 |     content: '//div[contains(@class,''forum-content'')]/*'
126 |     pic: '//div[contains(@class,''forum-content'')]//img/@src'
127 |     collection: '//h5[@class=''product-title'']/a/@href'
128 |   masiro:
129 |     token: '//input[@class=''csrf'']/@value'
130 |     page: '//div[@class=''layui-card'']/a[1]/@href'
131 |     title: '//div[@class=''novel-title'']/text()'
132 |     author: '//div[@class=''author'']/a/text()'
133 |     tags: '//div[@class=''tags'']//a/span/text()'
134 |     describe: '//div[@class=''brief'']/text()'
135 |     cover: '//img[@class=''img img-thumbnail'']/@data-src'
136 |     chapter: '//script[@id=''chapters-json'']//text()'
137 |     parent_chapter: '//script[@id=''f-chapters-json'']//text()'
138 |     content: '//div[@class=''box-body nvl-content'']/*'
139 |     pic: '//div[@class=''box-body nvl-content'']//img/@src'
140 |     collection: '//div[@class=''layui-card'']/a[1]/@href'
141 |   yuri:
142 |     page: '//table[@summary=''forum_55'']/tbody/tr/th/a[2]/@href'
143 |     title: '//span[@id=''thread_subject'']/text()'
144 |     author: '//dl[@class=''pil cl'']//a/text()'
145 |     chapter: '//td[@class=''t_f'']'
146 |     size: '//div[@class=''pg'']//span/@title'
147 |     collection: '//ul[@id=''favorite_ul'']/li/a[2]/@href'
148 |     pic: '//img/@file'
149 | 
150 | 
151 | 
152 | 
153 | 
154 | 


--------------------------------------------------------------------------------
/core/process.py:
--------------------------------------------------------------------------------
 1 | import aiohttp
 2 | 
 3 | from core.reload import Reload
 4 | from sites.esj import Esj
 5 | from sites.lk import Lk
 6 | from sites.masiro import Masiro
 7 | from sites.yuri import Yuri
 8 | from sqlite.database import Database
 9 | from utils import config
10 | 
11 | 
12 | class Process(object):
13 |     site: str
14 | 
15 |     def __init__(self, site: str):
16 |         self.site = site
17 | 
18 |     async def run(self):
19 |         if not config.read("scheduler_config")["enabled"] and config.read("delete_pic_table"):
20 |             # 删图片库
21 |             with Database() as db:
22 |                 db.pic.clear()
23 |             return
24 |         if not config.read("scheduler_config")["enabled"] and config.read("download_fail_again"):
25 |             # 重新下载图片
26 |             await Reload().re_download()
27 |             return
28 |         if not config.read("scheduler_config")["enabled"] and config.read("purchase_again"):
29 |             # 重爬打钱章节
30 |             await Reload().re_pay()
31 |             return
32 |         if not config.read("scheduler_config")["enabled"] and config.read("export_epub_again"):
33 |             # 重新导出epub
34 |             await Reload().re_epub()
35 |             return
36 |         sites = [self.site]
37 |         if self.site == "all":
38 |             sites = ["esj", "lk", "masiro", "yuri"]
39 |         for site in sites:
40 |             jar = aiohttp.CookieJar(unsafe=True)
41 |             conn = aiohttp.TCPConnector(ssl=False)
42 |             async with aiohttp.ClientSession(connector=conn, trust_env=True, cookie_jar=jar) as session:
43 |                 if site == "esj":
44 |                     await Esj(session).run()
45 |                 if site == "lk":
46 |                     await Lk(session).run()
47 |                 if site == "masiro":
48 |                     await Masiro(session).run()
49 |                 if site == "yuri":
50 |                     await Yuri(session).run()
51 | 


--------------------------------------------------------------------------------
/core/reload.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | 
  3 | import aiohttp
  4 | 
  5 | from models.book import Book
  6 | from sites.lk import Lk
  7 | from sites.masiro import Masiro
  8 | from sqlite.database import Database
  9 | from utils import config, epub, image, log
 10 | 
 11 | 
 12 | class Reload(object):
 13 | 
 14 |     def __init__(self):
 15 |         pass
 16 | 
 17 |     async def re_pay(self):
 18 |         log.info("开始打钱...")
 19 |         # 全部打钱失败的章节
 20 |         with Database() as db:
 21 |             nopay_list = db.chapter.get_nopay_list()
 22 |         if not nopay_list:
 23 |             return
 24 |         book_ids = set()
 25 |         for nopay_chapter in nopay_list:
 26 |             book_ids.add(nopay_chapter.book_table_id)
 27 |         # 获取全部需要再次爬的书
 28 |         with Database() as db:
 29 |             books = db.book.get_by_ids(list(book_ids))
 30 |         if not books:
 31 |             return
 32 |         jar = aiohttp.CookieJar(unsafe=True)
 33 |         conn = aiohttp.TCPConnector(ssl=False)
 34 |         async with aiohttp.ClientSession(connector=conn, trust_env=True, cookie_jar=jar) as session:
 35 |             for book in books:
 36 |                 if book.source == "lk":
 37 |                     lk = Lk(session)
 38 |                     await lk.login()
 39 |                     # 获取书籍下全部章节
 40 |                     with Database() as db:
 41 |                         chapters = db.chapter.get_by_book(book.id)
 42 |                     for chapter in chapters:
 43 |                         if not lk.update_chapter(chapter, chapters):
 44 |                             await lk.build_content(book, chapter)
 45 |                     epub.build_epub(book, chapters)
 46 |                 if book.source == "masiro":
 47 |                     masiro = Masiro(session)
 48 |                     await masiro.login()
 49 |                     book_url = config.read("url_config")[self.site]["book"] % book.book_id
 50 |                     await masiro.build_book(book_url)
 51 |         log.info("已重新打钱！")
 52 | 
 53 |     async def re_download(self):
 54 |         log.info("开始重新下载图片...")
 55 |         book_dict = {}
 56 |         chapter_dict = {}
 57 |         # 全部未下载的图片
 58 |         with Database() as db:
 59 |             fail_list = db.pic.get_null_list()
 60 |         if not fail_list:
 61 |             return
 62 |         jar = aiohttp.CookieJar(unsafe=True)
 63 |         conn = aiohttp.TCPConnector(ssl=False)
 64 |         async with aiohttp.ClientSession(connector=conn, trust_env=True, cookie_jar=jar) as session:
 65 |             for pic in fail_list:
 66 |                 chapter = chapter_dict.get(pic.chapter_table_id)
 67 |                 if not chapter:
 68 |                     with Database() as db:
 69 |                         chapter = db.chapter.get_one(pic.chapter_table_id)
 70 |                         chapter_dict[chapter.id] = chapter
 71 |                 book = book_dict.get(chapter.book_table_id)
 72 |                 if not book:
 73 |                     with Database() as db:
 74 |                         book = db.book.get_by_id(chapter.book_table_id)
 75 |                         book_dict[book.id] = book
 76 |                 # 重新下载
 77 |                 log.info("%s 开始重新下载..." % pic.pic_url)
 78 |                 await image.download(pic, book.source, book.book_id, chapter.chapter_id, session)
 79 |                 if pic.pic_path:
 80 |                     with Database() as db:
 81 |                         db.pic.insert_or_update(pic)
 82 |                     log.info("%s 下载成功" % pic.pic_url)
 83 |                 else:
 84 |                     log.info("%s 下载失败" % pic.pic_url)
 85 |         # 重新构建涉及到的书籍和章节
 86 |         for book in book_dict.values():
 87 |             with Database() as db:
 88 |                 chapters = db.chapter.get_by_book(book.id)
 89 |             epub.build_epub(book, chapters)
 90 |         log.info("图片已重新下载！")
 91 | 
 92 |     async def re_epub(self):
 93 |         log.info("开始全量导出epub...")
 94 |         # 全部书籍
 95 |         with Database() as db:
 96 |             books = db.book.get_all()
 97 |         if not books:
 98 |             return
 99 |         tasks = [self.re_book_epub(book) for book in books]
100 |         if tasks:
101 |             await asyncio.gather(*tasks)
102 |             log.info("已全量导出epub！")
103 | 
104 |     async def re_book_epub(self, book: Book):
105 |         # 全部章节
106 |         with Database() as db:
107 |             chapters = db.chapter.get_by_book(book.id)
108 |         if chapters:
109 |             epub.build_epub(book, chapters)
110 | 


--------------------------------------------------------------------------------
/js/dsign.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | 
 4 | def js_dsign(js):
 5 |     js = js[31:-9]
 6 |     for st in ['window', 'location', "'assign'", "'href'", "'replace'"]:
 7 |         equal = re.findall('[_A-Za-z0-9 =]+%s;' % st, js)
 8 |         if not equal:
 9 |             continue
10 |         else:
11 |             equal = equal[0]
12 |         var = equal.split('=')[0].strip()
13 |         js = js.replace(equal, '')
14 |         js = js.replace(var, st)
15 |         js = js.replace("['%s']" % st.strip("'"), '.%s' % st.strip("'"))
16 |     js = js.replace('window.href', 'somefunction')
17 |     js = js.replace('location.assign', 'tempfunction=')
18 |     js = js.replace('location.href', 'tempfunction=')
19 |     js = js.replace('location.replace', 'tempfunction=')
20 |     js = js.replace('location', 'tempfunction=')
21 |     js = js.replace('tempfunction==', 'tempfunction=')
22 |     js = js.replace('for', 'forr')
23 |     js = js.replace('do', 'dodo')
24 |     js = js.replace('if', 'ifif')
25 |     js = js.replace('ifif(name', 'if(name')
26 |     js = js.replace('ifif(caller', 'if(caller')
27 |     js = js.replace('in', 'inin')
28 |     js = js.replace('trining', 'tring')
29 |     return js
30 | 


--------------------------------------------------------------------------------
/js/md5.js:
--------------------------------------------------------------------------------
  1 | var hexcase = 0;
  2 | var chrsz = 8;
  3 | 
  4 | function hex_md5(s) {
  5 |     return binl2hex(core_md5(str2binl(s), s.length * chrsz));
  6 | }
  7 | 
  8 | function core_md5(x, len) {
  9 |     x[len >> 5] |= 0x80 << ((len) % 32);
 10 |     x[(((len + 64) >>> 9) << 4) + 14] = len;
 11 |     var a = 1732584193;
 12 |     var b = -271733879;
 13 |     var c = -1732584194;
 14 |     var d = 271733878;
 15 |     for (var i = 0; i < x.length; i += 16) {
 16 |         var olda = a;
 17 |         var oldb = b;
 18 |         var oldc = c;
 19 |         var oldd = d;
 20 |         a = md5_ff(a, b, c, d, x[i + 0], 7, -680876936);
 21 |         d = md5_ff(d, a, b, c, x[i + 1], 12, -389564586);
 22 |         c = md5_ff(c, d, a, b, x[i + 2], 17, 606105819);
 23 |         b = md5_ff(b, c, d, a, x[i + 3], 22, -1044525330);
 24 |         a = md5_ff(a, b, c, d, x[i + 4], 7, -176418897);
 25 |         d = md5_ff(d, a, b, c, x[i + 5], 12, 1200080426);
 26 |         c = md5_ff(c, d, a, b, x[i + 6], 17, -1473231341);
 27 |         b = md5_ff(b, c, d, a, x[i + 7], 22, -45705983);
 28 |         a = md5_ff(a, b, c, d, x[i + 8], 7, 1770035416);
 29 |         d = md5_ff(d, a, b, c, x[i + 9], 12, -1958414417);
 30 |         c = md5_ff(c, d, a, b, x[i + 10], 17, -42063);
 31 |         b = md5_ff(b, c, d, a, x[i + 11], 22, -1990404162);
 32 |         a = md5_ff(a, b, c, d, x[i + 12], 7, 1804603682);
 33 |         d = md5_ff(d, a, b, c, x[i + 13], 12, -40341101);
 34 |         c = md5_ff(c, d, a, b, x[i + 14], 17, -1502002290);
 35 |         b = md5_ff(b, c, d, a, x[i + 15], 22, 1236535329);
 36 |         a = md5_gg(a, b, c, d, x[i + 1], 5, -165796510);
 37 |         d = md5_gg(d, a, b, c, x[i + 6], 9, -1069501632);
 38 |         c = md5_gg(c, d, a, b, x[i + 11], 14, 643717713);
 39 |         b = md5_gg(b, c, d, a, x[i + 0], 20, -373897302);
 40 |         a = md5_gg(a, b, c, d, x[i + 5], 5, -701558691);
 41 |         d = md5_gg(d, a, b, c, x[i + 10], 9, 38016083);
 42 |         c = md5_gg(c, d, a, b, x[i + 15], 14, -660478335);
 43 |         b = md5_gg(b, c, d, a, x[i + 4], 20, -405537848);
 44 |         a = md5_gg(a, b, c, d, x[i + 9], 5, 568446438);
 45 |         d = md5_gg(d, a, b, c, x[i + 14], 9, -1019803690);
 46 |         c = md5_gg(c, d, a, b, x[i + 3], 14, -187363961);
 47 |         b = md5_gg(b, c, d, a, x[i + 8], 20, 1163531501);
 48 |         a = md5_gg(a, b, c, d, x[i + 13], 5, -1444681467);
 49 |         d = md5_gg(d, a, b, c, x[i + 2], 9, -51403784);
 50 |         c = md5_gg(c, d, a, b, x[i + 7], 14, 1735328473);
 51 |         b = md5_gg(b, c, d, a, x[i + 12], 20, -1926607734);
 52 |         a = md5_hh(a, b, c, d, x[i + 5], 4, -378558);
 53 |         d = md5_hh(d, a, b, c, x[i + 8], 11, -2022574463);
 54 |         c = md5_hh(c, d, a, b, x[i + 11], 16, 1839030562);
 55 |         b = md5_hh(b, c, d, a, x[i + 14], 23, -35309556);
 56 |         a = md5_hh(a, b, c, d, x[i + 1], 4, -1530992060);
 57 |         d = md5_hh(d, a, b, c, x[i + 4], 11, 1272893353);
 58 |         c = md5_hh(c, d, a, b, x[i + 7], 16, -155497632);
 59 |         b = md5_hh(b, c, d, a, x[i + 10], 23, -1094730640);
 60 |         a = md5_hh(a, b, c, d, x[i + 13], 4, 681279174);
 61 |         d = md5_hh(d, a, b, c, x[i + 0], 11, -358537222);
 62 |         c = md5_hh(c, d, a, b, x[i + 3], 16, -722521979);
 63 |         b = md5_hh(b, c, d, a, x[i + 6], 23, 76029189);
 64 |         a = md5_hh(a, b, c, d, x[i + 9], 4, -640364487);
 65 |         d = md5_hh(d, a, b, c, x[i + 12], 11, -421815835);
 66 |         c = md5_hh(c, d, a, b, x[i + 15], 16, 530742520);
 67 |         b = md5_hh(b, c, d, a, x[i + 2], 23, -995338651);
 68 |         a = md5_ii(a, b, c, d, x[i + 0], 6, -198630844);
 69 |         d = md5_ii(d, a, b, c, x[i + 7], 10, 1126891415);
 70 |         c = md5_ii(c, d, a, b, x[i + 14], 15, -1416354905);
 71 |         b = md5_ii(b, c, d, a, x[i + 5], 21, -57434055);
 72 |         a = md5_ii(a, b, c, d, x[i + 12], 6, 1700485571);
 73 |         d = md5_ii(d, a, b, c, x[i + 3], 10, -1894986606);
 74 |         c = md5_ii(c, d, a, b, x[i + 10], 15, -1051523);
 75 |         b = md5_ii(b, c, d, a, x[i + 1], 21, -2054922799);
 76 |         a = md5_ii(a, b, c, d, x[i + 8], 6, 1873313359);
 77 |         d = md5_ii(d, a, b, c, x[i + 15], 10, -30611744);
 78 |         c = md5_ii(c, d, a, b, x[i + 6], 15, -1560198380);
 79 |         b = md5_ii(b, c, d, a, x[i + 13], 21, 1309151649);
 80 |         a = md5_ii(a, b, c, d, x[i + 4], 6, -145523070);
 81 |         d = md5_ii(d, a, b, c, x[i + 11], 10, -1120210379);
 82 |         c = md5_ii(c, d, a, b, x[i + 2], 15, 718787259);
 83 |         b = md5_ii(b, c, d, a, x[i + 9], 21, -343485551);
 84 |         a = safe_add(a, olda);
 85 |         b = safe_add(b, oldb);
 86 |         c = safe_add(c, oldc);
 87 |         d = safe_add(d, oldd);
 88 |     }
 89 |     return Array(a, b, c, d);
 90 | }
 91 | 
 92 | function md5_cmn(q, a, b, x, s, t) {
 93 |     return safe_add(bit_rol(safe_add(safe_add(a, q), safe_add(x, t)), s), b);
 94 | }
 95 | 
 96 | function md5_ff(a, b, c, d, x, s, t) {
 97 |     return md5_cmn((b & c) | ((~b) & d), a, b, x, s, t);
 98 | }
 99 | 
100 | function md5_gg(a, b, c, d, x, s, t) {
101 |     return md5_cmn((b & d) | (c & (~d)), a, b, x, s, t);
102 | }
103 | 
104 | function md5_hh(a, b, c, d, x, s, t) {
105 |     return md5_cmn(b ^ c ^ d, a, b, x, s, t);
106 | }
107 | 
108 | function md5_ii(a, b, c, d, x, s, t) {
109 |     return md5_cmn(c ^ (b | (~d)), a, b, x, s, t);
110 | }
111 | 
112 | function safe_add(x, y) {
113 |     var lsw = (x & 0xFFFF) + (y & 0xFFFF);
114 |     var msw = (x >> 16) + (y >> 16) + (lsw >> 16);
115 |     return (msw << 16) | (lsw & 0xFFFF);
116 | }
117 | 
118 | function bit_rol(num, cnt) {
119 |     return (num << cnt) | (num >>> (32 - cnt));
120 | }
121 | 
122 | function str2binl(str) {
123 |     var bin = Array();
124 |     var mask = (1 << chrsz) - 1;
125 |     for (var i = 0; i < str.length * chrsz; i += chrsz) {
126 |         bin[i >> 5] |= (str.charCodeAt(i / chrsz) & mask) << (i % 32);
127 |     }
128 |     return bin;
129 | }
130 | 
131 | function binl2hex(binarray) {
132 |     var hex_tab = hexcase ? "0123456789ABCDEF" : "0123456789abcdef";
133 |     var str = "";
134 |     for (var i = 0; i < binarray.length * 4; i++) {
135 |         str += hex_tab.charAt((binarray[i >> 2] >> ((i % 4) * 8 + 4)) & 0xF) + hex_tab.charAt((binarray[i >> 2] >> ((i % 4) * 8)) & 0xF);
136 |     }
137 |     return str;
138 | }


--------------------------------------------------------------------------------
/js/runjs.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | from functools import partial
 3 | 
 4 | from js.dsign import js_dsign
 5 | 
 6 | subprocess.Popen = partial(subprocess.Popen, encoding='utf-8')
 7 | 
 8 | import execjs
 9 | 
10 | 
11 | def js_md5(password):
12 |     with open('./js/md5.js', encoding='utf-8') as f:
13 |         read = f.read()
14 |     js = execjs.compile(read)
15 |     return js.call('hex_md5', password)
16 | 
17 | 
18 | def get_dsign(read):
19 |     read = js_dsign(read)
20 |     js = execjs.compile(read)
21 |     result = js.eval('tempfunction')
22 |     result = result.replace('forrum', 'forum')
23 |     return result
24 | 


--------------------------------------------------------------------------------
/lightnovel.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | from zoneinfo import ZoneInfo
 3 | 
 4 | from apscheduler.schedulers.background import BlockingScheduler
 5 | 
 6 | from core.process import Process
 7 | from sqlite import script
 8 | from utils import config, log
 9 | 
10 | config.init_config()
11 | scheduler = BlockingScheduler(timezone=ZoneInfo("Asia/Shanghai"))
12 | loop = asyncio.get_event_loop()
13 | 
14 | 
15 | def run():
16 |     if config.read("scheduler_config")["enabled"]:
17 |         print("===========start scheduler===========")
18 |     log.init_log()
19 |     loop.run_until_complete(Process(config.read("site")).run())
20 |     log.remove_log()
21 |     if config.read("scheduler_config")["enabled"]:
22 |         print("===========end scheduler===========")
23 | 
24 | 
25 | if __name__ == '__main__':
26 |     print("Version 2.1.8")
27 |     script.init_db()
28 |     if config.read("scheduler_config")["enabled"]:
29 |         # 添加定时任务
30 |         scheduler.add_job(
31 |             run,
32 |             "cron",
33 |             hour=config.read("scheduler_config")["hour"],
34 |             minute=config.read("scheduler_config")["minute"],
35 |             misfire_grace_time=600,
36 |             coalesce=True,
37 |             max_instances=1
38 |         )
39 |         scheduler.start()
40 |     else:
41 |         run()
42 |         input("Press Enter to exit...")
43 | 


--------------------------------------------------------------------------------
/lightnovel_exe.spec:
--------------------------------------------------------------------------------
 1 | # -*- mode: python ; coding: utf-8 -*-
 2 | 
 3 | 
 4 | block_cipher = None
 5 | 
 6 | zhconv_data_path = 'zhconv/zhcdict.json'
 7 | 
 8 | a = Analysis(
 9 |     ['lightnovel.py'],
10 |     pathex=[],
11 |     binaries=[],
12 |     datas=[(zhconv_data_path, 'zhconv')],
13 |     hiddenimports=[],
14 |     hookspath=[],
15 |     hooksconfig={},
16 |     runtime_hooks=[],
17 |     excludes=[],
18 |     win_no_prefer_redirects=False,
19 |     win_private_assemblies=False,
20 |     cipher=block_cipher,
21 |     noarchive=False,
22 | )
23 | pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
24 | 
25 | exe = EXE(
26 |     pyz,
27 |     a.scripts,
28 |     a.binaries,
29 |     a.zipfiles,
30 |     a.datas,
31 |     [],
32 |     name='lightnovel',
33 |     debug=False,
34 |     bootloader_ignore_signals=False,
35 |     strip=False,
36 |     upx=True,
37 |     upx_exclude=[],
38 |     runtime_tmpdir=None,
39 |     console=True,
40 |     disable_windowed_traceback=False,
41 |     argv_emulation=False,
42 |     target_arch=None,
43 |     codesign_identity=None,
44 |     entitlements_file=None,
45 | )
46 | 


--------------------------------------------------------------------------------
/models/book.py:
--------------------------------------------------------------------------------
 1 | from sqlmodel import Field, SQLModel
 2 | 
 3 | 
 4 | class Book(SQLModel, table=True):
 5 |     id: str = Field(default=None, primary_key=True, index=True)
 6 |     book_id: str = Field(alias="book_id", title="爬取网站的书籍id")
 7 |     source: str = Field(alias="source", title="爬取来源")
 8 |     book_name: str = Field(alias="book_name", title="书籍名称")
 9 |     author: str = Field(alias="author", title="作者")
10 |     tags: str = Field(alias="tags", title="标签英文逗号分隔")
11 |     describe: str = Field(alias="describe", title="书籍描述")
12 |     cover_url: str = Field(alias="cover_url", title="封面地址")
13 | 
14 | 
15 | 


--------------------------------------------------------------------------------
/models/chapter.py:
--------------------------------------------------------------------------------
 1 | from sqlmodel import SQLModel, Field
 2 | 
 3 | 
 4 | class Chapter(SQLModel, table=True):
 5 |     id: str = Field(default=None, primary_key=True, index=True)
 6 |     book_table_id: str = Field(alias="book_table_id", title="关联数据库书籍id")
 7 |     chapter_id: str = Field(alias="chapter_id", title="爬取网站的章节id")
 8 |     chapter_name: str = Field(alias="chapter_name", title="章节名称")
 9 |     chapter_order: int = Field(alias="chapter_order", title="顺序")
10 |     content: str = Field(alias="content", title="完整html内容")
11 |     last_update_time: int = Field(alias="last_update_time", title="最后爬取时间")
12 |     purchase_fail_flag: int = Field(alias="purchase_fail_flag", title="购买失败标识0否1是")
13 | 


--------------------------------------------------------------------------------
/models/cookie.py:
--------------------------------------------------------------------------------
 1 | from sqlmodel import SQLModel, Field
 2 | 
 3 | 
 4 | class Cookie(SQLModel, table=True):
 5 |     id: str = Field(default=None, primary_key=True, index=True)
 6 |     source: str = Field(alias="source", title="source")
 7 |     cookie: str = Field(alias="cookie", title="cookie")
 8 |     token: str = Field(alias="token", title="token")
 9 |     uid: str = Field(alias="uid", title="uid")
10 | 


--------------------------------------------------------------------------------
/models/pic.py:
--------------------------------------------------------------------------------
1 | from sqlmodel import SQLModel, Field
2 | 
3 | 
4 | class Pic(SQLModel, table=True):
5 |     id: str = Field(default=None, primary_key=True, index=True)
6 |     chapter_table_id: str = Field(alias="chapter_table_id", title="关联数据库章节id")
7 |     pic_url: str = Field(alias="pic_url", title="图片地址")
8 |     pic_path: str = Field(alias="pic_path", title="图片存放路径")
9 |     pic_id: str = Field(alias="pic_id", title="图片id，仅轻国需要")


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | aiohttp~=3.8.3
 2 | lxml~=4.9.1
 3 | tenacity~=8.1.0
 4 | Brotli~=1.0.9
 5 | PyExecJS~=1.5.1
 6 | zhconv~=1.4.3
 7 | PyYaml~=6.0
 8 | EbookLib~=0.18
 9 | pillow-avif-plugin~=1.3.1
10 | Pillow~=9.5.0
11 | sqlmodel~=0.0.20
12 | APScheduler~=3.10.1
13 | SQLAlchemy~=2.0.31


--------------------------------------------------------------------------------
/sites/abstract.py:
--------------------------------------------------------------------------------
 1 | from abc import abstractmethod
 2 | from asyncio import Semaphore
 3 | 
 4 | from aiohttp import ClientSession
 5 | 
 6 | from models.cookie import Cookie
 7 | 
 8 | 
 9 | class Site(object):
10 |     cookie: Cookie
11 |     session: ClientSession
12 |     header: dict
13 |     site: str
14 |     thread: Semaphore
15 | 
16 |     async def run(self):
17 |         try:
18 |             await self.login()
19 |         except:
20 |             return
21 |         await self.get_books()
22 | 
23 |     @abstractmethod
24 |     async def login(self):
25 |         pass
26 | 
27 |     @abstractmethod
28 |     async def valid_cookie(self) -> bool:
29 |         pass
30 | 
31 |     @abstractmethod
32 |     async def get_cookie(self):
33 |         pass
34 | 
35 |     @abstractmethod
36 |     async def get_books(self):
37 |         pass
38 | 


--------------------------------------------------------------------------------
/sites/esj.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import json
  3 | import re
  4 | import uuid
  5 | from typing import Optional
  6 | 
  7 | from aiohttp import ClientSession
  8 | from lxml import html
  9 | 
 10 | from models.book import Book
 11 | from models.chapter import Chapter
 12 | from models.cookie import Cookie
 13 | from models.pic import Pic
 14 | from sites.abstract import Site
 15 | 
 16 | from sqlite.database import Database
 17 | from utils import config, request, log, common, image, epub
 18 | 
 19 | 
 20 | class Esj(Site):
 21 | 
 22 |     def __init__(self, session: ClientSession):
 23 |         self.session = session
 24 |         self.site = "esj"
 25 |         self.header = {
 26 |             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
 27 |             "Accept-Encoding": "gzip, deflate, br",
 28 |             "Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
 29 |             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
 30 |         }
 31 |         thread = config.read("max_thread")
 32 |         if thread > 8:
 33 |             thread = 8
 34 |         if config.read("push_calibre")["enabled"]:
 35 |             thread = 1
 36 |         self.thread = asyncio.Semaphore(thread)
 37 | 
 38 |     async def login(self):
 39 |         if not config.read("login_info")[self.site]["username"] \
 40 |                 or not config.read("login_info")[self.site]["password"]:
 41 |             log.info("%s 账号密码未配置，跳过" % self.site)
 42 |             raise Exception()
 43 |         with Database() as db:
 44 |             self.cookie = db.cookie.get_one(self.site)
 45 |             if self.cookie:
 46 |                 valid_bool = await self.valid_cookie()
 47 |                 if valid_bool:
 48 |                     return
 49 |         cookie = Cookie()
 50 |         cookie.id = str(uuid.uuid4())
 51 |         cookie.source = self.site
 52 |         self.cookie = cookie
 53 |         await self.get_cookie()
 54 | 
 55 |     async def valid_cookie(self) -> bool:
 56 |         url = config.read("url_config")[self.site]["user"]
 57 |         self.header["Cookie"] = self.cookie.cookie
 58 |         res = await request.get(url=url, headers=self.header, session=self.session)
 59 |         if res and res.startswith("<!DOCTYPE html>"):
 60 |             log.info("%s校验缓存cookie成功，跳过登录" % self.site)
 61 |             return True
 62 |         return False
 63 | 
 64 |     async def get_cookie(self):
 65 |         log.info("%s开始登录..." % self.site)
 66 |         url = config.read("url_config")[self.site]["login"]
 67 |         login_data = {
 68 |             "email": config.read("login_info")[self.site]["username"],
 69 |             "pwd": config.read("login_info")[self.site]["password"],
 70 |             "remember_me": "on"
 71 |         }
 72 |         res = await request.post_data(url=url, headers=self.header, data=login_data, session=self.session)
 73 |         if res:
 74 |             if json.loads(res["text"])["status"] != 200:
 75 |                 log.info("登录失败！" + json.loads(res["text"])["msg"])
 76 |                 raise Exception()
 77 |             self.cookie.cookie = "; ".join(res["headers"].getall("Set-Cookie"))
 78 |             self.header["Cookie"] = self.cookie.cookie
 79 |             with Database() as db:
 80 |                 db.cookie.insert_or_update(self.cookie)
 81 |             log.info("%s登录成功" % self.site)
 82 |         else:
 83 |             log.info("登录失败！")
 84 |             raise Exception()
 85 | 
 86 |     async def get_books(self):
 87 |         # 白名单
 88 |         if config.read("white_list"):
 89 |             for book_url in config.read("white_list"):
 90 |                 await self.build_book(book_url)
 91 |             return
 92 |         for page in range(config.read("start_page"), config.read("end_page") + 1):
 93 |             book_urls = await self.get_book_urls(page)
 94 |             if not book_urls:
 95 |                 continue
 96 |             tasks = [asyncio.create_task(self.build_book(book_url)) for book_url in book_urls]
 97 |             if tasks:
 98 |                 await asyncio.gather(*tasks)
 99 | 
100 |     async def build_book(self, book_url: str):
101 |         async with self.thread:
102 |             res = await request.get(url=book_url, headers=self.header, session=self.session)
103 |             book = self.build_book_from_text(res, book_url)
104 |             if not book or not book.book_id:
105 |                 return
106 |             with Database() as db:
107 |                 await db.book.insert_or_update(book, self.session)
108 |             # 章节
109 |             chapter_list = await self.build_chapters(book, res)
110 |             # epub
111 |             epub.build_epub(book, chapter_list)
112 | 
113 |     async def build_chapters(self, book: Book, text: str) -> list[Chapter]:
114 |         chapter_xpaths = config.get_xpath(text, self.site, "chapter")
115 |         with Database() as db:
116 |             old_chapters = db.chapter.get_list(book.id)
117 |         order = 1
118 |         chapter_list = []
119 |         for chapter_xpath in chapter_xpaths:
120 |             chapter_body = html.fromstring(html.tostring(chapter_xpath))
121 |             chapter_url = chapter_body.xpath("@href")[0]
122 |             if not chapter_url:
123 |                 continue
124 |             if "esjzone" not in chapter_url or ".html" not in chapter_url:
125 |                 # 外站用url做id
126 |                 chapter_id = chapter_url
127 |             else:
128 |                 try:
129 |                     chapter_id = re.search(r"/(\d+)\.html", chapter_url).group(1)
130 |                 except Exception:
131 |                     continue
132 |             chapter = Chapter()
133 |             chapter.id = str(uuid.uuid4())
134 |             chapter.chapter_order = order
135 |             chapter.book_table_id = book.id
136 |             chapter.chapter_name = chapter_body.xpath("@data-title")[0]
137 |             chapter.chapter_id = chapter_id
138 |             order += 1
139 |             if self.update_chapter(chapter, old_chapters):
140 |                 chapter_list.append(chapter)
141 |                 continue
142 |             # 爬文本和插图
143 |             await self.build_content(book, chapter, chapter_url)
144 |             chapter_list.append(chapter)
145 |         return chapter_list
146 | 
147 |     async def build_content(self, book: Book, chapter: Chapter, chapter_url: str):
148 |         # 外站处理
149 |         if chapter.chapter_id.startswith("http"):
150 |             chapter.content = "<p>请至此链接下查看：" + chapter_url + "</p>"
151 |             with Database() as db:
152 |                 db.chapter.insert_or_update(chapter)
153 |             return
154 |         text = await request.get(chapter_url, self.header, self.session)
155 |         chapter.content = config.get_html(text, self.site, "content")
156 |         if not chapter.content:
157 |             return
158 |         if "btn-send-pw" in chapter.content or "內文目前施工中" in chapter.content:
159 |             # 密码章节跳过
160 |             chapter.content = None
161 |             with Database() as db:
162 |                 db.chapter.insert_or_update(chapter)
163 |             return
164 |         with Database() as db:
165 |             db.chapter.insert_or_update(chapter)
166 |         # 插图处理
167 |         await self.build_images(book, chapter, text)
168 |         log.info("%s 新获取章节内容" % chapter.chapter_name)
169 | 
170 |     async def build_images(self, book: Book, chapter: Chapter, text: str):
171 |         with Database() as db:
172 |             pics = db.pic.get_list(chapter.id)
173 |         pic_urls = config.get_xpath(text, self.site, "pic")
174 |         if not pic_urls:
175 |             return
176 |         for pic_url in pic_urls:
177 |             # 排除非http链接
178 |             if not pic_url.startswith("http"):
179 |                 return
180 |             match_pic = common.find(pics, "pic_url", pic_url)
181 |             if match_pic and match_pic.pic_path:
182 |                 continue
183 |             pic = Pic()
184 |             if match_pic:
185 |                 pic = match_pic
186 |             else:
187 |                 pic.id = str(uuid.uuid4())
188 |                 pic.chapter_table_id = chapter.id
189 |                 pic.pic_url = pic_url
190 |             # 下载图片
191 |             await image.download(pic, self.site, book.book_id, chapter.chapter_id, self.session)
192 |             with Database() as db:
193 |                 db.pic.insert_or_update(pic)
194 | 
195 |     def update_chapter(self, chapter: Chapter, old_chapters: Optional[Chapter]) -> bool:
196 |         old_chapters = common.find_list(old_chapters, "chapter_id", chapter.chapter_id)
197 |         if not old_chapters:
198 |             return False
199 |         old_chapter = old_chapters[0]
200 |         if len(old_chapters) > 1:
201 |             # 多匹配不做处理
202 |             common.copy(old_chapter, chapter)
203 |             return True
204 |         if old_chapter.chapter_name != chapter.chapter_name \
205 |                 or old_chapter.chapter_order != chapter.chapter_order:
206 |             old_chapter.chapter_name = chapter.chapter_name
207 |             old_chapter.chapter_order = chapter.chapter_order
208 |             with Database() as db:
209 |                 db.chapter.insert_or_update(old_chapter)
210 |         common.copy(old_chapter, chapter)
211 |         return True
212 | 
213 |     def build_book_from_text(self, text: str, book_url: str) -> Book:
214 |         if not text:
215 |             return None
216 |         book = Book()
217 |         book.id = str(uuid.uuid4())
218 |         book.book_id = re.search(r"/(\d+)\.html$", book_url).group(1)
219 |         book.source = self.site
220 |         book.book_name = common.first(config.get_xpath(text, self.site, "title"))
221 |         book.author = common.first(config.get_xpath(text, self.site, "author"))
222 |         book.describe = common.join(config.get_xpath(text, self.site, "describe"), "\n")
223 |         book.cover_url = common.first(config.get_xpath(text, self.site, "cover"))
224 |         book.tags = common.join(config.get_xpath(text, self.site, "tags"))
225 |         log.info("%s 书籍信息已获取" % book.book_name)
226 |         return book
227 | 
228 |     async def get_book_urls(self, page: int) -> list:
229 |         log.info("开始爬取%s第%d页" % (self.site, page))
230 |         if config.read("get_collection"):
231 |             page_url = config.read("url_config")[self.site]["collection"] % page
232 |         else:
233 |             page_url = config.read("url_config")[self.site]["page"] % page
234 |         res = await request.get(url=page_url, headers=self.header, session=self.session)
235 |         return self.get_book_urls_from_text(res)
236 | 
237 |     def get_book_urls_from_text(self, text: str) -> list:
238 |         if not text:
239 |             return None
240 |         book_urls = []
241 |         if config.read("get_collection"):
242 |             book_xpaths = config.get_xpath(text, self.site, "collection")
243 |         else:
244 |             book_xpaths = config.get_xpath(text, self.site, "page")
245 |         if not book_xpaths:
246 |             return None
247 |         for book_xpath in book_xpaths:
248 |             book_url = config.read("url_config")[self.site]["book"] % book_xpath
249 |             # 黑名单
250 |             if config.read("black_list") and book_xpath in config.read("black_list"):
251 |                 continue
252 |             book_urls.append(book_url)
253 |         return book_urls
254 | 


--------------------------------------------------------------------------------
/sites/lk.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import copy
  3 | import uuid
  4 | from typing import Optional
  5 | 
  6 | from aiohttp import ClientSession
  7 | 
  8 | from models.book import Book
  9 | from models.chapter import Chapter
 10 | from models.cookie import Cookie
 11 | from models.pic import Pic
 12 | from sites.abstract import Site
 13 | from sqlite.database import Database
 14 | from utils import config, request, common, log, epub, image
 15 | 
 16 | 
 17 | class Lk(Site):
 18 | 
 19 |     def __init__(self, session: ClientSession):
 20 |         self.session = session
 21 |         self.site = "lk"
 22 |         self.header = {
 23 |             "content-type": "application/json; charset=UTF-8",
 24 |             "accept-encoding": "gzip",
 25 |             "host": "api.lightnovel.fun",
 26 |             "user-agent": "Dart/2.10 (dart:io)"
 27 |         }
 28 |         thread = config.read("max_thread")
 29 |         if thread > 4:
 30 |             thread = 4
 31 |         if config.read("push_calibre")["enabled"]:
 32 |             thread = 1
 33 |         self.thread = asyncio.Semaphore(thread)
 34 |         self.param = {
 35 |             "platform": "android",
 36 |             "client": "app",
 37 |             "sign": "",
 38 |             "ver_name": "0.11.52",
 39 |             "ver_code": "192",
 40 |             "d": {
 41 |                 "uid": "",
 42 |                 "security_key": ""
 43 |             },
 44 |             "gz": 1
 45 |         }
 46 | 
 47 |     async def login(self):
 48 |         if not config.read("login_info")[self.site]["username"] \
 49 |                 or not config.read("login_info")[self.site]["password"]:
 50 |             log.info("%s 账号密码未配置，跳过" % self.site)
 51 |             raise Exception()
 52 |         with Database() as db:
 53 |             self.cookie = db.cookie.get_one(self.site)
 54 |             if self.cookie:
 55 |                 valid_bool = await self.valid_cookie()
 56 |                 if valid_bool:
 57 |                     return
 58 |         cookie = Cookie()
 59 |         cookie.id = str(uuid.uuid4())
 60 |         cookie.source = self.site
 61 |         self.cookie = cookie
 62 |         await self.get_cookie()
 63 | 
 64 |     async def valid_cookie(self) -> bool:
 65 |         url = config.read("url_config")[self.site]["user"]
 66 |         self.param["d"]["uid"] = self.cookie.uid
 67 |         self.param["d"]["security_key"] = self.cookie.token
 68 |         res = await request.post_json(url=url, headers=self.header, json=self.param, session=self.session)
 69 |         if res and common.unzip(res).get("code") == 0:
 70 |             log.info("%s校验缓存cookie成功，跳过登录" % self.site)
 71 |             return True
 72 |         return False
 73 | 
 74 |     async def get_cookie(self):
 75 |         log.info("%s开始登录..." % self.site)
 76 |         url = config.read("url_config")[self.site]["login"]
 77 |         param = self.param
 78 |         param["is_encrypted"] = 0
 79 |         param["d"] = {
 80 |             "username": config.read("login_info")[self.site]["username"],
 81 |             "password": config.read("login_info")[self.site]["password"]
 82 |         }
 83 |         res = await request.post_json(url=url, headers=self.header, json=param, session=self.session)
 84 |         if res and common.unzip(res)["code"] == 0:
 85 |             self.cookie.uid = common.unzip(res)["data"]["uid"]
 86 |             self.cookie.token = common.unzip(res)["data"]["security_key"]
 87 |             self.param["d"]["uid"] = self.cookie.uid
 88 |             self.param["d"]["security_key"] = self.cookie.token
 89 |             with Database() as db:
 90 |                 db.cookie.insert_or_update(self.cookie)
 91 |             log.info("%s登录成功" % self.site)
 92 |         else:
 93 |             log.info("登录失败！")
 94 |             raise Exception()
 95 | 
 96 |     async def get_books(self):
 97 |         # 白名单 sid
 98 |         if config.read("white_list"):
 99 |             for sid in config.read("white_list"):
100 |                 if sid == 0:
101 |                     continue
102 |                 await self.build_book({"sid": sid, "aid": 0})
103 |             return
104 |         for page in range(config.read("start_page"), config.read("end_page") + 1):
105 |             page_books = await self.get_page_list(page)
106 |             if not page_books:
107 |                 continue
108 |             tasks = [asyncio.create_task(self.build_book(page_book)) for page_book in page_books]
109 |             if tasks:
110 |                 await asyncio.gather(*tasks)
111 | 
112 |     async def build_book(self, page_book: dict):
113 |         async with self.thread:
114 |             # 屏蔽公告等
115 |             black_aid = [969547, 1113228, 1099310, 1048596]
116 |             aid = page_book["aid"]
117 |             sid = page_book["sid"]
118 |             if aid in black_aid:
119 |                 return
120 |             if sid == 0:
121 |                 # 黑名单
122 |                 if config.read("black_list") and aid in config.read("black_list"):
123 |                     return
124 |                 book = self.build_book_from_page(page_book)
125 |             else:
126 |                 # 黑名单
127 |                 if config.read("black_list") and sid in config.read("black_list"):
128 |                     return
129 |                 book_url = config.read("url_config")[self.site]["book"]
130 |                 param = copy.deepcopy(self.param)
131 |                 param["d"]["sid"] = sid
132 |                 res = await request.post_json(url=book_url, headers=self.header, json=param, session=self.session)
133 |                 book = self.build_book_from_res(res)
134 |             if not book or not book.book_id:
135 |                 return
136 |             with Database() as db:
137 |                 await db.book.insert_or_update(book, self.session)
138 |             # 章节
139 |             if sid == 0:
140 |                 chapter_list = [await self.build_chapter(book, page_book)]
141 |             else:
142 |                 chapter_list = await self.build_chapters(book, res)
143 |             # epub
144 |             epub.build_epub(book, chapter_list)
145 | 
146 |     async def build_chapter(self, book: Book, page_book: dict) -> Chapter:
147 |         with Database() as db:
148 |             old_chapter = common.first(db.chapter.get_list(book.book_id))
149 |         chapter = Chapter()
150 |         chapter.id = str(uuid.uuid4())
151 |         chapter.chapter_order = 1
152 |         chapter.book_table_id = book.id
153 |         chapter.chapter_name = book.book_name
154 |         chapter.chapter_id = book.book_id
155 |         chapter.last_update_time = common.time(page_book["last_time"])
156 |         if self.update_chapter(chapter, [old_chapter]):
157 |             return chapter
158 |         await self.build_content(book, chapter)
159 |         return chapter
160 | 
161 |     async def build_chapters(self, book: Book, res: str) -> list[Chapter]:
162 |         if res and common.unzip(res)["code"] == 0:
163 |             chapter_datas = common.unzip(res)["data"]["articles"]
164 |             with Database() as db:
165 |                 old_chapters = db.chapter.get_list(book.id)
166 |             order = 1
167 |             chapter_list = []
168 |             for chapter_data in chapter_datas:
169 |                 chapter = Chapter()
170 |                 chapter.id = str(uuid.uuid4())
171 |                 chapter.chapter_order = order
172 |                 chapter.book_table_id = book.id
173 |                 chapter.chapter_name = chapter_data["title"]
174 |                 chapter.chapter_id = str(chapter_data["aid"])
175 |                 chapter.last_update_time = common.time(chapter_data["last_time"])
176 |                 order += 1
177 |                 if self.update_chapter(chapter, old_chapters):
178 |                     chapter_list.append(chapter)
179 |                     continue
180 |                 await self.build_content(book, chapter)
181 |                 chapter_list.append(chapter)
182 |             return chapter_list
183 |         return None
184 | 
185 |     async def build_content(self, book: Book, chapter: Chapter):
186 |         pic_urls = []
187 |         chapter_url = config.read("url_config")[self.site]["chapter"]
188 |         param = copy.deepcopy(self.param)
189 |         param["d"]["aid"] = chapter.chapter_id
190 |         param["d"]["simple"] = 0
191 |         res = await request.post_json(url=chapter_url, headers=self.header, json=param, session=self.session)
192 |         if res and common.unzip(res)["code"] == 0:
193 |             chapter_data = common.unzip(res)["data"]
194 |             if chapter_data.get("pay_info"):
195 |                 # 打钱处理
196 |                 if chapter_data.get("pay_info")["is_paid"] == 0 and config.read("is_purchase"):
197 |                     chapter.purchase_fail_flag = 1
198 |                     cost = chapter_data.get("pay_info")["price"]
199 |                     if cost <= config.read("max_purchase"):
200 |                         await self.pay(book, chapter, cost, pic_urls)
201 |                 elif chapter_data.get("pay_info")["is_paid"] == 1:
202 |                     chapter.purchase_fail_flag = 0
203 |                     chapter.content = common.bbcode_to_html(chapter_data["content"], chapter_data, pic_urls)
204 |                 else:
205 |                     chapter.purchase_fail_flag = 1
206 |             else:
207 |                 chapter.content = common.bbcode_to_html(chapter_data["content"], chapter_data, pic_urls)
208 |         with Database() as db:
209 |             db.chapter.insert_or_update(chapter)
210 |         # 插图处理
211 |         await self.build_images(book, chapter, pic_urls)
212 |         log.info("%s 新获取章节内容" % chapter.chapter_name)
213 | 
214 |     async def pay(self, book: Book, chapter: Chapter, cost: int, pic_urls: list):
215 |         log.info("%s 开始打钱..花费: %s轻币" % (chapter.chapter_name, str(cost)))
216 |         cost_url = config.read("url_config")[self.site]["cost"]
217 |         cost_param = copy.deepcopy(self.param)
218 |         cost_param["d"]["goods_id"] = 1
219 |         cost_param["d"]["params"] = int(chapter.chapter_id)
220 |         cost_param["d"]["price"] = cost
221 |         cost_param["d"]["number"] = 1
222 |         cost_param["d"]["total_price"] = cost
223 |         cost_res = await request.post_json(url=cost_url, headers=self.header, json=cost_param, session=self.session)
224 |         if cost_res and common.unzip(cost_res)["code"] == 0:
225 |             # 打钱成功 刷新文本
226 |             log.info("%s 打钱成功！" % chapter.chapter_name)
227 |             chapter_url = config.read("url_config")[self.site]["chapter"]
228 |             param = copy.deepcopy(self.param)
229 |             param["d"]["aid"] = chapter.chapter_id
230 |             param["d"]["simple"] = 0
231 |             res = await request.post_json(url=chapter_url, headers=self.header, json=param, session=self.session)
232 |             if res and common.unzip(res)["code"] == 0:
233 |                 chapter_data = common.unzip(res)["data"]
234 |                 chapter.content = common.bbcode_to_html(chapter_data["content"], chapter_data, pic_urls)
235 |                 if chapter.content:
236 |                     chapter.purchase_fail_flag = 0
237 |         else:
238 |             log.info("%s 打钱失败！" % chapter.chapter_name)
239 | 
240 |     async def build_images(self, book: Book, chapter: Chapter, pic_urls: list):
241 |         if not chapter.content:
242 |             return
243 |         with Database() as db:
244 |             pics = db.pic.get_list(chapter.id)
245 |         if not pic_urls:
246 |             return
247 |         for pic_url in pic_urls:
248 |             match_pic = common.find(pics, "pic_url", pic_url["url"])
249 |             if not match_pic and pic_url["id"]:
250 |                 match_pic = common.find(pics, "pic_id", pic_url["id"])
251 |             if match_pic and match_pic.pic_path:
252 |                 continue
253 |             pic = Pic()
254 |             if match_pic:
255 |                 pic = match_pic
256 |             else:
257 |                 pic.id = str(uuid.uuid4())
258 |                 pic.chapter_table_id = chapter.id
259 |                 pic.pic_url = pic_url["url"]
260 |                 pic.pic_id = pic_url["id"]
261 |             # 下载图片
262 |             await image.download(pic, self.site, book.book_id, chapter.chapter_id, self.session)
263 |             with Database() as db:
264 |                 db.pic.insert_or_update(pic)
265 | 
266 |     def update_chapter(self, chapter: Chapter, old_chapters: Optional[Chapter]) -> bool:
267 |         old_chapter = common.find(old_chapters, "chapter_id", chapter.chapter_id)
268 |         if not old_chapter:
269 |             return False
270 |         if old_chapter.purchase_fail_flag and old_chapter.purchase_fail_flag == 1:
271 |             # 打钱失败的
272 |             common.copy(old_chapter, chapter)
273 |             return False
274 |         if old_chapter.chapter_name != chapter.chapter_name \
275 |                 or old_chapter.chapter_order != chapter.chapter_order:
276 |             old_chapter.chapter_name = chapter.chapter_name
277 |             old_chapter.chapter_order = chapter.chapter_order
278 |             with Database() as db:
279 |                 db.chapter.insert_or_update(old_chapter)
280 |         if old_chapter.last_update_time < chapter.last_update_time:
281 |             old_chapter.last_update_time = chapter.last_update_time
282 |             with Database() as db:
283 |                 db.chapter.insert_or_update(old_chapter)
284 |             # 后续需要更新文本
285 |             common.copy(old_chapter, chapter)
286 |             return False
287 |         common.copy(old_chapter, chapter)
288 |         return True
289 | 
290 |     def build_book_from_res(self, res: str) -> Book:
291 |         if res and common.unzip(res)["code"] == 0:
292 |             book_dict = common.unzip(res)
293 |             if not book_dict["data"]:
294 |                 return None
295 |             book = Book()
296 |             book.id = str(uuid.uuid4())
297 |             book.book_id = str(book_dict["data"]["sid"])
298 |             book.source = self.site
299 |             book.book_name = book_dict["data"]["name"]
300 |             book.author = book_dict["data"]["author"]
301 |             book.cover_url = book_dict["data"]["cover"]
302 |             book.describe = book_dict["data"]["intro"]
303 |             log.info("%s 书籍信息已获取" % book.book_name)
304 |             return book
305 |         return None
306 | 
307 |     def build_book_from_page(self, page_book: dict) -> Book:
308 |         book = Book()
309 |         book.id = str(uuid.uuid4())
310 |         book.book_id = str(page_book["aid"])
311 |         book.source = self.site
312 |         book.book_name = page_book["title"]
313 |         book.cover_url = page_book["cover"]
314 |         log.info("%s 书籍信息已获取" % book.book_name)
315 |         return book
316 | 
317 |     async def get_page_list(self, page: int) -> list:
318 |         log.info("开始爬取%s第%d页" % (self.site, page))
319 |         param = self.param
320 |         param["d"]["page"] = page
321 |         param["d"]["pageSize"] = 20
322 |         if config.read('get_collection'):
323 |             page_url = config.read("url_config")[self.site]["collection"]
324 |             param["d"]["type"] = 1
325 |             # class 1 单本
326 |             param["d"]["class"] = 1
327 |         else:
328 |             page_url = config.read("url_config")[self.site]["page"]
329 |             param["d"]["parent_gid"] = 3
330 |             # gid 106 最新 gid 107 整卷
331 |             param["d"]["gid"] = 106
332 |         res = await request.post_json(url=page_url, headers=self.header, json=param, session=self.session)
333 |         if res and common.unzip(res)["code"] == 0:
334 |             if config.read('get_collection'):
335 |                 # class 2 合集
336 |                 param["d"]["class"] = 2
337 |                 res2 = await request.post_json(url=page_url, headers=self.header, json=param, session=self.session)
338 |                 return common.unzip(res)["data"]["list"] + common.unzip(res2)["data"]["list"]
339 |             else:
340 |                 return common.unzip(res)["data"]["list"]
341 |         return None


--------------------------------------------------------------------------------
/sites/masiro.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import copy
  3 | import json
  4 | import uuid
  5 | from typing import Optional
  6 | 
  7 | from aiohttp import ClientSession
  8 | 
  9 | from models.book import Book
 10 | from models.chapter import Chapter
 11 | from models.cookie import Cookie
 12 | from models.pic import Pic
 13 | from sites.abstract import Site
 14 | from sqlite.database import Database
 15 | from utils import config, request, log, epub, common, image
 16 | 
 17 | 
 18 | class Masiro(Site):
 19 | 
 20 |     def __init__(self, session: ClientSession):
 21 |         self.session = session
 22 |         self.site = "masiro"
 23 |         self.header = {
 24 |             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
 25 |             "Accept-Encoding": "gzip, deflate, br",
 26 |             "Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
 27 |             "Referer": config.read("url_config")["masiro"]["referer"] + "/admin",
 28 |             "Origin": config.read("url_config")["masiro"]["referer"]
 29 |         }
 30 |         thread = 1
 31 |         self.thread = asyncio.Semaphore(thread)
 32 |         self.token = ""
 33 |         # 登录方式 0 账密 1 cookie
 34 |         self.login_flag = -1
 35 | 
 36 |     async def get_books(self):
 37 |         # 白名单
 38 |         if config.read("white_list"):
 39 |             for book_url in config.read("white_list"):
 40 |                 await self.build_book(book_url)
 41 |             return
 42 |         for page in range(config.read("start_page"), config.read("end_page") + 1):
 43 |             book_urls = await self.get_book_urls(page)
 44 |             if not book_urls:
 45 |                 continue
 46 |             tasks = [asyncio.create_task(self.build_book(book_url)) for book_url in book_urls]
 47 |             if tasks:
 48 |                 await asyncio.gather(*tasks)
 49 | 
 50 |     async def build_book(self, book_url: str):
 51 |         async with self.thread:
 52 |             res = await request.get(url=book_url, headers=self.header, session=self.session)
 53 |             book = self.build_book_from_text(res, book_url)
 54 |             if not book or not book.book_id:
 55 |                 return
 56 |             with Database() as db:
 57 |                 await db.book.insert_or_update(book, self.session)
 58 |             # 章节
 59 |             chapter_list = await self.build_chapters(book, res)
 60 |             # epub
 61 |             epub.build_epub(book, chapter_list)
 62 | 
 63 |     async def build_chapters(self, book: Book, text: str) -> list[Chapter]:
 64 |         try:
 65 |             parent_chapter_json = json.loads(config.get_xpath(text, self.site, "parent_chapter")[0])
 66 |             chapter_json = json.loads(config.get_xpath(text, self.site, "chapter")[0])
 67 |         except:
 68 |             return None
 69 |         if not chapter_json:
 70 |             return None
 71 |         with Database() as db:
 72 |             old_chapters = db.chapter.get_list(book.id)
 73 |         order = 1
 74 |         chapter_list = []
 75 |         for parent_chapter in parent_chapter_json:
 76 |             for chapter_data in chapter_json:
 77 |                 if chapter_data["parent_id"] != parent_chapter["id"]:
 78 |                     continue
 79 |                 chapter_id = str(chapter_data["id"])
 80 |                 chapter_url = config.read("url_config")[self.site]["chapter"] % chapter_data["id"]
 81 |                 chapter = Chapter()
 82 |                 chapter.id = str(uuid.uuid4())
 83 |                 chapter.chapter_order = order
 84 |                 chapter.book_table_id = book.id
 85 |                 chapter.chapter_name = chapter_data["title"]
 86 |                 chapter.chapter_id = chapter_id
 87 |                 chapter.last_update_time = common.time(chapter_data["episode_update_time"])
 88 |                 order += 1
 89 |                 if self.update_chapter(chapter, old_chapters):
 90 |                     chapter_list.append(chapter)
 91 |                     continue
 92 |                 # 爬文本和插图
 93 |                 await self.build_content(book, chapter, chapter_url, chapter_data["cost"])
 94 |                 chapter_list.append(chapter)
 95 |         return chapter_list
 96 | 
 97 |     async def build_content(self, book: Book, chapter: Chapter, chapter_url: str, cost: int):
 98 |         text = await request.get(chapter_url, self.header, self.session)
 99 |         if cost > 0 and text and "立即打钱" in text:
100 |             # 打钱处理
101 |             chapter.purchase_fail_flag = 1
102 |             if config.read("is_purchase") and cost <= config.read("max_purchase"):
103 |                 text = await self.pay(book, chapter, chapter_url, cost)
104 |         else:
105 |             chapter.content = config.get_html(text, self.site, "content")
106 |         with Database() as db:
107 |             db.chapter.insert_or_update(chapter)
108 |         # 插图处理
109 |         await self.build_images(book, chapter, text)
110 |         log.info("%s 新获取章节内容" % chapter.chapter_name)
111 | 
112 |     async def pay(self, book: Book, chapter: Chapter, chapter_url: str, cost: int) -> str:
113 |         log.info("%s 开始打钱..花费: %s金币" % (chapter.chapter_name, str(cost)))
114 |         cost_url = config.read("url_config")[self.site]["cost"]
115 |         cost_param = {
116 |             "type": 2,
117 |             "object_id": chapter.chapter_id,
118 |             "cost": cost
119 |         }
120 |         cost_header = copy.deepcopy(self.header)
121 |         cost_header['x-csrf-token'] = self.token
122 |         cost_res = await request.post_json(url=cost_url, headers=cost_header, json=cost_param, session=self.session)
123 |         if cost_res and json.loads(cost_res)['code'] == 1:
124 |             # 打钱成功 刷新文本
125 |             log.info("%s 打钱成功！" % chapter.chapter_name)
126 |             text = await request.get(chapter_url, self.header, self.session)
127 |             chapter.content = config.get_html(text, self.site, "content")
128 |             chapter.purchase_fail_flag = 0
129 |             return text
130 |         log.info("%s 打钱失败！" % chapter.chapter_name)
131 |         return None
132 | 
133 |     async def build_images(self, book: Book, chapter: Chapter, text: str):
134 |         if not text:
135 |             return
136 |         with Database() as db:
137 |             pics = db.pic.get_list(chapter.id)
138 |         pic_urls = config.get_xpath(text, self.site, "pic")
139 |         if not pic_urls:
140 |             return
141 |         for pic_url in pic_urls:
142 |             match_pic = common.find(pics, "pic_url", pic_url)
143 |             if match_pic and match_pic.pic_path:
144 |                 continue
145 |             pic = Pic()
146 |             if match_pic:
147 |                 pic = match_pic
148 |             else:
149 |                 pic.id = str(uuid.uuid4())
150 |                 pic.chapter_table_id = chapter.id
151 |                 pic.pic_url = pic_url
152 |             # 下载图片
153 |             await image.download(pic, self.site, book.book_id, chapter.chapter_id, self.session)
154 |             with Database() as db:
155 |                 db.pic.insert_or_update(pic)
156 | 
157 |     def update_chapter(self, chapter: Chapter, old_chapters: Optional[Chapter]) -> bool:
158 |         old_chapter = common.find(old_chapters, "chapter_id", chapter.chapter_id)
159 |         if not old_chapter:
160 |             return False
161 |         if old_chapter.purchase_fail_flag and old_chapter.purchase_fail_flag == 1:
162 |             # 打钱失败的
163 |             common.copy(old_chapter, chapter)
164 |             return False
165 |         if old_chapter.chapter_name != chapter.chapter_name \
166 |                 or old_chapter.chapter_order != chapter.chapter_order:
167 |             old_chapter.chapter_name = chapter.chapter_name
168 |             old_chapter.chapter_order = chapter.chapter_order
169 |             with Database() as db:
170 |                 db.chapter.insert_or_update(old_chapter)
171 |         if old_chapter.last_update_time < chapter.last_update_time:
172 |             old_chapter.last_update_time = chapter.last_update_time
173 |             with Database() as db:
174 |                 db.chapter.insert_or_update(old_chapter)
175 |             # 后续需要更新文本
176 |             common.copy(old_chapter, chapter)
177 |             return False
178 |         common.copy(old_chapter, chapter)
179 |         return True
180 | 
181 |     def build_book_from_text(self, text: str, book_url: str) -> Book:
182 |         if not text:
183 |             return None
184 |         book = Book()
185 |         book.id = str(uuid.uuid4())
186 |         book.book_id = book_url.split('?novel_id=')[-1]
187 |         book.source = self.site
188 |         book.book_name = common.first(config.get_xpath(text, self.site, "title"))
189 |         book.author = common.first(config.get_xpath(text, self.site, "author"))
190 |         book.describe = common.join(config.get_xpath(text, self.site, "describe"), "\n")
191 |         book.cover_url = common.first(config.get_xpath(text, self.site, "cover"))
192 |         book.tags = common.join(config.get_xpath(text, self.site, "tags"))
193 |         log.info("%s 书籍信息已获取" % book.book_name)
194 |         return book
195 | 
196 |     async def get_book_urls(self, page: int) -> list:
197 |         log.info("开始爬取%s第%d页" % (self.site, page))
198 |         if config.read("get_collection"):
199 |             page_url = config.read("url_config")[self.site]["collection"] % page
200 |         else:
201 |             page_url = config.read("url_config")[self.site]["page"] % page
202 |         res = await request.get(url=page_url, headers=self.header, session=self.session)
203 |         return self.get_book_urls_from_text(res)
204 | 
205 |     def get_book_urls_from_text(self, text: str) -> list:
206 |         if not text:
207 |             return None
208 |         book_urls = []
209 |         book_datas = json.loads(text)
210 |         if book_datas["novels"]:
211 |             for book_data in book_datas["novels"]:
212 |                 book_url = config.read("url_config")[self.site]["book"] % book_data["id"]
213 |                 # 黑名单
214 |                 if config.read("black_list") and book_url in config.read("black_list"):
215 |                     continue
216 |                 book_urls.append(book_url)
217 |         return book_urls
218 | 
219 |     async def login(self):
220 |         if config.read("login_info")[self.site]["username"] and config.read("login_info")[self.site]["password"]:
221 |             self.login_flag = 0
222 |         elif config.read("login_info")[self.site]["cookie"] and config.read("login_info")[self.site]["useragent"]:
223 |             self.login_flag = 1
224 |         if self.login_flag < 0:
225 |             log.info("%s 账号密码或cookie未配置，跳过" % self.site)
226 |             raise Exception()
227 |         with Database() as db:
228 |             self.cookie = db.cookie.get_one(self.site)
229 |             if self.cookie:
230 |                 valid_bool = await self.valid_cookie()
231 |                 if valid_bool:
232 |                     return
233 |         if self.login_flag == 0 and not config.read("flaresolverr_url"):
234 |             log.info("真白萌需要在配置中填写flaresolverr_url")
235 |             return
236 |         cookie = Cookie()
237 |         cookie.id = str(uuid.uuid4())
238 |         cookie.source = self.site
239 |         self.cookie = cookie
240 |         await self.get_cookie()
241 | 
242 |     async def valid_cookie(self) -> bool:
243 |         url = config.read("url_config")[self.site]["user"]
244 |         self.header["Cookie"] = self.cookie.cookie
245 |         self.header["User-Agent"] = self.cookie.uid
246 |         self.token = self.cookie.token
247 |         res = await request.get(url=url, headers=self.header, session=self.session)
248 |         if res and "csrf-token" in res:
249 |             log.info("%s校验缓存cookie成功，跳过登录" % self.site)
250 |             return True
251 |         return False
252 | 
253 |     async def get_cookie(self):
254 |         log.info("%s开始登录..." % self.site)
255 |         # 账密
256 |         if self.login_flag == 0:
257 |             url = config.read("url_config")[self.site]["login"]
258 |             cf_bool = await self.fuck_cf()
259 |             if not cf_bool:
260 |                 log.info("真白萌破cf盾失败，停止爬取")
261 |                 raise Exception()
262 |             await self.get_token()
263 |             login_data = {
264 |                 "username": config.read("login_info")[self.site]["username"],
265 |                 "password": config.read("login_info")[self.site]["password"],
266 |                 "remember": "1",
267 |                 "_token": self.token
268 |             }
269 |             res = await request.post_data(url=url, headers=self.header, data=login_data, session=self.session)
270 |             if res:
271 |                 self.cookie.cookie = self.header["Cookie"] + "; ".join(res["headers"].getall("Set-Cookie"))
272 |                 self.cookie.uid = self.header["User-Agent"]
273 |                 self.cookie.token = self.token
274 |                 self.header["Cookie"] = self.cookie.cookie
275 |                 with Database() as db:
276 |                     db.cookie.insert_or_update(self.cookie)
277 |                 log.info("%s登录成功" % self.site)
278 |             else:
279 |                 log.info("登录失败！")
280 |                 raise Exception()
281 |         # cookie
282 |         if self.login_flag == 1:
283 |             self.cookie.cookie = config.read("login_info")[self.site]["cookie"]
284 |             self.cookie.uid = config.read("login_info")[self.site]["useragent"]
285 |             self.header["Cookie"] = self.cookie.cookie
286 |             self.header["User-Agent"] = self.cookie.uid
287 |             await self.get_token()
288 |             if not self.token:
289 |                 log.info("登录失败！")
290 |                 raise Exception()
291 |             self.cookie.token = self.token
292 |             with Database() as db:
293 |                 db.cookie.insert_or_update(self.cookie)
294 |             log.info("%s登录成功" % self.site)
295 | 
296 |     async def get_token(self):
297 |         if self.login_flag == 0:
298 |             url = config.read("url_config")[self.site]["login"]
299 |         else:
300 |             # cookie登录从用户页拿token
301 |             url = config.read("url_config")[self.site]["user"]
302 |         res = await request.get(url=url, headers=self.header, session=self.session)
303 |         if res:
304 |             self.token = config.get_xpath(res, self.site, "token")[0]
305 | 
306 |     async def fuck_cf(self) -> bool:
307 |         log.info("开始破cf盾...")
308 |         url = config.read("flaresolverr_url")
309 |         headers = {
310 |             'content-type': 'application/json'
311 |         }
312 |         data = {
313 |             "cmd": "request.get",
314 |             "url": config.read("url_config")[self.site]["login"],
315 |             "maxTimeout": 60000
316 |         }
317 |         res = await request.post_json(url=url, headers=headers, json=data, session=self.session)
318 |         if res:
319 |             res_json = json.loads(res)
320 |             self.header["User-Agent"] = res_json["solution"]["userAgent"]
321 |             for cf_cookie in res_json["solution"]["cookies"]:
322 |                 if cf_cookie["name"] == "cf_clearance":
323 |                     self.header["Cookie"] = "cf_clearance=" + cf_cookie["value"] + ";"
324 |                     log.info("破cf盾成功！")
325 |                     return True
326 |         return False
327 | 


--------------------------------------------------------------------------------
/sites/yuri.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import re
  3 | import uuid
  4 | from typing import Optional
  5 | 
  6 | from aiohttp import ClientSession
  7 | from lxml import html
  8 | 
  9 | from js import runjs
 10 | from models.book import Book
 11 | from models.chapter import Chapter
 12 | from models.pic import Pic
 13 | from sites.abstract import Site
 14 | from sqlite.database import Database
 15 | from utils import config, request, log, common, image, epub
 16 | 
 17 | 
 18 | class Yuri(Site):
 19 | 
 20 |     def __init__(self, session: ClientSession):
 21 |         self.session = session
 22 |         self.site = "yuri"
 23 |         self.header = {
 24 |             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
 25 |             "Accept-Encoding": "gzip, deflate, br",
 26 |             "Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
 27 |             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
 28 |         }
 29 |         thread = config.read("max_thread")
 30 |         if thread > 4:
 31 |             thread = 4
 32 |         if config.read("push_calibre")["enabled"]:
 33 |             thread = 1
 34 |         self.thread = asyncio.Semaphore(thread)
 35 | 
 36 |     async def login(self):
 37 |         if not config.read("login_info")[self.site]["cookie"]:
 38 |             log.info("%s cookie未配置，跳过" % self.site)
 39 |             raise Exception()
 40 |         self.header["Cookie"] = config.read("login_info")[self.site]["cookie"]
 41 |         await self.valid_cookie()
 42 | 
 43 |     async def valid_cookie(self) -> bool:
 44 |         log.info("%s开始校验cookie..." % self.site)
 45 |         url = config.read("url_config")[self.site]["user"]
 46 |         res = await request.get(url=url, headers=self.header, session=self.session)
 47 |         if res and "<title>用户组" in res:
 48 |             log.info("%s cookie校验通过" % self.site)
 49 |             # await self.get_cookie()
 50 |         else:
 51 |             log.info("cookie校验失败！")
 52 |             raise Exception()
 53 | 
 54 |     async def get_cookie(self):
 55 |         # egg
 56 |         url = "https://bbs.yamibo.com/plugin.php?id=zqlj_sign"
 57 |         res = await request.get(url=url, headers=self.header, session=self.session)
 58 |         page_body = html.fromstring(res)
 59 |         form_hash = page_body.xpath("//input[@name='formhash']/@value")[0]
 60 |         sign_url = "https://bbs.yamibo.com/plugin.php?id=zqlj_sign&sign=" + form_hash
 61 |         await request.get(url=sign_url, headers=self.header, session=self.session)
 62 |         log.info("%s 签到成功！" % self.site)
 63 | 
 64 |     async def get_books(self):
 65 |         # 白名单
 66 |         if config.read("white_list"):
 67 |             for book_url in config.read("white_list"):
 68 |                 await self.build_book(book_url)
 69 |             return
 70 |         for page in range(config.read("start_page"), config.read("end_page") + 1):
 71 |             book_urls = await self.get_book_urls(page)
 72 |             if not book_urls:
 73 |                 continue
 74 |             tasks = [asyncio.create_task(self.build_book(book_url)) for book_url in book_urls]
 75 |             if tasks:
 76 |                 await asyncio.gather(*tasks)
 77 | 
 78 |     async def build_book(self, book_url: str):
 79 |         async with self.thread:
 80 |             res_turple = await self.handle_dsign(book_url)
 81 |             _book_url = res_turple[0]
 82 |             res = res_turple[1]
 83 |             book = self.build_book_from_text(res, _book_url)
 84 |             if not book or not book.book_id:
 85 |                 return
 86 |             with Database() as db:
 87 |                 await db.book.insert_or_update(book, self.session)
 88 |             author_id = common.first(config.get_xpath(res, self.site, "author"))
 89 |             # 只看作者 全部页数的全部数据
 90 |             text_list = await self.get_content_page(book, author_id)
 91 |             if not text_list:
 92 |                 return
 93 |             # 章节
 94 |             all_chapter_list = []
 95 |             order_dict = {"order": 1}
 96 |             for text in text_list:
 97 |                 chapter_list = await self.build_chapters(book, text, order_dict)
 98 |                 if chapter_list:
 99 |                     all_chapter_list += chapter_list
100 |             # epub
101 |             epub.build_epub(book, all_chapter_list)
102 | 
103 |     async def get_content_page(self, book: Book, author_id: str) -> list:
104 |         # 只看作者
105 |         author_url = config.read('url_config')[self.site]["chapter"] % (book.book_id, "1", author_id)
106 |         author_res_turple = await self.handle_dsign(author_url)
107 |         first_res = author_res_turple[1]
108 |         if not first_res:
109 |             return None
110 |         result_list = [first_res]
111 |         # 获取页数
112 |         page_size_xpath = common.first(config.get_xpath(first_res, self.site, "size"))
113 |         if not page_size_xpath:
114 |             page_size = 1
115 |         else:
116 |             page_size = int(re.findall("\d+", page_size_xpath)[0])
117 |         if page_size == 1:
118 |             return result_list
119 |         for page in range(2, page_size + 1):
120 |             url = config.read('url_config')[self.site]["chapter"] % (book.book_id, str(page), author_id)
121 |             res_turple = await self.handle_dsign(url)
122 |             result_list.append(res_turple[1])
123 |         return result_list
124 | 
125 |     async def handle_dsign(self, url: str):
126 |         _url = url
127 |         res = await request.get(url=url, headers=self.header, session=self.session)
128 |         if res.startswith("<script"):
129 |             # 反爬处理
130 |             _url = config.read("url_config")[self.site]["dsign"] % runjs.get_dsign(res)
131 |             res = await request.get(url=_url, headers=self.header, session=self.session)
132 |         return _url, res
133 | 
134 |     async def build_chapters(self, book: Book, text: str, order_dict: dict) -> list[Chapter]:
135 |         chapter_xpaths = config.get_xpath(text, self.site, "chapter")
136 |         if not chapter_xpaths:
137 |             return None
138 |         order = order_dict["order"]
139 |         chapter_list = []
140 |         for xpath in chapter_xpaths:
141 |             chapter_html = html.tostring(xpath, pretty_print=True, encoding="unicode")
142 |             if not chapter_html:
143 |                 continue
144 |             chapter = Chapter()
145 |             chapter.id = str(uuid.uuid4())
146 |             chapter.chapter_order = order
147 |             chapter.book_table_id = book.id
148 |             chapter.chapter_name = str(order)
149 |             chapter.chapter_id = str(order)
150 |             order += 1
151 |             order_dict["order"] = order
152 |             # 爬文本和插图
153 |             await self.build_content(book, chapter, chapter_html)
154 |             chapter_list.append(chapter)
155 |         return chapter_list
156 | 
157 |     async def build_content(self, book: Book, chapter: Chapter, text: str):
158 |         chapter.content = text
159 |         # img标签特殊处理
160 |         chapter.content = chapter.content.replace("src=\"static/image/common/none.gif\"", "")
161 |         chapter.content = chapter.content.replace("file=\"", "src=\"")
162 |         with Database() as db:
163 |             db.chapter.insert_or_update(chapter)
164 |         # 插图处理
165 |         await self.build_images(book, chapter, text)
166 |         log.info("%s楼 获取内容" % chapter.chapter_name)
167 | 
168 |     async def build_images(self, book: Book, chapter: Chapter, text: str):
169 |         with Database() as db:
170 |             pics = db.pic.get_list(chapter.id)
171 |         pic_urls = config.get_xpath(text, self.site, "pic")
172 |         if not pic_urls:
173 |             return
174 |         for pic_url in pic_urls:
175 |             match_pic = common.find(pics, "pic_url", pic_url)
176 |             if match_pic and match_pic.pic_path:
177 |                 continue
178 |             pic = Pic()
179 |             if match_pic:
180 |                 pic = match_pic
181 |             else:
182 |                 pic.id = str(uuid.uuid4())
183 |                 pic.chapter_table_id = chapter.id
184 |                 pic.pic_url = pic_url
185 |             # 论坛自己的图床
186 |             if not pic_url.startswith("http"):
187 |                 pic_full_url = config.read("url_config")[self.site]["book"] % pic_url
188 |                 pic.pic_url = pic_full_url
189 |             # 下载图片
190 |             await image.download(pic, self.site, book.book_id, chapter.chapter_id, self.session)
191 |             pic.pic_url = pic_url
192 |             with Database() as db:
193 |                 db.pic.insert_or_update(pic)
194 | 
195 |     def build_book_from_text(self, text: str, book_url: str) -> Book:
196 |         if not text:
197 |             return None
198 |         book = Book()
199 |         book.id = str(uuid.uuid4())
200 |         book.book_id = book_url.split('-')[1]
201 |         book.source = self.site
202 |         book.book_name = common.first(config.get_xpath(text, self.site, "title"))
203 |         log.info("%s 书籍信息已获取" % book.book_name)
204 |         return book
205 | 
206 |     async def get_book_urls(self, page: int) -> list:
207 |         log.info("开始爬取%s第%d页" % (self.site, page))
208 |         if config.read("get_collection"):
209 |             page_url = config.read("url_config")[self.site]["collection"] % page
210 |         else:
211 |             page_url = config.read("url_config")[self.site]["page"] % page
212 |         res = await request.get(url=page_url, headers=self.header, session=self.session)
213 |         return self.get_book_urls_from_text(res)
214 | 
215 |     def get_book_urls_from_text(self, text: str) -> list:
216 |         if not text:
217 |             return None
218 |         book_urls = []
219 |         if config.read("get_collection"):
220 |             book_xpaths = config.get_xpath(text, self.site, "collection")
221 |         else:
222 |             book_xpaths = config.get_xpath(text, self.site, "page")
223 |         if not book_xpaths:
224 |             return None
225 |         for book_xpath in book_xpaths:
226 |             if "javascript" in book_xpath:
227 |                 continue
228 |             if "thread-535989-" in book_xpath:
229 |                 # 排除置顶专楼
230 |                 continue
231 |             book_url = config.read("url_config")[self.site]["book"] % book_xpath
232 |             # 黑名单
233 |             if config.read("black_list") and book_xpath in config.read("black_list"):
234 |                 continue
235 |             book_urls.append(book_url)
236 |         return book_urls
237 | 


--------------------------------------------------------------------------------
/sqlite/book.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | 
 3 | from aiohttp import ClientSession
 4 | from sqlmodel import Session, select
 5 | from utils import common, image
 6 | 
 7 | from models.book import Book
 8 | 
 9 | 
10 | class BookDatabase:
11 | 
12 |     def __init__(self, session: Session):
13 |         self.session = session
14 | 
15 |     def update(self, data: Book):
16 |         self.session.add(data)
17 |         self.session.commit()
18 |         self.session.refresh(data)
19 | 
20 |     def get_one(self, book_id: str, source: str) -> Optional[Book]:
21 |         statement = select(Book).where(Book.book_id == book_id, Book.source == source)
22 |         return self.session.exec(statement).first()
23 | 
24 |     def get_all(self) -> list[Book]:
25 |         statement = select(Book)
26 |         return self.session.exec(statement).all()
27 | 
28 |     async def insert_or_update(self, data: Book, session: ClientSession):
29 |         book = self.get_one(data.book_id, data.source)
30 |         if not book:
31 |             # 生成封面
32 |             if data.cover_url:
33 |                 await image.cover(data.cover_url, data.source, data.book_id, session)
34 |             self.update(data)
35 |             return
36 |         if data.book_name != book.book_name or data.cover_url != book.cover_url:
37 |             book.book_name = data.book_name
38 |             self.update(book)
39 |         if data.cover_url != book.cover_url:
40 |             book.cover_url = data.cover_url
41 |             # 更新封面
42 |             if data.cover_url:
43 |                 await image.cover(data.cover_url, data.source, data.book_id, session)
44 |             self.update(book)
45 |         common.copy(book, data)
46 | 
47 |     def get_by_ids(self, ids: list) -> list[Book]:
48 |         statement = select(Book).where(Book.id.in_(ids))
49 |         return self.session.exec(statement).all()
50 | 
51 |     def get_by_id(self, id: str) -> Optional[Book]:
52 |         statement = select(Book).where(Book.id == id)
53 |         return self.session.exec(statement).first()
54 | 


--------------------------------------------------------------------------------
/sqlite/chapter.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | 
 3 | from sqlmodel import Session, select
 4 | 
 5 | from models.chapter import Chapter
 6 | from utils import common
 7 | 
 8 | 
 9 | class ChapterDatabase:
10 | 
11 |     def __init__(self, session: Session):
12 |         self.session = session
13 | 
14 |     def update(self, data: Chapter):
15 |         self.session.add(data)
16 |         self.session.commit()
17 |         self.session.refresh(data)
18 | 
19 |     def get_list(self, book_table_id: str) -> list[Chapter]:
20 |         statement = select(Chapter).where(Chapter.book_table_id == book_table_id).order_by(Chapter.chapter_order)
21 |         return self.session.exec(statement).all()
22 | 
23 |     def get_one(self, id: str) -> Optional[Chapter]:
24 |         statement = select(Chapter).where(Chapter.id == id)
25 |         return self.session.exec(statement).first()
26 | 
27 |     def insert_or_update(self, data: Chapter):
28 |         chapter = self.get_one(data.id)
29 |         if not chapter:
30 |             self.update(data)
31 |             return
32 |         if data.chapter_name != chapter.chapter_name \
33 |                 or data.chapter_order != chapter.chapter_order \
34 |                 or data.content != chapter.content \
35 |                 or data.last_update_time != chapter.last_update_time \
36 |                 or data.purchase_fail_flag != chapter.purchase_fail_flag:
37 |             chapter.chapter_name = data.chapter_name
38 |             chapter.chapter_order = data.chapter_order
39 |             chapter.content = data.content
40 |             chapter.last_update_time = data.last_update_time
41 |             chapter.purchase_fail_flag = data.purchase_fail_flag
42 |             self.update(chapter)
43 |         common.copy(chapter, data)
44 | 
45 |     def get_nopay_list(self) -> list[Chapter]:
46 |         statement = select(Chapter).where(Chapter.purchase_fail_flag == 1)
47 |         return self.session.exec(statement).all()
48 | 
49 |     def get_by_book(self, book_table_id: str) -> list[Chapter]:
50 |         statement = select(Chapter).where(Chapter.book_table_id == book_table_id)
51 |         return self.session.exec(statement).all()
52 | 


--------------------------------------------------------------------------------
/sqlite/cookie.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | 
 3 | from sqlmodel import Session, select
 4 | 
 5 | from models.cookie import Cookie
 6 | from utils import common
 7 | 
 8 | 
 9 | class CookieDatabase:
10 | 
11 |     def __init__(self, session: Session):
12 |         self.session = session
13 | 
14 |     def update(self, data: Cookie):
15 |         self.session.add(data)
16 |         self.session.commit()
17 |         self.session.refresh(data)
18 | 
19 |     def get_one(self, source: str) -> Optional[Cookie]:
20 |         statement = select(Cookie).where(Cookie.source == source)
21 |         return self.session.exec(statement).first()
22 | 
23 |     def insert_or_update(self, data: Cookie):
24 |         cookie = self.get_one(data.source)
25 |         if not cookie:
26 |             self.update(data)
27 |             return
28 |         cookie.cookie = data.cookie
29 |         cookie.token = data.token
30 |         cookie.uid = data.uid
31 |         self.update(cookie)
32 |         common.copy(cookie, data)


--------------------------------------------------------------------------------
/sqlite/database.py:
--------------------------------------------------------------------------------
 1 | from sqlmodel import Session
 2 | 
 3 | from .book import BookDatabase
 4 | from .chapter import ChapterDatabase
 5 | from .cookie import CookieDatabase
 6 | from .engine import engine
 7 | from .pic import PicDatabase
 8 | 
 9 | 
10 | class Database(Session):
11 | 
12 |     def __init__(self, _engine=engine):
13 |         self.engine = _engine
14 |         super().__init__(_engine)
15 |         self.book = BookDatabase(self)
16 |         self.chapter = ChapterDatabase(self)
17 |         self.pic = PicDatabase(self)
18 |         self.cookie = CookieDatabase(self)
19 | 


--------------------------------------------------------------------------------
/sqlite/engine.py:
--------------------------------------------------------------------------------
1 | from sqlmodel import Session, create_engine
2 | 
3 | engine = create_engine("sqlite:///lightnovel.db")
4 | 
5 | db_session = Session(engine)


--------------------------------------------------------------------------------
/sqlite/pic.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | 
 3 | from sqlalchemy.sql.operators import isnot
 4 | from sqlmodel import Session, select, update
 5 | 
 6 | from models.pic import Pic
 7 | from utils import common, log
 8 | 
 9 | 
10 | class PicDatabase:
11 | 
12 |     def __init__(self, session: Session):
13 |         self.session = session
14 | 
15 |     def update(self, data: Pic):
16 |         self.session.add(data)
17 |         self.session.commit()
18 |         self.session.refresh(data)
19 | 
20 |     def get_list(self, chapter_table_id: str) -> list[Pic]:
21 |         statement = select(Pic).where(Pic.chapter_table_id == chapter_table_id)
22 |         return self.session.exec(statement).all()
23 | 
24 |     def get_nonnull_list(self, chapter_table_id: str) -> list[Pic]:
25 |         statement = select(Pic).where(Pic.chapter_table_id == chapter_table_id, isnot(Pic.pic_path, None))
26 |         return self.session.exec(statement).all()
27 | 
28 |     def get_null_list(self) -> list[Pic]:
29 |         statement = select(Pic).where(Pic.pic_path.is_(None))
30 |         return self.session.exec(statement).all()
31 | 
32 |     def get_one(self, id: str) -> Optional[Pic]:
33 |         statement = select(Pic).where(Pic.id == id)
34 |         return self.session.exec(statement).first()
35 | 
36 |     def insert_or_update(self, data: Pic):
37 |         pic = self.get_one(data.id)
38 |         if not pic:
39 |             self.update(data)
40 |             return
41 |         if data.pic_path != pic.pic_path:
42 |             pic.pic_path = data.pic_path
43 |             self.update(pic)
44 |         common.copy(pic, data)
45 | 
46 |     def clear(self):
47 |         statement = update(Pic).values(pic_path=None)
48 |         # 执行更新操作
49 |         self.session.execute(statement)
50 |         self.session.commit()
51 |         log.info("数据库图片地址信息已清除！")
52 | 


--------------------------------------------------------------------------------
/sqlite/script.py:
--------------------------------------------------------------------------------
 1 | import sqlite3
 2 | 
 3 | 
 4 | def init_db():
 5 |     conn = sqlite3.connect('lightnovel.db')
 6 |     cursor = conn.cursor()
 7 |     create_table_script = """
 8 |     CREATE TABLE IF NOT EXISTS "book" (
 9 |       "id" text NOT NULL,
10 |       "book_id" text NOT NULL,
11 |       "source" text NOT NULL,
12 |       "book_name" text,
13 |       "author" text,
14 |       "tags" text,
15 |       "describe" text,
16 |       "cover_url" text,
17 |       PRIMARY KEY ("id")
18 |     );
19 |     
20 |     CREATE INDEX IF NOT EXISTS "idx_book" ON "book" (
21 |       "book_id" COLLATE BINARY ASC,
22 |       "source" COLLATE BINARY ASC
23 |     );
24 |     
25 |     CREATE TABLE IF NOT EXISTS "chapter" (
26 |       "id" text NOT NULL,
27 |       "book_table_id" text NOT NULL,
28 |       "chapter_id" text NOT NULL,
29 |       "chapter_name" text,
30 |       "chapter_order" integer NOT NULL,
31 |       "content" text,
32 |       "last_update_time" integer,
33 |       "purchase_fail_flag" integer,
34 |       PRIMARY KEY ("id")
35 |     );
36 |     
37 |     CREATE INDEX IF NOT EXISTS "idx_chapter" ON "chapter" (
38 |       "book_table_id" COLLATE BINARY ASC
39 |     );
40 |     
41 |     CREATE INDEX IF NOT EXISTS "idx_flag" ON "chapter" (
42 |       "purchase_fail_flag" COLLATE BINARY ASC
43 |     );
44 |     
45 |     CREATE TABLE IF NOT EXISTS "cookie" (
46 |       "id" text NOT NULL,
47 |       "source" text NOT NULL,
48 |       "cookie" text,
49 |       "token" text,
50 |       "uid" text,
51 |       PRIMARY KEY ("id")
52 |     );
53 |     
54 |     CREATE TABLE IF NOT EXISTS "pic" (
55 |       "id" text NOT NULL,
56 |       "chapter_table_id" text NOT NULL,
57 |       "pic_url" text NOT NULL,
58 |       "pic_path" text,
59 |       "pic_id" text,
60 |       PRIMARY KEY ("id")
61 |     );
62 |     
63 |     CREATE INDEX IF NOT EXISTS "idx_pic" ON "pic" (
64 |       "chapter_table_id" COLLATE BINARY ASC
65 |     );
66 |     """
67 |     cursor.executescript(create_table_script)
68 |     conn.commit()
69 |     conn.close()


--------------------------------------------------------------------------------
/utils/common.py:
--------------------------------------------------------------------------------
  1 | import base64
  2 | import json
  3 | import re
  4 | import zlib
  5 | from datetime import datetime
  6 | from os.path import basename
  7 | from urllib.parse import urlparse
  8 | 
  9 | from models.book import Book
 10 | 
 11 | 
 12 | def first(in_list: list):
 13 |     if not in_list:
 14 |         return None
 15 |     return in_list[0]
 16 | 
 17 | 
 18 | def join(in_list: list, concat=",") -> str:
 19 |     if not in_list:
 20 |         return None
 21 |     return concat.join(in_list)
 22 | 
 23 | 
 24 | def find(in_list: list, attr_name: str, attr_value):
 25 |     if not in_list or not attr_name:
 26 |         return None
 27 |     for obj in in_list:
 28 |         if getattr(obj, attr_name, None) == attr_value:
 29 |             return obj
 30 |     return None
 31 | 
 32 | 
 33 | def find_list(in_list: list, attr_name: str, attr_value) -> list:
 34 |     if not in_list or not attr_name:
 35 |         return None
 36 |     result_list = []
 37 |     for obj in in_list:
 38 |         if getattr(obj, attr_name, None) == attr_value:
 39 |             result_list.append(obj)
 40 |     return result_list
 41 | 
 42 | 
 43 | def copy(source, target):
 44 |     if not source or not target:
 45 |         return
 46 |     for key, value in vars(source).items():
 47 |         setattr(target, key, value)
 48 | 
 49 | 
 50 | def filename_from_url(url: str):
 51 |     if not url:
 52 |         return None
 53 |     path = urlparse(url).path
 54 |     return basename(path)
 55 | 
 56 | 
 57 | def unzip(text: str):
 58 |     b = base64.b64decode(text)
 59 |     s = zlib.decompress(b).decode()
 60 |     return json.loads(s)
 61 | 
 62 | 
 63 | def time(time_str: str) -> int:
 64 |     time_format = "%Y-%m-%d %H:%M:%S"
 65 |     time_object = datetime.strptime(time_str, time_format)
 66 |     return int(time_object.timestamp())
 67 | 
 68 | 
 69 | def bbcode_to_html(text: str, lk_res: dict, pic_urls: list) -> str:
 70 |     text = lk_bbcode_handler(text, lk_res, pic_urls)
 71 |     # 换行符处理
 72 |     text = text.replace("\n", "<br>")
 73 |     # 移除全部bbcode
 74 |     text = [re.sub(r"\[.*?\]", "", text)][0]
 75 |     return text
 76 | 
 77 | 
 78 | def lk_bbcode_handler(text: str, lk_res: dict, pic_urls: list) -> str:
 79 |     # 文本中剔除插图
 80 |     result = text
 81 |     # bbcode img 轻国较旧的小说使用的图床
 82 |     img_list = re.findall(r"\[img\](.*?)\[/img\]", result)
 83 |     if img_list:
 84 |         for img_url in img_list:
 85 |             if img_url.startswith("http"):
 86 |                 result = result.replace("[img]" + img_url + "[/img]", "<img src=\"" + img_url + "\">")
 87 |                 pic_urls.append({"id": "", "url": img_url})
 88 |     # bbcode res 轻国较新的小说使用的图床
 89 |     if lk_res and lk_res.get("res") and lk_res["res"]["res_info"]:
 90 |         for key, value in lk_res["res"]["res_info"].items():
 91 |             result = result.replace("[res]" + key + "[/res]", "<img src=\"" + key + "\">")
 92 |             pic_urls.append({"id": key, "url": value["url"]})
 93 |     # bbcode attach 轻国较旧的小说引用图片类型的附件
 94 |     if lk_res and lk_res.get("attaches") and lk_res["attaches"]["res_info"]:
 95 |         for key, value in lk_res["attaches"]["res_info"].items():
 96 |             if value.get("isimage") == 1:
 97 |                 result = result.replace("[attach]" + key + "[/attach]", "<img src=\"" + key + "\">")
 98 |                 pic_urls.append({"id": key, "url": value["url"]})
 99 |     # 移除不符合的图片类型bbcode
100 |     result = re.sub(r"\[res\].*?\[/res\]", "", result)
101 |     result = re.sub(r"\[attach\].*?\[/attach\]", "", result)
102 |     result = re.sub(r"\[img\].*?\[/img\]", "", result)
103 |     return result
104 | 
105 | 
106 | def handle_title(book: Book):
107 |     if not book.book_name:
108 |         return
109 |     # windows 文件名限制
110 |     book.book_name = book.book_name.replace("/", " ")
111 |     book.book_name = book.book_name.replace("<", "《")
112 |     book.book_name = book.book_name.replace(">", "》")
113 |     book.book_name = book.book_name.replace(":", "：")
114 |     book.book_name = book.book_name.replace("\\", " ")
115 |     book.book_name = book.book_name.replace("|", " ")
116 |     book.book_name = book.book_name.replace("?", "？")
117 |     book.book_name = book.book_name.replace("*", " ")
118 |     # linux 85 windows 127
119 |     if len(book.book_name) > 85:
120 |         book.book_name = book.book_name[:85]
121 | 


--------------------------------------------------------------------------------
/utils/config.py:
--------------------------------------------------------------------------------
 1 | import yaml
 2 | from lxml import html
 3 | 
 4 | config_data = {}
 5 | 
 6 | 
 7 | def init_config():
 8 |     with open('config.yaml', 'r', encoding='utf-8') as f:
 9 |         global config_data
10 |         config_data = yaml.safe_load(f)
11 | 
12 | 
13 | def read(key):
14 |     return config_data.get(key)
15 | 
16 | 
17 | def get_xpath(text: str, site: str, name: str) -> list:
18 |     page_body = html.fromstring(text)
19 |     return page_body.xpath(read("xpath_config")[site][name])
20 | 
21 | 
22 | def get_html(text: str, site: str, name: str) -> str:
23 |     if not text:
24 |         return None
25 |     page_body = html.fromstring(text)
26 |     xpaths = page_body.xpath(read("xpath_config")[site][name])
27 |     if not xpaths:
28 |         return None
29 |     html_str = []
30 |     for xpath in xpaths:
31 |         html_str.append(html.tostring(xpath, pretty_print=True, encoding="unicode"))
32 |     return "\n".join(html_str)
33 | 


--------------------------------------------------------------------------------
/utils/epub.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from typing import Optional
  3 | 
  4 | from ebooklib import epub
  5 | from lxml import html
  6 | from zhconv import zhconv
  7 | 
  8 | from models.book import Book
  9 | from models.chapter import Chapter
 10 | from sqlite.database import Database
 11 | from utils import image, config, log, common, push
 12 | 
 13 | 
 14 | def build_epub(book: Book, chapter_list: Optional[Chapter]):
 15 |     build_txt(book, chapter_list)
 16 |     if not chapter_list:
 17 |         return
 18 |     log.info(book.book_name + " 开始生成epub...")
 19 |     common.handle_title(book)
 20 |     path = config.read("epub_dir") + "/" + book.source + "/" + book.book_name + ".epub"
 21 |     os.makedirs(os.path.dirname(path), exist_ok=True)
 22 |     epub_book = epub.EpubBook()
 23 |     # 元数据
 24 |     epub_book.set_identifier(book.id)
 25 |     epub_book.set_title(book.book_name)
 26 |     epub_book.set_language("zh")
 27 |     epub_book.add_author(book.author)
 28 |     if config.read('convert_hans') and book.describe:
 29 |         book.describe = zhconv.convert(book.describe, 'zh-hans')
 30 |     epub_book.add_metadata("DC", "description", book.describe)
 31 |     epub_book.add_metadata("DC", "source", book.source)
 32 |     epub_book.add_metadata("DC", "publisher", book.source)
 33 |     epub_book.add_metadata("DC", "contributor", "lightnovel-pydownloader")
 34 |     epub_book.add_metadata("DC", "rights", "本电子书由lightnovel-pydownloader制作生成，仅供个人使用，不得对外传播以及用于商业用途。")
 35 |     if book.tags:
 36 |         for tag in book.tags.split(","):
 37 |             epub_book.add_metadata("DC", "subject", tag)
 38 |     if book.cover_url:
 39 |         cover_path = config.read("image_dir") + "/" + book.source + "/" + book.book_id + "/book_cover.jpg"
 40 |         try:
 41 |             epub_book.set_cover("cover.jpg", open(cover_path, 'rb').read())
 42 |         except:
 43 |             pass
 44 |     epub_chapters = []
 45 |     for chapter in chapter_list:
 46 |         # 跳过esj外链
 47 |         if chapter.chapter_id.startswith("http"):
 48 |             continue
 49 |         # 跳过打钱失败的章节
 50 |         if chapter.purchase_fail_flag and chapter.purchase_fail_flag == 1:
 51 |             continue
 52 |         # 图片替换
 53 |         with Database() as db:
 54 |             pics = db.pic.get_nonnull_list(chapter.id)
 55 |         # 跳过字数过少的章节
 56 |         if not pics and not chapter.content:
 57 |             continue
 58 |         if not pics and config.read("least_words") > 0 and config.read("least_words") > len(chapter.content):
 59 |             continue
 60 |         image.replace(chapter, pics, epub_book)
 61 |         epub_chapter = epub.EpubHtml(title=chapter.chapter_name, file_name=chapter.chapter_id + ".xhtml", lang="cn")
 62 |         # 繁转简
 63 |         content = chapter.content
 64 |         if config.read('convert_hans'):
 65 |             content = zhconv.convert(content, 'zh-hans')
 66 |         epub_chapter.content = content
 67 |         epub_chapters.append(epub_chapter)
 68 |         epub_book.add_item(epub_chapter)
 69 |     # 目录和书脊
 70 |     epub_book.toc = epub_chapters
 71 |     epub_book.spine = epub_chapters
 72 |     epub_book.add_item(epub.EpubNcx())
 73 |     epub_book.add_item(epub.EpubNav())
 74 |     # css
 75 |     style = "body { font-family: Times, Times New Roman, serif; }"
 76 |     nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css",
 77 |                             media_type="text/css", content=style)
 78 |     epub_book.add_item(nav_css)
 79 |     # 保存
 80 |     try:
 81 |         epub.write_epub(path, epub_book)
 82 |         log.info(book.book_name + " epub导出成功!")
 83 |     except:
 84 |         log.info(book.book_name + " epub导出失败!")
 85 |         return
 86 |     # 推送
 87 |     if config.read("push_calibre")["enabled"]:
 88 |         push.calibre(book)
 89 | 
 90 | 
 91 | def build_txt(book: Book, chapter_list: Optional[Chapter]):
 92 |     if not chapter_list or not config.read("convert_txt"):
 93 |         return
 94 |     log.info(book.book_name + " 开始生成txt...")
 95 |     common.handle_title(book)
 96 |     path = config.read("txt_dir") + "/" + book.source + "/" + book.book_name + ".txt"
 97 |     os.makedirs(os.path.dirname(path), exist_ok=True)
 98 |     txt_content = ""
 99 |     for chapter in chapter_list:
100 |         # 跳过esj外链
101 |         if chapter.chapter_id.startswith("http"):
102 |             continue
103 |         # 跳过打钱失败的章节
104 |         if chapter.purchase_fail_flag and chapter.purchase_fail_flag == 1:
105 |             continue
106 |         # 跳过字数过少的章节
107 |         if not chapter.content:
108 |             continue
109 |         if config.read("least_words") > 0 and config.read("least_words") > len(chapter.content):
110 |             continue
111 |         # 繁转简
112 |         chapter_content = chapter.content
113 |         if config.read('convert_hans'):
114 |             chapter_content = zhconv.convert(chapter_content, 'zh-hans')
115 |         # html转纯文字
116 |         page_body = html.fromstring(chapter_content)
117 |         content_list = page_body.xpath("//text()")
118 |         content_list = [s.replace("\n", "") for s in content_list]
119 |         content_list = [s for s in content_list if s]
120 |         txt_content += chapter.chapter_name + "\n\n"
121 |         txt_content += "\n".join(content_list) + "\n\n"
122 |     # 写入
123 |     with open(path, 'w', encoding='utf-8') as f:
124 |         f.write(txt_content)
125 |     log.info(book.book_name + " txt导出成功!")
126 | 


--------------------------------------------------------------------------------
/utils/image.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import pillow_avif
  3 | from typing import Optional
  4 | 
  5 | from PIL import Image
  6 | from aiohttp import ClientSession
  7 | from ebooklib import epub
  8 | from ebooklib.epub import EpubBook
  9 | 
 10 | from models.chapter import Chapter
 11 | from models.pic import Pic
 12 | from utils import common, config, log
 13 | 
 14 | 
 15 | async def download(pic: Pic, site: str, book_id: str, chapter_id: str, session: ClientSession):
 16 |     headers = {
 17 |         "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
 18 |         "Accept-Encoding": "gzip, deflate, br, zstd",
 19 |         "Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
 20 |         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
 21 |     }
 22 |     file_url = pic.pic_url
 23 |     if site == "masiro" and file_url.startswith("/images"):
 24 |         file_url = config.read("url_config")[site]["pic"] % file_url
 25 |     if "lightnovel.fun" in file_url:
 26 |         headers["referer"] = "https://www.lightnovel.fun/"
 27 |     if "lightnovel.us" in file_url:
 28 |         file_url = file_url.replace("lightnovel.us", "lightnovel.fun")
 29 |         headers["referer"] = "https://www.lightnovel.fun/"
 30 |     if 'i.noire.cc:332' in file_url:
 31 |         file_url = file_url.replace("i.noire.cc:332", "i.noire.cc")
 32 |     file_name = common.filename_from_url(file_url)
 33 |     if file_name.endswith(".i"):
 34 |         file_name = file_name.replace(".i", ".avif")
 35 |     file_path = config.read("image_dir") + "/" + site + "/" + book_id + "/" + chapter_id + "/" + file_name
 36 |     # 创建文件夹
 37 |     os.makedirs(os.path.dirname(file_path), exist_ok=True)
 38 |     proxy = config.read('proxy_url') if config.read('proxy_url') else None
 39 |     timeout = config.read('time_out')
 40 |     try:
 41 |         res = await session.get(url=file_url, proxy=proxy, headers=headers, timeout=timeout)
 42 |         if not res.status == 200:
 43 |             raise Exception()
 44 |         image_data = await res.read()
 45 |         # 写入文件
 46 |         with open(file_path, 'wb') as f:
 47 |             f.write(image_data)
 48 |         # avif处理
 49 |         if file_name.endswith(".avif"):
 50 |             file_path = avif(file_path)
 51 |         pic.pic_path = file_path
 52 |     except Exception:
 53 |         log.info("%s 图片下载失败！" % pic.pic_url)
 54 |     return
 55 | 
 56 | 
 57 | async def cover(pic_url: str, site: str, book_id: str, session: ClientSession):
 58 |     headers = {
 59 |         "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
 60 |         "Accept-Encoding": "gzip, deflate, br, zstd",
 61 |         "Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
 62 |         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
 63 |     }
 64 |     if site == "masiro" and pic_url.startswith("/images"):
 65 |         pic_url = config.read("url_config")[site]["pic"] % pic_url
 66 |     if "lightnovel.fun" in pic_url:
 67 |         headers["referer"] = "https://www.lightnovel.fun/"
 68 |     if "lightnovel.us" in pic_url:
 69 |         pic_url = pic_url.replace("lightnovel.us", "lightnovel.fun")
 70 |         headers["referer"] = "https://www.lightnovel.fun/"
 71 |     if 'i.noire.cc:332' in pic_url:
 72 |         pic_url = pic_url.replace("i.noire.cc:332", "i.noire.cc")
 73 |     file_name = "book_cover.jpg"
 74 |     file_path = config.read("image_dir") + "/" + site + "/" + book_id + "/" + file_name
 75 |     # 创建文件夹
 76 |     os.makedirs(os.path.dirname(file_path), exist_ok=True)
 77 |     proxy = config.read('proxy_url') if config.read('proxy_url') else None
 78 |     timeout = config.read('time_out')
 79 |     try:
 80 |         res = await session.get(url=pic_url, proxy=proxy, headers=headers, timeout=timeout)
 81 |         if not res.status == 200:
 82 |             raise Exception()
 83 |         image_data = await res.read()
 84 |         # 写入文件
 85 |         with open(file_path, 'wb') as f:
 86 |             f.write(image_data)
 87 |     except Exception:
 88 |         log.info("%s 封面下载失败！" % pic_url)
 89 |     return
 90 | 
 91 | 
 92 | def replace(chapter: Chapter, pics: Optional[Pic], epub_book: EpubBook):
 93 |     if not chapter.content or not pics:
 94 |         return
 95 |     content = chapter.content
 96 |     for pic in pics:
 97 |         try:
 98 |             image_data = open(pic.pic_path, "rb").read()
 99 |             image_name = common.filename_from_url(pic.pic_path)
100 |             image_type = image_name.split('.')[-1]
101 |             image = epub.EpubImage(uid=image_name, file_name='Image/' + image_name,
102 |                                    media_type='image/' + image_type, content=image_data)
103 |             epub_book.add_item(image)
104 |             if pic.pic_id:
105 |                 content = content.replace(pic.pic_id, ("Image/" + image_name))
106 |             else:
107 |                 content = content.replace(pic.pic_url, ("Image/" + image_name))
108 |         except:
109 |             continue
110 |     chapter.content = content
111 | 
112 | 
113 | def avif(in_path) -> str:
114 |     try:
115 |         avif_image = Image.open(in_path)
116 |         png_image = avif_image.convert('RGB')
117 |         out_path = os.path.splitext(in_path)[0] + '.png'
118 |         png_image.save(out_path, 'PNG')
119 |         avif_image.close()
120 |         png_image.close()
121 |     except:
122 |         return in_path
123 |     os.remove(in_path)
124 |     return out_path
125 | 


--------------------------------------------------------------------------------
/utils/log.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | import time
 4 | 
 5 | 
 6 | def init_log():
 7 |     today = time.strftime('%Y-%m-%d', time.localtime(time.time()))
 8 |     log_path = './log'
 9 |     log_name = today + ".log"
10 |     if not os.path.exists(log_path):
11 |         os.makedirs(log_path)
12 |     if not os.path.exists('./log/{}'.format(log_name)):
13 |         report_file = open('./log/{}'.format(log_name), 'w')
14 |         report_file.close()
15 |     global logger
16 |     logger = logging.getLogger()
17 |     handler = logging.FileHandler('./log/{}'.format(log_name), encoding='utf8')
18 |     console = logging.StreamHandler()
19 |     formatter = logging.Formatter('%(asctime)s - %(message)s')
20 |     handler.setFormatter(formatter)
21 |     logger.addHandler(handler)
22 |     console.setFormatter(formatter)
23 |     logger.addHandler(console)
24 | 
25 | 
26 | def remove_log():
27 |     logger.removeHandler(logger.handlers[0])
28 |     logger.removeHandler(logger.handlers[0])
29 | 
30 | 
31 | def info(message):
32 |     logger.setLevel(logging.INFO)
33 |     logging.info(message)


--------------------------------------------------------------------------------
/utils/push.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | 
 3 | from models.book import Book
 4 | from utils import config, log
 5 | 
 6 | 
 7 | def calibre(book: Book):
 8 |     abs_path = config.read("push_calibre")["absolute_path"]
 9 |     container_id = config.read("push_calibre")["container_id"]
10 |     library_path = config.read("push_calibre")["library_path"]
11 |     if not container_id or not abs_path or not library_path:
12 |         return
13 |     full_path = abs_path + "/" + book.source + "/" + book.book_name + ".epub"
14 |     log.info("%s 开始推送calibre..." % book.book_name)
15 |     docker_command = ["docker", "exec", "-it", container_id]
16 |     try:
17 |         # calibre search
18 |         calibre_search_command = "calibredb search publisher:" + book.source + \
19 |                                  " 'title:\"" + book.book_name + "\"'"
20 |         docker_search_command = docker_command + ["/bin/sh", "-c", calibre_search_command]
21 |         search_result = subprocess.run(docker_search_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
22 |         calibre_id = search_result.stdout
23 |         if calibre_id and calibre_id.isdigit():
24 |             # calibre remove
25 |             calibre_remove_command = "calibredb remove " + calibre_id
26 |             docker_remove_command = docker_command + ["/bin/sh", "-c", calibre_remove_command]
27 |             subprocess.run(docker_remove_command)
28 |         # calibre add
29 |         calibre_add_command = "calibredb add \"" + full_path + "\" --with-library " + library_path
30 |         docker_add_command = docker_command + ["/bin/sh", "-c", calibre_add_command]
31 |         subprocess.run(docker_add_command)
32 |         log.info("%s 推送calibre成功！" % book.book_name)
33 |     except:
34 |         log.info("%s 推送calibre失败！" % book.book_name)


--------------------------------------------------------------------------------
/utils/request.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import random
 3 | 
 4 | from aiohttp import ClientSession
 5 | from tenacity import retry, stop_after_attempt
 6 | 
 7 | from utils import config, log
 8 | 
 9 | 
10 | @retry(stop=stop_after_attempt(3))
11 | async def get(url: str, headers: dict, session: ClientSession) -> str:
12 |     if config.read('sleep_time') > 0 and not 'masiro' in url:
13 |         await asyncio.sleep(random.random() * config.read('sleep_time'))
14 |     elif 'masiro.' in url:
15 |         # 真白萌反爬严格，强制sleep
16 |         await asyncio.sleep(10)
17 |     proxy = config.read('proxy_url') if config.read('proxy_url') else None
18 |     if 'masiro.' in url or '/v1' in url:
19 |         proxy = None
20 |     timeout = config.read('time_out')
21 |     try:
22 |         res = await session.get(url=url, headers=headers, proxy=proxy, timeout=timeout)
23 |         if not res.status == 200:
24 |             raise Exception()
25 |         res_text = await res.text("utf-8", "ignore")
26 |         return res_text
27 |     except Exception:
28 |         log.info("%s 请求失败！" % url)
29 |         return None
30 | 
31 | 
32 | @retry(stop=stop_after_attempt(3))
33 | async def post_data(url: str, headers: dict, data: dict, session: ClientSession) -> dict:
34 |     if config.read('sleep_time') > 0:
35 |         await asyncio.sleep(random.random() * config.read('sleep_time'))
36 |     proxy = config.read('proxy_url') if config.read('proxy_url') else None
37 |     if 'masiro.' in url or '/v1' in url:
38 |         proxy = None
39 |     timeout = config.read('time_out')
40 |     try:
41 |         res = await session.post(url=url, headers=headers, proxy=proxy, data=data, timeout=timeout)
42 |         if not res.status == 200:
43 |             raise Exception()
44 |         res_text = await res.text()
45 |         return {
46 |             "text": res_text,
47 |             "headers": res.headers
48 |         }
49 |     except Exception:
50 |         log.info("%s 请求失败！" % url)
51 |         return None
52 | 
53 | 
54 | @retry(stop=stop_after_attempt(3))
55 | async def post_json(url: str, headers: dict, json: dict, session: ClientSession) -> str:
56 |     if config.read('sleep_time') > 0:
57 |         await asyncio.sleep(random.random() * config.read('sleep_time'))
58 |     proxy = config.read('proxy_url') if config.read('proxy_url') else None
59 |     timeout = config.read('time_out')
60 |     if 'masiro.' in url or '/v1' in url:
61 |         proxy = None
62 |     if '/v1' in url:
63 |         timeout = 120
64 |     try:
65 |         res = await session.post(url=url, headers=headers, proxy=proxy, json=json, timeout=timeout)
66 |         if not res.status == 200:
67 |             raise Exception()
68 |         res_text = await res.text()
69 |         return res_text
70 |     except Exception:
71 |         log.info("%s 请求失败！" % url)
72 |         return None
73 | 


--------------------------------------------------------------------------------