├── .gitattributes
├── .github
├── ISSUE_TEMPLATE
│ ├── bug-report.yml
│ ├── feature-request.yml
│ └── rfc.yml
├── PULL_REQUEST_TEMPLATE.md
└── workflows
│ ├── gh_pages.yml
│ ├── release.yml
│ └── schedule_test.yml
├── .gitignore
├── CGS.py
├── ComicSpider
├── __init__.py
├── items.py
├── middlewares.py
├── pipelines.py
├── settings.py
└── spiders
│ ├── __init__.py
│ ├── basecomicspider.py
│ ├── comic90mh.py
│ ├── ehentai.py
│ ├── hitomi.py
│ ├── jm.py
│ ├── kaobei.py
│ ├── mangabz.py
│ └── wnacg.py
├── GUI
├── __init__.py
├── browser_window.py
├── conf_dialog.py
├── gui.py
├── hitomi_tools.py
├── mainwindow.py
├── src
│ ├── __init__.py
│ ├── material_ct.py
│ └── preview_format
│ │ ├── bootstrap.min.js
│ │ ├── index.html
│ │ ├── index_by_clip.html
│ │ ├── public.css
│ │ ├── public.js
│ │ ├── tasks_extend.js
│ │ └── tip_downloaded.js
├── thread
│ ├── __init__.py
│ └── other.py
└── uic
│ ├── __init__.py
│ ├── browser.py
│ ├── conf_dia.py
│ ├── qfluent
│ ├── __init__.py
│ ├── action_factory.py
│ ├── components.py
│ └── patch_uic.py
│ └── ui_mainwindow.py
├── LICENSE
├── README.md
├── assets
├── __init__.py
├── conf_sample.yml
├── config_icon.png
├── github_format.html
├── icon.png
└── res
│ ├── __init__.py
│ ├── locale
│ ├── en-US.yml
│ └── zh-CN.yml
│ └── transfer.py
├── crawl_only.py
├── deploy
├── __init__.py
├── env_record.json
├── launcher
│ ├── CGS.bat
│ └── mac
│ │ ├── CGS.bash
│ │ ├── __init__.py
│ │ ├── dos2unix.bash
│ │ └── init.bash
├── online_scripts
│ └── win.ps1
├── packer.py
├── pkg_mgr.py
└── update.py
├── docs
├── .vitepress
│ ├── config.ts
│ └── theme
│ │ ├── Layout.vue
│ │ ├── index.ts
│ │ └── style.css
├── _github
│ ├── README_en.md
│ ├── preset_preview.md
│ ├── preset_stable.md
│ └── release_notes.md
├── assets
│ └── img
│ │ ├── config
│ │ ├── conf_usage.png
│ │ └── conf_usage_en.png
│ │ ├── deploy
│ │ └── mac-app-move.jpg
│ │ ├── dev
│ │ └── branch.png
│ │ ├── faq
│ │ └── ditto_settings.png
│ │ ├── feature
│ │ └── browser_copyBtn.png
│ │ └── icons
│ │ └── website
│ │ ├── copy.png
│ │ ├── ehentai.png
│ │ ├── hitomi.png
│ │ ├── jm.png
│ │ ├── mangabz.png
│ │ └── wnacg.png
├── changelog
│ └── history.md
├── config
│ ├── index.md
│ └── other.md
├── deploy
│ ├── mac-required-reading.md
│ └── quick-start.md
├── dev
│ ├── contribute.md
│ ├── dev_spider.md
│ └── i18n.md
├── faq
│ ├── extra.md
│ ├── index.md
│ └── other.md
├── feature
│ ├── index.md
│ └── script.md
├── home
│ └── index.md
├── index.md
├── locate
│ └── en
│ │ ├── config
│ │ └── index.md
│ │ ├── deploy
│ │ ├── mac-required-reading.md
│ │ └── quick-start.md
│ │ └── index.md
├── package.json
└── public
│ ├── CGS-girl.png
│ └── _redirects
├── requirements
├── linux.txt
├── mac_arm64.txt
├── mac_x86_64.txt
├── script
│ ├── mac_arm64.txt
│ ├── mac_x86_64.txt
│ └── win.txt
└── win.txt
├── scrapy.cfg
├── utils
├── __init__.py
├── docs.py
├── preview
│ ├── __init__.py
│ └── el.py
├── processed_class.py
├── redViewer_tools.py
├── script
│ ├── __init__.py
│ ├── extra.py
│ └── image
│ │ ├── __init__.py
│ │ ├── expander.py
│ │ ├── kemono.py
│ │ ├── nekohouse.py
│ │ └── saucenao.py
├── sql
│ └── __init__.py
└── website
│ ├── __init__.py
│ ├── core.py
│ └── hitomi
│ ├── __init__.py
│ └── scape_dataset.py
└── variables
└── __init__.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | # 核心配置:自动识别文本文件,并强制统一为 LF
2 | * text=auto eol=lf
3 |
4 | # 明确排除二进制文件(避免误处理)
5 | *.png binary
6 | *.jpg binary
7 | *.zip binary
8 | *.7z binary
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug-report.yml:
--------------------------------------------------------------------------------
1 | name: Bug Report 🐛
2 | description: 创建 Bug 报告以帮助我们改进 / Create a report to help us improve
3 | title: 🐛[Bug]
4 | assignees:
5 | - jasoneri
6 | labels: bug
7 | body:
8 | - type: textarea
9 | attributes:
10 | label: 🐛 描述(Description)
11 | description: >-
12 | 详细地描述 bug,让大家都能理解/Describe the bug in detail so that everyone can
13 | understand it
14 | validations:
15 | required: true
16 | - type: textarea
17 | attributes:
18 | label: 📷 复现步骤(Steps to Reproduce)
19 | description: >-
20 | 清晰描述复现步骤,让别人也能看到问题/Clearly describe the reproduction steps so that
21 | others can see the problem
22 | value: |-
23 | 1.
24 | 2.
25 | 3.
26 | validations:
27 | required: true
28 | - type: textarea
29 | attributes:
30 | label: ' 📄 [异常/日志]信息([Exception/Log] Information)'
31 | description: 如报错等其他信息可以贴在这里,或上传log文件/Other information such as crash can be posted here, or upload log file
32 | - type: markdown
33 | attributes:
34 | value: '## 🚑 **基本信息(Basic Information)**'
35 | - type: input
36 | attributes:
37 | label: 程序版本(Program version)
38 | description: >-
39 | 填写当前程序的版本号,在GUI主窗口的左上角图标右边 / Enter the current version, On top-left corner
40 | validations:
41 | required: true
42 | - type: dropdown
43 | attributes:
44 | label: 系统(OS)
45 | description: 在哪些系统中出现此问题/In which systems does this problem occur
46 | multiple: true
47 | options:
48 | - Windows10~+
49 | - Windows7~-
50 | - macOS
51 | validations:
52 | required: true
53 | - type: textarea
54 | attributes:
55 | label: 📄 配置文件(Configuration file)
56 | description: "上传配置文件`scripts/conf.yml` 或 配置窗口的截图 (github是公开的记得脱敏!下面字段的值使用马赛克等手段模糊化)
57 | /Upload configure file`scripts/conf.yml` or configure-dialog screenshots (remember desensitization! The value of below field must be blur)"
58 | placeholder: >-
59 | 储存路径, 代理,eh_cookies / sv_path, proxies, eh_cookies
60 | - type: textarea
61 | attributes:
62 | label: ' 🖼 其他截图(Screenshots)'
63 | description: 其他截图可以贴在这里/Screenshots of other situations can be posted here
64 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature-request.yml:
--------------------------------------------------------------------------------
1 | ---
2 | name: "Feature request 👑"
3 | description: 对程序的需求或建议 / Suggest an idea for program
4 | title: "👑[Feature Request]"
5 | labels: enhancement
6 | assignees:
7 | - jasoneri
8 | body:
9 | - type: textarea
10 | attributes:
11 | label: 🥰 需求描述(Description)
12 | placeholder: 详细地描述需求,让大家都能理解/escribe the requirements in detail so that everyone can understand them
13 | validations:
14 | required: true
15 | - type: textarea
16 | attributes:
17 | label: 🧐 解决方案(Solution)
18 | placeholder: 如果你有解决方案,在这里清晰地阐述/If you have a solution, explain it clearly here
19 | validations:
20 | required: true
21 | - type: textarea
22 | attributes:
23 | label: Additional context/其他信息
24 | placeholder: 如截图等其他信息可以贴在这里/Other information such as screenshots can be posted here
25 | validations:
26 | required: false
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/rfc.yml:
--------------------------------------------------------------------------------
1 | name: 功能提案
2 | description: Request for Comments
3 | title: "💡[RFC]"
4 | labels: ["RFC"]
5 | body:
6 | - type: markdown
7 | attributes:
8 | value: |
9 | 一份提案(RFC)定位为 **「在某功能/重构的具体开发前,用于开发者间 review 技术设计/方案的文档」**,
10 | 目的是让协作的开发者间清晰的知道「要做什么」和「具体会怎么做」,以及所有的开发者都能公开透明的参与讨论;
11 | 以便评估和讨论产生的影响 (遗漏的考虑、向后兼容性、与现有功能的冲突),
12 | 因此提案侧重在对解决问题的 **方案、设计、步骤** 的描述上。
13 |
14 | 如果仅希望讨论是否添加或改进某功能本身,请使用 -> [Issue: 功能改进](https://github.com/jasoneri/ComicGUISpider/issues/new?labels=feature+request&template=feature-request.yml&title=👑%5BFeature+Request%5D+)
15 | - type: textarea
16 | id: background
17 | attributes:
18 | label: 背景 or 问题
19 | description: 简单描述遇到的什么问题或需要改动什么。可以引用其他 issue、讨论、文档等。
20 | validations:
21 | required: true
22 | - type: textarea
23 | id: goal
24 | attributes:
25 | label: 🥰 目标 & 方案简述
26 | description: 简单描述提案此提案实现后,**预期的目标效果**,以及简单大致描述会采取的方案/步骤,可能会/不会产生什么影响。
27 | validations:
28 | required: true
29 | - type: textarea
30 | id: design
31 | attributes:
32 | label: 🧐 方案设计 & 实现步骤
33 | description: |
34 | 详细描述你设计的具体方案,可以考虑拆分列表或要点,一步步描述具体打算如何实现的步骤和相关细节。
35 |         这部分不需要一次性写完整,即使在创建完此提案 issue 后,依旧可以再次编辑修改。
36 | validations:
37 | required: false
38 | - type: textarea
39 | id: alternative
40 | attributes:
41 | label: 😸 替代方案 & 对比
42 | description: |
43 |         [可选] 为了实现目标效果,还考虑过什么其他方案,有什么对比?
44 | validations:
45 | required: false
46 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ## Description
4 |
5 |
6 | ## Related Issues
7 |
11 |
12 | ### Checklist:
13 |
14 | * [ ] Have you checked to ensure there aren't other open [Pull Requests](../../../pulls) for the same update/change?
15 | * [ ] Have you linted your code locally prior to submission?
16 | * [ ] Have you successfully ran app with your changes locally?
17 |
--------------------------------------------------------------------------------
/.github/workflows/gh_pages.yml:
--------------------------------------------------------------------------------
1 | name: Build to gh_pages
2 |
3 | on:
4 | pull_request:
5 | types:
6 | - closed
7 | workflow_dispatch:
8 |
9 | jobs:
10 | changes:
11 | if: |
12 | github.event.pull_request.merged == true &&
13 | github.event.pull_request.base.ref == 'GUI'
14 | runs-on: ubuntu-latest
15 | permissions:
16 | pull-requests: read
17 | outputs:
18 | docs: ${{ steps.filter.outputs.docs }}
19 | steps:
20 | - uses: dorny/paths-filter@v3
21 | id: filter
22 | with:
23 | filters: |
24 | docs:
25 | - 'docs/**'
26 |
27 | pages-deploy:
28 | needs: changes
29 | if: ${{ needs.changes.outputs.docs == 'true' }}
30 | runs-on: ubuntu-latest
31 | permissions:
32 | contents: write
33 |
34 | steps:
35 | - uses: actions/checkout@v4
36 | - name: Install dependencies
37 | run: |
38 | cd docs && npm install
39 | - name: Build VitePress docs
40 | run: |
41 | cd docs && npm run docs:build
42 | - name: Deploy to GitHub Pages
43 | uses: peaceiris/actions-gh-pages@v3
44 | with:
45 | github_token: ${{ secrets.GITHUB_TOKEN }}
46 | publish_dir: docs/.vitepress/dist
47 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Release
2 | on:
3 | push:
4 | tags:
5 | - 'v*.*.*'
6 | workflow_dispatch:
7 |
8 | jobs:
9 | prebuild-windows:
10 | runs-on: windows-latest
11 | defaults:
12 | run:
13 | shell: pwsh
14 | steps:
15 | - name: Checkout code
16 | uses: actions/checkout@v4
17 | with:
18 | fetch-depth: 1
19 | - name: Add PATH
20 | run: |
21 | $7zPath = "C:\Program Files\7-Zip"
22 | $PresetPath = "D:\build\CGS_preset"
23 | Add-Content $env:GITHUB_PATH "$7zPath"
24 | echo "presetP=D:\build\CGS_preset" >> $env:GITHUB_ENV
25 |
26 | - name: Download and Extract Preset
27 | run: |
28 | New-Item -ItemType Directory -Path D:\tmp -Force
29 | New-Item -ItemType Directory -Path D:\build -Force
30 | Invoke-WebRequest -Uri "https://github.com/jasoneri/imgur/releases/download/preset/CGS_preset.7z" -OutFile D:\tmp\CGS_preset.7z
31 | 7z x D:\tmp\CGS_preset.7z -o"$env:presetP" -spe -y
32 | Remove-Item -Path "D:\tmp\CGS_preset.7z" -Force -ErrorAction Stop
33 | - name: Put SourceCode in Preset
34 | run: |
35 | $sourcePath = $env:GITHUB_WORKSPACE
36 | $targetPath = "D:\build\CGS_preset\scripts"
37 | New-Item -ItemType Directory -Path $targetPath -Force
38 | Copy-Item -Path "$sourcePath\*" -Destination $targetPath -Recurse -Force
39 | - name: Install Dependencies
40 | working-directory: D:\build\CGS_preset
41 | run: irm https://raw.githubusercontent.com/jasoneri/ComicGUISpider/refs/heads/GUI/deploy/online_scripts/win.ps1 | iex
42 | - name: Rebuild Preset
43 | working-directory: D:\build\CGS_preset
44 | run: |
45 | Remove-Item -Path "D:\build\CGS_preset\scripts" -Recurse -Force
46 | 7z a -t7z -m0=lzma2 -mx9 -o$env:presetP CGS_preset
47 | Copy-Item -Path "D:\build\CGS_preset\CGS_preset.7z" -Destination "D:\tmp\CGS_preset.7z" -Force
48 |
49 | - name: Upload Preset
50 | uses: actions/upload-artifact@v4
51 | with:
52 | name: windows-preset
53 | path: D:\tmp\CGS_preset.7z
54 |
55 | build:
56 | runs-on: ubuntu-latest
57 | needs: [prebuild-windows]
58 | container:
59 | image: python:3.12
60 | volumes:
61 | - /tmp/build:/build
62 |
63 | steps:
64 | - name: Checkout code
65 | uses: actions/checkout@v4
66 | with:
67 | fetch-depth: 1
68 | path: src
69 |
70 | - name: Install dependencies
71 | run: python -m pip install pydos2unix py7zr tqdm loguru markdown pyyaml polib
72 |
73 | - name: Clean repository
74 | run: |
75 | mkdir -p /build/scripts
76 | mv src/* /build/scripts/
77 | rm -rf /build/scripts/.git
78 | find /build/scripts -name '__pycache__' -exec rm -rf {} +
79 | find /build/scripts -name '*.pyc' -delete
80 |
81 | - name: Download Windows Preset
82 | uses: actions/download-artifact@v4
83 | with:
84 | name: windows-preset
85 | path: /tmp/
86 |
87 | - name: Download macOS Preset
88 | run: |
89 | wget -O /tmp/CGS-macOS_preset.7z \
90 | https://github.com/jasoneri/imgur/releases/download/preset/CGS-macOS_preset.7z
91 |
92 | - name: Compose Release Notes
93 | id: compose_notes
94 | run: |
95 | TAG_NAME="${GITHUB_REF#refs/tags/}"
96 | echo "version: $TAG_NAME"
97 | base=$(cat /build/scripts/docs/_github/release_notes.md)
98 |
99 | case "$TAG_NAME" in
100 | *beta*)
101 | extra=$(cat /build/scripts/docs/_github/preset_preview.md)
102 | echo "is_beta=true" >> $GITHUB_OUTPUT
103 | ;;
104 | *)
105 | extra=$(cat /build/scripts/docs/_github/preset_stable.md)
106 | echo "is_beta=false" >> $GITHUB_OUTPUT
107 | ;;
108 | esac
109 |
110 | echo "$base\n$extra" > /build/full_body.md
111 |
112 | - name: Build packages
113 | working-directory: /build
114 | run: |
115 | python scripts/deploy/packer.py windows -v "${{ github.ref_name }}"
116 | python scripts/deploy/packer.py mac -v "${{ github.ref_name }}"
117 |
118 | - name: Create Release
119 | uses: softprops/action-gh-release@v1
120 | with:
121 | tag_name: ${{ github.ref }}
122 | name: ${{ github.ref_name }}
123 | prerelease: ${{ steps.compose_notes.outputs.is_beta == 'true' }}
124 | body_path: /build/full_body.md
125 | files: |
126 | /build/CGS.7z
127 | /build/CGS-macOS.7z
128 | env:
129 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
130 |
--------------------------------------------------------------------------------
/.github/workflows/schedule_test.yml:
--------------------------------------------------------------------------------
1 | name: Schedule Test Status
2 | permissions:
3 | contents: read
4 | pull-requests: write
5 | on:
6 | schedule:
7 | - cron: '0 4 */4 * *' # UTC 4 == +8 12:00
8 | workflow_dispatch:
9 |
10 | jobs:
11 | test-suite:
12 | runs-on: windows-latest
13 | env:
14 | PYTHONIOENCODING: utf-8
15 | defaults:
16 | run:
17 | shell: pwsh
18 | strategy:
19 | matrix:
20 | include:
21 | # - "ehentai" "jm" 使用本地 act 运行上传
22 | - crawler_name: "kaobei"
23 | params: "-w 1 -k 海贼王 -i 1 -i2 -1 -sp 50021"
24 | - crawler_name: "wnacg"
25 | params: "-w 3 -k ミモネル -i 2 -sp 50051"
26 | - crawler_name: "mangabz"
27 | params: "-w 5 -k 海贼王 -i 1 -i2 -1 -sp 50081"
28 | - crawler_name: "hitomi"
29 | params: "-w 6 -k artist/date/published/mimonel-japanese -i 1 -sp 50111"
30 |
31 | steps:
32 | - name: Checkout code
33 | uses: actions/checkout@v4
34 | with:
35 | fetch-depth: 1
36 | - name: Add PATH
37 | run: |
38 | $7zPath = "C:\Program Files\7-Zip"
39 | Add-Content $env:GITHUB_PATH "$7zPath"
40 | echo "runtimeP=D:\build\CGS" >> $env:GITHUB_ENV
41 | - name: Get Latest Release Tag
42 | id: get-latest-tag
43 | run: |
44 | $response = Invoke-WebRequest -Uri "https://api.github.com/repos/jasoneri/ComicGUISpider/releases?per_page=1" -Headers @{ "Accept" = "application/vnd.github.v3+json" }
45 | $latest_tag = ($response.Content | ConvertFrom-Json)[0].tag_name
46 | echo "latest_tag=$latest_tag" >> $env:GITHUB_OUTPUT
47 | - name: Download Latest CGS
48 | run: |
49 | New-Item -ItemType Directory -Path D:\tmp -Force
50 | New-Item -ItemType Directory -Path D:\build -Force
51 | Invoke-WebRequest -Uri "https://github.com/jasoneri/ComicGUISpider/releases/download/${{ steps.get-latest-tag.outputs.latest_tag }}/CGS.7z" -OutFile D:\tmp\CGS.7z
52 | 7z x D:\tmp\CGS.7z -o"$env:runtimeP" -spe -y
53 | Remove-Item -Path "D:\tmp\CGS.7z" -Force -ErrorAction Stop
54 | - name: Put SourceCode in CGS
55 | run: |
56 | $sourcePath = $env:GITHUB_WORKSPACE
57 | $targetPath = "D:\build\CGS\scripts"
58 | New-Item -ItemType Directory -Path $targetPath -Force
59 | Copy-Item -Path "$sourcePath\*" -Destination $targetPath -Recurse -Force
60 | - name: Run tests and Generate report
61 | working-directory: D:\build\CGS
62 | env:
63 | CRAWLER_NAME: ${{ matrix.crawler_name }}
64 | run: |
65 | $TODAY = (Get-Date -Format "MM-dd").ToString()
66 |
67 | $stdoutLog = $env:CRAWLER_NAME + "_stdout.log"
68 | $stderrLog = $env:CRAWLER_NAME+ "_stderr.log"
69 | $pythonArgs = "./scripts/crawl_only.py ${{ matrix.params }} -l INFO -dt"
70 | $timeoutSeconds = if ($env:CRAWLER_NAME -eq 'hitomi') { 120 } else { 60 }
71 |
72 | $process = Start-Process -FilePath "./runtime/python.exe" -ArgumentList $pythonArgs -NoNewWindow -PassThru -RedirectStandardOutput $stdoutLog -RedirectStandardError $stderrLog
73 | try {
74 | $process | Wait-Process -Timeout $timeoutSeconds -ErrorAction Stop
75 | $result = $process.ExitCode
76 | } catch [TimeoutException] {
77 | $process | Stop-Process -Force
78 | $result = 124
79 | }
80 | $logContent = Get-Content $stderrLog -Raw
81 | $item_count = if ($logContent -match "image/downloaded': (\d+)") { [int]$matches[1] } else { 0 }
82 | $statusData = @{
83 | schemaVersion = 1
84 | label = $TODAY
85 | message = $(if ($item_count -eq 0) { "fail" } else { "pass" })
86 | color = $(if ($item_count -eq 0) { "critical" } else { "success" })
87 | }
88 | $artifactPath = "D:\build\badges"
89 | New-Item -Path $artifactPath -ItemType Directory -Force | Out-Null
90 | $statusData | ConvertTo-Json -Compress | Out-File "$artifactPath\status_$env:CRAWLER_NAME.json" -Encoding utf8
91 | Get-ChildItem -Path $artifactPath -Recurse
92 | Copy-Item -Path $stderrLog -Destination "D:\build\badges\$stderrLog" -Force
93 |
94 | - name: Upload artifacts
95 | uses: actions/upload-artifact@v4
96 | with:
97 | name: badges-${{ matrix.crawler_name }}
98 | path: D:\build\badges/
99 |
100 | deploy-job:
101 | needs: test-suite
102 | runs-on: ubuntu-latest
103 | permissions:
104 | actions: write
105 | contents: read
106 | steps:
107 | - name: Download all artifacts
108 | uses: actions/download-artifact@v4
109 | with:
110 | path: /tmp/
111 | - name: Set up Node.js
112 | uses: actions/setup-node@v3
113 | with:
114 | node-version: '20'
115 | - name: Install wrangler
116 | run: |
117 | npm install -g wrangler
118 | - name: Merge artifacts
119 | run: |
120 | mkdir -p /tmp/merged-badges
121 | sudo find /tmp/ -type f -name 'status_*.json' -exec cp {} /tmp/merged-badges/ \;
122 |
123 | set +e
124 | for crawler in "ehentai" "jm"; do
125 | curl -s "https://cgs-status-badges.pages.dev/status_${crawler}.json" -o "/tmp/merged-badges/status_${crawler}.json" || true
126 | done
127 | - name: Deploy to CloudFlare Pages
128 | env:
129 | CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }}
130 | CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
131 | run: |
132 | wrangler pages deploy /tmp/merged-badges/ --project-name=cgs-status-badges --branch=main
133 | - name: Delete workflow runs
134 | uses: Mattraks/delete-workflow-runs@main
135 | with:
136 | token: ${{ secrets.GITHUB_TOKEN }}
137 | repository: ${{ github.repository }}
138 | retain_days: 0
139 | keep_minimum_runs: 5
140 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | ##----------idea----------
10 | *.iml
11 | .idea/
12 | *.ipr
13 | *.iws
14 |
15 | # Distribution / packaging
16 | .Python
17 | build/
18 | develop-eggs/
19 | dist/
20 | downloads/
21 | eggs/
22 | .eggs/
23 | lib/
24 | lib64/
25 | parts/
26 | sdist/
27 | var/
28 | wheels/
29 | pip-wheel-metadata/
30 | share/python-wheels/
31 | *.egg-info/
32 | .installed.cfg
33 | *.egg
34 | MANIFEST
35 | node_modules/
36 | docs/.vitepress/dist
37 | docs/.vitepress/cache
38 | package-lock.json
39 |
40 | # PyInstaller
41 | # Usually these files are written by a python script from a template
42 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
43 | *.manifest
44 | *.spec
45 |
46 | # Installer logs
47 | pip-log.txt
48 | pip-delete-this-directory.txt
49 |
50 | # GUI related
51 | *.ui
52 | source/
53 |
54 | # Unit test / coverage reports
55 | htmlcov/
56 | .tox/
57 | .nox/
58 | .coverage
59 | .coverage.*
60 | .cache
61 | nosetests.xml
62 | coverage.xml
63 | *.cover
64 | *.py,cover
65 | .hypothesis/
66 | .pytest_cache/
67 |
68 | # Translations
69 | *.po
70 | *.mo
71 | *.pot
72 |
73 | # Django stuff:
74 | *.log
75 | local_settings.py
76 | db.sqlite3
77 | db.sqlite3-journal
78 |
79 | # Flask stuff:
80 | instance/
81 | .webassets-cache
82 |
83 | # Scrapy stuff:
84 | .scrapy
85 |
86 | ##----------Other----------
87 | # osx
88 | *~
89 | .DS_Store
90 | gradle.properties
91 | comic/
92 |
93 | # Package Files #
94 | *.jar
95 | *.war
96 | *.nar
97 | *.ear
98 | *.zip
99 | *.tar.gz
100 | *.rar
101 | *.exe
102 | *.xml
103 |
104 | ##----------Python----------
105 | *_origin.py
106 | setting.txt
107 | *.pyc
108 | *_info.txt
109 | private_*.json
110 | *test*.py
111 |
112 | # Sphinx documentation
113 | docs/_build/
114 | log/
115 |
116 | # PyBuilder
117 | target/
118 |
119 | # vscode
120 | .vscode
121 |
122 | # Jupyter Notebook
123 | .ipynb_checkpoints
124 |
125 | # IPython
126 | profile_default/
127 | ipython_config.py
128 |
129 | # pyenv
130 | .python-version
131 |
132 | # pipenv
133 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
134 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
135 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
136 | # install all needed dependencies.
137 | #Pipfile.lock
138 |
139 | # celery beat schedule file
140 | celerybeat-schedule
141 |
142 | # SageMath parsed files
143 | *.sage.py
144 |
145 | # Environments
146 | .env
147 | .venv
148 | env/
149 | venv/
150 | ENV/
151 | env.bak/
152 | venv.bak/
153 |
154 | # Spyder project settings
155 | .spyderproject
156 | .spyproject
157 |
158 | # Rope project settings
159 | .ropeproject
160 |
161 | # mkdocs documentation
162 | /site
163 |
164 | # mypy
165 | .mypy_cache/
166 | .dmypy.json
167 | dmypy.json
168 |
169 | # Pyre type checker
170 | .pyre/
171 |
172 | # self
173 | analyze/
174 | *.d
175 | *.zip
176 | *demo*.py
177 | __temp
178 | test/*
179 | .lh/
180 | assets/res/*.png
181 | # self/build
182 | *.ico
183 | *.qrc
184 | src/
185 | temp
186 | *-in.txt
187 | untitled*
188 | Pipfile
189 | Pipfile.lock
190 | codecov*
191 | *_local.yml
192 | *-lock.yaml
193 | *.hash
194 | # self/conf
195 | gitee_t.json
196 | conf.yml
197 | record.db
198 | hitomi.db
199 | # self/desc created html
200 | desc.html
201 | docs/*.html
202 | deploy/launcher/mac/*.html
203 | # self/bug-report
204 | _bug_log
205 | # ide
206 | .cursor
--------------------------------------------------------------------------------
/CGS.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 | from multiprocessing import freeze_support
4 |
5 | from PyQt5.QtWidgets import QApplication
6 | from PyQt5.QtCore import Qt
7 |
8 | # 自己项目用到的
9 | from GUI.gui import SpiderGUI
10 | import GUI.src.material_ct
11 |
12 | # from multiprocessing.managers import RemoteError
13 | # sys.setrecursionlimit(5000)
14 |
15 |
def start():
    """Bootstrap the Qt application and launch the spider GUI.

    NOTE: the high-DPI attributes/policy must be configured *before* the
    QApplication instance is created, otherwise Qt ignores them.
    """
    freeze_support()  # needed for multiprocessing in frozen (packaged) builds
    QApplication.setHighDpiScaleFactorRoundingPolicy(Qt.HighDpiScaleFactorRoundingPolicy.PassThrough)
    QApplication.setAttribute(Qt.AA_EnableHighDpiScaling)
    QApplication.setAttribute(Qt.AA_UseHighDpiPixmaps)
    app = QApplication(sys.argv)
    app.setStyle("Fusion")
    ui = SpiderGUI()
    # Route uncaught exceptions into the GUI's handler instead of crashing silently.
    sys.excepthook = ui.hook_exception
    QApplication.processEvents()
    # Fix: propagate Qt's exit status to the OS (previously the return value
    # of app.exec_() was discarded, so failures exited with status 0).
    sys.exit(app.exec_())


if __name__ == '__main__':
    start()
31 |
--------------------------------------------------------------------------------
/ComicSpider/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasoneri/ComicGUISpider/940f80d32a862550fe7dfdef462d21f30e730c1a/ComicSpider/__init__.py
--------------------------------------------------------------------------------
/ComicSpider/items.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import scrapy
3 |
4 |
class ComicspiderItem(scrapy.Item):
    """Scraped unit: one image belonging to a comic title/section."""
    title = scrapy.Field()
    section = scrapy.Field()
    page = scrapy.Field()
    image_urls = scrapy.Field()
    images = scrapy.Field()
    uuid = scrapy.Field()
    uuid_md5 = scrapy.Field()  # acts as a group id, NOT a unique key of this item

    @classmethod
    def get_group_infos(cls, resp_meta) -> dict:
        """Pull the group-level fields out of a response's ``meta`` mapping."""
        section = resp_meta.get('section') or 'meaningless'
        return dict(
            title=resp_meta.get('title'),
            section=section,
            uuid=resp_meta.get('uuid'),
            uuid_md5=resp_meta.get('uuid_md5'),
        )
22 |
--------------------------------------------------------------------------------
/ComicSpider/middlewares.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import re
3 |
4 | # Define here the models for your spider middleware
5 | #
6 | # See documentation in:
7 | # https://docs.scrapy.org/en/latest/topics/spider-middleware.html
8 |
9 | from scrapy import signals
10 | import random
11 | from scrapy.downloadermiddlewares.httpproxy import HttpProxyMiddleware
12 |
13 |
class ComicspiderDownloaderMiddleware(object):
    """Base downloader middleware.

    Retries any non-200 response with a freshly rotated User-Agent (and a
    random proxy when configured) and records exception info in crawler stats.
    """

    def __init__(self, USER_AGENTS, PROXIES):
        self.USER_AGENTS = USER_AGENTS
        self.PROXIES = PROXIES

    @classmethod
    def from_crawler(cls, crawler):
        settings = crawler.settings
        mw = cls(settings.get('UA'), settings.get('PROXY_CUST'))
        crawler.signals.connect(mw.spider_opened, signal=signals.spider_opened)
        return mw

    def process_request(self, request, spider):
        return None

    def process_response(self, request, response, spider):
        # Success path: hand the response straight through.
        if response.status == 200:
            return response
        # Non-200: re-queue the request under a new identity.
        request.headers['User-Agent'] = random.choice(self.USER_AGENTS)
        if self.PROXIES:
            scheme = request.url.split(':')[0]
            request.meta['proxy'] = f"{scheme}://{random.choice(self.PROXIES)}"
        return request

    def process_exception(self, request, exception, spider):
        if exception:
            stats = spider.crawler.stats
            stats.inc_value('process_exception/count')
            stats.set_value('process_exception/last_exception',
                            f"[{type(exception).__name__}]{str(exception).replace('<', '')}")
        return None

    def spider_opened(self, spider):
        spider.logger.info(f'Spider opened: 【{spider.name}】')
48 |
49 |
class UAMiddleware(ComicspiderDownloaderMiddleware):
    """Stamp every outgoing request with the spider's own header set (``spider.ua``)."""

    def process_request(self, request, spider):
        headers = getattr(spider, 'ua', {})
        request.headers.update(headers)
        return None
54 |
55 |
class UAKaobeiMiddleware(ComicspiderDownloaderMiddleware):
    """Header middleware for the kaobei spider.

    Requests to the PC domain get ``spider.ua`` plus a Referer derived from the
    request URL; every other request (mobile API) gets ``spider.ua_mapi``.
    """

    def process_request(self, request, spider):
        if request.url.find(spider.pc_domain) != -1:
            # Fix: copy before mutating. ``spider.ua`` is a shared dict; the
            # original code updated it in place, so a Referer computed for one
            # request leaked into every later request.
            ua = dict(getattr(spider, 'ua', {}))
            if request.url.endswith('/chapters'):
                ua.update({'Referer': f'https://{spider.pc_domain}/comic/{request.url.split("/")[-2]}'})
            else:
                ua.update({'Referer': "/".join(request.url.split("/")[:-2])})
            request.headers.update(ua)
        else:
            request.headers.update(getattr(spider, 'ua_mapi', {}))
        return None
68 |
69 |
class MangabzUAMiddleware(UAMiddleware):
    """Mangabz variant: AJAX POSTs need a full browser-like header set;
    any other request simply gets ``spider.ua``."""

    # Headers mimicking the site's own XHR calls (applied to POST requests only).
    _AJAX_HEADERS = {
        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Mobile/15E148 Safari/604.1",
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
        "Accept-Encoding": "gzip, deflate, br",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "X-Requested-With": "XMLHttpRequest",
        "Origin": "https://www.mangabz.com",
        "Connection": "keep-alive",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
        "TE": "trailers",
    }

    def process_request(self, request, spider):
        if request.method == "POST":
            request.headers.update(self._AJAX_HEADERS)
        else:
            request.headers.update(getattr(spider, 'ua', {}))
        return None
92 |
93 |
class ComicDlAllProxyMiddleware(ComicspiderDownloaderMiddleware):
    """Route every request through a randomly chosen custom proxy (if any)."""

    def process_request(self, request, spider):
        if not self.PROXIES:
            return
        request.meta['proxy'] = f"http://{random.choice(self.PROXIES)}"
99 |
100 |
class ComicDlProxyMiddleware(ComicspiderDownloaderMiddleware):
    """Proxy only requests whose URL matches the spider's ``domain``.

    Typical case: the listing/detail pages need an over-the-wall proxy while
    the image CDN is directly reachable, so only domain-matching URLs are
    routed through a proxy.
    """
    domain_regex: re.Pattern = None

    @classmethod
    def from_crawler(cls, crawler):
        mw = super(ComicDlProxyMiddleware, cls).from_crawler(crawler)
        mw.domain_regex = re.compile(crawler.spider.domain)
        return mw

    def process_request(self, request, spider):
        if self.PROXIES and self.domain_regex.search(request.url):
            request.meta['proxy'] = f"http://{random.choice(self.PROXIES)}"
115 |
116 |
class DisableSystemProxyMiddleware(HttpProxyMiddleware):
    """Neutralize scrapy's environment-variable proxy lookup so that system
    proxies are never applied."""

    def _get_proxy(self, scheme, *args, **kwargs):
        # Report "no proxy configured" for every scheme.
        return (None, None)
120 |
121 |
class RefererMiddleware(ComicspiderDownloaderMiddleware):
    """Force the spider's domain as the ``Referer`` header on every request."""

    def process_request(self, request, spider):
        request.headers['Referer'] = spider.domain
        return None
126 |
--------------------------------------------------------------------------------
/ComicSpider/pipelines.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 | import re
4 | import pathlib
5 | import warnings
6 | from io import BytesIO
7 |
8 | import pillow_avif
9 | from itemadapter import ItemAdapter
10 | from scrapy.http import Request
11 | from scrapy.http.request import NO_CALLBACK
12 | from scrapy.pipelines.images import ImagesPipeline, ImageException
13 | from scrapy.exceptions import ScrapyDeprecationWarning
14 | from scrapy.utils.python import get_func_args
15 |
16 | from utils import conf
17 | from utils.website import JmUtils, set_author_ahead, MangabzUtils
18 | from utils.processed_class import TaskObj
19 | from assets import res
20 |
21 |
class PageNamingMgr:
    """Build on-disk file names for downloaded pages.

    If ``page`` already looks like a file name with an image suffix it is kept
    as-is; otherwise the page number is zero-padded to the width of the task's
    total page count and given the configured image extension.
    """
    img_sv_type = getattr(conf, 'img_sv_type', 'jpg')
    img_suffix_regex = re.compile(r'\.(jpg|png|gif|jpeg|bmp|webp|tiff|tif|ico|avif|svg)$')

    def __init__(self):
        # taskid -> zero-pad width, computed lazily from the task's page count
        self.digits_map = {}

    def __call__(self, taskid, page, info):
        if isinstance(page, str) and self.img_suffix_regex.search(page):
            return page  # already a proper image file name
        if not self.digits_map.get(taskid):
            self.digits_map[taskid] = len(str(info.spider.tasks[taskid].tasks_count))
        width = self.digits_map[taskid]
        return f"{str(page).zfill(width)}.{self.img_sv_type}"
36 |
37 |
class ComicPipeline(ImagesPipeline):
    """Images pipeline that resolves on-disk paths for comic pages and reports progress to the GUI queues."""
    err_flag = 0
    # Characters illegal in file/folder names on common filesystems, replaced with '-'.
    _sub = re.compile(r'([|:<>?*"\\/])')
    # Leading "(...)" prefix (convention/tag block) stripped from special-site titles.
    _sub_index = re.compile(r"^\(.*?\)")

    def __init__(self, store_uri, download_func=None, settings=None):
        super(ComicPipeline, self).__init__(store_uri, download_func, settings)
        self.page_naming = PageNamingMgr()

    # Called before the image is stored: compute the full destination path for this page.
    def file_path(self, request, response=None, info=None, *, item=None):
        """Build the destination file path from the item's title/section/page, creating folders on the way."""
        title = self._sub.sub('-', item.get('title'))
        section = self._sub.sub('-', item.get('section'))
        taskid = item.get('uuid_md5')
        page = self.page_naming(taskid, item.get('page'), info)
        spider = self.spiderinfo.spider
        basepath: pathlib.Path = spider.settings.get('SV_PATH')
        path = self.file_folder(basepath, section, spider, title, item)
        os.makedirs(path, exist_ok=True)
        fin = os.path.join(path, page)
        return fin

    def file_folder(self, basepath, section, spider, title, item):
        """Resolve the album folder: SPECIAL sites go under the ero-book web folder, others under title/section."""
        if spider.name in spider.settings.get('SPECIAL'):
            parent_p = basepath.joinpath(f"{res.SPIDER.ERO_BOOK_FOLDER}/web")
            _title = self._sub_index.sub('', set_author_ahead(title))
            # Optionally suffix the folder with the item's uuid to keep same-titled albums apart.
            path = parent_p.joinpath(f"{_title}[{item['uuid']}]" if conf.addUuid else _title)
        else:
            path = basepath.joinpath(f"{title}/{section}")
        # Remember the first folder resolved for each task so other components can find it later.
        if item['uuid_md5'] not in spider.tasks_path:
            spider.tasks_path[item['uuid_md5']] = path
        return path

    def image_downloaded(self, response, request, info, *, item=None):
        """Persist the image, then push overall percentage and per-task progress back to the GUI."""
        spider = info.spider
        try:
            super(ComicPipeline, self).image_downloaded(response, request, info, item=item)
            stats = spider.crawler.stats
            percent = int((stats.get_value('file_status_count/downloaded', default=0) / spider.total) * 100)
            spider.Q('BarQueue').send(int(percent))  # push percent progress back to the GUI progress bar
            task_obj = TaskObj(item.get('uuid_md5'), item.get('page'), item['image_urls'][0])
            self.handle_task(spider, stats, task_obj)
        except Exception as e:
            # Broad catch: a progress-reporting failure must not abort the download pipeline.
            spider.logger.error(f'traceback: {str(type(e))}:: {str(e)}')

    @staticmethod
    def handle_task(spider, stats, task_obj):
        """Record a finished page on its task; when the task reaches 100%, optionally mark it in the dedup store."""
        _tasks = spider.tasks[task_obj.taskid]
        _tasks.downloaded.append(task_obj)
        curr_progress = int(len(_tasks.downloaded) / _tasks.tasks_count * 100)
        if conf.isDeduplicate and curr_progress >= 100:
            spider.sql_handler.add(task_obj.taskid)
        spider.Q('TasksQueue').send(task_obj, wait=True)
        stats.inc_value('image/downloaded')

    def item_completed(self, results, item, info):
        # Delegates to ImagesPipeline; kept as an explicit hook point.
        _item = super(ComicPipeline, self).item_completed(results, item, info)
        return _item
96 |
97 |
class JmComicPipeline(ComicPipeline):
    """ComicPipeline variant for JM: downloaded bodies are scrambled and must be restored before saving."""

    def get_images(self, response, request, info, *, item=None):
        """Override of ImagesPipeline.get_images that first restores the JM image from the scrambled body.

        Yields (path, image, buffer) for the main image and each configured thumbnail size.
        """
        path = self.file_path(request, response=response, info=info, item=item)
        # JmUtils derives the de-scrambling parameters from the image URL, then rebuilds the picture.
        orig_image = JmUtils.JmImage.by_url(item['image_urls'][0]).convert_img(response.body)

        width, height = orig_image.size
        if width < self.min_width or height < self.min_height:
            raise ImageException(
                "Image too small "
                f"({width}x{height} < {self.min_width}x{self.min_height})"
            )

        # NOTE(review): convert_image is given the restored image but the ORIGINAL (scrambled)
        # response body as response_body — confirm this combination is intentional.
        image, buf = self.convert_image(
            orig_image, response_body=BytesIO(response.body)
        )
        yield path, image, buf

        for thumb_id, size in self.thumbs.items():
            thumb_path = self.thumb_path(
                request, thumb_id, response=response, info=info, item=item
            )
            thumb_image, thumb_buf = self.convert_image(image, size, buf)
            yield thumb_path, thumb_image, thumb_buf
121 |
122 |
class MangabzComicPipeline(ComicPipeline):
    """ComicPipeline variant for Mangabz: image requests need the Mangabz-specific headers."""

    def get_media_requests(self, item, info):
        # Build one Request per image URL, attaching the Mangabz image headers.
        adapter = ItemAdapter(item)
        requests = []
        for image_url in adapter.get(self.images_urls_field, []):
            requests.append(Request(image_url, callback=NO_CALLBACK, headers=MangabzUtils.image_ua))
        return requests
128 |
--------------------------------------------------------------------------------
/ComicSpider/settings.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Scrapy settings for ComicSpider project
4 | #
5 | # For simplicity, this file contains only settings considered important or
6 | # commonly used. You can find more settings consulting the documentation:
7 | #
8 | # https://docs.scrapy.org/en/latest/topics/settings.html
9 | # https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
10 | # https://docs.scrapy.org/en/latest/topics/spider-middleware.html
11 | from variables import SPECIAL_WEBSITES
12 | from utils import conf
13 |
BOT_NAME = 'ComicSpider'

SPIDER_MODULES = ['ComicSpider.spiders']
NEWSPIDER_MODULE = 'ComicSpider.spiders'


# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'ComicSpider (+http://www.yourdomain.com)'

# Obey robots.txt rules
ROBOTSTXT_OBEY = False
REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"
# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
DOWNLOAD_DELAY = 0.5
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Override the default request headers:
DEFAULT_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en',
}

DOWNLOADER_MIDDLEWARES = {
    'ComicSpider.middlewares.ComicspiderDownloaderMiddleware': 5,
}

ITEM_PIPELINES = {
    'ComicSpider.pipelines.ComicPipeline': 50
}

IMAGES_STORE = '/'
# User-level settings unpacked from the config object (save path, log dir, proxy, log level, custom map).
SV_PATH, log_path, PROXY_CUST, LOG_LEVEL, CUSTOM_MAP = conf.settings

# User-Agent pool (presumably consumed by a UA middleware — verify against middlewares.py).
UA = [r"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:70.0) Gecko/20100101 Firefox/101.0",
      r'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0',
      ]

# Log output
LOG_FILE = log_path.joinpath("scrapy.log")
# Spider names using the special (adult) folder layout — see ComicPipeline.file_folder.
SPECIAL = SPECIAL_WEBSITES
61 |
--------------------------------------------------------------------------------
/ComicSpider/spiders/__init__.py:
--------------------------------------------------------------------------------
1 | # This package will contain the spiders of your Scrapy project
2 | #
3 | # Please refer to the documentation for information on how to create and manage
4 | # your spiders.
5 |
--------------------------------------------------------------------------------
/ComicSpider/spiders/comic90mh.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import re
3 | from .basecomicspider import BaseComicSpider, ComicspiderItem
4 |
5 | domain = "m.90mh.org" # 注意mk_page_tasks有域名转换
6 |
7 |
class Comic90mhSpider(BaseComicSpider):
    """Spider for 90mh: searches on the mobile site, then downloads pages from the desktop site."""
    name = 'comic90mh'
    search_url_head = f'http://{domain}/search/?keywords='
    mappings = {'更新': f'http://{domain}/update/',
                '排名': f'http://{domain}/rank/'}

    def frame_book(self, response):
        """Print the search results (index, title, author, update time, latest chapter) and return {index: [url, title]}."""
        frame_results = {}
        example_b = r' {}、 《{}》 【{}】 [{}] [{}]'
        self.say(example_b.format('序号', '漫画名', '作者', '更新时间', '最新章节') + ' ')
        targets = response.xpath('//div[@class="itemBox"]')  # sign -*-
        for x, target in enumerate(targets):
            title = target.xpath('.//a[@class="title"]/text()').get().strip()
            url = target.xpath('.//a[@class="title"]/@href').get()
            author = target.xpath('.//p[@class="txtItme"]/text()').get()
            refresh_time = target.xpath('.//span[@class="date"]/text()').get().strip()
            # The rank listing has no "latest chapter" element, so show a placeholder there.
            refresh_section = target.xpath(
                './/a[@class="coll"]/text()').get().strip() if 'rank' not in self.search_start else '-*-*-'
            self.say(example_b.format(str(x + 1), title, author, refresh_time, refresh_section, chr(12288)))
            frame_results[x + 1] = [url, title]
        return self.say.frame_book_print(frame_results, extra=" →_→ 鼠标移到序号栏有教输入规则,此步特殊禁止用全选 ")

    def frame_section(self, response):
        """List the chapters of the selected book as {index: [name, url]}."""
        frame_results = {}
        example_s = ' -{}、【{}】'
        self.say(example_s.format('序号', '章节') + ' ')
        targets = response.xpath('//ul[contains(@id, "chapter")]/li')  # sign -*-
        for x, target in enumerate(targets):
            section_url = target.xpath('./a/@href').get()
            section = target.xpath('.//span/text()').get()
            frame_results[x + 1] = [section, section_url]
        return self.say.frame_section_print(frame_results, print_example=example_s)

    def mk_page_tasks(self, **kw):
        # Switch from the mobile host to the desktop host for the actual chapter request.
        return [kw['url'].replace(domain, 'www.90mh.org')]

    def parse_fin_page(self, response):
        """Extract page image URLs from the inline JS vars (chapterImages/chapterPath/pageImage) and yield items."""
        doc_vars = re.split(r';var', response.text)
        img_doc = next(filter(lambda _: "chapterImages" in _, doc_vars))
        img_path_doc = next(filter(lambda _: "chapterPath" in _, doc_vars))  # var chapterPath="images/comic/35/69927/"
        page_image_doc = next(filter(lambda _: "pageImage" in _, doc_vars))  # var pageImage="http://xx/images/xx.jpg"
        img_path = re.search(r"""['"](.*?)['"]""", img_path_doc).group(1)
        img_domain = re.search(r"""['"](https?://.*?/).*?['"]""", page_image_doc).group(1)
        for page, (img_name, img_type) in enumerate(re.findall(r"""['"](.*?(jp[e]?g|png|webp))['"]""", img_doc)):
            item = ComicspiderItem()
            item['title'] = response.meta.get('title')
            item['section'] = response.meta.get('section')
            item['page'] = page + 1
            item['image_urls'] = [f"{img_domain}{img_path}{img_name}"]
            self.total += 1
            yield item
        # Tell the GUI this crawl phase is finished.
        self.process_state.process = 'fin'
        self.Q('ProcessQueue').send(self.process_state)
61 |
--------------------------------------------------------------------------------
/ComicSpider/spiders/ehentai.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from scrapy import Request
3 |
4 | from .basecomicspider import BaseComicSpider3
5 | from utils import PresetHtmlEl, conf, re
6 | from utils.processed_class import PreviewHtml, Url
7 | from utils.website import Cookies, EHentaiKits
8 | from assets import res
9 | from ..items import ComicspiderItem
10 |
11 | domain = "exhentai.org"
12 |
13 |
class EHentaiSpider(BaseComicSpider3):
    """Spider for exhentai galleries; login cookies are taken from the config and carried in headers."""
    custom_settings = {"DOWNLOADER_MIDDLEWARES": {'ComicSpider.middlewares.ComicDlProxyMiddleware': 5,
                                                  'ComicSpider.middlewares.UAMiddleware': 6},
                       "COOKIES_ENABLED": False}
    name = 'ehentai'
    num_of_row = 25
    domain = domain
    search_url_head = f'https://{domain}/?f_search='
    mappings = {
        res.EHentai.MAPPINGS_INDEX: f'https://{domain}',
        res.EHentai.MAPPINGS_POPULAR: f'https://{domain}/popular'
    }
    frame_book_format = ['title', 'book_pages', 'preview_url']  # , 'book_idx']
    turn_page_info = (r"page=\d+",)

    @property
    def ua(self):
        # COOKIES_ENABLED is off, so the login cookies ride in the request headers instead.
        return {**EHentaiKits.headers, "cookie": Cookies.to_str_(conf.eh_cookies)}

    def frame_book(self, response):
        """Parse a gallery listing into {index: [url, title, pages, preview_url]} and build the preview HTML."""
        frame_results = {}
        example_b = r' [ {} ], p_{}, ⌈ {} ⌋ '
        self.say(example_b.format('index', 'pages', 'name') + ' ')
        preview = PreviewHtml(response.url)
        targets = response.xpath('//table[contains(@class, "itg")]//td[contains(@class, "glcat")]/..')
        for x, target in enumerate(targets):
            item_elem = target.xpath('./td/div[@class="glthumb"]')
            title = item_elem.xpath('.//img/@title').get()
            pages = (next(filter(
                lambda _: 'pages' in _, item_elem.xpath('.//div/text()').getall()))
                .replace(" pages", ""))
            url = preview_url = target.xpath('./td[contains(@class, "glname")]/a/@href').get()
            # Lazily-loaded thumbnails keep the real source in data-src; fall back to src.
            img_preview = (item_elem.xpath('.//img/@data-src') or item_elem.xpath('.//img/@src')).get()
            # book_idx = re.search(r"g/(\d+)/", url).group(1)
            self.say(example_b.format(str(x + 1), pages, title, chr(12288)))
            self.say('') if (x + 1) % self.num_of_row == 0 else None
            frame_results[x + 1] = [url, title, pages, preview_url]  # , book_idx]
            preview.add(x + 1, img_preview, title, preview_url, pages=pages)
        self.say(preview.created_temp_html)
        return self.say.frame_book_print(frame_results, extra=f" {res.EHentai.JUMP_TIP}")

    def page_turn(self, response, elected_results):
        """Follow the listing's embedded nexturl/prevurl when the user asked to page through results."""
        if 'next' in self.input_state.pageTurn:
            # NOTE(review): the local is called find_prevurl in both branches; here it actually holds nexturl.
            find_prevurl = re.search(r"""var nexturl="(.*?)";""", response.text)
            url = Url(find_prevurl.group(1) if bool(find_prevurl) else "")
            yield from self.page_turn_(response, elected_results, url)
        elif 'previous' in self.input_state.pageTurn:
            find_prevurl = re.search(r"""var prevurl="(.*?)";""", response.text)
            url = Url(find_prevurl.group(1) if bool(find_prevurl) else "")
            yield from self.page_turn_(response, elected_results, url)
        else:
            yield Request(url=self.search, callback=self.parse, meta=response.meta, dont_filter=True)

    def parse_section(self, response):
        """On the first gallery page, prefer the h1#gj title (secondary gallery title — verify) before delegating."""
        if not response.meta.get('sec_page'):
            title_gj = response.xpath('//h1[@id="gj"]/text()')
            if title_gj:
                response.meta['title'] = title_gj.get()
            else:
                titles = response.xpath("//h1/text()").getall()
                if response.meta.get('title') in titles and len(titles) > 1:
                    titles.remove(response.meta.get('title'))
                    response.meta['title'] = titles[0]
        yield from super(EHentaiSpider, self).parse_section(response)

    def frame_section(self, response):
        """Accumulate per-page links across thumbnail pages; return (results, next_page_url_or_None)."""
        next_flag = None
        frame_results = response.meta.get('frame_results', {})
        sec_page = response.meta.get('sec_page', 1)
        this_book_pages = response.meta.get('book_pages') or re.search(r">(\d+) pages<", response.text).group(1)
        targets = response.xpath('//div[@id="gdt"]/a')
        # Continue numbering after whatever previous thumbnail pages already collected.
        first_idx = max(frame_results.keys()) if frame_results else 0
        for x, target in enumerate(targets):
            idx = first_idx + x
            url = target.xpath('./@href').get()
            frame_results[idx + 1] = url
        if int(max(frame_results.keys())) < int(this_book_pages):
            if "/?p=" in response.url:
                next_flag = re.sub(r'\?p=\d+', rf'?p={sec_page}', response.url)
            else:
                next_flag = response.url.strip('/') + f"/?p={sec_page}"  # ... book-page-index start with 0,not 1
        return frame_results, next_flag

    def parse_fin_page(self, response):
        """Yield the image item for one page, skipping the 509.gif placeholder (image-quota hit — verify)."""
        url = response.xpath('//img[@id="img"]/@src').get() or ""
        page = response.meta.get('page')
        group_infos = ComicspiderItem.get_group_infos(response.meta)
        if url.endswith('509.gif'):
            # Log instead of saving the placeholder image.
            self.log(f'[509] https://ehgt.org/g/509.gif: [page-{page}] of [{group_infos["title"]}]', level=30)
        else:
            item = ComicspiderItem()
            item.update(**group_infos)
            item['page'] = str(page)
            item['image_urls'] = [url]
            self.total += 1
            yield item
110 |
--------------------------------------------------------------------------------
/ComicSpider/spiders/mangabz.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import re
3 | import datetime
4 | import jsonpath_rw as jsonp
5 | from collections import OrderedDict
6 |
7 | from utils.processed_class import execute_js
8 | from .basecomicspider import FormReqBaseComicSpider, ComicspiderItem, BodyFormat
9 |
10 | domain = "www.mangabz.com"
11 |
12 |
def curr_time_format():
    """Return the current local time formatted like a JS Date string with a fixed CST suffix."""
    now = datetime.datetime.now()
    stamp = now.strftime('%a %b %d %Y %H:%M:%S')
    return f"{stamp} GMT 0800 (中国标准时间)"
15 |
16 |
class Body(BodyFormat):
    """Default POST body for Mangabz category listings plus the jsonpath rendering for each printed column."""
    page_index_field = "pageindex"
    dic = {
        "action": "getclasscomics",
        "pageindex": "1",
        "pagesize": "21",
        "tagid": "0",
        "status": "0",
        "sort": "2"
    }
    example_b = ' {}、\t《{}》\t【{}】\t[{}]'
    print_head = ['book_path', '漫画名', '作者', '最新话']
    target_json_path = ['UrlKey', 'Title', 'Author.[*]', 'ShowLastPartName']

    def rendering_map(self):
        """Return {column label: compiled jsonpath expression}, pairing print_head with target_json_path."""
        return {head: jsonp.parse(expr) for head, expr in zip(self.print_head, self.target_json_path)}
33 |
34 |
class SearchBody(Body):
    """POST body for keyword search; presumably the title field is replaced via SearchBody(title=keyword) — see MangabzSpider.search."""
    dic = {
        "t": "3",
        "pageindex": "1",
        "pagesize": "12",
        "f": "0",
        "title": "廢淵"
    }
    # Search responses use slightly different JSON keys than category listings.
    target_json_path = ['Url', 'Title', 'Author.[*]', 'LastPartShowName']
44 |
45 |
class MangabzSpider(FormReqBaseComicSpider):
    """Spider for mangabz.com: listings are fetched via form POSTs, page images via a packed inline script."""
    name = 'mangabz'
    # Full browser-like header set mimicking mobile Safari (iPhone UA).
    ua = {
        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Mobile/15E148 Safari/604.1",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8",
        "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "same-origin",
        "Sec-Fetch-User": "?1",
        "Priority": "u=0, i",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
        "TE": "trailers"
    }
    num_of_row = 50
    domain = domain
    custom_settings = {
        "DOWNLOADER_MIDDLEWARES": {'ComicSpider.middlewares.MangabzUAMiddleware': 5,
                                   'ComicSpider.middlewares.ComicDlAllProxyMiddleware': 6},
        "ITEM_PIPELINES": {'ComicSpider.pipelines.MangabzComicPipeline': 50}
    }
    search_url_head = f"https://{domain}/pager.ashx"
    mappings = {"更新": ["manga-list-0-0-2", "2"],
                "人气": ["manga-list", "10"],
                }
    body = Body()

    @property
    def search(self):
        """Build the search/listing URL, prime self.body, and notify the GUI of the 'search' phase."""
        self.process_state.process = 'search'
        self.Q('ProcessQueue').send(self.process_state)
        keyword = self.input_state.keyword.strip()
        if keyword in self.mappings.keys():
            search_start_path, body_sort = self.mappings[keyword]  # TODO[5](2024-09-30): later support status all/ongoing/completed and sorting by shelf date
            search_start = f"https://{domain}/{search_start_path}/mangabz.ashx?d={curr_time_format()}"
            self.body.update(sort=body_sort)
        else:
            search_start = f"{self.search_url_head}?d={curr_time_format()}"
            self.body = SearchBody(title=keyword)
        return search_start

    def frame_book(self, response):
        """Render the JSON listing through the body's jsonpath map into {index: [url, title, listing_url]}."""
        frame_results = {}
        example_b = self.body.example_b
        self.say(example_b.format('序号', *self.body.print_head[1:]) + ' ')
        # Keyword search returns a bare JSON list; category listings wrap it in 'UpdateComicItems'.
        targets = response.json() if isinstance(self.body, SearchBody) \
            else response.json().get('UpdateComicItems')
        rendering_map = self.body.rendering_map().items()
        for x, target in enumerate(targets):
            rendered = OrderedDict()
            for attr_name, _path in rendering_map:
                # Join multi-valued matches (e.g. multiple authors) with commas.
                rendered[attr_name] = ",".join(map(lambda __: str(__.value), _path.find(target))).strip()
            url = f"https://{self.domain}/{rendered.pop('book_path').strip('/')}/"
            self.say(example_b.format(str(x + 1), *rendered.values(), chr(12288)))
            self.say('') if (x + 1) % self.num_of_row == 0 else None
            frame_results[x + 1] = [url, rendered['漫画名'], response.url]
        return self.say.frame_book_print(frame_results, url=response.url)

    def frame_section(self, response):
        """List chapters as {index: [name, url]}."""
        frame_results = {}
        example_s = ' -{}、【{}】'
        self.say(example_s.format('序号', '章节') + ' ')
        targets = response.xpath('//div[@class="detail-list-item"]/a')
        # Iterate in reverse of the site's order — presumably it lists newest first; verify.
        for x, target in enumerate(reversed(targets)):
            section_url = rf"https://{domain}{target.xpath('./@href').get()}"
            section = "".join(target.xpath('./text()').get()).strip()
            frame_results[x + 1] = [section, section_url]
        return self.say.frame_section_print(frame_results, print_example=example_s)

    def parse_fin_page(self, response):
        """Unpack the eval-packed chapter script to recover the image URL list, then yield one item per page."""
        meta = response.meta
        js = response.xpath('//script[@type="text/javascript"]/text()').getall()
        target_js = next(filter(lambda t: t.strip().startswith('eval'), js), None)
        # Run the packer stub with its leading eval stripped so it returns the decoded source instead of executing it.
        real_js = execute_js(
            r"""function run(code){var ret="";eval('ret = '+code.replace(/^;*?\s*(window(\.|\[(["'])))?eval(\3\])?/,
            function ($0) {return 'String';})); return ret }""",
            "run", target_js)
        img_list_ = re.search(r'\[(.*?)]', real_js).group(1)
        img_list = [re.sub(r"""['"]""", '', _) for _ in re.split(', ?', img_list_)]
        group_infos = ComicspiderItem.get_group_infos(response.meta)
        self.set_task((meta['uuid_md5'], f"{meta['title']}-{meta['section']}", len(img_list), meta['title_url']))
        for img_url in img_list:
            item = ComicspiderItem()
            item.update(**group_infos)
            # Page number is encoded in the file name: .../<page>_<something>.<ext>
            page = int(re.search(r'/(\d+)_\d+\.', img_url).group(1))
            item['page'] = page
            item['image_urls'] = [img_url]
            self.total += 1
            yield item
        self.process_state.process = 'fin'
        self.Q('ProcessQueue').send(self.process_state)
141 |
--------------------------------------------------------------------------------
/ComicSpider/spiders/wnacg.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import re
3 |
4 | from .basecomicspider import BaseComicSpider2, font_color
5 | from utils import PresetHtmlEl
6 | from utils.website import WnacgUtils
7 | from utils.processed_class import PreviewHtml
8 |
9 | domain = "wnacg.com"
10 |
11 |
class WnacgSpider(BaseComicSpider2):
    """Spider for wnacg: single-volume doujinshi site, albums have no chapter level."""
    custom_settings = {"DOWNLOADER_MIDDLEWARES": {
        'ComicSpider.middlewares.ComicDlProxyMiddleware': 6,
        'ComicSpider.middlewares.RefererMiddleware': 10,
    }}
    name = 'wnacg'
    num_of_row = 4
    domain = domain
    # allowed_domains = [domain]
    search_url_head = f'https://{domain}/search/?f=_all&s=create_time_DESC&syn=yes&q='
    mappings = {'更新': f'https://{domain}/albums-index.html',
                '汉化': f'https://{domain}/albums-index-cate-1.html', }
    turn_page_search = r"p=\d+"
    turn_page_info = (r"-page-\d+", "albums-index%s")

    def before_search(self):
        # With no proxy configured, always use a domestically reachable mirror domain,
        # ignoring any global proxy mode.
        if self.settings.get("PROXY_CUST") is None:
            self.domain = WnacgUtils.get_domain()

    def frame_book(self, response):
        """Parse an album listing into {index: [gallery_url, title, preview_url]} and build the preview HTML."""
        frame_results = {}
        example_b = r' [ {} ]、【 {} 】'
        self.say(example_b.format('序号', '漫画名') + ' ')
        preview = PreviewHtml(response.url)
        targets = response.xpath('//li[contains(@class, "gallary_item")]')
        title_xpath = './div[contains(@class, "pic")]/a'
        for x, target in enumerate(targets):
            item_elem = target.xpath(title_xpath)
            title = item_elem.xpath('./@title').get()
            pre_url = item_elem.xpath('./@href').get()
            preview_url = f'https://{self.domain}{pre_url}'  # the page a human reader would open
            url = preview_url.replace('index', 'gallery')  # shortcut: this link returns all page URIs of the album at once
            img_preview = 'http:' + item_elem.xpath('./img/@src').get()
            self.say(example_b.format(str(x + 1), title, chr(12288)))
            self.say('') if (x + 1) % self.num_of_row == 0 else None
            frame_results[x + 1] = [url, title, preview_url]
            _page = target.xpath('.//div[contains(@class, "info_col")]/text()').get()
            # Page count is pulled from text like "24張"/"24张"; default to 0 when absent.
            pages = re.search(r'(\d+)[張张]', _page.strip()).group(1) if _page else 0
            preview.add(x + 1, img_preview, title, preview_url, pages=pages)
        self.say(preview.created_temp_html)
        return self.say.frame_book_print(frame_results, url=response.url)

    def frame_section(self, response):
        """Extract every page image URL from the gallery script block (no chapters on this site)."""
        doc_wlns = re.split(r';[\n\s]+?document\.writeln', response.text)
        selected_doc = next(filter(lambda _: "var imglist" in _, doc_wlns))
        targets = re.findall(r'(//.*?(jp[e]?g|png|webp))', selected_doc)
        frame_results = {}
        for x, target in enumerate(targets):
            img_url = f"https:{target[0]}"
            frame_results[x + 1] = img_url
        self.say("=" * 15 + font_color(' 本子网没章节的 这本已经扔进任务了', color='blue'))
        return frame_results
64 |
--------------------------------------------------------------------------------
/GUI/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | """path to save factitious code, inherit-class from code of uic"""
4 |
--------------------------------------------------------------------------------
/GUI/mainwindow.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | from PyQt5 import QtCore
4 |
5 | from GUI.uic.ui_mainwindow import Ui_MainWindow
6 | from assets import res as ori_res
7 |
8 |
9 | res = ori_res.GUI.Uic
10 |
11 |
class MitmMainWindow(Ui_MainWindow):
    """Patches the generated Ui_MainWindow: window title, spider combo-box entries and localized widget texts."""

    def setupUi(self, _mainWindow):
        _translate = QtCore.QCoreApplication.translate
        super(MitmMainWindow, self).setupUi(_mainWindow)
        _mainWindow.setWindowTitle(_translate("MainWindow", "ComicGUISpider v2.2.1"))
        # Retry stays disabled until there is something to retry.
        self.retrybtn.setDisabled(True)
        self.chooseBox.setItemText(0, _translate("MainWindow", res.chooseBoxDefault))
        self.chooseBox.setItemText(1, _translate("MainWindow", "1、拷贝漫画"))
        self.chooseBox.setItemText(2, _translate("MainWindow", "2、jm🔞"))
        self.chooseBox.setItemText(3, _translate("MainWindow", "3、wnacg🔞"))
        self.chooseBox.setItemText(4, _translate("MainWindow", "4、ehentai🔞"))
        # NOTE(review): "Māngabz" (with macron) looks like a typo for "Mangabz" — confirm the intended label.
        self.chooseBox.setItemText(5, _translate("MainWindow", "5、Māngabz"))
        # Append a 7th (empty) entry so index 6 exists before its text is set.
        self.chooseBox.addItem("")
        self.chooseBox.setItemText(6, _translate("MainWindow", "6、hitomi🔞"))
        self.checkisopen.setText(_translate("MainWindow", res.checkisopenDefaultText))
        self.searchinput.setPlaceholderText(_translate("MainWindow", res.searchinputPlaceholderText))
        self.chooseinput.setPlaceholderText(_translate("MainWindow", res.chooseinputPlaceholderText))
        self.next_btn.setText(_translate("MainWindow", res.next_btnDefaultText))
        self.chooseinput.setStatusTip(_translate("MainWindow", res.chooseinputTip))
        self.chooseBox.setToolTip(_translate("MainWindow", res.chooseBoxToolTip))
        self.previewBtn.setStatusTip(_translate("MainWindow", res.previewBtnStatusTip))
        self.progressBar.setStatusTip(_translate("MainWindow", res.progressBarStatusTip))
34 |
--------------------------------------------------------------------------------
/GUI/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasoneri/ComicGUISpider/940f80d32a862550fe7dfdef462d21f30e730c1a/GUI/src/__init__.py
--------------------------------------------------------------------------------
/GUI/src/preview_format/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Preview
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | {body}
17 |
18 |
19 |
--------------------------------------------------------------------------------
/GUI/src/preview_format/public.css:
--------------------------------------------------------------------------------
/* Badge overlaid on a preview image (e.g. page count); semi-transparent until hovered. */
.badge-on-img{position:absolute;right:-0.5em;box-shadow:0 0 3px rgba(0,0,0,0.03);opacity:0.7;min-width:1.5em;text-align:center;padding:0.25em;border-radius:0.25em;}
.badge-on-img:hover {opacity: 0.9;}
--------------------------------------------------------------------------------
/GUI/src/preview_format/public.js:
--------------------------------------------------------------------------------
// task-panel.js
(() => {
    // Return the ids of all checked checkboxes named "img" (the previews the user selected).
    window.scanChecked = function () {
        var checkboxGroup = document.getElementsByName('img');
        var selectedValues = [];
        for (let i = 0; i < checkboxGroup.length; i++) {
            if (checkboxGroup[i].checked) {
                selectedValues.push(checkboxGroup[i].id);
            }
        }
        return selectedValues
    }
    // Return the full current document markup (name kept as-is: callers use "get_curr_hml").
    window.get_curr_hml = function () {
        return document.documentElement.outerHTML;
    }
    // Stack multiple badges on the same image vertically so they do not overlap.
    document.addEventListener('DOMContentLoaded', function() {
        const containers = document.querySelectorAll('div[style*="position: relative"]');
        containers.forEach(container => {
            const badges = container.querySelectorAll('.badge-on-img');
            let verticalOffset = 0;
            badges.forEach(badge => {
                badge.style.removeProperty('top');
                badge.style.top = `${verticalOffset}px`;
                verticalOffset += badge.offsetHeight + 2;
            });
        });
    });
})();
29 |
--------------------------------------------------------------------------------
/GUI/src/preview_format/tasks_extend.js:
--------------------------------------------------------------------------------
// task-panel.js
(() => {
    // Inject the task-panel styles once at load time.
    const style = document.createElement('style');
    style.textContent = `
        /* 分界线样式 */
        #panelDivider {
            margin: 1rem 0;
            border-width: 2px;
            border-color: #0d6efd;
            opacity: 0;
            transition: opacity 0.3s ease;
        }
        #panelDivider.visible {
            opacity: 1;
        }
        #taskPanel {
            max-height: 300px;
            overflow-y: auto;
            transition: all 0.3s ease;
            margin-bottom: 15px;
            box-shadow: 0 -2px 5px rgba(0,0,0,0.1); /* 投影增强层次 */
            direction: rtl; /* 滚动条左侧 */
        }
        #taskContainer {
            direction: ltr;
        }
        /* 滚动条样式 */
        #taskPanel::-webkit-scrollbar {
            width: 8px;
            background: #f1f1f1;
        }
        #taskPanel::-webkit-scrollbar-thumb {
            background: #888;
            border-radius: 4px;
        }
        #taskPanel::-webkit-scrollbar-thumb:hover {
            background: #555;
        }
        @supports (scrollbar-width: thin) {
            #taskPanel {
                scrollbar-width: thin;
                scrollbar-color: #888 #f1f1f1;
            }
        }
        /* 进度条完成 */
        .completed .progress-bar {
            background-color: #198754 !important;
        }
        /* 任务项样式 */
        .task-item {
            padding: 12px;
            border-bottom: 1px solid #dee2e6;
        }
        .task-progress {
            height: 20px;
            margin-top: 8px;
        }
        .task-count {
            font-family: monospace;
            font-size: 0.85em;
            color: #6c757d;
        }
    `;
    document.head.appendChild(style);
66 |
67 | // 初始化任务面板
68 | window.initTaskPanel = function () {
69 | // function initTaskPanel() {
70 | // 创建容器
71 | const container = document.createElement('div');
72 | container.innerHTML = `
73 |
78 | Hide tasks(0 )
79 |
80 |
83 | `;
84 | // 插入分隔线
85 | const divider = document.createElement('hr');
86 | divider.id = 'panelDivider';
87 | document.body.insertAdjacentElement('afterbegin', container);
88 | container.insertAdjacentElement('afterend', divider);
89 | // 事件监听
90 | const taskPanel = document.getElementById('taskPanel');
91 | const counterBtn = container.querySelector('button');
92 | if (taskPanel.classList.contains('show')) {
93 | divider.classList.add('visible');
94 | requestAnimationFrame(autoScroll);
95 | }
96 | taskPanel.addEventListener('shown.bs.collapse', () => {
97 | counterBtn.innerHTML = `Hide tasks (${taskCounter()} )`;
98 | divider.classList.add('visible');
99 | autoScroll(); // 展开时自动滚动
100 | });
101 | taskPanel.addEventListener('hidden.bs.collapse', () => {
102 | counterBtn.innerHTML = `Show tasks (${taskCounter()} )`;
103 | divider.classList.remove('visible');
104 | });
105 | window.scrollTo({top: 0, behavior: 'smooth'});
106 | }
107 |
    // Smoothly scroll the task panel down to its newest entry.
    function autoScroll() {
        const panel = document.getElementById('taskPanel');
        panel.scrollTo({
            top: panel.scrollHeight,
            behavior: 'smooth'
        });
    }
116 |
117 | // 添加任务
118 | window.addTask = function (uuid, title, task_count, title_url) {
119 | const container = document.getElementById('taskContainer');
120 | // if (document.getElementById(uuid)) return;
121 | const initialProgress = 0;
122 | const task = document.createElement('div');
123 | task.className = 'task-item';
124 | task.id = `task-${uuid}`;
125 | task.innerHTML = `
126 |
127 |
128 | ${title}
129 |
page: ${task_count}
130 |
131 |
132 |
138 | ${initialProgress}%
139 |
140 |
141 | `;
142 |
143 | container.appendChild(task);
144 | updateCounter();
145 | // 自动滚动逻辑
146 | const panel = document.getElementById('taskPanel');
147 | if (panel.classList.contains('show')) {
148 | requestAnimationFrame(autoScroll);
149 | }
150 | }
    // Update one task's progress bar; mark the task completed once it reaches 100%.
    window.updateTaskProgress = function (uuid, progress) {
        const task = document.getElementById(`task-${uuid}`);
        if (!task) return;

        const progressBar = task.querySelector('.progress-bar');
        // const progressText = task.querySelector('small');
        progressBar.style.width = `${progress}%`;
        progressBar.textContent = `${progress}%`;
        progressBar.ariaValuenow = progress;
        if (progress >= 100) {
            task.classList.add('completed');
            progressBar.textContent = '100% Completed';
        }
    }
166 |
// Count the task rows currently present in the container.
function taskCounter() {
    return document.querySelectorAll('#taskContainer > .task-item').length;
}
171 |
// Sync every #taskCounter badge with the current number of task rows.
function updateCounter() {
    const count = taskCounter();
    for (const badge of document.querySelectorAll('#taskCounter')) {
        badge.textContent = count;
    }
}
177 |
178 | // 初始化
179 | // document.addEventListener('DOMContentLoaded', initTaskPanel);
180 | })();
181 |
--------------------------------------------------------------------------------
/GUI/src/preview_format/tip_downloaded.js:
--------------------------------------------------------------------------------
// task-panel.js
// Visually marks already-downloaded entries in the preview page.
(() => {
    // Inject the "downloaded" styling once at load time.
    const style = document.createElement('style');
    style.textContent = `
.img-downloaded {
  filter: grayscale(100%) !important;
  opacity: 0.6 !important;
  transition: all 0.3s ease;
}
.container-downloaded {
  background-color: lightsalmon !important;
}
`;
    document.head.appendChild(style);

    // Grey out the thumbnail and tint the container of each <a class="downloaded">.
    function highlightDownloads() {
        document.querySelectorAll('a.downloaded').forEach(url_a => {
            const container = url_a.closest('.singal-task');
            const formCheck = container.querySelector('.form-check');
            // Prefer tinting the checkbox wrapper; fall back to the whole container.
            formCheck ? formCheck.classList.add('container-downloaded') : container.classList.add('container-downloaded');
            container.querySelector('img').classList.add('img-downloaded');
        });
    }

    document.addEventListener('DOMContentLoaded', highlightDownloads);
    // Re-run whenever new nodes appear (the result list is filled dynamically).
    const observer = new MutationObserver((mutations) => {
        mutations.forEach(mutation => {
            if (mutation.addedNodes.length) {
                highlightDownloads();
            }
        });
    });
    observer.observe(document.body, {
        childList: true,
        subtree: true
    });
})();
39 |
--------------------------------------------------------------------------------
/GUI/thread/__init__.py:
--------------------------------------------------------------------------------
1 | from PyQt5.QtCore import QThread, pyqtSignal
2 | from utils import font_color, conf
3 | from assets import res
4 |
5 |
class ClipTasksThread(QThread):
    """Fetch book info for each clipboard-captured URL, then poll the preview
    page (via injected JS) until all tasks are reported done.

    Signals:
        info_signal (tuple): (index, url, *parsed info) per fetched task.
        total_signal (dict): final {index: [pages, title]} map; {} on total failure.
    """
    info_signal = pyqtSignal(tuple)
    total_signal = pyqtSignal(dict)

    def __init__(self, gui, tasks):
        super(ClipTasksThread, self).__init__()
        self.gui = gui
        self.tasks = tasks

    def run(self):
        self.msleep(1200)  # delay ~1s, otherwise this worker outruns the main UI
        cli = self.gui.spiderUtils.get_cli(conf)
        total = {}
        for idx, url in enumerate(self.tasks):
            try:
                resp = cli.get(url, follow_redirects=True, timeout=3)
                info = self.gui.spiderUtils.parse_book(resp.text)
                self.msleep(50)
                self.info_signal.emit((idx + 1, url, *info[1:]))
                total[idx + 1] = [info[2], info[0]]
            except Exception as e:
                err_msg = rf"{res.GUI.Clip.get_info_error}({url}): [{type(e).__name__}] {str(e)}"
                self.gui.log.exception(e)
                self.gui.say(font_color(err_msg + ' ', color='red'), ignore_http=True)
        self.handle_total(total)

    def check_condition_and_run_js(self):
        # One poll step: give up after max_iterations, otherwise ask the
        # preview page how many tasks are done.
        if self.iterations >= self.max_iterations:
            print("[clip tasks loop]❌over max_iterations, fail.")
            self.total_signal.emit(self.total)
            return
        else:
            self.iterations += 1
            self.gui.BrowserWindow.js_execute("checkDoneTasks();", self.handle_js_result)

    def handle_js_result(self, num):
        # `num` is the done-count returned by the page's checkDoneTasks().
        if num and num >= len(self.total):
            print("[clip tasks loop]✅finsh.")  # NOTE(review): "finsh" typo in log output
            self.total_signal.emit(self.total)
            return
        self.msleep(250)
        self.check_condition_and_run_js()

    def handle_total(self, total):
        self.max_iterations = 7 * len(self.tasks)  # budget of ~1.5 s per task
        self.iterations = 0  # current poll count
        self.total = total
        if not total:
            # Every fetch failed: report an empty result and point to the log.
            self.total_signal.emit({})
            self.gui.say(font_color(res.GUI.Clip.all_fail, color='red'), ignore_http=True)
            self.gui.say(font_color(rf" {res.GUI.Clip.view_log} [{conf.log_path}\GUI.log]", color='red', size=5))
        else:
            self.msleep(1200 if len(self.total) == 1 else 350)
            self.check_condition_and_run_js()
60 |
61 |
class WorkThread(QThread):
    """only for monitor signals

    Pumps the crawler's multiprocessing queues into Qt signals until a
    finish/empty flag appears in the text browser or the manager goes away.
    """
    item_count_signal = pyqtSignal(int)
    print_signal = pyqtSignal(str)
    finish_signal = pyqtSignal(str)
    tasks_signal = pyqtSignal(object)
    # Loop guard; cleared when the manager connection is lost.
    active = True

    def __init__(self, gui):
        super(WorkThread, self).__init__()
        self.gui = gui
        # NOTE(review): `flag` is written by stop() but never read by run(),
        # so stop() does not actually stop the loop — confirm intent.
        self.flag = 1

    def run(self):
        manager = self.gui.manager
        TextBrowser = manager.TextBrowserQueue()
        Bar = manager.BarQueue()
        _Tasks = manager.TasksQueue()
        while self.active:
            self.msleep(5)
            try:
                if not TextBrowser.empty():
                    _ = str(TextBrowser.get().text)
                    if "__temp" in _ and _.endswith("html"):
                        self.gui.tf = _  # REMARK(2024-08-18): QWebEngineView may only be initialised in SpiderGUI's own process/thread
                        self.gui.previewBtn.setEnabled(True)
                    elif '[httpok]' in _:
                        self.print_signal.emit('[httpok]' + _.replace('[httpok]', ''))
                    else:
                        self.print_signal.emit(_)
                    self.msleep(5)
                if not Bar.empty():
                    self.item_count_signal.emit(Bar.get())
                # self.msleep(5)
                if not _Tasks.empty():
                    self.tasks_signal.emit(_Tasks.get())
                if res.GUI.WorkThread_finish_flag in self.gui.textBrowser.toPlainText():
                    self.item_count_signal.emit(100)
                    break
                elif res.GUI.WorkThread_empty_flag in self.gui.textBrowser.toPlainText():
                    break
            except ConnectionResetError:
                # Manager process vanished; exit without emitting finish.
                self.active = False
        if self.active:
            self.finish_signal.emit(str(conf.sv_path))

    # def __del__(self):
    #     self.wait()

    def stop(self):
        self.flag = 0
113 |
--------------------------------------------------------------------------------
/GUI/thread/other.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 | from copy import deepcopy
3 |
4 | from PyQt5.QtCore import Qt, QTimer
5 | from PyQt5.QtWidgets import QApplication
6 | from qfluentwidgets import (
7 | Action, InfoBar, InfoBarPosition, DWMMenu
8 | )
9 |
10 | from assets import res
11 | from utils import conf, curr_os, ori_path
12 | from utils.redViewer_tools import combine_then_mv, show_max
13 | from utils.processed_class import ClipManager
14 | from GUI.uic.qfluent import CustomFlyout, TableFlyoutView, CustomInfoBar
15 | from GUI.hitomi_tools import HitomiTools
16 |
17 |
class ToolMenu(DWMMenu):
    """Drop-down menu for the GUI's tool button.

    The action set is rebuilt when the selected site changes (``switch_ero``):
    ero sites get a read-clipboard action, and hitomi (index 6) gets its own
    tools entry instead.
    """
    res = res.GUI.ToolMenu

    def __init__(self, gui, *args, **kwargs):
        super(ToolMenu, self).__init__(*args, **kwargs)
        self.gui = gui
        self.init_actions()
        self.gui.toolButton.setMenu(self)

    def init_actions(self):
        """Install the default (non-ero) actions."""
        self.action_show_max = Action(self.tr(self.res.action1), triggered=self.show_max)
        self.action_combine_then_mv = Action(self.tr(self.res.action2), triggered=self.combine_then_mv)
        self.addAction(self.action_show_max)
        self.addAction(self.action_combine_then_mv)

    def show_max(self):
        """Show a table built from web_handle/record.txt, or warn if it is missing."""
        record_txt = conf.sv_path.joinpath("web_handle/record.txt")
        if record_txt.exists():
            CustomFlyout.make(
                TableFlyoutView(show_max(record_txt), self.gui.textBrowser),
                self.gui.searchinput, self.gui.textBrowser)
        else:
            # NOTE(review): this is action1's handler but it formats
            # action2_warning — confirm the message key is intentional.
            InfoBar.warning(
                title='show_max', content=self.res.action2_warning % record_txt,
                orient=Qt.Horizontal, isClosable=True, position=InfoBarPosition.BOTTOM,
                duration=5000, parent=self.gui.textBrowser
            )

    def combine_then_mv(self):
        """Merge downloads under sv_path into sv_path/web and report how many."""
        done = combine_then_mv(conf.sv_path, conf.sv_path.joinpath("web"))
        InfoBar.success(
            title='combine_then_mv', content=self.res.combined_tip % (done, conf.sv_path.joinpath("web")),
            orient=Qt.Horizontal, isClosable=True, position=InfoBarPosition.BOTTOM,
            duration=3000, parent=self.gui.textBrowser
        )

    def switch_ero(self, index):
        """Swap in the ero-site actions; index 6 (hitomi) gets its tools instead."""
        self.removeAction(self.action_show_max)
        self.removeAction(self.action_combine_then_mv)

        self.action_read_clip = Action(self.tr(self.res.action_ero1), triggered=self.read_clip)
        self.addAction(self.action_read_clip)
        if index == 6:
            self.add_hitomi_tools()

    def read_clip(self):
        """Read book URLs from the clipboard-manager DB and hand them to the GUI."""
        if self.gui.next_btn.text() != res.GUI.Uic.next_btnDefaultText:
            # A crawl is already in progress; refuse to start another.
            InfoBar.warning(
                title='Clip start error', content=res.GUI.Clip.process_warning,
                orient=Qt.Horizontal, isClosable=True, position=InfoBarPosition.BOTTOM,
                duration=3500, parent=self.gui.textBrowser
            )
        elif not pathlib.Path(conf.clip_db).exists():
            CustomInfoBar.show(
                title='Clip-db not found', content=res.GUI.Clip.db_not_found_guide,
                parent=self.gui.textBrowser,
                url="https://jasoneri.github.io/ComicGUISpider/config/#剪贴板db-clip-db", url_name="Guide"
            )
            # https://jasoneri.github.io/ComicGUISpider/feature/#_4-1-%E8%AF%BB%E5%89%AA%E8%B4%B4%E6%9D%BF
        else:
            clip = ClipManager(conf.clip_db, f"{conf.clip_sql} limit {conf.clip_read_num}",
                               getattr(self.gui.spiderUtils, "book_url_regex"))
            tf, match_items = clip.main()
            if not match_items:
                self.gui.say(res.GUI.Clip.match_none % self.gui.spiderUtils.book_url_regex,
                             ignore_http=True)
            else:
                self.gui.init_clip_handle(tf, match_items)

    def add_hitomi_tools(self):
        """Replace the read-clip action with the hitomi-tools action."""
        if hasattr(self, "action_read_clip"):
            self.removeAction(self.action_read_clip)

        self.action_hitomi_tools = Action(self.tr('hitomi-tools'), triggered=self.hitomi_tools_run)
        self.addAction(self.action_hitomi_tools)

    def hitomi_tools_run(self):
        """Open the hitomi tools window; guide the user if hitomi.db is missing."""
        hitomi_db_path = ori_path.joinpath("assets/hitomi.db")
        if not hitomi_db_path.exists():
            CustomInfoBar.show(
                title='', content=res.GUI.hitomiDb_guide % hitomi_db_path,
                parent=self.gui.textBrowser, _type="WARNING",
                url=res.Vars.hitomiDb_tmp_url, url_name="Download"
            )
            # TODO[1]: download hitomi.db via utils/website/hitomi/scape_dataset.py
        else:
            if not hasattr(self.gui, "hitomi_tools"):
                self.gui.hitomi_tools = HitomiTools(self.gui)
            self.gui.hitomi_tools.show()
107 |
class CopyUnfinished:
    """Copy the URLs of unfinished tasks to the clipboard, one by one.

    Writes are staggered on a timer so clipboard managers (Ditto/Maccy)
    capture every entry instead of only the last one.
    """
    copy_delay = 150 if curr_os != "macOS" else 300
    copied = 0

    def __init__(self, tasks):
        # Snapshot the tasks so later mutation by the GUI cannot affect us.
        self.tasks = deepcopy(tasks)
        self.length = len(self.tasks)

    def to_clip(self):
        """Schedule one clipboard write per task, ``copy_delay`` ms apart."""
        def _write(text):
            QApplication.clipboard().setText(text)

        for seq, item in enumerate(self.tasks, start=1):
            # Bind the URL as a default argument to dodge late-binding closures.
            QTimer.singleShot(self.copy_delay * seq,
                              lambda url=item.title_url: _write(url))
122 |
--------------------------------------------------------------------------------
/GUI/uic/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | """path to save py_file-after-uic, no factitious code will on it"""
4 |
--------------------------------------------------------------------------------
/GUI/uic/qfluent/__init__.py:
--------------------------------------------------------------------------------
1 | import types
2 | from PyQt5.QtWebEngineWidgets import QWebEngineContextMenuData, QWebEngineSettings, QWebEnginePage
3 | from qfluentwidgets import (
4 | Action, RoundMenu, FluentIcon
5 | )
6 | from assets import res as ori_res
7 |
8 | from .components import *
9 |
10 | __all__ = [
11 | 'MonkeyPatch',
12 | 'CustomFlyout', 'CustomInfoBar', 'TableFlyoutView',
13 | ]
14 |
15 | res = ori_res.GUI.Uic
16 |
17 |
class MonkeyPatch:
    """Runtime patches that replace stock Qt context menus with Fluent ones."""

    @staticmethod
    def rbutton_menu_lineEdit(line_edit):
        """Replace *line_edit*'s right-click menu with a Fluent RoundMenu."""
        def new_context_menu(self, event):
            def _showCompleterMenu():
                # An empty field shows no completer; seed it with a space first.
                if not self.text().strip():
                    self.setText(" ")
                self._showCompleterMenu()
            menu = RoundMenu(parent=self)
            undo_action = Action(FluentIcon.CANCEL, text=self.tr("Cancel"), triggered=self.undo)
            paste_action = Action(FluentIcon.PASTE, text=self.tr("Paste"), triggered=self.paste)
            select_all_action = Action(self.tr("Select all"), triggered=self.selectAll)
            show_completer = Action(FluentIcon.ALIGNMENT, text=self.tr(res.menu_show_completer),
                                    triggered=_showCompleterMenu)
            menu.addAction(show_completer)
            menu.addSeparator()
            menu.addAction(paste_action)
            menu.addAction(undo_action)
            menu.addAction(select_all_action)

            menu.exec_(event.globalPos())
            event.accept()
        line_edit.contextMenuEvent = types.MethodType(new_context_menu, line_edit)

    @staticmethod
    def rbutton_menu_WebEngine(browserWindow):
        """Replace the web view's native context menu with a Fluent menu that
        also offers next/previous-page navigation actions."""
        def custom_context_menu(self, event):
            page = self.page()
            native_menu = page.createStandardContextMenu()
            menu = _convert_menu(native_menu)
            menu.exec(event.globalPos())
            event.accept()
            native_menu.deleteLater()

        def custom_menu():
            # Base menu: page-navigation shortcuts first, then a separator.
            fluent_menu = RoundMenu(parent=web_view)
            next_page_action = Action(FluentIcon.PAGE_RIGHT, web_view.tr(res.menu_next_page),
                                      triggered=browserWindow.gui.nextPageBtn.click, shortcut='Ctrl+.')
            previous_page_action = Action(FluentIcon.PAGE_LEFT, web_view.tr(res.menu_prev_page),
                                          triggered=browserWindow.gui.previousPageBtn.click, shortcut='Ctrl+,')
            fluent_menu.addAction(next_page_action)
            fluent_menu.addAction(previous_page_action)
            fluent_menu.addSeparator()
            return fluent_menu

        def _convert_menu(native_menu):
            """Convert the native menu's actions into Fluent-styled equivalents."""
            fluent_menu = custom_menu()
            for action in native_menu.actions():
                # Separators pass straight through.
                if action.isSeparator():
                    fluent_menu.addSeparator()
                    continue
                action_text = action.text()
                fluent_action = Action(text=action_text, shortcut=action.shortcut(), triggered=action.trigger)
                match action_text:  # icon mapping
                    case 'Copy' | 'Copy link address':
                        fluent_action.setIcon(FluentIcon.COPY.icon())
                    case 'Cut':
                        fluent_action.setIcon(FluentIcon.CUT.icon())
                    case 'Paste':
                        fluent_action.setIcon(FluentIcon.PASTE.icon())
                    case 'Undo':
                        fluent_action.setIcon(FluentIcon.CANCEL.icon())
                    case 'Reload':
                        fluent_action.setIcon(FluentIcon.SYNC.icon())
                    case 'Back':
                        fluent_action.setIcon(FluentIcon.LEFT_ARROW.icon())
                    case 'Forward':
                        fluent_action.setIcon(FluentIcon.RIGHT_ARROW.icon())
                    case _:
                        pass
                fluent_menu.addAction(fluent_action)
            return fluent_menu

        web_view = browserWindow.view
        web_view.contextMenuEvent = types.MethodType(custom_context_menu, web_view)
95 |
96 |
--------------------------------------------------------------------------------
/GUI/uic/qfluent/action_factory.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | import sys
4 | import subprocess
5 | import traceback
6 | from PyQt5.QtGui import QDesktopServices
7 | from PyQt5.QtCore import QThread, pyqtSignal, QUrl, QTimer
8 |
9 | from qfluentwidgets import InfoBarPosition
10 |
11 | from assets import res
12 | from utils import ori_path, conf
13 | from deploy.update import Proj
14 | from GUI.uic.qfluent.components import (
15 | CustomInfoBar, CustomFlyout, IndeterminateBarFView, CustomMessageBox
16 | )
17 |
18 |
class DescCreator:
    """Opens the project's online documentation in the default browser."""

    @staticmethod
    def run():
        docs_url = QUrl('https://jasoneri.github.io/ComicGUISpider/')
        QDesktopServices.openUrl(docs_url)
23 |
24 |
class ProjUpdateThread(QThread):
    """Background check-then-update worker for the application itself.

    Emits ``checked_signal`` with a ``Proj`` (or a traceback string on
    failure), idles until an update is requested, then performs the update
    and emits ``updated_signal`` the same way.
    """
    checked_signal = pyqtSignal(object)
    update_signal = pyqtSignal()
    updated_signal = pyqtSignal(object)
    debug_signal = pyqtSignal(str)

    def __init__(self, conf_dia):
        self.proj = None
        super(ProjUpdateThread, self).__init__()
        self.conf_dia = conf_dia
        self.is_update_requested = False
        self.log = conf.cLog(name="GUI")
        # Forward debug text straight to the GUI's output panel.
        self.debug_signal.connect(self.conf_dia.gui.say)

    def run(self):
        try:
            self.proj = Proj(debug_signal=self.debug_signal)
            self.proj.check()
            self.checked_signal.emit(self.proj)
            # Idle until the GUI requests the update or we are interrupted.
            while not self.is_update_requested and not self.isInterruptionRequested():
                self.msleep(100)  # sleep 100 ms to keep CPU usage low
            if self.is_update_requested and not self.isInterruptionRequested():
                self.run_update()
        except Exception as e:
            self.log.exception(f"ProjCheckError: {e}")
            self.checked_signal.emit(traceback.format_exc())

    def request_update(self):
        """Slot: flip the flag that lets run() proceed with the update."""
        self.is_update_requested = True

    def run_update(self):
        try:
            # ⚠️ danger!⚠️ -------------->
            self.proj.local_update()
            # <-------------- ⚠️ danger!⚠️
            self.updated_signal.emit(self.proj)
        except Exception as e:
            self.log.exception(f"ProjUpdateError: {e}")
            self.updated_signal.emit(traceback.format_exc())
64 |
65 |
class Updater:
    """GUI-side driver for the check → confirm → update → relaunch flow."""
    res = res.Updater
    proj = None
    version = None

    def __init__(self, gui):
        self.gui = gui
        self.conf_dia = self.gui.conf_dia

    def run(self):
        """Show a progress flyout, start the update thread, wire its signals."""
        def _close_thread():
            if self.conf_dia.puThread:
                self.conf_dia.puThread.quit()
                self.conf_dia.puThread.wait()

        def updated(recv):
            # recv: Proj on success, traceback string on failure.
            try:
                self.gui.updating_fly.close()
            except RuntimeError:
                pass  # flyout widget already destroyed
            if isinstance(recv, str):
                self.gui.textBrowser.append(recv)
                msg = self.res.updated_fail % str(ori_path.joinpath("logs/GUI.log"))
                _type = "ERROR"
                reload_time = 10000
            else:
                msg = self.res.updated_success
                _type = "SUCCESS"
                reload_time = 5000
            CustomInfoBar.show("", msg,
                               self.gui.textBrowser, self.proj.update_info.get("html_url"),
                               f"""<{self.proj.update_info.get("tag_name")}>""", _type=_type)
            _close_thread()
            QTimer.singleShot(reload_time, self.after_update)

        def checked(recv):
            # recv: Proj on success, traceback string on failure.
            try:
                self.check_fly.close()
            except RuntimeError:
                pass  # flyout widget already destroyed
            if isinstance(recv, str):
                self.gui.textBrowser.append(recv)
                CustomInfoBar.show("", self.res.ver_check_fail, self.gui.textBrowser,
                                   f"{Proj.url}/releases", "access releases", _type="ERROR")
                _close_thread()
                return
            self.proj = recv
            print(f"checked: {recv.update_flag}")
            if recv.update_flag == "local":
                # Already up to date; just inform the user.
                CustomInfoBar.show("", self.res.ver_local_latest,
                                   self.conf_dia, f"https://github.com/jasoneri/ComicGUISpider/releases/tag/{recv.local_ver}",
                                   f"""updateInfo-<{recv.local_ver}> """, _type="SUCCESS",
                                   duration=7000, position=InfoBarPosition.BOTTOM_LEFT)
            else:
                match recv.update_flag:
                    case "stable":
                        title = f"📫{res.GUI.Uic.confDia_updateDialog_stable} ⭐️{recv.update_info.get('tag_name')}"
                    case "dev":
                        title = f"📫{res.GUI.Uic.confDia_updateDialog_dev} 🧪{recv.update_info.get('tag_name')}"
                    case _:
                        title = ""
                self.gui.update_dialog = CustomMessageBox(title, self.gui)
                self.gui.update_dialog.show_release_note(recv.update_info.get("body"))
        self.check_fly = CustomFlyout.make(
            view=IndeterminateBarFView(self.conf_dia),
            target=self.conf_dia, parent=self.conf_dia, calc_bottom=True
        )
        self.conf_dia.puThread.checked_signal.connect(checked)
        self.conf_dia.puThread.update_signal.connect(self.conf_dia.puThread.request_update)
        self.conf_dia.puThread.updated_signal.connect(updated)
        self.conf_dia.puThread.start()

    def after_update(self):
        """Relaunch CGS in a fresh process, then close the current window."""
        subprocess.Popen([sys.executable, ori_path.joinpath("CGS.py")])
        QTimer.singleShot(1000, self.gui.close)
141 |
--------------------------------------------------------------------------------
/GUI/uic/qfluent/patch_uic.py:
--------------------------------------------------------------------------------
1 | # patch_ui.py
2 | import pathlib
3 | from copy import deepcopy
4 | import re
5 |
# Directory holding the uic-generated modules (GUI/uic).
_p = pathlib.Path(__file__).parent.parent
# Widget classes to swap: stock Qt widget -> qfluentwidgets equivalent.
REPLACE_MAP = {
    "QLineEdit": "LineEdit",
    "QTextBrowser": "TextBrowser",
    "QTextEdit": "TextEdit",
    "QComboBox": "ComboBox",
    "QCheckBox": "CheckBox",
    "QSpinBox": "CompactSpinBox",
}
# Regex substitutions applied to every file; drops the generated resource import.
DEFAULT_CUSTOM_SUB = {
    "import material_ct_rc\n": ""
}
19 |
20 |
class ConvertBase:
    """Convert a uic-generated module: swap stock Qt widgets for their
    qfluentwidgets equivalents and inject the matching import line.

    Args:
        old: file name (relative to ``_p``) of the uic output to read.
        new: file name (relative to ``_p``) to write the converted module to.
        custom_sub: extra ``re.sub`` pattern -> replacement pairs applied
            before conversion (merged with ``DEFAULT_CUSTOM_SUB``).
        custom_fluent_widgets: additional qfluentwidgets names to include in
            the generated import line.
        extra_import: raw text appended after the qfluentwidgets import.
    """

    def __init__(self, old, new, custom_sub=None, custom_fluent_widgets=None, extra_import=''):
        self.old = old
        with open(_p.joinpath(old), 'r', encoding='utf8') as f:
            self.content = f.read()
        self.new = new
        self.custom_sub = custom_sub or {}
        self.custom_fluent_widgets = custom_fluent_widgets or []
        self.extra_import = extra_import

    def convert_ui_file(self):
        """Inject the qfluentwidgets import and replace widget references."""
        # De-duplicated, sorted import of every replacement widget.
        import_part = 'from qfluentwidgets import ' + ', '.join(
            sorted(set(REPLACE_MAP.values()) | set(self.custom_fluent_widgets))
        )
        # str is immutable, so a plain reference is enough (the original
        # deepcopy of a string was a no-op).
        content = self.content
        content = content.replace(
            r'from PyQt5 import QtCore, QtGui, QtWidgets',
            f'from PyQt5 import QtCore, QtGui, QtWidgets\n{import_part}{self.extra_import}'
        )
        # Replace references such as ``QtWidgets.QComboBox`` with ``ComboBox``.
        for origin, new in REPLACE_MAP.items():
            content = content.replace(f'QtWidgets.{origin}', new)
        with open(_p.joinpath(self.new), 'w', encoding='utf8') as f:
            f.write(content)

    def run(self):
        """Apply the regex substitutions, then rewrite the module file."""
        for pattern, repl in {**self.custom_sub, **DEFAULT_CUSTOM_SUB}.items():
            self.content = re.sub(pattern, repl, self.content)
        self.convert_ui_file()
53 |
54 |
if __name__ == '__main__':
    # Manual usage examples — uncomment after regenerating a uic module:
    # ConvertBase('untitled.py', 'untitled.py').run()
    # ConvertBase('ui_mainwindow.py', 'ui_mainwindow.py').run()
    # ConvertBase('conf_dia.py', 'conf_dia.py', custom_sub={
    #     "= QtWidgets.QLabel": "= StrongBodyLabel",
    #     "acceptBtn = QtWidgets.QToolButton": "acceptBtn = PrimaryToolButton",
    #     "cancelBtn = QtWidgets.QToolButton": "cancelBtn = TransparentToolButton",
    # },
    #     custom_fluent_widgets=['StrongBodyLabel', 'TransparentToolButton', 'PrimaryToolButton']).run()
    # ConvertBase('browser.py', 'browser.py', custom_sub={
    #     "topHintBox = QtWidgets.QToolButton": "topHintBox = TransparentToggleToolButton",
    #     "ensureBtn = QtWidgets.QToolButton": "ensureBtn = PrimaryToolButton",
    #     "QtWidgets.QToolButton": "TransparentToolButton",
    # }, custom_fluent_widgets=['TransparentToolButton', 'PrimaryToolButton', 'TransparentToggleToolButton']).run()
    ...
70 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 jsoneri
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
21 |
22 | ▼ 操作演示 ▼
23 |
24 | | 预览/多选/翻页([备链](https://jsd.vxo.im/gh/jasoneri/imgur@main/CGS/common-usage.gif)) | 读剪贴板([备链](https://jsd.vxo.im/gh/jasoneri/imgur@main/CGS/load_clip.gif)) |
25 | |:--------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------:|
26 | |  |  |
27 |
28 | ## 📑介绍
29 |
30 | 是否有过看漫加载慢,频跳广告而烦躁过😫,用 `CGS` 先下后看就行了啊嗯☝️
31 |
32 | | 网站 | 适用区域 | 补充说明 | 状态 (UTC+8) |
33 | |:--------------------------------------|:----:|:----------:|:----:|
34 | | [拷贝漫画](https://www.copy20.com/) | :cn: | 已解锁隐藏 |  |
35 | | [Māngabz](https://mangabz.com) | :cn: | 代理 |  |
36 | | [禁漫天堂](https://18comic.vip/) | :cn: | 🔞 |  |
37 | | [绅士漫画(wnacg)](https://www.wnacg.com/) | :cn: | 🔞 |  |
38 | | [ExHentai](https://exhentai.org/) | 🌏 | 🔞/代理 |  |
39 | | [Hitomi](https://hitomi.la/) | 🌏 | 🔞 |  |
40 | | [Kemono](https://kemono.su) | 🌏 | 🔞/[📒使用指引](https://jasoneri.github.io/ComicGUISpider/feature/script) | |
41 |
42 | 使用请适度,以免加重对方服务器负担,也减少被封ip风险
43 |
44 | ---
45 |
46 | **[](https://github.com/jasoneri/ComicGUISpider/stargazers) 若觉得体验还不错的话,要不回头点个⭐️star吧👻**
48 |
49 | ---
50 |
51 | ## 📢更新
52 |
53 | ### [](https://github.com/jasoneri/ComicGUISpider/releases/latest) [](https://github.com/jasoneri/ComicGUISpider/actions/workflows/release.yml)
54 |
55 | #### 🎁 Features
56 |
57 | 🔳 hitomi
58 | 🔳 数据集下载自动化/更新等,方式待定
59 | 🔳 读剪贴板功能开发中
60 | 🔳 优化速度,翻页等
61 |
62 | #### 🐞 Fix
63 |
64 | ✅ 拷x恢复 🙊
65 | ✅ jm 发布页( WinError 10054 )问题处理详情看软件内 jm 提示,同时本地缓存统一改为48小时,🔜[相关参考指引](https://jasoneri.github.io/ComicGUISpider/faq/extra.html#_2-%E5%9F%9F%E5%90%8D%E7%9B%B8%E5%85%B3)
66 |
67 | > 配置窗口左下设有`检查更新`按钮,请根据提示进行更新操作
68 |
69 | > [🕑更新历史](docs/changelog/history.md) / [📝开发看板](https://github.com/jasoneri/ComicGUISpider/projects?query=is%3Aopen)
70 |
71 | ## 🍮食用搭配(阅读器)
72 |
73 | 完全适配 CGS 而制,取(改)了独特的名字 `RedViewer (RV)`
74 | 加上最近对其手撕了几十个 commit 血改,还在更新中!所以再次推上
75 |
76 | [](https://github.com/jasoneri/redViewer)
77 |
78 |
79 | ## 💝CGS的部分实现依赖于以下开源项目
80 |
81 |
82 |
83 |
86 |
89 |
92 |
95 |
98 | etc..
99 |
100 |
101 |
102 | 由 [Weblate](https://hosted.weblate.org/engage/comicguispider/) 托管实现多语言的翻译
103 |
104 |
105 |
106 |
107 |
108 | ## 🔇免责声明
109 |
110 | 详见[License](LICENSE) 当你下载或使用本项目,将默许
111 |
112 | 本项目仅供交流和学习使用,请勿用此从事 违法/商业盈利 等,开发者团队拥有本项目的最终解释权
113 |
114 | ---
115 | 
116 |
--------------------------------------------------------------------------------
/assets/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 |
--------------------------------------------------------------------------------
/assets/conf_sample.yml:
--------------------------------------------------------------------------------
1 | ## 配置文件,使用方法详见 readme.md
2 |
3 | custom_map:
4 | 更新4: https://wnacg.com/albums-index-page-4.html
5 | 杂志: https://wnacg.com/albums-index-cate-10.html
6 | log_level: WARNING
7 | proxies: null
8 | sv_path: D:\Comic
--------------------------------------------------------------------------------
/assets/config_icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasoneri/ComicGUISpider/940f80d32a862550fe7dfdef462d21f30e730c1a/assets/config_icon.png
--------------------------------------------------------------------------------
/assets/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasoneri/ComicGUISpider/940f80d32a862550fe7dfdef462d21f30e730c1a/assets/icon.png
--------------------------------------------------------------------------------
/assets/res/transfer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | import pathlib
4 | import hashlib
5 | from datetime import datetime
6 | import yaml
7 | import polib
8 |
9 |
def yaml_to_po(lang, yaml_file, po_file):
    """Flatten a locale YAML file into a gettext PO catalogue.

    Nested keys are joined with dots to form each msgid; the leaf value
    becomes the msgstr. Saves the catalogue to *po_file* and returns the
    in-memory ``polib.POFile``.
    """
    with open(yaml_file, 'r', encoding='utf-8') as f:
        tree = yaml.safe_load(f)

    po = polib.POFile()
    po.metadata = {
        'Project-Id-Version': 'ComicGUISpider',
        'POT-Creation-Date': datetime.now().strftime('%Y-%m-%d %H:%M%z'),
        'Language': lang,
        'MIME-Version': '1.0',
        'Content-Type': 'text/plain; charset=utf-8',
        'Content-Transfer-Encoding': '8bit',
    }

    def _walk(node, prefix=''):
        # Depth-first traversal: dict nodes extend the dotted prefix,
        # leaves are appended as PO entries in encounter order.
        for key, value in node.items():
            msgid = f"{prefix}{key}" if prefix else key
            if isinstance(value, dict):
                _walk(value, f"{msgid}.")
            else:
                po.append(polib.POEntry(msgid=msgid, msgstr=str(value)))

    _walk(tree)
    po.save(po_file)

    return po
39 |
def compile_po_to_mo(po_file, mo_file):
    """Compile a PO catalogue into its binary MO counterpart."""
    polib.pofile(po_file).save_as_mofile(mo_file)
43 |
44 |
def main(base_dir, lang):
    """Build the PO/MO files for *lang* and record the source YAML's hash."""
    locale_root = base_dir / 'locale'
    messages_dir = locale_root / lang / 'LC_MESSAGES'
    messages_dir.mkdir(parents=True, exist_ok=True)

    yaml_file = locale_root / f'{lang}.yml'
    yaml_hash = locale_root / f'{lang}.hash'
    po_file = messages_dir / 'res.po'
    mo_file = messages_dir / 'res.mo'

    po = yaml_to_po(lang, yaml_file, po_file)
    compile_po_to_mo(po_file, mo_file)
    # Persist a digest of the YAML so unchanged files can be detected later.
    with open(yaml_hash, 'w', encoding='utf-8') as f:
        f.write(hashlib.sha256(yaml_file.read_bytes()).hexdigest())
58 |
59 |
60 | if __name__ == "__main__":
61 | main(pathlib.Path(__file__).parent, 'zh-CN')
62 | main(pathlib.Path(__file__).parent, 'en-US')
63 |
--------------------------------------------------------------------------------
/deploy/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | import os
4 | import platform
5 | import pathlib
6 | import subprocess
7 |
8 |
class Env:
    """Windows environment helpers: default paths and file/folder openers."""
    # Default comic download directory.
    default_sv_path = r"D:\Comic"
    # Ditto clipboard manager database (default install location).
    default_clip_db = pathlib.Path.home().joinpath(r"AppData\Roaming\Ditto\Ditto.db")
    # Newest-first clipboard entries for Ditto's schema.
    clip_sql = "SELECT `mText` FROM `MAIN` order by `LID` desc"

    def __init__(self, _p: pathlib.Path):
        self.proj_p = _p
        # Point Qt at the bundled platform plugins. Use the path we were
        # given instead of the module-global ``proj_path`` so the class no
        # longer depends on a global that is only bound later in this module.
        os.environ['QT_QPA_PLATFORM_PLUGIN_PATH'] = str(
            self.proj_p.parent.joinpath(r"site-packages\PyQt5\Qt5\plugins\platforms"))

    def env_init(self):
        """No extra one-time setup is needed on Windows."""
        ...

    @staticmethod
    def open_folder(_p):
        """Open *_p* in Explorer."""
        os.startfile(_p)

    @staticmethod
    def open_file(_f):
        """Open *_f* with its associated application via the ``start`` command."""
        subprocess.run(["start", "", f"{_f}"], shell=True, check=True)
29 |
30 |
# Repository root (this file lives in <root>/deploy/).
proj_path = pathlib.Path(__file__).parent.parent
# Pick the OS-specific helper class; Windows (Env) is the default.
curr_os_module = Env
if platform.system().startswith("Darwin"):
    import sys
    sys.path.append(str(proj_path))
    from deploy.launcher.mac import macOS
    curr_os_module = macOS
curr_os = curr_os_module(proj_path)

if __name__ == '__main__':
    curr_os.env_init()
42 |
--------------------------------------------------------------------------------
/deploy/env_record.json:
--------------------------------------------------------------------------------
1 | ["execjs", "qfluentwidgets", "polib", "pillow_avif"]
--------------------------------------------------------------------------------
/deploy/launcher/CGS.bat:
--------------------------------------------------------------------------------
@rem Launcher: put the bundled runtime and site-packages on PATH, then run CGS.
@echo off

set "_root=%~dp0"
set "_root=%_root:~0,-1%"
cd "%_root%"
@rem echo "%_root%

set "_pyBin=%_root%\runtime"
set "PATH=%_root%\site-packages;%_pyBin%;%PATH%"

cd "%_root%\scripts" && python CGS.py
--------------------------------------------------------------------------------
/deploy/launcher/mac/CGS.bash:
--------------------------------------------------------------------------------
#!/bin/bash
# Launcher: run CGS with the expected interpreter from the scripts directory.
cd scripts;
/usr/local/bin/python3.12 CGS.py;
--------------------------------------------------------------------------------
/deploy/launcher/mac/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | import pathlib
4 | import re
5 | import subprocess
6 |
7 | # 对应`冬青黑体简体中文`,想要换其他字体可聚焦搜索`字体册`,在目标字体右键`访达中显示`,可以看到字体文件,把字体名替换掉`font`的值即可
8 | # 字体册仅支持能访达/系统alias能搜索出的字体,如果是下载的字体文件,可以看`macOS.font_replace _repl`
9 | font = "Hiragino Sans GB"
10 |
11 |
class macOS:
    """macOS twin of the Windows ``Env`` helper.

    Carries platform defaults (download dir, Maccy clipboard DB and query)
    and performs one-shot environment preparation for the .app bundle:
    font substitution in the generated Qt UI modules and removal of
    Windows-only pins from requirements.txt.
    """

    default_sv_path = pathlib.Path.home().joinpath("Downloads/Comic")
    default_clip_db = pathlib.Path.home().joinpath(
        "Library/Containers/org.p0deje.Maccy/Data/Library/Application Support/Maccy/Storage.sqlite")
    clip_sql = "SELECT `ZTITLE` FROM `ZHISTORYITEM` order by `Z_PK` desc"

    def __init__(self, _p):
        self.proj_p = _p

    @staticmethod
    def open_folder(_p):
        # Finder handles directories via the ``open`` CLI.
        subprocess.run(['open', _p])

    @staticmethod
    def open_file(_f):
        subprocess.run(['open', _f])

    def env_init(self):
        # Step 1: replace the Windows-only UI font in the generated modules.
        self.font_replace()
        # Step 2: strip Windows-only packages from requirements.txt.
        self.handle_requirements()

    def font_replace(self):
        """Patch every generated uic module to use the module-level ``font``."""
        def _repl(content):
            # For a downloaded font file (absolute path) the commented
            # variant below can be used instead of the plain substitution.
            # font_path = "/Users/Shared/.../xxx.ttc"
            # if "QFontDatabase" not in content:
            #     content = ("from PyQt5.QtGui import QFontDatabase\n"
            #                f"font_path = '{font_path}'\n"
            #                f"_id = QFontDatabase.addApplicationFont(font_path)\n") + content
            # new_content = re.sub(r'font = .*?\n.*?font\.setFamily\(".*?"\)',
            #                      f'font = QFontDatabase.font("{font}", "Regular", 11)', content, re.M)
            return re.sub(r'font\.setFamily\(".*?"\)', f'font.setFamily("{font}")', content)

        uic_dir = self.proj_p.joinpath("GUI/uic")
        for ui_module in ("conf_dia.py", "browser.py", "ui_mainwindow.py"):
            self.file_content_replace(uic_dir.joinpath(ui_module), _repl)

    def handle_requirements(self):
        """Drop Windows-only pins (pywin32, twisted-iocpsupport) in place."""
        def _drop_win_only(content):
            return re.sub(r'^(twisted-iocpsupport==.*|pywin32==.*)[\r\n]*', "", content, flags=re.MULTILINE)

        self.file_content_replace(self.proj_p.joinpath('requirements.txt'), _drop_win_only)

    @staticmethod
    def file_content_replace(file, repl_func):
        """Apply ``repl_func`` to the file's text and write the result back in place."""
        with open(file, 'r+', encoding='utf-8') as fp:
            new_content = repl_func(fp.read())
            fp.seek(0)
            fp.truncate()
            fp.write(new_content)
65 |
--------------------------------------------------------------------------------
/deploy/launcher/mac/dos2unix.bash:
--------------------------------------------------------------------------------
#!/bin/bash
# Normalize line endings (CRLF -> LF) in both the repo checkout next to this
# script and the installed app bundle, so scripts edited on Windows run on mac.
curr_p=$(cd "$(dirname "$0")";pwd)
app_proj_p="/Applications/CGS.app/Contents/Resources/scripts"

# Homebrew lives at /usr/local on Intel and /opt/homebrew on Apple Silicon;
# the original check only looked at /usr/local and so reinstalled (or failed)
# on arm64 machines.
if ! command -v brew >/dev/null 2>&1 && [ ! -x /usr/local/bin/brew ] && [ ! -x /opt/homebrew/bin/brew ]; then
  echo "not brew, downloading brew...";
  /bin/zsh -c "$(curl -fsSL https://gitee.com/cunkai/HomebrewCN/raw/master/Homebrew.sh)";
fi
if ! command -v dos2unix >/dev/null 2>&1 && [ ! -x /usr/local/bin/dos2unix ] && [ ! -x /opt/homebrew/bin/dos2unix ]; then
  echo "not dos2unix, downloading dos2unix...";
  brew install dos2unix;
fi

# Same extension list for both locations; one loop instead of ten
# copy-pasted find commands.
for ext in bash md py json yml; do
  find "$curr_p/../" -type f -name "*.$ext" -exec sudo dos2unix {} +;
  find "$app_proj_p" -type f -name "*.$ext" -exec sudo dos2unix {} +;
done
22 |
--------------------------------------------------------------------------------
/deploy/launcher/mac/init.bash:
--------------------------------------------------------------------------------
#!/bin/bash
# One-time environment initializer for the CGS macOS bundle: ensures an
# x86_64 Python 3.12 (via Homebrew / Rosetta 2) and installs dependencies.

# Bring the Terminal window to the front so the user sees the progress.
osascript -e 'tell application "Terminal" to activate' -e 'tell application "System Events" to tell process "Terminal" to set frontmost to true'

curr_p=$(cd "$(dirname "$0")";pwd);
cd $curr_p/../../../;
REQUIREMENTS="requirements/mac_x86_64.txt"

# Detect Apple Silicon.
if [ "$(uname -m)" = "arm64" ]; then
  REQUIREMENTS="requirements/mac_arm64.txt"
  # Install Rosetta 2 when it is missing (needed to run x86_64 binaries).
  if ! arch -x86_64 echo > /dev/null 2>&1; then
    echo "检测到 Apple Silicon Mac,但未安装 Rosetta 2,正在安装..."
    /usr/sbin/softwareupdate --install-rosetta --agree-to-license
  fi
fi

PYTHON_PATH="/usr/local/bin/python3.12"
# Make sure the x86_64 build of Python is installed.
if [ ! -x "$PYTHON_PATH" ]; then
  echo "无python3.12环境,正在初始化...";
  # Detect the Homebrew install locations (Intel vs Apple Silicon prefix).
  if [ -x "/opt/homebrew/bin/brew" ]; then
    ARM_BREW_PATH="/opt/homebrew/bin/brew"
  fi
  if [ -x "/usr/local/bin/brew" ]; then
    INTEL_BREW_PATH="/usr/local/bin/brew"
  fi
  # Install Homebrew when neither variant is present.
  if [ ! -x "$INTEL_BREW_PATH" ] && [ ! -x "$ARM_BREW_PATH" ]; then
    echo "未检测到 Homebrew,正在安装..."
    /bin/zsh -c "$(curl -fsSL https://gitee.com/cunkai/HomebrewCN/raw/master/Homebrew.sh)";
  fi
  # On Apple Silicon, install the x86_64 Python through Rosetta 2.
  if [ "$(uname -m)" = "arm64" ]; then
    if [ -x "$INTEL_BREW_PATH" ]; then
      echo "使用 Intel Homebrew 安装 Python..."
      "$INTEL_BREW_PATH" install python@3.12
      "$INTEL_BREW_PATH" link python@3.12
    else
      echo "通过 Rosetta 2 安装 Intel 版本的 Python..."
      arch -x86_64 /bin/zsh -c "$(curl -fsSL https://gitee.com/cunkai/HomebrewCN/raw/master/Homebrew.sh)"
      arch -x86_64 /usr/local/bin/brew install python@3.12
      arch -x86_64 /usr/local/bin/brew link python@3.12
    fi
  else
    # On Intel Macs, install directly.
    if [ -x "$INTEL_BREW_PATH" ]; then
      "$INTEL_BREW_PATH" install python@3.12
      "$INTEL_BREW_PATH" link python@3.12
    fi
  fi
fi

"$PYTHON_PATH" deploy/__init__.py;
echo "正在安装依赖(自动过滤macOS不兼容包)..."
# Feed requirements through grep to strip the Windows-only pins, matching
# what deploy/launcher/mac handle_requirements() does for the on-disk file.
cat "$REQUIREMENTS" | grep -vE 'pywin32==|twisted-iocpsupport==' | "$PYTHON_PATH" -m pip install -r /dev/stdin \
  -i http://mirrors.aliyun.com/pypi/simple/ \
  --trusted-host mirrors.aliyun.com \
  --user \
  --break-system-packages;

echo ""
echo "===== 初始化完毕,请手动关闭终端窗口 ====="
echo ""
--------------------------------------------------------------------------------
/deploy/online_scripts/win.ps1:
--------------------------------------------------------------------------------
# Bootstrap for the portable Windows package: downloads pkg_mgr.py and runs
# it with the bundled interpreter to (re)install dependencies.
$IsPwsh = $PSVersionTable.PSEdition -eq "Core"

$proj_p = Get-Location

# Must be executed from the unzipped package root (where runtime/ lives).
$python_exe = Join-Path $proj_p "runtime/python.exe"
if (-not (Test-Path $python_exe)) {Write-Output "runtime/python.exe not found, need excute on unzipped path/请在解压的根目录下执行";pause;exit}

# PowerShell 7+ exposes the culture directly; Windows PowerShell 5 needs the
# thread's CurrentUICulture instead.
$locale = if ($IsPwsh) { (Get-Culture).Name } else { [System.Threading.Thread]::CurrentThread.CurrentUICulture.Name }

# Chinese locales route raw.githubusercontent through a proxy for speed.
$targetUrl = if ($locale -eq "zh-CN") {"https://gitproxy.click/https://raw.githubusercontent.com/jasoneri/ComicGUISpider/refs/heads/GUI/deploy/pkg_mgr.py"} else {"https://raw.githubusercontent.com/jasoneri/ComicGUISpider/refs/heads/GUI/deploy/pkg_mgr.py"}

$pyPath = "$proj_p\pkg_mgr.py"
try {
    # Windows PowerShell (<= 5) can reject modern TLS certs; install a
    # trust-all certificate policy as a workaround before downloading.
    if ($PSVersionTable.PSVersion.Major -le 5) {
        Add-Type @"
using System.Net;
using System.Security.Cryptography.X509Certificates;
public class TrustAllCertsPolicy : ICertificatePolicy {
    public bool CheckValidationResult(ServicePoint s, X509Certificate c, WebRequest r, int e) { return true; }
}
"@
        [Net.ServicePointManager]::CertificatePolicy = [TrustAllCertsPolicy]::new()
    }
    Invoke-WebRequest -Uri $targetUrl -OutFile $pyPath -UseBasicParsing
}
catch {
    Write-Output "install pkg_mgr.py failed/下载pkg_mgr.py失败";pause;exit
}
# Run the downloaded manager with the bundled Python, then clean up.
& "$python_exe" "$pyPath" -l $locale
Remove-Item -Path $pyPath -Force
--------------------------------------------------------------------------------
/deploy/pkg_mgr.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 | import importlib
4 | import platform
5 | import subprocess
6 | import pathlib
7 | import pip
8 |
9 | import httpx
10 | import tqdm
11 | from loguru import logger
12 |
p = pathlib.Path(__file__).parent


def which_env():
    """Return the requirements-profile name for the current OS/arch.

    ``win`` for anything that is not macOS; on macOS the Apple Silicon
    build gets ``mac_arm64`` and every other architecture falls back to
    ``mac_x86_64``.
    """
    if platform.system() != "Darwin":
        return "win"
    return "mac_arm64" if platform.machine() == "arm64" else "mac_x86_64"
27 |
28 |
class PkgMgr:
    """Dependency manager: downloads the matching requirements file and
    installs it with uv, optionally mirroring every step to a Qt signal."""

    def __init__(self, locale="zh-CN", run_path=None, debug_signal=None):
        """
        :param locale: "zh-CN" switches downloads/indexes to CN mirrors.
        :param run_path: package root; defaults to this file's directory.
        :param debug_signal: optional Qt-style signal; every printed line is
            also emitted on it (see ``print``).
        :raises FileNotFoundError: if neither <run_path>/scripts/CGS.py nor
            <run_path>/CGS.py exists (i.e. not a recognized layout).
        """
        self.cli = httpx.Client()
        self.locale = locale
        self.run_path = run_path or p
        self.debug_signal = debug_signal
        # Portable package keeps the project under scripts/; a git clone
        # has CGS.py at the root.
        if self.run_path.joinpath("scripts/CGS.py").exists():
            self.proj_p = self.run_path.joinpath("scripts")
        elif self.run_path.joinpath("CGS.py").exists():
            self.proj_p = self.run_path
        else:
            raise FileNotFoundError(f"CGS.py not found, unsure env. check your run path > [{self.run_path}].")
        self.env = which_env()
        self.set_assets()

    def github_speed(self, url):
        # Prefix a GitHub proxy for Chinese locales (raw.githubusercontent
        # is slow/blocked there); other locales get the URL unchanged.
        if self.locale == "zh-CN":
            url = "https://gitproxy.click/" + url
        return url

    def set_assets(self):
        """Resolve the per-OS requirements file: remote URL and local path."""
        requirements = f"requirements/{self.env}.txt"
        self.requirements_url = self.github_speed(f"https://raw.githubusercontent.com/jasoneri/ComicGUISpider/refs/heads/GUI/{requirements}")
        self.requirements = self.proj_p.joinpath(requirements)

    def print(self, *args, **kwargs):
        # Mirror output to the GUI (when a signal was supplied) and stdout.
        if self.debug_signal:
            self.debug_signal.emit(*args, **kwargs)
        print(*args, **kwargs)

    def dl(self):
        """Install uv via pip, then stream-download the requirements file."""
        def _dl(url, out):
            # Stream in 1000-byte chunks with a tqdm progress bar.
            with self.cli.stream("GET", url) as r:
                with open(out, "wb") as f:
                    for chunk in tqdm.tqdm(r.iter_bytes(1000), desc=f"downloading {out.name}"):
                        f.write(chunk)
            self.print(f"[downloaded] {out.name}")

        def _dl_uv():
            cmd = ["install", "uv"]
            if self.locale == "zh-CN":
                cmd.extend(["-i", "https://pypi.tuna.tsinghua.edu.cn/simple"])
            exitcode = pip.main(cmd)
            self.print(f"[pip install uv exitcode] {exitcode}")

        _dl_uv()
        _dl(self.requirements_url, self.requirements)

    def uv_install_pkgs(self):
        """Run ``uv pip install -r <requirements>`` against the current
        interpreter, relaying output line-by-line as it arrives.

        :returns: (exit_code, full_output) — subprocess return code and the
            list of stripped output lines.
        """
        self.print("uv pip installing pkg...")
        uv = importlib.import_module("uv")
        cmd = [uv.find_uv_bin(), "pip", "install", "-r", str(self.requirements), "--python", sys.executable]
        if self.locale == "zh-CN":
            cmd.extend(["--index-url", "http://mirrors.aliyun.com/pypi/simple/", "--trusted-host", "mirrors.aliyun.com"])
        self.print("[uv_install_pkgs cmd]" + " ".join(cmd))
        # stderr merged into stdout, line-buffered text mode, so the loop
        # below sees everything in order.
        process = subprocess.Popen(
            cmd, cwd=self.run_path,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
            text=True, bufsize=1, universal_newlines=True
        )
        full_output = []
        while True:
            line = process.stdout.readline()
            if not line:
                if process.poll() is not None:
                    break  # process has exited and produced no more output
                continue
            line = line.strip()
            full_output.append(line)
            # relay in real time
            self.print(line)
        # drain whatever is still buffered after the process exited
        remaining = process.stdout.read()
        if remaining:
            for line in remaining.splitlines():
                cleaned_line = line.strip()
                full_output.append(cleaned_line)
                self.print(cleaned_line)
        # wait for the process to finish and collect its exit code
        exit_code = process.wait()
        if exit_code == 0:
            self.print("[!uv_install_pkgs done!]")
        return exit_code, full_output

    @logger.catch(reraise=True)
    def run(self):
        """Download assets then install; returns ``uv_install_pkgs``'s result."""
        self.dl()
        exit_code, full_output = self.uv_install_pkgs()
        return exit_code, full_output
112 |
113 | @logger.catch(reraise=True)
114 | def run(self):
115 | self.dl()
116 | exit_code, full_output = self.uv_install_pkgs()
117 | return exit_code, full_output
118 |
119 |
if __name__ == "__main__":
    # CLI entry point: the locale flag selects mirrors/proxies for downloads.
    cli_parser = argparse.ArgumentParser()
    cli_parser.add_argument("-l", "--locale", default="zh-CN", help="locale")
    cli_args = cli_parser.parse_args()
    PkgMgr(cli_args.locale).run()
127 |
--------------------------------------------------------------------------------
/docs/.vitepress/theme/Layout.vue:
--------------------------------------------------------------------------------
1 |
14 |
15 |
16 |
17 |
18 |
19 |
34 |
35 |
36 |
37 |
38 |
39 |
--------------------------------------------------------------------------------
/docs/.vitepress/theme/index.ts:
--------------------------------------------------------------------------------
import DefaultTheme from 'vitepress/theme'
import Layout from './Layout.vue'
import './style.css'

// Extend the default VitePress theme, swapping in the custom layout wrapper.
export default {
  extends: DefaultTheme,
  Layout
}
--------------------------------------------------------------------------------
/docs/.vitepress/theme/style.css:
--------------------------------------------------------------------------------
1 | /**
2 | * Customize default theme styling by overriding CSS variables:
3 | * https://github.com/vuejs/vitepress/blob/main/src/client/theme-default/styles/vars.css
4 | */
5 |
6 | /**
7 | * Component: Home
8 | * -------------------------------------------------------------------------- */
9 |
:root {
  /* Hero title: transparent fill so the gradient background shows through. */
  --vp-home-hero-name-color: transparent;
  --vp-home-hero-name-background: -webkit-linear-gradient(
    120deg,
    #eea320f4 60%,
    #441bd900
  );

  /* Blurred orange-to-pink glow behind the hero image. */
  --vp-home-hero-image-background-image: -webkit-linear-gradient(
    125deg,
    #f4c03bd5 40%,
    #fd3e94de 80%
  );
  --vp-home-hero-image-filter: blur(40px);
}
25 |
--------------------------------------------------------------------------------
/docs/_github/README_en.md:
--------------------------------------------------------------------------------
1 |
20 |
21 | ▼ Demo ▼
22 |
23 | | Preview / Multi-select / Paging | Clipboard Tasks |
24 | |:-------------------------------------------------------------------------------:|:----------------------------------------------------------------------------:|
25 | |  |  |
26 |
27 | ## 📑 Introduction
28 |
29 | ### Supported Websites
30 |
31 | | Website | locale | Notes | status (UTC+8) |
32 | |:----------------------------------------|:------:|:-----------------------:|:--------------------------------------------------------------------------------------------------------------:|
33 | | [MangaCopy](https://www.copy20.com/) | :cn: | Hidden content unlocked |  |
34 | | [Māngabz](https://mangabz.com) | :cn: | |  |
35 | | [18comic](https://18comic.vip/) | :cn: | 🔞 |  |
36 | | [wnacg](https://www.wnacg.com/) | :cn: | 🔞 |  |
37 | | [ExHentai](https://exhentai.org/) | 🌏 | 🔞 |  |
38 | | [Hitomi](https://hitomi.la/) | 🌏 | 🔞 need v2.2.0-beta | |
39 | | [Kemono](https://kemono.su) | 🌏 | 🔞/[📒Usage](https://jasoneri.github.io/ComicGUISpider/feature/script) need [v2.2.0-beta.2](https://github.com/jasoneri/ComicGUISpider/releases/tag/v2.2.0-beta.2) | |
40 |
41 |
42 |
43 | ## 📜Contributing
44 |
45 | The UI now supports basic `en-US`, but help is still needed with i18n maintenance, such as documentation
46 |
47 | Come here [🌏i18n Guide](../dev/i18n.md)
48 |
49 |
50 |
51 | ## 📢 Changelog
52 |
53 | Left-bottom of the config-dialog has `Check Update` button, please update according to the prompt
54 |
55 | > [🕑Full History](docs/UPDATE_RECORD.md)
56 |
57 | ## 🚀 Usage
58 |
59 | ### GUI
60 |
61 | `python CGS.py`
62 |
63 | ### CLI
64 |
65 | `python crawl_only.py --help`
66 | Or using env of portable package:
67 | `.\runtime\python.exe .\scripts\crawl_only.py --help`
68 |
69 | ## 🔨 Configuration
70 |
71 | [🔨Configuration](https://jasoneri.github.io/ComicGUISpider/locate/en/config)
72 |
73 | ## 🔇 Disclaimer
74 |
75 | See [License](LICENSE). By using this project you agree to:
76 |
77 | - Non-commercial use only
78 | - Developer's final interpretation
79 |
80 | ---
81 | 
82 |
--------------------------------------------------------------------------------
/docs/_github/preset_preview.md:
--------------------------------------------------------------------------------
1 |
2 | ---
3 |
4 | 
5 |
6 | [🚀快速开始(❗️新用户必读)](https://jasoneri.github.io/ComicGUISpider/deploy/quick-start) | [❓FAQ](https://jasoneri.github.io/ComicGUISpider/faq) | [⚡️github资源下载加速](https://github.akams.cn/)
7 |
--------------------------------------------------------------------------------
/docs/_github/preset_stable.md:
--------------------------------------------------------------------------------
1 |
2 | ---
3 |
4 | [🚀快速开始(❗️新用户必读)](https://jasoneri.github.io/ComicGUISpider/deploy/quick-start) | [❓FAQ](https://jasoneri.github.io/ComicGUISpider/faq) | [⚡️github资源下载加速](https://github.akams.cn/)
5 |
--------------------------------------------------------------------------------
/docs/_github/release_notes.md:
--------------------------------------------------------------------------------
1 |
2 | ## 🎁儿童节
3 |
4 | ## 🐞 Fix
5 |
6 | ✅ 拷x恢复 🙊
7 | ✅ jm 发布页( WinError 10054 )问题处理详情看软件内 jm 提示,同时本地缓存统一改为48小时,🔜[相关参考指引](https://jasoneri.github.io/ComicGUISpider/faq/extra.html#_2-%E5%9F%9F%E5%90%8D%E7%9B%B8%E5%85%B3)
8 |
--------------------------------------------------------------------------------
/docs/assets/img/config/conf_usage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasoneri/ComicGUISpider/940f80d32a862550fe7dfdef462d21f30e730c1a/docs/assets/img/config/conf_usage.png
--------------------------------------------------------------------------------
/docs/assets/img/config/conf_usage_en.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasoneri/ComicGUISpider/940f80d32a862550fe7dfdef462d21f30e730c1a/docs/assets/img/config/conf_usage_en.png
--------------------------------------------------------------------------------
/docs/assets/img/deploy/mac-app-move.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasoneri/ComicGUISpider/940f80d32a862550fe7dfdef462d21f30e730c1a/docs/assets/img/deploy/mac-app-move.jpg
--------------------------------------------------------------------------------
/docs/assets/img/dev/branch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasoneri/ComicGUISpider/940f80d32a862550fe7dfdef462d21f30e730c1a/docs/assets/img/dev/branch.png
--------------------------------------------------------------------------------
/docs/assets/img/faq/ditto_settings.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasoneri/ComicGUISpider/940f80d32a862550fe7dfdef462d21f30e730c1a/docs/assets/img/faq/ditto_settings.png
--------------------------------------------------------------------------------
/docs/assets/img/feature/browser_copyBtn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasoneri/ComicGUISpider/940f80d32a862550fe7dfdef462d21f30e730c1a/docs/assets/img/feature/browser_copyBtn.png
--------------------------------------------------------------------------------
/docs/assets/img/icons/website/copy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasoneri/ComicGUISpider/940f80d32a862550fe7dfdef462d21f30e730c1a/docs/assets/img/icons/website/copy.png
--------------------------------------------------------------------------------
/docs/assets/img/icons/website/ehentai.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasoneri/ComicGUISpider/940f80d32a862550fe7dfdef462d21f30e730c1a/docs/assets/img/icons/website/ehentai.png
--------------------------------------------------------------------------------
/docs/assets/img/icons/website/hitomi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasoneri/ComicGUISpider/940f80d32a862550fe7dfdef462d21f30e730c1a/docs/assets/img/icons/website/hitomi.png
--------------------------------------------------------------------------------
/docs/assets/img/icons/website/jm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasoneri/ComicGUISpider/940f80d32a862550fe7dfdef462d21f30e730c1a/docs/assets/img/icons/website/jm.png
--------------------------------------------------------------------------------
/docs/assets/img/icons/website/mangabz.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasoneri/ComicGUISpider/940f80d32a862550fe7dfdef462d21f30e730c1a/docs/assets/img/icons/website/mangabz.png
--------------------------------------------------------------------------------
/docs/assets/img/icons/website/wnacg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasoneri/ComicGUISpider/940f80d32a862550fe7dfdef462d21f30e730c1a/docs/assets/img/icons/website/wnacg.png
--------------------------------------------------------------------------------
/docs/changelog/history.md:
--------------------------------------------------------------------------------
1 | # 🕑 更新历史
2 |
3 | > [!Info] 此页面会忽略修复动作相关的记录,含引导意义的条目除外
4 |
5 | #### v2.2.0 | ~ 2025-05-20
6 |
7 | + hitomi 支持(部分)
8 | + Kemono 脚本集更新(下载引擎使用强大的 `Motrix-PRC`)
9 | + 页数命名优化:更改为纯数字补零命名,附带可选 [文件命名后缀修改](https://jasoneri.github.io/ComicGUISpider/config/#其他-yml-字段)
10 | + i18n 自动编译优化
11 | + 使用 astral-sh/uv 管理依赖
12 |
13 | #### v2.1.3 | ~ 2025-04-19
14 |
15 | + 支持 i18n
16 | + 增加贡献指南等,文档优化,并建成 github-pages 做官网
17 |
18 | ### v2.1.2 | ~ 2025-04-12
19 |
20 | + 更换看板娘
21 | + 版面增设各网站运行状态
22 |
23 | ### v2.1.0 | ~ 2025-03-29
24 |
25 | + 为预览窗口各封面右上增设badge
26 | + 将`requirements.txt`分别以`win`,`mac_x86_64`,`mac_arm64`编译
27 |
28 | ### v2.0.0 | ~ 2025-03-21
29 |
30 | + `使用说明`与`更新`在`v2.0.0`后将设置在配置窗口的左下按钮,绿色包可执行程序只保留主程序(macOS加个初始化.app)
31 | + 优化更新流程,贴近主流软件体验
32 | + ✨使用`QFluentWidgets`优化界面与操作体验
33 | + 搜索框右键选项`展开预设`, 序号输入框也有
34 | + 预览窗口改造了右键菜单,增设翻页进去菜单项,附带有`CGS`内的全局快捷键
35 | + 正确处理小数位级系统缩放,去掉`同步系统缩放`也有良好界面体验
36 | (操作参考[`v1.6.3`删代码部分](#v1-6-3-2025-02-13),后续若有反响则做成开关之类提供切换)
37 |
38 | ### v1.8.2 | ~ 2025-03-08
39 |
40 | + ✨预览窗口新增`复制`未完成任务按钮,配合剪贴板功能的流程,常用于进度卡死不动重下或补漏页
41 |
42 | ### v1.7.5 | ~ 2025-03-01
43 |
44 | + 序号输入扩展:输入框支持单个负数,例`-3`表示选择倒数三个
45 |
46 | ### v1.7.2 | ~ 2025-02-24
47 |
48 | + ✨新增`增加标识`开关勾选,为储存目录最后加上网站url上的作品id
49 | + ✨细化任务:预览窗口的`子任务进度`视图
50 | + 处理拷贝的隐藏漫画
51 | + 修正往后jm全程不走代理(如有jm需要走代理的场景请告知开发者)
52 |
53 | ### v1.6.3 | ~ 2025-02-13
54 |
55 | + ✨配置窗口新增`去重`勾选开关:分别有预览提示样式和自动过滤
56 | + ✨增加命令行工具(crawl_only.py)使用
57 | + 优化高分辨率(原开发环境为1080p);若显示不理想可桌面右键显示设置缩放改为100%,或在[`CGS.py`](https://github.com/jasoneri/ComicGUISpider/blob/GUI/CGS.py)中删除带`setAttribute(Qt.AA_` 的两行代码
58 |
59 | ### v1.6.2 | ~ 2024-12-08
60 |
61 | + ✨增加域名缓存机制(针对jm/wnacg发布页访问错误),每12小时才刷新可用域名,缓存文件为`scripts/__temp/xxx_domain.txt`,可删可改
62 | + 处理部分用户环境无法显示ui图标相关资源问题(如对比动图/视频仍有ui图标没显示,请反馈)
63 |
64 | ### v1.6.1 | ~ 2024-11-23
65 | + ✨新增读剪切板匹配生成任务功能
66 |
67 | ### v1.6.0 | ~ 2024-09-30
68 | + 🌐支持`Māngabz`
69 | + ✨支持`macOS`
70 | + 🌐支持`exhentai`
71 | + [`exhentai`]优化e绅士标题取名,优先使用副标题的中/日文作为目录名
72 | + ✨新增翻页功能
73 | + 翻页时保留选择状态
74 | + ✨新增预览功能
75 | > [!Info] 内置小型浏览器,无需打开电脑浏览器,视频3有介绍各种用法
76 |
77 | ### v1.5 | 上世纪 ~ 2024-08-05
78 | + ✨发布相关
79 | > [!Info] 发布开箱即用版,GUI视频使用指南
80 |
81 | + ✨脚本集说明(kemono,saucenao)
82 | + 新增`nekohouse`
83 | + 🌐支持`jm(禁漫)`
84 | + 支持车号输入
85 | + 🌐支持`拷贝漫画`
86 | + 在配置设了代理后能解锁部分漫画章节
87 | + 处理章节数量大于300
88 | + 🌐支持`wnacg`
89 |
--------------------------------------------------------------------------------
/docs/config/index.md:
--------------------------------------------------------------------------------
1 | # 🔨 主配置
2 |
3 | 
4 |
5 | ::: info 配置文件为初始使用后产生的 `scripts/conf.yml`
6 | 有关生效时间节点请查阅 [📒配置生效相关](../faq/extra.md#_1-配置生效相关)
7 | :::
8 | ::: warning 多行的编辑框输入为 `yaml` 格式(除了 eh_cookies ),冒号后要加一个⚠️ `空格` ⚠️
9 | :::
10 |
11 | ## 配置项 / 对应 `yml` 字段
12 |
13 | ### 存储路径 / `sv_path`
14 |
15 | 下载目录
16 | 目录结构里还有个 `web` 文件夹的情况是因为默认关联 [`redViewer`](https://github.com/jasoneri/redViewer) 项目所以这样设置的
17 |
18 | ### 日志等级 / `log_level`
19 |
20 | 后台运行过后会有 log 目录,GUI 与 后台 同级,报错时 GUI 会进行操作指引
21 |
22 | ### 去重 / `isDeduplicate`
23 |
24 | 勾选状态下,预览窗口会有已下载的样式提示
25 | 同时下载也会自动过滤已存在的记录
26 | > [!Info] 当前仅🔞网适用
27 |
28 | ### 增加标识 / `addUuid`
29 |
30 | 存储时目录最后增加标识,用以处理同一命名的不同作品等([对应逻辑](../faq/other.md#_1-去重,增加标识相关说明))
31 |
32 | ### 代理 / `proxies`
33 |
34 | 翻墙用
35 | > [!Warning] ⚠️ 已设置 jm 无论用全局还是怎样都只走本地原生ip
36 |
37 | > [!Info] 建议使用代理模式在此配置代理,而非全局代理模式,不然访问图源会吃走大量代理的流量
38 |
39 | ### 映射 / `custom_map`
40 |
41 | 搜索输入映射
42 | 当搜索与预设不满足使用时,先在此加入键值对,重启后在搜索框输入自定义键就会将对应网址结果输出,`🎥视频使用指南3`有介绍用法
43 |
44 | 1. 映射无需理会域名,前提是用在当前网站,只要满足 `不用映射时能访问` 和 `填入的不是无效的url`,
45 | 程序会内置替换成可用的域名,如非代理下映射的`wnacg.com`会自动被替换掉
46 | 2. 注意自制的映射有可能超出翻页规则范围,此时可通知开发者进行扩展
47 |
48 | ### 预设 / `completer`
49 |
50 | 自定义预设
51 | 鼠标悬停在输入框会有`序号对应网站`的提示(其实就是选择框的序号)
52 | `🎥视频使用指南3`有介绍用法
53 |
54 | ### eh_cookies / `eh_cookies`
55 |
56 | 使用`exhentai`时必需
57 | [🎬获取方法](https://jsd.vxo.im/gh/jasoneri/imgur@main/CGS/ehentai_get_cookies_new.gif)
58 | [🔗动图中的curl转换网站](https://tool.lu/curl/)
59 |
60 | ### 剪贴板db / `clip_db`
61 |
62 | ::: tip 前提:已阅 [`读剪贴板`功能说明](../feature/index#_4-1-读剪贴板)
63 | :::
64 |
65 | 读取剪贴板功能无法使用时可查看路径是否存在,通过以下查得正确路径后在此更改
66 |
67 | 1. ditto(win): 打开选项 → 数据库路径
68 | 2. maccy(macOS): [issue 搜索相关得知](https://github.com/p0deje/Maccy/issues/271)
69 |
70 | ### 读取条数 / `clip_read_num`
71 |
72 | 读取剪贴板软件条目数量
73 |
74 | ## 其他 `yml` 字段
75 |
76 | ::: info 此类字段没提供配置窗口便捷修改(或以后支持),不设时使用默认值
77 | :::
78 |
79 | ### `img_sv_type`
80 |
81 | 默认值: `jpg`
82 | 图片文件命名后缀
83 |
--------------------------------------------------------------------------------
/docs/config/other.md:
--------------------------------------------------------------------------------
1 | # 🔧 其他配置
2 |
3 | ## 1. 预览视窗的复制按钮相关
4 |
5 | 需要更改剪贴板软件的设置令功能得以使用正常
6 |
7 | ### [win] ditto
8 |
9 | 进ditto选项,点高级进页面后,查找图示的两个值将其改为150
10 |
11 | 
12 |
13 | ### [macOS] maccy
14 |
15 | 看了一圈Maccy设置没得改,所以用测试过能正常复制的最低延迟,300ms * 复制条数
16 |
--------------------------------------------------------------------------------
/docs/deploy/mac-required-reading.md:
--------------------------------------------------------------------------------
1 | # 💻 macOS( mac 操作系统) 部署
2 |
3 | > [!Info] WantHelp!
4 | > 寻找 `mac—arm64` 开发者维护 `mac` 应用(本渣配置台式开始跑不动 `mac` 虚拟机了) [查看详情](
5 | https://github.com/jasoneri/ComicGUISpider/issues/35)
6 |
7 | ## 🚩 前置架构相关
8 |
9 | 通过以下命令查看架构(一般英特尔芯片i系的即为`x86_64`, 苹果芯片m系的为`arm64`)
10 |
11 | ```bash
12 | python -c "import platform; print(platform.machine())"
13 | ```
14 |
15 | 1. `x86_64` 架构: 开发者虚拟机就是该架构,一般按下面流程走即可
16 | 2. `arm64` 架构: CGS-init.app 会自动安装`Rosetta 2`,下文中有列出一些[应对`CGS.app`无法打开](#针对弹窗报错的尝试)的处理方案
17 |
18 | ## 📑 绿色包说明
19 |
20 | macOS 仅需下载 `CGS-macOS`压缩包
21 |
22 | ::: details 解压后目录树(点击展开)
23 |
24 | ```text
25 | CGS-macOS
26 | ├── CGS.app # 既是 *主程序*,也可以当成代码目录文件夹打开,执行脚本 `scripts/deploy/launcher/mac/CGS.bash`
27 | | ├── Contents
28 | | ├── Resources
29 | | ├── scripts # 真实项目代码目录
30 | ├── CGS-init.app # 执行脚本 `scripts/deploy/launcher/mac/init.bash`
31 | └── CGS_macOS_first_guide.html # 用作刚解压时提供指引的一次性使用说明
32 | ```
33 |
34 | :::
35 | ::: warning macOS由于认证签名收费,app初次打开会有限制,正确操作如下
36 |
37 | 1. 对任一app右键打开,报错不要丢垃圾篓,直接取消
38 | 2. 再对同一app右键打开,此时弹出窗口有打开选项,能打开了
39 | 3. 后续就能双击打开,不用右键打开了
40 | :::
41 |
42 | ## ⛵️ 操作
43 |
44 | ::: warning 所有文档中包含`scripts`目录的
45 | 包括此mac部署说明,主说明README,release页面,issue的等等等等,
46 | 在app移至应用程序后的绝对路径皆指为`/Applications/CGS.app/Contents/Resources/scripts`
47 | :::
48 |
49 | ::: warning 以下初始化步骤严格按序执行
50 | :::
51 |
52 | | 初始化步骤 | 解析说明 |
53 | |:------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
54 | | 1 | 每次解压后,将`CGS.app`移至应用程序 |
55 | | 1.5 | (可选,需要在第2步前进行)由于macOS没微软雅黑字体,默认替换成`冬青黑体简体中文` 不清楚是否每种macOS必有,留了后门替换,在 `scripts/deploy/launcher/mac/__init__.py` 的`font`值,有注释说明 |
56 | | 2 | 每次解压后,必须运行`CGS-init.app`检测/安装环境, ⚠️ _**注意新打开的终端窗口并根据提示操作**_ ⚠️(对应第1.5步改字体可以反复执行第2步) |
57 |
58 | ## 🔰 其他
59 |
60 | ### 针对弹窗报错的尝试
61 |
62 | ```bash
63 | # arm64 CGS.app显示损坏无法打开时,尝试绕过签名
64 | sudo xattr -d com.apple.quarantine /Applications/CGS.app
65 | # 或
66 | sudo xattr -r -d com.apple.quarantine /Applications/CGS.app
67 |
68 | # 或直接运行
69 | /opt/homebrew/bin/python3.12 /Applications/CGS.app/Contents/Resources/scripts/CGS.py
70 | # 或
71 | /usr/local/bin/python3.12 /Applications/CGS.app/Contents/Resources/scripts/CGS.py
72 | ```
73 |
74 | ::: tip 还是失败无果的情况下可先自行deepseek等寻找方法或群内反馈
75 | 除上述命令外的成功命令示例请在下方评论区留言,造福后人
76 | 格式:1.报错信息;2.解决方案;3.结果
77 | :::
78 |
79 | ### 更新相关
80 |
81 | ::: warning 配置文件/去重记录均存放在`scripts`上,注意避免下包直接覆盖导致丢失
82 | :::
83 | 版本如若涉及到 UI/界面变动 相关的,最好运行 `CGS-init.app` 一下以保证字体等设置
84 |
85 | ### bug report / 提交报错 issue
86 |
87 | macOS上运行软件出错需要提issue时,除系统选`macOS`外,还需描述加上系统版本与架构
88 | (开发者测试开发环境为`macOS Sonoma(14) / x86_64`)
89 |
--------------------------------------------------------------------------------
/docs/deploy/quick-start.md:
--------------------------------------------------------------------------------
1 | # 🚀 快速开始
2 |
3 | ## 1. 下载 / 部署
4 |
5 | + 直接下载 [📦绿色包](https://github.com/jasoneri/ComicGUISpider/releases/latest),并解压
6 |
7 | ::: warning 解压路径不能含有中文/中文标点
8 | :::
9 | ::: warning macOS用户
10 | 须阅读 [macOS 部署](./mac-required-reading.md) 文档
11 | :::
12 |
13 | + 或克隆此项目 `git clone https://github.com/jasoneri/ComicGUISpider.git`
14 | ::: tip 需安装
15 | + `python3.12+`
16 | + 安装 [astral-sh/uv](https://github.com/astral-sh/uv)(然后抛弃 pip ,让 uv 管理依赖)
17 | ``` bash
18 | python -m pip install uv -i http://mirrors.aliyun.com/pypi/simple/
19 | ```
20 | **安装依赖命令示例** (CGS的 `requirements/*.txt` 都是用uv编译的,原生 pip 装你会发现各种麻烦)
21 | ``` bash
22 | python -m uv pip install -r "requirements/win.txt" --index-url http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
23 | ```
24 | :::
25 | ::: warning 使用 git 克隆的话请忽视全篇文档中的 scripts/xxx 的 `scripts`,文档是基于绿色包的说明
26 | :::
27 |
28 | ## 2. 运行
29 |
30 | ### 常规 GUI 运行
31 |
32 | `python CGS.py`
33 | 或使用绿色包程序
34 |
35 | ### 命令行工具
36 |
37 | `python crawl_only.py --help`
38 | 或使用绿色包的环境,在解压目录打开终端执行
39 | `.\runtime\python.exe .\scripts\crawl_only.py --help`
40 |
41 | ::: info 使用方法进help看说明
42 | 当前版本能进行简单下载/调试功能(后续将逐步扩展)
43 | 命令行工具的配置可用GUI方式修改 或 直接修改`scripts/conf.yml`文件
44 | :::
45 |
46 | ## 3. 配置
47 |
48 | 有自定义需求的,参考 [🔨主配置文档](../config/index.md) 进行设置
49 |
50 | ## 4. 更新
51 |
52 | + CGS 内置了更新模块,能在配置窗口中点击 `检查更新` 按钮进行更新
53 | ::: info 当 `本地版本` < `最新稳定版` < `最新开发版` 时
54 | 需更新到`最新稳定版`后,才能更新到`最新开发版`
55 | :::
56 |
57 | + 也可以选择到 releases 手动下载最新版,但需要注意配置等文件不被覆盖丢失
58 | ::: tip 分别是 配置文件 `scripts/conf.yml` 与去重记录 `scripts/record.db`
59 | :::
60 |
61 | ## 5. 搭配阅读器
62 |
63 | 欢迎尝试使用 redViewer ,最适 CGS !也希望能提供有创意的功能想法给 RV !💑
64 |
65 | [](https://github.com/jasoneri/redViewer)
66 |
--------------------------------------------------------------------------------
/docs/dev/contribute.md:
--------------------------------------------------------------------------------
1 | # 📜 贡献指南 Contributing
2 |
3 | 我们欢迎各位 Contributors 参与贡献帮助 ComicGUISpider 更好的解决大家遇到的问题,
4 |
5 | 这篇指南会指导你如何为 ComicGUISpider 贡献功能修复代码,可以在你要提出 Pull Request 之前花几分钟来阅读一遍这篇指南。
6 |
7 | 这篇文章包含什么?
8 |
9 | - [项目规划 Roadmap](#项目规划-roadmap)
10 | - [提案寻求共识 Request for Comments](#提案寻求共识-request-for-comments)
11 | - [分支管理 Git Branch](#分支管理-git-branch)
12 | - [版本号](#版本号)
13 | - [分支开发,主干发布](#分支开发主干发布)
14 | - [Branch 生命周期](#branch-生命周期)
15 | - [Git Workflow 一览](#git-workflow-一览)
16 | - [Pull Request](#pull-request)
17 | - [版本发布介绍](#版本发布介绍)
18 |
19 | ## 项目规划 Roadmap
20 |
21 | ComicGUISpider 开发组使用 [GitHub Project](https://github.com/jasoneri/ComicGUISpider/projects) 看板来管理预计开发的规划、在修复中的问题,以及它们处理的进度;
22 |
23 | 这将帮助你更好的了解
24 | - 开发团队在做什么?
25 | - 有什么和你想贡献的方向一致的,可以直接参与实现与优化
26 | - 有什么已经在进行中的,避免自己重复不必要的工作
27 |
28 | 在 [Project](https://github.com/jasoneri/ComicGUISpider/projects) 中你可以看到除通常的 `[Feat]`, `[BUG]`, 一些小优化项;
29 |
30 | ### 提案寻求共识 Request for Comments
31 |
32 | 对于一些小的优化项或者 bug 修复,你大可以直接帮忙调整代码然后提出 Pull Request,只需要简单阅读下 [分支管理](#分支管理-Git-Branch) 章节以基于正确的版本分支修复、以及通过 [Pull Request](#Pull-Request) 章节了解 PR 将如何被合并。
33 |
34 | 而如果你打算做的是一项**较大的**功能重构,改动范围大而涉及的方面比较多,那么希望你能通过 [Issue: 功能提案](https://github.com/jasoneri/ComicGUISpider/issues/new?assignees=&labels=RFC&projects=&template=rfc.yml&title=%5BRFC%5D%3A+) 先写一份 RFC 提案来简单阐述「你打算怎么做」的简短方案,来寻求开发者的讨论和共识。
35 |
36 | 因为有些方案可能是开发团队原本讨论并且认为不要做的事,而上一步可以避免你浪费大量精力。
37 |
38 | ::: info 如果仅希望讨论是否添加或改进某功能本身,而非「要如何实现」,请使用 -> [Issue: 功能改进](https://github.com/jasoneri/ComicGUISpider/issues/new?labels=feature+request&template=feature_request.yml&title=%5BFeature+Request%5D+)
39 | :::
40 |
41 | 一份 [提案(RFC)](https://github.com/jasoneri/ComicGUISpider/issues?q=is%3Aissue+is%3Aopen+label%3ARFC) 定位为 **「在某功能/重构的具体开发前,用于开发者间 review 技术设计/方案的文档」**,
42 |
43 | 目的是让协作的开发者间清晰的知道「要做什么」和「具体会怎么做」,以及所有的开发者都能公开透明的参与讨论;
44 |
45 | 以便评估和讨论产生的影响 (遗漏的考虑、向后兼容性、与现有功能的冲突),
46 |
47 | 因此提案侧重在对解决问题的 **方案、设计、步骤** 的描述上。
48 |
49 | ## 分支管理 Git Branch
50 |
51 | ### 版本号
52 |
53 | ComicGUISpider 项目中的 Git 分支使用与发布版本规则密切相关,因此先介绍版本规范;
54 |
55 | ComicGUISpider 发布的版本号遵循 [「语义化版本 SemVer」](https://semver.org/lang/zh-CN/) 的规范,
56 |
57 | 使用 `Major.Minor.Patch` 三位版本的格式,每一位版本上的数字更新含义如下:
58 |
59 | - **Major**: 大版本更新,很可能有不兼容的 配置/API 修改
60 | - **Minor**: 向下兼容的功能性新增
61 | - **Patch**: 向下兼容的 Bug 修复 / 小优化修正
62 |
63 | ### 分支开发,主干发布
64 |
65 | ComicGUISpider 项目使用「分支开发,主干发布」的模式,
66 |
67 | [**`GUI`**](https://github.com/jasoneri/ComicGUISpider/commits/GUI) 分支是稳定版本的 **「主干分支」**,只用于修改版本号/打 tag 发版,不用于直接开发新功能或修复。
68 |
69 | 每一个 Minor 版本都有一个对应的 **「开发分支」** 用于开发新功能、与发布后维护修复问题,
70 |
71 | 开发分支的名字为 `Major.Minor-dev`,如 `2.x-dev`, 你可以在仓库的 [All Branches 中搜索到它们](https://github.com/jasoneri/ComicGUISpider/branches/all?query=-dev)。
72 |
73 | ### Branch 生命周期
74 |
75 | 当一个 Minor 开发分支(以 `2.1-dev` 为例) 完成新功能开发,**首次**合入 GUI 分支后,
76 | - 发布 Minor 版本 (如 `2.1.0`)
77 | - 同时拉出**下一个** Minor 开发分支(`2.2-dev`),用于下一个版本新功能开发
78 | - 而**上一个**版本开发分支(`2.0-dev`)进入归档不再维护
79 | - 且这个 Minor 分支(`2.1-dev`)进入维护阶段,不再增加新功能/重构,只维护 Bugs 修复
80 | - Bug 修复到维护阶段的 Minor 分支(`2.1-dev`)后,会再往 GUI 分支合并,并发布 `Patch` 版本
81 |
82 | 根据这个流程,对于各位 Contributors 在开发贡献时选择 Git Branch 来说,则是:
83 | - 若「修复 Bug」,则基于**当前发布版本**的 Minor 分支开发修复,并 PR 到这个分支
84 | - 若「添加新功能/重构」,则基于**还未发布的下一个版本** Minor 分支开发,并 PR 到这个分支
85 |
86 | ::: info 「当前发布版本」为 [[Releases 页面]](https://github.com/jasoneri/ComicGUISpider/releases) 最新版本
87 | :::
88 |
89 | ### Git Workflow 一览
90 |
91 | > [!Info] 图中 commit timeline 从左到右 --->
92 |
93 | 
94 |
95 | ## Pull Request
96 |
97 | 请确保你根据上文的 Git 分支管理 章节选择了正确的 PR 目标分支,
98 |
99 | > [!Info] 若「修复 Bug」,则 PR 到**当前发布版本**的 Minor 维护分支
100 |
101 | > [!Info] 若「添加新功能/重构」,则 PR 到**下一个版本** Minor 开发分支
102 |
103 | - 一个 PR 应该只对应一件事,而不应引入不相关的更改;
104 |
105 | 对于不同的事情可以拆分提多个 PR,这能帮助开发组每次 review 只专注一个问题。
106 |
107 | - 在提 PR 的标题与描述中,最好对修改内容做简短的说明,包括原因和意图,
108 |
109 | 如果有相关的 issue 或 RFC,应该把它们链接到 PR 描述中,
110 |
111 | 这将帮助开发组 code review 时能最快了解上下文。
112 |
113 | - 确保勾选了「允许维护者编辑」(`Allow edits from maintainers`) 选项。这使我们可以直接进行较小的编辑/重构并节省大量时间。
114 |
115 | - 请确保本地通过了「单元测试」和「代码风格 Lint」,这也会在 PR 的 GitHub CI 上检查
116 | - 对于 bug fix 和新功能,通常开发组也会请求你添加对应改动的单元测试覆盖
117 |
118 | 开发组会在有时间的最快阶段 Review 贡献者提的 PR 并讨论或批准合并(Approve Merge)。
119 |
120 | ## 版本发布介绍
121 |
122 | 版本发布目前由开发组通过合并「PR」后,GUI 分支上修改版本号然后打 tag 自动触发打包与发布。
123 |
124 | 通常 Bug 修复的 PR 合并后会很快发版,通常不到一周;
125 |
126 | 而新功能的发版时间则会更长而且不定,你可以在我们的 [GitHub Project](https://github.com/jasoneri/ComicGUISpider/projects?query=is%3Aopen) 看板中看到开发进度,一个版本规划的新功能都开发完备后就会发版。
127 |
128 | ## 贡献文档
129 |
130 | 如果要为文档做贡献,请注意以下几点:
131 |
132 | - 文档皆存放在 docs 目录上,仅限 markdown
133 | - 需基于**当前发布版本**的 Minor 维护分支进行修改,并 PR 到这个分支
134 | - 请确保你的 PR 标题和描述中包含了你的修改的目的和意图
135 |
136 | 撰写文档请使用规范的书面化用语,遵照 Markdown 语法,以及 [中文文案排版指北](https://github.com/sparanoid/chinese-copywriting-guidelines) 中的规范。
137 |
--------------------------------------------------------------------------------
/docs/dev/dev_spider.md:
--------------------------------------------------------------------------------
1 | # ✒️ 其他网站的扩展开发指南
2 |
3 | Website crawler development guide
4 |
5 | 基于 `Scrapy`
6 | 需切换到 **下一个版本** Minor 开发分支,PR 时提交到此分支
7 |
8 | ## 开发步骤
9 |
10 | ### 1. 爬虫代码
11 |
12 | 以 wnacg 为例
13 |
14 | ### WnacgSpider
15 |
16 | [`代码位置`](https://github.com/jasoneri/ComicGUISpider/blob/GUI/ComicSpider/spiders/wnacg.py)
17 |
18 | #### 类属性
19 |
20 | ✅ name: 爬虫名字,取目标网站域名的部分或标题,与分支名相同
21 | ✅ domain: 目标网站域名
22 | ✅ search_url_head: 搜索页url(去掉关键词),大部分网站都是以get形式直出的
23 | 🔳 custom_settings: `scrapy`客制设定。举例两个应用
24 | `wnacg`里的`ComicDlProxyMiddleware`, 配置里设了代理时 & 走目标网站域名情况下,会通过代理进行访问
25 | `jm`里的`JmComicPipeline`,禁漫的图片直接访问链接时是切割加密过的(可自行浏览器右键新建标签打开图像),这里做了解密还原了
26 | 🔳 ua: 若`custom_settings`设了 `UAMiddleware` 才会生效
27 | 🔳 mappings: 默认映射,与`更改配置`里的`映射`相叠加
28 | 🔳 frame_book_format: 影响传递给`self.parse_section`的`meta`组成
29 | 🔳 turn_page_search/turn_page_info: 翻页时需要,使用为`utils.processed_class.Url`, 参照已有案例即可 (注意`Url.set_next`,受传参个数影响)
30 |
31 | #### 类方法
32 |
33 | 🔳 @property search: 生成第一个请求的连接,可结合`mappings`进行复杂输入的转换
34 | 🔳 start_requests: 发出第一个请求,可在此进行`search`实现不了 或 不合其逻辑的操作
35 | ✅ frame_book: "搜索 > 书列表" 之间的清洗
36 | ✅ frame_section:
37 | 一跳页面:书页面 > 能直接获取该书的全页
38 | 二跳页面:书页面 > 章节列表 之间的清洗
39 | 🔳 parse_fin_page: (一跳页面不需要,二跳页面必须) 章节页面 > 直接获取该章节的全页
40 | 🔳 mk_page_tasks: 跟三跳页面相关,可以用巧妙方法绕过,初始先不管,二跳页面情况下参考`kaobei.py`
41 |
42 | #### 常用方法
43 |
44 | + self.say: 能将字符串(可使用部分html标签格式)打印在gui上
45 | + utils.processed_class.PreviewHtml: 通过`add`喂预览图链接,结束后用`created_temp_html`
46 | 生成临时html文件。实例详见`WnacgSpider.frame_book`
47 |
48 | ### WnacgUtils
49 |
50 | [`代码位置`](https://github.com/jasoneri/ComicGUISpider/blob/GUI/utils/website/__init__.py)
51 | 常规漫与🔞继承基类不同
52 |
53 | #### 类属性(Utils)
54 |
55 | ✅ name: 同爬虫名字
56 | ✅ uuid_regex: 将 作品id 从作品 预览url 中抽取的正则表达式
57 | 🔳 headers: 通用请求头
58 | 🔳 book_hea: 读剪贴板功能使用的请求头
59 | 🔳 book_url_regex: 读剪贴板功能使用所对应当前网站抽取 作品id 的正则表达式
60 |
61 | #### 类方法(Utils)
62 |
63 | 🔳 parse_publish_: 清洗发布页
64 | 🔳 parse_book: 清洗出读剪贴板功能的信息
65 | 🔳 test_index: 测试网络环境能否访问当前网站
66 |
67 | ::: tip 最后需要在 spider_utils_map 加上对应的 Utils
68 | :::
69 |
70 | ### 2. 其他代码
71 |
72 | #### [`variables/__init__.py`](https://github.com/jasoneri/ComicGUISpider/blob/GUI/variables/__init__.py)
73 |
74 | 1. `SPIDERS` - 爬虫名字:加入新序号(方便下面理解设为序号`3`²),值为爬虫名字`wnacg`
75 | 2. `DEFAULT_COMPLETER` - 默认预设:序号必须,值可空列表。用户配置会覆盖,但是可以先把做了开发的映射放进去
76 | 3. `STATUS_TIP` - 状态栏输入提示:序号必须,值可空字符串。鼠标悬停在搜索框时,最下状态栏会出现的文字
77 |
78 | > [!TIP] 如目标网站为🔞的
79 | > 还需在`SPECIAL_WEBSITES`加进 爬虫名字`wnacg` (此处影响存储位置)
80 | > 在`SPECIAL_WEBSITES_IDXES`加进 序号`3`² (此处影响gui逻辑)
81 |
82 | ### 3. ui 代码
83 |
84 | #### [`GUI/mainwindow.py`](https://github.com/jasoneri/ComicGUISpider/blob/GUI/GUI/mainwindow.py)
85 |
86 | 在最下方加入代码(需参考 `variables/__init__.py` 的 `SPIDERS` 避免使用重复序号导致覆盖)
87 |
88 | ```python
89 | self.chooseBox.addItem("")
90 | self.chooseBox.setItemText(3, _translate("MainWindow", "3、wnacg🔞")) # 🔞标识符不影响任何代码
91 | ```
92 |
93 | ---
94 |
95 | ### 4. 无GUI测试
96 |
97 | ```python
98 | python crawl_only.py -w 3 -k 首页 -i 1
99 | ```
100 |
101 | ### 5. GUI测试
102 |
103 | `python CGS.py`,对进行开发的网站测试流程是否正常,然后测试其他网站有没受影响
104 |
105 | > 注意: 当`ComicSpider/settings.py`里的`LOG_FILE`不为空时,控制台不会打印任何信息,只会在日志`log/scrapy.log`中输出,无论什么日志等级
106 | > 反之想让控制台输出时将其值置为空,在commit时需要改回来
107 |
--------------------------------------------------------------------------------
/docs/dev/i18n.md:
--------------------------------------------------------------------------------
1 | # 🌏 i18n guide
2 |
3 | 借助 [Weblate](https://hosted.weblate.org/engage/comicguispider/) 托管多语言的翻译
4 |
5 | Translation hosting by [Weblate](https://hosted.weblate.org/engage/comicguispider/)
6 |
7 | ## Development
8 |
9 | ### Ui
10 |
11 | 翻译仅需处理单个 yaml 文件如 [`en-US.yml`](https://github.com/jasoneri/ComicGUISpider/blob/GUI//assets/res/locale/en-US.yml),
12 | 编译翻译的流程会在客户端使用时自动实现,除下文提及 Usage-ui 的一小点以外无需将精力放编译上
13 | 也可以阅读 [`transfer.py`](https://github.com/jasoneri/ComicGUISpider/blob/GUI//assets/res/transfer.py) 查看编译流程如何生成 res.mo
14 |
15 | Translation only needs to handle a single yaml file such as [`en-US.yml`](https://github.com/jasoneri/ComicGUISpider/blob/GUI//assets/res/locale/en-US.yml)
16 | The compilation process of translation will be automatically implemented when the client is used, except for a small point mentioned in Usage-ui, there is no need to focus on compilation
17 | You can also read [`transfer.py`](https://github.com/jasoneri/ComicGUISpider/blob/GUI//assets/res/transfer.py) to see how the res.mo is generated
18 |
19 | ### Documentation
20 |
21 | 文档皆存放在 `docs` 目录里,经由 Github-Action 做成 `github pages`
22 | 参考英文的存储路径为 `docs/locate/en/*`
23 |
24 | documents are stored in the `docs` directory, which will be made into `github pages` by Github-Action
25 | Reference English storage path is `docs/locate/en/*`
26 |
27 | ## Usage
28 |
29 | ### ui
30 |
31 | 软件在开启时,会通过 [`assets/res/__init__.py`](https://github.com/jasoneri/ComicGUISpider/blob/GUI//assets/res/__init__.py) 中的 `getUserLanguage` 函数获取当前机器的语言如 `Chinese (Simplified)_China.utf8`(ISO 639-1_ISO 3166-1.encoding),
32 | 检测转换成 RFC1766 标准如 `zh-CN` ,并加载 *.mo 模块进行语言切换。
33 |
34 | > [!Tip] 若进行了语言开发
35 | > 需在 `getUserLanguage` 中增加对应的 `RFC1766` 转换,否则默认使用 `en-US`
36 |
37 | The software will get the current machine language such as `Chinese (Simplified)_China.utf8` (ISO 639-1_ISO 3166-1.encoding)
38 | through the `getUserLanguage` function in [`assets/res/__init__.py`](https://github.com/jasoneri/ComicGUISpider/blob/GUI//assets/res/__init__.py) when it starts,
39 | machine language will convert to the RFC1766 standard such as `zh-CN`, and load the corresponding *.mo module for language switching.
40 |
41 | > [!Tip] If language development has been carried out
42 | > you need to add the corresponding `RFC1766` conversion in `getUserLanguage`, otherwise the default `en-US` is used
43 |
44 | ### crawler
45 |
46 | 对适用区域为🌏的网站发出的请求会基于 `Vars.ua_accept_language`
47 |
48 | Websites which applicable to 🌏 requests base on `Vars.ua_accept_language`
49 |
50 | ## Contact us
51 |
52 | 通过 [`issue`](https://github.com/jasoneri/ComicGUISpider/issues/new?template=feature-request.yml&labels=i18n) 进行反馈
53 |
54 | feedback by [`issue`](https://github.com/jasoneri/ComicGUISpider/issues/new?template=feature-request.yml&labels=i18n)
55 |
--------------------------------------------------------------------------------
/docs/faq/extra.md:
--------------------------------------------------------------------------------
1 |
2 | # 📒 额外使用说明
3 |
4 | ## 1. 配置生效相关
5 |
6 | 除少部分条目例如预设(只影响gui),能当即保存时立即生效(保存配置的操作与gui同一进程);
7 | 其余影响后台进程的配置条目在选择网站后定型(点选网站后`后台进程`即开始),
8 | 如果选网站后才反应过来改配置,需重启CGS方可生效
9 |
10 | ## 2. 域名相关
11 |
12 | 各网站的 `发布页`/`永久链接` 能在 `scripts/utils/website/__init__.py` 里找到
13 | (国内域名专用)域名缓存文件为 `scripts/__temp/xxx_domain.txt`(xxx = `wnacg`或`jm`),
14 | 再开程序会检测修改时间大于48小时则失效重新获取,处于48小时内则可对此文件删改或加个空格保存即时生效
15 |
16 | > [!Info] 手动改域名缓存文件示例
17 | > wnacg_domain.txt,没有则自建,内容填个 `www.wn01.uk` 即可
18 |
19 | > `发布页`/`永久链接`失效的情况下鼓励用户向开发者提供新可用网址,让软件能够持续使用
20 |
--------------------------------------------------------------------------------
/docs/faq/index.md:
--------------------------------------------------------------------------------
1 | # ❓ 常见问题
2 |
3 | ## 1. GUI
4 |
5 | ### 预览窗口页面显示异常/页面空白/图片加载等
6 |
7 | 刷新一下页面
8 | 有些是 JavaScript 没加载,有些是对方服务器问题
9 |
10 | ## 2. 爬虫
11 |
12 | ### 拷贝漫画部分无法出列表
13 |
14 | 拷贝有些漫画卷和话是分开的,api结构转换的当前是有结果的,但没做解析,如有需要请前往群里反馈
15 |
16 | ### 拷贝/Māngabz多选书情况
17 |
18 | 多选书时,在章节序号输入时可以直接点击`开始爬取`跳过当前书的章节选择,只要出进度条即可
19 |
20 | ## 3. 其他
21 |
22 | ### ModuleNotFoundError: No module named 'xxx'
23 |
24 | win:
25 |
26 | 1. 在绿色包解压的目录打开 (powershell) 终端执行命令
27 |
28 | ``` bash
29 | irm https://gitproxy.click/https://raw.githubusercontent.com/jasoneri/ComicGUISpider/refs/heads/GUI/deploy/online_scripts/win.ps1 | iex
30 | ```
31 |
32 | ::: info 非绿色包的用户参考 [🚀 快速开始 > 部署](../deploy/quick-start#1-下载--部署) 的安装依赖命令示例
33 | :::
34 |
35 | macOS: 用`CGS-init`更新环境依赖
36 |
37 | ### 更新失败后程序无法打开
38 |
39 | ::: tip 最简单有效❗️
40 | 备份配置 scripts/conf.yml 与去重记录 scripts/record.db后 下载📦绿色包 覆盖更新
41 | :::
42 |
43 | 更新的报错日志已整合进 log/GUI.log 文件里,建议提 issue 并附上 log,帮助 CGS 进行优化
44 |
45 | 1. 回退到上一个正常版本: 找到对应版本的 `Source code (zip)` 源码包,解压后将全部源码覆盖到 scripts 目录下
46 | 删除 `scripts/deploy/version.json`,恢复正常使用
47 |
48 | 2. 安全使用最新版本: 将最新版本的 `Source code (zip)` 源码包,解压后将全部源码覆盖到 scripts 目录下
49 |
50 | 2.1 按上面 ModuleNotFoundError 的方法安装依赖
51 |
52 | ### 【win】弹出消息框报错而且一堆英文不是中文(非开发者预设报错)的时候
53 |
54 | 例如`Qxxx:xxxx`, `OpenGL`等,此前已优化过,如还有弹出框警告,
55 | 尝试在解压目录使用cmd运行`./CGS.bat > CGS-err.log 2>&1`,然后把`CGS-err.log`发群里反馈
56 |
57 | ---
58 |
59 | ::: warning 如果存在上述没有覆盖的问题
60 | 请带上 `log` 到 [issue](
61 | https://github.com/jasoneri/ComicGUISpider/issues/new?template=bug-report.yml
62 | ) 反馈 或 进群(右上角qq/discord)反馈。
63 | :::
64 |
65 |
66 |
--------------------------------------------------------------------------------
/docs/faq/other.md:
--------------------------------------------------------------------------------
1 | ## 1. 去重,增加标识相关说明
2 |
3 | ### 样例
4 |
5 | 1. http://jm-comic1.html “满开开花”
6 | 2. http://jm-comic2.html “满开开花”
7 | 3. http://wnacg-comic1.html “满开开花”
8 |
9 | > [!Info] 举例:其中 comic1 和 comic2 是 jm 的两个作品id,第三条 comic1 是 wnacg 的作品id
10 |
11 | #### 场景-原始
12 |
13 | 由于1和2同名,所以1下载后会被2覆盖,因为目录路径一样,3同理
14 |
15 | #### 场景-去重✅
16 |
17 | 选择1后得 md5('jm'+'comic1')=md5_1,查表 md5_1 不存在,下载,产生目录`储存目录.../满开开花`
18 | 再次下载1时查表发现 md5_1 已存在,不下载
19 | 选择2后得 md5('jm'+'comic2')=md5_2,查表 md5_2 不存在,下载,记录进表并将内容覆盖到`储存目录.../满开开花`
20 | 选择3后得 md5('wnacg'+'comic1')=md5_3,查表 md5_3 不存在,下载,记录进表并将内容覆盖到`储存目录.../满开开花`
21 |
22 | #### 场景-增加标识❌
23 |
24 | 无论去重还是不去重,目录存在就覆盖
25 |
26 | #### 场景-增加标识✅
27 |
28 | 将 spider_name 加唯一作品id加进命名尾部,例如下载上述三个得
29 |
30 | + `储存目录.../满开开花[jm-comic1]`
31 | + `储存目录.../满开开花[jm-comic2]`
32 | + `储存目录.../满开开花[wnacg-comic1]`
33 |
34 | ---------
35 |
36 | ### 其他
37 |
38 | #### 1. id实则自定义
39 |
40 | comic1 等 id 仅为示例,实际基于开发自定义
41 | 例如 md5('kaobei'+福利莲+第一话)=id 就可去做常规漫的去重,常规漫的任务细化就是此 id
42 |
43 | #### 2. 网站将同一内容的作品从 url 转移到 url2
44 |
45 | 考虑此情况实则并不常见,这种下重了也没所谓,少数情况
46 |
--------------------------------------------------------------------------------
/docs/feature/index.md:
--------------------------------------------------------------------------------
1 | # 🎸 常规功能
2 |
3 | ::: tip 欢迎提供功能建议,提交issue / PR / 此页下方评论区留言 等
4 | 例如打包 epub 格式 zip(需描述过程结果)
5 | :::
6 |
7 | ## 适用性
8 |
9 | > [!Info] 没列出的功能全网适用
10 |
11 | | | [拷贝](https://www.copy20.com/) | [Māngabz](https://mangabz.com) | [禁漫](https://18comic.vip/) | [wnacg](https://www.wnacg.com/) | [ExHentai](https://exhentai.org/) | [hitomi](https://hitomi.la/) |
12 | |:--------------------------------------|:-------------:|:---------:|:----:|:----------:|:----------:|:----------:|
13 | | 预览 | ❌ | ❌ | ✔️ | ✔️ | ✔️ | ✔️ |
14 | | 翻页 | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ 禁跳页 | ✔️/🚧 |
15 | | 工具箱-读剪贴板 | ❌ | ❌ | ✔️ | ✔️ | ✔️ | 🚧 |
16 | | 工具箱-显示记录 | ✔️ | ✔️ | ❌ | ❌ | ❌ | ❌ |
17 | | 工具箱-整合章节 | ✔️ | ✔️ | ❌ | ❌ | ❌ | ❌ |
18 | | hitomi-tools | ❌ | ❌ | ❌ | ❌ | ❌ | ✔️ |
19 | | 预览窗口-复制 | ❌ | ❌ | ✔️ | ✔️ | ✔️ | 🚧 |
20 |
21 | ## 功能项
22 |
23 | ### 1. 搜索框预设
24 |
25 | 搜索框区域按 `空格` 或右键点`展开预设`即可弹出预设项 (序号输入框同理)
26 |
27 | ### 2. 预览功能
28 |
29 | 内置的浏览器,多选/翻页等如动图所示。其他详情使用看 `🎥视频使用指南3`
30 |
31 | ### 3. 翻页
32 |
33 | 当列表结果出来后开启使用
34 |
35 | ### 4.工具箱
36 |
37 | #### 4.1 读剪贴板
38 |
39 | 读剪贴板匹配生成任务,需配合剪贴板软件使用(自行下载安装)
40 | win: [🌐Ditto](https://github.com/sabrogden/Ditto)
41 | macOS: [🌐Maccy](https://github.com/p0deje/Maccy)
42 | 流程使用看`🎥视频使用指南3`相关部分,此功能说明须知放在任务页面右上的`额外说明`
43 | ::: info 不下载剪贴板软件仅影响 `读剪贴板` 功能,不影响常规流程使用
44 | :::
45 |
46 | #### 4.2 显示记录
47 |
48 | 需配合 [redViewer项目](https://github.com/jasoneri/redViewer) 使用,用其阅读后产生的记录文件能知道从哪一话开始下起
49 |
50 | #### 4.3 整合章节
51 |
52 | 批量整合,例如将`D:\Comic\蓝箱\165\第1页`整合转至`D:\Comic\web\蓝箱_165\第1页`
53 | > [!Info] 使用redViewer项目需要此目录结构
54 |
55 | #### 4.4 hitomi-tools
56 |
57 | 仅 hitomi 用,[📹参考用法](https://jsd.vxo.im/gh/jasoneri/imgur@main/CGS/hitomi-tools-usage.gif)
58 |
59 | ### 5.预览窗口功能项
60 |
61 | #### 1. 复制未完成任务链接
62 |
63 | 
64 |
65 | > [!Tip] 前置设置
66 | > 需参考 [🔧其他配置 > 复制按钮相关](../config/other.md) 对剪贴板软件更改设置
67 |
68 | 将当前未完成链接复制到剪贴板。
69 | 先`复制`后用`工具箱-读剪贴板`的流程,常用于进度卡死不动重下或补漏页
70 |
--------------------------------------------------------------------------------
/docs/feature/script.md:
--------------------------------------------------------------------------------
1 |
2 | # 🚧 其他脚本集
3 |
4 | saucenao / kemono / nekohouse
5 |
6 | <2025-05-11> [Motrix](https://github.com/agalwood/Motrix) yyds!!
7 | `kemono` 下载改用 Motrix-RPC ,太稳了!有兴趣看下方 kemono 等相关说明
8 |
9 | ## ⚠️ 通用前置须知
10 |
11 | ::: tip 脚本集通用前置安装
12 | 任务模块:[Redis-windows](https://github.com/redis-windows/redis-windows/releases) | mac:`brew install redis`
13 | 下载引擎:[Motrix](https://github.com/agalwood/Motrix/releases)
14 |
15 | ---
16 | 使用 `uv` 安装脚本集依赖 `requirements/script/*.txt`
17 | ```bash
18 | python -m uv pip install -r "requirements/script/win.txt" --index-url http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
19 | ```
20 | 绿色包使用的命令为 👇
21 | ```bash
22 | ./runtime/python.exe -m uv pip install -r "./scripts/requirements/script/win.txt" --index-url http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
23 | ```
24 | :::
25 |
26 | ::: details 脚本目录树: `script`目录
27 | ```shell
28 | utils
29 | ├── script
30 | ├── __init__.py
31 | ├── conf.yml # 此目录下的所有脚本配置引用,没上传至git,需要自创建
32 | ├── extra.py # 作为单个简单类爬虫使用
33 | ├── image
34 | ├── __init__.py
35 | ├── kemono.py # 网站有如右相关资源 patreon/fanbox/fantia 等
36 | ├── expander.py # 基于每个作者对作品集取名习惯(标题是颜文字表情之类的见怪不怪了)进行筛选(类kemono网站共用)
37 | ├── nekohouse.py # 大概就是 kemono 的克隆网站
38 | ├── saucenao.py # saucenao 著名的二次元以图搜图网站
39 | ```
40 | :::
41 |
42 | ::: details 配置文件 `./scripts/utils/script/conf.yml` (必要❗️自行创建)
43 | ```yaml
44 | kemono:
45 | sv_path: D:\pic\kemono
46 | cookie: eyJfcGVybWaabbbW50Ijxxxxxxxxxxxxxxxxxxxxx # 需要登录的账号 https://kemono.su/api/schema, F12打开控制台查看cookies, 字段名为 `session`
47 | redis_key: kemono
48 |
49 | nekohouse:
50 | sv_path: D:\pic\nekohouse
51 | cookie: eyJfcGVybWaabbbW50Ijxxxxxxxxxxxxxxxxxxxxx # 需要登录的账号 https://nekohouse.su, F12打开控制台查看cookies, 字段名为 `session`
52 | redis_key: nekohouse
53 |
54 | redis:
55 | host: 127.0.0.1
56 | port: 6379
57 | db: 0
58 | password:
59 | ```
60 | :::
61 |
62 | 暂无开发GUI界面打算
63 |
64 | ---
65 | ::: warning 以下内容 均基于通用前置须知
66 | :::
67 |
68 | ## 1. kemono
69 |
70 | ### 🚀 快速开始
71 |
72 | 1. 启动 `redis` 服务,打开 `Motrix`
73 | ::: details 2. (可选)增加配置
74 | ```yaml
75 | kemono:
76 | ...
77 | filter: # 正则过滤
78 | Artists: # 作品标题过滤
79 | normal: "PSD|支援者" # normal一旦设置则会作为通用的兜底过滤
80 | DaikiKase: "支援者様】" # 单独指定作者过滤规则,作者非纯英文名时需要配合 ArtistsEnum
81 | file: "(mp4|zip)$" # 文件类型过滤
82 |
83 | proxies: # 设代理访问才算通畅,此处代理设置不影响 Motrix 的下载相关
84 | - 127.0.0.1:10809
85 | ```
86 | :::
87 | 3. 命令行工具参考
88 |
89 | ::: tip 绿色包使用的命令为 `./runtime/python.exe ./scripts/utils/script/image/kemono.py --help`
90 | :::
91 |
92 | ```bash
93 | python kemono.py --help
94 | python kemono.py -c 'fav=[["keihh","fanbox"],"サインこす"]' -sd "2025-03-01" -ed "2025-05-01"
95 | python kemono.py -c 'creatorid=[16015726,1145144444444]' -sd "2025-03-01"
96 |
97 | # 部分失败任务的补漏命令 👇
98 | python kemono.py -p run
99 | ```
100 |
101 | ### 📒 说明
102 |
103 | 基于账号收藏 或 作者id,受配置的 filter 所设限制一定量的任务
104 | kemono 性质,资源重复多,文件大,基本设置条件过滤才正常
105 |
106 | ::: tip 过滤扩展:
107 | `expander.py` 内置部分作者命名习惯的过滤,例如`keihh_patreon`
108 | ,其作品通常有无印/v2/v3,而v3会包括无印/v2,这情况就要过滤掉无印/v2
109 | 鉴于作品集命名杂七杂八的,除通用过滤外可对每一位作者单独增加过滤规则
110 | :::
111 |
112 | ---
113 |
114 | ::: details 运行过后所得目录树 (目录结构基于 [redViewer](https://github.com/jasoneri/redViewer))
115 | ```shell
116 | kemono_path
117 | ├── __handle # 爬资源本身没有,redViewer 项目生成的,处理save/remove
118 | ├── __sorted_record # 文件/图片下载时无序也不再是第n页这种命名,此时生成任务时记录列表顺序,用于 redViewer 人类顺序阅读使用
119 | └── a5p74od3_fanbox
120 | ├── [2023-01-01]今年もよろしくお願いします。.json # 作品集顺序记录
121 |
122 | ├── MだSたろう_fanbox # 分隔开的这部分均为作者_平台\作品集\图片or文件,命名格式:作者_平台
123 | ├── a5p74od3_fanbox
124 | ├── keihh_fanbox
125 | ├── keihh_patreon
126 | ├── サインこす_fanbox
127 | ├── ラマンダ_fantia
128 | ├── [2020-07-30]アカリちゃんとクエスト
129 | ├── [2021-01-29]白血球さんお願いします!
130 | └── [2022-07-30]ノノ水着 # 作品集,命名格式:[作品创建时间]kemono的标题名
131 | ├── 85fe7ae7-dfea-4ef2-816d-46f378ee2f80.png # 该作品集的一个文件/图片
132 | ├── c57e9b35-608f-471f-8a34-2e56ead4dc70.png
133 |
134 | ├── blacklist.json # 下载过滤名单,避免重复下载用(redViewer阅读过后操作会加进去 或 手动添加)
135 | └── record.txt # redViewer 阅读后操作记录
136 | ```
137 | :::
138 |
139 | ---
140 |
141 | ## 2. saucenao 二次元的以图搜图
142 |
143 | `Danbooru`无需代理,`Yande`(这个指`yande.re`)需要代理,其他图源没做,感觉也没比`Yande`更全更高清的了,
144 | 没代理就去掉`imgur_module`的`Yande`
145 | 有时也会搜出kemono的,知道作者名之后就用上面的kemono脚本吧
146 |
147 | saucenao限制30秒搜3张图,有它的账号也才30秒4张没什么好说的
148 |
149 | 相似度阈值可自行各个图源分别调整,搜索`similarity_threshold`更改。 匹配的图源是`imgur_module`的值(列表) 从左到右
150 |
151 | ---
152 |
153 | #### 运行/操作
154 |
155 | 1. 随意创建个目录例如 `D:\pic`,丢几张图进去,脚本的`get_hd_img`的位置实参改成该目录,然后跑脚本`python saucenao.py`
156 | 2. 成功后会保存在`D:\pic\hd`里,对照下文件大小之类的,合适就回去把原文件删了(不然下次跑会重复做前面的任务)
157 |
158 | // # TODO[9]: 重复任务用pathlib.exists()查一下hd文件夹内的,并用saucenao.json记录数据
159 |
160 | ::: tip 进阶:
161 | 可以在很多图像的目录上运行脚本,只要在`get_hd_img`加上参数`first='a.png'`,就会以`文件大小`的`顺序`从`a.png`开始进行搜图
162 | 不过同样要对比和手动删源文件,顺序可以自己调代码在`get_tasks`的`__ = sorted(...`的`key`
163 | :::
164 |
165 | ---
166 |
167 | ## 3. nekohouse 类似kemono的补充
168 |
169 | ::: info 除了一些配置等从`kemono`变为`nekohouse`之外,使用方面与`kemono`用法别无二致,参照`kemono`即可
170 | :::
171 |
--------------------------------------------------------------------------------
/docs/home/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: 简介
3 | ---
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 | ## 📑 简介
14 |
15 | **`CGS`** 是一个... 能简单使用的漫画下载软件!(忽略200字说明)
16 |
17 | ## 功能说明
18 |
19 | - 简易配置就能使用
20 | - 开预览后随便点点就能下载,还能基于预览窗口进网站看
21 | - 通过加减号,`0 全选`,`-3 选倒数三个` 等输入规则,能方便指定选择
22 | - 基于翻页保留,翻页就像已塞进了购物车一样
23 | - 虽然任务是顺序流,但内置重启很方便,加上多开更方便
24 | - 读剪贴板方式流,字如其名
25 | - 去重,加标识符等
26 |
27 | ## 致谢声明
28 |
29 | ### Credits
30 |
31 | Thanks to
32 | - [PyStand](https://github.com/skywind3000/PyStand) / [Platypus](https://github.com/sveinbjornt/Platypus) for providing win/macOS packaging.
33 | - [Ditto](https://github.com/sabrogden/Ditto) / [Maccy](https://github.com/p0deje/Maccy) for providing great win/macOS Clipboard Soft.
34 | - [PyQt-Fluent-Widgets](https://github.com/zhiyiYo/PyQt-Fluent-Widgets/) for providing elegant qfluent ui.
35 | - [VitePress](https://vitepress.dev) for providing a great documentation framework.
36 | - Every comic production team / translator team / fans.
37 |
38 | ## 贡献
39 |
40 | 欢迎提供 ISSUE 或者 PR
41 |
42 |
43 |
44 |
45 |
46 | ## 传播声明
47 |
48 | - **请勿**将 ComicGUISpider 用于商业用途。
49 | - **请勿**将 ComicGUISpider 制作为视频内容,于境内视频网站(版权利益方)传播。
50 | - **请勿**将 ComicGUISpider 用于任何违反法律法规的行为。
51 |
52 | ComicGUISpider 仅供学习交流使用。
53 |
54 | ## Licence
55 |
56 | [MIT licence](https://github.com/jasoneri/ComicGUISpider/blob/GUI/LICENSE)
57 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | # https://vitepress.dev/reference/default-theme-home-page
3 | layout: home
4 |
5 | hero:
6 | name: "打开 CGS"
7 | text: "鼠标点几点轻松下漫画"
8 | tagline: 最自在
9 | image:
10 | src: /CGS-girl.png
11 | alt: CGS
12 | actions:
13 | - theme: brand
14 | text: 快速开始
15 | link: /deploy/quick-start
16 | - theme: alt
17 | text: 配置
18 | link: /config
19 | - theme: alt
20 | text: FAQ
21 | link: /faq
22 |
23 | features:
24 | - icon:
25 | src: ./assets/img/icons/website/copy.png
26 | title: '拷贝漫画 '
27 | details: ' '
28 | - icon:
29 | src: ./assets/img/icons/website/mangabz.png
30 | title: 'Māngabz '
31 | details: ' '
32 | - icon:
33 | src: ./assets/img/icons/website/jm.png
34 | title: 'jm '
35 | details: ' '
36 | - icon:
37 | src: ./assets/img/icons/website/wnacg.png
38 | title: '绅士漫画 '
39 | details: ' '
40 | - icon:
41 | src: ./assets/img/icons/website/ehentai.png
42 | title: 'exhentai '
43 | details: ' '
44 | - icon:
45 | src: ./assets/img/icons/website/hitomi.png
46 | title: 'hitomi '
47 | details: ' '
48 | ---
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 | ## 功能说明
58 |
59 | - 简易配置就能使用
60 | - 开预览后随便点点就能下载,还能基于预览窗口进网站看
61 | - 通过加减号,`0 全选`,`-3 选倒数三个` 等输入规则,能方便指定选择
62 | - 基于翻页保留,翻页就像已塞进了购物车一样
63 | - 虽然任务是顺序流,但内置重启很方便,加上多开更方便
64 | - 读剪贴板方式流,字如其名
65 | - 去重,加标识符等
66 |
67 | ## 食用搭配(阅读器)
68 |
69 | 完全适配 CGS 而制,取(改)了独特的名字 `RedViewer (RV)`
70 | 加上最近对其手撕了几十个 commit 血改,还在更新中!所以再次推上
71 |
72 | [](https://github.com/jasoneri/redViewer)
73 |
74 | ## 致谢声明
75 |
76 | ### Credits
77 |
78 | Thanks to
79 | - [PyStand](https://github.com/skywind3000/PyStand) / [Platypus](https://github.com/sveinbjornt/Platypus) for providing win/macOS packaging.
80 | - [Ditto](https://github.com/sabrogden/Ditto) / [Maccy](https://github.com/p0deje/Maccy) for providing great win/macOS Clipboard Soft.
81 | - [PyQt-Fluent-Widgets](https://github.com/zhiyiYo/PyQt-Fluent-Widgets/) for providing elegant qfluent ui.
82 | - [VitePress](https://vitepress.dev) for providing a great documentation framework.
83 | - [astral-sh/uv](https://github.com/astral-sh/uv) for providing a great requirements manager.
84 | - Every comic production team / translator team / fans.
85 |
86 | ## 贡献
87 |
88 | 欢迎提供 ISSUE 或者 PR
89 |
90 |
91 |
92 |
93 |
94 | ## 传播声明
95 |
96 | - **请勿**将 ComicGUISpider 用于商业用途。
97 | - **请勿**将 ComicGUISpider 制作为视频内容,于境内视频网站(版权利益方)传播。
98 | - **请勿**将 ComicGUISpider 用于任何违反法律法规的行为。
99 |
100 | ComicGUISpider 仅供学习交流使用。
101 |
102 | ## Licence
103 |
104 | [MIT licence](https://github.com/jasoneri/ComicGUISpider/blob/GUI/LICENSE)
105 |
106 | ---
107 |
108 | 
109 |
--------------------------------------------------------------------------------
/docs/locate/en/config/index.md:
--------------------------------------------------------------------------------
1 | # 🔨 配置
2 |
3 | 
4 |
5 | ::: info config file is `scripts/conf.yml`, generated after initial use
6 | :::
7 | ::: warning The input box of multiple lines is in `yaml` format (except for eh_cookies), and a ⚠️ `space` ⚠️ is required after the `colon`
8 | :::
9 |
10 | ## Config Field / Corresponding `yml` Field
11 |
12 | ### Save Path / `sv_path`
13 |
14 | Download directory
15 | The `web` folder in the directory structure is because the default association with the [`redViewer`](https://github.com/jasoneri/redViewer) project is set like this
16 |
17 | ### LogLevel / `log_level`
18 |
19 | After the background runs, there will be a log directory, which is the same level as the GUI, and the GUI will give operation guidance when an error occurs
20 |
21 | ### Dedup / `isDeduplicate`
22 |
23 | When checked, there will be a style hint in the preview window for downloaded
24 | At the same time, the download will automatically filter out the existing records
25 | > [!Info] Currently only applicable in 🔞
26 |
27 | ### AddUuid / `addUuid`
28 |
29 | Add an identifier at the end of the directory when storing, which is used to handle different works with the same name
30 |
31 | ### Proxy / `proxies`
32 |
33 | Proxy
34 |
35 | > [!Info] It is recommended to configure the proxy here, rather than the global proxy mode, otherwise a lot of proxy traffic will be consumed when accessing the source
36 |
37 | ### Mapping / `custom_map`
38 |
39 | Search input mapping
40 | When the search does not meet the preset, first add the key-value pair here, and after restarting, the corresponding website results will be output when entering the custom key in the search box
41 |
42 | 1. Mapping does not need to care about the domain name, as long as it is used in the current website, as long as it meets `can access without mapping` and `the entered is not an invalid url`, the program will automatically replace it with a usable domain name, such as `wnacg.com` will be automatically replaced with the default domain name under non-proxy mapping
43 | 2. Note that the custom mapping may exceed the range of the paging rule, and at this time, it can be notified to the developer for expansion
44 |
45 | ### Preset / `completer`
46 |
47 | Custom preset
48 | There will be a `number corresponding to the website` prompt when the mouse hovers over the input box (in fact, it is the number of the choose-box)
49 |
50 | ### Eh Cookies / `eh_cookies`
51 |
52 | It is necessary to use `exhentai`
53 | [🎬 Method of acquisition](https://raw.githubusercontent.com/jasoneri/imgur/main/CGS/ehentai_get_cookies_new.gif)
54 | [🔗 Tool Website](https://tool.lu/en_US/curl/)
55 |
56 | ### ClipDb / `clip_db`
57 |
58 | ::: tip If function of read-clip wanted, Need Clipboard Soft be installed
59 | win: [🌐Ditto](https://github.com/sabrogden/Ditto)
60 | macOS: [🌐Maccy](https://github.com/p0deje/Maccy)
61 | :::
62 |
63 | When the clipboard reading function unusable, check whether the db exists, and fix it here after obtaining the correct path
64 |
65 | 1. ditto(win): Open options → Database path
66 | 2. maccy(macOS): [issue search for related information](https://github.com/p0deje/Maccy/issues/271)
67 |
68 | ### ClipEntries / `clip_read_num`
69 |
70 | Number of items read from the clipboard software
71 |
72 | ## Other `yml` Field
73 |
74 | ::: info The following fields are not displayed in the Config Dialog, set default value unless customize
75 | :::
76 |
77 | ### `img_sv_type`
78 |
79 | default: `jpg`
80 | image file name suffix
81 |
--------------------------------------------------------------------------------
/docs/locate/en/deploy/mac-required-reading.md:
--------------------------------------------------------------------------------
1 | # 💻 macOS Deploy
2 |
3 | ## 🚩 Architecture related
4 |
5 | Check the architecture with the following command (generally `x86_64` for Intel chips and `arm64` for Apple chips)
6 |
7 | ```bash
8 | python -c "import platform; print(platform.machine())"
9 | ```
10 |
11 | 1. `x86_64` architecture: The developer virtual machine is generally this architecture, and you can follow the process below
12 | 2. `arm64` architecture: CGS-init.app will automatically install `Rosetta 2`, and some [solutions to the error message](#trying-for-pop-up-error-messages) are listed below
13 |
14 | ## Portable Package
15 |
16 | macOS only needs to download the `CGS-macOS` compressed package
17 |
18 | ::: details Unzip directory tree (click to expand)
19 |
20 | ```
21 | CGS-macOS
22 | ├── CGS.app # Both the *main executor* and a code directory, it same as execute script `scripts/deploy/launcher/mac/CGS.bash`
23 | | ├── Contents
24 | | ├── Resources
25 | | ├── scripts # Real project code directory
26 | ├── CGS-init.app # Execute the script `scripts/deploy/launcher/mac/init.bash`
27 | └── CGS_macOS_first_guide.html # Used as a one-time guide for the first use after unzipping
28 | ```
29 |
30 | :::
31 |
32 | ## Operation
33 |
34 | ::: warning All documents containing the `scripts` directory
35 | Including this Deployment document, the main README, releases page, issue, etc.,
36 | The absolute-path in the app after moving to the application is `/Applications/CGS.app/Contents/Resources/scripts`
37 | :::
38 |
39 | ::: warning Execute the following initialization steps
40 | All `.app` must be opened with the right mouse button and clicked cancel the first time,
41 | then opened with the right mouse button to have an option to open,
42 | and then opened with a double-click from then on
43 | :::
44 |
45 | | | Explanation |
46 | |:------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
47 | | initialization | ⚠️following steps must be executed in strict order 1. After each decompression, move `CGS.app` to the application (see below for the figure) 2. After each unzip, you must run `CGS-init.app` to check/install environment, ⚠️ _**Note the new terminal window and follow the prompts**_ ⚠️ (corresponding to step 1.5 to change the font, you can repeat step 2) |
48 |
49 |
50 | app move to Applications
51 |
52 |
53 | ## 🔰 Others
54 |
55 | ### Trying for pop-up error messages
56 |
57 | ```bash
58 | # arm64 CGS.app shows corrupted and cannot be opened
59 | /opt/homebrew/bin/python3.12 /Applications/CGS.app/Contents/Resources/scripts/CGS.py
60 | # or
61 | /usr/local/bin/python3.12 /Applications/CGS.app/Contents/Resources/scripts/CGS.py
62 | ```
63 |
64 | ::: info If both fail, you can try to find methods by chatgpt / feedback in the group
65 | :::
66 |
67 | ### Updating
68 |
69 | ⚠️ Configuration files / deduplication records are stored in `scripts`, please be careful not to lose them by directly overwriting when downloading packages
70 | If there are UI/Interface changes, it is recommended to run `CGS-init.app` to ensure that the font settings are correct
71 |
72 | ### Bug report / submit issue
73 |
74 | When running software on macOS and encountering errors that need to be reported as issues, in addition to selecting `macOS` in the system,
75 | you also need to specify the system version and architecture in the description
76 | (Developer development environment is `macOS Sonoma(14) / x86_64`)
77 |
--------------------------------------------------------------------------------
/docs/locate/en/deploy/quick-start.md:
--------------------------------------------------------------------------------
1 | # 🚀 Quick-Start
2 |
3 | ## 1. Download / Deploy
4 |
5 | + Directly download [📦portable-pkg](https://github.com/jasoneri/ComicGUISpider/releases/latest), and unzip
6 |
7 | ::: warning macOS
8 | you need to read the [macOS Deploy](./mac-required-reading.md) document first
9 | :::
10 |
11 | + Or clone this project `git clone https://github.com/jasoneri/ComicGUISpider.git`
12 | ::: tip required list
13 | + `python3.12+`
14 | + install [`astral-sh/uv`](https://github.com/astral-sh/uv), used instead of `pip` to manage requirements
15 |
16 | ```bash
17 | python -m pip install uv
18 | ```
19 |
20 | **Install command** (CGS's `requirements/*.txt` files are compiled by uv)
21 |
22 | ```bash
23 | python -m uv pip install -r "requirements/win.txt"
24 | ```
25 |
26 | :::
27 |
28 | ::: warning ignore the `scripts` in scripts/xxx of the document, all document are based on the explanation of the 📦portable-pkg
29 | :::
30 |
31 | ## 2. Usage
32 |
33 | ### GUI
34 |
35 | `python CGS.py`
36 | Or using Portable-Applications
37 |
38 | ### CLI
39 |
40 | `python crawl_only.py --help`
41 | Or using env of portable environment:
42 | `.\runtime\python.exe .\scripts\crawl_only.py --help`
43 |
44 | ## 3. Configuration
45 |
46 | If you have needs of custom requirements, reference [🔨Configuration](../config/index.md) for settings
47 |
48 | ## 4. Update
49 |
50 | + CGS has a built-in update module; you can click the `Update` button in the configuration window to update
51 | ::: info When `local version` < `latest stable version` < `latest dev version`
52 | You need to update to `latest stable version` before you can update to `latest dev version`
53 | :::
54 |
55 | + You can also choose to download the latest version manually to the releases, but you need to pay attention to the configuration files and duplicate records not being overwritten and lost
56 | ::: tip The configuration file is `scripts/conf.yml` and the duplicate record is `scripts/record.db`
57 | :::
58 |
--------------------------------------------------------------------------------
/docs/locate/en/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | # https://vitepress.dev/reference/default-theme-home-page
3 | layout: home
4 |
5 | hero:
6 | name: "CGS"
7 | text: "Easily dl Comic"
8 | tagline: have fun it
9 | image:
10 | src: /CGS-girl.png
11 | alt: CGS
12 | actions:
13 | - theme: brand
14 | text: Quick-start
15 | link: /locate/en/deploy/quick-start
16 | - theme: alt
17 | text: Config
18 | link: /locate/en/config
19 | ---
20 |
21 |
22 |
23 |
26 |
29 |
32 |
35 |
38 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 | ## Functional Description
59 |
60 | - Easy to use with simple configuration
61 | - Just click a few times after preview to download, and you can also browse the website on the preview window
62 | - Convenient to specify selection with input rules like `-3` (select the last three), `0` (select all), etc.
63 | - Based on page retention, flipping pages is like putting items in a shopping cart
64 | - Built-in restart is very convenient, and it is even more convenient with multiple launches
65 | - Read clipboard stream
66 | - De-duplication, add identifiers, etc.
67 |
68 | ### Credits
69 |
70 | Thanks to
71 | - [PyStand](https://github.com/skywind3000/PyStand) / [Platypus](https://github.com/sveinbjornt/Platypus) for providing win/macOS packaging.
72 | - [Ditto](https://github.com/sabrogden/Ditto) / [Maccy](https://github.com/p0deje/Maccy) for providing great win/macOS Clipboard Soft.
73 | - [PyQt-Fluent-Widgets](https://github.com/zhiyiYo/PyQt-Fluent-Widgets/) for providing elegant qfluent ui.
74 | - [VitePress](https://vitepress.dev) for providing a great documentation framework.
75 | - [astral-sh/uv](https://github.com/astral-sh/uv) for providing a great requirements manager.
76 | - Every comic production team / translator team / fans.
77 |
78 | ## Contribution
79 |
80 | Welcome to provide ISSUE or PR
81 |
82 |
83 |
84 |
85 |
86 | ## Disclaimer
87 |
88 | - **Please do not** use ComicGUISpider for commercial purposes.
89 | - **Please do not** make ComicGUISpider into video content and disseminate it on domestic video websites (copyright holders).
90 | - **Please do not** use ComicGUISpider for any behavior that violates laws and regulations.
91 |
92 | ## Licence
93 |
94 | [MIT licence](https://github.com/jasoneri/ComicGUISpider/blob/GUI/LICENSE)
95 |
96 | ---
97 |
98 | 
99 |
--------------------------------------------------------------------------------
/docs/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "devDependencies": {
3 | "vitepress": "^1.6.3",
4 | "@vue/tsconfig": "^0.4.0",
5 | "@giscus/vue": "^3.1.1",
6 | "typescript": "4.9.5",
7 | "vue": "3.3.4"
8 | },
9 | "scripts": {
10 | "docs:dev": "vitepress dev",
11 | "docs:build": "rm _github/README_en.md && vitepress build && mkdir -p .vitepress/dist/assets/img/icons && cp -rf assets/img/icons/website .vitepress/dist/assets/img/icons/website",
12 | "docs:preview": "vitepress preview"
13 | },
14 | "type": "module"
15 | }
--------------------------------------------------------------------------------
/docs/public/CGS-girl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasoneri/ComicGUISpider/940f80d32a862550fe7dfdef462d21f30e730c1a/docs/public/CGS-girl.png
--------------------------------------------------------------------------------
/docs/public/_redirects:
--------------------------------------------------------------------------------
1 | /* /en/:splat 302 Language=en
2 | /* /zh/:splat 302
--------------------------------------------------------------------------------
/requirements/linux.txt:
--------------------------------------------------------------------------------
1 | # This file was autogenerated by uv via the following command:
2 | # uv pip compile requirements-in.txt --python-version 3.12 --python-platform linux -o linux.txt
3 | anyio==4.9.0
4 | # via httpx
5 | attrs==25.3.0
6 | # via
7 | # service-identity
8 | # twisted
9 | automat==25.4.16
10 | # via twisted
11 | brotli==1.1.0
12 | # via -r requirements-in.txt
13 | certifi==2025.4.26
14 | # via
15 | # httpcore
16 | # httpx
17 | # requests
18 | cffi==1.17.1
19 | # via
20 | # cryptography
21 | # xcffib
22 | charset-normalizer==3.4.1
23 | # via requests
24 | colorama==0.4.6
25 | # via -r requirements-in.txt
26 | constantly==23.10.4
27 | # via twisted
28 | cryptography==44.0.2
29 | # via
30 | # pyopenssl
31 | # scrapy
32 | # service-identity
33 | cssselect==1.3.0
34 | # via
35 | # parsel
36 | # scrapy
37 | darkdetect==0.8.0
38 | # via pyqt-fluent-widgets
39 | decorator==5.2.1
40 | # via jsonpath-rw
41 | defusedxml==0.7.1
42 | # via scrapy
43 | filelock==3.18.0
44 | # via tldextract
45 | h11==0.16.0
46 | # via httpcore
47 | h2==4.2.0
48 | # via httpx
49 | hpack==4.1.0
50 | # via h2
51 | httpcore==1.0.9
52 | # via httpx
53 | httpx==0.28.1
54 | # via -r requirements-in.txt
55 | hyperframe==6.1.0
56 | # via h2
57 | hyperlink==21.0.0
58 | # via twisted
59 | idna==3.10
60 | # via
61 | # anyio
62 | # httpx
63 | # hyperlink
64 | # requests
65 | # tldextract
66 | incremental==24.7.2
67 | # via twisted
68 | itemadapter==0.11.0
69 | # via
70 | # itemloaders
71 | # scrapy
72 | itemloaders==1.3.2
73 | # via scrapy
74 | jmespath==1.0.1
75 | # via
76 | # itemloaders
77 | # parsel
78 | jsonpath-rw==1.4.0
79 | # via -r requirements-in.txt
80 | loguru==0.7.3
81 | # via -r requirements-in.txt
82 | lxml==5.4.0
83 | # via
84 | # -r requirements-in.txt
85 | # parsel
86 | # scrapy
87 | markdown==3.8
88 | # via -r requirements-in.txt
89 | packaging==25.0
90 | # via
91 | # parsel
92 | # scrapy
93 | parsel==1.10.0
94 | # via
95 | # itemloaders
96 | # scrapy
97 | pillow==11.2.1
98 | # via -r requirements-in.txt
99 | pillow-avif-plugin==1.5.2
100 | # via -r requirements-in.txt
101 | ply==3.11
102 | # via jsonpath-rw
103 | polib==1.2.0
104 | # via -r requirements-in.txt
105 | protego==0.4.0
106 | # via scrapy
107 | pyasn1==0.6.1
108 | # via
109 | # pyasn1-modules
110 | # service-identity
111 | pyasn1-modules==0.4.2
112 | # via service-identity
113 | pycparser==2.22
114 | # via cffi
115 | pydispatcher==2.0.7
116 | # via scrapy
117 | pyexecjs==1.5.1
118 | # via -r requirements-in.txt
119 | pyopenssl==25.0.0
120 | # via scrapy
121 | pyqt-fluent-widgets==1.7.7
122 | # via -r requirements-in.txt
123 | pyqt5==5.15.11
124 | # via
125 | # -r requirements-in.txt
126 | # pyqt-fluent-widgets
127 | # pyqtwebengine
128 | pyqt5-frameless-window==0.6.0
129 | # via pyqt-fluent-widgets
130 | pyqt5-qt5==5.15.16
131 | # via pyqt5
132 | pyqt5-sip==12.17.0
133 | # via
134 | # pyqt5
135 | # pyqtwebengine
136 | pyqtwebengine==5.15.7
137 | # via -r requirements-in.txt
138 | pyqtwebengine-qt5==5.15.16
139 | # via pyqtwebengine
140 | pyyaml==6.0.2
141 | # via -r requirements-in.txt
142 | queuelib==1.8.0
143 | # via scrapy
144 | requests==2.32.3
145 | # via
146 | # requests-file
147 | # tldextract
148 | requests-file==2.1.0
149 | # via tldextract
150 | scrapy==2.12.0
151 | # via -r requirements-in.txt
152 | service-identity==24.2.0
153 | # via scrapy
154 | setuptools==80.1.0
155 | # via
156 | # incremental
157 | # zope-interface
158 | six==1.17.0
159 | # via
160 | # jsonpath-rw
161 | # pyexecjs
162 | sniffio==1.3.1
163 | # via anyio
164 | tldextract==5.3.0
165 | # via scrapy
166 | tqdm==4.67.1
167 | # via -r requirements-in.txt
168 | twisted==24.11.0
169 | # via scrapy
170 | typing-extensions==4.13.2
171 | # via
172 | # anyio
173 | # pyopenssl
174 | # twisted
175 | urllib3==2.4.0
176 | # via requests
177 | w3lib==2.3.1
178 | # via
179 | # parsel
180 | # scrapy
181 | xcffib==1.8.0
182 | # via pyqt5-frameless-window
183 | zope-interface==7.2
184 | # via
185 | # scrapy
186 | # twisted
187 |
--------------------------------------------------------------------------------
/requirements/script/mac_arm64.txt:
--------------------------------------------------------------------------------
1 | # This file was autogenerated by uv via the following command:
2 | # uv pip compile requirements-script-in.txt --python-version 3.12 --python-platform aarch64-apple-darwin -o mac_arm64.txt
3 | numpy==2.2.5
4 | # via pandas
5 | pandas==2.2.3
6 | # via -r requirements-script-in.txt
7 | python-dateutil==2.9.0.post0
8 | # via pandas
9 | pytz==2025.2
10 | # via pandas
11 | redis==6.0.0
12 | # via -r requirements-script-in.txt
13 | six==1.17.0
14 | # via python-dateutil
15 | tzdata==2025.2
16 | # via pandas
17 |
--------------------------------------------------------------------------------
/requirements/script/mac_x86_64.txt:
--------------------------------------------------------------------------------
1 | # This file was autogenerated by uv via the following command:
2 | # uv pip compile requirements-script-in.txt --python-version 3.12 --python-platform x86_64-apple-darwin -o mac_x86_64.txt
3 | numpy==2.2.5
4 | # via pandas
5 | pandas==2.2.3
6 | # via -r requirements-script-in.txt
7 | python-dateutil==2.9.0.post0
8 | # via pandas
9 | pytz==2025.2
10 | # via pandas
11 | redis==6.0.0
12 | # via -r requirements-script-in.txt
13 | six==1.17.0
14 | # via python-dateutil
15 | tzdata==2025.2
16 | # via pandas
17 |
--------------------------------------------------------------------------------
/requirements/script/win.txt:
--------------------------------------------------------------------------------
1 | # This file was autogenerated by uv via the following command:
2 | # uv pip compile requirements-script-in.txt --python-version 3.12 --python-platform windows -o win.txt
3 | numpy==2.2.5
4 | # via pandas
5 | pandas==2.2.3
6 | # via -r requirements-script-in.txt
7 | python-dateutil==2.9.0.post0
8 | # via pandas
9 | pytz==2025.2
10 | # via pandas
11 | redis==6.0.0
12 | # via -r requirements-script-in.txt
13 | six==1.17.0
14 | # via python-dateutil
15 | tzdata==2025.2
16 | # via pandas
17 |
--------------------------------------------------------------------------------
/requirements/win.txt:
--------------------------------------------------------------------------------
1 | # This file was autogenerated by uv via the following command:
2 | # uv pip compile requirements-in.txt --python-version 3.12 --python-platform windows -o win.txt
3 | anyio==4.9.0
4 | # via httpx
5 | attrs==25.3.0
6 | # via
7 | # service-identity
8 | # twisted
9 | automat==24.8.1
10 | # via twisted
11 | brotli==1.1.0
12 | # via -r requirements-in.txt
13 | certifi==2025.1.31
14 | # via
15 | # httpcore
16 | # httpx
17 | # requests
18 | cffi==1.17.1
19 | # via cryptography
20 | charset-normalizer==3.4.1
21 | # via requests
22 | colorama==0.4.6
23 | # via
24 | # -r requirements-in.txt
25 | # loguru
26 | # tqdm
27 | constantly==23.10.4
28 | # via twisted
29 | cryptography==44.0.2
30 | # via
31 | # pyopenssl
32 | # scrapy
33 | # service-identity
34 | cssselect==1.3.0
35 | # via
36 | # parsel
37 | # scrapy
38 | darkdetect==0.8.0
39 | # via pyqt-fluent-widgets
40 | decorator==5.2.1
41 | # via jsonpath-rw
42 | defusedxml==0.7.1
43 | # via scrapy
44 | filelock==3.18.0
45 | # via tldextract
46 | h11==0.16.0
47 | # via
48 | # -r requirements-in.txt
49 | # httpcore
50 | h2==4.2.0
51 | # via httpx
52 | hpack==4.1.0
53 | # via h2
54 | httpcore==1.0.9
55 | # via httpx
56 | httpx==0.28.1
57 | # via -r requirements-in.txt
58 | hyperframe==6.1.0
59 | # via h2
60 | hyperlink==21.0.0
61 | # via twisted
62 | idna==3.10
63 | # via
64 | # anyio
65 | # httpx
66 | # hyperlink
67 | # requests
68 | # tldextract
69 | incremental==24.7.2
70 | # via twisted
71 | itemadapter==0.11.0
72 | # via
73 | # itemloaders
74 | # scrapy
75 | itemloaders==1.3.2
76 | # via scrapy
77 | jmespath==1.0.1
78 | # via
79 | # itemloaders
80 | # parsel
81 | jsonpath-rw==1.4.0
82 | # via -r requirements-in.txt
83 | loguru==0.7.3
84 | # via -r requirements-in.txt
85 | lxml==5.3.1
86 | # via
87 | # -r requirements-in.txt
88 | # parsel
89 | # scrapy
90 | markdown==3.7
91 | # via -r requirements-in.txt
92 | packaging==24.2
93 | # via
94 | # parsel
95 | # scrapy
96 | parsel==1.10.0
97 | # via
98 | # itemloaders
99 | # scrapy
100 | pillow==11.1.0
101 | # via -r requirements-in.txt
102 | pillow-avif-plugin==1.5.2
103 | # via -r requirements-in.txt
104 | ply==3.11
105 | # via jsonpath-rw
106 | polib==1.2.0
107 | # via -r requirements-in.txt
108 | protego==0.4.0
109 | # via scrapy
110 | pyasn1==0.6.1
111 | # via
112 | # pyasn1-modules
113 | # service-identity
114 | pyasn1-modules==0.4.2
115 | # via service-identity
116 | pycparser==2.22
117 | # via cffi
118 | pydispatcher==2.0.7
119 | # via scrapy
120 | pyexecjs==1.5.1
121 | # via -r requirements-in.txt
122 | pyopenssl==25.0.0
123 | # via scrapy
124 | pyqt-fluent-widgets==1.7.6
125 | # via -r requirements-in.txt
126 | pyqt5==5.15.11
127 | # via
128 | # -r requirements-in.txt
129 | # pyqt-fluent-widgets
130 | # pyqtwebengine
131 | pyqt5-frameless-window==0.5.1
132 | # via pyqt-fluent-widgets
133 | pyqt5-qt5==5.15.2
134 | # via pyqt5
135 | pyqt5-sip==12.17.0
136 | # via
137 | # pyqt5
138 | # pyqtwebengine
139 | pyqtwebengine==5.15.7
140 | # via -r requirements-in.txt
141 | pyqtwebengine-qt5==5.15.2
142 | # via pyqtwebengine
143 | pywin32==310
144 | # via pyqt5-frameless-window
145 | pyyaml==6.0.2
146 | # via -r requirements-in.txt
147 | queuelib==1.7.0
148 | # via scrapy
149 | requests==2.32.3
150 | # via
151 | # requests-file
152 | # tldextract
153 | requests-file==2.1.0
154 | # via tldextract
155 | scrapy==2.12.0
156 | # via -r requirements-in.txt
157 | service-identity==24.2.0
158 | # via scrapy
159 | setuptools==80.7.1
160 | # via
161 | # -r requirements-in.txt
162 | # incremental
163 | # zope-interface
164 | six==1.17.0
165 | # via
166 | # jsonpath-rw
167 | # pyexecjs
168 | sniffio==1.3.1
169 | # via anyio
170 | tldextract==5.1.3
171 | # via scrapy
172 | tqdm==4.67.1
173 | # via -r requirements-in.txt
174 | twisted==24.11.0
175 | # via scrapy
176 | typing-extensions==4.13.0
177 | # via
178 | # anyio
179 | # pyopenssl
180 | # twisted
181 | urllib3==2.3.0
182 | # via requests
183 | uv==0.7.2
184 | # via -r requirements-in.txt
185 | w3lib==2.3.1
186 | # via
187 | # parsel
188 | # scrapy
189 | win32-setctime==1.2.0
190 | # via loguru
191 | zope-interface==7.2
192 | # via
193 | # scrapy
194 | # twisted
195 |
--------------------------------------------------------------------------------
/scrapy.cfg:
--------------------------------------------------------------------------------
1 | # Automatically created by: scrapy startproject
2 | #
3 | # For more information about the [deploy] section see:
4 | # https://scrapyd.readthedocs.io/en/latest/deploy.html
5 |
6 | [settings]
7 | default = ComicSpider.settings
8 |
9 | [deploy]
10 | #url = http://localhost:6800/
11 | project = ComicSpider
12 |
--------------------------------------------------------------------------------
/utils/docs.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | import markdown
4 | from utils import ori_path
5 |
6 |
7 | with open(ori_path.joinpath('assets/github_format.html'), 'r', encoding='utf-8') as f:
8 | github_markdown_format = f.read()
9 |
10 |
class MarkdownConverter:
    """Converts markdown text into a complete github-styled html document."""

    # html shell (loaded at module import) with a `{content}` placeholder.
    github_markdown_format = github_markdown_format
    # Shared converter instance; extensions mirror github-flavoured rendering.
    md = markdown.Markdown(
        extensions=[
            'markdown.extensions.md_in_html',
            'markdown.extensions.tables',
            'markdown.extensions.fenced_code',
            'markdown.extensions.nl2br',
            'markdown.extensions.admonition',
        ],
        output_format='html5',
    )

    @classmethod
    def convert_html(cls, md_content):
        """Render markdown source and embed the result into the html shell."""
        body = cls.md.convert(md_content)
        return cls.github_markdown_format.replace('{content}', body)

    @classmethod
    def transfer_markdown(cls, _in, _out):
        """Read markdown from `_in`, convert it, and write the html to `_out`."""
        with open(_in, 'r', encoding='utf-8') as src:
            source_text = src.read()
        with open(_out, 'w', encoding='utf-8') as dst:
            dst.write(cls.convert_html(source_text))
23 |
24 | @classmethod
25 | def transfer_markdown(cls, _in, _out):
26 | with open(_in, 'r', encoding='utf-8') as f:
27 | _md_content = f.read()
28 | _html = cls.convert_html(_md_content)
29 | with open(_out, 'w', encoding='utf-8') as f:
30 | f.write(_html)
31 |
32 |
class MdHtml(str):
    """str subclass whose transformations keep returning MdHtml, enabling chaining."""

    def cdn_replace(self, author, repo, branch):
        """Rewrite raw.githubusercontent.com links to the jsDelivr-style mirror host."""
        return MdHtml(self.replace("raw.githubusercontent.com", "jsd.vxo.im/gh")
                      .replace(f"{author}/{repo}/{branch}", f"{author}/{repo}@{branch}"))

    @property
    def details_formatter(self):
        # before MarkdownConverter.convert_html()
        # NOTE(review): the replace() arguments below appear truncated in this copy
        # (likely html tag strings stripped during extraction) — as written this is
        # a no-op; restore the original literals from VCS before editing.
        return MdHtml(self.replace("", ''))
42 |
--------------------------------------------------------------------------------
/utils/preview/__init__.py:
--------------------------------------------------------------------------------
1 | import tempfile
2 | from lxml import etree
3 | from utils import ori_path, temp_p
4 | from utils.sql import SqlUtils
5 | from utils.website import Uuid
6 | from utils.preview.el import El
7 |
8 |
class PreviewHtml:
    """Assembles the GUI preview page out of per-result card elements."""

    # Directory holding the html templates / static assets for the preview page.
    format_path = ori_path.joinpath("GUI/src/preview_format")

    def __init__(self, url=None, custom_style=None):
        self.contents = []          # rendered card snippets, in insertion order
        self.el = El(custom_style)  # element builder selected for the style
        self.url = url              # source listing url, appended as a footer link

    def add(self, *args, **badges_kw):
        """Render one result card. badges_kw support: pages, likes, lang, btype"""
        self.contents.append(self.el.create(*args, **badges_kw))

    @property
    def created_temp_html(self):
        """Fill the index template with all cards, write a temp .html, return its path."""
        temp_p.mkdir(exist_ok=True)
        with open(self.format_path.joinpath("index.html"), 'r', encoding='utf-8') as f:
            format_text = f.read()
        _content = "\n".join(self.contents)
        if self.url:
            # NOTE(review): the f-string below appears truncated in this copy (the
            # html markup of the footer link seems stripped) — restore from VCS
            # before editing this block.
            _content += f'\nfor check current page
检查当前页数
{self.url}
'
        html = format_text.replace("{body}", _content)
        tf = tempfile.NamedTemporaryFile(suffix=".html", delete=False, dir=temp_p)
        tf.write(bytes(html, 'utf-8'))
        f = str(tf.name)
        tf.close()
        return f

    @staticmethod
    def tip_duplication(spider, tf):
        """Mark already-downloaded entries in the generated preview html, in place.

        Extracts every task url from the page, looks its identity md5 up in the
        dedupe db, and tags matching anchors with the "downloaded" css class.
        """
        handler = InfoHandler(spider, tf)
        infos = handler.get_infos()
        if not infos:
            print("tip_duplication got info None")
            return
        batch_md5 = handler.batch_md5(infos)
        sql_utils = SqlUtils()
        downloaded_md5 = sql_utils.batch_check_dupe(list(batch_md5.keys()))
        sql_utils.close()

        with open(tf, 'r+', encoding='utf-8') as fp:
            html_content = fp.read()
            for _md5 in downloaded_md5:
                info = batch_md5[_md5]
                # Tag the anchor pointing at this url so css styles it as done.
                html_content = html_content.replace(
                    f'href="{info}"',
                    f'href="{info}" class="downloaded"'
                )
            fp.seek(0)
            fp.truncate()
            fp.write(html_content)
59 |
60 |
class InfoHandler:
    """Extracts task urls from a rendered preview page and maps them to identity md5s."""

    def __init__(self, spider, tf):
        self.spider = spider  # spider instance, forwarded to Uuid for id derivation
        self.tf = tf          # path of the temporary preview html file

    def get_infos(self):
        """Parse the preview html and return every task anchor's href."""
        with open(self.tf, 'r', encoding='utf-8') as fp:
            tree = etree.HTML(fp.read())
        return tree.xpath('//div[contains(@class, "singal-task")]//a/@href')

    def batch_md5(self, infos):
        """Return a mapping of identity-md5 -> info url for the extracted links."""
        uuid_obj = Uuid(self.spider)
        mapping = {}
        for info in infos:
            mapping[uuid_obj.id_and_md5(info)[-1]] = info
        return mapping
78 | return _
79 |
80 |
class PreviewByClipHtml:
    """Builds the clipboard-driven preview page from its html template."""

    # Directory holding the preview html templates.
    format_path = ori_path.joinpath("GUI/src/preview_format")

    @classmethod
    def created_temp_html(cls, url_regex, match_num):
        """Fill the clip template with the url regex and expected match count,
        write the result to a temp .html file, and return that file's path."""
        with open(cls.format_path.joinpath("index_by_clip.html"), 'r', encoding='utf-8') as fh:
            template = fh.read()
        page = template.replace("{_url_regex}", url_regex).replace("{_match_num}", str(match_num))
        tmp = tempfile.NamedTemporaryFile(suffix=".html", delete=False, dir=temp_p)
        try:
            tmp.write(page.encode('utf-8'))
            return str(tmp.name)
        finally:
            tmp.close()
94 |
--------------------------------------------------------------------------------
/utils/preview/el.py:
--------------------------------------------------------------------------------
1 | from utils import PresetHtmlEl
2 |
3 |
class ElMinix:
    """Default builder for a single preview-card html element."""

    # Card width (px) referenced by the element template.
    max_width = 170

    @classmethod
    def create(cls, idx, img_src, title, url, **badges_kw):
        """Sanitize the title, build optional badges, and delegate to the renderer."""
        title = PresetHtmlEl.sub(title)
        abbreviated_title = title[:18] + "..."
        badges = Badges(**badges_kw)
        return cls.create_(idx, img_src, title, abbreviated_title, url, badges)

    @classmethod
    def create_(cls, idx, img_src, title, abbreviated_title, url, badges):
        # NOTE(review): the template literal below appears truncated in this copy
        # (the html body of the f-string is missing) — restore the original
        # template from VCS before editing this method.
        el = f""""""
        return el
27 |
28 |
def El(custom_style) -> ElMinix:
    """Select the element-builder class for `custom_style`.

    Only the default builder exists today, so every style resolves to
    ElMinix; extend the dispatch here when alternative layouts are added.
    """
    return ElMinix
33 |
34 |
class Badges:
    """Collects the optional badge snippets (pages/likes/lang/btype) of a card."""

    # %-templates keyed by the keyword name used at construction time.
    pages = 'p%s '
    likes = '♥️%s '
    lang = '%s '
    btype = '%s '

    def __init__(self, **badges_kw):
        # Keep only badges with a truthy value, rendered via the class templates.
        self._content = [getattr(self, attr) % value
                         for attr, value in badges_kw.items() if value]

    def __str__(self):
        return ' '.join(self._content)
49 |
--------------------------------------------------------------------------------
/utils/redViewer_tools.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | import pathlib
4 | import re
5 | import shutil
6 |
7 | from tqdm import tqdm
8 |
9 | from assets import res
10 |
def combine_then_mv(root_dir, target_dir, order_book=None) -> list:
    """Flatten book/section trees under `root_dir` into `{book}_{section}` dirs
    inside `target_dir`, skipping a few reserved folders.

    Each processed book dir is removed afterwards; an existing destination dir
    is replaced. Returns the list of book names that were processed.
    """
    skip_names = ('web', 'web_handle', 'log', res.SPIDER.ERO_BOOK_FOLDER)
    root_p = pathlib.Path(root_dir)
    target_p = pathlib.Path(target_dir)
    done = []
    book_dirs = (d for d in root_p.iterdir() if d.is_dir() and d.name not in skip_names)
    for book_dir in book_dirs:
        for section_dir in tqdm(book_dir.iterdir()):
            dest = target_p.joinpath(f"{book_dir.name}_{section_dir.name}")
            if dest.exists():
                shutil.rmtree(dest)
            shutil.move(section_dir, dest)
        shutil.rmtree(book_dir)
        done.append(book_dir.name)
    return done
25 |
26 |
def restore(ori):
    """Inverse of combine_then_mv: split `{book}_{section}` dirs under `ori`
    back into per-book directories beside `ori`.

    Bug fix: the original assigned `book_p` only when the book dir did not yet
    exist, so a pre-existing book dir made sections land in the previously
    created book's folder (or crash on None for the very first entry). The
    destination is now resolved for every entry.
    """
    p = pathlib.Path(ori)
    for entry in tqdm(p.iterdir()):
        # NOTE(review): assumes section names contain no '_' beyond the
        # book/section separator — confirm against combine_then_mv's naming.
        book, section = entry.name.split('_')
        book_p = p.parent.joinpath(book)  # always resolve the book dir
        book_p.mkdir(exist_ok=True)
        shutil.move(entry, book_p.joinpath(section))
36 |
37 |
def show_max(record_file) -> dict:
    """Return, for every book in `record_file`, the section with the highest
    numeric part.

    Each line of the file is `{book}_{section}`, optionally wrapped with
    `<del>`/`<save>`/`<remove>` markers which are stripped before parsing.

    Fixes: the return annotation claimed `str` while a dict is returned, and
    the max key previously ran the regex search twice per section.
    """
    sec_regex = re.compile(r'.*?(\d+\.?\d?)')
    format_regex = re.compile('<(del|save|remove)>')

    def _numeric_key(section):
        # Sections without a number sort lowest.
        m = sec_regex.search(section)
        return float(m.group(1)) if m else 0.0

    grouped = {}
    with open(record_file, 'r', encoding='utf-8') as f:
        for line in f:
            book, section = format_regex.sub('', line.strip()).split('_')
            grouped.setdefault(book, []).append(section)
    return {book: max(sections, key=_numeric_key) for book, sections in grouped.items()}
52 |
53 |
if __name__ == '__main__':
    from utils import conf

    # Ad-hoc maintenance entry: split the combined "web" folder back into
    # per-book directories under the configured save path.
    restore(conf.sv_path.joinpath("web"))
58 |
--------------------------------------------------------------------------------
/utils/script/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | import json
4 | import typing as t
5 | import pathlib as p
6 | from functools import partial
7 |
8 | from redis import asyncio as aioredis
9 | from utils import Conf, ori_path
10 |
# Script-local configuration loaded from utils/script/conf.yml; its `redis`
# section supplies the connection parameters used by AioRClient below.
conf = Conf(path=ori_path.joinpath("utils/script"))
redis_conf: dict = conf.redis
13 |
14 |
class AioRClient(aioredis.Redis):
    """Async Redis client with JSON (de)serialization built into common ops.

    conf of ./conf.yml
    ```yaml
    redis:
      host: 127.0.0.1
      port: 6379
      db: 0
      password:
    ```
    """

    def __init__(self):
        """preset redis conf of utils/script/conf.yml"""
        super(AioRClient, self).__init__(host=redis_conf['host'], port=redis_conf['port'], db=redis_conf['db'])

    async def hgetall(self, name):
        """Return the full hash, keys decoded and values json.loads-ed.

        Falls back to plain decoding when any value is not valid JSON.
        """
        result = await super(AioRClient, self).hgetall(name)
        try:
            return {key.decode(): json.loads(value) for key, value in result.items()}
        except (json.decoder.JSONDecodeError, TypeError):
            return {key.decode(): value.decode() for key, value in result.items()}

    async def hget(self, name, key):
        """Return one hash field, json.loads-ed when possible (raw otherwise)."""
        result = await super(AioRClient, self).hget(name, key)
        try:
            return json.loads(result)
        except (json.decoder.JSONDecodeError, TypeError):
            return result

    async def rpush(self, name, *values):
        """json.dumps every value (preserving non-ascii) before pushing."""
        _values = tuple(map(partial(json.dumps, ensure_ascii=False), values))
        return await super(AioRClient, self).rpush(name, *_values)

    async def lpop(self, name: str, count: t.Optional[int] = None) -> list:
        """Pop item(s) and json.loads each; always returns a list.

        Bug fix: without `count`, redis returns a single raw value, which is
        bytes when decode_responses is off. The old `isinstance(results, str)`
        check missed bytes, so the map() below iterated over the individual
        byte ints and failed; bytes is now accepted too.
        """
        results = await super(AioRClient, self).lpop(name, count)
        if isinstance(results, (str, bytes)):
            results = [results]
        elif results is None:
            results = []
        return list(map(json.loads, results))
58 |
59 |
class BlackList:
    """Tiny JSON-file-backed blacklist store."""

    def __init__(self, file: p.Path):
        self.file = file  # json file holding the blacklist entries

    def read(self):
        """Load the stored list; an absent or empty file yields []."""
        if not self.file.exists():
            return []
        with open(self.file, 'r', encoding='utf-8') as fh:
            data = json.load(fh)
        return data if data else []

    def save(self, new_data):
        """Overwrite the file with `new_data` serialized as JSON (non-ascii kept)."""
        with open(self.file, 'w', encoding='utf-8') as fh:
            json.dump(new_data, fh, ensure_ascii=False)
74 |
--------------------------------------------------------------------------------
/utils/script/extra.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | import httpx
4 | from utils import ori_path
5 |
6 |
def get_one_extra():
    """One-off helper: download a single gallery's pages from a saved html dump.

    Target sites this was written against:
    jpr18.com
    dmkumh.com

    Reads a previously-saved page from test/analyze/temp/temp.html, extracts
    the per-page image urls, and fetches each into the configured save path.
    """
    import asyncio
    import aiofiles
    from lxml import etree
    import pathlib as p
    from tqdm.asyncio import tqdm
    from utils import conf

    # Hard-coded gallery name; edit per run (this is an ad-hoc script).
    name = "[野際かえで] おもちゃの人生3 [無修正] [sky110036漢化]"
    book_html = ori_path.joinpath(r"test/analyze/temp/temp.html")
    tar_path = p.Path(conf.sv_path).joinpath(r"本子\web", name)

    async def do(targets):
        # Fetch a single image body.
        async def pic_fetch(sess, url):
            resp = await sess.get(url)
            return resp.content

        # Sequential fetch with a progress bar; one shared client session.
        async with httpx.AsyncClient() as sess:
            for page, url in tqdm(targets.items()):
                content = await pic_fetch(sess, url)
                async with aiofiles.open(tar_path.joinpath(f"第{page}页.jpg"), 'wb') as f:
                    await f.write(content)

    tar_path.mkdir(exist_ok=True)
    with open(book_html, 'r', encoding='utf-8') as f:
        html = etree.HTML(f.read())
    # Map page index -> lazy-loaded image url from the saved article markup.
    divs = html.xpath("//div[contains(@class, 'rd-article-wr')]/div")
    targets = {div.xpath("./@data-index")[0]: div.xpath("./img/@data-original")[0]
               for div in divs}
    loop = asyncio.get_event_loop()
    loop.run_until_complete(do(targets))
42 |
43 |
44 | if __name__ == '__main__':
45 | get_one_extra()
46 |
--------------------------------------------------------------------------------
/utils/script/image/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 |
--------------------------------------------------------------------------------
/utils/script/image/expander.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | import re
4 | from enum import Enum
5 | from typing import Dict, Optional
6 | import pandas as pd
7 |
8 |
class Filter:
    """Builds filter predicates from the configured filter dict.

    Each configured regex entry (currently only "file") becomes a predicate
    attribute returning True when the regex matches its argument; a missing
    entry becomes an always-False predicate. The nested "Artists" section is
    wrapped in an Artists instance.
    """

    def __init__(self, conf_filter: Optional[Dict[str, str]] = None) -> None:
        conf_filter = conf_filter or {}
        for regex_str in ("file",):
            regex_val = conf_filter.get(regex_str)
            if regex_val:
                compiled = re.compile(regex_val)
                # Bug fix: bind the compiled pattern as a default argument so
                # each predicate keeps its own regex — the old closure captured
                # the loop variable late, so every predicate would share the
                # last compiled pattern once more keys are added to the tuple.
                setattr(self, regex_str, lambda arg, rx=compiled: bool(rx.search(arg)))
            else:
                setattr(self, regex_str, lambda _arg: False)
        self.Artists = Artists(conf_filter.get("Artists", {}))
20 |
21 |
class Artists:
    """Per-artist title filters plus shared post-sanitizing helpers."""

    def __init__(self, title_filters: Optional[Dict[str, str]] = None):
        # Characters not allowed in filesystem names; replaced in titles.
        self._sanitize_re = re.compile(r'[|:<>?*"\\/]')
        self._filters = {}
        self.has_normal = False

        if title_filters:
            if 'normal' in title_filters:
                # 'normal' acts as a global pre-filter applied before any
                # artist-specific filter.
                self.has_normal = True
                self._normal_pattern = re.compile(title_filters['normal'])
            for name, pattern in title_filters.items():
                self._filters[name] = re.compile(pattern)

    def base_process(self, posts):
        """Drop posts matching the 'normal' pattern and sanitize the rest's titles."""
        if self.has_normal:
            posts = list(filter(lambda p: not bool(self._normal_pattern.search(p['title'])), posts))
        for post in posts:
            post['title'] = self._sanitize_re.sub('-', post['title'])
        return posts

    def __getattr__(self, name: str):
        # Any configured filter name resolves to a callable excluding matches.
        if name in self._filters:
            pattern = self._filters[name]
            return lambda posts: [p for p in self.base_process(posts) if not bool(pattern.search(p['title']))]
        raise AttributeError(f"unget attr: {name}")

    @staticmethod
    def keihh(posts):
        """patreon/user/
        naming hobby: title, title(v2), title(v3)...

        Keeps only the highest version of each base title.
        Bug fix: versions are compared numerically now — the old code took the
        max of version *strings*, which ranked 'v9' above 'v10'.
        """
        df = pd.DataFrame(posts)

        df['BaseName'] = df['title'].str.replace(r'\s*\([vV]\d+\)', '', regex=True)
        # Extract the numeric version; unversioned posts count as version 0.
        df['Version'] = (df['title'].str.extract(r'\([vV](\d+)\)', expand=False)
                         .fillna(0).astype(int))

        df['title'] = df['title'].str.replace(r'([|:<>?*"\\/])', '', regex=True)

        latest_versions = df.loc[df.groupby('BaseName')['Version'].idxmax()]
        dic_posts = latest_versions.drop(['BaseName', 'Version'], axis=1).to_dict('records')
        return dic_posts
64 |
65 |
class ArtistsEnum(Enum):
    """Known artist ids on the source site, keyed by a readable alias."""

    DaikiKase = "273185"
68 |
--------------------------------------------------------------------------------
/utils/sql/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | import sqlite3
4 |
5 | from utils import ori_path
6 | from variables import SPECIAL_WEBSITES
7 |
8 |
class SqlUtils:
    """Thin sqlite wrapper recording identity-md5 hashes of handled items,
    used for duplicate detection across runs."""

    db = ori_path.joinpath("record.db")
    init_flag = False  # True when the db file did not exist before this instance

    def __init__(self):
        if not self.db.exists():
            self.init_flag = True
        self.conn = sqlite3.connect(self.db)
        self.cursor = self.conn.cursor()
        self.table = "identity_md5_table"
        if self.init_flag or not self.table_exists():
            self.create()

    def table_exists(self):
        """Return True when the record table already exists."""
        self.cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name=?", (self.table,))
        return self.cursor.fetchone() is not None

    def create(self):
        """Create the record table (idempotent)."""
        sql = f'''CREATE TABLE IF NOT EXISTS `{self.table}` (
        `id` INTEGER PRIMARY KEY AUTOINCREMENT,
        `identity_md5` TEXT NOT NULL UNIQUE
        );'''
        self.cursor.execute(sql)
        self.conn.commit()

    def add(self, identity_md5):
        """Record *identity_md5*; duplicates are silently ignored (UNIQUE column)."""
        sql = f'''INSERT OR IGNORE INTO {self.table} (identity_md5) VALUES (?);'''
        self.cursor.execute(sql, (identity_md5,))
        self.conn.commit()
        return identity_md5

    def batch_check_dupe(self, identity_md5s):
        """Return the subset of *identity_md5s* already recorded, as a set."""
        if not identity_md5s:
            # Empty input would build `IN ()`, which is a sqlite syntax error.
            return set()
        placeholders = ','.join('?' * len(identity_md5s))
        sql = f'''SELECT identity_md5 FROM {self.table} WHERE identity_md5 IN ({placeholders});'''
        self.cursor.execute(sql, identity_md5s)
        result = set(row[0] for row in self.cursor.fetchall())
        return result

    def check_dupe(self, identity_md5):
        """Return True when *identity_md5* was recorded before."""
        sql = f'''SELECT EXISTS (SELECT 1 FROM {self.table} WHERE identity_md5 = ?);'''
        self.cursor.execute(sql, (identity_md5,))
        result = self.cursor.fetchone()[0]
        return bool(result)

    def close(self):
        """Close cursor and connection; the instance is unusable afterwards."""
        self.cursor.close()
        self.conn.close()
        del self.conn
57 |
--------------------------------------------------------------------------------
/utils/website/core.py:
--------------------------------------------------------------------------------
1 | import re
2 | from datetime import datetime, timedelta
3 | import httpx
4 | from utils import temp_p
5 |
6 |
class Cookies:
    """Helpers for converting cookie containers between representations."""

    @staticmethod
    def to_str_(cookie):
        """Serialize a cookie mapping into a `k=v; k2=v2` header string."""
        pairs = (f"{key}={value}" for key, value in cookie.items())
        return '; '.join(pairs)
11 |
12 |
class Req:
    """Base mixin supplying an httpx client configured from the user conf."""

    book_hea = {}  # per-site request headers; subclasses override

    @classmethod
    def get_cli(cls, conf):
        """Return an httpx.Client, routed through the first configured proxy when present."""
        if not conf.proxies:
            return httpx.Client(headers=cls.book_hea, trust_env=True)
        proxied = httpx.HTTPTransport(proxy=f"http://{conf.proxies[0]}", retries=3)
        return httpx.Client(headers=cls.book_hea, transport=proxied)

    book_url_regex = ""  # subclasses set a pattern recognizing their book urls

    @classmethod
    def parse_book(cls):
        """Hook: subclasses implement book-page parsing."""
        ...
29 |
30 |
class Utils:
    """Base helpers shared by all website utility classes."""

    name = ""      # site identifier, set by subclasses
    headers = {}   # default request headers, set by subclasses

    @classmethod
    def get_uuid(cls, info):
        """Build a site-scoped unique id for *info*."""
        return f"{cls.name}-{info}"
38 |
39 |
40 | class EroUtils(Utils):
41 | forever_url = ""
42 | publish_url = ""
43 | status_forever = True
44 | status_publish = True
45 | uuid_regex = NotImplementedError
46 | publish_headers = {}
47 |
48 | @classmethod
49 | def by_forever(cls):
50 | if not cls.forever_url:
51 | return None
52 | try:
53 | resp = httpx.head(cls.forever_url, headers=cls.headers, follow_redirects=True)
54 | except httpx.ConnectError:
55 | cls.status_forever = False
56 | print(f"永久网址[{cls.forever_url}]失效了") # logger.warning()
57 | else:
58 | return re.search(r"https?://(.*)/?", str(resp.request.url)).group(1)
59 |
60 | @classmethod
61 | def by_publish(cls):
62 | if not cls.publish_url:
63 | return None
64 | with httpx.Client(headers=cls.publish_headers or cls.headers,
65 | transport=httpx.HTTPTransport(retries=5)) as cli:
66 | try:
67 | resp = cli.get(cls.publish_url)
68 | resp.raise_for_status()
69 | if str(resp.status_code).startswith('2'):
70 | return cls.parse_publish(resp.text)
71 | except httpx.HTTPError as e:
72 | ...
73 | cls.status_publish = False
74 | print(f"发布页获取[{cls.publish_url}]失效了") # logger.warning()
75 |
76 | @classmethod
77 | def get_domain(cls):
78 | domain_file = temp_p.joinpath(f"{cls.name}_domain.txt")
79 | current_time = datetime.now()
80 | if (domain_file.exists() and current_time - datetime.fromtimestamp(domain_file.stat().st_mtime) < timedelta(hours=48)):
81 | with open(domain_file, 'r', encoding='utf-8') as f:
82 | domain = f.read().strip()
83 | else:
84 | domain = cls.by_publish() or cls.by_forever() or None # 控制顺序,例如永久页长期没恢复就前置从发布页获取
85 | if not cls.status_forever and not cls.status_publish:
86 | raise ConnectionError(f"无法获取 {cls.name} domain,方法均失效了,需要查看")
87 | return domain
88 |
89 | @classmethod
90 | def parse_publish(cls, html):
91 | domain = cls.parse_publish_(html)
92 | with open(temp_p.joinpath(f"{cls.name}_domain.txt"), 'w', encoding='utf-8') as f:
93 | f.write(domain)
94 | return domain
95 |
96 | @classmethod
97 | def parse_publish_(cls, html):
98 | ...
99 |
100 | @classmethod
101 | def get_uuid(cls, info):
102 | if hasattr(cls, "uuid_regex"):
103 | _identity = cls.uuid_regex.search(info).group(1)
104 | else:
105 | _identity = info
106 | return f"{cls.name}-{_identity}"
107 |
108 |
def retry(func, retry_limit, *args, retry_times=0, raise_error=False, **kwargs):
    """Call *func*(*args, **kwargs), retrying on any exception.

    Args:
        func: callable to invoke.
        retry_limit: maximum value the retry counter may reach.
        retry_times: starting retry counter (kept for backward compatibility);
            total attempts = retry_limit - retry_times + 1.
        raise_error: when True, re-raise the last exception once retries are
            exhausted; otherwise return None.

    Note: implemented iteratively — the previous recursive form grew the call
    stack by one frame per retry.
    """
    while True:
        try:
            return func(*args, **kwargs)
        except Exception as e:  # deliberately broad: any failure triggers a retry
            retry_times += 1
            if retry_times > retry_limit:
                if raise_error:
                    raise e
                return None
118 |
# Keywords that mark a bracketed tag as NOT being the author/circle name
# (translation groups, edition markers, language tags, etc.).
tag_regex = re.compile(r"汉化|漢化|粵化|DL版|修正|中国|翻訳|翻译|翻譯|中文|後編|前編|カラー化|個人|" +
                       r"無修|重修|重嵌|机翻|機翻|整合|黑字|Chinese|Japanese|\[Digital]|vol|\[\d+]")


def set_author_ahead(title: str) -> str:
    """Move the author tag (a bracketed `[...]` chunk) to the front of *title*.

    Candidates containing a parenthesis (circle name) are preferred; otherwise
    tags matching the known non-author keywords are excluded. The title is
    returned unchanged when zero or several distinct candidates remain.
    """
    brackets = re.findall(r"\[.*?]", title)
    if re.search(r"[((]", "".join(brackets)):
        # prefer tags carrying a parenthesised circle/author name
        candidates = [tag for tag in brackets if re.search(r"[((]", tag)]
    else:
        # fall back to excluding tags that look like edition/translation markers
        candidates = [tag for tag in brackets if not tag_regex.search(tag)]
    if not candidates:
        return title
    if len(candidates) > 1:
        if len(set(candidates)) != 1:
            # ambiguous: several distinct candidates, leave the title untouched
            # logger.warning(f"match needs improvement {candidates=}")
            return title
        candidates = [candidates[0]]  # de-duplicate identical tags
    author = candidates[0]
    return (author + title.replace(author, '').replace(" ", " ")).strip()
139 |
--------------------------------------------------------------------------------
/utils/website/hitomi/__init__.py:
--------------------------------------------------------------------------------
1 | import time
2 | import re
3 | import json
4 | import struct
5 |
6 | import httpx
7 |
8 | from assets import res
9 | from utils import conf
10 | from utils.website.core import EroUtils, Req
11 |
12 |
class HitomiUtils(EroUtils, Req):
    """hitomi.la helpers: HTTP/2 client setup, nozomi index decoding, gallery
    json extraction and image-url construction (via the gg.js state)."""

    name = "hitomi"
    index = "https://hitomi.la/"
    domain = r"ltn.gold-usergeneratedcontent.net"
    domain2 = r"gold-usergeneratedcontent.net"
    headers = {
        "accept": "*/*",
        "accept-language": res.Vars.ua_accept_language,
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:137.0) Gecko/20100101 Firefox/137.0",
        "referer": index
    }
    book_hea = headers
    galleries_per_page = 100
    uuid_regex = re.compile(r"(\d+)\.(html|js)$")
    img_domain = r"w1.gold-usergeneratedcontent.net"  # unsure its source or it's stable

    def __init__(self, conf):
        self.cli = self.get_cli(conf)
        self.gg = gg(cli=self.cli)

    @staticmethod
    def parse_nozomi(data):
        """Decode a nozomi byte blob into its list of int32 gallery ids."""
        view = DataView(data)
        ids = []
        for idx in range(len(data) // 4):
            ids.append(view.get_int32(idx * 4, little_endian=False))
        return ids

    @staticmethod
    def parse_galleries(data_str):
        """Extract the galleryinfo JSON object embedded in a galleries js file."""
        payload = re.search(r"var galleryinfo = (\{.*\}$)", data_str).group(1)
        return json.loads(payload)

    def get_range(self, page):
        """HTTP Range header value selecting the ids of 1-based result *page*."""
        stop = self.galleries_per_page * int(page)
        start = stop - self.galleries_per_page
        return f"bytes={start}-{stop - 1}"

    def get_img_url(self, img_hash, hasavif=0):
        """Build the final image url for *img_hash* using the current gg state."""
        g = self.gg.s(img_hash)
        img_type = "avif" if hasavif else "webp"
        subdomain = f"{img_type[0]}{1 + int(self.gg.m(g))}"
        return f"https://{subdomain}.{self.domain2}/{self.gg.b}{g}/{img_hash}.{img_type}"

    @classmethod
    def get_cli(cls, conf):
        """HTTP/2 httpx client, proxied through the first configured proxy if any."""
        if not conf.proxies:
            return httpx.Client(headers=cls.book_hea, trust_env=True, http2=True)
        return httpx.Client(http2=True,
                            headers=cls.book_hea,
                            transport=httpx.HTTPTransport(proxy=f"http://{conf.proxies[0]}", retries=3))

    def test_index(self):
        """Cheap reachability probe against the nozomi index; True when it responds OK."""
        try:
            resp = self.cli.head(f'https://{self.domain}/popular/week-all.nozomi',
                                 headers={**HitomiUtils.headers, "Range": self.get_range(1)},
                                 follow_redirects=True, timeout=3.5)
            resp.raise_for_status()
            return True
        except httpx.HTTPError:
            return False
72 |
73 |
class gg:
    """Python mirror of hitomi's gg.js state: the switch-case set driving
    subdomain selection and the numeric path prefix."""

    def __init__(self, cli=None, js_code=None):
        """Parse gg state from *js_code*, or fetch the live gg.js via *cli*."""
        if js_code:
            source = js_code
        else:
            url = f"https://ltn.{HitomiUtils.domain2}/gg.js?_={int(time.time() * 1000)}"
            source = cli.get(url).text
        self.m_cases = self._parse_m_cases(source)
        self.b = f"{self._parse_b(source)}/"

    def _parse_m_cases(self, js_code):
        """All integer values appearing in `case N:` labels of the script."""
        return {int(num) for num in re.findall(r"case (\d+):", js_code)}

    def _parse_b(self, js_code):
        """First 10-digit number in the script (the path-prefix timestamp)."""
        return re.search(r"(\d{10})", js_code).group(1)

    def m(self, g):
        """0 when *g* is one of the parsed switch cases, else 1."""
        return 0 if int(g) in self.m_cases else 1

    def s(self, h):
        """Last three hex chars of *h*, reordered (final char first) and parsed
        as a hex number, returned as decimal string."""
        tail = re.match(r"(..)(.)$", h[-3:])
        return str(int(tail.group(2) + tail.group(1), 16))
98 |
99 |
class DataView:
    """Minimal port of the JS DataView: reads signed 32-bit ints out of a
    bytes-like buffer with an explicit endianness."""

    def __init__(self, array):
        self.array = array

    def __get_binary(self, start_index, byte_count):
        """Return *byte_count* bytes starting at *start_index* as a bytes object."""
        bytes_data = self.array[start_index:start_index + byte_count]
        return bytes(bytes_data)

    def get_int32(self, start_index, little_endian=False):
        """Read a signed int32 at *start_index*, honouring *little_endian*.

        Fix: the flag used to be ignored — format 'i' unpacks in NATIVE byte
        order — while the nozomi caller passes little_endian=False expecting
        big-endian, matching JS `DataView.getInt32(..., false)`.
        """
        binary = self.__get_binary(start_index, 4)
        fmt = '<i' if little_endian else '>i'
        return struct.unpack(fmt, binary)[0]
112 |
--------------------------------------------------------------------------------
/utils/website/hitomi/scape_dataset.py:
--------------------------------------------------------------------------------
1 | import re
2 | import json
3 | import sqlite3
4 | from contextlib import closing
5 | import httpx
6 | from lxml import html
7 |
8 | from assets import res
9 | from utils import ori_path, temp_p, conf
10 |
11 |
BASE_URL = "https://hitomi.la/all{category}-{letter}.html"
CATEGORIES = ['tags', 'artists', 'series', 'characters']
# a-z plus the '123' bucket for names starting with digits/symbols
LETTERS = [*[chr(i) for i in range(97, 123)], '123']
db_p = ori_path.joinpath('assets/hitomi.db')
proxy = (conf.proxies or [None])[0]
_headers = {
    "accept": "*/*",
    "accept-language": res.Vars.ua_accept_language,
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:137.0) Gecko/20100101 Firefox/137.0",
    "referer": "https://hitomi.la/"
}
# Configure the proxy at construction time. The previous code assigned
# `client.proxies` after the fact — which httpx ignores — and additionally
# interpolated the proxy into plain (non-f) string literals, producing the
# literal text "http://{proxy}".
client = httpx.Client(
    http2=True,
    headers=_headers,
    transport=httpx.HTTPTransport(proxy=f"http://{proxy}") if proxy else None,
)
27 |
28 |
def lstrip(text):
    """Drop a leading 'all' prefix from *text* (category/table names)."""
    return text[3:] if text.startswith("all") else text
33 |
34 |
class Db:
    """Schema management for the sqlite database backing the hitomi tag dataset."""

    # One table per (category, letter) bucket; `content` is the tag text,
    # `num` the gallery count shown on the listing page.
    data_tb = """
    CREATE TABLE IF NOT EXISTS `%s` (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        content TEXT NOT NULL UNIQUE,
        num INTEGER NOT NULL DEFAULT 1
    );
    """
    language_tb = """
    CREATE TABLE IF NOT EXISTS `language` (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        content TEXT NOT NULL UNIQUE
    );
    """

    @classmethod
    def create_tables(cls):
        """Ensure every bucket table plus the language table exists."""
        with closing(sqlite3.connect(db_p)) as connection:
            cur = connection.cursor()
            for cat in CATEGORIES:
                for ch in LETTERS:
                    cur.execute(cls.data_tb % f"all{lstrip(cat)}-{ch}")
            cur.execute(cls.language_tb)
            connection.commit()

    @classmethod
    def recreate(cls, table_name):
        """Drop and re-create a single bucket table (used when re-scraping it)."""
        with closing(sqlite3.connect(db_p)) as connection:
            cur = connection.cursor()
            cur.execute(f"DROP TABLE IF EXISTS `{table_name}`")
            cur.execute(cls.data_tb % table_name)
            connection.commit()
68 |
regex = re.compile('.*/(.*?)-all')  # captures the slug segment right before "-all" in a listing href
digit_regex = re.compile(r'\d+')  # digit runs; joined together to parse the posts-count text
71 |
72 |
def scrape_and_save():
    """Scrape every category/letter listing page into the sqlite db.

    On a normal run all (category, letter) tasks are processed. Failed tasks
    are written to a temp json file; when that file exists on the next run,
    only those tasks are retried, with their tables rebuilt first.
    """
    def scrape(category, letter):
        """Fetch one listing page and upsert its (content, num) rows."""
        def get_content(_category, _letter):
            resp = client.get(BASE_URL.format(category=_category, letter=_letter), timeout=10)
            resp.raise_for_status()
            return resp.content

        tree = html.fromstring(get_content(category, letter))
        lis = tree.xpath('//ul[@class="posts"]/li')
        items = [
            (regex.search(li.xpath('./a/@href')[0]).group(1),
             int(''.join(digit_regex.findall(li.xpath('.//text()')[-1]))))
            for li in lis
        ]

        table_name = f"all{category}-{letter}"
        if tb_rewrite_flag:
            Db.recreate(table_name)
        with closing(db_conn.cursor()) as cursor:
            cursor.executemany(
                f"INSERT OR IGNORE INTO `{table_name}` (content,num) VALUES (?,?)",
                [item for item in items if item[0].strip()]
            )
            db_conn.commit()
            print(f"[SUCCESS] {table_name} writen {cursor.rowcount} ")

    err = []
    init_err_f = temp_p.joinpath('hitomi_db_init_err.json')
    tb_rewrite_flag = False
    # Fix: `tasks` used to be assigned only inside the exists() branch, so a
    # first run without an error file crashed with NameError at `if not tasks`.
    tasks = []
    if init_err_f.exists():
        with open(init_err_f, 'r', encoding='utf-8') as f:
            tasks = json.load(f)
        tb_rewrite_flag = True
    if not tasks:
        tasks = [f'{category}-{letter}' for category in CATEGORIES for letter in LETTERS]
    with closing(sqlite3.connect(db_p)) as db_conn:
        for task in tasks:
            category, letter = task.split('-')
            category = lstrip(category)
            try:
                scrape(category, letter)
            except Exception as e:
                print(f"[ERROR] {task} {e}")
                err.append(f'{category}-{letter}')
    with open(init_err_f, 'w', encoding='utf-8') as f:
        json.dump(err, f, ensure_ascii=False, indent=4)
118 |
119 |
def main():
    """Entry point: create the db schema, then scrape all listing pages into it."""
    Db.create_tables()
    scrape_and_save()
123 |
--------------------------------------------------------------------------------
/variables/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | from assets import res
4 |
# Spider index -> spider name; the integer keys are how the GUI/CLI select a
# site. Flags mark the site language, 🔞 marks adult sites.
SPIDERS = {
    1: 'manga_copy',  # 🇨🇳
    2: 'jm',  # 🇨🇳 🔞
    3: 'wnacg',  # 🇨🇳 🔞
    4: 'ehentai',  # 🌎 🔞
    5: 'mangabz',  # 🇨🇳
    6: 'hitomi',  # 🌎 🔞
}
# Sites that need special handling elsewhere in the project.
SPECIAL_WEBSITES = ['wnacg', 'jm', 'ehentai', 'hitomi']
SPECIAL_WEBSITES_IDXES = [2, 3, 4, 6]  # SPIDERS keys of the special sites above
# SPIDERS keys whose previews need a proxy (presumably from mainland-CN networks — confirm)
CN_PREVIEW_NEED_PROXIES_IDXES = [3, 4, 6]

# Per-spider search-box autocomplete presets. Takes effect only at init, i.e.
# when conf.yml's `completer` value is null or absent.
DEFAULT_COMPLETER = {
    1: ['更新', '排名日', '排名周', '排名月', '排名总'],
    2: ['C105', '更新周', '更新月', '点击周', '点击月', '评分周', '评分月', '评论周', '评论月', '收藏周', '收藏月'],
    3: ['C105', '更新', '汉化'],
    4: [res.EHentai.MAPPINGS_POPULAR, res.EHentai.MAPPINGS_INDEX, 'C105'],
    5: ['更新', '人气'],
    6: ['index-all', 'popular/week-all', 'popular/month-all']
}

# Spider index -> status tip string shown for the search input (0 = no tip).
STATUS_TIP = {
    0: None,
    1: f"manga_copy: {res.GUI.SearchInputStatusTip.manga_copy}",
    2: f"jm: {res.GUI.SearchInputStatusTip.jm}",
    3: f"wnacg: {res.GUI.SearchInputStatusTip.wnacg}",
    4: f"ehentai: {res.GUI.SearchInputStatusTip.ehentai}",
    5: f"mangabz: {res.GUI.SearchInputStatusTip.mangabz}",
    6: f"hitomi: {res.GUI.SearchInputStatusTip.hitomi}"
}
35 |
--------------------------------------------------------------------------------