├── src └── ifixit2zim │ ├── __init__.py │ ├── __about__.py │ ├── assets │ ├── kits.jpg │ ├── check1x.png │ ├── check2x.png │ ├── favicon.ico │ ├── loader.gif │ ├── spinner.gif │ ├── kits-small.jpg │ ├── 16px_11px_5.png │ ├── 32px_32px_5.png │ ├── NoImage_56x42.jpg │ ├── NoImage_96x72.jpg │ ├── checklarge1x.png │ ├── checklarge2x.png │ ├── checksmall1x.png │ ├── checksmall2x.png │ ├── document-add.png │ ├── helptabshadow.png │ ├── illustration.png │ ├── replace-large.png │ ├── replace-small.png │ ├── NoImage_300x225.jpg │ ├── helptaboverlay.png │ ├── moto_g5_plus_4.jpg │ ├── right-to-repair.jpg │ ├── tooltip_sprite.png │ ├── camera-large-add-2.png │ ├── camera-small-add-2.png │ ├── home │ │ ├── macbook-pro.jpg │ │ ├── AdQTqqV252aRMVdf.jpg │ │ ├── H4M1JEQiZabe4Vo2.jpg │ │ ├── IlacTC2EdoTKtKdC.jpg │ │ ├── JxaijlWopNJnHo2S.jpg │ │ ├── P1WJqUOlaXZYNVSD.jpg │ │ ├── RacpAWWRuobmX5g4.jpg │ │ ├── hKrLIluHRDXxUAit.jpg │ │ └── firsttimerepairing_banner-2.jpg │ ├── icomoon_20160111.eot │ ├── icomoon_20160111.ttf │ ├── media-upload-types.png │ ├── sprite_guide_edit3.png │ ├── transparency-50pxa.png │ ├── video-large-add-2.png │ ├── GuideNoImage_300x225.jpg │ ├── badge_icons_20110608.png │ ├── view-question-20091109.gif │ ├── S6uyw4BMUTPHjx4wXiWtFCc.woff2 │ ├── icomoon-gzipped_20210215.woff │ ├── S6u8w4BMUTPHjxsAXC-qNiXg7Q.woff2 │ ├── S6uyw4BMUTPHjxAwXiWtFCfQ7A.woff2 │ ├── S6u8w4BMUTPHjxsAUi-qNiXg7eU0.woff2 │ ├── S6u9w4BMUTPHh6UVSwiPGQ3q5d0.woff2 │ ├── S6u9w4BMUTPHh6UVSwaPGQ3q5d0N7w.woff2 │ ├── not_here.js │ ├── customZimHelpers-1.js │ ├── Shared-print-ej-m-RsicBzcpqbxdfzumQ.css │ ├── repair-score-neutral.svg │ ├── repair-score-good.svg │ ├── repair-score-bad.svg │ ├── Shared-cart_banner-33Ctp6kCy0R-IiTsFeV6cw.css │ ├── css2.css │ ├── area_index-BDTBciD-Y7NVVjoPQBUyhA.css │ ├── Shared-attachment_link-AoWbgS-g65jo1DYOaHV5XA.css │ ├── FrameModules-translation_credit-qBLVoFL8fSDpJmDUuduPTA.css │ ├── Shared-i18n_formatting-7XRaMqur0Z-hJvP-W8sS2A.css │ ├── Wiki-topic-r_spN9srKqcGQAC8emdeTA.css │ ├── core-primitives-F5WnAWhrwpl7oCtqtgogQQ.css │ ├── prosemirror-all-_OBJ3KkZRD0uygPKzpMb8Q.css │ ├── release-version-orbcTfqm6_JKsoz-PPnHGA.css │ └── view_profile-LAZ9O7S0EMQ9_BZEO-F8OQ.css │ ├── __main__.py │ ├── exceptions.py │ ├── context.py │ ├── templates │ ├── guide-comment.html │ ├── guide-step-lines-container.html │ ├── info.html │ ├── guide-comments.html │ ├── external_content.html │ ├── user.html │ ├── base.html │ ├── not_here.html │ └── home.html │ ├── shared.py │ ├── scraper_info.py │ ├── executor.py │ ├── scraper_category.py │ ├── scraper_user.py │ ├── scraper_generic.py │ ├── utils.py │ ├── imager.py │ ├── entrypoint.py │ └── scraper_guide.py ├── tests └── test_basic.py ├── .vscode └── settings.json ├── .github ├── stale.yml ├── FUNDING.yml └── workflows │ ├── PublishDockerDevImage.yaml │ ├── QA.yaml │ ├── Publish.yaml │ ├── update-zim-offliner-definition.yaml │ └── Tests.yaml ├── .pre-commit-config.yaml ├── Dockerfile ├── CHANGELOG.md ├── tasks.py ├── README.md ├── .gitignore ├── offliner-definition.json └── pyproject.toml /src/ifixit2zim/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ifixit2zim/__about__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.4.0-dev0" 2 | -------------------------------------------------------------------------------- /src/ifixit2zim/assets/kits.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/kits.jpg -------------------------------------------------------------------------------- /src/ifixit2zim/assets/check1x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/check1x.png -------------------------------------------------------------------------------- /src/ifixit2zim/assets/check2x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/check2x.png -------------------------------------------------------------------------------- /src/ifixit2zim/assets/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/favicon.ico -------------------------------------------------------------------------------- /src/ifixit2zim/assets/loader.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/loader.gif -------------------------------------------------------------------------------- /src/ifixit2zim/assets/spinner.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/spinner.gif -------------------------------------------------------------------------------- /src/ifixit2zim/__main__.py: -------------------------------------------------------------------------------- 1 | from ifixit2zim.entrypoint import main 2 | 3 | if __name__ == "__main__": 4 | main() 5 | -------------------------------------------------------------------------------- /src/ifixit2zim/assets/kits-small.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/kits-small.jpg -------------------------------------------------------------------------------- /src/ifixit2zim/assets/16px_11px_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/16px_11px_5.png -------------------------------------------------------------------------------- /src/ifixit2zim/assets/32px_32px_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/32px_32px_5.png -------------------------------------------------------------------------------- /src/ifixit2zim/assets/NoImage_56x42.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/NoImage_56x42.jpg -------------------------------------------------------------------------------- /src/ifixit2zim/assets/NoImage_96x72.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/NoImage_96x72.jpg -------------------------------------------------------------------------------- /src/ifixit2zim/assets/checklarge1x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/checklarge1x.png -------------------------------------------------------------------------------- /src/ifixit2zim/assets/checklarge2x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/checklarge2x.png -------------------------------------------------------------------------------- /src/ifixit2zim/assets/checksmall1x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/checksmall1x.png -------------------------------------------------------------------------------- /src/ifixit2zim/assets/checksmall2x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/checksmall2x.png -------------------------------------------------------------------------------- /src/ifixit2zim/assets/document-add.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/document-add.png -------------------------------------------------------------------------------- /src/ifixit2zim/assets/helptabshadow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/helptabshadow.png -------------------------------------------------------------------------------- /src/ifixit2zim/assets/illustration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/illustration.png -------------------------------------------------------------------------------- /src/ifixit2zim/assets/replace-large.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/replace-large.png -------------------------------------------------------------------------------- /src/ifixit2zim/assets/replace-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/replace-small.png -------------------------------------------------------------------------------- /src/ifixit2zim/assets/NoImage_300x225.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/NoImage_300x225.jpg -------------------------------------------------------------------------------- /src/ifixit2zim/assets/helptaboverlay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/helptaboverlay.png -------------------------------------------------------------------------------- /src/ifixit2zim/assets/moto_g5_plus_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/moto_g5_plus_4.jpg -------------------------------------------------------------------------------- /src/ifixit2zim/assets/right-to-repair.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/right-to-repair.jpg -------------------------------------------------------------------------------- /src/ifixit2zim/assets/tooltip_sprite.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/tooltip_sprite.png -------------------------------------------------------------------------------- /tests/test_basic.py: -------------------------------------------------------------------------------- 1 | from ifixit2zim.__about__ import __version__ 2 | 3 | 4 | def test_version(): 5 | assert __version__ 6 | -------------------------------------------------------------------------------- /src/ifixit2zim/assets/camera-large-add-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/camera-large-add-2.png -------------------------------------------------------------------------------- /src/ifixit2zim/assets/camera-small-add-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/camera-small-add-2.png -------------------------------------------------------------------------------- /src/ifixit2zim/assets/home/macbook-pro.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/home/macbook-pro.jpg -------------------------------------------------------------------------------- /src/ifixit2zim/assets/icomoon_20160111.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/icomoon_20160111.eot -------------------------------------------------------------------------------- /src/ifixit2zim/assets/icomoon_20160111.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/icomoon_20160111.ttf -------------------------------------------------------------------------------- /src/ifixit2zim/assets/media-upload-types.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/media-upload-types.png -------------------------------------------------------------------------------- /src/ifixit2zim/assets/sprite_guide_edit3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/sprite_guide_edit3.png -------------------------------------------------------------------------------- /src/ifixit2zim/assets/transparency-50pxa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/transparency-50pxa.png -------------------------------------------------------------------------------- /src/ifixit2zim/assets/video-large-add-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/video-large-add-2.png -------------------------------------------------------------------------------- /src/ifixit2zim/assets/GuideNoImage_300x225.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/GuideNoImage_300x225.jpg -------------------------------------------------------------------------------- /src/ifixit2zim/assets/badge_icons_20110608.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/badge_icons_20110608.png -------------------------------------------------------------------------------- /src/ifixit2zim/assets/home/AdQTqqV252aRMVdf.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/home/AdQTqqV252aRMVdf.jpg -------------------------------------------------------------------------------- /src/ifixit2zim/assets/home/H4M1JEQiZabe4Vo2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/home/H4M1JEQiZabe4Vo2.jpg -------------------------------------------------------------------------------- /src/ifixit2zim/assets/home/IlacTC2EdoTKtKdC.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/home/IlacTC2EdoTKtKdC.jpg -------------------------------------------------------------------------------- /src/ifixit2zim/assets/home/JxaijlWopNJnHo2S.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/home/JxaijlWopNJnHo2S.jpg -------------------------------------------------------------------------------- /src/ifixit2zim/assets/home/P1WJqUOlaXZYNVSD.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/home/P1WJqUOlaXZYNVSD.jpg -------------------------------------------------------------------------------- /src/ifixit2zim/assets/home/RacpAWWRuobmX5g4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/home/RacpAWWRuobmX5g4.jpg -------------------------------------------------------------------------------- /src/ifixit2zim/assets/home/hKrLIluHRDXxUAit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/home/hKrLIluHRDXxUAit.jpg -------------------------------------------------------------------------------- /src/ifixit2zim/assets/view-question-20091109.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/view-question-20091109.gif -------------------------------------------------------------------------------- /src/ifixit2zim/assets/S6uyw4BMUTPHjx4wXiWtFCc.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/S6uyw4BMUTPHjx4wXiWtFCc.woff2 -------------------------------------------------------------------------------- /src/ifixit2zim/assets/icomoon-gzipped_20210215.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/icomoon-gzipped_20210215.woff -------------------------------------------------------------------------------- /src/ifixit2zim/assets/S6u8w4BMUTPHjxsAXC-qNiXg7Q.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/S6u8w4BMUTPHjxsAXC-qNiXg7Q.woff2 -------------------------------------------------------------------------------- /src/ifixit2zim/assets/S6uyw4BMUTPHjxAwXiWtFCfQ7A.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/S6uyw4BMUTPHjxAwXiWtFCfQ7A.woff2 -------------------------------------------------------------------------------- /src/ifixit2zim/assets/S6u8w4BMUTPHjxsAUi-qNiXg7eU0.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/S6u8w4BMUTPHjxsAUi-qNiXg7eU0.woff2 -------------------------------------------------------------------------------- /src/ifixit2zim/assets/S6u9w4BMUTPHh6UVSwiPGQ3q5d0.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/S6u9w4BMUTPHh6UVSwiPGQ3q5d0.woff2 -------------------------------------------------------------------------------- /src/ifixit2zim/assets/S6u9w4BMUTPHh6UVSwaPGQ3q5d0N7w.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/S6u9w4BMUTPHh6UVSwaPGQ3q5d0N7w.woff2 -------------------------------------------------------------------------------- /src/ifixit2zim/assets/home/firsttimerepairing_banner-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openzim/ifixit/HEAD/src/ifixit2zim/assets/home/firsttimerepairing_banner-2.jpg -------------------------------------------------------------------------------- /src/ifixit2zim/exceptions.py: -------------------------------------------------------------------------------- 1 | class FinalScrapingFailureError(Exception): 2 | pass 3 | 4 | 5 | class UnexpectedDataKindExceptionError(Exception): 6 | pass 7 | 8 | 9 | class CategoryHomePageContentError(Exception): 10 | pass 11 | 12 | 13 | class ImageUrlNotFoundError(Exception): 14 | pass 15 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "[python]": { 3 | "editor.defaultFormatter": "ms-python.black-formatter", 4 | "editor.formatOnSave": true, 5 | "editor.codeActionsOnSave": { 6 | "source.organizeImports": "explicit" 7 | }, 8 | }, 9 | "python.analysis.typeCheckingMode": "basic", 10 | "editor.rulers": [88], 11 | } -------------------------------------------------------------------------------- /src/ifixit2zim/assets/not_here.js: -------------------------------------------------------------------------------- 1 | function getUrlVars() { 2 | var vars = {}; 3 | window.location.href.replace(/[?&]+([^=&]+)=([^&]*)/gi, function(m,key,value) { 4 | vars[key] = value; 5 | }); 6 | return vars; 7 | } 8 | 9 | document.addEventListener("DOMContentLoaded", function(){ 10 | elink = document.getElementById("my_link"); 11 | targetUrl = decodeURIComponent(getUrlVars()["url"]) 12 | elink.href = targetUrl; 13 | elink.textContent = targetUrl; 14 | }); 15 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | daysUntilClose: false 2 | staleLabel: stale 3 | 4 | issues: 5 | daysUntilStale: 60 6 | markComment: > 7 | This issue has been automatically marked as stale because it has not had 8 | recent activity. It will be now be reviewed manually. Thank you 9 | for your contributions. 10 | pulls: 11 | daysUntilStale: 7 12 | markComment: > 13 | This pull request has been automatically marked as stale because it has not had 14 | recent activity. It will be now be reviewed manually. Thank you 15 | for your contributions. 16 | -------------------------------------------------------------------------------- /src/ifixit2zim/context.py: -------------------------------------------------------------------------------- 1 | import threading 2 | from dataclasses import dataclass 3 | from typing import Any 4 | 5 | from jinja2 import Environment 6 | from zimscraperlib.zim.creator import Creator 7 | 8 | from ifixit2zim.processor import Processor 9 | from ifixit2zim.scraper import Configuration 10 | from ifixit2zim.utils import Utils 11 | 12 | 13 | @dataclass 14 | class Context: 15 | lock: threading.Lock 16 | configuration: Configuration 17 | creator: Creator 18 | utils: Utils 19 | metadata: dict[str, Any] 20 | env: Environment 21 | processor: Processor 22 | -------------------------------------------------------------------------------- /src/ifixit2zim/templates/guide-comment.html: -------------------------------------------------------------------------------- 1 |
2 |
3 | {{curcomment['text_rendered'] | cleanup_rendered_content(rel_prefix) | safe}} 4 |
5 |

6 | {{ curcomment['author'] | get_user_display_name }} - 7 | 8 | 9 | 10 |

11 |
-------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: kiwix # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: # https://kiwix.org/support-us/ 13 | -------------------------------------------------------------------------------- /src/ifixit2zim/assets/customZimHelpers-1.js: -------------------------------------------------------------------------------- 1 | function switchStepImage(element, mainImgId, className, divThubId) { 2 | document.getElementById(mainImgId).src = element.src; 3 | var myElements = document.getElementsByClassName(className); 4 | for (var counter = 0; counter < myElements.length; counter++) { 5 | myElements[counter].classList.remove("active"); 6 | } 7 | document.getElementById(divThubId).classList.add("active"); 8 | } 9 | 10 | function switchCommentsVisibility(stepCommentsId) { 11 | elem = document.getElementById(stepCommentsId); 12 | if (elem.classList.contains("hide-comments")) { 13 | elem.classList.remove("hide-comments"); 14 | } else { 15 | elem.classList.add("hide-comments"); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/ifixit2zim/assets/Shared-print-ej-m-RsicBzcpqbxdfzumQ.css: -------------------------------------------------------------------------------- 1 | #mainHeader,footer{display:none!important}#background{background-color:#fff!important}#background #mainBody{box-shadow:none}#background #banner-wrap-bg{display:none}#background .banner-content{background-image:none;background-color:#fff}#background .banner-blurb,#background .banner-title{color:#212426;text-shadow:none}#background #bodyTop,#background #dozukiSidebarAd,#background #guideBottom,#background .addNote,#background .buttonLink,#background .noteToggle,#background .socialMediaLinks,#background .stepTitle .anchor,#background .stepTitle .edit,#background .stepViewOptions,#background .wikiDetailsOther,#background .wikiDetailsResources{display:none}#background .step{page-break-inside:avoid} -------------------------------------------------------------------------------- /.github/workflows/PublishDockerDevImage.yaml: -------------------------------------------------------------------------------- 1 | name: Publish Docker dev image 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | publish: 10 | runs-on: ubuntu-22.04 11 | 12 | steps: 13 | - uses: actions/checkout@v3 14 | 15 | - name: Build and push Docker image 16 | uses: openzim/docker-publish-action@v10 17 | with: 18 | image-name: openzim/ifixit 19 | manual-tag: dev 20 | latest-on-tag: false 21 | restrict-to: openzim/ifixit 22 | registries: ghcr.io 23 | credentials: 24 | GHCRIO_USERNAME=${{ secrets.GHCR_USERNAME }} 25 | GHCRIO_TOKEN=${{ secrets.GHCR_TOKEN }} 26 | repo_description: auto 27 | repo_overview: auto 28 | -------------------------------------------------------------------------------- /.github/workflows/QA.yaml: -------------------------------------------------------------------------------- 1 | name: QA 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | 9 | jobs: 10 | check-qa: 11 | runs-on: ubuntu-22.04 12 | 13 | steps: 14 | - uses: actions/checkout@v3 15 | 16 | - name: Set up Python 17 | uses: actions/setup-python@v4 18 | with: 19 | python-version-file: pyproject.toml 20 | architecture: x64 21 | 22 | - name: Install dependencies (and project) 23 | run: | 24 | pip install -U pip 25 | pip install -e .[lint,scripts,test,check] 26 | 27 | - name: Check black formatting 28 | run: inv lint-black 29 | 30 | - name: Check ruff 31 | run: inv lint-ruff 32 | 33 | - name: Check pyright 34 | run: inv check-pyright 35 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v4.4.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - repo: https://github.com/psf/black 10 | rev: "24.2.0" 11 | hooks: 12 | - id: black 13 | - repo: https://github.com/astral-sh/ruff-pre-commit 14 | rev: v0.3.0 15 | hooks: 16 | - id: ruff 17 | - repo: https://github.com/RobertCraigie/pyright-python 18 | rev: v1.1.352 19 | hooks: 20 | - id: pyright 21 | name: pyright (system) 22 | description: 'pyright static type checker' 23 | entry: pyright 24 | language: system 25 | 'types_or': [python, pyi] 26 | require_serial: true 27 | minimum_pre_commit_version: '2.9.2' 28 | -------------------------------------------------------------------------------- /src/ifixit2zim/shared.py: -------------------------------------------------------------------------------- 1 | import locale 2 | import logging 3 | import threading 4 | from contextlib import contextmanager 5 | 6 | from zimscraperlib.logging import getLogger as lib_getLogger 7 | 8 | from ifixit2zim.constants import NAME 9 | 10 | logger = lib_getLogger( 11 | NAME, 12 | level=logging.INFO, 13 | log_format="[%(threadName)s::%(asctime)s] %(levelname)s:%(message)s", 14 | ) 15 | 16 | 17 | def set_debug(value): 18 | level = logging.DEBUG if value else logging.INFO 19 | logger.setLevel(level) 20 | for handler in logger.handlers: 21 | handler.setLevel(level) 22 | 23 | 24 | LOCALE_LOCK = threading.Lock() 25 | 26 | 27 | @contextmanager 28 | def setlocale(name): 29 | with LOCALE_LOCK: 30 | saved = locale.setlocale(locale.LC_ALL) 31 | try: 32 | yield locale.setlocale(locale.LC_ALL, name) 33 | finally: 34 | locale.setlocale(locale.LC_ALL, saved) 35 | -------------------------------------------------------------------------------- /src/ifixit2zim/templates/guide-step-lines-container.html: -------------------------------------------------------------------------------- 1 |
2 | 23 |
-------------------------------------------------------------------------------- /.github/workflows/Publish.yaml: -------------------------------------------------------------------------------- 1 | name: Publish released version 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | publish: 9 | runs-on: ubuntu-22.04 10 | permissions: 11 | id-token: write # mandatory for PyPI trusted publishing 12 | 13 | steps: 14 | - uses: actions/checkout@v3 15 | 16 | - name: Set up Python 17 | uses: actions/setup-python@v4 18 | with: 19 | python-version-file: pyproject.toml 20 | architecture: x64 21 | 22 | - name: Build packages 23 | run: | 24 | pip install -U pip build 25 | python -m build --sdist --wheel 26 | 27 | - name: Upload to PyPI 28 | uses: pypa/gh-action-pypi-publish@release/v1.8 29 | 30 | - name: Build and push Docker image 31 | uses: openzim/docker-publish-action@v10 32 | with: 33 | image-name: openzim/ifixit 34 | tag-pattern: /^v([0-9.]+)$/ 35 | latest-on-tag: true 36 | restrict-to: openzim/ifixit 37 | registries: ghcr.io 38 | credentials: 39 | GHCRIO_USERNAME=${{ secrets.GHCR_USERNAME }} 40 | GHCRIO_TOKEN=${{ secrets.GHCR_TOKEN }} 41 | repo_description: auto 42 | repo_overview: auto 43 | -------------------------------------------------------------------------------- /src/ifixit2zim/templates/info.html: -------------------------------------------------------------------------------- 1 | {% set rel_prefix="../" %}{% set bodyFullWidth = True %}{% extends "base.html" %} {% block title %}{{info_wiki['title']}}{% endblock title%} 2 | {% block specific_head %} 3 | 4 | 5 | {% endblock specific_head%} {% block content %} 6 |
7 |
8 |
9 | 10 |
11 |
12 |
13 |

{{info_wiki['display_title']}}

14 |

{{info_wiki['description']}}

15 |
16 |
17 | {{info_wiki['contents_rendered'] | cleanup_rendered_content(rel_prefix) | safe}} 18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 | 27 | 28 |
29 |
30 | 31 |
32 | 33 | {% endblock content%} -------------------------------------------------------------------------------- /.github/workflows/update-zim-offliner-definition.yaml: -------------------------------------------------------------------------------- 1 | name: Update ZIMFarm Definitions 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | paths: 7 | - "offliner-definition.json" 8 | release: 9 | types: [published] 10 | 11 | workflow_dispatch: 12 | inputs: 13 | version: 14 | description: "Version to publish" 15 | required: false 16 | default: "dev" 17 | 18 | jobs: 19 | prepare-json: 20 | runs-on: ubuntu-24.04 21 | outputs: 22 | offliner_definition_b64: ${{ steps.read-json.outputs.offliner_definition_b64 }} 23 | steps: 24 | - name: Checkout repository 25 | uses: actions/checkout@v4 26 | with: 27 | fetch-depth: 0 28 | 29 | - id: read-json 30 | run: | 31 | if [ ! -f "offliner-definition.json" ]; then 32 | echo "File not found!" >&2 33 | exit 1 34 | fi 35 | json_b64=$(base64 -w0 <<< "$(jq -c . offliner-definition.json)") 36 | echo "offliner_definition_b64=$json_b64" >> $GITHUB_OUTPUT 37 | call-workflow: 38 | needs: prepare-json 39 | uses: openzim/overview/.github/workflows/update-zimfarm-offliner-definition.yaml@main 40 | with: 41 | version: ${{ github.event_name == 'release' && github.event.release.tag_name || (github.event.inputs.version || 'dev') }} 42 | offliner: ifixit 43 | offliner_definition_b64: ${{ needs.prepare-json.outputs.offliner_definition_b64 }} 44 | secrets: 45 | zimfarm_ci_secret: ${{ secrets.ZIMFARM_CI_SECRET }} 46 | 47 | -------------------------------------------------------------------------------- /src/ifixit2zim/templates/guide-comments.html: -------------------------------------------------------------------------------- 1 | {% for comment in curitem['comments'] %} 2 | {% if loop.index == 4 %} 3 |
4 | 7 |
8 | 9 |
10 | {% endif %} 11 |
12 | 15 | 17 | 18 | 19 |
20 | {% set curcomment=comment %} 21 | {% include 'guide-comment.html' %} 22 | {% for reply in comment['replies'] %} 23 |
24 | {% set curcomment=reply %} 25 | {% include 'guide-comment.html' %} 26 |
27 | {% endfor %} 28 |
29 |
30 | {% endfor %} -------------------------------------------------------------------------------- /.github/workflows/Tests.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | 9 | jobs: 10 | run-tests: 11 | runs-on: ubuntu-22.04 12 | 13 | steps: 14 | - uses: actions/checkout@v3 15 | 16 | - name: Set up Python 17 | uses: actions/setup-python@v4 18 | with: 19 | python-version-file: pyproject.toml 20 | architecture: x64 21 | 22 | - name: Install dependencies (and project) 23 | run: | 24 | pip install -U pip 25 | pip install -e .[test,scripts] 26 | 27 | - name: Run the tests 28 | run: inv coverage --args "-vvv" 29 | 30 | - name: Upload coverage report to codecov 31 | uses: codecov/codecov-action@v3 32 | with: 33 | token: ${{ secrets.CODECOV_TOKEN }} 34 | 35 | build_python: 36 | runs-on: ubuntu-22.04 37 | steps: 38 | - uses: actions/checkout@v3 39 | 40 | - name: Set up Python 41 | uses: actions/setup-python@v4 42 | with: 43 | python-version-file: pyproject.toml 44 | architecture: x64 45 | 46 | - name: Ensure we can build Python targets 47 | run: | 48 | pip install -U pip build 49 | python3 -m build --sdist --wheel 50 | 51 | build_docker: 52 | runs-on: ubuntu-22.04 53 | steps: 54 | - uses: actions/checkout@v3 55 | 56 | - name: Ensure we can build the Docker image 57 | run: | 58 | docker build -t testimage . 59 | 60 | - name: Ensure we can start the Docker image 61 | run: | 62 | docker run --rm testimage 63 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-slim-bookworm 2 | LABEL org.opencontainers.image.source https://github.com/openzim/ifixit 3 | 4 | # Install necessary packages 5 | # TODO: do we really need all these packages? 6 | RUN apt-get update \ 7 | && apt-get install -y --no-install-recommends \ 8 | locales \ 9 | locales-all \ 10 | libmagic1 \ 11 | wget \ 12 | ffmpeg \ 13 | libtiff5-dev \ 14 | libjpeg-dev \ 15 | libopenjp2-7-dev \ 16 | zlib1g-dev \ 17 | libfreetype6-dev \ 18 | liblcms2-dev \ 19 | libwebp-dev \ 20 | tcl8.6-dev \ 21 | tk8.6-dev \ 22 | python3-tk \ 23 | libharfbuzz-dev \ 24 | libfribidi-dev \ 25 | libxcb1-dev \ 26 | gifsicle \ 27 | curl \ 28 | unzip \ 29 | && rm -rf /var/lib/apt/lists/* \ 30 | && python -m pip install --no-cache-dir -U \ 31 | pip 32 | 33 | # setup timezone and locale 34 | ENV TZ "UTC" 35 | RUN echo "UTC" > /etc/timezone \ 36 | && sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen \ 37 | && sed -i '/en_GB ISO-8859-1/s/^# //g' /etc/locale.gen \ 38 | && locale-gen 39 | ENV LANG en_US.UTF-8 40 | ENV LANGUAGE en_US:en 41 | ENV LC_ALL en_US.UTF-8 42 | 43 | # Copy pyproject.toml and its dependencies 44 | COPY pyproject.toml README.md /src/ 45 | COPY src/ifixit2zim/__about__.py /src/src/ifixit2zim/__about__.py 46 | 47 | # Install Python dependencies 48 | RUN pip install --no-cache-dir /src 49 | 50 | # Copy code + associated artifacts 51 | COPY src /src/src 52 | COPY *.md /src/ 53 | 54 | # Install + cleanup 55 | RUN pip install --no-cache-dir /src \ 56 | && rm -rf /src \ 57 | && mkdir -p /output 58 | 59 | CMD ["ifixit2zim", "--help"] 60 | -------------------------------------------------------------------------------- /src/ifixit2zim/templates/external_content.html: -------------------------------------------------------------------------------- 1 | {% set rel_prefix="../" %}{% set bodyFullWidth = True %}{% extends "base.html" %} {% block title %}External content{% endblock title%} 2 | {% block specific_head %} 3 | 4 | 5 | {% endblock specific_head%} {% block content %} 6 | 7 |
8 |
9 |
10 | 11 | 12 |
13 |
14 |
15 | 16 |

17 | External content - be carefull

18 | 19 |

This link is targeting an EXTERNAL content, not available for offline browsing 20 |

21 | 22 |
23 |
24 |

If you want/can, you might go there by clicking on the following link: 25 |

26 |
27 | 28 |
29 | 30 |
31 |
32 |
33 |
34 | 35 | 36 |
37 | 38 |
39 |
40 |
41 | {% endblock content%} -------------------------------------------------------------------------------- /src/ifixit2zim/assets/repair-score-neutral.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | repair-score-neutral 5 | Created with Sketch. 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /src/ifixit2zim/assets/repair-score-good.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | repair-score-good 5 | Created with Sketch. 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /src/ifixit2zim/assets/repair-score-bad.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | repair-score-bad 5 | Created with Sketch. 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /src/ifixit2zim/assets/Shared-cart_banner-33Ctp6kCy0R-IiTsFeV6cw.css: -------------------------------------------------------------------------------- 1 | .cart-banner{display:-ms-flexbox;display:-webkit-flex;display:flex;-ms-flex-direction:row;-webkit-flex-direction:row;flex-direction:row;-ms-justify-content:flex-start;-webkit-justify-content:flex-start;justify-content:flex-start;-ms-flex-pack:start;-ms-align-items:center;-webkit-align-items:center;align-items:center;-ms-flex-align:center;width:100%;background-size:270px;background-repeat:no-repeat;image-rendering:-webkit-optimize-contrast}.cart-banner .cart-banner-text{font-size:16px}.cart-banner .cart-banner-text strong{font-weight:700!important}.cart-banner .browse-store-button{margin-left:50px;max-width:200px;color:#212426;font-size:14px;border:1px solid #e9eaeb;padding:10px 20px;font-weight:400}.cart-banner .browse-store-button:hover{background:linear-gradient(to right,#f5f5f6,rgba(246,246,246,.4))}.cart-banner.location-in-cart{-ms-justify-content:flex-end;-webkit-justify-content:flex-end;justify-content:flex-end;-ms-flex-pack:end;height:75px;border-top:1px solid #e9eaeb;background-position:left -100px top -40px}@media only screen and (min-width:1001px),only screen and (min-width:600px) and (max-width:1000px){.cart-banner.location-in-cart p{top:25px}}.cart-banner.location-in-cart p.cart-banner-text{padding-left:150px}@media only screen and (max-width:599px){.cart-banner.location-in-cart p.cart-banner-text{padding-left:130px}}@media only screen and (max-width:599px){.cart-banner.location-in-cart{height:120px}}.cart-banner.location-product,.cart-banner.location-store{margin-top:20px;height:100px;background-size:400px;background-position:right -135px top -70px;border:1px solid #e9eaeb;border-radius:5px}.cart-banner.location-product .browse-store-button,.cart-banner.location-store .browse-store-button{display:none}.cart-banner.location-product p,.cart-banner.location-store p{margin-left:30px;margin-right:200px;min-width:175px}@media only screen and (max-width:599px){.cart-banner.location-product,.cart-banner.location-store{height:120px;margin-bottom:10px;background-size:300px;background-position:right -100px top -35px}.cart-banner.location-product p,.cart-banner.location-store p{margin-left:15px}}@media only screen and (min-width:600px) and (max-width:1000px){.cart-banner.location-product p,.cart-banner.location-store p{max-width:300px}}@media only screen and (min-width:1001px){.cart-banner.location-store{margin-top:35px;margin-bottom:35px;background-position:right -35px top -70px}.cart-banner.location-store p{font-size:21px;max-width:500px}}@media only screen and (min-width:600px) and (max-width:1000px){.cart-banner.location-store{margin-left:20px;width:calc(100% - 40px)}} -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [Unreleased] 9 | 10 | ### Fixed 11 | 12 | - Add retries to avoid 429 too many requests errors (#109) 13 | - Fix ZIM Title still not ok 14 | - Fix crash when using the stats report (#100) 15 | 16 | ## [0.3.0] - 2024-03-02 17 | 18 | ### Changed 19 | 20 | - Migrate to Python 3.12 21 | - Upgrade Python dependencies, including zimscraperlib 3.3.1 (#92) 22 | - Adopt Python bootstrap conventions 23 | - ZIM Title is not sourced from online website anymore to match 30 chars limit 24 | - Description and Long Description are set and match openZIM convention 25 | - User pages are not part of search / suggestion results anymore (#85) 26 | 27 | ### Fixed 28 | 29 | - iFixit API is returning "null" when listing category (#93) 30 | 31 | ## [0.2.4] - 2023-01-05 32 | 33 | ### Fixed 34 | 35 | - Adapt to changes in upstream iFixit main page HTML content 36 | 37 | ## [0.2.3] - 2022-10-20 38 | 39 | ### Fixed 40 | 41 | - Do not process unrecognized href, i.e. pointing outside iFixit 42 | 43 | ## [0.2.2] - 2022-10-04 44 | 45 | ### Fixed 46 | 47 | - Fixed URL normalization on articles redirecting outside domain (help.ifixit.com) 48 | 49 | ## [0.2.1] - 2022-06-02 50 | 51 | ### Fixed 52 | 53 | - Report more clearly in the log when no ZIM is produced on-purpose + produce the ZIM even if some error occured 54 | - Remove unused log about number of images scrapped 55 | - Fix issue with unquoted normalized URLs before regex matching 56 | - Some users have changed their username 57 | - Some users have a quote in their username 58 | - Ignore irelevant info pages 59 | - Some users do not have a username 60 | - URLs of missing items are not encoded properly 61 | - Issues with the "Load more comments" button in guides 62 | 63 | ## [0.2.0] - 2022-05-04 64 | 65 | ### Added 66 | 67 | - Render tools and parts on guides / categories 68 | - Render comments on guides 69 | - Scrape user pages (only the ones linked as an author or in a comment) 70 | - Use a nice looking URL scheme (instead of the previous technical one) 71 | - Report about scraper progression (usefull for ZimFarm monitoring) 72 | - Add a nice page for missing / error items to avoid dead links 73 | - Add a nice looking page for external URLs 74 | - Handle URL-encoded category titles found in links 75 | - Handle unapproved category translations 76 | - Handle most unscrapped iFixit URLs appropriately (redirect to a nice page) 77 | - Detect duplicate images and replace them with a redirect 78 | - Documentation for PyPi installation 79 | 80 | ### Fixed 81 | - Fix issue about items being scrapped twice due to int / str difference 82 | - Fix issue about ANCHOR links 83 | 84 | ## [0.1.0] - 2022-04-17 85 | 86 | ### Added 87 | 88 | - initial version 89 | -------------------------------------------------------------------------------- /src/ifixit2zim/assets/css2.css: -------------------------------------------------------------------------------- 1 | /* latin-ext */ 2 | @font-face { 3 | font-family: 'Lato'; 4 | font-style: italic; 5 | font-weight: 400; 6 | font-display: optional; 7 | src: url(../assets/S6u8w4BMUTPHjxsAUi-qNiXg7eU0.woff2) format('woff2'); 8 | unicode-range: U+0100-024F, U+0259, U+1E00-1EFF, U+2020, U+20A0-20AB, U+20AD-20CF, U+2113, U+2C60-2C7F, U+A720-A7FF; 9 | } 10 | /* latin */ 11 | @font-face { 12 | font-family: 'Lato'; 13 | font-style: italic; 14 | font-weight: 400; 15 | font-display: optional; 16 | src: url(../assets/S6u8w4BMUTPHjxsAXC-qNiXg7Q.woff2) format('woff2'); 17 | unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD; 18 | } 19 | /* latin-ext */ 20 | @font-face { 21 | font-family: 'Lato'; 22 | font-style: normal; 23 | font-weight: 400; 24 | font-display: optional; 25 | src: url(../assets/S6uyw4BMUTPHjxAwXiWtFCfQ7A.woff2) format('woff2'); 26 | unicode-range: U+0100-024F, U+0259, U+1E00-1EFF, U+2020, U+20A0-20AB, U+20AD-20CF, U+2113, U+2C60-2C7F, U+A720-A7FF; 27 | } 28 | /* latin */ 29 | @font-face { 30 | font-family: 'Lato'; 31 | font-style: normal; 32 | font-weight: 400; 33 | font-display: optional; 34 | src: url(../assets/S6uyw4BMUTPHjx4wXiWtFCc.woff2) format('woff2'); 35 | unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD; 36 | } 37 | /* latin-ext */ 38 | @font-face { 39 | font-family: 'Lato'; 40 | font-style: normal; 41 | font-weight: 700; 42 | font-display: optional; 43 | src: url(../assets/S6u9w4BMUTPHh6UVSwaPGQ3q5d0N7w.woff2) format('woff2'); 44 | unicode-range: U+0100-024F, U+0259, U+1E00-1EFF, U+2020, U+20A0-20AB, U+20AD-20CF, U+2113, U+2C60-2C7F, U+A720-A7FF; 45 | } 46 | /* latin */ 47 | @font-face { 48 | font-family: 'Lato'; 49 | font-style: normal; 50 | font-weight: 700; 51 | font-display: optional; 52 | src: url(../assets/S6u9w4BMUTPHh6UVSwiPGQ3q5d0.woff2) format('woff2'); 53 | unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD; 54 | } 55 | 56 | div.homepage-top { 57 | height: 376px; 58 | background-position: center; 59 | background-size: cover; 60 | display: flex; 61 | flex-direction: column; 62 | justify-content: center; 63 | align-items: center; 64 | background-image: linear-gradient(0deg, rgba(36, 44, 51, 0.5), rgba(36, 44, 51, 0.5)), url(../assets/moto_g5_plus_4.jpg); 65 | } 66 | 67 | div.homepage-top h1 { 68 | width: 576px; 69 | font-size: 36px; 70 | line-height: 1.25; 71 | text-align: center; 72 | font-family: 'Arial Black', 'Arial Bold', Gadget, sans-serif; 73 | font-style: normal; 74 | font-weight: normal; 75 | margin: 0; 76 | padding-bottom: 24px; 77 | color: rgb(255, 255, 255); 78 | } 79 | 80 | div.hide-comments { 81 | display: none; 82 | } -------------------------------------------------------------------------------- /src/ifixit2zim/templates/user.html: -------------------------------------------------------------------------------- 1 | {% set rel_prefix="../../" %} 2 | {% set bodyFullWidth = False %} 3 | {% extends "base.html" %} 4 | {% block title %}{{user['username']}}{% endblock title%} 5 | {% block specific_head %} 6 | 7 | 8 | {% endblock specific_head%} {% block content %} 9 |
10 |
11 |
12 | 13 |
14 | 53 |
54 | 55 | {% if user['about_rendered'] %} 56 |
57 |
58 |
59 |
60 |
61 | {{user['about_rendered'] | cleanup_rendered_content(rel_prefix) | safe}} 62 |
63 |
64 |
65 |
66 |
67 | {% endif %} 68 |
69 | 70 |
71 | 72 |
73 |
74 |
75 | {% endblock content%} -------------------------------------------------------------------------------- /src/ifixit2zim/templates/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 7 | 8 | 9 | 10 | {% block title %}{% endblock %} 11 | 12 | 13 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | {% block specific_head %}{% endblock %} 34 | 35 | 36 |
37 | {% block content %}{% endblock %} 38 |
39 |
40 |
41 | 42 | 43 | 44 |
45 | 65 | 66 |
67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /tasks.py: -------------------------------------------------------------------------------- 1 | # pyright: strict, reportUntypedFunctionDecorator=false 2 | import os 3 | 4 | from invoke.context import Context 5 | from invoke.tasks import task # pyright: ignore [reportUnknownVariableType] 6 | 7 | use_pty = not os.getenv("CI", "") 8 | 9 | 10 | @task(optional=["args"], help={"args": "pytest additional arguments"}) 11 | def test(ctx: Context, args: str = ""): 12 | """run tests (without coverage)""" 13 | ctx.run(f"pytest {args}", pty=use_pty) 14 | 15 | 16 | @task(optional=["args"], help={"args": "pytest additional arguments"}) 17 | def test_cov(ctx: Context, args: str = ""): 18 | """run test vith coverage""" 19 | ctx.run(f"coverage run -m pytest {args}", pty=use_pty) 20 | 21 | 22 | @task(optional=["html"], help={"html": "flag to export html report"}) 23 | def report_cov(ctx: Context, *, html: bool = False): 24 | """report coverage""" 25 | ctx.run("coverage combine", warn=True, pty=use_pty) 26 | ctx.run("coverage report --show-missing", pty=use_pty) 27 | if html: 28 | ctx.run("coverage html", pty=use_pty) 29 | 30 | 31 | @task( 32 | optional=["args", "html"], 33 | help={ 34 | "args": "pytest additional arguments", 35 | "html": "flag to export html report", 36 | }, 37 | ) 38 | def coverage(ctx: Context, args: str = "", *, html: bool = False): 39 | """run tests and report coverage""" 40 | test_cov(ctx, args=args) 41 | report_cov(ctx, html=html) 42 | 43 | 44 | @task(optional=["args"], help={"args": "black additional arguments"}) 45 | def lint_black(ctx: Context, args: str = "."): 46 | args = args or "." # needed for hatch script 47 | ctx.run("black --version", pty=use_pty) 48 | ctx.run(f"black --check --diff {args}", pty=use_pty) 49 | 50 | 51 | @task(optional=["args"], help={"args": "ruff additional arguments"}) 52 | def lint_ruff(ctx: Context, args: str = "."): 53 | args = args or "." # needed for hatch script 54 | ctx.run("ruff --version", pty=use_pty) 55 | ctx.run(f"ruff check {args}", pty=use_pty) 56 | 57 | 58 | @task( 59 | optional=["args"], 60 | help={ 61 | "args": "linting tools (black, ruff) additional arguments, typically a path", 62 | }, 63 | ) 64 | def lintall(ctx: Context, args: str = "."): 65 | """Check linting""" 66 | args = args or "." # needed for hatch script 67 | lint_black(ctx, args) 68 | lint_ruff(ctx, args) 69 | 70 | 71 | @task(optional=["args"], help={"args": "check tools (pyright) additional arguments"}) 72 | def check_pyright(ctx: Context, args: str = ""): 73 | """check static types with pyright""" 74 | ctx.run("pyright --version") 75 | ctx.run(f"pyright {args}", pty=use_pty) 76 | 77 | 78 | @task(optional=["args"], help={"args": "check tools (pyright) additional arguments"}) 79 | def checkall(ctx: Context, args: str = ""): 80 | """check static types""" 81 | check_pyright(ctx, args) 82 | 83 | 84 | @task(optional=["args"], help={"args": "black additional arguments"}) 85 | def fix_black(ctx: Context, args: str = "."): 86 | """fix black formatting""" 87 | args = args or "." # needed for hatch script 88 | ctx.run(f"black {args}", pty=use_pty) 89 | 90 | 91 | @task(optional=["args"], help={"args": "ruff additional arguments"}) 92 | def fix_ruff(ctx: Context, args: str = "."): 93 | """fix all ruff rules""" 94 | args = args or "." # needed for hatch script 95 | ctx.run(f"ruff check --fix {args}", pty=use_pty) 96 | 97 | 98 | @task( 99 | optional=["args"], 100 | help={ 101 | "args": "linting tools (black, ruff) additional arguments, typically a path", 102 | }, 103 | ) 104 | def fixall(ctx: Context, args: str = "."): 105 | """Fix everything automatically""" 106 | args = args or "." # needed for hatch script 107 | fix_black(ctx, args) 108 | fix_ruff(ctx, args) 109 | lintall(ctx, args) 110 | -------------------------------------------------------------------------------- /src/ifixit2zim/assets/area_index-BDTBciD-Y7NVVjoPQBUyhA.css: -------------------------------------------------------------------------------- 1 | #content *{-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}.page-callout{position:relative;margin-top:36px}.page-callout-inner{border-radius:6px;overflow:hidden;max-height:211px}.page-callout-inner>img:first-of-type{max-width:100%}.close-callout{position:absolute;top:10px;right:10px}.page-callout-content{width:250px;text-align:center;position:absolute;right:24px;top:28px}.page-callout-content h2,.page-callout-content p strong{color:#fff}.page-callout-content h2{font-size:26px;border:0;font-weight:700}.page-callout-content p{color:#fff;font-size:16px;margin-bottom:16px}.page-callout-content p strong{font-size:17px}.featured-categories{display:-ms-flexbox;display:-webkit-flex;display:flex;-ms-justify-content:flex-start;-webkit-justify-content:flex-start;justify-content:flex-start;-ms-flex-pack:start;-webkit-flex-wrap:wrap;flex-wrap:wrap;-ms-flex-wrap:wrap}@media only screen and (max-width:599px){.featured-categories{-ms-justify-content:center;-webkit-justify-content:center;justify-content:center;-ms-flex-pack:center}}.featured-category-item{position:relative;-ms-flex:1 auto;-webkit-flex:1 auto;flex:1 auto;width:233px;margin:8px;overflow:hidden}@media only screen and (max-width:1000px){.featured-category-item{width:20%}}@media only screen and (max-width:599px){.featured-category-item{width:44%}}.featured-category-item:hover{text-decoration:none}.featured-category-item:hover img{-webkit-transform:scale(1.1);transform:scale(1.1)}.featured-category-item:hover .shadow-overlay{background-color:rgba(0,117,206,.9)}.featured-category-item img{width:120%;height:auto;margin-top:10px;margin-left:-10%;-webkit-transition:all .2s ease-in-out;transition:all .2s ease-in-out}@media only screen and (max-width:1000px){.featured-category-item img{-webkit-transition:0;transition:0}}.shadow-overlay{position:absolute;top:0;left:0;width:100%;height:100%;background-color:rgba(54,60,64,.8);z-index:9;-webkit-transition:background-color .2s ease-in-out;transition:background-color .2s ease-in-out}.ie8 .shadow-overlay{background-color:#363c40}.ie8 .shadow-overlay:hover{background-color:#3b82f6}.featured-category-title{z-index:10;position:absolute;top:0;bottom:0;left:0;right:0;height:40px;width:100%;margin:auto;font-size:26px;font-weight:600;color:#fff;text-align:center}@media only screen and (max-width:599px){.featured-category-title{font-size:19px}}@media only screen and (max-width:599px),only screen and (min-width:1001px){.collection-guides .blurbListCell:nth-of-type(n+9){display:none}}.section-divider{width:100%;height:1px;background-color:#d4d7dd;position:relative;margin:48px 0;text-align:center;padding:0}@media only screen and (max-width:599px){.section-divider{margin:64px 0}}.section-divider p{display:inline-block;padding:0 20px;margin:0;position:relative;background-color:#fff;top:-10px}.section-divider.primary-divider p{font-size:18px;font-weight:600}.section-divider.secondary-divider p{font-size:13px;font-weight:400}.filter-container{margin-bottom:24px}.filter-container h2{font-size:24px;font-weight:600;border-bottom:0}.sub-categories{display:-ms-flexbox;display:-webkit-flex;display:flex;-webkit-flex-wrap:wrap;flex-wrap:wrap;-ms-flex-wrap:wrap;-ms-justify-content:flex-start;-webkit-justify-content:flex-start;justify-content:flex-start;-ms-flex-pack:start}.sub-category{position:relative;width:23%;width:calc(25% - 14px);text-align:center;background-color:#f9fafb;overflow:hidden;margin:7px;padding:13px 0}@media only screen and (max-width:599px){.sub-category{width:100%}}@media only screen and (min-width:600px) and (max-width:1000px){.sub-category{width:31%}}.sub-category:hover{background-color:#b1cdfb;text-decoration:none}.sub-category:hover .sub-category-title{color:#3b82f6}.sub-category:hover .overflow-slide-in{left:0}.sub-category-title{font-size:16px;color:#212426}.overflow-slide-in{position:absolute;top:0;left:-60px;font-weight:700;padding:13px 0;width:60px;background-color:#3b82f6;color:#fff;overflow:hidden;-webkit-transition:left .2s ease-in-out;transition:left .2s ease-in-out}.overflow-slide-in i{display:block;margin-bottom:2px;font-size:14px;color:#6aafeb} -------------------------------------------------------------------------------- /src/ifixit2zim/assets/Shared-attachment_link-AoWbgS-g65jo1DYOaHV5XA.css: -------------------------------------------------------------------------------- 1 | .attachment-link-list{list-style-type:none;padding:0;margin-bottom:0}.attachment-link-list .attachment-link{margin-top:20px}.attachment-link-list .attachment-container{border-radius:4px;background-color:#fff;border:1px solid #e5e7eb;-ms-align-items:center;-webkit-align-items:center;align-items:center;-ms-flex-align:center}.attachment-link-list .attachment-container .officeDocumentIcon.document-icon-word{color:#2b549e}.attachment-link-list .attachment-container .officeDocumentIcon.document-icon-excel{color:#1e5635}.attachment-link-list .attachment-container .officeDocumentIcon.document-icon-powerpoint{color:#d24527}.attachment-link-list .attachment-container .officeDocumentIcon.document-icon-access{color:#a53636}.attachment-link-list .attachment-container .officeDocumentIcon.document-icon-pdf{color:#ef4444}.attachment-link-list .attachment-container .officeDocumentIcon.document-icon-3d_model{color:#eab308}.attachment-link-list .column{padding-left:15px;padding-bottom:20px;padding-top:20px;min-width:0}.attachment-link-list img{margin-left:10px}.attachment-link-list a:hover{text-decoration:none}.attachment-link-list a p{color:#1f2937}.attachment-link-list p{margin:4px 0}.attachment-link-list p.attachment-subtitle{font-size:12px;font-weight:400;color:#6b7484}.attachment-link-list p.attachment-subtitle.price{color:#e83d16}.attachment-link-list p.attachment-subtitle.supplier{color:#b98d06}.attachment-link-list p.attachment-info{font-size:11px}.attachment-link-list .button{margin-right:15px}.attachment-link-list .button:hover{border-color:#d1d5db}.attachment-link-list .attachment-icon{display:-ms-flexbox;display:-webkit-flex;display:flex;-ms-align-items:center;-webkit-align-items:center;align-items:center;-ms-flex-align:center;-ms-justify-content:center;-webkit-justify-content:center;justify-content:center;-ms-flex-pack:center;width:64px;-ms-align-self:stretch;-webkit-align-self:stretch;align-self:stretch;-ms-flex-item-align:stretch;text-align:center;margin-right:10px;background-color:#f2f2f2}.remove-doc-container{display:-ms-flexbox;display:-webkit-flex;display:flex;-ms-align-items:center;-webkit-align-items:center;align-items:center;-ms-flex-align:center;-ms-flex:0 0 40px;-webkit-flex:0 0 40px;flex:0 0 40px;height:56px;cursor:pointer}.remove-doc-container a.removeLink{display:-ms-flexbox;display:-webkit-flex;display:flex;-ms-justify-content:center;-webkit-justify-content:center;justify-content:center;-ms-flex-pack:center;-ms-flex-direction:column;-webkit-flex-direction:column;flex-direction:column;color:#d1d5db;margin:auto;height:100%}.remove-doc-container:active,.remove-doc-container:hover{background-color:#eaeaea}.remove-doc-container:active a.removeLink,.remove-doc-container:hover a.removeLink{border:none}.remove-doc-container:active a.removeLink i,.remove-doc-container:hover a.removeLink i{color:#ef4444}.attachment-link-list.three-columns,.attachment-link-list.two-columns{display:-ms-flexbox;display:-webkit-flex;display:flex;-ms-flex-flow:row wrap;-webkit-flex-flow:row wrap;flex-flow:row wrap;-ms-align-items:stretch;-webkit-align-items:stretch;align-items:stretch;-ms-flex-align:stretch;margin-left:-20px}.attachment-link-list.three-columns .attachment-link,.attachment-link-list.two-columns .attachment-link{-ms-flex:1 0 50%;-webkit-flex:1 0 50%;flex:1 0 50%;display:-ms-flexbox;display:-webkit-flex;display:flex;-ms-flex-direction:column;-webkit-flex-direction:column;flex-direction:column;min-width:230px}.attachment-link-list.three-columns .attachment-container,.attachment-link-list.two-columns .attachment-container{-ms-flex:1 0 auto;-webkit-flex:1 0 auto;flex:1 0 auto;margin-left:20px;margin-bottom:0}@media only screen and (max-width:599px){.attachment-link-list.three-columns,.attachment-link-list.two-columns{margin-left:0}.attachment-link-list.three-columns .attachment-container,.attachment-link-list.two-columns .attachment-container{margin-left:0}.attachment-link-list.three-columns .attachment-link,.attachment-link-list.two-columns .attachment-link{-ms-flex:0 0 100%;-webkit-flex:0 0 100%;flex:0 0 100%}}@media only screen and (min-width:1001px){.attachment-link-list.three-columns .attachment-link{-ms-flex:1 0 33.33%;-webkit-flex:1 0 33.33%;flex:1 0 33.33%}} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # iFixit 2 | 3 | `ifixit2zim` is an [openZIM](https://openzim.org) scraper to create offline versions of [iFixit](https://www.ifixit.com/) website, in all its supported languages. 4 | 5 | [![CodeFactor](https://www.codefactor.io/repository/github/openzim/ifixit/badge)](https://www.codefactor.io/repository/github/openzim/ifixit) 6 | [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) 7 | [![codecov](https://codecov.io/gh/openzim/ifixit/branch/main/graph/badge.svg)](https://codecov.io/gh/openzim/ifixit) 8 | [![PyPI version shields.io](https://img.shields.io/pypi/v/ifixit2zim.svg)](https://pypi.org/project/ifixit2zim/) 9 | [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ifixit2zim.svg)](https://pypi.org/project/ifixit2zim) 10 | [![Docker](https://ghcr-badge.egpl.dev/openzim/ifixit/latest_tag?label=docker)](https://ghcr.io/openzim/ifixit) 11 | 12 | This scraper downloads the iFixit resources (categories, guides, ...) and puts them in a ZIM file, a clean and user friendly format for storing content for offline usage. 13 | 14 | ## Usage 15 | 16 | `ifixit2zim` works off a *language version* that you must provide via the `--language` argument. The list of supported languages is visible in the `--help` message. 17 | 18 | ### Docker 19 | 20 | ```bash 21 | docker run -v my_dir:/output ghcr.io/openzim/ifixit ifixit2zim --help 22 | ``` 23 | 24 | ### Python 25 | 26 | `ifixit2zim` is a Python3 (**3.6+**) software. If you are not using the [Docker](https://docker.com) image, you are advised to use it in a virtual environment to avoid installing software dependencies on your system. In addition to Python3, you also need to have an up-to-date installation of pip, setuptools and wheel as recommanded [here](https://packaging.python.org/en/latest/tutorials/installing-packages/#id14) (wheel is important since you will have to build some dependencies). 27 | 28 | ```bash 29 | python3 -m venv .venv 30 | source .venv/bin/activate 31 | 32 | # using published version 33 | pip3 install ifixit2zim 34 | ifixit2zim --help 35 | 36 | # running from source 37 | pip3 install -e ".[dev]" 38 | python3 ifixit2zim/ --help 39 | ``` 40 | 41 | Call `deactivate` to quit the virtual environment. 42 | 43 | See `requirements.txt` for the list of python dependencies. 44 | 45 | 46 | ## Contributing 47 | 48 | **All contributions are welcome!** 49 | 50 | Please open an issue on Github and/or submit a Pull-request. 51 | 52 | This project adheres to openZIM's [Contribution Guidelines](https://github.com/openzim/overview/wiki/Contributing). 53 | 54 | This project has implemented openZIM's [Python bootstrap, conventions and policies](https://github.com/openzim/_python-bootstrap/blob/main/docs/Policy.md) **v1.0.0**. 55 | 56 | ### Guidelines 57 | 58 | - Don't take assigned issues. Comment if those get staled. 59 | - If your contribution is far from trivial, open an issue to discuss it first. 60 | - Ensure your code passed [black formatting](https://pypi.org/project/black/), [isort](https://pypi.org/project/isort/) and [flake8](https://pypi.org/project/flake8/) (88 chars) 61 | 62 | ### Create an appropriate Python environment 63 | 64 | First time: 65 | ``` 66 | python3 -m venv .venv 67 | source .venv/bin/activate 68 | pip3 install -e ".[dev]" 69 | ``` 70 | 71 | Next times: 72 | ``` 73 | source .venv/bin/activate 74 | ``` 75 | 76 | 77 | NOTA : there is some limitations to the execution of the underlying libzim library on 78 | MacOS with some known bugs. The main issue is that the full-text index is not working, 79 | so this shouldn't be a problem for quick tests. In doubt, execute the scraper in a 80 | Docker container as explained below. 81 | 82 | ### Test the scraper in a Docker container 83 | 84 | First, build the Docker image (to be ran in the main folder of this repo): 85 | ``` 86 | docker build -t local-ifixit . 87 | ``` 88 | 89 | Then run the scraper with CLI arguments needed for your test (everything after `ifixit2zim` in the example below). 90 | 91 | For instance, if you want to run a scrape of only the `Apple_PDA` category, including its guides, 92 | in French : 93 | ``` 94 | docker run -it -v $(pwd)/output:/output --rm local-ifixit ifixit2zim --language fr --output /output --tmp-dir /tmp --category Apple_PDA 95 | ``` 96 | 97 | This will produce a ZIM in the output folder of your current directory. 98 | 99 | ### Test the ZIM produced 100 | 101 | To test if the ZIM produced is OK, you should run kiwix-serve, once more with Docker. 102 | 103 | For instance, if you produced a file named `ifixit_fr_selection_2022-04.zim` in the 104 | `output` subfolder, and port 1256 is unused on your machine, you might run: 105 | ``` 106 | docker run -it --rm -v $(pwd)/output:/data -p 1256:80 ghcr.io/kiwix/kiwix-tools kiwix-serve /data/ifixit_fr_selection_2022-04.zim 107 | ``` 108 | And then navigate to (https://localhost:1256) on your favorite browser. 109 | 110 | Once test are complete, you might stop the Docker container by pressing Ctrl-C 111 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/python,macos 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=python,macos 3 | 4 | ### macOS ### 5 | # General 6 | .DS_Store 7 | .AppleDouble 8 | .LSOverride 9 | 10 | # Icon must end with two \r 11 | Icon 12 | 13 | 14 | # Thumbnails 15 | ._* 16 | 17 | # Files that might appear in the root of a volume 18 | .DocumentRevisions-V100 19 | .fseventsd 20 | .Spotlight-V100 21 | .TemporaryItems 22 | .Trashes 23 | .VolumeIcon.icns 24 | .com.apple.timemachine.donotpresent 25 | 26 | # Directories potentially created on remote AFP share 27 | .AppleDB 28 | .AppleDesktop 29 | Network Trash Folder 30 | Temporary Items 31 | .apdisk 32 | 33 | ### macOS Patch ### 34 | # iCloud generated files 35 | *.icloud 36 | 37 | ### Python ### 38 | # Byte-compiled / optimized / DLL files 39 | __pycache__/ 40 | *.py[cod] 41 | *$py.class 42 | 43 | # C extensions 44 | *.so 45 | 46 | # Distribution / packaging 47 | .Python 48 | build/ 49 | develop-eggs/ 50 | dist/ 51 | downloads/ 52 | eggs/ 53 | .eggs/ 54 | lib/ 55 | lib64/ 56 | parts/ 57 | sdist/ 58 | var/ 59 | wheels/ 60 | share/python-wheels/ 61 | *.egg-info/ 62 | .installed.cfg 63 | *.egg 64 | MANIFEST 65 | 66 | # PyInstaller 67 | # Usually these files are written by a python script from a template 68 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 69 | *.manifest 70 | *.spec 71 | 72 | # Installer logs 73 | pip-log.txt 74 | pip-delete-this-directory.txt 75 | 76 | # Unit test / coverage reports 77 | htmlcov/ 78 | .tox/ 79 | .nox/ 80 | .coverage 81 | .coverage.* 82 | .cache 83 | nosetests.xml 84 | coverage.xml 85 | *.cover 86 | *.py,cover 87 | .hypothesis/ 88 | .pytest_cache/ 89 | cover/ 90 | 91 | # Translations 92 | *.mo 93 | *.pot 94 | 95 | # Django stuff: 96 | *.log 97 | local_settings.py 98 | db.sqlite3 99 | db.sqlite3-journal 100 | 101 | # Flask stuff: 102 | instance/ 103 | .webassets-cache 104 | 105 | # Scrapy stuff: 106 | .scrapy 107 | 108 | # Sphinx documentation 109 | docs/_build/ 110 | 111 | # PyBuilder 112 | .pybuilder/ 113 | target/ 114 | 115 | # Jupyter Notebook 116 | .ipynb_checkpoints 117 | 118 | # IPython 119 | profile_default/ 120 | ipython_config.py 121 | 122 | # pyenv 123 | # For a library or package, you might want to ignore these files since the code is 124 | # intended to run in multiple environments; otherwise, check them in: 125 | # .python-version 126 | 127 | # pipenv 128 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 129 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 130 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 131 | # install all needed dependencies. 132 | #Pipfile.lock 133 | 134 | # poetry 135 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 136 | # This is especially recommended for binary packages to ensure reproducibility, and is more 137 | # commonly ignored for libraries. 138 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 139 | #poetry.lock 140 | 141 | # pdm 142 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 143 | #pdm.lock 144 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 145 | # in version control. 146 | # https://pdm.fming.dev/#use-with-ide 147 | .pdm.toml 148 | 149 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 150 | __pypackages__/ 151 | 152 | # Celery stuff 153 | celerybeat-schedule 154 | celerybeat.pid 155 | 156 | # SageMath parsed files 157 | *.sage.py 158 | 159 | # Environments 160 | .env 161 | .venv 162 | env/ 163 | venv/ 164 | ENV/ 165 | env.bak/ 166 | venv.bak/ 167 | 168 | # Spyder project settings 169 | .spyderproject 170 | .spyproject 171 | 172 | # Rope project settings 173 | .ropeproject 174 | 175 | # mkdocs documentation 176 | /site 177 | 178 | # mypy 179 | .mypy_cache/ 180 | .dmypy.json 181 | dmypy.json 182 | 183 | # Pyre type checker 184 | .pyre/ 185 | 186 | # pytype static type analyzer 187 | .pytype/ 188 | 189 | # Cython debug symbols 190 | cython_debug/ 191 | 192 | # PyCharm 193 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 194 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 195 | # and can be added to the global gitignore or merged into this file. For a more nuclear 196 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 197 | #.idea/ 198 | 199 | ### Python Patch ### 200 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 201 | poetry.toml 202 | 203 | # ruff 204 | .ruff_cache/ 205 | 206 | # LSP config files 207 | pyrightconfig.json 208 | 209 | # End of https://www.toptal.com/developers/gitignore/api/python,macos 210 | 211 | # ignore all vscode, this is not standard configuration in this place 212 | .vscode 213 | output -------------------------------------------------------------------------------- /src/ifixit2zim/assets/FrameModules-translation_credit-qBLVoFL8fSDpJmDUuduPTA.css: -------------------------------------------------------------------------------- 1 | .translation-credit-container{display:table;border-radius:4px;background-color:#eff6ff;width:100%}.translation-credit-container>div{display:table-cell;vertical-align:middle}.translation-credit-container .title-info-container{-ms-flex:1 1 auto;-webkit-flex:1 1 auto;flex:1 1 auto;background-color:#eff6ff;color:#6b7484;margin:0}.translation-credit-container .title-info-container .contributors-subtitle a,.translation-credit-container .title-info-container h4 a{color:#4b5563}.translation-credit-container .heading{display:-ms-flexbox;display:-webkit-flex;display:flex;padding:10px;padding-bottom:0;-ms-justify-content:space-between;-webkit-justify-content:space-between;justify-content:space-between;-ms-flex-pack:justify}.translation-credit-container .heading .special-thanks{margin-left:5px;font-size:16px;font-weight:700;color:#d1d5db}.translation-credit-container .heading .translation-info{display:-ms-flexbox;display:-webkit-flex;display:flex;position:relative;width:185px;height:38px;float:right;border-radius:44px;box-shadow:0 1px 2px 0 #d1d5db}.translation-credit-container .heading .translation-info .progress-bar{position:absolute;height:100%;border-radius:44px;left:0;right:0;overflow:hidden}.translation-credit-container .heading .translation-info .progress-bar>span{display:block;height:100%;border-radius:3px;position:relative;background-color:#b2ebc7}.translation-credit-container .heading .translation-info .translation-info-section{z-index:1;width:50%;display:-ms-flexbox;display:-webkit-flex;display:flex;align-items:center;padding-left:1em}.translation-credit-container .heading .translation-info .translation-info-section .fa-arrow-right{margin:auto;height:10px}.translation-credit-container .heading .translation-info .translation-info-section .percentage{margin:auto;height:20px}.translation-credit-container .heading .translation-info .translation-info-section:last-child{border-right:none}.translation-credit-container .heading .translation-info .sprite-flag{margin:auto;top:0}.translation-credit-container .author-container{display:-ms-flexbox;display:-webkit-flex;display:flex;-webkit-flex-wrap:wrap;flex-wrap:wrap;-ms-flex-wrap:wrap;-ms-justify-content:center;-webkit-justify-content:center;justify-content:center;-ms-flex-pack:center;margin-bottom:1.5em}.translation-credit-container .author-container .author-extra,.translation-credit-container .author-container .author-profile{display:-ms-flexbox;display:-webkit-flex;display:flex;-ms-flex-direction:column;-webkit-flex-direction:column;flex-direction:column;width:180px;height:100px;margin-top:1em;border-right:1px solid #f9fafb}.translation-credit-container .author-container .author-extra .author-photo,.translation-credit-container .author-container .author-profile .author-photo{position:relative;width:50px;margin:0 auto;display:-ms-flexbox;display:-webkit-flex;display:flex;-ms-justify-content:center;-webkit-justify-content:center;justify-content:center;-ms-flex-pack:center}.translation-credit-container .author-container .author-extra .author-photo img,.translation-credit-container .author-container .author-profile .author-photo img{line-height:0;display:inline-block;border-radius:50%;height:50px;width:50px;overflow:hidden;background:#4b5563}.translation-credit-container .author-container .author-extra .author-photo img img,.translation-credit-container .author-container .author-profile .author-photo img img{border-radius:50%;height:100%}.translation-credit-container .author-container .author-extra .author-photo img img,.translation-credit-container .author-container .author-profile .author-photo img img{width:100%}.translation-credit-container .author-container .author-extra .author-photo span,.translation-credit-container .author-container .author-profile .author-photo span{position:absolute;top:80%;margin:auto;right:0;box-shadow:0 1px 3px 0 rgba(0,0,0,.5)}.translation-credit-container .author-container .author-extra p,.translation-credit-container .author-container .author-profile p{color:#212426;text-align:center}.translation-credit-container .author-container .author-extra .author-name,.translation-credit-container .author-container .author-profile .author-name{font-size:13px;font-weight:700;color:#212426;text-align:center;margin-top:1em}.translation-credit-container .author-container .author-extra .progress-bar,.translation-credit-container .author-container .author-profile .progress-bar{width:75px;height:6px;border-radius:3px;background-color:#f9fafb;box-shadow:inset 0 1px 1px 0 rgba(0,0,0,.07);margin:1em auto}.translation-credit-container .author-container .author-extra .progress-bar>span,.translation-credit-container .author-container .author-profile .progress-bar>span{display:block;height:100%;position:relative;overflow:hidden;border-radius:3px;background-image:linear-gradient(to bottom,#15a1d6,#3b82f6)}.translation-credit-container .author-container .author-extra{border:none}.translation-credit-container .author-container .author-extra .author-photo{width:50px;height:50px;background-color:#f9fafb;border-radius:44px}.translation-credit-container .author-container .author-extra h3{margin:0;color:#4b5563;height:100%;line-height:50px;text-align:center}.translation-credit-container .author-container .author-profile:last-child{border:none}.translation-credit-container .call-to-action-container{background-color:#fff;border:solid 2px #eff6ff;text-align:center;color:#7a7c7d;margin:0;padding-top:10px;padding-bottom:10px} -------------------------------------------------------------------------------- /src/ifixit2zim/scraper_info.py: -------------------------------------------------------------------------------- 1 | import urllib.parse 2 | 3 | from ifixit2zim.constants import UNAVAILABLE_OFFLINE_INFOS 4 | from ifixit2zim.context import Context 5 | from ifixit2zim.exceptions import UnexpectedDataKindExceptionError 6 | from ifixit2zim.scraper_generic import ScraperGeneric 7 | from ifixit2zim.shared import logger 8 | 9 | 10 | class ScraperInfo(ScraperGeneric): 11 | def __init__(self, context: Context): 12 | super().__init__(context) 13 | 14 | def setup(self): 15 | self.info_template = self.env.get_template("info.html") 16 | 17 | def get_items_name(self): 18 | return "info" 19 | 20 | def _add_info_to_scrape(self, info_key, info_title, is_expected): 21 | self.add_item_to_scrape( 22 | info_key, 23 | { 24 | "info_title": info_title, 25 | }, 26 | is_expected, 27 | ) 28 | 29 | def _get_info_key_from_title(self, info_title): 30 | return self.processor.convert_title_to_filename(info_title.lower()) 31 | 32 | def _build_info_path(self, info_title): 33 | href = ( 34 | self.configuration.main_url.geturl() 35 | + f"/Info/{info_title.replace('/', ' ')}" 36 | ) 37 | final_href = self.processor.normalize_href(href) 38 | return final_href[1:] 39 | 40 | def get_info_link_from_obj(self, info): 41 | if "title" not in info or not info["title"]: 42 | raise UnexpectedDataKindExceptionError( 43 | f"Impossible to extract info title from {info}" 44 | ) 45 | info_title = info["title"] 46 | return self.get_info_link_from_props(info_title=info_title) 47 | 48 | def get_info_link_from_props(self, info_title): 49 | info_path = urllib.parse.quote(self._build_info_path(info_title)) 50 | if self.configuration.no_info: 51 | return f"home/not_scrapped?url={info_path}" 52 | if info_title in UNAVAILABLE_OFFLINE_INFOS: 53 | return f"home/unavailable_offline?url={info_path}" 54 | info_key = self._get_info_key_from_title(info_title) 55 | if self.configuration.infos: 56 | is_not_included = True 57 | for other_info in self.configuration.infos: 58 | other_info_key = self._get_info_key_from_title(other_info) 59 | if other_info_key == info_key: 60 | is_not_included = False 61 | if is_not_included: 62 | return f"home/not_scrapped?url={info_path}" 63 | self._add_info_to_scrape(info_key, info_title, False) 64 | return info_path 65 | 66 | def build_expected_items(self): 67 | if self.configuration.no_info: 68 | logger.info("No info required") 69 | return 70 | if self.configuration.infos: 71 | logger.info("Adding required infos as expected") 72 | for info_title in self.configuration.infos: 73 | info_key = self._get_info_key_from_title(info_title) 74 | self._add_info_to_scrape(info_key, info_title, True) 75 | return 76 | logger.info("Downloading list of info") 77 | limit = 200 78 | offset = 0 79 | while True: 80 | info_wikis = self.utils.get_api_content( 81 | "/wikis/INFO", limit=limit, offset=offset 82 | ) 83 | if not info_wikis or len(info_wikis) == 0: 84 | break 85 | for info_wiki in info_wikis: 86 | info_title = info_wiki["title"] 87 | info_key = self._get_info_key_from_title(info_title) 88 | self._add_info_to_scrape(info_key, info_title, True) 89 | offset += limit 90 | if self.configuration.scrape_only_first_items: 91 | logger.warning( 92 | "Aborting the retrieval of all infos since only first items" 93 | " will be scraped anyway" 94 | ) 95 | break 96 | logger.info(f"{len(self.expected_items_keys)} info found") 97 | 98 | def get_one_item_content(self, item_key, item_data): # noqa ARG002 99 | info_wiki_title = item_key 100 | info_wiki_content = self.utils.get_api_content(f"/wikis/INFO/{info_wiki_title}") 101 | return info_wiki_content 102 | 103 | def add_item_redirect(self, item_key, item_data, redirect_kind): # noqa ARG002 104 | path = self._build_info_path(item_data["info_title"]) 105 | self.processor.add_redirect( 106 | path=path, 107 | target_path=f"home/{redirect_kind}?{urllib.parse.urlencode({'url':path})}", 108 | ) 109 | 110 | def process_one_item(self, item_key, item_data, item_content): # noqa ARG002 111 | info_wiki_content = item_content 112 | 113 | info_wiki_rendered = self.info_template.render( 114 | info_wiki=info_wiki_content, 115 | # label=INFO_WIKI_LABELS[self.conf.lang_code], 116 | metadata=self.metadata, 117 | lang=self.configuration.lang_code, 118 | ) 119 | 120 | self.processor.add_html_item( 121 | path=self._build_info_path(info_wiki_content["title"]), 122 | title=info_wiki_content["display_title"], 123 | content=info_wiki_rendered, 124 | ) 125 | -------------------------------------------------------------------------------- /src/ifixit2zim/assets/Shared-i18n_formatting-7XRaMqur0Z-hJvP-W8sS2A.css: -------------------------------------------------------------------------------- 1 | .de .featuredStats,.nl .featuredStats{white-space:nowrap;font-size:95%}.de .featuredStats .pseudoH3,.nl .featuredStats .pseudoH3{margin-left:-7px}.es .featuredArea h2,.it .featuredArea h2{font-size:2em}.de .minorArea h2,.es .minorArea h2,.fr .minorArea h2,.it .minorArea h2,.nl .minorArea h2{height:20px;font-size:20px}.de .minorArea .minorImg,.es .minorArea .minorImg,.fr .minorArea .minorImg,.it .minorArea .minorImg,.nl .minorArea .minorImg{padding-top:10px}.de #subAreas,.es #subAreas,.fr #subAreas,.it #subAreas,.nl #subAreas{padding-top:10px}.nl .header-nav a{padding:0 40px}.de .site-nav ul li a,.es .site-nav ul li a,.nl .site-nav ul li a{padding-left:25px;padding-right:25px}.de #wizardStrip .stripStep,.es #wizardStrip .stripStep,.fr #wizardStrip .stripStep,.it #wizardStrip .stripStep,.nl #wizardStrip .stripStep{padding-top:3px;font-size:12px}.de #wizardStrip .stepNumber,.es #wizardStrip .stepNumber,.fr #wizardStrip .stepNumber,.it #wizardStrip .stepNumber,.nl #wizardStrip .stepNumber{font-size:16px}.de .subscribe .submit-button{font-size:12px}.de #editPassword1Box label,.de #editPassword2Box label,.es #editPassword1Box label,.es #editPassword2Box label,.it #editPassword1Box label,.it #editPassword2Box label,.nl #editPassword1Box label,.nl #editPassword2Box label{height:42px}.de #wikiRelatedInputDiv input,.es #wikiRelatedInputDiv input,.fr #wikiRelatedInputDiv input,.it #wikiRelatedInputDiv input,.nl #wikiRelatedInputDiv input{width:65%!important}.de #wikiRelatedInputDiv .wikiRelatedButton,.es #wikiRelatedInputDiv .wikiRelatedButton,.fr #wikiRelatedInputDiv .wikiRelatedButton,.it #wikiRelatedInputDiv .wikiRelatedButton,.nl #wikiRelatedInputDiv .wikiRelatedButton{width:30%}.de #tagInput input,.es #tagInput input,.fr #tagInput input,.it #tagInput input,.nl #tagInput input{width:60%!important}.de #tagInput #tagsAdd,.es #tagInput #tagsAdd,.fr #tagInput #tagsAdd,.it #tagInput #tagsAdd,.nl #tagInput #tagsAdd{width:30%!important}.de #cartInput input,.es #cartInput input,.fr #cartInput input,.it #cartInput input,.nl #cartInput input{width:65%!important}.de #cartInput .buttonLink,.es #cartInput .buttonLink,.fr #cartInput .buttonLink,.it #cartInput .buttonLink,.nl #cartInput .buttonLink{width:30%}.de #topicParentForm #th_parentTitle,.es #topicParentForm #th_parentTitle,.fr #topicParentForm #th_parentTitle,.it #topicParentForm #th_parentTitle,.nl #topicParentForm #th_parentTitle{width:55%!important}.de #topicParentForm .buttonLink,.es #topicParentForm .buttonLink,.fr #topicParentForm .buttonLink,.it #topicParentForm .buttonLink,.nl #topicParentForm .buttonLink{width:35%}.fr .toggle-deleted{font-weight:400}.fr .itemHover.hasImage .wikiLink,.it .itemHover.hasImage .wikiLink{bottom:18px}.fr .itemHover.hasImage .purchaseLink,.it .itemHover.hasImage .purchaseLink{left:109px}.fr .itemHover.hasImage .description,.it .itemHover.hasImage .description{height:60px;padding-bottom:30px}body:not(.en) #badgeList h2{font-size:19px;line-height:40px}body:not(.en) #patrolThreshold #inputDiv{float:left;margin-top:25px}body:not(.en) #patrolThreshold button,body:not(.en) #patrolThreshold input{width:75px}body:not(.en) #productQuantityDiv{width:100%;margin-bottom:8px}body:not(.en) #quantityLabel{float:right}@media only screen and (min-width:1001px) and (max-width:1200px),only screen and (min-width:1201px){body:not(.en) .questions-list .question-details-container{width:18%}body:not(.en) .questions-list .question-details-container .question-device-image{width:40%}body:not(.en) .questions-list .question-details-container .question-answers-number{width:55%}body:not(.en) .questions-list .question-title-container{width:56%}}@media only screen and (max-width:599px),only screen and (min-width:600px) and (max-width:1000px){body:not(.en) .questions-list .question-num-answers{font-size:20px}body:not(.en) .questions-list .question-answers-title{text-transform:capitalize}}body:not(.en) #patrolThreshold #patrolSlider{width:100%}body:not(.en) #patrolThreshold #inputDiv{width:100%;margin-top:5px}body:not(.en) #patrolThreshold #inputDiv #thresholdInputBox,body:not(.en) #patrolThreshold #inputDiv #thresholdSubmitBtn{float:left;min-width:75px;width:auto}body:not(.en) .secondary-stores .store-title{width:50%;margin-right:10px}.de .gift-listing{height:800px}@media only screen and (min-width:741px){.es #login .external-login,.fr #login .external-login,.ru #login .external-login{padding:0 50px}}.de a.tab-link,.fr a.tab-link,.it a.tab-link,.pt a.tab-link,.tr a.tab-link{padding:7px 9px 8px;font-size:13px}.es a.tab-link,.nl a.tab-link,.ru a.tab-link{padding:7px 6px 8px;font-size:12px}@media only screen and (min-width:1001px),only screen and (min-width:600px) and (max-width:1000px){.de .store-content .stores h5.title,.es .store-content .stores h5.title,.fr .store-content .stores h5.title,.pt .store-content .stores h5.title,.ru .store-content .stores h5.title,.tr .store-content .stores h5.title{white-space:normal;font-size:20px}}@media only screen and (min-width:1001px){.de .store-content .stores h5.title,.es .store-content .stores h5.title,.fr .store-content .stores h5.title,.pt .store-content .stores h5.title,.ru .store-content .stores h5.title,.tr .store-content .stores h5.title{padding:7px 20px;font-size:16px;display:table-cell;vertical-align:middle;width:220px;height:60px}}.de .page-callout-content h2,.es .page-callout-content h2,.fr .page-callout-content h2,.it .page-callout-content h2,.nl .page-callout-content h2,.ru .page-callout-content h2,.tr .page-callout-content h2{margin-top:5px} -------------------------------------------------------------------------------- /src/ifixit2zim/templates/not_here.html: -------------------------------------------------------------------------------- 1 | {% set rel_prefix="../" %}{% set bodyFullWidth = True %}{% extends "base.html" %} {% block title %}{{metadata['title']}}{% endblock title%} 2 | {% block specific_head %} 3 | 4 | 5 | {% endblock specific_head%} {% block content %} 6 | 7 |
8 |
9 |
10 | 11 | 12 |
13 |
14 |
15 | 16 |

17 | {% if kind == 'not_scrapped' %} 18 | Oups 19 | {% elif kind == 'external_content' %} 20 | External content 21 | {% elif kind == 'unavailable_offline' %} 22 | Oups 23 | {% elif kind == 'not_yet_available' %} 24 | Oups 25 | {% elif kind == 'missing' %} 26 | Missing item 27 | {% elif kind == 'error' %} 28 | Oups 29 | {% else %} 30 | {{ raise("unsupported kind: " + kind)}} 31 | {% endif %} 32 |

33 | 34 |

35 | {% if kind == 'not_scrapped' %} 36 | This content has not been scrapped. 37 | {% elif kind == 'external_content' %} 38 | This link is targeting an EXTERNAL content, not available for offline browsing. 39 | {% elif kind == 'unavailable_offline' %} 40 | This content cannot be made available offline. 41 | {% elif kind == 'not_yet_available' %} 42 | This content is not (yet) available offline. 43 | {% elif kind == 'missing' %} 44 | This content is missing. 45 | {% elif kind == 'error' %} 46 | This content is missing due to an error. 47 | {% endif %} 48 |

49 | 50 |
51 |
52 |

53 | 54 | {% if kind == 'not_scrapped' %} 55 | Due to the scrapper configuration, only partial content has been scrapped and this content was not selected for this ZIM version. 56 | If you are interested in this content, please use another ZIM version. 57 | {% elif kind == 'external_content' %} 58 | If you want/can, you might go there by clicking on the following link: 59 | 60 |

61 |

62 | If you consider it is an essential item that should be part of the offline archive, please open an issue 63 | on Github 64 | when you are back online, or notify Kiwix team by any other mean. 65 | {% elif kind == 'unavailable_offline' %} 66 | Due to the dynamic nature of this content, it cannot be made available offline. 67 | {% elif kind == 'missing' %} 68 | This item is not accessible since we failed to retrieve it at the time we built this ZIM archive, probably becuase it has been archived or moved. 69 | {% elif kind == 'error' %} 70 | This item is not accessible due to an unexpected error at the time we built this ZIM archive. 71 | {% elif kind == 'not_yet_available' %} 72 | This content has unfortunately not yet been retrieved in the archive you are currently browsing. 73 | Current scrapper does not yet support this kind of data. 74 |

75 |

76 | If you consider it is an essential item, please open an issue on Github 77 | when you are back online, or notify Kiwix team by any other mean. 78 | {% endif %} 79 |

80 | {% if kind == 'not_scrapped' or kind == 'not_yet_available' or kind == 'unavailable_offline'%} 81 | Original link: 82 | {% endif %} 83 | 84 | 85 |
86 | 87 |
88 | 89 |
90 |
91 |
92 |
93 | 94 | 95 |
96 | 97 |
98 |
99 |
100 | {% endblock content%} -------------------------------------------------------------------------------- /src/ifixit2zim/templates/home.html: -------------------------------------------------------------------------------- 1 | {% set rel_prefix="../" %}{% set bodyFullWidth = True %}{% extends "base.html" %} 2 | 3 | {% block title %}{{metadata['title']}}{% endblock title%} 4 | 5 | {% block specific_head %} 6 | 7 | 8 | {% endblock specific_head%} 9 | 10 | {% block content %} 11 | 12 |
13 |
14 |
15 | 16 | 17 |
18 |
19 |
20 |
21 | 23 | 24 | 25 | 28 | 29 | 30 | 31 | 32 | 35 | 36 | 38 | 39 | 40 | 41 | 42 | 43 |

{{label['top_title']}}

44 |
45 |
46 | 47 |
48 |
49 |

{{home_content['primary_title']}}

50 |
51 | 52 | 62 | 63 |
64 |

{{home_content['secondary_title']}}

65 |
66 |
67 | {%for category in home_content['sub_categories']%} 68 | 69 |

70 | {{category['count']}} 73 | {{category['text']}} 74 |

75 |
76 | {%endfor%} 77 |
78 |
79 |
80 |
81 |
82 | 83 |
84 | 85 |
86 |
87 |
88 | {% endblock content%} -------------------------------------------------------------------------------- /src/ifixit2zim/executor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim: ai ts=4 sts=4 et sw=4 nu 3 | 4 | import queue 5 | import threading 6 | from collections.abc import Callable 7 | 8 | from ifixit2zim.shared import logger 9 | 10 | _shutdown = False 11 | # Lock that ensures that new workers are not created while the interpreter is 12 | # shutting down. Must be held while mutating _threads_queues and _shutdown. 13 | _global_shutdown_lock = threading.Lock() 14 | 15 | 16 | def excepthook(args): 17 | logger.error( 18 | f"UNHANDLED Exception in {args.thread.name}: {args.exc_type}", 19 | exc_info=args.exc_value, 20 | ) 21 | 22 | 23 | threading.excepthook = excepthook 24 | 25 | 26 | class Executor(queue.Queue): 27 | """Custom FIFO queue based Executor that's less generic than ThreadPoolExec one 28 | 29 | Providing more flexibility for the use cases we're interested about: 30 | - halt immediately (sort of) upon exception (if requested) 31 | - able to join() then restart later to accomodate successive steps 32 | 33 | See: https://github.com/python/cpython/blob/3.8/Lib/concurrent/futures/thread.py 34 | """ 35 | 36 | def __init__(self, queue_size: int = 10, nb_workers: int = 1, prefix: str = "T-"): 37 | super().__init__(queue_size) 38 | self.prefix = prefix 39 | self._shutdown_lock = threading.Lock() 40 | self.nb_workers = nb_workers 41 | self.exceptions = [] 42 | 43 | @property 44 | def exception(self): 45 | """Exception raises in any thread, if any""" 46 | try: 47 | return self.exceptions[0:1].pop() 48 | except IndexError: 49 | return None 50 | 51 | @property 52 | def alive(self): 53 | """whether it should continue running""" 54 | return not self._shutdown 55 | 56 | def submit(self, task: Callable, **kwargs): 57 | """Submit a callable and its kwargs for execution in one of the workers""" 58 | with self._shutdown_lock, _global_shutdown_lock: 59 | if not self.alive: 60 | raise RuntimeError("cannot submit task to dead executor") 61 | if _shutdown: 62 | raise RuntimeError("cannot submit task after interpreter shutdown") 63 | 64 | while True: 65 | try: 66 | self.put((task, kwargs), block=True, timeout=3.0) 67 | except queue.Full: 68 | if self.no_more: 69 | logger.debug("rejecting task: queue full and currently `join`ing") 70 | break 71 | else: 72 | break 73 | 74 | def start(self): 75 | """Enable executor, starting requested amount of workers 76 | 77 | Workers are started always, not provisioned dynamicaly""" 78 | self.drain() 79 | self.release_halt() 80 | self._workers = set() 81 | self._shutdown = False 82 | self.exceptions[:] = [] 83 | 84 | for n in range(self.nb_workers): 85 | t = threading.Thread(target=self.worker, name=f"{self.prefix}{n}") 86 | t.daemon = True 87 | t.start() 88 | self._workers.add(t) 89 | 90 | def worker(self): 91 | while self.alive or self.no_more: 92 | try: 93 | func, kwargs = self.get(block=True, timeout=2.0) 94 | except queue.Empty: 95 | if self.no_more: 96 | break 97 | continue 98 | except TypeError: 99 | # received None from the queue. most likely shuting down 100 | return 101 | 102 | raises = kwargs.pop("raises") if "raises" in kwargs.keys() else False 103 | callback = kwargs.pop("callback") if "callback" in kwargs.keys() else None 104 | dont_release = kwargs.pop("dont_release", False) 105 | 106 | try: 107 | func(**kwargs) 108 | except Exception as exc: 109 | logger.error(f"Error processing {func} with {kwargs=}", exc_info=exc) 110 | if raises: 111 | self.exceptions.append(exc) 112 | self.shutdown() 113 | finally: 114 | # user will manually release the queue for this task. 115 | # most likely in a libzim-written callback 116 | if not dont_release: 117 | self.task_done() 118 | if callback: 119 | callback.__call__() 120 | 121 | def drain(self): 122 | """Empty the queue without processing the tasks (tasks will be lost)""" 123 | while True: 124 | try: 125 | self.get_nowait() 126 | except queue.Empty: 127 | break 128 | 129 | def join(self): 130 | """Await completion of workers, requesting them to stop taking new task""" 131 | logger.debug(f"joining all threads for {self.prefix}") 132 | self.no_more = True 133 | for t in self._workers: 134 | e = threading.Event() 135 | while t.is_alive(): 136 | t.join(1) 137 | e.wait(timeout=2) 138 | logger.debug(f"all threads joined for {self.prefix}") 139 | 140 | def release_halt(self): 141 | """release the `no_more` flag preventing workers from taking up tasks""" 142 | self.no_more = False 143 | 144 | def shutdown(self, *, wait=True): 145 | """stop the executor, either somewhat immediately or awaiting completion""" 146 | logger.debug(f"shutting down executor {self.prefix} with {wait=}") 147 | with self._shutdown_lock: 148 | self._shutdown = True 149 | 150 | # Drain all work items from the queue 151 | if not wait: 152 | self.drain() 153 | if wait: 154 | self.join() 155 | -------------------------------------------------------------------------------- /src/ifixit2zim/scraper_category.py: -------------------------------------------------------------------------------- 1 | import urllib.parse 2 | 3 | from ifixit2zim.constants import CATEGORY_LABELS, URLS 4 | from ifixit2zim.context import Context 5 | from ifixit2zim.exceptions import UnexpectedDataKindExceptionError 6 | from ifixit2zim.scraper_generic import ScraperGeneric 7 | from ifixit2zim.shared import logger 8 | 9 | 10 | class ScraperCategory(ScraperGeneric): 11 | def __init__(self, context: Context): 12 | super().__init__(context) 13 | 14 | def setup(self): 15 | self.category_template = self.env.get_template("category.html") 16 | 17 | def get_items_name(self): 18 | return "category" 19 | 20 | def _add_category_to_scrape(self, category_key, category_title, is_expected): 21 | self.add_item_to_scrape( 22 | category_key, 23 | { 24 | "category_title": category_title, 25 | }, 26 | is_expected, 27 | ) 28 | 29 | def _get_category_key_from_title(self, category_title): 30 | return self.processor.convert_title_to_filename(category_title.lower()) 31 | 32 | def _build_category_path(self, category_title): 33 | href = ( 34 | self.configuration.main_url.geturl() 35 | + f"/Device/{category_title.replace('/', ' ')}" 36 | ) 37 | final_href = self.processor.normalize_href(href) 38 | return final_href[1:] 39 | 40 | def get_category_link_from_obj(self, category): 41 | if "title" not in category or not category["title"]: 42 | raise UnexpectedDataKindExceptionError( 43 | f"Impossible to extract category title from {category}" 44 | ) 45 | category_title = category["title"] 46 | return self.get_category_link_from_props(category_title=category_title) 47 | 48 | def get_category_link_from_props(self, category_title): 49 | category_path = urllib.parse.quote(self._build_category_path(category_title)) 50 | if self.configuration.no_category: 51 | return f"home/not_scrapped?url={category_path}" 52 | category_key = self._get_category_key_from_title(category_title) 53 | if self.configuration.categories: 54 | is_not_included = True 55 | for other_category in self.configuration.categories: 56 | other_category_key = self._get_category_key_from_title(other_category) 57 | if other_category_key == category_key: 58 | is_not_included = False 59 | if is_not_included: 60 | return f"home/not_scrapped?url={category_path}" 61 | self._add_category_to_scrape(category_key, category_title, False) 62 | return category_path 63 | 64 | def _process_categories(self, categories): 65 | for category in categories: 66 | category_key = self._get_category_key_from_title(category) 67 | self._add_category_to_scrape(category_key, category, True) 68 | if categories[category]: 69 | self._process_categories(categories[category]) 70 | 71 | def build_expected_items(self): 72 | if self.configuration.no_category: 73 | logger.info("No category required") 74 | return 75 | if self.configuration.categories: 76 | logger.info("Adding required categories as expected") 77 | for category in self.configuration.categories: 78 | category_key = self._get_category_key_from_title(category) 79 | self._add_category_to_scrape(category_key, category, True) 80 | return 81 | logger.info("Downloading list of categories") 82 | categories = self.utils.get_api_content("/categories", includeStubs=True) 83 | self._process_categories(categories) 84 | logger.info(f"{len(self.expected_items_keys)} categories found") 85 | 86 | def get_one_item_content(self, item_key, item_data): # noqa ARG002 87 | categoryid = item_key 88 | 89 | category_content = self.utils.get_api_content( 90 | f"/wikis/CATEGORY/{categoryid}", langid=self.configuration.lang_code 91 | ) 92 | 93 | if category_content and category_content["revisionid"] > 0: 94 | return category_content 95 | 96 | logger.warning("Falling back to category in EN") 97 | category_content = self.utils.get_api_content( 98 | f"/wikis/CATEGORY/{categoryid}", langid="en" 99 | ) 100 | 101 | if category_content and category_content["revisionid"] > 0: 102 | return category_content 103 | 104 | for lang in URLS.keys(): 105 | logger.warning(f"Falling back to category in {lang}") 106 | category_content = self.utils.get_api_content( 107 | f"/wikis/CATEGORY/{categoryid}", langid=lang 108 | ) 109 | 110 | if category_content and category_content["revisionid"] > 0: 111 | return category_content 112 | 113 | logger.warning(f"Impossible to get category content: {item_key}") 114 | self.processor.null_categories.add(item_key) 115 | 116 | return None 117 | 118 | def add_item_redirect(self, item_key, item_data, redirect_kind): # noqa ARG002 119 | path = self._build_category_path(item_data["category_title"]) 120 | self.processor.add_redirect( 121 | path=path, 122 | target_path=f"home/{redirect_kind}?{urllib.parse.urlencode({'url':path})}", 123 | ) 124 | 125 | def process_one_item(self, item_key, item_data, item_content): # noqa ARG002 126 | category_content = item_content 127 | 128 | category_rendered = self.category_template.render( 129 | category=category_content, 130 | label=CATEGORY_LABELS[self.configuration.lang_code], 131 | metadata=self.metadata, 132 | lang=self.configuration.lang_code, 133 | ) 134 | 135 | self.processor.add_html_item( 136 | path=self._build_category_path(category_title=category_content["title"]), 137 | title=category_content["display_title"], 138 | content=category_rendered, 139 | ) 140 | -------------------------------------------------------------------------------- /src/ifixit2zim/assets/Wiki-topic-r_spN9srKqcGQAC8emdeTA.css: -------------------------------------------------------------------------------- 1 | #contentPreview .column2{width:290px;margin-left:13px}#contentPreview .column3{width:189px;margin-left:0}#contentPreview .firstColumn{margin-left:0}#sidebarFloatPreview{padding:0 24px 24px 0}#contentFloatPreview{padding:0 0 24px 24px}#wikiPreviewDiv{margin-bottom:24px}#wikiDiffDiv{margin:0 24px 24px 24px}@media only screen and (max-width:599px){#topContent{padding:0 12px}}@media only screen and (min-width:1001px){.full-page-background #main{padding:0}}#contentFloat,#topContent{padding:0 24px!important}#topContent>div:first-child h2,#wikiRenderedText>div:nth-child(3){margin-top:0!important}.diagramsContainer{overflow:auto}.questionsContainer{display:-ms-flexbox;display:-webkit-flex;display:flex;-ms-align-items:center;-webkit-align-items:center;align-items:center;-ms-flex-align:center;-ms-justify-content:space-between;-webkit-justify-content:space-between;justify-content:space-between;-ms-flex-pack:justify;border-bottom:1px solid #e2e2e2;margin:45px 0 20px;padding-bottom:6px}@media only screen and (max-width:599px){.questionsContainer{-ms-flex-direction:column;-webkit-flex-direction:column;flex-direction:column;-ms-align-self:flex-start;-webkit-align-self:flex-start;align-self:flex-start;-ms-flex-item-align:start;border-bottom:none}}.questionsContainer #deviceQuestions>a{display:-ms-flexbox;display:-webkit-flex;display:flex;-ms-flex-direction:row;-webkit-flex-direction:row;flex-direction:row}@media only screen and (max-width:599px){.questionsContainer #askQuestionButton{text-align:left;border-top:1px solid #e2e2e2;padding-top:20px}}.questionsContainer .header{margin:0!important;border-bottom:none!important;padding-bottom:0!important}#topContent h2,.topic.articleContainer h2{display:-ms-flexbox;display:-webkit-flex;display:flex;-ms-align-items:center;-webkit-align-items:center;align-items:center;-ms-flex-align:center;-ms-justify-content:space-between;-webkit-justify-content:space-between;justify-content:space-between;-ms-flex-pack:justify;font-weight:700;font-size:26px;color:#212426;margin:45px 0 20px;padding-bottom:6px;border-bottom:1px solid #e5e7eb}#topContent h2.tools-header,.topic.articleContainer h2.tools-header{margin-top:0}.topic.articleContainer div.header{font-weight:600;font-size:26px;color:#212426;padding-bottom:6px}@media only screen and (max-width:1000px){.highlight-guides{padding-left:0;padding-right:0;width:auto;overflow:auto}}.highlight-guides .entry-container{min-height:133px;display:-ms-flexbox;display:-webkit-flex;display:flex}@media only screen and (max-width:599px){.highlight-guides .entry-container{-webkit-flex-wrap:wrap;flex-wrap:wrap;-ms-flex-wrap:wrap}}.highlight-guides .entry{display:-ms-flexbox;display:-webkit-flex;display:flex;margin:0 10px 0 0;width:47%;height:100%}@media only screen and (min-width:600px) and (max-width:740px){.highlight-guides .entry{-ms-flex-direction:column;-webkit-flex-direction:column;flex-direction:column;width:100%;margin-bottom:24px;text-align:center}}@media only screen and (max-width:599px){.highlight-guides .entry{-ms-flex-direction:column;-webkit-flex-direction:column;flex-direction:column;width:100%;margin-bottom:26px}}.highlight-guides .entry:hover{text-decoration:none}.highlight-guides .entry .entry-text{display:-ms-flexbox;display:-webkit-flex;display:flex;-ms-flex-direction:column;-webkit-flex-direction:column;flex-direction:column;-ms-align-self:center;-webkit-align-self:center;align-self:center;-ms-flex-item-align:center;margin-left:5px;width:45%}@media only screen and (max-width:599px){.highlight-guides .entry .entry-text{width:280px}}.highlight-guides .entry .entry-text h4{margin-top:0;color:#212426;font-size:18px}.highlight-guides .entry .thumbnail{width:176px;height:131px;float:left;border:none;position:relative;margin-right:14px;border-radius:4px;overflow:hidden;box-shadow:0 1px 2px rgba(17,22,26,.1),0 2px 4px rgba(17,22,26,.1)}@media only screen and (max-width:599px){.highlight-guides .entry .thumbnail{-ms-flex-direction:column;-webkit-flex-direction:column;flex-direction:column;-ms-align-self:center;-webkit-align-self:center;align-self:center;-ms-flex-item-align:center;height:210px;width:280px;overflow:hidden;margin:1em}}@media only screen and (min-width:600px) and (max-width:740px){.highlight-guides .entry .thumbnail{-ms-flex-direction:column;-webkit-flex-direction:column;flex-direction:column;overflow:hidden;margin:1em;-ms-align-self:center;-webkit-align-self:center;align-self:center;-ms-flex-item-align:center}}@media only screen and (max-width:1000px){.highlight-guides .entry .thumbnail{float:none}}.highlight-guides .entry .thumbnail img{position:relative;min-width:100%;height:100%;z-index:1}@media only screen and (max-width:599px){.highlight-guides .entry .thumbnail img{min-height:100%}}.highlight-guides .entry .thumbnail .featured-icon{padding:8px;border:none;position:absolute;top:0;left:0;z-index:2;color:#fff;background-color:#3b82f6;border-top-left-radius:4px;border-bottom-right-radius:4px}@media only screen and (max-width:599px){.highlight-guides .entry h3,.highlight-guides .entry p{margin:0;background:rbga(255,255,255,.9)}}.highlight-guides .entry h3{font-weight:600;font-size:18px;color:#212426;margin:2px 0 5px}.highlight-guides .entry p{color:#4b5563;margin:8px 0 0}.store-feature .grid,.subcategorySection .grid{display:-ms-grid;display:grid;gap:8px;grid-template-columns:repeat(auto-fill,minmax(178px,1fr))}@media all and (-ms-high-contrast:none){.store-feature .grid,.subcategorySection .grid{display:-ms-flexbox;display:flex;flex-wrap:wrap;margin-right:-4px;margin-left:-4px}.store-feature .grid>*,.subcategorySection .grid>*{margin:4px}}@media only screen and (max-width:599px){.subcategorySection{width:100%;margin:0 auto}}.store-feature h5.title{background:rgba(255,255,255,.9)}.store-image{width:178px;max-height:178px}@media only screen and (max-width:599px){.store-cell{width:100%;height:200px}}.store-cell .price{position:absolute;right:0;top:0;height:27px;line-height:27px;background-color:#e83d16;padding:0 10px;color:#fff;font-size:1.4em;font-weight:400;border-bottom-left-radius:3px}#devicePartsList ol,#devicePartsList ul{padding-left:0;width:22%}@media only screen and (max-width:599px){#devicePartsList ol,#devicePartsList ul{padding-left:0;margin-left:0;width:100%}}.topic #questions table{margin-top:0}.topic #questions table tr:first-of-type td{border-top:0}.topic .blurbListWide{width:140%;margin-left:-20%}.wikiPreview .topic .blurbListWide{margin-left:inherit;width:100%} -------------------------------------------------------------------------------- /offliner-definition.json: -------------------------------------------------------------------------------- 1 | { 2 | "offliner_id": "ifixit", 3 | "stdOutput": true, 4 | "stdStats": true, 5 | "flags": { 6 | "language": { 7 | "type": "string", 8 | "required": true, 9 | "title": "Language", 10 | "description": "iFixIt website to build from" 11 | }, 12 | "name": { 13 | "type": "string", 14 | "required": false, 15 | "title": "Name", 16 | "description": "ZIM name. Used as identifier and filename (date will be appended). Constructed from language if not supplied" 17 | }, 18 | "title": { 19 | "type": "string", 20 | "required": false, 21 | "title": "Title", 22 | "description": "Custom title for your ZIM. iFixIt homepage title otherwise", 23 | "minLength": 1, 24 | "maxLength": 30 25 | }, 26 | "description": { 27 | "type": "string", 28 | "required": false, 29 | "title": "Description", 30 | "description": "Custom description for your ZIM. iFixIt homepage description (meta) otherwise", 31 | "minLength": 1, 32 | "maxLength": 80 33 | }, 34 | "icon": { 35 | "type": "blob", 36 | "kind": "image", 37 | "required": false, 38 | "title": "Icon", 39 | "description": "Custom Icon for your ZIM (URL). iFixit square logo otherwise" 40 | }, 41 | "creator": { 42 | "type": "string", 43 | "required": false, 44 | "title": "Creator", 45 | "description": "Name of content creator. “iFixit” otherwise" 46 | }, 47 | "publisher": { 48 | "type": "string", 49 | "required": false, 50 | "title": "Publisher", 51 | "isPublisher": true, 52 | "description": "Custom publisher name (ZIM metadata). openZIM otherwise" 53 | }, 54 | "tags": { 55 | "type": "string", 56 | "required": false, 57 | "title": "ZIM Tags", 58 | "description": "List of semi-colon-separated Tags for the ZIM file. _category:ifixit and ifixit added automatically" 59 | }, 60 | "output": { 61 | "type": "string", 62 | "required": false, 63 | "title": "Output folder", 64 | "description": "Output folder for ZIM file(s). Leave it as `/output`", 65 | "pattern": "^/output$" 66 | }, 67 | "tmp_dir": { 68 | "type": "string", 69 | "required": false, 70 | "title": "Temp folder", 71 | "description": "Where to create temporay build folder. Leave it as `/output`", 72 | "pattern": "^/output$" 73 | }, 74 | "zim_file": { 75 | "type": "string", 76 | "required": false, 77 | "title": "ZIM filename", 78 | "description": "ZIM file name (based on --name if not provided). Include {period} to insert date period dynamically", 79 | "pattern": "^([a-z0-9\\-\\.]+_)([a-z\\-]+_)([a-z0-9\\-\\.]+_)([a-z0-9\\-\\.]+_|)([\\d]{4}-[\\d]{2}|\\{period\\}).zim$" 80 | }, 81 | "optimization_cache": { 82 | "type": "url", 83 | "secret": true, 84 | "required": false, 85 | "title": "Optimization Cache URL", 86 | "description": "S3 Storage URL including credentials and bucket" 87 | }, 88 | "stats_filename": { 89 | "type": "string", 90 | "required": false, 91 | "title": "Stats filename", 92 | "description": "Scraping progress file. Leave it as `/output/task_progress.json`", 93 | "pattern": "^/output/task_progress\\.json$" 94 | }, 95 | "debug": { 96 | "type": "boolean", 97 | "required": false, 98 | "title": "Debug", 99 | "description": "Enable verbose output" 100 | }, 101 | "delay": { 102 | "type": "float", 103 | "required": false, 104 | "title": "Delay", 105 | "description": "Add this delay (seconds) before each request to please iFixit servers. Can be fractions. Defaults to 0: no delay", 106 | "min": 0 107 | }, 108 | "api_delay": { 109 | "type": "float", 110 | "required": false, 111 | "title": "API Delay", 112 | "description": "Add this delay (seconds) before each API query (!= calls) to please iFixit servers. Can be fractions. Defaults to 0: no delay", 113 | "min": 0 114 | }, 115 | "cdn_delay": { 116 | "type": "float", 117 | "required": false, 118 | "title": "CDN Delay", 119 | "description": "Add this delay (seconds) before each CDN file download to please iFixit servers. Can be fractions. Defaults to 0: no delay", 120 | "min": 0 121 | }, 122 | "max_missing_items_percent": { 123 | "type": "integer", 124 | "required": false, 125 | "title": "Max Missing Items", 126 | "description": "Amount of missing items which will force the scraper to stop, expressed as a percentage of the total number of items to retrieve. Integer from 1 to 100", 127 | "min": 1, 128 | "max": 100 129 | }, 130 | "max_error_items_percent": { 131 | "type": "integer", 132 | "required": false, 133 | "title": "Max Error Items", 134 | "description": "Amount of items with failed processing which will force the scraper to stop, expressed as a percentage of the total number of items to retrieve. Integer from 1 to 100", 135 | "min": 1, 136 | "max": 100 137 | }, 138 | "categories": { 139 | "type": "string", 140 | "required": false, 141 | "title": "Categories", 142 | "description": "Only scrape those categories (comma-separated). Specify the category names" 143 | }, 144 | "no_category": { 145 | "type": "boolean", 146 | "required": false, 147 | "title": "No category", 148 | "description": "Do not scrape any category" 149 | }, 150 | "guide": { 151 | "type": "string", 152 | "required": false, 153 | "title": "Guides", 154 | "description": "Only scrape this guide (comma-separated)). Specify the guide names" 155 | }, 156 | "no_guide": { 157 | "type": "boolean", 158 | "required": false, 159 | "title": "No guide", 160 | "description": "Do not scrape any guide" 161 | }, 162 | "info": { 163 | "type": "string", 164 | "required": false, 165 | "title": "Info", 166 | "description": "Only scrape this info (comma-separated)). Specify the info names" 167 | }, 168 | "no_info": { 169 | "type": "boolean", 170 | "required": false, 171 | "title": "No info", 172 | "description": "Do not scrape any info" 173 | } 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /src/ifixit2zim/scraper_user.py: -------------------------------------------------------------------------------- 1 | import urllib.parse 2 | 3 | from ifixit2zim.constants import UNKNOWN_TITLE, USER_LABELS 4 | from ifixit2zim.context import Context 5 | from ifixit2zim.exceptions import UnexpectedDataKindExceptionError 6 | from ifixit2zim.scraper_generic import ScraperGeneric 7 | from ifixit2zim.shared import logger 8 | 9 | 10 | class ScraperUser(ScraperGeneric): 11 | def __init__(self, context: Context): 12 | super().__init__(context) 13 | self.user_id_to_titles = {} 14 | 15 | def setup(self): 16 | self.user_template = self.env.get_template("user.html") 17 | 18 | def get_items_name(self): 19 | return "user" 20 | 21 | def _add_user_to_scrape(self, userid, usertitle, is_expected): 22 | self.add_item_to_scrape( 23 | userid, 24 | { 25 | "userid": userid, 26 | "usertitle": usertitle, 27 | }, 28 | is_expected, 29 | warn_unexpected=False, 30 | ) 31 | if userid in self.user_id_to_titles: 32 | self.user_id_to_titles[userid].append(usertitle) 33 | else: 34 | self.user_id_to_titles[userid] = [usertitle] 35 | 36 | def _build_user_path(self, userid, usertitle): 37 | href = ( 38 | self.configuration.main_url.geturl() 39 | + f"/User/{userid}/{usertitle.replace('/', ' ')}" 40 | ) 41 | final_href = self.processor.normalize_href(href) 42 | return final_href[1:] 43 | 44 | def get_user_link_from_obj(self, user): 45 | if "userid" not in user or not user["userid"]: 46 | raise UnexpectedDataKindExceptionError( 47 | f"Impossible to extract user id from {user}" 48 | ) 49 | userid = user["userid"] 50 | usertitle = user["username"] 51 | if not usertitle: 52 | usertitle = "User" 53 | # override unknown title if needed 54 | if ( 55 | userid in self.expected_items_keys 56 | and self.expected_items_keys[userid]["usertitle"] == UNKNOWN_TITLE 57 | ): 58 | self.expected_items_keys[userid]["usertitle"] = usertitle 59 | return self.get_user_link_from_props(userid=userid, usertitle=usertitle) 60 | 61 | def get_user_link_from_props(self, userid, usertitle): 62 | user_path = urllib.parse.quote( 63 | self._build_user_path(userid=userid, usertitle=usertitle) 64 | ) 65 | if self.configuration.no_user: 66 | return f"home/not_scrapped?url={user_path}" 67 | if self.configuration.users and str(userid) not in self.configuration.users: 68 | return f"home/not_scrapped?url={user_path}" 69 | self._add_user_to_scrape(userid, usertitle, False) 70 | return user_path 71 | 72 | def build_expected_items(self): 73 | if self.configuration.no_user: 74 | logger.info("No user required") 75 | return 76 | if self.configuration.users: 77 | logger.info("Adding required users as expected") 78 | for userid in self.configuration.users: 79 | self._add_user_to_scrape(userid, UNKNOWN_TITLE, True) 80 | return 81 | # WE DO NOT BUILD A LIST OF EXPECTED USERS, THE LIST IS WAY TOO BIG WITH LOTS 82 | # OF USERS WHICH DID NOT CONTRIBUTED AND ARE HENCE NOT NEEDED IN THE ARCHIVE 83 | # logger.info("Downloading list of user") 84 | # limit = 200 85 | # offset = 0 86 | # while True: 87 | # users = get_api_content("/users", limit=limit, offset=offset) 88 | # if len(users) == 0: 89 | # break 90 | # for user in users: 91 | # userid = user["userid"] 92 | # self._add_user_to_scrape(userid, True) 93 | # offset += limit 94 | # logger.info("{} user found".format(len(self.expected_items_keys))) 95 | 96 | def get_one_item_content(self, item_key, _): # ARG002 97 | userid = item_key 98 | user_content = self.utils.get_api_content(f"/users/{userid}") 99 | # other content is available in other endpoints, but not retrieved for now 100 | # (badges: not easy to process ; guides: does not seems to work properly) 101 | return user_content 102 | 103 | def add_item_redirect(self, _, item_data, redirect_kind): 104 | userid = item_data["userid"] 105 | usertitle = item_data["usertitle"] 106 | if usertitle == UNKNOWN_TITLE: 107 | logger.warning(f"Cannot add redirect for user {userid} in error") 108 | return 109 | path = self._build_user_path(userid, usertitle) 110 | self.processor.add_redirect( 111 | path=path, 112 | target_path=f"home/{redirect_kind}?{urllib.parse.urlencode({'url':path})}", 113 | ) 114 | 115 | def process_one_item(self, _, item_data, item_content): 116 | userid = item_data["userid"] 117 | usertitle = item_data["usertitle"] 118 | user_content = item_content 119 | 120 | user_rendered = self.user_template.render( 121 | user=user_content, 122 | label=USER_LABELS[self.configuration.lang_code], 123 | metadata=self.metadata, 124 | ) 125 | 126 | normal_path = self._build_user_path( 127 | userid=user_content["userid"], 128 | usertitle=user_content["username"], 129 | ) 130 | self.processor.add_html_item( 131 | path=normal_path, 132 | title=user_content["username"], 133 | content=user_rendered, 134 | is_front=False, 135 | ) 136 | 137 | for other_user_title in self.user_id_to_titles[userid]: 138 | if other_user_title == UNKNOWN_TITLE: 139 | continue 140 | if other_user_title == usertitle: 141 | continue 142 | alternate_path = self._build_user_path( 143 | userid=userid, 144 | usertitle=other_user_title, 145 | ) 146 | logger.debug( 147 | "Adding user redirect for alternate user path from " 148 | f"{alternate_path} to {normal_path}" 149 | ) 150 | self.processor.add_redirect( 151 | path=alternate_path, 152 | target_path=normal_path, 153 | ) 154 | -------------------------------------------------------------------------------- /src/ifixit2zim/scraper_generic.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from queue import Queue 3 | 4 | from schedule import run_pending 5 | 6 | from ifixit2zim.context import Context 7 | from ifixit2zim.exceptions import FinalScrapingFailureError 8 | from ifixit2zim.shared import logger 9 | 10 | FIRST_ITEMS_COUNT = 5 11 | 12 | 13 | class ScraperGeneric(ABC): 14 | def __init__(self, context: Context): 15 | self.context = context 16 | self.expected_items_keys = {} 17 | self.unexpected_items_keys = {} 18 | self.items_queue = Queue() 19 | self.missing_items_keys = set() 20 | self.error_items_keys = set() 21 | 22 | @property 23 | def configuration(self): 24 | return self.context.configuration 25 | 26 | @property 27 | def utils(self): 28 | return self.context.utils 29 | 30 | @property 31 | def metadata(self): 32 | return self.context.metadata 33 | 34 | @property 35 | def env(self): 36 | return self.context.env 37 | 38 | @property 39 | def lock(self): 40 | return self.context.lock 41 | 42 | @property 43 | def creator(self): 44 | return self.context.creator 45 | 46 | @property 47 | def processor(self): 48 | return self.context.processor 49 | 50 | @abstractmethod 51 | def setup(self): 52 | pass 53 | 54 | @abstractmethod 55 | def get_items_name(self): 56 | pass 57 | 58 | @abstractmethod 59 | def build_expected_items(self): 60 | pass 61 | 62 | @abstractmethod 63 | def get_one_item_content(self, item_key, item_data): 64 | pass 65 | 66 | @abstractmethod 67 | def add_item_redirect(self, item_key, item_data, redirect_kind): 68 | pass 69 | 70 | @abstractmethod 71 | def process_one_item(self, item_key, item_data, item_content): 72 | pass 73 | 74 | def add_item_to_scrape( 75 | self, item_key, item_data, is_expected, *, warn_unexpected=True 76 | ): 77 | item_key = str(item_key) # just in case it's an int 78 | if ( 79 | item_key in self.expected_items_keys 80 | or item_key in self.unexpected_items_keys 81 | ): 82 | return 83 | if is_expected: 84 | logger.debug(f"Adding {self.get_items_name()} {item_key} to scraping queue") 85 | self.expected_items_keys[item_key] = item_data 86 | else: 87 | message = ( 88 | f"Adding unexpected {self.get_items_name()} {item_key} " 89 | "to scraping queue" 90 | ) 91 | if warn_unexpected: 92 | logger.warning(message) 93 | else: 94 | logger.debug(message) 95 | self.unexpected_items_keys[item_key] = item_data 96 | self.items_queue.put( 97 | { 98 | "key": item_key, 99 | "data": item_data, 100 | } 101 | ) 102 | 103 | def add_item_missing_redirect(self, item_key, item_data): 104 | self.add_item_redirect(item_key, item_data, "missing") 105 | 106 | def add_item_error_redirect(self, item_key, item_data): 107 | try: 108 | self.add_item_redirect(item_key, item_data, "error") 109 | except Exception: 110 | logger.warning("Failed to add redirect for item in error") 111 | pass # ignore exceptions, we are already inside an exception handling 112 | 113 | def scrape_one_item(self, item_key, item_data): 114 | item_content = self.get_one_item_content(item_key, item_data) 115 | 116 | if item_content is None: 117 | logger.warning(f"Missing {self.get_items_name()} {item_key}") 118 | self.missing_items_keys.add(item_key) 119 | self.add_item_missing_redirect(item_key, item_data) 120 | return 121 | 122 | logger.debug(f"Processing {self.get_items_name()} {item_key}") 123 | 124 | self.process_one_item(item_key, item_data, item_content) 125 | 126 | def scrape_items(self): 127 | logger.info( 128 | f"Scraping {self.get_items_name()} items ({self.items_queue.qsize()}" 129 | " items remaining)" 130 | ) 131 | 132 | num_items = 1 133 | while not self.items_queue.empty(): 134 | run_pending() 135 | if ( 136 | self.configuration.scrape_only_first_items 137 | and num_items > FIRST_ITEMS_COUNT 138 | ): 139 | break 140 | item = self.items_queue.get(block=False) 141 | item_key = item["key"] 142 | item_data = item["data"] 143 | logger.info( 144 | f" Scraping {self.get_items_name()} {item_key}" 145 | f" ({self.items_queue.qsize()} items remaining)" 146 | ) 147 | try: 148 | self.scrape_one_item(item_key, item_data) 149 | except Exception as exc: 150 | self.error_items_keys.add(item_key) 151 | logger.warning( 152 | f"Error while processing {self.get_items_name()} {item_key}", 153 | exc_info=exc, 154 | ) 155 | self.add_item_error_redirect(item_key, item_data) 156 | finally: 157 | if ( 158 | len(self.missing_items_keys) 159 | * 100 160 | / (len(self.expected_items_keys) + len(self.unexpected_items_keys)) 161 | > self.configuration.max_missing_items_percent 162 | ): 163 | raise FinalScrapingFailureError( 164 | f"Too many {self.get_items_name()}s found missing: " 165 | f"{len(self.missing_items_keys)}" 166 | ) 167 | if ( 168 | len(self.error_items_keys) 169 | * 100 170 | / (len(self.expected_items_keys) + len(self.unexpected_items_keys)) 171 | > self.configuration.max_error_items_percent 172 | ): 173 | raise FinalScrapingFailureError( 174 | f"Too many {self.get_items_name()}s failed to be processed: " 175 | f"{len(self.error_items_keys)}" 176 | ) 177 | num_items += 1 178 | -------------------------------------------------------------------------------- /src/ifixit2zim/assets/core-primitives-F5WnAWhrwpl7oCtqtgogQQ.css: -------------------------------------------------------------------------------- 1 | /*! File: Shared/core-primitives.css */:root{--color-black:#000;--color-white:#fff;--color-rose-50:#fff1f2;--color-rose-100:#ffe4e6;--color-rose-200:#fecdd3;--color-rose-300:#fda4af;--color-rose-400:#fb7185;--color-rose-500:#f43f5e;--color-rose-600:#e11d48;--color-rose-700:#be123c;--color-rose-800:#9f1239;--color-rose-900:#881337;--color-pink-50:#fdf2f8;--color-pink-100:#fce7f3;--color-pink-200:#fbcfe8;--color-pink-300:#f9a8d4;--color-pink-400:#f472b6;--color-pink-500:#ec4899;--color-pink-600:#db2777;--color-pink-700:#be185d;--color-pink-800:#9d174d;--color-pink-900:#831843;--color-fuchsia-50:#fdf4ff;--color-fuchsia-100:#fae8ff;--color-fuchsia-200:#f5d0fe;--color-fuchsia-300:#f0abfc;--color-fuchsia-400:#e879f9;--color-fuchsia-500:#d946ef;--color-fuchsia-600:#c026d3;--color-fuchsia-700:#a21caf;--color-fuchsia-800:#86198f;--color-fuchsia-900:#701a75;--color-purple-50:#faf5ff;--color-purple-100:#f3e8ff;--color-purple-200:#e9d5ff;--color-purple-300:#d8b4fe;--color-purple-400:#c084fc;--color-purple-500:#a855f7;--color-purple-600:#9333ea;--color-purple-700:#7e22ce;--color-purple-800:#6b21a8;--color-purple-900:#581c87;--color-violet-50:#f5f3ff;--color-violet-100:#ede9fe;--color-violet-200:#ddd6fe;--color-violet-300:#c4b5fd;--color-violet-400:#a78bfa;--color-violet-500:#8b5cf6;--color-violet-600:#7c3aed;--color-violet-700:#6d28d9;--color-violet-800:#5b21b6;--color-violet-900:#4c1d95;--color-indigo-50:#eef2ff;--color-indigo-100:#e0e7ff;--color-indigo-200:#c7d2fe;--color-indigo-300:#a5b4fc;--color-indigo-400:#818cf8;--color-indigo-500:#6366f1;--color-indigo-600:#4f46e5;--color-indigo-700:#4338ca;--color-indigo-800:#3730a3;--color-indigo-900:#312e81;--color-blue-50:#eff6ff;--color-blue-100:#dbeafe;--color-blue-200:#bfdbfe;--color-blue-300:#93c5fd;--color-blue-400:#60a5fa;--color-blue-500:#3b82f6;--color-blue-600:#2563eb;--color-blue-700:#1d4ed8;--color-blue-800:#1e40af;--color-blue-900:#1e3a8a;--color-blue-ifixit:#1975F1;--color-light-blue-50:#f0f9ff;--color-light-blue-100:#e0f2fe;--color-light-blue-200:#bae6fd;--color-light-blue-300:#7dd3fc;--color-light-blue-400:#38bdf8;--color-light-blue-500:#0ea5e9;--color-light-blue-600:#0284c7;--color-light-blue-700:#0369a1;--color-light-blue-800:#075985;--color-light-blue-900:#0c4a6e;--color-cyan-50:#ecfeff;--color-cyan-100:#cffafe;--color-cyan-200:#a5f3fc;--color-cyan-300:#67e8f9;--color-cyan-400:#22d3ee;--color-cyan-500:#06b6d4;--color-cyan-600:#0891b2;--color-cyan-700:#0e7490;--color-cyan-800:#155e75;--color-cyan-900:#164e63;--color-teal-50:#f0fdfa;--color-teal-100:#ccfbf1;--color-teal-200:#99f6e4;--color-teal-300:#5eead4;--color-teal-400:#2dd4bf;--color-teal-500:#14b8a6;--color-teal-600:#0d9488;--color-teal-700:#0f766e;--color-teal-800:#115e59;--color-teal-900:#134e4a;--color-emerald-50:#ecfdf5;--color-emerald-100:#d1fae5;--color-emerald-200:#a7f3d0;--color-emerald-300:#6ee7b7;--color-emerald-400:#34d399;--color-emerald-500:#10b981;--color-emerald-600:#059669;--color-emerald-700:#047857;--color-emerald-800:#065f46;--color-emerald-900:#064e3b;--color-green-50:#f0fdf4;--color-green-100:#dcfce7;--color-green-200:#bbf7d0;--color-green-300:#86efac;--color-green-400:#4ade80;--color-green-500:#22c55e;--color-green-600:#16a34a;--color-green-700:#15803d;--color-green-800:#166534;--color-green-900:#14532d;--color-lime-50:#f7fee7;--color-lime-100:#ecfccb;--color-lime-200:#d9f99d;--color-lime-300:#bef264;--color-lime-400:#a3e635;--color-lime-500:#84cc16;--color-lime-600:#65a30d;--color-lime-700:#4d7c0f;--color-lime-800:#3f6212;--color-lime-900:#365314;--color-yellow-50:#fefce8;--color-yellow-100:#fef9c3;--color-yellow-200:#fef08a;--color-yellow-300:#fde047;--color-yellow-400:#facc15;--color-yellow-500:#eab308;--color-yellow-600:#ca8a04;--color-yellow-700:#a16207;--color-yellow-800:#854d0e;--color-yellow-900:#713f12;--color-amber-50:#fffbeb;--color-amber-100:#fef3c7;--color-amber-200:#fde68a;--color-amber-300:#fcd34d;--color-amber-400:#fbbf24;--color-amber-500:#f59e0b;--color-amber-600:#d97706;--color-amber-700:#b45309;--color-amber-800:#92400e;--color-amber-900:#78350f;--color-orange-50:#fff7ed;--color-orange-100:#ffedd5;--color-orange-200:#fed7aa;--color-orange-300:#fdba74;--color-orange-400:#fb923c;--color-orange-500:#f97316;--color-orange-600:#ea580c;--color-orange-700:#c2410c;--color-orange-800:#9a3412;--color-orange-900:#7c2d12;--color-red-50:#fef2f2;--color-red-100:#fee2e2;--color-red-200:#fecaca;--color-red-300:#fca5a5;--color-red-400:#f87171;--color-red-500:#ef4444;--color-red-600:#dc2626;--color-red-700:#b91c1c;--color-red-800:#991b1b;--color-red-900:#7f1d1d;--color-gray-50:#f9fafb;--color-gray-100:#f3f4f6;--color-gray-200:#e5e7eb;--color-gray-300:#d1d5db;--color-gray-400:#9ca3af;--color-gray-500:#6b7280;--color-gray-600:#4b5563;--color-gray-700:#374151;--color-gray-800:#1f2937;--color-gray-900:#111827;--space-0:0;--space-1:4px;--space-2:8px;--space-3:12px;--space-4:16px;--space-5:24px;--space-6:32px;--space-7:40px;--space-8:48px;--space-9:64px;--space-10:80px;--space-11:96px;--space-12:112px;--space-13:128px;--font-family-arial-black:'Arial Black','Arial Bold',Gadget,sans-serif;--font-family-inter:Inter,-apple-system,'Segoe UI',Helvetica,Arial,sans-serif;--font-family-lato:Lato,-apple-system,'Segoe UI',Helvetica,Arial,sans-serif;--font-family-mono:'Roboto Mono',Monaco,'Lucida Console','Courier New',Courier,monospace;--font-family-mono-system:Monaco,'Lucida Console','Courier New',Courier,monospace;--font-family-sans-system:-apple-system,'Segoe UI',Helvetica,Arial,sans-serif;--font-family-serif-system:Georgia,serif;--font-settings-inter:'calt','cpsp','cv01','cv03','cv04','cv05','cv10','kern','liga';--font-size-sm:12px;--font-size-md:14px;--font-size-lg:16px;--font-size-xl:18px;--font-size-2xl:20px;--font-size-3xl:24px;--font-size-4xl:30px;--font-size-5xl:36px;--font-size-6xl:48px;--font-size-7xl:56px;--font-size-8xl:64px;--font-size-9xl:72px;--font-weight-normal:400;--font-weight-semi-bold:600;--font-weight-bold:700;--line-height-none:1;--line-height-base:1.142857143;--line-height-tight:1.25;--line-height-normal:1.42857;--line-height-loose:2;--breakpoint-sm:576px;--breakpoint-md:768px;--breakpoint-lg:1000px;--breakpoint-xl:1200px;--shadow-0:0 0 1px rgba(17, 22, 26, 0.2);--shadow-1:0 1px 2px rgba(17, 22, 26, 0.1),0 2px 4px rgba(17, 22, 26, 0.1);--shadow-2:0 1px 2px rgba(17, 22, 26, 0.1),0 4px 8px rgba(17, 22, 26, 0.1);--shadow-3:0 2px 4px rgba(17, 22, 26, 0.1),0 8px 16px rgba(17, 22, 26, 0.2);--shadow-4:0 4px 8px rgba(17, 22, 26, 0.1),0 16px 32px rgba(17, 22, 26, 0.2);--border-radius-sm:2px;--border-radius-md:4px;--border-radius-lg:8px;--border-radius-xl:16px;--border-radius-pill:999em} -------------------------------------------------------------------------------- /src/ifixit2zim/assets/prosemirror-all-_OBJ3KkZRD0uygPKzpMb8Q.css: -------------------------------------------------------------------------------- 1 | /*! File: Shared/prosemirror.less */.ProseMirror-menubar-wrapper .ProseMirror{margin-top:8px;padding:12px 16px;line-height:1.2;min-height:120px;outline:0 solid transparent;background:#fff;border:1px solid #d1d5db;border-radius:4px;color:#1f2937}.ProseMirror-menubar-wrapper .ProseMirror p:first-of-type{margin:0}.ProseMirror-menubar-wrapper .ProseMirror .widget-container{display:inline-block;position:absolute;z-index:10;min-width:fit-content;min-width:350px;width:60%;left:20%;user-select:none}.ProseMirror-menubar-wrapper .ProseMirror .widget-container .prosemirror-link-editor{margin-top:-115px;padding:10px;background-color:#f9fafb;border:1px solid #a8b4c4;border-radius:2px;box-shadow:0 -5px 15px 10px rgba(0,3,6,.07)}.ProseMirror-menubar-wrapper .ProseMirror .widget-container input{left:-133px;bottom:.75em;visibility:hidden;width:100%;padding:5px;border:1px solid #d1d5db;border-radius:4px;color:rgba(0,3,6,.74);font-size:12px}.ProseMirror-menubar-wrapper .ProseMirror .widget-container .link-edit-buttongroup{display:flex;-ms-justify-content:flex-end;-webkit-justify-content:flex-end;justify-content:flex-end;-ms-flex-pack:end;-ms-flex-direction:row;-webkit-flex-direction:row;flex-direction:row}.ProseMirror-menubar-wrapper .ProseMirror .widget-container .link-edit-buttongroup .link-edit-button{margin:5px;margin-top:15px;white-space:nowrap}.ProseMirror-menubar{display:flex;align-items:center;position:relative;z-index:10;top:0;left:0;right:0;flex-wrap:wrap;-moz-box-sizing:border-box;box-sizing:border-box;border-top-left-radius:inherit;border-top-right-radius:inherit;border:1px solid #e5e7eb;border-radius:4px;background-color:#f9fafb;padding:6px 8px;font-size:16px;line-height:0;overflow:visible;outline-style:none}.ProseMirror-menubar .ProseMirror-menuseparator{margin:0 8px;border:1px solid #e5e7eb;display:inline-block;height:24px}.ProseMirror-menuitem{display:inline-block;line-height:0;margin:0 8px}.ProseMirror-icon{line-height:0;display:inline-block;cursor:pointer;position:relative}.ProseMirror-icon .tooltiptext{opacity:0;pointer-events:none;position:absolute;bottom:120%;left:-1px;background-color:#000;color:#fff;text-align:center;padding:5px 10px;white-space:nowrap;border-style:none;border-radius:4px;font-size:16px;font-family:-apple-system,'Segoe UI',Helvetica,Arial,sans-serif}@media only screen and (max-width:599px){.ProseMirror-icon .tooltiptext{display:none}}.ProseMirror-icon .tooltiptext:after{content:' ';position:absolute;top:100%;left:15px;margin-left:-6px;border-width:6px;border-style:solid;border-color:#000 transparent transparent transparent}.ProseMirror-icon .flipped-tooltip{bottom:-220%}.ProseMirror-icon .flipped-tooltip:after{top:-35%;border-color:transparent transparent #000 transparent}.ProseMirror-icon:hover .tooltiptext{opacity:1;transition:opacity .2s .2s ease}.ProseMirror-icon svg{height:1em}.ProseMirror-icon span{vertical-align:text-top}.ProseMirror-icon-reverse>svg{transform:scaleX(-1)}.ProseMirror-icon,.ProseMirror-menu-dropdown{color:#6b7280}.menubar-header{font-size:26px;font-weight:700;color:#212426}.menubar-subheader{font-size:20px;font-weight:600;color:#4b5563}.menubar-code{font-family:Monaco,'Lucida Console','Courier New',Courier,monospace}.tooltiptext{font-size:16px;font-weight:400;color:#6b7280}.ProseMirror-menu-disabled{color:#d1d5db}.ProseMirror-menu-disabled.ProseMirror-icon{cursor:default}.ProseMirror-menu-active{background-color:rgba(0,3,6,.07);border-radius:4px}.prosemirror{outline-style:none;position:relative;min-height:166px}.ProseMirror-textblock-dropdown{min-width:3em}.ProseMirror-menu{margin:0 -4px;line-height:1}.ProseMirror-tooltip .ProseMirror-menu{width:-webkit-fit-content;width:fit-content;white-space:pre}.ProseMirror-menu-dropdown,.ProseMirror-menu-dropdown-menu{font-size:90%;white-space:nowrap}.ProseMirror-menu-dropdown{display:flex;cursor:pointer;position:relative;padding-right:16px}.ProseMirror-menu-dropdown svg{margin-right:4px}.ProseMirror-menu-dropdown:after{content:'';border-left:4px solid transparent;border-right:4px solid transparent;border-top:5px solid currentColor;position:absolute;right:4px;top:calc(38%)}.ProseMirror-menu-dropdown-wrap{display:inline-block;position:relative}.ProseMirror-menu-dropdown-menu,.ProseMirror-menu-submenu{position:absolute;background:#fff;color:#666;border:1px solid #aaa;padding:2px}.ProseMirror-menu-dropdown-menu{z-index:15;min-width:6em}.ProseMirror-menu-dropdown-item{cursor:pointer;padding:2px 8px 2px 4px}.ProseMirror-menu-dropdown-item:hover{background:#f2f2f2}.ProseMirror-menu-submenu-wrap{position:relative;margin-right:-4px}.ProseMirror-menu-submenu-label:after{content:'';border-top:4px solid transparent;border-bottom:4px solid transparent;border-left:4px solid currentColor;color:rgba(0,0,0,.6);position:absolute;right:4px;top:calc(46%)}.ProseMirror-menu-submenu{display:none;min-width:4em;left:100%;top:-3px}.ProseMirror-menu-submenu-wrap-active .ProseMirror-menu-submenu,.ProseMirror-menu-submenu-wrap:hover .ProseMirror-menu-submenu{display:block}.ProseMirror{position:relative;word-wrap:break-word;white-space:pre-wrap;-webkit-font-variant-ligatures:none;font-variant-ligatures:none}.ProseMirror ol,.ProseMirror ul{padding-left:30px}.ProseMirror blockquote{padding-left:1em;border-left:3px solid #eee;margin-left:0;margin-right:0}.ProseMirror-hideselection ::selection{background:0 0}.ProseMirror-hideselection ::-moz-selection{background:0 0}.ProseMirror-selectednode{outline:2px solid #8cf}li.ProseMirror-selectednode{outline:0}li.ProseMirror-selectednode:after{content:'';position:absolute;left:-32px;right:-2px;top:-2px;bottom:-2px;border:2px solid #8cf;pointer-events:none}.ProseMirror-example-setup-style hr{padding:2px 10px;border:none;margin:1em 0}.ProseMirror-example-setup-style hr:after{content:'';display:block;height:1px;background-color:silver;line-height:2px}.ProseMirror-example-setup-style img{cursor:default}.ProseMirror-prompt{background:#fff;padding:5px 10px 5px 15px;border:1px solid silver;position:fixed;border-radius:3px;z-index:11;box-shadow:-.5px 2px 5px rgba(0,0,0,.2)}.ProseMirror-prompt h5{margin:0;font-weight:400;font-size:100%;color:#444}.ProseMirror-prompt input[type=text],.ProseMirror-prompt textarea{background:#eee;border:none;outline:0}.ProseMirror-prompt input[type=text]{padding:0 4px}.ProseMirror-prompt-close{position:absolute;left:2px;top:1px;color:#666;border:none;background:0 0;padding:0}.ProseMirror-prompt-close:after{content:'✕';font-size:12px}.ProseMirror-invalid{background:#ffc;border:1px solid #cc7;border-radius:4px;padding:5px 10px;position:absolute;min-width:10em}.ProseMirror-prompt-buttons{margin-top:5px;display:none} -------------------------------------------------------------------------------- /src/ifixit2zim/assets/release-version-orbcTfqm6_JKsoz-PPnHGA.css: -------------------------------------------------------------------------------- 1 | /*! File: release_version.less */.releaseVersionStatus{margin:10px auto;text-align:right}@media only screen and (max-width:1000px){.releaseVersionStatus{margin:0}}.releaseVersionStatus .releaseVersionStatusBody{display:-ms-flexbox;display:-webkit-flex;display:flex;-ms-align-items:center;-webkit-align-items:center;align-items:center;-ms-flex-align:center;height:60px;padding:15px;position:relative;z-index:2;background-color:#fff;border-radius:4px;box-shadow:0 2px 5px 1px rgba(0,0,0,.07)}.releaseVersionStatus .releaseVersionStatusBody>.release-tooltip-group{margin-right:20px}@media only screen and (max-width:599px){.releaseVersionStatus .releaseVersionStatusBody>.release-tooltip-group{margin-right:10px}}.releaseVersionStatus .releaseVersionStatusBody>.release-tooltip-group>p:not(:last-child){-ms-flex:0 1 auto;-webkit-flex:0 1 auto;flex:0 1 auto;-ms-align-items:center;-webkit-align-items:center;align-items:center;-ms-flex-align:center;margin-right:5px}.releaseVersionStatus .releaseVersionStatusBody>div,.releaseVersionStatus .releaseVersionStatusBody>p{-ms-flex:0 1 auto;-webkit-flex:0 1 auto;flex:0 1 auto;-ms-align-items:center;-webkit-align-items:center;align-items:center;-ms-flex-align:center;white-space:nowrap}@media only screen and (max-width:599px){.releaseVersionStatus .releaseVersionStatusBody>div,.releaseVersionStatus .releaseVersionStatusBody>p{margin-right:10px}}.releaseVersionStatus .releaseVersionStatusBody>div:not(:last-child),.releaseVersionStatus .releaseVersionStatusBody>p:not(:last-child){margin-right:20px}.releaseVersionStatus .releaseVersionStatusBody .info-first{margin-right:10px}.releaseVersionStatus .releaseVersionStatusBody .info-first a,.releaseVersionStatus .releaseVersionStatusBody .info-second a{color:inherit}@media only screen and (max-width:1000px){.releaseVersionStatus .releaseVersionStatusBody{border-radius:0;box-shadow:none;border-top:1px solid #e5e7eb;border-bottom:1px solid #e5e7eb}}@media only screen and (max-width:599px){.releaseVersionStatus .releaseVersionStatusBody{-ms-flex-flow:row wrap;-webkit-flex-flow:row wrap;flex-flow:row wrap;height:auto;text-align:left}}@media only screen and (min-width:600px) and (max-width:1000px){.releaseVersionStatus .releaseVersionStatusBody .info-first,.releaseVersionStatus .releaseVersionStatusBody .info-second{margin:0;display:block;text-align:center}}.releaseVersionStatus #releaseVersionSelect{-ms-flex:0 2 auto;-webkit-flex:0 2 auto;flex:0 2 auto;z-index:5;margin:10px 10px 10px 0}@media only screen and (max-width:599px){.releaseVersionStatus .approver,.releaseVersionStatus .releasePublishedDate{-ms-order:2;-webkit-order:2;order:2;width:100%}}.releaseVersionStatus .releaseVersionStatusBody .bar-main-button-group{display:-ms-flexbox;display:-webkit-flex;display:flex;-ms-justify-content:flex-end;-webkit-justify-content:flex-end;justify-content:flex-end;-ms-flex-pack:end;-ms-flex:1 1 auto;-webkit-flex:1 1 auto;flex:1 1 auto}@media only screen and (max-width:599px){.releaseVersionStatus .releaseVersionStatusBody .bar-main-button-group{-ms-justify-content:flex-start;-webkit-justify-content:flex-start;justify-content:flex-start;-ms-flex-pack:start}}.releaseVersionStatus .releaseVersionStatusBody .bar-main-button-group #cancel-release-button{white-space:nowrap}.releaseVersionStatus .releaseVersionStatusBody .bar-main-button{-ms-flex:1 1 auto;-webkit-flex:1 1 auto;flex:1 1 auto;margin-right:0;text-align:right}@media only screen and (max-width:599px){.releaseVersionStatus .releaseVersionStatusBody .bar-main-button{text-align:left;-ms-order:1;-webkit-order:1;order:1}}.releaseVersionStatus .releaseVersionStatusBody .releaseTitle{margin:10px 10px 10px 0;height:30px;max-width:100%}.releaseVersionStatus .releaseVersionStatusBody .releaseTitle .title{text-transform:none;font-weight:700;max-width:100%;white-space:nowrap;overflow:hidden;text-overflow:ellipsis}.releaseVersionStatus .version-type{text-transform:none;cursor:help;vertical-align:middle}@media only screen and (min-width:600px) and (max-width:1000px),only screen and (min-width:1001px){.releaseVersionStatus .version-type .tooltip-text{display:inline-block;margin-left:-1px;width:1px;height:1px;overflow:hidden}}@media only screen and (max-width:599px){.releaseVersionStatus .version-type .version-type-indicator{display:none}}.releaseVersionStatus .draft-available{text-transform:none;cursor:pointer;vertical-align:middle;background-color:#f7d17c;color:#886b2b}.releaseVersionStatus .draft-available .draft-available-indicator{display:-ms-flexbox;display:-webkit-flex;display:flex;-ms-align-items:center;-webkit-align-items:center;align-items:center;-ms-flex-align:center;-ms-justify-content:center;-webkit-justify-content:center;justify-content:center;-ms-flex-pack:center;height:100%;width:100%}.releaseVersionStatus .draft-available .draft-available-indicator>svg{height:.875rem;width:.875rem}@media only screen and (min-width:600px) and (max-width:1000px),only screen and (min-width:1001px){.releaseVersionStatus .draft-available .tooltip-text{display:none}}@media only screen and (max-width:599px){.releaseVersionStatus .draft-available .draft-available-indicator{display:none}}.releaseVersionStatus .releasePublishedDate .date{font-weight:700}.releaseVersionStatus .releasePublishedDate .username{color:#6b7484}.releaseVersionStatus .approver{color:#6b7484}@media only screen and (min-width:600px) and (max-width:1000px){.releaseVersionStatus .approver .highlight-text{margin-bottom:3px}}.detailsContainer{margin-top:-10px;margin-bottom:10px}@media only screen and (max-width:599px){.detailsContainer{margin:0}}.detailsContainer .detailsBody{position:relative;z-index:1;margin:0 10px;padding:30px 30px 10px 30px;border-right:1px solid #e5e7eb;border-left:1px solid #e5e7eb;border-bottom:1px solid #e5e7eb;border-bottom-right-radius:4px;border-bottom-left-radius:4px;background-color:#fff;text-align:left}@media only screen and (max-width:1000px){.detailsContainer .detailsBody{margin:0;border-left:0;border-right:0;border-radius:0;-ms-flex-direction:column;-webkit-flex-direction:column;flex-direction:column;padding:30px 0 10px 0}}.detailsContainer .releaseNotesText,.detailsContainer .signaturesText{-ms-flex:1 1 auto;-webkit-flex:1 1 auto;flex:1 1 auto;width:50%;padding:0 20px}@media only screen and (max-width:1000px){.detailsContainer .releaseNotesText,.detailsContainer .signaturesText{width:auto;padding:0 15px}}.detailsContainer .releaseNotesText{margin-bottom:20px}@media only screen and (max-width:1000px){.detailsContainer .releaseNotesText{-ms-order:-1;-webkit-order:-1;order:-1}}.detailsContainer h4{margin-bottom:20px}.draft-available-tooltip,.version-type-tooltip{margin:0;padding:0 10px;font-weight:700}.release-version-cancelled .option-text.primary{color:#9ca3af!important}.release-version-cancelled .svg-icon svg{fill:#9ca3af} -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "ifixit2zim" 7 | authors = [ 8 | { name = "Kiwix", email = "dev@kiwix.org" }, 9 | ] 10 | keywords = ["kiwix","zim","offline","ifixit"] 11 | requires-python = ">=3.12,<3.13" 12 | description = "Make ZIM file from iFixit guides" 13 | readme = "README.md" 14 | license = {text = "GPL-3.0-or-later"} 15 | classifiers = [ 16 | "Programming Language :: Python :: 3", 17 | "Programming Language :: Python :: 3.12", 18 | "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", 19 | ] 20 | dependencies = [ 21 | "requests==2.31.0", 22 | "zimscraperlib==3.3.1", 23 | "kiwixstorage==0.8.3", 24 | "Jinja2==3.1.3", 25 | "backoff==2.2.1", 26 | "pif==0.8.2", 27 | "schedule==1.2.1", 28 | ] 29 | dynamic = ["version"] 30 | 31 | [project.optional-dependencies] 32 | scripts = [ 33 | "invoke==2.2.0", 34 | ] 35 | lint = [ 36 | "black==24.2.0", 37 | "ruff==0.3.0", 38 | ] 39 | check = [ 40 | "pyright==1.1.352", 41 | ] 42 | test = [ 43 | "pytest==8.0.2", 44 | "coverage==7.4.3", 45 | ] 46 | dev = [ 47 | "pre-commit==3.6.2", 48 | "debugpy==1.8.1", 49 | "ifixit2zim[scripts]", 50 | "ifixit2zim[lint]", 51 | "ifixit2zim[test]", 52 | "ifixit2zim[check]", 53 | ] 54 | 55 | [project.urls] 56 | Homepage = "https://github.com/openzim/ifixit2zim" 57 | Donate = "https://www.kiwix.org/en/support-us/" 58 | 59 | [project.scripts] 60 | ifixit2zim = "ifixit2zim.__main__:main" 61 | 62 | [tool.hatch.version] 63 | path = "src/ifixit2zim/__about__.py" 64 | 65 | [tool.hatch.build] 66 | exclude = [ 67 | "/.github", 68 | ] 69 | 70 | [tool.hatch.build.targets.wheel] 71 | packages = ["src/ifixit2zim"] 72 | 73 | [tool.hatch.envs.default] 74 | features = ["dev"] 75 | 76 | [tool.hatch.envs.test] 77 | features = ["scripts", "test"] 78 | 79 | [tool.hatch.envs.test.scripts] 80 | run = "inv test --args '{args}'" 81 | run-cov = "inv test-cov --args '{args}'" 82 | report-cov = "inv report-cov" 83 | coverage = "inv coverage --args '{args}'" 84 | html = "inv coverage --html --args '{args}'" 85 | 86 | [tool.hatch.envs.lint] 87 | template = "lint" 88 | skip-install = false 89 | features = ["scripts", "lint"] 90 | 91 | [tool.hatch.envs.lint.scripts] 92 | black = "inv lint-black --args '{args}'" 93 | ruff = "inv lint-ruff --args '{args}'" 94 | all = "inv lintall --args '{args}'" 95 | fix-black = "inv fix-black --args '{args}'" 96 | fix-ruff = "inv fix-ruff --args '{args}'" 97 | fixall = "inv fixall --args '{args}'" 98 | 99 | [tool.hatch.envs.check] 100 | features = ["scripts", "check"] 101 | 102 | [tool.hatch.envs.check.scripts] 103 | pyright = "inv check-pyright --args '{args}'" 104 | all = "inv checkall --args '{args}'" 105 | 106 | [tool.black] 107 | line-length = 88 108 | target-version = ['py312'] 109 | 110 | [tool.ruff] 111 | target-version = "py312" 112 | line-length = 88 113 | src = ["src"] 114 | 115 | [tool.ruff.lint] 116 | select = [ 117 | "A", # flake8-builtins 118 | # "ANN", # flake8-annotations 119 | "ARG", # flake8-unused-arguments 120 | # "ASYNC", # flake8-async 121 | "B", # flake8-bugbear 122 | # "BLE", # flake8-blind-except 123 | "C4", # flake8-comprehensions 124 | "C90", # mccabe 125 | # "COM", # flake8-commas 126 | # "D", # pydocstyle 127 | # "DJ", # flake8-django 128 | "DTZ", # flake8-datetimez 129 | "E", # pycodestyle (default) 130 | "EM", # flake8-errmsg 131 | # "ERA", # eradicate 132 | # "EXE", # flake8-executable 133 | "F", # Pyflakes (default) 134 | # "FA", # flake8-future-annotations 135 | "FBT", # flake8-boolean-trap 136 | # "FLY", # flynt 137 | # "G", # flake8-logging-format 138 | "I", # isort 139 | "ICN", # flake8-import-conventions 140 | # "INP", # flake8-no-pep420 141 | # "INT", # flake8-gettext 142 | "ISC", # flake8-implicit-str-concat 143 | "N", # pep8-naming 144 | # "NPY", # NumPy-specific rules 145 | # "PD", # pandas-vet 146 | # "PGH", # pygrep-hooks 147 | # "PIE", # flake8-pie 148 | # "PL", # Pylint 149 | "PLC", # Pylint: Convention 150 | "PLE", # Pylint: Error 151 | "PLR", # Pylint: Refactor 152 | "PLW", # Pylint: Warning 153 | # "PT", # flake8-pytest-style 154 | # "PTH", # flake8-use-pathlib 155 | # "PYI", # flake8-pyi 156 | "Q", # flake8-quotes 157 | # "RET", # flake8-return 158 | # "RSE", # flake8-raise 159 | "RUF", # Ruff-specific rules 160 | "S", # flake8-bandit 161 | # "SIM", # flake8-simplify 162 | # "SLF", # flake8-self 163 | "T10", # flake8-debugger 164 | "T20", # flake8-print 165 | # "TCH", # flake8-type-checking 166 | # "TD", # flake8-todos 167 | "TID", # flake8-tidy-imports 168 | # "TRY", # tryceratops 169 | "UP", # pyupgrade 170 | "W", # pycodestyle 171 | "YTT", # flake8-2020 172 | ] 173 | ignore = [ 174 | # Allow non-abstract empty methods in abstract base classes 175 | "B027", 176 | # Remove flake8-errmsg since we consider they bloat the code and provide limited value 177 | "EM", 178 | # Allow boolean positional values in function calls, like `dict.get(... True)` 179 | "FBT003", 180 | # Ignore checks for possible passwords 181 | "S105", "S106", "S107", 182 | # Ignore warnings on subprocess.run / popen 183 | "S603", 184 | # Ignore complexity 185 | "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915", 186 | ] 187 | unfixable = [ 188 | # Don't touch unused imports 189 | "F401", 190 | ] 191 | 192 | [tool.ruff.lint.isort] 193 | known-first-party = ["ifixit2zim"] 194 | 195 | [tool.ruff.lint.flake8-bugbear] 196 | # add exceptions to B008 for fastapi. 197 | extend-immutable-calls = ["fastapi.Depends", "fastapi.Query"] 198 | 199 | [tool.ruff.lint.flake8-tidy-imports] 200 | ban-relative-imports = "all" 201 | 202 | [tool.ruff.lint.per-file-ignores] 203 | # Tests can use magic values, assertions, and relative imports 204 | "tests/**/*" = ["PLR2004", "S101", "TID252"] 205 | 206 | [tool.pytest.ini_options] 207 | minversion = "7.3" 208 | testpaths = ["tests"] 209 | pythonpath = [".", "src"] 210 | 211 | [tool.coverage.paths] 212 | great_project = ["src/ifixit2zim"] 213 | tests = ["tests"] 214 | 215 | [tool.coverage.run] 216 | source_pkgs = ["ifixit2zim"] 217 | branch = true 218 | parallel = true 219 | omit = [ 220 | "src/ifixit2zim/__about__.py", 221 | ] 222 | 223 | [tool.coverage.report] 224 | exclude_lines = [ 225 | "no cov", 226 | "if __name__ == .__main__.:", 227 | "if TYPE_CHECKING:", 228 | ] 229 | 230 | [tool.pyright] 231 | include = ["src", "tests", "tasks.py"] 232 | exclude = [".env/**", ".venv/**"] 233 | extraPaths = ["src"] 234 | pythonVersion = "3.12" 235 | typeCheckingMode="basic" 236 | disableBytesTypePromotions = true 237 | -------------------------------------------------------------------------------- /src/ifixit2zim/assets/view_profile-LAZ9O7S0EMQ9_BZEO-F8OQ.css: -------------------------------------------------------------------------------- 1 | .titleRow{border-bottom:1px solid #e5e7eb;position:relative;margin-bottom:16px}.titleRow .profileActions{position:absolute;right:4px;top:8px}.titleRow .profileTitle{font-size:28px;margin:3px 0 0}.titleRow .dropdown{cursor:pointer}.titleRow .dropdown-menu{margin-left:-140px}.titleRow .dropdown-menu a{color:#212426}.titleRow .dropdown-menu a:hover{background:#e6e6e6;color:#212426}.buttonGroup{margin-top:16px}.buttonGroup .buttonLinkFull{width:100%}.buttonGroup .buttonLinkLarge{font-size:18px}#aboutBoxSummary{width:100%}#aboutBoxSummary h2{float:left;margin-left:8px}#aboutBoxAvatar{float:left;width:96px;height:96px;margin-right:7px;margin-bottom:10px;border:1px solid #e5e7eb}#aboutBoxAvatar img{display:block}#userStatList{width:150px;margin:0 0 0 8px;vertical-align:middle;float:left}.statNum{font-size:17px;text-align:right;padding-right:7px}td.statNum{width:60px}@media only screen and (max-width:599px){td.statNum{text-align:left;text-align:start}}.memberSince{padding-top:3px;text-align:center}@media only screen and (max-width:599px){.memberSince{text-align:left;text-align:start}}#aboutBoxContributions.userStatList{float:left}.userStatList h4{font-size:8px}#aboutBoxContributions span{width:30px}.aboutContent{display:-ms-flexbox;display:-webkit-flex;display:flex;-ms-flex-direction:column;-webkit-flex-direction:column;flex-direction:column}.avatarContainer{float:right}.repInfo{float:left;font-size:10px;padding-left:10px}.avatarContainer .profileLink{font-size:15px;margin-left:18px}#notesList{padding-left:25px}.noContent{padding-left:15px}div.hrContentMinor{padding-top:10px}.editProfileLink{float:right}#unique-username{font-size:16px;display:block;margin-bottom:5px}.badge-list-container,.badge-list-container:visited{cursor:pointer;display:block;font-size:12px;text-decoration:none;padding-top:8px}.badge-list-container .badge-list,.badge-list-container:visited .badge-list{width:100%;display:table}.badge-list-container .badge-row,.badge-list-container:visited .badge-row{display:table-row}.badge-list-container .badge-container,.badge-list-container:visited .badge-container{display:table-cell;vertical-align:middle;padding:8px 0;color:#212426}.badge-list-container .badge-container:hover,.badge-list-container:visited .badge-container:hover{text-decoration:none;background-color:#f5f9ff;border-radius:6px}@media only screen and (min-width:1001px){.badge-list-container .badge-container,.badge-list-container:visited .badge-container{width:33%}}@media only screen and (min-width:600px) and (max-width:1000px){.badge-list-container .badge-container,.badge-list-container:visited .badge-container{width:50%}}@media only screen and (max-width:599px){.badge-list-container .badge-container,.badge-list-container:visited .badge-container{width:100%}}.badge-list-container span,.badge-list-container:visited span{vertical-align:middle;margin:0;padding:0;height:25px;font-size:12px}.badge-list-container .badge-tier-image,.badge-list-container:visited .badge-tier-image{vertical-align:middle;width:8px;height:8px;margin:0 8px}.badge-list-container .badge-image,.badge-list-container:visited .badge-image{vertical-align:middle;width:20px;height:20px;margin-right:8px}.badge-list-container .badge-counter,.badge-list-container:visited .badge-counter{margin-left:5px;color:#4b5563}.certifications a.buttonLink,a.buttonLinkFocus{display:block;font-size:14px;line-height:1em;padding:5px 5px 6px}#editCertificatesButton{margin:10px 0;width:100%}#downVotes,#upVotes{padding:5px 0 10px;font-size:20px;font-weight:800}#sidebar>h2.primary,.voteHistory,.votes{margin-left:13px;margin-right:13px}.voteHistory .vote-history-username{max-width:135px}#downVote,#upVote{background-image:url("../assets/view-question-20091109.gif");background-repeat:no-repeat;width:16px;height:16px;display:inline-block;margin:0 17px;padding:0;font-size:62.5%}#upVote{background-position:-2px -2px}#downVote{background-position:-2px -22px}h3.dateCell{padding:5px;background-color:#f9fafb;font-size:15px;font-weight:400;border-radius:4px}.historyEntry{margin:5px 0}.historyEntry .delta{float:left;font-size:18px;padding:0 5px;text-align:center;width:64px;font-weight:700}.historyEntry .positive{color:#22c55e}.historyEntry .negative{color:#ef4444}.historyEntry .historyDescription{margin-left:63px}.historyEntry .time{margin-top:-8px}#aboutTextBox{padding-top:0}.certification{padding-bottom:20px}.certification img{display:block;margin-right:8px;float:left;max-width:96px}.certification .blurbText{display:table-cell;vertical-align:middle}#acceptedCertsBlurbs{padding-top:10px}#acceptedCertsBlurbs .primary{padding-bottom:10px}#acceptedCerts .certification img{max-width:33px;max-height:33px}table.reputationBreakdown td,table.reputationBreakdown th{padding:2px 5px}table.reputationBreakdown tr:nth-child(even){background-color:#f9fafb}table.reputationBreakdown{width:100%}.paginatedDiv{margin:8px 0 24px}.dateCell{margin-top:24px}#activity-graph,.reputationBreakdown{margin:8px 0 24px}@media only screen and (max-width:599px){#eventTimelineGraph{max-width:300px}}#answersDiv ul,#comments ul,#questionsDiv ul{margin:0;padding:0}#answersDiv li,#comments ul{margin-bottom:8px}.activity-list{list-style-type:none;padding:0}.activity-list>li{margin:6px 0}.filterOption:hover{background:#f9fafb;cursor:pointer}.noteDiv .comment-inner{display:block;overflow:hidden}.noteDiv .toggleArrow{margin-top:6px;margin-right:6px}.noteDiv .arrowDown{border-top-color:#3b82f6}.noteDiv .arrowRight{border-left-color:#3b82f6}.noteDiv .noteText,.noteDiv .toggleArrow{float:left}.noteDiv .noteTime{float:right;margin:-2px 0 0 14px;font-style:italic;color:#9ca3af}.noteDiv .profileCommentText{margin:8px 0 0 14px}.actionItems{color:#3b82f6;margin:12px 0 0 0;list-style-type:none;padding-left:0}.actionItems li{font-size:14px;margin:12px 0 0 0}.actionItems li:hover,.actionItems li:hover a{color:#feaa0b;text-decoration:none}.actionItems li .fa{margin:0 6px}.teamsList{list-style-type:none;margin-left:0;padding-left:0;border-bottom:1px solid #e5e7eb;margin-bottom:4px;margin-top:0}.teamsList img{border:1px solid #e5e7eb;width:56px}.teamsList li{padding:8px 0;position:relative}.teamsList h4{margin-top:0;line-height:1em}.teamsList p{margin:4px 0 0}.teamsList .teamDetails{margin-right:10px;margin-left:64px}.teamsList .teamTag{font-size:10px}.teamsLabel{margin-top:10px}#modalHeader{margin:0 0 8px 0;border-bottom:0}#modalContent{margin:8px 16px}#modalEmail{width:98.6%;margin-bottom:8px}#modalSubmit{text-align:right}#businessInfoBox{border-top:1px solid #e5e7eb;padding-bottom:5px}#businessInfoBox h3{margin-top:15px}#businessInfoBox p{margin:0 0 5px 0}#businessInfoBox .fa{margin-right:2px}@media only screen and (max-width:599px){.defaultActivityGraph{width:100%}}.story-blurb{padding:30px 0;border-bottom:1px solid #f9fafb;display:flex;align-items:center}.story-blurb:last-child{border:none}.story-blurb>div{display:inline-block}.story-blurb .blurbImage img{width:110px;height:83px;float:none;border:1px solid #f9fafb;border-radius:4px}.story-blurb .story-blurb-details{margin-left:30px}.story-blurb .story-blurb-details a{color:#4b5563}.story-blurb .story-blurb-details .story-blurb-title{margin:0 0 10px}.story-blurb .story-blurb-details .story-blurb-meta{color:#6b7280;margin:0} -------------------------------------------------------------------------------- /src/ifixit2zim/utils.py: -------------------------------------------------------------------------------- 1 | import io 2 | import re 3 | import urllib.parse 4 | import zlib 5 | from http import HTTPStatus 6 | 7 | import backoff 8 | import bs4 9 | import requests 10 | from kiwixstorage import KiwixStorage 11 | from pif import get_public_ip 12 | from zimscraperlib.download import stream_file 13 | 14 | from ifixit2zim.constants import API_PREFIX, Configuration 15 | from ifixit2zim.shared import logger 16 | 17 | 18 | def backoff_hdlr(details): 19 | logger.warning( 20 | "Backing off {wait:0.1f} seconds after {tries} tries " 21 | "calling function {target} with args {args} and kwargs " 22 | "{kwargs}".format(**details) 23 | ) 24 | 25 | 26 | def fatal_code(e): 27 | """Give up on errors codes 400-499 except 429""" 28 | logger.warning(f"Fatal code {e.response.status_code}") 29 | return ( 30 | HTTPStatus.BAD_REQUEST 31 | <= e.response.status_code 32 | < HTTPStatus.INTERNAL_SERVER_ERROR 33 | and e.response.status_code != HTTPStatus.TOO_MANY_REQUESTS 34 | ) 35 | 36 | 37 | class Utils: 38 | def __init__(self, configuration: Configuration) -> None: 39 | self.configuration = configuration 40 | 41 | def to_path(self, url: str) -> str: 42 | """Path-part of an URL, without leading slash""" 43 | return re.sub(r"^/", "", urllib.parse.urlparse(url).path) 44 | 45 | def get_url(self, path: str, **params) -> str: 46 | """url-encoded in-source website url for a path""" 47 | params_str = f"?{urllib.parse.urlencode(params)}" if params else "" 48 | return ( 49 | f"{self.configuration.main_url.geturl()}" 50 | f"{urllib.parse.quote(path)}" 51 | f"{params_str}" 52 | ) 53 | 54 | def get_url_raw(self, path: str): 55 | """in-source website url for a path, untainted""" 56 | return f"{self.configuration.main_url.geturl()}{path}" 57 | 58 | def to_url(self, value: str) -> str: 59 | """resolved potentially relative url from in-source link""" 60 | return value if value.startswith("http") else self.get_url_raw(value) 61 | 62 | def to_rel(self, url: str) -> None | str: 63 | """path from URL if on our main domain, else None""" 64 | uri = urllib.parse.urlparse(url) 65 | if uri.netloc != self.configuration.domain: 66 | return None 67 | return uri.path 68 | 69 | def no_leading_slash(self, text: str) -> str: 70 | """text with leading slash removed if present""" 71 | return re.sub(r"^/", "", text) 72 | 73 | def no_trailing_slash(self, text: str) -> str: 74 | """text with trailing slash removed if present""" 75 | return re.sub(r"/$", "", text) 76 | 77 | def only_path_of(self, url: str): 78 | """normalized path part of an url""" 79 | return self.normalize_ident(urllib.parse.urlparse(url).path) 80 | 81 | @backoff.on_exception( 82 | backoff.expo, 83 | requests.exceptions.RequestException, 84 | max_time=16, 85 | on_backoff=backoff_hdlr, 86 | giveup=fatal_code, 87 | ) 88 | def fetch(self, path: str, **params) -> tuple[str, list[str]]: 89 | """(source text, actual_paths) of a path from source website 90 | 91 | actual_paths is amn ordered list of paths that were traversed to get to content. 92 | Without redirection, it should be a single path, equal to request 93 | Final, target path is always last""" 94 | resp = requests.get( 95 | self.get_url(path, **params), 96 | params=params, 97 | timeout=self.configuration.request_timeout, 98 | ) 99 | resp.raise_for_status() 100 | 101 | # we have params meaning we requested a page (?pg=xxx) 102 | # assumption: this must be a category page (so on same domain) 103 | # we thus need to use redirection target (which lost param) with params 104 | if params and resp.history: 105 | return self.fetch(self.only_path_of(resp.url), **params) 106 | return resp.text, [ 107 | self.no_leading_slash(self.only_path_of(r.url)) 108 | for r in [*resp.history, resp] 109 | ] 110 | 111 | def get_soup_of(self, text: str, *, unwrap: bool = False): 112 | """an lxml soup of an HTML string""" 113 | soup = bs4.BeautifulSoup(text, "lxml") 114 | if unwrap: 115 | for elem in ("body", "html"): 116 | getattr(soup, elem).unwrap() 117 | return soup 118 | 119 | def get_soup(self, path: str, **params) -> tuple[bs4.BeautifulSoup, list[str]]: 120 | """an lxml soup of a path on source website""" 121 | content, paths = self.fetch(path, **params) 122 | return self.get_soup_of(content), paths 123 | 124 | def get_digest(self, url: str) -> str: 125 | """simple digest of an url for mapping purpose""" 126 | return str(zlib.adler32(url.encode("UTF-8"))) 127 | 128 | def normalize_ident(self, ident: str) -> str: 129 | """URL-decoded category identifier""" 130 | return urllib.parse.unquote(ident) 131 | 132 | def get_version_ident_for(self, url: str) -> str | None: 133 | """~version~ of the URL data to use for comparisons. Built from headers""" 134 | try: 135 | resp = requests.head(url, timeout=10) 136 | headers = resp.headers 137 | except Exception as exc: 138 | logger.warning(f"Unable to HEAD {url}", exc_info=exc) 139 | try: 140 | _, headers = stream_file( 141 | url=url, 142 | byte_stream=io.BytesIO(), 143 | block_size=1, 144 | only_first_block=True, 145 | ) 146 | except Exception as exc: 147 | logger.warning(f"Unable to query image at {url}", exc_info=exc) 148 | return 149 | 150 | for header in ("ETag", "Last-Modified", "Content-Length"): 151 | if headers.get(header): 152 | return headers.get(header) 153 | 154 | return "-1" 155 | 156 | def setup_s3_and_check_credentials(self, s3_url_with_credentials): 157 | logger.info("testing S3 Optimization Cache credentials") 158 | s3_storage = KiwixStorage(s3_url_with_credentials) 159 | if not s3_storage.check_credentials( 160 | list_buckets=True, bucket=True, write=True, read=True, failsafe=True 161 | ): 162 | logger.error("S3 cache connection error testing permissions.") 163 | logger.error(f" Server: {s3_storage.url.netloc}") 164 | logger.error(f" Bucket: {s3_storage.bucket_name}") 165 | logger.error(f" Key ID: {s3_storage.params.get('keyid')}") 166 | logger.error(f" Public IP: {get_public_ip()}") 167 | raise ValueError("Unable to connect to Optimization Cache. Check its URL.") 168 | return s3_storage 169 | 170 | @backoff.on_exception( 171 | backoff.expo, 172 | requests.exceptions.RequestException, 173 | max_time=16, 174 | on_backoff=backoff_hdlr, 175 | giveup=fatal_code, 176 | ) 177 | def get_api_content(self, path, **params): 178 | full_path = self.get_url(API_PREFIX + path, **params) 179 | logger.debug(f"Retrieving {full_path}") 180 | response = requests.get(full_path, timeout=self.configuration.request_timeout) 181 | json_data = ( 182 | response.json() 183 | if response and response.status_code == HTTPStatus.OK 184 | else None 185 | ) 186 | return json_data 187 | -------------------------------------------------------------------------------- /src/ifixit2zim/imager.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim: ai ts=4 sts=4 et sw=4 nu 3 | 4 | import hashlib 5 | import io 6 | import pathlib 7 | import re 8 | import threading 9 | import urllib.parse 10 | 11 | from kiwixstorage import KiwixStorage, NotFoundError 12 | from PIL import Image 13 | from zimscraperlib.download import stream_file 14 | from zimscraperlib.image.optimization import optimize_webp 15 | from zimscraperlib.zim.creator import Creator 16 | 17 | from ifixit2zim.constants import IMAGES_ENCODER_VERSION 18 | from ifixit2zim.executor import Executor 19 | from ifixit2zim.scraper import Configuration 20 | from ifixit2zim.shared import logger 21 | from ifixit2zim.utils import Utils 22 | 23 | 24 | class Imager: 25 | def __init__( 26 | self, 27 | img_executor: Executor, 28 | lock: threading.Lock, 29 | creator: Creator, 30 | utils: Utils, 31 | configuration: Configuration, 32 | ): 33 | self.aborted = False 34 | # list of source URLs that we've processed and added to ZIM 35 | self.handled = set() 36 | self.dedup_items = {} 37 | self.img_executor = img_executor 38 | self.lock = lock 39 | self.creator = creator 40 | self.utils = utils 41 | self.configuration = configuration 42 | 43 | self.img_executor.start() 44 | 45 | def abort(self): 46 | """request imager to cancel processing of futures""" 47 | self.aborted = True 48 | 49 | def get_image_data(self, url: str) -> io.BytesIO: 50 | """Bytes stream of an optimized version of source image 51 | 52 | Bitmap images are converted to WebP and optimized 53 | SVG images are kept as is""" 54 | src, webp = io.BytesIO(), io.BytesIO() 55 | stream_file(url=url, byte_stream=src) 56 | 57 | if pathlib.Path(url).suffix == ".svg" or "/math/render/svg/" in url: 58 | return src 59 | 60 | with Image.open(src) as img: 61 | img.save(webp, format="WEBP") 62 | 63 | del src 64 | return optimize_webp( 65 | src=webp, 66 | lossless=False, 67 | quality=60, 68 | method=6, 69 | ) # pyright: ignore[reportReturnType] 70 | 71 | def get_path_for(self, url: urllib.parse.ParseResult) -> str: 72 | url_with_only_path = urllib.parse.ParseResult( 73 | scheme=url.scheme, 74 | netloc=url.netloc, 75 | path=url.path, 76 | query="", 77 | params="", 78 | fragment="", 79 | ) 80 | unquoted_url = urllib.parse.unquote(url_with_only_path.geturl()) 81 | return "images/{}".format(re.sub(r"^(https?)://", r"\1/", unquoted_url)) 82 | 83 | def defer(self, url: str) -> str | None: 84 | """request full processing of url, returning in-zim path immediately""" 85 | 86 | # find actual URL should it be from a provider 87 | try: 88 | parsed_url = urllib.parse.urlparse(self.utils.to_url(url)) 89 | except Exception: 90 | logger.warning(f"Can't parse image URL `{url}`. Skipping") 91 | return 92 | 93 | if parsed_url.scheme not in ("http", "https"): 94 | logger.warning( 95 | f"Not supporting image URL `{parsed_url.geturl()}`. Skipping" 96 | ) 97 | return 98 | 99 | path = self.get_path_for(parsed_url) 100 | 101 | if path in self.handled: 102 | return path 103 | 104 | # record that we are processing this one 105 | self.handled.add(path) 106 | 107 | self.img_executor.submit( 108 | self.process_image, 109 | url=parsed_url, 110 | path=path, 111 | mimetype="image/svg+xml" if path.endswith(".svg") else "image/webp", 112 | dont_release=True, 113 | ) 114 | 115 | return path 116 | 117 | def check_for_duplicate(self, path, content): 118 | digest = hashlib.sha256(content).digest() 119 | if digest in self.dedup_items: 120 | return self.dedup_items[digest] 121 | self.dedup_items[digest] = path 122 | return None 123 | 124 | def add_image_to_zim(self, path, content, mimetype): 125 | duplicate_path = self.check_for_duplicate(path, content) 126 | with self.lock: 127 | if duplicate_path: 128 | self.creator.add_redirect( 129 | path=path, 130 | target_path=duplicate_path, 131 | ) 132 | else: 133 | self.creator.add_item_for( 134 | path=path, 135 | content=content, 136 | mimetype=mimetype, 137 | ) 138 | 139 | def add_missing_image_to_zim(self, path): 140 | with self.lock: 141 | self.creator.add_redirect( 142 | path=path, 143 | target_path="assets/NoImage_300x225.jpg", 144 | ) 145 | 146 | def process_image( 147 | self, url: urllib.parse.ParseResult, path: str, mimetype: str 148 | ) -> str | None: 149 | """download image from url or S3 and add to Zim at path. Upload if req.""" 150 | 151 | if self.aborted: 152 | return 153 | 154 | # just download, optimize and add to ZIM if not using S3 155 | if not self.configuration.s3_url: 156 | try: 157 | fileobj = self.get_image_data(url.geturl()) 158 | except Exception as exc: 159 | logger.error( 160 | f"Failed to download/convert/optim source at {url.geturl()}", 161 | exc_info=exc, 162 | ) 163 | self.add_missing_image_to_zim( 164 | path=path, 165 | ) 166 | return path 167 | 168 | self.add_image_to_zim( 169 | path=path, 170 | content=fileobj.getvalue(), 171 | mimetype=mimetype, 172 | ) 173 | 174 | return path 175 | 176 | # we are using S3 cache 177 | ident = self.utils.get_version_ident_for(url.geturl()) 178 | if ident is None: 179 | logger.error(f"Unable to query {url.geturl()}. Skipping") 180 | self.add_missing_image_to_zim( 181 | path=path, 182 | ) 183 | return path 184 | 185 | # key = self.get_s3_key_for(url.geturl()) 186 | s3_storage = KiwixStorage(self.configuration.s3_url) 187 | meta = {"ident": ident, "encoder_version": str(IMAGES_ENCODER_VERSION)} 188 | 189 | download_failed = False # useful to trigger reupload or not 190 | try: 191 | fileobj = io.BytesIO() 192 | s3_storage.download_matching_fileobj(path, fileobj, meta=meta) 193 | logger.debug(f"'{path}' found in S3") 194 | except NotFoundError: 195 | # don't have it, not a donwload error. we'll upload after processing 196 | pass 197 | except Exception as exc: 198 | logger.error(f"Failed to download '{path}' from cache", exc_info=exc) 199 | download_failed = True 200 | else: 201 | self.add_image_to_zim( 202 | path=path, 203 | content=fileobj.getvalue(), 204 | mimetype=mimetype, 205 | ) 206 | return path 207 | 208 | # we're using S3 but don't have it or failed to download 209 | logger.debug(f"'{path}' not found in S3, downloading from origin") 210 | try: 211 | fileobj = self.get_image_data(url.geturl()) 212 | except Exception as exc: 213 | logger.error( 214 | f"Failed to download/convert/optim source at {url.geturl()}", 215 | exc_info=exc, 216 | ) 217 | self.add_missing_image_to_zim( 218 | path=path, 219 | ) 220 | return path 221 | 222 | self.add_image_to_zim( 223 | path=path, 224 | content=fileobj.getvalue(), 225 | mimetype=mimetype, 226 | ) 227 | 228 | # only upload it if we didn't have it in cache 229 | if not download_failed: 230 | logger.debug(f"Uploading {url.geturl()} to S3::{path} with {meta}") 231 | try: 232 | s3_storage.upload_fileobj(fileobj=fileobj, key=path, meta=meta) 233 | except Exception as exc: 234 | logger.error(f"{path} failed to upload to cache", exc_info=exc) 235 | 236 | return path 237 | -------------------------------------------------------------------------------- /src/ifixit2zim/entrypoint.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import os 5 | import sys 6 | 7 | from ifixit2zim.constants import NAME, SCRAPER, URLS 8 | from ifixit2zim.shared import logger, set_debug 9 | 10 | 11 | def main(): 12 | parser = argparse.ArgumentParser( 13 | prog=NAME, 14 | description="Scraper to create ZIM files ifixit articles", 15 | ) 16 | 17 | parser.add_argument( 18 | "--language", 19 | choices=URLS.keys(), 20 | required=True, 21 | help="ifixit website to build from", 22 | dest="lang_code", 23 | ) 24 | 25 | parser.add_argument( 26 | "--output", 27 | help="Output folder for ZIM file", 28 | default="/output", 29 | dest="_output_name", 30 | ) 31 | 32 | parser.add_argument( 33 | "--name", 34 | help="ZIM name. Used as identifier and filename (date will be appended). " 35 | "Constructed from language if not supplied", 36 | ) 37 | 38 | parser.add_argument( 39 | "--title", 40 | help="Custom title for your ZIM (30 chars max).", 41 | ) 42 | 43 | parser.add_argument( 44 | "--description", 45 | help="Custom description for your ZIM (80 chars max). " 46 | "Based on iFixit homepage description (meta) otherwise", 47 | ) 48 | 49 | parser.add_argument( 50 | "--long-description", 51 | help="Custom long description for your ZIM (4000 chars max). " 52 | "Based on iFixit homepage description (meta) otherwise", 53 | ) 54 | 55 | parser.add_argument( 56 | "--icon", 57 | help="Custom icon for your ZIM (path or URL). iFixit square logo otherwise", 58 | ) 59 | 60 | parser.add_argument( 61 | "--creator", 62 | help="Name of content creator. “iFixit” otherwise", 63 | dest="author", 64 | default="iFixit", 65 | ) 66 | 67 | parser.add_argument( 68 | "--publisher", 69 | help="Custom publisher name (ZIM metadata). “openZIM” otherwise", 70 | default="openZIM", 71 | ) 72 | 73 | parser.add_argument( 74 | "--tag", 75 | help="Add tag to the ZIM file. " 76 | "_category:ifixit and ifixit added automatically. Use --tag several " 77 | " times or separate with `;`", 78 | default=[], 79 | action="append", 80 | ) 81 | 82 | parser.add_argument( 83 | "--zim-file", 84 | help="ZIM file name (based on --name if not provided)", 85 | dest="fname", 86 | ) 87 | 88 | parser.add_argument( 89 | "--optimization-cache", 90 | help="URL with credentials to S3 for using as optimization cache", 91 | dest="s3_url_with_credentials", 92 | ) 93 | 94 | parser.add_argument( 95 | "--debug", 96 | help="Enable verbose output", 97 | action="store_true", 98 | dest="debug", 99 | default=False, 100 | ) 101 | 102 | parser.add_argument( 103 | "--tmp-dir", 104 | help="Path to create temp folder in. Used for building ZIM file.", 105 | default=os.getenv("TMPDIR", "."), 106 | dest="_tmp_name", 107 | ) 108 | 109 | parser.add_argument( 110 | "--keep", 111 | help="Don't remove build folder on start (for debug/devel)", 112 | default=False, 113 | action="store_true", 114 | dest="keep_build_dir", 115 | ) 116 | 117 | parser.add_argument( 118 | "--build-in-tmp", 119 | help="Use --tmp-dir value as workdir. Otherwise, a unique sub-folder " 120 | "is created inside it. Useful to reuse downloaded files (debug/devel)", 121 | default=False, 122 | action="store_true", 123 | dest="build_dir_is_tmp_dir", 124 | ) 125 | 126 | parser.add_argument( 127 | "--delay", 128 | help="Add this delay (seconds) before each request to please " 129 | "iFixit servers. Can be fractions. Defaults to 0: no delay", 130 | type=float, 131 | ) 132 | 133 | parser.add_argument( 134 | "--api-delay", 135 | help="Add this delay (seconds) before each API query (!= calls) to " 136 | "please iFixit servers. Can be fractions. Defaults to 0: no delay", 137 | type=float, 138 | ) 139 | 140 | parser.add_argument( 141 | "--cdn-delay", 142 | help="Add this delay (seconds) before each CDN file download to please " 143 | "iFixit servers. Can be fractions. Defaults to 0: no delay", 144 | type=float, 145 | ) 146 | 147 | parser.add_argument( 148 | "--request-timeout", 149 | help="Timeout in seconds for HTTP requests (default: 10)", 150 | type=float, 151 | default=10, 152 | ) 153 | 154 | parser.add_argument( 155 | "--skip-checks", 156 | help="[dev] Don't perform Integrity Checks on start", 157 | default=False, 158 | action="store_true", 159 | dest="skip_checks", 160 | ) 161 | 162 | parser.add_argument( 163 | "--stats-filename", 164 | help="Path to store the progress JSON file to.", 165 | dest="stats_filename", 166 | ) 167 | 168 | parser.add_argument( 169 | "--version", 170 | help="Display scraper version and exit", 171 | action="version", 172 | version=SCRAPER, 173 | ) 174 | 175 | parser.add_argument( 176 | "--max-missing-items-percent", 177 | help="Amount of missing items which will force the scraper to stop, expressed" 178 | " as a percentage of the total number of items to retrieve. Integer from 1 to" 179 | " 100.", 180 | default=5, 181 | type=int, 182 | dest="max_missing_items_percent", 183 | ) 184 | 185 | parser.add_argument( 186 | "--max-error-items-percent", 187 | help="Amount of items with failed processing which will force the scraper to" 188 | " stop, expressed as a percentage of the total number of items to retrieve." 189 | " Integer from 1 to 100.", 190 | default=5, 191 | type=int, 192 | dest="max_error_items_percent", 193 | ) 194 | 195 | parser.add_argument( 196 | "--category", 197 | help="Only scrape this category (can be specified multiple times). " 198 | "Specify the category name", 199 | dest="categories", 200 | action="append", 201 | ) 202 | 203 | parser.add_argument( 204 | "--no-category", 205 | help="Do not scrape any category.", 206 | dest="no_category", 207 | action="store_true", 208 | default=False, 209 | ) 210 | 211 | parser.add_argument( 212 | "--guide", 213 | help="Only scrape this guide (can be specified multiple times). " 214 | "Specify the guide name", 215 | dest="guides", 216 | action="append", 217 | ) 218 | 219 | parser.add_argument( 220 | "--no-guide", 221 | help="Do not scrape any guide.", 222 | dest="no_guide", 223 | action="store_true", 224 | default=False, 225 | ) 226 | 227 | parser.add_argument( 228 | "--info", 229 | help="Only scrape this info (can be specified multiple times). " 230 | "Specify the info name", 231 | dest="infos", 232 | action="append", 233 | ) 234 | 235 | parser.add_argument( 236 | "--no-info", 237 | help="Do not scrape any info.", 238 | dest="no_info", 239 | action="store_true", 240 | default=False, 241 | ) 242 | 243 | parser.add_argument( 244 | "--user", 245 | help="Only scrape this user (can be specified multiple times). " 246 | "Specify the userid", 247 | dest="users", 248 | action="append", 249 | ) 250 | 251 | parser.add_argument( 252 | "--no-user", 253 | help="Do not scrape any user.", 254 | dest="no_user", 255 | action="store_true", 256 | default=False, 257 | ) 258 | 259 | parser.add_argument( 260 | "--scrape-only-first-items", 261 | help="Scrape only first items of every type.", 262 | dest="scrape_only_first_items", 263 | action="store_true", 264 | default=False, 265 | ) 266 | 267 | parser.add_argument( 268 | "--no-cleanup", 269 | help="Do not cleanup HTML content.", 270 | dest="no_cleanup", 271 | action="store_true", 272 | default=False, 273 | ) 274 | 275 | args = parser.parse_args() 276 | set_debug(args.debug) 277 | 278 | from ifixit2zim.scraper import IFixit2Zim 279 | 280 | try: 281 | scraper = IFixit2Zim(**dict(args._get_kwargs())) 282 | sys.exit(scraper.run()) 283 | except Exception as exc: 284 | logger.error("FAILED. An error occurred", exc_info=exc) 285 | raise SystemExit(1) from None 286 | 287 | 288 | if __name__ == "__main__": 289 | main() 290 | -------------------------------------------------------------------------------- /src/ifixit2zim/scraper_guide.py: -------------------------------------------------------------------------------- 1 | import urllib.parse 2 | 3 | from ifixit2zim.constants import ( 4 | DIFFICULTY_EASY, 5 | DIFFICULTY_HARD, 6 | DIFFICULTY_MODERATE, 7 | DIFFICULTY_VERY_EASY, 8 | DIFFICULTY_VERY_HARD, 9 | GUIDE_LABELS, 10 | UNKNOWN_LOCALE, 11 | UNKNOWN_TITLE, 12 | ) 13 | from ifixit2zim.context import Context 14 | from ifixit2zim.exceptions import UnexpectedDataKindExceptionError 15 | from ifixit2zim.scraper_generic import ScraperGeneric 16 | from ifixit2zim.shared import logger 17 | 18 | 19 | class ScraperGuide(ScraperGeneric): 20 | def __init__(self, context: Context): 21 | super().__init__(context) 22 | 23 | def setup(self): 24 | self.guide_template = self.env.get_template("guide.html") 25 | 26 | def get_items_name(self): 27 | return "guide" 28 | 29 | def _add_guide_to_scrape(self, guideid, guidetitle, locale, is_expected): 30 | self.add_item_to_scrape( 31 | guideid, 32 | { 33 | "guideid": guideid, 34 | "guidetitle": guidetitle, 35 | "locale": locale, 36 | }, 37 | is_expected, 38 | ) 39 | 40 | def _build_guide_path(self, guideid, guidetitle): # noqa ARG002 41 | href = self.configuration.main_url.geturl() + f"/Guide/-/{guideid}" 42 | final_href = self.processor.normalize_href(href) 43 | return final_href[1:] 44 | 45 | def get_guide_link_from_obj(self, guide): 46 | if "guideid" not in guide or not guide["guideid"]: 47 | raise UnexpectedDataKindExceptionError( 48 | f"Impossible to extract guide id from {guide}" 49 | ) 50 | if "locale" not in guide or not guide["locale"]: 51 | raise UnexpectedDataKindExceptionError( 52 | f"Impossible to extract guide locale from {guide}" 53 | ) 54 | if "title" not in guide or not guide["title"]: 55 | raise UnexpectedDataKindExceptionError( 56 | f"Impossible to extract guide title from {guide}" 57 | ) 58 | guideid = guide["guideid"] 59 | locale = guide["locale"] 60 | title = guide["title"] 61 | # override unknown locale if needed 62 | if ( 63 | guideid in self.expected_items_keys 64 | and self.expected_items_keys[guideid]["locale"] == UNKNOWN_LOCALE 65 | ): 66 | self.expected_items_keys[guideid]["locale"] = locale 67 | # override unknown title if needed 68 | if ( 69 | guideid in self.expected_items_keys 70 | and self.expected_items_keys[guideid]["guidetitle"] == UNKNOWN_TITLE 71 | ): 72 | self.expected_items_keys[guideid]["guidetitle"] = title 73 | return self.get_guide_link_from_props( 74 | guideid=guideid, guidetitle=title, guidelocale=locale 75 | ) 76 | 77 | def get_guide_link_from_props( 78 | self, guideid, guidetitle, guidelocale=UNKNOWN_LOCALE 79 | ): 80 | guide_path = urllib.parse.quote( 81 | self._build_guide_path(guideid=guideid, guidetitle=guidetitle) 82 | ) 83 | if self.configuration.no_guide: 84 | return f"home/not_scrapped?url={guide_path}" 85 | if self.configuration.guides and str(guideid) not in self.configuration.guides: 86 | return f"home/not_scrapped?url={guide_path}" 87 | self._add_guide_to_scrape(guideid, guidetitle, guidelocale, False) 88 | return guide_path 89 | 90 | def build_expected_items(self): 91 | if self.configuration.no_guide: 92 | logger.info("No guide required") 93 | return 94 | if self.configuration.guides: 95 | logger.info("Adding required guides as expected") 96 | for guide in self.configuration.guides: 97 | self._add_guide_to_scrape(guide, UNKNOWN_TITLE, UNKNOWN_LOCALE, True) 98 | return 99 | logger.info("Downloading list of guides") 100 | limit = 200 101 | offset = 0 102 | while True: 103 | guides = self.utils.get_api_content("/guides", limit=limit, offset=offset) 104 | if not guides or len(guides) == 0: 105 | break 106 | for guide in guides: 107 | # we ignore archived guides since they are not accessible anywayß 108 | if "GUIDE_ARCHIVED" in guide["flags"]: 109 | continue 110 | if guide["revisionid"] == 0: 111 | logger.warning("Found one guide with revisionid=0") 112 | guideid = guide["guideid"] 113 | # Unfortunately for now iFixit API always returns "en" as language 114 | # on this endpoint, so we consider it as unknown for now 115 | self._add_guide_to_scrape(guideid, UNKNOWN_TITLE, UNKNOWN_LOCALE, True) 116 | offset += limit 117 | if self.configuration.scrape_only_first_items: 118 | logger.warning( 119 | "Aborting the retrieval of all guides since only first items" 120 | " will be scraped anyway" 121 | ) 122 | break 123 | logger.info(f"{len(self.expected_items_keys)} guides found") 124 | 125 | def get_one_item_content(self, item_key, item_data): 126 | guideid = item_key 127 | guide = item_data 128 | locale = guide["locale"] 129 | if locale == UNKNOWN_LOCALE: 130 | locale = self.configuration.lang_code # fallback value 131 | if locale == "ja": 132 | locale = "jp" # Unusual iFixit convention 133 | 134 | guide_content = self.utils.get_api_content(f"/guides/{guideid}", langid=locale) 135 | if guide_content is None and locale != "en": 136 | # guide is most probably available in English anyway 137 | guide_content = self.utils.get_api_content( 138 | f"/guides/{guideid}", langid="en" 139 | ) 140 | 141 | return guide_content 142 | 143 | def add_item_redirect(self, item_key, item_data, redirect_kind): 144 | guideid = item_key 145 | guide = item_data 146 | guidetitle = guide["guidetitle"] 147 | if guidetitle == UNKNOWN_TITLE: 148 | logger.warning(f"Cannot add redirect for guide {guideid} in error") 149 | return 150 | path = self._build_guide_path(guideid, guidetitle) 151 | self.processor.add_redirect( 152 | path=path, 153 | target_path=f"home/{redirect_kind}?{urllib.parse.urlencode({'url':path})}", 154 | ) 155 | 156 | def process_one_item(self, item_key, item_data, item_content): # noqa ARG002 157 | guide_content = item_content 158 | 159 | if guide_content["type"] != "teardown": 160 | if guide_content["difficulty"] in DIFFICULTY_VERY_EASY: 161 | guide_content["difficulty_class"] = "difficulty-1" 162 | elif guide_content["difficulty"] in DIFFICULTY_EASY: 163 | guide_content["difficulty_class"] = "difficulty-2" 164 | elif guide_content["difficulty"] in DIFFICULTY_MODERATE: 165 | guide_content["difficulty_class"] = "difficulty-3" 166 | elif guide_content["difficulty"] in DIFFICULTY_HARD: 167 | guide_content["difficulty_class"] = "difficulty-4" 168 | elif guide_content["difficulty"] in DIFFICULTY_VERY_HARD: 169 | guide_content["difficulty_class"] = "difficulty-5" 170 | else: 171 | raise UnexpectedDataKindExceptionError( 172 | "Unknown guide difficulty: '{}' in guide {}".format( 173 | guide_content["difficulty"], 174 | guide_content["guideid"], 175 | ) 176 | ) 177 | 178 | for step in guide_content["steps"]: 179 | if not step["media"]: 180 | raise UnexpectedDataKindExceptionError( 181 | "Missing media attribute in step {} of guide {}".format( 182 | step["stepid"], guide_content["guideid"] 183 | ) 184 | ) 185 | if step["media"]["type"] not in [ 186 | "image", 187 | "video", 188 | "embed", 189 | ]: 190 | raise UnexpectedDataKindExceptionError( 191 | "Unrecognized media type in step {} of guide {}".format( 192 | step["stepid"], guide_content["guideid"] 193 | ) 194 | ) 195 | if step["media"]["type"] == "video": 196 | if "data" not in step["media"] or not step["media"]["data"]: 197 | raise UnexpectedDataKindExceptionError( 198 | "Missing 'data' in step {} of guide {}".format( 199 | step["stepid"], guide_content["guideid"] 200 | ) 201 | ) 202 | if ( 203 | "image" not in step["media"]["data"] 204 | or not step["media"]["data"]["image"] 205 | ): 206 | raise UnexpectedDataKindExceptionError( 207 | "Missing outer 'image' in step {} of guide {}".format( 208 | step["stepid"], guide_content["guideid"] 209 | ) 210 | ) 211 | if ( 212 | "image" not in step["media"]["data"]["image"] 213 | or not step["media"]["data"]["image"]["image"] 214 | ): 215 | raise UnexpectedDataKindExceptionError( 216 | "Missing inner 'image' in step {} of guide {}".format( 217 | step["stepid"], guide_content["guideid"] 218 | ) 219 | ) 220 | if step["media"]["type"] == "embed": 221 | if "data" not in step["media"] or not step["media"]["data"]: 222 | raise UnexpectedDataKindExceptionError( 223 | "Missing 'data' in step {} of guide {}".format( 224 | step["stepid"], guide_content["guideid"] 225 | ) 226 | ) 227 | if ( 228 | "html" not in step["media"]["data"] 229 | or not step["media"]["data"]["html"] 230 | ): 231 | raise UnexpectedDataKindExceptionError( 232 | "Missing 'html' in step {} of guide {}".format( 233 | step["stepid"], guide_content["guideid"] 234 | ) 235 | ) 236 | for line in step["lines"]: 237 | if line["bullet"] not in [ 238 | "black", 239 | "red", 240 | "orange", 241 | "yellow", 242 | "green", 243 | "blue", 244 | "light_blue", 245 | "violet", 246 | "icon_note", 247 | "icon_caution", 248 | "icon_caution", 249 | "icon_reminder", 250 | ]: 251 | raise UnexpectedDataKindExceptionError( 252 | "Unrecognized bullet '{}' in step {} of guide {}".format( 253 | line["bullet"], 254 | step["stepid"], 255 | guide_content["guideid"], 256 | ) 257 | ) 258 | guide_rendered = self.guide_template.render( 259 | guide=guide_content, 260 | label=GUIDE_LABELS[self.configuration.lang_code], 261 | metadata=self.metadata, 262 | ) 263 | 264 | self.processor.add_html_item( 265 | path=self._build_guide_path( 266 | guideid=guide_content["guideid"], guidetitle=guide_content["title"] 267 | ), 268 | title=guide_content["title"], 269 | content=guide_rendered, 270 | ) 271 | --------------------------------------------------------------------------------