├── .codecov.yml
├── .dockerignore
├── .editorconfig
├── .github
    ├── FUNDING.yml
    ├── stale.yml
    └── workflows
    │   ├── ci.yml
    │   ├── codeql-analysis.yml
    │   ├── docker.yml
    │   └── publish.yml
├── .gitignore
├── .node-version
├── .npmignore
├── .npmrc
├── .pre-commit-config.yaml
├── .prettierignore
├── .prettierrc.json
├── .vscode
    └── launch.json
├── CONTRIBUTING.md
├── Changelog
├── Dockerfile
├── LICENSE
├── README.md
├── dev
    ├── build.sh
    └── watch.sh
├── docker
    ├── .custom-bashrc
    ├── Dockerfile
    ├── README.md
    ├── docker-compose.yml
    └── node_redis-entrypoint.sh
├── docs
    └── functional_architecture.md
├── eslint.config.js
├── extensions
    └── wiktionary_fr.js
├── index.js
├── jest.config.cjs
├── package-lock.json
├── package.json
├── res
    ├── DMSans-Regular.ttf
    ├── article_list_home.js
    ├── article_not_found.svg
    ├── content.parsoid.css
    ├── download_error_placeholder.css
    ├── external-link.svg
    ├── footer.css
    ├── images_loaded.min.js
    ├── inserted_style.css
    ├── masonry.min.js
    ├── mobile_main_page.css
    ├── script.js
    ├── style.css
    ├── templates
    │   ├── article_list_home.html
    │   ├── categories.html
    │   ├── download_error_placeholder.html
    │   ├── footer.html
    │   ├── lead_section_wrapper.html
    │   ├── pageVector2022.html
    │   ├── pageVectorLegacy.html
    │   ├── pageWikimediaDesktop.html
    │   ├── pageWikimediaMobile.html
    │   ├── section_wrapper.html
    │   ├── subcategories.html
    │   ├── subpages.html
    │   └── subsection_wrapper.html
    ├── vector-2022.css
    ├── vector.css
    ├── webpHandler.js
    ├── wm_mobile_override_script.js
    └── wm_mobile_override_style.css
├── src
    ├── DOMUtils.ts
    ├── Downloader.ts
    ├── Dump.ts
    ├── Logger.ts
    ├── MediaWiki.ts
    ├── RedisStore.ts
    ├── S3.ts
    ├── Templates.ts
    ├── cli.ts
    ├── config.ts
    ├── error.manager.ts
    ├── mutex.ts
    ├── mwoffliner.lib.ts
    ├── parameterList.ts
    ├── renderers
    │   ├── abstract.renderer.ts
    │   ├── abstractDesktop.render.ts
    │   ├── abstractMobile.render.ts
    │   ├── action-parse.renderer.ts
    │   ├── renderer.builder.ts
    │   ├── rendering.context.ts
    │   ├── rest-api.renderer.ts
    │   ├── visual-editor.renderer.ts
    │   ├── wikimedia-desktop.renderer.ts
    │   └── wikimedia-mobile.renderer.ts
    ├── sanitize-argument.ts
    ├── types.d.ts
    └── util
    │   ├── RedisKvs.ts
    │   ├── Timer.ts
    │   ├── articleListMainPage.ts
    │   ├── builders
    │       └── url
    │       │   ├── action-parse.director.ts
    │       │   ├── api.director.ts
    │       │   ├── base.director.ts
    │       │   ├── basic.director.ts
    │       │   ├── desktop.director.ts
    │       │   ├── mobile.director.ts
    │       │   ├── rest-api.director.ts
    │       │   ├── url.builder.ts
    │       │   ├── visual-editor.director.ts
    │       │   └── web.director.ts
    │   ├── categories.ts
    │   ├── const.ts
    │   ├── dump.ts
    │   ├── index.ts
    │   ├── metaData.ts
    │   ├── misc.ts
    │   ├── mw-api.ts
    │   ├── rewriteUrls.ts
    │   ├── saveArticles.ts
    │   └── url.helper.ts
├── test
    ├── e2e
    │   ├── apiPathParamsSanitizing.e2e.test.ts
    │   ├── articleLists.test.ts
    │   ├── bm.e2e.test.ts
    │   ├── cmd.e2e.test.ts
    │   ├── downloadImage.e2e.test.ts
    │   ├── en.e2e.test.ts
    │   ├── en10.e2e.test.ts
    │   ├── extra.e2e.test.ts
    │   ├── forceRender.test.ts
    │   ├── formatParams.test.ts
    │   ├── multimediaContent.test.ts
    │   ├── openstreetmap.e2e.test.ts
    │   ├── treatMedia.e2e.test.ts
    │   ├── vikidia.e2e.test.ts
    │   ├── wikisource.e2e.test.ts
    │   └── zimMetadata.e2e.test.ts
    ├── testRenders.ts
    ├── unit
    │   ├── bootstrap.ts
    │   ├── builders
    │   │   └── url
    │   │   │   ├── api.director.test.ts
    │   │   │   ├── base.director.test.ts
    │   │   │   ├── basic.director.test.ts
    │   │   │   ├── desktop.director.test.ts
    │   │   │   ├── mobile.director.test.ts
    │   │   │   ├── url.builder.test.ts
    │   │   │   ├── visual-editor.director.test.ts
    │   │   │   └── web.director.test.ts
    │   ├── downloader.test.ts
    │   ├── dump.test.ts
    │   ├── logger.test.ts
    │   ├── misc.test.ts
    │   ├── mock
    │   │   ├── 1x1.png
    │   │   ├── mockRedis.ts
    │   │   └── sg.json
    │   ├── mwApi.test.ts
    │   ├── mwApiCapabilities.test.ts
    │   ├── redis.test.ts
    │   ├── redisKvsIterate.test.ts
    │   ├── renderers
    │   │   ├── article.renderer.test.ts
    │   │   ├── error.render.test.ts
    │   │   ├── mobile.renderer.test.ts
    │   │   └── renderer.builder.test.ts
    │   ├── s3.test.ts
    │   ├── sanitize-argument.test.ts
    │   ├── saveArticles.test.ts
    │   ├── saveStaticFiles.test.ts
    │   ├── treatments
    │   │   ├── article.treatment.test.ts
    │   │   └── media.treatment.test.ts
    │   ├── urlRewriting.test.ts
    │   ├── util.test.ts
    │   ├── util
    │   │   ├── dump.test.ts
    │   │   ├── metaData.test.ts
    │   │   └── url.helper.test.ts
    │   └── webpAndRedirection.test.ts
    └── util.ts
├── translation
    ├── ar.json
    ├── bn.json
    ├── br.json
    ├── dag.json
    ├── de.json
    ├── en.json
    ├── es.json
    ├── fi.json
    ├── fr.json
    ├── ha.json
    ├── he.json
    ├── hi.json
    ├── ia.json
    ├── id.json
    ├── ig.json
    ├── it.json
    ├── kaa.json
    ├── ko.json
    ├── lb.json
    ├── mk.json
    ├── nb.json
    ├── nl.json
    ├── nqo.json
    ├── or.json
    ├── pt-br.json
    ├── pt.json
    ├── qqq.json
    ├── ro.json
    ├── ru.json
    ├── sc.json
    ├── scn.json
    ├── sl.json
    ├── sq.json
    ├── sv.json
    ├── sw.json
    ├── te.json
    ├── tn.json
    ├── tr.json
    ├── zh-hans.json
    └── zh-hant.json
├── tsconfig.build.json
└── tsconfig.json


/.codecov.yml:
--------------------------------------------------------------------------------
 1 | codecov:
 2 |   notify:
 3 |     require_ci_to_pass: yes
 4 | 
 5 | coverage:
 6 |   status:
 7 |     project:
 8 |       default:
 9 |         threshold: 1%
10 |     patch:
11 |       default:
12 |         target: 90%
13 |         threshold: 0%
14 | 
15 | ignore:
16 |   - "test"
17 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | cac
3 | out
4 | test
5 | .vscode
6 | .github


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | # EditorConfig is awesome: https://EditorConfig.org
 2 | 
 3 | # top-most EditorConfig file
 4 | root = true
 5 | 
 6 | [*]
 7 | indent_style = space
 8 | indent_size = 2
 9 | end_of_line = lf
10 | charset = utf-8
11 | trim_trailing_whitespace = true
12 | insert_final_newline = true


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
 1 | # These are supported funding model platforms
 2 | 
 3 | github: kiwix # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
 4 | patreon: # Replace with a single Patreon username
 5 | open_collective: # Replace with a single Open Collective username
 6 | ko_fi: # Replace with a single Ko-fi username
 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
 9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | custom: # https://kiwix.org/support-us/
13 | 


--------------------------------------------------------------------------------
/.github/stale.yml:
--------------------------------------------------------------------------------
 1 | daysUntilClose: false
 2 | staleLabel: stale
 3 | 
 4 | issues:
 5 |   daysUntilStale: 60
 6 |   markComment: >
 7 |     This issue has been automatically marked as stale because it has not had
 8 |     recent activity. It will be now be reviewed manually. Thank you
 9 |     for your contributions.
10 | pulls:
11 |    daysUntilStale: 7
12 |    markComment: >
13 |      This pull request has been automatically marked as stale because it has not had
14 |      recent activity. It will be now be reviewed manually. Thank you
15 |      for your contributions.
16 | 


--------------------------------------------------------------------------------
/.github/workflows/codeql-analysis.yml:
--------------------------------------------------------------------------------
 1 | # For most projects, this workflow file will not need changing; you simply need
 2 | # to commit it to your repository.
 3 | #
 4 | # You may wish to alter this file to override the set of languages analyzed,
 5 | # or to provide custom queries or build logic.
 6 | #
 7 | # ******** NOTE ********
 8 | # We have attempted to detect the languages in your repository. Please check
 9 | # the `language` matrix defined below to confirm you have the correct set of
10 | # supported CodeQL languages.
11 | #
12 | name: "CodeQL"
13 | 
14 | on:
15 |   push:
16 |     branches: [ main ]
17 |   pull_request:
18 |     # The branches below must be a subset of the branches above
19 |     branches: [ main ]
20 |   schedule:
21 |     - cron: '19 9 * * 0'
22 | 
23 | jobs:
24 |   analyze:
25 |     name: Analyze
26 |     runs-on: ubuntu-latest
27 |     permissions:
28 |       actions: read
29 |       contents: read
30 |       security-events: write
31 | 
32 |     strategy:
33 |       fail-fast: false
34 |       matrix:
35 |         language: [ 'javascript' ]
36 |         # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ]
37 |         # Learn more:
38 |         # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed
39 | 
40 |     steps:
41 |       - name: Checkout code
42 |         uses: actions/checkout@v3
43 | 
44 |       # Initializes the CodeQL tools for scanning.
45 |       - name: Initialize CodeQL
46 |         uses: github/codeql-action/init@v1
47 |         with:
48 |           languages: ${{ matrix.language }}
49 |           # If you wish to specify custom queries, you can do so here or in a config file.
50 |           # By default, queries listed here will override any specified in a config file.
51 |           # Prefix the list here with "+" to use these queries and those in the config file.
52 |           # queries: ./path/to/local/query, your-org/your-repo/queries@main
53 | 
54 |       # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
55 |       # If this step fails, then you should remove it and run the build manually (see below)
56 |       - name: Autobuild
57 |         uses: github/codeql-action/autobuild@v1
58 | 
59 |       # ℹ️ Command-line programs to run using the OS shell.
60 |       # 📚 https://git.io/JvXDl
61 | 
62 |       # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
63 |       #    and modify them (or add more) to build your code if your project
64 |       #    uses a compiled language
65 | 
66 |       #- run: |
67 |       #   make bootstrap
68 |       #   make release
69 | 
70 |       - name: Perform CodeQL Analysis
71 |         uses: github/codeql-action/analyze@v1
72 | 


--------------------------------------------------------------------------------
/.github/workflows/docker.yml:
--------------------------------------------------------------------------------
 1 | name: Docker
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 | 
 8 | jobs:
 9 |   build-and-push:
10 |     name: Deploy Docker Dev Image
11 |     runs-on: ubuntu-24.04
12 |     steps:
13 |       - name: Checkout code
14 |         uses: actions/checkout@v4
15 | 
16 |       - name: Build and push Docker Dev Image
17 |         uses: openzim/docker-publish-action@v10
18 |         with:
19 |           image-name: openzim/mwoffliner
20 |           on-master: dev
21 |           latest-on-tag: false
22 |           restrict-to: openzim/mwoffliner
23 |           registries: ghcr.io
24 |           credentials: |
25 |             GHCRIO_USERNAME=${{ secrets.GHCR_USERNAME }}
26 |             GHCRIO_TOKEN=${{ secrets.GHCR_TOKEN }}
27 |           repo_description: auto
28 |           repo_overview: auto
29 | 


--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | name: Publish to npmjs.com and ghcr.io
 2 | 
 3 | on:
 4 |   release:
 5 |     types: [published]
 6 | 
 7 | jobs:
 8 |   build:
 9 |     runs-on: ubuntu-24.04
10 |     environment: release
11 | 
12 |     steps:
13 |       - name: Checkout code
14 |         uses: actions/checkout@v4
15 | 
16 |       - name: Installing Node.JS
17 |         uses: actions/setup-node@v4
18 |         with:
19 |           node-version-file: .node-version
20 |           registry-url: 'https://registry.npmjs.org'
21 | 
22 |       - run: npm ci
23 | 
24 |       - run: npm publish
25 |         env:
26 |           NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
27 | 
28 |       - name: Build and push Docker image
29 |         uses: openzim/docker-publish-action@v10
30 |         with:
31 |           image-name: openzim/mwoffliner
32 |           tag-pattern: /^v([0-9.]+)$/
33 |           latest-on-tag: true
34 |           restrict-to: openzim/mwoffliner
35 |           registries: ghcr.io
36 |           credentials: |
37 |             GHCRIO_USERNAME=${{ secrets.GHCR_USERNAME }}
38 |             GHCRIO_TOKEN=${{ secrets.GHCR_TOKEN }}
39 |           repo_description: auto
40 |           repo_overview: auto
41 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | \#*
 2 | *~
 3 | /node_modules
 4 | cac
 5 | */cac
 6 | /out
 7 | */out
 8 | */tmp
 9 | tmp/*
10 | lib/
11 | npm-debug.log
12 | .DS_Store
13 | .env
14 | .nyc_output
15 | coverage
16 | mwo-test-*
17 | .vscode
18 | .tool-versions
19 | output/
20 | src/version.ts
21 | 


--------------------------------------------------------------------------------
/.node-version:
--------------------------------------------------------------------------------
1 | 24.x
2 | 


--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
 1 | \#*
 2 | *~
 3 | /node_modules
 4 | /cac
 5 | */cac
 6 | /out
 7 | */out
 8 | */tmp
 9 | tmp/*
10 | npm-debug.log
11 | .DS_Store
12 | .env
13 | src
14 | test
15 | dev
16 | Changelog
17 | tslint.json
18 | tsconfig*
19 | .*
20 | README.md
21 | CONTRIBUTING.md
22 | docker
23 | mwo-test-*
24 | *tmp
25 | 


--------------------------------------------------------------------------------
/.npmrc:
--------------------------------------------------------------------------------
1 | engine-strict=true


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | # See https://pre-commit.com for more information
 2 | # See https://pre-commit.com/hooks.html for more hooks
 3 | repos:
 4 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 5 |     rev: v4.6.0
 6 |     hooks:
 7 |       - id: trailing-whitespace
 8 |       - id: end-of-file-fixer
 9 |   - repo: https://github.com/pre-commit/mirrors-prettier
10 |     rev: v3.1.0
11 |     hooks:
12 |       - id: prettier
13 |   - repo: https://github.com/pre-commit/mirrors-eslint
14 |     rev: v9.26.0
15 |     hooks:
16 |       - id: eslint
17 |         args: [--config, eslint.config.js]
18 |         types: [file]
19 | 


--------------------------------------------------------------------------------
/.prettierignore:
--------------------------------------------------------------------------------
1 | lib/*
2 | 


--------------------------------------------------------------------------------
/.prettierrc.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "trailingComma": "all",
 3 |   "tabWidth": 2,
 4 |   "semi": false,
 5 |   "singleQuote": true,
 6 |   "printWidth": 180,
 7 |   "overrides": [
 8 |     {
 9 |       "files": "translation/*",
10 |       "options": {
11 |         "tabWidth": 4,
12 |         "printWidth": 1,
13 |         "useTabs": true
14 |       }
15 |     }
16 |   ]
17 | }
18 | 


--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     // Use IntelliSense to learn about possible attributes.
 3 |     // Hover to view descriptions of existing attributes.
 4 |     // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
 5 |     "version": "0.2.0",
 6 |     "configurations": [
 7 |         {
 8 |             "type": "node",
 9 |             "request": "launch",
10 |             "name": "Launch Program",
11 |             "sourceMaps": true,
12 |             "cwd": "${workspaceRoot}",
13 |             "protocol": "inspector",
14 |             "args": [
15 |                 "--nolazy",
16 |                 "-r",
17 |                 "tsconfig-paths/register",
18 |                 "-r",
19 |                 "ts-node/register",
20 |                 "${workspaceRoot}/src/cli.ts",
21 |                 /*
22 |                 Args:
23 |                 */
24 |                 "--mwUrl=https://en.wikipedia.org",
25 |                 "--adminEmail=admin@kiwix.com",
26 |                 // "--speed=10",
27 |                 // "--verbose",
28 |                 "--format=nozim",
29 |                 "--articleList=./dev/articleList",
30 |                 // "--articleList=https://ftp.nluug.nl/pub/kiwix/wp1/enwiki_2019-01/tops/100",
31 |                 // "--articleList=https://download.kiwix.org/wp1/enwiki_2019-04/projects/Football",
32 |                 "--redis=redis://127.0.0.1:6379",
33 |                 // "--customProcessor=./extensions/wiktionary_fr.js"
34 |                 // "--getCategories=true"
35 |                 // "--customZimFavicon=test.png"
36 |             ]
37 |         },
38 |         {
39 |             "type": "node",
40 |             "request": "launch",
41 |             "name": "Tests",
42 |             "sourceMaps": true,
43 |             "cwd": "${workspaceRoot}",
44 |             "protocol": "inspector",
45 |             "args": [
46 |                 "--nolazy",
47 |                 "-r",
48 |                 "tsconfig-paths/register",
49 |                 "-r",
50 |                 "ts-node/register",
51 |                 "${file}"
52 |             ]
53 |         }
54 |     ]
55 | }


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | docker/Dockerfile


--------------------------------------------------------------------------------
/dev/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | echo "Building at [$(date)]"
 4 | npx tsc -p tsconfig.build.json
 5 | # Remove tslint disable
 6 | npx replace "\/\/ tslint:disable-next-line\n" "" ./lib/cli.js
 7 | # typescript compiler adds a semicolon to the second line we need to remove that too
 8 | npx replace "':'; //#" "':' //#" ./lib/cli.js
 9 | chmod +x ./lib/cli.js
10 | echo "Build Complete at [$(date)]"
11 | 


--------------------------------------------------------------------------------
/dev/watch.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # TODO: Check os and support MacOS and Linux
3 | fswatch -0 src | xargs -0 -n 1 -I {} ./dev/build.sh
4 | 


--------------------------------------------------------------------------------
/docker/.custom-bashrc:
--------------------------------------------------------------------------------
1 | source ~/.old-bashrc
2 | alias mwoffliner='mwoffliner --redis=/dev/shm/redis.sock'
3 | alias redis-cli='redis-cli -s /dev/shm/redis.sock'
4 | 


--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM redis:7 AS redis
 2 | 
 3 | FROM node:24-bookworm
 4 | LABEL org.opencontainers.image.source=https://github.com/openzim/mwoffliner
 5 | 
 6 | COPY --from=redis /usr/local/bin/redis-* /usr/local/bin/
 7 | RUN redis-cli --version
 8 | RUN redis-server --version
 9 | 
10 | COPY docker/node_redis-entrypoint.sh /usr/local/bin/
11 | 
12 | # Configure launch environment
13 | WORKDIR /
14 | RUN mv /root/.bashrc /root/.old-bashrc
15 | COPY docker/.custom-bashrc /root/.bashrc
16 | 
17 | ENV REDIS=/dev/shm/redis.sock
18 | RUN printf '#!/bin/bash\n/usr/local/bin/mwoffliner --redis=$REDIS "$@"' > /usr/local/sbin/mwoffliner
19 | RUN chmod +x /usr/local/sbin/mwoffliner
20 | 
21 | # Install dependencies
22 | RUN apt-get update && \
23 |     apt-get install -y --no-install-recommends \
24 |     make g++ curl git && \
25 |     apt-get clean && \
26 |     rm -rf /var/lib/apt/lists/*
27 | 
28 | # Install mwoffliner
29 | WORKDIR /tmp/mwoffliner
30 | COPY *.json ./
31 | COPY dev dev
32 | RUN mkdir src && \
33 |     # create fake cli.ts so that install can complete
34 |     printf '#!/usr/bin/env -S node' > src/cli.ts
35 | RUN npm i
36 | COPY src src
37 | COPY res res
38 | COPY translation translation
39 | COPY extensions extensions
40 | COPY index.js .
41 | RUN npm i
42 | RUN npm i -g .
43 | 
44 | 
45 | # Configure launch environment
46 | WORKDIR /
47 | RUN mv /root/.bashrc /root/.old-bashrc
48 | COPY docker/.custom-bashrc /root/.bashrc
49 | 
50 | ENTRYPOINT ["node_redis-entrypoint.sh"]
51 | 
52 | CMD ["mwoffliner"]
53 | 


--------------------------------------------------------------------------------
/docker/README.md:
--------------------------------------------------------------------------------
 1 | **MWoffliner Docker image** allows to quickly benefit of MWoffliner
 2 | without having to install all dependencies. You just need a working
 3 | [Docker](https://www.docker.com).
 4 | 
 5 | ## Standalone
 6 | 
 7 | MWoffliner requires a [Redis](https://www.redis.io) server to run.
 8 | 
 9 | For convenience purpose, MWoffliner image bundles a Redis daemon launched in the background.
10 | 
11 | This bundled Redis daemon is configured to be used only through a unix socket and to work exclusively from memory (no writes to disk).
12 | 
13 | Use of this bundled server is transparent as `mwoffliner` command is aliased to `mwoffliner --redis /dev/shm/redis.sock`.
14 | 
15 | To run the following examples, you need first to create a local `out`
16 | directory in you current directory. Created ZIM files will be written
17 | there.
18 | 
19 | ```sh
20 | docker run --volume=$(pwd)/out:/out -ti ghcr.io/openzim/mwoffliner mwoffliner --help
21 | ```
22 | 
23 | ## With dedicated Redis
24 | 
25 | You can also use a dedicated redis container with MWoffliner.
26 | 
27 | Run a Redis docker container with:
28 | 
29 | ```sh
30 | docker run --volume=$(pwd)/out:/out --name=redis -d redis
31 | ```
32 | 
33 | ... and then run the moffliner interactively (remember to specify `--redis` in command):
34 | 
35 | ```sh
36 | $docker run --volume=$(pwd)/out:/out --link=redis:redis --name=mwoffliner -ti ghcr.io/openzim/mwoffliner
37 | ```
38 | 
39 | ... or non-interactively, directly with a command line (this is an
40 | example, the second line is the mwoffliner command itself):
41 | 
42 | ```sh
43 | docker run --volume=$(pwd)/out:/out --link=redis:redis --name=mwoffliner -e REDIS="redis://redis" ghcr.io/openzim/mwoffliner \
44 |        mwoffliner --verbose --mwUrl=https://en.wikipedia.org/ --adminEmail=foo@bar.net
45 | ```
46 | 
47 | ## With Docker compose
48 | 
49 | This allows to run both, Redis & MWoffliner, containers simultaneously:
50 | 
51 | ```sh
52 | docker-compose --file docker-compose.yml run mwoffliner
53 | ```
54 | 
55 | ## Build the Docker image
56 | 
57 | Run from the repository root:
58 | ```sh
59 | docker build . -f docker/Dockerfile -t ghcr.io/openzim/mwoffliner
60 | ```
61 | 


--------------------------------------------------------------------------------
/docker/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: "2"
 2 | services:
 3 |     redis:
 4 |         image: "redis"
 5 |     mwoffliner:
 6 |         stdin_open: true
 7 |         tty: true
 8 |         build:
 9 |             dockerfile: Dockerfile
10 |             context: ./
11 |         links:
12 |             - redis
13 | 


--------------------------------------------------------------------------------
/docker/node_redis-entrypoint.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | set -e
 3 | 
 4 | echo "starting redis-server in the background…"
 5 | nohup redis-server --save "" --appendonly no --unixsocket /dev/shm/redis.sock --unixsocketperm 744 --port 0 --bind 127.0.0.1 > /dev/shm/redis.log 2>&1&
 6 | # allow redis to start before we continue and bind
 7 | sleep 2
 8 | 
 9 | exec "$@"
10 | 


--------------------------------------------------------------------------------
/docs/functional_architecture.md:
--------------------------------------------------------------------------------
 1 | # Functional Architecture
 2 | 
 3 | This document describes a high-level overview of how mwoffliner scraper works.
 4 | 
 5 | At a high level, mwoffliner is divided into following sequence of actions.
 6 | 
 7 | - retrieve list of articles to include and their metadata
 8 | - for every article:
 9 |   - retrieve its parsed HTML (Wikitext transformed into HTML) and JS/CSS dependencie
10 |   - adapt / render it for proper operation within the ZIM file (includes detection of media dependencies)
11 |   - save rendered article HTML into the ZIM
12 | - for every file dependency (JS/CSS/media)
13 |   - if its an image, download it either from S3 cache (images only) or from online and recompress when possible
14 |   - otherwise download it from online
15 |   - save them into the ZIM file
16 | 
17 | The scraper supports flavours, which are variants of the ZIM (e.g. without images, with images but without videos, ...).
18 | 
19 | The process above is repeated for every flavour requested.
20 | 
21 | ## Retrieving article HTML and rendering
22 | 
23 | In order to retrieve article HTML and render it, multiple solutions have been identified.
24 | 
25 | As of today, 5 renderers (way to download article + render it to ZIM compatible HTML) are implemented:
26 | 
27 | - WikimediaDesktop
28 | - WikimediaMobile
29 | - RestApi
30 | - VisualEditor
31 | - ActionParse
32 | 
33 | WikimediaDesktop and WikimediaMobile are only available on Wikimedia Mediawikis.
34 | 
35 | Availability of RestApi and VisualEditor is subject to Mediawiki admin decision to support it or not. RestApi is available by default but might be blocked by admin. VisualEditor is an extension which might be installed or not.
36 | 
37 | ActionParse is available since 1.16.0 (2010) and is anyway a requirement for other APIs.
38 | 
39 | Only ActionParse (most recent renderer) implements a thorough skin support (see below about skin).
40 | 
41 | All renderers but ActionParse needs two HTTP queries: one to retrieve the article HTML and one to retrieve its metadata (to a 'simplified' ActionParse URL in fact).
42 | 
43 | Renderer is automatically selected based on its availability and mwoffliner own preference. ActionParse is the preferred renderer since 1.15.0 due to its general availability and support of skins.
44 | 
45 | ### Skins
46 | 
47 | In Mediwikis, rendering of Wikitext into HTML works around a concept of skin. A skin is a mix of HTML template and CSS+JS dependencies. It defines both the visual appareance of the rendered Wikitext but also everything "around it".
48 | 
49 | Since most wikis have adapted their content to their skin (and vice versa), it is mostly mandatory to use the skin inside the ZIM, both for proper rendering and for a visual appareance similar to online website (users don't mind about technical details, they want the wiki to be the same inside the ZIM than online).
50 | 
51 | Skin detection is automated in mwoffliner for now (see https://github.com/openzim/mwoffliner/issues/2213).
52 | 
53 | For now, only `vector` (legacy) and `vector-2022` are supported, and only with ActionParse renderer. Only `vector-2022` is the truely responsive skin, providing ultimate rendering on mostly all screen sizes.
54 | 
55 | 
56 | ### JS / CSS dependencies
57 | 
58 | ActionParse API is returning the list of JS and CSS dependencies for a given article, by inspecting what Wikitext is using.
59 | 
60 | The special `startup` JS module is missing from results because always used anyway.
61 | 
62 | There are still some known hiccups around this (see https://github.com/openzim/mwoffliner/issues/2212 and https://github.com/openzim/mwoffliner/issues/2215 for instance), and probably more to come, this is a complex area of Mediawiki.
63 | 


--------------------------------------------------------------------------------
/eslint.config.js:
--------------------------------------------------------------------------------
 1 | import js from '@eslint/js'
 2 | import eslint from '@eslint/js'
 3 | import tseslint from 'typescript-eslint'
 4 | import tsparser from '@typescript-eslint/parser'
 5 | import { globalIgnores } from 'eslint/config'
 6 | 
 7 | export default [
 8 |   js.configs.recommended,
 9 |   globalIgnores(['lib/**', 'res/**', '**/*.js', '**/*.cjs']),
10 |   eslint.configs.recommended,
11 |   ...tseslint.configs.recommended,
12 |   {
13 |     languageOptions: {
14 |       parser: tsparser,
15 |       parserOptions: {
16 |         project: 'tsconfig.json',
17 |         sourceType: 'module',
18 |       },
19 |     },
20 |     rules: {
21 |       '@typescript-eslint/no-explicit-any': 'off',
22 |     },
23 |   },
24 | ]
25 | 


--------------------------------------------------------------------------------
/extensions/wiktionary_fr.js:
--------------------------------------------------------------------------------
 1 | module.exports = class WiktionaryFR { // implements CustomProcessor
 2 |     async shouldKeepArticle(articleId, doc) {
 3 |         const frenchTitle = doc.querySelector(`#fr.sectionlangue`);
 4 |         return !!frenchTitle;
 5 |     }
 6 |     async preProcessArticle(articleId, doc) {
 7 |         const nonFrenchTitles = Array.from(doc.querySelectorAll(`.sectionlangue:not(#fr)`));
 8 |         for (const title of nonFrenchTitles) {
 9 |             title.closest('details').remove();
10 |         }
11 | 
12 |         const h4titles = Array.from(doc.querySelectorAll(`h4`));
13 |         for (const h4title of h4titles) {
14 |             h4title.closest('details').remove();
15 |         }
16 |         //Remove h2 summary title
17 |         doc.querySelector('h2').closest('summary').setAttribute('style', 'display:none! important')
18 | 
19 |         return doc;
20 |     }
21 | }
22 | 


--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 | var mwoffliner = require('./lib/mwoffliner.lib.js')
2 | module.exports = mwoffliner
3 | 


--------------------------------------------------------------------------------
/jest.config.cjs:
--------------------------------------------------------------------------------
 1 | module.exports = {
 2 |   extensionsToTreatAsEsm: ['.ts'],
 3 |   moduleNameMapper: {
 4 |     '^(\\.{1,2}/.*)\\.js$': '$1',
 5 |   },
 6 |   moduleFileExtensions: ["ts", "js"],
 7 |   collectCoverage: false,
 8 |   verbose: true,
 9 |   transform: {
10 |     '^.+\\.ts?$': [
11 |       'ts-jest',
12 |       {
13 |         useESM: true,
14 |       },
15 |     ],
16 |   },
17 | }
18 |   


--------------------------------------------------------------------------------
/res/DMSans-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openzim/mwoffliner/edf4cdca3978951fad7da28f00e5a00e59d2a8c1/res/DMSans-Regular.ttf


--------------------------------------------------------------------------------
/res/article_list_home.js:
--------------------------------------------------------------------------------
 1 | 
 2 | (function () {
 3 | 
 4 |     document.addEventListener("DOMContentLoaded", function () {
 5 | 
 6 |         var body = document.body;
 7 |         if (body.classList.contains('article-list-home')) {
 8 |             // 'import' to avoid not defined js error
 9 |             var imagesLoaded = window['imagesLoaded'];
10 |             var Masonry = window['Masonry'];
11 |             var grid = document.getElementById('content');
12 | 
13 |             // js available, remove nojs styles
14 |             document.getElementsByClassName('mw-body-content')[0].classList.remove('nojs');
15 | 
16 |             imagesLoaded(grid, function () {
17 |                 new Masonry(grid, {
18 |                     itemSelector: '.item'
19 |                 });
20 |             });
21 |         }
22 | 
23 |     });
24 | })();


--------------------------------------------------------------------------------
/res/download_error_placeholder.css:
--------------------------------------------------------------------------------
  1 | @font-face {
  2 |     font-family:"DM Sans";
  3 |     font-style:  normal;
  4 |     font-weight: 400;
  5 |     src : url('./DMSans-Regular.ttf');
  6 |   }
  7 |   @font-face {
  8 |     font-family:"DM Sans Bold";
  9 |     font-style:  normal;
 10 |     font-weight: 700;
 11 |     src : url('./DMSans-Regular.ttf');
 12 |   }
 13 |   
 14 |   body {
 15 |     background: linear-gradient(to bottom right, #ffffff, #e6e6e6);
 16 |     background-repeat: no-repeat;
 17 |     background-attachment: fixed;
 18 |   }
 19 |   
 20 |   header {
 21 |     width: 100%;
 22 |     margin: auto;
 23 |     text-align: center;
 24 |   
 25 |     margin-top: 15%;
 26 |     margin-bottom: 15%;
 27 |   }
 28 |   
 29 |   header img {
 30 |     width: 60%;
 31 |     min-width: 200px;
 32 |     max-width: 500px;
 33 |     max-height: 300px;
 34 |   }
 35 |   
 36 |   section {
 37 |     display: flex;
 38 |     flex-direction: column;
 39 |     align-items: center;
 40 |   }
 41 |   
 42 |   header, .intro {
 43 |     font-family: "DM Sans";
 44 |   }
 45 |   
 46 |   .intro {
 47 |     font-size: 1em;
 48 |     padding: 0 10%;
 49 |     line-height: 1.2em;
 50 |     text-align: center;
 51 |   }
 52 |   
 53 |   .intro h1 {
 54 |     font-family: "DM Sans Bold";
 55 |     font-size: 1.2em;
 56 |   }
 57 |   
 58 |   .intro code {
 59 |     font-family: monospace;
 60 |     font-size: 1.1em;
 61 |     word-break: break-all;
 62 |   }
 63 |   
 64 |   .intro a, .intro a:active, .intro a:visited {
 65 |     color: #00b4e4;
 66 |     text-decoration: none;
 67 |     word-break: break-all;
 68 |   }
 69 |   
 70 |   .advice {
 71 |     width: 80%;
 72 |     margin: auto;
 73 |     margin-bottom: 15%;
 74 |     margin-top: 5em;
 75 |   
 76 |     background-color: #ffffff;
 77 |     border-radius: 1rem;
 78 |     border: 1px solid #b7b7b7;
 79 |   
 80 |     padding: 2em;
 81 |   
 82 |     font-family: "DM Sans";
 83 |     font-size: .9em;
 84 |     box-sizing: border-box;
 85 |   
 86 |     align-items: normal;
 87 |   }
 88 |   
 89 |   .advice p {
 90 |     margin-bottom: 1em;
 91 |   }
 92 |   
 93 |   .advice p:first-child {
 94 |     margin-top: 0;
 95 |   }
 96 |   
 97 |   .advice p.list-intro {
 98 |     margin: 0;
 99 |   }
100 |   
101 |   .advice ul {
102 |     list-style-type: square;
103 |     margin: 0;
104 |     padding: 0 1em;
105 |   }
106 |   
107 |   .advice ul li {
108 |     line-height: 2em;
109 |   }
110 |   
111 |   .advice p:last-child {
112 |     margin-bottom: 0;
113 |   }
114 |   
115 |   
116 |   /* sm: 640px+ */
117 |   @media (width >= 40rem) {
118 |     header {
119 |       margin-bottom: 1em;
120 |       margin-top: 5em;
121 |     }
122 |   
123 |     header img {
124 |       width: 50%;
125 |     }
126 |   
127 |     .intro h1 {
128 |       font-size: 2em;
129 |     }
130 |   
131 |     .advice {
132 |       width: 50%;
133 |     }
134 |   }
135 |   
136 |   /* md: 768px+ */
137 |   @media (width >= 48rem) {}
138 |   
139 |   /* lg: 1024px+ */
140 |   @media (width >= 64rem) {}
141 |   
142 |   /* xl: 1280px+ */
143 |   @media (width >= 80rem) {
144 |     .intro h1 {
145 |       font-size: 3.4em;
146 |     }
147 |   }
148 |   
149 |   /* 2xl: 1536px+ */
150 |   @media (width >= 96rem) {
151 |     header img {
152 |       width: 25%;
153 |       min-width: 200px;
154 |       max-width: 500px;
155 |       max-height: 300px;
156 |     }
157 |   
158 |     .advice {
159 |       width: 25%;
160 |       min-width: 200px;
161 |       min-width: 300px;
162 |       max-width: 500px;
163 |     }
164 |   }
165 |   


--------------------------------------------------------------------------------
/res/external-link.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" viewBox="0 0 12 12">
2 | 	<title>
3 | 		external link
4 | 	</title>
5 | 	<path fill="#36c" d="M6 1h5v5L8.86 3.85 4.7 8 4 7.3l4.15-4.16zM2 3h2v1H2v6h6V8h1v2a1 1 0 0 1-1 1H2a1 1 0 0 1-1-1V4a1 1 0 0 1 1-1"/>
6 | </svg>


--------------------------------------------------------------------------------
/res/footer.css:
--------------------------------------------------------------------------------
 1 | .zim-footer {
 2 |     clear:both; 
 3 |     background-image:linear-gradient(180deg, #E8E8E8, white); 
 4 |     border-top: dashed 2px #AAAAAA; 
 5 |     padding: 0.5em 0.5em 0.5em 0.5em; 
 6 |     margin-top: 1em;
 7 | }
 8 | 
 9 | .zim-footer a {
10 |     background-image: url(./external-link.svg);
11 |     background-position: center right;
12 |     background-repeat: no-repeat;
13 |     background-size: 0.857em;
14 |     padding-right: 1em;
15 | }


--------------------------------------------------------------------------------
/res/mobile_main_page.css:
--------------------------------------------------------------------------------
  1 | html {
  2 |     box-sizing: border-box;
  3 | }
  4 | 
  5 | /* *,
  6 | *:before,
  7 | *:after {
  8 |     box-sizing: inherit;
  9 | } */
 10 | 
 11 | body {
 12 |     margin: 0;
 13 |     border-width: 0;
 14 |     padding: 0;
 15 |     display: flex;
 16 |     justify-content: center;
 17 | }
 18 | 
 19 | h1,
 20 | h2 {
 21 |     color: #333;
 22 |     text-shadow: #1114 1px 1px 1px;
 23 |     text-align: center;
 24 |     border-bottom: none;
 25 |     padding: 0;
 26 | }
 27 | 
 28 | #container {
 29 |     margin: 0;
 30 |     border-width: 0;
 31 |     padding: 0;
 32 |     display: flex;
 33 |     justify-content: center;
 34 |     background:black;
 35 |     position: absolute;
 36 |     left:0;
 37 |     top:0;
 38 |     height:100%;
 39 |     width:100%;
 40 |     overflow: auto;
 41 | }
 42 | 
 43 | #content {
 44 |     width: 100%;
 45 |     overflow: hidden;
 46 |     font-size: 1em;
 47 |     background: black;
 48 |     max-width: 8096px;
 49 | }
 50 | 
 51 | #list {
 52 |     background: white;
 53 | }
 54 | 
 55 | .nojs #content {
 56 |     column-count: 4;
 57 |     column-gap: 0;
 58 | }
 59 | 
 60 | .item {
 61 |     display: block;
 62 |     position: relative;
 63 |     overflow: hidden;
 64 | }
 65 | 
 66 | .nojs #content .item {
 67 |     display: inline-block;
 68 |     width: 100%;
 69 | }
 70 | 
 71 | .item img {
 72 |     width: 100%;
 73 |     height: auto;
 74 |     transition: all 0.3s;
 75 |     transform: scale(1);
 76 |     background: white;
 77 | }
 78 | 
 79 | .item:hover img {
 80 |     transform: scale(1.1);
 81 | }
 82 | 
 83 | .item figure {
 84 |     margin: 0;
 85 | }
 86 | 
 87 | .item figcaption {
 88 |     position: absolute;
 89 |     bottom: 0;
 90 |     width: calc(100% - 1.2em); /* remove left/right padding */
 91 |     vertical-align: bottom;
 92 |     background: rgba(48, 48, 48, 0.5);
 93 |     color: white;
 94 |     padding: 0.6em;
 95 |     font-weight: bold;
 96 |     transition: all 0.3s;
 97 |     transform: scale(1);
 98 |     overflow: hidden;
 99 |     text-overflow: ellipsis;
100 | }
101 | 
102 | /* .item:hover figcaption {
103 |     transform: scale(1);
104 |     bottom: 50%;
105 | } */
106 | 
107 | #footer {
108 |     margin: 0;
109 | }
110 | 
111 | #footer ul {
112 |     margin: 0;
113 |     margin-left: 10%;
114 | }
115 | 
116 | @media only screen and (min-width: 8096px) {
117 |     h1 {
118 |         font-size: 0.75vw;
119 |     }
120 | 
121 |     h2 {
122 |         font-size: 0.5vw;
123 |     }
124 | 
125 |     #content .item {
126 |         width: 3.125%;
127 |     }
128 | 
129 |     .nojs #content {
130 |         column-count: 32;
131 |     }
132 | 
133 |     ul {
134 |         column-count: 32;
135 |     }
136 | }
137 | 
138 | @media only screen and (min-width: 4048px) {
139 |     h1 {
140 |         font-size: 1.5vw;
141 |     }
142 | 
143 |     h2 {
144 |         font-size: 1vw;
145 |     }
146 | 
147 |     #content .item {
148 |         width: 6.25%;
149 |     }
150 | 
151 |     .nojs #content {
152 |         column-count: 16;
153 |     }
154 | 
155 |     ul {
156 |         column-count: 16;
157 |     }
158 | }
159 | 
160 | @media only screen and (max-width: 4047px) and (min-width: 2024px) {
161 |     h1 {
162 |         font-size: 3vw;
163 |     }
164 | 
165 |     h2 {
166 |         font-size: 2vw;
167 |     }
168 | 
169 |     #content .item {
170 |         width: 12.5%;
171 |     }
172 | 
173 |     .nojs #content {
174 |         column-count: 8;
175 |     }
176 | 
177 |     ul {
178 |         column-count: 8;
179 |     }
180 | }
181 | 
182 | @media only screen and (max-width: 2023px) and (min-width: 1024px) {
183 |     h1 {
184 |         font-size: 6vw;
185 |     }
186 | 
187 |     h2 {
188 |         font-size: 4vw;
189 |     }
190 | 
191 |     #content .item {
192 |         width: 25%;
193 |     }
194 | 
195 |     .nojs #content {
196 |         column-count: 4;
197 |     }
198 | 
199 |     ul {
200 |         column-count: 4;
201 |     }
202 | }
203 | 
204 | @media only screen and (max-width: 1023px) and (min-width: 768px) {
205 |     h1 {
206 |         font-size: 6vw;
207 |     }
208 | 
209 |     h2 {
210 |         font-size: 4vw;
211 |     }
212 | 
213 |     #content {
214 |         font-size: 1em;
215 |     }
216 | 
217 |     #content .item {
218 |         width: 33.3333%;
219 |     }
220 | 
221 |     .nojs #content {
222 |         column-count: 3;
223 |     }
224 | 
225 |     ul {
226 |         column-count: 2;
227 |     }
228 | 
229 |     #footer ul li {
230 |         font-size: 0.9em;
231 |     }
232 | }
233 | 
234 | @media only screen and (max-width: 767px) {
235 |     h1 {
236 |         font-size: 7vw;
237 |     }
238 | 
239 |     h2 {
240 |         font-size: 5vw;
241 |     }
242 | 
243 |     #content {
244 |         font-size: 1.2em;
245 |     }
246 | 
247 |     #content .item {
248 |         width: 50%
249 |     }
250 | 
251 |     .nojs #content {
252 |         column-count: 2;
253 |     }
254 | 
255 |     ul {
256 |         column-count: 1;
257 |     }
258 | 
259 |     #footer ul li {
260 |         font-size: 1em;
261 |     }
262 | }


--------------------------------------------------------------------------------
/res/script.js:
--------------------------------------------------------------------------------
 1 | function importScript() { return 1 } // this is to avoid the error from site.js
 2 | 
 3 | window.onload = function () {
 4 | 
 5 |     /* Collapsing of the sections */
 6 |     $('.mw-ref').on({
 7 |         click: function (ev) {
 8 |             var targetId = ev.target.hash || ev.target.parentNode.hash;
 9 |             var targetEl = document.getElementById(targetId.slice(1));
10 |             var refDetails = $(targetEl).closest('details');
11 |             refDetails.attr('open', true);
12 |         }
13 |     });
14 | 
15 |     /* If small screen size and contains section(s) */
16 |     if (window.innerWidth < 720 && $('details')) {
17 | 
18 |         /* Find the highest level section in window */
19 |         const sectionTopLevel = Math.min(...$('details').
20 |             map( function() { return $(this).attr('data-level'); }).get());
21 | 
22 |         /* Collapse all highest level section if more than one */
23 |         if ($(`details[data-level=${sectionTopLevel}]`).length !== 1) {
24 |             $(`details[data-level=${sectionTopLevel}]`).attr('open', false);
25 |         }
26 |     }
27 | 
28 |     /* Add the user-agent to allow dedicated CSS rules (like for KaiOS) */
29 |     document.querySelector('body').setAttribute('data-useragent',  navigator.userAgent);
30 | }
31 | 
32 | /* WebP Polyfill */
33 | var webpScripts = ['./webpHeroPolyfill.js',
34 |                    './webpHeroBundle.js',
35 |                    './webpHandler.js'];
36 | webpScripts = webpScripts.map(function(scriptUrl) {
37 |     const articleId = document.getElementById('script-js').dataset.articleId;
38 | 
39 |     return (typeof(articleId)) ? '../'.repeat(articleId.split('/').length - 1) + scriptUrl : scriptUrl;
40 | });
41 | var script = document.createElement('script');
42 | script.type = 'text/javascript';
43 | script.src = webpScripts.pop();;
44 | script.onload = function () {
45 |     new WebPHandler({
46 |         scripts_urls: webpScripts,
47 |         on_ready: function (handler) { handler.polyfillDocument(); },
48 |     });
49 | }
50 | document.getElementsByTagName('head')[0].appendChild(script);
51 | 


--------------------------------------------------------------------------------
/res/style.css:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Custom style meant to match the design of mwoffliner mobile version to the mobile version of wikipedia
  3 |  */
  4 | 
  5 | .mw-body-content {
  6 |     font-family: 'Helvetica Neue', 'Helvetica', 'Nimbus Sans L', 'Arial', 'Liberation Sans', sans-serif;
  7 |     font-size: 16px;
  8 |     line-height: 26px;
  9 | }
 10 | 
 11 | .mw-body-content p {
 12 |     margin: 0.5em 0 1em 0;
 13 | }
 14 | 
 15 | .mw-body h1,
 16 | .mw-body summary {
 17 | 	outline: none;
 18 | }
 19 | 
 20 | .mw-body h2,
 21 | .mw-body h3,
 22 | .mw-body h4,
 23 | .mw-body h5,
 24 | .mw-body h6 {
 25 |     padding: 0.5em 0;
 26 | }
 27 | 
 28 | .mw-body h3,
 29 | .mw-body h2 {
 30 |     clear: both;
 31 |     width: 100%;
 32 |     margin-bottom: 0.5em;
 33 |     border-bottom: solid 1px #eaecf0;
 34 |     font-family: 'Linux Libertine', 'Georgia', 'Times', serif;
 35 | }
 36 | 
 37 | 
 38 | .mw-body h3 {
 39 |     border-bottom: 0px solid #eaecf0;
 40 |     font-weight: initial;
 41 | }
 42 | 
 43 | .mw-body h4 {
 44 |     border-bottom: 0px solid #eaecf0;
 45 | }
 46 | 
 47 | .mw-body h1.article-header {
 48 |     border-bottom: 1px solid #eaecf0;
 49 | }
 50 | 
 51 | @media (max-width: 720px) {
 52 |     .content .thumb .thumbinner>.thumbcaption {
 53 |         flex: 1 1 auto !important;
 54 |     }
 55 | }
 56 | 
 57 | .mwo-catlinks {
 58 |     border: 1px solid #a2a9b1;
 59 |     background-color: #f8f9fa;
 60 |     padding: 5px;
 61 |     margin-top: 1em;
 62 |     clear: both;
 63 | }
 64 | 
 65 | .mwo-catlinks ul {
 66 |     display: inline;
 67 |     margin: 0;
 68 |     padding: 0;
 69 |     list-style: none none;
 70 | }
 71 | 
 72 | .mwo-catlinks li {
 73 |     display: inline-block;
 74 |     line-height: 1.25em;
 75 |     border-left: 1px solid #a2a9b1;
 76 |     margin: 0.125em 0;
 77 |     padding: 0 0.5em;
 78 |     zoom: 1;
 79 | }
 80 | 
 81 | .mwo-catlinks li:first-child {
 82 |     padding-left: 0.25em;
 83 |     border-left: 0;
 84 | }
 85 | 
 86 | .mwo-groups {
 87 |     -webkit-column-count: 3;
 88 |     -moz-column-count: 3;
 89 |     column-count: 3;
 90 |     -webkit-column-width: 24em;
 91 |     -moz-column-width: 24em;
 92 |     column-width: 24em;
 93 | }
 94 | 
 95 | .mwo-groups h3 {
 96 |     line-height: 1.6;
 97 |     margin-top: 0.3em;
 98 |     margin-bottom: 0;
 99 |     padding-bottom: 0;
100 | }
101 | 
102 | summary.section-heading {
103 |     display: list-item !important;
104 |     cursor: pointer;
105 | }
106 | 
107 | .section-heading>h1,
108 | .section-heading>h2,
109 | .section-heading>h3,
110 | .section-heading>h4 {
111 |     display: inline;
112 |     border: none;
113 | }
114 | 
115 | body[data-useragent*='KAIOS'] h1 {
116 |     display: none !important;
117 | }
118 | 


--------------------------------------------------------------------------------
/res/templates/article_list_home.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | 
 4 | <head>
 5 |     <meta charset="UTF-8" />
 6 |     <title></title>
 7 |     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
 8 |     <script src="./masonry.min.js"></script>
 9 |     <script src="./script.js"></script>
10 | </head>
11 | 
12 | <body class='article-list-home mw-body-content nojs'>
13 |     <!-- <ul id="list"></ul> -->
14 |     <!-- <div id="container">
15 |         <div id="content">
16 |             <div id="masonry"></div>
17 |         </div>
18 |     </div> -->
19 | </body>
20 | 
21 | </html>


--------------------------------------------------------------------------------
/res/templates/categories.html:
--------------------------------------------------------------------------------
 1 | <br />
 2 | <div class="mwo-catlinks">
 3 |     Categories:
 4 |     <ul>
 5 |         {% for category in categories %}
 6 |         <li>
 7 |             <a href="{{category.url}}" rel="mwo:NoRewrite">{{category.name}}</a>
 8 |         </li>
 9 |         {% endfor %}
10 |     </ul>
11 | </div>


--------------------------------------------------------------------------------
/res/templates/download_error_placeholder.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 |   <head>
 4 |     <meta charset="utf-8">
 5 |     <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no" />
 6 |     <title></title>
 7 |     <link type="text/css" href="{{ relative_file_path }}download_error_placeholder.css" rel="stylesheet" />
 8 |   </head>
 9 |   <body>
10 |     <header>
11 |         <img src="{{ relative_file_path }}article_not_found.svg"
12 |              alt="oops image"
13 |              aria-label="oops image"
14 |              title="oops image">
15 |     </header>
16 |     <section class="intro">
17 |       <h1>{{heading}}</h1>
18 |       <p>{{message}}</p>
19 |     </section>
20 |     <section class="advice">
21 |       <p>{{advice_line1}}</p>
22 |       <p>{{advice_line2}}</p>
23 |       {% if display_third_line %}<p>{{advice_line3}}</p>{% endif %}
24 |     </section>
25 |   </body>
26 | </html>


--------------------------------------------------------------------------------
/res/templates/footer.html:
--------------------------------------------------------------------------------
1 | <div class="zim-footer">
2 |     {{ disclaimer|safe }}
3 | </div>
4 | 


--------------------------------------------------------------------------------
/res/templates/lead_section_wrapper.html:
--------------------------------------------------------------------------------
1 | <h1 class="section-heading" tabindex="0" aria-haspopup="true" data-section-id='0'>
2 |   <span class="mw-headline" id="title_0">{% autoescape false %}{{ lead_display_title }}{% endautoescape %}</span>
3 | </h1>
4 | <div id="mf-section-0" class="mf-section-0" aria-pressed="true" aria-expanded="true">
5 |   {% autoescape false %}{{ lead_section_text }}{% endautoescape %}
6 | </div>


--------------------------------------------------------------------------------
/res/templates/pageVector2022.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html class='client-js'>
 3 |   <head>
 4 |     <meta charset="UTF-8"/>
 5 |     <title></title>
 6 |     <meta name="viewport" content="width=device-width, initial-scale=1.0"/> __ARTICLE_CANONICAL_LINK__ __ARTICLE_CSS_LIST__
 7 |     <link rel="stylesheet" type="text/css" href="__RELATIVE_FILE_PATH__footer.css">
 8 |     <link rel="stylesheet" type="text/css" href="__RELATIVE_FILE_PATH__vector-2022.css">
 9 |   </head>
10 |   <body class="skin--responsive skin-vector skin-vector-search-vue mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject mw-editable skin-vector-2022 action-view uls-dialog-sticky-hide" cz-shortcut-listen="true">
11 |     <div class="mw-page-container">
12 |       <div class="mw-page-container-inner">
13 |         <div class="mw-content-container">
14 |           <main id="content" class="mw-body">
15 |             <header class="mw-body-header vector-page-titlebar">
16 |               <h1 id="firstHeading" class="firstHeading mw-first-heading">
17 |                 <span id="openzim-page-title" class="mw-page-title-main"></span>
18 |               </h1>
19 |             </header>
20 |             <a id="top"></a>
21 |             <div id="bodyContent" class="vector-body ve-init-mw-desktopArticleTarget-targetContainer" aria-labelledby="firstHeading" data-mw-ve-target-container="">
22 |               <div id="mw-content-text" class="mw-body-content"></div>
23 |             </div>
24 |           </main>
25 |         </div>
26 |       </div>
27 |     </div>    
28 |     __ARTICLE_CONFIGVARS_LIST__
29 |     __ARTICLE_JS_LIST__
30 |   </body>
31 | </html>
32 | 


--------------------------------------------------------------------------------
/res/templates/pageVectorLegacy.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html class="client-js">
 3 |   <head>
 4 |     <meta charset="UTF-8" />
 5 |     <title></title>
 6 |     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
 7 |     __ARTICLE_CANONICAL_LINK__ __ARTICLE_CSS_LIST__
 8 |     <link rel="stylesheet" type="text/css" href="__RELATIVE_FILE_PATH__footer.css">
 9 |     <link rel="stylesheet" type="text/css" href="__RELATIVE_FILE_PATH__vector.css">
10 |   </head>
11 | 
12 |   <body
13 |     class="mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject mw-editable skin-vector action-view minerva--history-page-action-enabled skin-vector-legacy"
14 |     cz-shortcut-listen="true"
15 |   >
16 |     <div id="content" class="mw-body" role="main">
17 |       <a id="top"></a>
18 |       <h1 id="firstHeading" class="firstHeading" lang="en"><span id="openzim-page-title"></span></h1>
19 |       <div id="bodyContent" class="mw-body-content">
20 |         <div id="mw-content-text" lang="en" dir="ltr" class="mw-content-ltr"></div>
21 |       </div>
22 |     </div>
23 |     __ARTICLE_CONFIGVARS_LIST__ __ARTICLE_JS_LIST__
24 |   </body>
25 | </html>
26 | 


--------------------------------------------------------------------------------
/res/templates/pageWikimediaDesktop.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html class='client-js'>
 3 |   <head>
 4 |     <meta charset="UTF-8"/>
 5 |     <title></title>
 6 |     <meta name="viewport" content="width=device-width, initial-scale=1.0"/> __ARTICLE_CANONICAL_LINK__ __ARTICLE_CSS_LIST__
 7 |     __CSS_LINKS__ __JS_SCRIPTS__
 8 |   </head>
 9 |   <body class="mediawiki mw-hide-empty-elt ns-0 ns-subject stable skin-minerva action-view animations">
10 |     <div id="mw-mf-viewport" class="feature-header-v2">
11 |       <div id="mw-mf-page-center">
12 |         <div id="content" class="mw-body">
13 |           <a id="top"></a>
14 |           <div id="bodyContent" class="content mw-parser-output">
15 |             <h1 id="titleHeading" style="background-color: white; margin: 0;"></h1>
16 |             <div id="mw-content-text"></div>
17 |           </div>
18 |         </div>
19 |       </div>
20 |     </div>
21 |     __ARTICLE_CONFIGVARS_LIST__
22 |     __ARTICLE_JS_LIST__
23 |   </body>
24 | </html>
25 | 


--------------------------------------------------------------------------------
/res/templates/pageWikimediaMobile.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html class='client-js'>
 3 |   <head>
 4 |     <meta charset="UTF-8"/>
 5 |     <title></title>
 6 |     <meta name="viewport" content="width=device-width, initial-scale=1.0"/> __ARTICLE_CANONICAL_LINK__ __ARTICLE_CSS_LIST__
 7 |     __CSS_LINKS__ __ARTICLE_JS_LIST__
 8 |   </head>
 9 |   <body class="mediawiki mw-hide-empty-elt ns-0 ns-subject stable skin-minerva action-view animations">
10 |     <div id="mw-mf-viewport" class="feature-header-v2">
11 |       <div id="mw-mf-page-center">
12 |         <div id="content" class="mw-body">
13 |           <a id="top"></a>
14 |           <div id="bodyContent" class="content mw-parser-output">
15 |             <h1 id="titleHeading" style="background-color: white; margin: 0;"></h1>
16 |             <div id="mw-content-text"></div>
17 |           </div>
18 |         </div>
19 |       </div>
20 |     </div>
21 |     __ARTICLE_CONFIGVARS_LIST__
22 |     __JS_SCRIPTS__
23 |   </body>
24 | </html>
25 | 


--------------------------------------------------------------------------------
/res/templates/section_wrapper.html:
--------------------------------------------------------------------------------
1 | <details data-level="2" open>
2 |     <summary class='section-heading'><h2 id="{{ section_anchor }}">{% autoescape false %}{{ section_line }}{% endautoescape %}</h2></summary>
3 |     {% autoescape false %}{{ section_text }}{% endautoescape %}
4 |     __SUB_LEVEL_SECTION_{{ section_index }}__
5 | </details>


--------------------------------------------------------------------------------
/res/templates/subcategories.html:
--------------------------------------------------------------------------------
 1 | <details id="sub-categories" data-level="2">
 2 |     <summary class='section-heading'>
 3 |         <h2>Sub-Categories</h2>
 4 |     </summary>
 5 |     {% if !!prevArticleUrl %}(<a href="{{prevArticleUrl}}" rel="mwo:NoRewrite">previous page</a>){% endif %}
 6 |     {% if !!nextArticleUrl %}(<a href="{{nextArticleUrl}}" rel="mwo:NoRewrite">next page</a>){% endif %}
 7 |     <div class="mwo-groups">
 8 |         <ul>
 9 |             {% for categoryGroup in groups %}
10 |             <div>
11 |                 <h3>{{categoryGroup.title}}</h3>
12 |                 <ul>
13 |                     {% for category in categoryGroup.items %}
14 |                     <li><a href="{{category.url}}" rel="mwo:NoRewrite">{{category.name}}</a></li>
15 |                     {% endfor %}
16 |                 </ul>
17 |             </div>
18 |             {% endfor %}
19 |         </ul>
20 |     </div>
21 |     {% if !!prevArticleUrl %}(<a href="{{prevArticleUrl}}" rel="mwo:NoRewrite">previous page</a>){% endif %}
22 |     {% if !!nextArticleUrl %}(<a href="{{nextArticleUrl}}" rel="mwo:NoRewrite">next page</a>){% endif %}
23 | </details>


--------------------------------------------------------------------------------
/res/templates/subpages.html:
--------------------------------------------------------------------------------
 1 | <details id="sub-pages" data-level="2">
 2 |     <summary class='section-heading'><h2>Pages</h2></summary>
 3 |     <div class="mwo-groups">
 4 |         <ul>
 5 |             {% for pageGroup in groups %}
 6 |             <div>
 7 |                 <h3>{{pageGroup.title}}</h3>
 8 |                 <ul>
 9 |                     {% for page in pageGroup.items %}
10 |                     <li><a href="{{page.url}}" rel="mwo:NoRewrite">{{page.name}}</a></li>
11 |                     {% endfor %}
12 |                 </ul>
13 |             </div>
14 |             {% endfor %}
15 |         </ul>
16 |     </div>
17 | </details>


--------------------------------------------------------------------------------
/res/templates/subsection_wrapper.html:
--------------------------------------------------------------------------------
1 | <details data-level="{{ section_toclevel }}" open>
2 |     <summary class='section-heading'><h{{ section_toclevel }} id="{{ section_anchor }}">{% autoescape false %}{{ section_line }}{% endautoescape %}</h{{ section_toclevel }}></summary>
3 |     {% autoescape false %}{{ section_text }}{% endautoescape %}
4 | </details>
5 | __SUB_LEVEL_SECTION_{{ section_index }}__


--------------------------------------------------------------------------------
/res/vector-2022.css:
--------------------------------------------------------------------------------
 1 | /* Set font-size and line-height global default */
 2 | .vector-body {
 3 |   font-size: 1rem;
 4 |   line-height: 1.6;
 5 | }
 6 | 
 7 | /* Customize infobox on small screens, for some reason this seems to not be done automatically */
 8 | @media screen {
 9 |   @media (max-width: calc(639px)) {
10 |     table.infobox {
11 |       width: 100% !important;
12 |       display: table;
13 |     }
14 |   }
15 | }
16 | 


--------------------------------------------------------------------------------
/res/vector.css:
--------------------------------------------------------------------------------
1 | .mw-body {
2 |   margin-left: auto;
3 | }
4 | 


--------------------------------------------------------------------------------
/res/wm_mobile_override_script.js:
--------------------------------------------------------------------------------
 1 | function importScript() { return 1 } // this is to avoid the error from site.js
 2 | 
 3 | window.onload = function () {
 4 |   // Check if there is a Wikimedia mobile output page
 5 |   if (document.querySelector('#pcs')) {
 6 |       const supElements = document.querySelectorAll('sup');
 7 |       const linkElements = document.querySelectorAll('a');
 8 |       const disabledElems = Array.from(supElements).concat(Array.from(linkElements))
 9 |       disabledElems.forEach((elem) => {
10 |         elem.addEventListener('click', (event) => {
11 |           event.stopPropagation();
12 |         }, true);
13 |       });
14 |   }
15 | }
16 | 


--------------------------------------------------------------------------------
/res/wm_mobile_override_style.css:
--------------------------------------------------------------------------------
 1 | body {
 2 |   margin: 0 auto !important;
 3 | }
 4 | p#pcs-edit-section-add-title-description {
 5 |   display: none !important;
 6 | }
 7 | span.noviewer {
 8 |   display: none !important;
 9 | }
10 | .reference-link::after {
11 |   content: none !important;
12 | }
13 | .mw-body h3, .mw-body h2 {
14 |   width: auto;
15 | }
16 | 
17 | .thumbinner img.pcs-widen-image-override {
18 |   width: auto !important;
19 |     max-width: 100% !important;
20 | }
21 | 


--------------------------------------------------------------------------------
/src/DOMUtils.ts:
--------------------------------------------------------------------------------
 1 | const DOMUtils = {
 2 |   deleteNode(node: DominoElement) {
 3 |     if (!node) {
 4 |       return
 5 |     }
 6 |     if (node.parentNode) {
 7 |       node.parentNode.removeChild(node)
 8 |     } else {
 9 |       node.outerHTML = ''
10 |     }
11 |     node = undefined
12 |   },
13 | 
14 |   appendToAttr(node: DominoElement, attr: string, val: any) {
15 |     const oldVal = node.getAttribute(attr)
16 |     const valToSet = oldVal ? `${oldVal} ${val}` : val
17 |     node.setAttribute(attr, valToSet as any)
18 |   },
19 | 
20 |   nextElementSibling(node: DominoElement) {
21 |     let sibling = node.nextSibling
22 |     while (sibling && sibling.nodeType !== 1 /* ELEMENT_NODE */) {
23 |       sibling = sibling.nextSibling
24 |     }
25 |     return sibling
26 |   },
27 | }
28 | 
29 | export default DOMUtils
30 | 


--------------------------------------------------------------------------------
/src/Logger.ts:
--------------------------------------------------------------------------------
 1 | export const logLevels = ['info', 'log', 'warn', 'error', 'quiet']
 2 | export type LogLevel = (typeof logLevels)[number]
 3 | 
 4 | let verboseLevel = 'error'
 5 | 
 6 | const isVerbose = (level: LogLevel) => {
 7 |   if (!verboseLevel) {
 8 |     return false
 9 |   }
10 | 
11 |   const verboseLevelIndex = logLevels.indexOf(verboseLevel)
12 |   const logLevelIndex = logLevels.indexOf(level)
13 |   return logLevelIndex >= verboseLevelIndex ? true : false
14 | }
15 | 
16 | const doLog = (type: LogLevel, args: any[]) => {
17 |   if (isVerbose(type)) {
18 |     console[type](`[${type}] [${getTs()}]`, ...args)
19 |   }
20 | }
21 | 
22 | const getTs = () => {
23 |   return new Date().toISOString()
24 | }
25 | 
26 | export const setVerboseLevel = (level: LogLevel | true) => {
27 |   verboseLevel = level === true ? 'info' : level
28 | }
29 | 
30 | export const info = (...args: any[]) => {
31 |   doLog('info', args)
32 | }
33 | 
34 | export const log = (...args: any[]) => {
35 |   doLog('log', args)
36 | }
37 | 
38 | export const warn = (...args: any[]) => {
39 |   doLog('warn', args)
40 | }
41 | 
42 | export const error = (...args: any[]) => {
43 |   doLog('error', args)
44 | }
45 | 
46 | export const logifyArray = (arr: any[]) => {
47 |   if (arr.length < 3) {
48 |     return JSON.stringify(arr)
49 |   } else {
50 |     const ret = arr
51 |       .slice(0, 1)
52 |       .concat(`+${arr.length - 2} more +`)
53 |       .concat(arr[arr.length - 1])
54 |     return JSON.stringify(ret)
55 |   }
56 | }
57 | 


--------------------------------------------------------------------------------
/src/RedisStore.ts:
--------------------------------------------------------------------------------
  1 | import { createClient } from 'redis'
  2 | import type { RedisClientType } from 'redis'
  3 | import RedisKvs from './util/RedisKvs.js'
  4 | import * as logger from './Logger.js'
  5 | 
  6 | class RedisStore implements RS {
  7 |   private static instance: RedisStore
  8 | 
  9 |   #client: RedisClientType
 10 |   #storesReady: boolean
 11 |   #filesToDownloadXPath: RKVS<FileDetail>
 12 |   #filesToRetryXPath: RKVS<FileDetail>
 13 |   #articleDetailXId: RKVS<ArticleDetail>
 14 |   #redirectsXId: RKVS<ArticleRedirect>
 15 | 
 16 |   public get client() {
 17 |     return this.#client
 18 |   }
 19 | 
 20 |   public get filesToDownloadXPath(): RKVS<FileDetail> {
 21 |     return this.#filesToDownloadXPath
 22 |   }
 23 | 
 24 |   public get filesToRetryXPath(): RKVS<FileDetail> {
 25 |     return this.#filesToRetryXPath
 26 |   }
 27 | 
 28 |   public get articleDetailXId(): RKVS<ArticleDetail> {
 29 |     return this.#articleDetailXId
 30 |   }
 31 | 
 32 |   public get redirectsXId(): RKVS<ArticleRedirect> {
 33 |     return this.#redirectsXId
 34 |   }
 35 | 
 36 |   public static getInstance(): RedisStore {
 37 |     if (!RedisStore.instance) {
 38 |       RedisStore.instance = new RedisStore()
 39 |     }
 40 |     return RedisStore.instance
 41 |   }
 42 | 
 43 |   public setOptions(redisPath: string, opts?: any): void {
 44 |     if (RedisStore.instance) {
 45 |       const options = { ...opts }
 46 |       const quitOnError = !(options.quitOnError === false)
 47 |       delete options.quitOnError
 48 | 
 49 |       if (redisPath.startsWith('/') || redisPath.startsWith('./')) {
 50 |         options.socket = {
 51 |           ...options.socket,
 52 |           path: redisPath,
 53 |         }
 54 |       } else {
 55 |         options.url = redisPath
 56 |       }
 57 | 
 58 |       this.#client = createClient(options)
 59 | 
 60 |       this.#client.on('error', (err) => {
 61 |         if (quitOnError) {
 62 |           logger.error('Redis Client Error', err)
 63 |           process.exit(3)
 64 |         }
 65 |       })
 66 |     } else {
 67 |       throw new Error('Redis store has not been instantiated before setting options')
 68 |     }
 69 |   }
 70 | 
 71 |   public async connect(populateStores = true) {
 72 |     if (this.#client.isOpen) {
 73 |       return
 74 |     }
 75 |     await this.#client.connect()
 76 |     if (populateStores) {
 77 |       await this.checkForExistingStores()
 78 |       await this.populateStores()
 79 |       this.#storesReady = true
 80 |     }
 81 |   }
 82 | 
 83 |   public async close() {
 84 |     if (this.#client.isReady && this.#storesReady) {
 85 |       logger.log('Flushing Redis DBs')
 86 |       await Promise.all([this.#filesToDownloadXPath.flush(), this.#filesToRetryXPath.flush(), this.#articleDetailXId.flush(), this.#redirectsXId.flush()])
 87 |     }
 88 |     if (this.#client.isOpen) {
 89 |       await this.#client.quit()
 90 |     }
 91 |   }
 92 | 
 93 |   public async checkForExistingStores() {
 94 |     const patterns = ['*-media', '*-media-retry', '*-detail', '*-redirect']
 95 |     let keys: string[] = []
 96 |     for (const pattern of patterns) {
 97 |       keys = keys.concat(await this.#client.keys(pattern))
 98 |     }
 99 | 
100 |     keys.forEach(async (key) => {
101 |       try {
102 |         const length = await this.#client.hLen(key)
103 |         const time = new Date(Number(key.slice(0, key.indexOf('-'))))
104 |         logger.error(`Found store from previous run from ${time} that is still in redis: ${key} with length ${length}`)
105 |       } catch {
106 |         logger.error(`Key ${key} exists in DB, and is no hash.`)
107 |       }
108 |     })
109 |   }
110 | 
111 |   private async populateStores() {
112 |     this.#filesToDownloadXPath = new RedisKvs(this.#client, `${Date.now()}-media`, {
113 |       u: 'url',
114 |       m: 'mult',
115 |       w: 'width',
116 |     })
117 |     this.#filesToRetryXPath = new RedisKvs(this.#client, `${Date.now()}-media-retry`, {
118 |       u: 'url',
119 |       m: 'mult',
120 |       w: 'width',
121 |     })
122 |     this.#articleDetailXId = new RedisKvs(this.#client, `${Date.now()}-detail`, {
123 |       s: 'subCategories',
124 |       c: 'categories',
125 |       p: 'pages',
126 |       h: 'thumbnail',
127 |       g: 'coordinates',
128 |       t: 'timestamp',
129 |       r: 'revisionId',
130 |       i: 'internalThumbnailUrl',
131 |       m: 'missing',
132 |       n: 'title',
133 |     })
134 |     this.#redirectsXId = new RedisKvs(this.#client, `${Date.now()}-redirect`, {
135 |       t: 'targetId',
136 |       n: 'title',
137 |     })
138 |   }
139 | 
140 |   public createRedisKvs(...args: [string, KVS<string>?]): RKVS<any> {
141 |     return new RedisKvs(this.#client, ...args)
142 |   }
143 | }
144 | 
145 | const rs = RedisStore.getInstance()
146 | export default rs as RedisStore
147 | 


--------------------------------------------------------------------------------
/src/Templates.ts:
--------------------------------------------------------------------------------
 1 | import swig from 'swig-templates'
 2 | import pathParser from 'path'
 3 | import { config } from './config.js'
 4 | import { readFileSync } from 'fs'
 5 | import * as path from 'path'
 6 | import { fileURLToPath } from 'url'
 7 | 
 8 | const __filename = fileURLToPath(import.meta.url)
 9 | const __dirname = path.dirname(__filename)
10 | 
11 | function readTemplate(t: string) {
12 |   return readFileSync(pathParser.resolve(__dirname, '../res', t), 'utf-8')
13 | }
14 | 
15 | /* Compile templates */
16 | const footerTemplate = swig.compile(readTemplate(config.output.templates.footer))
17 | const leadSectionTemplate = swig.compile(readTemplate(config.output.templates.lead_section_wrapper))
18 | const sectionTemplate = swig.compile(readTemplate(config.output.templates.section_wrapper))
19 | const subSectionTemplate = swig.compile(readTemplate(config.output.templates.subsection_wrapper))
20 | const categoriesTemplate = swig.compile(readTemplate(config.output.templates.categories))
21 | const subCategoriesTemplate = swig.compile(readTemplate(config.output.templates.subCategories))
22 | const subPagesTemplate = swig.compile(readTemplate(config.output.templates.subPages))
23 | const downloadErrorPlaceholderTemplate = swig.compile(readTemplate(config.output.templates.downloadErrorPlaceholder))
24 | 
25 | const htmlWikimediaMobileTemplateCode = () => {
26 |   return readTemplate(config.output.templates.pageWikimediaMobile)
27 | }
28 | 
29 | const htmlWikimediaDesktopTemplateCode = () => {
30 |   return readTemplate(config.output.templates.pageWikimediaDesktop)
31 | }
32 | 
33 | const htmlVectorLegacyTemplateCode = () => {
34 |   return readTemplate(config.output.templates.pageVectorLegacy)
35 | }
36 | 
37 | const htmlVector2022TemplateCode = () => {
38 |   return readTemplate(config.output.templates.pageVector2022)
39 | }
40 | 
41 | const articleListHomeTemplate = readTemplate(config.output.templates.articleListHomeTemplate)
42 | 
43 | export {
44 |   footerTemplate,
45 |   leadSectionTemplate,
46 |   sectionTemplate,
47 |   subSectionTemplate,
48 |   htmlWikimediaMobileTemplateCode,
49 |   htmlWikimediaDesktopTemplateCode,
50 |   htmlVectorLegacyTemplateCode,
51 |   htmlVector2022TemplateCode,
52 |   articleListHomeTemplate,
53 |   categoriesTemplate,
54 |   subCategoriesTemplate,
55 |   subPagesTemplate,
56 |   downloadErrorPlaceholderTemplate,
57 | }
58 | 


--------------------------------------------------------------------------------
/src/cli.ts:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env -S node --max-old-space-size=9000 --stack-size=42000 --enable-source-maps
  2 | 
  3 | 'use strict'
  4 | 
  5 | import yargs from 'yargs'
  6 | import { hideBin } from 'yargs/helpers'
  7 | import { parameterDescriptions, requiredParams } from './parameterList.js'
  8 | import * as logger from './Logger.js'
  9 | import { AxiosError } from 'axios'
 10 | import { cleanupAxiosError } from './util/misc.js'
 11 | 
 12 | import * as mwofflinerLib from './mwoffliner.lib.js'
 13 | 
 14 | /** **********************************/
 15 | /* Command Parsing ******************/
 16 | /** **********************************/
 17 | const argv: any = yargs(hideBin(process.argv))
 18 |   .help('help')
 19 |   .usage(
 20 |     `Create a look-alike HTML - ZIM file based - snapshot of a remote MediaWiki instance.
 21 | 
 22 | Usage: npm run mwoffliner -- --help`,
 23 |   )
 24 |   .describe(parameterDescriptions)
 25 |   .require(requiredParams as any)
 26 |   .strict().argv
 27 | 
 28 | /* ***********************************/
 29 | /* TMPDIR OVERRIDE HAS TO BE HANDLED */
 30 | /* AT THE REALLY BEGIN               */
 31 | /* ***********************************/
 32 | 
 33 | import fs from 'fs'
 34 | 
 35 | if (argv.osTmpDir) {
 36 |   const osTmpDir = argv.osTmpDir as string
 37 | 
 38 |   try {
 39 |     if (fs.statSync(osTmpDir)) {
 40 |       process.env.TMPDIR = osTmpDir
 41 |     } else {
 42 |       throw new Error()
 43 |     }
 44 |   } catch {
 45 |     logger.error(`--osTmpDir value [${osTmpDir}] is not valid`)
 46 |     process.exit(2)
 47 |   }
 48 | }
 49 | 
 50 | /* ***********************/
 51 | /* TESTING ALL ARGUMENTS */
 52 | /* ***********************/
 53 | 
 54 | import { sanitize_all } from './sanitize-argument.js'
 55 | const execStartTime = Date.now()
 56 | sanitize_all(argv)
 57 |   .then(() => {
 58 |     /* ***********************************/
 59 |     /* GO THROUGH ENTRY POINT            */
 60 |     /* ***********************************/
 61 | 
 62 |     mwofflinerLib
 63 |       .execute(argv)
 64 |       .then(() => {
 65 |         logger.info(`Finished running mwoffliner after [${Math.round((Date.now() - execStartTime) / 1000)}s]`)
 66 |         process.exit(0)
 67 |       })
 68 |       .catch((err) => {
 69 |         errorHandler(err)
 70 |       })
 71 |   })
 72 |   .catch((err) => {
 73 |     errorHandler(err)
 74 |   })
 75 | 
 76 | // Hack to allow serializing of Errors
 77 | // https://stackoverflow.com/questions/18391212/is-it-not-possible-to-stringify-an-error-using-json-stringify
 78 | if (!('toJSON' in Error.prototype)) {
 79 |   Object.defineProperty(Error.prototype, 'toJSON', {
 80 |     value() {
 81 |       const alt = {} as any
 82 | 
 83 |       Object.getOwnPropertyNames(this).forEach(function (key) {
 84 |         alt[key] = this[key]
 85 |       }, this)
 86 | 
 87 |       return alt
 88 |     },
 89 |     configurable: true,
 90 |     writable: true,
 91 |   })
 92 | }
 93 | 
 94 | function errorHandler(err: any) {
 95 |   if (err instanceof AxiosError) {
 96 |     err = cleanupAxiosError(err)
 97 |   }
 98 |   logger.error(`Failed to run mwoffliner after [${Math.round((Date.now() - execStartTime) / 1000)}s]:\n`, err)
 99 |   process.exit(2)
100 | }
101 | 


--------------------------------------------------------------------------------
/src/mutex.ts:
--------------------------------------------------------------------------------
1 | import { Mutex } from 'async-mutex'
2 | 
3 | const zimCreatorMutex = new Mutex()
4 | 
5 | export { zimCreatorMutex }
6 | 


--------------------------------------------------------------------------------
/src/parameterList.ts:
--------------------------------------------------------------------------------
 1 | export const requiredParams = ['mwUrl', 'adminEmail']
 2 | 
 3 | export const parameterDescriptions = {
 4 |   mwUrl: 'MediaWiki base URL (any URL paths appending)',
 5 |   adminEmail: 'Email of the MWoffliner operator. Will be put in the HTTP user-agent string for information only',
 6 |   articleList: 'List of articles to include. Comma separated list of titles or a local path or HTTP(S) URL to a file with one title (in UTF8) per line',
 7 |   articleListToIgnore: 'List of articles to ignore. Comma separated list of titles or local path or HTTP(S) URL to a file with one title (in UTF8) per line',
 8 |   customZimFavicon: 'Local path or HTTP(S) URL to a PNG favicon (will be resized to 48x48). Default to MediaWiki if not set',
 9 |   customZimTitle: 'ZIM custom title metadata (30 characters max)',
10 |   customZimDescription: 'ZIM custom description (80 characters max)',
11 |   customZimLongDescription: 'ZIM custom long description (4000 characters max)',
12 |   customZimTags: 'ZIM tags metadata (semi-colon separated)',
13 |   customZimLanguage: 'ZIM ISO 639-3 content language code',
14 |   customMainPage: 'Custom page to be used as welcome page.',
15 |   filenamePrefix: 'Part of the ZIM filename which is before the format & date parts.',
16 |   format:
17 |     'Flavour for the scraping. If missing, scrape all article contents. Each --format argument will cause a new local file to be created but options can be combined. Supported options are:\n * novid: no video & audio content\n * nopic: no pictures (implies "novid")\n * nopdf: no PDF files\n * nodet: only the first/head paragraph (implies "novid")\nFlavour can be named (and corresponding ZIM metadata will be created) using a ":":\nExample: "--format=nopic,nodet:mini"',
18 |   keepEmptyParagraphs: 'Keep all paragraphs, even empty ones.',
19 |   mwWikiPath: 'MediaWiki article path (by default "/wiki/")',
20 |   mwIndexPhpPath: 'MediaWiki index.php path (by default "/w/index.php")',
21 |   mwActionApiPath: 'MediaWiki API path (by default "/w/api.php")',
22 |   mwRestApiPath: 'MediaWiki REST API path (by default "/w/rest.php")',
23 |   mwModulePath: 'MediaWiki module load path (by default "/w/load.php")',
24 |   mwDomain: 'MediaWiki user domain (thought for private wikis)',
25 |   mwUsername: 'MediaWiki username (thought for private wikis)',
26 |   mwPassword: 'MediaWiki user password (thought for private wikis)',
27 |   minifyHtml: 'Try to reduce the size of the HTML',
28 |   outputDirectory: 'Directory to write the downloaded content',
29 |   publisher: "ZIM publisher meta data, by default 'Kiwix'",
30 |   redis: 'Redis path (redis:// URL or path to UNIX socket)',
31 |   requestTimeout: 'Request timeout in seconds (defaultis 120s)',
32 |   resume: 'Skip already existing/created ZIM files',
33 |   speed: 'Multiplicator for the number of parallel HTTP requests on Parsoid backend (by default the number of CPU cores). The default value is 1.',
34 |   verbose:
35 |     'Print logging information to standard streams. To filter messages, one of the following values can be given: "info", "log", "warn", "error" or "quiet" (default level being "error"). All messages are printed from the given value and higher/worse.',
36 |   withoutZimFullTextIndex: "Don't include a fulltext search index to the ZIM",
37 |   webp: 'Convert all jpeg, png and gif images to webp format',
38 |   addNamespaces: 'Force additional namespace (comma separated numbers)',
39 |   osTmpDir: 'Override default operating system temporary directory path environment variable',
40 |   optimisationCacheUrl: 'Object Storage URL (including credentials and bucket name) to cache optimised media files',
41 |   forceRender:
42 |     'Force the usage of a specific API end-point/render, automatically chosen otherwise. Accepted values: [ VisualEditor, WikimediaDesktop. WikimediaMobile, RestApi, ActionParse ]',
43 |   insecure: 'Skip HTTPS server authenticity verification step',
44 | }
45 | 
46 | // TODO: Add an interface based on the object above
47 | 


--------------------------------------------------------------------------------
/src/renderers/abstractDesktop.render.ts:
--------------------------------------------------------------------------------
 1 | import * as domino from 'domino'
 2 | import { DownloadOpts, DownloadRes, Renderer } from './abstract.renderer.js'
 3 | import { getStaticFiles, genCanonicalLink, genHeaderScript, genHeaderCSSLink } from '../util/misc.js'
 4 | import { config } from '../config.js'
 5 | import MediaWiki from '../MediaWiki.js'
 6 | 
 7 | import { htmlWikimediaDesktopTemplateCode } from '../Templates.js'
 8 | import Downloader from '../Downloader.js'
 9 | 
10 | export abstract class DesktopRenderer extends Renderer {
11 |   public staticFilesListDesktop: string[] = []
12 |   constructor() {
13 |     super()
14 |     this.staticFilesListDesktop = this.staticFilesListCommon.concat(getStaticFiles(config.output.jsResources, config.output.cssResources))
15 |   }
16 | 
17 |   public async download(downloadOpts: DownloadOpts): Promise<DownloadRes> {
18 |     const { articleUrl, articleDetail } = downloadOpts
19 | 
20 |     const moduleDependencies = this.filterWikimediaDesktopModules(await Downloader.getModuleDependencies(articleDetail.title))
21 | 
22 |     const data = await Downloader.getJSON<any>(articleUrl)
23 |     if (data.error) {
24 |       throw new Error(data.error)
25 |     }
26 | 
27 |     return { data, moduleDependencies, redirects: [] }
28 |   }
29 | 
30 |   public filterWikimediaDesktopModules(_moduleDependencies) {
31 |     const { jsConfigVars, jsDependenciesList, styleDependenciesList } = _moduleDependencies as {
32 |       jsConfigVars: string
33 |       jsDependenciesList: string[]
34 |       styleDependenciesList: string[]
35 |     }
36 | 
37 |     const wikimediaDesktopJsModuleDependencies = jsDependenciesList.filter((item) => !item.includes('javascript/mobile'))
38 |     const wikimediaDesktopCssModuleDependencies = styleDependenciesList.filter((item) => !item.includes('css/mobile'))
39 | 
40 |     const wikimediaDesktopModuleDependencies = {
41 |       jsConfigVars,
42 |       jsDependenciesList: wikimediaDesktopJsModuleDependencies,
43 |       styleDependenciesList: wikimediaDesktopCssModuleDependencies,
44 |     }
45 | 
46 |     return wikimediaDesktopModuleDependencies
47 |   }
48 | 
49 |   public templateDesktopArticle(moduleDependencies: any, articleId: string): Document {
50 |     const { jsConfigVars, jsDependenciesList, styleDependenciesList } = moduleDependencies as {
51 |       jsConfigVars
52 |       jsDependenciesList: string[]
53 |       styleDependenciesList: string[]
54 |     }
55 | 
56 |     const cssLinks = config.output.cssResources.reduce((buf, css) => {
57 |       return buf + genHeaderCSSLink(config, css, articleId)
58 |     }, '')
59 | 
60 |     const jsScripts = config.output.jsResources.reduce((buf, js) => {
61 |       return (
62 |         buf +
63 |         (js === 'script'
64 |           ? genHeaderScript(config, js, articleId, '', `data-article-id="${articleId.replace(/"/g, '\\\\"')}" id="script-js"`)
65 |           : genHeaderScript(config, js, articleId))
66 |       )
67 |     }, '')
68 | 
69 |     const articleConfigVarsList = jsConfigVars === '' ? '' : genHeaderScript(config, 'jsConfigVars', articleId, config.output.dirs.mediawiki)
70 |     const articleJsList =
71 |       jsDependenciesList.length === 0 ? '' : jsDependenciesList.map((oneJsDep: string) => genHeaderScript(config, oneJsDep, articleId, config.output.dirs.mediawiki)).join('\n')
72 |     const articleCssList =
73 |       styleDependenciesList.length === 0
74 |         ? ''
75 |         : styleDependenciesList.map((oneCssDep: string) => genHeaderCSSLink(config, oneCssDep, articleId, config.output.dirs.mediawiki)).join('\n')
76 | 
77 |     const htmlTemplateString = htmlWikimediaDesktopTemplateCode()
78 |       .replace('__CSS_LINKS__', cssLinks)
79 |       .replace('__JS_SCRIPTS__', jsScripts)
80 |       .replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId))
81 |       .replace('__ARTICLE_CONFIGVARS_LIST__', articleConfigVarsList)
82 |       .replace('__ARTICLE_JS_LIST__', articleJsList)
83 |       .replace('__ARTICLE_CSS_LIST__', articleCssList)
84 | 
85 |     return domino.createDocument(htmlTemplateString)
86 |   }
87 | }
88 | 


--------------------------------------------------------------------------------
/src/renderers/abstractMobile.render.ts:
--------------------------------------------------------------------------------
 1 | import * as domino from 'domino'
 2 | import { Renderer } from './abstract.renderer.js'
 3 | import { getStaticFiles, genCanonicalLink, genHeaderScript, genHeaderCSSLink, getRelativeFilePath } from '../util/misc.js'
 4 | import { config } from '../config.js'
 5 | import MediaWiki from '../MediaWiki.js'
 6 | 
 7 | import { htmlWikimediaMobileTemplateCode } from '../Templates.js'
 8 | 
 9 | export abstract class MobileRenderer extends Renderer {
10 |   public staticFilesListMobile: string[] = []
11 |   constructor() {
12 |     super()
13 |     this.staticFilesListMobile = this.staticFilesListCommon.concat(getStaticFiles(config.output.wikimediaMobileJsResources, config.output.wikimediaMobileCssResources))
14 |   }
15 | 
16 |   public filterWikimediaMobileModules(_moduleDependencies) {
17 |     const { jsDependenciesList, styleDependenciesList } = _moduleDependencies as {
18 |       jsDependenciesList: string[]
19 |       styleDependenciesList: string[]
20 |     }
21 | 
22 |     const wikimediaMobileJsModuleDependencies = jsDependenciesList.filter((item) => item.includes('javascript/mobile'))
23 |     const wikimediaMobileCssModuleDependencies = styleDependenciesList.filter((item) => item.includes('css/mobile'))
24 | 
25 |     const wikimediaMobileModuleDependencies = {
26 |       jsDependenciesList: wikimediaMobileJsModuleDependencies,
27 |       styleDependenciesList: wikimediaMobileCssModuleDependencies,
28 |     }
29 | 
30 |     return wikimediaMobileModuleDependencies
31 |   }
32 | 
33 |   private genWikimediaMobileOverrideCSSLink(relativeFilePath: string, css: string) {
34 |     return `<link rel="stylesheet" href="${relativeFilePath}${css}.css" />`
35 |   }
36 | 
37 |   private genWikimediaMobileOverrideScript(relativeFilePath: string, js: string) {
38 |     return `<script src='${relativeFilePath}${js}.js'></script>`
39 |   }
40 | 
41 |   public templateMobileArticle(moduleDependencies: any, articleId: string): Document {
42 |     const { jsDependenciesList, styleDependenciesList } = moduleDependencies
43 | 
44 |     const articleJsList =
45 |       jsDependenciesList.length === 0 ? '' : jsDependenciesList.map((oneJsDep: string) => genHeaderScript(config, oneJsDep, articleId, config.output.dirs.mediawiki)).join('\n')
46 |     const articleCssList =
47 |       styleDependenciesList.length === 0
48 |         ? ''
49 |         : styleDependenciesList.map((oneCssDep: string) => genHeaderCSSLink(config, oneCssDep, articleId, config.output.dirs.mediawiki)).join('\n')
50 | 
51 |     const relativeFilePath = getRelativeFilePath(articleId, '')
52 |     const htmlTemplateString = htmlWikimediaMobileTemplateCode()
53 |       .replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId))
54 |       .replace('__ARTICLE_CONFIGVARS_LIST__', '')
55 |       .replace('__JS_SCRIPTS__', this.genWikimediaMobileOverrideScript(relativeFilePath, config.output.wikimediaMobileJsResources[0]))
56 |       .replace('__CSS_LINKS__', this.genWikimediaMobileOverrideCSSLink(relativeFilePath, config.output.wikimediaMobileCssResources[0]))
57 |       .replace('__ARTICLE_JS_LIST__', articleJsList)
58 |       .replace('__ARTICLE_CSS_LIST__', articleCssList)
59 | 
60 |     return domino.createDocument(htmlTemplateString)
61 |   }
62 | }
63 | 


--------------------------------------------------------------------------------
/src/renderers/rendering.context.ts:
--------------------------------------------------------------------------------
 1 | import Downloader from '../Downloader.js'
 2 | import { Renderer, renderName } from './abstract.renderer.js'
 3 | import { RendererBuilder } from './renderer.builder.js'
 4 | import * as logger from '../Logger.js'
 5 | 
 6 | class RenderingContext {
 7 |   private static instance: RenderingContext
 8 | 
 9 |   public mainPageRenderer: Renderer
10 |   public articlesRenderer: Renderer
11 | 
12 |   public static getInstance(): RenderingContext {
13 |     if (!RenderingContext.instance) {
14 |       RenderingContext.instance = new RenderingContext()
15 |     }
16 |     return RenderingContext.instance
17 |   }
18 | 
19 |   public async createRenderers(forceRender: renderName | null, hasWikimediaMobileApi: boolean) {
20 |     const rendererBuilder = new RendererBuilder()
21 | 
22 |     if (forceRender) {
23 |       // All articles and main page will use the same renderer if 'forceRender' is specified
24 |       const renderer = await rendererBuilder.createRenderer({
25 |         renderType: 'specific',
26 |         renderName: forceRender,
27 |       })
28 |       this.mainPageRenderer = renderer
29 |       this.articlesRenderer = renderer
30 |     } else {
31 |       this.mainPageRenderer = await rendererBuilder.createRenderer({ renderType: 'desktop' })
32 |       this.articlesRenderer = await rendererBuilder.createRenderer({
33 |         renderType: hasWikimediaMobileApi ? 'mobile' : 'auto',
34 |       })
35 |     }
36 |     logger.log(`Using ${this.mainPageRenderer.constructor.name} for main page renderer`)
37 |     logger.log(`Using ${this.articlesRenderer.constructor.name} for articles renderer`)
38 |     Downloader.setUrlsDirectors(this.mainPageRenderer, this.articlesRenderer)
39 |   }
40 | }
41 | 
42 | const instance = RenderingContext.getInstance()
43 | export default instance as RenderingContext
44 | 


--------------------------------------------------------------------------------
/src/renderers/rest-api.renderer.ts:
--------------------------------------------------------------------------------
 1 | import domino from 'domino'
 2 | import { DesktopRenderer } from './abstractDesktop.render.js'
 3 | import { getStrippedTitleFromHtml } from '../util/misc.js'
 4 | import { RenderOpts, RenderOutput } from './abstract.renderer.js'
 5 | 
 6 | // Represent 'https://{wikimedia-wiki}/api/rest.php/v1/page/html/'
 7 | export class RestApiRenderer extends DesktopRenderer {
 8 |   constructor() {
 9 |     super()
10 |   }
11 | 
12 |   private async retrieveHtml(data: string, i: number, articleId, articleDetail, numberOfPagesToSplitInto: number, articleDetailXId): Promise<any> {
13 |     const pageId = i === 0 ? '' : `__${i}`
14 |     const _articleId = articleId + pageId
15 |     const _articleDetail = Object.assign({}, articleDetail, {
16 |       subCategories: (articleDetail.subCategories || []).slice(i * 200, (i + 1) * 200),
17 |       nextArticleId: numberOfPagesToSplitInto > i + 1 ? `${articleId}__${i + 1}` : null,
18 |       prevArticleId: i - 1 > 0 ? `${articleId}__${i - 1}` : i - 1 === 0 ? articleId : null,
19 |     })
20 | 
21 |     if (articleDetailXId && (articleDetail.subCategories || []).length > 200) {
22 |       await articleDetailXId.set(_articleId, _articleDetail)
23 |     }
24 | 
25 |     let strippedTitle = getStrippedTitleFromHtml(data)
26 |     if (!strippedTitle) {
27 |       const title = articleId
28 |       const doc = domino.createDocument(`<span class='mw-title'>${title}</span>`)
29 |       strippedTitle = doc.getElementsByClassName('mw-title')[0].textContent
30 |     }
31 | 
32 |     return { strippedTitle, _articleId }
33 |   }
34 | 
35 |   public async render(renderOpts: RenderOpts): Promise<any> {
36 |     const result: RenderOutput = []
37 |     const { data, articleId, articleDetailXId, moduleDependencies, isMainPage, dump } = renderOpts
38 | 
39 |     /* istanbul ignore if */
40 |     if (!data) {
41 |       throw new Error(`Cannot render [${data}] into an article`)
42 |     }
43 | 
44 |     const articleDetail = await renderOpts.articleDetailXId.get(articleId)
45 | 
46 |     // Paginate when there are more than 200 subCategories
47 |     const numberOfPagesToSplitInto = Math.max(Math.ceil((articleDetail.subCategories || []).length / 200), 1)
48 | 
49 |     for (let i = 0; i < numberOfPagesToSplitInto; i++) {
50 |       const { strippedTitle, _articleId } = await this.retrieveHtml(data, i, articleId, articleDetail, numberOfPagesToSplitInto, articleDetailXId)
51 |       let dataWithHeader = ''
52 |       if (!isMainPage) {
53 |         dataWithHeader = super.injectH1TitleToHtml(data, articleDetail)
54 |       }
55 |       const { finalHTML, mediaDependencies, videoDependencies, imageDependencies, subtitles } = await super.processHtml(
56 |         dataWithHeader || data,
57 |         dump,
58 |         articleId,
59 |         articleDetail,
60 |         moduleDependencies,
61 |         super.templateDesktopArticle.bind(this),
62 |       )
63 | 
64 |       result.push({
65 |         articleId: _articleId,
66 |         displayTitle: (strippedTitle || articleId.replace(/_/g, ' ')) + (i === 0 ? '' : `/${i}`),
67 |         html: finalHTML,
68 |         mediaDependencies,
69 |         videoDependencies,
70 |         imageDependencies,
71 |         moduleDependencies,
72 |         staticFiles: this.staticFilesListDesktop,
73 |         subtitles,
74 |       })
75 |     }
76 |     return result
77 |   }
78 | }
79 | 


--------------------------------------------------------------------------------
/src/renderers/visual-editor.renderer.ts:
--------------------------------------------------------------------------------
 1 | import * as logger from '../Logger.js'
 2 | import { DELETED_ARTICLE_ERROR } from '../util/const.js'
 3 | import { DesktopRenderer } from './abstractDesktop.render.js'
 4 | import { getStrippedTitleFromHtml } from '../util/misc.js'
 5 | import { RenderOpts, RenderOutput } from './abstract.renderer.js'
 6 | import { DownloadError } from '../Downloader.js'
 7 | 
 8 | /*
 9 | Relies on VisualEditor API typically looking like 'https://{wiki-host}/w/api.php?action=visualeditor&mobileformat=html&format=json&paction=parse&page={title}'
10 | */
11 | export class VisualEditorRenderer extends DesktopRenderer {
12 |   constructor() {
13 |     super()
14 |   }
15 | 
16 |   private async retrieveHtml(renderOpts: RenderOpts): Promise<any> {
17 |     const { data, articleId, articleDetail, isMainPage } = renderOpts
18 | 
19 |     /* istanbul ignore if */
20 |     if (!data) {
21 |       throw new Error(`Cannot render [${data}] into an article`)
22 |     }
23 | 
24 |     let html: string
25 |     let displayTitle: string
26 |     let strippedTitle: string
27 | 
28 |     if (data.visualeditor) {
29 |       // Testing if article has been deleted between fetching list and downloading content.
30 |       if (data.visualeditor.oldid === 0) {
31 |         logger.error(DELETED_ARTICLE_ERROR)
32 |         throw new DownloadError(DELETED_ARTICLE_ERROR, null, null, null, DELETED_ARTICLE_ERROR)
33 |       }
34 |       html = isMainPage ? data.visualeditor.content : super.injectH1TitleToHtml(data.visualeditor.content, articleDetail)
35 |       strippedTitle = getStrippedTitleFromHtml(html)
36 |       displayTitle = strippedTitle || articleId.replace('_', ' ')
37 |       return { html, displayTitle }
38 |     } else if (data.contentmodel === 'wikitext' || (data.html && data.html.body)) {
39 |       html = data.html.body
40 |       strippedTitle = getStrippedTitleFromHtml(html)
41 |       displayTitle = strippedTitle || articleId.replace('_', ' ')
42 | 
43 |       return { html, displayTitle }
44 |     } else if (data.error) {
45 |       logger.error(`Error in retrieved article [${articleId}]:`, data.error)
46 |       return ''
47 |     }
48 |     logger.error('Unable to parse data from visual editor')
49 |     return ''
50 |   }
51 | 
52 |   public async render(renderOpts: RenderOpts): Promise<any> {
53 |     const result: RenderOutput = []
54 |     const { articleId, articleDetail, moduleDependencies, dump } = renderOpts
55 |     const { html, displayTitle } = await this.retrieveHtml(renderOpts)
56 |     if (html) {
57 |       const { finalHTML, mediaDependencies, videoDependencies, imageDependencies, subtitles } = await super.processHtml(
58 |         html,
59 |         dump,
60 |         articleId,
61 |         articleDetail,
62 |         moduleDependencies,
63 |         super.templateDesktopArticle.bind(this),
64 |       )
65 |       result.push({
66 |         articleId,
67 |         displayTitle,
68 |         html: finalHTML,
69 |         mediaDependencies,
70 |         videoDependencies,
71 |         imageDependencies,
72 |         moduleDependencies,
73 |         staticFiles: this.staticFilesListDesktop,
74 |         subtitles,
75 |       })
76 |       return result
77 |     }
78 |     return ''
79 |   }
80 | }
81 | 


--------------------------------------------------------------------------------
/src/renderers/wikimedia-desktop.renderer.ts:
--------------------------------------------------------------------------------
1 | import { RestApiRenderer } from './rest-api.renderer.js'
2 | 
3 | export class WikimediaDesktopRenderer extends RestApiRenderer {
4 |   constructor() {
5 |     super()
6 |   }
7 | }
8 | 


--------------------------------------------------------------------------------
/src/util/Timer.ts:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * timer class that executes callback in
 3 |  * timeout milliseconds and can be reset and cleared
 4 |  * Used for detecting freezes
 5 |  */
 6 | class Timer {
 7 |   public readonly timeout: number
 8 | 
 9 |   private readonly callback: () => void
10 |   private timer: ReturnType<typeof setTimeout>
11 | 
12 |   constructor(callback: () => void, timeout: number) {
13 |     this.timeout = timeout
14 |     this.callback = callback
15 |     this.start()
16 |   }
17 | 
18 |   public clear() {
19 |     if (this.timer) {
20 |       clearTimeout(this.timer)
21 |     }
22 |   }
23 | 
24 |   public reset() {
25 |     this.clear()
26 |     this.start()
27 |   }
28 | 
29 |   private start() {
30 |     this.timer = setTimeout(this.callback, this.timeout)
31 |   }
32 | }
33 | 
34 | export default Timer
35 | 


--------------------------------------------------------------------------------
/src/util/articleListMainPage.ts:
--------------------------------------------------------------------------------
 1 | import { Dump } from '../Dump.js'
 2 | import { encodeArticleIdForZimHtmlUrl } from '../util/index.js'
 3 | 
 4 | export function makeArticleListItem(dump: Dump, articleEntry: ArticleDetail) {
 5 |   return `<li><a href="${encodeArticleIdForZimHtmlUrl(articleEntry.title.replace(/ /g, '_'))}">${articleEntry.title.replace(/_/g, ' ') || ''}<a></li>\n`
 6 | }
 7 | 
 8 | export function makeArticleImageTile(dump: Dump, articleEntry: ArticleDetail) {
 9 |   return `<a class="item" href="${encodeArticleIdForZimHtmlUrl(articleEntry.title.replace(/ /g, '_'))}">
10 |             <figure><img loading="lazy" src="${articleEntry.internalThumbnailUrl}" />
11 |             <figcaption>${(articleEntry.title || '').replace(/_/g, ' ')}</figcaption></figure></a>\n`
12 | }
13 | 


--------------------------------------------------------------------------------
/src/util/builders/url/action-parse.director.ts:
--------------------------------------------------------------------------------
 1 | import urlBuilder from './url.builder.js'
 2 | 
 3 | /**
 4 |  * Interface to build URLs based on MediaWiki ActionParse
 5 |  */
 6 | export default class ActionParseURLDirector {
 7 |   baseDomain: string
 8 |   skin: string
 9 | 
10 |   constructor(baseDomain: string, skin: string) {
11 |     this.baseDomain = baseDomain
12 |     this.skin = skin
13 |   }
14 | 
15 |   buildArticleURL(articleId: string) {
16 |     return urlBuilder
17 |       .setDomain(this.baseDomain)
18 |       .setQueryParams({ action: 'parse', format: 'json', prop: 'modules|jsconfigvars|headhtml|text', parsoid: '1', page: articleId, useskin: this.skin, redirects: '1' })
19 |       .build()
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/src/util/builders/url/api.director.ts:
--------------------------------------------------------------------------------
 1 | import urlBuilder from './url.builder.js'
 2 | 
 3 | /**
 4 |  * Interface to build URLs based on MediaWiki API URL
 5 |  */
 6 | export default class ApiURLDirector {
 7 |   private baseDomain: string
 8 | 
 9 |   constructor(baseDomain: string) {
10 |     this.baseDomain = baseDomain
11 |   }
12 | 
13 |   buildSubCategoriesURL(articleId: string, continueStr = '') {
14 |     return urlBuilder
15 |       .setDomain(this.baseDomain)
16 |       .setQueryParams({
17 |         action: 'query',
18 |         list: 'categorymembers',
19 |         cmtype: 'subcat',
20 |         cmlimit: 'max',
21 |         format: 'json',
22 |         formatversion: '2',
23 |         cmtitle: articleId,
24 |         cmcontinue: continueStr,
25 |       })
26 |       .build()
27 |   }
28 | 
29 |   buildSiteInfoQueryURL() {
30 |     return urlBuilder
31 |       .setDomain(this.baseDomain)
32 |       .setQueryParams({ action: 'query', meta: 'siteinfo', format: 'json', formatversion: '2', siprop: 'general|namespaces|statistics|variables|category|wikidesc' })
33 |       .build()
34 |   }
35 | 
36 |   buildQueryURL<T extends Record<string, any>>(queryParams: T) {
37 |     return urlBuilder.setDomain(this.baseDomain).setQueryParams(queryParams, '?', true).build()
38 |   }
39 | 
40 |   buildNamespacesURL() {
41 |     return urlBuilder
42 |       .setDomain(this.baseDomain)
43 |       .setQueryParams({ action: 'query', meta: 'siteinfo', siprop: 'namespaces|namespacealiases', format: 'json', formatversion: '2' })
44 |       .build()
45 |   }
46 | 
47 |   buildSiteInfoURL() {
48 |     return urlBuilder.setDomain(this.baseDomain).setQueryParams({ action: 'query', meta: 'siteinfo', format: 'json', formatversion: '2' }).build()
49 |   }
50 | 
51 |   buildVisualEditorURL() {
52 |     return urlBuilder.setDomain(this.baseDomain).setQueryParams({ action: 'visualeditor', mobileformat: 'html', format: 'json', paction: 'parse', formatversion: '2' }).build(true)
53 |   }
54 | 
55 |   buildLogEventsQuery(letype: string, articleId: string) {
56 |     return urlBuilder.setDomain(this.baseDomain).setQueryParams({ action: 'query', list: 'logevents', letype: letype, letitle: articleId, format: 'json' }).build()
57 |   }
58 | 
59 |   buildArticleApiURL(articleId: string) {
60 |     return urlBuilder
61 |       .setDomain(this.baseDomain)
62 |       .setQueryParams({ action: 'parse', format: 'json', prop: 'modules|jsconfigvars|headhtml', formatversion: '2', page: articleId })
63 |       .build()
64 |   }
65 | }
66 | 


--------------------------------------------------------------------------------
/src/util/builders/url/base.director.ts:
--------------------------------------------------------------------------------
 1 | import urlBuilder from './url.builder.js'
 2 | import { WIKIMEDIA_REST_API_PATH } from '../../const.js'
 3 | 
 4 | /**
 5 |  * Interface to build URLs based on base URL
 6 |  */
 7 | export default class BaseURLDirector {
 8 |   private baseDomain: string
 9 | 
10 |   constructor(baseDomain: string) {
11 |     this.baseDomain = baseDomain.endsWith('/') ? baseDomain.substring(0, baseDomain.length - 1) : baseDomain
12 |   }
13 | 
14 |   buildURL(path: string) {
15 |     return urlBuilder.setDomain(this.baseDomain).setPath(path).build(true)
16 |   }
17 | 
18 |   buildWikimediaDesktopApiUrl() {
19 |     return urlBuilder
20 |       .setDomain(this.baseDomain)
21 |       .setPath(WIKIMEDIA_REST_API_PATH + 'page/html')
22 |       .build(true, '/')
23 |   }
24 | 
25 |   buildWikimediaMobileApiUrl() {
26 |     return urlBuilder
27 |       .setDomain(this.baseDomain)
28 |       .setPath(WIKIMEDIA_REST_API_PATH + 'page/mobile-html')
29 |       .build(true, '/')
30 |   }
31 | 
32 |   buildModuleURL(path?: string) {
33 |     return urlBuilder
34 |       .setDomain(this.baseDomain)
35 |       .setPath(path ?? '/w/load.php')
36 |       .build(false, '?')
37 |   }
38 | 
39 |   buildMobileModuleURL(path?: string) {
40 |     return urlBuilder
41 |       .setDomain(this.baseDomain)
42 |       .setPath(path ?? '/api/rest_v1/page/mobile-html-offline-resources')
43 |       .build(false, '/')
44 |   }
45 | 
46 |   buildRestApiUrl(path?: string) {
47 |     return urlBuilder
48 |       .setDomain(this.baseDomain)
49 |       .setPath(path ?? '/w/rest.php')
50 |       .build(true, '/')
51 |   }
52 | }
53 | 


--------------------------------------------------------------------------------
/src/util/builders/url/basic.director.ts:
--------------------------------------------------------------------------------
 1 | import urlBuilder from './url.builder.js'
 2 | 
 3 | type DownloaderBaseUrlConditions = Array<{ condition: boolean; value: string }>
 4 | 
 5 | /**
 6 |  * Build base URL for specific wiki. Example of the output: 'https://en.wikipedia.org' or 'https://de.wikivoyage.org
 7 |  */
 8 | class BasicURLDirector {
 9 |   buildMediawikiBaseURL(domain: string) {
10 |     return urlBuilder.setDomain(domain).build(true, '')
11 |   }
12 | 
13 |   buildDownloaderBaseUrl(conditions: DownloaderBaseUrlConditions): string | undefined {
14 |     let baseUrl: string
15 | 
16 |     for (const { condition, value } of conditions) {
17 |       if (condition) {
18 |         baseUrl = value
19 |         break
20 |       }
21 |     }
22 | 
23 |     return baseUrl
24 |   }
25 | }
26 | 
27 | const basicURLDirector = new BasicURLDirector()
28 | 
29 | export default basicURLDirector
30 | 


--------------------------------------------------------------------------------
/src/util/builders/url/desktop.director.ts:
--------------------------------------------------------------------------------
 1 | import urlBuilder from './url.builder.js'
 2 | 
 3 | /**
 4 |  * Interface to build URLs based on Downloader desktop URL
 5 |  */
 6 | export default class WikimediaDesktopURLDirector {
 7 |   baseDomain: string
 8 | 
 9 |   constructor(baseDomain: string) {
10 |     this.baseDomain = baseDomain
11 |   }
12 | 
13 |   buildArticleURL(articleId: string) {
14 |     return urlBuilder.setDomain(this.baseDomain).setPath(encodeURIComponent(articleId)).build()
15 |   }
16 | }
17 | 


--------------------------------------------------------------------------------
/src/util/builders/url/mobile.director.ts:
--------------------------------------------------------------------------------
 1 | import urlBuilder from './url.builder.js'
 2 | 
 3 | /**
 4 |  * Interface to build URLs based on MediaWiki mobile URL
 5 |  */
 6 | export default class WikimediaMobileURLDirector {
 7 |   baseDomain: string
 8 | 
 9 |   constructor(baseDomain: string) {
10 |     this.baseDomain = baseDomain
11 |   }
12 | 
13 |   buildArticleURL(articleId: string) {
14 |     return urlBuilder.setDomain(this.baseDomain).setPath(encodeURIComponent(articleId)).build()
15 |   }
16 | }
17 | 


--------------------------------------------------------------------------------
/src/util/builders/url/rest-api.director.ts:
--------------------------------------------------------------------------------
 1 | import urlBuilder from './url.builder.js'
 2 | 
 3 | /**
 4 |  * Interface to build URLs based on MediaWiki REST API URL
 5 |  */
 6 | export default class RestApiURLDirector {
 7 |   baseDomain: string
 8 | 
 9 |   constructor(baseDomain: string) {
10 |     this.baseDomain = baseDomain
11 |   }
12 | 
13 |   buildArticleURL(articleId: string) {
14 |     return urlBuilder
15 |       .setDomain(this.baseDomain)
16 |       .setPath('v1/page/' + encodeURIComponent(articleId) + '/html')
17 |       .build()
18 |   }
19 | }
20 | 


--------------------------------------------------------------------------------
/src/util/builders/url/url.builder.ts:
--------------------------------------------------------------------------------
  1 | import { ensureTrailingChar } from '../../misc.js'
  2 | 
  3 | /**
  4 |  * Concat the path to the domain and setting query params
  5 |  */
  6 | class URLBuilder {
  7 |   private domain = ''
  8 |   private path = ''
  9 |   private queryParams = ''
 10 | 
 11 |   setDomain(domain: string) {
 12 |     this.domain = domain
 13 | 
 14 |     return this
 15 |   }
 16 | 
 17 |   setPath(path: string) {
 18 |     this.path = path
 19 | 
 20 |     return this
 21 |   }
 22 | 
 23 |   /**
 24 |    * This function sets query parameters for a URL.
 25 |    *
 26 |    * @param params - These key-value pairs represent the query parameters that will be added to the URL.
 27 |    * @param [trailingChar] - trailingChar is an optional parameter that specifies a character
 28 |    * to be added at the beginning of the query parameters string. It is used to indicate the start of
 29 |    * the query parameters in a URL.
 30 |    *
 31 |    * @returns the current object (`this`) after setting the `queryParams` property to a string
 32 |    */
 33 |   setQueryParams<T extends Record<string, string>>(params: T, trailingChar = '?', filterParams?: boolean) {
 34 |     if (!filterParams) {
 35 |       const queryParams = new URLSearchParams(params)
 36 | 
 37 |       this.queryParams = trailingChar + queryParams.toString()
 38 | 
 39 |       return this
 40 |     }
 41 | 
 42 |     const filteredParams = Object.keys(params).reduce((accum, key) => {
 43 |       if (params[key]) {
 44 |         accum[key] = params[key]
 45 |       }
 46 | 
 47 |       return accum
 48 |     }, {})
 49 | 
 50 |     const queryParams = new URLSearchParams(filteredParams)
 51 | 
 52 |     this.queryParams = trailingChar + queryParams.toString()
 53 | 
 54 |     return this
 55 |   }
 56 | 
 57 |   /**
 58 |    * This function builds a URL by combining the domain, path, and query parameters, and can optionally
 59 |    * add a trailing character and return a URL object.
 60 |    *
 61 |    * @param [returnUrl] - A boolean parameter that determines whether the method should
 62 |    * return a URL object or a string.
 63 |    * @param [trailingChar] - The `trailingChar` parameter is an optional string parameter that
 64 |    * specifies a character to be added at the end of the generated link.
 65 |    *
 66 |    * @returns The `build` function returns a string that represents a URL constructed from the
 67 |    * `domain`, `path`, and `queryParams` properties of the object. The returned URL can optionally have
 68 |    * a trailing character appended to it, and can be returned as a string or as a `URL` object
 69 |    * depending on the values of the `returnUrl` and `trailingChar` parameters.
 70 |    */
 71 |   build(returnUrl?: false, trailingChar?: string): string
 72 |   build(returnUrl?: true, trailingChar?: string): URL
 73 |   build(returnUrl?: boolean, trailingChar?: string) {
 74 |     const currentDomain = this.domain
 75 |     const currentPath = this.path
 76 |     const currentQueryParams = this.queryParams
 77 | 
 78 |     this.domain = ''
 79 |     this.path = ''
 80 |     this.queryParams = ''
 81 | 
 82 |     if (!currentDomain) {
 83 |       throw new Error('The link must contain a domain')
 84 |     }
 85 | 
 86 |     const link = currentDomain + currentPath + currentQueryParams
 87 | 
 88 |     if (returnUrl && trailingChar) {
 89 |       return new URL(ensureTrailingChar(link, trailingChar))
 90 |     }
 91 | 
 92 |     if (returnUrl && !trailingChar) {
 93 |       return new URL(link)
 94 |     }
 95 | 
 96 |     if (!returnUrl && trailingChar) {
 97 |       return ensureTrailingChar(link, trailingChar)
 98 |     }
 99 | 
100 |     return link
101 |   }
102 | }
103 | 
104 | const urlBuilder = new URLBuilder()
105 | 
106 | export default urlBuilder
107 | 


--------------------------------------------------------------------------------
/src/util/builders/url/visual-editor.director.ts:
--------------------------------------------------------------------------------
 1 | import urlBuilder from './url.builder.js'
 2 | 
 3 | /**
 4 |  * Interface to build URLs based on MediaWiki visual editor URL
 5 |  */
 6 | export default class VisualEditorURLDirector {
 7 |   baseDomain: string
 8 | 
 9 |   constructor(baseDomain: string) {
10 |     this.baseDomain = baseDomain
11 |   }
12 | 
13 |   buildArticleURL(articleId: string) {
14 |     return urlBuilder.setDomain(this.baseDomain).setQueryParams({ page: articleId }, '&').build()
15 |   }
16 | 
17 |   get validMimeTypes() {
18 |     return ['application/json']
19 |   }
20 | }
21 | 


--------------------------------------------------------------------------------
/src/util/builders/url/web.director.ts:
--------------------------------------------------------------------------------
 1 | import urlBuilder from './url.builder.js'
 2 | 
 3 | /**
 4 |  * Interface to build URLs based on MediaWiki Web URL
 5 |  */
 6 | export default class WebURLDirector {
 7 |   baseDomain: string
 8 | 
 9 |   constructor(baseDomain: string) {
10 |     this.baseDomain = baseDomain
11 |   }
12 | 
13 |   buildArticleRawURL(articleId: string) {
14 |     return urlBuilder
15 |       .setDomain(this.baseDomain)
16 |       .setQueryParams({ title: encodeURIComponent(articleId), action: 'raw' })
17 |       .build()
18 |   }
19 | }
20 | 


--------------------------------------------------------------------------------
/src/util/const.ts:
--------------------------------------------------------------------------------
 1 | export const MAX_CPU_CORES = 8
 2 | export const IMAGE_THUMB_URL_REGEX = /^.*\/[0-9a-fA-F]{1}\/[0-9a-fA-F]{2}\/([^/]+\/)?(\d+px[-]+)?(.+?\.[A-Za-z0-9]{2,6}(\.[A-Za-z0-9]{2,6})?)$/
 3 | export const LATEX_IMAGE_URL_REGEX = /^.*\/math\/render\/svg\/([A-Za-z0-9]+)$/
 4 | export const WIKIHIERO_IMAGE_URL_REGEX = /^.*\/wikihiero\/img\/(.*\.png)(\?.*)?$/
 5 | export const FANDOM_IMAGE_URL_REGEX = /([^/]+)\/revision\//i
 6 | export const MIN_IMAGE_THRESHOLD_ARTICLELIST_PAGE = 10
 7 | export const CONCURRENCY_LIMIT = 10
 8 | export const IMAGE_MIME_REGEX = /^image+[/-\w.]+$/
 9 | export const FIND_HTTP_REGEX = /^(?:https?:\/\/)?/i
10 | export const DB_ERROR = 'internal_api_error_DBQueryError'
11 | export const DELETED_ARTICLE_ERROR = 'Article has been deleted.'
12 | export const WEAK_ETAG_REGEX = /^(W\/)/
13 | export const BITMAP_IMAGE_MIME_REGEX = /^image+[/-\w.]+(jpeg|png|gif)$/
14 | export const WEBP_CANDIDATE_IMAGE_MIME_TYPE = /image+[/]+(jpeg|png)/
15 | export const ALL_READY_FUNCTION = /function allReady\( modules \) {/
16 | export const DO_PROPAGATION = /mw\.requestIdleCallback\( doPropagation, \{ timeout: 1 \} \);/
17 | export const LOAD_PHP = /script.src = ".*load\.php.*";/
18 | export const RULE_TO_REDIRECT = /window\.top !== window\.self/
19 | export const MAX_FILE_DOWNLOAD_RETRIES = 5
20 | export const BLACKLISTED_NS = ['Story'] // 'Story' Wikipedia namespace is content, but not indgestable by Parsoid https://github.com/openzim/mwoffliner/issues/1853
21 | export const RENDERERS_LIST = ['WikimediaDesktop', 'VisualEditor', 'WikimediaMobile', 'RestApi', 'ActionParse']
22 | export const WIKIMEDIA_REST_API_PATH = '/api/rest_v1/'
23 | 
24 | /*
25 |  Handle redirection pages for 3rd party wikis that have 200 response code
26 |  Check this link: https://pokemon.fandom.com/w/api.php?action=visualeditor&mobileformat=html&format=json&paction=parse&formatversion=2&page=MediaWiki%3ASidebar
27 | */
28 | export const REDIRECT_PAGE_SIGNATURE = 'Moved to'
29 | 


--------------------------------------------------------------------------------
/src/util/index.ts:
--------------------------------------------------------------------------------
1 | export * from './articleListMainPage.js'
2 | export * from './misc.js'
3 | export * from './dump.js'
4 | export * from './const.js'
5 | export * from './mw-api.js'
6 | export * from './metaData.js'
7 | 


--------------------------------------------------------------------------------
/src/util/metaData.ts:
--------------------------------------------------------------------------------
 1 | import AjvModule from 'ajv'
 2 | import type { KeywordCxt } from 'ajv'
 3 | import { byGrapheme } from 'split-by-grapheme'
 4 | 
 5 | const Ajv = AjvModule.default
 6 | const ajv = new Ajv({ allErrors: true })
 7 | 
 8 | ajv.addKeyword({
 9 |   keyword: 'checkRegexFromBuffer',
10 |   validate: (regexStr: string, buffer) => {
11 |     if (Buffer.isBuffer(buffer)) {
12 |       const regex = new RegExp(regexStr)
13 |       const binary = buffer.toString('binary')
14 |       return regex.test(binary)
15 |     }
16 |     return false
17 |   },
18 |   error: {
19 |     message: 'must match regex pattern',
20 |   },
21 | })
22 | 
23 | ajv.addKeyword({
24 |   keyword: 'uMaxLength',
25 |   type: 'string',
26 |   validate: (max_length: number, value) => {
27 |     return value.split(byGrapheme).length <= max_length
28 |   },
29 |   error: {
30 |     message: (cxt: KeywordCxt): string => {
31 |       return `must NOT have more than ${cxt.schemaValue} graphemes`
32 |     },
33 |   },
34 | })
35 | 
36 | const schema = {
37 |   type: 'object',
38 |   properties: {
39 |     Name: { type: 'string', minLength: 1 },
40 |     Creator: { type: 'string', minLength: 1 },
41 |     Description: { type: 'string', uMaxLength: 80, minLength: 1 },
42 |     Language: { type: 'string', minLength: 1, pattern: '^\\w{3}(,\\w{3})*$' },
43 |     Publisher: { type: 'string', minLength: 1 },
44 |     Title: { type: 'string', uMaxLength: 30, minLength: 1 },
45 |     Date: { type: 'string', maxLength: 10, minLength: 10 },
46 |     'Illustration_48x48@1': { checkRegexFromBuffer: '^\x89\x50\x4e\x47\x0d\x0a\x1a\x0a.+' },
47 |     LongDescription: { type: 'string', uMaxLength: 4000 },
48 |     License: { type: 'string' },
49 |     Tags: { type: 'string' },
50 |     Relation: { type: 'string' },
51 |     Flavour: { type: 'string' },
52 |     Source: { type: 'string' },
53 |     Counter: { type: 'string' },
54 |     Scraper: { type: 'string' },
55 |   },
56 |   required: ['Creator', 'Description', 'Language', 'Publisher', 'Title', 'Illustration_48x48@1'],
57 |   additionalProperties: true,
58 | }
59 | 
60 | const validate = ajv.compile(schema)
61 | 
62 | export const validateMetadata = (metaData): void => {
63 |   const valid = validate(metaData)
64 | 
65 |   if (!valid) {
66 |     const error = validate.errors[0]
67 |     const keyword = error.instancePath.substring(1)
68 | 
69 |     if (error.keyword === 'required') {
70 |       throw new Error(`Metadata "${error.params.missingProperty}" is required`)
71 |     }
72 |     if (error.keyword === 'minLength') {
73 |       throw new Error(`Metadata "${keyword}" is required`)
74 |     }
75 |     throw new Error(`MetaData ${keyword}: ${error.message}`)
76 |   }
77 | }
78 | 


--------------------------------------------------------------------------------
/src/util/url.helper.ts:
--------------------------------------------------------------------------------
 1 | class URLHelper {
 2 |   private readonly urlPartCache: KVS<string> = {}
 3 | 
 4 |   public serializeUrl(url: string): string {
 5 |     const urlObj = new URL(url)
 6 |     const path = urlObj.pathname + urlObj.search + urlObj.hash
 7 |     const cacheablePart = url.replace(path, '')
 8 |     const cacheEntry = Object.entries(this.urlPartCache).find(([, value]) => value === cacheablePart)
 9 |     let cacheKey
10 |     if (!cacheEntry) {
11 |       const cacheId = String(Object.keys(this.urlPartCache).length + 1)
12 |       this.urlPartCache[cacheId] = cacheablePart
13 |       cacheKey = `_${cacheId}_`
14 |     } else {
15 |       cacheKey = `_${cacheEntry[0]}_`
16 |     }
17 |     return `${cacheKey}${path}`
18 |   }
19 | 
20 |   public deserializeUrl(url: string): string {
21 |     if (!url.startsWith('_')) return url
22 |     const [, cacheId, ...pathParts] = url.split('_')
23 |     const path = pathParts.join('_')
24 |     const cachedPart = this.urlPartCache[cacheId]
25 |     return `${cachedPart}${path}`
26 |   }
27 | }
28 | 
29 | const urlHelper = new URLHelper()
30 | 
31 | export default urlHelper
32 | 


--------------------------------------------------------------------------------
/test/e2e/apiPathParamsSanitizing.e2e.test.ts:
--------------------------------------------------------------------------------
 1 | import { testAllRenders } from '../testRenders.js'
 2 | import 'dotenv/config.js'
 3 | import { jest } from '@jest/globals'
 4 | import { rimraf } from 'rimraf'
 5 | import { sanitizeApiPathParam, sanitizeWikiPath } from '../../src/sanitize-argument.js'
 6 | import { zimcheck } from '../util.js'
 7 | 
 8 | jest.setTimeout(60000)
 9 | 
10 | const parameters = {
11 |   mwUrl: 'https://en.wikipedia.org',
12 |   articleList: 'BMW',
13 |   adminEmail: 'test@kiwix.org',
14 |   mwActionApiPath: sanitizeApiPathParam('/w/api.php'),
15 |   mwRestApiPath: sanitizeApiPathParam('/w/rest.php'),
16 |   mwModulePath: sanitizeApiPathParam('/w/load.php'),
17 |   mwWikiPath: sanitizeWikiPath('/wiki/'),
18 |   mwIndexPhpPath: sanitizeApiPathParam('/w/index.php'),
19 | }
20 | 
21 | await testAllRenders('api-path-params', parameters, async (outFiles) => {
22 |   describe(`e2e test for api url params for en.wikipedia.org for ${outFiles[0]?.renderer} renderer`, () => {
23 |     test('Mediawiki actionApiPath option sanitized', () => {
24 |       expect(outFiles[0].mwMetaData.actionApiPath).toBe('/w/api.php')
25 |     })
26 | 
27 |     test('Mediawiki restApiPath option sanitized', () => {
28 |       expect(outFiles[0].mwMetaData.restApiPath).toBe('/w/rest.php')
29 |     })
30 | 
31 |     test('Mediawiki wikiPath option sanitized', () => {
32 |       expect(outFiles[0].mwMetaData.wikiPath).toBe('/wiki/')
33 |     })
34 | 
35 |     test('Mediawiki wikiPath option sanitized', () => {
36 |       expect(outFiles[0].mwMetaData.indexPhpPath).toBe('/w/index.php')
37 |     })
38 | 
39 |     test('Mediawiki modulePathOpt option sanitized', () => {
40 |       expect(outFiles[0].mwMetaData.modulePathOpt).toBe('/w/load.php')
41 |     })
42 | 
43 |     test('Mediawiki modulePath and actionApiUrl options', () => {
44 |       expect(outFiles[0].mwMetaData.modulePath).toBe('https://en.wikipedia.org/w/load.php?')
45 |       expect(outFiles[0].mwMetaData.actionApiUrl).toBe('https://en.wikipedia.org/w/api.php')
46 |     })
47 | 
48 |     test(`test ZIM integrity for ${outFiles[0]?.renderer} renderer`, async () => {
49 |       await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError()
50 |     })
51 | 
52 |     afterAll(() => {
53 |       if (!process.env.KEEP_ZIMS) {
54 |         rimraf.sync(`./${outFiles[0].testId}`)
55 |       }
56 |     })
57 |   })
58 | })
59 | 


--------------------------------------------------------------------------------
/test/e2e/articleLists.test.ts:
--------------------------------------------------------------------------------
 1 | import { execa } from 'execa'
 2 | import { rimraf } from 'rimraf'
 3 | import 'dotenv/config'
 4 | import { jest } from '@jest/globals'
 5 | import { testAllRenders } from '../testRenders.js'
 6 | import { zimcheckAvailable, zimcheck } from '../util.js'
 7 | 
 8 | jest.setTimeout(10000)
 9 | 
10 | const articleList = 'Kiwix,Wikipedia,Internet,Real-time computer graphics'
11 | const articleListToIgnore = 'Wikipedia, Internet'
12 | 
13 | const parameters = {
14 |   mwUrl: 'https://en.wikipedia.org',
15 |   adminEmail: 'test@kiwix.org',
16 |   articleList,
17 |   articleListToIgnore,
18 |   redis: process.env.REDIS,
19 |   format: ['nopic'],
20 | }
21 | 
22 | await testAllRenders('article-lists', parameters, async (outFiles) => {
23 |   describe('articleList', () => {
24 |     const listMinusIgnore = 2
25 | 
26 |     test(`articleList and articleListIgnore check using ${outFiles[0].renderer} renderer`, async () => {
27 |       await execa('redis-cli flushall', { shell: true })
28 | 
29 |       // Created 1 output
30 |       expect(outFiles).toHaveLength(1)
31 | 
32 |       for (const dump of outFiles) {
33 |         if (dump.nopic) {
34 |           // Output has right amount of articles
35 |           expect(dump.status.articles.success).toEqual(listMinusIgnore)
36 |           // Output has no failed article
37 |           expect(dump.status.articles.hardFail).toEqual(0)
38 |           expect(dump.status.articles.softFail).toEqual(0)
39 |         }
40 |       }
41 | 
42 |       // Scraped selected articles from wikipedia en');
43 |       if (await zimcheckAvailable()) {
44 |         await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError()
45 |       } else {
46 |         console.log('Zimcheck not installed, skipping test')
47 |       }
48 | 
49 |       const redisScan = await execa('redis-cli --scan', { shell: true })
50 |       // Redis has been cleared
51 |       expect(redisScan.stdout).toEqual('')
52 |     })
53 |     afterAll(() => {
54 |       if (!process.env.KEEP_ZIMS) {
55 |         rimraf.sync(`./${outFiles[0].testId}`)
56 |       }
57 |     })
58 |   })
59 | })
60 | 


--------------------------------------------------------------------------------
/test/e2e/bm.e2e.test.ts:
--------------------------------------------------------------------------------
 1 | import { zimdump, zimcheck } from '../util.js'
 2 | import { testAllRenders } from '../testRenders.js'
 3 | import { execa } from 'execa'
 4 | import { jest } from '@jest/globals'
 5 | import { rimraf } from 'rimraf'
 6 | import 'dotenv/config.js'
 7 | 
 8 | jest.setTimeout(200000)
 9 | 
10 | const parameters = {
11 |   mwUrl: 'https://bm.wikipedia.org',
12 |   adminEmail: 'test@kiwix.org',
13 |   redis: process.env.REDIS,
14 |   format: ['nopic'],
15 | }
16 | 
17 | await testAllRenders('bm-wikipedia', parameters, async (outFiles) => {
18 |   test(`test ZIM integrity for ${outFiles[0]?.renderer} renderer`, async () => {
19 |     await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError()
20 |   })
21 | 
22 |   test(`Simple articleList for ${outFiles[0]?.renderer} renderer for bm.wikipedia.org`, async () => {
23 |     await execa('redis-cli flushall', { shell: true })
24 |     // Created 1 output
25 |     expect(outFiles).toHaveLength(1)
26 | 
27 |     for (const dump of outFiles) {
28 |       if (dump.nopic) {
29 |         // nopic has enough files (this is just an estimate and can change
30 |         // with time, as new Mediwiki versions are released).
31 |         expect(dump.status.files.success).toBeGreaterThanOrEqual(outFiles[0].renderer == 'WikimediaMobile' ? 1 : 3)
32 |         expect(dump.status.files.success).toBeLessThan(14)
33 |         // nopic has enough redirects
34 |         expect(dump.status.redirects.written).toBeGreaterThan(170)
35 |         // nopic has enough articles
36 |         expect(dump.status.articles.success).toBeGreaterThan(700)
37 |       }
38 |     }
39 |   })
40 | 
41 |   afterAll(() => {
42 |     if (!process.env.KEEP_ZIMS) {
43 |       rimraf.sync(`./${outFiles[0].testId}`)
44 |     }
45 |   })
46 | })
47 | 
48 | await testAllRenders('bm-wikipedia-with-ns-1', { ...parameters, addNamespaces: 1 }, async (outFiles) => {
49 |   test(`Articles with "Discussion" namespace for ${outFiles[0]?.renderer} renderer for bm.wikipedia.org`, async () => {
50 |     await execa('redis-cli flushall', { shell: true })
51 | 
52 |     // Created 1 output
53 |     expect(outFiles).toHaveLength(1)
54 |     const discussionArticlesStr = await zimdump(`list ${outFiles[0].outFile}`)
55 |     const discussionArticlesList = discussionArticlesStr.match(/Discussion:/g)
56 |     expect(discussionArticlesList.length).toBeGreaterThan(30)
57 |   })
58 |   afterAll(() => {
59 |     if (!process.env.KEEP_ZIMS) {
60 |       rimraf.sync(`./${outFiles[0].testId}`)
61 |     }
62 |   })
63 | })
64 | 


--------------------------------------------------------------------------------
/test/e2e/cmd.e2e.test.ts:
--------------------------------------------------------------------------------
 1 | // eslint-disable-next-line @typescript-eslint/triple-slash-reference
 2 | /// <reference path="../../src/types.d.ts" />
 3 | 
 4 | import packageJSON from '../../package.json'
 5 | import { execa } from 'execa'
 6 | 
 7 | const mwo = 'node lib/cli.js'
 8 | 
 9 | describe('Exec Command With Bash', () => {
10 |   test('Exec Command With --version option', async () => {
11 |     const version = await execa(`${mwo} --version`, { shell: true })
12 |     expect(version.stdout.trim()).toEqual(packageJSON.version)
13 |   })
14 | 
15 |   test('Exec Command With --help option', async () => {
16 |     const help = await execa(`${mwo} --help`, { shell: true })
17 |     expect(help.stdout.trim().split('\n').length).toBeGreaterThan(55)
18 |   })
19 | 
20 |   describe('Sanitizing', () => {
21 |     test('Exec Command With --articlesList and --addNamespaces together', async () => {
22 |       await expect(execa(`${mwo} --adminEmail=test@test.test --articleList=Portal:Biology --mwUrl=https://en.wikipedia.org/ --addNamespaces=100`, { shell: true })).rejects.toThrow(
23 |         /options --articlesList and --addNamespaces cannot be used together/,
24 |       )
25 |     })
26 | 
27 |     test('Exec Command With --verbose option', async () => {
28 |       await expect(execa(`${mwo} --verbose=anyString --mwUrl="https://en.wikipedia.org" --adminEmail="test@test.test"`, { shell: true })).rejects.toThrow(
29 |         /"anyString" is not a valid value for option verbose. It should be empty or one of \[info, log, warn, error, quiet\]/,
30 |       )
31 |     })
32 |   })
33 | })
34 | 


--------------------------------------------------------------------------------
/test/e2e/downloadImage.e2e.test.ts:
--------------------------------------------------------------------------------
 1 | import { execa } from 'execa'
 2 | import { rimraf } from 'rimraf'
 3 | import { testAllRenders } from '../testRenders.js'
 4 | import 'dotenv/config.js'
 5 | import { jest } from '@jest/globals'
 6 | import { zimcheck } from '../util.js'
 7 | 
 8 | jest.setTimeout(200000)
 9 | 
10 | const describeIf = process.env.S3_URL ? describe : describe.skip
11 | 
12 | const parameters = {
13 |   mwUrl: 'https://fr.wikipedia.org',
14 |   adminEmail: 'test@kiwix.org',
15 |   redis: process.env.REDIS,
16 |   articleList: 'Paris',
17 |   format: ['nodet'],
18 |   optimisationCacheUrl: process.env.S3_URL,
19 | }
20 | 
21 | await testAllRenders('download-image', parameters, async (outFiles) => {
22 |   describeIf('Check image downloading from S3 using optimisationCacheUrl parameter', () => {
23 |     test(`right scrapping from fr.wikipedia.org with optimisationCacheUrl parameter for ${outFiles[0]?.renderer} renderer`, async () => {
24 |       await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError()
25 |       await execa('redis-cli flushall', { shell: true })
26 | 
27 |       const redisScan = await execa('redis-cli --scan', { shell: true })
28 |       expect(redisScan.stdout).toEqual('')
29 |     })
30 |     afterAll(() => {
31 |       if (!process.env.KEEP_ZIMS) {
32 |         rimraf.sync(`./${outFiles[0].testId}`)
33 |       }
34 |     })
35 |   })
36 | })
37 | 


--------------------------------------------------------------------------------
/test/e2e/en.e2e.test.ts:
--------------------------------------------------------------------------------
 1 | import { testAllRenders } from '../testRenders.js'
 2 | import domino from 'domino'
 3 | import { zimdump, zimcheck } from '../util.js'
 4 | import 'dotenv/config.js'
 5 | import { jest } from '@jest/globals'
 6 | import { rimraf } from 'rimraf'
 7 | 
 8 | jest.setTimeout(60000)
 9 | 
10 | // Check the integrity of img elements between ZIM file and article html taken from it
11 | const verifyImgElements = (imgFilesArr, imgElements) => {
12 |   for (const img of imgElements) {
13 |     for (const imgFile of imgFilesArr) {
14 |       if (img.getAttribute('src').includes(imgFile)) {
15 |         return true
16 |       }
17 |     }
18 |   }
19 |   return false
20 | }
21 | 
22 | const parameters = {
23 |   mwUrl: 'https://en.wikipedia.org',
24 |   articleList: 'Providence/Stoughton Line', // use article with a slash in its name to check relative links are properly handled
25 |   adminEmail: 'test@kiwix.org',
26 | }
27 | 
28 | await testAllRenders('en-wikipedia', parameters, async (outFiles) => {
29 |   const articleFromDump = await zimdump(`show --url ${parameters.articleList.replace(' ', '_')} ${outFiles[0].outFile}`)
30 |   describe('e2e test for en.wikipedia.org', () => {
31 |     const articleDoc = domino.createDocument(articleFromDump)
32 | 
33 |     test(`test ZIM integrity for ${outFiles[0]?.renderer} renderer`, async () => {
34 |       await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError()
35 |     })
36 | 
37 |     test(`test article header for ${outFiles[0]?.renderer} renderer`, async () => {
38 |       expect(articleDoc.querySelector('h1.firstHeading > span#openzim-page-title, h1.article-header, h1.pcs-edit-section-title')).toBeTruthy()
39 |     })
40 |     test(`test article image integrity for ${outFiles[0]?.renderer} renderer`, async () => {
41 |       const allFiles = await zimdump(`list ${outFiles[0].outFile}`)
42 |       const allFilesArr = allFiles.split('\n')
43 |       const imgFilesArr = allFilesArr.filter((elem) => elem.endsWith('pdf') || elem.endsWith('png') || elem.endsWith('jpg'))
44 |       const imgElements = Array.from(articleDoc.querySelectorAll('img'))
45 |       expect(verifyImgElements(imgFilesArr, imgElements)).toBe(true)
46 |     })
47 | 
48 |     afterAll(() => {
49 |       if (!process.env.KEEP_ZIMS) {
50 |         rimraf.sync(`./${outFiles[0].testId}`)
51 |       }
52 |     })
53 |   })
54 | })
55 | 


--------------------------------------------------------------------------------
/test/e2e/en10.e2e.test.ts:
--------------------------------------------------------------------------------
 1 | import { rimraf } from 'rimraf'
 2 | import { testAllRenders } from '../testRenders.js'
 3 | import { jest } from '@jest/globals'
 4 | import 'dotenv/config.js'
 5 | 
 6 | jest.setTimeout(200000)
 7 | 
 8 | const parameters = {
 9 |   mwUrl: 'https://en.wikipedia.org',
10 |   adminEmail: 'test@kiwix.org',
11 |   articleList: 'https://api.wp1.openzim.org/v1/builders/28a933d2-1903-417d-939e-3b6e8ee7a126/selection/latest.tsv',
12 |   redis: process.env.REDIS,
13 |   // format: ['nopic', 'novid', 'nopdf', 'nodet'],
14 |   format: ['nopic', 'nopdf'],
15 | }
16 | 
17 | await testAllRenders('en10-wikipedia', parameters, async (outFiles) => {
18 |   describe('en10', () => {
19 |     test(`Simple articleList for ${outFiles[0]?.renderer} renderer`, async () => {
20 |       // Created 2 outputs
21 |       expect(outFiles).toHaveLength(2)
22 | 
23 |       for (const dump of outFiles) {
24 |         if (dump.nopic) {
25 |           // nopic has enough files (this is just an estimate and can change
26 |           // with time, as new Mediwiki versions are released).
27 |           expect(dump.status.files.success).toBeGreaterThanOrEqual(outFiles[0].renderer == 'WikimediaMobile' ? 42 : 47)
28 |           expect(dump.status.files.success).toBeLessThan(59)
29 |           // nopic has enough redirects
30 |           expect(dump.status.redirects.written).toBeGreaterThan(500)
31 |           // nopic has 10 articles
32 |           expect(dump.status.articles.success).toEqual(10)
33 |           // No article and files error
34 |           expect(dump.status.articles.hardFail).toEqual(0)
35 |           expect(dump.status.articles.softFail).toEqual(0)
36 |           expect(dump.status.files.fail).toEqual(0)
37 |         } else if (dump.novid) {
38 |           // novid has enough files
39 |           expect(dump.status.files.success).toBeGreaterThan(314)
40 |           // novid has enough redirects
41 |           expect(dump.status.redirects.written).toBeGreaterThan(314)
42 |           // novid has 10 articles
43 |           expect(dump.status.articles.success).toEqual(10)
44 |         } else if (dump.nopdf) {
45 |           // nopdf has enough files
46 |           expect(dump.status.files.success).toBeGreaterThan(340)
47 |           // nopdf has enough redirects
48 |           expect(dump.status.redirects.written).toBeGreaterThan(314)
49 |           // nopdf has 10 articles
50 |           expect(dump.status.articles.success).toEqual(10)
51 |         } else if (dump.nodet) {
52 |           // nodet has enough files
53 |           expect(dump.status.files.success).toBeGreaterThan(50)
54 |           // nodet has enough redirects
55 |           expect(dump.status.redirects.written).toBeGreaterThan(314)
56 |           // nodet has 10 articles
57 |           expect(dump.status.articles.success).toEqual(10)
58 |         }
59 | 
60 |         // No download error
61 |         expect(dump.status.articles.hardFail).toEqual(0)
62 |         expect(dump.status.articles.softFail).toEqual(0)
63 |         expect(dump.status.files.fail).toEqual(0)
64 |       }
65 |     })
66 | 
67 |     afterAll(() => {
68 |       if (!process.env.KEEP_ZIMS) {
69 |         rimraf.sync(`./${outFiles[0].testId}`)
70 |       }
71 |     })
72 |   })
73 | })
74 | 


--------------------------------------------------------------------------------
/test/e2e/extra.e2e.test.ts:
--------------------------------------------------------------------------------
 1 | import * as mwoffliner from '../../src/mwoffliner.lib.js'
 2 | import { rimraf } from 'rimraf'
 3 | import { writeFilePromise, mkdirPromise } from '../../src/util/index.js'
 4 | import { join } from 'path'
 5 | import { execa } from 'execa'
 6 | import 'dotenv/config.js'
 7 | import { jest } from '@jest/globals'
 8 | import { RENDERERS_LIST } from '../../src/util/const.js'
 9 | import { zimcheckAvailable, zimcheck } from '../util.js'
10 | 
11 | jest.setTimeout(20000)
12 | 
13 | describe('Extra', () => {
14 |   for (const renderer of RENDERERS_LIST) {
15 |     const now = new Date()
16 |     const testId = join(process.cwd(), `mwo-test-${+now}`)
17 | 
18 |     const articleListUrl = join(testId, '/articleList')
19 | 
20 |     test(`Simple customMainPage for ${renderer} renderer`, async () => {
21 |       await mkdirPromise(testId)
22 | 
23 |       const articleListLines = `
24 |   1%_(South_Park)
25 |   İznik
26 |   Egyptian_hieroglyphs
27 |   Wikipedia:Books/archive/Cancer care
28 |   AC/DC`
29 | 
30 |       await writeFilePromise(articleListUrl, articleListLines, 'utf8')
31 | 
32 |       const outFiles = await mwoffliner.execute({
33 |         mwUrl: 'https://en.wikipedia.org',
34 |         adminEmail: 'test@kiwix.org',
35 |         articleList: articleListUrl,
36 |         customMainPage: 'Wikipedia:WikiProject_Medicine/Open_Textbook_of_Medicine2',
37 |         outputDirectory: testId,
38 |         redis: process.env.REDIS,
39 |         format: ['nopic'],
40 |         forceRender: 'WikimediaDesktop',
41 |       })
42 | 
43 |       // Created 1 outputs
44 |       expect(outFiles).toHaveLength(1)
45 | 
46 |       for (const dump of outFiles) {
47 |         if (dump.nopic) {
48 |           const articleCount = articleListLines.split(/\r\n|\r|\n/).length
49 |           expect(dump.status.articles.success).toEqual(articleCount)
50 |         }
51 | 
52 |         if (await zimcheckAvailable()) {
53 |           await expect(zimcheck(dump.outFile)).resolves.not.toThrowError()
54 |         } else {
55 |           console.log('Zimcheck not installed, skipping test')
56 |         }
57 |       }
58 | 
59 |       await execa('redis-cli flushall', { shell: true })
60 | 
61 |       // Scraped customMainPage
62 |       // TODO: clear test dir
63 |       rimraf.sync(testId)
64 | 
65 |       const redisScan = await execa('redis-cli --scan', { shell: true })
66 |       // Redis has been cleared
67 |       expect(redisScan.stdout).toEqual('')
68 |     })
69 |   }
70 | })
71 | 


--------------------------------------------------------------------------------
/test/e2e/forceRender.test.ts:
--------------------------------------------------------------------------------
 1 | import * as mwoffliner from '../../src/mwoffliner.lib.js'
 2 | import { execa } from 'execa'
 3 | import { rimraf } from 'rimraf'
 4 | import { jest } from '@jest/globals'
 5 | import { zimcheckAvailable, zimcheck } from '../util.js'
 6 | 
 7 | jest.setTimeout(200000)
 8 | 
 9 | describe('forceRender', () => {
10 |   const now = new Date()
11 |   const testId = `mwo-test-${+now}`
12 | 
13 |   const parameters = {
14 |     mwUrl: 'https://bm.wikipedia.org',
15 |     adminEmail: 'test@kiwix.org',
16 |     outputDirectory: testId,
17 |     redis: process.env.REDIS,
18 |     format: ['nopic'],
19 |     articleList: 'France',
20 |     mwActionApiPath: '/w/api.php',
21 |   }
22 | 
23 |   afterAll(async () => {
24 |     await execa('redis-cli flushall', { shell: true })
25 |     rimraf.sync(`./${testId}`)
26 |     const redisScan = await execa('redis-cli --scan', { shell: true })
27 |     // Redis has been cleared
28 |     expect(redisScan.stdout).toEqual('')
29 |   })
30 | 
31 |   test('Scrape article from bm.wikipedia.org using WikimediaDesktop render', async () => {
32 |     const forceRender = 'WikimediaDesktop'
33 |     const outFiles = await mwoffliner.execute({ ...parameters, forceRender })
34 | 
35 |     if (await zimcheckAvailable()) {
36 |       await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError()
37 |     } else {
38 |       console.log('Zimcheck not installed, skipping test')
39 |     }
40 |   })
41 | })
42 | 


--------------------------------------------------------------------------------
/test/e2e/formatParams.test.ts:
--------------------------------------------------------------------------------
  1 | import 'dotenv/config.js'
  2 | import domino from 'domino'
  3 | import { testAllRenders } from '../testRenders.js'
  4 | import { jest } from '@jest/globals'
  5 | import { zimdump } from '../util.js'
  6 | import { rimraf } from 'rimraf'
  7 | import { execa } from 'execa'
  8 | 
  9 | jest.setTimeout(200000)
 10 | 
 11 | const parameters = {
 12 |   mwUrl: 'https://en.wikipedia.org',
 13 |   adminEmail: 'mail@mail.com',
 14 |   redis: process.env.REDIS,
 15 | }
 16 | 
 17 | await testAllRenders('format-params-nopic', { ...parameters, format: 'nopic', articleList: 'BMW' }, async (outFiles) => {
 18 |   describe('format:nopic', () => {
 19 |     test(`Test en.wikipedia.org using format:nopic for ${outFiles[0]?.renderer} renderer`, async () => {
 20 |       await execa('redis-cli flushall', { shell: true })
 21 |       const articleFromDump = await zimdump(`show --url BMW ${outFiles[0].outFile}`)
 22 |       const articleDoc = domino.createDocument(articleFromDump)
 23 | 
 24 |       const imgElements = Array.from(articleDoc.querySelectorAll('img'))
 25 | 
 26 |       expect(imgElements).toHaveLength(0)
 27 |       if (!process.env.KEEP_ZIMS) {
 28 |         rimraf.sync(`./${outFiles[0].testId}`)
 29 |       }
 30 |     })
 31 |   })
 32 | })
 33 | 
 34 | await testAllRenders('format-params-nodet', { ...parameters, format: 'nodet', articleList: 'BMW' }, async (outFiles) => {
 35 |   describe('format:nodet', () => {
 36 |     test(`Test en.wikipedia.org using format:nodet for ${outFiles[0]?.renderer} renderer`, async () => {
 37 |       await execa('redis-cli flushall', { shell: true })
 38 |       const articleFromDump = await zimdump(`show --url BMW ${outFiles[0].outFile}`)
 39 |       const articleDoc = domino.createDocument(articleFromDump)
 40 | 
 41 |       const sectionsElements = Array.from(articleDoc.querySelectorAll('section'))
 42 | 
 43 |       expect(sectionsElements).toHaveLength(1)
 44 |       expect(sectionsElements[0].getAttribute('data-mw-section-id')).toEqual('0')
 45 |       if (!process.env.KEEP_ZIMS) {
 46 |         rimraf.sync(`./${outFiles[0].testId}`)
 47 |       }
 48 |     })
 49 |   })
 50 | })
 51 | 
 52 | await testAllRenders('format-params-novid-1', { ...parameters, format: 'novid', articleList: 'Animation' }, async (outFiles) => {
 53 |   describe('format:novid to check no video tags', () => {
 54 |     test(`Test en.wikipedia.org using format:novid for ${outFiles[0]?.renderer} renderer (no video)`, async () => {
 55 |       await execa('redis-cli flushall', { shell: true })
 56 |       const articleFromDump = await zimdump(`show --url Animation ${outFiles[0].outFile}`)
 57 |       const articleDoc = domino.createDocument(articleFromDump)
 58 | 
 59 |       const audioElements = Array.from(articleDoc.querySelectorAll('audio'))
 60 | 
 61 |       expect(audioElements).toHaveLength(0)
 62 |       if (!process.env.KEEP_ZIMS) {
 63 |         rimraf.sync(`./${outFiles[0].testId}`)
 64 |       }
 65 |     })
 66 |   })
 67 | })
 68 | 
 69 | await testAllRenders('format-params-novid-2', { ...parameters, format: 'novid', articleList: 'English_alphabet' }, async (outFiles) => {
 70 |   describe('format:novid to check no audio tags', () => {
 71 |     test(`Test en.wikipedia.org using format:novid for ${outFiles[0]?.renderer} renderer (no audio)`, async () => {
 72 |       await execa('redis-cli flushall', { shell: true })
 73 |       const articleFromDump = await zimdump(`show --url English_alphabet ${outFiles[0].outFile}`)
 74 |       const articleDoc = domino.createDocument(articleFromDump)
 75 | 
 76 |       const videoElements = Array.from(articleDoc.querySelectorAll('video'))
 77 | 
 78 |       expect(videoElements).toHaveLength(0)
 79 |       if (!process.env.KEEP_ZIMS) {
 80 |         rimraf.sync(`./${outFiles[0].testId}`)
 81 |       }
 82 |     })
 83 |   })
 84 | })
 85 | 
 86 | // TODO: blocked by issues/1928
 87 | /*
 88 | await testRenders({ ...parameters, format: 'nopdf', articleList: 'PDF' }, async (outFiles) => {
 89 |   describe('format:pdf to check no internal links pdf files', () => {
 90 |     test(`Test en.wikipedia.org using format:nopdf for ${outFiles[0]?.renderer} renderer`, async () => {
 91 |       await execa('redis-cli flushall', { shell: true })
 92 |       const articleFromDump = await zimdump(`show --url PDF ${outFiles[0].outFile}`)
 93 |       const articleDoc = domino.createDocument(articleFromDump)
 94 |       // eslint-disable-next-line @typescript-eslint/no-unused-vars
 95 |       const anchorElements = Array.from(articleDoc.querySelectorAll('a'))
 96 |       if (!process.env.KEEP_ZIMS) {
 97 |         rimraf.sync(`./${outFiles[0].testId}`)
 98 |       }
 99 |     })
100 |   })
101 | })
102 | */
103 | 


--------------------------------------------------------------------------------
/test/e2e/openstreetmap.e2e.test.ts:
--------------------------------------------------------------------------------
 1 | import { testRenders } from '../testRenders.js'
 2 | import domino from 'domino'
 3 | import { zimdump, zimcheck } from '../util.js'
 4 | import 'dotenv/config.js'
 5 | import { jest } from '@jest/globals'
 6 | import { rimraf } from 'rimraf'
 7 | 
 8 | jest.setTimeout(60000)
 9 | 
10 | // Check the integrity of img elements between ZIM file and article html taken from it
11 | const verifyImgElements = (imgFilesArr, imgElements) => {
12 |   for (const img of imgElements) {
13 |     for (const imgFile of imgFilesArr) {
14 |       if (img.getAttribute('src').includes(imgFile)) {
15 |         return true
16 |       }
17 |     }
18 |   }
19 |   return false
20 | }
21 | 
22 | const parameters = {
23 |   mwUrl: 'https://wiki.openstreetmap.org',
24 |   articleList: 'London',
25 |   adminEmail: 'test@kiwix.org',
26 | }
27 | 
28 | await testRenders(
29 |   'openstreetmap',
30 |   parameters,
31 |   async (outFiles) => {
32 |     const articleFromDump = await zimdump(`show --url ${parameters.articleList} ${outFiles[0].outFile}`)
33 |     describe('e2e test for wiki.openstreetmap.org', () => {
34 |       const articleDoc = domino.createDocument(articleFromDump)
35 | 
36 |       test(`test ZIM integrity for ${outFiles[0]?.renderer} renderer`, async () => {
37 |         await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError()
38 |       })
39 | 
40 |       test(`test article header for ${outFiles[0]?.renderer} renderer`, async () => {
41 |         expect(articleDoc.querySelector('h1.firstHeading > span#openzim-page-title, h1.article-header, h1.pcs-edit-section-title')).toBeTruthy()
42 |       })
43 |       test(`test article image integrity for ${outFiles[0]?.renderer} renderer`, async () => {
44 |         const allFiles = await zimdump(`list ${outFiles[0].outFile}`)
45 |         const allFilesArr = allFiles.split('\n')
46 |         const imgFilesArr = allFilesArr.filter((elem) => elem.endsWith('pdf') || elem.endsWith('png') || elem.endsWith('jpg'))
47 |         const imgElements = Array.from(articleDoc.querySelectorAll('img'))
48 |         expect(verifyImgElements(imgFilesArr, imgElements)).toBe(true)
49 |       })
50 | 
51 |       afterAll(() => {
52 |         if (!process.env.KEEP_ZIMS) {
53 |           rimraf.sync(`./${outFiles[0].testId}`)
54 |         }
55 |       })
56 |     })
57 |   },
58 |   ['RestApi', 'VisualEditor'],
59 | )
60 | 


--------------------------------------------------------------------------------
/test/e2e/treatMedia.e2e.test.ts:
--------------------------------------------------------------------------------
 1 | import { execa } from 'execa'
 2 | import { rimraf } from 'rimraf'
 3 | import { testAllRenders } from '../testRenders.js'
 4 | import { zimdump } from '../util.js'
 5 | import 'dotenv/config'
 6 | import { jest } from '@jest/globals'
 7 | 
 8 | jest.setTimeout(20000)
 9 | 
10 | const parameters = {
11 |   mwUrl: 'https://en.wikipedia.org',
12 |   adminEmail: 'test@kiwix.org',
13 |   articleList: 'Read_my_lips:_no_new_taxes',
14 |   redis: process.env.REDIS,
15 | }
16 | 
17 | await testAllRenders('treat-media', parameters, async (outFiles) => {
18 |   test('media file from hidden element should not be downloaded', async () => {
19 |     await execa('redis-cli flushall', { shell: true })
20 | 
21 |     // Created 1 output
22 |     expect(outFiles).toHaveLength(1)
23 |     await expect(zimdump(`list --url "George_Bush_1988_No_New_Taxes.ogg" ${outFiles[0].outFile}`)).rejects.toThrow('Entry not found')
24 |   })
25 | 
26 |   afterAll(() => {
27 |     if (!process.env.KEEP_ZIMS) {
28 |       rimraf.sync(`./${outFiles[0].testId}`)
29 |     }
30 |   })
31 | })
32 | 


--------------------------------------------------------------------------------
/test/e2e/vikidia.e2e.test.ts:
--------------------------------------------------------------------------------
 1 | test('skipped vikidia test', () => {
 2 |   return
 3 | })
 4 | 
 5 | /*
 6 | // https://github.com/openzim/mwoffliner/issues/2039
 7 | import { execa } from 'execa'
 8 | import { rimraf } from 'rimraf'
 9 | import { testRenders } from '../testRenders.js'
10 | import 'dotenv/config.js'
11 | import { jest } from '@jest/globals'
12 | import { zimcheck } from '../util.js'
13 | 
14 | jest.setTimeout(200000)
15 | 
16 | const parameters = {
17 |   mwUrl: 'https://en.vikidia.org',
18 |   adminEmail: 'test@kiwix.org',
19 |   redis: process.env.REDIS,
20 |   articleList: 'Alaska',
21 |   customZimDescription: 'Alaska article',
22 | }
23 | 
24 | await testRenders(
25 |   parameters,
26 |   async (outFiles) => {
27 |     test(`right scrapping from vikidia.org for ${outFiles[0]?.renderer} renderer`, async () => {
28 |       await execa('redis-cli flushall', { shell: true })
29 |       expect(outFiles).toHaveLength(1)
30 |     })
31 | 
32 |     test(`test ZIM integrity for ${outFiles[0]?.renderer} renderer`, async () => {
33 |       await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError()
34 |     })
35 | 
36 |     afterAll(() => {
37 |       if (!process.env.KEEP_ZIMS) {
38 |         rimraf.sync(`./${outFiles[0].testId}`)
39 |       }
40 |     })
41 |   },
42 |   // en.vikidia.org supports only VisualEditor among other renders
43 |   ['VisualEditor'],
44 | )
45 | */
46 | 


--------------------------------------------------------------------------------
/test/e2e/wikisource.e2e.test.ts:
--------------------------------------------------------------------------------
 1 | import { execa } from 'execa'
 2 | import { rimraf } from 'rimraf'
 3 | import { testRenders } from '../testRenders.js'
 4 | import 'dotenv/config.js'
 5 | import { jest } from '@jest/globals'
 6 | 
 7 | jest.setTimeout(20000)
 8 | 
 9 | const parameters = {
10 |   mwUrl: 'https://fo.wikisource.org',
11 |   adminEmail: 'test@kiwix.org',
12 |   redis: process.env.REDIS,
13 |   format: ['nopic'],
14 |   noLocalParserFallback: true,
15 | }
16 | 
17 | await testRenders(
18 |   'wikisource',
19 |   parameters,
20 |   async (outFiles) => {
21 |     describe('wikisource', () => {
22 |       switch (outFiles[0].renderer) {
23 |         case 'WikimediaDesktop':
24 |           test(`Wikisource List for ${outFiles[0]?.renderer} renderer`, async () => {
25 |             await execa('redis-cli flushall', { shell: true })
26 | 
27 |             expect(outFiles).toHaveLength(1)
28 | 
29 |             for (const dump of outFiles) {
30 |               if (dump.nopic) {
31 |                 console.log(dump.status.files.fail)
32 |                 // nopic has enough files
33 |                 expect(dump.status.files.success).toBeGreaterThanOrEqual(2)
34 |                 // nopic has enough redirects
35 |                 expect(dump.status.redirects.written).toBeGreaterThanOrEqual(16)
36 |                 // nopic has enough articles
37 |                 expect(dump.status.articles.success).toBeGreaterThanOrEqual(61)
38 |               }
39 |             }
40 |           })
41 | 
42 |           afterAll(() => {
43 |             if (!process.env.KEEP_ZIMS) {
44 |               rimraf.sync(`./${outFiles[0].testId}`)
45 |             }
46 |           })
47 |           break
48 |         case 'VisualEditor':
49 |           test(`Wikisource List for ${outFiles[0]?.renderer} renderer`, async () => {
50 |             await execa('redis-cli flushall', { shell: true })
51 | 
52 |             expect(outFiles).toHaveLength(1)
53 | 
54 |             for (const dump of outFiles) {
55 |               if (dump.nopic) {
56 |                 // nopic has enough files
57 |                 expect(dump.status.files.success).toBeGreaterThanOrEqual(2)
58 |                 // nopic has enough redirects
59 |                 expect(dump.status.redirects.written).toBeGreaterThanOrEqual(16)
60 |                 // nopic has enough articles
61 |                 expect(dump.status.articles.success).toBeGreaterThanOrEqual(61)
62 |               }
63 |             }
64 |           })
65 |           if (!process.env.KEEP_ZIMS) {
66 |             rimraf.sync(`./${outFiles[0].testId}`)
67 |           }
68 |           break
69 |       }
70 |     })
71 |   },
72 |   ['WikimediaDesktop', 'VisualEditor', 'RestApi'],
73 | )
74 | 


--------------------------------------------------------------------------------
/test/e2e/zimMetadata.e2e.test.ts:
--------------------------------------------------------------------------------
 1 | import { rimraf } from 'rimraf'
 2 | import { execa } from 'execa'
 3 | import { testAllRenders } from '../testRenders.js'
 4 | import { zimdump } from '../util.js'
 5 | import 'dotenv/config'
 6 | import { jest } from '@jest/globals'
 7 | 
 8 | jest.setTimeout(20000)
 9 | 
10 | const parameters = {
11 |   mwUrl: 'https://en.wikipedia.org',
12 |   adminEmail: 'test@kiwix.org',
13 |   articleList: 'Kiwix',
14 |   redis: process.env.REDIS,
15 |   format: ['nopic'],
16 |   customZimDescription: 'Example of the description',
17 |   customZimLongDescription: 'Example of the long description',
18 |   customZimTitle: 'Example of the title',
19 |   publisher: 'Example of the publisher',
20 | }
21 | 
22 | await testAllRenders('zim-metadata', parameters, async (outFiles) => {
23 |   describe('zimMetadata', () => {
24 |     test(`check all ZIM metadata using zimdump for ${outFiles[0]?.renderer} renderer`, async () => {
25 |       await execa('redis-cli flushall', { shell: true })
26 | 
27 |       expect(outFiles).toHaveLength(1)
28 | 
29 |       await Promise.all(
30 |         [
31 |           { option: 'Tags', output: 'wikipedia;_category:wikipedia;_pictures:no;_videos:no;_details:yes;_ftindex:yes' },
32 |           { option: 'Language', output: 'eng' },
33 |           { option: 'Title', output: parameters.customZimTitle },
34 |           { option: 'Name', output: 'wikipedia_en_kiwix' },
35 |           { option: 'Flavour', output: 'nopic' },
36 |           { option: 'Description', output: parameters.customZimDescription },
37 |           { option: 'LongDescription', output: parameters.customZimLongDescription },
38 |           { option: 'Creator', output: 'Wikipedia' },
39 |           { option: 'Publisher', output: parameters.publisher },
40 |           { option: 'Source', output: 'en.wikipedia.org' },
41 |         ].map(async ({ option, output }) => {
42 |           expect(await zimdump(`show --ns M --url "${option}" ${outFiles[0].outFile}`)).toBe(output)
43 |         }),
44 |       )
45 | 
46 |       expect(await zimdump(`show --ns M --url "Illustration_48x48@1" ${outFiles[0].outFile}`)).toBeDefined()
47 |     })
48 | 
49 |     afterAll(() => {
50 |       if (!process.env.KEEP_ZIMS) {
51 |         rimraf.sync(`./${outFiles[0].testId}`)
52 |       }
53 |     })
54 |   })
55 | })
56 | 


--------------------------------------------------------------------------------
/test/testRenders.ts:
--------------------------------------------------------------------------------
 1 | import * as logger from '../src/Logger.js'
 2 | import * as mwoffliner from '../src/mwoffliner.lib.js'
 3 | import { execa } from 'execa'
 4 | import { RENDERERS_LIST } from '../src/util/const.js'
 5 | import { zimcheckAvailable, zimdumpAvailable } from './util.js'
 6 | import { Dump } from '../src/Dump.js'
 7 | 
 8 | interface Parameters {
 9 |   mwUrl: string
10 |   adminEmail: string
11 |   outputDirectory?: string
12 |   addNamespaces?: number
13 |   articleList?: string
14 |   articleListToIgnore?: string
15 |   redis?: string
16 |   format?: string | string[]
17 |   noLocalParserFallback?: boolean
18 |   forceRender?: string
19 |   mwActionApiPath?: string
20 |   mwRestApiPath?: string
21 |   mwModulePath?: string
22 | }
23 | 
24 | /*
25 |   This is the template for e2e tests of different wikis
26 |   1. Verify zimcheck and zimdump availability and caches result
27 |   2. Gets output file and checks its integrity
28 |   3. Returns output file per renderer in the callback function
29 | */
30 | 
31 | let zimToolsChecked = false
32 | async function checkZimTools() {
33 |   if (zimToolsChecked) {
34 |     return
35 |   }
36 | 
37 |   const zimcheckIsAvailable = await zimcheckAvailable()
38 |   const zimdumpIsAvailable = await zimdumpAvailable()
39 | 
40 |   if (!zimcheckIsAvailable || !zimdumpIsAvailable) {
41 |     const missingTool = !zimcheckIsAvailable ? 'Zimcheck' : 'Zimdump'
42 |     logger.error(`${missingTool} not installed, exiting test`)
43 |     process.exit(1)
44 |   }
45 | 
46 |   zimToolsChecked = true
47 | }
48 | 
49 | async function getOutFiles(renderName: string, testId: string, parameters: Parameters): Promise<Dump[]> {
50 |   await execa('redis-cli flushall', { shell: true })
51 |   const outFiles = await mwoffliner.execute({ ...parameters, outputDirectory: testId, forceRender: renderName })
52 | 
53 |   return outFiles
54 | }
55 | 
56 | interface TestDump extends Dump {
57 |   testId: string
58 |   renderer: string
59 | }
60 | 
61 | export async function testRenders(testName: string, parameters: Parameters, callback: { (outFiles: TestDump[]): any }, renderersList: Array<string>) {
62 |   await checkZimTools()
63 |   for (const renderer of renderersList) {
64 |     try {
65 |       const now = new Date()
66 |       const testId = `mwo-test-${testName}-${renderer}-${+now}`
67 |       const outFiles = (await getOutFiles(renderer, testId, parameters)) as TestDump[]
68 |       outFiles[0].testId = testId
69 |       outFiles[0].renderer = renderer
70 |       await callback(outFiles)
71 |     } catch (err) {
72 |       logger.error(err.message)
73 |       return
74 |     }
75 |   }
76 | }
77 | 
78 | export async function testAllRenders(testName: string, parameters: Parameters, callback: { (outFiles: TestDump[]): any }) {
79 |   return testRenders(testName, parameters, callback, RENDERERS_LIST)
80 | }
81 | 


--------------------------------------------------------------------------------
/test/unit/bootstrap.ts:
--------------------------------------------------------------------------------
 1 | // eslint-disable-next-line @typescript-eslint/triple-slash-reference
 2 | /// <reference path="../../src/types.d.ts" />
 3 | 
 4 | import 'dotenv/config'
 5 | import RedisStore from '../../src/RedisStore.js'
 6 | import { config } from '../../src/config.js'
 7 | 
 8 | RedisStore.setOptions(process.env.REDIS || config.defaults.redisPath, { quitOnError: false })
 9 | 
10 | export const startRedis = async () => {
11 |   await RedisStore.connect()
12 |   const { articleDetailXId, redirectsXId, filesToDownloadXPath, filesToRetryXPath } = RedisStore
13 |   await Promise.all([articleDetailXId.flush(), redirectsXId.flush(), filesToDownloadXPath.flush(), filesToRetryXPath.flush()])
14 | }
15 | 
16 | export const stopRedis = async () => {
17 |   console.info('Closing all redis connections')
18 |   await RedisStore.close()
19 | }
20 | 


--------------------------------------------------------------------------------
/test/unit/builders/url/api.director.test.ts:
--------------------------------------------------------------------------------
 1 | import ApiURLDirector from '../../../../src/util/builders/url/api.director.js'
 2 | 
 3 | describe('ApiURLDirector', () => {
 4 |   const apiUrlDirector = new ApiURLDirector('https://en.wikipedia.org/w/api.php')
 5 | 
 6 |   describe('buildSubCategoriesURL', () => {
 7 |     it('should return a string URL to get article sub categories', () => {
 8 |       const url = apiUrlDirector.buildSubCategoriesURL('article-123')
 9 | 
10 |       expect(url).toBe('https://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtype=subcat&cmlimit=max&format=json&formatversion=2&cmtitle=article-123&cmcontinue=')
11 |     })
12 |   })
13 | 
14 |   describe('buildSiteInfoQueryURL', () => {
15 |     it('should return string URL to get site info', () => {
16 |       const url = apiUrlDirector.buildSiteInfoQueryURL()
17 | 
18 |       expect(url).toBe(
19 |         'https://en.wikipedia.org/w/api.php?action=query&meta=siteinfo&format=json&formatversion=2&siprop=general%7Cnamespaces%7Cstatistics%7Cvariables%7Ccategory%7Cwikidesc',
20 |       )
21 |     })
22 |   })
23 | 
24 |   describe('buildQueryURL', () => {
25 |     it('should build a string URL with provided query params', () => {
26 |       const url = apiUrlDirector.buildQueryURL({ param1: 'param1', param2: 'param2' })
27 | 
28 |       expect(url).toBe('https://en.wikipedia.org/w/api.php?param1=param1&param2=param2')
29 |     })
30 |   })
31 | 
32 |   describe('buildArticleApiURL', () => {
33 |     it('should return a string URL with predefined query params and provided page for retrieving article', () => {
34 |       const url = apiUrlDirector.buildArticleApiURL('article-123')
35 | 
36 |       expect(url).toBe('https://en.wikipedia.org/w/api.php?action=parse&format=json&prop=modules%7Cjsconfigvars%7Cheadhtml&formatversion=2&page=article-123')
37 |     })
38 |   })
39 | 
40 |   describe('buildNamespacesURL', () => {
41 |     it('should return a string URL with predefined query params to get article namespaces', () => {
42 |       const url = apiUrlDirector.buildNamespacesURL()
43 | 
44 |       expect(url).toBe('https://en.wikipedia.org/w/api.php?action=query&meta=siteinfo&siprop=namespaces%7Cnamespacealiases&format=json&formatversion=2')
45 |     })
46 |   })
47 | 
48 |   describe('buildSiteInfoURL', () => {
49 |     it('should return a string URL with predefined query params for retrieving site info', () => {
50 |       const url = apiUrlDirector.buildSiteInfoURL()
51 | 
52 |       expect(url).toBe('https://en.wikipedia.org/w/api.php?action=query&meta=siteinfo&format=json&formatversion=2')
53 |     })
54 |   })
55 | 
56 |   describe('buildVisualEditorURL', () => {
57 |     it('should return base visual editor URL object with default query params', () => {
58 |       const url = apiUrlDirector.buildVisualEditorURL()
59 | 
60 |       expect(url.href).toBe('https://en.wikipedia.org/w/api.php?action=visualeditor&mobileformat=html&format=json&paction=parse&formatversion=2')
61 |     })
62 |   })
63 | })
64 | 


--------------------------------------------------------------------------------
/test/unit/builders/url/base.director.test.ts:
--------------------------------------------------------------------------------
 1 | import BaseURLDirector from '../../../../src/util/builders/url/base.director.js'
 2 | 
 3 | describe('BaseURLDirector', () => {
 4 |   const baseUrlDirector = new BaseURLDirector('https://en.m.wikipedia.com/')
 5 | 
 6 |   describe('buildURL', () => {
 7 |     it('should return URL object with path', () => {
 8 |       const url = baseUrlDirector.buildURL('/v1/test/api')
 9 | 
10 |       expect(url.href).toBe('https://en.m.wikipedia.com/v1/test/api')
11 |     })
12 | 
13 |     it('should return URL object with mwActionApiPath param', () => {
14 |       const url = baseUrlDirector.buildURL('/api.php')
15 | 
16 |       expect(url.href).toBe('https://en.m.wikipedia.com/api.php')
17 |     })
18 |   })
19 | 
20 |   describe('buildWikimediaMobileApiUrl', () => {
21 |     it('should return mobile rest URL with default path and trailing char', () => {
22 |       const url = baseUrlDirector.buildWikimediaMobileApiUrl()
23 | 
24 |       expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v1/page/mobile-html/')
25 |     })
26 |   })
27 | 
28 |   describe('buildWikimediaDesktopApiUrl', () => {
29 |     it('should return a desktop URL with default path and trailing char', () => {
30 |       const url = baseUrlDirector.buildWikimediaDesktopApiUrl()
31 | 
32 |       expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v1/page/html/')
33 |     })
34 |   })
35 | 
36 |   describe('buildModuleURL', () => {
37 |     it('should return a module URL with provided path and question mark as a trailing char', () => {
38 |       const url = baseUrlDirector.buildModuleURL('/w/reload.php')
39 | 
40 |       expect(url).toBe('https://en.m.wikipedia.com/w/reload.php?')
41 |     })
42 | 
43 |     it('should return a module URL with default path and question mark as a trailing char', () => {
44 |       const url = baseUrlDirector.buildModuleURL()
45 | 
46 |       expect(url).toBe('https://en.m.wikipedia.com/w/load.php?')
47 |     })
48 |   })
49 | })
50 | 


--------------------------------------------------------------------------------
/test/unit/builders/url/basic.director.test.ts:
--------------------------------------------------------------------------------
 1 | import basicURLDirector from '../../../../src/util/builders/url/basic.director.js'
 2 | 
 3 | describe('BasicURLDirector', () => {
 4 |   describe('buildMediawikiBaseURL', () => {
 5 |     it('should return a basic URL as an URL object with trailing character', () => {
 6 |       const url = basicURLDirector.buildMediawikiBaseURL('https://en.m.wikipedia.org')
 7 | 
 8 |       expect(url.href).toBe('https://en.m.wikipedia.org/')
 9 |     })
10 |   })
11 | 
12 |   describe('buildDownloaderBaseUrl', () => {
13 |     it('should return the first value because its condition is true', () => {
14 |       const conditions = [
15 |         { condition: true, value: 'https://en.wikipedia.org' },
16 |         { condition: false, value: 'https://en.vikidia.org' },
17 |       ]
18 | 
19 |       const url = basicURLDirector.buildDownloaderBaseUrl(conditions)
20 | 
21 |       expect(url).toBe('https://en.wikipedia.org')
22 |     })
23 | 
24 |     it('should return the middle value because its condition is true and first one is false', () => {
25 |       const conditions = [
26 |         { condition: false, value: 'https://en.wikipedia.org' },
27 |         { condition: true, value: 'https://en.vikidia.org' },
28 |         { condition: true, value: 'https://en.wikimedia.org' },
29 |       ]
30 | 
31 |       const url = basicURLDirector.buildDownloaderBaseUrl(conditions)
32 | 
33 |       expect(url).toBe('https://en.vikidia.org')
34 |     })
35 | 
36 |     it('should return the last value because its condition is true and first ones is false', () => {
37 |       const conditions = [
38 |         { condition: false, value: 'https://en.wikipedia.org' },
39 |         { condition: false, value: 'https://en.vikidia.org' },
40 |         { condition: true, value: 'https://en.wikimedia.org' },
41 |       ]
42 | 
43 |       const url = basicURLDirector.buildDownloaderBaseUrl(conditions)
44 | 
45 |       expect(url).toBe('https://en.wikimedia.org')
46 |     })
47 | 
48 |     it('should return undefined if all conditions are false', () => {
49 |       const conditions = [
50 |         { condition: false, value: 'https://en.wikipedia.org' },
51 |         { condition: false, value: 'https://en.vikidia.org' },
52 |         { condition: false, value: 'https://en.wikimedia.org' },
53 |       ]
54 | 
55 |       const url = basicURLDirector.buildDownloaderBaseUrl(conditions)
56 | 
57 |       expect(url).toBe(undefined)
58 |     })
59 |   })
60 | })
61 | 


--------------------------------------------------------------------------------
/test/unit/builders/url/desktop.director.test.ts:
--------------------------------------------------------------------------------
 1 | import WikimediaDesktopURLDirector from '../../../../src/util/builders/url/desktop.director.js'
 2 | 
 3 | describe('WikimediaDesktopURLDirector', () => {
 4 |   const wikimediaDesktopUrlDirector = new WikimediaDesktopURLDirector('https://en.m.wikipedia.org/api/rest_v1/page/html/')
 5 | 
 6 |   describe('buildArticleURL', () => {
 7 |     it('should return the URL to retrieve a desktop article', () => {
 8 |       const url = wikimediaDesktopUrlDirector.buildArticleURL('article-1234')
 9 | 
10 |       expect(url).toBe('https://en.m.wikipedia.org/api/rest_v1/page/html/article-1234')
11 |     })
12 |   })
13 | })
14 | 


--------------------------------------------------------------------------------
/test/unit/builders/url/mobile.director.test.ts:
--------------------------------------------------------------------------------
 1 | import WikimediaMobileURLDirector from '../../../../src/util/builders/url/mobile.director.js'
 2 | 
 3 | describe('WikimediaMobileURLDirector', () => {
 4 |   const mobuleUrlDirector = new WikimediaMobileURLDirector('https://en.m.wikipedia.org/api/rest_v1/page/mobile-html/')
 5 | 
 6 |   describe('buildArticleURL', () => {
 7 |     it('should return a URL for retrieving mobile article', () => {
 8 |       const url = mobuleUrlDirector.buildArticleURL('article-123')
 9 | 
10 |       expect(url).toBe('https://en.m.wikipedia.org/api/rest_v1/page/mobile-html/article-123')
11 |     })
12 |   })
13 | })
14 | 


--------------------------------------------------------------------------------
/test/unit/builders/url/url.builder.test.ts:
--------------------------------------------------------------------------------
 1 | import urlBuilder from '../../../../src/util/builders/url/url.builder.js'
 2 | 
 3 | describe('URLBuilder', () => {
 4 |   it('should throw an error if domain is not specified', () => {
 5 |     expect(() => urlBuilder.setPath('/v1/api').setQueryParams({ param1: 'param1' }).build()).toThrow(new Error('The link must contain a domain'))
 6 |   })
 7 | 
 8 |   it('should return URL as a string', () => {
 9 |     const url = urlBuilder.setDomain('https://localhost:3000').setPath('/v1/api').build()
10 | 
11 |     expect(url).toBe('https://localhost:3000/v1/api')
12 |   })
13 | 
14 |   it('should return URL as a URL object', () => {
15 |     const url = urlBuilder.setDomain('https://localhost:3000').setPath('/v1/api').build(true) as URL
16 | 
17 |     expect(url.href).toBe('https://localhost:3000/v1/api')
18 |   })
19 | 
20 |   it('should return URL as a URL object with trailing char', () => {
21 |     const url = urlBuilder.setDomain('https://localhost:3000').setPath('/v1/api').build(true, '/') as URL
22 | 
23 |     expect(url.href).toBe('https://localhost:3000/v1/api/')
24 |   })
25 | 
26 |   it('should return URL as a string with trailing char', () => {
27 |     const url = urlBuilder.setDomain('https://localhost:3000').setPath('/v1/api').build(false, '/')
28 | 
29 |     expect(url).toBe('https://localhost:3000/v1/api/')
30 |   })
31 | 
32 |   it('should return a URL with query params', () => {
33 |     const url = urlBuilder.setDomain('https://localhost:3000').setPath('/v1/api').setQueryParams({ param1: 'param1', param2: 'param2' }).build()
34 | 
35 |     expect(url).toBe('https://localhost:3000/v1/api?param1=param1&param2=param2')
36 |   })
37 | 
38 |   it('should append query params to the URL where some query params already exist', () => {
39 |     const url = urlBuilder.setDomain('https://localhost:3000?param1=param1&param2=param2').setQueryParams({ param3: 'param3', param4: 'param4' }, '&').build()
40 | 
41 |     expect(url).toBe('https://localhost:3000?param1=param1&param2=param2&param3=param3&param4=param4')
42 |   })
43 | })
44 | 


--------------------------------------------------------------------------------
/test/unit/builders/url/visual-editor.director.test.ts:
--------------------------------------------------------------------------------
 1 | import VisualEditorURLDirector from '../../../../src/util/builders/url/visual-editor.director.js'
 2 | 
 3 | describe('VisualEditorURLDirector', () => {
 4 |   const visualEditorUrlDirector = new VisualEditorURLDirector('https://en.m.wikipedia.org/?action=visualeditor&mobileformat=html&format=json&paction=parse')
 5 | 
 6 |   describe('buildArticleURL', () => {
 7 |     it('should build a URL object with query params to get article', () => {
 8 |       const url = visualEditorUrlDirector.buildArticleURL('article-123')
 9 | 
10 |       expect(url).toBe('https://en.m.wikipedia.org/?action=visualeditor&mobileformat=html&format=json&paction=parse&page=article-123')
11 |     })
12 |   })
13 | })
14 | 


--------------------------------------------------------------------------------
/test/unit/builders/url/web.director.test.ts:
--------------------------------------------------------------------------------
 1 | import WebURLDirector from '../../../../src/util/builders/url/web.director.js'
 2 | 
 3 | describe('WebURLDirector', () => {
 4 |   const webUrlDirector = new WebURLDirector('https://en.m.wikipedia.org/w/load.php')
 5 | 
 6 |   describe('buildArticleRawURL', () => {
 7 |     it('should return web URL to get an article', () => {
 8 |       const url = webUrlDirector.buildArticleRawURL('article-123')
 9 | 
10 |       expect(url).toBe('https://en.m.wikipedia.org/w/load.php?title=article-123&action=raw')
11 |     })
12 |   })
13 | })
14 | 


--------------------------------------------------------------------------------
/test/unit/dump.test.ts:
--------------------------------------------------------------------------------
 1 | import { startRedis, stopRedis } from './bootstrap.js'
 2 | import { Dump } from '../../src/Dump.js'
 3 | 
 4 | describe('Dump filename radical', () => {
 5 |   beforeAll(startRedis)
 6 |   afterAll(stopRedis)
 7 | 
 8 |   describe('Based on format', () => {
 9 |     const formatTests = {
10 |       '': '',
11 |       ':extra_alias_tag': '_extra_alias_tag',
12 |       'nopic:nopic_alias': '_nopic_alias',
13 |       'nopic,nopdf': '_nopic',
14 |       'nopic,nopdf:pdf_alias': '_pdf_alias',
15 |       'nopic,:extra_alias': '_extra_alias',
16 |       'nopic:': '',
17 |       'nopic,novid:': '',
18 |       'nopic,nodet': '_nopic_nodet',
19 |       'nodet,nopic': '_nopic_nodet',
20 |     }
21 | 
22 |     for (const [format, expectedFormatTags] of Object.entries(formatTests)) {
23 |       test(`tag [${expectedFormatTags}] is correct`, async () => {
24 |         const dump = new Dump(format, {} as any, { creator: '', webUrl: 'https://en.wikipedia.org', langIso2: '' } as any)
25 |         const outFormat = dump.computeFilenameRadical(true, false, true)
26 | 
27 |         expect(outFormat).toEqual(`_${expectedFormatTags}`)
28 |       })
29 |     }
30 |   })
31 | 
32 |   describe('Based on article list', () => {
33 |     const radicalTests = {
34 |       Brian_May: 'brian-may',
35 |       'Bob:Morane': 'bob-morane',
36 |       'Brian,Bob,Morane': 'brian-bob-morane',
37 |       'https://myhost.acme.com/mylist.tsv': 'mylist',
38 |       'https://myhost.acme.com/mylist1.tsv,https://myhost.acme.com/mylist2.tsv': 'mylist2',
39 |     }
40 | 
41 |     for (const [articleList, expectedRadicalSuffix] of Object.entries(radicalTests)) {
42 |       test(`radical for article list [${articleList}] is correct`, async () => {
43 |         const dump = new Dump('', { articleList } as any, { creator: '', webUrl: 'https://en.wikipedia.org', langIso2: 'en' } as any)
44 |         const outFormat = dump.computeFilenameRadical(false, false, true)
45 |         expect(outFormat).toEqual(`_en_${expectedRadicalSuffix}`)
46 |       })
47 |     }
48 |   })
49 | })
50 | 


--------------------------------------------------------------------------------
/test/unit/logger.test.ts:
--------------------------------------------------------------------------------
  1 | import * as logger from '../../src/Logger.js'
  2 | import { jest } from '@jest/globals'
  3 | 
  4 | describe('Logger', () => {
  5 |   let info
  6 |   let log
  7 |   let warn
  8 |   let error
  9 | 
 10 |   afterEach(() => {
 11 |     jest.clearAllMocks()
 12 |   })
 13 | 
 14 |   beforeEach(() => {
 15 |     info = jest.spyOn(console, 'info').mockImplementation(() => {
 16 |       return
 17 |     })
 18 |     log = jest.spyOn(console, 'log').mockImplementation(() => {
 19 |       return
 20 |     })
 21 |     warn = jest.spyOn(console, 'warn').mockImplementation(() => {
 22 |       return
 23 |     })
 24 |     error = jest.spyOn(console, 'error').mockImplementation(() => {
 25 |       return
 26 |     })
 27 |   })
 28 | 
 29 |   test('logger info level', async () => {
 30 |     logger.setVerboseLevel('info')
 31 | 
 32 |     logger.info('test info', 'info test message')
 33 |     logger.log('test log', 'log test message')
 34 |     logger.warn('test warn', 'warn test message')
 35 |     logger.error('test error', 'error test message')
 36 | 
 37 |     expect(info).toBeCalledWith(expect.any(String), 'test info', 'info test message')
 38 |     expect(log).toBeCalledWith(expect.any(String), 'test log', 'log test message')
 39 |     expect(warn).toBeCalledWith(expect.any(String), 'test warn', 'warn test message')
 40 |     expect(error).toBeCalledWith(expect.any(String), 'test error', 'error test message')
 41 |   })
 42 | 
 43 |   test('logger log level', async () => {
 44 |     logger.setVerboseLevel('log')
 45 | 
 46 |     logger.info('test info', 'info test message')
 47 |     logger.log('test log', 'log test message')
 48 |     logger.warn('test warn', 'warn test message')
 49 |     logger.error('test error', 'error test message')
 50 | 
 51 |     expect(info).not.toBeCalled()
 52 |     expect(log).toBeCalledWith(expect.any(String), 'test log', 'log test message')
 53 |     expect(warn).toBeCalledWith(expect.any(String), 'test warn', 'warn test message')
 54 |     expect(error).toBeCalledWith(expect.any(String), 'test error', 'error test message')
 55 |   })
 56 | 
 57 |   test('logger warn level', async () => {
 58 |     logger.setVerboseLevel('warn')
 59 | 
 60 |     logger.info('test info', 'info test message')
 61 |     logger.log('test log', 'log test message')
 62 |     logger.warn('test warn', 'warn test message')
 63 |     logger.error('test error', 'error test message')
 64 | 
 65 |     expect(info).not.toBeCalled()
 66 |     expect(log).not.toBeCalled()
 67 |     expect(warn).toBeCalledWith(expect.any(String), 'test warn', 'warn test message')
 68 |     expect(error).toBeCalledWith(expect.any(String), 'test error', 'error test message')
 69 |   })
 70 | 
 71 |   test('logger error level', async () => {
 72 |     logger.setVerboseLevel('error')
 73 | 
 74 |     logger.info('test info', 'info test message')
 75 |     logger.log('test log', 'log test message')
 76 |     logger.warn('test warn', 'warn test message')
 77 |     logger.error('test error', 'error test message')
 78 | 
 79 |     expect(info).not.toBeCalled()
 80 |     expect(log).not.toBeCalled()
 81 |     expect(warn).not.toBeCalled()
 82 |     expect(error).toBeCalledWith(expect.any(String), 'test error', 'error test message')
 83 |   })
 84 | 
 85 |   test('logger verbose true', async () => {
 86 |     logger.setVerboseLevel('true')
 87 | 
 88 |     logger.info('test info', 'info test message')
 89 |     logger.log('test log', 'log test message')
 90 |     logger.warn('test warn', 'warn test message')
 91 |     logger.error('test error', 'error test message')
 92 | 
 93 |     expect(info).toBeCalledWith(expect.any(String), 'test info', 'info test message')
 94 |     expect(log).toBeCalledWith(expect.any(String), 'test log', 'log test message')
 95 |     expect(warn).toBeCalledWith(expect.any(String), 'test warn', 'warn test message')
 96 |     expect(error).toBeCalledWith(expect.any(String), 'test error', 'error test message')
 97 |   })
 98 | 
 99 |   test('logger verbose empty', async () => {
100 |     logger.setVerboseLevel(null)
101 | 
102 |     logger.info('test info', 'info test message')
103 |     logger.log('test log', 'log test message')
104 |     logger.warn('test warn', 'warn test message')
105 |     logger.error('test error', 'error test message')
106 | 
107 |     expect(info).not.toBeCalled()
108 |     expect(log).not.toBeCalled()
109 |     expect(warn).not.toBeCalled()
110 |     expect(error).not.toBeCalled()
111 |   })
112 | })
113 | 


--------------------------------------------------------------------------------
/test/unit/mock/1x1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openzim/mwoffliner/edf4cdca3978951fad7da28f00e5a00e59d2a8c1/test/unit/mock/1x1.png


--------------------------------------------------------------------------------
/test/unit/mock/mockRedis.ts:
--------------------------------------------------------------------------------
 1 | import data from './sg.json'
 2 | 
 3 | /*
 4 |  * get mock data
 5 |  */
 6 | export const initMockData = async (kvs: RKVS<any>, size?: number): Promise<void> => {
 7 |   const len = Object.keys(data).length
 8 |   const multiplier = (size ?? len) / len
 9 | 
10 |   for (let i = 0; i < multiplier; i++) {
11 |     const d: Array<{ n: string; r: number; t: string }> = []
12 |     Object.values(data).forEach((item, x) => {
13 |       d.push({ ...item, n: `${data[x].n}_${i}` })
14 |     })
15 |     await kvs.setMany(d)
16 |   }
17 | }
18 | 
19 | /*
20 |  * mock of RedisClient that is just providing
21 |  * the most basic methods to test RedisKvs.iterateItems
22 |  */
23 | export class MockRedis {
24 |   private data: KVS<string> = {}
25 |   private nextCursor: number
26 | 
27 |   // eslint-disable-next-line @typescript-eslint/no-unused-vars
28 |   async hLen(dbName: string): Promise<number> {
29 |     return Object.keys(this.data).length
30 |   }
31 | 
32 |   async hSet(dbName: string, newData: KVS<string> | string, value?: any): Promise<number> {
33 |     if (typeof newData === 'object') {
34 |       this.data = {
35 |         ...this.data,
36 |         ...newData,
37 |       }
38 |       return Object.keys(newData).length
39 |     }
40 |     this.data[newData] = String(value)
41 |     return 1
42 |   }
43 | 
44 |   async hScan(dbName: string, cursor: number): Promise<{ cursor: number; tuples: { field: string; value: string }[] }> {
45 |     let amount = 9 + Math.floor(Math.random() * 3)
46 |     const maxLength = await this.hLen('')
47 |     let curCursor = this.nextCursor
48 | 
49 |     if (cursor < 0 || cursor >= maxLength) {
50 |       throw new Error(`Cursor ${cursor} is out of range`)
51 |     }
52 |     if (!curCursor) {
53 |       if (cursor !== 0) {
54 |         throw new Error(`No running iteration, ${cursor} ${curCursor}`)
55 |       }
56 |       curCursor = 0
57 |       amount = Math.min(amount, maxLength)
58 |     } else if (curCursor !== cursor) {
59 |       throw new Error(`Invalid cursor ${cursor}`)
60 |     } else {
61 |       amount = Math.min(curCursor + amount, maxLength) - curCursor
62 |     }
63 | 
64 |     const nextCursor = curCursor + amount
65 | 
66 |     const tuples = Object.keys(this.data)
67 |       .slice(curCursor, nextCursor)
68 |       .map((key) => {
69 |         return { field: key, value: this.data[key] }
70 |       })
71 |     this.nextCursor = nextCursor >= maxLength ? 0 : nextCursor
72 | 
73 |     await new Promise((res) => setTimeout(res, Math.floor(Math.random() * 10)))
74 | 
75 |     return {
76 |       cursor: this.nextCursor,
77 |       tuples,
78 |     }
79 |   }
80 | }
81 | 


--------------------------------------------------------------------------------
/test/unit/mwApiCapabilities.test.ts:
--------------------------------------------------------------------------------
 1 | import Downloader from '../../src/Downloader.js'
 2 | import MediaWiki from '../../src/MediaWiki.js'
 3 | import { jest } from '@jest/globals'
 4 | import { config } from '../../src/config.js'
 5 | 
 6 | jest.setTimeout(30000)
 7 | 
 8 | describe('Checking Mediawiki capabilities', () => {
 9 |   beforeEach(() => {
10 |     MediaWiki.reset()
11 |   })
12 | 
13 |   afterEach(() => {
14 |     MediaWiki.reset()
15 |   })
16 | 
17 |   test('test capabilities of en.wikipedia.org', async () => {
18 |     MediaWiki.base = 'https://en.wikipedia.org'
19 |     Downloader.init = { uaString: `${config.userAgent} (contact@kiwix.org)`, speed: 1, reqTimeout: 1000 * 60, webp: true, optimisationCacheUrl: '' }
20 |     expect(await MediaWiki.hasWikimediaDesktopApi()).toBe(true)
21 |     expect(await MediaWiki.hasWikimediaMobileApi()).toBe(true)
22 |     expect(await MediaWiki.hasRestApi()).toBe(true)
23 |     expect(await MediaWiki.hasVisualEditorApi()).toBe(true)
24 |   })
25 | 
26 |   test('test capabilities of wiki.openstreetmap.org', async () => {
27 |     MediaWiki.base = 'https://wiki.openstreetmap.org'
28 |     Downloader.init = { uaString: `${config.userAgent} (contact@kiwix.org)`, speed: 1, reqTimeout: 1000 * 60, webp: true, optimisationCacheUrl: '' }
29 | 
30 |     expect(await MediaWiki.hasWikimediaDesktopApi()).toBe(false)
31 |     expect(await MediaWiki.hasWikimediaMobileApi()).toBe(false)
32 |     expect(await MediaWiki.hasRestApi()).toBe(true)
33 |     expect(await MediaWiki.hasVisualEditorApi()).toBe(true)
34 |   })
35 | 
36 |   test('test capabilities of fo.wikisource.org', async () => {
37 |     MediaWiki.base = 'https://fo.wikisource.org'
38 |     Downloader.init = { uaString: `${config.userAgent} (contact@kiwix.org)`, speed: 1, reqTimeout: 1000 * 60, webp: true, optimisationCacheUrl: '' }
39 | 
40 |     expect(await MediaWiki.hasWikimediaDesktopApi()).toBe(true)
41 |     expect(await MediaWiki.hasWikimediaMobileApi()).toBe(false)
42 |     expect(await MediaWiki.hasRestApi()).toBe(true)
43 |     expect(await MediaWiki.hasVisualEditorApi()).toBe(true)
44 |   })
45 | 
46 |   test('test capabilities of minecraft.wiki with correct VisualEditor receipt', async () => {
47 |     MediaWiki.base = 'https://minecraft.wiki'
48 |     MediaWiki.wikiPath = '/'
49 |     MediaWiki.actionApiPath = '/api.php'
50 |     Downloader.init = { uaString: `${config.userAgent} (contact@kiwix.org)`, speed: 1, reqTimeout: 1000 * 60, webp: true, optimisationCacheUrl: '' }
51 | 
52 |     expect(await MediaWiki.hasWikimediaDesktopApi()).toBe(false)
53 |     expect(await MediaWiki.hasWikimediaMobileApi()).toBe(false)
54 |     expect(await MediaWiki.hasRestApi()).toBe(false)
55 |     expect(await MediaWiki.hasVisualEditorApi()).toBe(true)
56 |   })
57 | 
58 |   test('test capabilities of pokemon.fandom.com with correct VisualEditor receipt', async () => {
59 |     MediaWiki.base = 'https://pokemon.fandom.com/'
60 |     MediaWiki.wikiPath = '/'
61 |     MediaWiki.actionApiPath = '/api.php'
62 |     Downloader.init = { uaString: `${config.userAgent} (contact@kiwix.org)`, speed: 1, reqTimeout: 1000 * 60, webp: true, optimisationCacheUrl: '' }
63 | 
64 |     expect(await MediaWiki.hasWikimediaDesktopApi()).toBe(false)
65 |     expect(await MediaWiki.hasWikimediaMobileApi()).toBe(false)
66 |     expect(await MediaWiki.hasRestApi()).toBe(false)
67 |     expect(await MediaWiki.hasVisualEditorApi()).toBe(true)
68 |   })
69 | 
70 |   test('test capabilities of pokemon.fandom.com with default receipt', async () => {
71 |     MediaWiki.base = 'https://pokemon.fandom.com/'
72 |     Downloader.init = { uaString: `${config.userAgent} (contact@kiwix.org)`, speed: 1, reqTimeout: 1000 * 60, webp: true, optimisationCacheUrl: '' }
73 | 
74 |     expect(await MediaWiki.hasWikimediaDesktopApi()).toBe(false)
75 |     expect(await MediaWiki.hasWikimediaMobileApi()).toBe(false)
76 |     expect(await MediaWiki.hasRestApi()).toBe(false)
77 |     expect(await MediaWiki.hasVisualEditorApi()).toBe(false)
78 |   })
79 | 
80 |   test('test capabilities of pokemon.fandom.com with RestApi receipt', async () => {
81 |     MediaWiki.base = 'https://pokemon.fandom.com/'
82 |     MediaWiki.wikiPath = '/'
83 |     MediaWiki.restApiPath = '/rest.php'
84 |     Downloader.init = { uaString: `${config.userAgent} (contact@kiwix.org)`, speed: 1, reqTimeout: 1000 * 60, webp: true, optimisationCacheUrl: '' }
85 | 
86 |     expect(await MediaWiki.hasWikimediaDesktopApi()).toBe(false)
87 |     expect(await MediaWiki.hasWikimediaMobileApi()).toBe(false)
88 |     expect(await MediaWiki.hasVisualEditorApi()).toBe(false)
89 | 
90 |     /* TODO:
91 |       Title MediaWiki:Sidebar does not exist for Mediawiki Rest Api in pokemon.fandom.com for some reason. This will lead to incorrect capability check
92 |       See: https://pokemon.fandom.com/rest.php/v1/page/MediaWiki%3ASidebar/html
93 |     */
94 |     MediaWiki.apiCheckArticleId = 'Volcarona'
95 |     expect(await MediaWiki.hasRestApi()).toBe(true)
96 |   })
97 | })
98 | 


--------------------------------------------------------------------------------
/test/unit/redis.test.ts:
--------------------------------------------------------------------------------
 1 | import RedisKvs from '../../src/util/RedisKvs.js'
 2 | import RedisStore from '../../src/RedisStore.js'
 3 | import { startRedis, stopRedis } from './bootstrap.js'
 4 | 
 5 | describe('Redis', () => {
 6 |   beforeAll(startRedis)
 7 |   afterAll(stopRedis)
 8 | 
 9 |   const mock = {
10 |     testItem1: { value: 1 },
11 |     testItem2: { value: 2 },
12 |     testItem3: { value: 3 },
13 |     testItem4: { value: 4 },
14 |   }
15 | 
16 |   test('Redis Tests', async () => {
17 |     const kvs = new RedisKvs<{ value: number }>(RedisStore.client, 'test-kvs')
18 | 
19 |     const len = await kvs.len()
20 |     // New RedisKVS should have 0 items
21 |     expect(len).toEqual(0)
22 | 
23 |     await Promise.all(Object.entries(mock).map(([k, v]) => kvs.set(k, v)))
24 | 
25 |     const newLen = await kvs.len()
26 |     // Can set items
27 |     expect(newLen).toEqual(4)
28 | 
29 |     const newKeys = await kvs.keys()
30 |     const areKeysCorrect =
31 |       newKeys.length === newLen && newKeys.filter((x) => !Object.keys(mock).includes(x)).length === 0 && Object.keys(mock).filter((x) => !newKeys.includes(x)).length === 0
32 |     // Can get the keys properly
33 |     expect(areKeysCorrect).toBeTruthy()
34 | 
35 |     const item2 = await kvs.get('testItem2')
36 |     // Can get single item
37 |     expect(item2.value).toEqual(2)
38 | 
39 |     const { testItem1, testItem4 } = await kvs.getMany(['testItem1', 'testItem4'])
40 |     // Can get multiple items (1/2)
41 |     expect(testItem1.value).toEqual(1)
42 |     // Can get multiple items (2/2)
43 |     expect(testItem4.value).toEqual(4)
44 | 
45 |     await kvs.delete('testItem2')
46 |     const deletedTestItem2 = await kvs.get('testItem2')
47 |     // Can delete single item
48 |     expect(deletedTestItem2).toBeNull()
49 | 
50 |     await kvs.deleteMany(['testItem1', 'testItem4'])
51 |     const { deletedTestItem1, deletedTestItem4 } = await kvs.getMany(['testItem1', 'testItem4'])
52 |     // Can delete multiple items (1/2)
53 |     expect(deletedTestItem1).toBeUndefined()
54 |     // Can delete multiple items (2/2)
55 |     expect(deletedTestItem4).toBeUndefined()
56 | 
57 |     await kvs.flush()
58 | 
59 |     const flushedLen = await kvs.len()
60 |     // Can flush KVS
61 |     expect(flushedLen).toEqual(0)
62 |   })
63 | })
64 | 


--------------------------------------------------------------------------------
/test/unit/redisKvsIterate.test.ts:
--------------------------------------------------------------------------------
 1 | import { initMockData, MockRedis } from './mock/mockRedis.js'
 2 | import RedisKvs from '../../src/util/RedisKvs.js'
 3 | import { jest } from '@jest/globals'
 4 | 
 5 | let client: MockRedis
 6 | let kvs: RKVS<any>
 7 | 
 8 | const numberOfItems = [100, 1000]
 9 | const timeouts = [0, 10, 20]
10 | 
11 | jest.setTimeout(10000)
12 | 
13 | const getHandler = (delay: number) => async (): Promise<any> => {
14 |   const t = Math.random() * delay
15 |   return new Promise((resolve) => {
16 |     setTimeout(() => {
17 |       resolve(null)
18 |     }, t)
19 |   })
20 | }
21 | 
22 | const getTestHandler = (handler: (items: any, activeWorkers: number) => any | Promise<any>, numWorkers: number) => async () => {
23 |   const len = await kvs.len()
24 |   const mockHandler = jest.fn(handler)
25 | 
26 |   await kvs.iterateItems(numWorkers, mockHandler)
27 | 
28 |   // ...have been called at all
29 |   expect(mockHandler).toHaveBeenCalled()
30 | 
31 |   let count = 0
32 |   let maxWorkers = 0
33 |   mockHandler.mock.calls.forEach(([items, activeWorkers]) => {
34 |     count += Object.keys(items).length
35 |     if (maxWorkers < activeWorkers) {
36 |       maxWorkers = activeWorkers
37 |     }
38 |   })
39 | 
40 |   // ...iterated over all items
41 |   expect(count).toEqual(len)
42 |   // used right amount of workers
43 |   expect(maxWorkers).toEqual(numWorkers)
44 | }
45 | 
46 | describe('RedisKvs.iterateItems()', () => {
47 |   for (const numItems of numberOfItems) {
48 |     describe(`Items: ${numItems}`, () => {
49 |       beforeAll(async () => {
50 |         client = new MockRedis()
51 |         // eslint-disable-next-line @typescript-eslint/ban-ts-comment
52 |         // @ts-ignore
53 |         kvs = new RedisKvs(client, 'test-kvs')
54 |         await initMockData(kvs, numItems)
55 |       })
56 | 
57 |       describe('Workers: 2', () => {
58 |         for (const timeout of timeouts) {
59 |           test(`${timeout} ms`, getTestHandler(getHandler(timeout), 2))
60 |         }
61 |       })
62 |     })
63 |   }
64 | })
65 | 


--------------------------------------------------------------------------------
/test/unit/s3.test.ts:
--------------------------------------------------------------------------------
 1 | import S3 from '../../src/S3.js'
 2 | import 'dotenv/config.js'
 3 | import { jest } from '@jest/globals'
 4 | 
 5 | jest.setTimeout(60000)
 6 | 
 7 | const describeIf = process.env.S3_URL ? describe : describe.skip
 8 | describeIf('S3', () => {
 9 |   test('S3 checks', async () => {
10 |     const s3UrlObj = new URL(`${process.env.S3_URL}`)
11 | 
12 |     const s3 = new S3(
13 |       `${s3UrlObj.protocol}//${s3UrlObj.host}/`,
14 |       new URLSearchParams({
15 |         bucketName: s3UrlObj.searchParams.get('bucketName'),
16 |         keyId: s3UrlObj.searchParams.get('keyId'),
17 |         secretAccessKey: s3UrlObj.searchParams.get('secretAccessKey'),
18 |       }),
19 |       1000 * 60,
20 |       false,
21 |     )
22 | 
23 |     const credentialExists = await s3.initialise()
24 |     // Credentials on S3 exists
25 |     expect(credentialExists).toBeTruthy()
26 | 
27 |     const bucketExists = await s3.bucketExists(s3UrlObj.searchParams.get('bucketName') as string)
28 |     // Given bucket exists in S3
29 |     expect(bucketExists).toBeDefined()
30 | 
31 |     // Given bucket does not exists in S3
32 |     await expect(s3.bucketExists('random-string')).rejects.toThrowError()
33 | 
34 |     const s3TestKey = `bm.wikipedia.org/static/images/project-logos/${Math.random().toString(36).slice(2, 7)}.png`
35 |     // Image uploaded to S3
36 |     await s3.uploadBlob(s3TestKey, '42', '42', '1')
37 | 
38 |     const imageExist = await s3.downloadBlob(s3TestKey)
39 |     // Image exists in S3
40 |     expect(imageExist).toBeDefined()
41 | 
42 |     // Remove Image after test
43 |     await s3.deleteBlob({ Bucket: s3UrlObj.searchParams.get('bucketName') as string, Key: s3TestKey })
44 | 
45 |     const imageNotExist = await s3.downloadBlob('bm.wikipedia.org/static/images/project-logos/polsjsshsgd.png')
46 |     // Image doesnt exist in S3
47 |     expect(imageNotExist).toBeNull()
48 |   })
49 | 
50 |   test('Test whether the wrong region was set', async () => {
51 |     const wrongS3UrlObj = new URL('https://wrong-s3.region.com/?keyId=123&secretAccessKey=123&bucketName=kiwix')
52 | 
53 |     expect(
54 |       () =>
55 |         new S3(
56 |           `${wrongS3UrlObj.protocol}//${wrongS3UrlObj.host}/`,
57 |           new URLSearchParams({
58 |             bucketName: wrongS3UrlObj.searchParams.get('bucketName'),
59 |             keyId: wrongS3UrlObj.searchParams.get('keyId'),
60 |             secretAccessKey: wrongS3UrlObj.searchParams.get('secretAccessKey'),
61 |           }),
62 |           1000 * 60,
63 |           false,
64 |         ),
65 |     ).toThrow('Unknown S3 region set')
66 |   })
67 | })
68 | 


--------------------------------------------------------------------------------
/test/unit/sanitize-argument.test.ts:
--------------------------------------------------------------------------------
 1 | import { sanitize_all } from '../../src/sanitize-argument.js'
 2 | 
 3 | describe('Sanitize parameters', () => {
 4 |   test('sanitizing usage of the same parameter more than one time', async () => {
 5 |     // equivalent to command: node lib/cli.js --verbose --mwUrl="https://en.wikipedia.org" --adminEmail="test@test.test" --verbose=info
 6 |     const twoVerboseParameters = {
 7 |       _: [],
 8 |       verbose: [true, 'info'],
 9 |       mwUrl: 'https://en.wikipedia.org',
10 |       'mw-url': 'https://en.wikipedia.org',
11 |       adminEmail: 'test@test.test',
12 |       'admin-email': 'test@test.test',
13 |       $0: 'node_modules/ts-node/dist/child/child-entrypoint.js',
14 |     }
15 | 
16 |     await expect(sanitize_all(twoVerboseParameters)).rejects.toThrow(/Parameter '--verbose' can only be used once/)
17 | 
18 |     // equivalent to command: node lib/cli.js --verbose --mwUrl="https://en.wikipedia.org" --adminEmail="test@test.test" --mwUrl="https://en.wikipedia.org"
19 |     const twoUrlParameters = {
20 |       _: [],
21 |       verbose: true,
22 |       mwUrl: ['https://en.wikipedia.org', 'https://en.wikipedia.org'],
23 |       'mw-url': ['https://en.wikipedia.org', 'https://en.wikipedia.org'],
24 |       adminEmail: 'test@test.test',
25 |       'admin-email': 'test@test.test',
26 |       $0: 'node_modules/ts-node/dist/child/child-entrypoint.js',
27 |     }
28 | 
29 |     await expect(sanitize_all(twoUrlParameters)).rejects.toThrow(/Parameter '--mwUrl' can only be used once/)
30 | 
31 |     // equivalent to command: node lib/cli.js --verbose=info --adminEmail="est@test.test" --articleList="User:Kelson/MWoffliner_CI_reference" --mwUrl="https://en.m.wikipedia.org/" --format=nopic --format=nopdf --format=novid
32 |     const threeFormatParameters = {
33 |       _: [],
34 |       verbose: 'info',
35 |       adminEmail: 'test@test.test',
36 |       'admin-email': 'test@test.test',
37 |       articleList: 'User:Kelson/MWoffliner_CI_reference',
38 |       'article-list': 'User:Kelson/MWoffliner_CI_reference',
39 |       mwUrl: 'https://en.m.wikipedia.org/',
40 |       'mw-url': 'https://en.m.wikipedia.org/',
41 |       format: ['nopic', 'nopdf', 'novid'],
42 |       $0: 'node_modules/ts-node/dist/child/child-entrypoint.js',
43 |     }
44 | 
45 |     expect(await sanitize_all(threeFormatParameters)).toBeUndefined()
46 |   })
47 | })
48 | 


--------------------------------------------------------------------------------
/test/unit/saveStaticFiles.test.ts:
--------------------------------------------------------------------------------
 1 | import { startRedis, stopRedis } from './bootstrap.js'
 2 | import { jest } from '@jest/globals'
 3 | import { WikimediaDesktopRenderer } from '../../src/renderers/wikimedia-desktop.renderer.js'
 4 | import { WikimediaMobileRenderer } from '../../src/renderers/wikimedia-mobile.renderer.js'
 5 | 
 6 | jest.setTimeout(10000)
 7 | 
 8 | describe('saveStaticFiles', () => {
 9 |   beforeAll(startRedis)
10 |   afterAll(stopRedis)
11 | 
12 |   test('Compare desktop static files list', async () => {
13 |     const desktopAndCommonStaticFiles = [
14 |       'script.js',
15 |       'masonry.min.js',
16 |       'article_list_home.js',
17 |       'images_loaded.min.js',
18 |       'style.css',
19 |       'mobile_main_page.css',
20 |       'footer.css',
21 |       '../node_modules/details-element-polyfill/dist/details-element-polyfill.js',
22 |       'content.parsoid.css',
23 |       'inserted_style.css',
24 |     ]
25 | 
26 |     const wikimediaDesktopRenderer = new WikimediaDesktopRenderer()
27 |     const staticFilesFromRenderer = wikimediaDesktopRenderer.staticFilesListDesktop
28 | 
29 |     expect(desktopAndCommonStaticFiles).toEqual(staticFilesFromRenderer)
30 |   })
31 | 
32 |   test('Compare mobile static files list', async () => {
33 |     const mobileAndCommonStatiFiles = [
34 |       'script.js',
35 |       'masonry.min.js',
36 |       'article_list_home.js',
37 |       'images_loaded.min.js',
38 |       'style.css',
39 |       'mobile_main_page.css',
40 |       'footer.css',
41 |       'wm_mobile_override_script.js',
42 |       'wm_mobile_override_style.css',
43 |     ]
44 | 
45 |     const wikimediaMobileRenderer = new WikimediaMobileRenderer()
46 |     const staticFilesFromRenderer = wikimediaMobileRenderer.staticFilesListMobile
47 | 
48 |     expect(mobileAndCommonStatiFiles).toEqual(staticFilesFromRenderer)
49 |   })
50 | })
51 | 


--------------------------------------------------------------------------------
/test/unit/treatments/article.treatment.test.ts:
--------------------------------------------------------------------------------
 1 | import domino from 'domino'
 2 | import RedisStore from '../../../src/RedisStore.js'
 3 | import { StringItem } from '@openzim/libzim'
 4 | import { mwRetToArticleDetail } from '../../../src/util/mw-api.js'
 5 | import { setupScrapeClasses } from '../../util.js'
 6 | import { startRedis, stopRedis } from '../bootstrap.js'
 7 | import { saveArticles } from '../../../src/util/saveArticles.js'
 8 | import { jest } from '@jest/globals'
 9 | import { RENDERERS_LIST } from '../../../src/util/const.js'
10 | import Downloader from '../../../src/Downloader.js'
11 | import RenderingContext from '../../../src/renderers/rendering.context.js'
12 | import { renderName } from 'src/renderers/abstract.renderer.js'
13 | 
14 | jest.setTimeout(10000)
15 | 
16 | describe('ArticleTreatment', () => {
17 |   beforeAll(startRedis)
18 |   afterAll(stopRedis)
19 | 
20 |   for (const renderer of RENDERERS_LIST) {
21 |     test(`Article html processing for ${renderer} render`, async () => {
22 |       const { dump } = await setupScrapeClasses() // en wikipedia
23 |       await RenderingContext.createRenderers(renderer as renderName, true)
24 |       const _articlesDetail = await Downloader.getArticleDetailsIds(['London', 'non-existent-article'])
25 |       const articlesDetail = mwRetToArticleDetail(_articlesDetail)
26 |       const { articleDetailXId } = RedisStore
27 |       await articleDetailXId.flush()
28 |       await articleDetailXId.setMany(articlesDetail)
29 | 
30 |       const addedArticles: StringItem[] = []
31 | 
32 |       // TODO: use proper spied (like sinon.js)
33 |       await saveArticles(
34 |         {
35 |           addItem(article: StringItem) {
36 |             if (article.mimeType === 'text/html') {
37 |               addedArticles.push(article)
38 |             }
39 |             return Promise.resolve(null)
40 |           },
41 |         } as any,
42 |         dump,
43 |       )
44 | 
45 |       // Successfully scrapped existent articles + placeholder for deleted article
46 |       expect(addedArticles).toHaveLength(2)
47 | 
48 |       expect([addedArticles[0].title, addedArticles[1].title]).toEqual(expect.arrayContaining(['London', 'non-existent-article']))
49 | 
50 |       for (let i = 0; i <= 1; i++) {
51 |         if (addedArticles[i].title === 'London') {
52 |           const articleDoc = domino.createDocument(addedArticles[i].getContentProvider().feed().toString())
53 | 
54 |           // Successfully scrapped existent articles
55 |           expect(articleDoc.querySelector('meta[name="geo.position"]')).toBeDefined()
56 |           // Geo Position data is correct
57 |           expect(articleDoc.querySelector('meta[name="geo.position"]')?.getAttribute('content')).toEqual('51.50722222;-0.1275')
58 |         }
59 | 
60 |         if (addedArticles[i].title === 'non-existent-article') {
61 |           expect(addedArticles[i].getContentProvider().feed().toString()).toContain('Oops. Article not found.')
62 |         }
63 |       }
64 |     })
65 |   }
66 | })
67 | 


--------------------------------------------------------------------------------
/test/unit/util/dump.test.ts:
--------------------------------------------------------------------------------
 1 | import { startRedis, stopRedis } from '../bootstrap.js'
 2 | import Downloader from '../../../src/Downloader.js'
 3 | import MediaWiki from '../../../src/MediaWiki.js'
 4 | import { config } from '../../../src/config.js'
 5 | import { downloadModule, processStylesheetContent } from '../../../src/util/dump.js'
 6 | import RedisStore from '../../../src/RedisStore.js'
 7 | import urlHelper from '../../../src/util/url.helper.js'
 8 | 
 9 | describe('Download CSS or JS Module', () => {
10 |   beforeAll(startRedis)
11 |   afterAll(stopRedis)
12 | 
13 |   beforeEach(() => {
14 |     const { filesToDownloadXPath } = RedisStore
15 |     filesToDownloadXPath.flush()
16 |     MediaWiki.base = 'https://en.wikipedia.org'
17 |     Downloader.init = { uaString: `${config.userAgent} (contact@kiwix.org)`, speed: 1, reqTimeout: 1000 * 60, webp: true, optimisationCacheUrl: '' }
18 |   })
19 | 
20 |   test('download skins.vector.styles CSS', async () => {
21 |     const { text: content, moduleApiUrl } = await downloadModule('skins.vector.styles', 'css')
22 | 
23 |     // URL expected to be used to retrieve CSS module
24 |     expect(moduleApiUrl).toBe('https://en.wikipedia.org/w/load.php?debug=true&lang=en&modules=skins.vector.styles&only=styles&skin=vector&version=&*')
25 | 
26 |     // Check if CSS module still contain this background image
27 |     expect(content).toContain(`background-image: url(link.ernal-small-ltr-progressive.svg`)
28 | 
29 |     // One SVG (among others) expected to be used inside the CSS
30 |     expect(Object.keys(Downloader.cssDependenceUrls)).toContain(
31 |       'https://en.wikipedia.org/w/skins/Vector/resources/skins.vector.styles/images/link-external-small-ltr-progressive.svg?fb64d',
32 |     )
33 |   })
34 | 
35 |   test('rewrite CSS', async () => {
36 |     const rewrittenCSS = processStylesheetContent(
37 |       'https://en.wikipedia.org/w/load.php?debug=true&lang=en&modules=skins.vector.styles&only=styles&skin=vector&version=&*',
38 |       '',
39 |       'a.external { background-image: url(/w/skins/Vector/resources/skins.vector.styles/images/link-external-small-ltr-progressive.svg?fb64d)); }',
40 |     )
41 |     expect(rewrittenCSS).toContain('a.external { background-image: url(link.ernal-small-ltr-progressive.svg)); }')
42 |     expect(await RedisStore.filesToDownloadXPath.keys()).toStrictEqual(['mw/link.ernal-small-ltr-progressive.svg'])
43 |     const redisValue = await RedisStore.filesToDownloadXPath.get('mw/link.ernal-small-ltr-progressive.svg')
44 |     expect(urlHelper.deserializeUrl(redisValue.url)).toBe(
45 |       'https://en.wikipedia.org/w/skins/Vector/resources/skins.vector.styles/images/link-external-small-ltr-progressive.svg?fb64d',
46 |     )
47 |   })
48 | })
49 | 


--------------------------------------------------------------------------------
/test/unit/util/metaData.test.ts:
--------------------------------------------------------------------------------
 1 | import { byGrapheme } from 'split-by-grapheme'
 2 | 
 3 | describe('Test metadata utilities', () => {
 4 |   describe('Count graphemes', () => {
 5 |     test('simple latin', async () => {
 6 |       expect('title'.split(byGrapheme).length).toBe(5)
 7 |     })
 8 | 
 9 |     test('multiletter graphemes', async () => {
10 |       expect('में'.split(byGrapheme).length).toBe(1)
11 |     })
12 | 
13 |     test('multiletter graphemes', async () => {
14 |       expect('विकी मेड मेडिकल इनसाइक्लोपीडिया हिंदी में'.split(byGrapheme).length).toBe(24)
15 |     })
16 |   })
17 | })
18 | 


--------------------------------------------------------------------------------
/test/unit/util/url.helper.test.ts:
--------------------------------------------------------------------------------
 1 | import urlHelper from '../../../src/util/url.helper.js'
 2 | 
 3 | describe('URL helper tests', () => {
 4 |   test('Simple', () => {
 5 |     const originalUrl = 'https://en.wikipedia.org/w/skins/Vector/resources/skins.vector.styles/images/link-external-small-ltr-progressive.svg?fb64d'
 6 |     const serialized = urlHelper.serializeUrl(originalUrl)
 7 |     expect(serialized).toMatch(/_\d+_\/w\/skins\/Vector\/resources\/skins.vector.styles\/images\/link-external-small-ltr-progressive.svg\?fb64d/)
 8 |     expect(urlHelper.deserializeUrl(serialized)).toBe(originalUrl)
 9 |   })
10 | })
11 | 


--------------------------------------------------------------------------------
/test/unit/webpAndRedirection.test.ts:
--------------------------------------------------------------------------------
 1 | import { execa } from 'execa'
 2 | import { join } from 'path'
 3 | import * as MwOffliner from '../../src/mwoffliner.lib.js'
 4 | import { writeFilePromise, mkdirPromise } from '../../src/util/index.js'
 5 | import { Archive } from '@openzim/libzim'
 6 | import * as FileType from 'file-type'
 7 | import { rimraf } from 'rimraf'
 8 | import { jest } from '@jest/globals'
 9 | 
10 | jest.setTimeout(30000)
11 | 
12 | const now = new Date()
13 | const testId = join(process.cwd(), `mwo-test-${+now}`)
14 | 
15 | const articleListUrl = join(testId, '/articleList')
16 | 
17 | test('Webp Option check', async () => {
18 |   await execa('redis-cli flushall', { shell: true })
19 |   await mkdirPromise(testId)
20 | 
21 |   const articleList = `
22 | Animation
23 | Real-time computer graphics`
24 | 
25 |   await writeFilePromise(articleListUrl, articleList, 'utf8')
26 | 
27 |   const outFiles = await MwOffliner.execute({
28 |     mwUrl: 'https://en.wikipedia.org',
29 |     adminEmail: 'test@kiwix.org',
30 |     articleList: articleListUrl,
31 |     outputDirectory: testId,
32 |     redis: process.env.REDIS,
33 |     webp: true,
34 |   })
35 | 
36 |   const zimFile = new Archive(outFiles[0].outFile)
37 | 
38 |   // passed test for png
39 |   expect(await isWebpPresent('Animexample3edit.png', zimFile)).toBeTruthy()
40 |   // passed test for jpg
41 |   expect(await isWebpPresent('Claychick.jpg', zimFile)).toBeTruthy()
42 |   // redirection check successful
43 |   expect(await isRedirectionPresent('href="Real-time_rendering"', zimFile)).toBeTruthy()
44 |   rimraf.sync(testId)
45 | })
46 | 
47 | async function isWebpPresent(path: string, zimFile: Archive) {
48 |   return (await FileType.fileTypeFromBuffer(zimFile.getEntryByPath(path).getItem().data.data))?.mime === 'image/webp'
49 | }
50 | 
51 | async function isRedirectionPresent(path: string, zimFile: Archive) {
52 |   return zimFile.getEntryByPath('Animation').getItem().data.data.includes(path)
53 | }
54 | 


--------------------------------------------------------------------------------
/test/util.ts:
--------------------------------------------------------------------------------
  1 | import MediaWiki from '../src/MediaWiki.js'
  2 | import Downloader from '../src/Downloader.js'
  3 | import { Dump } from '../src/Dump.js'
  4 | import { config } from '../src/config.js'
  5 | import axios from 'axios'
  6 | import { execa } from 'execa'
  7 | import * as logger from '../src/Logger.js'
  8 | import 'dotenv/config.js'
  9 | 
 10 | export function leftPad(_num: number, length: number) {
 11 |   const num = `${_num}`
 12 |   return '0'.repeat(length - num.length) + num
 13 | }
 14 | 
 15 | export function makeLink($doc: Document, href: string, rel: string, title: string, text: string = href, attributes: KVS<string> = {}) {
 16 |   const $link = $doc.createElement('a')
 17 |   $link.setAttribute('href', href)
 18 |   $link.setAttribute('rel', rel)
 19 |   $link.setAttribute('title', title)
 20 |   $link.innerHTML = text
 21 | 
 22 |   for (const [key, value] of Object.entries(attributes)) {
 23 |     $link.setAttribute(key, value)
 24 |   }
 25 | 
 26 |   const $wrapper = $doc.createElement('div')
 27 |   $wrapper.appendChild($link)
 28 |   $doc.body.appendChild($wrapper)
 29 | 
 30 |   return $link
 31 | }
 32 | 
 33 | export async function setupScrapeClasses({ mwUrl = 'https://en.wikipedia.org', format = '' } = {}) {
 34 |   MediaWiki.base = mwUrl
 35 | 
 36 |   Downloader.init = { uaString: `${config.userAgent} (contact@kiwix.org)`, speed: 1, reqTimeout: 1000 * 60, webp: false, optimisationCacheUrl: '' }
 37 | 
 38 |   await MediaWiki.getMwMetaData()
 39 |   await MediaWiki.hasCoordinates()
 40 |   await MediaWiki.hasWikimediaDesktopApi()
 41 |   await MediaWiki.hasWikimediaMobileApi()
 42 |   await MediaWiki.hasRestApi()
 43 |   await MediaWiki.hasVisualEditorApi()
 44 |   await MediaWiki.hasModuleApi()
 45 | 
 46 |   const dump = new Dump(format, {} as any, MediaWiki.metaData)
 47 | 
 48 |   return {
 49 |     dump,
 50 |   }
 51 | }
 52 | 
 53 | export function sleep(ms: number) {
 54 |   return new Promise((resolve) => {
 55 |     setTimeout(resolve, ms)
 56 |   })
 57 | }
 58 | 
 59 | const zimcheckPath = process.env.ZIMCHECK_PATH || 'zimcheck'
 60 | export async function zimcheckAvailable() {
 61 |   try {
 62 |     await execa(`which ${zimcheckPath}`, { shell: true })
 63 |     return true
 64 |   } catch {
 65 |     return false
 66 |   }
 67 | }
 68 | 
 69 | export async function zimcheck(filePath: string) {
 70 |   return execa(`${zimcheckPath} ${filePath}`, { shell: true })
 71 | }
 72 | 
 73 | const zimdumpPath = process.env.ZIMDUMP_PATH || 'zimdump'
 74 | export async function zimdumpAvailable(): Promise<boolean> {
 75 |   try {
 76 |     await execa(`which ${zimdumpPath}`, { shell: true })
 77 |     return true
 78 |   } catch {
 79 |     return false
 80 |   }
 81 | }
 82 | 
 83 | export async function zimdump(params: string): Promise<string> {
 84 |   return execa(`${zimdumpPath} ${params}`, { shell: true }).then(({ stdout }) => stdout)
 85 | }
 86 | 
 87 | export async function convertWikicodeToHtml(wikicode: string, baseUrl: string): Promise<any> {
 88 |   try {
 89 |     return await axios.post(`${baseUrl}api/rest_v1/transform/wikitext/to/html`, {
 90 |       wikitext: wikicode,
 91 |       body_only: true,
 92 |     })
 93 |   } catch (err) {
 94 |     logger.log(`Got error during conversion of wikicode to HTML due to ${err}`)
 95 |     return err
 96 |   }
 97 | }
 98 | 
 99 | export async function testHtmlRewritingE2e(wikicode: string, html: string) {
100 |   const resultHtml = await convertWikicodeToHtml(wikicode, 'https://en.wikipedia.org/')
101 |   expect(html).toEqual(resultHtml.data)
102 | }
103 | 


--------------------------------------------------------------------------------
/translation/ar.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Meno25",
 5 | 			"عبد الإله صديقي"
 6 | 		]
 7 | 	},
 8 | 	"DISCLAIMER": "هذه المقالة صادرة عن ${creator}. النص متاح تحت رخصة ${license}. قد تُطبَّق شروط إضافية على ملفات الوسائط.",
 9 | 	"LAST_EDITED_ON": "تم التحرير آخر مرة في ${date}",
10 | 	"LICENSE_NAME": "المشاع الإبداعي - الإسناد - الترخيص بالمثل"
11 | }
12 | 


--------------------------------------------------------------------------------
/translation/bn.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"আফতাবুজ্জামান"
 5 | 		]
 6 | 	},
 7 | 	"LAST_EDITED_ON": "${date} তারিখে সর্বশেষ সম্পাদিত",
 8 | 	"LICENSE_NAME": "ক্রিয়েটিভ কমন্স - অ্যাট্রিবিউশন - শেয়ারঅ্যালাইক"
 9 | }
10 | 


--------------------------------------------------------------------------------
/translation/br.json:
--------------------------------------------------------------------------------
1 | {
2 | 	"@metadata": {
3 | 		"authors": [
4 | 			"Adriendelucca"
5 | 		]
6 | 	},
7 | 	"LAST_EDITED_ON": "Kemm diwezhañ d'an ${date}"
8 | }
9 | 


--------------------------------------------------------------------------------
/translation/dag.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Kalakpagh"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "Lahabali ŋɔ nyɛla din yina ${creator}. Sabbu ŋɔ nyɛla  ${license} nima ni saɣi n-ti shɛli. Din pahira zalikpana ni tooi zali zaŋ n-ti pɔhim zuɣu fasara nima.",
 8 | 	"LAST_EDITED_ON": "Bahigu maliniŋ nyɛ ${date}"
 9 | }
10 | 


--------------------------------------------------------------------------------
/translation/de.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Brettchenweber",
 5 | 			"IMayBeABitShy"
 6 | 		]
 7 | 	},
 8 | 	"DISCLAIMER": "Dieser Artikel wurde von ${creator} herausgegeben. Der Text ist als ${license} lizenziert. Möglicherweise können weitere Bestimmungen für Mediendateien gelten.",
 9 | 	"LAST_EDITED_ON": "Zuletzt bearbeitet am ${date}",
10 | 	"LICENSE_NAME": "Creative Commons - Attribution - Sharealike"
11 | }
12 | 


--------------------------------------------------------------------------------
/translation/en.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"DISCLAIMER": "This article is issued from ${creator}. The text is licensed under ${license}. Additional terms may apply for the media files.",
 3 | 	"LAST_EDITED_ON": "Last edited on ${date}",
 4 | 	"LICENSE_NAME": "Creative Commons - Attribution - Sharealike",
 5 | 	"DOWNLOAD_ERRORS_HEADING": "Oops. Article not found.",
 6 | 	"DOWNLOAD_ERRORS_MESSAGE": "The requested article '${articleTitle}' is not available inside this ZIM, it was not possible to retrieve it from ${server}.",
 7 | 	"DOWNLOAD_ERRORS_LINE1_DELETED_ARTICLE": "This article was deleted after we compiled the list of articles to retrieve but before we fetched it to build the ZIM you are browsing.",
 8 | 	"DOWNLOAD_ERRORS_LINE1_WIKIMEDIA_DESKTOP_API_HTML_500_ERROR": "When the ZIM you are browsing was built, ${server} server failed to render this article HTML content and returned an HTTP 500 error.",
 9 | 	"DOWNLOAD_ERRORS_LINE1_WIKIMEDIA_DESKTOP_API_HTML_504_UPSTREAM_TIMEOUT": "When the ZIM you are browsing was built, ${server} server timed-out while rendering this article HTML content and returned an HTTP 504 error.",
10 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_UPSTREAM_TIMEOUT": "When the ZIM you are browsing was built, ${server} server ActionParse API timed-out while processing this article and returned an HTTP 504 error.",
11 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_HTML_503_ERROR": "When the ZIM you are browsing was built, ${server} server ActionParse API raised an HTTP 503 error while giving details about this article HTML.",
12 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_BAD_REVISION_ERROR": "When the ZIM you are browsing was built, ${server} server ActionParse API raised an unexpected bad revision ID error while giving details about this article HTML.",
13 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_UNREACHABLE_EXCEPTION_ERROR": "When the ZIM you are browsing was built, ${server} server ActionParse API raised an unreachable exception error while giving details about this article HTML.",
14 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_GENERIC_INTERNAL_API_ERROR": "When the ZIM you are browsing was built, ${server} server ActionParse API raised an internal API error while giving details about this article HTML.",
15 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_TRUNCATED_RESPONSE": "When the ZIM you are browsing was built, ${server} server ActionParse API failed to give details about this article HTML because this article is way too big. Someone should probably split this article into multiple smaller articles on ${server} server.",
16 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_EMPTY_500_RESPONSE": "When the ZIM you are browsing was built, ${server} server ActionParse API failed to give details about this article HTML returning an empty response.",
17 | 	"DOWNLOAD_ERRORS_LINE2": "The missing article was replaced by the placeholder page you are currently seeing.",
18 | 	"DOWNLOAD_ERRORS_LINE3": "Let's hope the issue will be solved on ${server} server and our next version of this ZIM will contain this article."
19 | }
20 | 


--------------------------------------------------------------------------------
/translation/es.json:
--------------------------------------------------------------------------------
1 | {
2 | 	"@metadata": {
3 | 		"authors": []
4 | 	},
5 | 	"DISCLAIMER": "Este artículo ha sido escrito por ${creator}. El texto está disponible bajo la licencia ${license}. Pueden aplicarse cláusulas adicionales a los archivos multimedia.",
6 | 	"LAST_EDITED_ON": "Esta página se editó por última vez el ${date}",
7 | 	"LICENSE_NAME": "Creative Commons - Atribución - CompartirIgual"
8 | }
9 | 


--------------------------------------------------------------------------------
/translation/fi.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Pyscowicz"
 5 | 		]
 6 | 	},
 7 | 	"LAST_EDITED_ON": "Viimeksi muokattu ${date}",
 8 | 	"LICENSE_NAME": "Creative Commons - Nimeä - JaaSamoin"
 9 | }
10 | 


--------------------------------------------------------------------------------
/translation/fr.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Bananax47",
 5 | 			"Verdy p"
 6 | 		]
 7 | 	},
 8 | 	"DISCLAIMER": "Cet article est issu de ${creator}. Le texte est sous licence ${license}. Des conditions supplémentaires peuvent s’appliquer aux fichiers multimédias.",
 9 | 	"LAST_EDITED_ON": "Dernière modification le ${date}",
10 | 	"LICENSE_NAME": "Creative Commons – Attribution – Partage à l’identique",
11 | 	"DOWNLOAD_ERRORS_HEADING": "Oups. Article non trouvé."
12 | }
13 | 


--------------------------------------------------------------------------------
/translation/ha.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"El-hussain14",
 5 | 			"Rofiatmustapha12"
 6 | 		]
 7 | 	},
 8 | 	"DISCLAIMER": "An bayar da wannan labarin daga ${creator}. Rubutun yana da lasisi a ƙarƙashin ${license}. Ƙarin sharuɗɗa na iya aiki don fayilolin mai jarida.",
 9 | 	"LAST_EDITED_ON": "Gyaran ƙarshe akan ${date}",
10 | 	"LICENSE_NAME": "Creative Commons - Bayyanawa - Sharealike"
11 | }
12 | 


--------------------------------------------------------------------------------
/translation/he.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Amire80"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "הערך הזה נוצר על־ידי ${creator}. הטקסט מתפרסם לפי תנאי רישיון ${license}. תנאים נוספים עשויים לחול על קובצי המדיה.",
 8 | 	"LAST_EDITED_ON": "נערך לאחרונה ב־${date}",
 9 | 	"LICENSE_NAME": "קריאייטיב קומונז–ייחוס–שיתוף זהה"
10 | }
11 | 


--------------------------------------------------------------------------------
/translation/hi.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Juuz0"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "यह आलेख ${creator} से जारी किया गया है। पाठ को ${license} के अंतर्गत लाइसेंसीकृत किया गया है। मीडिया फ़ाइलों के लिए अतिरिक्त शर्तें लागू हो सकती हैं।",
 8 | 	"LAST_EDITED_ON": "अंतिम बार ${date} को संपादित किया गया",
 9 | 	"LICENSE_NAME": "क्रिएटिव कॉमन्स - एट्रिब्यूशन - शेयरलाइक"
10 | }
11 | 


--------------------------------------------------------------------------------
/translation/ia.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"McDutchie"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "Iste articulo es publicate per ${creator}. Le texto es licentiate sub ${license}. Additional terminos pote applicar se al files multimedial.",
 8 | 	"LAST_EDITED_ON": "Ultime modification le ${date}",
 9 | 	"LICENSE_NAME": "Creative Commons - Attribution - CompartiSimile"
10 | }
11 | 


--------------------------------------------------------------------------------
/translation/id.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Akmaie Ajam"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "Artikel ini diterbitkan oleh ${creator}. Teks ini dilisensikan berdasarkan ${license}. Ketentuan tambahan mungkin berlaku untuk berkas media.",
 8 | 	"LAST_EDITED_ON": "Terakhir diubah pada ${date}",
 9 | 	"LICENSE_NAME": "Creative Commons - Atribusi - Serupa",
10 | 	"DOWNLOAD_ERRORS_HEADING": "Waduh. Artikel tidak ditemukan.",
11 | 	"DOWNLOAD_ERRORS_MESSAGE": "Artikel yang diminta '${articleTitle}' tidak tersedia di dalam ZIM ini, tidak mungkin untuk mengambilnya dari ${server}.",
12 | 	"DOWNLOAD_ERRORS_LINE1_DELETED_ARTICLE": "Artikel ini dihapus setelah kami menyusun daftar artikel yang akan diambil tapi sebelum kami mengambilnya untuk membangun ZIM yang Anda telusuri.",
13 | 	"DOWNLOAD_ERRORS_LINE1_WIKIMEDIA_DESKTOP_API_HTML_500_ERROR": "Saat ZIM yang Anda jelajahi dibuat, server ${server} gagal menyajikan konten HTML artikel ini dan menyatakan kesalahan HTTP 500.",
14 | 	"DOWNLOAD_ERRORS_LINE1_WIKIMEDIA_DESKTOP_API_HTML_504_UPSTREAM_TIMEOUT": "Saat ZIM yang Anda jelajahi sedang dibuat, server ${server} gagal menyajikan konten HTML artikel ini dan menyatakan kesalahan HTTP 504.",
15 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_UPSTREAM_TIMEOUT": "Saat ZIM yang Anda jelajahi dibuat, API ActionParse server ${server} gagal menyajikan konten HTML artikel ini dan menyatakan kesalahan HTTP 504.",
16 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_HTML_503_ERROR": "Saat ZIM yang Anda jelajahi dibuat, API ActionParse server ${server} menyatakan kesalahan HTTP 503 saat memberikan detail tentang HTML artikel ini.",
17 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_BAD_REVISION_ERROR": "Saat ZIM yang Anda telusuri dibuat, API ActionParse server ${server} memunculkan kesalahan revisi ID buruk tak terduga saat memberikan detail tentang HTML artikel ini.",
18 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_UNREACHABLE_EXCEPTION_ERROR": "Saat ZIM yang Anda telusuri dibuat, API ActionParse server ${server} memunculkan kesalahan pengecualian yang tidak dapat dijangkau saat memberikan detail tentang HTML artikel ini.",
19 | 	"DOWNLOAD_ERRORS_LINE2": "Artikel yang hilang telah digantikan oleh halaman pengganti yang sedang Anda lihat.",
20 | 	"DOWNLOAD_ERRORS_LINE3": "Mari kita berharap masalah server ${server} akan terpecahkan dan versi ZIM berikutnya akan memuat artikel ini."
21 | }
22 | 


--------------------------------------------------------------------------------
/translation/ig.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Accuratecy051",
 5 | 			"Oby Ezeilo"
 6 | 		]
 7 | 	},
 8 | 	"DISCLAIMER": "Ewepụtara akụkọ a n'aka ${creator}. Enyere ikike ederede n'okpuru ${license}. Usoro mgbakwunye nwere ike itinye maka faịlụ mgbasa ozi.",
 9 | 	"LAST_EDITED_ON": "ụbọchị ikpeazụ edeziri ya bụ  na ${date}",
10 | 	"LICENSE_NAME": "Creative Commons - Attribution - Sharealike"
11 | }
12 | 


--------------------------------------------------------------------------------
/translation/it.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Beta16",
 5 | 			"Clorofolle"
 6 | 		]
 7 | 	},
 8 | 	"DISCLAIMER": "Questa voce è stata pubblicata da ${creator}. Il testo è rilasciato in base alla licenza ${license}. Potrebbero essere applicate clausole aggiuntive per i file multimediali.",
 9 | 	"LAST_EDITED_ON": "Ultima modifica il ${date}",
10 | 	"LICENSE_NAME": "Creative Commons Attribuzione-Condividi allo stesso modo",
11 | 	"DOWNLOAD_ERRORS_HEADING": "Oops. Voce non trovata.",
12 | 	"DOWNLOAD_ERRORS_MESSAGE": "La voce richiesta '${articleTitle}' non è disponibile in questo ZIM. Non è stato possibile recuperarla da ${server}.",
13 | 	"DOWNLOAD_ERRORS_LINE1_WIKIMEDIA_DESKTOP_API_HTML_500_ERROR": "Quando è stato creato lo ZIM che stai navigando, il server ${server} non è riuscito a visualizzare il contenuto HTML di questa voce ed ha restituito un errore HTTP 500.",
14 | 	"DOWNLOAD_ERRORS_LINE1_WIKIMEDIA_DESKTOP_API_HTML_504_UPSTREAM_TIMEOUT": "Quando è stato creato lo ZIM che stai navigando, il server ${server} ha registrato un timeout durante il rendering del contenuto HTML di questa voce e ha restituito un errore HTTP 504.",
15 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_UPSTREAM_TIMEOUT": "Durante la creazione dello ZIM che stai navigando, l'API ActionParse del server ${server} ha registrato un timeout durante l'elaborazione di questa voce ed ha restituito un errore HTTP 504."
16 | }
17 | 


--------------------------------------------------------------------------------
/translation/kaa.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Inabat Allanova"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "Bul maqala ${creator} tárepinen berilgen. Tekst ${license} licenziyası astında licenziyalanǵan. Media fayllarına qosımsha shártler qollanılıwı múmkin.",
 8 | 	"LAST_EDITED_ON": "Aqırǵı márte ${date} sánesinde redaktorlanǵan",
 9 | 	"LICENSE_NAME": "Creative Commons - Attribution - Sharealike"
10 | }
11 | 


--------------------------------------------------------------------------------
/translation/ko.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Suleiman the Magnificent Television",
 5 | 			"YeBoy371",
 6 | 			"Ykhwong"
 7 | 		]
 8 | 	},
 9 | 	"DISCLAIMER": "이 기사는 ${creator}에서 발행되었다. 본문은 ${license}에 따라 허가되어 있습니다. 미디어 파일에는 추가 조건이 적용될 수 있습니다.",
10 | 	"LAST_EDITED_ON": "마지막 편집일: ${date}",
11 | 	"LICENSE_NAME": "크리에이티브 커먼즈 - 저작자표시 - 동일조건변경허락"
12 | }
13 | 


--------------------------------------------------------------------------------
/translation/lb.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Volvox"
 5 | 		]
 6 | 	},
 7 | 	"LAST_EDITED_ON": "Fir d'lescht geännert de(n) ${date}",
 8 | 	"DOWNLOAD_ERRORS_HEADING": "Ups. Artikel net fonnt."
 9 | }
10 | 


--------------------------------------------------------------------------------
/translation/mk.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Bjankuloski06"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "Оваа статија е дело на ${creator}. Текстот е под лиценцата ${license}. За медиумските податотеки може да важат дополнителни услови.",
 8 | 	"LAST_EDITED_ON": "Последна измена на ${date}",
 9 | 	"LICENSE_NAME": "Криејтив комонс - Наведи извор - Сподели под исти услови"
10 | }
11 | 


--------------------------------------------------------------------------------
/translation/nb.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Jon Harald Søby"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "Denne artikkelen er utgitt av ${creator}. Teksten er lisensiert under ${license}. Ytterligere vilkår kan gjelde mediefilene.",
 8 | 	"LAST_EDITED_ON": "Sist redigert ${date}",
 9 | 	"LICENSE_NAME": "Creative Commons-Navngivelse-DelPåSammeVilkår"
10 | }
11 | 


--------------------------------------------------------------------------------
/translation/nl.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"McDutchie"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "Dit artikel is uitgegeven door ${creator}. De tekst is vrijgegeven onder de licentie ${license}. Voor de mediabestanden kunnen aanvullende voorwaarden gelden.",
 8 | 	"LAST_EDITED_ON": "Laatst bewerkt op ${date}",
 9 | 	"LICENSE_NAME": "Creative Commons - Naamsvermelding - Gelijk delen"
10 | }
11 | 


--------------------------------------------------------------------------------
/translation/nqo.json:
--------------------------------------------------------------------------------
1 | {
2 | 	"@metadata": {
3 | 		"authors": [
4 | 			"Lancine.kounfantoh.fofana"
5 | 		]
6 | 	},
7 | 	"LAST_EDITED_ON": "ߡߊ߬ߦߟߍ߬ߡߊ߲߬ߠߌ߲߫ ߟߊ߬ߓߊ߲ ${date} ߟߊ߫"
8 | }
9 | 


--------------------------------------------------------------------------------
/translation/or.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Gouri"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "ଏହି ଲେଖାଟି ${creator}ରୁ ପ୍ରକାଶିତ ହୋଇଛି । ଏହି ଲେଖାଟି ${license} ଅଧୀନରେ ଲାଇସେନ୍ସପ୍ରାପ୍ତ । ମିଡିଆ ଫାଇଲଗୁଡ଼ିକ ପାଇଁ ଅତିରିକ୍ତ ସର୍ତ୍ତ ଲାଗୁ ହୋଇପାରେ ।",
 8 | 	"LAST_EDITED_ON": "${date} ରେ ଶେଷ ଥର ପାଇଁ ସମ୍ପାଦିତ",
 9 | 	"LICENSE_NAME": "କ୍ରିଏଟିଭ୍ କମନ୍ସ - ଆଟ୍ରିବ୍ୟୁସନ୍ - ଶାରିଆଲାଇକ୍"
10 | }
11 | 


--------------------------------------------------------------------------------
/translation/pt-br.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Klgor1803",
 5 | 			"Robertpontes"
 6 | 		]
 7 | 	},
 8 | 	"DISCLAIMER": "Esse artigo foi publicado por ${creator}. O texto é licenciado sobre a ${license}. Termos adicionar podem ser aplicados aos arquivos de mídia.",
 9 | 	"LAST_EDITED_ON": "Ultima edição em ${date}",
10 | 	"LICENSE_NAME": "Creative Commons - Attribution - Sharealike"
11 | }
12 | 


--------------------------------------------------------------------------------
/translation/pt.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"B3rnas"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "Este artigo é emitido por ${creator}. O texto é licenciado sob ${license}. Termos adicionais podem ser aplicados aos arquivos de mídia.",
 8 | 	"LAST_EDITED_ON": "Última edição em ${date}",
 9 | 	"LICENSE_NAME": "Creative Commons - Atribuição-CompartilhaIgual"
10 | }
11 | 


--------------------------------------------------------------------------------
/translation/qqq.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Amire80"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "Text printed in the footer of each article. With some publishers, \"${creator}\" will be the name of a person who wrote the article; with some others, it will be the publisher name (e.g. \"Wikipedia\").",
 8 | 	"LAST_EDITED_ON": "Hint indicated at which time the article has been updated last time",
 9 | 	"LICENSE_NAME": "Create Commons name localised",
10 | 	"DOWNLOAD_ERRORS_HEADING": "Title of the placeholder HTML when article failed to download.",
11 | 	"DOWNLOAD_ERRORS_MESSAGE": "Main message of the placeholder HTML when article failed to download.",
12 | 	"DOWNLOAD_ERRORS_LINE1_WIKIMEDIA_DESKTOP_API_HTML_500_ERROR": "First details message showed on placeholder HTML when article failed to download and error is of given type",
13 | 	"DOWNLOAD_ERRORS_LINE1_WIKIMEDIA_DESKTOP_API_HTML_504_UPSTREAM_TIMEOUT": "First details message showed on placeholder HTML when article failed to download and error is of given type",
14 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_UPSTREAM_TIMEOUT": "First details message showed on placeholder HTML when article failed to download and error is of given type",
15 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_HTML_503_ERROR": "First details message showed on placeholder HTML when article failed to download and error is of given type",
16 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_BAD_REVISION_ERROR": "First details message showed on placeholder HTML when article failed to download and error is of given type",
17 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_UNREACHABLE_EXCEPTION_ERROR": "First details message showed on placeholder HTML when article failed to download and error is of given type",
18 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_GENERIC_INTERNAL_API_ERROR": "First details message showed on placeholder HTML when article failed to download and error is of given type",
19 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_TRUNCATED_RESPONSE": "First details message showed on placeholder HTML when article failed to download and error is of given type",
20 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_EMPTY_500_RESPONSE": "First details message showed on placeholder HTML when article failed to download and error is of given type",
21 | 	"DOWNLOAD_ERRORS_LINE2": "Second details message showed on placeholder HTML when article failed to download",
22 | 	"DOWNLOAD_ERRORS_LINE3": "Third details message showed on placeholder HTML when article failed to download"
23 | }
24 | 


--------------------------------------------------------------------------------
/translation/ro.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"MSClaudiu"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "Acest articol este emis de la ${creator}. Textul este licențiat sub ${license}. Se pot aplica termeni suplimentari pentru fișierele media.",
 8 | 	"LAST_EDITED_ON": "Ultima editare pe ${date}",
 9 | 	"LICENSE_NAME": "Creative Commons - Attribution - Sharealike"
10 | }
11 | 


--------------------------------------------------------------------------------
/translation/ru.json:
--------------------------------------------------------------------------------
1 | {
2 | 	"@metadata": {
3 | 		"authors": []
4 | 	},
5 | 	"DISCLAIMER": "Эта статья взята у (из) ${creator}. Текст лицензируется по ${license}. К медиа файлам могут применятся дополнительные условия.",
6 | 	"LAST_EDITED_ON": "Последний раз редактировалась ${date}",
7 | 	"LICENSE_NAME": "Creative Commons - Attribution - Sharealike"
8 | }
9 | 


--------------------------------------------------------------------------------
/translation/sc.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"L2212"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "Custu artìculu l'at publicadu ${creator}. Su testu est frunidu suta de sa litzèntzia ${license}. Tèrmines additzionales si diant pòdere aplicare pro sos documentos multimediales.",
 8 | 	"LAST_EDITED_ON": "Ùrtima modìfica su ${date}",
 9 | 	"LICENSE_NAME": "Creative Commons - Atributzione - Cumpartzi in sa matessi manera"
10 | }
11 | 


--------------------------------------------------------------------------------
/translation/scn.json:
--------------------------------------------------------------------------------
1 | {
2 | 	"@metadata": {
3 | 		"authors": [
4 | 			"Ajeje Brazorf"
5 | 		]
6 | 	},
7 | 	"LAST_EDITED_ON": "Ùrtimu canciamentu lu ${date}"
8 | }
9 | 


--------------------------------------------------------------------------------
/translation/sl.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Eleassar"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "Ta članek je izdal ${creator}. Besedilo je licencirano pod ${license}. Za predstavnostne datoteke lahko veljajo dodatni pogoji.",
 8 | 	"LAST_EDITED_ON": "Zadnja sprememba ${date}.",
 9 | 	"LICENSE_NAME": "Creative Commons Priznanje avtorstva-Deljenje pod enakimi pogoji"
10 | }
11 | 


--------------------------------------------------------------------------------
/translation/sq.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Besnik b"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "Ky artikull është botuar nga ${creator}. Teksti licencohet sipar ${license}. Mbi kartelat media mund të ketë kushte shtesë.",
 8 | 	"LAST_EDITED_ON": "Përpunuar së fundi më ${date}",
 9 | 	"LICENSE_NAME": "Creative Commons - Attribution - Sharealike"
10 | }
11 | 


--------------------------------------------------------------------------------
/translation/sv.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Sabelöga"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "Den här artikeln kommer från ${creator}. Texten är licensierad under ${license}. Ytterligare termer kan gälla för mediefiler.",
 8 | 	"LAST_EDITED_ON": "Senast redigerad den ${date}",
 9 | 	"LICENSE_NAME": "Creative Commons - Erkännande - DelaLika"
10 | }
11 | 


--------------------------------------------------------------------------------
/translation/sw.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Peggy"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "Makala haya yametolewa kutoka ${creator}. Maandishi yamepewa leseni chini ya ${license}. Masharti ya ziada yanaweza kutumika kwa faili za midia.",
 8 | 	"LAST_EDITED_ON": "Ilihaririwa mwisho mnamo ${date}",
 9 | 	"LICENSE_NAME": "Creative Commons - Ugawaji - Sharealike"
10 | }
11 | 


--------------------------------------------------------------------------------
/translation/te.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Rishitha 1238"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "ఈ కథనం ${creator} నుండి జారీ చేయబడింది. వచనం ${license} క్రింద లైసెన్స్ చేయబడింది. మీడియా ఫైల్‌లకు అదనపు నిబంధనలు వర్తించవచ్చు.",
 8 | 	"LAST_EDITED_ON": "${date}న చివరిగా సవరించబడింది",
 9 | 	"LICENSE_NAME": "క్రియేటివ్ కామన్స్ - అట్రిబ్యూషన్ - షేర్అలైక్"
10 | }
11 | 


--------------------------------------------------------------------------------
/translation/tn.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Sekhomba"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "Padi ye e gatisitswe gotswa ko ${creator}. Mokwalo o gatisitswe kafa tlase ga ${license}. Melawana ya tlaleletsa e ka diragala mo ditseleng tsa media.",
 8 | 	"LAST_EDITED_ON": "E gatisitswe la bofelo ka ${date}",
 9 | 	"LICENSE_NAME": "Creative Commons - Attribution - Sharealike"
10 | }
11 | 


--------------------------------------------------------------------------------
/translation/tr.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Hedda"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "Bu makale, ${creator} tarafından yayımlanmıştır. Metin, ${license} kapsamında lisanslanmıştır. Medya dosyaları için ek koşullar geçerli olabilir.",
 8 | 	"LAST_EDITED_ON": "Son düzenleme ${date} tarihinde yapıldı",
 9 | 	"LICENSE_NAME": "Creative Commons - Atıf - Benzer Paylaşım"
10 | }
11 | 


--------------------------------------------------------------------------------
/translation/zh-hans.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"GuoPC",
 5 | 			"IceButBin",
 6 | 			"Prmsh"
 7 | 		]
 8 | 	},
 9 | 	"DISCLAIMER": "本条目由${creator}发布。该条目基于${license}获得许可。附加条款可能适用于媒体文件。",
10 | 	"LAST_EDITED_ON": "最后编辑于：${date}",
11 | 	"LICENSE_NAME": "知识共享署名-相同方式共享",
12 | 	"DOWNLOAD_ERRORS_HEADING": "哎呀！找不到条目。",
13 | 	"DOWNLOAD_ERRORS_MESSAGE": "所请求的条目“${articleTitle}”在此 ZIM 中不可用，无法从 ${server} 检索。",
14 | 	"DOWNLOAD_ERRORS_LINE1_DELETED_ARTICLE": "此条目在待检索条目的列表编译之后、获取此条目以构建您浏览的 ZIM 之前被删除。",
15 | 	"DOWNLOAD_ERRORS_LINE1_WIKIMEDIA_DESKTOP_API_HTML_500_ERROR": "在构建您浏览的 ZIM 时，${server} 服务器无法渲染此条目的 HTML 内容，并返回 HTTP 500 错误。",
16 | 	"DOWNLOAD_ERRORS_LINE1_WIKIMEDIA_DESKTOP_API_HTML_504_UPSTREAM_TIMEOUT": "在构建您浏览的 ZIM 时，${server} 服务器在渲染此条目 HTML 内容时超时，并返回 HTTP 504 错误。",
17 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_UPSTREAM_TIMEOUT": "在构建您浏览的 ZIM 时，${server} 服务器的 ActionParse API 在处理此条目时超时，并返回 HTTP 504 错误。",
18 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_HTML_503_ERROR": "在构建您浏览的 ZIM 时，${server} 服务器的 ActionParse API 在提供此关于此条目 HTML 的详细信息时引发 HTTP 503 错误。",
19 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_BAD_REVISION_ERROR": "在构建您浏览的 ZIM 时，${server} 服务器的 ActionParse API 在提供关于此条目 HTML 的详细信息时，引发意外的修订版本 ID 不合法的错误。",
20 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_UNREACHABLE_EXCEPTION_ERROR": "在构建您浏览的 ZIM 时，${server} 服务器的 ActionParse API 在提供关于此条目 HTML 的详细信息时引发无法访问的异常错误。",
21 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_TRUNCATED_RESPONSE": "在构建您浏览的 ZIM 时，${server} 服务器的 ActionParse API 未能提供关于此条目 HTML 的详细信息，因为此条目过长。需要在 ${server} 服务器上将此条目拆分为多个相对短的条目。",
22 | 	"DOWNLOAD_ERRORS_LINE2": "缺失的条目已被您当前看到的占位符页面所取代。",
23 | 	"DOWNLOAD_ERRORS_LINE3": "希望 ${server} 服务器上的问题能够得到解决，以及希望下一个版本的 ZIM 能够包含此条目。"
24 | }
25 | 


--------------------------------------------------------------------------------
/translation/zh-hant.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"@metadata": {
 3 | 		"authors": [
 4 | 			"Kly"
 5 | 		]
 6 | 	},
 7 | 	"DISCLAIMER": "此條目由${creator}發布。內容文字基於${license}獲得許可。額外條款有機會適用於媒體檔案。",
 8 | 	"LAST_EDITED_ON": "最後編輯於${date}",
 9 | 	"LICENSE_NAME": "創用CC - 姓名標示 ─ 相同方式分享",
10 | 	"DOWNLOAD_ERRORS_HEADING": "哎呀。找不到條目。",
11 | 	"DOWNLOAD_ERRORS_MESSAGE": "所要求的條目「${articleTitle}」在此 ZIM 中不可用，無法從 ${server} 檢索。",
12 | 	"DOWNLOAD_ERRORS_LINE1_DELETED_ARTICLE": "此條目在我們編譯要檢索的條目清單之後，且在我們取得它以建置您正在瀏覽的 ZIM 之前被刪除了。",
13 | 	"DOWNLOAD_ERRORS_LINE1_WIKIMEDIA_DESKTOP_API_HTML_500_ERROR": "當您瀏覽的 ZIM 在建置時，${server} 伺服器會無法呈現該條目的 HTML 內容，並會回傳 HTTP 500 錯誤。",
14 | 	"DOWNLOAD_ERRORS_LINE1_WIKIMEDIA_DESKTOP_API_HTML_504_UPSTREAM_TIMEOUT": "當您瀏覽的 ZIM 在建置時，${server} 伺服器會在呈現該條目的 HTML 內容時逾時，並會回傳 HTTP 504 錯誤。",
15 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_UPSTREAM_TIMEOUT": "當您瀏覽的 ZIM 在建置時，${server} 伺服器的 ActionParse API 會在處理該條目時逾時，並會回傳 HTTP 504 錯誤。",
16 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_HTML_503_ERROR": "當您瀏覽的 ZIM 在建置時，${server} 伺服器的 ActionParse API 會在提供關於該條目 HTML 詳細資訊時引發 HTTP 503 錯誤。",
17 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_BAD_REVISION_ERROR": "當您瀏覽的 ZIM 在建置時，${server} 伺服器的 ActionParse API 會在提供關於該條目 HTML 詳細資訊時引發非預期錯誤修訂 ID。",
18 | 	"DOWNLOAD_ERRORS_LINE1_ACTION_PARSE_UNREACHABLE_EXCEPTION_ERROR": "當您瀏覽的 ZIM 在建置時，${server} 伺服器的 ActionParse API 會在提供關於該條目 HTML 詳細資訊時引發無法存取的例外錯誤。",
19 | 	"DOWNLOAD_ERRORS_LINE2": "缺少的條目已被您目前看到的佔位符頁面所取代。",
20 | 	"DOWNLOAD_ERRORS_LINE3": "希望 ${server} 伺服器上的問題可以解決，以及希望我們的下一個版本的 ZIM 將包含此條目。"
21 | }
22 | 


--------------------------------------------------------------------------------
/tsconfig.build.json:
--------------------------------------------------------------------------------
1 | {
2 |     "extends": "./tsconfig.json",
3 |     "include": [
4 |         "src/**/*"
5 |     ]
6 | }


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ES2022",
 4 |     "module": "NodeNext",
 5 |     "moduleResolution": "NodeNext",
 6 |     "isolatedModules": true,
 7 |     "experimentalDecorators": true,
 8 |     "emitDecoratorMetadata": true,
 9 |     "strictNullChecks": false,
10 |     "noImplicitAny": false,
11 |     "sourceMap": true,
12 |     "declaration": true,
13 |     "baseUrl": ".",
14 |     "outDir": "./lib/",
15 |     "resolveJsonModule": true,
16 |     "esModuleInterop": true,
17 |     "allowSyntheticDefaultImports": true,
18 |     "types": [
19 |       "node", "jest"
20 |     ],
21 |     "typeRoots": [
22 |       "node_modules/@types/",
23 |       "./src/"
24 |     ],
25 |     "lib": [
26 |       "ES2022.Intl", "DOM"
27 |     ],
28 |     "plugins": [
29 |       {
30 |         "name": "typescript-tslint-plugin",
31 |         "ignoreDefinitionFiles": false
32 |       }
33 |     ],
34 |     "paths": {
35 |       "#test*": [
36 |         "test/*"
37 |       ]
38 |     }
39 |   },
40 |   "include": ["src/**/*", "test/**/*"],
41 |   "exclude": [
42 |     "node_modules",
43 |     "bin"
44 |   ]
45 | }
46 | 


--------------------------------------------------------------------------------