├── .github └── workflows │ └── rust.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── Makefile ├── README.md ├── build.rs ├── docs ├── exclusion.md └── parser.md ├── examples ├── docker-ce.yaml ├── mysql-repo.yaml ├── node.yaml ├── openresty.yaml ├── proxmox.yaml ├── python.yaml ├── vyos.yaml ├── wine-builds.yaml └── zerotier.yaml ├── fixtures ├── artifactrepo │ ├── 10 │ │ └── index.html │ └── index.html ├── bmclapi │ └── index.html ├── buildroot │ ├── acl │ │ └── index.html │ ├── index.html │ └── mimalloc │ │ ├── build.html │ │ ├── index.html │ │ ├── test │ │ └── test │ │ └── using.html ├── caddy-symlink │ └── index.html ├── clickhouse │ ├── clickhouse-client │ │ └── index.html │ ├── index.html │ └── stable │ │ ├── index.html │ │ └── index2.html ├── docker │ ├── armv7l │ │ └── index.html │ └── index.html ├── ghettoforge │ └── index.html ├── gradle │ └── index.html ├── grml │ └── index.html ├── loongnix │ └── index.html ├── misc │ └── 1 │ │ └── index.html ├── monitoring-plugins │ └── index.html ├── mozilla │ ├── OJI │ │ └── index.html │ └── index.html ├── mysql │ └── index.html ├── nodejs │ └── v4.9.1 │ │ └── index.html ├── proxmox │ └── index.html ├── raspberrypi │ └── index.html ├── sdumirror-ubuntu │ └── index.html ├── start_fileserver.sh ├── vscode │ └── index.html ├── vyos │ ├── index.html │ └── vyos-accel-ppp │ │ └── index.html ├── wine-builds │ └── index.html └── zabbix │ └── index.html └── src ├── bar.rs ├── cli ├── list.rs ├── mod.rs └── sync.rs ├── compare.rs ├── extensions ├── apt.rs ├── mod.rs └── yum.rs ├── listing.rs ├── main.rs ├── parser ├── apache_f2.rs ├── caddy.rs ├── denoflare_r2.rs ├── directory_lister.rs ├── docker.rs ├── fallback.rs ├── fancyindex.rs ├── gradle.rs ├── lighttpd.rs ├── mod.rs └── nginx.rs ├── regex_manager ├── mod.rs ├── v1.rs └── v2.rs ├── timezone.rs └── utils.rs /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | - push 5 
| - pull_request 6 | 7 | env: 8 | CARGO_TERM_COLOR: always 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | with: 16 | submodules: 'recursive' 17 | - name: Generate Cargo.toml.cache (Ignore version=) 18 | run: | 19 | sed '/^version = /d' Cargo.toml > Cargo.toml.cache 20 | - uses: actions/cache@v4 21 | with: 22 | path: | 23 | ~/.cargo/registry/index 24 | ~/.cargo/registry/cache 25 | ~/.cargo/git 26 | target 27 | key: musl-cargo-${{ hashFiles('./Cargo.toml.cache') }} 28 | - name: Run fileserver 29 | run: | 30 | fixtures/start_fileserver.sh & 31 | 32 | - name: Remove files for a build with correct version info 33 | run: | 34 | find target/ -name 'shadow*' -exec rm -r {} + || true 35 | find target/ -name 'tsumugu' -delete || true 36 | - name: Test & Compile 37 | run: | 38 | mkdir -p ~/.cargo/{git,registry} 39 | # Fix git permission issue with Docker and shadow-rs 40 | sudo chown -R root . 41 | docker run --rm -t \ 42 | --mount type=bind,source=${{ github.workspace }},target=/volume \ 43 | --mount type=bind,source=$HOME/.cargo/registry,target=/root/.cargo/registry \ 44 | --mount type=bind,source=$HOME/.cargo/git,target=/root/.cargo/git \ 45 | --network=host \ 46 | clux/muslrust:stable \ 47 | cargo test 48 | docker run --rm -t \ 49 | --mount type=bind,source=${{ github.workspace }},target=/volume \ 50 | --mount type=bind,source=$HOME/.cargo/registry,target=/root/.cargo/registry \ 51 | --mount type=bind,source=$HOME/.cargo/git,target=/root/.cargo/git \ 52 | --network=host \ 53 | clux/muslrust:stable \ 54 | cargo build --release 55 | sudo chown -R runner ~/.cargo/ 56 | sudo chown -R runner . 
57 | # show version info 58 | RUST_LOG=debug target/x86_64-unknown-linux-musl/release/tsumugu --version 59 | 60 | - name: Deploy - Create and Upload Release 61 | if: startsWith(github.ref, 'refs/tags/') 62 | uses: ncipollo/release-action@v1 63 | with: 64 | artifacts: target/x86_64-unknown-linux-musl/release/tsumugu 65 | - name: Release to crates.io 66 | if: startsWith(github.ref, 'refs/tags/') 67 | uses: katyo/publish-crates@v2 68 | with: 69 | registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }} 70 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.toml.cache 3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "tsumugu" 3 | version = "0.20250422.0" 4 | edition = "2021" 5 | description = "A HTTP(S) syncing tool with lower overhead, for OSS mirrors" 6 | license = "MIT" 7 | repository = "https://github.com/taoky/tsumugu" 8 | 9 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 10 | 11 | [dependencies] 12 | anyhow = { version = "1.0.71", features = ["backtrace"] } 13 | chrono = { version = "0.4.26", default-features = false, features = ["clock"] } 14 | clap = { version = "4.3.12", features = ["derive"] } 15 | regex = "1.9.1" 16 | reqwest = { version = "0.12.9", features = ["stream", "gzip", "deflate", "brotli", "socks"] } 17 | scraper = "0.23.1" 18 | url = "2.5.4" 19 | tracing = "0.1" 20 | tracing-subscriber = { version = "0.3", features = ["env-filter"] } 21 | filetime = "0.2.21" 22 | crossbeam-deque = "0.8.3" 23 | walkdir = "2.3.3" 24 | tokio = { version = "1.43.1", features = ["rt-multi-thread"] } 25 | kyuri = "0.2.0" 26 | futures-util = "0.3.31" 27 | humansize = "2.1.3" 28 | apt-parser = "1.0.0" 29 | flate2 = "1.0.28" 30 | 
shadow-rs = "0.32.0" 31 | thiserror = "1.0.63" 32 | percent-encoding = "2.3.1" 33 | 34 | [build-dependencies] 35 | shadow-rs = "0.32.0" 36 | 37 | [dev-dependencies] 38 | test-log = { version = "0.2.14", default-features = false, features = ["trace"] } 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 taoky 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: check release 2 | 3 | check: 4 | cargo fmt --check 5 | cargo clippy 6 | cargo test 7 | 8 | release: 9 | ifndef version 10 | $(error version is not set. 
Usage: make release version= msg="") 11 | endif 12 | ifndef msg 13 | $(error msg is not set. Usage: make release version= msg="") 14 | endif 15 | @full_version=$(shell echo $(version) | grep -q '\.' && echo "0.$(version)" || echo "0.$(version).0"); \ 16 | echo $$full_version; \ 17 | cargo set-version $$full_version; \ 18 | git commit -a -m "Bump version to $$full_version" ; \ 19 | git tag $(version) -m "$(msg)" 20 | echo "Run 'git push' and 'git push --tag' afterwards." 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tsumugu 2 | 3 | A HTTP(S) syncing tool with lower overhead, for OSS mirrors. 4 | 5 | Instead of `HEAD`ing every single file, tsumugu parses directory listing HTML and downloads only files that do not seem to be up-to-date. 6 | 7 | ## Design goals 8 | 9 | To successfully sync from these domains, where lftp/rclone fails or finds difficulties: 10 | 11 | - [x] http://download.proxmox.com/ 12 | - [x] https://download.docker.com/ 13 | - [x] https://dl.winehq.org/wine-builds/ 14 | 15 | ## TODOs 16 | 17 | - [x] Add "--include": Sync even if the file is excluded by `--exclude` regex. 18 | - [x] Add supported Debian, Ubuntu, Fedora and RHEL versions support to `--include` regex. 19 | - Something like `--include debian/${DEBIAN_VERSIONS}`? 
20 | - [x] Check for APT/YUM repo integrity (avoid keeping old invalid metadata files) 21 | - (This is experimental and may not work well) 22 | 23 | ## Usage 24 | 25 | ```console 26 | > ./tsumugu --help 27 | A HTTP(S) syncing tool with lower overhead, for OSS mirrors 28 | 29 | Usage: tsumugu 30 | 31 | Commands: 32 | sync Sync files from upstream to local 33 | list List files from upstream 34 | help Print this message or the help of the given subcommand(s) 35 | 36 | Options: 37 | -h, --help Print help 38 | -V, --version Print version 39 | > ./tsumugu sync --help 40 | Sync files from upstream to local 41 | 42 | Usage: tsumugu sync [OPTIONS] 43 | 44 | Arguments: 45 | The upstream URL 46 | The local directory 47 | 48 | Options: 49 | --user-agent 50 | Customize tsumugu's user agent [default: tsumugu] 51 | --dry-run 52 | Do not download files and cleanup 53 | --threads 54 | Threads at work [default: 2] 55 | --no-delete 56 | Do not clean up after sync 57 | --max-delete 58 | Set max delete count [default: 100] 59 | --timezone-file 60 | You can set a valid URL for guessing. Set it to "no" to disable this behavior. By default it would recursively find the first file to HEAD for guessing 61 | --timezone 62 | Manually set timezone (+- hrs). This overrides timezone_file 63 | --retry 64 | Retry count for each request [default: 3] 65 | --head-before-get 66 | Do an HEAD before actual GET. Otherwise when head-before-get and allow-time-from-parser are not set, when GETting tsumugu would try checking if we still need to download it 67 | --parser 68 | Choose a main parser [default: nginx] [possible values: nginx, apache-f2, docker, directory-lister, lighttpd, caddy, fancy-index, gradle, fallback] 69 | --parser-match 70 | Choose supplementary parsers. Format: "parsername:matchpattern". matchpattern is a relative path regex. Supports multiple 71 | --exclude 72 | Excluded relative path regex. Supports multiple 73 | --include 74 | Included relative path regex (even if excluded). 
Supports multiple 75 | --skip-if-exists 76 | Skip relative path regex if they exist. Supports multiple 77 | --compare-size-only 78 | Relative path regex for those compare size only **after** HEAD (head_before_get on) or GET (head_before_get off) 79 | --trust-mtime-from-parser 80 | Allow mtime from parser if not available from HTTP headers [aliases: allow-mtime-from-parser] 81 | --apt-packages 82 | (Experimental) APT Packages file parser to find out missing packages 83 | --yum-packages 84 | (Experimental) YUM Packages file parser to find out missing packages 85 | --ignore-nonexist 86 | Ignore 404 NOT FOUND as error when downloading files 87 | --auto-fallback 88 | Allow automatically choose fallback parser when ParseError occurred 89 | --header
90 | Custom header for HTTP(S) requests in format "Headerkey: headervalue". Supports multiple 91 | --exclusion-v2 92 | The exclusion v2 mode. To keep compatibility, this is off by default 93 | -h, --help 94 | Print help 95 | -V, --version 96 | Print version 97 | > ./tsumugu list --help 98 | List files from upstream 99 | 100 | Usage: tsumugu list [OPTIONS] 101 | 102 | Arguments: 103 | The upstream URL 104 | 105 | Options: 106 | --user-agent Customize tsumugu's user agent [default: tsumugu] 107 | --parser Choose a main parser [default: nginx] [possible values: nginx, apache-f2, docker, directory-lister, lighttpd, caddy, fancy-index, gradle, fallback] 108 | --exclude Excluded relative path regex. Supports multiple 109 | --include Included relative path regex (even if excluded). Supports multiple 110 | --upstream-base The upstream base starting with "/" [default: /] 111 | --header
Custom header for HTTP(S) requests in format "Headerkey: headervalue". Supports multiple 112 | --exclusion-v2 The exclusion v2 mode. To keep compatibility, this is off by default 113 | -h, --help Print help 114 | -V, --version Print version 115 | ``` 116 | 117 | For a very brief introduction of parser, see [./docs/parser.md](./docs/parser.md). 118 | 119 | ## Exit code 120 | 121 | - 0: Success 122 | - 1: Failed to list 123 | - 2: Failed to download 124 | - 3: A panic!() occurred 125 | - 4: Error when cleaning up 126 | - 25: The limit stopped deletions 127 | 128 | ## Building with musl 129 | 130 | Unfortunately, this requires openssl-sys, which is not included in cross's prebuilt images. Try https://github.com/clux/muslrust. 131 | 132 | ## Evaluation 133 | 134 | Default concurrency is 2 threads. 135 | 136 | (Note: Please see [examples](./examples/) for latest commands to sync.) 137 | 138 | ### http://download.proxmox.com/ 139 | 140 | Proxmox uses a self-hosted CDN server architecture, and unfortunately its server limits concurrency to only 1 (as far as I could test). With traditional lftp/rclone it could take > 10 hours to sync once (even when your local files are identical with remote ones). 141 | 142 | Note: Consider using [Proxmox Offline Mirror](https://pom.proxmox.com/) or other tools like `apt-mirror` if you only need its APT repository. 143 | 144 | ```console 145 | > time ./tsumugu sync --threads 1 --dry-run --exclude '^temp' http://download.proxmox.com/ /srv/repo/proxmox/ 146 | ... 147 | 148 | real 1m48.746s 149 | user 0m3.468s 150 | sys 0m3.385s 151 | ``` 152 | 153 | ### https://download.docker.com/ 154 | 155 | We use [a special script](https://github.com/ustclug/ustcmirror-images/blob/master/docker-ce/tunasync/sync.py) for syncing docker-ce before, but tsumugu can also handle this now. And also, for 30x inside linux/centos/ and linux/rhel/, tsumugu could create symlinks as what this script do before. 
156 | 157 | ```console 158 | > time ./tsumugu sync --timezone-file https://download.docker.com/linux/centos/docker-ce-staging.repo --parser docker --dry-run https://download.docker.com/ /srv/repo/docker-ce/ 159 | ... 160 | 161 | real 8m32.674s 162 | user 0m4.532s 163 | sys 0m2.855s 164 | ``` 165 | 166 | ### https://dl.winehq.org/wine-builds/ 167 | 168 | lftp/rclone fails to handle complex HTML. 169 | 170 | ```console 171 | > time ./tsumugu sync --parser apache-f2 --dry-run --exclude '^mageia' --exclude '^macosx' --exclude '^debian' --exclude '^ubuntu' --exclude '^fedora' --include '^debian/dists/${DEBIAN_CURRENT}' --include '^ubuntu/dists/${UBUNTU_LTS}' --include '^fedora/${FEDORA_CURRENT}' https://dl.winehq.org/wine-builds/ /srv/repo/wine/wine-builds/ 172 | ... 173 | 174 | INFO ThreadId(01) tsumugu: (Estimated) Total objects: 17514, total size: 342.28 GiB 175 | 176 | real 0m5.664s 177 | user 0m1.475s 178 | sys 0m0.294s 179 | ``` 180 | 181 | ## Notes 182 | 183 | ### Yuki integration 184 | 185 | See . 186 | 187 | YAML example: 188 | 189 | ```yaml 190 | envs: 191 | UPSTREAM: http://download.proxmox.com/ 192 | TSUMUGU_EXCLUDE: --exclude ^temp --exclude pmg/dists/.+changelog$ --exclude devel/dists/.+changelog$ 193 | TSUMUGU_TIMEZONEFILE: http://download.proxmox.com/images/aplinfo.dat 194 | TSUMUGU_THREADS: 1 195 | image: ustcmirror/tsumugu:latest 196 | interval: 12 3 * * * 197 | logRotCycle: 10 198 | name: proxmox 199 | storageDir: /srv/repo/proxmox/ 200 | ``` 201 | 202 | More examples in [examples/](./examples/). 203 | 204 | ### Regex variables 205 | 206 | See [./src/regex_manager/mod.rs](./src/regex_manager/mod.rs). 207 | 208 | ### Exclusion and inclusion 209 | 210 | **There's a breaking change since 20240902. User regexes with `^` and `$` would be affected.** 211 | 212 | See [./docs/exclusion.md](./docs/exclusion.md). 213 | 214 | ### Deduplication 215 | 216 | Tsumugu relies on local file size and mtime to check if file shall be downloaded. 
Some file-level deduplicators like [jdupes](https://codeberg.org/jbruchon/jdupes) would ignore file mtime when deduplicating with hard links. This could be an issue for some repos, as some files would be redownloaded again and again every time as it does not have a correct mtime locally. 217 | 218 | Workarounds: 219 | 220 | - Set `--compare-size-only`. 221 | - Use filesystem-level/block-level deduplication like `zfs dedup`. 222 | - Use another file-level deduplicator which considers mtime (though I don't know which would do this). 223 | 224 | Also, if you are sure that some directory is identical with another, you could manually create a symlink for that. Tsumugu would ignore symlinks during syncing. 225 | 226 | ## Acknowledgements 227 | 228 | Special thanks to [NJU Mirror](https://mirrors.nju.edu.cn/) for extensive testing and bug reporting. 229 | 230 | ## Naming 231 | 232 | The name "tsumugu", and current branch name "pudding", are derived from the manga *A Drift Girl and a Noble Moon*. 233 | 234 |
235 | And... 236 | tsumugu, drawn as simplified version of hitori 237 | 238 | Tsumugu in the appearance of a very simplified version of Hitori (Obviously I am not very good at drawing though). 239 |
240 | 241 | Old (2020), unfinished golang version is named as "traverse", under the `main-old` branch. 242 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | fn main() -> shadow_rs::SdResult<()> { 2 | shadow_rs::new() 3 | } 4 | -------------------------------------------------------------------------------- /docs/exclusion.md: -------------------------------------------------------------------------------- 1 | # Tsumugu exclusion/inclusion logic and rules 2 | 3 | ## v2 4 | 5 | v2 is a breaking but much simpler change. It is based on two simple rules: 6 | 7 | - Match excludes and includes by their order in argv. 8 | - If nothing is matched, include. 9 | 10 | You need `--exclusion-v2` to enable this new behavior. 11 | 12 | > [!TIP] 13 | > To include `/a/b/c/d`, `/`, `/a/`, `/a/b/`, `/a/b/c/` and `/a/b/c/d` shall all be included. 14 | 15 | ## v1 16 | 17 | Currently tsumugu follows a simple algorithm to determine whether a path should be completely excluded, partially excluded, or included: 18 | 19 | 0. When parsing regex, a `rev_inner` regex will be generated by replacing variables (`${UBUNTU_LTS}`, etc.) to `(?.+)` (aka, match everything). The `rev_inner` would be used like this: 20 | 21 | ```rust 22 | pub fn is_others_match(&self, text: &str) -> bool { 23 | !self.inner.is_match(text) && self.rev_inner.is_match(text) 24 | } 25 | ``` 26 | 27 | 1. First, users' exclusions and inclusions are preprocessed. For all **exclusions, if it is a prefix of any inclusion**, it will be put into the `list_only_regexes`, otherwise it will be put into `instant_stop_regexes`. All inclusions are in `include_regexes`. 28 | 2. While working threads are handling listing requests: 29 | 1. 
Check with `instant_stop_regexes` and `include_regexes`: 30 | 31 | ```rust 32 | for regex in &self.instant_stop_regexes { 33 | if regex.is_match(text) { 34 | return Comparison::Stop; 35 | } 36 | } 37 | for regex in &self.include_regexes { 38 | if regex.is_match(text) { 39 | return Comparison::Ok; 40 | } 41 | } 42 | ``` 43 | 44 | 2. Then, the path will be checked with `rev_inner` regex by `is_others_match()`, and also completely excluded if matches (a fast shortcut). 45 | 46 | This is used for cases like Fedora -- it has many versions (currently from 1 to 40). Listing other version folders not in `${FEDORA_CURRENT}` is a waste of time and network. With this trick we could skip these unmatched versions. 47 | 3. Finally, if the path matches `list_only_regexes`, files under this directory will be ignored (unless they are matched by `include_regexes`), but subdirectories will still be listed. Paths that are not matched by any regexes will be included as usual. 48 | 49 | In this process some paths, which would be unnecessary, will still be listed. However, this logic suits needs of filtering OS versions well. 50 | 51 | ## Relative path 52 | 53 | Also note that currently, this is used when generating relative path for comparison: 54 | 55 | ```rust 56 | pub fn relative_to_str(relative: &[String], filename: Option<&str>) -> String { 57 | let mut r = relative.join("/"); 58 | if r.starts_with('/') { 59 | warn!("unexpected / at the beginning of relative ({r})"); 60 | } else { 61 | r.insert(0, '/'); 62 | } 63 | if r.len() != 1 { 64 | if r.ends_with('/') { 65 | warn!("unexpected / at the end of relative ({r})") 66 | } else { 67 | r.push('/') 68 | } 69 | } 70 | 71 | // here r already has / at the end 72 | match filename { 73 | None => r, 74 | Some(filename) => { 75 | assert!(!filename.starts_with('/') && !filename.ends_with('/')); 76 | format!("{}{}", r, filename) 77 | } 78 | } 79 | } 80 | ``` 81 | 82 | As a result: 83 | 84 | 1. 
All relative paths for comparison have "/" at front. 85 | 2. Directory paths have "/" at back, and files don't. 86 | 87 | Examples: 88 | 89 | 1. `http://example.com/file` => `/file` 90 | 2. `http://example.com/dir` => `/dir/` 91 | 3. `http://example.com/dir/file` => `/dir/file` 92 | 93 | Not that for compatibilities considerations, this trick is done: User regex which starts with `^` and not `^/`, would be replaced: `^` -> `^/` (this might break some very rare regexes). 94 | 95 | So you could **write `/something$` to exclude ALL files and directories with name `something`**, instead of using 2 regexes (`^something$` and `/something$`, to match `something` at root and others not in root). 96 | 97 | And also, `upstream` itself is NOT included when comparing. So if your upstream is set to `https://some.example.com/dir/`, you need to exclude `^something/` to exclude `https://some.example.com/dir/something/` instead of `^dir/something/`. 98 | 99 | Test with [tsumugu list](./parser.md#debugging), if in doubt. 100 | -------------------------------------------------------------------------------- /docs/parser.md: -------------------------------------------------------------------------------- 1 | # Parsers of tsumugu 2 | 3 | This is a list of parsers that tsumugu supports: 4 | 5 | - apache_f2: [Apache2's autoindex](https://httpd.apache.org/docs/2.4/mod/mod_autoindex.html) with HTMLTable FancyIndexed list (`F=2`). 6 | - directory_lister: [Directory Lister](https://www.directorylister.com/). 7 | - docker: A specialized parser for . 8 | - lighttpd: [lighttpd's mod_dirlisting](https://redmine.lighttpd.net/projects/lighttpd/wiki/Docs_ModDirlisting). 9 | - nginx: [Nginx's autoindex](https://nginx.org/en/docs/http/ngx_http_autoindex_module.html). It should also work with Apache2's autoindex `F=1` mode. 10 | - caddy: [Caddy's file_server](https://caddyserver.com/docs/caddyfile/directives/file_server). 
11 | - fancyindex: [Nginx fancyindex](https://github.com/aperezdc/ngx-fancyindex). 12 | - gradle: A specialized parser for , might suitable for other websites like this: 13 | 14 | ```html 15 |
  • 16 | 17 | gradle-8.10-wrapper.jar.sha256 18 | 14-Aug-2024 11:18 +0000 19 | 64.00B 20 | 21 |
  • 22 | ``` 23 | 24 | - denoflare-r2: Specialized parser for . 25 | - fallback: An inefficient fallback parser for `index.htm(l)` which is NOT a file listing: 26 | 27 | ```rust 28 | // An inefficient fallback parser only for non-listing HTML. 29 | // Limitations: 30 | // 1. It requires /index.html or /index.htm available. 31 | // Parser cannot write to disk, so index file would be accessed twice during sync. 32 | // 2. Currently it ignores files in directories. 33 | // For example, it recognizes "static/css.css" as contains a "static" directory only. 34 | // If "static/" is inaccessible, "static/css.css" would NOT be synced. 35 | // In future it might be implemented when we have another parser returning a full file tree. 36 | // 3. It would always try HEAD to confirm existence and get file mtime & size. Items with 403/404 code would be ignored. 37 | // 4. It does not try parse other html files. 38 | // 5. It only looks for . , 31 | 32 | 33 | -------------------------------------------------------------------------------- /fixtures/artifactrepo/index.html: -------------------------------------------------------------------------------- 1 | 2 | Index of 3 | openjdk-local 4 | 5 | 6 | 7 |

    8 |

    Index of openjdk-local

    9 |

    10 |
    Name         Last modified     Size
    11 |
    12 |
    10/          22-Aug-2021 15:18 -
    13 | 10.0.1/      22-Aug-2021 15:18 -
    14 | 10.0.2/      22-Aug-2021 15:18 -
    15 | 11.0.1/      22-Aug-2021 15:19 -
    16 | 11.0.2/      22-Aug-2021 15:19 -
    17 | 12/          22-Aug-2021 15:19 -
    18 | 12.0.1/      22-Aug-2021 15:19 -
    19 | 12.0.2/      22-Aug-2021 15:19 -
    20 | 13/          22-Aug-2021 15:19 -
    21 | 13.0.1/      22-Aug-2021 15:20 -
    22 | 13.0.2/      22-Aug-2021 15:20 -
    23 | 14/          22-Aug-2021 15:20 -
    24 | 14.0.1/      22-Aug-2021 15:20 -
    25 | 14.0.2/      22-Aug-2021 15:20 -
    26 | 15/          22-Aug-2021 15:21 -
    27 | 15.0.1/      22-Aug-2021 15:21 -
    28 | 15.0.2/      22-Aug-2021 15:21 -
    29 | 16/          22-Aug-2021 15:21 -
    30 | 16.0.1/      22-Aug-2021 15:21 -
    31 | 16.0.2/      22-Aug-2021 15:22 -
    32 | 17/          15-Sep-2021 03:36 -
    33 | 17.0.1/      20-Oct-2021 03:38 -
    34 | 17.0.2/      19-Jan-2022 03:39 -
    35 | 18/          23-Mar-2022 03:36 -
    36 | 18.0.1/      21-Apr-2022 03:36 -
    37 | 18.0.1.1/    03-May-2022 03:40 -
    38 | 18.0.2/      20-Jul-2022 03:37 -
    39 | 18.0.2.1/    20-Aug-2022 03:42 -
    40 | 19/          21-Sep-2022 03:36 -
    41 | 19.0.1/      19-Oct-2022 03:37 -
    42 | 19.0.2/      20-Dec-2022 12:17 -
    43 | 20/          14-Feb-2023 00:53 -
    44 | 20.0.1/      30-Mar-2023 16:43 -
    45 | 20.0.2/      26-Jun-2023 15:00 -
    46 | 21/          12-Aug-2023 00:39 -
    47 | 21.0.1/      06-Oct-2023 16:18 -
    48 | 21.0.2/      06-Jan-2024 15:26 -
    49 | 22/          17-Feb-2024 00:20 -
    50 | 22.0.1/      26-Mar-2024 13:00 -
    51 | 22.0.2/      20-Jun-2024 17:06 -
    52 | 9/           22-Aug-2021 15:22 -
    53 | 9.0.1/       21-May-2022 03:39 -
    54 | 9.0.4/       22-Aug-2021 15:22 -
    55 | java-jse-ri/ 22-Aug-2021 15:22 -
    56 | 
    57 |
    58 |
    59 | ArtifactRepo/ Server Port 443 60 |
    61 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /fixtures/buildroot/acl/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Index of /acl/ 6 | 20 | 21 | 22 |

    Index of /acl/

    23 |
    24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 |
    NameLast ModifiedSizeType
    ../ -  Directory
    acl-2.2.52.src.tar.gz2013-May-19 06:10:38377.5Kapplication/x-gtar-compressed
    acl-2.2.53.tar.gz2018-Jul-10 22:18:45512.0Kapplication/x-gtar-compressed
    acl-2.3.1.tar.xz2021-Apr-09 21:39:55347.3Kapplication/x-xz
    acl-2.3.2.tar.xz2024-Feb-07 03:04:10362.9Kapplication/x-xz
    34 |
    35 |
    lighttpd/1.4.67
    36 | 37 | 226 | 227 | 228 | 229 | -------------------------------------------------------------------------------- /fixtures/buildroot/mimalloc/build.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taoky/tsumugu/7b4673cdb05d87c24e8eb0c3e53dab5bece3efc3/fixtures/buildroot/mimalloc/build.html -------------------------------------------------------------------------------- /fixtures/buildroot/mimalloc/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | mi-malloc: Main Page 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 24 | 25 | 26 | 27 | 28 |
    29 |
    30 | 31 | 32 | 33 | 34 | 39 | 54 | 55 | 56 |
    35 |
    mi-malloc 36 |  1.7/2.0 37 |
    38 |
    40 | 41 | 45 | 49 | 50 | 51 | 52 |
    53 |
    57 |
    58 | 59 | 60 | 65 |
    66 |
    67 | 72 |
    74 |
    75 |
    76 | 81 |
    82 | 83 |
    87 |
    88 | 89 | 90 |
    91 | 94 |
    95 | 96 |
    97 |
    98 |
    mi-malloc Documentation
    99 |
    100 |
    101 |

    This is the API documentation of the mimalloc allocator (pronounced "me-malloc") – a general purpose allocator with excellent performance characteristics. Initially developed by Daan Leijen for the run-time systems of the Koka and Lean languages.

    102 |

    It is a drop-in replacement for malloc and can be used in other programs without code changes, for example, on Unix you can use it as:

    > LD_PRELOAD=/usr/bin/libmimalloc.so myprogram
    103 |

    Notable aspects of the design include:

    104 |
      105 |
    • small and consistent: the library is about 8k LOC using simple and consistent data structures. This makes it very suitable to integrate and adapt in other projects. For runtime systems it provides hooks for a monotonic heartbeat and deferred freeing (for bounded worst-case times with reference counting).
    • 106 |
    • free list sharding: instead of one big free list (per size class) we have many smaller lists per "mimalloc page" which reduces fragmentation and increases locality – things that are allocated close in time get allocated close in memory. (A mimalloc page contains blocks of one size class and is usually 64KiB on a 64-bit system).
    • 107 |
    • free list multi-sharding: the big idea! Not only do we shard the free list per mimalloc page, but for each page we have multiple free lists. In particular, there is one list for thread-local free operations, and another one for concurrent free operations. Free-ing from another thread can now be a single CAS without needing sophisticated coordination between threads. Since there will be thousands of separate free lists, contention is naturally distributed over the heap, and the chance of contending on a single location will be low – this is quite similar to randomized algorithms like skip lists where adding a random oracle removes the need for a more complex algorithm.
    • 108 |
    • eager page reset: when a "page" becomes empty (with increased chance due to free list sharding) the memory is marked to the OS as unused ("reset" or "purged") reducing (real) memory pressure and fragmentation, especially in long running programs.
    • 109 |
    • secure: mimalloc can be build in secure mode, adding guard pages, randomized allocation, encrypted free lists, etc. to protect against various heap vulnerabilities. The performance penalty is only around 5% on average over our benchmarks.
    • 110 |
    • first-class heaps: efficiently create and use multiple heaps to allocate across different regions. A heap can be destroyed at once instead of deallocating each object separately.
    • 111 |
    • bounded: it does not suffer from blowup [1], has bounded worst-case allocation times (wcat), bounded space overhead (~0.2% meta-data, with at most 12.5% waste in allocation sizes), and has no internal points of contention using only atomic operations.
    • 112 |
    • fast: In our benchmarks (see below), mimalloc outperforms all other leading allocators (jemalloc, tcmalloc, Hoard, etc), and usually uses less memory (up to 25% more in the worst case). A nice property is that it does consistently well over a wide range of benchmarks.
    • 113 |
    114 |

    You can read more on the design of mimalloc in the technical report which also has detailed benchmark results.

    115 |

    Further information:

    116 | 133 |
    134 |
    135 |
    136 | 137 | 142 | 143 | 144 | -------------------------------------------------------------------------------- /fixtures/buildroot/mimalloc/test/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taoky/tsumugu/7b4673cdb05d87c24e8eb0c3e53dab5bece3efc3/fixtures/buildroot/mimalloc/test/test -------------------------------------------------------------------------------- /fixtures/buildroot/mimalloc/using.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taoky/tsumugu/7b4673cdb05d87c24e8eb0c3e53dab5bece3efc3/fixtures/buildroot/mimalloc/using.html -------------------------------------------------------------------------------- /fixtures/clickhouse/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 18 | 19 | 20 |
    21 |
    root ⟩ repo-archive
    22 |
     
    23 | deb/ 24 | rpm/ 25 | tgz/ 26 |
     
    27 |
    0
    2022-09-23T13:39:52.080Z
    28 | CLICKHOUSE-KEY.GPG
    3,133
    (3.06 kb)
    2022-09-23T13:53:51.925Z
    29 |
    30 | 31 | -------------------------------------------------------------------------------- /fixtures/clickhouse/stable/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 18 | 19 | 20 |
    21 |
    roottgz ⟩ stable
    22 |
     
    23 |
    0
    2022-09-21T12:08:53.750Z
    24 | clickhouse-client-21.1.9.41.tgz.sha512
    162
    2023-11-24T13:12:00.540Z
    25 | clickhouse-client-21.1.9.41.tgz
    161,531
    (157.75 kb)
    2022-09-21T23:58:17.236Z
    26 |
     
    27 | 28 |
    29 | 30 | -------------------------------------------------------------------------------- /fixtures/clickhouse/stable/index2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 18 | 19 | 20 |
    21 |
    roottgz ⟩ stable
    22 |
     
    23 | clickhouse-client-23.7.3.14-arm64.tgz.sha512
    176
    2023-08-05T19:31:50.283Z
    24 | clickhouse-client-23.7.3.14-arm64.tgz
    79,837
    (77.97 kb)
    2023-08-05T19:30:42.398Z
    25 |
    26 | 27 | -------------------------------------------------------------------------------- /fixtures/docker/armv7l/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Index of linux/centos/7/armv7l/ 6 | 7 | 8 |

    Index of linux/centos/7/armv7l/

    9 |
    10 |
    ../
    11 | nightly/                                    2020-01-21 07:38  -
    12 | test/                                       2020-01-21 07:38  -
    13 | 

    14 | -------------------------------------------------------------------------------- /fixtures/docker/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Index of linux/centos/ 6 | 7 | 8 |

    Index of linux/centos/

    9 |
    10 |
    ../
    11 | 7.0/
    12 | 7.1/
    13 | 7.2/
    14 | 7.3/
    15 | 7.4/
    16 | 7.5/
    17 | 7.6/
    18 | 7.7/
    19 | 7.8/
    20 | 7.9/
    21 | 7/
    22 | 7Client/
    23 | 7Server/
    24 | 7Workstation/
    25 | 8.0/
    26 | 8.1/
    27 | 8.2/
    28 | 8.3/
    29 | 8.4/
    30 | 8.5/
    31 | 8.6/
    32 | 8.7/
    33 | 8.8/
    34 | 8.9/
    35 | 8/
    36 | 8Client/
    37 | 8Server/
    38 | 8Workstation/
    39 | 9.0/
    40 | 9.1/
    41 | 9.2/
    42 | 9.3/
    43 | 9.4/
    44 | 9.5/
    45 | 9.6/
    46 | 9.7/
    47 | 9.8/
    48 | 9.9/
    49 | 9/
    50 | 9Client/
    51 | 9Server/
    52 | 9Workstation/
    53 | docker-ce-staging.repo                                                                2023-07-07 20:20:56 2.0 KiB
    54 | docker-ce.repo                                                                        2023-07-07 20:20:51 1.9 KiB
    55 | gpg                                                                                   2023-07-07 20:21:31 1.6 KiB
    56 | 

    57 | -------------------------------------------------------------------------------- /fixtures/ghettoforge/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Index of /distributions/gf 5 | 6 | 7 |

    Index of /distributions/gf

    8 |
    Icon  Name                                Last modified      Size  Description
    [PARENTDIR] Parent Directory - 9 | [   ] RPM-GPG-KEY-gf.el7 2014-12-30 02:53 3.0K 10 | [   ] RPM-GPG-KEY-gf.el8 2020-01-13 09:40 3.1K 11 | [   ] RPM-GPG-KEY-gf.el9 2022-08-03 11:28 1.6K 12 | [DIR] archive/ 2020-12-21 02:34 - 13 | [DIR] el/ 2022-08-02 11:57 - 14 | [   ] gf-release-latest.gf.el7.noarch.rpm 2021-08-21 10:38 8.0K 15 | [   ] gf-release-latest.gf.el8.noarch.rpm 2021-08-21 10:39 11K 16 | [   ] gf-release-latest.gf.el9.noarch.rpm 2022-08-03 12:16 9.2K 17 |
    18 | 19 | -------------------------------------------------------------------------------- /fixtures/grml/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Index of /pool/main/m 5 | 6 | 7 |

    Index of /pool/main/m

    8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
    [ICO]NameLast modifiedSizeDescription

    [PARENTDIR]Parent Directory  -  
    [DIR]mozilla-firefox-adblock/2006-11-27 07:00 -  
    [DIR]msgid-chooser/2006-11-27 07:00 -  
    [DIR]magicrescue/2006-12-10 10:30 -  
    [DIR]memtest86+/2007-03-27 22:41 -  
    [DIR]misdn-kernel/2007-07-11 19:45 -  
    [DIR]minised/2007-11-04 19:15 -  
    [DIR]md5deep/2007-11-04 21:09 -  
    [DIR]multiseat/2010-01-08 17:40 -  
    [DIR]mdadm/2013-02-22 10:40 -  
    [DIR]madwifi/2024-10-07 18:11 -  

    24 | 25 | -------------------------------------------------------------------------------- /fixtures/loongnix/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Nginx Directory 7 | 8 | 9 | 10 | 11 | 12 | 18 |
    19 |

    Loongnix操作系统源

    20 |

    访问地址 http://pkg.loongnix.cn

    21 |

    Directory: 22 | /loongnix/dists/DaoXiangHu-cartoons/

    23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 |
    File Name  ↓ File Size  ↓ Date  ↓ 
    --
    -2023-08-15 05:48
    -2023-08-15 05:48
    -2023-08-15 05:48
    10.0 MiB2023-08-15 05:48
    659.9 KiB2023-08-15 05:48
    454.6 KiB2023-08-15 05:48
    7.9 KiB2023-08-15 05:48
    7.2 KiB2023-08-15 05:48
    659 B2023-08-15 05:48
    35 | 38 | 39 | 40 | 41 | 68 | 69 | 70 | 76 | -------------------------------------------------------------------------------- /fixtures/misc/1/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Misc test index 1 4 | 5 | 6 |

    /etc/

    7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
    FilenameSizeLast ModifiedSHA256
    passwd3.3 KB2024-08-24 15:04:11 +0000477a3d43f692aeaf1c7f40c0c91bffde3e2e638d8e90c668422373ee82a18521
    20 | 21 | 22 | -------------------------------------------------------------------------------- /fixtures/monitoring-plugins/index.html: -------------------------------------------------------------------------------- 1 | 2 | Index of /monitoring-plugins/ 3 | 4 |

    Index of /monitoring-plugins/


    ../
     5 | archive/                                           09-Oct-2015 16:12                   -
     6 | mib/                                               29-Nov-2013 20:20                   -
     7 | presentation/                                      26-Sep-2013 05:15                   -
     8 | snapshot/                                          10-Feb-2023 17:42                   -
     9 | monitoring-plugins-2.0.tar.gz                      11-Jul-2014 23:17             2610000
    10 | monitoring-plugins-2.0.tar.gz.sha1                 11-Jul-2014 23:17                  72
    11 | monitoring-plugins-2.1.1.tar.gz                    02-Dec-2014 07:46             2612331
    12 | monitoring-plugins-2.1.1.tar.gz.sha1               02-Dec-2014 07:46                  74
    13 | monitoring-plugins-2.1.2.tar.gz                    16-Oct-2015 17:40             2613060
    14 | monitoring-plugins-2.1.2.tar.gz.sha1               16-Oct-2015 17:40                  74
    15 | monitoring-plugins-2.1.tar.gz                      15-Oct-2014 20:32             2611940
    16 | monitoring-plugins-2.1.tar.gz.sha1                 15-Oct-2014 20:32                  72
    17 | monitoring-plugins-2.2.tar.gz                      29-Nov-2016 16:49             2461548
    18 | monitoring-plugins-2.2.tar.gz.sha1                 29-Nov-2016 16:49                  72
    19 | monitoring-plugins-2.3.1.tar.gz                    11-Apr-2021 17:07             2529669
    20 | monitoring-plugins-2.3.1.tar.gz.sha1               11-Apr-2021 17:07                  74
    21 | monitoring-plugins-2.3.2.tar.gz                    19-Oct-2022 20:58             2766966
    22 | monitoring-plugins-2.3.2.tar.gz.sha1               19-Oct-2022 20:58                  74
    23 | monitoring-plugins-2.3.3.tar.gz                    01-Feb-2023 21:53             2620192
    24 | monitoring-plugins-2.3.3.tar.gz.sha1               01-Feb-2023 21:53                  74
    25 | monitoring-plugins-2.3.tar.gz                      10-Dec-2020 05:50             2528556
    26 | monitoring-plugins-2.3.tar.gz.sha1                 10-Dec-2020 05:50                  72
    27 | timestamp                                          20-Jul-2023 10:46                  11
    28 | 

    29 | 30 | -------------------------------------------------------------------------------- /fixtures/mozilla/OJI/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Directory Listing: /pub/OJI/ 6 | 7 | 8 |

    Index of /pub/OJI/

    9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 |
    TypeNameSizeLast Modified
    Dir..
    DirMRJPlugin/
    FileMRJPlugin.sit.hqx234K13-Feb-2023 04:21
    43 | 44 | -------------------------------------------------------------------------------- /fixtures/mysql/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Index of /232905/apt/ubuntu/pool/mysql-tools/m 5 | 6 | 7 |

    Index of /232905/apt/ubuntu/pool/mysql-tools/m

    8 |
       Name                              Last modified        Size  
     9 | 
    10 | [DIR] Parent Directory 01-Jan-1970 00:00 - 11 | [DIR] mysql-community/ 19-Apr-2023 14:57 - 12 | [DIR] mysql-connector-c++/ 24-Oct-2023 18:04 - 13 | [DIR] mysql-connector-j/ 24-Oct-2023 18:17 - 14 | [DIR] mysql-connector-java/ 08-Oct-2022 08:19 - 15 | [DIR] mysql-connector-odbc/ 24-Oct-2023 17:29 - 16 | [DIR] mysql-connector-python/ 25-Oct-2023 16:10 - 17 | [DIR] mysql-router/ 24-Apr-2019 12:18 - 18 | [DIR] mysql-shell/ 19-Apr-2023 07:30 - 19 | [DIR] mysql-utilities/ 07-Nov-2017 09:27 - 20 | [DIR] mysql-workbench-community/ 19-Apr-2023 06:44 - 21 |

    22 | 23 | -------------------------------------------------------------------------------- /fixtures/nodejs/v4.9.1/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Index of /dist/v4.9.1/ 4 | 18 | 19 | 20 |

    Index of /dist/v4.9.1/


    ../
    21 | docs/                                                             -                   -
    22 | win-x64/                                                          -                   -
    23 | win-x86/                                                          -                   -
    24 | SHASUMS256.txt.asc                                 04-Nov-2024 17:40               4.1 KB
    25 | SHASUMS256.txt.sig                                 04-Nov-2024 17:40                310 B
    26 | SHASUMS256.txt                                     04-Nov-2024 17:40               3.6 KB
    27 | node-v4.9.1-darwin-x64.tar.gz                      30-Oct-2024 18:21                10 MB
    28 | node-v4.9.1-darwin-x64.tar.xz                      04-Nov-2024 17:40               7.1 MB
    29 | node-v4.9.1-headers.tar.gz                         04-Nov-2024 17:40               471 KB
    30 | node-v4.9.1-headers.tar.xz                         04-Nov-2024 17:40               342 KB
    31 | node-v4.9.1-linux-arm64.tar.gz                     30-Oct-2024 18:21                12 MB
    32 | node-v4.9.1-linux-arm64.tar.xz                     04-Nov-2024 17:40               7.7 MB
    33 | node-v4.9.1-linux-armv6l.tar.gz                    30-Oct-2024 18:21                11 MB
    34 | node-v4.9.1-linux-armv6l.tar.xz                    04-Nov-2024 17:40               7.3 MB
    35 | node-v4.9.1-linux-armv7l.tar.gz                    30-Oct-2024 18:21                11 MB
    36 | node-v4.9.1-linux-armv7l.tar.xz                    04-Nov-2024 17:40               7.3 MB
    37 | node-v4.9.1-linux-ppc64.tar.gz                     30-Oct-2024 18:21                12 MB
    38 | node-v4.9.1-linux-ppc64.tar.xz                     04-Nov-2024 17:40               7.5 MB
    39 | node-v4.9.1-linux-ppc64le.tar.gz                   30-Oct-2024 18:21                12 MB
    40 | node-v4.9.1-linux-ppc64le.tar.xz                   04-Nov-2024 17:40               7.6 MB
    41 | node-v4.9.1-linux-x64.tar.gz                       30-Oct-2024 18:21                12 MB
    42 | node-v4.9.1-linux-x64.tar.xz                       04-Nov-2024 17:40               8.2 MB
    43 | node-v4.9.1-linux-x86.tar.gz                       30-Oct-2024 18:21                12 MB
    44 | node-v4.9.1-linux-x86.tar.xz                       04-Nov-2024 17:40               7.8 MB
    45 | node-v4.9.1-sunos-x64.tar.gz                       30-Oct-2024 18:21                13 MB
    46 | node-v4.9.1-sunos-x64.tar.xz                       30-Oct-2024 18:21               8.4 MB
    47 | node-v4.9.1-sunos-x86.tar.gz                       30-Oct-2024 18:21                12 MB
    48 | node-v4.9.1-sunos-x86.tar.xz                       04-Nov-2024 17:40               7.7 MB
    49 | node-v4.9.1-win-x64.7z                             04-Nov-2024 17:40               6.1 MB
    50 | node-v4.9.1-win-x64.zip                            30-Oct-2024 18:21                11 MB
    51 | node-v4.9.1-win-x86.7z                             04-Nov-2024 17:40               5.4 MB
    52 | node-v4.9.1-win-x86.zip                            30-Oct-2024 18:21               9.6 MB
    53 | node-v4.9.1-x64.msi                                30-Oct-2024 18:21                11 MB
    54 | node-v4.9.1-x86.msi                                30-Oct-2024 18:21              10.0 MB
    55 | node-v4.9.1.pkg                                    30-Oct-2024 18:21                13 MB
    56 | node-v4.9.1.tar.gz                                 30-Oct-2024 18:21                23 MB
    57 | node-v4.9.1.tar.xz                                 30-Oct-2024 18:21                13 MB
    58 | 

    59 | 60 | -------------------------------------------------------------------------------- /fixtures/start_fileserver.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | python3 -m http.server 1921 -d $(dirname "$0") -------------------------------------------------------------------------------- /fixtures/vscode/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Index of /repos/vscode/ 4 | 5 |

    Index of /repos/vscode/

    6 |
    ../
     7 | dists/                                                                                              06-Mar-2025 07:19  481 Bytes
     8 | pool/                                                                                               05-Sep-2024 18:01  104.1 MB
     9 | 

    10 | -------------------------------------------------------------------------------- /fixtures/wine-builds/index.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Index of /wine-builds 6 | 7 | 8 | 9 | 10 |
    11 | 12 |
    13 |
    14 | WineHQ 15 |
    16 | 17 |
    Run Windows applications on Linux, BSD, Solaris and Mac OS X.
    18 | 19 | 24 | 25 |
    26 | 33 |
    34 | 35 |
    36 |
    37 | 38 | 39 |

    Wine Download Server

    40 | 41 |
    42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 |
    [ICO]NameLast modifiedSize
    [PARENTDIR]Parent Directory  -
    [DIR]android/2022-01-18 15:14 -
    [DIR]debian/2019-01-07 19:52 -
    [DIR]fedora/2023-04-20 14:52 -
    [DIR]macosx/2017-03-30 15:49 -
    [DIR]mageia/2017-09-29 23:46 -
    [DIR]ubuntu/2019-01-03 09:20 -
    [   ]Release.key2017-03-28 14:54 3.0K
    [   ]winehq.key2018-12-19 08:07 3.1K
    55 |
    56 |
    57 | 58 |
    59 | 60 |
    61 |
    62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 77 | 78 | 82 | 83 | -------------------------------------------------------------------------------- /fixtures/zabbix/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Zabbix Cloud Images and Appliances 5 | 6 | 7 | 8 |
    9 |

    Zabbix Cloud Images and Appliances

    10 | Zabbix is an enterprise-class open source distributed monitoring solution designed to monitor and track performance and availability of network servers, devices and other IT resources. It supports distributed and WEB monitoring, auto-discovery, and more. 11 |

    12 | Zabbix appliances are available on Google Cloud Platform along with previously released Zabbix appliances, Microsoft Azure, DigitalOcean and Amazon Web Services. Zabbix is now available on all major cloud platforms. 13 |

    14 | These appliances are created and officially supported by Zabbix SIA. 15 |

    16 | Installation instructions are available in Zabbix Cloud Images page. 17 |

    18 | If you have any problems or suggestions, please report an issue on Zabbix Bug Tracking System. 19 |

    20 | If you want to get professional support, installation or upgrade service, please see our Zabbix technical support service page. 21 | 22 |
    23 |
    24 | 25 | Index of /zabbix/ 26 | 27 |

    Index of /zabbix/


    ../
    28 | appliances/                                        27-Jul-2020 11:06                   -
    29 | binaries/                                          01-Dec-2020 20:09                   -
    30 | integrations/                                      12-Nov-2021 12:30                   -
    31 | nightly/                                           24-Aug-2024 12:03                   -
    32 | sources/                                           14-Dec-2020 13:39                   -
    33 | 

    34 | 35 |
    36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /src/bar.rs: -------------------------------------------------------------------------------- 1 | pub const TEMPLATE_DEFAULT: &str = 2 | "{msg}\n[{elapsed_precise}] {bytes}/{total_bytes} ({bytes_per_sec}, {eta})"; 3 | pub fn set_progress_bar(bar: &kyuri::Bar, len: u64, url: &url::Url) { 4 | bar.set_len(len); 5 | bar.set_message(&format!("Downloading {}", url)); 6 | bar.set_template(TEMPLATE_DEFAULT); 7 | bar.set_pos(0); 8 | bar.set_visible(true); 9 | } 10 | -------------------------------------------------------------------------------- /src/cli/list.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | parser::ListResult, 3 | regex_manager::{get_exclusion_manager, Comparison}, 4 | utils::{build_client, relative_str_process}, 5 | AsyncContext, ListArgs, 6 | }; 7 | 8 | // TODO: clean code 9 | pub fn list(args: &ListArgs, bind_address: Option) -> ! 
{ 10 | let parser = args.parser.build(); 11 | let client = build_client(args, parser.is_auto_redirect(), bind_address.as_ref(), true); 12 | let async_context = AsyncContext { 13 | runtime: tokio::runtime::Runtime::new().unwrap(), 14 | listing_client: client.clone(), 15 | download_client: client, 16 | }; 17 | let exclusion_manager = get_exclusion_manager(args); 18 | // get relative 19 | let upstream = &args.upstream; 20 | let upstream_path = parser.get_path(upstream); 21 | let relative = upstream_path 22 | .strip_prefix(&args.upstream_base) 23 | .unwrap() 24 | .to_str() 25 | .unwrap() 26 | .to_owned(); 27 | let relative = relative_str_process(&relative); 28 | assert!(relative.starts_with('/') && relative.ends_with('/')); 29 | let list = parser.get_list(&async_context, upstream).unwrap(); 30 | let match_cmp = exclusion_manager.match_str(&relative); 31 | 32 | println!("Relative: {relative}"); 33 | println!("Exclusion: {:?}", match_cmp); 34 | if match_cmp == Comparison::Stop { 35 | tracing::warn!("This listing would NOT be accessed at all."); 36 | } 37 | match list { 38 | ListResult::Redirect(url) => { 39 | println!("Redirect to {url}"); 40 | } 41 | ListResult::List(list) => { 42 | for item in list { 43 | print!("{item}"); 44 | let new_relative = format!("{}{}", relative, item.name); 45 | tracing::debug!("new_relative: {new_relative}"); 46 | println!( 47 | "{}", 48 | match exclusion_manager.match_str(new_relative.as_str()) { 49 | crate::regex_manager::Comparison::Stop => " (stop)", 50 | crate::regex_manager::Comparison::ListOnly => " (list only)", 51 | crate::regex_manager::Comparison::Ok => "", 52 | } 53 | ); 54 | } 55 | } 56 | } 57 | 58 | std::process::exit(0); 59 | } 60 | -------------------------------------------------------------------------------- /src/cli/mod.rs: -------------------------------------------------------------------------------- 1 | mod list; 2 | mod sync; 3 | pub use list::list; 4 | pub use sync::sync; 5 | 
-------------------------------------------------------------------------------- /src/compare.rs: -------------------------------------------------------------------------------- 1 | use std::path::Path; 2 | 3 | use chrono::{DateTime, FixedOffset, Utc}; 4 | use tracing::{debug, warn}; 5 | 6 | use crate::{ 7 | listing::{FileSize, FileType, ListItem}, 8 | utils::{self, naive_to_utc}, 9 | }; 10 | 11 | pub fn compare_filetype(fstype: std::fs::FileType, tsumugu_type: FileType) -> bool { 12 | match tsumugu_type { 13 | FileType::File => fstype.is_file(), 14 | FileType::Directory => fstype.is_dir(), 15 | } 16 | } 17 | 18 | pub fn should_download_by_list( 19 | path: &Path, 20 | remote: &ListItem, 21 | remote_timezone: Option, 22 | skip_if_exists: bool, 23 | size_only: bool, 24 | ) -> bool { 25 | let local_metadata = match path.metadata() { 26 | Ok(m) => { 27 | if skip_if_exists || remote.skip_check { 28 | debug!("Skipping {:?} because it exists", path); 29 | return false; 30 | } 31 | m 32 | } 33 | Err(e) => { 34 | if e.kind() != std::io::ErrorKind::NotFound { 35 | warn!("Failed to get metadata of {:?}: {:?}", path, e); 36 | } 37 | return true; 38 | } 39 | }; 40 | if !compare_filetype(local_metadata.file_type(), remote.type_) { 41 | // TODO: delete old file which type is not correct 42 | warn!("Type mismatch: {:?} remote {:?}", path, remote.type_); 43 | return true; 44 | } 45 | let local_size = local_metadata.len(); 46 | let is_size_match = match remote.size.unwrap_or(FileSize::Precise(0)) { 47 | FileSize::Precise(size) => local_size == size, 48 | // A very rough size check is used here, 49 | // as it looks like size returned by server may not be very accurate 50 | FileSize::HumanizedBinary(size, unit) => { 51 | let base = 1024_f64.powf(unit.get_exp().into()); 52 | let lsize = local_size as f64 / base; 53 | (lsize - size).abs() < 2.0 54 | } 55 | FileSize::HumanizedDecimal(size, unit) => { 56 | let base = 1000_f64.powf(unit.get_exp().into()); 57 | let lsize = local_size as 
f64 / base; 58 | (lsize - size).abs() < 2.0 59 | } 60 | }; 61 | if !is_size_match { 62 | debug!( 63 | "Size mismatch: {:?} local {:?} remote {:?}", 64 | path, local_size, remote.size 65 | ); 66 | return true; 67 | } 68 | if size_only { 69 | return false; 70 | } 71 | let local_mtime: DateTime = match local_metadata.modified() { 72 | Ok(m) => m, 73 | Err(_) => { 74 | // Here we expect all fs to support mtime 75 | unreachable!() 76 | } 77 | } 78 | .into(); 79 | // Use remote timezone or not? 80 | let timezone = match remote.timezone { 81 | None => remote_timezone, 82 | Some(tz) => Some(tz), 83 | }; 84 | let remote_mtime = naive_to_utc(&remote.mtime, timezone); 85 | let offset = remote_mtime - local_mtime; 86 | debug!("DateTime offset: {:?} {:?}", path, offset); 87 | match timezone { 88 | None => { 89 | // allow an offset to up to 24hrs 90 | offset.num_hours().abs() > 24 91 | } 92 | Some(_) => { 93 | // allow an offset up to 1min 94 | offset.num_minutes().abs() > 1 95 | } 96 | } 97 | } 98 | 99 | pub fn should_download_by_header(path: &Path, resp: &reqwest::Response, size_only: bool) -> bool { 100 | // Construct a valid "ListItem" and pass to should_download_by_list 101 | debug!("Checking {:?} by header: {:?}", path, resp); 102 | let item = ListItem { 103 | url: resp.url().clone(), 104 | name: path.file_name().unwrap().to_str().unwrap().to_string(), 105 | type_: if resp.url().as_str().ends_with('/') { 106 | FileType::Directory 107 | } else { 108 | FileType::File 109 | }, 110 | size: Some(FileSize::Precise(match resp.content_length() { 111 | Some(l) => l, 112 | None => { 113 | warn!( 114 | "No content-length from upstream ({}), go downloading anyway", 115 | resp.url() 116 | ); 117 | return true; 118 | } 119 | })), 120 | mtime: match utils::get_response_mtime(resp) { 121 | Ok(m) => m, 122 | Err(e) => { 123 | warn!( 124 | "Cannot get mtime from {} ({}), go downloading anyway", 125 | resp.url(), 126 | e 127 | ); 128 | return true; 129 | } 130 | } 131 | .naive_utc(), 132 | 
timezone: None, 133 | skip_check: false, 134 | }; 135 | should_download_by_list(path, &item, FixedOffset::east_opt(0), false, size_only) 136 | } 137 | -------------------------------------------------------------------------------- /src/extensions/apt.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use std::path::{Path, PathBuf}; 3 | use tracing::warn; 4 | use url::Url; 5 | 6 | pub fn is_apt_package(p: &Path) -> bool { 7 | // check if basename is Packages 8 | let basename = p.file_name().unwrap().to_str().unwrap(); 9 | if basename != "Packages" { 10 | return false; 11 | } 12 | // check if parents contain dists 13 | let parents = p.ancestors(); 14 | for iter in parents { 15 | let basename = iter.file_name().unwrap().to_str().unwrap(); 16 | if basename == "dists" { 17 | return true; 18 | } 19 | } 20 | false 21 | } 22 | 23 | // In every iter packages_path and packages_url be updated to their parents 24 | // When they reach the dists directory, return the root of debian 25 | // Otherwise when one of them reach the root, return error 26 | fn get_debian_root( 27 | packages_path: &Path, 28 | relative: &[String], 29 | packages_url: &Url, 30 | ) -> Result<(PathBuf, Vec, Url)> { 31 | fn pop(p: &mut PathBuf, r: Option<&mut Vec>, u: &mut Url) -> Result<()> { 32 | if !p.pop() { 33 | anyhow::bail!( 34 | "Cannot find debian root (path can not be popped, path = {:?})", 35 | p 36 | ); 37 | } 38 | if u.path() == "/" { 39 | anyhow::bail!( 40 | "Cannot find debian root (url can not be popped, url = {:?})", 41 | u 42 | ); 43 | } 44 | if let Some(r) = r { 45 | if r.pop().is_none() { 46 | anyhow::bail!( 47 | "Cannot find debian root (relative can not be popped, relative = {:?})", 48 | r 49 | ); 50 | } 51 | } 52 | u.path_segments_mut().unwrap().pop(); 53 | Ok(()) 54 | } 55 | let mut packages_path = packages_path.to_path_buf(); 56 | let mut relative = relative.to_owned(); 57 | let mut packages_url = packages_url.clone(); 58 | 
// first pop of file name to match relative 59 | pop(&mut packages_path, None, &mut packages_url)?; 60 | loop { 61 | let basename = packages_path.file_name().unwrap().to_str().unwrap(); 62 | let url_basename = packages_url.path_segments().unwrap().next_back().unwrap(); 63 | if basename == "dists" && url_basename == "dists" { 64 | // we don't wanna dists folder in return value 65 | pop(&mut packages_path, Some(&mut relative), &mut packages_url)?; 66 | // add trailing slash to packages_url 67 | packages_url.path_segments_mut().unwrap().push(""); 68 | return Ok((packages_path, relative, packages_url)); 69 | } 70 | if basename != url_basename { 71 | warn!( 72 | "basename = {}, url_basename = {}, relative = {:?}", 73 | basename, url_basename, relative 74 | ); 75 | } 76 | pop(&mut packages_path, Some(&mut relative), &mut packages_url)?; 77 | } 78 | } 79 | 80 | #[derive(Debug)] 81 | pub struct AptPackage { 82 | pub url: Url, 83 | pub relative: Vec, 84 | #[allow(dead_code)] 85 | pub size: usize, 86 | pub filename: String, 87 | } 88 | 89 | impl From for super::ExtensionPackage { 90 | fn from(val: AptPackage) -> Self { 91 | super::ExtensionPackage { 92 | url: val.url, 93 | relative: val.relative, 94 | filename: val.filename, 95 | } 96 | } 97 | } 98 | 99 | pub fn parse_package( 100 | packages_path: &Path, 101 | relative: &[String], 102 | packages_url: &Url, 103 | ) -> Result> { 104 | let data = std::fs::read_to_string(packages_path)?; 105 | let packages = apt_parser::Packages::from(&data); 106 | let (_, root_relative, debian_root_url) = 107 | get_debian_root(packages_path, relative, packages_url)?; 108 | // ignore errors 109 | let mut res = vec![]; 110 | for package in packages { 111 | let pool_url = package.filename; 112 | let size = package.size; 113 | let url = debian_root_url.join(&pool_url)?; 114 | 115 | let mut pool_split: Vec = pool_url.split('/').map(|s| s.to_string()).collect(); 116 | let mut relative = root_relative.clone(); 117 | relative.append(&mut pool_split); 
118 | 119 | let basename = relative.pop().unwrap(); 120 | 121 | res.push(AptPackage { 122 | url, 123 | relative, 124 | size: size as usize, 125 | filename: basename, 126 | }) 127 | } 128 | 129 | Ok(res) 130 | } 131 | 132 | #[cfg(test)] 133 | mod tests { 134 | use super::*; 135 | use test_log::test; 136 | 137 | #[test] 138 | fn test_debian_root() { 139 | let packages_path = Path::new("/var/www/html/dists/buster/main/binary-amd64/Packages"); 140 | let relative = vec![ 141 | "dists".to_string(), 142 | "buster".to_string(), 143 | "main".to_string(), 144 | "binary-amd64".to_string(), 145 | ]; 146 | let packages_url = 147 | Url::parse("http://localhost/dists/buster/main/binary-amd64/Packages").unwrap(); 148 | let (debian_root_path, root_relative, debian_root_url) = 149 | get_debian_root(packages_path, &relative, &packages_url).unwrap(); 150 | assert_eq!(debian_root_path, Path::new("/var/www/html/")); 151 | assert_eq!(root_relative, Vec::::new()); 152 | assert_eq!(debian_root_url, Url::parse("http://localhost/").unwrap()); 153 | 154 | let packages_path = 155 | Path::new("/var/www/html/mysql/apt/ubuntu/dists/jammy/mysql-8.0/binary-amd64/Packages"); 156 | let relative = vec![ 157 | "apt".to_string(), 158 | "ubuntu".to_string(), 159 | "dists".to_string(), 160 | "jammy".to_string(), 161 | "mysql-8.0".to_string(), 162 | "binary-amd64".to_string(), 163 | ]; 164 | let packages_url = Url::parse( 165 | "http://repo.mysql.com/apt/ubuntu/dists/jammy/mysql-8.0/binary-amd64/Packages", 166 | ) 167 | .unwrap(); 168 | let (debian_root_path, root_relative, debian_root_url) = 169 | get_debian_root(packages_path, &relative, &packages_url).unwrap(); 170 | assert_eq!( 171 | debian_root_path, 172 | Path::new("/var/www/html/mysql/apt/ubuntu/") 173 | ); 174 | assert_eq!(root_relative, vec!["apt".to_string(), "ubuntu".to_string()]); 175 | assert_eq!( 176 | debian_root_url, 177 | Url::parse("http://repo.mysql.com/apt/ubuntu/").unwrap() 178 | ); 179 | } 180 | } 181 | 
-------------------------------------------------------------------------------- /src/extensions/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::SyncArgs; 2 | use std::path::Path; 3 | use tracing::{info, warn}; 4 | use url::Url; 5 | 6 | mod apt; 7 | mod yum; 8 | 9 | pub struct ExtensionPackage { 10 | pub url: Url, 11 | pub relative: Vec, 12 | pub filename: String, 13 | } 14 | 15 | pub fn extension_handler( 16 | args: &SyncArgs, 17 | path: &Path, 18 | relative: &[String], 19 | url: &Url, 20 | push_func: F, 21 | ) where 22 | F: Fn(&ExtensionPackage), 23 | { 24 | if args.apt_packages && crate::extensions::apt::is_apt_package(path) { 25 | let packages = apt::parse_package(path, relative, url); 26 | match packages { 27 | Err(e) => { 28 | warn!("Failed to parse APT package {:?}: {:?}", path, e); 29 | } 30 | Ok(packages) => { 31 | for package in packages { 32 | info!("APT package: {:?}", package); 33 | push_func(&package.into()); 34 | } 35 | } 36 | } 37 | } 38 | if args.yum_packages { 39 | let is_primary = crate::extensions::yum::is_yum_primary_xml(path); 40 | let is_repomd = crate::extensions::yum::is_yum_repomd_xml(path); 41 | match (is_primary, is_repomd) { 42 | (false, false) => (), 43 | (p, r) => { 44 | assert!(!(p && r), "File is both primary and repomd"); 45 | let xml_type = if p { 46 | crate::extensions::yum::YumXmlType::Primary 47 | } else { 48 | crate::extensions::yum::YumXmlType::Repomd 49 | }; 50 | let packages = yum::parse_package(path, relative, url, xml_type); 51 | match packages { 52 | Err(e) => { 53 | warn!("Failed to parse YUM file {:?}: {:?}", path, e); 54 | } 55 | Ok(packages) => { 56 | for package in packages { 57 | info!("YUM package: {:?}", package); 58 | push_func(&package.into()); 59 | } 60 | } 61 | } 62 | } 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/extensions/yum.rs: 
-------------------------------------------------------------------------------- 1 | use std::{io::Read, path::Path}; 2 | 3 | use anyhow::Result; 4 | use flate2::read::GzDecoder; 5 | use tracing::info; 6 | use url::Url; 7 | 8 | fn get_locations_from_xml(s: &str) -> Vec { 9 | let re = regex::Regex::new(r#""#).unwrap(); 10 | let mut urls = Vec::new(); 11 | for line in s.lines() { 12 | if let Some(caps) = re.captures(line) { 13 | let url = caps.get(1).unwrap().as_str(); 14 | urls.push(url.to_string()); 15 | } 16 | } 17 | urls 18 | } 19 | 20 | pub fn is_yum_primary_xml(p: &Path) -> bool { 21 | p.file_name() 22 | .map(|f| f.to_str().unwrap()) 23 | .map(|f| f.ends_with("primary.xml.gz")) 24 | .unwrap_or(false) 25 | } 26 | 27 | // read and extract location 28 | pub fn read_primary_xml(p: &Path) -> Result> { 29 | let bytes = std::fs::read(p)?; 30 | let mut gzd = GzDecoder::new(&bytes[..]); 31 | let mut s = String::new(); 32 | gzd.read_to_string(&mut s)?; 33 | 34 | Ok(get_locations_from_xml(&s)) 35 | } 36 | 37 | pub enum YumXmlType { 38 | Primary, 39 | Repomd, 40 | } 41 | 42 | #[derive(Debug)] 43 | pub struct YumPackage { 44 | pub url: Url, 45 | pub relative: Vec, 46 | pub filename: String, 47 | } 48 | 49 | impl From for super::ExtensionPackage { 50 | fn from(val: YumPackage) -> Self { 51 | super::ExtensionPackage { 52 | url: val.url, 53 | relative: val.relative, 54 | filename: val.filename, 55 | } 56 | } 57 | } 58 | 59 | pub fn parse_package( 60 | packages_path: &Path, 61 | relative: &[String], 62 | packages_url: &Url, 63 | xml_type: YumXmlType, 64 | ) -> Result> { 65 | let packages = match xml_type { 66 | YumXmlType::Primary => read_primary_xml(packages_path)?, 67 | YumXmlType::Repomd => read_yum_repomd_xml(packages_path)?, 68 | }; 69 | let mut relative = relative.to_owned(); 70 | relative.pop(); // pop "repodata" 71 | 72 | let mut base_url = packages_url.clone(); 73 | base_url.path_segments_mut().unwrap().pop().pop().push(""); 74 | info!("base_url = {:?}", base_url); 75 
| info!("relative = {:?}", relative); 76 | 77 | let mut res = vec![]; 78 | for package in packages { 79 | let url = base_url.join(&package)?; 80 | let split: Vec = package.split('/').map(|s| s.to_string()).collect(); 81 | let mut relative = relative.clone(); 82 | relative.append(&mut split.clone()); 83 | 84 | let basename = relative.pop().unwrap(); 85 | res.push(YumPackage { 86 | url, 87 | relative, 88 | filename: basename, 89 | }) 90 | } 91 | 92 | Ok(res) 93 | } 94 | 95 | // Well, brain-damaged mysql-repo even cannot show all primary.xml.gz... 96 | // So I have to use repomd.xml to get primary.xml.gz... 97 | // Good news is that it seems like existing functions for handling primary.xml.gz can be reused. 98 | pub fn is_yum_repomd_xml(p: &Path) -> bool { 99 | p.file_name() 100 | .map(|f| f.to_str().unwrap()) 101 | .map(|f| f == "repomd.xml") 102 | .unwrap_or(false) 103 | } 104 | 105 | pub fn read_yum_repomd_xml(p: &Path) -> Result> { 106 | let bytes = std::fs::read(p)?; 107 | let s = String::from_utf8_lossy(&bytes); 108 | 109 | Ok(get_locations_from_xml(s.as_ref())) 110 | } 111 | -------------------------------------------------------------------------------- /src/listing.rs: -------------------------------------------------------------------------------- 1 | // Module for handling directory listing 2 | 3 | use std::fmt::Display; 4 | 5 | use chrono::{FixedOffset, NaiveDateTime}; 6 | use url::Url; 7 | 8 | #[derive(Debug, PartialEq, Clone, Copy)] 9 | pub enum FileType { 10 | File, 11 | Directory, 12 | } 13 | 14 | #[derive(Debug, PartialEq, Clone, Copy)] 15 | pub enum SizeUnit { 16 | B, 17 | K, 18 | M, 19 | G, 20 | T, 21 | P, 22 | } 23 | 24 | impl Display for SizeUnit { 25 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 26 | let unit = match self { 27 | SizeUnit::B => "B", 28 | SizeUnit::K => "K", 29 | SizeUnit::M => "M", 30 | SizeUnit::G => "G", 31 | SizeUnit::T => "T", 32 | SizeUnit::P => "P", 33 | }; 34 | write!(f, "{unit}") 35 | } 36 | } 37 | 
38 | impl SizeUnit { 39 | pub fn get_exp(self) -> u32 { 40 | match self { 41 | SizeUnit::B => 0, 42 | SizeUnit::K => 1, 43 | SizeUnit::M => 2, 44 | SizeUnit::G => 3, 45 | SizeUnit::T => 4, 46 | SizeUnit::P => 5, 47 | } 48 | } 49 | } 50 | 51 | #[derive(Debug, Clone, Copy, PartialEq)] 52 | pub enum FileSize { 53 | Precise(u64), 54 | /// 1024B -> 1KiB 55 | HumanizedBinary(f64, SizeUnit), 56 | #[allow(dead_code)] 57 | /// 1000B -> 1KB 58 | HumanizedDecimal(f64, SizeUnit), 59 | } 60 | 61 | impl Display for FileSize { 62 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 63 | match self { 64 | FileSize::Precise(size) => write!(f, "{}", size), 65 | FileSize::HumanizedBinary(size, unit) => write!(f, "{size} {unit}"), 66 | FileSize::HumanizedDecimal(size, unit) => write!(f, "{size} {unit}"), 67 | } 68 | } 69 | } 70 | 71 | impl FileSize { 72 | pub fn get_humanized(s: &str) -> (f64, SizeUnit) { 73 | // separate numeric and unit 74 | let mut numeric = String::new(); 75 | let mut unit = String::new(); 76 | for c in s.chars() { 77 | if c.is_ascii_digit() || c == '.' 
{ 78 | numeric.push(c); 79 | } else { 80 | unit.push(c); 81 | } 82 | } 83 | let unit = unit.to_lowercase(); 84 | let unit = unit.trim(); 85 | 86 | let numeric = numeric.parse::().unwrap(); 87 | let unit = match unit.chars().next() { 88 | None => SizeUnit::B, 89 | Some(u) => match u { 90 | 'b' => SizeUnit::B, 91 | 'k' => SizeUnit::K, 92 | 'm' => SizeUnit::M, 93 | 'g' => SizeUnit::G, 94 | 't' => SizeUnit::T, 95 | 'p' => SizeUnit::P, 96 | _ => panic!("Unknown unit: {unit}"), 97 | }, 98 | }; 99 | 100 | (numeric, unit) 101 | } 102 | 103 | pub fn get_estimated(&self) -> u64 { 104 | match self { 105 | FileSize::Precise(size) => *size, 106 | FileSize::HumanizedBinary(size, unit) => { 107 | let exp = unit.get_exp(); 108 | (size * 1024_f64.powi(exp as i32)) as u64 109 | } 110 | FileSize::HumanizedDecimal(size, unit) => { 111 | let exp = unit.get_exp(); 112 | (size * 1000_f64.powi(exp as i32)) as u64 113 | } 114 | } 115 | } 116 | } 117 | 118 | #[derive(Debug, Clone)] 119 | pub struct ListItem { 120 | pub url: Url, 121 | pub name: String, 122 | pub type_: FileType, 123 | pub size: Option, 124 | /// mtime is parsed from HTML, which is the local datetime of the "server" (not necessarily localtime or UTC) 125 | pub mtime: NaiveDateTime, 126 | /// Some HTML provides "timezone", parser shall set this if so (otherwise just None) 127 | pub timezone: Option, 128 | /// Don't check size and mtime: download only if the file doesn't exist. 129 | /// This is expected to be set by apt/yum parser extension (parser will not use this). 
130 | pub skip_check: bool, 131 | } 132 | 133 | impl ListItem { 134 | pub fn new( 135 | url: Url, 136 | name: String, 137 | type_: FileType, 138 | size: Option, 139 | mtime: NaiveDateTime, 140 | timezone: Option, 141 | ) -> Self { 142 | Self { 143 | url, 144 | name, 145 | type_, 146 | size, 147 | mtime, 148 | timezone, 149 | skip_check: false, 150 | } 151 | } 152 | } 153 | 154 | impl Display for ListItem { 155 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 156 | let size_str = match self.size { 157 | Some(size) => size.to_string(), 158 | None => String::from("(none)"), 159 | }; 160 | let mtime_str = self.mtime.format("%Y-%m-%d %H:%M:%S").to_string(); 161 | let timezone = match self.timezone { 162 | None => "", 163 | Some(tz) => &format!("({})", tz), 164 | }; 165 | write!( 166 | f, 167 | "{} {:?} {} {}{} {}", 168 | self.url, self.type_, size_str, mtime_str, timezone, self.name 169 | ) 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | #![warn(clippy::cognitive_complexity)] 2 | use std::{path::PathBuf, sync::Mutex}; 3 | 4 | use clap::{Parser, Subcommand}; 5 | 6 | use parser::{ParserType, ParserTypeMatch}; 7 | use tracing::level_filters::LevelFilter; 8 | use tracing_subscriber::EnvFilter; 9 | use url::Url; 10 | 11 | use shadow_rs::shadow; 12 | use utils::{headers_to_headermap, Header}; 13 | shadow!(build); 14 | 15 | mod bar; 16 | mod cli; 17 | mod compare; 18 | mod listing; 19 | mod parser; 20 | mod regex_manager; 21 | mod timezone; 22 | mod utils; 23 | 24 | mod extensions; 25 | 26 | use crate::regex_manager::ExpandedRegex; 27 | 28 | #[allow(clippy::const_is_empty)] 29 | fn get_version() -> &'static str { 30 | let tag = build::TAG; 31 | let clean = build::GIT_CLEAN; 32 | let short_commit = build::SHORT_COMMIT; 33 | if !clean { 34 | Box::leak(format!("{} (dirty)", 
build::SHORT_COMMIT).into_boxed_str()) 35 | } else if tag.is_empty() { 36 | if short_commit.is_empty() { 37 | return build::PKG_VERSION; 38 | } else { 39 | return short_commit; 40 | } 41 | } else { 42 | return tag; 43 | } 44 | } 45 | 46 | #[derive(Parser, Debug)] 47 | #[command(about)] 48 | #[command(propagate_version = true)] 49 | #[command(version = get_version())] 50 | struct Cli { 51 | #[command(subcommand)] 52 | command: Commands, 53 | } 54 | 55 | #[derive(Subcommand, Debug)] 56 | enum Commands { 57 | /// Sync files from upstream to local. 58 | Sync(SyncArgs), 59 | 60 | /// List files from upstream. 61 | List(ListArgs), 62 | } 63 | 64 | trait SharedArgs { 65 | fn user_agent(&self) -> &str; 66 | fn headers(&self) -> reqwest::header::HeaderMap; 67 | fn use_v2_exclusion(&self) -> bool; 68 | fn exclude(&self) -> &[ExpandedRegex]; 69 | fn include(&self) -> &[ExpandedRegex]; 70 | } 71 | 72 | #[derive(Parser, Debug)] 73 | pub struct SyncArgs { 74 | /// Customize tsumugu's user agent. 75 | #[clap(long, default_value = "tsumugu")] 76 | user_agent: String, 77 | 78 | /// Do not download files and cleanup. 79 | #[clap(long)] 80 | dry_run: bool, 81 | 82 | /// Threads at work. 83 | #[clap(long, default_value_t = 2)] 84 | threads: usize, 85 | 86 | /// Do not clean up after sync. 87 | #[clap(long)] 88 | no_delete: bool, 89 | 90 | /// Set max delete count. 91 | #[clap(long, default_value_t = 100)] 92 | max_delete: usize, 93 | 94 | /// The upstream URL. 95 | #[clap(value_parser)] 96 | upstream: Url, 97 | 98 | /// The local directory. 99 | #[clap(value_parser)] 100 | local: PathBuf, 101 | 102 | /// You can set a valid URL for guessing. Set it to "no" to disable this behavior. 103 | /// By default it would recursively find the first file to HEAD for guessing 104 | #[clap(long)] 105 | timezone_file: Option, 106 | 107 | /// Manually set timezone (+- hrs). This overrides timezone_file. 108 | #[clap(long)] 109 | timezone: Option, 110 | 111 | /// Retry count for each request. 
112 | #[clap(long, default_value_t = 3)] 113 | retry: usize, 114 | 115 | /// Do an HEAD before actual GET. 116 | /// Otherwise when head-before-get and allow-time-from-parser are not set, 117 | /// when GETting tsumugu would try checking if we still need to download it. 118 | #[clap(long)] 119 | head_before_get: bool, 120 | 121 | /// Choose a main parser. 122 | #[clap(long, value_enum, default_value_t = ParserType::Nginx)] 123 | parser: ParserType, 124 | 125 | /// Choose supplementary parsers. Format: "parsername:matchpattern". 126 | /// matchpattern is a relative path regex. 127 | /// Supports multiple. 128 | #[clap(long, value_parser)] 129 | parser_match: Vec, 130 | 131 | /// Excluded relative path regex. Supports multiple. 132 | #[clap(long, value_parser)] 133 | exclude: Vec, 134 | 135 | /// Included relative path regex (even if excluded). Supports multiple. 136 | #[clap(long, value_parser)] 137 | include: Vec, 138 | 139 | /// Skip relative path regex if they exist. Supports multiple. 140 | #[clap(long, value_parser)] 141 | skip_if_exists: Vec, 142 | 143 | /// Relative path regex for those compare size only **after** HEAD (head_before_get on) or GET (head_before_get off) 144 | #[clap(long, value_parser)] 145 | compare_size_only: Vec, 146 | 147 | /// Allow mtime from parser if not available from HTTP headers. 148 | #[clap(long, visible_alias = "allow-mtime-from-parser")] 149 | trust_mtime_from_parser: bool, 150 | 151 | /// (Experimental) APT Packages file parser to find out missing packages. 152 | #[clap(long)] 153 | apt_packages: bool, 154 | 155 | /// (Experimental) YUM Packages file parser to find out missing packages. 156 | #[clap(long)] 157 | yum_packages: bool, 158 | 159 | /// Ignore 404 NOT FOUND as error when downloading files. 160 | #[clap(long)] 161 | ignore_nonexist: bool, 162 | 163 | /// Allow automatically choose fallback parser when ParseError occurred. 
164 | #[clap(long)] 165 | auto_fallback: bool, 166 | 167 | /// Custom header for HTTP(S) requests in format "Headerkey: headervalue". Supports multiple. 168 | #[clap(long, value_parser)] 169 | header: Vec
    , 170 | 171 | /// The exclusion v2 mode. To keep compatibility, this is off by default. 172 | #[clap(long)] 173 | exclusion_v2: bool, 174 | } 175 | 176 | impl SharedArgs for &SyncArgs { 177 | fn user_agent(&self) -> &str { 178 | &self.user_agent 179 | } 180 | 181 | fn headers(&self) -> reqwest::header::HeaderMap { 182 | headers_to_headermap(&self.header) 183 | } 184 | 185 | fn use_v2_exclusion(&self) -> bool { 186 | self.exclusion_v2 187 | } 188 | 189 | fn exclude(&self) -> &[ExpandedRegex] { 190 | &self.exclude 191 | } 192 | 193 | fn include(&self) -> &[ExpandedRegex] { 194 | &self.include 195 | } 196 | } 197 | 198 | #[derive(Parser, Debug)] 199 | pub struct ListArgs { 200 | /// Customize tsumugu's user agent. 201 | #[clap(long, default_value = "tsumugu")] 202 | user_agent: String, 203 | 204 | /// The upstream URL. 205 | #[clap(value_parser)] 206 | upstream: Url, 207 | 208 | /// Choose a main parser. 209 | #[clap(long, value_enum, default_value_t=ParserType::Nginx)] 210 | parser: ParserType, 211 | 212 | /// Excluded relative path regex. Supports multiple. 213 | #[clap(long, value_parser)] 214 | exclude: Vec, 215 | 216 | /// Included relative path regex (even if excluded). Supports multiple. 217 | #[clap(long, value_parser)] 218 | include: Vec, 219 | 220 | /// The upstream base starting with "/". 221 | #[clap(long, default_value = "/")] 222 | upstream_base: String, 223 | 224 | /// Custom header for HTTP(S) requests in format "Headerkey: headervalue". Supports multiple. 225 | #[clap(long, value_parser)] 226 | header: Vec
    , 227 | 228 | /// The exclusion v2 mode. To keep compatibility, this is off by default. 229 | #[clap(long)] 230 | exclusion_v2: bool, 231 | } 232 | 233 | impl SharedArgs for &ListArgs { 234 | fn user_agent(&self) -> &str { 235 | &self.user_agent 236 | } 237 | 238 | fn headers(&self) -> reqwest::header::HeaderMap { 239 | headers_to_headermap(&self.header) 240 | } 241 | 242 | fn use_v2_exclusion(&self) -> bool { 243 | self.exclusion_v2 244 | } 245 | 246 | fn exclude(&self) -> &[ExpandedRegex] { 247 | &self.exclude 248 | } 249 | 250 | fn include(&self) -> &[ExpandedRegex] { 251 | &self.include 252 | } 253 | } 254 | 255 | pub struct AsyncContext { 256 | pub listing_client: reqwest::Client, 257 | pub download_client: reqwest::Client, 258 | pub runtime: tokio::runtime::Runtime, 259 | } 260 | 261 | fn main() { 262 | let enable_color = std::env::var("NO_COLOR").is_err(); 263 | let pb_manager = kyuri::Manager::new(std::time::Duration::from_secs(1)); 264 | pb_manager.set_ticker(true); 265 | let pb_writer = pb_manager.create_writer(); 266 | tracing_subscriber::fmt() 267 | .with_thread_ids(true) 268 | .with_env_filter( 269 | // https://github.com/tokio-rs/tracing/issues/735 270 | EnvFilter::builder() 271 | .with_default_directive(LevelFilter::INFO.into()) 272 | .from_env_lossy(), 273 | ) 274 | .with_ansi(enable_color) 275 | .with_writer(Mutex::new(pb_writer)) 276 | .init(); 277 | 278 | // Print version info in debug mode 279 | tracing::debug!("{}", build::CLAP_LONG_VERSION); 280 | 281 | let bind_address = match std::env::var("BIND_ADDRESS").ok() { 282 | Some(s) => { 283 | let s = s.trim(); 284 | if s.is_empty() { 285 | None 286 | } else { 287 | Some(s.to_owned()) 288 | } 289 | } 290 | None => None, 291 | }; 292 | 293 | // terminate whole process when a thread panics 294 | let orig_hook = std::panic::take_hook(); 295 | std::panic::set_hook(Box::new(move |panic_info| { 296 | orig_hook(panic_info); 297 | std::process::exit(3); 298 | })); 299 | 300 | let args = Cli::parse(); 
301 | match args.command { 302 | Commands::Sync(args) => { 303 | if !args.upstream.path().ends_with('/') { 304 | tracing::warn!("It's suggested to append backslash to upstream, though this also works in most cases (most web servers redirects this to URL with backslash at end).") 305 | } 306 | cli::sync(&args, bind_address, pb_manager); 307 | } 308 | Commands::List(args) => { 309 | // extra arg check 310 | if !args.upstream.path().ends_with('/') { 311 | panic!("upstream should end with /"); 312 | } 313 | if !args.upstream_base.starts_with('/') { 314 | panic!("upstream_base does not start with /") 315 | } 316 | cli::list(&args, bind_address); 317 | } 318 | }; 319 | } 320 | -------------------------------------------------------------------------------- /src/parser/apache_f2.rs: -------------------------------------------------------------------------------- 1 | // https://httpd.apache.org/docs/2.4/mod/mod_autoindex.html 2 | // > F=2 formats the listing as an HTMLTable FancyIndexed list 3 | 4 | use crate::{ 5 | listing::{FileSize, FileType, ListItem}, 6 | utils::get, 7 | }; 8 | 9 | use super::*; 10 | use anyhow::{anyhow, Result}; 11 | use chrono::NaiveDateTime; 12 | use scraper::{Html, Selector}; 13 | use tracing::debug; 14 | 15 | #[derive(Debug, Clone, Default)] 16 | pub struct ApacheF2ListingParser; 17 | 18 | impl Parser for ApacheF2ListingParser { 19 | fn name(&self) -> &'static str { 20 | "Apache-f2 format" 21 | } 22 | 23 | fn get_list( 24 | &self, 25 | async_context: &AsyncContext, 26 | url: &url::Url, 27 | ) -> Result { 28 | let resp = get( 29 | &async_context.runtime, 30 | &async_context.listing_client, 31 | url.clone(), 32 | )?; 33 | let url = resp.url().clone(); 34 | let body = get_text(&async_context.runtime, resp)?; 35 | assert_if_url_has_no_trailing_slash(&url); 36 | let document = Html::parse_document(&body); 37 | // find the indexlist which contains file index 38 | let selector = Selector::parse("table").unwrap(); 39 | let mut selector_iter = 
document.select(&selector); 40 | let indexlist; 41 | loop { 42 | let t = selector_iter 43 | .next() 44 | .ok_or(anyhow!("No more matched"))?; 45 | let t_html = t.html().to_lowercase(); 46 | if t_html.contains("name") 47 | && t_html.contains("last modified") 48 | && t_html.contains("size") 49 | { 50 | indexlist = t; 51 | break; 52 | } 53 | } 54 | // find all inside -- there might have titlebar or
    , filter them later 55 | let selector = Selector::parse("tr").unwrap(); 56 | let mut items = Vec::new(); 57 | 58 | let mut lastmod_before_size = true; 59 | for element in indexlist.select(&selector) { 60 | // skip divider 61 | let hr_selector = Selector::parse("hr").unwrap(); 62 | if element.select(&hr_selector).next().is_some() { 63 | continue; 64 | } 65 | // skip table title 66 | let a_selector = Selector::parse("a").unwrap(); 67 | let hrefs: Vec<&str> = element 68 | .select(&a_selector) 69 | .map(|a| a.value().attr("href").unwrap_or("?")) 70 | .collect(); 71 | // Empty or all query string hrefs 72 | if hrefs.iter().all(|h| h.starts_with('?')) { 73 | let lastmod_pos = element.inner_html().to_lowercase().find("last modified"); 74 | let size_pos = element.inner_html().to_lowercase().find("size"); 75 | if let (Some(lastmod_pos), Some(size_pos)) = (lastmod_pos, size_pos) { 76 | lastmod_before_size = lastmod_pos < size_pos; 77 | } 78 | continue; 79 | } 80 | 81 | let td_selector = Selector::parse("td").unwrap(); 82 | let mut td_iterator = element.select(&td_selector); 83 | // skip icon (first col) 84 | td_iterator.next(); 85 | let td = td_iterator 86 | .next() 87 | .ok_or(anyhow!("no more td after first iterate"))?; 88 | let a = td.select(&a_selector).next().unwrap(); 89 | let displayed_filename = a.inner_html(); 90 | if displayed_filename == "Parent Directory" || displayed_filename == ".." { 91 | continue; 92 | } 93 | 94 | let href = a.value().attr("href").unwrap(); 95 | let name = get_real_name_from_href(href); 96 | let href = url.join(href)?; 97 | let type_ = if href.as_str().ends_with('/') { 98 | FileType::Directory 99 | } else { 100 | FileType::File 101 | }; 102 | let col2 = td_iterator 103 | .next() 104 | .ok_or(anyhow!("no more td after second iterate"))? 105 | .inner_html(); 106 | let col2 = col2.trim(); 107 | let col3 = td_iterator 108 | .next() 109 | .ok_or(anyhow!("no more td after third iterate"))? 
110 | .inner_html(); 111 | let col3 = col3.trim(); 112 | 113 | let (lastmod, size) = if lastmod_before_size { 114 | (col2, col3) 115 | } else { 116 | (col3, col2) 117 | }; 118 | 119 | // debug!("{} {} {} {}", href, name, lastmod, size); 120 | 121 | let date = if lastmod.is_empty() && type_ == FileType::Directory { 122 | // if it's a directory, it's okay to have empty lastmod 123 | NaiveDateTime::default() 124 | } else { 125 | debug!("lastmod: {}", lastmod); 126 | let (date_fmt, _) = guess_date_fmt(lastmod); 127 | NaiveDateTime::parse_from_str(lastmod, &date_fmt)? 128 | }; 129 | 130 | items.push(ListItem::new( 131 | href, 132 | name.to_string(), 133 | type_, 134 | { 135 | if size == "-" || size.is_empty() { 136 | None 137 | } else { 138 | let (n_size, unit) = FileSize::get_humanized(size); 139 | Some(FileSize::HumanizedBinary(n_size, unit)) 140 | } 141 | }, 142 | date, 143 | None, 144 | )) 145 | } 146 | 147 | Ok(ListResult::List(items)) 148 | } 149 | } 150 | 151 | #[cfg(test)] 152 | mod tests { 153 | use crate::listing::SizeUnit; 154 | 155 | use super::*; 156 | use crate::parser::tests::*; 157 | 158 | #[test] 159 | fn test_winehq_root() { 160 | let context = init_async_context(); 161 | let items = ApacheF2ListingParser 162 | .get_list( 163 | &context, 164 | &url::Url::parse("http://localhost:1921/wine-builds").unwrap(), 165 | ) 166 | .unwrap(); 167 | match items { 168 | ListResult::List(items) => { 169 | assert_eq!(items.len(), 8); 170 | assert_eq!(items[0].name, "android"); 171 | assert_eq!(items[0].type_, FileType::Directory); 172 | assert_eq!(items[0].size, None); 173 | assert_eq!( 174 | items[0].mtime, 175 | NaiveDateTime::parse_from_str("2022-01-18 15:14", "%Y-%m-%d %H:%M").unwrap() 176 | ); 177 | assert_eq!(items[6].name, "Release.key"); 178 | assert_eq!(items[6].type_, FileType::File); 179 | assert_eq!( 180 | items[6].size, 181 | Some(FileSize::HumanizedBinary(3.0, SizeUnit::K)) 182 | ); 183 | assert_eq!( 184 | items[6].mtime, 185 | 
NaiveDateTime::parse_from_str("2017-03-28 14:54", "%Y-%m-%d %H:%M").unwrap() 186 | ); 187 | } 188 | _ => unreachable!(), 189 | } 190 | } 191 | 192 | #[test] 193 | fn test_raspberrypi_root() { 194 | let context = init_async_context(); 195 | let items = ApacheF2ListingParser 196 | .get_list( 197 | &context, 198 | &url::Url::parse("http://localhost:1921/raspberrypi/").unwrap(), 199 | ) 200 | .unwrap(); 201 | match items { 202 | ListResult::List(items) => { 203 | assert_eq!(items.len(), 61); 204 | assert_eq!(items[0].name, "AstroPi"); 205 | assert_eq!(items[0].type_, FileType::Directory); 206 | assert_eq!(items[0].size, None); 207 | assert_eq!( 208 | items[0].mtime, 209 | NaiveDateTime::parse_from_str("2017-09-04 15:41", "%Y-%m-%d %H:%M").unwrap() 210 | ); 211 | assert_eq!(items[6].name, "Raspberry_Pi_Education_Manual.pdf"); 212 | assert_eq!(items[6].type_, FileType::File); 213 | assert_eq!( 214 | items[6].size, 215 | Some(FileSize::HumanizedBinary(2.8, SizeUnit::M)) 216 | ); 217 | assert_eq!( 218 | items[6].mtime, 219 | NaiveDateTime::parse_from_str("2013-09-16 13:51", "%Y-%m-%d %H:%M").unwrap() 220 | ); 221 | } 222 | _ => unreachable!(), 223 | } 224 | } 225 | 226 | #[test] 227 | fn test_mozilla_root() { 228 | let context = init_async_context(); 229 | let items = ApacheF2ListingParser 230 | .get_list( 231 | &context, 232 | &url::Url::parse("http://localhost:1921/mozilla/").unwrap(), 233 | ) 234 | .unwrap(); 235 | match items { 236 | ListResult::List(items) => { 237 | assert_eq!(items.len(), 46); 238 | assert_eq!(items[0].name, "OJI"); 239 | assert_eq!(items[0].type_, FileType::Directory); 240 | assert_eq!(items[0].size, None); 241 | assert_eq!( 242 | items[0].mtime, 243 | NaiveDateTime::parse_from_str("1970-01-01 00:00", "%Y-%m-%d %H:%M").unwrap() 244 | ); 245 | } 246 | _ => unreachable!(), 247 | } 248 | } 249 | 250 | #[test] 251 | fn test_mozilla_oji() { 252 | let context = init_async_context(); 253 | let items = ApacheF2ListingParser 254 | .get_list( 255 | &context, 
256 | &url::Url::parse("http://localhost:1921/mozilla/OJI/").unwrap(), 257 | ) 258 | .unwrap(); 259 | match items { 260 | ListResult::List(items) => { 261 | assert_eq!(items.len(), 2); 262 | assert_eq!(items[0].name, "MRJPlugin"); 263 | assert_eq!(items[0].type_, FileType::Directory); 264 | assert_eq!(items[0].size, None); 265 | assert_eq!( 266 | items[0].mtime, 267 | NaiveDateTime::parse_from_str("1970-01-01 00:00", "%Y-%m-%d %H:%M").unwrap() 268 | ); 269 | assert_eq!(items[1].name, "MRJPlugin.sit.hqx"); 270 | assert_eq!(items[1].type_, FileType::File); 271 | assert_eq!( 272 | items[1].size, 273 | Some(FileSize::HumanizedBinary(234.0, SizeUnit::K)) 274 | ); 275 | assert_eq!( 276 | items[1].mtime, 277 | NaiveDateTime::parse_from_str("2023-02-13 04:21", "%Y-%m-%d %H:%M").unwrap() 278 | ); 279 | } 280 | _ => unreachable!(), 281 | } 282 | } 283 | 284 | #[test] 285 | fn test_grml() { 286 | let context = init_async_context(); 287 | let items = ApacheF2ListingParser 288 | .get_list( 289 | &context, 290 | &url::Url::parse("http://localhost:1921/grml/").unwrap(), 291 | ) 292 | .unwrap(); 293 | match items { 294 | ListResult::List(items) => { 295 | assert_eq!(items.len(), 10); 296 | // Test "+" 297 | assert_eq!(items[3].name, "memtest86+"); 298 | } 299 | _ => unreachable!(), 300 | } 301 | } 302 | } 303 | -------------------------------------------------------------------------------- /src/parser/caddy.rs: -------------------------------------------------------------------------------- 1 | /// A parser for default caddy file_server format 2 | use crate::{ 3 | listing::{FileSize, FileType, ListItem}, 4 | utils::get, 5 | }; 6 | 7 | use super::*; 8 | use anyhow::Result; 9 | use chrono::NaiveDateTime; 10 | use scraper::{Html, Selector}; 11 | 12 | #[derive(Debug, Clone, Default)] 13 | pub struct CaddyListingParser; 14 | 15 | impl Parser for CaddyListingParser { 16 | fn name(&self) -> &'static str { 17 | "Caddy" 18 | } 19 | 20 | fn get_list( 21 | &self, 22 | async_context: 
&AsyncContext, 23 | url: &url::Url, 24 | ) -> Result { 25 | let resp = get( 26 | &async_context.runtime, 27 | &async_context.listing_client, 28 | url.clone(), 29 | )?; 30 | let url = resp.url().clone(); 31 | let body = get_text(&async_context.runtime, resp)?; 32 | assert_if_url_has_no_trailing_slash(&url); 33 | let document = Html::parse_document(&body); 34 | let selector = Selector::parse("tr.file").unwrap(); 35 | let mut items = Vec::new(); 36 | for element in document.select(&selector) { 37 | // name and herf 38 | let selector = Selector::parse("td a").unwrap(); 39 | let a = element.select(&selector).next().unwrap(); 40 | let href = a.value().attr("href").unwrap(); 41 | // Caddy file_server will append "./" to href 42 | let name = get_real_name_from_href(href) 43 | .trim_start_matches("./") 44 | .to_string(); 45 | let href = url.join(href)?; 46 | let type_ = if href.as_str().ends_with('/') { 47 | FileType::Directory 48 | } else { 49 | FileType::File 50 | }; 51 | // size 52 | let selector = Selector::parse("td.size div.sizebar div.sizebar-text").unwrap(); 53 | let size = match element.select(&selector).next() { 54 | Some(s) => { 55 | let size_text = s.inner_html(); 56 | // ↱  would be added by caddy when it's a symlink 57 | // https://github.com/caddyserver/caddy/commit/9338741ca79a74247ced86bc26e4994138470852 58 | let size_text = size_text.trim().trim_start_matches("↱ "); 59 | let (n_size, unit) = FileSize::get_humanized(size_text); 60 | Some(FileSize::HumanizedBinary(n_size, unit)) 61 | } 62 | None => None, 63 | }; 64 | // date 65 | let selector = Selector::parse("td.timestamp time").unwrap(); 66 | let mtime = element 67 | .select(&selector) 68 | .next() 69 | .unwrap() 70 | .value() 71 | .attr("datetime") 72 | .unwrap() 73 | .trim(); 74 | // Store UTC time 75 | let date = NaiveDateTime::parse_from_str(mtime, "%Y-%m-%dT%H:%M:%S%Z")?; 76 | 77 | items.push(ListItem::new(href, name, type_, size, date, None)) 78 | } 79 | 80 | Ok(ListResult::List(items)) 81 | } 82 | 
} 83 | 84 | #[cfg(test)] 85 | mod tests { 86 | use crate::listing::SizeUnit; 87 | 88 | use super::*; 89 | use crate::parser::tests::*; 90 | 91 | #[test] 92 | fn test_sdumirror_ubuntu() { 93 | let context = init_async_context(); 94 | let items = CaddyListingParser 95 | .get_list( 96 | &context, 97 | &url::Url::parse("http://localhost:1921/sdumirror-ubuntu").unwrap(), 98 | ) 99 | .unwrap(); 100 | match items { 101 | ListResult::List(items) => { 102 | assert_eq!(items.len(), 7); 103 | assert_eq!(items[0].name, ".trace"); 104 | assert_eq!(items[0].type_, FileType::Directory); 105 | assert_eq!(items[0].size, None); 106 | assert_eq!( 107 | items[0].mtime, 108 | NaiveDateTime::parse_from_str("2023-07-10T13:07:52Z", "%Y-%m-%dT%H:%M:%S%Z") 109 | .unwrap() 110 | ); 111 | assert_eq!(items[5].name, "ubuntu"); 112 | assert_eq!(items[5].type_, FileType::Directory); 113 | assert_eq!(items[5].size, None); 114 | assert_eq!( 115 | items[5].mtime, 116 | NaiveDateTime::parse_from_str("2010-11-24T11:01:53Z", "%Y-%m-%dT%H:%M:%S%Z") 117 | .unwrap() 118 | ); 119 | assert_eq!(items[6].name, "ls-lR.gz"); 120 | assert_eq!(items[6].type_, FileType::File); 121 | assert_eq!( 122 | items[6].size, 123 | Some(FileSize::HumanizedBinary(26.0, SizeUnit::M)) 124 | ); 125 | assert_eq!( 126 | items[6].mtime, 127 | NaiveDateTime::parse_from_str("2024-03-10T04:45:24Z", "%Y-%m-%dT%H:%M:%S%Z") 128 | .unwrap() 129 | ); 130 | } 131 | _ => unreachable!(), 132 | } 133 | } 134 | 135 | #[test] 136 | fn test_caddy_symlink() { 137 | let context = init_async_context(); 138 | let items = CaddyListingParser 139 | .get_list( 140 | &context, 141 | &url::Url::parse("http://localhost:1921/caddy-symlink").unwrap(), 142 | ) 143 | .unwrap(); 144 | match items { 145 | ListResult::List(items) => { 146 | assert_eq!(items.len(), 3); 147 | assert_eq!(items[0].name, "aoi.png"); 148 | assert_eq!(items[0].type_, FileType::File); 149 | assert_eq!( 150 | items[0].size, 151 | Some(FileSize::HumanizedBinary(32.0, SizeUnit::K)) 152 | ); 
153 | assert_eq!( 154 | items[0].mtime, 155 | NaiveDateTime::parse_from_str("2022-11-19T19:15:45Z", "%Y-%m-%dT%H:%M:%S%Z") 156 | .unwrap() 157 | ); 158 | assert_eq!(items[1].name, "index.html.bak"); 159 | assert_eq!(items[1].type_, FileType::File); 160 | assert_eq!( 161 | items[1].size, 162 | Some(FileSize::HumanizedBinary(143.0, SizeUnit::B)) 163 | ); 164 | assert_eq!( 165 | items[1].mtime, 166 | NaiveDateTime::parse_from_str("2022-11-19T19:14:38Z", "%Y-%m-%dT%H:%M:%S%Z") 167 | .unwrap() 168 | ); 169 | assert_eq!(items[2].name, "symlink"); 170 | assert_eq!(items[2].type_, FileType::File); 171 | assert_eq!( 172 | items[2].size, 173 | Some(FileSize::HumanizedBinary(143.0, SizeUnit::B)) 174 | ); 175 | assert_eq!( 176 | items[2].mtime, 177 | NaiveDateTime::parse_from_str("2025-02-27T10:45:49Z", "%Y-%m-%dT%H:%M:%S%Z") 178 | .unwrap() 179 | ); 180 | } 181 | _ => unreachable!(), 182 | } 183 | } 184 | } 185 | -------------------------------------------------------------------------------- /src/parser/denoflare_r2.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | listing::{FileSize, FileType, ListItem}, 3 | parser::{assert_if_url_has_no_trailing_slash, get_real_name_from_href}, 4 | utils::{get, get_text}, 5 | AsyncContext, 6 | }; 7 | 8 | use super::{ListResult, Parser, ParserError}; 9 | use anyhow::Result; 10 | use chrono::{FixedOffset, NaiveDateTime}; 11 | use scraper::CaseSensitivity::*; 12 | use scraper::{Html, Selector}; 13 | use tracing::info; 14 | 15 | #[derive(Debug, Clone, Default)] 16 | pub struct DenoFlareR2ListingParser; 17 | 18 | // Ref: https://github.com/skymethod/denoflare/blob/2e89fb33972a924dd9c5078bb2b2834a1f619081/examples/r2-public-read-worker/listing.ts 19 | 20 | impl Parser for DenoFlareR2ListingParser { 21 | fn name(&self) -> &'static str { 22 | "DenoFlare R2 Public Read Worker example" 23 | } 24 | 25 | fn get_list( 26 | &self, 27 | async_context: &AsyncContext, 28 | url: &url::Url, 29 | ) -> 
Result { 30 | let mut documents = vec![]; 31 | assert_if_url_has_no_trailing_slash(url); 32 | let mut inner_url = url.clone(); 33 | loop { 34 | info!("(in paging loop) Fetching: {}", inner_url); 35 | let resp = get( 36 | &async_context.runtime, 37 | &async_context.listing_client, 38 | inner_url.clone(), 39 | )?; 40 | let body = get_text(&async_context.runtime, resp)?; 41 | let document = Html::parse_document(&body); 42 | documents.push((inner_url.clone(), document.clone())); 43 | 44 | // Check if last element of #contents is next ➜ 45 | let selector = Selector::parse("div#contents").unwrap(); 46 | let contents = document 47 | .select(&selector) 48 | .next() 49 | .expect("
    not found"); 50 | // 51 | let last_child = contents 52 | .child_elements() 53 | .last() 54 | .expect("Expected last child"); 55 | // next ➜ 56 | let last_child = match last_child.last_child() { 57 | Some(child) => child, 58 | None => break, 59 | }; 60 | // next ➜ 61 | let textnode = match last_child.first_child() { 62 | Some(child) => child, 63 | _ => break, 64 | }; 65 | if textnode.value().as_text().map_or("", |t| t) != "next ➜" { 66 | break; 67 | } 68 | let href = last_child 69 | .value() 70 | .as_element() 71 | .unwrap() 72 | .attr("href") 73 | .unwrap(); 74 | inner_url = url.join(href)?; 75 | } 76 | let mut items = Vec::new(); 77 | for (url, document) in documents { 78 | let selector = Selector::parse("div#contents").unwrap(); 79 | let contents = document 80 | .select(&selector) 81 | .next() 82 | .expect("
    not found"); 83 | 84 | enum State { 85 | Start, 86 | Dirs, 87 | Files, 88 | } 89 | let mut state = State::Start; 90 | 91 | let mut iter = contents.child_elements().peekable(); 92 | while let Some(child) = iter.next() { 93 | match state { 94 | State::Start => { 95 | if child.value().name() == "div" 96 | && child.value().has_class("full", CaseSensitive) 97 | && child.text().next().unwrap_or_default() == "\u{a0}" 98 | //   99 | { 100 | // peek 101 | let next_elem = iter.peek().expect("Expected next element"); 102 | let class_is_full = next_elem.value().has_class("full", CaseSensitive); 103 | if class_is_full { 104 | state = State::Dirs; 105 | } else { 106 | state = State::Files; 107 | } 108 | } 109 | } 110 | State::Dirs => { 111 | if child.value().name() == "div" { 112 | assert!( 113 | child.value().has_class("full", CaseSensitive), 114 | "Expected class=\"full\" as end of dirs" 115 | ); 116 | assert!( 117 | child.text().next().unwrap_or_default() == "\u{a0}", 118 | "Expected   as end of dirs" 119 | ); 120 | state = State::Files; 121 | continue; 122 | } 123 | assert!(child.value().name() == "a", "Expected in dirs"); 124 | let href = child.value().attr("href").expect("href not found"); 125 | let name = get_real_name_from_href(href); 126 | let href = url.join(href)?; 127 | items.push(ListItem::new( 128 | href, 129 | name, 130 | FileType::Directory, 131 | None, 132 | NaiveDateTime::UNIX_EPOCH, 133 | None, 134 | )); 135 | } 136 | State::Files => { 137 | if child.value().name() == "div" { 138 | assert!( 139 | child.value().has_class("full", CaseSensitive), 140 | "Expected class=\"full\" as end of files, if paging required." 
141 | ); 142 | break; 143 | } 144 | assert!(child.value().name() == "a", "Expected in files"); 145 | let href = child.value().attr("href").expect("href not found"); 146 | if href.ends_with('/') { 147 | for _ in 0..3 { 148 | iter.next(); 149 | } 150 | continue; 151 | } 152 | let name = get_real_name_from_href(href); 153 | let child = iter.next().expect("Expected next child"); 154 | let size = child 155 | .text() 156 | .next() 157 | .expect("Expected size text") 158 | .replace(',', ""); // bytes 159 | let size = size.parse::().expect("Expected size to be u64"); 160 | iter.next(); // skip estimated size 161 | let mtime = iter 162 | .next() 163 | .expect("Expected mtime") 164 | .text() 165 | .next() 166 | .expect("Expected mtime text"); 167 | let mtime = NaiveDateTime::parse_from_str(mtime, "%Y-%m-%dT%H:%M:%S.%3fZ") 168 | .expect("Expected mtime to be NaiveDateTime"); 169 | let href = url.join(href)?; 170 | items.push(ListItem::new( 171 | href, 172 | name, 173 | FileType::File, 174 | Some(FileSize::Precise(size)), 175 | mtime, 176 | FixedOffset::east_opt(0), 177 | )); 178 | } 179 | } 180 | } 181 | } 182 | 183 | Ok(ListResult::List(items)) 184 | } 185 | } 186 | 187 | #[cfg(test)] 188 | mod tests { 189 | use url::Url; 190 | 191 | use crate::parser::tests::*; 192 | 193 | use super::*; 194 | 195 | #[test] 196 | fn test_clickhouse() { 197 | let context = init_async_context(); 198 | let items = DenoFlareR2ListingParser 199 | .get_list( 200 | &context, 201 | &Url::parse("http://localhost:1921/clickhouse/").unwrap(), 202 | ) 203 | .unwrap(); 204 | match items { 205 | ListResult::List(items) => { 206 | assert_eq!(items.len(), 4); 207 | assert_eq!(items[0].name, "deb"); 208 | assert_eq!(items[0].type_, FileType::Directory); 209 | assert_eq!(items[1].name, "rpm"); 210 | assert_eq!(items[1].type_, FileType::Directory); 211 | assert_eq!(items[2].name, "tgz"); 212 | assert_eq!(items[2].type_, FileType::Directory); 213 | assert_eq!(items[3].name, "CLICKHOUSE-KEY.GPG"); 214 | 
assert_eq!(items[3].type_, FileType::File); 215 | assert_eq!(items[3].size, Some(FileSize::Precise(3133))); 216 | assert_eq!( 217 | items[3].mtime, 218 | NaiveDateTime::parse_from_str( 219 | "2022-09-23 13:53:51.925", 220 | "%Y-%m-%d %H:%M:%S.%3f" 221 | ) 222 | .unwrap() 223 | ); 224 | assert_eq!(items[3].timezone, FixedOffset::east_opt(0)); 225 | } 226 | _ => unreachable!(), 227 | } 228 | } 229 | 230 | #[test] 231 | fn test_clickhouse_fileonly() { 232 | let context = init_async_context(); 233 | let items = DenoFlareR2ListingParser 234 | .get_list( 235 | &context, 236 | &Url::parse("http://localhost:1921/clickhouse/clickhouse-client/").unwrap(), 237 | ) 238 | .unwrap(); 239 | match items { 240 | ListResult::List(items) => { 241 | assert_eq!(items.len(), 61); 242 | assert_eq!(items[0].name, "clickhouse-client_22.3.10.22_amd64.deb"); 243 | assert_eq!(items[0].type_, FileType::File); 244 | } 245 | _ => unreachable!(), 246 | } 247 | } 248 | 249 | #[test] 250 | fn test_clickhouse_multipage() { 251 | let context = init_async_context(); 252 | let items = DenoFlareR2ListingParser 253 | .get_list( 254 | &context, 255 | &Url::parse("http://localhost:1921/clickhouse/stable/").unwrap(), 256 | ) 257 | .unwrap(); 258 | match items { 259 | ListResult::List(items) => { 260 | assert_eq!(items.len(), 4); 261 | assert_eq!(items[0].name, "clickhouse-client-21.1.9.41.tgz.sha512"); 262 | assert_eq!(items[3].name, "clickhouse-client-23.7.3.14-arm64.tgz"); 263 | } 264 | _ => unreachable!(), 265 | } 266 | } 267 | } 268 | -------------------------------------------------------------------------------- /src/parser/directory_lister.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | listing::{FileSize, FileType, ListItem}, 3 | utils::get, 4 | }; 5 | 6 | use super::*; 7 | use anyhow::Result; 8 | use chrono::NaiveDateTime; 9 | use scraper::{Html, Selector}; 10 | 11 | #[derive(Debug, Clone, Default)] 12 | pub struct 
DirectoryListerListingParser; 13 | 14 | impl Parser for DirectoryListerListingParser { 15 | fn name(&self) -> &'static str { 16 | "Directory Lister" 17 | } 18 | 19 | fn get_path(&self, url: &Url) -> PathBuf { 20 | // Extract things after ?dir 21 | let dir = url 22 | .query_pairs() 23 | .find(|(key, _value)| key == "dir") 24 | .map(|(_key, value)| value.to_string()); 25 | let mut dir = match dir { 26 | Some(d) => d, 27 | None => return PathBuf::from(url.path()), 28 | }; 29 | if !dir.starts_with('/') { 30 | dir.insert(0, '/'); 31 | } 32 | if !dir.ends_with('/') { 33 | dir.push('/'); 34 | } 35 | PathBuf::from(dir) 36 | } 37 | 38 | fn get_list( 39 | &self, 40 | async_context: &AsyncContext, 41 | url: &url::Url, 42 | ) -> Result { 43 | let resp = get( 44 | &async_context.runtime, 45 | &async_context.listing_client, 46 | url.clone(), 47 | )?; 48 | let url = resp.url().clone(); 49 | let body = get_text(&async_context.runtime, resp)?; 50 | assert_if_url_has_no_trailing_slash(&url); 51 | let document = Html::parse_document(&body); 52 | // https://github.com/DirectoryLister/DirectoryLister/blob/0283f14aa1fbd97796f753e8d6105c752546050f/app/views/components/file.twig 53 | 54 | // find
      which contains file index 55 | let selector = Selector::parse("ul").unwrap(); 56 | let indexlist = document.select(&selector).next().unwrap(); 57 | // find second
    • 58 | let selector = Selector::parse("li").unwrap(); 59 | let indexlist = indexlist.select(&selector).nth(1).unwrap(); 60 | let selector = Selector::parse("a").unwrap(); 61 | let mut items = Vec::new(); 62 | for element in indexlist.select(&selector) { 63 | let href = element.value().attr("href").unwrap(); 64 | let href = url.join(href)?; 65 | // displayed file name, class = "flex-1 truncate" 66 | let selector = Selector::parse("div.flex-1.truncate").unwrap(); 67 | let displayed_filename = element.select(&selector).next().unwrap().inner_html(); 68 | let displayed_filename = displayed_filename.trim(); 69 | // size, class = "hidden whitespace-nowrap text-right mx-2 w-1/6 sm:block" 70 | let selector = Selector::parse("div.hidden.whitespace-nowrap.text-right.mx-2").unwrap(); 71 | let size = element.select(&selector).next().unwrap().inner_html(); 72 | let size = size.trim(); 73 | // mtime, class = "hidden whitespace-nowrap text-right truncate ml-2 w-1/4 sm:block" 74 | let selector = 75 | Selector::parse("div.hidden.whitespace-nowrap.text-right.truncate.ml-2").unwrap(); 76 | let mtime = element.select(&selector).next().unwrap().inner_html(); 77 | let mtime = mtime.trim(); 78 | 79 | if displayed_filename == ".." 
{ 80 | continue; 81 | } 82 | let type_ = if size == "—" { 83 | FileType::Directory 84 | } else { 85 | FileType::File 86 | }; 87 | let date = NaiveDateTime::parse_from_str(mtime, "%Y-%m-%d %H:%M:%S")?; 88 | items.push(ListItem::new( 89 | href, 90 | displayed_filename.to_string(), 91 | type_, 92 | { 93 | if size == "—" { 94 | None 95 | } else { 96 | let (n_size, unit) = FileSize::get_humanized(size); 97 | Some(FileSize::HumanizedBinary(n_size, unit)) 98 | } 99 | }, 100 | date, 101 | None, 102 | )) 103 | } 104 | 105 | Ok(ListResult::List(items)) 106 | } 107 | } 108 | 109 | #[cfg(test)] 110 | mod tests { 111 | use url::Url; 112 | 113 | use crate::listing::SizeUnit; 114 | 115 | use super::*; 116 | use crate::parser::tests::*; 117 | 118 | #[test] 119 | fn test_vyos() { 120 | let context = init_async_context(); 121 | let items = DirectoryListerListingParser 122 | .get_list( 123 | &context, 124 | &url::Url::parse("http://localhost:1921/vyos/").unwrap(), 125 | ) 126 | .unwrap(); 127 | match items { 128 | ListResult::List(items) => { 129 | assert_eq!(items.len(), 7); 130 | assert_eq!(items[0].name, "main"); 131 | assert_eq!(items[0].type_, FileType::Directory); 132 | assert_eq!(items[0].size, None); 133 | assert_eq!( 134 | items[0].mtime, 135 | NaiveDateTime::parse_from_str("2023-08-07 21:11:02", "%Y-%m-%d %H:%M:%S") 136 | .unwrap() 137 | ); 138 | assert_eq!( 139 | items[0].url, 140 | Url::parse( 141 | "http://localhost:1921/vyos/?dir=repositories/current/dists/current/main" 142 | ) 143 | .unwrap() 144 | ); 145 | assert_eq!(items[4].name, "Contents-amd64.gz"); 146 | assert_eq!(items[4].type_, FileType::File); 147 | assert_eq!( 148 | items[4].size, 149 | Some(FileSize::HumanizedBinary(1.80, SizeUnit::M)) 150 | ); 151 | assert_eq!( 152 | items[4].mtime, 153 | NaiveDateTime::parse_from_str("2023-08-07 21:10:57", "%Y-%m-%d %H:%M:%S") 154 | .unwrap() 155 | ); 156 | assert_eq!(items[4].url, 
Url::parse("http://localhost:1921/vyos/repositories/current/dists/current/Contents-amd64.gz").unwrap()); 157 | } 158 | _ => unreachable!(), 159 | } 160 | } 161 | 162 | #[test] 163 | fn test_vyos_2() { 164 | let context = init_async_context(); 165 | let items = DirectoryListerListingParser 166 | .get_list( 167 | &context, 168 | &url::Url::parse("http://localhost:1921/vyos/vyos-accel-ppp/").unwrap(), 169 | ) 170 | .unwrap(); 171 | match items { 172 | ListResult::List(items) => { 173 | assert_eq!(items.len(), 3); 174 | } 175 | _ => unreachable!(), 176 | } 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /src/parser/docker.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | listing::{FileSize, FileType, ListItem}, 3 | utils::get, 4 | }; 5 | use chrono::NaiveDateTime; 6 | use scraper::{Html, Selector}; 7 | // use tracing::debug; 8 | 9 | use super::*; 10 | use anyhow::Result; 11 | use regex::Regex; 12 | 13 | #[derive(Debug, Clone)] 14 | pub struct DockerListingParser { 15 | metadata_regex: Regex, 16 | } 17 | 18 | impl Default for DockerListingParser { 19 | fn default() -> Self { 20 | Self { 21 | metadata_regex: Regex::new( 22 | r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}(:\d{2})?)\s+([\d \w\.-]+)$", 23 | ) 24 | .unwrap(), 25 | } 26 | } 27 | } 28 | 29 | impl Parser for DockerListingParser { 30 | fn name(&self) -> &'static str { 31 | "download.docker.com" 32 | } 33 | 34 | fn is_auto_redirect(&self) -> bool { 35 | false 36 | } 37 | 38 | fn get_list( 39 | &self, 40 | async_context: &AsyncContext, 41 | url: &url::Url, 42 | ) -> Result { 43 | assert_if_url_has_no_trailing_slash(url); 44 | let resp = get( 45 | &async_context.runtime, 46 | &async_context.listing_client, 47 | url.clone(), 48 | )?; 49 | // if is a redirect? 
50 | if let Some(url) = resp.headers().get("location") { 51 | let mut url = url.to_str()?.to_string(); 52 | // replace /index.html at the end to / 53 | if url.ends_with("/index.html") { 54 | url = url.trim_end_matches("/index.html").to_string(); 55 | url.push('/'); 56 | } 57 | return Ok(ListResult::Redirect(url)); 58 | } 59 | let body = get_text(&async_context.runtime, resp)?; 60 | let document = Html::parse_document(&body); 61 | let selector = Selector::parse("a").unwrap(); 62 | let mut items = Vec::new(); 63 | for element in document.select(&selector) { 64 | let href = match element.value().attr("href") { 65 | Some(href) => href, 66 | None => continue, 67 | }; 68 | let name = get_real_name_from_href(href); 69 | let mut href = url.join(href)?; 70 | 71 | if name == ".." { 72 | continue; 73 | } 74 | 75 | let displayed_name = element.inner_html(); 76 | 77 | let (type_, size, date) = { 78 | if href.as_str().ends_with('/') || displayed_name.ends_with('/') { 79 | (FileType::Directory, None, NaiveDateTime::default()) 80 | } else { 81 | let metadata_raw = element 82 | .next_sibling() 83 | .unwrap() 84 | .value() 85 | .as_text() 86 | .unwrap() 87 | .to_string(); 88 | let metadata_raw = metadata_raw.trim(); 89 | let metadata = self.metadata_regex.captures(metadata_raw).unwrap(); 90 | let date = metadata.get(1).unwrap().as_str(); 91 | let date = match NaiveDateTime::parse_from_str(date, "%Y-%m-%d %H:%M:%S") { 92 | Ok(date) => date, 93 | Err(_) => NaiveDateTime::parse_from_str(date, "%Y-%m-%d %H:%M").unwrap(), 94 | }; 95 | let size = metadata.get(3).unwrap().as_str(); 96 | if size == "-" { 97 | (FileType::Directory, None, date) 98 | } else { 99 | let (n_size, unit) = FileSize::get_humanized(size); 100 | ( 101 | FileType::File, 102 | Some(FileSize::HumanizedBinary(n_size, unit)), 103 | date, 104 | ) 105 | } 106 | } 107 | }; 108 | if type_ == FileType::Directory && !href.path().ends_with('/') { 109 | href.set_path(&format!("{}/", href.path())); 110 | } 111 | 112 | 
items.push(ListItem::new( 113 | href, 114 | name.to_string(), 115 | type_, 116 | size, 117 | date, 118 | None, 119 | )) 120 | } 121 | Ok(ListResult::List(items)) 122 | } 123 | } 124 | 125 | #[cfg(test)] 126 | mod tests { 127 | use crate::listing::SizeUnit; 128 | 129 | use super::*; 130 | use crate::parser::tests::*; 131 | 132 | #[test] 133 | fn test_docker() { 134 | let context = init_async_context(); 135 | let items = DockerListingParser::default() 136 | .get_list( 137 | &context, 138 | &url::Url::parse("http://localhost:1921/docker/").unwrap(), 139 | ) 140 | .unwrap(); 141 | match items { 142 | ListResult::List(items) => { 143 | assert_eq!(items.len(), 45); 144 | assert_eq!(items[0].name, "7.0"); 145 | assert_eq!(items[0].type_, FileType::Directory); 146 | assert_eq!(items[0].size, None); 147 | assert_eq!(items[0].mtime, NaiveDateTime::default()); 148 | assert_eq!(items[42].name, "docker-ce-staging.repo"); 149 | assert_eq!(items[42].type_, FileType::File); 150 | assert_eq!( 151 | items[42].size, 152 | Some(FileSize::HumanizedBinary(2.0, SizeUnit::K)) 153 | ); 154 | assert_eq!( 155 | items[42].mtime, 156 | NaiveDateTime::parse_from_str("2023-07-07 20:20:56", "%Y-%m-%d %H:%M:%S") 157 | .unwrap() 158 | ); 159 | } 160 | _ => unreachable!(), 161 | } 162 | } 163 | 164 | #[test] 165 | fn test_docker_2() { 166 | let context = init_async_context(); 167 | let items = DockerListingParser::default() 168 | .get_list( 169 | &context, 170 | &url::Url::parse("http://localhost:1921/docker/armv7l/").unwrap(), 171 | ) 172 | .unwrap(); 173 | match items { 174 | ListResult::List(items) => { 175 | assert_eq!(items.len(), 2); 176 | assert_eq!(items[0].name, "nightly"); 177 | assert_eq!(items[0].type_, FileType::Directory); 178 | assert_eq!(items[0].size, None); 179 | // Don't compare folder mtime here... 
180 | // assert_eq!( 181 | // items[0].mtime, 182 | // NaiveDateTime::parse_from_str("2020-01-21 07:38", "%Y-%m-%d %H:%M").unwrap() 183 | // ); 184 | assert_eq!(items[1].name, "test"); 185 | assert_eq!(items[1].type_, FileType::Directory); 186 | assert_eq!(items[1].size, None); 187 | // assert_eq!( 188 | // items[1].mtime, 189 | // NaiveDateTime::parse_from_str("2020-01-21 07:38", "%Y-%m-%d %H:%M").unwrap() 190 | // ); 191 | } 192 | _ => unreachable!(), 193 | } 194 | } 195 | } 196 | -------------------------------------------------------------------------------- /src/parser/fallback.rs: -------------------------------------------------------------------------------- 1 | // An inefficient fallback parser only for non-listing HTML. 2 | // Read docs/parser.md for known limitations. 3 | 4 | use crate::{ 5 | listing::{FileSize, FileType, ListItem}, 6 | utils::{get, get_response_mtime, head}, 7 | }; 8 | use scraper::{Html, Selector}; 9 | use tracing::debug; 10 | 11 | use super::*; 12 | 13 | #[derive(Debug, Clone, Default)] 14 | pub struct FallbackParser; 15 | 16 | const INDEX: [&str; 2] = ["index.html", "index.htm"]; 17 | 18 | impl Parser for FallbackParser { 19 | fn name(&self) -> &'static str { 20 | "Fallback for non-listing directory HTML (index.html) only" 21 | } 22 | 23 | fn get_list(&self, async_context: &AsyncContext, url: &Url) -> Result { 24 | let url = if !url.path().ends_with('/') { 25 | Url::parse(&format!("{}/", url.path())).unwrap() 26 | } else { 27 | url.clone() 28 | }; 29 | let (name, resp) = { 30 | let mut final_resp = None; 31 | let mut final_name = None; 32 | for index in INDEX { 33 | let url = url.join(index).unwrap(); 34 | let resp = get( 35 | &async_context.runtime, 36 | &async_context.listing_client, 37 | url.clone(), 38 | ); 39 | match resp { 40 | Ok(r) => { 41 | final_resp = Some(r); 42 | final_name = Some(index); 43 | break; 44 | } 45 | Err(e) => { 46 | warn!("Failed to fetch {url}: {e}"); 47 | continue; 48 | } 49 | } 50 | } 51 | ( 52 | 
final_name, 53 | final_resp.ok_or(anyhow!("Does not match index list: {:?}", INDEX)), 54 | ) 55 | }; 56 | let resp = resp?; 57 | let name = name.unwrap(); 58 | let mtime = get_response_mtime(&resp) 59 | .unwrap_or(chrono::offset::Utc::now()) 60 | .naive_utc(); 61 | let url = resp.url().clone(); 62 | let body = get_text(&async_context.runtime, resp)?; 63 | let size = body.len(); 64 | let timezone = chrono::FixedOffset::east_opt(0); 65 | 66 | let document = Html::parse_document(&body); 67 | let selector = Selector::parse("a").unwrap(); 68 | let mut items = Vec::new(); 69 | // Add index file 70 | items.push(ListItem::new( 71 | url.clone(), 72 | name.to_string(), 73 | FileType::File, 74 | Some(FileSize::Precise(size as u64)), 75 | mtime, 76 | timezone, 77 | )); 78 | // Remove the "index.htm(l)" part in url 79 | let url = url.join("./").unwrap(); 80 | for element in document.select(&selector) { 81 | let href = match element.value().attr("href") { 82 | // well, what can I say... if you don't have href attribute? 
83 | None => continue, 84 | Some(h) => h, 85 | }; 86 | let href = match url.join(href) { 87 | Err(e) => { 88 | warn!("cannot join {href} to {url}: {e}, skipping"); 89 | continue; 90 | } 91 | Ok(h) => h, 92 | }; 93 | // Ignore if url is not a prefix of href 94 | if !href.as_str().starts_with(url.as_str()) { 95 | warn!("{href} is not inside {url}, skipping"); 96 | continue; 97 | } 98 | let relative_href = match url.make_relative(&href) { 99 | None => { 100 | warn!("cannot make relative of {href} from {url}"); 101 | continue; 102 | } 103 | Some(r) => r, 104 | }; 105 | let relative_href = match relative_href.find('/') { 106 | Some(idx) => relative_href[..idx + 1].to_string(), 107 | None => relative_href, 108 | }; 109 | if relative_href.is_empty() || relative_href == "/" { 110 | continue; 111 | } 112 | let name = get_real_name_from_href(&relative_href); 113 | if name.is_empty() { 114 | continue; 115 | } 116 | let href = url 117 | .join(&relative_href) 118 | .expect("unexpected error of handling URL"); 119 | let type_ = if relative_href.ends_with('/') { 120 | FileType::Directory 121 | } else { 122 | FileType::File 123 | }; 124 | 125 | // Try HEAD 126 | debug!("HEADing {href} in fallback parser"); 127 | let resp = match head( 128 | &async_context.runtime, 129 | &async_context.listing_client, 130 | href.clone(), 131 | ) { 132 | Ok(r) => r, 133 | Err(e) => { 134 | let status = e.status(); 135 | if status == Some(reqwest::StatusCode::NOT_FOUND) 136 | || status == Some(reqwest::StatusCode::FORBIDDEN) 137 | { 138 | continue; 139 | } 140 | 141 | // TODO: what to do here? 
142 | warn!("Cannot get from {}, skipping", href); 143 | continue; 144 | } 145 | }; 146 | 147 | let item = if type_ == FileType::File { 148 | let size = resp.content_length(); 149 | let mtime = match get_response_mtime(&resp) { 150 | Ok(m) => m, 151 | Err(e) => { 152 | warn!("Cannot get mtime from {href}: {e}, skipping"); 153 | continue; 154 | } 155 | }; 156 | let naive = mtime.naive_utc(); 157 | 158 | ListItem::new( 159 | href, 160 | name.to_string(), 161 | type_, 162 | size.map(FileSize::Precise), 163 | naive, 164 | timezone, 165 | ) 166 | } else { 167 | ListItem::new( 168 | href, 169 | name.to_string(), 170 | type_, 171 | None, 172 | mtime, // mtime does not matter for dir 173 | timezone, 174 | ) 175 | }; 176 | 177 | items.push(item); 178 | } 179 | 180 | Ok(ListResult::List(items)) 181 | } 182 | } 183 | 184 | #[cfg(test)] 185 | mod tests { 186 | use super::*; 187 | use crate::parser::tests::*; 188 | 189 | #[test] 190 | fn test_mimalloc() { 191 | let context = init_async_context(); 192 | let items = FallbackParser 193 | .get_list( 194 | &context, 195 | &url::Url::parse("http://localhost:1921/buildroot/mimalloc/").unwrap(), 196 | ) 197 | .unwrap(); 198 | match items { 199 | ListResult::List(items) => { 200 | assert_eq!(items.len(), 4); 201 | assert_eq!(items[0].name, "index.html"); 202 | assert_eq!(items[0].type_, FileType::File); 203 | assert_eq!(items[0].size, Some(FileSize::Precise(9369))); 204 | 205 | assert_eq!(items[3].name, "test"); 206 | assert_eq!(items[3].type_, FileType::Directory); 207 | } 208 | _ => unreachable!(), 209 | } 210 | } 211 | } 212 | -------------------------------------------------------------------------------- /src/parser/fancyindex.rs: -------------------------------------------------------------------------------- 1 | // Nginx fancyindex parser 2 | 3 | use crate::{ 4 | listing::{FileSize, FileType, ListItem}, 5 | utils::get, 6 | }; 7 | 8 | use super::*; 9 | use anyhow::Result; 10 | use chrono::{DateTime, NaiveDateTime}; 11 | use 
scraper::{Html, Selector}; 12 | 13 | #[derive(Debug, Clone, Default)] 14 | pub struct FancyIndexListingParser; 15 | 16 | impl Parser for FancyIndexListingParser { 17 | fn name(&self) -> &'static str { 18 | "Fancyindex" 19 | } 20 | 21 | fn get_list( 22 | &self, 23 | async_context: &AsyncContext, 24 | url: &url::Url, 25 | ) -> Result { 26 | let resp = get( 27 | &async_context.runtime, 28 | &async_context.listing_client, 29 | url.clone(), 30 | )?; 31 | let url = resp.url().clone(); 32 | let body = get_text(&async_context.runtime, resp)?; 33 | assert_if_url_has_no_trailing_slash(&url); 34 | let document = Html::parse_document(&body); 35 | let selector = Selector::parse("tbody tr").unwrap(); 36 | let mut items = Vec::new(); 37 | for element in document.select(&selector) { 38 | // let link_selector = Selector::parse("td.link a").unwrap(); 39 | // let size_selector = Selector::parse("td.size").unwrap(); 40 | // let date_selector = Selector::parse("td.date").unwrap(); 41 | 42 | // Select
    (header maybe?), skipping..."); 50 | continue; 51 | } 52 | }; 53 | let a = match td_a.select(&Selector::parse("a").unwrap()).next() { 54 | Some(a) => a, 55 | None => { 56 | return Err(anyhow!("Cannot find in first cell.").into()); 57 | } 58 | }; 59 | let href = a.value().attr("href").unwrap(); 60 | let displayed_filename = a.inner_html(); 61 | 62 | if displayed_filename == "Parent Directory/" || href == "../" { 63 | continue; 64 | } 65 | 66 | let name = get_real_name_from_href(href); 67 | let href = url.join(href)?; 68 | let type_ = if href.as_str().ends_with('/') { 69 | FileType::Directory 70 | } else { 71 | FileType::File 72 | }; 73 | let size = td_iterator.next().unwrap().inner_html(); 74 | let size = size.trim(); 75 | let date = td_iterator.next().unwrap().inner_html(); 76 | let date = date.trim(); 77 | 78 | // decide (guess) which time format to use 79 | let (date_fmt, _) = guess_date_fmt(date); 80 | let naive_date; 81 | let timezone; 82 | if !date_fmt_has_timezone(&date_fmt) { 83 | naive_date = NaiveDateTime::parse_from_str(date, &date_fmt)?; 84 | timezone = None; 85 | } else { 86 | let date = DateTime::parse_from_str(date, &date_fmt)?; 87 | naive_date = date.naive_utc(); 88 | timezone = Some(date.offset().to_owned()); 89 | } 90 | 91 | items.push(ListItem::new( 92 | href, 93 | name, 94 | type_, 95 | { 96 | if size == "-" { 97 | None 98 | } else { 99 | let (n_size, unit) = FileSize::get_humanized(size); 100 | Some(FileSize::HumanizedBinary(n_size, unit)) 101 | } 102 | }, 103 | naive_date, 104 | timezone, 105 | )); 106 | } 107 | 108 | Ok(ListResult::List(items)) 109 | } 110 | } 111 | 112 | #[cfg(test)] 113 | mod tests { 114 | use chrono::FixedOffset; 115 | 116 | use super::*; 117 | use crate::listing::SizeUnit; 118 | use crate::parser::tests::*; 119 | 120 | #[test] 121 | fn test_njumirrors() { 122 | let context = init_async_context(); 123 | let items = FancyIndexListingParser 124 | .get_list( 125 | &context, 126 | 
&Url::parse("http://localhost:1921/bmclapi/").unwrap(), 127 | ) 128 | .unwrap(); 129 | match items { 130 | ListResult::List(items) => { 131 | assert_eq!(items[0].name, "bouncycastle"); 132 | assert_eq!(items[0].type_, FileType::Directory); 133 | assert_eq!(items[0].size, None); 134 | assert_eq!( 135 | items[0].mtime, 136 | NaiveDateTime::parse_from_str("2024-04-23 19:01:54", "%Y-%m-%d %H:%M:%S") 137 | .unwrap() 138 | ); 139 | assert_eq!(items[items.len() - 1].name, "lwjgURL"); 140 | assert_eq!(items[items.len() - 1].type_, FileType::File); 141 | assert_eq!( 142 | items[items.len() - 1].size, 143 | Some(FileSize::HumanizedBinary(1767.0, SizeUnit::B)) 144 | ); 145 | assert_eq!( 146 | items[items.len() - 1].mtime, 147 | NaiveDateTime::parse_from_str("2021-04-30 20:55:32", "%Y-%m-%d %H:%M:%S") 148 | .unwrap() 149 | ); 150 | } 151 | _ => unreachable!(), 152 | } 153 | } 154 | 155 | #[test] 156 | fn test_loongnix() { 157 | let context = init_async_context(); 158 | let items = FancyIndexListingParser 159 | .get_list( 160 | &context, 161 | &Url::parse("http://localhost:1921/loongnix/").unwrap(), 162 | ) 163 | .unwrap(); 164 | match items { 165 | ListResult::List(items) => { 166 | assert_eq!(items[0].name, "contrib"); 167 | assert_eq!(items[0].type_, FileType::Directory); 168 | assert_eq!(items[0].size, None); 169 | assert_eq!( 170 | items[0].mtime, 171 | NaiveDateTime::parse_from_str("2023-08-15 05:48", "%Y-%m-%d %H:%M").unwrap() 172 | ); 173 | assert_eq!(items[items.len() - 1].name, "Release.gpg"); 174 | assert_eq!(items[items.len() - 1].type_, FileType::File); 175 | assert_eq!( 176 | items[items.len() - 1].size, 177 | Some(FileSize::HumanizedBinary(659.0, SizeUnit::B)) 178 | ); 179 | assert_eq!( 180 | items[items.len() - 1].mtime, 181 | NaiveDateTime::parse_from_str("2023-08-15 05:48", "%Y-%m-%d %H:%M").unwrap() 182 | ); 183 | } 184 | _ => unreachable!(), 185 | } 186 | } 187 | 188 | #[test] 189 | fn test_misc_1() { 190 | // In fact this is NOT a fancyindex page, but it 
basically match the layout of that. 191 | let context = init_async_context(); 192 | let items = FancyIndexListingParser 193 | .get_list( 194 | &context, 195 | &Url::parse("http://localhost:1921/misc/1/").unwrap(), 196 | ) 197 | .unwrap(); 198 | match items { 199 | ListResult::List(items) => { 200 | assert_eq!(items.len(), 1); 201 | assert_eq!(items[0].name, "passwd"); 202 | assert_eq!(items[0].type_, FileType::File); 203 | assert_eq!( 204 | items[0].size, 205 | Some(FileSize::HumanizedBinary(3.3, SizeUnit::K)) 206 | ); 207 | assert_eq!( 208 | items[0].mtime, 209 | NaiveDateTime::parse_from_str("2024-08-24 15:04:11", "%Y-%m-%d %H:%M:%S") 210 | .unwrap() 211 | ); 212 | assert_eq!(items[0].timezone, FixedOffset::east_opt(0),); 213 | } 214 | _ => unreachable!(), 215 | } 216 | } 217 | } 218 | -------------------------------------------------------------------------------- /src/parser/gradle.rs: -------------------------------------------------------------------------------- 1 | use crate::listing::{FileSize, FileType, ListItem}; 2 | use chrono::{DateTime, NaiveDateTime}; 3 | use scraper::{Html, Selector}; 4 | use tracing::info; 5 | 6 | use super::*; 7 | use anyhow::Result; 8 | 9 | #[derive(Debug, Clone, Default)] 10 | pub struct GradleListingParser {} 11 | 12 | impl Parser for GradleListingParser { 13 | fn name(&self) -> &'static str { 14 | "services.gradle.org" 15 | } 16 | 17 | fn get_list( 18 | &self, 19 | async_context: &AsyncContext, 20 | url: &url::Url, 21 | ) -> Result { 22 | let resp = get( 23 | &async_context.runtime, 24 | &async_context.listing_client, 25 | url.clone(), 26 | )?; 27 | let url = resp.url().clone(); 28 | let body = get_text(&async_context.runtime, resp)?; 29 | assert_if_url_has_no_trailing_slash(&url); 30 | let document = Html::parse_document(&body); 31 | let selector = Selector::parse("ul li").unwrap(); 32 | let mut items = Vec::new(); 33 | for element in document.select(&selector) { 34 | // Select first, then s 35 | let a_selector = 
Selector::parse("a").unwrap(); 36 | let span_selector = Selector::parse("span").unwrap(); 37 | let size_selector = Selector::parse("span.size").unwrap(); 38 | let date_selector = Selector::parse("span.date").unwrap(); 39 | 40 | if element.select(&span_selector).next().is_none() { 41 | info!("No in this
  • . Maybe it's a header"); 42 | continue; 43 | } 44 | 45 | let a = match element.select(&a_selector).next() { 46 | Some(a) => a, 47 | None => { 48 | return Err(anyhow!("No in given
  • ").into()); 49 | } 50 | }; 51 | let href = a.value().attr("href").unwrap(); 52 | let displayed_filename = a.inner_html(); 53 | 54 | if displayed_filename == "Parent Directory/" || href == "../" { 55 | continue; 56 | } 57 | 58 | let name = get_real_name_from_href(href); 59 | let href = url.join(href)?; 60 | let type_ = if href.as_str().ends_with('/') { 61 | FileType::Directory 62 | } else { 63 | FileType::File 64 | }; 65 | let size = element.select(&size_selector).next().unwrap().inner_html(); 66 | let size = size.trim(); 67 | let date = element.select(&date_selector).next().unwrap().inner_html(); 68 | let date = date.trim(); 69 | 70 | // decide (guess) which time format to use 71 | let (date_fmt, _) = guess_date_fmt(date); 72 | let naive_date; 73 | let timezone; 74 | if !date_fmt_has_timezone(&date_fmt) { 75 | naive_date = NaiveDateTime::parse_from_str(date, &date_fmt)?; 76 | timezone = None; 77 | } else { 78 | let date = DateTime::parse_from_str(date, &date_fmt)?; 79 | naive_date = date.naive_utc(); 80 | timezone = Some(date.offset().to_owned()); 81 | } 82 | 83 | items.push(ListItem::new( 84 | href, 85 | name, 86 | type_, 87 | { 88 | if size == "-" { 89 | None 90 | } else { 91 | let (n_size, unit) = FileSize::get_humanized(size); 92 | Some(FileSize::HumanizedBinary(n_size, unit)) 93 | } 94 | }, 95 | naive_date, 96 | timezone, 97 | )); 98 | } 99 | 100 | Ok(ListResult::List(items)) 101 | } 102 | } 103 | 104 | #[cfg(test)] 105 | mod tests { 106 | use chrono::FixedOffset; 107 | use test_log::test; 108 | 109 | use crate::listing::SizeUnit; 110 | 111 | use super::*; 112 | use crate::parser::tests::*; 113 | 114 | #[test] 115 | fn test_gradle() { 116 | let context = init_async_context(); 117 | let items = GradleListingParser::default() 118 | .get_list( 119 | &context, 120 | &url::Url::parse("http://localhost:1921/gradle").unwrap(), 121 | ) 122 | .unwrap(); 123 | match items { 124 | ListResult::List(items) => { 125 | assert_eq!(items.len(), 64); 126 | 
assert_eq!(items[0].name, "gradle-8.10-wrapper.jar.sha256"); 127 | assert_eq!(items[0].type_, FileType::File); 128 | assert_eq!( 129 | items[0].size, 130 | Some(FileSize::HumanizedBinary(64.0, SizeUnit::B)) 131 | ); 132 | assert_eq!( 133 | items[0].mtime, 134 | NaiveDateTime::parse_from_str("14-Aug-2024 11:18", "%d-%b-%Y %H:%M").unwrap() 135 | ); 136 | assert_eq!(items[0].timezone, FixedOffset::east_opt(0),); 137 | } 138 | _ => unreachable!(), 139 | } 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /src/parser/lighttpd.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | listing::{FileSize, FileType, ListItem}, 3 | utils::get, 4 | }; 5 | use chrono::NaiveDateTime; 6 | use scraper::{Html, Selector}; 7 | // use tracing::debug; 8 | 9 | use super::*; 10 | use anyhow::{anyhow, Result}; 11 | 12 | #[derive(Debug, Clone, Default)] 13 | pub struct LighttpdListingParser; 14 | 15 | impl Parser for LighttpdListingParser { 16 | fn name(&self) -> &'static str { 17 | "Lighttpd" 18 | } 19 | 20 | fn get_list( 21 | &self, 22 | async_context: &AsyncContext, 23 | url: &url::Url, 24 | ) -> Result { 25 | let resp = get( 26 | &async_context.runtime, 27 | &async_context.listing_client, 28 | url.clone(), 29 | )?; 30 | let url = resp.url().clone(); 31 | let body = get_text(&async_context.runtime, resp)?; 32 | assert_if_url_has_no_trailing_slash(&url); 33 | let document = Html::parse_document(&body); 34 | let selector = Selector::parse("tbody").unwrap(); 35 | let indexlist = document 36 | .select(&selector) 37 | .next() 38 | .ok_or_else(|| anyhow!("Cannot find
  • "))?; 39 | let selector = Selector::parse("tr").unwrap(); 40 | let mut items = Vec::new(); 41 | for element in indexlist.select(&selector) { 42 | let a = element 43 | .select(&Selector::parse("a").unwrap()) 44 | .next() 45 | .ok_or_else(|| anyhow!("Cannot find "))?; 46 | let mtime = element 47 | .select(&Selector::parse(".m").unwrap()) 48 | .next() 49 | .ok_or_else(|| anyhow!("Cannot find .m"))?; 50 | let size = element 51 | .select(&Selector::parse(".s").unwrap()) 52 | .next() 53 | .ok_or_else(|| anyhow!("Cannot find .s"))?; 54 | 55 | // let filetype = element.select(&Selector::parse(".t").unwrap()).next().unwrap(); 56 | 57 | let displayed_filename = a.inner_html(); 58 | if displayed_filename == ".." { 59 | continue; 60 | } 61 | let href = a 62 | .value() 63 | .attr("href") 64 | .ok_or_else(|| anyhow!("Cannot find href inside "))?; 65 | let name = get_real_name_from_href(href); 66 | let href = url.join(href)?; 67 | 68 | let type_ = if href.as_str().ends_with('/') { 69 | FileType::Directory 70 | } else { 71 | FileType::File 72 | }; 73 | 74 | let mtime = mtime.inner_html(); 75 | let mtime = mtime.trim(); 76 | let mtime = NaiveDateTime::parse_from_str(mtime, "%Y-%b-%d %H:%M:%S")?; 77 | 78 | let size = size.inner_html(); 79 | // Currently we just use simple replace to handle HTML entities 80 | // if we need a more sophisticated way to handle it, we should use a crate 81 | // like https://crates.io/crates/htmlentity 82 | let size = size.replace(" ", ""); 83 | let size = size.trim(); 84 | let size = if size == "-" { 85 | None 86 | } else { 87 | let (n_size, unit) = FileSize::get_humanized(size); 88 | Some(FileSize::HumanizedBinary(n_size, unit)) 89 | }; 90 | 91 | // debug!("{} {} {} {:?} {:?}", href, name, mtime, size, type_); 92 | items.push(ListItem::new(href, name, type_, size, mtime, None)) 93 | } 94 | 95 | Ok(ListResult::List(items)) 96 | } 97 | } 98 | 99 | #[cfg(test)] 100 | mod tests { 101 | use crate::listing::SizeUnit; 102 | 103 | use super::*; 104 | use 
crate::parser::tests::*; 105 | 106 | #[test] 107 | fn test_buildroot_root() { 108 | let context = init_async_context(); 109 | let items = LighttpdListingParser 110 | .get_list( 111 | &context, 112 | &Url::parse("http://localhost:1921/buildroot/").unwrap(), 113 | ) 114 | .unwrap(); 115 | match items { 116 | ListResult::List(items) => { 117 | assert_eq!(items[0].name, "18xx-ti-utils"); 118 | assert_eq!(items[0].type_, FileType::Directory); 119 | assert_eq!(items[0].size, None); 120 | assert_eq!( 121 | items[0].mtime, 122 | NaiveDateTime::parse_from_str("2021-01-11 15:59:23", "%Y-%m-%d %H:%M:%S") 123 | .unwrap() 124 | ); 125 | let last_item = items.last().unwrap(); 126 | assert_eq!(last_item.name, "zyre-v2.0.0.tar.gz"); 127 | assert_eq!(last_item.type_, FileType::File); 128 | assert_eq!( 129 | last_item.size, 130 | Some(FileSize::HumanizedBinary(262.1, SizeUnit::K)) 131 | ); 132 | assert_eq!( 133 | last_item.mtime, 134 | NaiveDateTime::parse_from_str("2018-03-08 11:18:46", "%Y-%m-%d %H:%M:%S") 135 | .unwrap() 136 | ); 137 | } 138 | _ => unreachable!(), 139 | } 140 | } 141 | 142 | #[test] 143 | fn test_buildroot_subfolder() { 144 | let context = init_async_context(); 145 | let items = LighttpdListingParser 146 | .get_list( 147 | &context, 148 | &Url::parse("http://localhost:1921/buildroot/acl/").unwrap(), 149 | ) 150 | .unwrap(); 151 | match items { 152 | ListResult::List(items) => { 153 | assert_eq!(items.len(), 4); 154 | assert_eq!(items[0].name, "acl-2.2.52.src.tar.gz"); 155 | assert_eq!(items[0].type_, FileType::File); 156 | assert_eq!( 157 | items[0].size, 158 | Some(FileSize::HumanizedBinary(377.5, SizeUnit::K)) 159 | ); 160 | assert_eq!( 161 | items[0].mtime, 162 | NaiveDateTime::parse_from_str("2013-05-19 06:10:38", "%Y-%m-%d %H:%M:%S") 163 | .unwrap() 164 | ); 165 | assert_eq!(items[3].name, "acl-2.3.2.tar.xz"); 166 | assert_eq!(items[3].type_, FileType::File); 167 | assert_eq!( 168 | items[3].size, 169 | Some(FileSize::HumanizedBinary(362.9, SizeUnit::K)) 170 
| ); 171 | assert_eq!( 172 | items[3].mtime, 173 | NaiveDateTime::parse_from_str("2024-02-07 03:04:10", "%Y-%m-%d %H:%M:%S") 174 | .unwrap() 175 | ); 176 | } 177 | _ => unreachable!(), 178 | } 179 | } 180 | } 181 | -------------------------------------------------------------------------------- /src/regex_manager/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod v1; 2 | pub mod v2; 3 | 4 | use std::str::FromStr; 5 | 6 | use regex::Regex; 7 | 8 | use crate::SharedArgs; 9 | 10 | // Submit an issue if you find this out-of-date! 11 | // And assuming that all vars are distro_ver 12 | const REGEX_REPLACEMENTS: &[(&str, &str)] = &[ 13 | // https://endoflife.date/debian 14 | ("${DEBIAN_CURRENT}", "(?bullseye|bookworm)"), 15 | // https://endoflife.date/ubuntu (excluding ESM) 16 | ("${UBUNTU_LTS}", "(?focal|jammy|noble)"), 17 | ("${UBUNTU_NONLTS}", "(?oracular|plucky)"), 18 | // https://endoflife.date/fedora 19 | ("${FEDORA_CURRENT}", "(?40|41|42)"), 20 | // CentOS is no longer supported -- this regex is replaced to something that could match nothing 21 | ( 22 | "${CENTOS_CURRENT}", 23 | "(?NONEXISTFILENAMESOITCOULDNEVERMATCHANYTHING)", 24 | ), 25 | // https://endoflife.date/rhel (excluding ELCS) 26 | ("${RHEL_CURRENT}", "(?8|9)"), 27 | // https://endoflife.date/opensuse 28 | ("${OPENSUSE_CURRENT}", "(?15.6)"), 29 | // https://endoflife.date/sles 30 | ("${SLES_CURRENT}", "(?15)"), 31 | ]; 32 | 33 | /// ExpandedRegex contains inner and rev_inner, and would transparently add '/' before string 34 | /// (and convert regex with ^). A warning would be given if text input contains '/' at front. 
#[derive(Debug, Clone)]
pub struct ExpandedRegex {
    pub inner: Regex,
    /// v1 compatibility field
    rev_inner: Regex,
}

impl FromStr for ExpandedRegex {
    type Err = regex::Error;

    /// Compile one user-supplied pattern into the pair of regexes this type
    /// wraps: the "forward" one with every `${…}` placeholder expanded to its
    /// concrete alternation, and the v1-compatibility "reverse" one with every
    /// placeholder widened to a catch-all named group.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Re-anchor `^pat` as `^/pat`: matched inputs always carry a leading '/'.
        let pattern = match s.strip_prefix('^') {
            Some(rest) if !rest.starts_with('/') => format!("^/{}", rest),
            _ => s.to_string(),
        };

        // Forward form: each placeholder becomes its concrete version list.
        let mut forward = pattern.clone();
        for (placeholder, replacement) in REGEX_REPLACEMENTS {
            forward = forward.replace(placeholder, replacement);
        }

        // Reverse form: each placeholder becomes a wildcard group, so it also
        // matches the versions the forward form deliberately leaves out.
        let mut reverse = pattern;
        for (placeholder, _) in REGEX_REPLACEMENTS.iter().rev() {
            reverse = reverse.replace(placeholder, "(?<distro_ver>.+)");
        }

        Ok(Self {
            inner: Regex::new(&forward)?,
            rev_inner: Regex::new(&reverse)?,
        })
    }
}

// Delegate to inner
impl ExpandedRegex {
    /// Guarantee a leading '/' on the text to be matched, warning when a
    /// caller handed in a bare relative path.
    fn text_transform(text: &str) -> String {
        if text.starts_with('/') {
            text.to_string()
        } else {
            tracing::warn!("(unexpected internal input: string given to match_str shall start with /, anything wrong?)");
            format!("/{}", text)
        }
    }

    pub fn is_match(&self, text: &str) -> bool {
        self.inner.is_match(&Self::text_transform(text))
    }

    /// v1 compatibility method
    pub fn is_others_match(&self, text: &str) -> bool {
        let normalized = Self::text_transform(text);
        self.rev_inner.is_match(&normalized) && !self.inner.is_match(&normalized)
    }
}

#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Comparison {
    Stop,
    /// v1 compatibility field
    ListOnly,
    Ok,
}

pub trait ExclusionManagerTrait: Send + Sync {
    fn match_str(&self, text: &str) -> Comparison;
}

pub fn get_exclusion_manager(shared_args: impl SharedArgs) -> Box<dyn ExclusionManagerTrait> {
    if shared_args.use_v2_exclusion() {
        Box::new(v2::ExclusionManager::new())
    } else {
        Box::new(v1::ExclusionManager::new(
            shared_args.exclude(),
107 | shared_args.include(), 108 | )) 109 | } 110 | } 111 | 112 | #[cfg(test)] 113 | mod tests { 114 | use super::*; 115 | 116 | #[test] 117 | fn test_expanded_regex() { 118 | let regex = ExpandedRegex::from_str("^/deb/dists/${DEBIAN_CURRENT}").unwrap(); 119 | assert!(regex.is_match("/deb/dists/bookworm/Release")); 120 | assert!(!regex.is_match("/deb/dists/wheezy/Release")); 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /src/regex_manager/v1.rs: -------------------------------------------------------------------------------- 1 | use super::{Comparison, ExclusionManagerTrait, ExpandedRegex}; 2 | 3 | #[derive(Debug, Clone)] 4 | pub struct ExclusionManager { 5 | /// Stop the task immediately if any of these regexes match. 6 | instant_stop_regexes: Vec, 7 | /// Continue, but don't download anything if any of these regexes match. 8 | list_only_regexes: Vec, 9 | /// Include only these regexes. 10 | include_regexes: Vec, 11 | } 12 | 13 | impl ExclusionManager { 14 | pub fn new(exclusions: &[ExpandedRegex], inclusions: &[ExpandedRegex]) -> Self { 15 | let mut instant_stop_regexes = Vec::new(); 16 | let mut list_only_regexes = Vec::new(); 17 | 18 | for exclusion in exclusions { 19 | let regex_str = exclusion.inner.as_str(); 20 | let mut flag = false; 21 | for inclusion in inclusions { 22 | if inclusion.inner.as_str().starts_with(regex_str) { 23 | list_only_regexes.push(exclusion.clone()); 24 | flag = true; 25 | break; 26 | } 27 | } 28 | if !flag { 29 | instant_stop_regexes.push(exclusion.clone()); 30 | } 31 | } 32 | 33 | Self { 34 | instant_stop_regexes, 35 | list_only_regexes, 36 | include_regexes: inclusions.to_vec(), 37 | } 38 | } 39 | } 40 | 41 | impl ExclusionManagerTrait for ExclusionManager { 42 | fn match_str(&self, text: &str) -> Comparison { 43 | for regex in &self.instant_stop_regexes { 44 | if regex.is_match(text) { 45 | return Comparison::Stop; 46 | } 47 | } 48 | for regex in &self.include_regexes { 49 | 
if regex.is_match(text) { 50 | return Comparison::Ok; 51 | } 52 | } 53 | // Performance: it is possible that a regex for inclusion shown like this: 54 | // ^fedora/${FEDORA_CURRENT} 55 | // And the remote corresponding folder has a lot of subfolders. 56 | // This is a "shortcut" to avoid checking all subfolders. 57 | for regex in &self.include_regexes { 58 | if regex.is_others_match(text) { 59 | return Comparison::Stop; 60 | } 61 | } 62 | for regex in &self.list_only_regexes { 63 | if regex.is_match(text) { 64 | return Comparison::ListOnly; 65 | } 66 | } 67 | Comparison::Ok 68 | } 69 | } 70 | 71 | #[cfg(test)] 72 | mod tests { 73 | use std::str::FromStr; 74 | 75 | use test_log::test; 76 | use tracing::debug; 77 | 78 | use super::*; 79 | 80 | #[test] 81 | fn test_exclusion() { 82 | let target = 83 | "/debian/pmg/dists/stretch/pmgtest/binary-amd64/grub-efi-amd64-bin_2.02-pve6.changelog"; 84 | let exclusions = 85 | vec![ExpandedRegex::from_str("pmg/dists/.+/pmgtest/.+changelog$").unwrap()]; 86 | let inclusions = vec![]; 87 | let exclusion_manager = ExclusionManager::new(&exclusions, &inclusions); 88 | assert_eq!(exclusion_manager.match_str(target), Comparison::Stop); 89 | } 90 | 91 | #[test] 92 | fn test_partial() { 93 | let target1 = "/yum/mysql-tools-community/fc/24/x86_64"; 94 | let target2 = "/yum/mysql-tools-community/fc/40/x86_64"; 95 | let target3 = "/yum/mysql-tools-community/fc/"; 96 | let target4 = "/yum/mysql-tools-community/fc/24/"; 97 | let target5 = "/yum/mysql-tools-community/fc/40/"; 98 | let exclusions = vec![ExpandedRegex::from_str("/fc/").unwrap()]; 99 | let inclusions = vec![ExpandedRegex::from_str("/fc/${FEDORA_CURRENT}").unwrap()]; 100 | debug!("exclusions: {:?}", exclusions); 101 | debug!("inclusions: {:?}", inclusions); 102 | let exclusion_manager = ExclusionManager::new(&exclusions, &inclusions); 103 | assert_eq!(exclusion_manager.match_str(target1), Comparison::Stop); 104 | assert_eq!(exclusion_manager.match_str(target2), Comparison::Ok); 105 
| assert_eq!(exclusion_manager.match_str(target3), Comparison::ListOnly); 106 | assert_eq!(exclusion_manager.match_str(target4), Comparison::Stop); 107 | assert_eq!(exclusion_manager.match_str(target5), Comparison::Ok); 108 | } 109 | 110 | #[test] 111 | fn test_exclude_dbg() { 112 | let target1 = "/yum/mysql-8.0-community/docker/el/8/aarch64/mysql-community-server-minimal-8.0.33-1.el8.aarch64.rpm"; 113 | let target2 = "/yum/mysql-8.0-community/docker/el/8/debuginfo/x86_64/mysql-community-server-minimal-debuginfo-8.0.24-1.el8.x86_64.rpm"; 114 | let exclusions = vec![ 115 | ExpandedRegex::from_str("/el/").unwrap(), 116 | ExpandedRegex::from_str("debuginfo").unwrap(), 117 | ]; 118 | let inclusions = vec![ExpandedRegex::from_str("/el/${RHEL_CURRENT}").unwrap()]; 119 | let exclusion_manager = ExclusionManager::new(&exclusions, &inclusions); 120 | assert_eq!(exclusion_manager.match_str(target1), Comparison::Ok); 121 | assert_eq!(exclusion_manager.match_str(target2), Comparison::Stop); 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/regex_manager/v2.rs: -------------------------------------------------------------------------------- 1 | use std::str::FromStr; 2 | 3 | use tracing::debug; 4 | 5 | use super::{Comparison, ExclusionManagerTrait, ExpandedRegex}; 6 | 7 | #[derive(Debug, Clone)] 8 | enum RegexType { 9 | Include(ExpandedRegex), 10 | Exclude(ExpandedRegex), 11 | } 12 | 13 | #[derive(Debug, Clone)] 14 | pub struct ExclusionManager { 15 | regexes: Vec, 16 | } 17 | 18 | impl ExclusionManager { 19 | pub fn new() -> Self { 20 | // TODO: how to get correct order with clap? 
21 | let args = std::env::args().collect::>(); 22 | debug!("args: {:?}", args); 23 | let mut regexes = Vec::new(); 24 | let mut iter = args.iter().peekable(); 25 | while let Some(arg) = iter.next() { 26 | if let Some(stripped) = arg.strip_prefix("--exclude=") { 27 | regexes.push(RegexType::Exclude( 28 | ExpandedRegex::from_str(stripped).expect("unexpected exclude regex"), 29 | )); 30 | } else if let Some(stripped) = arg.strip_prefix("--include=") { 31 | regexes.push(RegexType::Include( 32 | ExpandedRegex::from_str(stripped).expect("unexpected include regex"), 33 | )); 34 | } else if arg == "--exclude" { 35 | if let Some(s) = iter.peek() { 36 | regexes.push(RegexType::Exclude( 37 | ExpandedRegex::from_str(s).expect("unexpected exclude regex"), 38 | )); 39 | } 40 | } else if arg == "--include" { 41 | if let Some(s) = iter.peek() { 42 | regexes.push(RegexType::Include( 43 | ExpandedRegex::from_str(s).expect("unexpected include regex"), 44 | )); 45 | } 46 | } 47 | } 48 | debug!("regexes: {:?}", regexes); 49 | Self { regexes } 50 | } 51 | } 52 | 53 | impl ExclusionManagerTrait for ExclusionManager { 54 | fn match_str(&self, text: &str) -> Comparison { 55 | for regex in &self.regexes { 56 | match regex { 57 | RegexType::Exclude(regex) if regex.is_match(text) => return Comparison::Stop, 58 | RegexType::Include(regex) if regex.is_match(text) => return Comparison::Ok, 59 | _ => {} 60 | } 61 | } 62 | Comparison::Ok 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/timezone.rs: -------------------------------------------------------------------------------- 1 | use crate::listing::FileType; 2 | use crate::parser::{ListResult, ParserMux}; 3 | use crate::regex_manager::{Comparison, ExclusionManagerTrait}; 4 | use crate::utils::{self, again}; 5 | use crate::utils::{head, relative_to_str}; 6 | use crate::AsyncContext; 7 | use crate::{parser, SyncArgs}; 8 | 9 | use anyhow::{bail, Result}; 10 | use chrono::{DateTime, 
FixedOffset, Utc};
use tracing::{debug, info};
use url::Url;

/// Decide which fixed UTC offset the remote listing's mtimes are expressed in.
///
/// Resolution order:
/// 1. `args.timezone` (hours east of UTC) wins when given.
/// 2. Otherwise pick a probe file URL: `args.timezone_file` when set (the
///    literal value "no" disables guessing entirely), else walk the listing
///    from `args.upstream` to find the first `FileType::File`.
/// 3. Hand the probe to `guess_remote_timezone`, which compares the listed
///    mtime against the file's HTTP response mtime.
///
/// Returns `None` when guessing is disabled or no probe file could be found.
/// Panics (via `expect`) when a given URL is invalid or guessing fails.
pub fn determinate_timezone(
    args: &SyncArgs,
    parser: &ParserMux,
    exclusion_manager: &dyn ExclusionManagerTrait,
    async_context: &AsyncContext,
) -> Option<FixedOffset> {
    match args.timezone {
        None => {
            // Check if to guess timezone
            // Some parsers like directory-lister, requires special handling for URL --
            // we cannot deduce "base" from file URL. Most normal websites work like this
            // File http://example.com/d1/f1 => Listing http://example.com/d1/
            // However directory-lister:
            // File https://example.com/d1/f1 => Listing https://example.com/?dir=d1/
            // So we have to remember the listing URL here, too
            let timezone_base_and_url = match &args.timezone_file {
                Some(f) => {
                    if f == "no" {
                        None
                    } else {
                        // Currently timezone_file could be given from CLI
                        // In this case, we still use the old logic to "guess" the listing
                        // by setting the base (listing) url to None
                        Some((None, Url::parse(f).expect("Invalid timezone file URL")))
                    }
                }
                None => {
                    // eek, try getting first file in root index
                    // Depth-first search for the first regular file reachable
                    // from `url`, honoring the exclusion manager. Returns
                    // (listing URL, file URL) on success.
                    fn find_first_file(
                        args: &SyncArgs,
                        parser: &ParserMux,
                        async_context: &AsyncContext,
                        url: &Url,
                        relative: Vec<String>,
                        exclusion_manager: &dyn ExclusionManagerTrait,
                    ) -> Option<(Option<Url>, Url)> {
                        let relative_str = relative_to_str(&relative, None);
                        // Respect user exclusions even while only probing for a timezone.
                        if exclusion_manager.match_str(&relative_str) == Comparison::Stop {
                            info!("Excluded by exclusion manager: {}", relative_str);
                            return None;
                        }
                        info!("Try finding first File in {}", url);
                        let list = again(|| Ok(parser.get_list_with_filter(async_context, url, &relative_str)?), args.retry)
                            .unwrap_or_else(|_| panic!("Failed to get list for {}. Maybe you shall disable timezone guessing?", url));
                        match list {
                            ListResult::List(list) => {
                                // Prefer a file at the current level...
                                if let Some(item) = list.iter().find(|x| x.type_ == FileType::File)
                                {
                                    info!("Find a file! URL: {}", item.url);
                                    return Some((Some(url.clone()), item.url.clone()));
                                }
                                // ...otherwise recurse into each subdirectory in turn.
                                for item in list.iter().filter(|x| x.type_ == FileType::Directory) {
                                    let mut relative = relative.clone();
                                    relative.push(item.name.clone());
                                    if let Some(res) = find_first_file(
                                        args,
                                        parser,
                                        async_context,
                                        &item.url,
                                        relative,
                                        exclusion_manager,
                                    ) {
                                        return Some(res);
                                    }
                                }
                                None
                            }
                            ListResult::Redirect(_) => {
                                info!("Get a manual redirect instead of a file");
                                None
                            }
                        }
                    }
                    find_first_file(
                        args,
                        parser,
                        async_context,
                        &args.upstream,
                        [].to_vec(),
                        exclusion_manager,
                    )
                }
            };
            match timezone_base_and_url {
                Some((timezone_base_url, timezone_url)) => {
                    let timezone = guess_remote_timezone(
                        parser,
                        async_context,
                        &args.upstream,
                        timezone_base_url,
                        timezone_url,
                    )
                    .expect("Failed to guess timezone");
                    info!("Guessed timezone: {:?}", timezone);
                    Some(timezone)
                }
                None => None,
            }
        }
        Some(tz) => {
            info!("Using timezone from argument: {:?} hrs", tz);
            Some(FixedOffset::east_opt(tz * 3600).unwrap())
        }
    }
}

/// Derive the remote server's UTC offset by comparing the mtime shown in the
/// HTML listing (offset unknown) with the same file's response mtime taken
/// from a HEAD request (offset known), rounding the difference to whole hours.
///
/// `base_url` is the listing page containing `file_url`; when `None` it is
/// derived by trimming `file_url` after its last '/'.
///
/// # Errors
/// Fails when the listing redirects, `file_url` is absent from the listing,
/// the two mtimes differ by more than 20 minutes from a whole-hour offset, or
/// the resulting offset is out of chrono's representable range.
fn guess_remote_timezone(
    parser: &ParserMux,
    async_context: &AsyncContext,
    upstream: &Url,
    base_url: Option<Url>,
    file_url: Url,
) -> Result<FixedOffset> {
    // A trailing '/' would mean a directory, which has no usable mtime here.
    assert!(!file_url.as_str().ends_with('/'));
    // trim after the latest '/'
    // TODO: improve this

    let file_url_str = file_url.as_str();
    let base_url = match base_url {
        Some(b) => b,
        None => Url::parse(&file_url_str[..=file_url_str.rfind('/').unwrap()]).unwrap(),
    };
    // Listing path relative to the upstream root, as expected by the parser.
    let relative = base_url.path().strip_prefix(upstream.path()).unwrap();
    debug!("get {relative} as relative for parser in guess remote timezone");

    info!("base: {:?}", base_url);
    info!("file: {:?}", file_url);

    let list = parser.get_list_with_filter(async_context, &base_url, relative)?;
    let list = match list {
        parser::ListResult::Redirect(_) => {
            anyhow::bail!("Redirection not supported");
        }
        parser::ListResult::List(list) => list,
    };
    debug!("{:?}", list);
    for item in list {
        if item.url == file_url {
            // access file_url with HEAD
            let resp = head(
                &async_context.runtime,
                &async_context.download_client,
                file_url,
            )?;
            let mtime = utils::get_response_mtime(&resp)?;

            // compare how many hours are there between mtime (FixedOffset) and item.mtime (Naive)
            // assuming that Naive one is UTC
            let unknown_mtime = DateTime::<Utc>::from_naive_utc_and_offset(item.mtime, Utc);
            let offset = unknown_mtime - mtime;
            let offset_minutes = offset.num_minutes();
            // Round to the nearest whole hour -- mirrors are assumed to sit in
            // whole-hour timezones; sub-hour offsets are treated as noise.
            let hrs = (offset_minutes as f64 / 60.0).round() as i32;

            // Reject the guess when the remainder after rounding exceeds 20
            // minutes: the listed and actual mtimes then disagree too much.
            let minute_delta = (hrs as i64 * 60 - offset_minutes).abs();
            if minute_delta > 20 {
                bail!("File mtime got from parser and response does not match.");
            }

            // Construct timezone by hrs
            let timezone = FixedOffset::east_opt(hrs * 3600).ok_or(anyhow::anyhow!(
                "Cannot convert to timezone (offset hour = {hrs})."
            ))?;
            info!(
                "html time: {:?}, head time: {:?}, timezone: {:?}",
                item.mtime, mtime, timezone
            );
            return Ok(timezone);
        }
    }
    anyhow::bail!("File not found")
}
--------------------------------------------------------------------------------
    in order, instead of using class name, to improve compatibility for strange pages 43 | let td_selector = Selector::parse("td").unwrap(); 44 | let mut td_iterator = element.select(&td_selector); 45 | 46 | let td_a = match td_iterator.next() { 47 | Some(tda) => tda, 48 | None => { 49 | warn!("Cannot find in this