├── pyscandir ├── tests │ ├── __init__.py │ ├── common.py │ ├── test_scandir.py │ ├── test_count.py │ └── test_walk.py ├── src │ ├── def │ │ ├── walk.rs │ │ ├── mod.rs │ │ ├── toc.rs │ │ ├── scandir.rs │ │ ├── count.rs │ │ └── direntry.rs │ ├── lib.rs │ └── count.rs ├── .python-version ├── upload.sh ├── MANIFEST.in ├── doc │ ├── images │ │ ├── linux_walk_usr.png │ │ ├── linux_scandir_usr.png │ │ ├── linux_walk_linux-5.9.png │ │ ├── windows_walk_windows.png │ │ ├── windows_walk_linux-5.9.png │ │ ├── linux_scandir_linux-5.9.png │ │ ├── windows_scandir_linux-5.9.png │ │ └── windows_scandir_windows.png │ ├── walk.md │ ├── count.md │ ├── benchmarks.md │ └── scandir.md ├── examples │ ├── fastproperties │ │ ├── fastproperties.gif │ │ ├── fastproperties.ico │ │ ├── doc │ │ │ └── Linux_workspace.jpg │ │ └── fastproperties.spec │ ├── count_cm.py │ ├── count.py │ ├── walk.py │ ├── scandir.py │ └── benchmark.py ├── LICENSE ├── Cargo.toml ├── run_pytest_for_all_wheels.sh ├── pyproject.toml ├── README.md ├── build_wheels.py └── CHANGELOG.md ├── scandir ├── doc │ ├── images │ │ ├── linux_walk_usr.png │ │ ├── linux_scandir_usr.png │ │ ├── linux_walk_linux-5.9.png │ │ ├── windows_walk_linux-5.9.png │ │ ├── windows_walk_windows.png │ │ ├── linux_scandir_linux-5.9.png │ │ ├── windows_scandir_windows.png │ │ └── windows_scandir_linux-5.9.png │ ├── count.md │ ├── walk.md │ ├── benchmarks.md │ └── scandir.md ├── src │ ├── def │ │ ├── mod.rs │ │ ├── walk.rs │ │ ├── options.rs │ │ ├── count.rs │ │ ├── toc.rs │ │ ├── direntry.rs │ │ └── scandir.rs │ ├── lib.rs │ └── common.rs ├── examples │ ├── count.rs │ ├── walk.rs │ └── scandir.rs ├── Cargo.toml ├── tests │ ├── common.rs │ ├── walk.rs │ ├── count.rs │ └── scandir.rs ├── benches │ ├── count.rs │ ├── walk.rs │ └── scandir.rs └── README.md ├── .markdownlint.json ├── .editorconfig ├── .gitignore ├── Cargo.toml ├── .cargo └── config.toml ├── .github └── dependabot.yml ├── tools ├── benchmark_results_nt_Windows_python.json ├── 
benchmark_results_nt_linux-5.9_python.json ├── benchmark_results_posix_usr_python.json ├── benchmark_results_posix_linux-5.9_python.json └── create_charts.py ├── LICENSE-MIT ├── README.md └── CHANGELOG.md /pyscandir/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyscandir/src/def/walk.rs: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /pyscandir/.python-version: -------------------------------------------------------------------------------- 1 | 3.13.2 2 | -------------------------------------------------------------------------------- /pyscandir/upload.sh: -------------------------------------------------------------------------------- 1 | twine upload ../target/wheels/* 2 | -------------------------------------------------------------------------------- /pyscandir/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include pyproject.toml Cargo.toml 2 | recursive-include src * 3 | -------------------------------------------------------------------------------- /scandir/doc/images/linux_walk_usr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brmmm3/scandir-rs/HEAD/scandir/doc/images/linux_walk_usr.png -------------------------------------------------------------------------------- /pyscandir/doc/images/linux_walk_usr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brmmm3/scandir-rs/HEAD/pyscandir/doc/images/linux_walk_usr.png -------------------------------------------------------------------------------- /scandir/doc/images/linux_scandir_usr.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/brmmm3/scandir-rs/HEAD/scandir/doc/images/linux_scandir_usr.png -------------------------------------------------------------------------------- /.markdownlint.json: -------------------------------------------------------------------------------- 1 | { 2 | "default": true, 3 | "MD024": false, 4 | "MD013": { 5 | "line_length": 100 6 | } 7 | } -------------------------------------------------------------------------------- /pyscandir/doc/images/linux_scandir_usr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brmmm3/scandir-rs/HEAD/pyscandir/doc/images/linux_scandir_usr.png -------------------------------------------------------------------------------- /pyscandir/doc/images/linux_walk_linux-5.9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brmmm3/scandir-rs/HEAD/pyscandir/doc/images/linux_walk_linux-5.9.png -------------------------------------------------------------------------------- /pyscandir/doc/images/windows_walk_windows.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brmmm3/scandir-rs/HEAD/pyscandir/doc/images/windows_walk_windows.png -------------------------------------------------------------------------------- /scandir/doc/images/linux_walk_linux-5.9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brmmm3/scandir-rs/HEAD/scandir/doc/images/linux_walk_linux-5.9.png -------------------------------------------------------------------------------- /scandir/doc/images/windows_walk_linux-5.9.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/brmmm3/scandir-rs/HEAD/scandir/doc/images/windows_walk_linux-5.9.png -------------------------------------------------------------------------------- /scandir/doc/images/windows_walk_windows.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brmmm3/scandir-rs/HEAD/scandir/doc/images/windows_walk_windows.png -------------------------------------------------------------------------------- /pyscandir/doc/images/windows_walk_linux-5.9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brmmm3/scandir-rs/HEAD/pyscandir/doc/images/windows_walk_linux-5.9.png -------------------------------------------------------------------------------- /scandir/doc/images/linux_scandir_linux-5.9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brmmm3/scandir-rs/HEAD/scandir/doc/images/linux_scandir_linux-5.9.png -------------------------------------------------------------------------------- /scandir/doc/images/windows_scandir_windows.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brmmm3/scandir-rs/HEAD/scandir/doc/images/windows_scandir_windows.png -------------------------------------------------------------------------------- /pyscandir/doc/images/linux_scandir_linux-5.9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brmmm3/scandir-rs/HEAD/pyscandir/doc/images/linux_scandir_linux-5.9.png -------------------------------------------------------------------------------- /pyscandir/doc/images/windows_scandir_linux-5.9.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/brmmm3/scandir-rs/HEAD/pyscandir/doc/images/windows_scandir_linux-5.9.png -------------------------------------------------------------------------------- /pyscandir/doc/images/windows_scandir_windows.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brmmm3/scandir-rs/HEAD/pyscandir/doc/images/windows_scandir_windows.png -------------------------------------------------------------------------------- /scandir/doc/images/windows_scandir_linux-5.9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brmmm3/scandir-rs/HEAD/scandir/doc/images/windows_scandir_linux-5.9.png -------------------------------------------------------------------------------- /pyscandir/examples/fastproperties/fastproperties.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brmmm3/scandir-rs/HEAD/pyscandir/examples/fastproperties/fastproperties.gif -------------------------------------------------------------------------------- /pyscandir/examples/fastproperties/fastproperties.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brmmm3/scandir-rs/HEAD/pyscandir/examples/fastproperties/fastproperties.ico -------------------------------------------------------------------------------- /pyscandir/examples/fastproperties/doc/Linux_workspace.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brmmm3/scandir-rs/HEAD/pyscandir/examples/fastproperties/doc/Linux_workspace.jpg -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | indent_style = space 6 | 
indent_size = 4 7 | end_of_line = lf 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | /target 3 | /scandir_rs/target 4 | **/*.rs.bk 5 | 6 | .vscode/ 7 | 8 | Cargo.lock 9 | 10 | *.pyc 11 | 12 | *.lock 13 | 14 | dist/ 15 | 16 | .idea/ 17 | 18 | /venv 19 | pyscandir/poetry.lock 20 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | 3 | resolver = "2" 4 | 5 | members = ["scandir", "pyscandir"] 6 | 7 | [profile.release] 8 | codegen-units = 1 9 | debug = false 10 | incremental = false 11 | lto = true 12 | opt-level = 3 13 | #panic = "abort" 14 | 15 | [profile.release.build-override] 16 | opt-level = 0 17 | -------------------------------------------------------------------------------- /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.x86_64-unknown-linux-gnu] 2 | rustflags = ["-C", "target-cpu=x86-64-v3", "-Zshare-generics=n"] 3 | 4 | [target.x86_64-pc-windows-gnu] 5 | rustflags = ["-C", "target-cpu=x86-64-v3", "-Zshare-generics=n"] 6 | 7 | [target.x86_64-pc-windows-msvc] 8 | rustflags = ["-C", "target-cpu=x86-64-v3", "-Zshare-generics=n"] 9 | linker = "rust-lld.exe" 10 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | target-branch: main 8 | cooldown: 9 | default-days: 4 10 | - package-ecosystem: "cargo" 11 | directory: "/" 12 | schedule: 13 | interval: "weekly" 14 | target-branch: main 15 
| cooldown: 16 | default-days: 4 17 | -------------------------------------------------------------------------------- /pyscandir/examples/count_cm.py: -------------------------------------------------------------------------------- 1 | # Example with context manager 2 | 3 | import time 4 | 5 | from scandir_rs import Count, ReturnType 6 | 7 | with Count("/usr", return_type=ReturnType.Ext) as instance: 8 | while instance.busy: 9 | statistics = instance.results() 10 | # Do something... 11 | time.sleep(0.1) 12 | print(instance.results()) 13 | print(instance.to_speedy()) 14 | print(instance.to_bincode()) 15 | print(instance.to_json()) 16 | -------------------------------------------------------------------------------- /pyscandir/examples/count.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | 5 | from scandir_rs import Count 6 | 7 | dirName = "C:/Windows" if os.name == "nt" else "/usr" 8 | stats = Count(dirName).collect() 9 | print(stats) 10 | print(stats.to_speedy()) 11 | print(stats.to_bincode()) 12 | print(stats.to_json()) 13 | # Output is something like: 14 | # Statistics { dirs: 76923, files: 648585, slinks: 48089, 15 | # hlinks: 0, devices: 0, pipes: 0, size: 0, usage: 0, 16 | # errors: [], duration: 1.07 } 17 | -------------------------------------------------------------------------------- /pyscandir/src/def/mod.rs: -------------------------------------------------------------------------------- 1 | use pyo3::prelude::*; 2 | 3 | pub mod count; 4 | pub use count::Statistics; 5 | pub mod direntry; 6 | pub mod walk; 7 | pub use direntry::{DirEntry, DirEntryExt}; 8 | pub mod scandir; 9 | pub mod toc; 10 | pub use toc::Toc; 11 | 12 | #[pyclass(eq, eq_int)] 13 | #[derive(Debug, Clone, PartialEq)] 14 | pub enum ReturnType { 15 | Base, 16 | Ext, 17 | } 18 | 19 | impl ReturnType { 20 | #[allow(clippy::wrong_self_convention)] 21 | pub fn from_object(&self) -> ::scandir::ReturnType { 22 
def CreateTempFileTree(dircnt: int, depth: int, filecnt: int):
    """Create the temporary directory tree used by the scandir_rs tests.

    Every directory that gets created receives ``filecnt`` empty files for each
    of the extensions ``.bin``, ``.txt`` and ``.log``.

    The nesting is NOT uniform: the first top-level directory gets a chain of
    ``depth`` nested directories, and the chain shrinks by one level for every
    following top-level directory. Once it reaches zero no further top-level
    directories are created at all. ``CreateTempFileTree(10, 3, 10)`` therefore
    yields only ``dir0`` (3 levels), ``dir1`` (2 levels) and ``dir2`` (1 level),
    i.e. 6 directories and 180 files. test_scandir.py asserts 186 entries, which
    matches exactly this layout, so the behavior has to stay as it is.

    Returns the ``tempfile.TemporaryDirectory`` object; the caller is
    responsible for calling ``cleanup()`` on it.
    """
    print(
        f"Create temporary directory with {dircnt} directories with depth {depth} and {3 * filecnt} files")
    tempDir = tempfile.TemporaryDirectory(prefix="scandir_rs_")
    # Number of nested levels the next top-level directory will receive.
    levels = depth
    for topIndex in range(dircnt):
        currentDir = f"{tempDir.name}/dir{topIndex}"
        for level in range(levels):
            os.makedirs(currentDir)
            for fileIndex in range(filecnt):
                for extension in ("bin", "txt", "log"):
                    open(f"{currentDir}/file{fileIndex}.{extension}", "wb").close()
            currentDir = f"{currentDir}/dir{level}"
        # Each populated top-level directory shortens the chain for the next one.
        if levels > 0:
            levels -= 1
    return tempDir
-------------------------------------------------------------------------------- /scandir/src/def/mod.rs: -------------------------------------------------------------------------------- 1 | use glob_sl::{MatchOptions, Pattern}; 2 | 3 | pub type ErrorsType = Vec<(String, String)>; // Tuple with file path and error message 4 | 5 | pub mod count; 6 | pub use count::Statistics; 7 | pub mod direntry; 8 | pub mod options; 9 | pub mod walk; 10 | pub use direntry::{DirEntry, DirEntryExt}; 11 | pub use options::Options; 12 | pub mod scandir; 13 | pub use scandir::ScandirResult; 14 | pub mod toc; 15 | pub use toc::Toc; 16 | 17 | #[derive(Debug, Clone, PartialEq)] 18 | pub struct Filter { 19 | pub dir_include: Vec, 20 | pub dir_exclude: Vec, 21 | pub file_include: Vec, 22 | pub file_exclude: Vec, 23 | pub options: Option, 24 | } 25 | 26 | #[derive(Debug, Clone, PartialEq, PartialOrd)] 27 | pub enum ReturnType { 28 | Base, 29 | Ext, 30 | } 31 | -------------------------------------------------------------------------------- /scandir/examples/count.rs: -------------------------------------------------------------------------------- 1 | use std::env; 2 | use std::io::Error; 3 | 4 | use scandir::Count; 5 | 6 | fn main() -> Result<(), Error> { 7 | let args: Vec = env::args().collect(); 8 | let default_dir = "/tmp".to_string(); 9 | let root_dir = &args.get(1).unwrap_or(&default_dir); 10 | let mut instance = Count::new(root_dir)?; 11 | instance = instance.dir_exclude(Some(vec!["dir0".to_owned(), "dir1".to_owned()])); 12 | if args.len() > 2 { 13 | instance = instance.extended(true); 14 | } 15 | let _results = instance.collect()?; 16 | println!("options {:#?}", instance.options()); 17 | println!("results {:#?}", instance.results()); 18 | println!("finished {:?}", instance.finished()); 19 | println!("has more errors {:?}", instance.has_errors()); 20 | println!("duration {:?}", instance.duration()); 21 | Ok(()) 22 | } 23 | 
/// A path together with the [`Toc`] that belongs to it.
///
/// `Readable`/`Writable` are derived when the `speedy` feature is enabled;
/// `Deserialize`/`Serialize` are derived when the `bincode` or `json` feature
/// is enabled.
// NOTE(review): presumably `path` is the directory that `toc` lists — confirm
// against the code that builds these entries.
#[cfg_attr(feature = "speedy", derive(Readable, Writable))]
#[cfg_attr(
    any(feature = "bincode", feature = "json"),
    derive(Deserialize, Serialize)
)]
#[derive(Debug, Clone, PartialEq)]
pub struct WalkEntry {
    pub path: String,
    pub toc: Toc,
}

/// Same shape as [`WalkEntry`]: a path together with its [`Toc`].
///
/// The derives are feature-gated exactly like those of [`WalkEntry`].
// NOTE(review): looks like the counterpart used for `ReturnType::Ext` — the
// fields are identical to `WalkEntry` here, so confirm where the two differ.
#[cfg_attr(feature = "speedy", derive(Readable, Writable))]
#[cfg_attr(
    any(feature = "bincode", feature = "json"),
    derive(Deserialize, Serialize)
)]
#[derive(Debug, Clone, PartialEq)]
pub struct WalkEntryExt {
    pub path: String,
    pub toc: Toc,
}

/// One walk result: a bare [`Toc`], a [`WalkEntry`] or a [`WalkEntryExt`].
///
/// Unlike the entry structs this enum derives neither `PartialEq` nor any of
/// the serialization traits.
#[derive(Debug, Clone)]
pub enum WalkResult {
    Toc(Toc),
    WalkEntry(WalkEntry),
    WalkEntryExt(WalkEntryExt),
}
12 | #[pymodule] 13 | #[pyo3(name = "scandir_rs")] 14 | fn init(_py: Python, m: &Bound) -> PyResult<()> { 15 | m.add("__version__", env!("CARGO_PKG_VERSION"))?; 16 | m.add_class::()?; 17 | m.add_class::()?; 18 | m.add_class::()?; 19 | m.add_class::()?; 20 | Ok(()) 21 | } 22 | -------------------------------------------------------------------------------- /tools/benchmark_results_nt_Windows_python.json: -------------------------------------------------------------------------------- 1 | { 2 | "stats": { 3 | "dirs": 212836, 4 | "files": 428834, 5 | "slinks": 0, 6 | "hlinks": 37428, 7 | "devices": 0, 8 | "pipes": 0, 9 | "size": 45924271263, 10 | "usage": 47145938944, 11 | "errors": [], 12 | "duration": 14.4608376 13 | }, 14 | "Count.collect": 1.4410305777791008, 15 | "Count.collect(Ext)": 4.6700401555556, 16 | "os.walk": 36.25580859999658, 17 | "os.walk(Ext)": 89.76988140000321, 18 | "Walk.iter": 4.275886599997951, 19 | "Walk.iter(Ext)": 4.457214966668592, 20 | "Walk.collect": 5.365913033335043, 21 | "Walk.collect(Ext)": 5.679970233332521, 22 | "scantree (os.scandir)": 24.699874533330632, 23 | "Scandir.iter": 4.2446447000014205, 24 | "Scandir.iter(Ext)": 14.060320599999008, 25 | "Scandir.collect": 4.712805566664126, 26 | "Scandir.collect(Ext)": 14.565464133333686 27 | } -------------------------------------------------------------------------------- /tools/benchmark_results_nt_linux-5.9_python.json: -------------------------------------------------------------------------------- 1 | { 2 | "stats": { 3 | "dirs": 4712, 4 | "files": 69998, 5 | "slinks": 0, 6 | "hlinks": 0, 7 | "devices": 0, 8 | "pipes": 0, 9 | "size": 1155021778, 10 | "usage": 1321263104, 11 | "errors": [], 12 | "duration": 0.8444677 13 | }, 14 | "Count.collect": 0.027092222222967796, 15 | "Count.collect(Ext)": 0.2757384444444647, 16 | "os.walk": 0.7714034333330346, 17 | "os.walk(Ext)": 6.289230066666884, 18 | "Walk.iter": 0.09197329999976016, 19 | "Walk.iter(Ext)": 0.09013806666674402, 20 | 
"Walk.collect": 0.12844003333399692, 21 | "Walk.collect(Ext)": 0.12383549999988948, 22 | "scantree (os.scandir)": 0.6110425333317835, 23 | "Scandir.iter": 0.09354486666658583, 24 | "Scandir.iter(Ext)": 0.8598253333329922, 25 | "Scandir.collect": 0.13204666667055184, 26 | "Scandir.collect(Ext)": 0.8926219999993918 27 | } -------------------------------------------------------------------------------- /tools/benchmark_results_posix_usr_python.json: -------------------------------------------------------------------------------- 1 | { 2 | "stats": { 3 | "dirs": 45061, 4 | "files": 388526, 5 | "slinks": 34937, 6 | "hlinks": 177, 7 | "devices": 0, 8 | "pipes": 0, 9 | "size": 24872559571, 10 | "usage": 25802964992, 11 | "errors": [], 12 | "duration": 0.491917376 13 | }, 14 | "Count.collect": 0.10447501088815948, 15 | "Count.collect(Ext)": 0.16466138099915245, 16 | "os.walk": 1.3398972343323596, 17 | "os.walk(Ext)": 3.7727819449986177, 18 | "Walk.iter": 0.2711520700007289, 19 | "Walk.iter(Ext)": 0.2779418813297525, 20 | "Walk.collect": 0.44415884966535185, 21 | "Walk.collect(Ext)": 0.4392061623366317, 22 | "scantree (os.scandir)": 2.7845730506669497, 23 | "Scandir.iter": 0.42965780533268116, 24 | "Scandir.iter(Ext)": 0.5957438273326261, 25 | "Scandir.collect": 0.6680224250012543, 26 | "Scandir.collect(Ext)": 0.8741988956656618 27 | } -------------------------------------------------------------------------------- /tools/benchmark_results_posix_linux-5.9_python.json: -------------------------------------------------------------------------------- 1 | { 2 | "stats": { 3 | "dirs": 4711, 4 | "files": 69973, 5 | "slinks": 38, 6 | "hlinks": 0, 7 | "devices": 0, 8 | "pipes": 0, 9 | "size": 1155199283, 10 | "usage": 1321480192, 11 | "errors": [], 12 | "duration": 0.091063006 13 | }, 14 | "Count.collect": 0.01647606499965251, 15 | "Count.collect(Ext)": 0.026376396555052552, 16 | "os.walk": 0.1485356019984465, 17 | "os.walk(Ext)": 0.5309350373330138, 18 | "Walk.iter": 
0.043524941667177096, 19 | "Walk.iter(Ext)": 0.046542457668692805, 20 | "Walk.collect": 0.06586165366752539, 21 | "Walk.collect(Ext)": 0.0650285099982284, 22 | "scantree (os.scandir)": 0.4416154136658103, 23 | "Scandir.iter": 0.06728461799987902, 24 | "Scandir.iter(Ext)": 0.10148573933111038, 25 | "Scandir.collect": 0.08415932733623777, 26 | "Scandir.collect(Ext)": 0.12246924233719862 27 | } -------------------------------------------------------------------------------- /scandir/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! `scandir` is a directory iteration module like `walk`, but with more features and higher speed. Depending on the function call 2 | //! it yields a list of paths, tuple of lists grouped by their entry type or ``DirEntry`` objects that include file type and stat information along 3 | //! with the name. 4 | //! 5 | //! If you are just interested in directory statistics you can use the ``Count``. 6 | //! 7 | //! `scandir` contains following classes: 8 | //! - `Count` for determining statistics of a directory. 9 | //! - `Walk` for getting names of directory entries. 10 | //! - `Scandir` for getting detailed stats of directory entries. 
11 | 12 | #![cfg_attr(windows, feature(windows_by_handle))] 13 | 14 | extern crate glob_sl; 15 | #[cfg_attr(any(feature = "bincode", feature = "json"), macro_use)] 16 | #[cfg(any(feature = "bincode", feature = "json"))] 17 | extern crate serde_derive; 18 | 19 | pub mod def; 20 | pub use def::*; 21 | pub mod common; 22 | pub mod count; 23 | pub use count::*; 24 | pub mod walk; 25 | pub use walk::*; 26 | pub mod scandir; 27 | pub use scandir::*; 28 | -------------------------------------------------------------------------------- /pyscandir/LICENSE: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any 2 | person obtaining a copy of this software and associated 3 | documentation files (the "Software"), to deal in the 4 | Software without restriction, including without 5 | limitation the rights to use, copy, modify, merge, 6 | publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software 8 | is furnished to do so, subject to the following 9 | conditions: 10 | 11 | The above copyright notice and this permission notice 12 | shall be included in all copies or substantial portions 13 | of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 
24 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2024-present Martin Bammer 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 
# Number of entries Scandir must report for CreateTempFileTree(10, 3, 10).
# That call populates only dir0 (3 nested levels), dir1 (2 levels) and dir2
# (1 level): 6 directories with 30 files each -> 6 + 180 = 186 entries.
EXPECTED_ENTRY_CNT = 186


@pytest.fixture(scope="session", autouse=True)
def tempDir():
    """Provide one temporary file tree for the whole test session.

    The tree is removed again after the last test has finished.
    """
    tmpDir = CreateTempFileTree(10, 3, 10)
    yield tmpDir
    tmpDir.cleanup()


def _scan_and_check(tempDir, return_type, expect_st_mode):
    """Iterate a Scandir over the tree and check every yielded entry.

    Each entry must carry positive ``atime``, ``ctime`` and ``mtime`` values,
    and must (or must not) have an ``st_mode`` attribute as given by
    ``expect_st_mode``. Entries are keyed by ``path`` so the final count is
    the number of distinct paths.
    """
    sd = Scandir(tempDir.name, return_type=return_type)
    contents = {}
    for dirEntry in sd:
        assert dirEntry.atime > 0.0
        assert dirEntry.ctime > 0.0
        assert dirEntry.mtime > 0.0
        assert hasattr(dirEntry, "st_mode") == expect_st_mode
        contents[dirEntry.path] = dirEntry
    assert len(contents) == EXPECTED_ENTRY_CNT


def test_scandir_fast(tempDir):
    """ReturnType.Base entries have time stamps but no ``st_mode``."""
    _scan_and_check(tempDir, ReturnType.Base, expect_st_mode=False)


def test_scandir_ext(tempDir):
    """ReturnType.Ext entries have time stamps and ``st_mode``."""
    _scan_and_check(tempDir, ReturnType.Ext, expect_st_mode=True)
24 | crate-type = ["cdylib"] 25 | 26 | [dependencies] 27 | pyo3 = { version = "0.27.1", features = [ 28 | "extension-module", 29 | "generate-import-lib", 30 | ] } 31 | speedy = { version = "0.8", optional = true } 32 | bincode = { version = "2.0", features = ["derive", "serde"], optional = true } 33 | serde = { version = "1.0", optional = true } 34 | serde_derive = { version = "1.0", optional = true } 35 | serde_json = { version = "1.0", optional = true } 36 | scandir = { version = "2.9" } 37 | #scandir = { path = "../scandir", version = "2.9.2" } 38 | 39 | [features] 40 | default = ["speedy", "bincode", "json"] 41 | speedy = ["dep:speedy"] 42 | bincode = ["dep:bincode", "dep:serde", "dep:serde_derive"] 43 | json = ["dep:serde_json", "dep:serde", "dep:serde_derive"] 44 | -------------------------------------------------------------------------------- /scandir/src/def/options.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | 3 | use super::ReturnType; 4 | 5 | #[derive(Debug, Clone)] 6 | pub struct Options { 7 | pub root_path: PathBuf, 8 | pub sorted: bool, 9 | pub skip_hidden: bool, 10 | pub max_depth: usize, 11 | pub max_file_cnt: usize, 12 | pub dir_include: Option>, 13 | pub dir_exclude: Option>, 14 | pub file_include: Option>, 15 | pub file_exclude: Option>, 16 | pub case_sensitive: bool, 17 | pub follow_links: bool, 18 | pub return_type: ReturnType, 19 | } 20 | 21 | impl Options { 22 | #[allow(clippy::too_many_arguments)] 23 | pub fn new( 24 | root_path: PathBuf, 25 | sorted: bool, 26 | skip_hidden: bool, 27 | max_depth: usize, 28 | max_file_cnt: usize, 29 | dir_include: Option>, 30 | dir_exclude: Option>, 31 | file_include: Option>, 32 | file_exclude: Option>, 33 | case_sensitive: bool, 34 | follow_links: bool, 35 | return_type: Option, 36 | ) -> Self { 37 | Self { 38 | root_path, 39 | sorted, 40 | skip_hidden, 41 | max_depth, 42 | max_file_cnt, 43 | dir_include, 44 | dir_exclude, 45 | 
file_include, 46 | file_exclude, 47 | case_sensitive, 48 | follow_links, 49 | return_type: return_type.unwrap_or(ReturnType::Base), 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /scandir/examples/walk.rs: -------------------------------------------------------------------------------- 1 | use std::io::Error; 2 | use std::thread; 3 | use std::time::Duration; 4 | use std::{env, time::Instant}; 5 | 6 | use scandir::{ReturnType, Walk}; 7 | 8 | fn main() -> Result<(), Error> { 9 | let args: Vec = env::args().collect(); 10 | let default_dir = "/usr".to_string(); 11 | let root_dir = &args.get(1).unwrap_or(&default_dir); 12 | let mut instance = Walk::new(root_dir, Some(true))?.max_file_cnt(100); 13 | if args.contains(&"--ext".to_string()) { 14 | instance = instance.return_type(ReturnType::Ext); 15 | } 16 | println!("options {:#?}", instance.options()); 17 | instance.start()?; 18 | let now = Instant::now(); 19 | loop { 20 | if !instance.busy() { 21 | break; 22 | } 23 | thread::sleep(Duration::from_millis(10)); 24 | } 25 | let result = format!("{:#?}", instance.collect()?); 26 | println!("dt={}", now.elapsed().as_secs_f64()); 27 | let result_str = format!("{result:#?}"); 28 | println!( 29 | "result {}", 30 | &result_str[..std::cmp::min(result_str.len(), 500)] 31 | ); 32 | let results = instance.results(false); 33 | println!("result_cnt {}", results.len()); 34 | println!("result_cnt {}", instance.results_cnt(false)); 35 | println!("finished {:?}", instance.finished()); 36 | println!("has_errors {:?}", instance.has_errors()); 37 | println!("error_cnt {}", instance.has_errors()); 38 | println!("statistics {:#?}", instance.statistics()); 39 | println!("duration {:?}", instance.duration()); 40 | Ok(()) 41 | } 42 | -------------------------------------------------------------------------------- /pyscandir/examples/fastproperties/fastproperties.spec: 
-------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | 3 | block_cipher = None 4 | 5 | 6 | a = Analysis(['fastproperties.py'], 7 | pathex=[], 8 | binaries=[], 9 | datas=[], 10 | hiddenimports=[], 11 | hookspath=[], 12 | runtime_hooks=[], 13 | excludes=['_bz2', '_ctypes', '_hashlib', '_lzma', '_socket', '_ssl', 'pyexpat', 'numpy', 'pytz'], 14 | win_no_prefer_redirects=False, 15 | win_private_assemblies=False, 16 | cipher=block_cipher, 17 | noarchive=False) 18 | 19 | a.datas = [entry for entry in a.datas if not entry[0].startswith("lib2to3")] 20 | a.datas = [entry for entry in a.datas 21 | if "tzdata" not in entry[0] and "email" not in entry[0] and "unittest" not in entry[0] 22 | and "msgs" not in entry[0] and "encoding" not in entry[0] and "README" not in entry[0] 23 | and "tai-ku" not in entry[0]] 24 | a.datas.append(("fastproperties.ico", "fastproperties.ico", "DATA")) 25 | a.binaries = [entry for entry in a.binaries if not entry[0].startswith("libopenblas")] 26 | 27 | pyz = PYZ(a.pure, a.zipped_data, 28 | cipher=block_cipher) 29 | 30 | exe = EXE(pyz, 31 | a.scripts, 32 | a.binaries, 33 | a.zipfiles, 34 | a.datas, 35 | [], 36 | name='fastproperties', 37 | debug=False, 38 | bootloader_ignore_signals=False, 39 | strip=False, 40 | upx=True, 41 | upx_exclude=[], 42 | runtime_tmpdir=None, 43 | console=False, 44 | icon="fastproperties.ico") 45 | -------------------------------------------------------------------------------- /pyscandir/run_pytest_for_all_wheels.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | eval "$(pyenv init -)" 3 | 4 | name=`grep -Po '\bname\s*=\s*"\K.*?(?=")' Cargo.toml | head -1 | tr - _` 5 | version=`grep -Po '\bversion\s*=\s*"\K.*?(?=")' Cargo.toml | head -1` 6 | 7 | pyenv shell 3.7.15 8 | pip install --upgrade pip 9 | pip install -U pytest 10 | pip install --force-reinstall 
../target/wheels/$name-$version-cp37-cp37m-linux_x86_64.whl 11 | python3.7 -m pytest 12 | 13 | pyenv shell 3.8.15 14 | pip install --upgrade pip 15 | pip install -U pytest 16 | pip install --force-reinstall ../target/wheels/$name-$version-cp38-cp38-linux_x86_64.whl 17 | python3.8 -m pytest 18 | 19 | pyenv shell 3.9.15 20 | pip install --upgrade pip 21 | pip install -U pytest 22 | pip install --force-reinstall ../target/wheels/$name-$version-cp39-cp39-linux_x86_64.whl 23 | python3.9 -m pytest 24 | 25 | pyenv shell 3.10.14 26 | pip install --upgrade pip 27 | pip install -U pytest 28 | pip install --force-reinstall ../target/wheels/$name-$version-cp310-cp310-linux_x86_64.whl 29 | python3.10 -m pytest 30 | 31 | pyenv shell 3.11.11 32 | pip install --upgrade pip 33 | pip install -U pytest 34 | pip install --force-reinstall ../target/wheels/$name-$version-cp311-cp311-linux_x86_64.whl 35 | python3.11 -m pytest 36 | 37 | pyenv shell 3.12.8 38 | pip install --upgrade pip 39 | pip install -U pytest 40 | pip install --force-reinstall ../target/wheels/$name-$version-cp312-cp312-linux_x86_64.whl 41 | python3.12 -m pytest 42 | 43 | pyenv shell 3.13.2 44 | pip install --upgrade pip 45 | pip install -U pytest 46 | pip install --force-reinstall ../target/wheels/$name-$version-cp313-cp313-linux_x86_64.whl 47 | python3.13 -m pytest 48 | 49 | pyenv shell --unset 50 | -------------------------------------------------------------------------------- /pyscandir/examples/scandir.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import threading 5 | 6 | from scandir_rs import Scandir, ReturnType 7 | 8 | 9 | def Counter(sd): 10 | print("Counter started...") 11 | x = 0 12 | while not sd.finished: 13 | x += 1 14 | print(f"X={x}") 15 | 16 | 17 | dirName = "C:/Windows/appcompat" if os.name == "nt" else "/tmp" 18 | 19 | # ReturnType.Base is the default 20 | print("\n*** return_type=ReturnType.Base:") 21 | for 
dirEntry in Scandir(dirName, return_type=ReturnType.Base): 22 | print(dirEntry) 23 | 24 | # Or collect all results and return them in one step 25 | results = Scandir(dirName).collect() 26 | 27 | # And with extended metadata 28 | instance = Scandir(dirName) 29 | # Use method `extended` as an alternative to the above usage. 30 | instance.extended(True) 31 | results, errors = instance.collect() 32 | print("\n*** Scandir results_cnt=", len(results)) 33 | for dirEntry in results: 34 | print(dirEntry.path, dirEntry.ctime) 35 | 36 | print("\n*** return_type=ReturnType.Ext:") 37 | for dirEntry in Scandir(dirName, return_type=ReturnType.Ext): 38 | print(dirEntry) 39 | 40 | 41 | print("*** Parallel Threads ***") 42 | sd = Scandir(".", return_type=ReturnType.Ext) 43 | thr = threading.Thread(target=Counter, args=(sd,), daemon=True) 44 | thr.start() 45 | sd.start() 46 | thr.join() 47 | results = sd.results() 48 | print("Finished", sd.busy, sd.finished, sd.has_errors(), len(results)) 49 | print(str(results)[:200]) 50 | print(sd.statistics) 51 | # Need to be compiled with feature "speedy" 52 | # print(sd.to_speedy()) 53 | # Need to be compiled with feature "bincode" 54 | # print(sd.to_bincode()) 55 | # Need to be compiled with feature "json" 56 | # print(sd.to_json()) 57 | -------------------------------------------------------------------------------- /scandir/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "scandir" 3 | version = "2.9.5" 4 | authors = ["Martin Bammer "] 5 | description = "A fast file tree scanner written in Rust" 6 | edition = "2024" 7 | license = "MIT" 8 | repository = "https://github.com/brmmm3/scandir-rs" 9 | homepage = "https://github.com/brmmm3/scandir-rs" 10 | readme = "README.md" 11 | keywords = ["fast", "scandir", "walk"] 12 | include = ["Cargo.toml", "src/*"] 13 | 14 | [lib] 15 | name = "scandir" 16 | crate-type = ["lib"] 17 | 18 | [dependencies] 19 | jwalk-meta = "0.9" 20 | rayon = 
"1.11" 21 | flume = "0.11" 22 | glob-sl = "0.4" 23 | speedy = { version = "0.8", optional = true } 24 | bincode = { version = "2.0", features = ["serde", "derive"], optional = true } 25 | serde = { version = "1.0", optional = true } 26 | serde_derive = { version = "1.0", optional = true } 27 | serde_json = { version = "1.0", optional = true } 28 | 29 | [dev-dependencies] 30 | tempfile = "3.23" 31 | criterion = { version = "0.7", features = ["html_reports"] } 32 | reqwest = { version = "0.12", features = ["blocking"] } 33 | flate2 = "1.1" 34 | tar = "0.4" 35 | walkdir = "2.5" 36 | scan_dir = "0.3" 37 | 38 | [features] 39 | default = ["speedy", "bincode", "json"] 40 | speedy = ["dep:speedy"] 41 | bincode = ["dep:bincode", "dep:serde", "dep:serde_derive"] 42 | json = ["dep:serde_json", "dep:serde", "dep:serde_derive"] 43 | 44 | [target.'cfg(unix)'.dependencies] 45 | expanduser = "1.2" 46 | 47 | [target.'cfg(unix)'.dev-dependencies] 48 | unix-named-pipe = "0.2" 49 | 50 | [[bench]] 51 | name = "count" 52 | path = "benches/count.rs" 53 | harness = false 54 | 55 | [[bench]] 56 | name = "walk" 57 | path = "benches/walk.rs" 58 | harness = false 59 | 60 | [[bench]] 61 | name = "scandir" 62 | path = "benches/scandir.rs" 63 | harness = false 64 | -------------------------------------------------------------------------------- /scandir/examples/scandir.rs: -------------------------------------------------------------------------------- 1 | use std::io::Error; 2 | use std::time::Duration; 3 | use std::{env, time::Instant}; 4 | 5 | use scandir::{ReturnType, Scandir}; 6 | 7 | fn main() -> Result<(), Error> { 8 | let args: Vec = env::args().collect(); 9 | let default_dir = "/usr".to_string(); 10 | let root_dir = &args.get(1).unwrap_or(&default_dir); 11 | let mut instance = Scandir::new(root_dir, Some(true))?; 12 | //instance = instance.max_file_cnt(100); 13 | if args.contains(&"--ext".to_string()) { 14 | instance = instance.return_type(ReturnType::Ext); 15 | } 16 | 
println!("options {:#?}", instance.options()); 17 | instance.start()?; 18 | let now = Instant::now(); 19 | std::thread::sleep(Duration::from_millis(100)); 20 | //instance.stop(); 21 | let entries = instance.collect()?; 22 | println!("dt={}", now.elapsed().as_secs_f64()); 23 | for (path, error) in entries.errors.iter() { 24 | println!("ERROR {path:?}: {error}"); 25 | } 26 | if let Some(first_result) = entries.results.first() { // an empty scan result must not panic the example 27 | let (path, size) = (first_result.path(), first_result.size()); 28 | println!("First file {path:?} has size {size}"); 29 | } else { 30 | println!("No entries found"); 31 | } 32 | let results = instance.results(false); 33 | let result_str = format!("{results:#?}"); // format once; Debug-formatting the formatted String again would print an escaped, quoted blob 34 | println!( 35 | "result {}", 36 | result_str.chars().take(500).collect::<String>() // truncate by chars: a byte slice at 500 can panic inside a multi-byte path name 37 | ); 38 | println!("finished {:?}", instance.finished()); 39 | println!("has more entries {:?}", instance.has_entries(true)); 40 | println!("has_errors {:?}", instance.has_errors()); 41 | println!("results {}", entries.results.len()); 42 | println!("error_cnt {}", entries.errors.len()); 43 | println!("duration {:?}", instance.duration()); 44 | Ok(()) 45 | } 46 | -------------------------------------------------------------------------------- /scandir/src/def/count.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "bincode")] 2 | use bincode::error::EncodeError; 3 | #[cfg(feature = "speedy")] 4 | use speedy::{Readable, Writable}; 5 | 6 | #[cfg_attr(feature = "speedy", derive(Readable, Writable))] 7 | #[cfg_attr( 8 | any(feature = "bincode", feature = "json"), 9 | derive(Deserialize, Serialize) 10 | )] 11 | #[derive(Debug, Clone, PartialEq)] 12 | pub struct Statistics { 13 | pub dirs: i32, 14 | pub files: i32, 15 | pub slinks: i32, 16 | pub hlinks: i32, 17 | pub devices: i32, 18 | pub pipes: i32, 19 | pub size: u64, 20 | pub usage: u64, 21 | pub errors: Vec, 22 | pub duration: f64, 23 | } 24 | 25 | impl Statistics { 26 | pub fn new() -> Self { 27 | Statistics { 28 | dirs: 0, 29 | files: 
0, 30 | slinks: 0, 31 | hlinks: 0, 32 | devices: 0, 33 | pipes: 0, 34 | size: 0, 35 | usage: 0, 36 | errors: Vec::new(), 37 | duration: 0.0, 38 | } 39 | } 40 | 41 | pub fn clear(&mut self) { 42 | self.dirs = 0; 43 | self.files = 0; 44 | self.slinks = 0; 45 | self.hlinks = 0; 46 | self.devices = 0; 47 | self.pipes = 0; 48 | self.size = 0; 49 | self.usage = 0; 50 | self.errors.clear(); 51 | self.duration = 0.0; 52 | } 53 | 54 | #[cfg(feature = "json")] 55 | pub fn to_json(&self) -> serde_json::Result { 56 | serde_json::to_string(self) 57 | } 58 | 59 | #[cfg(feature = "bincode")] 60 | pub fn to_vec(&self) -> Result, EncodeError> { 61 | bincode::serde::encode_to_vec(self, bincode::config::legacy()) 62 | } 63 | } 64 | 65 | impl Default for Statistics { 66 | fn default() -> Self { 67 | Self::new() 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /pyscandir/tests/test_count.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pytest 4 | from scandir_rs import Count, ReturnType 5 | 6 | from .common import CreateTempFileTree 7 | 8 | 9 | @pytest.fixture(scope="session", autouse=True) 10 | def tempDir(): 11 | tmpDir = CreateTempFileTree(10, 3, 10) 12 | yield tmpDir 13 | tmpDir.cleanup() 14 | 15 | 16 | def test_count(tempDir): 17 | count = Count(tempDir.name) 18 | count.start() 19 | count.join() 20 | assert count.as_dict() == {'dirs': 6, 'files': 180, 21 | 'size': 24576, 'usage': 24576} 22 | 23 | 24 | def test_count_extended(tempDir): 25 | count = Count(tempDir.name, return_type=ReturnType.Ext).collect() 26 | assert count.as_dict() == {'dirs': 6, 'files': 180, 27 | 'size': 24576, 'usage': 24576} 28 | 29 | 30 | def test_count_extended_file_exclude(tempDir): 31 | count = Count(tempDir.name, return_type=ReturnType.Ext, 32 | file_exclude=["*.bin"]).collect() 33 | assert count.as_dict() == {'dirs': 6, 'files': 120, 34 | 'size': 24576, 'usage': 24576} 35 | 36 
| 37 | def test_count_extended_file_include(tempDir): 38 | count = Count(tempDir.name, return_type=ReturnType.Ext, 39 | file_include=["*.bin"]).collect() 40 | assert count.as_dict() == {'dirs': 6, 'files': 60, 41 | 'size': 24576, 'usage': 24576} 42 | 43 | 44 | def test_count_extended_dir_include(tempDir): 45 | count = Count(tempDir.name, return_type=ReturnType.Ext, 46 | dir_include=["dir0/**"]).collect() 47 | assert count.as_dict() == {'dirs': 3, 'files': 90, 48 | 'size': 12288, 'usage': 12288} 49 | 50 | 51 | def test_count_extended_dir_exclude(tempDir): 52 | count = Count(tempDir.name, return_type=ReturnType.Ext, 53 | dir_exclude=["dir0", "dir1"]).collect() 54 | assert count.as_dict() == {'dirs': 1, 'files': 30, 55 | 'size': 4096, 'usage': 4096} 56 | -------------------------------------------------------------------------------- /pyscandir/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "scandir-rs" 3 | version = "2.9.5" 4 | authors = [{ name = "Martin Bammer", email = "mrbm74@gmail.com" }] 5 | description = "A fast file tree scanner written in Rust" 6 | license = "MIT" 7 | readme = "README.md" 8 | dependencies = [] 9 | requires-python = ">=3.10" 10 | classifiers = [ 11 | "Development Status :: 4 - Beta", 12 | "Intended Audience :: Developers", 13 | "Programming Language :: Rust", 14 | "Programming Language :: Python", 15 | "Programming Language :: Python :: 3", 16 | "Programming Language :: Python :: 3.10", 17 | "Programming Language :: Python :: 3.11", 18 | "Programming Language :: Python :: 3.12", 19 | "Programming Language :: Python :: 3.13", 20 | "Programming Language :: Python :: 3.14", 21 | "Programming Language :: Python :: Implementation :: CPython", 22 | "Operating System :: POSIX :: Linux", 23 | "Operating System :: MacOS", 24 | "Operating System :: Microsoft :: Windows", 25 | "License :: OSI Approved :: MIT License", 26 | 'Topic :: Software Development :: Libraries', 27 | 'Topic 
:: Software Development :: Libraries :: Python Modules', 28 | 'Topic :: Utilities', 29 | ] 30 | 31 | [build-system] 32 | requires = ["setuptools", "wheel", "setuptools_rust", "maturin>=1.5.1"] 33 | build-backend = "maturin" 34 | 35 | [tool.maturin] 36 | manylinux = "2_34" 37 | strip = true 38 | 39 | [tool.poetry.dependencies] 40 | python = '>=3.10' 41 | maturin = ">=1.10.1" 42 | 43 | [tool.poetry.group.dev.dependencies] 44 | pytest = ">=9.0.1" 45 | 46 | [dependency-groups] 47 | dev = [ 48 | "pytest == 9.0.1", 49 | "dirty-equals == 0.11", 50 | "ruff == 0.14.4", 51 | "zizmor == 1.16.3", 52 | "typos == 1.39.0", 53 | "rumdl == 0.0.176", 54 | ] 55 | bench = [ 56 | "oyaml", 57 | "pyyaml", 58 | "ryaml", 59 | "ruamel-yaml", 60 | "polars == 1.35.2", 61 | "altair == 6.0.0", 62 | "py-cpuinfo == 9.0.0", 63 | "vl-convert-python == 1.8.0", 64 | ] 65 | 66 | [project.urls] 67 | Source = "https://github.com/brmmm3/scandir-rs" 68 | Homepage = "https://github.com/brmmm3/scandir-rs" 69 | "Bug Tracker" = "https://github.com/brmmm3/scandir-rs/issues" 70 | -------------------------------------------------------------------------------- /pyscandir/tests/test_walk.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import time 4 | 5 | import pytest 6 | from scandir_rs import Walk, ReturnType 7 | 8 | from .common import CreateTempFileTree 9 | 10 | 11 | @pytest.fixture(scope="session", autouse=True) 12 | def tempDir(): 13 | tmpDir = CreateTempFileTree(10, 3, 10) 14 | yield tmpDir 15 | tmpDir.cleanup() 16 | 17 | 18 | def test_walk_toc(tempDir): 19 | sd = Walk(tempDir.name) 20 | toc = sd.collect() 21 | assert not toc.errors 22 | assert not toc.other 23 | assert not toc.symlinks 24 | import os 25 | 26 | print(f"tempDir.name={tempDir.name}") 27 | print(f"toc.dirs={toc.dirs}") 28 | print(f"toc.files={toc.files}") 29 | print(f"entries={os.listdir(tempDir.name)}") 30 | assert len(toc.dirs) == 6 31 | assert len(toc.files) == 180 32 
| 33 | 34 | def test_walk_toc_extended(tempDir): 35 | sd = Walk(tempDir.name, return_type=ReturnType.Ext) 36 | toc = sd.collect() 37 | assert not toc.errors 38 | assert not toc.other 39 | assert not toc.symlinks 40 | import os 41 | 42 | print(f"tempDir.name={tempDir.name}") 43 | print(f"toc.dirs={toc.dirs}") 44 | print(f"toc.files={toc.files}") 45 | print(f"entries={os.listdir(tempDir.name)}") 46 | assert len(toc.dirs) == 6 47 | assert len(toc.files) == 180 48 | 49 | 50 | def test_walk_toc_iter(tempDir): 51 | sd = Walk(tempDir.name, return_type=ReturnType.Base) 52 | sd.start() 53 | while sd.busy: 54 | time.sleep(0.01) 55 | toc = sd.collect() 56 | assert not toc.errors 57 | assert not toc.other 58 | assert not toc.symlinks 59 | assert len(toc.dirs) == 6 60 | assert len(toc.files) == 180 61 | 62 | 63 | def test_walk_walk(tempDir): 64 | sd = Walk(tempDir.name, return_type=ReturnType.Base) 65 | allDirs = [] 66 | allFiles = [] 67 | for root, dirs, files in sd: 68 | allDirs.extend(dirs) 69 | allFiles.extend(files) 70 | assert len(allDirs) == 6 71 | assert len(allFiles) == 180 72 | 73 | 74 | def test_walk_walk_ext(tempDir): 75 | sd = Walk(tempDir.name, return_type=ReturnType.Ext) 76 | allDirs = [] 77 | allFiles = [] 78 | allSymlinks = [] 79 | allOther = [] 80 | allErrors = [] 81 | for root, dirs, files, symlinks, other, errors in sd: 82 | allDirs.extend(dirs) 83 | allFiles.extend(files) 84 | allSymlinks.extend(symlinks) 85 | allOther.extend(other) 86 | allErrors.extend(errors) 87 | assert not allErrors 88 | assert not allOther 89 | assert not allSymlinks 90 | assert len(allDirs) == 6 91 | assert len(allFiles) == 180 92 | -------------------------------------------------------------------------------- /pyscandir/src/def/toc.rs: -------------------------------------------------------------------------------- 1 | #[cfg(any(feature = "speedy", feature = "bincode", feature = "json"))] 2 | use pyo3::exceptions::PyException; 3 | use pyo3::prelude::*; 4 | #[cfg(any(feature = 
"speedy", feature = "bincode"))] 5 | use pyo3::types::PyBytes; 6 | use pyo3::types::PyDict; 7 | 8 | #[pyclass] 9 | #[derive(Debug, Clone)] 10 | pub struct Toc(scandir::Toc); 11 | 12 | impl Toc { 13 | pub fn from(entry: &scandir::Toc) -> Self { 14 | Toc(entry.clone()) 15 | } 16 | } 17 | 18 | #[pymethods] 19 | impl Toc { 20 | #[getter] 21 | fn dirs(&self) -> Vec { 22 | self.0.dirs() 23 | } 24 | 25 | #[getter] 26 | fn files(&self) -> Vec { 27 | self.0.files() 28 | } 29 | 30 | #[getter] 31 | fn symlinks(&self) -> Vec { 32 | self.0.symlinks() 33 | } 34 | 35 | #[getter] 36 | fn other(&self) -> Vec { 37 | self.0.other() 38 | } 39 | 40 | #[getter] 41 | fn errors(&self) -> Vec { 42 | self.0.errors() 43 | } 44 | 45 | fn as_dict(&self, py: Python) -> PyResult> { 46 | let pydict = PyDict::new(py); 47 | pydict.set_item("dirs", self.0.dirs.clone())?; 48 | pydict.set_item("files", self.0.files.clone())?; 49 | pydict.set_item("symlinks", self.0.symlinks.clone())?; 50 | pydict.set_item("other", self.0.other.clone())?; 51 | pydict.set_item("errors", self.0.errors.clone())?; 52 | Ok(pydict.into_any().unbind()) 53 | } 54 | 55 | #[cfg(feature = "speedy")] 56 | fn to_speedy(&self, py: Python) -> PyResult> { 57 | match self.0.to_speedy() { 58 | Ok(v) => Ok(PyBytes::new_with(py, v.len(), |b| { 59 | b.copy_from_slice(&v); 60 | Ok(()) 61 | })? 62 | .into()), 63 | Err(e) => Err(PyException::new_err(e.to_string())), 64 | } 65 | } 66 | 67 | #[cfg(feature = "bincode")] 68 | fn to_bincode(&self, py: Python) -> PyResult> { 69 | match self.0.to_bincode() { 70 | Ok(v) => Ok(PyBytes::new_with(py, v.len(), |b| { 71 | b.copy_from_slice(&v); 72 | Ok(()) 73 | })? 
74 | .into()), 75 | Err(e) => Err(PyException::new_err(e.to_string())), 76 | } 77 | } 78 | 79 | #[cfg(feature = "json")] 80 | fn to_json(&self) -> PyResult { 81 | self.0 82 | .to_json() 83 | .map_err(|e| PyException::new_err(e.to_string())) 84 | } 85 | 86 | fn __repr__(&self) -> String { 87 | format!("{self:?}") 88 | } 89 | 90 | fn __str__(&self) -> String { 91 | format!("{self:?}") 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /pyscandir/src/def/scandir.rs: -------------------------------------------------------------------------------- 1 | #[cfg(any(feature = "speedy", feature = "bincode", feature = "json"))] 2 | use pyo3::exceptions::PyException; 3 | use pyo3::prelude::*; 4 | #[cfg(any(feature = "speedy", feature = "bincode"))] 5 | use pyo3::types::PyBytes; 6 | 7 | use super::DirEntryExt; 8 | 9 | #[pyclass] 10 | #[derive(Debug, Clone)] 11 | pub struct ScandirResult(scandir::ScandirResult); 12 | 13 | #[pymethods] 14 | impl ScandirResult { 15 | #[getter] 16 | fn path(&self) -> String { 17 | self.0.path().clone() 18 | } 19 | 20 | #[getter] 21 | fn error(&self) -> Option<(String, String)> { 22 | self.0.error().cloned() 23 | } 24 | 25 | #[getter] 26 | fn is_dir(&self) -> bool { 27 | self.0.is_dir() 28 | } 29 | 30 | #[getter] 31 | fn is_file(&self) -> bool { 32 | self.0.is_file() 33 | } 34 | 35 | #[getter] 36 | fn is_symlink(&self) -> bool { 37 | self.0.is_symlink() 38 | } 39 | 40 | #[getter] 41 | fn ctime(&self) -> f64 { 42 | self.0.ctime() 43 | } 44 | 45 | #[getter] 46 | fn mtime(&self) -> f64 { 47 | self.0.mtime() 48 | } 49 | 50 | #[getter] 51 | fn atime(&self) -> f64 { 52 | self.0.atime() 53 | } 54 | 55 | #[getter] 56 | fn size(&self) -> u64 { 57 | self.0.size() 58 | } 59 | 60 | #[getter] 61 | fn ext(&self) -> Option { 62 | match &self.0 { 63 | scandir::ScandirResult::DirEntryExt(e) => Some(DirEntryExt::from(e)), 64 | _ => None, 65 | } 66 | } 67 | 68 | #[cfg(feature = "speedy")] 69 | fn to_speedy(&self, py: 
Python) -> PyResult> { 70 | match self.0.to_speedy() { 71 | Ok(v) => Ok(PyBytes::new_with(py, v.len(), |b| { 72 | b.copy_from_slice(&v); 73 | Ok(()) 74 | })? 75 | .into()), 76 | Err(e) => Err(PyException::new_err(e.to_string())), 77 | } 78 | } 79 | 80 | #[cfg(feature = "bincode")] 81 | fn to_bincode(&self, py: Python) -> PyResult> { 82 | match self.0.to_bincode() { 83 | Ok(v) => Ok(PyBytes::new_with(py, v.len(), |b| { 84 | b.copy_from_slice(&v); 85 | Ok(()) 86 | })? 87 | .into()), 88 | Err(e) => Err(PyException::new_err(e.to_string())), 89 | } 90 | } 91 | 92 | #[cfg(feature = "json")] 93 | fn to_json(&self) -> PyResult { 94 | self.0 95 | .to_json() 96 | .map_err(|e| PyException::new_err(e.to_string())) 97 | } 98 | 99 | fn __repr__(&self) -> String { 100 | format!("{self:?}") 101 | } 102 | 103 | fn __str__(&self) -> String { 104 | format!("{self:?}") 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /scandir/tests/common.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(windows, feature(junction_point))] 2 | 3 | use std::cmp::min; 4 | use std::fs::{File, create_dir_all, hard_link}; 5 | use std::io::{Error, Write}; 6 | #[cfg(unix)] 7 | use std::os::unix::fs::symlink; 8 | #[cfg(windows)] 9 | use std::os::windows::fs::junction_point; 10 | 11 | use tempfile::TempDir; 12 | 13 | pub fn setup() -> TempDir { 14 | let base_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); 15 | let _ = std::env::set_current_dir(&base_dir); 16 | TempDir::new().unwrap() 17 | } 18 | 19 | #[allow(dead_code)] 20 | pub fn cleanup(temp_dir: TempDir) -> Result<(), Error> { 21 | temp_dir.close() 22 | } 23 | 24 | fn get_filename(i: u32) -> String { 25 | if i < 3 { 26 | format!(".file{i}") 27 | } else { 28 | format!("file{i}") 29 | } 30 | } 31 | 32 | pub fn create_temp_file_tree( 33 | depth: u32, 34 | dircnt: u32, 35 | filecnt: u32, 36 | hlinkcnt: u32, 37 | #[cfg(windows)] jcnt: u32, // 
Number of junctions to create 38 | #[cfg(unix)] slinkcnt: u32, 39 | #[cfg(unix)] pipecnt: u32, 40 | ) -> Result { 41 | let temp_dir = setup(); 42 | #[cfg(windows)] 43 | let junc_dir = temp_dir.path().join("junc_dir"); 44 | #[cfg(windows)] 45 | { 46 | if jcnt > 0 { 47 | create_dir_all(&junc_dir)?; 48 | for i in 1..=filecnt { 49 | let mut file = File::create(junc_dir.join("junc_".to_string() + &get_filename(i)))?; 50 | file.write_all(format!("HELLO{i}").as_bytes())?; 51 | } 52 | } 53 | } 54 | for i in 1..=dircnt { 55 | let mut dir = temp_dir.path().join(format!("dir{i}")); 56 | for d in 1..=depth { 57 | dir = dir.join(format!("dir{i}_{d}")); 58 | create_dir_all(&dir)?; 59 | for i in 1..=filecnt { 60 | let mut file = File::create(dir.join(get_filename(i)))?; 61 | file.write_all(format!("HELLO{i}").as_bytes())?; 62 | } 63 | for i in 1..=hlinkcnt { 64 | let filenum = min(i, filecnt); 65 | hard_link( 66 | dir.join(get_filename(filenum)), 67 | dir.join(format!("hardlink{i}")), 68 | )?; 69 | } 70 | #[cfg(windows)] 71 | for i in 1..=jcnt { 72 | junction_point(&junc_dir, &dir.join(format!("junction{i}")))?; 73 | } 74 | #[cfg(unix)] 75 | for i in 1..=slinkcnt { 76 | let filenum = min(i, filecnt); 77 | symlink( 78 | dir.join(get_filename(filenum)), 79 | dir.join(format!("symlink{i}")), 80 | )?; 81 | } 82 | #[cfg(unix)] 83 | for i in 1..=pipecnt { 84 | unix_named_pipe::create(dir.join(format!("pipe{i}")), None)?; 85 | } 86 | } 87 | } 88 | Ok(temp_dir) 89 | } 90 | -------------------------------------------------------------------------------- /scandir/benches/count.rs: -------------------------------------------------------------------------------- 1 | use std::{path::Path, time::Duration}; 2 | 3 | #[cfg(windows)] 4 | use std::path::PathBuf; 5 | 6 | use criterion::{Criterion, criterion_group, criterion_main}; 7 | /// Ensures the linux-5.9 kernel tree exists in the benchmark data directory (downloading and extracting the tarball when missing) and returns the extraction directory as a string. 8 | fn create_test_data() -> String { 9 | let temp_dir; 10 | let linux_dir; 11 | let kernel_path; 12 | #[cfg(unix)] 13 | { 14 | temp_dir = 
expanduser::expanduser("~/Rust/_Data/benches").unwrap(); 15 | linux_dir = expanduser::expanduser("~/Rust/_Data/benches/linux-5.9").unwrap(); 16 | kernel_path = expanduser::expanduser("~/Rust/_Data/benches/linux-5.9.tar.gz").unwrap(); 17 | } 18 | #[cfg(windows)] 19 | { 20 | temp_dir = PathBuf::from("C:/Workspace/benches"); 21 | linux_dir = PathBuf::from("C:/Workspace/benches/linux-5.9"); 22 | kernel_path = PathBuf::from("C:/Workspace/benches/linux-5.9.tar.gz"); 23 | } 24 | if !temp_dir.exists() { 25 | std::fs::create_dir_all(&temp_dir).unwrap(); 26 | } 27 | if !kernel_path.exists() { 28 | // Download kernel 29 | println!("Downloading linux-5.9.tar.gz..."); 30 | let resp = 31 | reqwest::blocking::get("https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.9.tar.gz") 32 | .and_then(|r| r.error_for_status()).expect("request failed"); // an HTTP error page must not be stored as the tarball 33 | let body = resp.bytes().expect("body invalid"); // raw bytes: `text()` would decode the gzip stream as text and corrupt it 34 | let mut out = std::fs::File::create(&kernel_path).expect("failed to create file"); 35 | std::io::copy(&mut &body[..], &mut out).expect("failed to copy content"); 36 | } 37 | if !linux_dir.exists() { 38 | println!("Extracting linux-5.9.tar.gz..."); 39 | let tar_gz = std::fs::File::open(&kernel_path).unwrap(); 40 | let tar = flate2::read::GzDecoder::new(tar_gz); 41 | let mut archive = tar::Archive::new(tar); 42 | archive.unpack(&linux_dir).unwrap(); 43 | } 44 | linux_dir.to_str().unwrap().to_string() 45 | } 46 | 47 | fn benchmark_dir(c: &mut Criterion, path: &str) { 48 | println!("Running benchmarks for {path}..."); 49 | let dir = Path::new(path).file_name().unwrap().to_str().unwrap(); 50 | let mut group = c.benchmark_group(format!("Count {dir}")); 51 | group.measurement_time(Duration::from_secs(10)); 52 | group.sample_size(20); 53 | group.bench_function("scandir.Count (collect)", |b| { 54 | b.iter(|| { 55 | let mut instance = scandir::Count::new(path) 56 | .unwrap_or_else(|_| panic!("Failed to create Count instance for {path}")); 57 | instance.collect().unwrap(); 58 | }) 59 | }); 60 | 
group.bench_function("scandir.Count(Ext) (collect)", |b| { 61 | b.iter(|| { 62 | let mut instance = scandir::Count::new(path) 63 | .unwrap_or_else(|_| panic!("Failed to create Count instance for {path}")) 64 | .extended(true); 65 | instance.collect().unwrap(); 66 | }) 67 | }); 68 | group.finish(); 69 | } 70 | 71 | fn benchmarks(c: &mut Criterion) { 72 | benchmark_dir(c, &create_test_data()); 73 | #[cfg(unix)] 74 | let path = "/usr"; 75 | #[cfg(windows)] 76 | let path = "C:/Windows"; 77 | benchmark_dir(c, path); 78 | } 79 | 80 | criterion_group!(benches, benchmarks); 81 | criterion_main!(benches); 82 | -------------------------------------------------------------------------------- /scandir/tests/walk.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(windows, feature(junction_point))] 2 | 3 | use std::io::Error; 4 | 5 | use scandir::Walk; 6 | 7 | mod common; 8 | 9 | #[test] 10 | fn test_walk() -> Result<(), Error> { 11 | #[cfg(unix)] 12 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 6, 7)?; 13 | #[cfg(windows)] 14 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 3)?; 15 | let toc = Walk::new(temp_dir.path(), Some(true))?.collect()?; 16 | #[cfg(windows)] 17 | { 18 | assert_eq!(67, toc.files.len()); 19 | assert_eq!(13, toc.dirs.len()); 20 | assert_eq!(27, toc.symlinks.len()); 21 | assert_eq!(0, toc.other.len()); 22 | } 23 | #[cfg(unix)] 24 | { 25 | assert_eq!(63, toc.files.len()); 26 | assert_eq!(12, toc.dirs.len()); 27 | assert_eq!(54, toc.symlinks.len()); 28 | assert_eq!(63, toc.other.len()); 29 | } 30 | assert_eq!(0, toc.errors.len()); 31 | common::cleanup(temp_dir) 32 | } 33 | 34 | #[test] 35 | fn test_walk_skip_hidden() -> Result<(), Error> { 36 | #[cfg(unix)] 37 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 6, 7)?; 38 | #[cfg(windows)] 39 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 3)?; 40 | let toc = Walk::new(temp_dir.path(), Some(true))? 
41 | .skip_hidden(true) 42 | .collect()?; 43 | #[cfg(windows)] 44 | { 45 | assert_eq!(67, toc.files.len()); 46 | assert_eq!(13, toc.dirs.len()); 47 | assert_eq!(27, toc.symlinks.len()); 48 | assert_eq!(0, toc.other.len()); 49 | } 50 | #[cfg(unix)] 51 | { 52 | assert_eq!(63, toc.files.len()); 53 | assert_eq!(12, toc.dirs.len()); 54 | assert_eq!(54, toc.symlinks.len()); 55 | assert_eq!(63, toc.other.len()); 56 | } 57 | assert_eq!(0, toc.errors.len()); 58 | common::cleanup(temp_dir) 59 | } 60 | 61 | #[test] 62 | fn test_walk_extended() -> Result<(), Error> { 63 | #[cfg(unix)] 64 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 6, 7)?; 65 | #[cfg(windows)] 66 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 3)?; 67 | let toc = Walk::new(temp_dir.path(), Some(true))? 68 | .extended(true) 69 | .collect()?; 70 | #[cfg(windows)] 71 | { 72 | assert_eq!(67, toc.files.len()); 73 | assert_eq!(13, toc.dirs.len()); 74 | assert_eq!(27, toc.symlinks.len()); 75 | assert_eq!(0, toc.other.len()); 76 | } 77 | #[cfg(unix)] 78 | { 79 | assert_eq!(63, toc.files.len()); 80 | assert_eq!(12, toc.dirs.len()); 81 | assert_eq!(54, toc.symlinks.len()); 82 | assert_eq!(63, toc.other.len()); 83 | } 84 | assert_eq!(0, toc.errors.len()); 85 | common::cleanup(temp_dir) 86 | } 87 | 88 | #[test] 89 | fn test_walk_follow_links() -> Result<(), Error> { 90 | #[cfg(unix)] 91 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 6, 7)?; 92 | #[cfg(windows)] 93 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 3)?; 94 | let toc = Walk::new(temp_dir.path(), Some(true))? 
95 | .follow_links(true) 96 | .collect()?; 97 | #[cfg(windows)] 98 | { 99 | assert_eq!(175, toc.files.len()); 100 | assert_eq!(40, toc.dirs.len()); 101 | assert_eq!(0, toc.symlinks.len()); 102 | assert_eq!(0, toc.other.len()); 103 | } 104 | #[cfg(unix)] 105 | { 106 | assert_eq!(117, toc.files.len()); 107 | assert_eq!(12, toc.dirs.len()); 108 | assert_eq!(0, toc.symlinks.len()); 109 | assert_eq!(63, toc.other.len()); 110 | } 111 | assert_eq!(0, toc.errors.len()); 112 | common::cleanup(temp_dir) 113 | } 114 | -------------------------------------------------------------------------------- /scandir/tests/count.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(windows, feature(junction_point))] 2 | 3 | use std::io::Error; 4 | 5 | use scandir::Count; 6 | 7 | mod common; 8 | 9 | #[test] 10 | fn test_count() -> Result<(), Error> { 11 | #[cfg(unix)] 12 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 6, 7)?; 13 | #[cfg(windows)] 14 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 3)?; 15 | let count = Count::new(temp_dir.path())?.collect()?; 16 | assert!(count.errors.is_empty()); 17 | //assert!(count.duration > 0.0); --> Fails on MAC 18 | assert_eq!(0, count.devices); 19 | #[cfg(windows)] 20 | { 21 | assert_eq!(85, count.files); 22 | assert_eq!(13, count.dirs); 23 | assert_eq!(27, count.slinks); 24 | } 25 | #[cfg(unix)] 26 | { 27 | assert_eq!(81, count.files); 28 | assert_eq!(12, count.dirs); 29 | assert_eq!(54, count.slinks); 30 | assert_eq!(0, count.pipes); 31 | } 32 | assert_eq!(0, count.hlinks); 33 | common::cleanup(temp_dir) 34 | } 35 | 36 | #[test] 37 | fn test_count_skip_hidden() -> Result<(), Error> { 38 | #[cfg(unix)] 39 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 6, 7)?; 40 | #[cfg(windows)] 41 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 3)?; 42 | let count = Count::new(temp_dir.path())?.skip_hidden(true).collect()?; 43 | assert!(count.errors.is_empty()); 44 | 
//assert!(count.duration > 0.0); --> Fails on MAC 45 | assert_eq!(0, count.devices); 46 | #[cfg(windows)] 47 | { 48 | assert_eq!(67, count.files); 49 | assert_eq!(13, count.dirs); 50 | assert_eq!(27, count.slinks); 51 | } 52 | #[cfg(unix)] 53 | { 54 | assert_eq!(63, count.files); 55 | assert_eq!(12, count.dirs); 56 | assert_eq!(54, count.slinks); 57 | assert_eq!(0, count.pipes); 58 | } 59 | assert_eq!(0, count.hlinks); 60 | common::cleanup(temp_dir) 61 | } 62 | 63 | #[test] 64 | fn test_count_extended() -> Result<(), Error> { 65 | #[cfg(unix)] 66 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 6, 7)?; 67 | #[cfg(windows)] 68 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 3)?; 69 | let count = Count::new(temp_dir.path())?.extended(true).collect()?; 70 | assert!(count.errors.is_empty()); 71 | //assert!(count.duration > 0.0); --> Fails on MAC 72 | assert_eq!(0, count.devices); 73 | #[cfg(windows)] 74 | { 75 | assert_eq!(40, count.files); 76 | assert_eq!(13, count.dirs); 77 | assert_eq!(27, count.slinks); 78 | } 79 | #[cfg(unix)] 80 | { 81 | assert_eq!(36, count.files); 82 | assert_eq!(12, count.dirs); 83 | assert_eq!(54, count.slinks); 84 | assert_eq!(63, count.pipes); 85 | } 86 | assert_eq!(45, count.hlinks); 87 | common::cleanup(temp_dir) 88 | } 89 | 90 | #[test] 91 | fn test_count_follow_links() -> Result<(), Error> { 92 | #[cfg(unix)] 93 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 6, 7)?; 94 | #[cfg(windows)] 95 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 3)?; 96 | let count = Count::new(temp_dir.path())?.follow_links(true).collect()?; 97 | assert!(count.errors.is_empty()); 98 | //assert!(count.duration > 0.0); --> Fails on MAC 99 | assert_eq!(0, count.devices); 100 | #[cfg(windows)] 101 | { 102 | assert_eq!(85, count.files); 103 | assert_eq!(13, count.dirs); 104 | assert_eq!(27, count.slinks); 105 | } 106 | #[cfg(unix)] 107 | { 108 | assert_eq!(81, count.files); 109 | assert_eq!(12, count.dirs); 110 | 
assert_eq!(54, count.slinks); 111 | assert_eq!(0, count.pipes); 112 | } 113 | assert_eq!(0, count.hlinks); 114 | common::cleanup(temp_dir) 115 | } 116 | -------------------------------------------------------------------------------- /scandir/src/def/toc.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | 3 | #[cfg(feature = "bincode")] 4 | use bincode::error::EncodeError; 5 | #[cfg(feature = "speedy")] 6 | use speedy::{Readable, Writable}; 7 | 8 | #[cfg_attr(feature = "speedy", derive(Readable, Writable))] 9 | #[cfg_attr( 10 | any(feature = "bincode", feature = "json"), 11 | derive(Deserialize, Serialize) 12 | )] 13 | #[derive(Debug, Clone, PartialEq)] 14 | pub struct Toc { 15 | pub dirs: Vec, 16 | pub files: Vec, 17 | pub symlinks: Vec, 18 | pub other: Vec, 19 | pub errors: Vec, 20 | } 21 | 22 | impl Toc { 23 | pub fn new() -> Self { 24 | Toc { 25 | dirs: Vec::new(), 26 | files: Vec::new(), 27 | symlinks: Vec::new(), 28 | other: Vec::new(), 29 | errors: Vec::new(), 30 | } 31 | } 32 | 33 | pub fn clear(&mut self) { 34 | self.dirs.clear(); 35 | self.files.clear(); 36 | self.symlinks.clear(); 37 | self.other.clear(); 38 | self.errors.clear(); 39 | } 40 | 41 | pub fn dirs(&self) -> Vec { 42 | self.dirs.clone() 43 | } 44 | 45 | pub fn files(&self) -> Vec { 46 | self.files.clone() 47 | } 48 | 49 | pub fn symlinks(&self) -> Vec { 50 | self.symlinks.clone() 51 | } 52 | 53 | pub fn other(&self) -> Vec { 54 | self.other.clone() 55 | } 56 | 57 | pub fn errors(&self) -> Vec { 58 | self.errors.clone() 59 | } 60 | 61 | pub fn is_empty(&self) -> bool { 62 | self.dirs.is_empty() 63 | && self.files.is_empty() 64 | && self.symlinks.is_empty() 65 | && self.other.is_empty() 66 | && self.errors.is_empty() 67 | } 68 | 69 | pub fn extend(&mut self, root_dir: &str, other: &Toc) { 70 | self.dirs.extend_from_slice( 71 | &other 72 | .dirs 73 | .iter() 74 | .map(|x| PathBuf::from(root_dir).join(x).to_str().unwrap().to_owned()) 
75 | .collect::>(), 76 | ); 77 | self.files.extend_from_slice( 78 | &other 79 | .files 80 | .iter() 81 | .map(|x| PathBuf::from(root_dir).join(x).to_str().unwrap().to_owned()) 82 | .collect::>(), 83 | ); 84 | self.symlinks.extend_from_slice( 85 | &other 86 | .symlinks 87 | .iter() 88 | .map(|x| PathBuf::from(root_dir).join(x).to_str().unwrap().to_owned()) 89 | .collect::>(), 90 | ); 91 | self.other.extend_from_slice( 92 | &other 93 | .other 94 | .iter() 95 | .map(|x| PathBuf::from(root_dir).join(x).to_str().unwrap().to_owned()) 96 | .collect::>(), 97 | ); 98 | self.errors.extend_from_slice( 99 | &other 100 | .errors 101 | .iter() 102 | .map(|x| PathBuf::from(root_dir).join(x).to_str().unwrap().to_owned()) 103 | .collect::>(), 104 | ); 105 | } 106 | 107 | #[cfg(feature = "speedy")] 108 | pub fn to_speedy(&self) -> Result, speedy::Error> { 109 | self.write_to_vec() 110 | } 111 | 112 | #[cfg(feature = "bincode")] 113 | pub fn to_bincode(&self) -> Result, EncodeError> { 114 | bincode::serde::encode_to_vec(self, bincode::config::legacy()) 115 | } 116 | 117 | #[cfg(feature = "json")] 118 | pub fn to_json(&self) -> serde_json::Result { 119 | serde_json::to_string(self) 120 | } 121 | } 122 | 123 | impl Default for Toc { 124 | fn default() -> Self { 125 | Self::new() 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /scandir/README.md: -------------------------------------------------------------------------------- 1 | # scandir 2 | 3 | The Rust crate is called `scandir` and installable via `cargo`. On Linux it is 4 | **1.5 - 2.9 times faster** and on Windows **1.5 - 5.4 times faster** (see 5 | [benchmarks](https://github.com/brmmm3/scandir-rs/blob/master/scandir/doc/benchmarks.md)). 6 | 7 | If you are just interested in directory statistics you can use the `Count`. 8 | 9 | `scandir_rs` contains following classes: 10 | 11 | - `Count` for determining statistics of a directory.
12 | - `Walk` for getting names of directory entries. 13 | - `Scandir` for getting detailed stats of directory entries. 14 | 15 | For the API see: 16 | 17 | - Class [Count](https://github.com/brmmm3/scandir-rs/blob/master/scandir/doc/count.md) 18 | - Class [Walk](https://github.com/brmmm3/scandir-rs/blob/master/scandir/doc/walk.md) 19 | - Class [Scandir](https://github.com/brmmm3/scandir-rs/blob/master/scandir/doc/scandir.md) 20 | 21 | ## Examples 22 | 23 | `Collect` examples: 24 | 25 | ```rust 26 | use scandir::Count; 27 | 28 | // collect() starts the worker thread and waits until it has finished. The line below is blocking. 29 | println!(Count::new("/usr")?.collect()?); 30 | // Get extended statistics 31 | println!(Count::new("/usr", return_type=ReturnType.Ext)?.collect()?); 32 | ``` 33 | 34 | The same, but asynchronously in background using a class instance: 35 | 36 | ```rust 37 | use scandir::Count; 38 | 39 | let mut instance = Count::new("/usr", return_type=ReturnType.Ext); 40 | instance.start(); // Start scanning the directory 41 | ... 42 | let values = instance.results(); // Returns the current statistics. Can be read at any time 43 | ... 44 | if instance.busy() { // Check if the task is still running. 45 | ... 46 | instance.stop(); // If you want to cancel the task 47 | ... 48 | instance.join(); // Wait for the instance to finish. 49 | ``` 50 | 51 | ```rust 52 | let mut instance = Count::new(&path)?; 53 | instance.start()?; 54 | loop { 55 | if !instance.busy() { 56 | break; 57 | } 58 | // Do something 59 | thread::sleep(Duration::from_millis(10)); 60 | } 61 | // collect() immediately returns because the worker thread has already finished. 62 | let statistics = instance.collect()?; 63 | ``` 64 | 65 | `Walk` example: 66 | 67 | ```rust 68 | use scandir::Walk; 69 | 70 | // Get basic file tree 71 | println!(Walk::new("/usr")?.collect()?); 72 | // Get file tree with extended file type identification. This is slower. 
73 | println!(Walk::new("/usr", return_type=ReturnType.Ext)?.collect()?); 74 | ``` 75 | 76 | If you want to have intermediate results, e.g. you want to show the progress to the user, 77 | then use the example below. 78 | 79 | ```rust 80 | let mut instance = Walk::new(&path, None)?; 81 | instance.start()?; 82 | loop { 83 | if !instance.busy() { 84 | break; 85 | } 86 | let new_results = instance.results(true); 87 | // Do something 88 | thread::sleep(Duration::from_millis(10)); 89 | } 90 | // collect() immediately returns because the worker thread has already finished. 91 | let results = instance.collect()?; 92 | ``` 93 | 94 | `Scandir` example: 95 | 96 | ```rust 97 | use scandir::Scandir; 98 | 99 | // Get basic file metadata 100 | println!(Scandir::new("/usr")?.collect()?); 101 | // Get extended file metadata 102 | println!(Scandir::new("/usr", return_type=ReturnType.Ext, None)?.collect()?); 103 | ``` 104 | 105 | If you want to have intermediate results, e.g. you want to show the progress to the user, 106 | then use the example below. 107 | 108 | ```rust 109 | let mut instance = Scandir::new(&path, None)?; 110 | instance.start()?; 111 | loop { 112 | if !instance.busy() { 113 | break; 114 | } 115 | let new_results = instance.results(true); 116 | // Do something 117 | thread::sleep(Duration::from_millis(10)); 118 | } 119 | // collect() immediately returns because the worker thread has already finished. 120 | let results = instance.collect()?; 121 | ``` 122 | -------------------------------------------------------------------------------- /pyscandir/doc/walk.md: -------------------------------------------------------------------------------- 1 | # The API of class `Walk` 2 | 3 | ## `Toc` 4 | 5 | The `Toc` class is the return value of class methods `results` and `collect` of class `Walk`. 6 | 7 | ### `Toc` has the following class members 8 | 9 | - `dirs` list of directory names. 10 | - `files` list of filenames. 11 | - `symlinks` list of symlink names.
12 | - `other` list of names of all other entry types. 13 | - `errors` list of access errors (list of strings). 14 | 15 | ## `Walk()` 16 | 17 | ```python 18 | def Walk( 19 | root_path: str, 20 | sorted: bool = False, 21 | skip_hidden: bool = False, 22 | max_depth: int = 0, 23 | max_file_cnt: int = 0, 24 | dir_include: List[str] | None = None, 25 | dir_exclude: List[str] | None = None, 26 | file_include: List[str] | None = None, 27 | file_exclude: List[str] | None = None, 28 | case_sensitive: bool = True, 29 | return_type: ReturnType = ReturnType.Base, 30 | store: bool = True, 31 | ) 32 | ``` 33 | 34 | Creates a class instance for getting the names of directory entries. The class instance initially does nothing. 35 | To start the scan either the method `start` or the method `collect` has to be called or a context 36 | has to be created (`with Walk(...) as instance:`). When the context is closed the background 37 | thread is stopped. 38 | 39 | ### Parameters 40 | 41 | - `root_path` is directory to scan. `~` is allowed on Unix systems. 42 | - `sorted` if `True` alphabetically sort results. 43 | - `skip_hidden` if `True` then ignore all hidden files and directories. 44 | - `max_depth` is maximum depth of iteration. If `0` then depth limit is disabled. 45 | - `dir_include` list of patterns for directories to include. 46 | - `dir_exclude` list of patterns for directories to exclude. 47 | - `file_include` list of patterns for files to include. 48 | - `file_exclude` list of patterns for files to exclude. 49 | - `case_sensitive` if `True` then do case sensitive pattern matching. 50 | - `follow_links` if `True` then follow symlinks and junctions. 51 | - `return_type` defines type of data returned. 52 | - `store` store results in local structure. 53 | 54 | For valid file patterns see module [glob](https://docs.rs/glob/0.3.0/glob/struct.Pattern.html). 55 | 56 | ### Return types 57 | 58 | - `ReturnType.Base` return `dirs` and `files` as `os.walk` does.
59 | - `ReturnType.Ext` return additional data: `symlinks`, `other` and `errors`. 60 | 61 | **Please note:** 62 | > Due to limitations of jwalk the returned errors just contain the error message without any 63 | information about which files the errors correspond to. 64 | 65 | ### `clear()` 66 | 67 | Clear all results. 68 | 69 | ### `start()` 70 | 71 | Start parsing the directory tree in background. Raises an exception if a task is already running. 72 | 73 | ### `join()` 74 | 75 | Wait for task to finish. 76 | 77 | ### `stop()` 78 | 79 | Stop task. 80 | 81 | ### `collect() -> Toc` 82 | 83 | Collect directories, files, etc. and return a `Toc` object when the task has finished. 84 | This method is blocking and releases the GIL. Method `start` will be called if not already done. 85 | 86 | ### `has_results(only_new: bool | None = True) -> bool` 87 | 88 | Returns `True` if new entries are available and `only_new` is `True` or in case `only_new` 89 | is `False` and any entries have been collected since task start. 90 | 91 | ### `results_cnt(only_new: bool | None = True) -> int` 92 | 93 | Returns number of results collected so far. If `update` is `True` then new results are counted too. 94 | 95 | ### `results(only_new: bool | None = True) -> List[Tuple[str, Toc]]` 96 | 97 | Returns entries and errors. 98 | 99 | If `only_new` is `True` (default) then return only new `Toc` else return all `Toc` collected so far. 100 | 101 | ### `has_errors() -> bool` 102 | 103 | Returns `True` if errors occurred while walking through the directory tree. 104 | The error messages can be found in `Toc` objects returned. 105 | 106 | ### `duration -> float` 107 | 108 | Returns the duration of the task in seconds as float. As long as the task is running it will 109 | return 0. 110 | 111 | ### `finished -> bool` 112 | 113 | Returns `True` after the task has finished. 114 | 115 | ### `busy -> bool` 116 | 117 | Returns `True` while a task is running.
118 | -------------------------------------------------------------------------------- /pyscandir/src/def/count.rs: -------------------------------------------------------------------------------- 1 | #[cfg(any(feature = "speedy", feature = "bincode", feature = "json"))] 2 | use pyo3::exceptions::PyException; 3 | use pyo3::prelude::*; 4 | #[cfg(any(feature = "speedy", feature = "bincode"))] 5 | use pyo3::types::PyBytes; 6 | use pyo3::types::PyDict; 7 | 8 | #[cfg(feature = "speedy")] 9 | use speedy::Writable; 10 | 11 | #[pyclass] 12 | #[derive(Debug, Clone)] 13 | pub struct Statistics(pub scandir::Statistics); 14 | 15 | impl Statistics { 16 | pub fn from(entry: &scandir::Statistics) -> Self { 17 | Statistics(entry.clone()) 18 | } 19 | } 20 | 21 | #[pymethods] 22 | impl Statistics { 23 | #[getter] 24 | fn dirs(&self) -> i32 { 25 | self.0.dirs 26 | } 27 | 28 | #[getter] 29 | fn files(&self) -> i32 { 30 | self.0.files 31 | } 32 | 33 | #[getter] 34 | fn slinks(&self) -> i32 { 35 | self.0.slinks 36 | } 37 | 38 | #[getter] 39 | fn hlinks(&self) -> i32 { 40 | self.0.hlinks 41 | } 42 | 43 | #[getter] 44 | fn devices(&self) -> i32 { 45 | self.0.devices 46 | } 47 | 48 | #[getter] 49 | fn pipes(&self) -> i32 { 50 | self.0.pipes 51 | } 52 | 53 | #[getter] 54 | fn size(&self) -> u64 { 55 | self.0.size 56 | } 57 | 58 | #[getter] 59 | fn usage(&self) -> u64 { 60 | self.0.usage 61 | } 62 | 63 | #[getter] 64 | fn errors(&self) -> Vec { 65 | self.0.errors.clone() 66 | } 67 | 68 | #[getter] 69 | fn duration(&self) -> f64 { 70 | self.0.duration 71 | } 72 | 73 | #[pyo3(signature = (duration=None))] 74 | pub fn as_dict(&self, duration: Option, py: Python) -> PyResult> { 75 | let pyresult = PyDict::new(py); 76 | if self.0.dirs > 0 { 77 | pyresult.set_item("dirs", self.0.dirs).unwrap(); 78 | } 79 | if self.0.files > 0 { 80 | pyresult.set_item("files", self.0.files).unwrap(); 81 | } 82 | if self.0.slinks > 0 { 83 | pyresult.set_item("slinks", self.0.slinks).unwrap(); 84 | } 85 | if 
self.0.hlinks > 0 { 86 | pyresult.set_item("hlinks", self.0.hlinks).unwrap(); 87 | } 88 | if self.0.devices > 0 { 89 | pyresult.set_item("devices", self.0.devices).unwrap(); 90 | } 91 | if self.0.pipes > 0 { 92 | pyresult.set_item("pipes", self.0.pipes).unwrap(); 93 | } 94 | if self.0.size > 0 { 95 | pyresult.set_item("size", self.0.size).unwrap(); 96 | } 97 | if self.0.usage > 0 { 98 | pyresult.set_item("usage", self.0.usage).unwrap(); 99 | } 100 | if !self.0.errors.is_empty() { 101 | pyresult.set_item("errors", self.0.errors.to_vec()).unwrap(); 102 | } 103 | if duration.unwrap_or(false) { 104 | pyresult.set_item("duration", self.0.duration).unwrap(); 105 | } 106 | Ok(pyresult.into_any().unbind()) 107 | } 108 | 109 | #[cfg(feature = "speedy")] 110 | fn to_speedy(&self, py: Python) -> PyResult> { 111 | match self.0.write_to_vec() { 112 | Ok(v) => Ok(PyBytes::new_with(py, v.len(), |b| { 113 | b.copy_from_slice(&v); 114 | Ok(()) 115 | })? 116 | .into()), 117 | Err(e) => Err(PyException::new_err(e.to_string())), 118 | } 119 | } 120 | 121 | #[cfg(feature = "bincode")] 122 | fn to_bincode(&self, py: Python) -> PyResult> { 123 | match self.0.to_vec() { 124 | Ok(v) => Ok(PyBytes::new_with(py, v.len(), |b| { 125 | b.copy_from_slice(&v); 126 | Ok(()) 127 | })? 
128 | .into()), 129 | Err(e) => Err(PyException::new_err(e.to_string())), 130 | } 131 | } 132 | 133 | #[cfg(feature = "json")] 134 | fn to_json(&self) -> PyResult { 135 | self.0 136 | .to_json() 137 | .map_err(|e| PyException::new_err(e.to_string())) 138 | } 139 | 140 | fn __repr__(&self) -> String { 141 | format!("{self:?}") 142 | } 143 | 144 | fn __str__(&self) -> String { 145 | format!("{self:?}") 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /scandir/doc/count.md: -------------------------------------------------------------------------------- 1 | # The API of class `Count` 2 | 3 | ## Statistics 4 | 5 | The `Statistics` class is the return value of class methods `results` and `collect` 6 | of class `Count`. 7 | 8 | ### `Statistics` has following class members 9 | 10 | - `dirs` contains number of directories. 11 | - `files` contains number of files. 12 | - `slinks` contains number of symlinks. 13 | - `hlinks` contains number of hardlinks. 14 | - `devices` contains number of devices (only relevant on Unix systems). 15 | - `pipes` contains number of named pipes (only relevant on Unix systems). 16 | - `size` contains total size of all files. 17 | - `usage` contains total usage on disk. 18 | - `errors` list of access errors (list of strings). 19 | - `duration` time taken for scanning (in seconds as a float). 20 | 21 | ## `Count::new>(root_path: P) -> Result` 22 | 23 | Creates a class instance for calculating statistics. The class instance initially does nothing. 24 | To start the scan either the method `start` or the method `collect` has to be called. 25 | 26 | ### Class members 27 | 28 | - `root_path` is directory to scan. `~` is allowed on Unix systems. 29 | - `skip_hidden` if `true` then ignore all hidden files and directories. 30 | - `max_depth` is maximum depth of iteration. If `0` then depth limit is disabled. 31 | - `max_file_cnt` is maximum number of files to collect. If `0` then limit is disabled. 
32 | - `dir_include` list of patterns for directories to include. 33 | - `dir_exclude` list of patterns for directories to exclude. 34 | - `file_include` list of patterns for files to include. 35 | - `file_exclude` list of patterns for files to exclude. 36 | - `case_sensitive` if `true` then do case sensitive pattern matching. 37 | - `return_type` defines type of data returned. 38 | 39 | For valid file patterns see module [glob](https://docs.rs/glob/0.3.0/glob/struct.Pattern.html). 40 | 41 | ### Return types 42 | 43 | - `ReturnType::Base` calculate statistics for `dirs`, `files`, `slinks`, `size` and `usage`. 44 | - `ReturnType::Ext` in addition to above calculate statistics `hlinks` and on Unix platforms 45 | 46 | `devices` and `pipes`. 47 | 48 | ### `skip_hidden(mut self, skip_hidden: bool) -> Self` 49 | 50 | Set to `true` to skip hidden (starting with a dot) files. 51 | 52 | ### `max_depth(mut self, depth: usize) -> Self` 53 | 54 | Set the maximum depth of entries yielded by the iterator. 55 | 56 | ### `max_file_cnt(mut self, max_file_cnt: usize) -> Self` 57 | 58 | Set maximum number of files to collect. 59 | 60 | ### `dir_include(mut self, dir_include: Option<Vec<String>>) -> Self` 61 | 62 | Set directory include filter. 63 | 64 | ### `dir_exclude(mut self, dir_exclude: Option<Vec<String>>) -> Self` 65 | 66 | Set directory exclude filter. 67 | 68 | ### `file_include(mut self, file_include: Option<Vec<String>>) -> Self` 69 | 70 | Set file include filter. 71 | 72 | ### `file_exclude(mut self, file_exclude: Option<Vec<String>>) -> Self` 73 | 74 | Set file exclude filter. 75 | 76 | ### `case_sensitive(mut self, case_sensitive: bool) -> Self` 77 | 78 | Set case sensitive filename filtering. 79 | 80 | ### `extended(mut self, extended: bool) -> Self` 81 | 82 | Set extended file type counting. 83 | 84 | ### `clear(&mut self)` 85 | 86 | Clear all results. 87 | 88 | ### `start(&mut self) -> Result<(), Error>` 89 | 90 | Start calculating statistics in background. Raises an exception if a task is already running.
91 | 92 | ### `join(&mut self) -> bool` 93 | 94 | Wait for parsing task to finish. 95 | 96 | ### `stop(&mut self) -> bool` 97 | 98 | Stop parsing task. 99 | 100 | ### `collect(&mut self) -> Result<Statistics, Error>` 101 | 102 | Calculate statistics and return a `Statistics` object when the task has finished. 103 | 104 | ### `has_results(&self) -> bool` 105 | 106 | Returns `true` if new statistics are available. 107 | 108 | ### `results(&mut self) -> Statistics` 109 | 110 | Return a `Statistics` object with the current statistics. 111 | 112 | ### `has_errors(&mut self) -> bool` 113 | 114 | Returns `true` if errors occurred while scanning the directory tree. The errors can be found 115 | in the statistics object. 116 | 117 | ### `duration(&mut self) -> f64` 118 | 119 | Returns the duration of the task in seconds as float. As long as the task is running it will 120 | return 0. 121 | 122 | ### `finished(&self) -> bool` 123 | 124 | Returns `true` after the task has finished. 125 | 126 | ### `busy(&self) -> bool` 127 | 128 | Returns `true` while a task is running. 129 | -------------------------------------------------------------------------------- /pyscandir/README.md: -------------------------------------------------------------------------------- 1 | # scandir-rs 2 | 3 | The Python module is called `scandir_rs` and installable via `pip`. It is an alternative to 4 | `os.walk()` and `os.scandir()` with more features and higher speed. On Linux it is 5 | **3 - 11 times faster** and on Windows **6 - 70 times faster** 6 | (see [benchmarks](https://github.com/brmmm3/scandir-rs/blob/master/pyscandir/doc/benchmarks.md)). 7 | It releases the GIL and the scanning is done in a background thread. 8 | With different methods intermediate results can be read. 9 | 10 | If you are just interested in directory statistics you can use the `Count`. 11 | 12 | `scandir_rs` contains the following classes: 13 | 14 | - `Count` for determining statistics of a directory.
15 | - `Walk` for getting names of directory entries. 16 | - `Scandir` for getting detailed stats of directory entries. 17 | 18 | For the API see: 19 | 20 | - Class [Count](https://github.com/brmmm3/scandir-rs/blob/master/pyscandir/doc/count.md) 21 | - Class [Walk](https://github.com/brmmm3/scandir-rs/blob/master/pyscandir/doc/walk.md) 22 | - Class [Scandir](https://github.com/brmmm3/scandir-rs/blob/master/pyscandir/doc/scandir.md) 23 | 24 | ## Installation 25 | 26 | For building this wheel from source you need the tool `maturin`. 27 | 28 | Install `maturin`: 29 | 30 | ```sh 31 | cargo install maturin 32 | ``` 33 | 34 | IMPORTANT: In order to build this project at least Rust version 1.61 is needed! 35 | 36 | **Build wheel:** 37 | 38 | Change to directory `pyscandir`. 39 | 40 | Build wheel (on Linux): 41 | 42 | ```sh 43 | maturin build --release --strip 44 | ``` 45 | 46 | Build wheel on Windows: 47 | 48 | ```sh 49 | maturin build --release --strip --no-sdist 50 | ``` 51 | 52 | ``maturin`` will build the wheels for all Python versions installed on your system. 53 | 54 | Alternatively you can use the build script `build_wheels.py`. 55 | The precondition to run this script is to have `pyenv` installed. 56 | The script can build the wheel for specific Python versions or for all Python versions installed 57 | by `pyenv`. 58 | In addition it runs ``pytest`` after successful creation of each wheel. 59 | 60 | ```sh 61 | python build_wheels.py 62 | ``` 63 | 64 | By default the script will build the wheel for the current Python interpreter. 65 | You can build the wheel for specific Python version(s) by providing the 66 | argument `--versions`: 67 | 68 | ```sh 69 | python build_wheels.py --versions 3.11.8,3.12.2 70 | ``` 71 | 72 | To build the wheel for all installed Python versions: 73 | 74 | ```sh 75 | python build_wheels.py --versions * 76 | ``` 77 | 78 | Instructions on how to install ``pyenv`` can be found on [github](https://github.com/pyenv/pyenv).
79 | 80 | ## Examples 81 | 82 | Get statistics of a directory: 83 | 84 | ```python 85 | from scandir_rs import Count, ReturnType 86 | 87 | print(Count("/usr", return_type=ReturnType.Ext).collect()) 88 | ``` 89 | 90 | The `collect` method releases the GIL. So other Python threads can run in parallel. 91 | 92 | The same, but asynchronously in background using a class instance: 93 | 94 | ```python 95 | from scandir_rs import Count, ReturnType 96 | 97 | instance = Count("/usr", return_type=ReturnType.Ext) 98 | instance.start() # Start scanning the directory in background 99 | ... 100 | values = instance.results() # Returns the current statistics. Can be read at any time 101 | ... 102 | if instance.busy(): # Check if the task is still running. 103 | ... 104 | instance.stop() # If you want to cancel the task 105 | ... 106 | instance.join() # Wait for the instance to finish. 107 | ``` 108 | 109 | and with a context manager: 110 | 111 | ```python 112 | import time 113 | 114 | from scandir_rs import Count, ReturnType 115 | 116 | with Count("/usr", return_type=ReturnType.Ext) as instance: 117 | while instance.busy(): 118 | statistics = instance.results() 119 | # Do something 120 | time.sleep(0.01) 121 | print(instance.results()) 122 | ``` 123 | 124 | ``os.walk()`` example: 125 | 126 | ```python 127 | from scandir_rs import Walk 128 | 129 | for root, dirs, files in Walk("/usr"): 130 | # Do something 131 | ``` 132 | 133 | with extended data: 134 | 135 | ```python 136 | from scandir_rs import Walk, ReturnType 137 | 138 | for root, dirs, files, symlinks, other, errors in Walk("/usr", return_type=ReturnType.Ext): 139 | # Do something 140 | ``` 141 | 142 | ``os.scandir()`` example: 143 | 144 | ```python 145 | from scandir_rs import Scandir, ReturnType 146 | 147 | for path, entry in Scandir("~/workspace", return_type=ReturnType.Ext): 148 | # entry is a custom DirEntry object 149 | ``` 150 | -------------------------------------------------------------------------------- 
/pyscandir/doc/count.md: -------------------------------------------------------------------------------- 1 | # The API of class `Count` 2 | 3 | ## `Statistics` 4 | 5 | The `Statistics` class is the return value of class methods `results` and `collect` 6 | of class `Count`. 7 | 8 | ### `Statistics` has following class members 9 | 10 | - `dirs` contains number of directories. 11 | - `files` contains number of files. 12 | - `slinks` contains number of symlinks. 13 | - `hlinks` contains number of hardlinks. 14 | - `devices` contains number of devices (only relevant on Unix systems). 15 | - `pipes` contains number of named pipes (only relevant on Unix systems). 16 | - `size` contains total size of all files. 17 | - `usage` contains total usage on disk. 18 | - `errors` list of access errors (list of strings). 19 | - `duration` time taken for scanning (in seconds as a float). 20 | 21 | ## `Count()` 22 | 23 | def Count( 24 | root_path: str, 25 | skip_hidden: bool = False, 26 | max_depth: int = 0, 27 | max_file_cnt: int = 0, 28 | dir_include: List[str] | None = None, 29 | dir_exclude: List[str] | None = None, 30 | file_include: List[str] | None = None, 31 | file_exclude: List[str] | None = None, 32 | case_sensitive: bool = False, 33 | return_type: ReturnType = ReturnType.Base, 34 | ) 35 | 36 | Creates a class instance for calculating statistics. The class instance initially does nothing. 37 | To start the scan either the method `start` or the method `collect` has to be called or a 38 | context has to be created (`with Count(...) as instance:`). When the context is closed the 39 | background thread is stopped. 40 | 41 | ### Parameters 42 | 43 | - `root_path` is directory to scan. `~` is allowed on Unix systems. 44 | - `skip_hidden` if `True` then ignore all hidden files and directories. 45 | - `max_depth` is maximum depth of iteration. If `0` then depth limit is disabled. 46 | - `max_file_cnt` is maximum number of files to collect. If `0` then limit is disabled. 
47 | - `dir_include` list of patterns for directories to include. 48 | - `dir_exclude` list of patterns for directories to exclude. 49 | - `file_include` list of patterns for files to include. 50 | - `file_exclude` list of patterns for files to exclude. 51 | - `case_sensitive` if `True` then do case sensitive pattern matching. 52 | - `follow_links` if `True` then follow symlinks and junctions. 53 | - `return_type` defines type of data returned. 54 | 55 | For valid file patterns see module [glob](https://docs.rs/glob/0.3.0/glob/struct.Pattern.html). 56 | 57 | ### Return types 58 | 59 | - `ReturnType.Base` calculate statistics for `dirs`, `files`, `slinks`, `size` and `usage`. 60 | - `ReturnType.Ext` in addition to above calculate statistics `hlinks` and on Unix platforms 61 | `devices` and `pipes`. 62 | 63 | ### Example usage of the context manager 64 | 65 | ```python 66 | import scandir_rs as scandir 67 | 68 | with scandir.Count("~/workspace", return_type=scandir.ReturnType.Ext) as instance: 69 | while instance.busy(): 70 | statistics = instance.results() 71 | 72 | # Do something 73 | 74 | ``` 75 | 76 | ### `clear()` 77 | 78 | Clear all results. 79 | 80 | ### `start()` 81 | 82 | Start calculating statistics in background. Raises an exception if a task is already running. 83 | 84 | ### `join()` 85 | 86 | Wait for parsing task to finish. 87 | 88 | ### `stop()` 89 | 90 | Stop parsing task. 91 | 92 | ### `collect() -> Statistics` 93 | 94 | Calculate statistics and return a `Statistics` object when the task has finished. 95 | This method is blocking and releases the GIL. 96 | 97 | ### `has_results() -> bool` 98 | 99 | Returns `True` if new statistics are available. 100 | 101 | ### `results() -> Statistics` 102 | 103 | Return a `Statistics` object with the current statistics. 104 | 105 | ### `has_errors() -> bool` 106 | 107 | Returns `True` if errors occurred while scanning the directory tree. The errors can be found 108 | in the statistics object.
109 | 110 | ### `duration -> float` 111 | 112 | Returns the duration of the task in seconds as float. As long as the task is running it will 113 | return 0. 114 | 115 | ### `finished -> bool` 116 | 117 | Returns `True` after the task has finished. 118 | 119 | ### `busy -> bool` 120 | 121 | Returns `True` while a task is running. 122 | 123 | ### `as_dict() -> dict` 124 | 125 | Returns statistics as a `dict`. Result will only contain the keys of which the values are non zero. 126 | 127 | ### `to_speedy() -> bytes` 128 | 129 | Feature `speedy` enabled. 130 | 131 | Returns statistics as [speedy](https://docs.rs/speedy/latest/speedy) encoded byte string. 132 | 133 | ### `to_bincode() -> bytes` 134 | 135 | Feature `bincode` enabled. 136 | 137 | Returns statistics as [bincode](https://docs.rs/bincode/latest/bincode) encoded byte string. 138 | 139 | ### `to_json() -> str` 140 | 141 | Feature `json` enabled. 142 | 143 | Returns statistics as [json](https://docs.rs/serde_json/latest/serde_json) encoded string. 
144 | -------------------------------------------------------------------------------- /scandir/tests/scandir.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(windows, feature(junction_point))] 2 | 3 | use std::io::Error; 4 | 5 | use scandir::{ReturnType, Scandir, ScandirResult}; 6 | 7 | mod common; 8 | 9 | #[test] 10 | fn test_scandir() -> Result<(), Error> { 11 | #[cfg(unix)] 12 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 6, 7)?; 13 | #[cfg(windows)] 14 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 3)?; 15 | let entries = Scandir::new(temp_dir.path(), Some(true))?.collect()?; 16 | #[cfg(unix)] 17 | assert_eq!(210, entries.results.len()); 18 | #[cfg(windows)] 19 | assert_eq!(125, entries.results.len()); 20 | assert_eq!(0, entries.errors.len()); 21 | #[cfg(target_os = "linux")] 22 | match entries.results.first().unwrap() { 23 | ScandirResult::DirEntry(d) => { 24 | assert_eq!("dir3", &d.path); 25 | assert!(d.is_dir); 26 | #[cfg(target_os = "linux")] 27 | assert!(d.st_size <= 4096); // Directories on tmpfs can have a size smaller than 4096 28 | #[cfg(target_os = "macos")] 29 | assert_eq!(96, d.st_size); 30 | #[cfg(windows)] 31 | assert_eq!(0, d.st_size); 32 | } 33 | _ => panic!("Wrong type"), 34 | } 35 | common::cleanup(temp_dir) 36 | } 37 | 38 | #[test] 39 | fn test_scandir_skip_hidden() -> Result<(), Error> { 40 | #[cfg(unix)] 41 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 6, 7)?; 42 | #[cfg(windows)] 43 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 3)?; 44 | let scandir = Scandir::new(temp_dir.path(), Some(true))?; 45 | let mut scandir = scandir.skip_hidden(true); 46 | let entries = scandir.collect()?; 47 | #[cfg(unix)] 48 | assert_eq!(192, entries.results.len()); 49 | #[cfg(windows)] 50 | assert_eq!(107, entries.results.len()); 51 | assert_eq!(0, entries.errors.len()); 52 | match entries.results.first().unwrap() { 53 | ScandirResult::DirEntry(d) => { 54 | 
assert!(["dir1", "dir2", "dir3"].contains(&d.path.as_str())); 55 | assert!(d.is_dir); 56 | #[cfg(target_os = "linux")] 57 | assert!(d.st_size <= 4096); // Directories on tmpfs can have a size smaller than 4096 58 | #[cfg(target_os = "macos")] 59 | assert_eq!(96, d.st_size); 60 | #[cfg(windows)] 61 | assert_eq!(0, d.st_size); 62 | } 63 | _ => panic!("Wrong type"), 64 | } 65 | common::cleanup(temp_dir) 66 | } 67 | 68 | #[test] 69 | fn test_scandir_extended() -> Result<(), Error> { 70 | #[cfg(unix)] 71 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 6, 7)?; 72 | #[cfg(windows)] 73 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 3)?; 74 | let entries = Scandir::new(temp_dir.path(), Some(true))? 75 | .return_type(ReturnType::Ext) 76 | .collect()?; 77 | #[cfg(unix)] 78 | assert_eq!(210, entries.results.len()); 79 | #[cfg(windows)] 80 | assert_eq!(125, entries.results.len()); 81 | assert_eq!(0, entries.errors.len()); 82 | match entries.results.first().unwrap() { 83 | ScandirResult::DirEntryExt(d) => { 84 | assert!(["dir1", "dir2", "dir3"].contains(&d.path.as_str())); 85 | assert!(d.is_dir); 86 | #[cfg(target_os = "linux")] 87 | assert!(d.st_size <= 4096); // Directories on tmpfs can have a size smaller than 4096 88 | #[cfg(target_os = "macos")] 89 | assert_eq!(96, d.st_size); 90 | #[cfg(windows)] 91 | assert_eq!(0, d.st_size); 92 | } 93 | _ => panic!("Wrong type"), 94 | } 95 | common::cleanup(temp_dir) 96 | } 97 | 98 | #[test] 99 | fn test_scandir_follow_links() -> Result<(), Error> { 100 | #[cfg(unix)] 101 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 6, 7)?; 102 | #[cfg(windows)] 103 | let temp_dir = common::create_temp_file_tree(3, 3, 4, 5, 3)?; 104 | let entries = Scandir::new(temp_dir.path(), Some(true))? 
105 | .follow_links(true) 106 | .collect()?; 107 | #[cfg(unix)] 108 | assert_eq!(210, entries.results.len()); 109 | #[cfg(windows)] 110 | assert_eq!(233, entries.results.len()); 111 | assert_eq!(0, entries.errors.len()); 112 | #[cfg(target_os = "linux")] 113 | match entries.results.first().unwrap() { 114 | ScandirResult::DirEntry(d) => { 115 | assert_eq!("dir3", &d.path); 116 | assert!(d.is_dir); 117 | #[cfg(target_os = "linux")] 118 | assert!(d.st_size <= 4096); // Directories on tmpfs can have a size smaller than 4096 119 | #[cfg(target_os = "macos")] 120 | assert_eq!(96, d.st_size); 121 | #[cfg(windows)] 122 | assert_eq!(0, d.st_size); 123 | } 124 | _ => panic!("Wrong type"), 125 | } 126 | common::cleanup(temp_dir) 127 | } 128 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `scandir-rs` 2 | 3 | `scandir-rs` is a Rust project which provides a [Rust](https://github.com/brmmm3/scandir-rs/blob/master/scandir/README.md) 4 | and a [Python](https://github.com/brmmm3/scandir-rs/blob/master/pyscandir/README.md) module for 5 | directory iteration, like `os.walk()` or `os.scandir()`, but with more features and higher speed. 6 | Depending on the function call it yields a list of paths, tuple of lists grouped by their entry 7 | type or `DirEntry` objects that include file type and stat information along with the file name. 8 | Directory iteration is **many** times faster than `os.walk()`, `os.scandir()`, `walkdir` or 9 | `scan_dir` (see **benchmarks** for [Rust](https://github.com/brmmm3/scandir-rs/blob/master/scandir/doc/benchmarks.md) 10 | and [Python](https://github.com/brmmm3/scandir-rs/blob/master/pyscandir/doc/benchmarks.md)). 11 | 12 | The higher performance is achieved through parallelizing the file system access for reducing the 13 | access delay because of the overhead each file access has. 
14 | 15 | **Note:** `scandir_rs` uses libc 2.34, which is currently not supported by the manylinux releases. 16 | So it is not possible to upload prebuilt Linux wheels to PyPI. As a workaround you can download 17 | the Linux wheels from [github](https://github.com/brmmm3/scandir-rs/releases/tag/2.7.0). 18 | 19 | **Note:** Since 2.8.0 `skip_hidden` is now `false` by default! 20 | 21 | ## Python examples 22 | 23 | ### Count 24 | 25 | Get statistics of a directory: 26 | 27 | ```python 28 | from scandir_rs import Count, ReturnType 29 | 30 | print(Count("/usr", return_type=ReturnType.Ext).collect()) 31 | ``` 32 | 33 | The `collect` method releases the GIL. So other Python threads can run in parallel. 34 | 35 | The same, but asynchronously in background using a class instance: 36 | 37 | ```python 38 | from scandir_rs import Count, ReturnType 39 | 40 | instance = Count("/usr", return_type=ReturnType.Ext) 41 | instance.start() # Start scanning the directory in background 42 | ... 43 | values = instance.results() # Returns the current statistics. Can be read at any time 44 | ... 45 | if instance.busy(): # Check if the task is still running. 46 | ... 47 | instance.stop() # If you want to cancel the task 48 | ... 49 | instance.join() # Wait for the instance to finish. 
50 | ``` 51 | 52 | and with a context manager: 53 | 54 | ```python 55 | import time 56 | 57 | from scandir_rs import Count, ReturnType 58 | 59 | with Count("/usr", return_type=ReturnType.Ext) as instance: 60 | while instance.busy(): 61 | statistics = instance.results() 62 | # Do something 63 | time.sleep(0.01) 64 | print(instance.results()) 65 | ``` 66 | 67 | ### Walk 68 | 69 | ```python 70 | from scandir_rs import Walk 71 | 72 | for root, dirs, files in Walk("/usr"): 73 | # Do something 74 | ``` 75 | 76 | #### with extended metadata 77 | 78 | ```python 79 | from scandir_rs import Walk, ReturnType 80 | 81 | for root, dirs, files, symlinks, other, errors in Walk("/usr", return_type=ReturnType.Ext): 82 | # Do something 83 | ``` 84 | 85 | ### Scandir 86 | 87 | ```python 88 | from scandir_rs import Scandir, ReturnType 89 | 90 | for path, entry in Scandir("~/workspace", return_type=ReturnType.Ext): 91 | # entry is a custom DirEntry object 92 | ``` 93 | 94 | or collecting all the results: 95 | 96 | ```python 97 | from scandir_rs import Scandir, ReturnType 98 | 99 | instance = Scandir("~/workspace") 100 | instance.extended(True) 101 | results = instance.collect() 102 | ``` 103 | 104 | ## Rust examples 105 | 106 | ### Count 107 | 108 | ```rust 109 | let mut instance = Count::new(&root_dir)?; 110 | // Exclude directories dir0 and dir1 111 | instance = instance.dir_exclude(Some(vec!["dir0".to_owned(), "dir1".to_owned()])); 112 | // Use extended metadata for calculating statistics 113 | instance = instance.extended(true); 114 | // Start the background worker thread and wait for it to finish. 115 | // collect checks if background thread is already running. If not it will be started. 
116 | let statistics = instance.collect()?; 117 | ``` 118 | 119 | ### Walk 120 | 121 | ```rust 122 | let mut instance = Walk::new(&root_dir, None)?; 123 | // Use extended metadata for calculating statistics 124 | instance = instance.extended(true); 125 | // Start background thread for traversing file tree 126 | instance.start()?; 127 | loop { 128 | if !instance.busy() { 129 | break; 130 | } 131 | // Do something... 132 | thread::sleep(Duration::from_millis(10)); 133 | } 134 | let result = instance.collect()?; 135 | ``` 136 | 137 | ### Scandir 138 | 139 | ```rust 140 | let mut instance = Scandir::new(&root_dir, None)?; 141 | // Use extended metadata for calculating statistics 142 | instance = instance.extended(true); 143 | // Start background thread for traversing file tree 144 | instance.start()?; 145 | loop { 146 | if !instance.busy() { 147 | break; 148 | } 149 | // Do something... 150 | thread::sleep(Duration::from_millis(10)); 151 | } 152 | let result = instance.collect()?; 153 | ``` 154 | -------------------------------------------------------------------------------- /pyscandir/build_wheels.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import subprocess 4 | import fnmatch 5 | import concurrent.futures 6 | from typing import List 7 | 8 | 9 | def Run(args: List[str]) -> subprocess.CompletedProcess: 10 | print("RUN:", " ".join(args)) 11 | if os.name == "nt": 12 | return subprocess.run(args, shell=True, capture_output=True) 13 | return subprocess.run(" ".join(args), shell=True, capture_output=True) 14 | 15 | 16 | def ShowResult(title: str, prc: subprocess.CompletedProcess): 17 | stdout = prc.stdout.decode("utf-8") 18 | stderr = prc.stderr.decode("utf-8") 19 | if prc.returncode != 0: 20 | print(f"'{title}' failed with error code {prc.returncode}") 21 | print(stderr) 22 | elif not stdout: 23 | stdout = stderr 24 | return stdout, prc.returncode 25 | 26 | 27 | def BuildWheel( 28 | versions_dir: 
str, version: str, python_exe: str, features: str, bDebug: bool 29 | ) -> int: 30 | print(f"Building wheel for Python version {version}...") 31 | python_path = f"{versions_dir}/{version}/{python_exe}" 32 | cmd = ["maturin", "build", "--strip", "-i", python_path] 33 | if not bDebug: 34 | cmd.insert(2, "--release") 35 | if features: 36 | cmd.extend(["--", "--features", f'"{features.replace(",", " ")}"']) 37 | maturin_build = Run(cmd) 38 | stdout, returncode = ShowResult("maturin build", maturin_build) 39 | if returncode != 0: 40 | return returncode 41 | builtWheel = [ 42 | line for line in stdout.splitlines() if "Built wheel for CPython" in line 43 | ] 44 | if not builtWheel: 45 | print("No wheel built!") 46 | print(stdout) 47 | return 1 48 | wheel_path = builtWheel[0].split(" to ")[1] 49 | 50 | upgrade_pip = Run([python_path, "-m", "pip", "install", "-U", "pip"]) 51 | stdout, returncode = ShowResult("pip install -U pip", upgrade_pip) 52 | if returncode != 0: 53 | return returncode 54 | 55 | upgrade_pytest = Run([python_path, "-m", "pip", "install", "-U", "pytest"]) 56 | stdout, returncode = ShowResult("pip install -U pytest", upgrade_pytest) 57 | if returncode != 0: 58 | return returncode 59 | 60 | install_wheel = Run( 61 | [python_path, "-m", "pip", "install", "--force-reinstall", wheel_path] 62 | ) 63 | stdout, returncode = ShowResult("install wheel", install_wheel) 64 | if returncode != 0: 65 | return returncode 66 | 67 | run_pytest = Run([python_path, "-m", "pytest"]) 68 | stdout, returncode = ShowResult("pytest", run_pytest) 69 | print(stdout) 70 | if returncode != 0 and returncode != 5: 71 | return returncode 72 | return 0 73 | 74 | 75 | def GetPyEnvVersions() -> List[str] | None: 76 | tmpVersions = [sys.version.split()[0]] 77 | if "--versions" in sys.argv: 78 | tmpVersions = sys.argv[sys.argv.index("--versions") + 1].split(",") 79 | pyEnvVersionsPrc = Run(["pyenv", "versions"]) 80 | stdOut, returnCode = ShowResult("pyenv versions", pyEnvVersionsPrc) 81 | if 
returnCode != 0: 82 | sys.exit(1) 83 | pyEnvVersions = [ 84 | version.lstrip("*").strip().split()[0] 85 | for version in stdOut.splitlines() 86 | if "system" not in version and " 2.7." not in version 87 | ] 88 | versions = [] 89 | for version in tmpVersions: 90 | versions.extend(fnmatch.filter(pyEnvVersions, version)) 91 | return sorted(set(versions)) 92 | 93 | 94 | if __name__ == "__main__": 95 | versions = GetPyEnvVersions() 96 | if not versions: 97 | print("No versions to build Python wheel!") 98 | sys.exit(1) 99 | features = None 100 | if "--features" in sys.argv: 101 | features = sys.argv[sys.argv.index("--features") + 1] 102 | bDebug = "--debug" in sys.argv 103 | 104 | print(f"Building wheel for Python versions {bDebug=}:") 105 | print(", ".join(versions)) 106 | 107 | python_path = Run(["pyenv", "which", "python"]) 108 | if python_path.returncode != 0: 109 | print(f"'pyenv which python' failed with error code {python_path.returncode}") 110 | print(python_path.stderr.decode("utf-8")) 111 | sys.exit(1) 112 | 113 | versions_dir = ( 114 | python_path.stdout.decode("utf-8").rsplit("versions", 1)[0] + "versions" 115 | ) 116 | 117 | python_exe = "python.exe" if os.name == "nt" else "bin/python" 118 | 119 | futures = {} 120 | with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor: 121 | for version in versions: 122 | futures[version] = executor.submit( 123 | BuildWheel, versions_dir, version, python_exe, features, bDebug 124 | ) 125 | for version, future in futures.items(): 126 | returncode = future.result() 127 | if returncode != 0: 128 | print( 129 | f"Building wheel for Python version {version} failed with error code {returncode}!" 
130 | ) 131 | -------------------------------------------------------------------------------- /scandir/src/def/direntry.rs: -------------------------------------------------------------------------------- 1 | use std::time::{Duration, SystemTime, UNIX_EPOCH}; 2 | 3 | #[cfg(feature = "bincode")] 4 | use bincode::error::EncodeError; 5 | #[cfg(feature = "speedy")] 6 | use speedy::{Readable, Writable}; 7 | 8 | #[cfg_attr(feature = "speedy", derive(Readable, Writable))] 9 | #[cfg_attr( 10 | any(feature = "bincode", feature = "json"), 11 | derive(Deserialize, Serialize) 12 | )] 13 | #[derive(Debug, Clone, PartialEq, Default)] 14 | pub struct DirEntry { 15 | pub path: String, 16 | pub is_symlink: bool, 17 | pub is_dir: bool, 18 | pub is_file: bool, 19 | pub st_ctime: Option, 20 | pub st_mtime: Option, 21 | pub st_atime: Option, 22 | pub st_size: u64, 23 | } 24 | 25 | impl DirEntry { 26 | #[inline] 27 | pub fn ctime(&self) -> f64 { 28 | let duration = self 29 | .st_ctime 30 | .unwrap_or(UNIX_EPOCH) 31 | .duration_since(UNIX_EPOCH) 32 | .unwrap_or_else(|_err| Duration::new(0, 0)); 33 | (duration.as_secs() as f64) + (duration.subsec_nanos() as f64) * 1e-9 34 | } 35 | 36 | #[inline] 37 | pub fn mtime(&self) -> f64 { 38 | let duration = self 39 | .st_mtime 40 | .unwrap_or(UNIX_EPOCH) 41 | .duration_since(UNIX_EPOCH) 42 | .unwrap_or_else(|_err| Duration::new(0, 0)); 43 | (duration.as_secs() as f64) + (duration.subsec_nanos() as f64) * 1e-9 44 | } 45 | 46 | #[inline] 47 | pub fn atime(&self) -> f64 { 48 | let duration = self 49 | .st_atime 50 | .unwrap_or(UNIX_EPOCH) 51 | .duration_since(UNIX_EPOCH) 52 | .unwrap_or_else(|_err| Duration::new(0, 0)); 53 | (duration.as_secs() as f64) + (duration.subsec_nanos() as f64) * 1e-9 54 | } 55 | 56 | #[cfg(feature = "speedy")] 57 | pub fn to_speedy(&self) -> Result, speedy::Error> { 58 | self.write_to_vec() 59 | } 60 | 61 | #[cfg(feature = "bincode")] 62 | pub fn to_vec(&self) -> Result, EncodeError> { 63 | 
bincode::serde::encode_to_vec(self, bincode::config::legacy()) 64 | } 65 | 66 | #[cfg(feature = "json")] 67 | pub fn to_json(&self) -> serde_json::Result { 68 | serde_json::to_string(self) 69 | } 70 | } 71 | 72 | #[cfg_attr(feature = "speedy", derive(Readable, Writable))] 73 | #[cfg_attr( 74 | any(feature = "bincode", feature = "json"), 75 | derive(Deserialize, Serialize) 76 | )] 77 | #[derive(Debug, Clone, PartialEq, Default)] 78 | pub struct DirEntryExt { 79 | pub path: String, 80 | pub is_symlink: bool, 81 | pub is_dir: bool, 82 | pub is_file: bool, 83 | /// Creation time in seconds as float 84 | pub st_ctime: Option, 85 | /// Modification time in seconds as float 86 | pub st_mtime: Option, 87 | /// Access time in seconds as float 88 | pub st_atime: Option, 89 | /// Size of file / entry 90 | pub st_size: u64, 91 | /// File system block size 92 | pub st_blksize: u64, 93 | /// Number of used blocks on device / file system 94 | pub st_blocks: u64, 95 | /// File access mode / rights 96 | pub st_mode: u32, 97 | /// Number of hardlinks 98 | pub st_nlink: u64, 99 | /// User ID (Unix only) 100 | pub st_uid: u32, 101 | /// Group ID (Unix only) 102 | pub st_gid: u32, 103 | /// I-Node number (Unix only) 104 | pub st_ino: u64, 105 | /// Device number (Unix only) 106 | pub st_dev: u64, 107 | /// Device number (for character and block devices on Unix). 
108 | pub st_rdev: u64, 109 | } 110 | 111 | impl DirEntryExt { 112 | #[inline] 113 | pub fn ctime(&self) -> f64 { 114 | let duration = self 115 | .st_ctime 116 | .unwrap_or(UNIX_EPOCH) 117 | .duration_since(UNIX_EPOCH) 118 | .unwrap_or_else(|_err| Duration::new(0, 0)); 119 | (duration.as_secs() as f64) + (duration.subsec_nanos() as f64) * 1e-9 120 | } 121 | 122 | #[inline] 123 | pub fn mtime(&self) -> f64 { 124 | let duration = self 125 | .st_mtime 126 | .unwrap_or(UNIX_EPOCH) 127 | .duration_since(UNIX_EPOCH) 128 | .unwrap_or_else(|_err| Duration::new(0, 0)); 129 | (duration.as_secs() as f64) + (duration.subsec_nanos() as f64) * 1e-9 130 | } 131 | 132 | #[inline] 133 | pub fn atime(&self) -> f64 { 134 | let duration = self 135 | .st_atime 136 | .unwrap_or(UNIX_EPOCH) 137 | .duration_since(UNIX_EPOCH) 138 | .unwrap_or_else(|_err| Duration::new(0, 0)); 139 | (duration.as_secs() as f64) + (duration.subsec_nanos() as f64) * 1e-9 140 | } 141 | 142 | #[cfg(feature = "speedy")] 143 | pub fn to_speedy(&self) -> Result, speedy::Error> { 144 | self.write_to_vec() 145 | } 146 | 147 | #[cfg(feature = "bincode")] 148 | pub fn to_vec(&self) -> Result, EncodeError> { 149 | bincode::serde::encode_to_vec(self, bincode::config::legacy()) 150 | } 151 | 152 | #[cfg(feature = "json")] 153 | pub fn to_json(&self) -> serde_json::Result { 154 | serde_json::to_string(self) 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /scandir/src/def/scandir.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "bincode")] 2 | use bincode::error::EncodeError; 3 | #[cfg(feature = "speedy")] 4 | use speedy::{Readable, Writable}; 5 | 6 | use crate::ErrorsType; 7 | use crate::direntry::{DirEntry, DirEntryExt}; 8 | 9 | #[cfg_attr(feature = "speedy", derive(Readable, Writable))] 10 | #[cfg_attr( 11 | any(feature = "bincode", feature = "json"), 12 | derive(Deserialize, Serialize) 13 | )] 14 | 
#[derive(Debug, Clone, PartialEq)] 15 | pub enum ScandirResult { 16 | DirEntry(DirEntry), 17 | DirEntryExt(DirEntryExt), 18 | Error((String, String)), 19 | } 20 | 21 | impl ScandirResult { 22 | #[inline] 23 | pub fn path(&self) -> &String { 24 | match self { 25 | Self::DirEntry(e) => &e.path, 26 | Self::DirEntryExt(e) => &e.path, 27 | Self::Error(e) => &e.0, 28 | } 29 | } 30 | 31 | #[inline] 32 | pub fn error(&self) -> Option<&(String, String)> { 33 | match self { 34 | Self::Error(e) => Some(e), 35 | _ => None, 36 | } 37 | } 38 | 39 | #[inline] 40 | pub fn is_dir(&self) -> bool { 41 | match self { 42 | Self::DirEntry(e) => e.is_dir, 43 | Self::DirEntryExt(e) => e.is_dir, 44 | Self::Error(_) => false, 45 | } 46 | } 47 | 48 | #[inline] 49 | pub fn is_file(&self) -> bool { 50 | match self { 51 | Self::DirEntry(e) => e.is_file, 52 | Self::DirEntryExt(e) => e.is_file, 53 | Self::Error(_) => false, 54 | } 55 | } 56 | 57 | #[inline] 58 | pub fn is_symlink(&self) -> bool { 59 | match self { 60 | Self::DirEntry(e) => e.is_symlink, 61 | Self::DirEntryExt(e) => e.is_symlink, 62 | Self::Error(_) => false, 63 | } 64 | } 65 | 66 | #[inline] 67 | pub fn ctime(&self) -> f64 { 68 | match self { 69 | Self::DirEntry(e) => e.ctime(), 70 | Self::DirEntryExt(e) => e.ctime(), 71 | Self::Error(_) => 0.0, 72 | } 73 | } 74 | 75 | #[inline] 76 | pub fn mtime(&self) -> f64 { 77 | match self { 78 | Self::DirEntry(e) => e.mtime(), 79 | Self::DirEntryExt(e) => e.mtime(), 80 | Self::Error(_) => 0.0, 81 | } 82 | } 83 | 84 | #[inline] 85 | pub fn atime(&self) -> f64 { 86 | match self { 87 | Self::DirEntry(e) => e.atime(), 88 | Self::DirEntryExt(e) => e.atime(), 89 | Self::Error(_) => 0.0, 90 | } 91 | } 92 | 93 | #[inline] 94 | pub fn size(&self) -> u64 { 95 | match self { 96 | Self::DirEntry(e) => e.st_size, 97 | Self::DirEntryExt(e) => e.st_size, 98 | Self::Error(_) => 0, 99 | } 100 | } 101 | 102 | #[inline] 103 | pub fn ext(&self) -> Option<&DirEntryExt> { 104 | match self { 105 | 
Self::DirEntryExt(e) => Some(e), 106 | _ => None, 107 | } 108 | } 109 | 110 | #[cfg(feature = "speedy")] 111 | pub fn to_speedy(&self) -> Result, speedy::Error> { 112 | self.write_to_vec() 113 | } 114 | 115 | #[cfg(feature = "bincode")] 116 | pub fn to_bincode(&self) -> Result, EncodeError> { 117 | bincode::serde::encode_to_vec(self, bincode::config::legacy()) 118 | } 119 | 120 | #[cfg(feature = "json")] 121 | pub fn to_json(&self) -> serde_json::Result { 122 | serde_json::to_string(self) 123 | } 124 | } 125 | 126 | #[cfg_attr(feature = "speedy", derive(Readable, Writable))] 127 | #[cfg_attr( 128 | any(feature = "bincode", feature = "json"), 129 | derive(Deserialize, Serialize) 130 | )] 131 | #[derive(Debug, Clone, PartialEq)] 132 | pub struct ScandirResults { 133 | pub results: Vec, 134 | pub errors: ErrorsType, 135 | } 136 | 137 | impl ScandirResults { 138 | pub fn new() -> Self { 139 | ScandirResults { 140 | results: Vec::new(), 141 | errors: Vec::new(), 142 | } 143 | } 144 | 145 | pub fn clear(&mut self) { 146 | self.results.clear(); 147 | self.errors.clear(); 148 | } 149 | 150 | #[inline] 151 | pub fn is_empty(&self) -> bool { 152 | self.results.is_empty() && self.errors.is_empty() 153 | } 154 | 155 | #[inline] 156 | pub fn len(&self) -> usize { 157 | self.results.len() + self.errors.len() 158 | } 159 | 160 | pub fn extend(&mut self, results: &ScandirResults) { 161 | self.results.extend_from_slice(&results.results); 162 | self.errors.extend_from_slice(&results.errors); 163 | } 164 | 165 | #[cfg(feature = "speedy")] 166 | pub fn to_speedy(&self) -> Result, speedy::Error> { 167 | self.write_to_vec() 168 | } 169 | 170 | #[cfg(feature = "bincode")] 171 | pub fn to_bincode(&self) -> Result, EncodeError> { 172 | bincode::serde::encode_to_vec(self, bincode::config::legacy()) 173 | } 174 | 175 | #[cfg(feature = "json")] 176 | pub fn to_json(&self) -> serde_json::Result { 177 | serde_json::to_string(self) 178 | } 179 | } 180 | 181 | impl Default for ScandirResults { 
182 | fn default() -> Self { 183 | Self::new() 184 | } 185 | } 186 | -------------------------------------------------------------------------------- /pyscandir/examples/benchmark.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import os 4 | import sys 5 | import time 6 | import platform 7 | 8 | import psutil 9 | from diskinfo import DiskInfo 10 | from tabulate import tabulate 11 | 12 | from scandir_rs import Count, Walk, Scandir, ReturnType 13 | 14 | 15 | def scantree(path): 16 | try: 17 | for entry in os.scandir(path): 18 | if entry.is_dir(follow_symlinks=False): 19 | yield entry 20 | yield from scantree(entry.path) 21 | else: 22 | yield entry 23 | except Exception: 24 | return 25 | 26 | 27 | def GetDiskInfo(): 28 | partition = [ 29 | p for p in psutil.disk_partitions(all=False) if p.mountpoint in ("/", "C:\\") 30 | ][0] 31 | disks = DiskInfo().get_disk_list() 32 | for disk in disks: 33 | if partition.device.startswith(disk.get_path()): 34 | return ( 35 | disk.get_model(), 36 | ("SSD" if disk.is_ssd() else "NVME" if disk.is_nvme() else "HDD"), 37 | partition.fstype, 38 | ) 39 | 40 | 41 | dirName = "C:/Windows" if os.name == "nt" else "/usr" 42 | pyVersion = sys.version.split(" ")[0] 43 | 44 | print(f"Benchmarking directory: {dirName}") 45 | if os.name != "nt": 46 | dirName = os.path.expanduser(dirName) 47 | print(Count(dirName).collect()) 48 | print(Count(dirName, return_type=ReturnType.Ext).collect()) 49 | print() 50 | 51 | table = [] 52 | 53 | t1 = time.time() 54 | toc = Count(dirName).collect() 55 | dt = time.time() - t1 56 | print(f"Count.collect: {dt:.3f}") 57 | table.append([f"{dt:.3}", "Count.collect"]) 58 | 59 | t1 = time.time() 60 | toc = Count(dirName, return_type=ReturnType.Ext).collect() 61 | dt = time.time() - t1 62 | print(f"Count(ReturnType=Ext).collect: {dt:.3f}") 63 | table.append([f"{dt:.3}", "Count(ReturnType=Ext).collect"]) 64 | 65 | t1 = time.time() 66 | cnt = 0 67 | for 
root, dirs, files in os.walk(os.path.expanduser(dirName)): 68 | cnt += 1 69 | dtOsWalk = time.time() - t1 70 | print(f"os.walk: {dtOsWalk:.3f} {cnt}") 71 | table.append([f"{dtOsWalk:.3}", f"os.walk (Python {pyVersion})"]) 72 | 73 | t1 = time.time() 74 | cnt = 0 75 | for result in Walk(dirName): 76 | cnt += 1 77 | dtWalkIter = time.time() - t1 78 | print(f"Walk.iter: {dtWalkIter:.3f} {cnt}") 79 | table.append([f"{dtWalkIter:.3}", "Walk.iter"]) 80 | 81 | t1 = time.time() 82 | toc = Walk(dirName).collect() 83 | dt = time.time() - t1 84 | print(f"Walk.collect: {dt:.3f} dirs={len(toc.dirs)} files=={len(toc.files)}") 85 | table.append([f"{dt:.3}", "Walk.collect"]) 86 | 87 | t1 = time.time() 88 | instance = Walk(dirName) 89 | toc = instance.collect() 90 | dt = time.time() - t1 91 | print( 92 | f"Walk.collect: {dt:.3f} dirs={len(toc.dirs)} files=={len(toc.files)} Walk().duration={instance.duration}" 93 | ) 94 | 95 | t1 = time.time() 96 | toc = Walk(dirName, return_type=ReturnType.Ext).collect() 97 | dt = time.time() - t1 98 | print(f"Walk(ReturnType=Ext).collect: {dt:.3f} {str(toc)[:500]}") 99 | table.append([f"{dt:.3}", "Walk(ReturnType=Ext).collect"]) 100 | 101 | t1 = time.time() 102 | dirs = 0 103 | files = 0 104 | symlinks = 0 105 | size = 0 106 | for entry in scantree(os.path.expanduser(dirName)): 107 | try: 108 | st = entry.stat() 109 | except Exception: 110 | continue 111 | if entry.is_dir(): 112 | dirs += 1 113 | elif entry.is_file(): 114 | files += 1 115 | elif entry.is_symlink(): 116 | symlinks += 1 117 | size += st.st_size 118 | dtScantree = time.time() - t1 119 | print(f"scantree (os.scandir): {dtScantree:.3f} {dirs=} {files=} {symlinks=} {size=}") 120 | table.append([f"{dtScantree:.3}", f"scantree (os.scandir, Python {pyVersion})"]) 121 | 122 | t1 = time.time() 123 | entries = Scandir(dirName).collect() 124 | dt = time.time() - t1 125 | print(f"Scandir.collect: {dt:.3f} {len(entries)}") 126 | table.append([f"{dt:.3}", "Scandir.collect"]) 127 | 128 | t1 = 
time.time() 129 | instance = Scandir(dirName) 130 | cnt = 0 131 | for entry in instance: 132 | cnt += 1 133 | dtScandirIter = time.time() - t1 134 | print(f"Scandir.iter: {dtScandirIter:.3f} {cnt}") 135 | table.append([f"{dtScandirIter:.3}", "Scandir.iter"]) 136 | 137 | t1 = time.time() 138 | instance = Scandir(dirName) 139 | toc = instance.collect() 140 | dt = time.time() - t1 141 | print(f"Scandir.collect: {dt:.3f} {len(toc)} Scandir().duration={instance.duration}") 142 | 143 | t1 = time.time() 144 | entries = Scandir(dirName, return_type=ReturnType.Ext).collect() 145 | dt = time.time() - t1 146 | print(f"Scandir(ReturnType=Ext).collect: {dt:.3f} {len(entries)}") 147 | table.append([f"{dt:.3}", "Scandir(ReturnType=Ext).collect"]) 148 | 149 | uname = platform.uname() 150 | print(f"\n{uname.system} {uname.machine} (kernel={uname.release})") 151 | print("Physical cores:", psutil.cpu_count(logical=False)) 152 | print("Total cores:", psutil.cpu_count(logical=True)) 153 | cpufreq = psutil.cpu_freq() 154 | print(f"Max Frequency: {cpufreq.max:.2f}Mhz") 155 | disk = GetDiskInfo() 156 | print(f"Disk: {disk[0]} ({disk[1]}, {disk[2]})") 157 | print() 158 | print(tabulate(table, headers=["Time [s]", "Method"], tablefmt="github")) 159 | print() 160 | print(f"Walk.iter **~{dtOsWalk / dtWalkIter:.1f} times faster** than os.walk.") 161 | print( 162 | f"Scandir.iter **~{dtScantree / dtScandirIter:.1f} times faster** than scantree(os.scandir)." 163 | ) 164 | -------------------------------------------------------------------------------- /scandir/doc/walk.md: -------------------------------------------------------------------------------- 1 | # The API of class `Walk` 2 | 3 | ## Toc 4 | 5 | The `Toc` class is the return value of class method `results` and `collect` of class `Walk`. 6 | 7 | ### `Toc` has following class members 8 | 9 | - `dirs` list of directory names. 10 | - `files` list of filenames. 11 | - `symlinks` list of symlink names. 
12 | - `other` list of names of all other entry types. 13 | - `errors` list of access errors (list of strings). 14 | 15 | ## `Walk::new<P: AsRef<Path>>(root_path: P, store: Option<bool>) -> Result<Self, Error>` 16 | 17 | Creates a class instance for getting the file tree. The class instance initially does nothing. 18 | To start the scan either the method `start` or the method `collect` has to be called. 19 | 20 | ### Class members 21 | 22 | - `root_path` is the directory to scan. `~` is allowed on Unix systems. 23 | - `sorted` if `true` alphabetically sort results. 24 | - `skip_hidden` if `true` then ignore all hidden files and directories. 25 | - `max_depth` is the maximum depth of iteration. If `0` then depth limit is disabled. 26 | - `dir_include` list of patterns for directories to include. 27 | - `dir_exclude` list of patterns for directories to exclude. 28 | - `file_include` list of patterns for files to include. 29 | - `file_exclude` list of patterns for files to exclude. 30 | - `case_sensitive` if `true` then do case sensitive pattern matching. 31 | - `return_type` defines type of data returned. 32 | - `store` store results in local structure. 33 | 34 | For valid file patterns see module [glob](https://docs.rs/glob/0.3.0/glob/struct.Pattern.html). 35 | 36 | ### Return types 37 | 38 | - `ReturnType::Base` return `dirs` and `files` as `os.walk` does. 39 | - `ReturnType::Ext` return additional data: `symlinks`, `other` and `errors`. 40 | 41 | **Please note:** 42 | > Due to limitations of jwalk the returned errors just contain the error message without 43 | any information about which files the errors correspond to. 44 | 45 | ### `sorted(mut self, sorted: bool) -> Self` 46 | 47 | Return results in sorted order. 48 | 49 | ### `skip_hidden(mut self, skip_hidden: bool) -> Self` 50 | 51 | Set to `true` to skip hidden (starting with a dot) files. 52 | 53 | ### `max_depth(mut self, depth: usize) -> Self` 54 | 55 | Set the maximum depth of entries yielded by the iterator. 
56 | 57 | ### `max_file_cnt(mut self, max_file_cnt: usize) -> Self` 58 | 59 | Set maximum number of files to collect. 60 | 61 | ### `dir_include(mut self, dir_include: Option<Vec<String>>) -> Self` 62 | 63 | Set directory include filter. 64 | 65 | ### `dir_exclude(mut self, dir_exclude: Option<Vec<String>>) -> Self` 66 | 67 | Set directory exclude filter. 68 | 69 | ### `file_include(mut self, file_include: Option<Vec<String>>) -> Self` 70 | 71 | Set file include filter. 72 | 73 | ### `file_exclude(mut self, file_exclude: Option<Vec<String>>) -> Self` 74 | 75 | Set file exclude filter. 76 | 77 | ### `case_sensitive(mut self, case_sensitive: bool) -> Self` 78 | 79 | Set case sensitive filename filtering. 80 | 81 | ### `return_type(mut self, return_type: ReturnType) -> Self` 82 | 83 | Set extended file type counting. 84 | 85 | ### `clear(&mut self)` 86 | 87 | Clear all results. 88 | 89 | ### `start(&mut self) -> Result<(), Error>` 90 | 91 | Start parsing the directory tree in background. Returns an error if a task is already running. 92 | 93 | ### `join(&mut self) -> bool` 94 | 95 | Wait for parsing task to finish. 96 | 97 | ### `stop(&mut self) -> bool` 98 | 99 | Stop parsing task. 100 | 101 | ### `collect(&mut self) -> Result<Toc, Error>` 102 | 103 | Calculate statistics and return a `Toc` object when the task has finished. This method is blocking. 
104 | 105 | ### `has_results(&mut self, only_new: bool) -> bool` 106 | 107 | If `only_new` is `true` this method returns `true` if new results are available. 108 | If `only_new` is `false` this method returns `true` if results are available. 109 | 110 | ### `results_cnt(&mut self, only_new: bool) -> usize` 111 | 112 | If `only_new` is `true` this method returns the number of new results. 113 | If `only_new` is `false` this method returns the total number of results. 114 | 115 | ### `results(&mut self, only_new: bool) -> Vec<(String, Toc)>` 116 | 117 | If `only_new` is `true` this method returns new results. 118 | If `only_new` is `false` this method returns total results. 119 | 120 | ### `has_errors(&mut self) -> bool` 121 | 122 | Returns `true` if errors occurred while scanning the directory tree. The errors can be found 123 | in the statistics object. 124 | 125 | ### `errors_cnt(&mut self) -> usize` 126 | 127 | Returns the number of errors that occurred while scanning the file tree. 128 | 129 | ### `errors(&mut self, only_new: bool) -> ErrorsType` 130 | 131 | Returns the errors. 132 | 133 | ### `to_speedy(&self) -> Result<Vec<u8>, speedy::Error>` 134 | 135 | Returns the results serialized with `speedy`. 136 | For this method the feature `speedy` needs to be enabled. 137 | 138 | ### `to_bincode(&self) -> bincode::Result<Vec<u8>>` 139 | 140 | Returns the results serialized with `bincode`. 141 | For this method the feature `bincode` needs to be enabled. 142 | 143 | ### `to_json(&self) -> serde_json::Result<String>` 144 | 145 | Returns the results serialized as `json`. 146 | For this method the feature `json` needs to be enabled. 147 | 148 | ### `statistics(&self) -> Statistics` 149 | 150 | Returns the statistics of the results. Please note that file types `other` are 151 | counted in member `devices`. 152 | 153 | ### `duration(&mut self) -> f64` 154 | 155 | Returns the duration of the task in seconds as a float. As long as the task is running it will 156 | return 0. 
157 | 158 | ### `finished(&self) -> bool` 159 | 160 | Returns `true` after the task has finished. 161 | 162 | ### `busy(&self) -> bool` 163 | 164 | Returns `true` while a task is running. 165 | -------------------------------------------------------------------------------- /scandir/benches/walk.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(windows, feature(windows_by_handle))] 2 | 3 | #[cfg(unix)] 4 | use std::os::unix::fs::MetadataExt; 5 | #[cfg(windows)] 6 | use std::os::windows::fs::MetadataExt; 7 | use std::{fs, path::Path, time::Duration}; 8 | 9 | #[cfg(windows)] 10 | use std::path::PathBuf; 11 | 12 | use criterion::{Criterion, criterion_group, criterion_main}; 13 | 14 | #[cfg(unix)] 15 | #[derive(Debug, Clone)] 16 | pub struct MetaDataExt { 17 | pub st_mode: u32, 18 | pub st_ino: u64, 19 | pub st_dev: u64, 20 | pub st_nlink: u64, 21 | pub st_blksize: u64, 22 | pub st_blocks: u64, 23 | pub st_uid: u32, 24 | pub st_gid: u32, 25 | pub st_rdev: u64, 26 | } 27 | 28 | #[cfg(windows)] 29 | #[derive(Debug, Clone)] 30 | pub struct MetaDataExt { 31 | pub file_attributes: u32, 32 | pub volume_serial_number: Option, 33 | pub number_of_links: Option, 34 | pub file_index: Option, 35 | } 36 | 37 | #[inline] 38 | pub fn get_metadata_ext(metadata: &fs::Metadata) -> MetaDataExt { 39 | #[cfg(unix)] 40 | { 41 | MetaDataExt { 42 | st_mode: metadata.mode(), 43 | st_ino: metadata.ino(), 44 | st_dev: metadata.dev(), 45 | st_nlink: metadata.nlink(), 46 | st_blksize: metadata.blksize(), 47 | st_blocks: metadata.blocks(), 48 | st_uid: metadata.uid(), 49 | st_gid: metadata.gid(), 50 | st_rdev: metadata.rdev(), 51 | } 52 | } 53 | #[cfg(windows)] 54 | { 55 | MetaDataExt { 56 | file_attributes: metadata.file_attributes(), 57 | volume_serial_number: metadata.volume_serial_number(), 58 | number_of_links: metadata.number_of_links(), 59 | file_index: metadata.file_index(), 60 | } 61 | } 62 | } 63 | 64 | fn create_test_data() -> String { 
65 | let temp_dir; 66 | let linux_dir; 67 | let kernel_path; 68 | #[cfg(unix)] 69 | { 70 | temp_dir = expanduser::expanduser("~/Rust/_Data/benches").unwrap(); 71 | linux_dir = expanduser::expanduser("~/Rust/_Data/benches/linux-5.9").unwrap(); 72 | kernel_path = expanduser::expanduser("~/Rust/_Data/benches/linux-5.9.tar.gz").unwrap(); 73 | } 74 | #[cfg(windows)] 75 | { 76 | temp_dir = PathBuf::from("C:/Workspace/benches"); 77 | linux_dir = PathBuf::from("C:/Workspace/benches/linux-5.9"); 78 | kernel_path = PathBuf::from("C:/Workspace/benches/linux-5.9.tar.gz"); 79 | } 80 | if !temp_dir.exists() { 81 | std::fs::create_dir_all(&temp_dir).unwrap(); 82 | } 83 | if !kernel_path.exists() { 84 | // Download kernel (binary archive: read the raw bytes, never decode it as text) 85 | println!("Downloading linux-5.9.tar.gz..."); 86 | let resp = 87 | reqwest::blocking::get("https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.9.tar.gz") 88 | .expect("request failed"); 89 | let body = resp.bytes().expect("body invalid"); 90 | let mut out = std::fs::File::create(&kernel_path).expect("failed to create file"); 91 | std::io::copy(&mut body.as_ref(), &mut out).expect("failed to copy content"); 92 | } 93 | if !linux_dir.exists() { 94 | println!("Extracting linux-5.9.tar.gz..."); 95 | let tar_gz = std::fs::File::open(&kernel_path).unwrap(); 96 | let tar = flate2::read::GzDecoder::new(tar_gz); 97 | let mut archive = tar::Archive::new(tar); 98 | archive.unpack(&linux_dir).unwrap(); 99 | } 100 | linux_dir.to_str().unwrap().to_string() 101 | } 102 | 103 | fn benchmark_dir(c: &mut Criterion, path: &str) { 104 | println!("Running benchmarks for {path}..."); 105 | let dir = Path::new(path).file_name().unwrap().to_str().unwrap(); 106 | let mut group = c.benchmark_group(format!("Walk {dir}")); 107 | group.measurement_time(Duration::from_secs(30)); 108 | group.sample_size(20); 109 | group.bench_function("walkdir.WalkDir", |b| { 110 | b.iter(|| { 111 | let _ = walkdir::WalkDir::new(path).into_iter().collect::<Vec<_>>(); 112 | }) 113 | }); 114 |
group.bench_function("walkdir.WalkDir(Ext)", |b| { 115 | b.iter(|| { 116 | let _ = walkdir::WalkDir::new(path) 117 | .into_iter() 118 | .map(|result| match result { 119 | Ok(entry) => { 120 | if let Ok(metadata) = fs::metadata(entry.path()) { 121 | Ok((entry.metadata().unwrap(), Some(get_metadata_ext(&metadata)))) 122 | } else { 123 | Ok((entry.metadata().unwrap(), None)) 124 | } 125 | } 126 | Err(e) => Err(e), 127 | }) 128 | .collect::>(); 129 | }) 130 | }); 131 | group.bench_function("scandir.Walk (collect)", |b| { 132 | b.iter(|| { 133 | let mut instance = scandir::Walk::new(path, Some(true)) 134 | .unwrap_or_else(|_| panic!("Failed to create Walk instance for {path}")); 135 | instance.collect().unwrap(); 136 | }) 137 | }); 138 | group.bench_function("scandir.Walk(Ext) (collect)", |b| { 139 | b.iter(|| { 140 | let mut instance = scandir::Walk::new(path, Some(true)) 141 | .unwrap_or_else(|_| panic!("Failed to create Walk instance for {path}")) 142 | .return_type(scandir::ReturnType::Ext); 143 | instance.collect().unwrap(); 144 | }) 145 | }); 146 | group.finish(); 147 | } 148 | 149 | fn benchmarks(c: &mut Criterion) { 150 | benchmark_dir(c, &create_test_data()); 151 | #[cfg(unix)] 152 | let path = "/usr"; 153 | #[cfg(windows)] 154 | let path = "C:/Windows"; 155 | benchmark_dir(c, path); 156 | } 157 | 158 | criterion_group!(benches, benchmarks); 159 | criterion_main!(benches); 160 | -------------------------------------------------------------------------------- /scandir/benches/scandir.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(windows, feature(windows_by_handle))] 2 | 3 | use std::fs; 4 | #[cfg(unix)] 5 | use std::os::unix::fs::MetadataExt; 6 | #[cfg(windows)] 7 | use std::os::windows::fs::MetadataExt; 8 | use std::path::Path; 9 | use std::time::Duration; 10 | 11 | #[cfg(windows)] 12 | use std::path::PathBuf; 13 | 14 | use criterion::{Criterion, criterion_group, criterion_main}; 15 | 16 | #[cfg(unix)] 17 | 
#[derive(Debug, Clone)] 18 | pub struct MetaDataExt { 19 | pub st_mode: u32, 20 | pub st_ino: u64, 21 | pub st_dev: u64, 22 | pub st_nlink: u64, 23 | pub st_blksize: u64, 24 | pub st_blocks: u64, 25 | pub st_uid: u32, 26 | pub st_gid: u32, 27 | pub st_rdev: u64, 28 | } 29 | 30 | #[cfg(windows)] 31 | #[derive(Debug, Clone)] 32 | pub struct MetaDataExt { 33 | pub file_attributes: u32, 34 | pub volume_serial_number: Option, 35 | pub number_of_links: Option, 36 | pub file_index: Option, 37 | } 38 | 39 | #[inline] 40 | pub fn get_metadata_ext(metadata: &fs::Metadata) -> MetaDataExt { 41 | #[cfg(unix)] 42 | { 43 | MetaDataExt { 44 | st_mode: metadata.mode(), 45 | st_ino: metadata.ino(), 46 | st_dev: metadata.dev(), 47 | st_nlink: metadata.nlink(), 48 | st_blksize: metadata.blksize(), 49 | st_blocks: metadata.blocks(), 50 | st_uid: metadata.uid(), 51 | st_gid: metadata.gid(), 52 | st_rdev: metadata.rdev(), 53 | } 54 | } 55 | #[cfg(windows)] 56 | { 57 | MetaDataExt { 58 | file_attributes: metadata.file_attributes(), 59 | volume_serial_number: metadata.volume_serial_number(), 60 | number_of_links: metadata.number_of_links(), 61 | file_index: metadata.file_index(), 62 | } 63 | } 64 | } 65 | 66 | fn create_test_data() -> String { 67 | let temp_dir; 68 | let linux_dir; 69 | let kernel_path; 70 | #[cfg(unix)] 71 | { 72 | temp_dir = expanduser::expanduser("~/Rust/_Data/benches").unwrap(); 73 | linux_dir = expanduser::expanduser("~/Rust/_Data/benches/linux-5.9").unwrap(); 74 | kernel_path = expanduser::expanduser("~/Rust/_Data/benches/linux-5.9.tar.gz").unwrap(); 75 | } 76 | #[cfg(windows)] 77 | { 78 | temp_dir = PathBuf::from("C:/Workspace/benches"); 79 | linux_dir = PathBuf::from("C:/Workspace/benches/linux-5.9"); 80 | kernel_path = PathBuf::from("C:/Workspace/benches/linux-5.9.tar.gz"); 81 | } 82 | if !temp_dir.exists() { 83 | std::fs::create_dir_all(&temp_dir).unwrap(); 84 | } 85 | if !kernel_path.exists() { 86 | // Download kernel 87 | println!("Downloading 
linux-5.9.tar.gz..."); 88 | let resp = 89 | reqwest::blocking::get("https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.9.tar.gz") 90 | .expect("request failed"); 91 | let body = resp.bytes().expect("body invalid"); 92 | let mut out = std::fs::File::create(&kernel_path).expect("failed to create file"); 93 | std::io::copy(&mut body.as_ref(), &mut out).expect("failed to copy content"); 94 | } 95 | if !linux_dir.exists() { 96 | println!("Extracting linux-5.9.tar.gz..."); 97 | let tar_gz = std::fs::File::open(&kernel_path).unwrap(); 98 | let tar = flate2::read::GzDecoder::new(tar_gz); 99 | let mut archive = tar::Archive::new(tar); 100 | archive.unpack(&linux_dir).unwrap(); 101 | } 102 | linux_dir.to_str().unwrap().to_string() 103 | } 104 | 105 | fn benchmark_dir(c: &mut Criterion, path: &str) { 106 | println!("Running benchmarks for {path}..."); 107 | let dir = Path::new(path).file_name().unwrap().to_str().unwrap(); 108 | let mut group = c.benchmark_group(format!("Scandir {dir}")); 109 | group.measurement_time(Duration::from_secs(60)); 110 | group.sample_size(20); 111 | group.bench_function("scan_dir.ScanDir", |b| { 112 | b.iter(|| { 113 | let mut entries = Vec::new(); 114 | let _ = scan_dir::ScanDir::all().walk(path, |iter| { 115 | for (entry, _name) in iter { 116 | entries.push(entry.metadata().unwrap()); 117 | } 118 | }); 119 | }) 120 | }); 121 | group.bench_function("scan_dir.ScanDir(Ext)", |b| { 122 | b.iter(|| { 123 | let mut entries = Vec::new(); 124 | let _ = scan_dir::ScanDir::all().walk(path, |iter| { 125 | for (entry, _name) in iter { 126 | if let Ok(metadata) = fs::metadata(entry.path()) { 127 | entries 128 | .push((entry.metadata().unwrap(), Some(get_metadata_ext(&metadata)))); 129 | } else { 130 | entries.push((entry.metadata().unwrap(), None)); 131 | } 132 | } 133 | }); 134 | }) 135 | }); 136 | group.bench_function("scandir.Scandir (collect)", |b| { 137 | b.iter(|| { 138 | let mut instance = scandir::Scandir::new(path, Some(true)) 139 |
.unwrap_or_else(|_| panic!("Failed to create Scandir instance for {path}")); 140 | instance.collect().unwrap(); 141 | }) 142 | }); 143 | group.bench_function("scandir.Scandir(Ext) (collect)", |b| { 144 | b.iter(|| { 145 | let mut instance = scandir::Scandir::new(path, Some(true)) 146 | .unwrap_or_else(|_| panic!("Failed to create Scandir instance for {path}")) 147 | .return_type(scandir::ReturnType::Ext); 148 | instance.collect().unwrap(); 149 | }) 150 | }); 151 | group.finish(); 152 | } 153 | 154 | fn benchmarks(c: &mut Criterion) { 155 | benchmark_dir(c, &create_test_data()); 156 | #[cfg(unix)] 157 | let path = "/usr"; 158 | #[cfg(windows)] 159 | let path = "C:/Windows"; 160 | benchmark_dir(c, path); 161 | } 162 | 163 | criterion_group!(benches, benchmarks); 164 | criterion_main!(benches); 165 | -------------------------------------------------------------------------------- /scandir/doc/benchmarks.md: -------------------------------------------------------------------------------- 1 | # Benchmarks 2 | 3 | The benchmarking code is located in the [benches](../benches) directory. 4 | 5 | **(Ext)** means reading extended metadata to be able to identify hardlinks and special file types, 6 | like pipes and devices. 7 | 8 | ## Linux with Tower Ryzen 5 2400G @ 3.6GHz (4/8 cores) and Samsung SSD 960 EVO 250GB (NVME, EXT4) 9 | 10 | ### Directory linux-5.9 with 11 | 12 | - 4711 directories 13 | - 69973 files 14 | - 38 symlinks 15 | - 1.08GB size and 1.23GB usage on disk 16 | 17 | #### Count linux-5.9 18 | 19 | | Time [s] | Method | 20 | |------------|--------------------------------------| 21 | | 0.046 | Count.collect | 22 | | 0.081 | Count.collect(Ext) | 23 | 24 | #### Walk linux-5.9 25 | 26 | | Time [s] | Method | 27 | |---------------|-----------------------------------| 28 | | 0.082 | walkdir.WalkDir | 29 | | 0.056 | Walk.collect | 30 | | 0.462 | walkdir.WalkDir(Ext) | 31 | | 0.055 | Walk.collect(Ext) | 32 | 33 | Walk.collect **~1.5 times faster** than walkdir.WalkDir.
34 | Walk.collect(Ext) **~8.4 times faster** than walkdir.WalkDir(Ext). 35 | 36 | ![images/linux_walk_linux-5.9.png](images/linux_walk_linux-5.9.png) 37 | 38 | #### Scandir linux-5.9 39 | 40 | | Time [s] | Method | 41 | |---------------|-----------------------------------| 42 | | 0.199 | scan_dir.ScanDir | 43 | | 0.073 | Scandir.collect | 44 | | 0.383 | scan_dir.ScanDir(Ext) | 45 | | 0.116 | Scandir.collect(Ext) | 46 | 47 | Scandir.collect **~2.7 times faster** than scan_dir.ScanDir. 48 | Scandir.collect(Ext) **~3.3 times faster** than scan_dir.ScanDir(Ext). 49 | 50 | ![images/linux_scandir_linux-5.9.png](images/linux_scandir_linux-5.9.png) 51 | 52 | ### Directory /usr with 53 | 54 | - 45060 directories 55 | - 388518 files 56 | - 34937 symlinks 57 | - 177 hardlinks 58 | - 0 devices 59 | - 0 pipes 60 | - 23.16GB size and 24.02GB usage on disk 61 | 62 | #### Count /usr 63 | 64 | | Time [s] | Method | 65 | |------------|--------------------------------------| 66 | | 0.306 | Count.collect | 67 | | 0.515 | Count.collect(Ext) | 68 | 69 | #### Walk /usr 70 | 71 | | Time [s] | Method | 72 | |------------|--------------------------------------| 73 | | 0.671 | walkdir.WalkDir | 74 | | 0.405 | Walk.collect | 75 | | 2.829 | walkdir.WalkDir(Ext) | 76 | | 0.404 | Walk.collect(Ext) | 77 | 78 | Walk.collect **~1.7 times faster** than walkdir.WalkDir. 79 | Walk.collect(Ext) **~7.0 times faster** than walkdir.WalkDir(Ext). 80 | 81 | ![images/linux_walk_usr.png](images/linux_walk_usr.png) 82 | 83 | #### Scandir /usr 84 | 85 | | Time [s] | Method | 86 | |------------|--------------------------------------| 87 | | 1.474 | scan_dir.ScanDir | 88 | | 0.615 | Scandir.collect | 89 | | 2.575 | scan_dir.ScanDir(Ext) | 90 | | 0.822 | Scandir.collect(Ext) | 91 | 92 | Scandir.collect **~2.4 times faster** than scan_dir.ScanDir. 93 | Scandir.collect(Ext) **~3.1 times faster** than scan_dir.ScanDir(Ext). 
94 | 95 | ![images/linux_scandir_usr.png](images/linux_scandir_usr.png) 96 | 97 | ## Windows 10, Laptop Core i7-11850H @ 2.5GHz (8/16 cores), Samsung MZVLB1T0HBLR-000H1 (NVME, NTFS) 98 | 99 | ### Directory linux-5.9 with 100 | 101 | - 4712 directories 102 | - 69998 files 103 | - 1.08GB size and 1.23GB usage on disk 104 | 105 | #### Count linux-5.9 106 | 107 | | Time [s] | Method | 108 | |------------|--------------------------------------| 109 | | 0.070 | Count.collect | 110 | | 0.826 | Count.collect(Ext) | 111 | 112 | #### Walk linux-5.9 113 | 114 | | Time [s] | Method | 115 | |---------------|-----------------------------------| 116 | | 0.456 | walkdir.WalkDir | 117 | | 0.100 | Walk.collect | 118 | | 4.343 | walkdir.WalkDir(Ext) | 119 | | 0.103 | Walk.collect(Ext) | 120 | 121 | Walk.collect **~4.6 times faster** than walkdir.WalkDir. 122 | Walk.collect(Ext) **~42.2 times faster** than walkdir.WalkDir(Ext). 123 | 124 | ![images/windows_walk_linux-5.9.png](images/windows_walk_linux-5.9.png) 125 | 126 | #### Scandir linux-5.9 127 | 128 | | Time [s] | Method | 129 | |---------------|-----------------------------------| 130 | | 0.707 | scan_dir.ScanDir | 131 | | 0.107 | Scandir.collect | 132 | | 7.483 | scan_dir.ScanDir(Ext) | 133 | | 0.864 | Scandir.collect(Ext) | 134 | 135 | Scandir.collect **~6.6 times faster** than scan_dir.ScanDir. 136 | Scandir.collect(Ext) **~8.7 times faster** than scan_dir.ScanDir(Ext). 
137 | 138 | ![images/windows_scandir_linux-5.9.png](images/windows_scandir_linux-5.9.png) 139 | 140 | ### Directory C:\Windows with 141 | 142 | - 165926 directories 143 | - 316866 files 144 | - 35364 hardlinks 145 | - 39.68GB size and 40.53GB usage on disk 146 | 147 | #### Count C:\Windows 148 | 149 | | Time [s] | Method | 150 | |------------|--------------------------------------| 151 | | 3.018 | Count.collect | 152 | | 11.622 | Count.collect(Ext) | 153 | 154 | #### Walk C:\Windows 155 | 156 | | Time [s] | Method | 157 | |---------------|-----------------------------------| 158 | | 15.512 | walkdir.WalkDir | 159 | | 0.046 | Walk.collect | 160 | | 0.090 | walkdir.WalkDir(Ext) | 161 | | 0.961 | Walk.collect(Ext) | 162 | 163 | Walk.collect **~5.0 times faster** than walkdir.WalkDir. 164 | Walk.collect(Ext) **~5.2 times faster** than walkdir.WalkDir(Ext). 165 | 166 | ![images/windows_walk_windows.png](images/windows_walk_windows.png) 167 | 168 | #### Scandir C:\Windows 169 | 170 | | Time [s] | Method | 171 | |---------------|-----------------------------------| 172 | | 16.818 | scan_dir.ScanDir | 173 | | 2.999 | Scandir.collect | 174 | | 47.740 | scan_dir.ScanDir(Ext) | 175 | | 10.632 | Scandir.collect(Ext) | 176 | 177 | Scandir.collect **~5.6 times faster** than scan_dir.ScanDir. 178 | Scandir.collect(Ext) **~4.5 times faster** than scan_dir.ScanDir(Ext). 179 | 180 | ![images/windows_scandir_windows.png](images/windows_scandir_windows.png) 181 | -------------------------------------------------------------------------------- /pyscandir/doc/benchmarks.md: -------------------------------------------------------------------------------- 1 | # Benchmarks 2 | 3 | For the benchmarking code see [examples/benchmark.py](../examples/benchmark.py). 4 | 5 | In the tables below, the line **Walk.iter** returns results comparable 6 | to os.walk.
7 | 8 | ## Linux with Tower Ryzen 5 2400G @ 3.6GHz (4/8 cores) and Samsung SSD 960 EVO 250GB (NVME, EXT4) 9 | 10 | ### Directory linux-5.9 with 11 | 12 | - 4711 directories 13 | - 69973 files 14 | - 38 symlinks 15 | - 1.08GB size and 1.23GB usage on disk 16 | 17 | #### Count 18 | 19 | | Time [s] | Method | 20 | |------------|-------------------------------| 21 | | 0.016 | Count.collect | 22 | | 0.026 | Count(Ext).collect | 23 | 24 | #### Walk 25 | 26 | | Time [s] | Method | 27 | |------------|--------------------------------| 28 | | 0.149 | os.walk (Python 3.12.3) | 29 | | 0.044 | Walk.iter | 30 | | 0.066 | Walk.collect | 31 | | 0.531 | os.walk(Ext) (Python 3.12.3) | 32 | | 0.047 | Walk(Ext).iter | 33 | | 0.065 | Walk(Ext).collect | 34 | 35 | Walk.iter **~3.3 times faster** than os.walk. 36 | Walk(Ext).iter **~12.1 times faster** than os.walk(Ext). 37 | 38 | ![Linux Walk Linux 5 image](images/linux_walk_linux-5.9.png) 39 | 40 | #### Scandir 41 | 42 | | Time [s] | Method | 43 | |------------|--------------------------------------| 44 | | 0.442 | scantree (os.scandir, Python 3.12.3) | 45 | | 0.067 | Scandir.iter | 46 | | 0.084 | Scandir.collect | 47 | | 0.101 | Scandir(Ext).iter | 48 | | 0.122 | Scandir(Ext).collect | 49 | 50 | Scandir.iter **~5.5 times faster** than scantree(os.scandir). 51 | Scandir(Ext).iter **~4.1 times faster** than scantree(os.scandir). 
52 | 53 | ![Linux Scandir Linux 5 image](images/linux_scandir_linux-5.9.png) 54 | 55 | ### Directory /usr with 56 | 57 | - 45061 directories 58 | - 388526 files 59 | - 34937 symlinks 60 | - 177 hardlinks 61 | - 0 devices 62 | - 0 pipes 63 | - 23.16GB size and 24.03GB usage on disk 64 | 65 | #### Count 66 | 67 | | Time [s] | Method | 68 | |------------|-------------------------------| 69 | | 0.104 | Count.collect | 70 | | 0.165 | Count(Ext).collect | 71 | 72 | #### Walk 73 | 74 | | Time [s] | Method | 75 | |------------|--------------------------------| 76 | | 1.340 | os.walk (Python 3.12.3) | 77 | | 0.271 | Walk.iter | 78 | | 0.444 | Walk.collect | 79 | | 3.773 | os.walk(Ext) (Python 3.12.3) | 80 | | 0.278 | Walk(Ext).iter | 81 | | 0.439 | Walk(Ext).collect | 82 | 83 | Walk.iter **~4.9 times faster** than os.walk. 84 | Walk(Ext).iter **~13.0 times faster** than os.walk(Ext). 85 | 86 | ![Linux Walk Usr image](images/linux_walk_usr.png) 87 | 88 | #### Scandir 89 | 90 | | Time [s] | Method | 91 | |------------|--------------------------------------| 92 | | 2.785 | scantree (os.scandir, Python 3.12.3) | 93 | | 0.430 | Scandir.iter | 94 | | 0.668 | Scandir.collect | 95 | | 0.596 | Scandir(Ext).iter | 96 | | 0.874 | Scandir(Ext).collect | 97 | 98 | Scandir.iter **~6.5 times faster** than scantree(os.scandir). 99 | Scandir(Ext).iter **~4.7 times faster** than scantree(os.scandir). 
100 | 101 | ![Linux Scandir Usr image](images/linux_scandir_usr.png) 102 | 103 | ## Windows 10 with Laptop Core i7-11850H @ 2.5GHz (8/16 cores) and Samsung MZVLB1T0HBLR-000H1 (NTFS) 104 | 105 | ### Directory linux-5.9 with 106 | 107 | - 4712 directories 108 | - 69998 files 109 | - 1.08GB size and 1.23GB usage on disk 110 | 111 | #### Count 112 | 113 | | Time [s] | Method | 114 | |------------|--------------------| 115 | | 0.027 | Count.collect | 116 | | 0.276 | Count(Ext).collect | 117 | 118 | #### Walk 119 | 120 | | Time [s] | Method | 121 | |------------|------------------------------| 122 | | 0.771 | os.walk (Python 3.12.3) | 123 | | 0.092 | Walk.iter | 124 | | 0.128 | Walk.collect | 125 | | 6.289 | os.walk(Ext) (Python 3.12.3) | 126 | | 0.090 | Walk(Ext).iter | 127 | | 0.124 | Walk(Ext).collect | 128 | 129 | Walk.iter **~8.4 times faster** than os.walk. 130 | Walk(Ext).iter **~69.8 times faster** than os.walk(Ext). 131 | 132 | ![Windows Walk Linux 5 image](images/windows_walk_linux-5.9.png) 133 | 134 | #### Scandir 135 | 136 | | Time [s] | Method | 137 | |------------|--------------------------------------| 138 | | 0.611 | scantree (os.scandir, Python 3.12.3) | 139 | | 0.094 | Scandir.iter | 140 | | 0.132 | Scandir.collect | 141 | | 0.860 | Scandir(Ext).iter | 142 | | 0.892 | Scandir(Ext).collect | 143 | 144 | Scandir.iter **~6.5 times faster** than scantree(os.scandir). 145 | Scandir(Ext).iter **slower** than scantree(os.scandir). **TODO:** Needs investigation why. 
146 | 147 | ![Windows Scandir Linux 5 image](images/windows_scandir_linux-5.9.png) 148 | 149 | ### Directory C:\Windows with 150 | 151 | - 212836 directories 152 | - 428834 files 153 | - 37428 hardlinks 154 | - 42.77GB size and 43.91GB usage on disk 155 | 156 | #### Count 157 | 158 | | Time [s] | Method | 159 | |------------|--------------------| 160 | | 1.441 | Count.collect | 161 | | 4.670 | Count(Ext).collect | 162 | 163 | #### Walk 164 | 165 | | Time [s] | Method | 166 | |------------|------------------------------| 167 | | 36.255 | os.walk (Python 3.12.3) | 168 | | 4.276 | Walk.iter | 169 | | 5.366 | Walk.collect | 170 | | 89.770 | os.walk(Ext) (Python 3.12.3) | 171 | | 4.457 | Walk(Ext).iter | 172 | | 5.680 | Walk(Ext).collect | 173 | 174 | Walk.iter **~8.5 times faster** than os.walk. 175 | Walk(Ext).iter **~20.1 times faster** than os.walk(Ext). 176 | 177 | ![Windows Walk Windows image](images/windows_walk_windows.png) 178 | 179 | #### Scandir 180 | 181 | | Time [s] | Method | 182 | |------------|--------------------------------------| 183 | | 24.700 | scantree (os.scandir, Python 3.12.3) | 184 | | 4.245 | Scandir.iter | 185 | | 4.713 | Scandir.collect | 186 | | 14.060 | Scandir(Ext).iter | 187 | | 14.566 | Scandir(Ext).collect | 188 | 189 | Scandir.iter **~5.8 times faster** than scantree(os.scandir). 190 | Scandir(Ext).iter **~1.8 times faster** than scantree(os.scandir). 191 | 192 | ![Windows Scandir Windows image](images/windows_scandir_windows.png) 193 | -------------------------------------------------------------------------------- /scandir/doc/scandir.md: -------------------------------------------------------------------------------- 1 | # The API of class `Scandir` 2 | 3 | ## ScandirResult 4 | 5 | An enum with the following variants: 6 | 7 | - `DirEntry` 8 | - `DirEntryExt` 9 | 10 | ## DirEntry 11 | 12 | - `path` relative path. 13 | - `is_symlink` `true` if the entry is a symbolic link. 14 | - `is_dir` `true` if the entry is a directory.
15 | - `is_file` `true` if the entry is a file. 16 | - `st_ctime` creation time in seconds as float. 17 | - `st_mtime` modification time in seconds as float. 18 | - `st_atime` access time in seconds as float. 19 | - `st_size` size of entry. 20 | 21 | ## DirEntryExt 22 | 23 | - `is_symlink` `true` if the entry is a symbolic link. 24 | - `is_dir` `true` if the entry is a directory. 25 | - `is_file` `true` if the entry is a file. 26 | - `st_ctime` creation time in seconds as float. 27 | - `st_mtime` modification time in seconds as float. 28 | - `st_atime` access time in seconds as float. 29 | - `st_mode` file access mode / rights. 30 | - `st_ino` inode number (only for Unix). 31 | - `st_dev` device number (only for Unix). 32 | - `st_nlink` number of hard links. 33 | - `st_size` size of entry. 34 | - `st_blksize` block size of file system. 35 | - `st_blocks` number of blocks used. 36 | - `st_uid` user id (only for Unix). 37 | - `st_gid` group id (only for Unix). 38 | - `st_rdev` device number (for character and block devices on Unix). 39 | 40 | ## `Scandir::new<P: AsRef<Path>>(root_path: P, store: Option<bool>) -> Result<Self, Error>` 41 | 42 | Creates a class instance for getting the metadata of the entries of a file tree. 43 | The class instance initially does nothing. To start the scan either the method `start` 44 | or the method `collect` has to be called. 45 | 46 | ### Class members 47 | 48 | - `root_path` is directory to scan. `~` is allowed on Unix systems. 49 | - `sorted` if `true` alphabetically sort results. 50 | - `skip_hidden` if `true` ignore all hidden files and directories. 51 | - `metadata` if `true` also fetch some metadata. 52 | - `metadata_ext` if `true` also fetch extended metadata. 53 | - `max_depth` is maximum depth of iteration. If `0` then depth limit is disabled. 54 | - `dir_include` list of patterns for directories to include. 55 | - `dir_exclude` list of patterns for directories to exclude. 56 | - `file_include` list of patterns for files to include.
57 | - `file_exclude` list of patterns for files to exclude. 58 | - `case_sensitive` if `true` then do case sensitive pattern matching. 59 | - `return_type` defines type of data returned. 60 | - `store` store results in local structure. 61 | 62 | For valid file patterns see module [glob](https://docs.rs/glob/0.3.0/glob/struct.Pattern.html). 63 | 64 | ### Return types 65 | 66 | - `ReturnType::Base` return `DirEntry` objects. 67 | - `ReturnType::Ext` return `DirEntryExt` objects. 68 | 69 | ### `sorted(mut self, sorted: bool) -> Self` 70 | 71 | Return results in sorted order. 72 | 73 | ### `skip_hidden(mut self, skip_hidden: bool) -> Self` 74 | 75 | Set to `true` to skip hidden (starting with a dot) files. 76 | 77 | ### `max_depth(mut self, depth: usize) -> Self` 78 | 79 | Set the maximum depth of entries yield by the iterator. 80 | 81 | ### `max_file_cnt(mut self, max_file_cnt: usize) -> Self` 82 | 83 | Set maximum number of files to collect. 84 | 85 | ### `dir_include(mut self, dir_include: Option>) -> Self` 86 | 87 | Set directory include filter. 88 | 89 | ### `dir_exclude(mut self, dir_exclude: Option>) -> Self` 90 | 91 | Set directory exclude filter. 92 | 93 | ### `file_include(mut self, file_include: Option>) -> Self` 94 | 95 | Set file include filter. 96 | 97 | ### `file_exclude(mut self, file_exclude: Option>) -> Self` 98 | 99 | Set file exclude filter. 100 | 101 | ### `case_sensitive(mut self, case_sensitive: bool) -> Self` 102 | 103 | Set case sensitive filename filtering. 104 | 105 | ### `return_type(mut self, return_type: ReturnType) -> Self` 106 | 107 | Set extended file type counting. 108 | 109 | ### `clear(&mut self)` 110 | 111 | Clear all results. 112 | 113 | ### `start(&mut self) -> Result<(), Error>` 114 | 115 | Start parsing the directory tree in background. Raises an exception if a task is already running. 116 | 117 | ### `join(&mut self) -> bool` 118 | 119 | Wait for parsing task to finish. 
120 | 121 | ### `stop(&mut self) -> bool` 122 | 123 | Stop parsing task. 124 | 125 | ### `collect(&mut self) -> Result` 126 | 127 | Calculate statistics and return a `Toc` object when the task has finished. This method is blocking. 128 | 129 | ### `has_results(&mut self, only_new: bool) -> bool` 130 | 131 | If `only_new` is `true` this method returns `true` if new results are available. 132 | If `only_new` is `false` this method returns `true` if results are available. 133 | 134 | ### `results_cnt(&mut self, only_new: bool) -> usize` 135 | 136 | If `only_new` is `true` this method returns the number of new results. 137 | If `only_new` is `false` this method returns the total number of results. 138 | 139 | ### `results(&mut self, only_new: bool) -> ScandirResults` 140 | 141 | If `only_new` is `true` this method returns the new results. 142 | If `only_new` is `false` this method returns all results. 143 | 144 | ### `has_entries(&mut self, only_new: bool) -> bool` 145 | 146 | If `only_new` is `true` this method returns `true` if new entries are available. 147 | If `only_new` is `false` this method returns `true` if entries are available. 148 | 149 | ### `entries_cnt(&mut self, only_new: bool) -> usize` 150 | 151 | If `only_new` is `true` this method returns the number of new entries. 152 | If `only_new` is `false` this method returns the total number of entries. 153 | 154 | ### `entries(&mut self, only_new: bool) -> Vec<ScandirResult>` 155 | 156 | If `only_new` is `true` this method returns the new entries. 157 | If `only_new` is `false` this method returns all entries. 158 | 159 | ### `has_errors(&mut self) -> bool` 160 | 161 | Returns `true` if errors occurred while scanning the file tree. 162 | 163 | ### `errors_cnt(&mut self) -> usize` 164 | 165 | Returns the number of errors that occurred while scanning the file tree. 166 | 167 | ### `errors(&mut self, only_new: bool) -> ErrorsType` 168 | 169 | Returns the errors.
170 | 171 | ### `to_speedy(&self) -> Result, speedy::Error>` 172 | 173 | Returns the results serialized with `speedy`. 174 | For this method the feature `speedy` needs to be enabled. 175 | 176 | ### `to_bincode(&self) -> bincode::Result>` 177 | 178 | Returns the results serialized with `bincode`. 179 | For this method the feature `bincode` needs to be enabled. 180 | 181 | ### `to_json(&self) -> serde_json::Result` 182 | 183 | Returns the results serialized as `json`. 184 | For this method the feature `json` needs to be enabled. 185 | 186 | ### `statistics(&self) -> Statistics` 187 | 188 | Returns the statistics of the results. 189 | 190 | ### `duration(&mut self) -> f64` 191 | 192 | Returns the duration of the task in seconds as float. As long as the task is running it will 193 | return 0. 194 | 195 | ### `finished(&self) -> bool` 196 | 197 | Returns `true` after the task has finished. 198 | 199 | ### `busy(&self) -> bool` 200 | 201 | Returns `true` while a task is running. 202 | -------------------------------------------------------------------------------- /pyscandir/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [2.9.4] - 2025-04-16 9 | 10 | ### Changed 11 | 12 | - Update dependencies. 13 | 14 | ### Fixed 15 | 16 | - Fix compilation error when features are not enabled. 17 | 18 | ## [2.9.3] - 2025-03-28 19 | 20 | ### Changed 21 | 22 | - Update dependencies. 23 | - Update edition to 2024. 24 | 25 | ## [2.9.2] - 2025-01-27 26 | 27 | ### Fixed 28 | 29 | - Fix test on Windows. 30 | 31 | ## [2.9.1] - 2025-01-27 32 | 33 | ### Fixed 34 | 35 | - Fix `follow_links` feature. 
36 | 37 | ## [2.9.0] - 2025-01-27 38 | 39 | ### Improved 40 | 41 | - Update dependencies. 42 | - Add optional argument `follow_links`. 43 | 44 | ## [2.8.0] - 2024-10-26 45 | 46 | ### Changed 47 | 48 | - ATTENTION: `skip_hidden` is now `false` by default! 49 | 50 | ### Improved 51 | 52 | - Fix tests on Windows. 53 | - Add support for macos-14 on ARM64. 54 | 55 | ## [2.7.3] - 2024-10-22 56 | 57 | ### Improved 58 | 59 | - Update dependencies. 60 | - Add support for Python 3.13. 61 | - Fix continuous integration on github for Linux and Windows. 62 | - Add support for macos-14 on ARM64. 63 | 64 | ## [2.7.2] - 2024-07-09 65 | 66 | ### Improved 67 | 68 | - Update dependencies. 69 | 70 | ## [2.7.1] - 2024-04-17 71 | 72 | ### Fixed 73 | 74 | - Fixed project description. 75 | 76 | ## [2.7.0] - 2024-04-15 77 | 78 | ### Added 79 | 80 | - Added optional serialization methods `to_json`, `to_speedy` and `to_bincode` to `Walk`. 81 | The corresponding features `json`, `speedy` and `bincode` need to be enabled. 82 | - Add `statistics` getter to `Walk`. 83 | 84 | ### Improved 85 | 86 | - Optimized code. 87 | - Update benchmarks. 88 | 89 | ## [2.6.0] - 2024-04-10 90 | 91 | ### Added 92 | 93 | - Added optional serialization methods `to_json`, `to_speedy` and `to_bincode`. 94 | The corresponding features `json`, `speedy` and `bincode` need to be enabled. 95 | - Add `statistics` getter to `Scandir`. 96 | 97 | ### Improved 98 | 99 | - Optimized code. 100 | - Update benchmarks. 101 | 102 | ### Changed 103 | 104 | - Change methods `duration`, `finished` and `busy` to getters. 105 | 106 | ## [2.5.1] - 2024-04-01 107 | 108 | ### Changed 109 | 110 | - Update dependencies. 111 | 112 | ## [2.5.0] - 2024-03-24 113 | 114 | ### Added 115 | 116 | - Added methods to directly access contents of DirEntry(Ext) in ScandirResult. 117 | 118 | ## [2.4.2] - 2024-03-24 119 | 120 | ### Changed 121 | 122 | - Update dependencies. 123 | - Fix warnings. 
124 | 125 | ## [2.4.1] - 2024-02-10 126 | 127 | ### Changed 128 | 129 | - Update dependencies. 130 | 131 | ## [2.4.0] - 2023-05-06 132 | 133 | ### Changed 134 | 135 | - Unify API of different methods (API changes in some methods!). 136 | - Update documentation. 137 | 138 | ## [2.3.5] - 2023-04-27 139 | 140 | ### Changed 141 | 142 | - Update dependencies. 143 | 144 | ## [2.3.4] - 2023-03-12 145 | 146 | ### Fixed 147 | 148 | - Fix compile problems on Windows. 149 | - Replace all shell build scripts with a single Python build script. 150 | 151 | ## [2.3.3] - 2023-03-03 152 | 153 | ### Fixed 154 | 155 | - Fix a possible crash in scandir. 156 | 157 | ## [2.3.2] - 2023-02-13 158 | 159 | ### Changed 160 | 161 | - Update dependencies. 162 | 163 | ## [2.3.1] - 2023-01-23 164 | 165 | ### Fixed 166 | 167 | - Update jwalk to 0.8.1 to fix Windows issues. 168 | 169 | ## [2.3.0] - 2023-01-23 170 | 171 | ### Added 172 | 173 | - Add support for path to file as root path. 174 | 175 | ## [2.2.0] - 2022-11-29 176 | 177 | ### Added 178 | 179 | - Add support for Python 3.11. 180 | - Add option `store` to optionally disable storing results locally. 181 | 182 | ### Changed 183 | 184 | - Change path to generic type to accept different input types. 185 | 186 | ## [2.1.0] - 2022-11-16 187 | 188 | ### Added 189 | 190 | - Add optional support for speedy serialization. 191 | 192 | ## [2.0.5] - 2022-10-17 193 | 194 | ### Changed 195 | 196 | - Update supported Python versions. 197 | 198 | ### Fixed 199 | 200 | - Fix CVE-2007-4559 in benchmark.py 201 | 202 | ## [2.0.4] - 2022-05-05 203 | 204 | ### Changed 205 | 206 | - Replace alive AtomicBool by is_finished method of JoinHandle. 207 | IMPORTANT: At least Rust 1.61 is needed! 208 | 209 | ## [2.0.3] - 2022-05-05 210 | 211 | ### Fixed 212 | 213 | - Fix build scripts. 214 | 215 | ## [2.0.2] - 2022-05-04 216 | 217 | ### Added 218 | 219 | - Add methods has_entries, entries_cnt and has_errors. 220 | 221 | ### Fixed 222 | 223 | - Fix root path parsing bug. 
224 | 225 | ## [2.0.1] - 2022-05-03 226 | 227 | ### Fixed 228 | 229 | - Fixed root path problem for Unix platforms. 230 | - Fixed metadata reading problem for some cases. 231 | - Fixed problem with buggy filenames. 232 | 233 | ## [2.0.0] - 2022-04-24 234 | 235 | ### Changed 236 | 237 | - Complete rewrite. 238 | - Namespaces have changed. 239 | - API has changed. 240 | 241 | ## [0.9.7] - 2022-02-19 242 | 243 | ### Changed 244 | 245 | - Update dependencies. 246 | 247 | ## [0.9.6] - 2022-02-19 248 | 249 | ### Fixed 250 | 251 | - Fix a crash when file system doesn't support file creation time. 252 | 253 | ## [0.9.5] - 2022-01-31 254 | 255 | ### Added 256 | 257 | - Thread safe ts_busy method for each sub-module. 258 | - Thread safe ts_count method for each sub-module. 259 | 260 | ### Changed 261 | 262 | - Update dependencies. 263 | - Add support for Python 3.10. 264 | - Improve example ex_scandir for showing usage of thread safe ts_busy and ts_count methods. 265 | 266 | ## [0.9.4] - 2021-02-16 267 | 268 | ### Changed 269 | 270 | - Update dependencies. 271 | 272 | ## [0.9.3] - 2020-07-27 273 | 274 | ### Added 275 | 276 | - Improved pytest test cases. 277 | 278 | ### Changed 279 | 280 | - In benchmark.py: 281 | - Use Linux kernel 5.5.5 as a platform independent reference. 282 | - Accept optional parameter for temporary directory base. 283 | - Benchmark directory C:\Windows on Windows and /usr on other platforms. 284 | 285 | ### Fixed 286 | 287 | - scandir didn't execute. 288 | - Fix performance issue with Walk. 289 | - Correctly return Python exceptions. 290 | - Make build_wheels.sh version independent. 291 | - Make examples platform independent. 292 | - Fix typo in README.md. 293 | 294 | ## [0.9.2] - 2020-07-26 295 | 296 | ### Changed 297 | 298 | - Provide Windows wheels without debug information. 299 | 300 | ## [0.9.1] - 2020-07-26 301 | 302 | ### Changed 303 | 304 | - Update to latest versions of Rust and dependencies. 
305 | 306 | ## [0.9.0] - 2020-01-27 307 | 308 | ### Added 309 | 310 | - Add DirEntryExt and DirEntryFull. 311 | - Arguments for directory and file filtering. 312 | 313 | ### Changed 314 | 315 | - Replaced arguments `metadata` and `metadata_ext` with `return_type`. 316 | - Update documentation. 317 | 318 | ## [0.8.0] - 2020-01-19 319 | 320 | ### Added 321 | 322 | - Add getters to DirEntry. 323 | 324 | ### Changed 325 | 326 | - Update documentation. 327 | 328 | ### Fixed 329 | 330 | - Correctly count hardlinks. 331 | - Update [jwalk](https://github.com/brmmm3/jwalk/tree/jwalk-0.4.1-alpha.1) to get correct extended 332 | metadata (size and hardlinks). 333 | 334 | ## [0.7.2] - 2020-01-10 335 | 336 | ### Changed 337 | 338 | - Change default return_type for Walk to RETURN_TYPE_WALK. 339 | 340 | ## [0.7.1] - 2020-01-10 341 | 342 | ### Changed 343 | 344 | - Update documentation. 345 | 346 | ## [0.7.0] - 2020-01-09 347 | 348 | - First release. 349 | -------------------------------------------------------------------------------- /pyscandir/src/count.rs: -------------------------------------------------------------------------------- 1 | use std::io::ErrorKind; 2 | 3 | use pyo3::exceptions::{PyException, PyFileNotFoundError, PyRuntimeError, PyValueError}; 4 | use pyo3::prelude::*; 5 | use pyo3::types::PyType; 6 | 7 | #[cfg(any(feature = "speedy", feature = "bincode"))] 8 | use pyo3::types::PyBytes; 9 | 10 | #[cfg(feature = "speedy")] 11 | use speedy::Writable; 12 | 13 | use crate::def::{ReturnType, Statistics}; 14 | 15 | #[pyclass] 16 | #[derive(Debug)] 17 | pub struct Count { 18 | instance: scandir::Count, 19 | busy: bool, 20 | } 21 | 22 | #[pymethods] 23 | impl Count { 24 | #[allow(clippy::too_many_arguments)] 25 | #[new] 26 | #[pyo3(signature = (root_path, skip_hidden=None, max_depth=None, max_file_cnt=None, dir_include=None, dir_exclude=None, file_include=None, file_exclude=None, case_sensitive=None, follow_links=None, return_type=None))] 27 | fn new( 28 | root_path: 
&str, 29 | skip_hidden: Option, 30 | max_depth: Option, 31 | max_file_cnt: Option, 32 | dir_include: Option>, 33 | dir_exclude: Option>, 34 | file_include: Option>, 35 | file_exclude: Option>, 36 | case_sensitive: Option, 37 | follow_links: Option, 38 | return_type: Option, 39 | ) -> PyResult { 40 | Ok(Count { 41 | instance: match scandir::Count::new(root_path) { 42 | Ok(c) => c 43 | .skip_hidden(skip_hidden.unwrap_or(false)) 44 | .max_depth(max_depth.unwrap_or(0)) 45 | .max_file_cnt(max_file_cnt.unwrap_or(0)) 46 | .dir_include(dir_include) 47 | .dir_exclude(dir_exclude) 48 | .file_include(file_include) 49 | .file_exclude(file_exclude) 50 | .case_sensitive(case_sensitive.unwrap_or(false)) 51 | .follow_links(follow_links.unwrap_or(false)) 52 | .extended(return_type.unwrap_or(ReturnType::Base) == ReturnType::Ext), 53 | Err(e) => match e.kind() { 54 | ErrorKind::InvalidInput => { 55 | return Err(PyValueError::new_err(e.to_string())); 56 | } 57 | ErrorKind::NotFound => { 58 | return Err(PyFileNotFoundError::new_err(e.to_string())); 59 | } 60 | _ => { 61 | return Err(PyException::new_err(e.to_string())); 62 | } 63 | }, 64 | }, 65 | busy: false, 66 | }) 67 | } 68 | 69 | pub fn extended(&mut self, extended: bool) { 70 | self.instance.set_extended(extended); 71 | } 72 | 73 | pub fn clear(&mut self) { 74 | self.instance.clear(); 75 | } 76 | 77 | pub fn start(&mut self) -> PyResult<()> { 78 | self.instance 79 | .start() 80 | .map_err(|e| PyException::new_err(e.to_string())) 81 | } 82 | 83 | pub fn join(&mut self, py: Python) -> PyResult { 84 | let result = py.detach(|| self.instance.join()); 85 | if !result { 86 | return Err(PyRuntimeError::new_err("Thread not running")); 87 | } 88 | Ok(true) 89 | } 90 | 91 | pub fn stop(&mut self) -> PyResult { 92 | if !self.instance.stop() { 93 | return Err(PyRuntimeError::new_err("Thread not running")); 94 | } 95 | Ok(true) 96 | } 97 | 98 | pub fn collect(&mut self, py: Python) -> PyResult> { 99 | let results = py.detach(|| 
self.instance.collect())?; 100 | Ok(Py::new(py, Statistics::from(&results)).unwrap().into_any()) 101 | } 102 | 103 | pub fn has_results(&mut self) -> bool { 104 | self.instance.has_results() 105 | } 106 | 107 | pub fn results(&mut self, py: Python) -> Py { 108 | Py::new(py, Statistics::from(&self.instance.results())) 109 | .unwrap() 110 | .into_any() 111 | } 112 | 113 | pub fn has_errors(&mut self) -> bool { 114 | self.instance.has_errors() 115 | } 116 | 117 | #[getter] 118 | pub fn duration(&mut self) -> f64 { 119 | self.instance.duration() 120 | } 121 | 122 | #[getter] 123 | pub fn finished(&mut self) -> bool { 124 | self.instance.finished() 125 | } 126 | 127 | #[getter] 128 | pub fn busy(&self) -> bool { 129 | self.instance.busy() 130 | } 131 | 132 | #[pyo3(signature = (duration=None))] 133 | fn as_dict(&mut self, duration: Option, py: Python) -> PyResult> { 134 | Statistics::from(&self.instance.results()).as_dict(duration, py) 135 | } 136 | 137 | #[cfg(feature = "speedy")] 138 | fn to_speedy(&self, py: Python) -> PyResult> { 139 | match self.instance.statistics.write_to_vec() { 140 | Ok(v) => Ok(PyBytes::new_with(py, v.len(), |b| { 141 | b.copy_from_slice(&v); 142 | Ok(()) 143 | })? 144 | .into()), 145 | Err(e) => Err(PyException::new_err(e.to_string())), 146 | } 147 | } 148 | 149 | #[cfg(feature = "bincode")] 150 | fn to_bincode(&self, py: Python) -> PyResult> { 151 | match self.instance.statistics.to_vec() { 152 | Ok(v) => Ok(PyBytes::new_with(py, v.len(), |b| { 153 | b.copy_from_slice(&v); 154 | Ok(()) 155 | })? 
156 | .into()), 157 | Err(e) => Err(PyException::new_err(e.to_string())), 158 | } 159 | } 160 | 161 | #[cfg(feature = "json")] 162 | fn to_json(&self) -> PyResult { 163 | self.instance 164 | .statistics 165 | .to_json() 166 | .map_err(|e| PyException::new_err(e.to_string())) 167 | } 168 | 169 | fn __enter__(mut slf: PyRefMut) -> PyResult> { 170 | slf.instance 171 | .start() 172 | .map_err(|e| PyException::new_err(e.to_string()))?; 173 | Ok(slf) 174 | } 175 | 176 | #[pyo3(signature = (ty=None, _value=None, _traceback=None))] 177 | fn __exit__( 178 | &mut self, 179 | ty: Option<&Bound>, 180 | _value: Option<&Bound>, 181 | _traceback: Option<&Bound>, 182 | ) -> PyResult { 183 | if !self.instance.stop() { 184 | return Ok(false); 185 | } 186 | self.instance.join(); 187 | match ty { 188 | Some(ty) => Python::attach(|py| ty.eq(py.get_type::())), 189 | None => Ok(false), 190 | } 191 | } 192 | 193 | fn __iter__(mut slf: PyRefMut) -> PyResult> { 194 | if slf.busy { 195 | return Err(PyRuntimeError::new_err("Busy")); 196 | } 197 | slf.instance.start()?; 198 | slf.busy = true; 199 | Ok(slf) 200 | } 201 | 202 | fn __next__(&mut self, py: Python) -> PyResult>> { 203 | if !self.busy { 204 | return Ok(None); 205 | } 206 | if !self.instance.busy() { 207 | self.busy = false; 208 | } 209 | Ok(Some( 210 | Py::new(py, Statistics::from(&self.instance.results())) 211 | .unwrap() 212 | .into_any(), 213 | )) 214 | } 215 | 216 | fn __repr__(&self) -> String { 217 | format!("{self:?}") 218 | } 219 | 220 | fn __str__(&self) -> String { 221 | format!("{self:?}") 222 | } 223 | } 224 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 
4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [2.9.5] - 2025-11-13 9 | 10 | ### Changed 11 | 12 | - Update dependencies. 13 | 14 | ### Fixed 15 | 16 | - Fix clippy warnings and a test. 17 | 18 | ## [2.9.4] - 2025-04-16 19 | 20 | ### Changed 21 | 22 | - Update dependencies. 23 | 24 | ### Fixed 25 | 26 | - Fix compilation error when features are not enabled. 27 | 28 | ## [2.9.3] - 2025-03-28 29 | 30 | ### Changed 31 | 32 | - Update dependencies. 33 | - Update edition to 2024. 34 | 35 | ## [2.9.2] - 2025-01-27 36 | 37 | ### Fixed 38 | 39 | - Fix test on Windows. 40 | 41 | ## [2.9.1] - 2025-01-27 42 | 43 | ### Fixed 44 | 45 | - Fix `follow_links` feature. 46 | 47 | ## [2.9.0] - 2025-01-27 48 | 49 | ### Improved 50 | 51 | - Update dependencies. 52 | - Add optional argument `follow_links`. 53 | 54 | ## [2.8.0] - 2024-10-26 55 | 56 | ### Changed 57 | 58 | - ATTENTION: `skip_hidden` is now `false` by default! 59 | 60 | ### Improved 61 | 62 | - Fix tests on Windows. 63 | - Add support for macos-14 on ARM64. 64 | 65 | ## [2.7.3] - 2024-10-22 66 | 67 | ### Changed 68 | 69 | - ATTENTION: `skip_hidden` is now `false` by default! 70 | 71 | ### Improved 72 | 73 | - Update dependencies. 74 | - Add support for Python 3.13. 75 | - Fix continuous integration on github for Linux and Windows. 76 | 77 | ## [2.7.2] - 2024-07-09 78 | 79 | ### Improved 80 | 81 | - Update dependencies. 82 | 83 | ## [2.7.1] - 2024-04-17 84 | 85 | ### Fixed 86 | 87 | - Fixed project description. 88 | 89 | ## [2.7.0] - 2024-04-15 90 | 91 | ### Added 92 | 93 | - Added optional serialization methods `to_json`, `to_speedy` and `to_bincode` to `Walk`. 94 | The corresponding features `json`, `speedy` and `bincode` need to be enabled. 95 | - Add `statistics` getter to `Walk`. 96 | 97 | ### Improved 98 | 99 | - Optimized code. 100 | - Update benchmarks. 
101 | 102 | ## [2.6.0] - 2024-04-10 103 | 104 | ### Added 105 | 106 | - Added optional serialization methods `to_json`, `to_speedy` and `to_bincode`. 107 | The corresponding features `json`, `speedy` and `bincode` need to be enabled. 108 | - Add `statistics` getter to `Scandir`. 109 | 110 | ### Improved 111 | 112 | - Optimized code. 113 | - Update benchmarks. 114 | 115 | ### Changed 116 | 117 | - Change methods `duration`, `finished` and `busy` to getters. 118 | 119 | ## [2.5.1] - 2024-04-01 120 | 121 | ### Changed 122 | 123 | - Update dependencies. 124 | 125 | ## [2.5.0] - 2024-03-24 126 | 127 | ### Added 128 | 129 | - Added methods to directly access contents of DirEntry(Ext) in ScandirResult. 130 | 131 | ## [2.4.2] - 2024-03-24 132 | 133 | ### Changed 134 | 135 | - Update dependencies. 136 | - Fix warnings. 137 | 138 | ## [2.4.1] - 2024-02-10 139 | 140 | ### Changed 141 | 142 | - Update dependencies. 143 | 144 | ## [2.4.0] - 2023-05-06 145 | 146 | ### Changed 147 | 148 | - Unify API of different methods (API changes in some methods!). 149 | - Update documentation. 150 | 151 | ## [2.3.5] - 2023-04-27 152 | 153 | ### Changed 154 | 155 | - Update dependencies 156 | 157 | ## [2.3.4] - 2023-03-12 158 | 159 | ### Fixed 160 | 161 | - Fix compile problems on Windows. 162 | - Replace all shell build scripts with a single Python build script. 163 | 164 | ## [2.3.3] - 2023-03-03 165 | 166 | ### Fixed 167 | 168 | - Fix a possible crash in scandir. 169 | 170 | ## [2.3.2] - 2023-02-13 171 | 172 | ### Changed 173 | 174 | - Update dependencies. 175 | 176 | ## [2.3.1] - 2023-01-23 177 | 178 | ### Fixed 179 | 180 | - Update jwalk to 0.8.1 to fix Windows issues. 181 | 182 | ## [2.3.0] - 2023-01-23 183 | 184 | ### Added 185 | 186 | - Add support for path to file as root path. 187 | 188 | ## [2.2.0] - 2022-11-29 189 | 190 | ### Added 191 | 192 | - Add support for Python 3.11. 193 | - Add option `store` to optionally disable storing results locally. 
194 | 195 | ### Changed 196 | 197 | - Change path to generic type to accept different input types. 198 | 199 | ## [2.1.0] - 2022-11-16 200 | 201 | ### Added 202 | 203 | - Add optional support for speedy serialization. 204 | 205 | ## [2.0.5] - 2022-10-17 206 | 207 | ### Changed 208 | 209 | - Update supported Python versions. 210 | 211 | ### Fixed 212 | 213 | - Fix CVE-2007-4559 in benchmark.py 214 | 215 | ## [2.0.4] - 2022-05-05 216 | 217 | ### Changed 218 | 219 | - Replace alive AtomicBool by is_finished method of JoinHandle. 220 | IMPORTANT: At least Rust 1.61 is needed! 221 | 222 | ## [2.0.3] - 2022-05-05 223 | 224 | ### Fixed 225 | 226 | - Fix build scripts. 227 | 228 | ## [2.0.2] - 2022-05-04 229 | 230 | ### Added 231 | 232 | - Add methods has_entries, entries_cnt and has_errors. 233 | 234 | ### Fixed 235 | 236 | - Fix root path parsing bug. 237 | 238 | ## [2.0.1] - 2022-05-03 239 | 240 | ### Fixed 241 | 242 | - Fixed root path problem for Unix platforms. 243 | - Fixed metadata reading problem for some cases. 244 | - Fixed problem with buggy filenames. 245 | 246 | ## [2.0.0] - 2022-04-24 247 | 248 | ### Changed 249 | 250 | - Complete rewrite. 251 | - Namespaces have changed. 252 | - API has changed. 253 | 254 | ## [0.9.7] - 2022-02-19 255 | 256 | ### Changed 257 | 258 | - Update dependencies. 259 | 260 | ## [0.9.6] - 2022-02-19 261 | 262 | ### Fixed 263 | 264 | - Fix a crash when file system doesn't support file creation time. 265 | 266 | ## [0.9.5] - 2022-01-31 267 | 268 | ### Added 269 | 270 | - Thread safe ts_busy method for each sub-module. 271 | - Thread safe ts_count method for each sub-module. 272 | 273 | ### Changed 274 | 275 | - Update dependencies. 276 | - Add support for Python 3.10. 277 | - Improve example ex_scandir for showing usage of thread safe ts_busy and ts_count methods. 278 | 279 | ## [0.9.4] - 2021-02-16 280 | 281 | ### Changed 282 | 283 | - Update dependencies. 
284 | 285 | ## [0.9.3] - 2020-07-27 286 | 287 | ### Added 288 | 289 | - Improved pytest test cases. 290 | 291 | ### Changed 292 | 293 | - In benchmark.py: 294 | - Use Linux kernel 5.5.5 as a platform independent reference. 295 | - Accept optional parameter for temporary directory base. 296 | - Benchmark directory C:\Windows on Windows and /usr on other platforms. 297 | 298 | ### Fixed 299 | 300 | - scandir didn't execute. 301 | - Fix performance issue with Walk. 302 | - Correctly return Python exceptions. 303 | - Make build_wheels.sh version independent. 304 | - Make examples platform independent. 305 | - Fix typo in README.md. 306 | 307 | ## [0.9.2] - 2020-07-26 308 | 309 | ### Changed 310 | 311 | - Provide Windows wheels without debug information. 312 | 313 | ## [0.9.1] - 2020-07-26 314 | 315 | ### Changed 316 | 317 | - Update to latest versions of Rust and dependencies. 318 | 319 | ## [0.9.0] - 2020-01-27 320 | 321 | ### Added 322 | 323 | - Add DirEntryExt and DirEntryFull. 324 | - Arguments for directory and file filtering. 325 | 326 | ### Changed 327 | 328 | - Replaced arguments `metadata` and `metadata_ext` with `return_type`. 329 | - Update documentation. 330 | 331 | ## [0.8.0] - 2020-01-19 332 | 333 | ### Added 334 | 335 | - Add getters to DirEntry. 336 | 337 | ### Changed 338 | 339 | - Update documentation. 340 | 341 | ### Fixed 342 | 343 | - Correctly count hardlinks. 344 | - Update [jwalk](https://github.com/brmmm3/jwalk/tree/jwalk-0.4.1-alpha.1) to get correct extended 345 | metadata (size and hardlinks). 346 | 347 | ## [0.7.2] - 2020-01-10 348 | 349 | ### Changed 350 | 351 | - Change default return_type for Walk to RETURN_TYPE_WALK. 352 | 353 | ## [0.7.1] - 2020-01-10 354 | 355 | ### Changed 356 | 357 | - Update documentation. 358 | 359 | ## [0.7.0] - 2020-01-09 360 | 361 | - First release. 
362 | -------------------------------------------------------------------------------- /tools/create_charts.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import plotly.graph_objects as go 5 | 6 | data = { 7 | "Python": { 8 | "Walk": { 9 | "Linux": { 10 | "linux-5.9": { 11 | "os.walk": 0.440817, 12 | "Walk.iter": 0.133679, 13 | "Walk.collect": 0.197038, 14 | "os.walk(Ext)": 1.61088, 15 | "Walk.iter(Ext)": 0.133556, 16 | "Walk.collect(Ext)": 0.191944, 17 | }, 18 | "usr": { 19 | "os.walk": 3.94502, 20 | "Walk.iter": 0.80265, 21 | "Walk.collect": 1.34461, 22 | "os.walk(Ext)": 10.7779, 23 | "Walk.iter(Ext)": 0.827304, 24 | "Walk.collect(Ext)": 1.33137, 25 | }, 26 | }, 27 | "Windows": { 28 | "linux-5.9": { 29 | "os.walk": 2.29283, 30 | "Walk.iter": 0.247534, 31 | "Walk.collect": 0.386362, 32 | "os.walk(Ext)": 17.6911, 33 | "Walk.iter(Ext)": 0.250716, 34 | "Walk.collect(Ext)": 0.39245, 35 | }, 36 | "Windows": { 37 | "os.walk": 99.0955, 38 | "Walk.iter": 10.0431, 39 | "Walk.collect": 11.8813, 40 | "os.walk(Ext)": 238.835, 41 | "Walk.iter(Ext)": 10.007, 42 | "Walk.collect(Ext)": 11.8674, 43 | }, 44 | }, 45 | }, 46 | "Scandir": { 47 | "Linux": { 48 | "linux-5.9": { 49 | "scantree (os.scandir)": 1.31862, 50 | "Scandir.iter": 0.237867, 51 | "Scandir.collect": 0.271947, 52 | "Scandir.iter(Ext)": 0.320545, 53 | "Scandir.collect(Ext)": 0.380465, 54 | }, 55 | "usr": { 56 | "scantree (os.scandir)": 8.25362, 57 | "Scandir.iter": 1.27802, 58 | "Scandir.collect": 2.01097, 59 | "Scandir.iter(Ext)": 1.75471, 60 | "Scandir.collect(Ext)": 2.58515, 61 | }, 62 | }, 63 | "Windows": { 64 | "linux-5.9": { 65 | "scantree (os.scandir)": 1.96715, 66 | "Scandir.iter": 0.26433, 67 | "Scandir.collect": 0.375734, 68 | "Scandir.iter(Ext)": 1.86403, 69 | "Scandir.collect(Ext)": 2.08924, 70 | }, 71 | "Windows": { 72 | "scantree (os.scandir)": 66.8014, 73 | "Scandir.iter": 10.1068, 74 | "Scandir.collect": 11.3297, 75 | 
"Scandir.iter(Ext)": 37.7527, 76 | "Scandir.collect(Ext)": 38.5138, 77 | }, 78 | }, 79 | }, 80 | }, 81 | "Rust": { 82 | "Walk": { 83 | "Linux": { 84 | "linux-5.9": { 85 | "walkdir.WalkDir": 0.082, 86 | "Walk.collect": 0.056, 87 | "walkdir.WalkDir(Ext)": 0.462, 88 | "Walk.collect(Ext)": 0.055, 89 | }, 90 | "usr": { 91 | "walkdir.WalkDir": 0.671, 92 | "Walk.collect": 0.405, 93 | "walkdir.WalkDir(Ext)": 2.829, 94 | "Walk.collect(Ext)": 0.404, 95 | }, 96 | }, 97 | "Windows": { 98 | "linux-5.9": { 99 | "walkdir.WalkDir": 0.456, 100 | "Walk.collect": 0.1, 101 | "walkdir.WalkDir(Ext)": 4.343, 102 | "Walk.collect(Ext)": 0.103, 103 | }, 104 | "Windows": { 105 | "walkdir.WalkDir": 15.546, 106 | "Walk.collect": 3.454, 107 | "walkdir.WalkDir(Ext)": 50.366, 108 | "Walk.collect(Ext)": 3.459, 109 | }, 110 | }, 111 | }, 112 | "Scandir": { 113 | "Linux": { 114 | "linux-5.9": { 115 | "scan_dir.ScanDir": 0.199, 116 | "Scandir.collect": 0.073, 117 | "scan_dir.ScanDir(Ext)": 0.383, 118 | "Scandir.collect(Ext)": 0.116, 119 | }, 120 | "usr": { 121 | "scan_dir.ScanDir": 1.474, 122 | "Scandir.collect": 0.615, 123 | "scan_dir.ScanDir(Ext)": 2.575, 124 | "Scandir.collect(Ext)": 0.822, 125 | }, 126 | }, 127 | "Windows": { 128 | "linux-5.9": { 129 | "scan_dir.ScanDir": 0.456, 130 | "Scandir.collect": 0.107, 131 | "scan_dir.ScanDir(Ext)": 7.483, 132 | "Scandir.collect(Ext)": 0.864, 133 | }, 134 | "Windows": { 135 | "scan_dir.ScanDir": 16.818, 136 | "Scandir.collect": 2.999, 137 | "scan_dir.ScanDir(Ext)": 47.740, 138 | "Scandir.collect(Ext)": 10.632, 139 | }, 140 | }, 141 | }, 142 | }, 143 | } 144 | 145 | 146 | def UpdateData(lang: str, bs: str, path: str, newData: dict): 147 | for method in ("Walk", "Scandir"): 148 | d = data[lang][method][bs][path] 149 | for key in tuple(d): 150 | d[key] = newData[key] 151 | 152 | 153 | py_nt_linux = json.loads( 154 | open("tools/benchmark_results_nt_linux-5.9_python.json").read() 155 | ) 156 | py_nt_windows = json.loads( 157 | 
open("tools/benchmark_results_nt_Windows_python.json").read() 158 | ) 159 | py_linux_linux = json.loads( 160 | open("tools/benchmark_results_posix_linux-5.9_python.json").read() 161 | ) 162 | py_linux_usr = json.loads(open("tools/benchmark_results_posix_usr_python.json").read()) 163 | 164 | UpdateData("Python", "Windows", "linux-5.9", py_nt_linux) 165 | UpdateData("Python", "Windows", "Windows", py_nt_windows) 166 | UpdateData("Python", "Linux", "linux-5.9", py_linux_linux) 167 | UpdateData("Python", "Linux", "usr", py_linux_usr) 168 | 169 | 170 | for lang, langData in data.items(): 171 | baseDir = "pyscandir" if lang == "Python" else "scandir" 172 | dirName = f"{baseDir}/doc/images" 173 | if not os.path.exists(dirName): 174 | os.makedirs(dirName) 175 | for methodGroup, methodData in langData.items(): 176 | for osName, osData in methodData.items(): 177 | for path, pathData in osData.items(): 178 | methods = list(pathData.keys()) 179 | fig = go.Figure( 180 | data=[ 181 | go.Bar( 182 | name=method, 183 | x=["" * len(methods)], 184 | y=[dt], 185 | text=f"{dt:.2f}s", 186 | textposition="auto", 187 | ) 188 | for method, dt in pathData.items() 189 | ] 190 | ) 191 | fig.update_layout( 192 | barmode="group", 193 | xaxis_title="Method", 194 | yaxis_title="Time [s]", 195 | ) 196 | pathName = f"{dirName}/{osName.lower()}_{methodGroup.lower()}_{path.lower()}.png" 197 | print(pathName) 198 | fig.write_image(pathName) 199 | -------------------------------------------------------------------------------- /pyscandir/doc/scandir.md: -------------------------------------------------------------------------------- 1 | # The API of class `Scandir` 2 | 3 | ## `ScandirResult` 4 | 5 | Is an enum which can be: 6 | 7 | `DirEntry` 8 | `DirEntryExt` 9 | 10 | ## `DirEntry` 11 | 12 | - `path` relative path 13 | - `is_symlink` `True` is entry is a symbolic link. 14 | - `is_dir` `True` is entry is a directory. 15 | - `is_file` `True` is entry is a file. 
16 | - `st_ctime` creation time in seconds as float. 17 | - `st_mtime` modification time in seconds as float. 18 | - `st_atime` access time in seconds as float. 19 | - `st_size` size of entry. 20 | 21 | ## `DirEntryExt` 22 | 23 | - `is_symlink` `True` if entry is a symbolic link. 24 | - `is_dir` `True` if entry is a directory. 25 | - `is_file` `True` if entry is a file. 26 | - `st_ctime` creation time in seconds as float. 27 | - `st_mtime` modification time in seconds as float. 28 | - `st_atime` access time in seconds as float. 29 | - `st_mode` file access mode / rights. 30 | - `st_ino` inode number (only for Unix). 31 | - `st_dev` device number (only for Unix). 32 | - `st_nlink` number of hard links. 33 | - `st_size` size of entry. 34 | - `st_blksize` block size of file system. 35 | - `st_blocks` number of blocks used. 36 | - `st_uid` user id (only for Unix). 37 | - `st_gid` group id (only for Unix). 38 | - `st_rdev` device number (for character and block devices on Unix). 39 | 40 | ## `Scandir()` 41 | 42 | ```python 43 | def Scandir( 44 | root_path: str, 45 | sorted: bool = False, 46 | skip_hidden: bool = False, 47 | metadata: bool = False, 48 | metadata_ext: bool = False, 49 | max_depth: int = 0, 50 | dir_include: list | None = None, 51 | dir_exclude: list | None = None, 52 | file_include: list | None = None, 53 | file_exclude: list | None = None, 54 | case_sensitive: bool = True, 55 | return_type: int = RETURN_TYPE_WALK, 56 | store: bool = True, 57 | ) 58 | ``` 59 | 60 | Creates a class object for more control when reading the directory contents. 61 | Useful when the iteration should be done in background without blocking the application. 62 | The class instance initially does nothing. To start the scan either the method `start` has 63 | to be called or a context has to be created (`with ClassInstance:`). 64 | When the context is closed the background thread is stopped. 
65 | 66 | The returned results are tuples with absolute path and `DirEntry`, `DirEntryExt` or 67 | `DirEntryFull` object, depending on the `return_type`. In case of an error an error string 68 | is returned. 69 | 70 | ### Parameters 71 | 72 | - `root_path` is directory to scan. `~` is allowed on Unix systems. 73 | - `sorted` if `True` alphabetically sort results. 74 | - `skip_hidden` if `True` ignore all hidden files and directories. 75 | - `metadata` if `True` also fetch some metadata. 76 | - `metadata_ext` if `True` also fetch extended metadata. 77 | - `max_depth` is maximum depth of iteration. If `0` then depth limit is disabled. 78 | - `dir_include` list of patterns for directories to include. 79 | - `dir_exclude` list of patterns for directories to exclude. 80 | - `file_include` list of patterns for files to include. 81 | - `file_exclude` list of patterns for files to exclude. 82 | - `case_sensitive` if `True` then do case sensitive pattern matching. 83 | - `follow_links` if `True` then follow symlinks and junctions. 84 | - `return_type` defines type of data returned. 85 | - `store` store results in local structure. 86 | 87 | For valid file patterns see module [glob](https://docs.rs/glob/0.3.0/glob/struct.Pattern.html). 88 | 89 | ### Return types 90 | 91 | - `ReturnType.Base` return `DirEntry` objects. 92 | - `ReturnType.Ext` return `DirEntryExt` objects. 93 | 94 | ### `clear()` 95 | 96 | Clear all results. 97 | 98 | ### `start()` 99 | 100 | Start parsing the directory tree in background. Raises an exception if a task is already running. 101 | 102 | ### `join()` 103 | 104 | Wait for parsing task to finish. 105 | 106 | ### `stop()` 107 | 108 | Stop parsing task. 109 | 110 | ### `collect() -> Tuple[List[ScandirResult], List[Tuple[str, str]]]` 111 | 112 | Parse file tree and wait until parsing has finished. Method `start` will be called if not 113 | already done. This method returns the same as the `results` method. 114 | It is blocking and releases the GIL. 
115 | `Error` contains a tuple with 2 strings. First string contains path to file. 116 | Second string is the error message. 117 | 118 | ### `has_results(only_new: bool | None = True) -> bool` 119 | 120 | Returns `True` if new entries or errors are available and `only_new` is `True` (default) or 121 | in case `only_new` is `False` and any entries and errors have been collected since the start 122 | of the parse task. 123 | 124 | ### `results_cnt(only_new: bool | None = True) -> int` 125 | 126 | Returns the number of new entries and errors if `only_new` is `True` (default) or in case 127 | `only_new` is `False` the number of entries and errors since the start of the parse task. 128 | 129 | ### `results(only_new: bool | None = True) -> Tuple[List[ScandirResult], List[Tuple[str, str]]]` 130 | 131 | Returns entries and errors. 132 | 133 | If `only_new` is `True` (default) then return only new results and errors, else return all 134 | results and errors collected so far. 135 | 136 | ### `has_entries(only_new: bool | None = True) -> bool` 137 | 138 | Returns `True` if new entries are available and `only_new` is `True` (default) or in case 139 | `only_new` is `False` and any entries have been collected since the start of the parse task. 140 | 141 | ### `entries_cnt(only_new: bool | None = True) -> int` 142 | 143 | Returns the number of new entries if `only_new` is `True` (default) or in case `only_new` 144 | is `False` the number of entries since the start of the parse task. 145 | 146 | ### `entries(only_new: bool | None = True) -> List[Tuple[str, Toc]]` 147 | 148 | Returns entries. 149 | 150 | If `only_new` is `True` (default) then return only new entries, else return all entries 151 | collected so far. 152 | 153 | ### `has_errors() -> bool` 154 | 155 | Returns `True` if new errors are available and `only_new` is `True` (default) or in case 156 | `only_new` is `False` and any errors have been collected since the start of the parse task. 
157 | 158 | ### `errors_cnt(only_new: bool | None = True) -> int` 159 | 160 | Returns the number of new errors if `only_new` is `True` (default) or in case `only_new` 161 | is `False` the number of errors since the start of the parse task. 162 | 163 | ### `errors(only_new: bool | None = True) -> List[Tuple[str, str]]` 164 | 165 | Returns errors. 166 | 167 | If `only_new` is `True` (default) then return only new errors, else return all errors 168 | collected so far. 169 | 170 | ### `duration -> float` 171 | 172 | Returns the duration of the parsing task. As long as the task is running it will return 0. 173 | 174 | ### `finished -> bool` 175 | 176 | Returns `True` after the parsing task has finished. 177 | 178 | ### `busy -> bool` 179 | 180 | Returns `True` while a parsing task is running. 181 | 182 | ### `statistics -> Statistics` 183 | 184 | Returns the statistics for all currently collected results. 185 | 186 | ### `as_dict(only_new: bool | None = True) -> Dict[str, DirEntry | DirEntryExt | str]` 187 | 188 | Returns entries and errors as dictionary. 189 | 190 | If `only_new` is `True` then return only new results, else return all results collected so far. 191 | Each result consists of root directory and `Toc`. 192 | 193 | ### `to_speedy() -> bytes` 194 | 195 | Only available when the feature `speedy` is enabled. 196 | 197 | Returns statistics as [speedy](https://docs.rs/speedy/latest/speedy) encoded byte string. 198 | 199 | ### `to_bincode() -> bytes` 200 | 201 | Only available when the feature `bincode` is enabled. 202 | 203 | Returns statistics as [bincode](https://docs.rs/bincode/latest/bincode) encoded byte string. 204 | 205 | ### `to_json() -> str` 206 | 207 | Only available when the feature `json` is enabled. 208 | 209 | Returns statistics as [json](https://docs.rs/serde_json/latest/serde_json) encoded string. 
210 | -------------------------------------------------------------------------------- /scandir/src/common.rs: -------------------------------------------------------------------------------- 1 | use std::fs::{self, Metadata}; 2 | use std::io::{Error, ErrorKind}; 3 | use std::path::{Path, PathBuf}; 4 | 5 | #[cfg(unix)] 6 | use expanduser::expanduser; 7 | 8 | use glob_sl::{MatchOptions, Pattern}; 9 | 10 | use crate::def::{Filter, Options}; 11 | 12 | pub fn check_and_expand_path>(path_str: P) -> Result { 13 | #[cfg(unix)] 14 | let path_result = fs::canonicalize(expanduser(path_str.as_ref().to_str().unwrap()).unwrap()); 15 | #[cfg(not(unix))] 16 | let path_result = fs::canonicalize(&path_str); 17 | let path = match path_result { 18 | Ok(p) => { 19 | if !p.exists() { 20 | return Err(Error::new( 21 | ErrorKind::NotFound, 22 | path_str.as_ref().to_str().unwrap().to_string(), 23 | )); 24 | } 25 | p 26 | } 27 | Err(e) => { 28 | return Err(Error::other(e.to_string())); 29 | } 30 | }; 31 | Ok(path) 32 | } 33 | 34 | pub fn get_root_path_len(root_path: &Path) -> usize { 35 | let root_path = root_path.to_str().unwrap(); 36 | let mut root_path_len = root_path.len(); 37 | #[cfg(unix)] 38 | if !root_path.ends_with('/') { 39 | root_path_len += 1; 40 | } 41 | #[cfg(windows)] 42 | if !root_path.ends_with('\\') { 43 | root_path_len += 1; 44 | } 45 | root_path_len 46 | } 47 | 48 | pub fn create_filter(options: &Options) -> Result, Error> { 49 | let mut filter = Filter { 50 | dir_include: Vec::new(), 51 | dir_exclude: Vec::new(), 52 | file_include: Vec::new(), 53 | file_exclude: Vec::new(), 54 | options: match options.case_sensitive { 55 | true => None, 56 | false => Some(MatchOptions { 57 | case_sensitive: false, 58 | ..MatchOptions::new() 59 | }), 60 | }, 61 | }; 62 | if let Some(ref f) = options.dir_include { 63 | let f = &mut f 64 | .iter() 65 | .map(|s| Pattern::new(s)) 66 | .collect::, glob_sl::PatternError>>(); 67 | let f = match f { 68 | Ok(f) => f, 69 | Err(e) => { 70 | return 
Err(Error::new( 71 | ErrorKind::InvalidInput, 72 | format!("dir_include: {}", e), 73 | )); 74 | } 75 | }; 76 | filter.dir_include.append(f); 77 | } 78 | if let Some(ref f) = options.dir_exclude { 79 | let f = &mut f 80 | .iter() 81 | .map(|s| Pattern::new(s)) 82 | .collect::, glob_sl::PatternError>>(); 83 | let f = match f { 84 | Ok(f) => f, 85 | Err(e) => { 86 | return Err(Error::new( 87 | ErrorKind::InvalidInput, 88 | format!("dir_exclude: {}", e), 89 | )); 90 | } 91 | }; 92 | filter.dir_exclude.append(f); 93 | } 94 | if let Some(ref f) = options.file_include { 95 | let f = &mut f 96 | .iter() 97 | .map(|s| Pattern::new(s)) 98 | .collect::, glob_sl::PatternError>>(); 99 | let f = match f { 100 | Ok(f) => f, 101 | Err(e) => { 102 | return Err(Error::new( 103 | ErrorKind::InvalidInput, 104 | format!("file_include: {}", e), 105 | )); 106 | } 107 | }; 108 | filter.file_include.append(f); 109 | } 110 | if let Some(ref f) = options.file_exclude { 111 | let f = &mut f 112 | .iter() 113 | .map(|s| Pattern::new(s)) 114 | .collect::, glob_sl::PatternError>>(); 115 | let f = match f { 116 | Ok(f) => f, 117 | Err(e) => { 118 | return Err(Error::new( 119 | ErrorKind::InvalidInput, 120 | format!("file_exclude: {}", e), 121 | )); 122 | } 123 | }; 124 | filter.file_exclude.append(f); 125 | } 126 | if filter.dir_include.is_empty() 127 | && filter.dir_exclude.is_empty() 128 | && filter.file_include.is_empty() 129 | && filter.file_exclude.is_empty() 130 | { 131 | return Ok(None); 132 | } 133 | Ok(Some(filter)) 134 | } 135 | 136 | #[inline] 137 | pub fn filter_direntry( 138 | key: &str, 139 | filter: &Vec, 140 | options: Option, 141 | empty: bool, 142 | ) -> bool { 143 | if filter.is_empty() || key.is_empty() { 144 | return empty; 145 | } 146 | match options { 147 | Some(options) => { 148 | for f in filter { 149 | if f.as_str().ends_with("**") && !key.ends_with('/') { 150 | // Workaround: glob currently has problems with "foo/**" 151 | let mut key = String::from(key); 152 | 
key.push('/'); 153 | if f.matches_with(&key, options) { 154 | return true; 155 | } 156 | } 157 | if f.matches_with(key, options) { 158 | return true; 159 | } 160 | } 161 | } 162 | None => { 163 | for f in filter { 164 | if f.as_str().ends_with("**") && !key.ends_with('/') { 165 | // Workaround: glob currently has problems with "foo/**" 166 | let mut key = String::from(key); 167 | key.push('/'); 168 | if f.matches(&key) { 169 | return true; 170 | } 171 | } 172 | if f.matches(key) { 173 | return true; 174 | } 175 | } 176 | } 177 | } 178 | false 179 | } 180 | 181 | #[inline] 182 | pub fn filter_dir( 183 | root_path_len: usize, 184 | dir_entry: &jwalk_meta::DirEntry<((), Option>)>, 185 | filter_ref: &Filter, 186 | ) -> bool { 187 | let mut key = dir_entry.parent_path.to_path_buf(); 188 | key.push(dir_entry.file_name.clone().into_string().unwrap()); 189 | let key = key 190 | .to_str() 191 | .unwrap() 192 | .get(root_path_len..) 193 | .unwrap_or("") 194 | .to_string(); 195 | if filter_direntry(&key, &filter_ref.dir_exclude, filter_ref.options, false) 196 | || !filter_direntry(&key, &filter_ref.dir_include, filter_ref.options, true) 197 | { 198 | return false; 199 | } 200 | true 201 | } 202 | 203 | #[inline] 204 | #[allow(clippy::type_complexity)] 205 | pub fn filter_children( 206 | children: &mut Vec< 207 | Result>)>, jwalk_meta::Error>, 208 | >, 209 | filter: &Option, 210 | root_path_len: usize, 211 | ) { 212 | if let Some(filter_ref) = &filter { 213 | children.retain(|dir_entry_result| { 214 | dir_entry_result 215 | .as_ref() 216 | .map(|dir_entry| { 217 | if dir_entry.file_type.is_dir() { 218 | return filter_dir(root_path_len, dir_entry, filter_ref); 219 | } else { 220 | let options = filter_ref.options; 221 | let key = match dir_entry.file_name.to_str() { 222 | Some(s) => s, 223 | None => { 224 | return false; 225 | } 226 | }; 227 | if filter_direntry(key, &filter_ref.file_exclude, options, false) 228 | || !filter_direntry(key, &filter_ref.file_include, options, 
true) 229 | { 230 | return false; 231 | } 232 | } 233 | true 234 | }) 235 | .unwrap_or(false) 236 | }); 237 | } 238 | } 239 | -------------------------------------------------------------------------------- /pyscandir/src/def/direntry.rs: -------------------------------------------------------------------------------- 1 | use std::time::SystemTime; 2 | 3 | #[cfg(any(feature = "speedy", feature = "bincode", feature = "json"))] 4 | use pyo3::exceptions::PyException; 5 | use pyo3::prelude::*; 6 | #[cfg(any(feature = "speedy", feature = "bincode"))] 7 | use pyo3::types::PyBytes; 8 | use pyo3::types::PyDict; 9 | 10 | #[cfg(feature = "speedy")] 11 | use speedy::Writable; 12 | 13 | #[pyclass] 14 | #[derive(Debug, Clone)] 15 | pub struct DirEntry(scandir::DirEntry); 16 | 17 | impl DirEntry { 18 | pub fn from(entry: &scandir::DirEntry) -> Self { 19 | DirEntry(entry.clone()) 20 | } 21 | } 22 | 23 | #[pymethods] 24 | impl DirEntry { 25 | #[getter] 26 | fn path(&self) -> String { 27 | self.0.path.clone() 28 | } 29 | 30 | #[getter] 31 | fn is_symlink(&self) -> bool { 32 | self.0.is_symlink 33 | } 34 | 35 | #[getter] 36 | fn is_dir(&self) -> bool { 37 | self.0.is_dir 38 | } 39 | 40 | #[getter] 41 | fn is_file(&self) -> bool { 42 | self.0.is_file 43 | } 44 | 45 | #[getter] 46 | fn st_ctime(&self) -> Option { 47 | self.0.st_ctime 48 | } 49 | 50 | #[getter] 51 | fn st_mtime(&self) -> Option { 52 | self.0.st_mtime 53 | } 54 | 55 | #[getter] 56 | fn st_atime(&self) -> Option { 57 | self.0.st_atime 58 | } 59 | 60 | #[getter] 61 | fn st_size(&self) -> u64 { 62 | self.0.st_size 63 | } 64 | 65 | #[getter] 66 | fn ctime(&self) -> f64 { 67 | self.0.ctime() 68 | } 69 | 70 | #[getter] 71 | fn mtime(&self) -> f64 { 72 | self.0.mtime() 73 | } 74 | 75 | #[getter] 76 | fn atime(&self) -> f64 { 77 | self.0.atime() 78 | } 79 | 80 | fn as_dict(&self, py: Python) -> PyResult> { 81 | let pydict = PyDict::new(py); 82 | pydict.set_item("path", self.0.path.clone())?; 83 | pydict.set_item("is_symlink", 
self.0.is_symlink)?; 84 | pydict.set_item("is_dir", self.0.is_dir)?; 85 | pydict.set_item("is_file", self.0.is_file)?; 86 | pydict.set_item("st_ctime", self.0.st_ctime)?; 87 | pydict.set_item("st_mtime", self.0.st_mtime)?; 88 | pydict.set_item("st_atime", self.0.st_atime)?; 89 | pydict.set_item("st_size", self.0.st_size)?; 90 | Ok(pydict.into_any().unbind()) 91 | } 92 | 93 | #[cfg(feature = "speedy")] 94 | fn to_speedy(&self, py: Python) -> PyResult> { 95 | match self.0.write_to_vec() { 96 | Ok(v) => Ok(PyBytes::new_with(py, v.len(), |b| { 97 | b.copy_from_slice(&v); 98 | Ok(()) 99 | })? 100 | .into()), 101 | Err(e) => Err(PyException::new_err(e.to_string())), 102 | } 103 | } 104 | 105 | #[cfg(feature = "bincode")] 106 | fn to_bincode(&self, py: Python) -> PyResult> { 107 | match self.0.to_vec() { 108 | Ok(v) => Ok(PyBytes::new_with(py, v.len(), |b| { 109 | b.copy_from_slice(&v); 110 | Ok(()) 111 | })? 112 | .into()), 113 | Err(e) => Err(PyException::new_err(e.to_string())), 114 | } 115 | } 116 | 117 | #[cfg(feature = "json")] 118 | fn to_json(&self) -> PyResult { 119 | self.0 120 | .to_json() 121 | .map_err(|e| PyException::new_err(e.to_string())) 122 | } 123 | 124 | fn __repr__(&self) -> String { 125 | format!("{self:?}") 126 | } 127 | 128 | fn __str__(&self) -> String { 129 | format!("{self:?}") 130 | } 131 | } 132 | 133 | #[pyclass] 134 | #[derive(Debug, Clone)] 135 | pub struct DirEntryExt(scandir::DirEntryExt); 136 | 137 | impl DirEntryExt { 138 | pub fn from(entry: &scandir::DirEntryExt) -> Self { 139 | DirEntryExt(entry.clone()) 140 | } 141 | } 142 | 143 | #[pymethods] 144 | impl DirEntryExt { 145 | #[getter] 146 | fn path(&self) -> String { 147 | self.0.path.clone() 148 | } 149 | 150 | #[getter] 151 | fn is_symlink(&self) -> bool { 152 | self.0.is_symlink 153 | } 154 | 155 | #[getter] 156 | fn is_dir(&self) -> bool { 157 | self.0.is_dir 158 | } 159 | 160 | #[getter] 161 | fn is_file(&self) -> bool { 162 | self.0.is_file 163 | } 164 | 165 | #[getter] 166 | 
fn st_ctime(&self) -> Option { 167 | self.0.st_ctime 168 | } 169 | 170 | #[getter] 171 | fn st_mtime(&self) -> Option { 172 | self.0.st_mtime 173 | } 174 | 175 | #[getter] 176 | fn st_atime(&self) -> Option { 177 | self.0.st_atime 178 | } 179 | 180 | #[getter] 181 | fn st_size(&self) -> u64 { 182 | self.0.st_size 183 | } 184 | 185 | #[getter] 186 | fn st_blksize(&self) -> u64 { 187 | self.0.st_blksize 188 | } 189 | 190 | #[getter] 191 | fn st_blocks(&self) -> u64 { 192 | self.0.st_blocks 193 | } 194 | 195 | #[getter] 196 | fn st_mode(&self) -> u32 { 197 | self.0.st_mode 198 | } 199 | 200 | #[getter] 201 | fn st_nlink(&self) -> u64 { 202 | self.0.st_nlink 203 | } 204 | 205 | #[getter] 206 | fn st_uid(&self) -> u32 { 207 | self.0.st_uid 208 | } 209 | 210 | #[getter] 211 | fn st_gid(&self) -> u32 { 212 | self.0.st_gid 213 | } 214 | 215 | #[getter] 216 | fn st_ino(&self) -> u64 { 217 | self.0.st_ino 218 | } 219 | 220 | #[getter] 221 | fn st_dev(&self) -> u64 { 222 | self.0.st_dev 223 | } 224 | 225 | #[getter] 226 | fn st_rdev(&self) -> u64 { 227 | self.0.st_rdev 228 | } 229 | 230 | #[getter] 231 | fn ctime(&self) -> f64 { 232 | self.0.ctime() 233 | } 234 | 235 | #[getter] 236 | fn mtime(&self) -> f64 { 237 | self.0.mtime() 238 | } 239 | 240 | #[getter] 241 | fn atime(&self) -> f64 { 242 | self.0.atime() 243 | } 244 | 245 | fn as_dict(&self, py: Python) -> PyResult> { 246 | let pydict = PyDict::new(py); 247 | pydict.set_item("path", self.0.path.clone())?; 248 | pydict.set_item("is_symlink", self.0.is_symlink)?; 249 | pydict.set_item("is_dir", self.0.is_dir)?; 250 | pydict.set_item("is_file", self.0.is_file)?; 251 | pydict.set_item("st_ctime", self.0.st_ctime)?; 252 | pydict.set_item("st_mtime", self.0.st_mtime)?; 253 | pydict.set_item("st_atime", self.0.st_atime)?; 254 | pydict.set_item("st_size", self.0.st_size)?; 255 | pydict.set_item("st_blksize", self.0.st_blksize)?; 256 | pydict.set_item("st_blocks", self.0.st_blocks)?; 257 | pydict.set_item("st_mode", 
self.0.st_mode)?; 258 | pydict.set_item("st_nlink", self.0.st_nlink)?; 259 | pydict.set_item("st_uid", self.0.st_uid)?; 260 | pydict.set_item("st_gid", self.0.st_gid)?; 261 | pydict.set_item("st_ino", self.0.st_ino)?; 262 | pydict.set_item("st_dev", self.0.st_dev)?; 263 | pydict.set_item("st_rdev", self.0.st_rdev)?; 264 | Ok(pydict.into_any().unbind()) 265 | } 266 | 267 | #[cfg(feature = "speedy")] 268 | fn to_speedy(&self, py: Python) -> PyResult> { 269 | match self.0.write_to_vec() { 270 | Ok(v) => Ok(PyBytes::new_with(py, v.len(), |b| { 271 | b.copy_from_slice(&v); 272 | Ok(()) 273 | })? 274 | .into()), 275 | Err(e) => Err(PyException::new_err(e.to_string())), 276 | } 277 | } 278 | 279 | #[cfg(feature = "bincode")] 280 | fn to_bincode(&self, py: Python) -> PyResult> { 281 | match self.0.to_vec() { 282 | Ok(v) => Ok(PyBytes::new_with(py, v.len(), |b| { 283 | b.copy_from_slice(&v); 284 | Ok(()) 285 | })? 286 | .into()), 287 | Err(e) => Err(PyException::new_err(e.to_string())), 288 | } 289 | } 290 | 291 | #[cfg(feature = "json")] 292 | fn to_json(&self) -> PyResult { 293 | self.0 294 | .to_json() 295 | .map_err(|e| PyException::new_err(e.to_string())) 296 | } 297 | 298 | fn __repr__(&self) -> String { 299 | format!("{self:?}") 300 | } 301 | 302 | fn __str__(&self) -> String { 303 | format!("{self:?}") 304 | } 305 | } 306 | --------------------------------------------------------------------------------