├── test_data
    ├── system
    ├── NTUSER.DAT
    ├── lh_block
    ├── lznt1_buffer
    ├── system.log1
    ├── system.log2
    ├── win7_ntuser.dat
    ├── lznt1_decoded_buffer
    └── utf16_multiple_buffer
├── .world
    ├── setup.sh
    ├── clean.sh
    ├── config.sh
    ├── install.sh
    └── build.sh
├── .gitignore
├── pynotatin
    ├── .cargo
    │   └── config.toml
    ├── pyproject.toml
    ├── Cargo.toml
    ├── src
    │   ├── lib.rs
    │   ├── err.rs
    │   ├── py_notatin_content.rs
    │   ├── py_notatin_parser.rs
    │   ├── util.rs
    │   └── py_notatin_key.rs
    ├── README.md
    ├── notatin.pyi
    └── poetry.lock
├── .dockerignore
├── Dockerfile
├── .github
    └── workflows
    │   └── pipeline.yml
├── benches
    └── my_benchmark.rs
├── src
    ├── hive_bin_cell.rs
    ├── lib.rs
    ├── reg_item_map.rs
    ├── file_info.rs
    ├── progress.rs
    ├── cell.rs
    ├── hive_bin_header.rs
    ├── err.rs
    ├── cli_util.rs
    ├── sub_key_list_li.rs
    ├── log.rs
    ├── field_serializers.rs
    ├── macros.rs
    ├── parser_builder.rs
    ├── sub_key_list_lf.rs
    ├── sub_key_list_lh.rs
    ├── sub_key_list_ri.rs
    ├── marvin32.rs
    ├── cell_value.rs
    ├── state.rs
    ├── cell_key_security.rs
    ├── cell_big_data.rs
    ├── filter.rs
    └── field_offset_len.rs
├── bin
    └── reg_dump
    │   ├── json_writer.rs
    │   ├── tsv_writer.rs
    │   ├── main.rs
    │   └── common_writer.rs
├── Cargo.toml
├── README.md
├── .gitlab-ci.yml
└── marvin32
    └── marvin32.cpp


/test_data/system:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/strozfriedberg/notatin/HEAD/test_data/system


--------------------------------------------------------------------------------
/test_data/NTUSER.DAT:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/strozfriedberg/notatin/HEAD/test_data/NTUSER.DAT


--------------------------------------------------------------------------------
/test_data/lh_block:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/strozfriedberg/notatin/HEAD/test_data/lh_block


--------------------------------------------------------------------------------
/test_data/lznt1_buffer:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/strozfriedberg/notatin/HEAD/test_data/lznt1_buffer


--------------------------------------------------------------------------------
/test_data/system.log1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/strozfriedberg/notatin/HEAD/test_data/system.log1


--------------------------------------------------------------------------------
/test_data/system.log2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/strozfriedberg/notatin/HEAD/test_data/system.log2


--------------------------------------------------------------------------------
/test_data/win7_ntuser.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/strozfriedberg/notatin/HEAD/test_data/win7_ntuser.dat


--------------------------------------------------------------------------------
/test_data/lznt1_decoded_buffer:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/strozfriedberg/notatin/HEAD/test_data/lznt1_decoded_buffer


--------------------------------------------------------------------------------
/.world/setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -ex
2 | # Setup step. After the early-exit check below there is nothing left to run.
3 | . .world/build_config.sh  # NOTE(review): presumably defines Linkage and Target — confirm
4 | 
5 | if [[ "$Linkage" == 'static' || ( "$Target" == 'windows' ) ]]; then
6 |   exit  # static-linkage and windows builds stop here
7 | fi


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /target
 2 | Cargo.lock
 3 | 
 4 | *venv*
 5 | .vscode
 6 | 
 7 | pynotatin/target
 8 | pynotatin/tests/__pycache__
 9 | 
10 | .world/build_config.sh
11 | 


--------------------------------------------------------------------------------
/.world/clean.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -ex
 2 | # Clean step: removes pynotatin/out unless the build is static or targets windows.
 3 | . .world/build_config.sh  # NOTE(review): presumably defines Linkage and Target — confirm
 4 | 
 5 | if [[ "$Linkage" == 'static' || ( "$Target" == 'windows' ) ]]; then
 6 |   exit  # these configurations skip the removal below
 7 | fi
 8 | 
 9 | rm -rf pynotatin/out
10 | 


--------------------------------------------------------------------------------
/pynotatin/.cargo/config.toml:
--------------------------------------------------------------------------------
 1 | [target.x86_64-apple-darwin]
 2 | rustflags = [
 3 |   "-C", "link-arg=-undefined",
 4 |   "-C", "link-arg=dynamic_lookup",
 5 | ]
 6 | 
 7 | [target.aarch64-apple-darwin]
 8 | rustflags = [
 9 |   "-C", "link-arg=-undefined",
10 |   "-C", "link-arg=dynamic_lookup",
11 | ]
12 | 
13 | 


--------------------------------------------------------------------------------
/.world/config.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -ex
 2 | # Configure step: prepares the Poetry environment used to build pynotatin.
 3 | . .world/build_config.sh  # NOTE(review): presumably defines Linkage and Target — confirm
 4 | 
 5 | if [[ "$Linkage" == 'static' || ("$Target" == 'windows') ]]; then
 6 |   exit  # static-linkage and windows builds skip the Poetry setup
 7 | fi
 8 | 
 9 | pushd pynotatin
10 | poetry config virtualenvs.in-project true --local
11 | 
12 | if [[ "$Target" == 'windows_package' ]]; then
13 |   poetry config cache-dir .poetry --local
14 | fi
15 | poetry lock --check  # own command: a failure on the left of `&&` would not trigger `set -e`
16 | poetry install --no-cache
17 | popd
18 | 


--------------------------------------------------------------------------------
/pynotatin/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "pynotatin"
 3 | version = "1.0.1"
 4 | authors = ["kimberly.stone <kimberly.stone@levelblue.com>"]
 5 | description = "Python bindings for notatin"
 6 | 
 7 | [build-system]
 8 | requires = ["maturin>=0.11,<0.12"]
 9 | build-backend = "maturin"
10 | 
11 | [tool.poetry.dependencies]
12 | python = "^3.6"
13 | 
14 | [tool.poetry.dev-dependencies]
15 | pytest = "^6.2.4"
16 | maturin = "^0.11.2"
17 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
 1 | # .dockerignore file for deployment docker container
 2 | # .dockerignore goes in the root of the build context, not next to the Dockerfile
 3 | 
 4 | # Ignore all top-level files and directories
 5 | */
 6 | 
 7 | # Except these
 8 | !bin/
 9 | !build/
10 | !pynotatin/
11 | !src/
12 | !test_data/
13 | !benches/
14 | !**/*Cargo*
15 | 
16 | # Other files that should just always be ignored
17 | .git
18 | **/.idea
19 | **/.venv
20 | **/*.pdf
21 | **/*.md
22 | **/*.env
23 | **/target/
24 | 


--------------------------------------------------------------------------------
/.world/install.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -ex
 2 | # Install step: builds the pynotatin wheel, then copies it and the CLI binaries under $INSTALL.
 3 | . .world/build_config.sh  # NOTE(review): presumably defines Linkage and Target — confirm
 4 | 
 5 | if [[ "$Linkage" == 'static' || ("$Target" == 'windows') ]]; then
 6 |   exit  # static-linkage and windows builds install nothing here
 7 | fi
 8 | 
 9 | pushd pynotatin
10 | poetry run maturin build --release --interpreter python --no-sdist
11 | mkdir -p "$INSTALL/lib/python/pynotatin"  # quoted so a path with spaces stays one argument
12 | cp target/wheels/* "$INSTALL/lib/python/pynotatin"
13 | popd
14 | 
15 | mkdir -p "$INSTALL/bin"
16 | cp "target/release/reg_compare${EXE_EXT}" "$INSTALL/bin"
17 | cp "target/release/reg_dump${EXE_EXT}" "$INSTALL/bin"
18 | 


--------------------------------------------------------------------------------
/test_data/utf16_multiple_buffer:
--------------------------------------------------------------------------------
1 | N A S _ r e q u e s t e d _ d a t a . 7 z   B l a c k H a r r i e r _ D 7 _ i 6 8 6 _ F D E _ 2 0 1 4 1 2 1 9 . d d . 7 z   B l a c k H a r r i e r _ D 7 _ a m d 6 4 _ 2 0 1 4 1 2 1 7 . 7 z   B l a c k H a r r i e r _ D 7 _ a m d 6 4 _ F D E _ 2 0 1 4 1 2 1 7 . 7 z   C : \ U s e r s \ j m r o b e r t s \ D e s k t o p \ U S B _ R e s e a r c h \ I E F . z i p   C o m p a n y _ R e p o r t _ 1 0 2 2 2 0 1 3 . v i r . z i p   L Y N C . 7 z   v i r u s e s . z i p   A L L D A T A . t x t . b z 2   


--------------------------------------------------------------------------------
/.world/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -ex
 2 | # Build step: tests, lints and builds the Rust crate, then does the same for pynotatin.
 3 | . .world/build_config.sh  # NOTE(review): presumably defines Linkage and Target — confirm
 4 | 
 5 | if [[ "$Linkage" == 'static' || ("$Target" == 'windows') ]]; then
 6 |   exit  # static-linkage and windows builds skip everything below
 7 | fi
 8 | 
 9 | cargo test --all-features --all-targets
10 | cargo clippy --all-features --all-targets
11 | cargo build --release --features="build-binary"
12 | # The commands below run inside pynotatin/ (see pushd/popd).
13 | pushd pynotatin
14 | 
15 | cargo test --no-default-features # --no-default-features is specified to avoid a bug in PyO3 (https://aeshirey.github.io/code/2020/04/01/tests-and-pyo3.html)
16 | cargo clippy --all-features --all-targets
17 | poetry run maturin develop --release
18 | poetry run pytest
19 | 
20 | popd
21 | 


--------------------------------------------------------------------------------
/pynotatin/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "pynotatin"
 3 | version = "1.0.1"
 4 | authors = ["Kimberly Stone <kimberly.stone@levelblue.com>"]
 5 | edition = "2018"
 6 | 
 7 | [lib]
 8 | name = "notatin"
 9 | crate-type = ["cdylib"]
10 | 
11 | [dependencies]
12 | notatin = { path = ".." }
13 | pyo3 = { version = ">= 0.18.1", features = ["auto-initialize"] }
14 | pyo3-file = ">= 0.7.0"
15 | num-traits = ">= 0.2"
16 | log = ">= 0.4"
17 | chrono = ">= 0.4"
18 | serde_json = ">= 1"
19 | csv = ">= 1"
20 | serde = { version = ">= 1.0", features = ["derive"] }
21 | 
22 | [features]
23 | default = ["pyo3/extension-module"]
24 | 
25 | [tool.maturin]
26 | sdist-include = ["Cargo.lock"]
27 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM quay.io/pypa/manylinux2014_x86_64
 2 | 
 3 | RUN yum update -y && yum install -y python3 python3-pip
 4 | RUN curl -4 --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
 5 | ENV PATH="${PATH}:/root/.cargo/bin"
 6 | RUN rustc --version
 7 | 
 8 | RUN pip3 install toml maturin
 9 | RUN rustup component add clippy
10 | 
11 | # Install llvm so the xlsxwriter crate can build
12 | RUN yum install -y centos-release-scl
13 | RUN yum install -y llvm-toolset-7.0-clang.x86_64
14 | 
15 | COPY pynotatin /app
16 | COPY . /app/notatin_build/
17 | 
18 | WORKDIR /app
19 | 
20 | RUN sed -i 's/"\.\."/"notatin_build"/' Cargo.toml
21 | #RUN cat cargo_sdist_extras.txt >> pyproject.toml
22 | 
23 | ENTRYPOINT scl enable llvm-toolset-7.0 'maturin build --release -o /out'
24 | 


--------------------------------------------------------------------------------
/.github/workflows/pipeline.yml:
--------------------------------------------------------------------------------
 1 | name: Build and Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 |   pull_request:
 7 |     branches: [main]
 8 | 
 9 | env:
10 |   CARGO_TERM_COLOR: always
11 | 
12 | jobs:
13 |   build:
14 |     runs-on: ${{ matrix.os }}
15 |     strategy:
16 |       matrix:
17 |         os: [ubuntu-latest, windows-latest, macOS-latest]
18 |         rust: [stable]
19 | 
20 |     steps:
21 |       - uses: actions/checkout@v3
22 |       - name: ⚙️ Install Rust
23 |         run: |
24 |           rustup update ${{ matrix.rust }}
25 |           rustup default ${{ matrix.rust }}
26 |       - name: 🔨 Build
27 |         run: cargo build --verbose
28 |       - name: 🧪 Run Tests
29 |         run: cargo test --verbose
30 |       - name: 📎 Run Clippy
31 |         run: cargo clippy --release -- -D warnings
32 | 


--------------------------------------------------------------------------------
/pynotatin/src/lib.rs:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2025 LevelBlue
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  *
16 |  */
17 | 
18 | pub mod err;
19 | pub mod py_notatin_content;
20 | pub mod py_notatin_key;
21 | pub mod py_notatin_parser;
22 | pub mod py_notatin_value;
23 | pub mod util;
24 | 


--------------------------------------------------------------------------------
/pynotatin/src/err.rs:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2025 LevelBlue
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  *
16 |  */
17 | 
18 | use pyo3::exceptions::PyRuntimeError;
19 | use pyo3::PyErr;
20 | 
21 | pub struct PyNotatinError(pub notatin::err::Error);
22 | 
23 | impl From<PyNotatinError> for PyErr {
24 |     fn from(err: PyNotatinError) -> Self {
25 |         PyErr::new::<PyRuntimeError, _>(format!("{}", err.0))
26 |     }
27 | }
28 | 


--------------------------------------------------------------------------------
/benches/my_benchmark.rs:
--------------------------------------------------------------------------------
 1 | use criterion::{criterion_group, criterion_main, Criterion};
 2 | use notatin::{parser::ParserIterator, parser_builder::ParserBuilder};
 3 | 
 4 | /// Benchmark body: builds a parser for `test_data/NTUSER.DAT` and walks every key.
 5 | fn test_read_small_reg() {
 6 |     let hive = ParserBuilder::from_path("test_data/NTUSER.DAT").build().unwrap();
 7 |     // Exhaust the iterator; each key is dropped as soon as it is produced.
 8 |     ParserIterator::new(&hive).iter().for_each(drop);
 9 | }
10 | 
11 | fn test_read_small_reg_with_deleted() {
12 |     let hive = ParserBuilder::from_path("test_data/NTUSER.DAT")
13 |         .recover_deleted(true) // same input as the plain benchmark, with `recover_deleted` on
14 |         .build()
15 |         .unwrap();
16 |     ParserIterator::new(&hive).iter().for_each(drop); // exhaust the iterator, discarding keys
17 | }
18 | 
19 | pub fn bench(c: &mut Criterion) {
20 |     // Both benchmarks share one group: 1000 samples, 5-second measurement time.
21 |     let mut group = c.benchmark_group("read small reg");
22 |     group.sample_size(1000);
23 |     group.measurement_time(std::time::Duration::from_secs(5));
24 |     group.bench_function("read small reg", |bencher| bencher.iter(test_read_small_reg));
25 |     group.bench_function("read small reg with deleted", |bencher| {
26 |         bencher.iter(test_read_small_reg_with_deleted)
27 |     });
28 |     group.finish();
29 | }
30 | 
31 | criterion_group!(benches, bench);
32 | criterion_main!(benches);
33 | 


--------------------------------------------------------------------------------
/pynotatin/README.md:
--------------------------------------------------------------------------------
 1 | # PyNotatin
 2 | 
 3 | Python bindings for the Notatin crate.
 4 | 
 5 | ## Install
 6 | 
 7 | ### From source
 8 | 
 9 | ```
10 | pip install .
11 | ```
12 | 
13 | ### From Github
14 | 
15 | ```
16 | pip install git+https://github.com/strozfriedberg/notatin.git#subdirectory=pynotatin
17 | ```
18 | 
19 | ## Library usage
20 | 
21 | ```python,no_run
22 | from notatin import PyNotatinParser
23 | 
24 | def py_notatin_dump():
25 |     parser = PyNotatinParser("../test_data/NTUSER.DAT")
26 |     for key in parser.reg_keys():
27 |         print(key.path)
28 |         for value in key.values():
29 |             print("\t" + value.pretty_name + "\t" + str(value.content))
30 | 
31 | ```
32 | 
33 | See `test_reg.py` for other usage examples (particularly regarding accessing specific keys and values directly).
34 | 
35 | ## Unit tests
36 | 
37 | Use `cargo test --no-default-features` to run the Rust unit tests. The `--no-default-features` option is required due to this [known issue](https://pyo3.rs/v0.13.2/faq.html#i-cant-run-cargo-test-im-having-linker-issues-like-symbol-not-found-or-undefined-reference-to-_pyexc_systemerror) in PyO3.
38 | 
39 | ## Copyright
40 | 
41 | Copyright 2025 LevelBlue. Notatin and PyNotatin are licensed under the Apache License, Version 2.0.
42 | 


--------------------------------------------------------------------------------
/src/hive_bin_cell.rs:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2025 LevelBlue
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | use core::fmt::Debug;
18 | /// Interface used in this file to compare and debug-print sub-key lists as trait objects.
19 | pub trait CellSubKeyList {
20 |     fn size(&self) -> u32; // printed by the `Debug` impl and compared by `PartialEq`
21 |     fn get_offset_list(&self, hbin_offset_absolute: u32) -> Vec<u32>; // `PartialEq` calls this with 0
22 | }
23 | // Debug output shows only the list's `size()`.
24 | impl Debug for dyn CellSubKeyList {
25 |     fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
26 |         formatter.write_fmt(format_args!("CellSubKeyList size:{}", self.size()))
27 |     }
28 | }
29 | // Equality compares `size()` first, then the offset lists computed with a base of 0.
30 | impl PartialEq for dyn CellSubKeyList {
31 |     fn eq(&self, other: &Self) -> bool {
32 |         self.size() == other.size() && self.get_offset_list(0) == other.get_offset_list(0)
33 |     }
34 | }
35 | // Marker impl: adds no methods beyond `PartialEq`.
36 | impl Eq for dyn CellSubKeyList {}
37 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2025 LevelBlue
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | pub mod field_offset_len;
18 | pub(crate) mod macros;
19 | 
20 | pub mod base_block;
21 | pub mod cell;
22 | pub mod cell_big_data;
23 | pub mod cell_key_node;
24 | pub mod cell_key_security;
25 | pub mod cell_key_value;
26 | pub mod cell_value;
27 | pub mod cli_util;
28 | pub mod err;
29 | pub mod field_serializers;
30 | pub mod file_info;
31 | pub mod filter;
32 | pub mod hive_bin_cell;
33 | pub mod hive_bin_header;
34 | pub mod log;
35 | pub mod marvin32;
36 | pub mod parser;
37 | pub mod parser_builder;
38 | pub mod parser_recover_deleted;
39 | pub mod progress;
40 | pub mod reg_item_map;
41 | pub mod state;
42 | pub mod sub_key_list_lf;
43 | pub mod sub_key_list_lh;
44 | pub mod sub_key_list_li;
45 | pub mod sub_key_list_ri;
46 | pub mod transaction_log;
47 | pub mod util;
48 | 


--------------------------------------------------------------------------------
/src/reg_item_map.rs:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2025 LevelBlue
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | use std::collections::HashMap;
18 | 
19 | pub(crate) type RegItemMap = HashMap<RegItemMapKey, RegItemMapValue>;
20 | /// Key type of `RegItemMap`: a key path paired with an optional value name.
21 | #[derive(Debug, Eq, Hash, PartialEq)]
22 | pub(crate) struct RegItemMapKey {
23 |     pub(crate) key_path: String,
24 |     pub(crate) value_name: Option<String>, // NOTE(review): `None` presumably denotes the key itself — confirm
25 | }
26 | 
27 | impl RegItemMapKey {
28 |     pub(crate) fn new(key_path: String, value_name: Option<String>) -> Self { // field-wise constructor
29 |         Self {
30 |             key_path,
31 |             value_name,
32 |         }
33 |     }
34 | }
35 | /// Value type of `RegItemMap`: a BLAKE3 hash, an absolute file offset and a sequence number.
36 | #[derive(Debug, Eq, Hash, PartialEq)]
37 | pub(crate) struct RegItemMapValue {
38 |     pub(crate) hash: blake3::Hash,
39 |     pub(crate) file_offset_absolute: usize,
40 |     pub(crate) sequence_num: u32, // NOTE(review): presumably a transaction-log sequence number — confirm
41 | }
42 | // Field-wise constructor: the arguments are stored unchanged, nothing is computed or validated.
43 | impl RegItemMapValue {
44 |     pub(crate) fn new(hash: blake3::Hash, file_offset_absolute: usize, sequence_num: u32) -> Self {
45 |         Self {
46 |             hash,
47 |             file_offset_absolute,
48 |             sequence_num,
49 |         }
50 |     }
51 | }
52 | 


--------------------------------------------------------------------------------
/pynotatin/src/py_notatin_content.rs:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2025 LevelBlue
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  *
16 |  */
17 | 
18 | use crate::py_notatin_value::{PyNotatinDecodeFormat, PyNotatinValue};
19 | use pyo3::prelude::*;
20 | 
21 | use notatin::cell_value::{CellValue, DecodableValue};
22 | use pyo3::{Py, PyResult, Python};
23 | 
24 | // Python-visible wrapper around a notatin `CellValue`.
25 | #[pyclass]
26 | #[derive(Clone, Debug, Eq, PartialEq)]
27 | pub struct PyNotatinContent {
28 |     pub inner: CellValue,
29 | }
30 | 
31 | #[pymethods]
32 | impl PyNotatinContent {
33 |     // `content` property: conversion is delegated to `PyNotatinValue::prepare_content`.
34 |     #[getter]
35 |     pub fn content(&self, py: Python) -> Option<PyObject> {
36 |         PyNotatinValue::prepare_content(py, &self.inner)
37 |     }
38 | 
39 |     // Calls `decode_content` on the wrapped value with `format` and `offset`, and returns
40 |     // the decoded value wrapped in a new `PyNotatinContent`.
41 |     pub fn decode(
42 |         &self,
43 |         py: Python,
44 |         format: &PyNotatinDecodeFormat,
45 |         offset: usize,
46 |     ) -> PyResult<Py<PyNotatinContent>> {
47 |         // Only the first element of the returned pair is kept; the second is discarded.
48 |         let inner = self.inner.decode_content(&format.inner, offset).0;
49 |         Py::new(py, Self { inner })
50 |     }
51 | }
52 | 


--------------------------------------------------------------------------------
/bin/reg_dump/json_writer.rs:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2025 LevelBlue
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | use notatin::{
18 |     err::Error,
19 |     filter::Filter,
20 |     parser::{Parser, ParserIterator},
21 |     progress,
22 | };
23 | use std::fs::File;
24 | use std::io::{BufWriter, Write};
25 | use std::path::*;
26 | 
27 | pub(crate) struct WriteJson {} // stateless; `write` is an associated function
28 | impl WriteJson {
29 |     // Writes each key the (optionally filtered) iterator yields to `out_path`, one JSON per line.
30 |     pub(crate) fn write(
31 |         out_path: impl AsRef<Path>,
32 |         parser: &Parser,
33 |         filter: Option<Filter>,
34 |         console: &mut Box<dyn progress::UpdateProgressTrait>,
35 |     ) -> Result<(), Error> {
36 |         let mut writer = BufWriter::new(File::create(out_path)?);
37 |         let mut iter = ParserIterator::new(parser);
38 |         if let Some(filter) = filter {
39 |             iter.with_filter(filter);
40 |         }
41 |         for (index, key) in iter.iter().enumerate() {
42 |             console.update_progress(index)?;
43 |             // A serialization failure is returned as an I/O error instead of panicking.
44 |             let json = serde_json::to_string(&key).map_err(std::io::Error::from)?;
45 |             writeln!(&mut writer, "{}", json)?;
46 |         }
47 |         Ok(writer.flush()?) // BufWriter's Drop discards flush errors, so flush explicitly
48 |     }
49 | }
49 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "notatin"
 3 | version = "1.0.1"
 4 | authors = ["Kimberly Stone <kimberly.stone@levelblue.com>", "Joel Uckelman <juckelman@strozfriedberg.co.uk>"]
 5 | edition = "2018"
 6 | license = "Apache-2.0"
 7 | description = "Notatin is a Rust library for parsing offline Windows Registry files."
 8 | repository = "https://github.com/strozfriedberg/notatin"
 9 | readme = "README.md"
10 | keywords = ["forensic", "windows_registry", "parser"]
11 | categories = ["parser-implementations"]
12 | exclude = [
13 |     "test_data/*",
14 | ]
15 | 
16 | 
17 | [dependencies]
18 | bitflags = "2.3"
19 | blake3 = "1.8"
20 | crossterm = "0.29"
21 | enum-primitive-derive = "0.3"
22 | md5 = "0.8"
23 | nom = "8.0"
24 | num = "0.4"
25 | num-traits = "0.2.14"
26 | paste = "1.0"
27 | regex = "1.5"
28 | serde = { version = "1.0", features = ["derive"] }
29 | serde_json = "1.0"
30 | strum_macros = "0.27"
31 | thiserror = "2.0"
32 | winstructs = "0.3"
33 | 
34 | clap = { version = "4.5", optional = true }
35 | itertools = { version = "0.14", optional = true }
36 | walkdir = { version = "2.5", optional = true }
37 | xlsxwriter = { version = "0.6", optional = true }
38 | 
39 | 
40 | [dependencies.chrono]
41 | version = ">=0.4.27"
42 | features = ["serde"]
43 | 
44 | 
45 | [dev-dependencies]
46 | criterion = "0.7"
47 | 
48 | 
49 | [features]
50 | build-binary = ["xlsxwriter", "clap", "walkdir", "itertools"]
51 | 
52 | 
53 | [[bin]]
54 | name = "reg_compare"
55 | path = "./bin/reg_compare.rs"
56 | required-features = ["build-binary"]
57 | 
58 | 
59 | [[bin]]
60 | name = "reg_dump"
61 | path = "./bin/reg_dump/main.rs"
62 | required-features = ["build-binary"]
63 | 
64 | 
65 | [[bench]]
66 | name = "my_benchmark"
67 | path = "benches/my_benchmark.rs"
68 | harness = false
69 | 


--------------------------------------------------------------------------------
/src/file_info.rs:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2025 LevelBlue
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | use crate::err::Error;
18 | use std::io::{self, Read, Seek};
19 | use std::path::Path;
20 | 
21 | /// In-memory copy of an input file (`buffer`) together with its `hbin_offset_absolute`.
22 | #[derive(Clone, Debug, Default, Eq, PartialEq)]
23 | pub(crate) struct FileInfo {
24 |     pub hbin_offset_absolute: usize,
25 |     pub buffer: Vec<u8>,
26 | }
27 | impl FileInfo {
28 |     /// Opens `filename` and loads it through `from_read_seek`.
29 |     pub(crate) fn from_path<T: AsRef<Path>>(filename: T) -> Result<Self, Error> {
30 |         Self::from_read_seek(std::fs::File::open(filename)?)
31 |     }
32 | 
33 |     /// Reads `data_primary` to its end into `buffer`; `hbin_offset_absolute` starts at 0.
34 |     pub(crate) fn from_read_seek<T: ReadSeek>(mut data_primary: T) -> Result<Self, Error> {
35 |         let mut buffer = Vec::new();
36 |         data_primary.read_to_end(&mut buffer)?;
37 |         Ok(Self {
38 |             hbin_offset_absolute: 0,
39 |             buffer,
40 |         })
41 |     }
42 | 
43 |     /// Byte distance from the start of `buffer` to the start of `input`.
44 |     pub(crate) fn get_file_offset(&self, input: &[u8]) -> usize {
45 |         input.as_ptr() as usize - self.buffer.as_ptr() as usize
46 |     }
47 | 
48 |     /// Byte distance from `buffer`'s start to address `ptr`; assumes `ptr` is not below it (TODO confirm).
49 |     pub(crate) fn get_file_offset_from_ptr(&self, ptr: usize) -> usize {
50 |         ptr - self.buffer.as_ptr() as usize
51 |     }
52 | }
53 | /// `Read + Seek` bundled under one name, with a `tell` convenience method.
54 | pub trait ReadSeek: Read + Seek {
55 |     /// Current stream position, as reported by `Seek::stream_position`.
56 |     fn tell(&mut self) -> io::Result<u64> {
57 |         Seek::stream_position(self)
58 |     }
59 | }
60 | impl<T> ReadSeek for T where T: Read + Seek {} // blanket impl for every `Read + Seek` type
61 | 


--------------------------------------------------------------------------------
/src/progress.rs:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2025 LevelBlue
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | use crate::err::Error;
17 | use crossterm::{cursor, QueueableCommand};
18 | use std::io;
19 | use std::io::{Stdout, Write};
20 | /// Returns a stdout-backed reporter when `update_console` is true, otherwise a no-op one.
21 | pub fn new(update_console: bool) -> Box<dyn UpdateProgressTrait> {
22 |     if !update_console {
23 |         return Box::new(UpdateNull {});
24 |     }
25 |     let console = UpdateConsole {
26 |         need_final_newline: false,
27 |         stdout: io::stdout(),
28 |     };
29 |     Box::new(console)
30 | }
31 | /// Progress-reporting interface; this file implements it for `UpdateConsole` and `UpdateNull`.
32 | pub trait UpdateProgressTrait {
33 |     fn update_progress(&mut self, index: usize) -> Result<(), Error>; // console impl: a "." every 1000th index
34 |     fn update(&mut self, msg: &str) -> Result<(), Error>; // console impl: prints msg, then restores the cursor
35 |     fn write(&mut self, msg: &str) -> Result<(), Error>; // console impl: prints msg and flushes
36 | }
37 | // Progress reporter backed by stdout.
38 | struct UpdateConsole {
39 |     need_final_newline: bool, // set by `update`, read by `Drop`
40 |     stdout: Stdout,
41 | }
42 | 
43 | impl UpdateProgressTrait for UpdateConsole {
44 |     fn update_progress(&mut self, index: usize) -> Result<(), Error> {
45 |         if !index.is_multiple_of(1000) {
46 |             return Ok(()); // a dot is printed only when index is a multiple of 1000 (0 included)
47 |         }
48 |         self.stdout.write_all(b".")?;
49 |         self.stdout.flush()?;
50 |         Ok(())
51 |     }
52 |     // Prints `msg`, then puts the cursor back where it was before the message.
53 |     fn update(&mut self, msg: &str) -> Result<(), Error> {
54 |         self.stdout.queue(cursor::SavePosition)?.write_all(msg.as_bytes())?;
55 |         self.stdout.queue(cursor::RestorePosition)?.flush()?;
56 |         self.need_final_newline = true; // `Drop` will finish the line with "\n"
57 |         Ok(())
58 |     }
59 | 
60 |     // Writes `msg` as-is and flushes immediately.
61 |     fn write(&mut self, msg: &str) -> Result<(), Error> {
62 |         self.stdout.write_all(msg.as_bytes())?;
63 |         self.stdout.flush()?;
64 |         Ok(())
65 |     }
66 | }
67 | 
68 | impl Drop for UpdateConsole {
69 |     fn drop(&mut self) {
70 |         if self.need_final_newline {
71 |             self.stdout.write_all("\n".as_bytes()).unwrap_or_default();
72 |             self.stdout.flush().unwrap_or_default();
73 |         }
74 |     }
75 | }
76 | 
/// Progress reporter that discards everything it is given.
struct UpdateNull {}

impl UpdateProgressTrait for UpdateNull {
    // No-op: always succeeds.
    fn update_progress(&mut self, _index: usize) -> Result<(), Error> {
        Ok(())
    }

    // No-op: always succeeds.
    fn update(&mut self, _msg: &str) -> Result<(), Error> {
        Ok(())
    }

    // No-op: always succeeds.
    fn write(&mut self, _msg: &str) -> Result<(), Error> {
        Ok(())
    }
}
92 | 


--------------------------------------------------------------------------------
/src/cell.rs:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2025 LevelBlue
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | use nom::{branch::alt, bytes::complete::tag, combinator::map, IResult, Parser};
18 | use serde::Serialize;
19 | 
20 | #[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize)]
21 | pub enum CellState {
22 |     DeletedTransactionLog = -3,
23 |     DeletedPrimaryFile = -2,
24 |     DeletedPrimaryFileSlack = -1,
25 |     Allocated = 0,
26 |     ModifiedTransactionLog = 1,
27 |     // All Deleted* values are < 0 for support of `is_deleted()`.
28 |     // Make sure any new Deleted* values follow this pattern.
29 | }
30 | 
31 | impl Default for CellState {
32 |     fn default() -> Self {
33 |         Self::Allocated
34 |     }
35 | }
36 | 
37 | impl CellState {
38 |     pub fn is_deleted(self) -> bool {
39 |         (self as i8) < 0
40 |     }
41 | 
42 |     pub fn is_deleted_primary_file(self) -> bool {
43 |         self == Self::DeletedPrimaryFile || self == Self::DeletedPrimaryFileSlack
44 |     }
45 | }
46 | 
/// Kind of cell, as identified by `CellType::read_cell_type` from the cell's
/// two-byte signature.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize)]
pub enum CellType {
    /// No recognised signature.
    CellOther,
    /// Signature `nk`.
    CellKey,
    /// Signature `vk`.
    CellValue,
    /// Signature `sk`.
    CellSecurity,
    /// Signature `db`.
    CellBigData,
    /// Signature `ri`.
    CellIndexRoot,
    /// Signature `lh`.
    CellHashLeaf,
    /// Signature `lf`.
    CellFastLeaf,
    /// Signature `li`.
    CellIndexLeaf,
}
59 | 
60 | impl CellType {
61 |     pub(crate) fn read_cell_type(input: &[u8]) -> Self {
62 |         fn cell_type(b: &[u8]) -> IResult<&[u8], CellType> {
63 |             alt((
64 |                 map(tag("nk"), |_| CellType::CellKey),
65 |                 map(tag("vk"), |_| CellType::CellValue),
66 |                 map(tag("sk"), |_| CellType::CellSecurity),
67 |                 map(tag("lf"), |_| CellType::CellFastLeaf),
68 |                 map(tag("li"), |_| CellType::CellIndexLeaf),
69 |                 map(tag("lh"), |_| CellType::CellHashLeaf),
70 |                 map(tag("ri"), |_| CellType::CellIndexRoot),
71 |                 map(tag("db"), |_| CellType::CellBigData),
72 |             ))
73 |             .parse(b)
74 |         }
75 | 
76 |         match cell_type(input) {
77 |             Ok((_, cell_type)) => cell_type,
78 |             Err(_) => CellType::CellOther,
79 |         }
80 |     }
81 | }
82 | 
/// Accessors shared by cell types.
///
/// NOTE(review): the implementors live outside this file; the notes below are
/// inferred from names and signatures only — confirm against the implementations.
pub trait Cell {
    /// Presumably the cell's offset from the start of the file.
    fn get_file_offset_absolute(&self) -> usize;
    /// Optional BLAKE3 hash associated with the cell.
    fn get_hash(&self) -> Option<blake3::Hash>;
    /// Borrows the `Logs` associated with the cell.
    fn get_logs(&self) -> &crate::log::Logs;
    /// NOTE(review): presumably `true` when the cell, or something it owns,
    /// was recovered — confirm.
    fn has_or_is_recovered(&self) -> bool;
}
89 | 


--------------------------------------------------------------------------------
/src/hive_bin_header.rs:
--------------------------------------------------------------------------------
 1 | use crate::file_info::FileInfo;
 2 | use crate::util;
 3 | use chrono::{DateTime, Utc};
 4 | use nom::{
 5 |     bytes::complete::tag,
 6 |     number::complete::{le_u32, le_u64},
 7 |     IResult,
 8 | };
 9 | use serde::Serialize;
10 | 
/// Header of a hive bin, as parsed by `HiveBinHeader::from_bytes`.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
pub struct HiveBinHeader {
    /// The absolute offset of the hive bin, calculated at parse time
    pub file_offset_absolute: usize,
    /// The offset of the hive bin, in bytes, relative to the start of the hive bin data
    pub offset_from_first_hbin: u32,
    /// Size of the hive bin
    pub size: u32,
    /// 0 most of the time, can contain remnant data
    pub unknown1: u32,
    /// 0 most of the time, can contain remnant data
    pub unknown2: u32,
    /// Only the first hive bin contains a valid FILETIME. The timestamp in the header of the first hive bin acts as a backup copy of the last written timestamp in the base block.
    pub timestamp: DateTime<Utc>,
    /// The Spare field is used when shifting hive bins and cells in memory. In Windows 2000, the same field is called MemAlloc; it is used to track memory allocations for hive bins.
    pub spare: u32,
}
28 | 
29 | impl HiveBinHeader {
30 |     pub(crate) fn from_bytes<'a>(file_info: &FileInfo, input: &'a [u8]) -> IResult<&'a [u8], Self> {
31 |         let file_offset_absolute = file_info.get_file_offset(input);
32 |         let (input, _signature) = tag("hbin")(input)?;
33 |         let (input, offset_from_first_hbin) = le_u32(input)?;
34 |         let (input, size) = le_u32(input)?;
35 |         let (input, unknown1) = le_u32(input)?;
36 |         let (input, unknown2) = le_u32(input)?;
37 |         let (input, timestamp) = le_u64(input)?;
38 |         let (input, spare) = le_u32(input)?;
39 | 
40 |         let hbh = HiveBinHeader {
41 |             file_offset_absolute,
42 |             offset_from_first_hbin,
43 |             size,
44 |             unknown1,
45 |             unknown2,
46 |             timestamp: util::get_date_time_from_filetime(timestamp),
47 |             spare,
48 |         };
49 | 
50 |         Ok((input, hbh))
51 |     }
52 | }
53 | 
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a hand-built 32-byte header and checks every field.
    #[test]
    fn test_parse_hive_bin_header() {
        // Layout: "hbin", offset 0, size 0x1000, two zero unknowns,
        // an 8-byte timestamp and a zero spare field.
        let buffer = [
            0x68, 0x62, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7C, 0x60, 0xD7, 0xC4, 0x26, 0x14, 0xCD, 0x01,
            0x00, 0x00, 0x00, 0x00,
        ];

        let file_info = FileInfo {
            hbin_offset_absolute: 4096,
            buffer: buffer.to_vec(),
        };

        let ret = HiveBinHeader::from_bytes(&file_info, &file_info.buffer[..]);

        let expected_output = HiveBinHeader {
            file_offset_absolute: 0,
            offset_from_first_hbin: 0,
            size: 4096,
            unknown1: 0,
            unknown2: 0,
            timestamp: util::get_date_time_from_filetime(129782121007374460),
            spare: 0,
        };

        // The whole buffer is consumed, so nothing should remain.
        let remaining: [u8; 0] = [0; 0];
        let expected = Ok((&remaining[..], expected_output));

        assert_eq!(expected, ret);
    }
}
89 | 


--------------------------------------------------------------------------------
/src/err.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | use thiserror::Error;
 18 | 
/// Error type of this crate. Every variant carries a free-form `detail`
/// string that is appended to the variant's fixed message.
#[derive(Debug, Error, Eq, PartialEq)]
pub enum Error {
    /// Produced from `nom` parse errors.
    #[error("An error has occurred in the Nom library: {}", detail)]
    Nom { detail: String },
    /// Produced from `winstructs::err::Error`.
    #[error("An error has occurred in the Winstructs library: {}", detail)]
    Winstructs { detail: String },
    /// Produced from `std::array::TryFromSliceError`.
    #[error("An error has occurred while converting: {}", detail)]
    Conversion { detail: String },
    /// Produced from `std::path::StripPrefixError`.
    #[error("An error has occurred in StripPrefix: {}", detail)]
    StripPrefix { detail: String },
    /// Produced from `std::io::Error`.
    #[error("An IO error has occurred: {}", detail)]
    Io { detail: String },
    /// Produced from `xlsxwriter::XlsxError` (only with the `build-binary` feature).
    #[error("An error has occurred in the Xlsxwriter library: {}", detail)]
    XlsxWriter { detail: String },
    /// Produced from `std::num::TryFromIntError`.
    #[error("An error has occurred: {}", detail)]
    TryFromInt { detail: String },
    /// Created with `Error::buffer`.
    #[error("Invalid buffer access: {}", detail)]
    Buffer { detail: String },
    /// NOTE(review): no conversion in this file produces this variant; its
    /// message duplicates the "converting" wording — confirm intended use.
    #[error("An error has occurred when converting: {}", detail)]
    Any { detail: String },
}
 40 | 
 41 | impl Error {
 42 |     pub fn buffer(s: &str) -> Self {
 43 |         Self::Buffer {
 44 |             detail: s.to_string(),
 45 |         }
 46 |     }
 47 | }
 48 | 
 49 | impl From<nom::Err<nom::error::Error<&[u8]>>> for Error {
 50 |     fn from(_error: nom::Err<nom::error::Error<&[u8]>>) -> Self {
 51 |         Error::Nom {
 52 |             detail: "Nom parsing error".to_string(),
 53 |         }
 54 |     }
 55 | }
 56 | 
 57 | impl From<winstructs::err::Error> for Error {
 58 |     fn from(error: winstructs::err::Error) -> Self {
 59 |         Error::Winstructs {
 60 |             detail: format!("{:#?}", error.to_string()),
 61 |         }
 62 |     }
 63 | }
 64 | 
 65 | impl From<std::array::TryFromSliceError> for Error {
 66 |     fn from(error: std::array::TryFromSliceError) -> Self {
 67 |         Error::Conversion {
 68 |             detail: format!("{:#?}", error.to_string()),
 69 |         }
 70 |     }
 71 | }
 72 | 
 73 | impl From<std::path::StripPrefixError> for Error {
 74 |     fn from(error: std::path::StripPrefixError) -> Self {
 75 |         Error::StripPrefix {
 76 |             detail: format!("{:#?}", error.to_string()),
 77 |         }
 78 |     }
 79 | }
 80 | 
 81 | impl From<std::io::Error> for Error {
 82 |     fn from(error: std::io::Error) -> Self {
 83 |         Error::Io {
 84 |             detail: format!("{:#?}", error.to_string()),
 85 |         }
 86 |     }
 87 | }
 88 | 
 89 | #[cfg(feature = "build-binary")]
 90 | impl From<xlsxwriter::XlsxError> for Error {
 91 |     fn from(error: xlsxwriter::XlsxError) -> Self {
 92 |         Error::XlsxWriter {
 93 |             detail: format!("{:#?}", error.to_string()),
 94 |         }
 95 |     }
 96 | }
 97 | 
 98 | impl From<std::num::TryFromIntError> for Error {
 99 |     fn from(error: std::num::TryFromIntError) -> Self {
100 |         Error::TryFromInt {
101 |             detail: format!("{:#?}", error.to_string()),
102 |         }
103 |     }
104 | }
105 | 


--------------------------------------------------------------------------------
/src/cli_util.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | use std::path::*;
 18 | 
 19 | pub fn parse_paths(paths: &str) -> (String, Option<Vec<String>>) {
 20 |     let mut logs = vec![];
 21 |     let mut primary = String::new();
 22 |     for component in paths.split(',') {
 23 |         let lower = component.trim().trim_matches('\'').to_ascii_lowercase();
 24 |         if lower.ends_with(".log1") || lower.ends_with(".log2") {
 25 |             logs.push(component.trim().trim_matches('\'').to_string());
 26 |         } else {
 27 |             primary = component.trim().trim_matches('\'').to_string();
 28 |         }
 29 |     }
 30 |     if logs.is_empty() {
 31 |         (primary, None)
 32 |     } else {
 33 |         (primary, Some(logs))
 34 |     }
 35 | }
 36 | 
 37 | pub fn check_add_log(
 38 |     base_folder: &Path,
 39 |     primary_name: &str,
 40 |     extension: &str,
 41 |     logs: &mut Vec<PathBuf>,
 42 | ) {
 43 |     let log = get_log_name(base_folder, primary_name, extension);
 44 |     if log.is_file() {
 45 |         logs.push(log);
 46 |     }
 47 | }
 48 | 
 49 | fn get_log_name(base_folder: &Path, primary_name: &str, extension: &str) -> PathBuf {
 50 |     let log_name = match primary_name {
 51 |         "NTUSER.DAT" => "ntuser.dat",
 52 |         "UsrClass.DAT" => "UsrClass.dat",
 53 |         _ => primary_name,
 54 |     };
 55 |     let mut log = base_folder.join(log_name).into_os_string();
 56 |     log.push(".");
 57 |     log.push(extension);
 58 |     PathBuf::from(log)
 59 | }
 60 | 
 61 | pub fn get_log_files(skip_logs: bool, f: &str, path: &Path) -> Option<Vec<PathBuf>> {
 62 |     if skip_logs {
 63 |         None
 64 |     } else {
 65 |         let mut logs: Vec<PathBuf> = vec![];
 66 |         if let Some(folder) = path.parent() {
 67 |             check_add_log(folder, f, "LOG1", &mut logs);
 68 |             check_add_log(folder, f, "LOG2", &mut logs);
 69 |         }
 70 |         Some(logs)
 71 |     }
 72 | }
 73 | 
 74 | pub fn file_has_size(path: &Path) -> bool {
 75 |     match path.metadata() {
 76 |         Ok(md) => {
 77 |             if md.len() == 0 {
 78 |                 println!("{:?} size is 0; skipping", path);
 79 |                 false
 80 |             } else {
 81 |                 true
 82 |             }
 83 |         }
 84 |         Err(e) => {
 85 |             println!("Unable to get size for {:?} ({:?})", path, e);
 86 |             false
 87 |         }
 88 |     }
 89 | }
 90 | 
 91 | #[cfg(test)]
 92 | mod tests {
 93 |     use super::*;
 94 | 
 95 |     #[test]
 96 |     fn test_get_log_name() {
 97 |         assert_eq!(
 98 |             PathBuf::from("/mnt/d/tmp/ntuser.dat.LOG1"),
 99 |             get_log_name(Path::new("/mnt/d/tmp"), "NTUSER.DAT", "LOG1")
100 |         );
101 |         assert_eq!(
102 |             PathBuf::from("/mnt/d/tmp/UsrClass.dat.LOG2"),
103 |             get_log_name(Path::new("/mnt/d/tmp"), "UsrClass.DAT", "LOG2")
104 |         );
105 |         assert_eq!(
106 |             PathBuf::from("/mnt/d/tmp/SYSTEM.LOG2"),
107 |             get_log_name(Path::new("/mnt/d/tmp"), "SYSTEM", "LOG2")
108 |         );
109 |     }
110 | }
111 | 


--------------------------------------------------------------------------------
/src/sub_key_list_li.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | use crate::hive_bin_cell;
 18 | use nom::{
 19 |     bytes::complete::tag,
 20 |     number::complete::{le_i32, le_u16, le_u32},
 21 |     IResult, Parser,
 22 | };
 23 | use serde::Serialize;
 24 | 
// Subkeys list (`li` signature)
#[derive(Debug, Eq, PartialEq, Serialize)]
pub struct SubKeyListLi {
    /// Absolute value of the signed size field read from the cell.
    pub size: u32,
    /// Number of items, as read from the cell.
    pub count: u16,
    pub items: Vec<SubKeyListLiItem>, // Vec size = count
}
 32 | 
 33 | impl SubKeyListLi {
 34 |     /// Uses nom to parse an lf sub key list (lf) hive bin cell.
 35 |     fn from_bytes_internal(input: &[u8]) -> IResult<&[u8], SubKeyListLi> {
 36 |         let (input, size) = le_i32(input)?;
 37 |         let (input, _signature) = tag("li")(input)?;
 38 |         let (input, count) = le_u16(input)?;
 39 |         let (input, items) =
 40 |             nom::multi::count(SubKeyListLiItem::from_bytes(), count.into()).parse(input)?;
 41 |         Ok((
 42 |             input,
 43 |             SubKeyListLi {
 44 |                 size: size.unsigned_abs(),
 45 |                 count,
 46 |                 items,
 47 |             },
 48 |         ))
 49 |     }
 50 | 
 51 |     pub(crate) fn from_bytes(
 52 |     ) -> impl Fn(&[u8]) -> IResult<&[u8], Box<dyn hive_bin_cell::CellSubKeyList>> {
 53 |         |input: &[u8]| {
 54 |             let (input, ret) = SubKeyListLi::from_bytes_internal(input)?;
 55 |             Ok((input, Box::new(ret)))
 56 |         }
 57 |     }
 58 | }
 59 | 
 60 | impl hive_bin_cell::CellSubKeyList for SubKeyListLi {
 61 |     fn size(&self) -> u32 {
 62 |         self.size
 63 |     }
 64 | 
 65 |     fn get_offset_list(&self, hbin_offset_absolute: u32) -> Vec<u32> {
 66 |         self.items
 67 |             .iter()
 68 |             .map(|x| x.named_key_offset_relative + hbin_offset_absolute)
 69 |             .collect()
 70 |     }
 71 | }
 72 | 
/// One entry of an `li` sub key list.
#[derive(Debug, Eq, PartialEq, Serialize)]
pub struct SubKeyListLiItem {
    pub named_key_offset_relative: u32, // The offset value is in bytes and relative from the start of the hive bin data
}
 77 | 
 78 | impl SubKeyListLiItem {
 79 |     fn from_bytes() -> impl Fn(&[u8]) -> IResult<&[u8], Self> {
 80 |         |input: &[u8]| {
 81 |             let (input, named_key_offset_relative) = le_u32(input)?;
 82 |             Ok((
 83 |                 input,
 84 |                 SubKeyListLiItem {
 85 |                     named_key_offset_relative,
 86 |                 },
 87 |             ))
 88 |         }
 89 |     }
 90 | }
 91 | 
#[cfg(test)]
mod tests {
    use super::*;
    use crate::hive_bin_cell::CellSubKeyList;

    /// Exercises the `CellSubKeyList` implementation on a hand-built list.
    #[test]
    fn test_sub_key_list_li_traits() {
        let li = SubKeyListLi {
            size: 64,
            count: 2,
            items: vec![
                SubKeyListLiItem {
                    named_key_offset_relative: 12345,
                },
                SubKeyListLiItem {
                    named_key_offset_relative: 54321,
                },
            ],
        };
        assert_eq!(li.size, li.size());
        // Each expected value is the relative offset plus 4096.
        assert_eq!(vec![16441, 58417], li.get_offset_list(4096));
    }
}
115 | 


--------------------------------------------------------------------------------
/src/log.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | use crate::err::Error;
 18 | use serde::Serialize;
 19 | use std::fmt;
 20 | use std::io::{BufWriter, Write};
 21 | 
/// Collection of log entries. The list is only allocated once the first entry
/// is added, so the default value holds `None`.
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize)]
pub struct Logs {
    // `None` until something is added or `extend` is called.
    logs: Option<Vec<Log>>,
}
 26 | 
 27 | impl Logs {
 28 |     pub(crate) fn add<T: ToString>(&mut self, code: LogCode, text: &T) {
 29 |         self.add_internal(Log {
 30 |             code,
 31 |             text: text.to_string(),
 32 |         });
 33 |     }
 34 | 
 35 |     fn add_internal(&mut self, warning: Log) {
 36 |         match &mut self.logs {
 37 |             Some(logs) => logs.push(warning),
 38 |             None => self.logs = Some(vec![warning]),
 39 |         }
 40 |     }
 41 | 
 42 |     pub fn has_logs(&self) -> bool {
 43 |         self.logs.is_some()
 44 |             && !self
 45 |                 .logs
 46 |                 .as_ref()
 47 |                 .expect("just checked is_some()")
 48 |                 .is_empty()
 49 |     }
 50 | 
 51 |     pub fn get(&self) -> Option<&Vec<Log>> {
 52 |         self.logs.as_ref()
 53 |     }
 54 | 
 55 |     pub(crate) fn get_option(self) -> Option<Self> {
 56 |         if self.logs.is_none() {
 57 |             None
 58 |         } else {
 59 |             Some(self)
 60 |         }
 61 |     }
 62 | 
 63 |     pub(crate) fn prepend_all(&mut self, prefix: &str) {
 64 |         if let Some(logs) = &mut self.logs {
 65 |             for log in logs {
 66 |                 log.text = format!("{}{}", prefix, log.text)
 67 |             }
 68 |         }
 69 |     }
 70 | 
 71 |     pub(crate) fn extend(&mut self, additional: Self) {
 72 |         match &mut self.logs {
 73 |             Some(logs) => logs.extend(additional.logs.unwrap_or_default()),
 74 |             None => self.logs = Some(additional.logs.unwrap_or_default()),
 75 |         }
 76 |     }
 77 | 
 78 |     pub fn write<W: Write>(&self, writer: &mut BufWriter<std::fs::File>) -> Result<(), Error> {
 79 |         if let Some(logs) = &self.logs {
 80 |             for log in logs {
 81 |                 writeln!(writer, "{:?} {}", log.code, log.text)?;
 82 |             }
 83 |         }
 84 |         Ok(())
 85 |     }
 86 | 
 87 |     pub(crate) fn get_string(&self) -> String {
 88 |         let mut ret = String::new();
 89 |         if let Some(logs) = &self.logs {
 90 |             for log in logs {
 91 |                 ret += &format!("{:?} {};", log.code, log.text);
 92 |             }
 93 |         }
 94 |         ret
 95 |     }
 96 | }
 97 | 
 98 | impl fmt::Display for Logs {
 99 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
100 |         write!(f, "{}", self.get_string())
101 |     }
102 | }
103 | 
/// Category attached to each `Log` entry. The variant name is what appears in
/// written or stringified logs, because entries are rendered with `{:?}`.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize)]
pub enum LogCode {
    WarningOther,
    WarningNom,
    WarningConversion,
    WarningContent,
    WarningBigDataContent,
    WarningUnrecognizedBitflag,
    WarningTransactionLog,
    WarningIterator,
    WarningBaseBlock,
    WarningParse,
    WarningRecovery,
    Info,
}
119 | 
/// A single log entry: a category plus its message.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
pub struct Log {
    /// Category of the entry.
    pub code: LogCode,
    /// Message text of the entry.
    pub text: String,
}
125 | 


--------------------------------------------------------------------------------
/pynotatin/notatin.pyi:
--------------------------------------------------------------------------------
  1 | from typing import Optional
  2 | 
  3 | # classes
  4 | class PyNotatinParser(object):
  5 |     """ Create and return a new object. """
  6 |     @staticmethod # known case of __new__
  7 |     def __new__(FileOrFileLike) -> PyNotatinParser: ...
  8 | 
  9 |     """ Returns the key for the `path` parameter """
 10 |     def open(self, path: str) -> PyNotatinKey: ...
 11 | 
 12 |     """ Returns an iterator that yields reg keys """
 13 |     def reg_keys(self): ...
 14 | 
 15 |     """ Returns the root key """
 16 |     def root(self) -> PyNotatinKey: ...
 17 | 
 18 |     """ Returns the parent key for the `key` parameter """
 19 |     def get_parent(self, key: PyNotatinKey) -> PyNotatinKey: ...
 20 | 
 21 | class PyNotatinKey(object):
 22 |     """ Returns an iterator that yields reg values """
 23 |     def values(self): ...
 24 | 
 25 |     """ Returns the requested value, or None """
 26 |     def value(self, name: str) -> Optional[PyNotatinValue]: ...
 27 | 
 28 |     """ Returns an iterator that yields sub keys """
 29 |     def subkeys(self, parser: PyNotatinParser): ...
 30 | 
 31 |     """ Returns the requested key, or None """
 32 |     def find_key(self, parser: PyNotatinParser, path: str) -> Optional[PyNotatinKey]: ...
 33 | 
 34 |     """ Returns the name of the key """
 35 |     @property
 36 |     def name(self) -> str: ...
 37 | 
 38 |     """ Returns the path of the key """
 39 |     @property
 40 |     def path(self) -> str: ...
 41 | 
 42 |     """ Returns the pretty path (no root) of the key """
 43 |     @property
 44 |     def pretty_path(self) -> str: ...
 45 | 
 46 |     """ Returns the number of sub keys """
 47 |     @property
 48 |     def number_of_sub_keys(self) -> int: ...
 49 | 
 50 |     """ Returns the number of key values """
 51 |     @property
 52 |     def number_of_key_values(self) -> int: ...
 53 | 
 54 | class PyNotatinValue(object):
 55 |     """ Returns the value as bytes """
 56 |     @property
 57 |     def value(self) -> bytes: ...
 58 | 
 59 |     """ Returns the name of the value, or "(default)" for the default value """
 60 |     @property
 61 |     def pretty_name(self) -> str: ...
 62 | 
 63 |     """ Returns the name of the value """
 64 |     @property
 65 |     def name(self) -> str: ...
 66 | 
 67 |     """ Returns the data type as an integer """
 68 |     @property
 69 |     def raw_data_type(self) -> int: ...
 70 | 
 71 |     """ Returns the value as typed data """
 72 |     @property
 73 |     def content(self) -> object: ...
 74 | 
 75 |     """ Decodes the content using one of the supported decoders (see `PyNotatinDecodeFormat`) """
 76 |     def decode(self, format: PyNotatinDecodeFormat, offset: int) -> PyNotatinContent: ...
 77 | 
 78 |     """ Returns an iterator that yields value versions, if `recover_deleted` was enabled """
 79 |     def versions(self): ...
 80 | 
 81 | class PyNotatinContent(object):
 82 |     """ Returns the decoded content """
 83 |     @property
 84 |     def content(self) -> object: ...
 85 | 
 86 |     """ Decodes the content using one of the supported decoders (see `PyNotatinDecodeFormat`).
 87 |         This method allows for chaining of decode operations """
 88 |     def decode(self,format: PyNotatinDecodeFormat, offset: int) -> PyNotatinContent: ...
 89 | 
class PyNotatinDecodeFormat(object):
    """ Decoder selectors accepted by the `decode` methods of `PyNotatinValue` and `PyNotatinContent`. """

    lznt1: PyNotatinDecodeFormat
    """ Returns an lznt1 decoder """

    rot13: PyNotatinDecodeFormat
    """ Returns a rot13 decoder """

    utf16_multiple: PyNotatinDecodeFormat
    """ Returns a utf16_multiple (REG_MULTI_SZ) decoder """

    utf16: PyNotatinDecodeFormat
    """ Returns a utf16 decoder """
102 | 
class PyNotatinParserBuilder(object):
    """ Builder that configures and creates a `PyNotatinParser` (see `build`). """

    # `__new__` receives the class as its first argument; without `cls` the
    # only declared parameter would be read as the class itself.
    def __new__(cls, FileOrFileLike) -> PyNotatinParserBuilder:
        """ Create and return a new object. """
        ...

    def recover_deleted(self, recover: bool):
        """ Set to true to search for deleted and modified items """
        ...

    def with_transaction_log(self, FileOrFileLike):
        """ Add a transaction log file """
        ...

    def build(self) -> PyNotatinParser:
        """ Returns a PyNotatinParser """
        ...
116 | 


--------------------------------------------------------------------------------
/src/field_serializers.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | use crate::cell_key_node::{AccessFlags, KeyNodeFlags};
 18 | use crate::field_offset_len::FieldTrait;
 19 | use crate::log::Logs;
 20 | use crate::util;
 21 | use serde::ser::{Serialize, SerializeStruct, Serializer};
 22 | 
 23 | pub(crate) fn field_data_as_hex<S: Serializer>(
 24 |     x: &[u8],
 25 |     s: S,
 26 | ) -> std::result::Result<S::Ok, S::Error> {
 27 |     s.serialize_str(&util::to_hex_string(x))
 28 | }
 29 | 
 30 | pub(crate) fn field_last_key_written_date_and_time_interpreted<S: Serializer>(
 31 |     x: &dyn FieldTrait<u64>,
 32 |     s: S,
 33 | ) -> std::result::Result<S::Ok, S::Error> {
 34 |     let mut ser = s.serialize_struct("last_key_written_date_and_time", get_field_count(x))?;
 35 |     serialize_base_field(x, &mut ser)?;
 36 |     ser.serialize_field(
 37 |         "interpreted",
 38 |         &util::format_date_time(util::get_date_time_from_filetime(x.value())),
 39 |     )?;
 40 |     ser.end()
 41 | }
 42 | 
 43 | pub(crate) fn field_key_node_flag_bits_interpreted<S: Serializer>(
 44 |     x: &dyn FieldTrait<u16>,
 45 |     s: S,
 46 | ) -> std::result::Result<S::Ok, S::Error> {
 47 |     let mut logs = Logs::default();
 48 |     let flags = KeyNodeFlags::from_bits_checked(x.value(), &mut logs);
 49 |     let mut ser = s.serialize_struct("key_node_flag_bits", get_field_count_with_logs(x, &logs))?;
 50 |     serialize_base_field(x, &mut ser)?;
 51 |     ser.serialize_field("interpreted", &format!("{:?}", flags))?;
 52 |     if logs.has_logs() {
 53 |         ser.serialize_field("logs", &logs.get_string())?;
 54 |     }
 55 |     ser.end()
 56 | }
 57 | 
 58 | pub(crate) fn field_acccess_flag_bits_interpreted<S: Serializer>(
 59 |     x: &dyn FieldTrait<u32>,
 60 |     s: S,
 61 | ) -> std::result::Result<S::Ok, S::Error> {
 62 |     let mut logs = Logs::default();
 63 |     let flags = AccessFlags::from_bits_checked(x.value(), &mut logs);
 64 |     let mut ser = s.serialize_struct("access_flag_bits", get_field_count_with_logs(x, &logs))?;
 65 |     serialize_base_field(x, &mut ser)?;
 66 |     ser.serialize_field("interpreted", &format!("{:?}", flags))?;
 67 |     if logs.has_logs() {
 68 |         ser.serialize_field("logs", &logs.get_string())?;
 69 |     }
 70 |     ser.end()
 71 | }
 72 | 
 73 | pub(crate) fn field_value_name_interpreted<S: Serializer>(
 74 |     x: &dyn FieldTrait<String>,
 75 |     s: S,
 76 | ) -> std::result::Result<S::Ok, S::Error> {
 77 |     let mut ser = s.serialize_struct("value_name", get_field_count(x))?;
 78 |     serialize_base_field(x, &mut ser)?;
 79 |     ser.serialize_field("interpreted", &util::get_pretty_name(&x.value()))?;
 80 |     ser.end()
 81 | }
 82 | 
 83 | fn get_field_count<T: Default + Clone + Serialize + 'static>(x: &dyn FieldTrait<T>) -> usize {
 84 |     if x.get_field_full().is_some() {
 85 |         4
 86 |     } else {
 87 |         2
 88 |     }
 89 | }
 90 | 
 91 | fn get_field_count_with_logs<T: Default + Clone + Serialize + 'static>(
 92 |     x: &dyn FieldTrait<T>,
 93 |     logs: &Logs,
 94 | ) -> usize {
 95 |     get_field_count(x) + logs.has_logs() as usize // if we have logs then we need an additional field to display them
 96 | }
 97 | 
 98 | fn serialize_base_field<S: SerializeStruct, T: Default + Clone + Serialize + 'static>(
 99 |     x: &dyn FieldTrait<T>,
100 |     ser: &mut S,
101 | ) -> Result<(), S::Error> {
102 |     if let Some(full) = x.get_field_full() {
103 |         ser.serialize_field("value", &full.value())?;
104 |         ser.serialize_field("offset", &full.offset())?;
105 |         ser.serialize_field("len", &full.len())
106 |     } else {
107 |         ser.serialize_field("value", &x.value())
108 |     }
109 | }
110 | 


--------------------------------------------------------------------------------
/src/macros.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | /// Via https://github.com/omerbenamram/mft
 18 | #[macro_export]
 19 | macro_rules! impl_serialize_for_bitflags {
 20 |     ($flags: ident) => {
 21 |         impl serde::ser::Serialize for $flags {
 22 |             fn serialize<S>(&self, serializer: S) -> ::std::result::Result<S::Ok, S::Error>
 23 |             where
 24 |                 S: serde::ser::Serializer,
 25 |             {
 26 |                 serializer.serialize_str(&format!("{:?}", &self))
 27 |             }
 28 |         }
 29 |     };
 30 | }
 31 | 
 32 | #[macro_export]
 33 | macro_rules! impl_flags_from_bits {
 34 |     ($bitflag_type: ident, $var_type: ident) => {
 35 |         impl $bitflag_type {
 36 |             #[allow(dead_code)]
 37 |             pub(crate) fn from_bits_checked(
 38 |                 flags: $var_type,
 39 |                 logs: &mut $crate::log::Logs,
 40 |             ) -> Self {
 41 |                 let flags_mapped = $bitflag_type::from_bits_truncate(flags);
 42 |                 if flags != flags_mapped.bits() {
 43 |                     fn f() {}
 44 |                     fn type_name_of<T>(_: T) -> &'static str {
 45 |                         std::any::type_name::<T>()
 46 |                     }
 47 |                     let name = type_name_of(f);
 48 |                     const FOOTER_LEN: usize = "::f".len();
 49 |                     let fn_name = &name[..name.len() - FOOTER_LEN];
 50 |                     logs.add(
 51 |                         $crate::log::LogCode::WarningUnrecognizedBitflag,
 52 |                         &format!("{}: {:#X}", fn_name, flags),
 53 |                     );
 54 |                 }
 55 |                 return flags_mapped;
 56 |             }
 57 |         }
 58 |     };
 59 | }
 60 | 
 61 | #[macro_export]
 62 | macro_rules! impl_enum_from_value {
 63 |     ($enum_type: ident) => {
 64 |         impl $enum_type {
 65 |             pub(crate) fn from_value(value: u32, logs: &mut Logs) -> Self {
 66 |                 $enum_type::from_u32(value).unwrap_or_else(|| {
 67 |                     logs.add(
 68 |                         LogCode::WarningConversion,
 69 |                         &format!("Unrecognized {} value", stringify!($enum_type)),
 70 |                     );
 71 |                     $enum_type::Unknown
 72 |                 })
 73 |             }
 74 |         }
 75 |     };
 76 | }
 77 | 
 78 | #[cfg(test)]
 79 | mod tests {
 80 |     use crate::log::{Log, LogCode, Logs};
 81 |     use bitflags::bitflags;
 82 | 
 83 |     #[test]
 84 |     fn test_from_bits_checked() {
 85 |         bitflags! {
 86 |             #[derive(Debug, PartialEq)]
 87 |             pub struct TestFlags: u16 {
 88 |                 const TEST_1 = 0x0001;
 89 |                 const TEST_2 = 0x0002;
 90 |                 const TEST_3 = 0x0003;
 91 |             }
 92 |         }
 93 |         impl_flags_from_bits! { TestFlags, u16 }
 94 | 
 95 |         let flag_bits = 0x0001 | 0x0003;
 96 |         let mut logs = Logs::default();
 97 |         let flags = TestFlags::from_bits_checked(flag_bits, &mut logs);
 98 |         assert_eq!(
 99 |             TestFlags::TEST_1 | TestFlags::TEST_3,
100 |             flags,
101 |             "Valid from_bits_checked conversion"
102 |         );
103 |         assert_eq!(
104 |             None,
105 |             logs.get(),
106 |             "Valid from_bits_checked conversion - logs should be empty"
107 |         );
108 | 
109 |         let flag_bits = 0xffff;
110 |         let flags = TestFlags::from_bits_checked(flag_bits, &mut logs);
111 |         assert_eq!(
112 |             TestFlags::TEST_1 | TestFlags::TEST_2 | TestFlags::TEST_3,
113 |             flags,
114 |             "Unmapped bits from_bits_checked conversion"
115 |         );
116 |         assert_eq!(Some(&vec![
117 |             Log {
118 |                 code: LogCode::WarningUnrecognizedBitflag,
119 |                 text: "notatin::macros::tests::test_from_bits_checked::TestFlags::from_bits_checked: 0xFFFF".to_string()
120 |             }
121 |         ]), logs.get(), "Unmapped bits from_bits_checked conversion - logs should contain a warning");
122 |     }
123 | }
124 | 


--------------------------------------------------------------------------------
/src/parser_builder.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | use crate::err::Error;
 18 | use crate::file_info::{FileInfo, ReadSeek};
 19 | use crate::filter::Filter;
 20 | use crate::parser::Parser;
 21 | use crate::state::State;
 22 | use crate::transaction_log::TransactionLog;
 23 | use std::path::Path;
 24 | 
/// Options shared by the path-based and file-based parser builders.
///
/// All fields default to `None` / `false` via `Default`.
#[derive(Clone, Default)]
pub struct ParserBuilderBase {
    // Optional filter stored by the builders' `with_filter` setters.
    // NOTE(review): `ParserBuilder::build` in this file does not read this field.
    filter: Option<Filter>,
    // Forwarded to `Parser::recover_deleted` and to `Parser::init`.
    recover_deleted: bool,
    // Forwarded to `State::get_full_field_info`.
    get_full_field_info: bool,
    // Forwarded to `Parser::update_console`.
    update_console: bool,
}
 32 | 
/// Builder that creates a `Parser` from filesystem paths.
///
/// Created via `ParserBuilder::from_path`; files are opened when `build` is called.
pub struct ParserBuilderFromPath {
    // Path of the primary hive file.
    primary: Box<dyn AsRef<Path>>,
    // Paths of transaction log files, in the order they were added.
    transaction_logs: Vec<Box<dyn AsRef<Path>>>,
    // Options shared with `ParserBuilderFromFile`.
    base: ParserBuilderBase,
}
 38 | 
 39 | impl ParserBuilderFromPath {
 40 |     pub fn recover_deleted(&mut self, recover: bool) -> &mut Self {
 41 |         self.base.recover_deleted = recover;
 42 |         self
 43 |     }
 44 | 
 45 |     pub fn get_full_field_info(&mut self, get_full_field_info: bool) -> &mut Self {
 46 |         self.base.get_full_field_info = get_full_field_info;
 47 |         self
 48 |     }
 49 | 
 50 |     pub fn with_transaction_log<T: AsRef<Path> + 'static>(&mut self, log: T) -> &mut Self {
 51 |         self.transaction_logs.push(Box::new(log));
 52 |         self
 53 |     }
 54 | 
 55 |     pub fn update_console(&mut self, update_console: bool) -> &mut Self {
 56 |         self.base.update_console = update_console;
 57 |         self
 58 |     }
 59 | 
 60 |     pub fn build(&self) -> Result<Parser, Error> {
 61 |         let mut transaction_logs = vec![];
 62 |         for transaction_log in &self.transaction_logs {
 63 |             transaction_logs.push(Box::new(std::fs::File::open(transaction_log.as_ref())?))
 64 |         }
 65 |         ParserBuilder::build(
 66 |             FileInfo::from_path(self.primary.as_ref())?,
 67 |             self.base.clone(),
 68 |             transaction_logs,
 69 |         )
 70 |     }
 71 | }
 72 | 
/// Builder that creates a `Parser` from already-open `ReadSeek` sources.
///
/// Created via `ParserBuilder::from_file`.
pub struct ParserBuilderFromFile {
    // Source of the primary hive data.
    primary: Box<dyn ReadSeek>,
    // Transaction log sources, in the order they were added.
    transaction_logs: Vec<Box<dyn ReadSeek>>,
    // Options shared with `ParserBuilderFromPath`.
    base: ParserBuilderBase,
}
 78 | 
 79 | impl ParserBuilderFromFile {
 80 |     pub fn with_filter(&mut self, filter: Filter) -> &mut Self {
 81 |         self.base.filter = Some(filter);
 82 |         self
 83 |     }
 84 | 
 85 |     pub fn recover_deleted(&mut self, recover: bool) -> &mut Self {
 86 |         self.base.recover_deleted = recover;
 87 |         self
 88 |     }
 89 | 
 90 |     pub fn get_full_field_info(&mut self, get_full_field_info: bool) -> &mut Self {
 91 |         self.base.get_full_field_info = get_full_field_info;
 92 |         self
 93 |     }
 94 | 
 95 |     pub fn with_transaction_log<T: ReadSeek + 'static>(&mut self, log: T) -> &mut Self {
 96 |         self.transaction_logs.push(Box::new(log));
 97 |         self
 98 |     }
 99 | 
100 |     pub fn build(self) -> Result<Parser, Error> {
101 |         let mut transaction_logs = vec![];
102 |         for transaction_log in self.transaction_logs {
103 |             transaction_logs.push(Box::new(transaction_log));
104 |         }
105 |         ParserBuilder::build(
106 |             FileInfo::from_read_seek(self.primary)?,
107 |             self.base,
108 |             transaction_logs,
109 |         )
110 |     }
111 | }
112 | 
113 | pub struct ParserBuilder {}
114 | 
115 | impl ParserBuilder {
116 |     pub fn from_path<P: AsRef<Path> + 'static>(primary: P) -> ParserBuilderFromPath {
117 |         ParserBuilderFromPath {
118 |             primary: Box::new(primary),
119 |             transaction_logs: vec![],
120 |             base: ParserBuilderBase::default(),
121 |         }
122 |     }
123 | 
124 |     pub fn from_file<R: ReadSeek + 'static>(primary: R) -> ParserBuilderFromFile {
125 |         ParserBuilderFromFile {
126 |             primary: Box::new(primary),
127 |             transaction_logs: vec![],
128 |             base: ParserBuilderBase::default(),
129 |         }
130 |     }
131 | 
132 |     fn build<T: ReadSeek + 'static>(
133 |         file_info: FileInfo,
134 |         base: ParserBuilderBase,
135 |         transaction_logs: Vec<Box<T>>,
136 |     ) -> Result<Parser, Error> {
137 |         let (parsed_transaction_logs, warning_logs) = TransactionLog::parse(transaction_logs)?;
138 | 
139 |         let mut parser = Parser {
140 |             file_info,
141 |             state: State {
142 |                 get_full_field_info: base.get_full_field_info,
143 |                 ..State::default()
144 |             },
145 |             base_block: None,
146 |             hive_bin_header: None,
147 |             cell_key_node_root: None,
148 |             recover_deleted: base.recover_deleted,
149 |             update_console: base.update_console,
150 |         };
151 |         parser.init(base.recover_deleted, parsed_transaction_logs)?;
152 | 
153 |         if let Some(warning_logs) = warning_logs {
154 |             parser.state.info.extend(warning_logs);
155 |         }
156 |         Ok(parser)
157 |     }
158 | }
159 | 


--------------------------------------------------------------------------------
/src/sub_key_list_lf.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | use crate::hive_bin_cell;
 18 | use crate::log::Logs;
 19 | use crate::util;
 20 | use nom::{
 21 |     bytes::complete::{tag, take},
 22 |     number::complete::{le_i32, le_u16, le_u32},
 23 |     IResult, Parser,
 24 | };
 25 | use serde::Serialize;
 26 | 
// Subkeys list with name hints ("lf" signature)
#[derive(Debug, Eq, PartialEq, Serialize)]
pub struct SubKeyListLf {
    pub size: u32,  // Absolute value of the signed cell size read from the cell header
    pub count: u16, // Number of items, as read from the cell
    pub items: Vec<SubKeyListLfItem>, // Vec size = count
}
 34 | 
 35 | impl hive_bin_cell::CellSubKeyList for SubKeyListLf {
 36 |     fn size(&self) -> u32 {
 37 |         self.size
 38 |     }
 39 | 
 40 |     fn get_offset_list(&self, hbin_offset_absolute: u32) -> Vec<u32> {
 41 |         self.items
 42 |             .iter()
 43 |             .map(|x| x.named_key_offset_relative + hbin_offset_absolute)
 44 |             .collect()
 45 |     }
 46 | }
 47 | 
 48 | impl SubKeyListLf {
 49 |     /// Uses nom to parse an lf sub key list (lf) hive bin cell.
 50 |     fn from_bytes_internal(input: &[u8]) -> IResult<&[u8], Self> {
 51 |         let (input, size) = le_i32(input)?;
 52 |         let (input, _signature) = tag("lf")(input)?;
 53 |         let (input, count) = le_u16(input)?;
 54 |         let (input, items) =
 55 |             nom::multi::count(SubKeyListLfItem::from_bytes(), count.into()).parse(input)?;
 56 |         Ok((
 57 |             input,
 58 |             SubKeyListLf {
 59 |                 size: size.unsigned_abs(),
 60 |                 count,
 61 |                 items,
 62 |             },
 63 |         ))
 64 |     }
 65 | 
 66 |     pub(crate) fn from_bytes(
 67 |     ) -> impl Fn(&[u8]) -> IResult<&[u8], Box<dyn hive_bin_cell::CellSubKeyList>> {
 68 |         |input: &[u8]| {
 69 |             let (input, ret) = SubKeyListLf::from_bytes_internal(input)?;
 70 |             Ok((input, Box::new(ret)))
 71 |         }
 72 |     }
 73 | }
 74 | 
// A single entry of an lf sub key list: key offset plus a short name hint.
#[derive(Debug, Eq, PartialEq, Serialize)]
pub struct SubKeyListLfItem {
    pub named_key_offset_relative: u32, // The offset value is in bytes and relative from the start of the hive bin data
    pub name_hint: String, // The first 4 ASCII characters of a key name string (used to speed up lookups)
    pub logs: Logs, // Log entries collected while decoding `name_hint`
}
 81 | 
 82 | impl SubKeyListLfItem {
 83 |     fn from_bytes() -> impl Fn(&[u8]) -> IResult<&[u8], Self> {
 84 |         |input: &[u8]| {
 85 |             let (input, named_key_offset_relative) = le_u32(input)?;
 86 |             let (input, name_hint) = take(4usize)(input)?;
 87 |             let mut logs = Logs::default();
 88 |             Ok((
 89 |                 input,
 90 |                 SubKeyListLfItem {
 91 |                     named_key_offset_relative,
 92 |                     name_hint: util::from_ascii(name_hint, &mut logs, "SubKeyListLfItem::key_name"),
 93 |                     logs,
 94 |                 },
 95 |             ))
 96 |         }
 97 |     }
 98 | }
 99 | 
#[cfg(test)]
mod tests {
    use super::*;
    use crate::hive_bin_cell::CellSubKeyList;

    /// Builds an item with empty logs for use in expectations.
    fn make_item(offset: u32, hint: &str) -> SubKeyListLfItem {
        SubKeyListLfItem {
            named_key_offset_relative: offset,
            name_hint: hint.to_string(),
            logs: Logs::default(),
        }
    }

    /// `size()` mirrors the field and `get_offset_list` adds the hbin offset.
    #[test]
    fn test_sub_key_list_lf_traits() {
        let lf = SubKeyListLf {
            size: 64,
            count: 2,
            items: vec![make_item(12345, "aaaa"), make_item(54321, "zzzz")],
        };
        assert_eq!(lf.size, lf.size());
        assert_eq!(vec![16441, 58417], lf.get_offset_list(4096));
    }

    /// Parses a hand-written 24-byte lf cell containing two items.
    #[test]
    fn test_parse_sub_key_list_lf() {
        #[rustfmt::skip]
        let cell_bytes = [
            0xE8, 0xFF, 0xFF, 0xFF, // size (-24)
            0x6C, 0x66,             // "lf"
            0x02, 0x00,             // count
            0xF8, 0x9B, 0x01, 0x00, 0x53, 0x63, 0x72, 0x65, // item 0
            0xA0, 0x9B, 0x01, 0x00, 0x53, 0x63, 0x72, 0x65, // item 1
        ];

        let parsed = SubKeyListLf::from_bytes_internal(&cell_bytes);

        let expected_list = SubKeyListLf {
            size: 24,
            count: 2,
            items: vec![make_item(105464, "Scre"), make_item(105376, "Scre")],
        };
        let nothing_left: [u8; 0] = [];
        let expected = Ok((&nothing_left[..], expected_list));

        assert_eq!(expected, parsed);
    }
}


--------------------------------------------------------------------------------
/bin/reg_dump/tsv_writer.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | use notatin::{
 18 |     cell::Cell,
 19 |     cell_key_node::CellKeyNode,
 20 |     cell_key_value::CellKeyValue,
 21 |     err::Error,
 22 |     filter::Filter,
 23 |     parser::{Parser, ParserIterator},
 24 |     progress, util,
 25 | };
 26 | use std::fs::File;
 27 | use std::io::{BufWriter, Write};
 28 | use std::path::*;
 29 | 
/// Writes parser output as tab-separated rows to a file.
pub(crate) struct WriteTsv {
    // Running row number; incremented before each key or value row is written.
    index: usize,
    // When true, only rows for which `has_or_is_recovered()` is true are written.
    recovered_only: bool,
    // Buffered handle to the output file.
    writer: BufWriter<File>,
    // Progress reporter, updated once per key yielded by the iterator.
    console: Box<dyn progress::UpdateProgressTrait>,
}
 36 | 
 37 | impl WriteTsv {
 38 |     pub(crate) fn new(output: impl AsRef<Path>, recovered_only: bool) -> Result<Self, Error> {
 39 |         let write_file = File::create(output)?;
 40 |         let writer = BufWriter::new(write_file);
 41 |         Ok(WriteTsv {
 42 |             index: 0,
 43 |             recovered_only,
 44 |             writer,
 45 |             console: progress::new(true),
 46 |         })
 47 |     }
 48 | 
 49 |     pub(crate) fn write(&mut self, parser: &Parser, filter: Option<Filter>) -> Result<(), Error> {
 50 |         let mut iter = ParserIterator::new(parser);
 51 |         if let Some(filter) = filter {
 52 |             iter.with_filter(filter);
 53 |         }
 54 | 
 55 |         writeln!(self.writer,"Index\tKey Path\tSubkey Count\tValue Name\tValue Data\tTimestamp\tStatus\tPrevious Seq Num\tModifying Seq Num\tFlags\tAccess Flags\tValue Type\tLogs")?;
 56 |         for (index, key) in iter.iter().enumerate() {
 57 |             self.console.update_progress(index)?;
 58 |             self.write_key_tsv(&key, false)?;
 59 |         }
 60 |         writeln!(self.writer, "\nLogs\n-----------")?;
 61 |         parser.get_parse_logs().write::<File>(&mut self.writer)?;
 62 |         Ok(())
 63 |     }
 64 | 
 65 |     fn write_value_tsv(
 66 |         &mut self,
 67 |         cell_key_node: &CellKeyNode,
 68 |         value: &CellKeyValue,
 69 |     ) -> Result<(), Error> {
 70 |         if !self.recovered_only || value.has_or_is_recovered() {
 71 |             self.index += 1;
 72 |             writeln!(
 73 |                 self.writer,
 74 |                 "{index}\t{key_path}\t\t{value_name}\t{value_data}\t\t{status:?}\t{prev_seq_num}\t{mod_seq_num}\t\t\t{value_type}\t{logs}",
 75 |                 index = self.index,
 76 |                 key_path = util::escape_string(&cell_key_node.path),
 77 |                 value_name = util::escape_string(&value.get_pretty_name()),
 78 |                 value_data = util::escape_string(&value.get_content().0.to_string()),
 79 |                 status = value.cell_state,
 80 |                 prev_seq_num = Self::get_sequence_num_string(value.sequence_num),
 81 |                 mod_seq_num = Self::get_sequence_num_string(value.updated_by_sequence_num),
 82 |                 value_type = value.get_content().0.get_type(),
 83 |                 logs = util::escape_string(&value.logs.to_string())
 84 |             )?;
 85 |         }
 86 |         Ok(())
 87 |     }
 88 | 
 89 |     fn write_key_tsv(
 90 |         &mut self,
 91 |         cell_key_node: &CellKeyNode,
 92 |         key_modified: bool,
 93 |     ) -> Result<(), Error> {
 94 |         if !self.recovered_only || cell_key_node.has_or_is_recovered() {
 95 |             let mut logs = cell_key_node.logs.clone();
 96 |             self.index += 1;
 97 |             writeln!(
 98 |                 self.writer,
 99 |                 "{index}\t{key_path}\t{subkey_count}\t\t\t{timestamp}\t{status:?}\t{prev_seq_num}\t{mod_seq_num}\t{flags:?}\t{access_flags:?}\t\t{logs}",
100 |                 index = self.index,
101 |                 key_path = util::escape_string(&cell_key_node.path),
102 |                 subkey_count = &cell_key_node.cell_sub_key_offsets_absolute.len(),
103 |                 timestamp = util::format_date_time(cell_key_node.last_key_written_date_and_time()),
104 |                 status = cell_key_node.cell_state,
105 |                 prev_seq_num = Self::get_sequence_num_string(cell_key_node.sequence_num),
106 |                 mod_seq_num = Self::get_sequence_num_string(cell_key_node.updated_by_sequence_num),
107 |                 flags = cell_key_node.key_node_flags(&mut logs),
108 |                 access_flags = cell_key_node.access_flags(&mut logs),
109 |                 logs = util::escape_string(&cell_key_node.logs.to_string())
110 |             )?;
111 | 
112 |             for sub_key in &cell_key_node.versions {
113 |                 self.write_key_tsv(sub_key, true)?;
114 |             }
115 |         }
116 | 
117 |         if !key_modified {
118 |             // don't output values for modified keys; current/modified/deleted vals will be output via the current version of the key
119 |             for value in cell_key_node.value_iter() {
120 |                 self.write_value_tsv(cell_key_node, &value)?;
121 | 
122 |                 for sub_value in &value.versions {
123 |                     self.write_value_tsv(cell_key_node, sub_value)?;
124 |                 }
125 |             }
126 |         }
127 |         Ok(())
128 |     }
129 | 
130 |     fn get_sequence_num_string(seq_num: Option<u32>) -> String {
131 |         match seq_num {
132 |             Some(seq_num) => format!("{}", seq_num),
133 |             _ => String::new(),
134 |         }
135 |     }
136 | }
137 | 


--------------------------------------------------------------------------------
/src/sub_key_list_lh.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | use crate::hive_bin_cell;
 18 | use nom::{
 19 |     bytes::complete::tag,
 20 |     number::complete::{le_i32, le_u16, le_u32},
 21 |     IResult, Parser,
 22 | };
 23 | use serde::Serialize;
 24 | 
// Subkeys list with name hashes ("lh" signature)
#[derive(Debug, Eq, PartialEq, Serialize)]
pub struct SubKeyListLh {
    pub size: u32,  // Absolute value of the signed cell size read from the cell header
    pub count: u16, // Number of items, as read from the cell
    pub items: Vec<SubKeyListLhItem>, // Vec size = count
}
 32 | 
 33 | impl SubKeyListLh {
 34 |     /// Uses nom to parse an lh sub key list (lh) hive bin cell.
 35 |     fn from_bytes_internal(input: &[u8]) -> IResult<&[u8], Self> {
 36 |         let (input, size) = le_i32(input)?;
 37 |         let (input, _signature) = tag("lh")(input)?;
 38 |         let (input, count) = le_u16(input)?;
 39 |         let (input, items) =
 40 |             nom::multi::count(SubKeyListLhItem::from_bytes(), count.into()).parse(input)?;
 41 |         Ok((
 42 |             input,
 43 |             SubKeyListLh {
 44 |                 size: size.unsigned_abs(),
 45 |                 count,
 46 |                 items,
 47 |             },
 48 |         ))
 49 |     }
 50 | 
 51 |     pub(crate) fn from_bytes(
 52 |     ) -> impl Fn(&[u8]) -> IResult<&[u8], Box<dyn hive_bin_cell::CellSubKeyList>> {
 53 |         |input: &[u8]| {
 54 |             let (input, ret) = SubKeyListLh::from_bytes_internal(input)?;
 55 |             Ok((input, Box::new(ret)))
 56 |         }
 57 |     }
 58 | }
 59 | 
 60 | impl hive_bin_cell::CellSubKeyList for SubKeyListLh {
 61 |     fn size(&self) -> u32 {
 62 |         self.size
 63 |     }
 64 | 
 65 |     fn get_offset_list(&self, hbin_offset_absolute: u32) -> Vec<u32> {
 66 |         self.items
 67 |             .iter()
 68 |             .filter_map(|x| {
 69 |                 x.named_key_offset_relative
 70 |                     .checked_add(hbin_offset_absolute)
 71 |             })
 72 |             .collect()
 73 |     }
 74 | }
 75 | 
// A single entry of an lh sub key list: key offset plus a hash of the key name.
#[derive(Debug, Eq, PartialEq, Serialize)]
pub struct SubKeyListLhItem {
    pub named_key_offset_relative: u32, // The offset value is in bytes and relative from the start of the hive bin data
    pub name_hash: u32, // Hash of a key name string (used to speed up lookups). A different hash function is used for different sub key list types.
}
 81 | 
 82 | impl SubKeyListLhItem {
 83 |     fn from_bytes() -> impl Fn(&[u8]) -> IResult<&[u8], Self> {
 84 |         |input: &[u8]| {
 85 |             let (input, named_key_offset_relative) = le_u32(input)?;
 86 |             let (input, name_hash) = le_u32(input)?;
 87 |             Ok((
 88 |                 input,
 89 |                 SubKeyListLhItem {
 90 |                     named_key_offset_relative,
 91 |                     name_hash,
 92 |                 },
 93 |             ))
 94 |         }
 95 |     }
 96 | }
 97 | 
 98 | #[cfg(test)]
 99 | mod tests {
100 |     use super::*;
101 |     use crate::hive_bin_cell::CellSubKeyList;
102 | 
103 |     #[test]
104 |     fn test_sub_key_list_lh_traits() {
105 |         let lh = SubKeyListLh {
106 |             size: 64,
107 |             count: 2,
108 |             items: vec![
109 |                 SubKeyListLhItem {
110 |                     named_key_offset_relative: 12345,
111 |                     name_hash: 1111,
112 |                 },
113 |                 SubKeyListLhItem {
114 |                     named_key_offset_relative: 54321,
115 |                     name_hash: 2222,
116 |                 },
117 |             ],
118 |         };
119 |         assert_eq!(lh.size, lh.size());
120 |         assert_eq!(vec![16441, 58417], lh.get_offset_list(4096));
121 |     }
122 | 
123 |     #[test]
124 |     fn test_parse_sub_key_list_lh() {
125 |         let f = std::fs::read("test_data/lh_block").unwrap();
126 |         let slice = &f[..];
127 |         let (_, key_list) = SubKeyListLh::from_bytes_internal(slice).unwrap();
128 | 
129 |         let expected_output = SubKeyListLh {
130 |             size: 96,
131 |             count: 8,
132 |             items: vec![
133 |                 SubKeyListLhItem {
134 |                     named_key_offset_relative: 4600,
135 |                     name_hash: 129374869,
136 |                 },
137 |                 SubKeyListLhItem {
138 |                     named_key_offset_relative: 7008,
139 |                     name_hash: 97615,
140 |                 },
141 |                 SubKeyListLhItem {
142 |                     named_key_offset_relative: 7536,
143 |                     name_hash: 397082278,
144 |                 },
145 |                 SubKeyListLhItem {
146 |                     named_key_offset_relative: 7192,
147 |                     name_hash: 2451360315,
148 |                 },
149 |                 SubKeyListLhItem {
150 |                     named_key_offset_relative: 7440,
151 |                     name_hash: 235888890,
152 |                 },
153 |                 SubKeyListLhItem {
154 |                     named_key_offset_relative: 6376,
155 |                     name_hash: 2289207844,
156 |                 },
157 |                 SubKeyListLhItem {
158 |                     named_key_offset_relative: 7096,
159 |                     name_hash: 2868760012,
160 |                 },
161 |                 SubKeyListLhItem {
162 |                     named_key_offset_relative: 7352,
163 |                     name_hash: 123397,
164 |                 },
165 |             ],
166 |         };
167 |         assert_eq!(expected_output, key_list);
168 |     }
169 | }
170 | 


--------------------------------------------------------------------------------
/src/sub_key_list_ri.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | use crate::cell_key_node::CellKeyNode;
 18 | use crate::err::Error;
 19 | use crate::file_info::FileInfo;
 20 | use crate::hive_bin_cell;
 21 | use crate::state::State;
 22 | use nom::{
 23 |     bytes::complete::tag,
 24 |     number::complete::{le_i32, le_u16, le_u32},
 25 |     IResult, Parser,
 26 | };
 27 | use serde::Serialize;
 28 | 
/// List of subkeys lists (used to subdivide subkeys lists).
///
/// Each entry in `items` carries the offset of another subkey list, not of a key node.
#[derive(Debug, Eq, PartialEq, Serialize)]
pub struct SubKeyListRi {
    /// Cell size; `from_bytes` stores the absolute value of the signed size it reads.
    pub size: u32,
    /// Number of entries that `from_bytes` reads into `items`.
    pub count: u16,
    pub items: Vec<SubKeyListRiItem>, // Vec size = count
}
 36 | 
 37 | impl hive_bin_cell::CellSubKeyList for SubKeyListRi {
 38 |     fn size(&self) -> u32 {
 39 |         self.size
 40 |     }
 41 | 
 42 |     fn get_offset_list(&self, hbin_offset_absolute: u32) -> Vec<u32> {
 43 |         self.items
 44 |             .iter()
 45 |             .map(|x| x.sub_key_list_offset_relative + hbin_offset_absolute)
 46 |             .collect()
 47 |     }
 48 | }
 49 | 
 50 | impl SubKeyListRi {
 51 |     /// Uses nom to parse an ri sub key list (ri) hive bin cell.
 52 |     pub(crate) fn from_bytes(input: &[u8]) -> IResult<&[u8], Self> {
 53 |         let (input, size) = le_i32(input)?;
 54 |         let (input, _signature) = tag("ri")(input)?;
 55 |         let (input, count) = le_u16(input)?;
 56 |         let (input, list_offsets) =
 57 |             nom::multi::count(parse_sub_key_list_ri_item(), count.into()).parse(input)?;
 58 | 
 59 |         Ok((
 60 |             input,
 61 |             SubKeyListRi {
 62 |                 size: size.unsigned_abs(),
 63 |                 count,
 64 |                 items: list_offsets,
 65 |             },
 66 |         ))
 67 |     }
 68 | 
 69 |     pub(crate) fn parse_offsets(
 70 |         &self,
 71 |         file_info: &FileInfo,
 72 |         state: &mut State,
 73 |     ) -> Result<Vec<u32>, Error> {
 74 |         let mut list: Vec<u32> = Vec::new();
 75 |         for item in self.items.iter() {
 76 |             let mut sub_list = CellKeyNode::parse_sub_key_list(
 77 |                 file_info,
 78 |                 state,
 79 |                 item.sub_key_list_offset_relative,
 80 |             )?;
 81 |             list.append(&mut sub_list);
 82 |         }
 83 |         Ok(list)
 84 |     }
 85 | }
 86 | 
/// A single entry of a `SubKeyListRi`: the location of one nested subkey list.
#[derive(Debug, Eq, PartialEq, Serialize)]
pub struct SubKeyListRiItem {
    pub sub_key_list_offset_relative: u32, // The offset value is in bytes and relative from the start of the hive bin data
}
 91 | 
 92 | fn parse_sub_key_list_ri_item() -> impl Fn(&[u8]) -> IResult<&[u8], SubKeyListRiItem> {
 93 |     |input: &[u8]| {
 94 |         let (input, sub_key_list_offset_relative) = le_u32(input)?;
 95 | 
 96 |         Ok((
 97 |             input,
 98 |             SubKeyListRiItem {
 99 |                 sub_key_list_offset_relative,
100 |             },
101 |         ))
102 |     }
103 | }
104 | 
#[cfg(test)]
mod tests {
    use super::*;
    use crate::hive_bin_cell::CellSubKeyList;

    /// Checks the `CellSubKeyList` trait methods on a hand-built list.
    #[test]
    fn test_sub_key_list_ri_traits() {
        let ri = SubKeyListRi {
            size: 64,
            count: 2,
            items: vec![
                SubKeyListRiItem {
                    sub_key_list_offset_relative: 12345,
                },
                SubKeyListRiItem {
                    sub_key_list_offset_relative: 54321,
                },
            ],
        };
        assert_eq!(ri.size, ri.size());
        // Expected values are the relative offsets plus the 4096 passed in.
        assert_eq!(vec![16441, 58417], ri.get_offset_list(4096));
    }

    /// Parses a raw `ri` cell and checks both the parsed list and the unconsumed bytes.
    #[test]
    fn test_parse_sub_key_list_ri() {
        // Layout: size (-48 as little-endian i32), "ri" (114, 105), count (9),
        // nine u32 offsets, then four trailing bytes that must be left unparsed.
        let slice = [
            208, 255, 255, 255, 114, 105, 9, 0, 32, 192, 0, 0, 32, 176, 2, 0, 32, 112, 3, 0, 32,
            48, 4, 0, 32, 240, 4, 0, 32, 176, 5, 0, 32, 112, 6, 0, 32, 48, 7, 0, 32, 128, 1, 0, 56,
            0, 0, 0,
        ];
        let ret = SubKeyListRi::from_bytes(&slice);
        let expected_output = SubKeyListRi {
            size: 48,
            count: 9,
            items: vec![
                SubKeyListRiItem {
                    sub_key_list_offset_relative: 49184,
                },
                SubKeyListRiItem {
                    sub_key_list_offset_relative: 176160,
                },
                SubKeyListRiItem {
                    sub_key_list_offset_relative: 225312,
                },
                SubKeyListRiItem {
                    sub_key_list_offset_relative: 274464,
                },
                SubKeyListRiItem {
                    sub_key_list_offset_relative: 323616,
                },
                SubKeyListRiItem {
                    sub_key_list_offset_relative: 372768,
                },
                SubKeyListRiItem {
                    sub_key_list_offset_relative: 421920,
                },
                SubKeyListRiItem {
                    sub_key_list_offset_relative: 471072,
                },
                SubKeyListRiItem {
                    sub_key_list_offset_relative: 98336,
                },
            ],
        };
        let remaining = [56, 0, 0, 0];
        let expected = Ok((&remaining[..], expected_output));

        assert_eq!(expected, ret);
    }
}
175 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Notatin
  2 | 
  3 | [![Build Status](https://github.com/strozfriedberg/notatin/actions/workflows/pipeline.yml/badge.svg)](https://github.com/strozfriedberg/notatin/actions)
  4 | [![crates.io](https://img.shields.io/crates/v/notatin.svg)](https://docs.rs/notatin/latest/notatin/)
  5 | 
  6 | Notatin is a Rust library for parsing offline Windows Registry files. It includes Python bindings for the library (pynotatin) and binaries for working directly with registry files.
  7 | 
  8 | ## Features
  9 | 
 10 | - Implemented using 100% safe Rust, and works on all platforms supported by Rust (that have stdlib). Tested in Windows and Ubuntu.
 11 | - Supports applying transaction logs, and recovering deleted and modified keys and values.
 12 | - Supports exporting to JSONL, XLSX, TSV, and Eric Zimmerman's common registry format (<https://github.com/EricZimmerman/Registry>).
 13 | - Python bindings are included in the project (pynotatin).
 14 | 
 15 | ### notatin (crate)
 16 | 
 17 |  `notatin` is a library that parses offline Windows Registry files.
 18 | 
 19 | ### reg_dump (utility)
 20 | 
 21 | `reg_dump` is a binary utility. It parses registry files, or a tree of registry files using the `--recurse` argument, and exports to JSONL, XLSX, TSV, or common format.
 22 | An optional key path filter may also be supplied. Optional recovery of deleted and prior versions of keys and values is also supported.
 23 | 
 24 | JSONL dumps _all_ the data. The `--full-field-info` argument will include file offset information for each field.
 25 | 
 26 | XLSX and TSV dump some of the data; the data in both outputs is the same but XLSX has context-specific formatting which is especially helpful when reviewing recovered data.
 27 | And, if you are focusing on recovered items, the `--recovered-only` argument will return only items that are modified, deleted, or that contain a modified or deleted value.
 28 | 
 29 | Common dumps what common wants.
 30 | 
 31 | ```
 32 | Usage: reg_dump [OPTIONS] --input <input> --output <output> -t <TYPE>
 33 | 
 34 | Options:
 35 |   -i, --input <input>      Base registry file, or root folder if recursing
 36 |   -o, --output <output>    Output file. or folder if recursing
 37 |   -t <TYPE>                output type [default: jsonl] [possible values: jsonl, xlsx, tsv, common]
 38 |   -r, --recurse            Recurse through input looking for registry files
 39 |       --recover            Recover deleted and versioned keys and values
 40 |       --recovered-only     Only export recovered items (applicable to tsv and xlsx output)
 41 |       --full-field-info    Get the offset and length for each key/value field (applicable to jsonl output)
 42 |   -s, --skip-logs          Skip transaction log files
 43 |   -f, --filter [<STRING>]  Key path for filter (ex: 'ControlSet001\Services')
 44 |   -h, --help               Print help
 45 |   -V, --version            Print version
 46 | ```
 47 | 
 48 | ### reg_compare (utility)
 49 | 
 50 | `reg_compare` is a binary utility. It will compare two registry files, or trees of files using `--recurse` argument (the structure of the trees must match). The default output is a report of the differences
 51 | in a format similar to that of Regshot. The `--diff` argument will format the results in a unified diff format.
 52 | 
 53 | ```
 54 | Usage: reg_compare [OPTIONS] --base <base> --compare <compare> --output <output>
 55 | 
 56 | Options:
 57 |   -b, --base <base>        Base registry file or root folder to search
 58 |   -c, --compare <compare>  Registry file or root folder to search for comparison
 59 |   -o, --output <output>    Output file or folder
 60 |   -r, --recurse            Recurse through base and comparison folders looking for registry files; file trees must match
 61 |   -f, --filter [<STRING>]  Key path for filter (ex: 'ControlSet001\Services')
 62 |   -d, --diff               Export unified diff format output
 63 |   -s, --skip-logs          Skip transaction log files
 64 |   -h, --help               Print help
 65 |   -V, --version            Print version
 66 | ```
 67 | 
 68 | ## Library usage
 69 | 
 70 | ```rust,no_run
 71 | use notatin::{
 72 |     err::Error,
 73 |     parser_builder::{ParserBuilder, ParserBuilderTrait},
 74 | };
 75 | 
 76 | fn main() -> Result<(), Error> {
 77 |     let mut parser = ParserBuilder::from_path("system")
 78 |         .recover_deleted(false)
 79 |         .with_transaction_log("system.log1")
 80 |         .with_transaction_log("system.log2")
 81 |         .build()?;
 82 | 
 83 |     for key in parser.iter() {
 84 |         println!("{}", key.path);
 85 |         for value in key.value_iter() {
 86 |             println!("\t{} {:?}", value.value_name, value.get_content());
 87 |         }
 88 |     }
 89 |     Ok(())
 90 | }
 91 | ```
 92 | 
 93 | Opening files and iterating the results is intended to be straightforward.
 94 | By default, iteration is preorder (parents before children, as shown in the code sample above). Postorder traversal (children before parents) is available as well:
 95 | 
 96 | ```rust,no_run
 97 | for key in parser.iter_postorder() {
 98 |     //...
 99 | }
100 | ```
101 | 
102 | Result filters are optional, but they can speed up processing as Notatin will skip parsing what doesn't match.
103 | Filters may include regular expressions and/or literal paths and are applied at iteration time.
104 | 
105 | ```rust,no_run
106 | let filter = FilterBuilder::new()
107 |     .add_literal_segment("control Panel")
108 |     .add_regex_segment("access.*")
109 |     .add_regex_segment("keyboard.+")
110 |     .return_child_keys(false)
111 |     .build();
112 | ```
113 | 
114 | ### pynotatin (Python bindings)
115 | 
116 | Please see the pynotatin README.md for details on using pynotatin.
117 | 
118 | ## What is Notatin?
119 | 
120 |  _Notatin_ is another name for the enzyme glucose oxidase. Glucose oxidase catalyzes the oxidation of glucose to hydrogen peroxide.
121 |  It is present in honey because honeybees synthesize the enzyme and deposit it into the honey, where it acts as a natural preservative.
122 |  So, Notatin helps preserve things in hives.
123 | 
124 | - <https://en.wikipedia.org/wiki/Glucose_oxidase>
125 | - <https://en.wikipedia.org/wiki/Windows_Registry#Hives>
126 | 
127 | ## Copyright
128 | 
129 |  Copyright 2025 LevelBlue. Notatin is licensed under the Apache License, Version 2.0.
130 | 


--------------------------------------------------------------------------------
/.gitlab-ci.yml:
--------------------------------------------------------------------------------
  1 | ---
  2 | stages:
  3 |   - clear_approval
  4 |   - build
  5 |   - test
  6 | 
  7 | variables:
  8 |   FULL_BUILD:
  9 |     value: "false"
 10 |     options:
 11 |       - "true"
 12 |       - "false"
 13 |     description: "Whether to run a full build, including Windows E2E"
 14 |   PENGUIN_BUILD:
 15 |     value: "false"
 16 |     options:
 17 |       - "true"
 18 |       - "false"
 19 |     description: "Whether to run a penguin_ci build"
 20 |   BUILD_LOCATION:
 21 |     value: "staging"
 22 |     options:
 23 |       - "production"
 24 |       - "staging"
 25 |     description: "Where to build"
 26 | 
 27 | build:
 28 |   stage: build
 29 |   rules:
 30 |     - if: $CI_PIPELINE_SOURCE == "merge_request_event"
 31 |       when: manual
 32 |   variables:
 33 |     JOB_TYPE: "build"
 34 |     MERGE_PROJECT: $CI_MERGE_REQUEST_PROJECT_PATH
 35 |     MERGE_PROJECT_ID: $CI_MERGE_REQUEST_PROJECT_ID
 36 |     MERGE_SOURCE_BRANCH: $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME
 37 |     MERGE_TARGET_BRANCH: $CI_MERGE_REQUEST_TARGET_BRANCH_NAME
 38 |     MERGE_REQUEST_IID: $CI_MERGE_REQUEST_IID
 39 |     MERGE_SOURCE_COMMIT: $CI_COMMIT_SHA
 40 |     SOURCE_HEAD_COMMIT: $CI_MERGE_REQUEST_SOURCE_BRANCH_SHA
 41 |     BUILD_LOCATION: "production"
 42 |   trigger:
 43 |     project: soldev/cyberdev/make_world
 44 |     branch: $MAKE_WORLD_DEFAULT_BRANCH
 45 |     strategy: depend
 46 | 
 47 | build (penguin ci):
 48 |   stage: build
 49 |   rules:
 50 |     - if: $CI_PIPELINE_SOURCE == "merge_request_event"
 51 |       when: manual
 52 |   variables:
 53 |     JOB_TYPE: "build"
 54 |     MERGE_PROJECT: $CI_MERGE_REQUEST_PROJECT_PATH
 55 |     MERGE_PROJECT_ID: $CI_MERGE_REQUEST_PROJECT_ID
 56 |     MERGE_SOURCE_BRANCH: $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME
 57 |     MERGE_TARGET_BRANCH: $CI_MERGE_REQUEST_TARGET_BRANCH_NAME
 58 |     MERGE_REQUEST_IID: $CI_MERGE_REQUEST_IID
 59 |     MERGE_SOURCE_COMMIT: $CI_COMMIT_SHA
 60 |     SOURCE_HEAD_COMMIT: $CI_MERGE_REQUEST_SOURCE_BRANCH_SHA
 61 |     PENGUIN_BUILD: "true"
 62 |     BUILD_LOCATION: "production"
 63 |   trigger:
 64 |     project: soldev/cyberdev/make_world
 65 |     branch: $MAKE_WORLD_DEFAULT_BRANCH
 66 |     strategy: depend
 67 | 
 68 | build (full e2e):
 69 |   stage: build
 70 |   rules:
 71 |     - if: $CI_PIPELINE_SOURCE == "merge_request_event"
 72 |       when: manual
 73 |   variables:
 74 |     JOB_TYPE: "build"
 75 |     MERGE_PROJECT: $CI_MERGE_REQUEST_PROJECT_PATH
 76 |     MERGE_PROJECT_ID: $CI_MERGE_REQUEST_PROJECT_ID
 77 |     MERGE_SOURCE_BRANCH: $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME
 78 |     MERGE_TARGET_BRANCH: $CI_MERGE_REQUEST_TARGET_BRANCH_NAME
 79 |     MERGE_REQUEST_IID: $CI_MERGE_REQUEST_IID
 80 |     MERGE_SOURCE_COMMIT: $CI_COMMIT_SHA
 81 |     SOURCE_HEAD_COMMIT: $CI_MERGE_REQUEST_SOURCE_BRANCH_SHA
 82 |     FULL_BUILD: "true"
 83 |     BUILD_LOCATION: "production"
 84 |   trigger:
 85 |     project: soldev/cyberdev/make_world
 86 |     branch: $MAKE_WORLD_DEFAULT_BRANCH
 87 |     strategy: depend
 88 | 
 89 | STAGING build:
 90 |   stage: build
 91 |   rules:
 92 |     - if: $CI_PIPELINE_SOURCE == "merge_request_event"
 93 |       when: manual
 94 |   variables:
 95 |     JOB_TYPE: "build"
 96 |     MERGE_PROJECT: $CI_MERGE_REQUEST_PROJECT_PATH
 97 |     MERGE_PROJECT_ID: $CI_MERGE_REQUEST_PROJECT_ID
 98 |     MERGE_SOURCE_BRANCH: $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME
 99 |     MERGE_TARGET_BRANCH: $CI_MERGE_REQUEST_TARGET_BRANCH_NAME
100 |     MERGE_REQUEST_IID: $CI_MERGE_REQUEST_IID
101 |     MERGE_SOURCE_COMMIT: $CI_COMMIT_SHA
102 |     SOURCE_HEAD_COMMIT: $CI_MERGE_REQUEST_SOURCE_BRANCH_SHA
103 |     BUILD_LOCATION: "staging"
104 |   trigger:
105 |     project: soldev/cyberdev/make_world
106 |     branch: $MAKE_WORLD_DEFAULT_BRANCH
107 |     strategy: depend
108 | 
109 | STAGING (penguin ci):
110 |   stage: build
111 |   rules:
112 |     - if: $CI_PIPELINE_SOURCE == "merge_request_event"
113 |       when: manual
114 |   variables:
115 |     JOB_TYPE: "build"
116 |     MERGE_PROJECT: $CI_MERGE_REQUEST_PROJECT_PATH
117 |     MERGE_PROJECT_ID: $CI_MERGE_REQUEST_PROJECT_ID
118 |     MERGE_SOURCE_BRANCH: $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME
119 |     MERGE_TARGET_BRANCH: $CI_MERGE_REQUEST_TARGET_BRANCH_NAME
120 |     MERGE_REQUEST_IID: $CI_MERGE_REQUEST_IID
121 |     MERGE_SOURCE_COMMIT: $CI_COMMIT_SHA
122 |     SOURCE_HEAD_COMMIT: $CI_MERGE_REQUEST_SOURCE_BRANCH_SHA
123 |     PENGUIN_BUILD: "true"
124 |     BUILD_LOCATION: "staging"
125 |   trigger:
126 |     project: soldev/cyberdev/make_world
127 |     branch: $MAKE_WORLD_DEFAULT_BRANCH
128 |     strategy: depend
129 | 
130 | STAGING (full e2e):
131 |   stage: build
132 |   rules:
133 |     - if: $CI_PIPELINE_SOURCE == "merge_request_event"
134 |       when: manual
135 |   variables:
136 |     JOB_TYPE: "build"
137 |     MERGE_PROJECT: $CI_MERGE_REQUEST_PROJECT_PATH
138 |     MERGE_PROJECT_ID: $CI_MERGE_REQUEST_PROJECT_ID
139 |     MERGE_SOURCE_BRANCH: $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME
140 |     MERGE_TARGET_BRANCH: $CI_MERGE_REQUEST_TARGET_BRANCH_NAME
141 |     MERGE_REQUEST_IID: $CI_MERGE_REQUEST_IID
142 |     MERGE_SOURCE_COMMIT: $CI_COMMIT_SHA
143 |     SOURCE_HEAD_COMMIT: $CI_MERGE_REQUEST_SOURCE_BRANCH_SHA
144 |     FULL_BUILD: "true"
145 |     BUILD_LOCATION: "staging"
146 |   trigger:
147 |     project: soldev/cyberdev/make_world
148 |     branch: $MAKE_WORLD_DEFAULT_BRANCH
149 |     strategy: depend
150 | 
151 | 
152 | # update submodule ref in make_world when an MR is merged
153 | update make_world:
154 |   stage: build
155 |   rules:
156 |     - if: $CI_PIPELINE_SOURCE == "push" && $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
157 |   variables:
158 |     JOB_TYPE: "update"
159 |     MERGE_PROJECT: $CI_PROJECT_NAME
160 |     MERGE_COMMIT: $CI_COMMIT_SHA
161 |     MERGE_COMMIT_MESSAGE: $CI_COMMIT_MESSAGE
162 |   trigger:
163 |     project: soldev/cyberdev/make_world
164 |     branch: $MAKE_WORLD_DEFAULT_BRANCH
165 |     strategy: depend
166 | 
167 | remove build approval:
168 |   stage: clear_approval
169 |   rules:
170 |     - if: $CI_PIPELINE_SOURCE == "merge_request_event"
171 |   variables:
172 |     JOB_TYPE: "unapprove"
173 |     MERGE_PROJECT_ID: $CI_MERGE_REQUEST_PROJECT_ID
174 |     MERGE_REQUEST_IID: $CI_MERGE_REQUEST_IID
175 |   trigger:
176 |     project: soldev/cyberdev/make_world
177 |     branch: $MAKE_WORLD_DEFAULT_BRANCH
178 |     strategy: depend
179 | 
180 | 


--------------------------------------------------------------------------------
/src/marvin32.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | /*
 18 |  Based on:
 19 | 
 20 |     https://github.com/dotnet/coreclr/blob/v1.0.0/src/inc/marvin32.h
 21 |     https://github.com/dotnet/coreclr/blob/v1.0.0/src/vm/marvin32.cpp
 22 |     https://github.com/mono/corefx/blob/c4eeab9fc2faa0195a812e552cd73ee298d39386/src/Common/tests/Tests/System/MarvinTests.cs
 23 | 
 24 |  Under the license:
 25 | 
 26 |     Licensed to the .NET Foundation under one or more agreements.
 27 |     The .NET Foundation licenses this file to you under the MIT license.
 28 | */
 29 | 
 30 | use std::convert::TryInto;
 31 | 
 32 | macro_rules! BLOCK {
 33 |     ($a:ident, $b:ident) => {
 34 |         $b ^= $a;
 35 |         $a = $a.rotate_left(20);
 36 |         $a = $a.wrapping_add($b);
 37 |         $b = $b.rotate_left(9);
 38 |         $b ^= $a;
 39 |         $a = $a.rotate_left(27);
 40 |         $a = $a.wrapping_add($b);
 41 |         $b = $b.rotate_left(19);
 42 |     };
 43 | }
 44 | 
 45 | /// Computes a 64-bit Marvin32 hash
 46 | pub(crate) fn marvin32(seed: u64, data: &[u8], mut dlen: usize) -> u64 {
 47 |     let mut s0: u32 = (seed & 0xFFFFFFFF) as u32;
 48 |     let mut s1: u32 = (seed >> 32) as u32;
 49 | 
 50 |     let mut i = 0;
 51 | 
 52 |     while dlen > 3 {
 53 |         s0 = s0.wrapping_add(u32::from_le_bytes(data[i..i + 4].try_into().unwrap()));
 54 |         BLOCK!(s0, s1);
 55 |         i += 4;
 56 |         dlen -= 4;
 57 |     }
 58 | 
 59 |     s0 = s0.wrapping_add(match dlen {
 60 |         0 => 0x80,
 61 |         1 => 0x8000 | (data[i] as u32),
 62 |         2 => 0x800000 | (u16::from_le_bytes(data[i..i + 2].try_into().unwrap()) as u32),
 63 |         3 => {
 64 |             0x80000000
 65 |                 | ((data[i + 2] as u32) << 16)
 66 |                 | (u16::from_le_bytes(data[i..i + 2].try_into().unwrap()) as u32)
 67 |         }
 68 |         _ => unreachable!(),
 69 |     });
 70 | 
 71 |     BLOCK!(s0, s1);
 72 |     BLOCK!(s0, s1);
 73 | 
 74 |     (s0 as u64) | ((s1 as u64) << 32)
 75 | }
 76 | 
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `marvin32` against fixed expected hashes for three seeds and inputs of
    /// length 0 through 7 plus a 256-byte buffer.
    // NOTE(review): the vectors presumably come from the MarvinTests.cs file referenced in
    // the header comment of this file — confirm.
    #[test]
    fn test_marvin32() {
        let seed_1: u64 = 0x4FB61A001BDBCC;
        let seed_2: u64 = 0x804FB61A001BDBCC;
        let seed_3: u64 = 0x804FB61A801BDBCC;

        let data_0: [u8; 0] = [];
        let data_1 = [0xAF];
        let data_2 = [0xE7, 0x0F];
        let data_3 = [0x37, 0xF4, 0x95];
        let data_4 = [0x86, 0x42, 0xDC, 0x59];
        let data_5 = [0x15, 0x3F, 0xB7, 0x98, 0x26];
        let data_6 = [0x09, 0x32, 0xE6, 0x24, 0x6C, 0x47];
        let data_7 = [0xAB, 0x42, 0x7E, 0xA8, 0xD1, 0x0F, 0xC7];

        // Every byte value 0x00..=0xFF once, in ascending order.
        let data_256: [u8; 256] = (0..=255).collect::<Vec<_>>().try_into().unwrap();

        assert_eq!(0x30ED35C100CD3C7D, marvin32(seed_1, &data_0, 0));
        assert_eq!(0x48E73FC77D75DDC1, marvin32(seed_1, &data_1, 1));
        assert_eq!(0xB5F6E1FC485DBFF8, marvin32(seed_1, &data_2, 2));
        assert_eq!(0xF0B07C789B8CF7E8, marvin32(seed_1, &data_3, 3));
        assert_eq!(0x7008F2E87E9CF556, marvin32(seed_1, &data_4, 4));
        assert_eq!(0xE6C08C6DA2AFA997, marvin32(seed_1, &data_5, 5));
        assert_eq!(0x6F04BF1A5EA24060, marvin32(seed_1, &data_6, 6));
        assert_eq!(0xE11847E4F0678C41, marvin32(seed_1, &data_7, 7));
        assert_eq!(0x7DFCAB33FCEAD72C, marvin32(seed_1, &data_256, 256));
        assert_eq!(0x10A9D5D3996FD65D, marvin32(seed_2, &data_0, 0));
        assert_eq!(0x68201F91960EBF91, marvin32(seed_2, &data_1, 1));
        assert_eq!(0x64B581631F6AB378, marvin32(seed_2, &data_2, 2));
        assert_eq!(0xE1F2DFA6E5131408, marvin32(seed_2, &data_3, 3));
        assert_eq!(0x36289D9654FB49F6, marvin32(seed_2, &data_4, 4));
        assert_eq!(0x0A06114B13464DBD, marvin32(seed_2, &data_5, 5));
        assert_eq!(0xD6DD5E40AD1BC2ED, marvin32(seed_2, &data_6, 6));
        assert_eq!(0xE203987DBA252FB3, marvin32(seed_2, &data_7, 7));
        assert_eq!(0x73B4AE569E0DB919, marvin32(seed_2, &data_256, 256));
        // Alternating 0x00/0xFF patterns with the third seed.
        assert_eq!(0xA37FB0DA2ECAE06C, marvin32(seed_3, &[0x00], 1));
        assert_eq!(0xFECEF370701AE054, marvin32(seed_3, &[0xFF], 1));
        assert_eq!(0xA638E75700048880, marvin32(seed_3, &[0x00, 0xFF], 2));
        assert_eq!(0xBDFB46D969730E2A, marvin32(seed_3, &[0xFF, 0x00], 2));
        assert_eq!(0x9D8577C0FE0D30BF, marvin32(seed_3, &[0xFF, 0x00, 0xFF], 3));
        assert_eq!(0x4F9FBDDE15099497, marvin32(seed_3, &[0x00, 0xFF, 0x00], 3));
        assert_eq!(
            0x24EAA279D9A529CA,
            marvin32(seed_3, &[0x00, 0xFF, 0x00, 0xFF], 4)
        );
        assert_eq!(
            0xD3BEC7726B057943,
            marvin32(seed_3, &[0xFF, 0x00, 0xFF, 0x00], 4)
        );
        assert_eq!(
            0x920B62BBCA3E0B72,
            marvin32(seed_3, &[0xFF, 0x00, 0xFF, 0x00, 0xFF], 5)
        );
        assert_eq!(
            0x1D7DDF9DFDF3C1BF,
            marvin32(seed_3, &[0x00, 0xFF, 0x00, 0xFF, 0x00], 5)
        );
        assert_eq!(
            0xEC21276A17E821A5,
            marvin32(seed_3, &[0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF], 6)
        );
        assert_eq!(
            0x6911A53CA8C12254,
            marvin32(seed_3, &[0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00], 6)
        );
        assert_eq!(
            0xFDFD187B1D3CE784,
            marvin32(seed_3, &[0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF], 7)
        );
        assert_eq!(
            0x71876F2EFB1B0EE8,
            marvin32(seed_3, &[0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00], 7)
        );
    }
}
156 | 


--------------------------------------------------------------------------------
/src/cell_value.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | use crate::field_serializers;
 18 | use crate::log::{LogCode, Logs};
 19 | use crate::util;
 20 | use serde::Serialize;
 21 | use strum_macros::IntoStaticStr;
 22 | 
/// A decoded value payload.
///
/// `IntoStaticStr` is derived so that the variant name can be obtained as a string
/// (see `get_type`).
#[derive(Clone, Debug, Eq, IntoStaticStr, PartialEq, Serialize)]
pub enum CellValue {
    None,
    // Serialized through `field_serializers::field_data_as_hex` rather than as a byte array.
    #[serde(serialize_with = "field_serializers::field_data_as_hex")]
    Binary(Vec<u8>),
    String(String),
    MultiString(Vec<String>),
    U32(u32),
    I32(i32),
    U64(u64),
    I64(i64),
    // Produced by `DecodeFormat` when a conversion fails or the value/format pair is unsupported.
    Error,
}
 36 | 
 37 | impl CellValue {
 38 |     pub fn get_type(&self) -> String {
 39 |         // take advantage of IntoStaticStr which will return the enum type as a str
 40 |         let value_type: &str = self.into();
 41 |         value_type.to_string()
 42 |     }
 43 | }
 44 | 
 45 | impl std::fmt::Display for CellValue {
 46 |     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
 47 |         match self {
 48 |             Self::Binary(v) => write!(f, "{}", util::to_hex_string(v)),
 49 |             Self::String(v) => write!(f, "{}", v),
 50 |             Self::MultiString(v) => write!(f, "{:?}", v),
 51 |             Self::U32(v) => write!(f, "{}", v),
 52 |             Self::I32(v) => write!(f, "{}", v),
 53 |             Self::U64(v) => write!(f, "{}", v),
 54 |             Self::I64(v) => write!(f, "{}", v),
 55 |             _ => write!(f, ""),
 56 |         }
 57 |     }
 58 | }
 59 | 
/// Transformations that can be applied to a `CellValue` through `decode_content`.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
pub enum DecodeFormat {
    // Binary input, decompressed with `util::decode_lznt1`.
    Lznt1,
    // String or multi-string input, passed through `util::decode_rot13`.
    Rot13,
    // Binary input, decoded into a single string with `util::from_utf16_le_string`.
    Utf16,
    // Binary input, decoded into a list of strings with `util::from_utf16_le_strings`.
    Utf16Multiple,
}
 67 | 
 68 | impl DecodeFormat {
 69 |     pub(crate) fn decode(
 70 |         &self,
 71 |         cell_value: &CellValue,
 72 |         offset: usize,
 73 |     ) -> (CellValue, Option<Logs>) {
 74 |         match self {
 75 |             DecodeFormat::Lznt1 | DecodeFormat::Utf16 | DecodeFormat::Utf16Multiple => {
 76 |                 if let CellValue::Binary(b) = cell_value {
 77 |                     self.decode_bytes(b, offset)
 78 |                 } else {
 79 |                     let mut warnings = Logs::default();
 80 |                     warnings.add(
 81 |                         LogCode::WarningConversion,
 82 |                         &"Unsupported CellValue/format pair",
 83 |                     );
 84 |                     (CellValue::Error, Some(warnings))
 85 |                 }
 86 |             }
 87 |             DecodeFormat::Rot13 => Self::decode_string(cell_value),
 88 |         }
 89 |     }
 90 | 
 91 |     fn decode_bytes(&self, value_bytes: &[u8], offset: usize) -> (CellValue, Option<Logs>) {
 92 |         let mut warnings = Logs::default();
 93 |         match self {
 94 |             DecodeFormat::Lznt1 => {
 95 |                 match util::decode_lznt1(value_bytes, offset, value_bytes.len()) {
 96 |                     Ok(decompressed) => (CellValue::Binary(decompressed), None),
 97 |                     _ => {
 98 |                         warnings.add(LogCode::WarningConversion, &"Error decompressing lznt1");
 99 |                         (CellValue::Error, Some(warnings))
100 |                     }
101 |                 }
102 |             }
103 |             DecodeFormat::Utf16 => match value_bytes.get(offset..) {
104 |                 Some(slice) => {
105 |                     let s = util::from_utf16_le_string(
106 |                         slice,
107 |                         value_bytes.len() - offset,
108 |                         &mut warnings,
109 |                         "decode_content",
110 |                     );
111 |                     (CellValue::String(s), warnings.get_option())
112 |                 }
113 |                 None => {
114 |                     warnings.add(LogCode::WarningConversion, &"Buffer too small");
115 |                     (CellValue::Error, Some(warnings))
116 |                 }
117 |             },
118 |             DecodeFormat::Utf16Multiple => match value_bytes.get(offset..) {
119 |                 Some(slice) => {
120 |                     let m = util::from_utf16_le_strings(
121 |                         slice,
122 |                         value_bytes.len(),
123 |                         &mut warnings,
124 |                         "decode_content",
125 |                     );
126 |                     (CellValue::MultiString(m), warnings.get_option())
127 |                 }
128 |                 None => {
129 |                     warnings.add(LogCode::WarningConversion, &"Buffer too small");
130 |                     (CellValue::Error, Some(warnings))
131 |                 }
132 |             },
133 |             _ => (CellValue::None, warnings.get_option()),
134 |         }
135 |     }
136 | 
137 |     fn decode_string(cell_value: &CellValue) -> (CellValue, Option<Logs>) {
138 |         match cell_value {
139 |             CellValue::String(s) => (CellValue::String(util::decode_rot13(s)), None),
140 |             CellValue::MultiString(m) => {
141 |                 let mut decoded = vec![];
142 |                 for s in m {
143 |                     decoded.push(util::decode_rot13(s));
144 |                 }
145 |                 (CellValue::MultiString(decoded), None)
146 |             }
147 |             _ => {
148 |                 let mut warnings = Logs::default();
149 |                 warnings.add(
150 |                     LogCode::WarningConversion,
151 |                     &"Unsupported CellValue/format pair",
152 |                 );
153 |                 (CellValue::Error, Some(warnings))
154 |             }
155 |         }
156 |     }
157 | }
158 | 
/// `CellValue` can be decoded directly: the work is delegated to
/// `DecodeFormat::decode`, passing this value and the requested offset.
impl DecodableValue for CellValue {
    fn decode_content(&self, format: &DecodeFormat, offset: usize) -> (CellValue, Option<Logs>) {
        format.decode(self, offset)
    }
}
164 | 
/// Implemented by types whose content can be re-interpreted using a `DecodeFormat`.
pub trait DecodableValue {
    /// Decodes the content with `format`, starting at `offset`.
    ///
    /// Returns the decoded value together with any logs produced while decoding
    /// (`None` when there were none).
    fn decode_content(&self, format: &DecodeFormat, offset: usize) -> (CellValue, Option<Logs>);
}
168 | 


--------------------------------------------------------------------------------
/src/state.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | use crate::cell_key_node::CellKeyNode;
 18 | use crate::cell_key_value::CellKeyValue;
 19 | use crate::log::Logs;
 20 | use crate::util;
 21 | use blake3::{Hash, Hasher};
 22 | use std::collections::HashMap;
 23 | 
 24 | #[derive(Clone, Debug)]
 25 | pub(crate) struct ModifiedValueMap {
 26 |     pub map: HashMap<(String, String), Vec<CellKeyValue>>,
 27 | }
 28 | 
 29 | impl ModifiedValueMap {
 30 |     pub(crate) fn new() -> Self {
 31 |         ModifiedValueMap {
 32 |             map: HashMap::new(),
 33 |         }
 34 |     }
 35 | 
 36 |     pub(crate) fn add(&mut self, key_path: &str, value_name: &str, value: CellKeyValue) {
 37 |         match self
 38 |             .map
 39 |             .get_mut(&(key_path.to_string(), value_name.to_string()))
 40 |         {
 41 |             Some(vec) => {
 42 |                 vec.push(value);
 43 |             }
 44 |             None => {
 45 |                 self.map
 46 |                     .insert((key_path.to_string(), value_name.to_string()), vec![value]);
 47 |             }
 48 |         }
 49 |     }
 50 | 
 51 |     pub(crate) fn get(&self, key_path: &str, value_name: &str) -> Option<&Vec<CellKeyValue>> {
 52 |         self.map
 53 |             .get(&(key_path.to_string(), value_name.to_string()))
 54 |     }
 55 | }
 56 | 
 57 | #[derive(Clone, Debug)]
 58 | pub(crate) struct DeletedValueMap {
 59 |     pub map: HashMap<String, Vec<CellKeyValue>>,
 60 | }
 61 | 
 62 | impl DeletedValueMap {
 63 |     pub(crate) fn new() -> Self {
 64 |         DeletedValueMap {
 65 |             map: HashMap::new(),
 66 |         }
 67 |     }
 68 | 
 69 |     pub(crate) fn add(&mut self, key_path: &str, value: CellKeyValue) {
 70 |         match self.map.get_mut(key_path) {
 71 |             Some(vec) => {
 72 |                 vec.push(value);
 73 |             }
 74 |             None => {
 75 |                 self.map.insert(key_path.to_string(), vec![value]);
 76 |             }
 77 |         }
 78 |     }
 79 | 
 80 |     pub(crate) fn get(&self, key_path: &str) -> Option<&Vec<CellKeyValue>> {
 81 |         self.map.get(key_path)
 82 |     }
 83 | 
 84 |     pub(crate) fn remove(&mut self, key_path: &str, value_name: &str, hash: &Hash) {
 85 |         if let Some(values) = self.map.get_mut(key_path) {
 86 |             for (index, value) in values.iter().enumerate() {
 87 |                 if value.detail.value_name() == value_name {
 88 |                     if let Some(value_hash) = value.hash {
 89 |                         if hash == &value_hash {
 90 |                             values.remove(index);
 91 |                             if values.is_empty() {
 92 |                                 self.map.remove(key_path);
 93 |                             }
 94 |                             break;
 95 |                         }
 96 |                     }
 97 |                 }
 98 |             }
 99 |         }
100 |     }
101 | }
102 | 
103 | #[derive(Clone, Debug)]
104 | pub(crate) struct ModifiedDeletedKeyMap {
105 |     pub map: HashMap<String, Vec<CellKeyNode>>,
106 | }
107 | 
108 | impl ModifiedDeletedKeyMap {
109 |     pub(crate) fn new() -> Self {
110 |         ModifiedDeletedKeyMap {
111 |             map: HashMap::new(),
112 |         }
113 |     }
114 | 
115 |     pub(crate) fn add(&mut self, path: &str, value: CellKeyNode) {
116 |         match self.map.get_mut(path) {
117 |             Some(vec) => {
118 |                 vec.push(value);
119 |             }
120 |             None => {
121 |                 self.map.insert(path.to_string(), vec![value]);
122 |             }
123 |         }
124 |     }
125 | 
126 |     pub(crate) fn get(&self, path: &str) -> Option<&Vec<CellKeyNode>> {
127 |         self.map.get(path)
128 |     }
129 | 
130 |     pub(crate) fn remove(&mut self, path: &str, hash: &Hash) {
131 |         let parent_path = &path[0..path.rfind('\\').unwrap_or_default()];
132 |         if let Some(keys) = self.map.get_mut(parent_path) {
133 |             for (index, key) in keys.iter().enumerate() {
134 |                 if let Some(key_hash) = key.hash {
135 |                     if hash == &key_hash && path == key.path {
136 |                         keys.remove(index);
137 |                         if keys.is_empty() {
138 |                             self.map.remove(path);
139 |                         }
140 |                         break;
141 |                     }
142 |                 }
143 |             }
144 |         }
145 |     }
146 | }
147 | 
148 | #[derive(Clone, Debug)]
149 | pub(crate) struct State {
150 |     /// Path filters don't include the root name, but the cell key's path does.
151 |     /// This is the length of that root name so we can index into the string directly.
152 |     pub root_key_path_offset: usize,
153 | 
154 |     /// `get_full_field_info` indicates if we should generate offset and length info for the structures we are reading.
155 |     /// Default is `false`
156 |     pub get_full_field_info: bool,
157 | 
158 |     pub info: Logs,
159 | 
160 |     pub hasher: Hasher,
161 | 
162 |     pub deleted_keys: ModifiedDeletedKeyMap,
163 |     pub modified_keys: ModifiedDeletedKeyMap,
164 |     pub deleted_values: DeletedValueMap,
165 |     pub modified_values: ModifiedValueMap,
166 | }
167 | 
168 | impl State {
169 |     pub(crate) fn get_root_path_offset(&mut self, key_path: &str) -> usize {
170 |         if self.root_key_path_offset == 0 {
171 |             self.root_key_path_offset = util::get_root_path_offset(key_path)
172 |         }
173 |         self.root_key_path_offset
174 |     }
175 | }
176 | 
177 | impl Default for State {
178 |     fn default() -> Self {
179 |         Self {
180 |             root_key_path_offset: 0,
181 |             get_full_field_info: false,
182 |             info: Logs::default(),
183 |             hasher: Hasher::new(),
184 |             deleted_keys: ModifiedDeletedKeyMap::new(),
185 |             modified_keys: ModifiedDeletedKeyMap::new(),
186 |             deleted_values: DeletedValueMap::new(),
187 |             modified_values: ModifiedValueMap::new(),
188 |         }
189 |     }
190 | }
191 | 


--------------------------------------------------------------------------------
/pynotatin/src/py_notatin_parser.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  *
 16 |  */
 17 | 
 18 | use crate::err::PyNotatinError;
 19 | use crate::py_notatin_content::PyNotatinContent;
 20 | use crate::py_notatin_key::PyNotatinKey;
 21 | use crate::py_notatin_value::{PyNotatinDecodeFormat, PyNotatinValue};
 22 | use crate::util::{init_logging, FileOrFileLike};
 23 | use ::notatin::{
 24 |     cell_key_node::CellKeyNode,
 25 |     err::Error,
 26 |     parser::{Parser, ParserIteratorContext},
 27 |     parser_builder::ParserBuilder,
 28 | };
 29 | use pyo3::exceptions::{PyNotImplementedError, PyRuntimeError};
 30 | use pyo3::prelude::*;
 31 | 
 32 | #[pyclass(subclass)]
 33 | /// Returns an instance of the parser.
 34 | /// Works on both a path (string), or a file-like object.
 35 | pub struct PyNotatinParser {
 36 |     pub inner: Option<Parser>,
 37 | }
 38 | 
 39 | #[pymethods]
 40 | impl PyNotatinParser {
 41 |     #[new]
 42 |     fn new(path_or_file_like: PyObject) -> PyResult<Self> {
 43 |         let parser = ParserBuilder::from_file(FileOrFileLike::to_read_seek(&path_or_file_like)?)
 44 |             .build()
 45 |             .map_err(PyNotatinError)?;
 46 |         Ok(PyNotatinParser {
 47 |             inner: Some(parser),
 48 |         })
 49 |     }
 50 | 
 51 |     /// Returns an iterator that yields reg keys as Python objects.
 52 |     fn reg_keys(&mut self) -> PyResult<Py<PyNotatinKeysIterator>> {
 53 |         self.reg_keys_iterator()
 54 |     }
 55 | 
 56 |     /// Returns the key for the `path` parameter.
 57 |     fn open(&mut self, path: &str) -> PyResult<Option<Py<PyNotatinKey>>> {
 58 |         self.key_for(|parser| parser.get_key(path, false))
 59 |     }
 60 | 
 61 |     /// Returns the root key.
 62 |     fn root(&mut self) -> PyResult<Option<Py<PyNotatinKey>>> {
 63 |         self.key_for(|parser| parser.get_root_key())
 64 |     }
 65 | 
 66 |     /// Returns the parent key for the `key` parameter.
 67 |     fn get_parent(&mut self, key: &mut PyNotatinKey) -> PyResult<Option<Py<PyNotatinKey>>> {
 68 |         self.key_for(|parser| parser.get_parent_key(&mut key.inner))
 69 |     }
 70 | 
 71 |     fn __iter__(mut slf: PyRefMut<Self>) -> PyResult<Py<PyNotatinKeysIterator>> {
 72 |         slf.reg_keys()
 73 |     }
 74 | 
 75 | fn __next__(_slf: PyRefMut<Self>) -> PyResult<Option<PyObject>> {
 76 |         Err(PyErr::new::<PyNotImplementedError, _>("Using `next()` over `PyNotatinParser` is not supported. Try iterating over `PyNotatinParser(...).reg_keys()`"))
 77 |     }
 78 | }
 79 | 
 80 | impl PyNotatinParser {
 81 |     fn key_for<F>(&mut self, func: F) -> PyResult<Option<Py<PyNotatinKey>>>
 82 |     where
 83 |         F: FnOnce(&mut Parser) -> Result<Option<CellKeyNode>, Error>
 84 |     {
 85 |         match &mut self.inner {
 86 |             Some(parser) => match func(parser) {
 87 |                 Ok(key) => match key {
 88 |                     Some(key) => Python::with_gil(|py| {
 89 |                         Ok(PyNotatinKey::from_cell_key_node(py, key).ok())
 90 |                     }),
 91 |                     _ => Ok(None)
 92 |                 },
 93 |                 Err(e) => Err(PyErr::new::<PyRuntimeError, _>(e.to_string()))
 94 |             },
 95 |             _ => Ok(None)
 96 |         }
 97 |     }
 98 | 
 99 |     /// Returns an iterator that yields reg keys as Python objects
100 |     fn reg_keys_iterator(&mut self) -> PyResult<Py<PyNotatinKeysIterator>> {
101 |         Python::with_gil(|py| {
102 |             let inner = match self.inner.take() {
103 |                 Some(inner) => inner,
104 |                 None => {
105 |                     return Err(PyErr::new::<PyRuntimeError, _>(
106 |                         "PyNotatinParser can only be used once",
107 |                     ));
108 |                 }
109 |             };
110 |             let iterator_context = ParserIteratorContext::from_parser(&inner, true, None);
111 |             Py::new(
112 |                 py,
113 |                 PyNotatinKeysIterator {
114 |                     inner,
115 |                     iterator_context,
116 |                 },
117 |             )
118 |         })
119 |     }
120 | }
121 | 
122 | #[pyclass]
123 | pub struct PyNotatinParserBuilder {
124 |     pub primary_file: PyObject,
125 |     pub recover_deleted: bool,
126 |     pub transaction_logs: Vec<PyObject>,
127 | }
128 | 
129 | #[pymethods]
130 | impl PyNotatinParserBuilder {
131 |     #[new]
132 |     fn new(path_or_file_like: PyObject) -> PyResult<Self> {
133 |         Ok(PyNotatinParserBuilder {
134 |             primary_file: path_or_file_like,
135 |             recover_deleted: false,
136 |             transaction_logs: vec![],
137 |         })
138 |     }
139 | 
140 |     pub fn recover_deleted(&mut self, recover: bool) -> PyResult<()> {
141 |         self.recover_deleted = recover;
142 |         Ok(())
143 |     }
144 | 
145 |     pub fn with_transaction_log(&mut self, log: PyObject) -> PyResult<()> {
146 |         self.transaction_logs.push(log);
147 |         Ok(())
148 |     }
149 | 
150 |     pub fn build(&self) -> PyResult<PyNotatinParser> {
151 |         let mut builder =
152 |             ParserBuilder::from_file(FileOrFileLike::to_read_seek(&self.primary_file)?);
153 |         builder.recover_deleted(self.recover_deleted);
154 |         for transaction_log in &self.transaction_logs {
155 |             builder.with_transaction_log(FileOrFileLike::to_read_seek(transaction_log)?);
156 |         }
157 |         Ok(PyNotatinParser {
158 |             inner: Some(builder.build().map_err(PyNotatinError)?),
159 |         })
160 |     }
161 | }
162 | 
163 | #[pyclass]
164 | pub struct PyNotatinKeysIterator {
165 |     inner: Parser,
166 |     iterator_context: ParserIteratorContext,
167 | }
168 | 
169 | impl PyNotatinKeysIterator {
170 |     pub(crate) fn reg_key_to_pyobject(reg_key: CellKeyNode, py: Python) -> PyObject {
171 |         match PyNotatinKey::from_cell_key_node(py, reg_key).map(|entry| entry.to_object(py)) {
172 |             Ok(py_reg_key) => py_reg_key,
173 |             Err(e) => e.to_object(py),
174 |         }
175 |     }
176 | 
177 |     fn next(&mut self) -> Option<PyObject> {
178 |         Python::with_gil(|py| {
179 |             self.inner
180 |                 .next_key_preorder(&mut self.iterator_context)
181 |                 .map(|key| Self::reg_key_to_pyobject(key, py))
182 |         })
183 |     }
184 | }
185 | 
186 | #[pymethods]
187 | impl PyNotatinKeysIterator {
188 |     fn __iter__(slf: PyRefMut<Self>) -> PyResult<Py<PyNotatinKeysIterator>> {
189 |         Ok(slf.into())
190 |     }
191 |     fn __next__(mut slf: PyRefMut<Self>) -> PyResult<Option<PyObject>> {
192 |         Ok(slf.next())
193 |     }
194 | }
195 | 
/// Parses a windows registry file.
#[pymodule]
fn notatin(py: Python, m: &PyModule) -> PyResult<()> {
    // A failure to initialise logging is deliberately ignored so the module still loads.
    init_logging(py).ok();

    // Classes exposed to Python.
    m.add_class::<PyNotatinParserBuilder>()?;
    m.add_class::<PyNotatinParser>()?;
    m.add_class::<PyNotatinKey>()?;
    m.add_class::<PyNotatinValue>()?;
    m.add_class::<PyNotatinContent>()?;
    m.add_class::<PyNotatinDecodeFormat>()?;

    Ok(())
}
210 | 


--------------------------------------------------------------------------------
/src/cell_key_security.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | use crate::err::Error;
 18 | use crate::log::Logs;
 19 | use nom::{
 20 |     bytes::complete::{tag, take},
 21 |     number::complete::{le_i32, le_u16, le_u32},
 22 |     IResult,
 23 | };
 24 | use serde::Serialize;
 25 | use std::io::Cursor;
 26 | use winstructs::security::SecurityDescriptor;
 27 | 
/// Fixed-size header fields of a key security (sk) cell.
#[derive(Debug, Eq, PartialEq, Serialize)]
pub struct CellKeySecurityDetail {
    /// Absolute value of the signed cell size read from the cell header.
    pub size: u32,
    pub unknown1: u16,
    /* Offsets in bytes, relative from the start of the hive bin's data.
    When a key security item acts as a list header, flink points to the first entry of this list.
    If a list is empty, flink points to a list header (i.e. to a current cell).
    When a key security item acts as a list entry, flink points to the next entry of this list.
    If there is no next entry in a list, flink points to a list header. */
    pub flink: u32,
    /* Offsets in bytes, relative from the start of the hive bin's data.
    When a key security item acts as a list header, blink points to the last entry of this list.
    If a list is empty, blink points to a list header (i.e. to a current cell).
    When a key security item acts as a list entry, blink points to the previous entry of this list.
    If there is no previous entry in a list, blink points to a list header. */
    pub blink: u32,
    pub reference_count: u32,
    /// Length in bytes of the security descriptor that follows the header.
    pub security_descriptor_size: u32,
}
 47 | 
 48 | #[derive(Debug, Eq, PartialEq, Serialize)]
 49 | pub struct CellKeySecurity {
 50 |     pub detail: CellKeySecurityDetail,
 51 |     pub security_descriptor: Vec<u8>,
 52 |     pub logs: Logs,
 53 | }
 54 | 
 55 | impl CellKeySecurity {
 56 |     /// Uses nom to parse a key security (sk) hive bin cell.
 57 |     fn from_bytes(input: &[u8]) -> IResult<&[u8], Self> {
 58 |         let (input, size) = le_i32(input)?;
 59 |         let (input, _signature) = tag("sk")(input)?;
 60 |         let (input, unknown1) = le_u16(input)?;
 61 |         let (input, flink) = le_u32(input)?;
 62 |         let (input, blink) = le_u32(input)?;
 63 |         let (input, reference_count) = le_u32(input)?;
 64 |         let (input, security_descriptor_size) = le_u32(input)?;
 65 |         let (input, security_descriptor) = take(security_descriptor_size)(input)?;
 66 | 
 67 |         Ok((
 68 |             input,
 69 |             CellKeySecurity {
 70 |                 detail: CellKeySecurityDetail {
 71 |                     size: size.unsigned_abs(),
 72 |                     unknown1,
 73 |                     flink,
 74 |                     blink,
 75 |                     reference_count,
 76 |                     security_descriptor_size,
 77 |                 },
 78 |                 security_descriptor: security_descriptor.to_vec(),
 79 |                 logs: Logs::default(),
 80 |             },
 81 |         ))
 82 |     }
 83 | }
 84 | 
 85 | pub(crate) fn read_cell_key_security(
 86 |     buffer: &[u8],
 87 |     security_key_offset: u32,
 88 |     hbin_offset_absolute: usize,
 89 | ) -> Result<Vec<SecurityDescriptor>, Error> {
 90 |     let mut security_descriptors = Vec::new();
 91 |     let mut offset: usize = security_key_offset as usize;
 92 |     loop {
 93 |         let slice = buffer
 94 |             .get(offset + hbin_offset_absolute..)
 95 |             .ok_or_else(|| Error::buffer("read_cell_key_security"))?;
 96 |         let (_, cell_key_security) = CellKeySecurity::from_bytes(slice)?;
 97 |         security_descriptors.push(SecurityDescriptor::from_stream(&mut Cursor::new(
 98 |             cell_key_security.security_descriptor,
 99 |         ))?);
100 | 
101 |         if cell_key_security.detail.flink == security_key_offset {
102 |             break;
103 |         }
104 |         offset = cell_key_security.detail.flink as usize;
105 |     }
106 |     Ok(security_descriptors)
107 | }
108 | 
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a captured sk cell and checks every header field and the descriptor bytes.
    #[test]
    fn test_parse_cell_key_security() {
        // Raw cell: signed size (-264), "sk" signature, header fields, then descriptor data.
        let slice = [
            0xF8, 0xFE, 0xFF, 0xFF, 0x73, 0x6B, 0x00, 0x00, 0x00, 0x8D, 0x03, 0x00, 0x60, 0x95,
            0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x9C, 0x00, 0x00, 0x00, 0x01, 0x00, 0x04, 0x90,
            0x80, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x00,
            0x00, 0x00, 0x02, 0x00, 0x6C, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x03, 0x24, 0x00,
            0x3F, 0x00, 0x0F, 0x00, 0x01, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x15, 0x00,
            0x00, 0x00, 0x97, 0x2A, 0x67, 0x79, 0xA0, 0x54, 0x4A, 0xB6, 0x19, 0x87, 0x28, 0x7E,
            0x51, 0x04, 0x00, 0x00, 0x00, 0x03, 0x14, 0x00, 0x3F, 0x00, 0x0F, 0x00, 0x01, 0x01,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x12, 0x00, 0x00, 0x00, 0x00, 0x03, 0x18, 0x00,
            0x3F, 0x00, 0x0F, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x20, 0x00,
            0x00, 0x00, 0x20, 0x02, 0x00, 0x00, 0x00, 0x03, 0x14, 0x00, 0x19, 0x00, 0x02, 0x00,
            0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x0C, 0x00, 0x00, 0x00, 0x01, 0x02,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x20, 0x00, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00,
            0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x05, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x3F, 0x00, 0x0F, 0x00,
            0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x20, 0x00, 0x00, 0x00, 0x20, 0x02,
            0x00, 0x00, 0x00, 0x0A, 0x14, 0x00, 0x00, 0x00, 0x00, 0x10, 0x01, 0x01, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x05, 0x20, 0x00, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x05, 0x12, 0x00, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00,
        ];

        let (_, sec) = CellKeySecurity::from_bytes(&slice).unwrap();

        // Only the first 156 bytes after the header (security_descriptor_size) are kept.
        let expected_output = CellKeySecurity {
            detail: CellKeySecurityDetail {
                size: 264,
                unknown1: 0,
                flink: 232704,
                blink: 234848,
                reference_count: 1,
                security_descriptor_size: 156,
            },
            security_descriptor: vec![
                1, 0, 4, 144, 128, 0, 0, 0, 144, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 2, 0, 108, 0, 4,
                0, 0, 0, 0, 3, 36, 0, 63, 0, 15, 0, 1, 5, 0, 0, 0, 0, 0, 5, 21, 0, 0, 0, 151, 42,
                103, 121, 160, 84, 74, 182, 25, 135, 40, 126, 81, 4, 0, 0, 0, 3, 20, 0, 63, 0, 15,
                0, 1, 1, 0, 0, 0, 0, 0, 5, 18, 0, 0, 0, 0, 3, 24, 0, 63, 0, 15, 0, 1, 2, 0, 0, 0,
                0, 0, 5, 32, 0, 0, 0, 32, 2, 0, 0, 0, 3, 20, 0, 25, 0, 2, 0, 1, 1, 0, 0, 0, 0, 0,
                5, 12, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 5, 32, 0, 0, 0, 32, 2, 0, 0, 1, 1, 0, 0, 0, 0,
                0, 5, 18, 0, 0, 0,
            ],
            logs: Logs::default(),
        };

        assert_eq!(expected_output, sec);
    }
}
163 | 


--------------------------------------------------------------------------------
/src/cell_big_data.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | use crate::cell_key_value::{CellKeyValue, CellKeyValueDataTypes};
 18 | use crate::err::Error;
 19 | use crate::file_info::FileInfo;
 20 | use crate::log::Logs;
 21 | use nom::{
 22 |     bytes::complete::tag,
 23 |     multi::count,
 24 |     number::complete::{le_i32, le_u16, le_u32},
 25 |     IResult, Parser,
 26 | };
 27 | use serde::Serialize;
 28 | 
/* List of data segments. Big data is used to reference data larger than 16344 bytes.
When the Minor version field of the base block is greater than 3, it has the following structure: */
#[derive(Debug, Eq, PartialEq, Serialize)]
pub struct CellBigData {
    // Absolute value of the signed cell size read from the cell header.
    pub size: u32,
    // Number of 32-bit entries read from the segment list.
    pub count: u16,
    pub segment_list_offset_relative: u32, // relative to the start of the hive bin
    pub logs: Logs,
}
 38 | 
 39 | impl CellBigData {
 40 |     /// Returns a tuple of the full content buffer and the absolute data offsets
 41 |     pub(crate) fn get_big_data_bytes(
 42 |         file_info: &FileInfo,
 43 |         offset: usize,
 44 |         data_type: &CellKeyValueDataTypes,
 45 |         data_size: u32,
 46 |     ) -> Result<(Vec<u8>, Vec<usize>), Error> {
 47 |         let slice = file_info
 48 |             .buffer
 49 |             .get(offset..)
 50 |             .ok_or_else(|| Error::buffer("get_big_data_bytes"))?;
 51 |         let (_, (hive_bin_cell_big_data, _)) = CellBigData::from_bytes(slice)?;
 52 |         let (_, data_offsets_absolute) =
 53 |             hive_bin_cell_big_data.parse_big_data_offsets(file_info)?;
 54 | 
 55 |         let mut big_data_buffer: Vec<u8> = Vec::new();
 56 |         let mut data_size_remaining = data_size;
 57 |         for offset in data_offsets_absolute.iter() {
 58 |             if data_size_remaining > 0 {
 59 |                 let (input, size) = CellBigData::parse_big_data_size(file_info, *offset)?;
 60 |                 let size_to_read = std::cmp::min(
 61 |                     size.unsigned_abs(),
 62 |                     std::cmp::min(data_size_remaining, CellKeyValue::BIG_DATA_SIZE_THRESHOLD),
 63 |                 );
 64 |                 let slice = input
 65 |                     .get(..size_to_read as usize)
 66 |                     .ok_or_else(|| Error::buffer("get_big_data_bytes"))?;
 67 |                 big_data_buffer.extend_from_slice(slice);
 68 |                 data_size_remaining -= size_to_read;
 69 |             }
 70 |         }
 71 |         Ok((
 72 |             data_type.get_value_bytes(&big_data_buffer[..]),
 73 |             data_offsets_absolute.iter().map(|x| *x as usize).collect(),
 74 |         ))
 75 |     }
 76 | 
 77 |     pub(crate) fn is_big_data_block(input: &[u8]) -> bool {
 78 |         match input.get(4..) {
 79 |             Some(slice) => {
 80 |                 tag::<&str, &[u8], nom::error::Error<&[u8]>>("db")(slice).is_ok()
 81 |             }
 82 |             None => false,
 83 |         }
 84 |     }
 85 | 
 86 |     /// Uses nom to parse a big data (db) hive bin cell. Returns a tuple of Self and the starting ptr offset
 87 |     fn from_bytes(input: &[u8]) -> IResult<&[u8], (Self, usize)> {
 88 |         let offset = input.as_ptr() as usize;
 89 |         let (input, size) = le_i32(input)?;
 90 |         let (input, _signature) = tag("db")(input)?;
 91 |         let (input, count) = le_u16(input)?;
 92 |         let (input, segment_list_offset_relative) = le_u32(input)?;
 93 | 
 94 |         Ok((
 95 |             input,
 96 |             (
 97 |                 CellBigData {
 98 |                     size: size.unsigned_abs(),
 99 |                     count,
100 |                     segment_list_offset_relative,
101 |                     logs: Logs::default(),
102 |                 },
103 |                 offset,
104 |             ),
105 |         ))
106 |     }
107 | 
108 |     fn parse_big_data_size(file_info: &FileInfo, offset: u32) -> IResult<&[u8], i32> {
109 |         let slice = file_info
110 |             .buffer
111 |             .get(file_info.hbin_offset_absolute + offset as usize..)
112 |             .ok_or(nom::Err::Error(nom::error::Error {
113 |                 input: &file_info.buffer[..],
114 |                 code: nom::error::ErrorKind::Eof,
115 |             }))?;
116 |         le_i32(slice)
117 |     }
118 | 
119 |     fn parse_big_data_offsets<'a>(&self, file_info: &'a FileInfo) -> IResult<&'a [u8], Vec<u32>> {
120 |         let slice = file_info
121 |             .buffer
122 |             .get(file_info.hbin_offset_absolute + self.segment_list_offset_relative as usize..)
123 |             .ok_or(nom::Err::Error(nom::error::Error {
124 |                 input: &file_info.buffer[..],
125 |                 code: nom::error::ErrorKind::Eof,
126 |             }))?;
127 |         let (input, _size) = le_u32(slice)?;
128 |         let (_, list) = count(le_u32, self.count as usize).parse(input)?;
129 |         Ok((input, list))
130 |     }
131 | }
132 | 
#[cfg(test)]
mod tests {
    use super::*;

    /// The `db` signature (0x64, 0x62) must follow the 4-byte size field.
    #[test]
    fn test_is_big_data_block() {
        assert!(CellBigData::is_big_data_block(&[0, 0, 0, 0, 0x64, 0x62]));
        assert!(!CellBigData::is_big_data_block(&[0, 0, 0, 0, 0, 0]));
    }

    /// A 4-byte buffer is read as one little-endian i32 with nothing left over.
    #[test]
    fn test_parse_big_data_size() {
        let file_info = FileInfo {
            hbin_offset_absolute: 0,
            buffer: [0, 1, 2, 3].to_vec(),
        };
        let (input, size) = CellBigData::parse_big_data_size(&file_info, 0).unwrap();
        assert_eq!(size, 0x03020100);
        assert_eq!(input, &[0; 0]);
    }

    /// The first 4 bytes (the list cell size) are skipped, then `count` offsets are read.
    #[test]
    fn test_parse_big_data_offsets() {
        let file_info = FileInfo {
            hbin_offset_absolute: 0,
            buffer: [
                0xF0, 0xFF, 0xFF, 0xFF, 0x20, 0x30, 0x00, 0x00, 0x20, 0x70, 0x00, 0x00,
            ]
            .to_vec(),
        };

        let cell_big_data = CellBigData {
            size: 0,
            count: 2,
            segment_list_offset_relative: 0,
            logs: Logs::default(),
        };
        let (_, offsets) = cell_big_data.parse_big_data_offsets(&file_info).unwrap();
        assert_eq!(vec![0x00003020, 0x00007020], offsets);
    }

    /// Parses a db cell header: size -16, count 2, segment list offset 0x1D8.
    #[test]
    fn test_parse_sub_key_list_db() {
        let slice = [
            0xF0, 0xFF, 0xFF, 0xFF, 0x64, 0x62, 0x02, 0x00, 0xD8, 0x01, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00,
        ];

        let (_, (big_data, _)) = CellBigData::from_bytes(&slice).unwrap();
        let expected_output = CellBigData {
            size: 16,
            count: 2,
            segment_list_offset_relative: 472,
            logs: Logs::default(),
        };

        assert_eq!(expected_output, big_data);
    }

    /// An offset (20) past the end of the 12-byte buffer yields a buffer error.
    #[test]
    fn test_get_big_data_bytes() {
        let file_info = FileInfo {
            hbin_offset_absolute: 0,
            buffer: [
                0xF0, 0xFF, 0xFF, 0xFF, 0x20, 0x30, 0x00, 0x00, 0x20, 0x70, 0x00, 0x00,
            ]
            .to_vec(),
        };
        let res =
            CellBigData::get_big_data_bytes(&file_info, 20, &CellKeyValueDataTypes::REG_DWORD, 4);
        assert_eq!(Err(Error::buffer("get_big_data_bytes")), res);
    }
}
206 | 


--------------------------------------------------------------------------------
/pynotatin/src/util.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  *
 16 |  */
 17 | 
 18 | use log::{Level, Log, Metadata, Record, SetLoggerError};
 19 | use std::{cmp::Ordering, fs::File, io::BufReader};
 20 | 
 21 | use chrono::{DateTime, Datelike, Timelike, NaiveDateTime, Utc};
 22 | use notatin::file_info::ReadSeek;
 23 | use pyo3::{PyObject, PyResult, Python, ToPyObject};
 24 | use pyo3::types::PyDateTime;
 25 | use pyo3_file::PyFileLikeObject;
 26 | 
/// Output target selector. `Python` is the only variant defined here.
#[derive(Debug)]
pub enum Output {
    Python,
}
 31 | 
/// Input handed over from Python: either a filesystem path or a Python
/// file-like object. Built by `FileOrFileLike::from_pyobject`.
#[derive(Debug)]
pub enum FileOrFileLike {
    /// The Python object could be extracted as a string; it is treated as a path.
    File(String),
    /// The Python object wrapped as a `PyFileLikeObject`.
    FileLike(PyFileLikeObject),
}
 37 | 
 38 | impl FileOrFileLike {
 39 |     pub fn from_pyobject(path_or_file_like: PyObject) -> PyResult<FileOrFileLike> {
 40 |         Python::with_gil(|py| {
 41 |             // is a path
 42 |             if let Ok(s) = path_or_file_like.extract(py) {
 43 |                 return Ok(FileOrFileLike::File(s));
 44 |             }
 45 | 
 46 |             // We only need read + seek
 47 |             match PyFileLikeObject::with_requirements(path_or_file_like, true, false, true) {
 48 |                 Ok(f) => Ok(FileOrFileLike::FileLike(f)),
 49 |                 Err(e) => Err(e),
 50 |             }
 51 |         })
 52 |     }
 53 | 
 54 |     pub(crate) fn to_read_seek(path_or_file_like: &PyObject) -> PyResult<Box<dyn ReadSeek + Send>> {
 55 |         match FileOrFileLike::from_pyobject(path_or_file_like.clone())? {
 56 |             FileOrFileLike::File(s) => {
 57 |                 let file = File::open(s)?;
 58 |                 let reader = BufReader::with_capacity(4096, file);
 59 |                 Ok(Box::new(reader) as Box<dyn ReadSeek + Send>)
 60 |             }
 61 |             FileOrFileLike::FileLike(f) => Ok(Box::new(f) as Box<dyn ReadSeek + Send>),
 62 |         }
 63 |     }
 64 | }
 65 | 
 66 | fn nanos_to_micros_round_half_even(nanos: u32) -> u32 {
 67 |     let nanos_e7 = (nanos % 1_000) / 100;
 68 |     let nanos_e6 = (nanos % 10_000) / 1000;
 69 |     let mut micros = (nanos / 10_000) * 10;
 70 |     match nanos_e7.cmp(&5) {
 71 |         Ordering::Greater => micros += nanos_e6 + 1,
 72 |         Ordering::Less => micros += nanos_e6,
 73 |         Ordering::Equal => micros += nanos_e6 + (nanos_e6 % 2),
 74 |     }
 75 |     micros
 76 | }
 77 | 
 78 | fn date_splitter(date: &DateTime<Utc>) -> (i64, u32) {
 79 |     let mut unix_time = date.timestamp();
 80 |     let mut micros = nanos_to_micros_round_half_even(date.timestamp_subsec_nanos());
 81 | 
 82 |     let inc_sec = micros / 1_000_000;
 83 |     micros %= 1_000_000;
 84 |     unix_time += inc_sec as i64;
 85 | 
 86 |     (unix_time, micros)
 87 | }
 88 | 
 89 | fn round_to_usec_half_even(date: &DateTime<Utc>) -> DateTime<Utc> {
 90 |     let (unix_time, micros) = date_splitter(date);
 91 | 
 92 |     DateTime::<Utc>::from_utc(
 93 |         NaiveDateTime::from_timestamp_opt(unix_time, micros * 1_000)
 94 |             .expect("impossible"),
 95 |         Utc
 96 |     )
 97 | }
 98 | 
 99 | pub fn date_to_pyobject(date: &DateTime<Utc>) -> PyResult<PyObject> {
100 |     let rounded_date = round_to_usec_half_even(date);
101 | 
102 |     Python::with_gil(|py| {
103 |         PyDateTime::new(
104 |             py,
105 |             rounded_date.year(),
106 |             rounded_date.month() as u8,
107 |             rounded_date.day() as u8,
108 |             rounded_date.hour() as u8,
109 |             rounded_date.minute() as u8,
110 |             rounded_date.second() as u8,
111 |             rounded_date.timestamp_subsec_micros(),
112 |             None,
113 |         )
114 |         .map(|dt| dt.to_object(py))
115 |     })
116 | }
117 | 
// Logging implementation from https://github.com/omerbenamram/pymft-rs
/// A logger that prints all messages with a readable output format.
struct PyLogger {
    // Most verbose level accepted; `enabled` compares record levels against it.
    level: Level,
    // Python `warnings` module object; `log` delivers messages through its `warn` function.
    warnings_module: PyObject,
}
124 | 
125 | impl Log for PyLogger {
126 |     fn enabled(&self, metadata: &Metadata) -> bool {
127 |         metadata.level() <= self.level
128 |     }
129 | 
130 |     fn log(&self, record: &Record) {
131 |         if self.enabled(record.metadata()) {
132 |             if let Level::Warn = self.level {
133 |                 let level_string = record.level().to_string();
134 |                 Python::with_gil(|py| {
135 | 
136 |                     let message = format!(
137 |                         "{:<5} [{}] {}",
138 |                         level_string,
139 |                         record.module_path().unwrap_or_default(),
140 |                         record.args()
141 |                     );
142 | 
143 |                     self.warnings_module
144 |                         .call_method(py, "warn", (message,), None)
145 |                         .ok();
146 |                 });
147 |             }
148 |         }
149 |     }
150 | 
151 |     fn flush(&self) {}
152 | }
153 | 
154 | pub fn init_logging(py: Python) -> Result<(), SetLoggerError> {
155 |     let warnings = py
156 |         .import("warnings")
157 |         .expect("python to have warning module")
158 |         .to_object(py);
159 | 
160 |     let logger = PyLogger {
161 |         level: Level::Warn,
162 |         warnings_module: warnings,
163 |     };
164 | 
165 |     log::set_boxed_logger(Box::new(logger))?;
166 |     log::set_max_level(Level::Warn.to_level_filter());
167 | 
168 |     Ok(())
169 | }
170 | 
#[cfg(test)]
mod tests {
    use pyo3::types::{PyDateAccess, PyTimeAccess};

    use super::*;

    /// Parses an RFC 3339 timestamp and converts it to UTC.
    fn parse_utc(text: &str) -> DateTime<Utc> {
        DateTime::parse_from_rfc3339(text)
            .unwrap()
            .with_timezone(&Utc)
    }

    /// Rounding below, above and exactly at the half-microsecond mark.
    #[test]
    fn test_nanos_to_micros_round_half_even() {
        let cases = [
            (764_026_300, 764_026),
            (764_026_600, 764_027),
            (764_026_500, 764_026),
            (764_027_500, 764_028),
            (999_999_500, 1_000_000),
        ];

        for (nanos, expected_micros) in cases {
            assert_eq!(nanos_to_micros_round_half_even(nanos), expected_micros);
        }
    }

    /// Rounding that overflows the microseconds carries into the seconds.
    #[test]
    fn test_date_splitter() {
        let cases = [
            ("2020-09-29T17:38:04.9999995Z", (1601401085, 0u32)),
            ("2020-09-29T17:38:04.0000004Z", (1601401084, 0u32)),
            ("2020-09-29T17:38:04.1234567Z", (1601401084, 123457u32)),
            ("2020-12-31T23:59:59.9999995Z", (1609459200, 0u32)),
        ];

        for (text, expected) in cases {
            assert_eq!(date_splitter(&parse_utc(text)), expected);
        }
    }

    /// The Python datetime carries the rounded fields, including a rollover
    /// into the next year.
    #[test]
    fn test_date_to_pyobject() {
        let cases = [
            ("2020-09-29T17:38:04.9999995Z", (2020, 9, 29, 17, 38, 5, 0)),
            ("2020-09-29T17:38:04.0000004Z", (2020, 9, 29, 17, 38, 4, 0)),
            ("2020-09-29T17:38:04.1234567Z", (2020, 9, 29, 17, 38, 4, 123457)),
            ("2020-12-31T23:59:59.9999995Z", (2021, 1, 1, 0, 0, 0, 0)),
        ];

        Python::with_gil(|py| {
            for (text, (year, month, day, hour, minute, second, micros)) in cases {
                let converted = date_to_pyobject(&parse_utc(text)).unwrap();
                let py_datetime = converted.as_ref(py).extract::<&PyDateTime>().unwrap();

                assert_eq!(py_datetime.get_year(), year);
                assert_eq!(py_datetime.get_month(), month);
                assert_eq!(py_datetime.get_day(), day);
                assert_eq!(py_datetime.get_hour(), hour);
                assert_eq!(py_datetime.get_minute(), minute);
                assert_eq!(py_datetime.get_second(), second);
                assert_eq!(py_datetime.get_microsecond(), micros);
            }
        });
    }
}
228 | 


--------------------------------------------------------------------------------
/pynotatin/src/py_notatin_key.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  *
 16 |  */
 17 | 
 18 | use pyo3::prelude::*;
 19 | 
 20 | use crate::py_notatin_parser::{PyNotatinKeysIterator, PyNotatinParser};
 21 | use crate::py_notatin_value::PyNotatinValue;
 22 | use crate::util::date_to_pyobject;
 23 | use notatin::{cell_key_node::CellKeyNode, cell_key_value::CellKeyValue};
 24 | use pyo3::exceptions::PyNotImplementedError;
 25 | use pyo3::{Py, PyResult, Python};
 26 | 
// Python-visible wrapper around a notatin `CellKeyNode`; `subclass` allows
// Python code to derive from it.
#[pyclass(subclass)]
pub struct PyNotatinKey {
    // The wrapped key node; only reachable from Rust code in this crate.
    pub(crate) inner: CellKeyNode,
    // Exposed to Python as a read-only attribute via `#[pyo3(get)]`;
    // filled in by `from_cell_key_node` using `date_to_pyobject`.
    #[pyo3(get)]
    pub last_key_written_date_and_time: PyObject,
}
 33 | 
 34 | #[pymethods]
 35 | impl PyNotatinKey {
 36 |     /// values(self, /)
 37 |     /// --
 38 |     ///
 39 |     /// Returns an iterator that yields registry values as python objects.
 40 |     fn values(&mut self) -> PyResult<Py<PyNotatinValuesIterator>> {
 41 |         self.reg_values_iterator()
 42 |     }
 43 | 
 44 |     /// value(self, name)
 45 |     /// --
 46 |     ///
 47 |     /// Returns an option with the requested value, or None.
 48 |     fn value(&mut self, name: &str) -> Option<Py<PyNotatinValue>> {
 49 |         match self.inner.get_value(name) {
 50 |             Some(value) => Python::with_gil(|py| {
 51 |                 PyNotatinValue::from_cell_key_value(py, value).ok()
 52 |             }),
 53 |             _ => None
 54 |         }
 55 |     }
 56 | 
 57 |     /// sub_keys(self, parser, /)
 58 |     /// --
 59 |     ///
 60 |     /// Returns an iterator that yields sub keys as python objects.
 61 |     fn subkeys(&mut self, parser: &mut PyNotatinParser) -> PyResult<Py<PyNotatinSubKeysIterator>> {
 62 |         self.sub_keys_iterator(parser)
 63 |     }
 64 | 
 65 |     fn find_key(&mut self, parser: &mut PyNotatinParser, path: &str) -> Option<Py<PyNotatinKey>> {
 66 |         match &mut parser.inner {
 67 |             Some(parser) => match self.inner.get_sub_key_by_path(parser, path) {
 68 |                 Some(key) => Python::with_gil(|py| {
 69 |                     PyNotatinKey::from_cell_key_node(py, key).ok()
 70 |                 }),
 71 |                 _ => None
 72 |             },
 73 |             _ => None
 74 |         }
 75 |     }
 76 | 
 77 |     /// name(self, /)
 78 |     /// --
 79 |     ///
 80 |     /// Returns the name of the key
 81 |     #[getter]
 82 |     pub fn name(&self, py: Python) -> PyObject {
 83 |         self.inner.key_name.to_object(py)
 84 |     }
 85 | 
 86 |     /// path(self, /)
 87 |     /// --
 88 |     ///
 89 |     /// Returns the path of the key
 90 |     #[getter]
 91 |     pub fn path(&self, py: Python) -> PyObject {
 92 |         self.inner.path.to_object(py)
 93 |     }
 94 | 
 95 |     /// pretty_path(self, /)
 96 |     /// --
 97 |     ///
 98 |     /// Returns the pretty path (no root object) of the key
 99 |     #[getter]
100 |     pub fn pretty_path(&self, py: Python) -> PyObject {
101 |         self.inner.get_pretty_path().to_object(py)
102 |     }
103 | 
104 |     /// number_of_sub_keys(self, /)
105 |     /// --
106 |     ///
107 |     /// Returns the number of sub keys
108 |     #[getter]
109 |     pub fn number_of_sub_keys(&self, py: Python) -> PyObject {
110 |         self.inner.detail.number_of_sub_keys().to_object(py)
111 |     }
112 | 
113 |     /// number_of_key_values(self, /)
114 |     /// --
115 |     ///
116 |     /// Returns the number of key values
117 |     #[getter]
118 |     pub fn number_of_key_values(&self, py: Python) -> PyObject {
119 |         self.inner.detail.number_of_key_values().to_object(py)
120 |     }
121 | 
122 |     fn __iter__(mut slf: PyRefMut<Self>) -> PyResult<Py<PyNotatinValuesIterator>> {
123 |         slf.values()
124 |     }
125 | 
126 |     fn __next__(_slf: PyRefMut<Self>) -> PyResult<Option<PyObject>> {
127 |         Err(PyErr::new::<PyNotImplementedError, _>("Using `next()` over `PyNotatinKey` is not supported. Try iterating over `PyNotatinKey(...).values() or PyNotatinKey(...).sub_keys()`"))
128 |     }
129 | }
130 | 
131 | impl PyNotatinKey {
132 |     pub fn from_cell_key_node(
133 |         py: Python,
134 |         cell_key_node: CellKeyNode,
135 |     ) -> PyResult<Py<PyNotatinKey>> {
136 |         Py::new(
137 |             py,
138 |             PyNotatinKey {
139 |                 last_key_written_date_and_time: date_to_pyobject(
140 |                     &cell_key_node.last_key_written_date_and_time(),
141 |                 )?,
142 |                 inner: cell_key_node,
143 |             },
144 |         )
145 |     }
146 | 
147 |     fn reg_values_iterator(&mut self) -> PyResult<Py<PyNotatinValuesIterator>> {
148 |         Python::with_gil(|py| {
149 |             Py::new(
150 |                 py,
151 |                 PyNotatinValuesIterator {
152 |                     inner: self.inner.clone(),
153 |                     sub_values_iter_index: 0,
154 |                 },
155 |            )
156 |         })
157 |     }
158 | 
159 |     fn sub_keys_iterator(
160 |         &mut self,
161 |         parser: &mut PyNotatinParser,
162 |     ) -> PyResult<Py<PyNotatinSubKeysIterator>> {
163 |         Python::with_gil(|py| {
164 |             self.inner.init_sub_key_iter();
165 |             match &mut parser.inner {
166 |                 Some(parser) => {
167 |                     let sub_keys = self.inner.read_sub_keys(parser);
168 | 
169 |                     Py::new(py, PyNotatinSubKeysIterator { index: 0, sub_keys })
170 |                 }
171 |                 _ => Py::new(
172 |                     py,
173 |                     PyNotatinSubKeysIterator {
174 |                         index: 0,
175 |                         sub_keys: Vec::new(),
176 |                     },
177 |                 ),
178 |             }
179 |         })
180 |     }
181 | }
182 | 
// Python iterator over the values of one key.
#[pyclass]
pub struct PyNotatinValuesIterator {
    // The key whose values are yielded (a clone made by `reg_values_iterator`).
    inner: CellKeyNode,
    // Position passed to `next_value`; replaced by the index it returns.
    sub_values_iter_index: usize,
}
188 | 
189 | impl PyNotatinValuesIterator {
190 |     pub(crate) fn reg_value_to_pyobject(reg_value: CellKeyValue, py: Python) -> PyObject {
191 |         match PyNotatinValue::from_cell_key_value(py, reg_value).map(|entry| entry.to_object(py)) {
192 |             Ok(py_reg_value) => py_reg_value,
193 |             Err(e) => e.to_object(py),
194 |         }
195 |     }
196 | 
197 |     fn next(&mut self) -> Option<PyObject> {
198 |         Python::with_gil(|py| {
199 |             match self.inner.next_value(self.sub_values_iter_index) {
200 |                 Some((value, sub_values_iter_index)) => {
201 |                     self.sub_values_iter_index = sub_values_iter_index;
202 |                     Some(Self::reg_value_to_pyobject(value, py))
203 |                 }
204 |                 None => None,
205 |             }
206 |         })
207 |     }
208 | }
209 | 
// Python iterator over a pre-read list of sub keys.
#[pyclass]
pub struct PyNotatinSubKeysIterator {
    // Index of the next element of `sub_keys` to yield.
    index: usize,
    // Sub keys collected when the iterator was created (see `sub_keys_iterator`).
    sub_keys: Vec<CellKeyNode>,
}
215 | 
216 | impl PyNotatinSubKeysIterator {
217 |     fn next(&mut self) -> Option<PyObject> {
218 |         Python::with_gil(|py| {
219 |             match self.sub_keys.get(self.index) {
220 |                 Some(key) => {
221 |                     self.index += 1;
222 |                     Some(PyNotatinKeysIterator::reg_key_to_pyobject(key.clone(), py))
223 |                 }
224 |                 None => None,
225 |             }
226 |         })
227 |     }
228 | }
229 | 
// Python iterator protocol for `PyNotatinValuesIterator`.
#[pymethods]
impl PyNotatinValuesIterator {
    // The iterator is its own iterable: returns a handle to itself.
    fn __iter__(slf: PyRefMut<Self>) -> PyResult<Py<PyNotatinValuesIterator>> {
        Ok(slf.into())
    }

    // Delegates to the inherent `next`; a `None` result is wrapped in `Ok`.
    fn __next__(mut slf: PyRefMut<Self>) -> PyResult<Option<PyObject>> {
        Ok(slf.next())
    }
}
240 | 
// Python iterator protocol for `PyNotatinSubKeysIterator`.
#[pymethods]
impl PyNotatinSubKeysIterator {
    // The iterator is its own iterable: returns a handle to itself.
    fn __iter__(slf: PyRefMut<Self>) -> PyResult<Py<PyNotatinSubKeysIterator>> {
        Ok(slf.into())
    }

    // Delegates to the inherent `next`; a `None` result is wrapped in `Ok`.
    fn __next__(mut slf: PyRefMut<Self>) -> PyResult<Option<PyObject>> {
        Ok(slf.next())
    }
}
251 | 


--------------------------------------------------------------------------------
/src/filter.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | use crate::cell_key_node::CellKeyNode;
 18 | use crate::err::Error;
 19 | use crate::impl_serialize_for_bitflags;
 20 | use crate::state::State;
 21 | use bitflags::bitflags;
 22 | use regex::Regex;
 23 | 
/// Filter allows specification of a condition to be met when reading the registry.
/// Evaluation will short-circuit when possible
#[derive(Clone, Debug, Default)]
pub struct Filter {
    /// The query to evaluate. `None` (as produced by `Filter::new` and
    /// `Default`) makes `is_valid` return false.
    reg_query: Option<RegQuery>,
}
 30 | 
 31 | impl Filter {
 32 |     pub fn new() -> Self {
 33 |         Filter { reg_query: None }
 34 |     }
 35 | 
 36 |     pub fn is_valid(&self) -> bool {
 37 |         self.reg_query.is_some()
 38 |     }
 39 | 
 40 |     pub(crate) fn check_cell(&self, state: &mut State, cell: &CellKeyNode) -> FilterFlags {
 41 |         if self.is_valid() {
 42 |             self.match_cell(state, cell)
 43 |         } else {
 44 |             FilterFlags::FILTER_ITERATE_KEYS
 45 |         }
 46 |     }
 47 | 
 48 |     pub(crate) fn match_cell(&self, state: &mut State, cell: &CellKeyNode) -> FilterFlags {
 49 |         if cell.is_key_root() {
 50 |             if let Some(reg_query) = &self.reg_query {
 51 |                 if !reg_query.key_path_has_root {
 52 |                     return FilterFlags::FILTER_ITERATE_KEYS;
 53 |                 }
 54 |             }
 55 |         }
 56 |         self.match_key(state, cell.lowercase())
 57 |     }
 58 | 
 59 |     fn match_key(&self, state: &mut State, key_path: String) -> FilterFlags {
 60 |         if let Some(reg_query) = &self.reg_query {
 61 |             reg_query.check_key_match(&key_path, state.get_root_path_offset(&key_path))
 62 |         } else {
 63 |             FilterFlags::FILTER_ITERATE_KEYS
 64 |         }
 65 |     }
 66 | 
 67 |     pub(crate) fn return_sub_keys(&self) -> bool {
 68 |         match &self.reg_query {
 69 |             Some(fp) => fp.children,
 70 |             _ => false,
 71 |         }
 72 |     }
 73 | }
 74 | 
/// One segment of a query path: either a literal name or a regular expression.
#[derive(Clone, Debug)]
pub enum RegQueryComponent {
    /// Literal segment, compared for equality with the lowercased key path segment.
    ComponentString(String),
    /// Regex segment, tested with `is_match` against the lowercased key path segment.
    ComponentRegex(Regex),
}
 80 | 
/// Builder for `Filter`; segments are added in path order and `build`
/// produces the final filter.
#[derive(Clone, Debug, Default)]
pub struct FilterBuilder {
    /// Query path segments collected so far, in order.
    key_path: Vec<RegQueryComponent>,
    /// Copied into `RegQuery::key_path_has_root` by `build`.
    key_path_has_root: bool,
    /// Copied into `RegQuery::children` by `build`.
    children: bool,
    /// Messages from regex segments that failed to compile; `build` fails if any exist.
    regex_errors: Vec<String>,
}
 88 | 
 89 | impl FilterBuilder {
 90 |     pub fn new() -> Self {
 91 |         FilterBuilder {
 92 |             key_path: vec![],
 93 |             key_path_has_root: false,
 94 |             children: false,
 95 |             regex_errors: vec![],
 96 |         }
 97 |     }
 98 | 
 99 |     pub fn add_key_path(mut self, key_path: &str) -> Self {
100 |         for segment in key_path.trim_end_matches('\\').split('\\') {
101 |             Self::add_literal_segment_internal(&mut self.key_path, segment);
102 |         }
103 |         self
104 |     }
105 | 
106 |     pub fn add_literal_segment(mut self, segment: &str) -> Self {
107 |         Self::add_literal_segment_internal(&mut self.key_path, segment.trim_matches('\\'));
108 |         self
109 |     }
110 | 
111 |     fn add_literal_segment_internal(key_path: &mut Vec<RegQueryComponent>, segment: &str) {
112 |         key_path.push(RegQueryComponent::ComponentString(
113 |             segment.to_ascii_lowercase(),
114 |         ));
115 |     }
116 | 
117 |     pub fn add_regex_segment(mut self, regex: &str) -> Self {
118 |         match Regex::new(&regex.to_ascii_lowercase()) {
119 |             Ok(r) => self.key_path.push(RegQueryComponent::ComponentRegex(r)),
120 |             Err(e) => self.regex_errors.push(e.to_string()),
121 |         }
122 |         self
123 |     }
124 | 
125 |     pub fn key_path_has_root(mut self, key_path_has_root: bool) -> Self {
126 |         self.key_path_has_root = key_path_has_root;
127 |         self
128 |     }
129 | 
130 |     pub fn return_child_keys(mut self, children: bool) -> Self {
131 |         self.children = children;
132 |         self
133 |     }
134 | 
135 |     pub fn build(self) -> Result<Filter, Error> {
136 |         if self.regex_errors.is_empty() {
137 |             Ok(Filter {
138 |                 reg_query: Some(RegQuery {
139 |                     key_path: self.key_path,
140 |                     key_path_has_root: self.key_path_has_root,
141 |                     children: self.children,
142 |                 }),
143 |             })
144 |         } else {
145 |             Err(Error::Any {
146 |                 detail: format!("Regex errors encountered: {}", self.regex_errors.join(", ")),
147 |             })
148 |         }
149 |     }
150 | }
151 | 
/// RegQuery is a structured filter which allows for regular expressions
#[derive(Clone, Debug, Default)]
pub struct RegQuery {
    /// Ordered path segments (literal or regex) that a key path is matched against
    pub(crate) key_path: Vec<RegQueryComponent>,
    /// True if `key_path` contains the root key name. Usually will be false, but useful if you are searching using a path from an existing key
    pub(crate) key_path_has_root: bool,
    /// Determines if subkeys are returned during iteration
    pub(crate) children: bool,
}
161 | 
162 | impl RegQuery {
163 |     fn check_key_match(&self, key_path: &str, mut root_key_name_offset: usize) -> FilterFlags {
164 |         if self.key_path_has_root {
165 |             root_key_name_offset = 0;
166 |         }
167 |         let key_path_iterator = key_path[root_key_name_offset..].split('\\'); // key path can be shorter and match
168 |         let mut filter_iterator = self.key_path.iter();
169 |         let mut filter_path_segment = filter_iterator.next();
170 | 
171 |         for key_path_segment in key_path_iterator {
172 |             match filter_path_segment {
173 |                 Some(fps) => match fps {
174 |                     RegQueryComponent::ComponentString(s) => {
175 |                         if s != &key_path_segment.to_ascii_lowercase() {
176 |                             return FilterFlags::FILTER_NO_MATCH;
177 |                         } else {
178 |                             filter_path_segment = filter_iterator.next();
179 |                         }
180 |                     }
181 |                     RegQueryComponent::ComponentRegex(r) => {
182 |                         if r.is_match(&key_path_segment.to_ascii_lowercase()) {
183 |                             filter_path_segment = filter_iterator.next();
184 |                         } else {
185 |                             return FilterFlags::FILTER_NO_MATCH;
186 |                         }
187 |                     }
188 |                 },
189 |                 None => return FilterFlags::FILTER_NO_MATCH,
190 |             }
191 |         }
192 |         if filter_path_segment.is_none() {
193 |             // we matched all the keys!
194 |             FilterFlags::FILTER_ITERATE_KEYS | FilterFlags::FILTER_KEY_MATCH
195 |         } else {
196 |             FilterFlags::FILTER_ITERATE_KEYS
197 |         }
198 |     }
199 | }
200 | 
// Result flags produced when a key is checked against a `Filter`.
bitflags! {
    #[derive(Clone, Copy, Debug, PartialEq)]
    pub struct FilterFlags: u16 {
        // A segment of the key path did not match, or the key path is longer than the query.
        const FILTER_NO_MATCH     = 0x0001;
        // Nothing ruled the key out; also returned on its own when no query is set
        // or the key path is a matching prefix of the query.
        const FILTER_ITERATE_KEYS = 0x0002;
        // Every segment of the query matched; set together with FILTER_ITERATE_KEYS.
        const FILTER_KEY_MATCH    = 0x0004;
    }
}
// Serialization support comes from the crate's `impl_serialize_for_bitflags` macro.
impl_serialize_for_bitflags! {FilterFlags}
210 | 
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cell_key_node;

    /// A literal one-segment filter matches the same name in any letter case
    /// and rejects a different name.
    #[test]
    fn test_check_cell_match_key() -> Result<(), Error> {
        let mut state = State::default();
        let filter = FilterBuilder::new()
            .add_key_path("HighContrast")
            .return_child_keys(true)
            .build()?;

        let full_match = FilterFlags::FILTER_ITERATE_KEYS | FilterFlags::FILTER_KEY_MATCH;
        let cases = [
            (
                "HighContrast",
                full_match,
                "check_cell: Same case key match failed",
            ),
            (
                "Highcontrast",
                full_match,
                "check_cell: Different case key match failed",
            ),
            (
                "badVal",
                FilterFlags::FILTER_NO_MATCH,
                "check_cell: No match key match failed",
            ),
        ];

        for (path, expected, message) in cases {
            let key_node = cell_key_node::CellKeyNode {
                path: String::from(path),
                ..Default::default()
            };
            assert_eq!(
                expected,
                filter.check_cell(&mut state, &key_node),
                "{}",
                message
            );
        }
        Ok(())
    }
}
249 | 


--------------------------------------------------------------------------------
/marvin32/marvin32.cpp:
--------------------------------------------------------------------------------
  1 | // Licensed to the .NET Foundation under one or more agreements.
  2 | // The .NET Foundation licenses this file to you under the MIT license.
  3 | // See the LICENSE file in the project root for more information.
  4 | //
  5 | // Based on:
  6 | //
  7 | // https://github.com/dotnet/coreclr/blob/v1.0.0/src/inc/marvin32.h
  8 | // https://github.com/dotnet/coreclr/blob/v1.0.0/src/vm/marvin32.cpp
  9 | // https://github.com/mono/corefx/blob/c4eeab9fc2faa0195a812e552cd73ee298d39386/src/Common/tests/Tests/System/MarvinTests.cs
 10 | //
 11 | 
 12 | // To build:
 13 | //
 14 | // $ g++ -Wall -std=c++20 -I/home/juckelman/projects/make_world/install/include -L/home/juckelman/projects/make_world/install/lib -lCatch2Main -lCatch2 -o marvin32 marvin32.cpp
 15 | // $ LD_LIBRARY_PATH=/home/juckelman/projects/make_world/install/lib ./marvin32
 16 | //
 17 | 
#include <catch2/catch_test_macros.hpp>
#include <catch2/benchmark/catch_benchmark.hpp>

#include <bit>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <numeric>
#include <tuple>
#include <vector>
 27 | 
// One Marvin mixing round over the two state words `a` and `b`.
// Both arguments are modified in place and appear several times in the
// expansion, so pass plain lvalues without side effects. std::rotl requires
// unsigned operands. The expansion is a braced block, not a single expression.
#define BLOCK(a, b) \
{\
    b ^= a; a = std::rotl(a, 20);\
    a += b; b = std::rotl(b,  9);\
    b ^= a; a = std::rotl(a, 27);\
    a += b; b = std::rotl(b, 19);\
}
 35 | 
 36 | uint64_t marvin32_0(uint64_t seed, const uint8_t* data, size_t dlen) {
 37 |   uint32_t s0 = seed & 0xFFFFFFFF;
 38 |   uint32_t s1 = seed >> 32;
 39 | 
 40 |   while (dlen > 7) {
 41 |     s0 += *reinterpret_cast<const uint32_t*>(data);
 42 |     BLOCK(s0, s1);
 43 |     s0 += *reinterpret_cast<const uint32_t*>(data + 4);
 44 |     BLOCK(s0, s1);
 45 |     data += 8;
 46 |     dlen -= 8;
 47 |   }
 48 | 
 49 |   uint32_t tmp;
 50 | 
 51 |   switch (dlen) {
 52 |   default:
 53 |   case 4:
 54 |     s0 += *reinterpret_cast<const uint32_t*>(data);
 55 |     BLOCK(s0, s1);
 56 |     data += 4;
 57 |   case 0:
 58 |     tmp = 0x80;
 59 |     break;
 60 |   case 5:
 61 |     s0 += *reinterpret_cast<const uint32_t*>(data);
 62 |     BLOCK(s0, s1);
 63 |     data += 4;
 64 |   case 1:
 65 |     tmp = 0x8000 | data[0];
 66 |     break;
 67 |   case 6:
 68 |     s0 += *reinterpret_cast<const uint32_t*>(data);
 69 |     BLOCK(s0, s1);
 70 |     data += 4;
 71 |   case 2:
 72 |     tmp = 0x800000 | *reinterpret_cast<const uint16_t*>(data);
 73 |     break;
 74 |   case 7:
 75 |     s0 += *reinterpret_cast<const uint32_t*>(data);
 76 |     BLOCK(s0, s1);
 77 |     data += 4;
 78 |   case 3:
 79 |     tmp = *reinterpret_cast<const uint16_t*>(data) | (data[2] << 16) | 0x80000000;
 80 |     break;
 81 |   }
 82 | 
 83 |   s0 += tmp;
 84 | 
 85 |   BLOCK(s0, s1);
 86 |   BLOCK(s0, s1);
 87 | 
 88 |   return s0 | (static_cast<uint64_t>(s1) << 32);
 89 | }
 90 | 
 91 | uint64_t marvin32_1(uint64_t seed, const uint8_t* data, size_t dlen) { // variant 1: 8-byte main loop, one switch for the 0-7 byte tail
 92 |   uint32_t s0 = seed & 0xFFFFFFFF; // low 32 bits of the seed
 93 |   uint32_t s1 = seed >> 32; // high 32 bits of the seed
 94 |   // Main loop: add two 32-bit words (8 bytes) per pass, running BLOCK after each. Words are read in host byte order.
 95 |   while (dlen > 7) {
 96 |     s0 += *reinterpret_cast<const uint32_t*>(data);
 97 |     BLOCK(s0, s1);
 98 |     s0 += *reinterpret_cast<const uint32_t*>(data + 4);
 99 |     BLOCK(s0, s1);
100 |     data += 8;
101 |     dlen -= 8;
102 |   }
103 |   // 0-7 bytes remain. Cases 4-7 consume one more word, then fall through to the case for the 0-3 bytes still left.
104 |   switch (dlen) {
105 |   default: // not reachable: the loop leaves dlen <= 7 and every value 0-7 has its own case
106 |   case 4:
107 |     s0 += *reinterpret_cast<const uint32_t*>(data);
108 |     BLOCK(s0, s1);
109 |     data += 4; // intentional fall-through into case 0
110 |   case 0:
111 |     s0 += 0x80; // no bytes left: only the 0x80 marker is added
112 |     break;
113 |   case 5:
114 |     s0 += *reinterpret_cast<const uint32_t*>(data);
115 |     BLOCK(s0, s1);
116 |     data += 4; // intentional fall-through into case 1
117 |   case 1:
118 |     s0 += 0x8000 | data[0]; // one byte left, with the 0x80 marker in the byte above it
119 |     break;
120 |   case 6:
121 |     s0 += *reinterpret_cast<const uint32_t*>(data);
122 |     BLOCK(s0, s1);
123 |     data += 4; // intentional fall-through into case 2
124 |   case 2:
125 |     s0 += 0x800000 | *reinterpret_cast<const uint16_t*>(data); // two bytes left, read as one 16-bit value
126 |     break;
127 |   case 7:
128 |     s0 += *reinterpret_cast<const uint32_t*>(data);
129 |     BLOCK(s0, s1);
130 |     data += 4; // intentional fall-through into case 3
131 |   case 3:
132 |     s0 += 0x80000000 | (data[2] << 16) | *reinterpret_cast<const uint16_t*>(data); // three bytes left
133 |     break;
134 |   }
135 |   // Two closing BLOCK rounds (BLOCK is defined elsewhere in this file).
136 |   BLOCK(s0, s1);
137 |   BLOCK(s0, s1);
138 |   // Pack the result: s0 in the low 32 bits, s1 in the high 32 bits.
139 |   return s0 | (static_cast<uint64_t>(s1) << 32);
140 | }
141 | 
142 | uint64_t marvin32_2(uint64_t seed, const uint8_t* data, size_t dlen) { // variant 2: 4-byte main loop, switch only for the 0-3 byte tail
143 |   uint32_t s0 = seed & 0xFFFFFFFF; // low 32 bits of the seed
144 |   uint32_t s1 = seed >> 32; // high 32 bits of the seed
145 |   // Main loop: add one 32-bit word (4 bytes) per pass and run BLOCK. Words are read in host byte order.
146 |   while (dlen > 3) {
147 |     s0 += *reinterpret_cast<const uint32_t*>(data);
148 |     BLOCK(s0, s1);
149 |     data += 4;
150 |     dlen -= 4;
151 |   }
152 |   // 0-3 bytes remain: they are added to s0 together with a 0x80 marker placed just above the last byte.
153 |   switch (dlen) {
154 |   default: // not reachable: the loop leaves dlen <= 3 and every value 0-3 has its own case
155 |   case 0:
156 |     s0 += 0x80; // no bytes left: only the marker
157 |     break;
158 |   case 1:
159 |     s0 += 0x8000 | data[0]; // one byte left
160 |     break;
161 |   case 2:
162 |     s0 += 0x800000 | *reinterpret_cast<const uint16_t*>(data); // two bytes left, read as one 16-bit value
163 |     break;
164 |   case 3:
165 |     s0 += 0x80000000 | (data[2] << 16) | *reinterpret_cast<const uint16_t*>(data); // three bytes left
166 |     break;
167 |   }
168 |   // Two closing BLOCK rounds (BLOCK is defined elsewhere in this file).
169 |   BLOCK(s0, s1);
170 |   BLOCK(s0, s1);
171 |   // Pack the result: s0 in the low 32 bits, s1 in the high 32 bits.
172 |   return s0 | (static_cast<uint64_t>(s1) << 32);
173 | }
174 | 
175 | uint64_t marvin32_3(uint64_t seed, const uint8_t* data, size_t dlen) { // variant 3: 8-byte main loop, one optional extra word, then a 0-3 byte tail switch
176 |   uint32_t s0 = seed & 0xFFFFFFFF; // low 32 bits of the seed
177 |   uint32_t s1 = seed >> 32; // high 32 bits of the seed
178 |   // Main loop: add two 32-bit words (8 bytes) per pass, running BLOCK after each. Words are read in host byte order.
179 |   while (dlen > 7) {
180 |     s0 += *reinterpret_cast<const uint32_t*>(data);
181 |     BLOCK(s0, s1);
182 |     s0 += *reinterpret_cast<const uint32_t*>(data + 4);
183 |     BLOCK(s0, s1);
184 |     data += 8;
185 |     dlen -= 8;
186 |   }
187 |   // 0-7 bytes remain; if at least one whole word is left, consume it so that only 0-3 bytes are left for the switch.
188 |   if (dlen > 3) {
189 |     s0 += *reinterpret_cast<const uint32_t*>(data);
190 |     BLOCK(s0, s1);
191 |     data += 4;
192 |     dlen -= 4;
193 |   }
194 |   // 0-3 bytes remain: they are added to s0 together with a 0x80 marker placed just above the last byte.
195 |   switch (dlen) {
196 |   default: // not reachable: dlen is at most 3 here and every value 0-3 has its own case
197 |   case 0:
198 |     s0 += 0x80; // no bytes left: only the marker
199 |     break;
200 |   case 1:
201 |     s0 += 0x8000 | data[0]; // one byte left
202 |     break;
203 |   case 2:
204 |     s0 += 0x800000 | *reinterpret_cast<const uint16_t*>(data); // two bytes left, read as one 16-bit value
205 |     break;
206 |   case 3:
207 |     s0 += 0x80000000 | (data[2] << 16) | *reinterpret_cast<const uint16_t*>(data); // three bytes left
208 |     break;
209 |   }
210 |   // Two closing BLOCK rounds (BLOCK is defined elsewhere in this file).
211 |   BLOCK(s0, s1);
212 |   BLOCK(s0, s1);
213 |   // Pack the result: s0 in the low 32 bits, s1 in the high 32 bits.
214 |   return s0 | (static_cast<uint64_t>(s1) << 32);
215 | }
216 | 
217 | TEST_CASE("tests") { // checks every marvin32 variant against fixed expected values, then benchmarks each variant (macros are presumably Catch2 - confirm against the includes)
218 |   const uint64_t seed_1 = 0x4FB61A001BDBCC;
219 |   const uint64_t seed_2 = 0x804FB61A001BDBCC;
220 |   const uint64_t seed_3 = 0x804FB61A801BDBCC;
221 |   const std::vector<uint8_t> data_0 = {}; // data_0 .. data_7: inputs of 0 to 7 bytes
222 |   const std::vector<uint8_t> data_1 = { 0xAF };
223 |   const std::vector<uint8_t> data_2 = { 0xE7, 0x0F };
224 |   const std::vector<uint8_t> data_3 = { 0x37, 0xF4, 0x95 };
225 |   const std::vector<uint8_t> data_4 = { 0x86, 0x42, 0xDC, 0x59 };
226 |   const std::vector<uint8_t> data_5 = { 0x15, 0x3F, 0xB7, 0x98, 0x26 };
227 |   const std::vector<uint8_t> data_6 = { 0x09, 0x32, 0xE6, 0x24, 0x6C, 0x47 };
228 |   const std::vector<uint8_t> data_7 = { 0xAB, 0x42, 0x7E, 0xA8, 0xD1, 0x0F, 0xC7 };
229 |   // data_256 holds the byte values 0, 1, ..., 255 in order.
230 |   std::vector<uint8_t> data_256(256);
231 |   std::iota(data_256.begin(), data_256.end(), 0);
232 |   // data_k is 1024 zero bytes (the vector is value-initialized).
233 |   const std::vector<uint8_t> data_k(1024);
234 |   // Each entry is (seed, input bytes, expected 64-bit result).
235 |   const std::vector<std::tuple<uint64_t, std::vector<uint8_t>, uint64_t>> tests = {
236 |     { seed_1, data_0, 0x30ED35C100CD3C7D },
237 |     { seed_1, data_1, 0x48E73FC77D75DDC1 },
238 |     { seed_1, data_2, 0xB5F6E1FC485DBFF8 },
239 |     { seed_1, data_3, 0xF0B07C789B8CF7E8 },
240 |     { seed_1, data_4, 0x7008F2E87E9CF556 },
241 |     { seed_1, data_5, 0xE6C08C6DA2AFA997 },
242 |     { seed_1, data_6, 0x6F04BF1A5EA24060 },
243 |     { seed_1, data_7, 0xE11847E4F0678C41 },
244 |     { seed_1, data_256, 0x7DFCAB33FCEAD72C },
245 |     { seed_2, data_0, 0x10A9D5D3996FD65D },
246 |     { seed_2, data_1, 0x68201F91960EBF91 },
247 |     { seed_2, data_2, 0x64B581631F6AB378 },
248 |     { seed_2, data_3, 0xE1F2DFA6E5131408 },
249 |     { seed_2, data_4, 0x36289D9654FB49F6 },
250 |     { seed_2, data_5, 0x0A06114B13464DBD },
251 |     { seed_2, data_6, 0xD6DD5E40AD1BC2ED },
252 |     { seed_2, data_7, 0xE203987DBA252FB3 },
253 |     { seed_2, data_256, 0x73B4AE569E0DB919 },
254 |     { seed_3, { 0x00 }, 0xA37FB0DA2ECAE06C },
255 |     { seed_3, { 0xFF }, 0xFECEF370701AE054 },
256 |     { seed_3, { 0x00, 0xFF }, 0xA638E75700048880 },
257 |     { seed_3, { 0xFF, 0x00 }, 0xBDFB46D969730E2A },
258 |     { seed_3, { 0xFF, 0x00, 0xFF }, 0x9D8577C0FE0D30BF },
259 |     { seed_3, { 0x00, 0xFF, 0x00 }, 0x4F9FBDDE15099497 },
260 |     { seed_3, { 0x00, 0xFF, 0x00, 0xFF }, 0x24EAA279D9A529CA },
261 |     { seed_3, { 0xFF, 0x00, 0xFF, 0x00 }, 0xD3BEC7726B057943 },
262 |     { seed_3, { 0xFF, 0x00, 0xFF, 0x00, 0xFF }, 0x920B62BBCA3E0B72 },
263 |     { seed_3, { 0x00, 0xFF, 0x00, 0xFF, 0x00 }, 0x1D7DDF9DFDF3C1BF },
264 |     { seed_3, { 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF }, 0xEC21276A17E821A5 },
265 |     { seed_3, { 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00 }, 0x6911A53CA8C12254 },
266 |     { seed_3, { 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF }, 0xFDFD187B1D3CE784 },
267 |     { seed_3, { 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00 }, 0x71876F2EFB1B0EE8 },
268 |     { seed_3, data_k, 0x40D0D89D379BD1EE },
269 |     { seed_3, data_256, 0xFFA003EEBD81AEB },
270 |   };
271 |   // The implementations under test; all four share one signature.
272 |   const auto funcs = { marvin32_0, marvin32_1, marvin32_2, marvin32_3 };
273 |   // Correctness pass: every implementation must reproduce every expected value.
274 |   for (const auto& [seed, data, exp]: tests) {
275 |     for (const auto f: funcs) {
276 |       CHECK(f(seed, data.data(), data.size()) == exp);
277 |     }
278 |   }
279 |   // Timing pass: one benchmark per implementation, labelled with its index in funcs; each run replays all test vectors.
280 |   int i = 0;
281 |   for (const auto f: funcs) {
282 |     BENCHMARK(std::to_string(i)) {
283 |       for (const auto& [seed, data, exp]: tests) {
284 |         CHECK(f(seed, data.data(), data.size()) == exp);
285 |       }
286 |     };
287 |     ++i;
288 |   }
289 | }
290 | 


--------------------------------------------------------------------------------
/bin/reg_dump/main.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | pub mod common_writer;
 18 | pub mod json_writer;
 19 | pub mod tsv_writer;
 20 | pub mod xlsx_writer;
 21 | 
 22 | use std::path::*;
 23 | 
 24 | use clap::builder::{EnumValueParser, PossibleValue};
 25 | use clap::{arg, Arg, Command, ValueEnum};
 26 | use notatin::{
 27 |     cli_util::*,
 28 |     err::Error,
 29 |     filter::{Filter, FilterBuilder},
 30 |     parser_builder::ParserBuilder,
 31 |     progress,
 32 | };
 33 | use walkdir::WalkDir;
 34 | 
 35 | use common_writer::WriteCommon;
 36 | use json_writer::WriteJson;
 37 | use tsv_writer::WriteTsv;
 38 | use xlsx_writer::WriteXlsx;
 39 | 
 40 | /// Parses the command line and runs the dump.
 41 | ///
 42 | /// Without `--recurse` the input is treated as one registry file; with it, the input is
 43 | /// walked as a folder and every recognized registry file found is dumped into the output
 44 | /// folder. A filter that cannot be built is returned as an error.
 45 | fn main() -> Result<(), Error> {
 46 |     let matches = Command::new("Notatin Registry Dump")
 47 |         .version("1.0.1")
 48 |         .arg(
 49 |             Arg::new("input")
 50 |                 .short('i')
 51 |                 .long("input")
 52 |                 .help("Base registry file, or root folder if recursing")
 53 |                 .required(true)
 54 |                 .number_of_values(1),
 55 |         )
 56 |         .arg(
 57 |             Arg::new("output")
 58 |                 .short('o')
 59 |                 .long("output")
 60 |                 .help("Output file, or folder if recursing")
 61 |                 .required(true)
 62 |                 .number_of_values(1),
 63 |         )
 64 |         .arg(
 65 |             arg!(<TYPE> "output type")
 66 |                 .short('t')
 67 |                 .value_parser(EnumValueParser::<OutputType>::new())
 68 |                 .ignore_case(true)
 69 |                 .default_value("jsonl"),
 70 |         )
 71 |         .arg(arg!(
 72 |             -r --recurse "Recurse through input looking for registry files"
 73 |         ))
 74 |         .arg(arg!(
 75 |             --recover "Recover deleted and versioned keys and values"
 76 |         ))
 77 |         .arg(arg!(
 78 |             --"recovered-only" "Only export recovered items (applicable to tsv and xlsx output)"
 79 |         ))
 80 |         .arg(arg!(
 81 |             --"full-field-info" "Get the offset and length for each key/value field (applicable to jsonl output)"
 82 |         ))
 83 |         .arg(arg!(
 84 |             -s --"skip-logs" "Skip transaction log files"
 85 |         ))
 86 |         .arg(arg!(
 87 |             -f --filter [STRING] "Key path for filter (ex: 'ControlSet001\\Services')"
 88 |         ))
 89 |         .get_matches();
 90 | 
 91 |     let input = matches.get_one::<String>("input").expect("Required value");
 92 |     let output = matches.get_one::<String>("output").expect("Required value");
 93 |     let recurse = matches.get_flag("recurse");
 94 |     let recover = matches.get_flag("recover");
 95 |     let skip_logs = matches.get_flag("skip-logs");
 96 |     let recovered_only = matches.get_flag("recovered-only");
 97 |     let get_full_field_info = matches.get_flag("full-field-info");
 98 |     let output_type = *matches
 99 |         .get_one::<OutputType>("TYPE")
100 |         .expect("Unrecognized value");
101 | 
102 |     // `--filter` is optional: build a filter only when a key path was given, and let a
103 |     // build failure propagate as this function's error.
104 |     let filter = matches
105 |         .get_one::<String>("filter")
106 |         .map(|f| {
107 |             FilterBuilder::new()
108 |                 .add_key_path(f)
109 |                 .return_child_keys(true)
110 |                 .build()
111 |         })
112 |         .transpose()?;
113 | 
114 |     if recurse {
115 |         process_folder(
116 |             &PathBuf::from(output),
117 |             &PathBuf::from(input),
118 |             filter,
119 |             recover,
120 |             recovered_only,
121 |             get_full_field_info,
122 |             skip_logs,
123 |             output_type,
124 |         )
125 |     } else {
126 |         process_file(
127 |             &PathBuf::from(output),
128 |             PathBuf::from(input),
129 |             filter,
130 |             recover,
131 |             recovered_only,
132 |             get_full_field_info,
133 |             skip_logs,
134 |             output_type,
135 |         )
136 |     }
137 | }
131 | 
132 | /// Dumps the single registry file `input` to `outpath`, together with whatever transaction
133 | /// logs `get_log_files` returns for it (`skip_logs` is forwarded to that lookup).
134 | fn process_file(
135 |     outpath: &PathBuf,
136 |     input: PathBuf,
137 |     filter: Option<Filter>,
138 |     recover: bool,
139 |     recovered_only: bool,
140 |     get_full_field_info: bool,
141 |     skip_logs: bool,
142 |     output_type: OutputType,
143 | ) -> Result<(), Error> {
144 |     // An input path without a final component (such as `..`) yields an empty name here
145 |     // rather than a panic; the dump below then fails or succeeds on the path itself.
146 |     let logs = get_log_files(
147 |         skip_logs,
148 |         &input.file_name().unwrap_or_default().to_string_lossy(),
149 |         &input,
150 |     );
151 | 
152 |     reg_dump(
153 |         input,
154 |         outpath, // already a `&PathBuf`; no copy needed
155 |         logs,
156 |         filter,
157 |         recover,
158 |         recovered_only,
159 |         get_full_field_info,
160 |         output_type,
161 |     )
162 | }
159 | 
160 | /// Walks `base` recursively and dumps every non-empty file whose lower-cased name is one
161 | /// of the known registry file names into `outfolder`.
162 | ///
163 | /// A file that fails to dump is reported on stderr and skipped, so the remaining files
164 | /// are still processed; the function itself always returns `Ok(())`.
165 | fn process_folder(
166 |     outfolder: &PathBuf,
167 |     base: &PathBuf,
168 |     filter: Option<Filter>,
169 |     recover: bool,
170 |     recovered_only: bool,
171 |     get_full_field_info: bool,
172 |     skip_logs: bool,
173 |     output_type: OutputType,
174 | ) -> Result<(), Error> {
175 |     // File names (compared in lower case) that are treated as registry files.
176 |     let reg_files = [
177 |         "sam",
178 |         "security",
179 |         "software",
180 |         "system",
181 |         "default",
182 |         "amcache",
183 |         "ntuser.dat",
184 |         "usrclass.dat",
185 |     ];
186 | 
187 |     for entry in WalkDir::new(base)
188 |         .into_iter()
189 |         .filter_map(Result::ok)
190 |         .filter(|e| !e.file_type().is_dir())
191 |     {
192 |         if let Some(f) = entry.file_name().to_str() {
193 |             let f_lower = f.to_lowercase();
194 |             if reg_files.contains(&f_lower.as_str()) && file_has_size(entry.path()) {
195 |                 match entry.path().strip_prefix(base) {
196 |                     Err(e) => println!("{:?}", e),
197 |                     Ok(primary_path_from_base) => {
198 |                         let logs = get_log_files(skip_logs, f, entry.path());
199 |                         let outpath = get_outpath(primary_path_from_base, outfolder, &output_type);
200 |                         // Best effort per file: report the failure instead of dropping
201 |                         // it, then carry on with the next file.
202 |                         if let Err(e) = reg_dump(
203 |                             PathBuf::from(entry.path()),
204 |                             &outpath,
205 |                             logs,
206 |                             filter.clone(),
207 |                             recover,
208 |                             recovered_only,
209 |                             get_full_field_info,
210 |                             output_type,
211 |                         ) {
212 |                             eprintln!("Failed to dump {:?}: {:?}", entry.path(), e);
213 |                         }
214 |                     }
215 |                 }
216 |             }
217 |         }
218 |     }
219 |     Ok(())
220 | }
211 | 
212 | /// Builds the output file path for one registry file found while recursing.
213 | ///
214 | /// The path relative to the scan root is given the extension that belongs to
215 | /// `output_type`, flattened into a single file name (path separators become `_`), and
216 | /// placed inside `outfolder`.
217 | fn get_outpath<T>(primary_path_from_base: &Path, outfolder: T, output_type: &OutputType) -> PathBuf
218 | where
219 |     T: AsRef<Path> + std::convert::AsRef<std::ffi::OsStr>,
220 | {
221 |     let extension = match output_type {
222 |         OutputType::Xlsx => "xlsx",
223 |         OutputType::Tsv => "tsv",
224 |         OutputType::Common => "txt",
225 |         OutputType::Jsonl => "jsonl",
226 |     };
227 |     // Swap the extension while the path still has its separators, so only the file name
228 |     // is affected. Doing it after flattening would take a dot in a directory name
229 |     // (e.g. `john.doe/SYSTEM`) as the start of the extension and cut the name short,
230 |     // making different files under that directory collide on one output file.
231 |     let relative = primary_path_from_base.with_extension(extension);
232 |     let output_filename = relative
233 |         .to_string_lossy()
234 |         .replace(std::path::MAIN_SEPARATOR, "_");
235 |     let mut output_path = Path::new(&outfolder).join(output_filename);
236 |     if primary_path_from_base.file_name().is_none() {
237 |         // Nothing to rename in the relative path (for example, it is empty): keep the
238 |         // earlier behavior of putting the extension on the joined path instead.
239 |         output_path.set_extension(extension);
240 |     }
241 |     output_path
242 | }
227 | 
228 | /// Parses the registry file at `input`, applying any transaction logs in `logs`, and
229 | /// writes the result to `output` with the writer that matches `output_type`.
230 | fn reg_dump(
231 |     input: PathBuf,
232 |     output: &PathBuf,
233 |     logs: Option<Vec<PathBuf>>,
234 |     filter: Option<Filter>,
235 |     recover: bool,
236 |     recovered_only: bool,
237 |     get_full_field_info: bool,
238 |     output_type: OutputType,
239 | ) -> Result<(), Error> {
240 |     let mut builder = ParserBuilder::from_path(input);
241 |     builder.update_console(true);
242 |     builder.recover_deleted(recover);
243 |     builder.get_full_field_info(get_full_field_info);
244 |     if let Some(log_paths) = logs {
245 |         for log_path in log_paths {
246 |             builder.with_transaction_log(log_path);
247 |         }
248 |     }
249 |     let parser = builder.build()?;
250 | 
251 |     let mut console = progress::new(true);
252 |     console.write("Writing file")?;
253 | 
254 |     // One writer per output type; only the jsonl writer reports through `console`.
255 |     match output_type {
256 |         OutputType::Xlsx => {
257 |             WriteXlsx::new(output, recovered_only)?.write(&parser, filter)?;
258 |         }
259 |         OutputType::Tsv => {
260 |             WriteTsv::new(output, recovered_only)?.write(&parser, filter)?;
261 |         }
262 |         OutputType::Common => {
263 |             WriteCommon::new(output)?.write(&parser, filter)?;
264 |         }
265 |         OutputType::Jsonl => {
266 |             WriteJson::write(output, &parser, filter, &mut console)?;
267 |         }
268 |     }
269 |     console.write(&format!("\nFinished writing {:?}\n", output))?;
270 |     Ok(())
271 | }
262 | 
263 | #[derive(Clone, Copy, Debug, PartialEq)]
264 | pub enum OutputType { // output formats selectable with the TYPE argument
265 |     Jsonl, // "jsonl": written by WriteJson; also the argument's default value
266 |     Common, // "common": written by WriteCommon; gets a .txt extension when recursing
267 |     Tsv, // "tsv": written by WriteTsv
268 |     Xlsx, // "xlsx": written by WriteXlsx
269 | }
270 | 
271 | impl ValueEnum for OutputType {
272 |     /// Lists every output type, in the order they are offered on the command line.
273 |     fn value_variants<'a>() -> &'a [Self] {
274 |         &[Self::Jsonl, Self::Xlsx, Self::Tsv, Self::Common]
275 |     }
276 | 
277 |     /// Maps each output type to the name accepted for it on the command line.
278 |     fn to_possible_value<'a>(&self) -> Option<PossibleValue> {
279 |         let name = match self {
280 |             Self::Jsonl => "jsonl",
281 |             Self::Xlsx => "xlsx",
282 |             Self::Tsv => "tsv",
283 |             Self::Common => "common",
284 |         };
285 |         Some(PossibleValue::new(name))
286 |     }
287 | }
290 | 


--------------------------------------------------------------------------------
/bin/reg_dump/common_writer.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | use notatin::{
 18 |     cell::CellState,
 19 |     cell_key_node::CellKeyNode,
 20 |     cell_key_value::CellKeyValue,
 21 |     err::Error,
 22 |     filter::Filter,
 23 |     parser::{Parser, ParserIterator},
 24 |     progress, util,
 25 | };
 26 | use std::fs::File;
 27 | use std::io::{BufWriter, Write};
 28 | use std::path::*;
 29 | 
 30 | pub(crate) struct WriteCommon { // writer for the "Registry common export format" text output
 31 |     writer: BufWriter<File>, // buffered handle to the output file created in `new`
 32 | }
 33 | 
 34 | impl WriteCommon {
 35 |     /// Creates (or truncates) the file at `output` and wraps it in a buffered writer.
 36 |     pub(crate) fn new(output: impl AsRef<Path>) -> Result<Self, Error> {
 37 |         let write_file = File::create(output)?;
 38 |         let writer = BufWriter::new(write_file);
 39 |         Ok(WriteCommon { writer })
 40 |     }
 41 | 
 42 |     /// Writes the whole export: the fixed format header, one line for every key and value
 43 |     /// the parser yields (including their recorded versions), and the per-state totals.
 44 |     ///
 45 |     /// `filter`, when given, is handed to the parser iterator. The buffered writer is
 46 |     /// flushed before returning so that a failure on the final write surfaces as an error
 47 |     /// instead of being lost when the writer is dropped.
 48 |     pub(crate) fn write(&mut self, parser: &Parser, filter: Option<Filter>) -> Result<(), Error> {
 49 |         writeln!(
 50 |             &mut self.writer,
 51 |             "## Registry common export format\n\
 52 |             ## Key format\n\
 53 |             ## key,Is Free,Absolute offset in decimal,KeyPath,,,,LastWriteTime in UTC\n\
 54 |             ## Value format\n\
 55 |             ## value,Is Free,Absolute offset in decimal,KeyPath,Value name,Data type (as decimal integer),Value data as bytes separated by a singe space,\n\
 56 |             ## \"Is Free\" interpretation: A for in use, U for unused from the primary file, D for deleted from the transaction log, M for modified from the transaction log\n\
 57 |             ##\n\
 58 |             ## Comparison of unused keys/values is done to compare recovery of vk and nk records, not the algorithm used to associate unused keys to other keys and their values.\n\
 59 |             ## When including unused keys, only the recovered key name should be included, not the full path to the unused key.\n\
 60 |             ## When including unused values, do not include the parent key information.\n\
 61 |             ##\n\
 62 |             ## The following totals should also be included\n\
 63 |             ##\n\
 64 |             ## total_keys: total in use key count\n\
 65 |             ## total_values: total in use value count\n\
 66 |             ## total_unused_keys: total free key count (recovered from primary file)\n\
 67 |             ## total_unused_values: total free value count (recovered from primary file)\n\
 68 |             ## total_deleted_from_transaction_log_keys: total deleted key count (recovered from transaction logs)\n\
 69 |             ## total_deleted_from_transaction_log_values: total deleted value count (recovered from transaction logs)\n\
 70 |             ## total_modified_from_transaction_log_keys: total modified key count (recovered from transaction logs)\n\
 71 |             ## total_modified_from_transaction_log_values: total modified value count (recovered from transaction logs)\n\
 72 |             ##\n\
 73 |             ## Before comparison with other common export implementations, the files should be sorted\n\
 74 |             ##"
 75 |         )?;
 76 |         let mut keys = 0;
 77 |         let mut values = 0;
 78 |         let mut unused_keys = 0;
 79 |         let mut unused_values = 0;
 80 |         let mut tx_log_deleted_keys = 0;
 81 |         let mut tx_log_deleted_values = 0;
 82 |         let mut tx_log_modified_keys = 0;
 83 |         let mut tx_log_modified_values = 0;
 84 | 
 85 |         let mut iter = ParserIterator::new(parser);
 86 |         if let Some(filter) = filter {
 87 |             iter.with_filter(filter);
 88 |         }
 89 | 
 90 |         let mut console = progress::new(true);
 91 |         for (index, key) in iter.iter().enumerate() {
 92 |             console.update_progress(index)?;
 93 |             self.write_key(
 94 |                 &key,
 95 |                 &mut unused_keys,
 96 |                 &mut keys,
 97 |                 &mut tx_log_deleted_keys,
 98 |                 &mut tx_log_modified_keys,
 99 |             )?;
100 |             for mk in &key.versions {
101 |                 self.write_key(
102 |                     mk,
103 |                     &mut unused_keys,
104 |                     &mut keys,
105 |                     &mut tx_log_deleted_keys,
106 |                     &mut tx_log_modified_keys,
107 |                 )?;
108 |             }
109 | 
110 |             for value in key.value_iter() {
111 |                 self.write_value(
112 |                     &key,
113 |                     &value,
114 |                     &mut unused_values,
115 |                     &mut values,
116 |                     &mut tx_log_deleted_values,
117 |                     &mut tx_log_modified_values,
118 |                 )?;
119 | 
120 |                 for mv in value.versions {
121 |                     self.write_value(
122 |                         &key,
123 |                         &mv,
124 |                         &mut unused_values,
125 |                         &mut values,
126 |                         &mut tx_log_deleted_values,
127 |                         &mut tx_log_modified_values,
128 |                     )?;
129 |                 }
130 |             }
131 |         }
132 | 
133 |         // Totals, in the order the header announces them.
134 |         for (label, total) in [
135 |             ("total_keys", keys),
136 |             ("total_values", values),
137 |             ("total_unused_keys", unused_keys),
138 |             ("total_unused_values", unused_values),
139 |             ("total_deleted_from_transaction_log_keys", tx_log_deleted_keys),
140 |             ("total_deleted_from_transaction_log_values", tx_log_deleted_values),
141 |             ("total_modified_from_transaction_log_keys", tx_log_modified_keys),
142 |             ("total_modified_from_transaction_log_values", tx_log_modified_values),
143 |         ] {
144 |             writeln!(&mut self.writer, "## {}: {}", label, total)?;
145 |         }
146 | 
147 |         // Dropping a `BufWriter` flushes it but ignores any error; flush explicitly.
148 |         self.writer.flush()?;
149 |         Ok(())
150 |     }
151 | 
152 |     /// Returns the one-letter "Is Free" code for a cell state: `U` for deleted in the
153 |     /// primary file (or its slack), `D`/`M` for deleted/modified in a transaction log, and
154 |     /// `A` for allocated.
155 |     fn get_alloc_char(state: &CellState) -> &'static str {
156 |         match state {
157 |             CellState::DeletedPrimaryFile | CellState::DeletedPrimaryFileSlack => "U",
158 |             CellState::DeletedTransactionLog => "D",
159 |             CellState::ModifiedTransactionLog => "M",
160 |             CellState::Allocated => "A",
161 |         }
162 |     }
163 | 
164 |     /// Returns `path` without its first character, or an empty string when there is
165 |     /// nothing after it (so an empty path cannot cause a slicing panic).
166 |     fn strip_leading_separator(path: &str) -> &str {
167 |         path.get(1..).unwrap_or_default()
168 |     }
169 | 
170 |     /// Writes the `key,...` line for `key` and increments the counter that matches its
171 |     /// cell state.
172 |     fn write_key(
173 |         &mut self,
174 |         key: &CellKeyNode,
175 |         unused_keys: &mut u32,
176 |         keys: &mut u32,
177 |         tx_log_deleted_keys: &mut u32,
178 |         tx_log_modified_keys: &mut u32,
179 |     ) -> Result<(), Error> {
180 |         let key_path = match key.cell_state {
181 |             CellState::DeletedPrimaryFile | CellState::DeletedPrimaryFileSlack => {
182 |                 *unused_keys += 1;
183 |                 &key.key_name[..]
184 |             } // ## When including unused keys, only the recovered key name should be included, not the full path to the deleted key.
185 |             CellState::Allocated => {
186 |                 *keys += 1;
187 |                 Self::strip_leading_separator(&key.path)
188 |             } // drop the first slash to match EZ's formatting
189 |             CellState::DeletedTransactionLog => {
190 |                 *tx_log_deleted_keys += 1;
191 |                 Self::strip_leading_separator(&key.path)
192 |             } // drop the first slash to match EZ's formatting
193 |             CellState::ModifiedTransactionLog => {
194 |                 *tx_log_modified_keys += 1;
195 |                 Self::strip_leading_separator(&key.path)
196 |             } // drop the first slash to match EZ's formatting
197 |         };
198 |         writeln!(
199 |             self.writer,
200 |             "key,{},{},{},,,,{}",
201 |             Self::get_alloc_char(&key.cell_state),
202 |             key.file_offset_absolute,
203 |             util::escape_string(key_path),
204 |             util::format_date_time(key.last_key_written_date_and_time())
205 |         )?;
206 |         Ok(())
207 |     }
208 | 
209 |     /// Writes the `value,...` line for `value` (owned by `key`) and increments the counter
210 |     /// that matches the value's cell state.
211 |     fn write_value(
212 |         &mut self,
213 |         key: &CellKeyNode,
214 |         value: &CellKeyValue,
215 |         unused_values: &mut u32,
216 |         values: &mut u32,
217 |         tx_log_deleted_values: &mut u32,
218 |         tx_log_modified_values: &mut u32,
219 |     ) -> Result<(), Error> {
220 |         // NOTE(review): the header names this column "KeyPath", while the key's name (not
221 |         // its path) is written here - confirm which one is intended.
222 |         let key_name = match value.cell_state {
223 |             CellState::DeletedPrimaryFile | CellState::DeletedPrimaryFileSlack => {
224 |                 *unused_values += 1;
225 |                 ""
226 |             } // ## When including unused values, do not include the parent key information
227 |             CellState::Allocated => {
228 |                 *values += 1;
229 |                 &key.key_name[..]
230 |             }
231 |             CellState::DeletedTransactionLog => {
232 |                 *tx_log_deleted_values += 1;
233 |                 &key.key_name[..]
234 |             }
235 |             CellState::ModifiedTransactionLog => {
236 |                 *tx_log_modified_values += 1;
237 |                 &key.key_name[..]
238 |             }
239 |         };
240 |         writeln!(
241 |             self.writer,
242 |             "value,{},{},{},{},{:?},{},",
243 |             Self::get_alloc_char(&value.cell_state),
244 |             value.file_offset_absolute,
245 |             util::escape_string(key_name),
246 |             util::escape_string(&value.get_pretty_name()),
247 |             value.data_type as u32,
248 |             util::to_hex_string(&value.detail.value_bytes().unwrap_or_default()[..])
249 |         )?;
250 |         Ok(())
251 |     }
252 | }
242 | 


--------------------------------------------------------------------------------
/pynotatin/poetry.lock:
--------------------------------------------------------------------------------
  1 | # This file is automatically @generated by Poetry 1.4.1 and should not be changed by hand.
  2 | 
  3 | [[package]]
  4 | name = "atomicwrites"
  5 | version = "1.4.1"
  6 | description = "Atomic file writes."
  7 | category = "dev"
  8 | optional = false
  9 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
 10 | files = [
 11 |     {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"},
 12 | ]
 13 | 
 14 | [[package]]
 15 | name = "attrs"
 16 | version = "22.2.0"
 17 | description = "Classes Without Boilerplate"
 18 | category = "dev"
 19 | optional = false
 20 | python-versions = ">=3.6"
 21 | files = [
 22 |     {file = "attrs-22.2.0-py3-none-any.whl", hash = "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836"},
 23 |     {file = "attrs-22.2.0.tar.gz", hash = "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"},
 24 | ]
 25 | 
 26 | [package.extras]
 27 | cov = ["attrs[tests]", "coverage-enable-subprocess", "coverage[toml] (>=5.3)"]
 28 | dev = ["attrs[docs,tests]"]
 29 | docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope.interface"]
 30 | tests = ["attrs[tests-no-zope]", "zope.interface"]
 31 | tests-no-zope = ["cloudpickle", "cloudpickle", "hypothesis", "hypothesis", "mypy (>=0.971,<0.990)", "mypy (>=0.971,<0.990)", "pympler", "pympler", "pytest (>=4.3.0)", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-mypy-plugins", "pytest-xdist[psutil]", "pytest-xdist[psutil]"]
 32 | 
 33 | [[package]]
 34 | name = "colorama"
 35 | version = "0.4.5"
 36 | description = "Cross-platform colored terminal text."
 37 | category = "dev"
 38 | optional = false
 39 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
 40 | files = [
 41 |     {file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"},
 42 |     {file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"},
 43 | ]
 44 | 
 45 | [[package]]
 46 | name = "importlib-metadata"
 47 | version = "4.8.3"
 48 | description = "Read metadata from Python packages"
 49 | category = "dev"
 50 | optional = false
 51 | python-versions = ">=3.6"
 52 | files = [
 53 |     {file = "importlib_metadata-4.8.3-py3-none-any.whl", hash = "sha256:65a9576a5b2d58ca44d133c42a241905cc45e34d2c06fd5ba2bafa221e5d7b5e"},
 54 |     {file = "importlib_metadata-4.8.3.tar.gz", hash = "sha256:766abffff765960fcc18003801f7044eb6755ffae4521c8e8ce8e83b9c9b0668"},
 55 | ]
 56 | 
 57 | [package.dependencies]
 58 | typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""}
 59 | zipp = ">=0.5"
 60 | 
 61 | [package.extras]
 62 | docs = ["jaraco.packaging (>=8.2)", "rst.linker (>=1.9)", "sphinx"]
 63 | perf = ["ipython"]
 64 | testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pep517", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-flake8", "pytest-mypy", "pytest-perf (>=0.9.2)"]
 65 | 
 66 | [[package]]
 67 | name = "iniconfig"
 68 | version = "1.1.1"
 69 | description = "iniconfig: brain-dead simple config-ini parsing"
 70 | category = "dev"
 71 | optional = false
 72 | python-versions = "*"
 73 | files = [
 74 |     {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"},
 75 |     {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"},
 76 | ]
 77 | 
 78 | [[package]]
 79 | name = "maturin"
 80 | version = "0.11.5"
 81 | description = "Build and publish crates with pyo3, rust-cpython and cffi bindings as well as rust binaries as python packages"
 82 | category = "dev"
 83 | optional = false
 84 | python-versions = ">=3.6"
 85 | files = [
 86 |     {file = "maturin-0.11.5-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:d78f24561a5e02f7d119b348b26e5772ad5698a43ca49e8facb9ce77cf273714"},
 87 |     {file = "maturin-0.11.5-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:c2ded8b4ef9210d627bb966bc67661b7db259535f6062afe1ce5605406b50f3f"},
 88 |     {file = "maturin-0.11.5-py3-none-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:1ce666c386ff9c3c2b5d7d3ca4b1f9f675c38d7540ffbda0d5d5bc7d6ddde49a"},
 89 |     {file = "maturin-0.11.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0ac45879a7d624b47d72b093ae3370270894c19779f42aad7568a92951c5d47"},
 90 |     {file = "maturin-0.11.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4191b0b7362b3025096faf126ff15cb682fbff324ac4a6ca18d55bb16e2b759b"},
 91 |     {file = "maturin-0.11.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7bf96e7586bfdb5b0fadc6d662534b8a41123b33dff084fa383a81ded0ce5334"},
 92 |     {file = "maturin-0.11.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ab2b3ccf66f5e0f9c3904d215835337b1bd305e79e3bf53b65bbc80a5755e01b"},
 93 |     {file = "maturin-0.11.5-py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:3354d030b88c938a33bf407a6c0f79ccdd2cce3e1e3e4a2d0c92dc2e063adc6e"},
 94 |     {file = "maturin-0.11.5-py3-none-win32.whl", hash = "sha256:20f9c30701c9932ed8026ceaf896fc77ecc76cebd6a182668dbc10ed597f8789"},
 95 |     {file = "maturin-0.11.5-py3-none-win_amd64.whl", hash = "sha256:70381be1585cb9fa5c02b83af80ae661aaad959e8aa0fddcfe195b004054bd69"},
 96 |     {file = "maturin-0.11.5.tar.gz", hash = "sha256:07074778b063a439fdfd5501bd1d1823a216ec5b657d3ecde78fd7f2c4782422"},
 97 | ]
 98 | 
 99 | [package.dependencies]
100 | toml = ">=0.10.2,<0.11.0"
101 | 
102 | [[package]]
103 | name = "packaging"
104 | version = "21.3"
105 | description = "Core utilities for Python packages"
106 | category = "dev"
107 | optional = false
108 | python-versions = ">=3.6"
109 | files = [
110 |     {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
111 |     {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
112 | ]
113 | 
114 | [package.dependencies]
115 | pyparsing = ">=2.0.2,<3.0.5 || >3.0.5"
116 | 
117 | [[package]]
118 | name = "pluggy"
119 | version = "1.0.0"
120 | description = "plugin and hook calling mechanisms for python"
121 | category = "dev"
122 | optional = false
123 | python-versions = ">=3.6"
124 | files = [
125 |     {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
126 |     {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
127 | ]
128 | 
129 | [package.dependencies]
130 | importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""}
131 | 
132 | [package.extras]
133 | dev = ["pre-commit", "tox"]
134 | testing = ["pytest", "pytest-benchmark"]
135 | 
136 | [[package]]
137 | name = "py"
138 | version = "1.11.0"
139 | description = "library with cross-python path, ini-parsing, io, code, log facilities"
140 | category = "dev"
141 | optional = false
142 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
143 | files = [
144 |     {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
145 |     {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
146 | ]
147 | 
148 | [[package]]
149 | name = "pyparsing"
150 | version = "3.0.7"
151 | description = "Python parsing module"
152 | category = "dev"
153 | optional = false
154 | python-versions = ">=3.6"
155 | files = [
156 |     {file = "pyparsing-3.0.7-py3-none-any.whl", hash = "sha256:a6c06a88f252e6c322f65faf8f418b16213b51bdfaece0524c1c1bc30c63c484"},
157 |     {file = "pyparsing-3.0.7.tar.gz", hash = "sha256:18ee9022775d270c55187733956460083db60b37d0d0fb357445f3094eed3eea"},
158 | ]
159 | 
160 | [package.extras]
161 | diagrams = ["jinja2", "railroad-diagrams"]
162 | 
163 | [[package]]
164 | name = "pytest"
165 | version = "6.2.5"
166 | description = "pytest: simple powerful testing with Python"
167 | category = "dev"
168 | optional = false
169 | python-versions = ">=3.6"
170 | files = [
171 |     {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"},
172 |     {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"},
173 | ]
174 | 
175 | [package.dependencies]
176 | atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""}
177 | attrs = ">=19.2.0"
178 | colorama = {version = "*", markers = "sys_platform == \"win32\""}
179 | importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""}
180 | iniconfig = "*"
181 | packaging = "*"
182 | pluggy = ">=0.12,<2.0"
183 | py = ">=1.8.2"
184 | toml = "*"
185 | 
186 | [package.extras]
187 | testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"]
188 | 
189 | [[package]]
190 | name = "toml"
191 | version = "0.10.2"
192 | description = "Python Library for Tom's Obvious, Minimal Language"
193 | category = "dev"
194 | optional = false
195 | python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
196 | files = [
197 |     {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
198 |     {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
199 | ]
200 | 
201 | [[package]]
202 | name = "typing-extensions"
203 | version = "4.1.1"
204 | description = "Backported and Experimental Type Hints for Python 3.6+"
205 | category = "dev"
206 | optional = false
207 | python-versions = ">=3.6"
208 | files = [
209 |     {file = "typing_extensions-4.1.1-py3-none-any.whl", hash = "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2"},
210 |     {file = "typing_extensions-4.1.1.tar.gz", hash = "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42"},
211 | ]
212 | 
213 | [[package]]
214 | name = "zipp"
215 | version = "3.6.0"
216 | description = "Backport of pathlib-compatible object wrapper for zip files"
217 | category = "dev"
218 | optional = false
219 | python-versions = ">=3.6"
220 | files = [
221 |     {file = "zipp-3.6.0-py3-none-any.whl", hash = "sha256:9fe5ea21568a0a70e50f273397638d39b03353731e6cbbb3fd8502a33fec40bc"},
222 |     {file = "zipp-3.6.0.tar.gz", hash = "sha256:71c644c5369f4a6e07636f0aa966270449561fcea2e3d6747b8d23efaa9d7832"},
223 | ]
224 | 
225 | [package.extras]
226 | docs = ["jaraco.packaging (>=8.2)", "rst.linker (>=1.9)", "sphinx"]
227 | testing = ["func-timeout", "jaraco.itertools", "pytest (>=4.6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-flake8", "pytest-mypy"]
228 | 
229 | [metadata]
230 | lock-version = "2.0"
231 | python-versions = "^3.6"
232 | content-hash = "e2703a2d74a6386f7ec9d493f0efc4ed674a8a4e169d183d3fe10e3216674ba4"
233 | 


--------------------------------------------------------------------------------
/src/field_offset_len.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 LevelBlue
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | use serde::Serialize;
 18 | 
 19 | pub(crate) trait FieldTrait<T: Default + 'static> {
 20 |     fn value(&self) -> T;
 21 |     fn offset(&self) -> usize;
 22 |     fn len(&self) -> u32;
 23 | 
 24 |     // This trait exists only to support FieldLight and FieldFull, so rather than
 25 |     // using a more generic Any/downcast approach to get the specific typed value
 26 |     // we instead have these functions
 27 |     #[allow(dead_code)]
 28 |     fn get_field_light(&self) -> Option<&FieldLight<T>>; // this is currently unused in the existing code, but it may be useful in the future
 29 |     fn get_field_full(&self) -> Option<&FieldFull<T>>;
 30 | }
 31 | 
 32 | #[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize)]
 33 | pub struct FieldFull<T: Default> {
 34 |     pub value: T,
 35 |     pub offset: usize,
 36 |     pub len: u32,
 37 | }
 38 | 
 39 | impl<T: Default + Clone + 'static> FieldTrait<T> for FieldFull<T> {
 40 |     fn value(&self) -> T {
 41 |         self.value.clone()
 42 |     }
 43 |     fn offset(&self) -> usize {
 44 |         self.offset
 45 |     }
 46 |     fn len(&self) -> u32 {
 47 |         self.len
 48 |     }
 49 |     fn get_field_light(&self) -> Option<&FieldLight<T>> {
 50 |         None
 51 |     }
 52 |     fn get_field_full(&self) -> Option<&FieldFull<T>> {
 53 |         Some(self)
 54 |     }
 55 | }
 56 | 
 57 | impl<T: Default> FieldFull<T> {
 58 |     /// This is the standard constructor; it will set len based upon the size of type `T`
 59 |     pub fn new(value: T, offset: usize) -> Self {
 60 |         Self::new_with_len(value, offset, std::mem::size_of::<T>() as u32)
 61 |     }
 62 | 
 63 |     /// Allows an explicit size to be set (for example, if T is a Vec)
 64 |     pub fn new_with_len(value: T, offset: usize, len: u32) -> Self {
 65 |         Self { value, offset, len }
 66 |     }
 67 | }
 68 | 
 69 | impl<T: Default> Default for FieldFull<T> {
 70 |     fn default() -> Self {
 71 |         Self {
 72 |             value: T::default(),
 73 |             offset: 0,
 74 |             len: 0,
 75 |         }
 76 |     }
 77 | }
 78 | 
 79 | #[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Serialize)]
 80 | pub struct FieldLight<T: Default> {
 81 |     pub value: T,
 82 | }
 83 | 
 84 | impl<T: Default> FieldLight<T> {
 85 |     pub fn new(value: T) -> Self {
 86 |         Self { value }
 87 |     }
 88 | }
 89 | 
 90 | impl<T: Default + Clone + 'static> FieldTrait<T> for FieldLight<T> {
 91 |     fn value(&self) -> T {
 92 |         self.value.clone()
 93 |     }
 94 |     fn offset(&self) -> usize {
 95 |         0
 96 |     }
 97 |     fn len(&self) -> u32 {
 98 |         0
 99 |     }
100 |     fn get_field_light(&self) -> Option<&FieldLight<T>> {
101 |         Some(self)
102 |     }
103 |     fn get_field_full(&self) -> Option<&FieldFull<T>> {
104 |         None
105 |     }
106 | }
107 | 
108 | mod macros {
109 |     #[macro_export]
110 |     macro_rules! make_field_struct {
111 |         ( @$field_type:ident, $name:ident { } -> ($($result:tt)*) ) => (
112 |             #[derive(Clone, Debug, Default, Eq, PartialEq, Serialize)]
113 |             pub struct $name {
114 |                 $($result)*
115 |             }
116 |         );
117 | 
118 |         ( @$field_type:ident, $name:ident { $field:ident : $type:ty, $($tail:tt)* } -> ($($result:tt)*) ) => (
119 |             make_field_struct!(@$field_type, $name { $($tail)* } -> (
120 |                 $($result)*
121 |                 pub $field : $field_type<$type>,
122 |             ));
123 |         );
124 | 
125 |         ( @$field_type:ident, $name:ident { $field:ident : $type:ty ; $attribute_macro:meta, $($tail:tt)* } -> ($($result:tt)*) ) => (
126 |             make_field_struct!(@$field_type, $name { $($tail)* } -> (
127 |                 $($result)*
128 |                 #[$attribute_macro]
129 |                 pub $field : $field_type<$type>,
130 |             ));
131 |         );
132 | 
133 |         /*( @$field_type:ident, $name:ident { $field:ident : $type:ty } -> ($($result:tt)*) ) => (
134 |             make_field_struct!(@$field_type, $name {  } -> (
135 |                 $($result)*
136 |                 pub $field : $field_type<$type>,
137 |             ));
138 |         );*/
139 | 
140 |         ( $field_type:ident, $name:ident { $($tail:tt)* } ) => (
141 |             make_field_struct!(@$field_type, $name { $($tail)* } -> ());
142 |         );
143 |     }
144 | 
145 |     #[macro_export]
146 |     macro_rules! impl_enum {
147 |         ( @$name:ident { } -> ($($result:tt)*) ) => (
148 |             impl $name {
149 |                 $($result)*
150 |             }
151 |         );
152 | 
153 |         ( @$name:ident { $field:ident : $type:ty, $($tail:tt)* } -> ($($result:tt)*) ) => (
154 |             impl_enum!(@$name { $($tail)* } -> (
155 |                 $($result)*
156 |                 pub fn $field(&self) -> $type {
157 |                     match self {
158 |                         Self::Light(detail) => detail.$field.value.clone(),
159 |                         Self::Full(detail) => detail.$field.value.clone(),
160 |                     }
161 |                 }
162 | 
163 |                 paste::item! {
164 |                     // Would love if there was some way to tell if $type was a primitive or not, and therefore whether we should generate
165 |                     // set_field vs. set_field_full.
166 |                     #[allow(dead_code)]
167 |                     #[allow(clippy::ptr_arg)]
168 |                     pub fn [< set_ $field >] (&mut self, val: &$type, offset: usize) {
169 |                         match self {
170 |                             Self::Light(detail) => detail.$field = FieldLight::<$type>::new(val.to_owned()),
171 |                             Self::Full(detail) => detail.$field = FieldFull::<$type>::new(val.to_owned(), offset)
172 |                         }
173 |                     }
174 | 
175 |                     #[allow(dead_code)]
176 |                     #[allow(clippy::ptr_arg)]
177 |                     pub fn [< set_ $field _full >] (&mut self, val: &$type, offset: usize, len: u32) {
178 |                         match self {
179 |                             Self::Light(detail) => detail.$field = FieldLight::<$type>::new(val.to_owned()),
180 |                             Self::Full(detail) => detail.$field = FieldFull::<$type>::new_with_len(val.to_owned(), offset, len)
181 |                         }
182 |                     }
183 |                 }
184 |             ));
185 |         );
186 | 
187 |         ( @$name:ident { $field:ident : $type:ty; $attribute_macro:meta, $($tail:tt)* } -> ($($result:tt)*) ) => (
188 |             impl_enum!(@$name { $field : $type, $($tail)* } -> ($($result)*));
189 |         );
190 | 
191 |         ( $name:ident { $($tail:tt)* } ) => (
192 |             impl_enum!(@$name { $($tail)* } -> ());
193 |         );
194 |     }
195 | 
196 |     /// This macro generates three objects:
197 |     ///     {class_name_prefix}Light: A struct which contains FieldLight objects for each field (value only)
198 |     ///     {class_name_prefix}Full: A struct which contains FieldFull objects for each field (value, offset, and length)
199 |     ///     {class_name_prefix}Enum: An enum with variants for the above two structs
200 |     /// Accessor and setter functions are created on {class_name_prefix}Enum for each field.
201 |     /// A `default()` function is created for {class_name_prefix}Enum which creates a default {class_name_prefix}Light variant
202 |     #[macro_export]
203 |     macro_rules! make_file_offset_structs {
204 |         (
205 |             $class_name_prefix:ident {
206 |             $($tail:tt)*
207 |             }
208 |         ) => {
209 |             paste::item!{
210 |                 make_field_struct! ( FieldLight, [<$class_name_prefix Light>] { $($tail)* } );
211 |                 make_field_struct! ( FieldFull, [<$class_name_prefix Full>] { $($tail)* } );
212 | 
213 |                 #[derive(Clone, Debug, Eq, PartialEq, Serialize)]
214 |                 pub enum [<$class_name_prefix Enum>]  {
215 |                     Light(Box<[<$class_name_prefix Light>]>),
216 |                     Full(Box<[<$class_name_prefix Full>]>),
217 |                 }
218 | 
219 |                 impl Default for [<$class_name_prefix Enum>] {
220 |                     fn default() -> Self {
221 |                         Self::Light(Box::default())
222 |                     }
223 |                 }
224 | 
225 |                 impl_enum! ( [<$class_name_prefix Enum>] { $($tail)* } );
226 |             }
227 |         }
228 |     }
229 | 
230 |     /// This macro generates code which uses nom to read the specified data at the current $input.
231 |     /// If $get_full_field_info is true, it will also determine the offset into the current buffer and
232 |     /// the length of the data and generate the appropriate FieldTrait (FieldFull or FieldLight) object.
233 |     /// Finally it will set the field in $struct_enum.
234 |     /// Note that the value is made available outside of the macro in $var to ensure that the
235 |     /// calling code has access to it for additional processing.
236 |     /// Ex: read_value_offset_length! { input, start_pos_ptr, get_full_field_info, detail_enum, key_node_flag_bits, u16, le_u16 }
237 |     #[macro_export]
238 |     macro_rules! read_value_offset_length {
239 |         (
240 |             $input: ident,
241 |             $start_pos: ident,
242 |             $get_full_field_info: ident,
243 |             $struct_enum: ident,
244 |             $var: ident,
245 |             $var_type: ident,
246 |             $nom_fn: ident
247 |         ) => {
248 |             let $var: $var_type;
249 |             let cur_offset;
250 |             if $get_full_field_info {
251 |                 cur_offset = $input.as_ptr() as usize - $start_pos;
252 |             } else {
253 |                 cur_offset = 0; // cur_offset isn't used if !$get_full_field_info
254 |             }
255 | 
256 |             let ($input, val) = $nom_fn($input)?;
257 |             $var = val;
258 | 
259 |             paste::item! {
260 |                 // ex: detail_enum.set_key_node_flag_bits(&key_node_flag_bits, cur_offset)
261 |                 $struct_enum.[< set_ $var >](&$var, cur_offset);
262 |             }
263 |         };
264 |     }
265 | 
266 |     /// This macro creates $enum_var of type {$class_name_prefix}Enum
267 |     /// and initializes the appropriate variant depending on the value of $get_full_field_info
268 |     #[macro_export]
269 |     macro_rules! init_value_enum {
270 |         (
271 |             $class_name_prefix:ident,
272 |             $enum_var:ident,
273 |             $get_full_field_info:ident
274 |         ) => {
275 |             paste::item! {
276 |                 let mut $enum_var: [<$class_name_prefix Enum>] = if $get_full_field_info {
277 |                     [<$class_name_prefix Enum>]::Full(Box::default())
278 |                 } else {
279 |                     [<$class_name_prefix Enum>]::Light(Box::default())
280 |                 };
281 |             }
282 |         };
283 |     }
284 | }
285 | 


--------------------------------------------------------------------------------