├── .github └── workflows │ └── rust.yml ├── .gitignore ├── .travis.yml ├── Cargo.toml ├── LICENSE ├── Makefile ├── README.md ├── alice-download ├── Cargo.toml ├── README.md └── src │ └── main.rs ├── alice-open-data ├── Cargo.toml ├── README.md └── src │ ├── lib.rs │ └── tests.rs ├── examples ├── convert_to_msgpack │ ├── Cargo.toml │ ├── README.md │ ├── analyze.py │ └── src │ │ └── main.rs └── simple-analysis │ ├── Cargo.toml │ ├── README.md │ ├── result.png │ └── src │ ├── distribution.rs │ └── main.rs ├── histogram ├── Cargo.toml ├── README.md └── src │ └── lib.rs ├── malice ├── Cargo.toml ├── README.md ├── benches │ └── rust_vs_root.rs └── src │ ├── event.rs │ ├── lib.rs │ ├── primary_vertex.rs │ ├── track.rs │ └── utils.rs ├── root-io ├── Cargo.toml ├── README.md ├── benches │ └── iter_branch.rs ├── src │ ├── code_gen │ │ ├── mod.rs │ │ ├── rust.rs │ │ └── utils.rs │ ├── core │ │ ├── data_source.rs │ │ ├── file.rs │ │ ├── file_item.rs │ │ ├── mod.rs │ │ ├── parsers.rs │ │ ├── tkey.rs │ │ ├── tstreamer.rs │ │ ├── tstreamerinfo.rs │ │ ├── typeid.rs │ │ └── types.rs │ ├── lib.rs │ ├── test_data │ │ ├── HZZ-lz4.root │ │ ├── HZZ-lzma.root │ │ ├── HZZ-uncompressed.root │ │ ├── HZZ-zlib.root │ │ ├── HZZ.root │ │ ├── README.md │ │ ├── Zmumu-lz4.root │ │ ├── Zmumu-lzma.root │ │ ├── Zmumu-uncompressed.root │ │ ├── Zmumu-zlib.root │ │ ├── Zmumu.root │ │ ├── foriter.root │ │ ├── foriter2.root │ │ ├── mc10events.root │ │ ├── nesteddirs.root │ │ ├── sample-5.23.02-uncompressed.root │ │ ├── sample-5.23.02-zlib.root │ │ ├── sample-5.24.00-uncompressed.root │ │ ├── sample-5.24.00-zlib.root │ │ ├── sample-5.25.02-uncompressed.root │ │ ├── sample-5.25.02-zlib.root │ │ ├── sample-5.26.00-uncompressed.root │ │ ├── sample-5.26.00-zlib.root │ │ ├── sample-5.27.02-uncompressed.root │ │ ├── sample-5.27.02-zlib.root │ │ ├── sample-5.28.00-uncompressed.root │ │ ├── sample-5.28.00-zlib.root │ │ ├── sample-5.29.02-uncompressed.root │ │ ├── sample-5.29.02-zlib.root │ │ ├── 
sample-5.30.00-lzma.root │ │ ├── sample-5.30.00-uncompressed.root │ │ ├── sample-5.30.00-zlib.root │ │ ├── sample-6.08.04-lzma.root │ │ ├── sample-6.08.04-uncompressed.root │ │ ├── sample-6.08.04-zlib.root │ │ ├── sample-6.10.05-lz4.root │ │ ├── sample-6.10.05-lzma.root │ │ ├── sample-6.10.05-uncompressed.root │ │ ├── sample-6.10.05-zlib.root │ │ ├── simple.root │ │ ├── small-evnt-tree-fullsplit.root │ │ └── small-flat-tree.root │ ├── test_utils.rs │ ├── tests │ │ ├── basic_io.rs │ │ └── mod.rs │ ├── tree_reader │ │ ├── branch.rs │ │ ├── container.rs │ │ ├── leafs.rs │ │ ├── mod.rs │ │ └── tree.rs │ └── utils.rs └── tests │ ├── high_level_io.rs │ ├── read_esd.rs │ └── read_simple.rs └── root-ls ├── Cargo.toml ├── README.md └── src └── main.rs /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: test suite 2 | on: [push] 3 | 4 | jobs: 5 | test: 6 | name: cargo test 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v3 10 | - uses: dtolnay/rust-toolchain@stable 11 | with: 12 | components: clippy, rustfmt 13 | - run: cargo clippy 14 | - run: cargo fmt -- --check 15 | - run: cargo run --bin alice-download --release -- 1 16 | - run: cargo test --all-features 17 | env: 18 | RUST_BACKTRACE: 1 19 | wasm: 20 | name: build wasm 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v3 24 | - uses: dtolnay/rust-toolchain@stable 25 | with: 26 | targets: wasm32-unknown-unknown 27 | - run: cargo build -p malice --target wasm32-unknown-unknown 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | **/*.rs.bk 3 | *.lock 4 | */target/* 5 | *tar.gz 6 | *.clang_complete 7 | *.root 8 | 9 | .idea/ 10 | -------------------------------------------------------------------------------- /.travis.yml: 
-------------------------------------------------------------------------------- 1 | language: rust 2 | os: 3 | - linux 4 | - osx 5 | rust: 6 | - stable 7 | - beta 8 | 9 | before_install: 10 | - rustup component add rustfmt clippy 11 | - rustup target add wasm32-unknown-unknown 12 | 13 | script: 14 | - cargo +$TRAVIS_RUST_VERSION fmt -- --check 15 | - cargo +$TRAVIS_RUST_VERSION clippy 16 | - cargo +$TRAVIS_RUST_VERSION build -p malice --target wasm32-unknown-unknown --verbose 17 | - cargo +$TRAVIS_RUST_VERSION build --verbose --all 18 | # We need to download some files before we can run the tests. 19 | - cd alice-download && cargo run -- 1 && cd .. 20 | - cargo +$TRAVIS_RUST_VERSION test --verbose --all 21 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "alice-download", 4 | "alice-open-data", 5 | "histogram", 6 | "root-io", 7 | "root-ls", 8 | "malice", 9 | "examples/simple-analysis", 10 | "examples/convert_to_msgpack", 11 | ] 12 | 13 | [workspace.dependencies] 14 | alice-open-data = { version="0.5", path="alice-open-data" } 15 | nom = "^7" 16 | reqwest = "0.11" 17 | tokio = { version = "1", features = ["rt-multi-thread", "macros"] } 18 | failure = "0.1" 19 | futures = "0.3" 20 | root-io = { version="0.3.0", path="root-io" } 21 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: qa 2 | qa: 3 | cargo fmt -- --check 4 | cargo clippy 5 | cargo test --all 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ALICE-rs # 2 | 3 | 4 | **This is not an official ALICE or CERN project** 5 | 6 | 7 | This repository demonstrates how the [public 
data](http://opendata.cern.ch/collection/ALICE-Reconstructed-Data) released by the CERN based ALICE collaboration can be analyzed with the Rust programming language. 8 | 9 | Here are some example figures produced with this project. The first two show the distribution of produced particles in the longitudinal and azimuthal dimensions. The third figure shows where the collisions (aka events) took place along the direction of the particle beams in the collider. The last figure shows how many charged particles were produced per collision: 10 | 11 | ![result-plot](./examples/simple-analysis/result.png) 12 | 13 | This collection of crates provides and demonstrates: 14 | * A [parser](https://github.com/cbourjau/alice-rs/tree/master/root-io) for reading the [ROOT](https://root.cern.ch/) binary file format. This is the standard format used in particle physics. 15 | * The [mALICE](https://github.com/cbourjau/alice-rs/tree/master/malice) framework providing some essential tools for writing an analysis of the public data. 16 | * A library/binary for downloading a desired amount of the publicly released data. See the [alice-download](https://github.com/cbourjau/alice-rs/tree/master/alice-download) crate. The [alice-open-data](https://github.com/cbourjau/alice-rs/tree/master/alice-open-data) crate provides some conveniences for later locating and reading the downloaded files 17 | * High performance n-dimensional histograms for streaming data. 18 | Maintains a binned count of the data which can be successively filled. See [histogram](https://github.com/cbourjau/alice-rs/tree/master/histogram) 19 | * **A simple analysis demonstrating how it all plays together. See [examples/simple-analysis](https://github.com/cbourjau/alice-rs/tree/master/examples/simple-analysis)** 20 | 21 | ## Dependencies 22 | 23 | There are no dependencies on any ALICE specific software, nor ROOT itself. The software from this repository can be run on Linux, MacOS, and Windows. 
24 | 25 | ## CERN, LHC, and ALICE 26 | 27 | ALICE (A Large Ion Collider Experiment) is the dedicated Heavy Ion experiment at the Large Hadron Collider (LHC) in Switzerland. Just like most other CERN based experiments, its goal is to better understand the fundamental particles and forces of the universe. In particular, ALICE is concerned with the so-called strong force which is the dominant actor for processes within atomic nuclei. Many of the properties of this force manifest themselves at extreme pressures and temperatures as they were found microseconds after the Big Bang. By colliding heavy nuclei such as lead ions at nearly the speed of light, we are able to recreate such extreme conditions for a very brief moment within the ALICE detector. By carefully studying the particles produced at such collisions we can deduce the properties of the strong force which will help us to better understand nuclear reactions, neutron stars, the first moments of the universe and much more. 28 | 29 | ## CERN open data 30 | 31 | ALICE, as well as some other CERN based experiments, has released a small subset of its recorded data into the public domain. The dataset in question for this project is in total approximately 6TB. While some analyses really need all the data they can get, others (like the example one) can be run on just ~1GB. With this project it is perfectly feasible to download the complete public dataset to a large external hard drive and run a complex analysis overnight on a laptop. 32 | 33 | ## Performance 34 | 35 | The [standard implementation](https://root.cern.ch/) for reading ROOT files is written in C++. I wrote a [wrapper](https://github.com/cbourjau/alice-sys) to interface with the standard implementation to do exactly the same kind of work which is being done in the Rust code. I ran both implementations on exactly the same set of input files. The results were identical in both cases. I once ran with a warm cache and once with a cold one. 
The benchmarks were done with [`hyperfine`](https://github.com/sharkdp/hyperfine). Below are the results: 36 | 37 | - Warm cache 38 | ``` bash 39 | $ hyperfine --warmup=1 './target/release/deps/malice-0650b02bfc3cb85f bench_rust' './target/release/deps/malice-0650b02bfc3cb85f bench_cpp' 40 | Benchmark #1: ./target/release/deps/malice-0650b02bfc3cb85f bench_rust 41 | 42 | Time (mean ± σ): 6.305 s ± 0.187 s [User: 5.392 s, System: 0.909 s] 43 | 44 | Range (min … max): 6.156 s … 6.788 s 45 | 46 | Benchmark #2: ./target/release/deps/malice-0650b02bfc3cb85f bench_cpp 47 | 48 | Time (mean ± σ): 8.234 s ± 0.116 s [User: 6.888 s, System: 1.347 s] 49 | 50 | Range (min … max): 8.098 s … 8.406 s 51 | ``` 52 | 53 | - Cold cache 54 | 55 | ``` bash 56 | $ hyperfine -p 'sync; echo 3 | sudo tee /proc/sys/vm/drop_caches' './target/release/deps/malice-0650b02bfc3cb85f bench_rust' './target/release/deps/malice-0650b02bfc3cb85f bench_cpp' 57 | Benchmark #1: ./target/release/deps/malice-0650b02bfc3cb85f bench_rust 58 | 59 | Time (mean ± σ): 10.894 s ± 0.180 s [User: 6.750 s, System: 1.348 s] 60 | 61 | Range (min … max): 10.619 s … 11.213 s 62 | 63 | Benchmark #2: ./target/release/deps/malice-0650b02bfc3cb85f bench_cpp 64 | 65 | Time (mean ± σ): 18.557 s ± 0.464 s [User: 8.519 s, System: 2.691 s] 66 | 67 | Range (min … max): 17.775 s … 19.156 s 68 | 69 | ``` 70 | 71 | In both cases, the Rust version processed the set of files ~30% (warm cache) and ~70% (cold cache) faster. The cold cache is arguably the more realistic benchmark since one will visit each file in a set only once per analysis. Note that these benchmarks used no concurrency at all. The real value of the Rust version is that it can easily be used multithreaded, while the ROOT-implementation cannot. 72 | 73 | Note also, that the standard ALICE framework, which is built on top of ROOT, was not benchmarked. 
The ALICE framework always reads in and decompresses all the data of a given file, even though a normal analysis only needs to access less than 10% of it. This makes the standard ALICE framework significantly less performant than this set of crates. 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | ## Why this project? 83 | 84 | I started this project with the intent of learning the Rust programming language. It did not take long until I was plainly fascinated by its ease of use, all the little gems like the debug formatting of bitflags, and the nevertheless uncompromising speed. 85 | 86 | In the meantime I was able to strip off more and more dependencies on the ALICE software stack (aka. [AliRoot](https://github.com/alisw/AliRoot) and [AliPhysics](https://github.com/alisw/AliPhysics)). Finally I reached the point where I was able to drop all of the ~5M lines of code. 87 | I realized that this project might be of interest to a wider group of people who would like to use the ALICE public data but are understandably lost in the ALICE software stack. Currently, the only way to analyze the published data is through that huge and largely undocumented ALICE framework, which I consider an almost insurmountable entry barrier. Even if somebody does not want to learn Rust, this repository might still provide valuable clues on how to analyze ALICE's data with minimal dependencies. 88 | 89 | Perhaps not surprisingly, removing so much code and indirection from the analysis improved the performance significantly. 90 | 91 | ## Running tests 92 | This project uses `cargo` as a build tool. Running wasm tests requires a CORS proxy for the time being. 
If the proxy is set up with a route at `127.0.0.1:3030/opendata` (standard in the not yet released CERN CORS proxy) one may run: 93 | 94 | ``` bash 95 | cd ./alice-open-data/ 96 | wasm-pack test --firefox --headless 97 | ``` 98 | 99 | ## How to cite this project in a publication 100 | 101 | ``` 102 | @Misc{, 103 | author = {Christian Bourjau}, 104 | title = {{mALICE}: An open source framework for analyzing ALICE's Open Data}, 105 | year = {2018--}, 106 | url = "https://github.com/cbourjau/alice-rs/" 107 | } 108 | ``` 109 | 110 | ### Publications 111 | - https://journals.aps.org/prc/abstract/10.1103/PhysRevC.99.054910 112 | -------------------------------------------------------------------------------- /alice-download/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "alice-download" 3 | version = "0.4.0" 4 | authors = ["cbourjau "] 5 | description = "CLI tool to download a given amount of the publicly released ALICE open data" 6 | repository = "https://github.com/cbourjau/alice-rs" 7 | readme = "README.md" 8 | keywords = ["cern", "alice", "lhc", "physics", "data"] 9 | categories = ["science"] 10 | license = "MPL-2.0" 11 | edition = "2021" 12 | 13 | [dependencies] 14 | indicatif = "0.16" 15 | clap = "^2" 16 | alice-open-data = { workspace=true } 17 | failure = { workspace=true } 18 | tokio = { workspace=true } 19 | -------------------------------------------------------------------------------- /alice-download/README.md: -------------------------------------------------------------------------------- 1 | # alice-download 2 | 3 | This is a small tool which is capable of downloading a specified amount of the ALICE open data. The data is stored in `~/lhc_open_data`. 
4 | 5 | ## Usage 6 | 7 | Do the following in order to download 5GB of ALICE's data 8 | 9 | ```bash 10 | cargo install alice-download 11 | alice-download 5 12 | ``` 13 | -------------------------------------------------------------------------------- /alice-download/src/main.rs: -------------------------------------------------------------------------------- 1 | use alice_open_data::*; 2 | use clap::{crate_version, value_t, App, Arg}; 3 | use indicatif::{ProgressBar, ProgressStyle}; 4 | 5 | use tokio::runtime::Runtime; 6 | 7 | fn main() { 8 | ::std::process::exit(match do_thing() { 9 | Ok(()) => 0, 10 | Err(err) => { 11 | eprintln!("error: {:?}", err); 12 | 1 13 | } 14 | }); 15 | } 16 | 17 | fn do_thing() -> Result<(), failure::Error> { 18 | let matches = App::new("alice-download") 19 | .version(crate_version!()) 20 | .about( 21 | "Download specified amount of ALICE open Pb--Pb data to ~/lhc_open_data \ 22 | Visit `http://opendata.cern.ch/search?cc=ALICE-Reconstructed-Data` for \ 23 | more information.", 24 | ) 25 | .arg( 26 | Arg::with_name("amount") 27 | .help( 28 | "Download amount specified in GB. Does not re-download existing files. \ 29 | 1GB is enough for simple debugging. 
Use 50+GB to make meaningful \ 30 | plots.", 31 | ) 32 | .required(true) 33 | .index(1), 34 | ) 35 | .get_matches(); 36 | let max_vol = value_t!(matches.value_of("amount"), u64)?; 37 | // convert from GB to B 38 | let max_vol = max_vol * (1_000_000_000); 39 | let runs = [139_038, 139_173, 139_437, 139_438, 139_465]; 40 | 41 | // size of existing files 42 | let base_dir = data_dir()?; 43 | let mut total: u64 = 0; 44 | for entry in all_files_10h()?.iter() { 45 | let data = entry.metadata()?; 46 | if data.is_file() { 47 | total += data.len(); 48 | } 49 | } 50 | if total >= max_vol { 51 | return Ok(()); 52 | } 53 | let rt = Runtime::new()?; 54 | let urls = runs 55 | .iter() 56 | .map(|r| rt.block_on(get_file_list(*r))) 57 | .collect::, _>>()?; 58 | let pbar = ProgressBar::new(max_vol); 59 | pbar.set_style( 60 | ProgressStyle::default_bar().template("[{elapsed_precise}] {bar:40.cyan/blue} ETA: {eta}"), 61 | ); 62 | pbar.inc(total); 63 | for url in urls.iter().flat_map(|r| r.iter()) { 64 | if total < max_vol { 65 | let size = rt.block_on(download(base_dir.clone(), url.clone()))? 
as u64; 66 | pbar.inc(size); 67 | total += size; 68 | } else { 69 | break; 70 | } 71 | } 72 | Ok(()) 73 | } 74 | -------------------------------------------------------------------------------- /alice-open-data/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "alice-open-data" 3 | version = "0.5.0" 4 | authors = ["cbourjau "] 5 | description = "Tools to download and manage the publicly released ALICE open data" 6 | repository = "https://github.com/cbourjau/alice-rs" 7 | readme = "README.md" 8 | keywords = ["cern", "alice", "lhc", "physics", "data"] 9 | categories = ["science"] 10 | license = "MPL-2.0" 11 | edition = "2021" 12 | 13 | [dependencies] 14 | glob = "0.3" 15 | failure = { workspace=true } 16 | reqwest = { workspace=true } 17 | 18 | [target.'cfg(not(target_arch = "wasm32"))'.dependencies] 19 | dirs = "1" 20 | 21 | [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] 22 | tokio = { workspace=true } 23 | 24 | [target.'cfg(target_arch = "wasm32")'.dev-dependencies] 25 | futures = { workspace=true } 26 | js-sys = "0.3" 27 | wasm-bindgen = "0.2" 28 | wasm-bindgen-futures = "0.4" 29 | wasm-bindgen-test = "0.3" 30 | web-sys = {"version" = "0.3", "features" = [ 'console' ]} 31 | 32 | -------------------------------------------------------------------------------- /alice-open-data/README.md: -------------------------------------------------------------------------------- 1 | # alice-open-data 2 | 3 | [![Crates.io Version](https://img.shields.io/crates/v/alice-open-data.svg)](https://crates.io/crates/alice-open-data) 4 | 5 | This crate provides some conveniences for other crates that have to access ALICE's data. 
It takes care of the remote as well as local data 6 | 7 | ## Documentation 8 | https://docs.rs/alice-open-data/ 9 | -------------------------------------------------------------------------------- /alice-open-data/src/lib.rs: -------------------------------------------------------------------------------- 1 | #[cfg(not(target_arch = "wasm32"))] 2 | use std::{ 3 | fs::{DirBuilder, File}, 4 | io::Write, 5 | path::PathBuf, 6 | }; 7 | 8 | use failure::{format_err, Error}; 9 | use reqwest::{Client, Url}; 10 | 11 | #[cfg(test)] 12 | mod tests; 13 | 14 | fn root_url() -> Url { 15 | if cfg!(target_arch = "wasm32") { 16 | // Proxy with CORS properly set 17 | "http://127.0.0.1:3030/opendata/".parse().unwrap() 18 | } else { 19 | "http://opendata.web.cern.ch".parse().unwrap() 20 | } 21 | } 22 | 23 | /// Download the given file to the local collection 24 | #[cfg(not(target_arch = "wasm32"))] 25 | pub async fn download(base_dir: PathBuf, url: Url) -> Result { 26 | let mut dest = base_dir; 27 | let mut sub_dir = url.path().to_owned(); 28 | // Remove the leading "\" from the original path 29 | sub_dir.remove(0); 30 | dest.push(sub_dir); 31 | // Do not re-download if the file already exists 32 | if dest.exists() { 33 | return Ok(0); 34 | } 35 | // Make sure the dir exists 36 | if let Some(dir) = dest.parent() { 37 | DirBuilder::new().recursive(true).create(dir)?; 38 | } 39 | // Try downloading with re-tries 40 | let mut n_retries = 3; 41 | let bytes = loop { 42 | let result = try_download(url.clone()).await; 43 | if result.is_ok() || n_retries <= 0 { 44 | break result; 45 | } else { 46 | n_retries -= 1; 47 | } 48 | }?; 49 | let mut f = File::create(dest)?; 50 | Ok(f.write(&bytes)?) 51 | } 52 | 53 | async fn try_download(url: Url) -> Result, Error> { 54 | let resp = Client::new().get(url).send().await?; 55 | Ok(resp 56 | .error_for_status()? 57 | .bytes() 58 | .await? 
59 | .into_iter() 60 | .collect()) 61 | } 62 | 63 | /// Base path to the local ALICE open data directory 64 | #[cfg(not(target_arch = "wasm32"))] 65 | pub fn data_dir() -> Result { 66 | let mut dir = dirs::home_dir().ok_or_else(|| format_err!("No home directory"))?; 67 | dir.push("lhc_open_data"); 68 | Ok(dir) 69 | } 70 | 71 | /// Hardcoded path to a specific file. Useful for testing. 72 | /// That file should be the the first to be downloaded automatically. 73 | #[cfg(not(target_arch = "wasm32"))] 74 | pub fn test_file() -> Result { 75 | let mut dir = data_dir()?; 76 | dir.push("eos/opendata/alice/2010/LHC10h/000139038/ESD/0001/AliESDs.root"); 77 | Ok(dir) 78 | } 79 | 80 | /// Path to all files of `LHC10h` 81 | #[cfg(not(target_arch = "wasm32"))] 82 | pub fn all_files_10h() -> Result, Error> { 83 | let mut search_dir = data_dir()?; 84 | search_dir.push("**/AliESDs.root"); 85 | let files: Vec<_> = glob::glob(search_dir.to_str().unwrap()) 86 | .expect("Can't resolve glob") 87 | .map(|path| path.unwrap()) 88 | .collect(); 89 | Ok(files) 90 | } 91 | 92 | pub async fn get_file_list(run: u32) -> Result, Error> { 93 | // Due to CORS we have to change the urls based on the target for now 94 | let uri = root_url().join(match run { 95 | 139_038 => "record/1102/files/ALICE_LHC10h_PbPb_ESD_139038_file_index.txt", 96 | 139_173 => "record/1103/files/ALICE_LHC10h_PbPb_ESD_139173_file_index.txt", 97 | 139_437 => "record/1104/files/ALICE_LHC10h_PbPb_ESD_139437_file_index.txt", 98 | 139_438 => "record/1105/files/ALICE_LHC10h_PbPb_ESD_139438_file_index.txt", 99 | 139_465 => "record/1106/files/ALICE_LHC10h_PbPb_ESD_139465_file_index.txt", 100 | _ => return Err(format_err!("Invalid run number")), 101 | })?; 102 | 103 | let req = Client::new().get(uri); 104 | let resp = req.send().await?; 105 | if resp.status().is_success() { 106 | let content = resp.text().await?; 107 | content 108 | .lines() 109 | .map(|l| root_url().join(&l[26..]).map_err(Into::into)) 110 | .collect() 111 | } else { 
112 | Err(format_err!("Could not download list of files")) 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /alice-open-data/src/tests.rs: -------------------------------------------------------------------------------- 1 | #![cfg(test)] 2 | //! Integration tests of this library. It seems like 3 | //! wasm-bindgen-tests does not properly pick up async integration 4 | //! tests in a /tests dir. Hence this hack of having this directory 5 | //! inside the /src folder. 6 | 7 | use reqwest::Client; 8 | 9 | use crate::*; 10 | 11 | async fn download_partial() { 12 | use reqwest::header::RANGE; 13 | let client = Client::builder().build().unwrap(); 14 | let url = get_file_list(139_038).await.unwrap()[0].clone(); 15 | let (start, len) = (13993603, 68936); 16 | let rsp = client 17 | .get(url) 18 | .header("User-Agent", "alice-rs") 19 | .header(RANGE, &format!("bytes={}-{}", start, start + len - 1)) 20 | .send() 21 | .await 22 | .unwrap(); 23 | 24 | let partial = rsp.error_for_status().unwrap().bytes().await.unwrap(); 25 | assert_eq!(partial.len(), len); 26 | #[cfg(not(target_arch = "wasm32"))] 27 | { 28 | let from_disc = std::fs::read(test_file().unwrap()).unwrap(); 29 | assert!(partial 30 | .iter() 31 | .skip(start) 32 | .zip(from_disc.iter()) 33 | .all(|(el1, el2)| el1 == el2)); 34 | } 35 | } 36 | 37 | async fn test_get_file_lists() { 38 | let runs = [139_038, 139_173, 139_437, 139_438, 139_465]; 39 | for run in runs.iter() { 40 | println!("Testing run {}", run); 41 | super::get_file_list(*run).await.unwrap(); 42 | } 43 | } 44 | 45 | async fn test_download_file() { 46 | let uris = &super::get_file_list(139038).await.unwrap(); 47 | Client::new().get(uris[0].clone()).send().await.unwrap(); 48 | } 49 | 50 | #[cfg(not(target_arch = "wasm32"))] 51 | #[cfg(test)] 52 | mod tests_x86 { 53 | use std::{env, fs}; 54 | 55 | #[tokio::test] 56 | async fn download_partial() { 57 | super::download_partial().await; 58 | } 59 | 60 | 
#[tokio::test] 61 | async fn test_get_file_lists() { 62 | super::test_get_file_lists().await; 63 | } 64 | 65 | #[tokio::test] 66 | async fn test_download_file() { 67 | super::test_download_file().await; 68 | } 69 | 70 | #[tokio::test] 71 | async fn test_download_file_high_level() { 72 | let uri = crate::get_file_list(139038).await.unwrap()[0].clone(); 73 | { 74 | // Remobe old stuff: 75 | let mut dir = env::temp_dir(); 76 | dir.push("eos"); 77 | if dir.exists() { 78 | fs::remove_dir_all(dir).unwrap(); 79 | } 80 | } 81 | let base_dir = env::temp_dir(); 82 | // Download if file does not exist 83 | assert_eq!( 84 | super::download(base_dir.clone(), uri.clone()) 85 | .await 86 | .unwrap(), 87 | 14283265 88 | ); 89 | // Don't download twice 90 | assert_eq!( 91 | super::download(base_dir.clone(), uri.clone()) 92 | .await 93 | .unwrap(), 94 | 0 95 | ); 96 | } 97 | } 98 | 99 | #[cfg(target_arch = "wasm32")] 100 | mod test_wasm { 101 | wasm_bindgen_test_configure!(run_in_browser); 102 | 103 | use wasm_bindgen_test::{wasm_bindgen_test, wasm_bindgen_test_configure}; 104 | 105 | #[wasm_bindgen_test] 106 | async fn download_partial() { 107 | super::download_partial().await; 108 | } 109 | 110 | #[wasm_bindgen_test] 111 | async fn test_get_file_lists() { 112 | super::test_get_file_lists().await; 113 | } 114 | 115 | #[wasm_bindgen_test] 116 | async fn test_download_file() { 117 | super::test_download_file().await; 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /examples/convert_to_msgpack/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "convert_to_msgpack" 3 | version = "0.1.0" 4 | authors = ["cbourjau "] 5 | edition = "2021" 6 | 7 | [dependencies] 8 | alice-open-data = { workspace=true } 9 | malice = { version="0.3.0", path="../../malice" } 10 | rmp-serde = "0.13.7" 11 | root-io = { workspace=true } 12 | serde = "1.0" 13 | serde_derive = "1.0" 14 | 
-------------------------------------------------------------------------------- /examples/convert_to_msgpack/README.md: -------------------------------------------------------------------------------- 1 | # Convert subset of data to msgpack 2 | 3 | This example demonstrates how to convert a small subset of the Open Data to the [`msgpack`](https://msgpack.org/) format. 4 | This might be useful as a quick and dirty way to analyze some data in Python (e.g. with [`msgpack-python`](https://github.com/msgpack/msgpack-python) or some other language. 5 | The file `./analyze.py` demonstrates how a subsequent analysis in Python may look like. 6 | 7 | To create the binary data file you to need: 8 | 9 | 1. [Install Rust](https://rustup.rs/) 10 | 2. Download some files using [`alice-download`](../../alice-download/README.md) from this repository 11 | 3. Run this example with `cargo run --release` (in this folder) 12 | 13 | -------------------------------------------------------------------------------- /examples/convert_to_msgpack/analyze.py: -------------------------------------------------------------------------------- 1 | """ 2 | Demonstration of how to analyze the json files produced in this exampl 3 | """ 4 | 5 | import msgpack 6 | 7 | if __name__ == "__main__": 8 | with open("./src/events.bin") as f: 9 | unpacker = msgpack.Unpacker(f, raw=False) 10 | for event in unpacker: 11 | print event.keys() 12 | 13 | -------------------------------------------------------------------------------- /examples/convert_to_msgpack/src/main.rs: -------------------------------------------------------------------------------- 1 | //! 
Simple (and dirty) way to dump a subset of the data from the .root files to json files 2 | use std::fs::File; 3 | use std::io::Write; 4 | 5 | use rmp_serde::Serializer; 6 | use serde::Serialize; 7 | use serde_derive::Serialize; 8 | 9 | use malice::event_iterator_from_files; 10 | use malice::{default_event_filter, default_track_filter, Event}; 11 | 12 | /// Struct holding all the information we want to dump to a new json 13 | /// file. 14 | #[derive(Debug, PartialEq, Serialize)] 15 | struct MiniEvent { 16 | multiplicity: u32, 17 | zvtx: f32, 18 | etas: Vec, 19 | phis: Vec, 20 | } 21 | 22 | impl<'a> From<&'a Event> for MiniEvent { 23 | fn from(event: &Event) -> Self { 24 | // Fill only if we have a valid primary vertex This fails if 25 | // there are events without a primary vertex, but such events 26 | // are not valid anyways! 27 | let prime_vtx = event 28 | .primary_vertex() 29 | .expect("Event has not primary vertex!"); 30 | 31 | let tracks: Vec<_> = event 32 | .tracks() 33 | // Apply a sensible default "cut" on the valid tracks 34 | .filter(|tr| default_track_filter(tr, &prime_vtx)) 35 | .collect(); 36 | let etas: Vec<_> = tracks.iter().map(|tr| tr.eta()).collect(); 37 | let phis: Vec<_> = tracks.iter().map(|tr| tr.phi()).collect(); 38 | 39 | Self { 40 | multiplicity: tracks.len() as u32, 41 | zvtx: prime_vtx.z, 42 | etas, 43 | phis, 44 | } 45 | } 46 | } 47 | 48 | fn main() { 49 | // Iterator over files of the Open Data set 50 | let files: Vec<_> = alice_open_data::all_files_10h() 51 | .expect("No data files found. 
Did you download with alice-open-data?"); 52 | 53 | // Create an iterator over `malice::event::Event`s 54 | let events = event_iterator_from_files(files.into_iter()); 55 | 56 | // Setup the output file 57 | let mut f = File::create("events.bin").expect("Could not create file!"); 58 | let mut event_counter = 0; 59 | // Iterate through all the _valid_ events; at most 10k 60 | for event in events.filter(default_event_filter).take(10_000) { 61 | let event = MiniEvent::from(&event); 62 | let mut buf = Vec::new(); 63 | event 64 | .serialize(&mut Serializer::new_named(&mut buf)) 65 | .unwrap(); 66 | // Write this event to the output file 67 | f.write_all(&buf).unwrap(); 68 | event_counter += 1; 69 | } 70 | println!("Wrote {} events to events.json", event_counter); 71 | } 72 | -------------------------------------------------------------------------------- /examples/simple-analysis/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "simple-analysis" 3 | version = "0.1.0" 4 | authors = ["cbourjau "] 5 | workspace = "../../" 6 | edition = "2021" 7 | 8 | [dependencies] 9 | alice-open-data = { version="0.5", path="../../alice-open-data" } 10 | failure = "0.1.1" 11 | gnuplot = "^0.0.22" 12 | histogram = {path = "../../histogram" } 13 | malice = { version = "0.3.0", path = "../../malice" } 14 | -------------------------------------------------------------------------------- /examples/simple-analysis/README.md: -------------------------------------------------------------------------------- 1 | # simple-analysis 2 | This crate demonstrates how all the other parts of this repository work together. 3 | 4 | ## How to run it 5 | First, download a few files using [`alice-download`](https://github.com/cbourjau/alice-rs/tree/master/alice-download). 
6 | Then compile and run the analysis in **release** mode from the `simple-analysis` folder 7 | ```shell 8 | alice-download 5 # Downloads 5GB to ~/lhc_open_data 9 | cargo run --release 10 | ``` 11 | Note that this analysis attempts to produce some figures using Gnuplot at the end. So make sure you have gnuplot installed as well. 12 | 13 | # What is happening? 14 | 15 | The `main.rs` shows how the IO part is spawned in its own thread. That thread sends `malice::Event`s as messages. The receiver is converted into an iterator over `Event`s. 16 | The analysis itself is implemented as a `fold` over this iterator. 17 | Note that this set up can easily be adapted to have `M` IO threads and `N` consuming analysis threads. 18 | The `crossbeam_channel` crate is a good fit for such a `mpmc` approach. 19 | 20 | The analysis itself should probably only consume events fitting some selection criteria. 21 | A reasonable event selection is provided by `malice::default_event_filter`. 22 | 23 | Within the analysis, one probably wants to filter the reconstructed tracks as well. Again, `malice` provides a reasonable default `malice::default_track_filter`. 24 | 25 | This example analysis also visualizes the results using the `gnuplot-rs` crate. 26 | The below figures are the result of this analysis. 27 | The top two figures show the distribution of particles in the longitudinal (`eta`) and azimuthal (`phi`) dimension. 28 | The bottom figure shows the distribution of where exactly the collisions took place within the detector. Namely, the collisions may be slightly displaced from the center of the detector along the beam axis. 
29 | 30 | ![result-plot](./result.png) 31 | -------------------------------------------------------------------------------- /examples/simple-analysis/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/examples/simple-analysis/result.png -------------------------------------------------------------------------------- /examples/simple-analysis/src/distribution.rs: -------------------------------------------------------------------------------- 1 | //! Measure a bunch of simple distribtions. 2 | //! - Single particle distribution in eta and phi 3 | //! - Distribution of events' primary vertices along the nominal 4 | //! interaction point along beam axis 5 | use std::f64::consts::PI; 6 | 7 | use failure::Error; 8 | use gnuplot::{AutoOption, AxesCommon, Figure, PlotOption, Tick}; 9 | 10 | use histogram::*; 11 | use malice::default_track_filter; 12 | use malice::Event; 13 | 14 | pub struct SimpleAnalysis { 15 | pub single_particles: Histogram, 16 | pub z_vertex: Histogram, 17 | pub multiplicity: Histogram, 18 | } 19 | 20 | impl SimpleAnalysis { 21 | pub fn new() -> SimpleAnalysis { 22 | // eta, phi, z 23 | let nphi = 120 / 2; 24 | let neta = 120 / 2; 25 | let nmult = 3000; 26 | let (nzvtx, zmin, zmax) = (100, -10., 10.); 27 | SimpleAnalysis { 28 | single_particles: HistogramBuilder::new() 29 | .add_equal_width_axis(neta, -0.8, 0.8) 30 | .add_equal_width_axis(nphi, 0., 2. 
* PI) 31 | .add_equal_width_axis(nzvtx, zmin, zmax) 32 | .build() 33 | .expect("Error building histogram"), 34 | z_vertex: HistogramBuilder::new() 35 | .add_equal_width_axis(nzvtx, zmin, zmax) 36 | .build() 37 | .expect("Error building histogram"), 38 | multiplicity: HistogramBuilder::new() 39 | .add_equal_width_axis(nmult, 0.0, nmult as f64) 40 | .build() 41 | .expect("Error building histogram"), 42 | } 43 | } 44 | } 45 | 46 | impl SimpleAnalysis { 47 | pub fn process_event(&mut self, event: &Event) { 48 | // Fill only if we have a valid primary vertex 49 | if let Some(prime_vtx) = event.primary_vertex() { 50 | for track in event 51 | .tracks() 52 | .filter(|tr| default_track_filter(tr, &prime_vtx)) 53 | { 54 | self.single_particles.fill(&[ 55 | f64::from(track.eta()), 56 | f64::from(track.phi()), 57 | f64::from(prime_vtx.z), 58 | ]) 59 | } 60 | 61 | self.z_vertex.fill(&[f64::from(prime_vtx.z)]); 62 | self.multiplicity.fill(&[event 63 | .tracks() 64 | .filter(|tr| default_track_filter(tr, &prime_vtx)) 65 | .count() as f64]); 66 | }; 67 | } 68 | 69 | /// Example of how one may write the results to disc 70 | pub fn write_to_disc(&self) -> Result<(), Error> { 71 | self.single_particles.dump_to_file("hybrid")?; 72 | self.z_vertex.dump_to_file("z_pos")?; 73 | Ok(()) 74 | } 75 | } 76 | 77 | impl SimpleAnalysis { 78 | /// Visualized the data using gnuplot-rs 79 | pub fn visualize(&self) { 80 | let mut fg = Figure::new(); 81 | let eta_bin_width: f64 = self.single_particles.widths(0)[0]; 82 | let plot_options = [PlotOption::Color("#d95f02"), PlotOption::FillAlpha(0.8)]; 83 | fg.axes2d() 84 | .set_pos_grid(2, 2, 0) 85 | .set_title("η track distribution", &[]) 86 | .set_x_label("η", &[]) 87 | .set_y_label("⟨dN_{ch} / dη ⟩_{event}", &[]) 88 | .boxes( 89 | &self.single_particles.centers(0), 90 | // Sum over phi and z 91 | self.single_particles 92 | .sum_axis(1) 93 | .sum_axis(1) 94 | .mul(1.0 / self.z_vertex.values().iter().sum::() / eta_bin_width) 95 | .values() 96 | .iter(), 
97 | &plot_options, 98 | ); 99 | 100 | let phi_bin_width = self.single_particles.widths(1)[0]; 101 | let x_ticks = vec![ 102 | Tick::Major(0.0, AutoOption::Fix("0".to_owned())), 103 | Tick::Major(0.5 * PI, AutoOption::Fix("0.5 π".to_owned())), 104 | Tick::Major(PI, AutoOption::Fix("π".to_owned())), 105 | Tick::Major(1.5 * PI, AutoOption::Fix("1.5π".to_owned())), 106 | Tick::Major(2.0 * PI, AutoOption::Fix("2π".to_owned())), 107 | ]; 108 | 109 | fg.axes2d() 110 | .set_pos_grid(2, 2, 1) 111 | .set_title("φ track distribution", &[]) 112 | .set_x_label("φ [rad]", &[]) 113 | .set_y_label("⟨dN_{ch} / dφ ⟩_{event}", &[]) 114 | .set_x_range(AutoOption::Fix(0.0), AutoOption::Fix(2.0 * PI)) 115 | .set_x_ticks_custom(x_ticks, &[], &[]) 116 | .boxes( 117 | &self.single_particles.centers(1), 118 | // Sum over eta and z 119 | self.single_particles 120 | .sum_axis(2) 121 | .sum_axis(0) 122 | .mul(1.0 / self.z_vertex.values().iter().sum::() / phi_bin_width) 123 | .values() 124 | .iter(), 125 | &plot_options, 126 | ); 127 | 128 | fg.axes2d() 129 | .set_pos_grid(2, 2, 2) 130 | .set_title("Primary vertex position", &[]) 131 | .set_x_label("z [cm]", &[]) 132 | .set_y_label("# events", &[]) 133 | .boxes( 134 | &self.z_vertex.centers(0), 135 | self.z_vertex.values().iter(), 136 | &plot_options, 137 | ); 138 | 139 | fg.axes2d() 140 | .set_pos_grid(2, 2, 3) 141 | .set_title("N_{ch} distribution", &[]) 142 | .set_x_label("N_{ch}", &[]) 143 | .set_y_label("# events", &[]) 144 | // .set_x_log(Some(10.0)) 145 | .set_y_log(Some(10.0)) 146 | .boxes( 147 | &self.multiplicity.centers(0), 148 | self.multiplicity.values().iter(), 149 | &plot_options, 150 | ); 151 | fg.show(); 152 | } 153 | 154 | /// Compute the centrality edges based on the N_ch/Event distribution 155 | pub fn compute_centrality_edges(&self) { 156 | let tot: f64 = self.multiplicity.values().iter().sum(); 157 | let cum: Vec<_> = self 158 | .multiplicity 159 | .values() 160 | .iter() 161 | .scan(0.0, |state, el| { 162 | *state += el; 
163 | Some(*state) 164 | }) 165 | // convert to %; 100% is first bin 166 | .map(|v| (1.0 - v / tot) * 100.0) 167 | // Bin width is one track, so we just enumerate to have the number of tracks 168 | .enumerate() 169 | .collect::>(); 170 | let percent_edges = (1..=10) 171 | .rev() 172 | .filter_map(|v| { 173 | let want_this_percent = (v * 10) as f64; 174 | cum.iter() 175 | .find(|bin_percent| bin_percent.1 <= want_this_percent) 176 | }) 177 | .collect::>(); 178 | println!("Number of valid tracks | less than %"); 179 | for cent_edge in percent_edges { 180 | println!("{:4} | {:3}%", cent_edge.0, cent_edge.1); 181 | } 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /examples/simple-analysis/src/main.rs: -------------------------------------------------------------------------------- 1 | use malice::default_event_filter; 2 | use malice::event_iterator_from_files; 3 | 4 | mod distribution; 5 | use distribution::SimpleAnalysis; 6 | 7 | fn main() { 8 | let files = alice_open_data::all_files_10h() 9 | .expect("No data files found. 
Did you download with alice-open-data?"); 10 | 11 | // Create an iterator over all the events in all the given files 12 | let events = event_iterator_from_files(files.into_iter()); 13 | 14 | let mut analysis = SimpleAnalysis::new(); 15 | for event in events.filter(default_event_filter) { 16 | analysis.process_event(&event); 17 | } 18 | // Optionally write results to disc 19 | analysis.write_to_disc().unwrap(); 20 | // Optionally compute the centrality bin edges and print them in the terminal 21 | analysis.compute_centrality_edges(); 22 | // Visualize the results of this analysis using gnuplot 23 | analysis.visualize(); 24 | } 25 | -------------------------------------------------------------------------------- /histogram/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "histogram" 3 | version = "0.1.0" 4 | authors = ["cbourjau "] 5 | edition = "2021" 6 | 7 | [dependencies] 8 | bincode = "1" 9 | failure = "0.1.1" 10 | ndarray = {version="0.15", features = ["serde-1"]} 11 | wasm-bindgen = "0.2" 12 | -------------------------------------------------------------------------------- /histogram/README.md: -------------------------------------------------------------------------------- 1 | # Histogram 2 | 3 | This crate provides a very minimalistic histogram. The idea is to have a persistent data structure which is continuously filled with a stream of data. It is a standard tool in statistics and thus in particle physics where most phenomena are described through probability distributions. 4 | 5 | Currently, this crate is really not particularly sophisticated. I just needed something to do the counting. It would be nice if the histograms of this crate could be more closely related to their underlying `ndarray`s, such that one can directly perform calculations on the histogram instead of the underlying `ndarray`. 
This should include operation such as adding two histograms or summing all bins along an axis reducing the dimensionality of the resulting histogram accordingly. 6 | -------------------------------------------------------------------------------- /histogram/src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::cmp::Ordering; 2 | use std::fs::File; 3 | use std::io::prelude::*; 4 | 5 | use bincode::serialize; 6 | use failure::Error; 7 | use ndarray as nd; 8 | use wasm_bindgen::prelude::*; 9 | 10 | // Re-export some ndarray things 11 | pub use nd::Axis; 12 | pub use nd::Dimension; 13 | pub use nd::IxDyn; 14 | 15 | #[wasm_bindgen] 16 | #[derive(Clone, Debug)] 17 | pub struct Histogram { 18 | edges: Vec>, 19 | bins: nd::Array, 20 | } 21 | 22 | #[wasm_bindgen] 23 | impl Histogram { 24 | /// Find the bin index containing `value` on `axis` 25 | /// Return None if the the value is not in range 26 | fn find_bin_index_axis(&self, axis: usize, value: f64) -> Option { 27 | let (edges1d, value) = (&self.edges[axis], value); 28 | edges1d.binary_search_by(|bin| bin.cmp_with(value)).ok() 29 | } 30 | 31 | /// Find indices of bins along each axis 32 | fn find_bin_indices(&self, values: &[f64]) -> Option> { 33 | (0..values.len()) 34 | .map(|dim| self.find_bin_index_axis(dim, values[dim])) 35 | .collect() 36 | } 37 | 38 | pub fn fill(&mut self, values: &[f64]) { 39 | if let Some(bin) = self.bin_mut(values) { 40 | *bin += 1.0; 41 | } 42 | } 43 | 44 | /// The center position of each bin along axis 45 | pub fn centers(&self, axis: usize) -> Vec { 46 | self.edges[axis].iter().map(|bin| bin.center()).collect() 47 | } 48 | 49 | /// The width of each bin along `axis` 50 | pub fn widths(&self, axis: usize) -> Vec { 51 | self.edges[axis].iter().map(|bin| bin.width()).collect() 52 | } 53 | 54 | pub fn values(&self) -> Box<[f64]> { 55 | self.bins.clone().into_raw_vec().into_boxed_slice() 56 | } 57 | 58 | /// Sum all bins along `axis` returning a 
new histogram with 59 | /// reduced dimensionality. 60 | /// 61 | /// Panics if `axis` is out of bounds. 62 | pub fn sum_axis(&self, axis: u32) -> Histogram { 63 | let axis = axis as usize; 64 | let bins = self.bins.sum_axis(Axis(axis)); 65 | let edges = self 66 | .edges 67 | .iter() 68 | .enumerate() 69 | .filter_map(|(n, ax_edges)| { 70 | if n == axis { 71 | None 72 | } else { 73 | Some(ax_edges.clone()) 74 | } 75 | }) 76 | .collect(); 77 | Histogram { bins, edges } 78 | } 79 | 80 | /// Multiply the values inside this this histogram by a scalar 81 | /// value. 82 | #[allow(clippy::should_implement_trait)] 83 | pub fn mul(self, factor: f64) -> Histogram { 84 | Histogram { 85 | bins: self.bins * factor, 86 | ..self 87 | } 88 | } 89 | } 90 | 91 | // The following impl block is not wasm compatible 92 | impl Histogram { 93 | /// Get a mutable reference to the bin including `values`. Panics 94 | /// if `values` dimensionality is incompatible with that of the 95 | /// histogram. 96 | pub fn bin_mut(&mut self, values: &[f64]) -> Option<&mut f64> { 97 | if values.len() != self.edges.len() { 98 | panic!("Expected values slice of len {}", self.edges.len()); 99 | } 100 | self.find_bin_indices(values) 101 | .and_then(move |idx| self.bins.get_mut(idx.as_slice())) 102 | } 103 | 104 | /// Dump histogram (without edges) to a file of `name`. 
105 | /// The binary layout is: 106 | /// `(array_version: u8, ndim: u64, shape: [ndim; u64], a_size: u64, a: [a_size; A])` 107 | pub fn dump_to_file(&self, name: &str) -> Result<(), Error> { 108 | let buf = serialize(&self.bins)?; 109 | let mut f = File::create(name)?; 110 | f.write_all(buf.as_slice())?; 111 | Ok(()) 112 | } 113 | } 114 | 115 | #[wasm_bindgen] 116 | #[derive(Default)] 117 | pub struct HistogramBuilder { 118 | edges: Vec>, 119 | } 120 | 121 | #[wasm_bindgen] 122 | impl HistogramBuilder { 123 | #[wasm_bindgen(constructor)] 124 | pub fn new() -> HistogramBuilder { 125 | HistogramBuilder { edges: Vec::new() } 126 | } 127 | 128 | /// Create a new n-dimensional histogram 129 | pub fn build(&self) -> Option { 130 | let edges: Vec> = self 131 | .edges 132 | .iter() 133 | .map(|edges1d| edges_to_bins(edges1d)) 134 | .collect(); 135 | if edges.is_empty() { 136 | return None; 137 | } 138 | let shape: Vec<_> = edges.iter().map(|edges| edges.len()).collect(); 139 | 140 | let bins = nd::ArrayD::zeros(IxDyn(shape.as_ref())); 141 | Some(Histogram { bins, edges }) 142 | } 143 | 144 | pub fn add_equal_width_axis(mut self, nbins: usize, min: f64, max: f64) -> HistogramBuilder { 145 | let width = (max - min) / nbins as f64; 146 | self.edges.push( 147 | (0..=nbins) 148 | .map(|i| min + width * i as f64) 149 | .collect::>(), 150 | ); 151 | self 152 | } 153 | 154 | pub fn add_variable_width_axis(mut self, edges1d: &[f64]) -> HistogramBuilder { 155 | self.edges.push(edges1d.to_vec()); 156 | self 157 | } 158 | } 159 | 160 | #[derive(Clone, Debug)] 161 | struct BinEdges { 162 | lower: f64, 163 | upper: f64, 164 | } 165 | 166 | impl BinEdges { 167 | pub fn width(&self) -> f64 { 168 | self.upper - self.lower 169 | } 170 | pub fn center(&self) -> f64 { 171 | self.lower + 0.5 * self.width() 172 | } 173 | /// Compute if a given `value` is below, within or above the given binary 174 | /// A bins interval is half open on [low, high) 175 | pub fn cmp_with(&self, value: f64) -> 
Ordering { 176 | if value < self.lower { 177 | Ordering::Greater 178 | } else if value < self.upper { 179 | Ordering::Equal 180 | } else { 181 | Ordering::Less 182 | } 183 | } 184 | } 185 | 186 | /// Turn a vector of edges to a vector of `BinEdges` 187 | fn edges_to_bins(edges1d: &[f64]) -> Vec { 188 | edges1d 189 | .windows(2) 190 | .map(|window| BinEdges { 191 | lower: window[0], 192 | upper: window[1], 193 | }) 194 | .collect() 195 | } 196 | 197 | #[cfg(test)] 198 | mod tests { 199 | use super::*; 200 | 201 | #[test] 202 | fn calc_indices() { 203 | let h = HistogramBuilder::new() 204 | .add_equal_width_axis(1, 0., 1.) 205 | .add_equal_width_axis(1, 0., 1.) 206 | .build() 207 | .unwrap(); 208 | assert_eq!(h.find_bin_indices(&[-1.0, -1.0]), None, "Wrong indices"); 209 | assert_eq!(h.find_bin_indices(&[2.0, 2.0]), None, "Wrong indices"); 210 | assert_eq!( 211 | h.find_bin_indices(&[0.5, 0.5]), 212 | Some(vec![0, 0]), 213 | "Wrong indices" 214 | ); 215 | } 216 | 217 | #[test] 218 | fn init_histogram() { 219 | let h = HistogramBuilder::new() 220 | .add_equal_width_axis(1, 0., 1.) 221 | .add_equal_width_axis(1, 0., 1.) 222 | .build() 223 | .unwrap(); 224 | assert_eq!(h.edges[0].len(), 1); 225 | assert_eq!(h.bins, nd::arr2(&[[0.]]).into_dyn()); 226 | 227 | let h = HistogramBuilder::new() 228 | .add_equal_width_axis(1, 0., 1.) 229 | .add_equal_width_axis(1, 0., 1.) 230 | .add_equal_width_axis(1, 0., 1.) 231 | .build() 232 | .unwrap(); 233 | 234 | assert_eq!(h.bins, nd::arr3(&[[[0.]]]).into_dyn()); 235 | } 236 | 237 | #[test] 238 | fn faulty_init() { 239 | // No axis 240 | let opt = HistogramBuilder::new().build(); 241 | assert!(opt.is_none()); 242 | } 243 | 244 | #[test] 245 | fn filling() { 246 | let mut h = HistogramBuilder::new() 247 | .add_equal_width_axis(2, 0., 2.) 248 | .add_equal_width_axis(2, 0., 2.) 
249 | .build() 250 | .unwrap(); 251 | // underflow both bins 252 | h.fill(&[-5., -5.]); 253 | assert_eq!(h.bins, nd::arr2(&[[0., 0.], [0., 0.]]).into_dyn()); 254 | // underflow one bin 255 | h.fill(&[-5., 1.]); 256 | assert_eq!(h.bins, nd::arr2(&[[0., 0.], [0., 0.]]).into_dyn()); 257 | // underflow / overflow 258 | h.fill(&[-5., 5.]); 259 | assert_eq!(h.bins, nd::arr2(&[[0., 0.], [0., 0.]]).into_dyn()); 260 | // both fit 261 | h.fill(&[0.5, 0.5]); 262 | assert_eq!(h.bins, nd::arr2(&[[1., 0.], [0., 0.]]).into_dyn()); 263 | } 264 | 265 | #[test] 266 | fn edges_and_centers() { 267 | let h = HistogramBuilder::new() 268 | .add_equal_width_axis(2, -1., 1.) 269 | .add_equal_width_axis(2, -1., 1.) 270 | .build() 271 | .unwrap(); 272 | assert_eq!(h.edges[0][0].lower, -1.0); 273 | assert_eq!(h.edges[0][0].upper, 0.0); 274 | assert_eq!(h.edges[0][1].lower, 0.0); 275 | assert_eq!(h.edges[0][1].upper, 1.0); 276 | 277 | assert_eq!(h.centers(0), &[-0.5, 0.5]); 278 | } 279 | 280 | #[test] 281 | fn bin_edges() { 282 | let be = BinEdges { 283 | lower: 0.0, 284 | upper: 1.0, 285 | }; 286 | // Read as "Bin is greater than value"! 
287 | assert_eq!(be.cmp_with(2.0), Ordering::Less); 288 | assert_eq!(be.cmp_with(0.5), Ordering::Equal); 289 | assert_eq!(be.cmp_with(-1.0), Ordering::Greater); 290 | } 291 | } 292 | -------------------------------------------------------------------------------- /malice/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "malice" 3 | version = "0.3.0" 4 | authors = ["cbourjau "] 5 | exclude = ["benches/*"] 6 | description = "A small framework providing sensible defaults to analyse ALICE's open data" 7 | repository = "https://github.com/cbourjau/alice-rs/malice" 8 | readme = "README.md" 9 | keywords = ["root", "cern", "alice", "lhc", "physics"] 10 | categories = ["science", "data-structures"] 11 | license = "MPL-2.0" 12 | edition = "2021" 13 | 14 | [dependencies] 15 | bitflags = "1" 16 | failure = { workspace=true } 17 | futures = { workspace=true } 18 | nom = { workspace=true } 19 | root-io = { workspace=true } 20 | itertools = "0.10" 21 | wasm-bindgen = "0.2" 22 | 23 | [target.'cfg(not(target_arch = "wasm32"))'.dependencies] 24 | # Tokio is used in the entry helper function to iterate over files 25 | tokio = { workspace=true } 26 | 27 | [dev-dependencies] 28 | alice-open-data = { workspace=true } 29 | criterion = "0.3" 30 | 31 | [[bench]] 32 | name = "rust_vs_root" 33 | path = "benches/rust_vs_root.rs" 34 | harness = false 35 | -------------------------------------------------------------------------------- /malice/README.md: -------------------------------------------------------------------------------- 1 | # malice 2 | [![Crates.io Version](https://img.shields.io/crates/v/malice.svg)](https://crates.io/crates/malice) 3 | https://docs.rs/malice/ 4 | 5 | "milli ALICE" aka `malice` is a tiny framework defining some sensible defaults to analyze the ALICE open data. 6 | 7 | # Example 8 | 9 | Here is a very simple example analysis using `malice` and other crates from this repository. 
10 | It measures the pseudorapidity distribution of the reconstructed tracks. 11 | For a more comprehensive, but still small, example (including concurrency) check out [simple-analysis](https://github.com/cbourjau/alice-rs/tree/master/examples/simple-analysis). 12 | 13 | ``` rust 14 | extern crate alice_open_data; 15 | extern crate histogram; 16 | extern crate malice; 17 | extern crate root_io; 18 | 19 | use histogram::*; 20 | use root_io::RootFile; 21 | 22 | use malice::{Event, DatasetIntoIter as DsIntoIter}; 23 | use malice::{default_track_filter, default_event_filter}; 24 | 25 | fn main() { 26 | // Iterator over files of the Open Data set 27 | let files: Vec<_> = alice_open_data::all_files_10h() 28 | .expect("No data files found. Did you download with alice-open-data?") 29 | .into_iter() 30 | .collect(); 31 | 32 | // Create an iterator over `malice::event::Event`s 33 | let events = files 34 | .iter() 35 | .map(|path| RootFile::new_from_file(&path).expect("Failed to open file")) 36 | .map(|rf| rf.items()[0].as_tree().unwrap()) 37 | .flat_map(|tree| match DsIntoIter::new(&tree) { 38 | Ok(s) => s, 39 | Err(err) => panic!("An error occured! Message: {}", err), 40 | }); 41 | 42 | // Fold the `malice::event::Events` with the analysis 43 | let _analysis_result: SimpleAnalysis = events 44 | // Apply a sensible default event filter 45 | .filter(default_event_filter) 46 | .fold(SimpleAnalysis::new(), |analysis, ev| { analysis.process_event(&ev) }); 47 | // Do something with the result... 
48 | } 49 | 50 | pub struct SimpleAnalysis { 51 | // Histogram of the pseudorapidity (eta) distribution of valid tracks 52 | pub eta_distribution: Histogram, 53 | } 54 | 55 | impl SimpleAnalysis { 56 | fn new() -> SimpleAnalysis { 57 | // 50 bins from -0.9 to 0.9 58 | let (neta, eta_min, eta_max) = (50, -0.9, 0.9); 59 | SimpleAnalysis { 60 | eta_distribution: HistogramBuilder::<[usize; 1]>::new() 61 | .add_equal_width_axis(neta, eta_min, eta_max) 62 | .build() 63 | .expect("Error building histogram"), 64 | } 65 | } 66 | 67 | // Update the histogram with the given event 68 | fn process_event(mut self, event: &Event) -> Self 69 | { 70 | // Fill only if we have a valid primary vertex 71 | if let Some(prime_vtx) = event.primary_vertex() { 72 | self.eta_distribution 73 | .extend( 74 | event.tracks() 75 | // Apply a sensible default "cut" on the valid tracks 76 | .filter(|tr| default_track_filter(&tr, &prime_vtx)) 77 | .map(|tr| [tr.eta() as f64])); 78 | }; 79 | self 80 | } 81 | } 82 | ``` 83 | -------------------------------------------------------------------------------- /malice/benches/rust_vs_root.rs: -------------------------------------------------------------------------------- 1 | //! Benchmarks which were used to compare an earlier version with the 2 | //! official c++ ROOT version. Note that these benchmarks are not 3 | //! ported to the async code, yet! 
4 | 5 | #[macro_use] 6 | extern crate criterion; 7 | extern crate malice; 8 | extern crate root_io; 9 | 10 | use criterion::{Bencher, Criterion, Fun}; 11 | use futures::stream::{self, StreamExt}; 12 | 13 | extern crate alice_open_data; 14 | use malice::event_stream_from_esd_file; 15 | 16 | async fn read_rust(n_files: usize) { 17 | let files = alice_open_data::all_files_10h().unwrap().into_iter(); 18 | let _max_chi2 = stream::iter(files) 19 | .take(n_files) 20 | .then(event_stream_from_esd_file) 21 | .map(|res_event_stream| res_event_stream.map(|evt| evt.unwrap())) 22 | .flatten() 23 | .map(|event| { 24 | event 25 | .tracks() 26 | .map(|tr| tr.its_chi2) 27 | .fold(0.0, |max, chi2| if chi2 > max { chi2 } else { max }) 28 | }) 29 | .fold(0.0, |max, chi2| async move { 30 | if chi2 > max { 31 | chi2 32 | } else { 33 | max 34 | } 35 | }); 36 | } 37 | 38 | #[cfg(feature = "cpp")] 39 | fn read_cpp(n_files: usize) { 40 | use malice::dataset_cpp::DatasetIntoIter; 41 | let _max_chi2 = alice_open_data::all_files_10h() 42 | .unwrap() 43 | .into_iter() 44 | .take(n_files) 45 | .flat_map(|path| match DatasetIntoIter::new(&path) { 46 | Ok(s) => s, 47 | Err(err) => panic!("An error occured! Message: {}", err), 48 | }) 49 | .flat_map(|event| event.tracks().map(|tr| tr.itschi2).collect::>()) 50 | .fold(0.0, |max, chi2| if chi2 > max { chi2 } else { max }); 51 | } 52 | 53 | fn bench_rust(b: &mut Bencher, n_files: &usize) { 54 | b.iter(|| read_rust(*n_files)); 55 | } 56 | #[cfg(feature = "cpp")] 57 | fn bench_cpp(b: &mut Bencher, n_files: &usize) { 58 | b.iter(|| read_cpp(*n_files)); 59 | } 60 | 61 | fn criterion_benchmark(c: &mut Criterion) { 62 | let funs = vec![ 63 | Fun::new("Rust", bench_rust), 64 | #[cfg(feature = "cpp")] 65 | Fun::new("cpp", bench_cpp), 66 | ]; 67 | let n_files = 1usize; 68 | c.bench_functions("Rust", funs, n_files); 69 | } 70 | 71 | criterion_group! 
{ 72 | name = benches; 73 | config = Criterion::default() 74 | .sample_size(5) 75 | .warm_up_time(::std::time::Duration::from_secs(10)) 76 | .measurement_time(::std::time::Duration::from_secs(200)) 77 | .with_plots(); 78 | targets = criterion_benchmark 79 | } 80 | 81 | criterion_main!(benches); 82 | -------------------------------------------------------------------------------- /malice/src/event.rs: -------------------------------------------------------------------------------- 1 | //! Structs and `bitflags` related to a given event 2 | use std::fmt::Debug; 3 | 4 | use failure::Error; 5 | use futures::prelude::*; 6 | use itertools::izip; 7 | use nom::{combinator::map, number::complete::*, sequence::tuple, IResult}; 8 | use wasm_bindgen::prelude::*; 9 | 10 | use root_io::core::parsers::{parse_custom_mantissa, parse_tobjarray_of_tnameds}; 11 | use root_io::stream_zip; 12 | use root_io::tree_reader::Tree; 13 | 14 | use crate::primary_vertex::PrimaryVertex; 15 | use crate::track::{Flags, ItsClusters, PidProbabilities, Track, TrackParameters}; 16 | 17 | bitflags! { 18 | /// Triggers are low level qualifier of an event. One event may "fire" several triggers. 19 | pub struct TriggerMask: u64 { 20 | /// Exact definition may vary from run-to-run. Should be used as the default trigger 21 | const MINIMUM_BIAS = 0b0000_0001; 22 | /// Exact definition vary from run-to-run. 
Marks an event with very high activity 23 | const HIGH_MULT = 0b0000_0010; 24 | } 25 | } 26 | 27 | /// A model for a subset of an event as stored in the published data 28 | #[wasm_bindgen] 29 | #[derive(Debug, PartialEq)] 30 | pub struct Event { 31 | primaryvertex_alivertex_fposition: (f32, f32, f32), 32 | primaryvertex_alivertex_fncontributors: i32, 33 | aliesdrun_frunnumber: i32, 34 | aliesdrun_ftriggerclasses: Vec, 35 | aliesdheader_ftriggermask: u64, 36 | tracks_fx: Vec, 37 | tracks_fp: Vec, 38 | tracks_falpha: Vec, 39 | tracks_fflags: Vec, 40 | tracks_fitschi2: Vec, 41 | tracks_fitsncls: Vec, 42 | tracks_fitsclustermap: Vec, 43 | tracks_ftpcchi2: Vec, 44 | tracks_ftpcncls: Vec, 45 | /// Combined detector response (particle identification; aka PID) probability 46 | tracks_fr: Vec, 47 | } 48 | 49 | #[wasm_bindgen] 50 | impl Event { 51 | /// Return the number of reconstructed tracks. Not very 52 | /// sophisticated, and probably not what what you want! Should 53 | /// rather be the number of **valid** tracks. FIXME. 
54 | pub fn multiplicity(&self) -> f32 { 55 | self.tracks_fx.len() as f32 56 | } 57 | 58 | pub fn track(&self, i: u32) -> Option { 59 | self.tracks().nth(i as usize) 60 | } 61 | } 62 | 63 | impl Event { 64 | /// Iterator over **all** `Track`s in this event 65 | pub fn tracks(&self) -> impl Iterator + '_ { 66 | izip!( 67 | self.tracks_fx.iter(), 68 | self.tracks_fp.iter(), 69 | self.tracks_falpha.iter(), 70 | self.tracks_fflags.iter(), 71 | self.tracks_fitschi2.iter(), 72 | self.tracks_fitsncls.iter(), 73 | self.tracks_fitsclustermap.iter(), 74 | self.tracks_ftpcchi2.iter(), 75 | self.tracks_ftpcncls.iter(), 76 | self.tracks_fr.iter() 77 | ) 78 | .map( 79 | |( 80 | x, 81 | parameters, 82 | alpha, 83 | flags, 84 | its_chi2, 85 | its_ncls, 86 | its_clustermap, 87 | tpc_chi2, 88 | tpc_ncls, 89 | pid_probabilities, 90 | )| { 91 | Track { 92 | x: *x, 93 | parameters: *parameters, 94 | alpha: *alpha, 95 | flags: *flags, 96 | its_chi2: *its_chi2, 97 | its_ncls: *its_ncls, 98 | its_clustermap: *its_clustermap, 99 | tpc_chi2: *tpc_chi2, 100 | tpc_ncls: *tpc_ncls, 101 | pid_probabilities: *pid_probabilities, 102 | } 103 | }, 104 | ) 105 | } 106 | 107 | /// The primary vertex of this event, if it exists. Else `None` 108 | pub fn primary_vertex(&self) -> Option { 109 | // 0 contributors means that there is no primar vertex 110 | if self.primaryvertex_alivertex_fncontributors > 0 { 111 | Some(PrimaryVertex { 112 | x: self.primaryvertex_alivertex_fposition.0, 113 | y: self.primaryvertex_alivertex_fposition.1, 114 | z: self.primaryvertex_alivertex_fposition.2, 115 | n_contrib: self.primaryvertex_alivertex_fncontributors, 116 | }) 117 | } else { 118 | None 119 | } 120 | } 121 | 122 | /// The `TriggerMask` of this event. 
Use this to select minimum bias events, for example 123 | pub fn trigger_mask(&self) -> TriggerMask { 124 | // The infromation which triggers fired is stored in a bitmask 125 | // Then we use the bit mask to find the string describing the 126 | // fired trigger Then, we convert the fired trigger to a 127 | // Trigger mask and lastly, we collect all fired triggers into 128 | // one mask 129 | (0..50) // Only 50 bits were used in the mask - YOLO! 130 | .map(|i| (self.aliesdheader_ftriggermask & (1 << i)) != 0) 131 | .zip(self.aliesdrun_ftriggerclasses.iter()) 132 | .filter_map(|(fired, trigger_name)| if fired { Some(trigger_name) } else { None }) 133 | .map(|name| string_to_mask(name, self.aliesdrun_frunnumber)) 134 | .collect() 135 | } 136 | } 137 | 138 | pub async fn event_stream_from_tree(t: &Tree) -> Result, Error> { 139 | let track_counter: Vec<_> = t 140 | .branch_by_name("Tracks")? 141 | .as_fixed_size_iterator(|i| be_u32(i)) 142 | .collect::>() 143 | .await; 144 | let s = stream_zip!( 145 | t.branch_by_name("AliESDRun.fRunNumber")? 146 | .as_fixed_size_iterator(|i| be_i32(i)), 147 | t.branch_by_name("AliESDRun.fTriggerClasses")? 148 | .as_fixed_size_iterator(parse_tobjarray_of_tnameds), 149 | t.branch_by_name("AliESDHeader.fTriggerMask")? 150 | .as_fixed_size_iterator(|i| be_u64(i)), 151 | t.branch_by_name("PrimaryVertex.AliVertex.fPosition[3]")? 152 | .as_fixed_size_iterator(|i| tuple((be_f32, be_f32, be_f32))(i)), 153 | t.branch_by_name("PrimaryVertex.AliVertex.fNContributors")? 154 | .as_fixed_size_iterator(|i| be_i32(i)), 155 | t.branch_by_name("Tracks.fX")? 156 | .as_var_size_iterator(|i| be_f32(i), track_counter.to_owned()), 157 | t.branch_by_name("Tracks.fP[5]")?.as_var_size_iterator( 158 | |i| map(tuple((be_f32, be_f32, be_f32, be_f32, be_f32)), |p| { 159 | TrackParameters::new(&p) 160 | })(i), 161 | track_counter.to_owned() 162 | ), 163 | t.branch_by_name("Tracks.fAlpha")? 
164 | .as_var_size_iterator(|i| be_f32(i), track_counter.to_owned()), 165 | t.branch_by_name("Tracks.fFlags")?.as_var_size_iterator( 166 | |i| map(be_u64, |uint| Flags::from_bits(uint).unwrap())(i), 167 | track_counter.to_owned() 168 | ), 169 | t.branch_by_name("Tracks.fITSchi2")? 170 | .as_var_size_iterator(|i| parse_custom_mantissa(i, 8), track_counter.to_owned()), 171 | t.branch_by_name("Tracks.fITSncls")? 172 | .as_var_size_iterator(|i| be_i8(i), track_counter.to_owned()), 173 | t.branch_by_name("Tracks.fITSClusterMap")? 174 | .as_var_size_iterator( 175 | |i| map(be_u8, |uint| ItsClusters::from_bits(uint).unwrap())(i), 176 | track_counter.to_owned() 177 | ), 178 | t.branch_by_name("Tracks.fTPCncls")? 179 | .as_var_size_iterator(|i| be_u16(i), track_counter.to_owned()), 180 | t.branch_by_name("Tracks.fTPCchi2")? 181 | .as_var_size_iterator(|i| parse_custom_mantissa(i, 8), track_counter.to_owned()), 182 | t.branch_by_name("Tracks.fR[5]")? 183 | .as_var_size_iterator(parse_pid_probabilities, track_counter), 184 | ) 185 | .map( 186 | |( 187 | aliesdrun_frunnumber, 188 | aliesdrun_ftriggerclasses, 189 | aliesdheader_ftriggermask, 190 | primaryvertex_alivertex_fposition, 191 | primaryvertex_alivertex_fncontributors, 192 | tracks_fx, 193 | tracks_fp, 194 | tracks_falpha, 195 | tracks_fflags, 196 | tracks_fitschi2, 197 | tracks_fitsncls, 198 | tracks_fitsclustermap, 199 | tracks_ftpcncls, 200 | tracks_ftpcchi2, 201 | tracks_fr, 202 | )| { 203 | Event { 204 | aliesdrun_frunnumber, 205 | aliesdrun_ftriggerclasses, 206 | aliesdheader_ftriggermask, 207 | primaryvertex_alivertex_fposition, 208 | primaryvertex_alivertex_fncontributors, 209 | tracks_fx, 210 | tracks_fp, 211 | tracks_falpha, 212 | tracks_fflags, 213 | tracks_fitschi2, 214 | tracks_fitsncls, 215 | tracks_fitsclustermap, 216 | tracks_ftpcchi2, 217 | tracks_ftpcncls, 218 | tracks_fr, 219 | } 220 | }, 221 | ); 222 | Ok(s) 223 | } 224 | 225 | /// Convert a given trigger description to a `TriggerMask`. 
This 226 | /// mapping may depend on the run number 227 | fn string_to_mask(s: &str, run_number: i32) -> TriggerMask { 228 | // LHC10h 229 | if (136_851..=139_517).contains(&run_number) { 230 | match s { 231 | "CMBAC-B-NOPF-ALL" 232 | | "CMBS2A-B-NOPF-ALL" 233 | | "CMBS2C-B-NOPF-ALL" 234 | | "CMBACS2-B-NOPF-ALL" 235 | | "CMBACS2-B-NOPF-ALLNOTRD" => TriggerMask::MINIMUM_BIAS, 236 | "C0SMH-B-NOPF-ALL" | "C0SMH-B-NOPF-ALLNOTRD" => TriggerMask::HIGH_MULT, 237 | _ => TriggerMask::empty(), 238 | } 239 | } else { 240 | TriggerMask::empty() 241 | } 242 | } 243 | 244 | fn parse_pid_probabilities(input: &[u8]) -> IResult<&[u8], PidProbabilities> { 245 | let (input, electron) = parse_custom_mantissa(input, 8)?; 246 | let (input, muon) = parse_custom_mantissa(input, 8)?; 247 | let (input, pion) = parse_custom_mantissa(input, 8)?; 248 | let (input, kaon) = parse_custom_mantissa(input, 8)?; 249 | let (input, proton) = parse_custom_mantissa(input, 8)?; 250 | Ok(( 251 | input, 252 | PidProbabilities { 253 | electron, 254 | muon, 255 | pion, 256 | kaon, 257 | proton, 258 | }, 259 | )) 260 | } 261 | -------------------------------------------------------------------------------- /malice/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # malice 2 | //! "milli ALICE" aka `malice` is a tiny framework defining some sensible defaults to analyze the ALICE open data. 3 | //! # Features 4 | //! `malice` supports two IO back-ends. The first and recommended one is the pure Rust `root-io` crate. The second one is behind the `cpp` feature gate and depends on the c++ ROOT framework. 5 | //! # Example 6 | //! Here is a very simple example "analysis" using `malice` which 7 | //! counts the number of tracks in an event. For a more 8 | //! comprehensive, but still small, example check out 9 | //! [simple-analysis](https://github.com/cbourjau/alice-rs/tree/master/examples/simple-analysis). 10 | //! 11 | //! ``` rust 12 | //! 
use alice_open_data; 13 | //! use malice::{default_event_filter, default_track_filter}; 14 | //! use malice::event_iterator_from_files; 15 | //! 16 | //! let file = alice_open_data::test_file() 17 | //! .expect("No data files found. Did you download with alice-open-data?"); 18 | //! 19 | //! // Create an iterator over all the events in all the given files 20 | //! let events = event_iterator_from_files(vec![file].into_iter()); 21 | //! 22 | //! for event in events.filter(default_event_filter) { 23 | //! // Fill only if we have a valid primary vertex 24 | //! if let Some(prime_vtx) = event.primary_vertex() { 25 | //! let n_tracks = event 26 | //! .tracks() 27 | //! // Apply a sensible default "cut" on the valid tracks 28 | //! .filter(|tr| default_track_filter(&tr, &prime_vtx)) 29 | //! .count(); 30 | //! println!("This event had {} valid tracks", n_tracks); 31 | //! } 32 | //! } 33 | //! ``` 34 | 35 | #[macro_use] 36 | extern crate bitflags; 37 | 38 | mod event; 39 | mod primary_vertex; 40 | mod track; 41 | mod utils; 42 | 43 | // re-exports 44 | pub use crate::event::{event_stream_from_tree, Event, TriggerMask}; 45 | pub use crate::primary_vertex::PrimaryVertex; 46 | pub use crate::track::{Flags, ItsClusters, Track}; 47 | pub use crate::utils::{default_event_filter, default_track_filter, is_hybrid_track}; 48 | 49 | use failure::Error; 50 | use futures::prelude::*; 51 | use futures::stream::StreamExt; 52 | 53 | use root_io::{RootFile, Source}; 54 | 55 | use std::pin::Pin; 56 | 57 | #[cfg(not(target_arch = "wasm32"))] 58 | type EventStream = Pin> + Send>>; 59 | #[cfg(target_arch = "wasm32")] 60 | type EventStream = Pin>>>; 61 | 62 | /// A helper function which turns a path to an ALICE ESD file into a 63 | /// stream over the `Events` of that file. 
64 | pub async fn event_stream_from_esd_file(p: T) -> EventStream 65 | where 66 | T: Into, 67 | { 68 | let tmp = { 69 | move || async { 70 | let rf = RootFile::new(p).await?; 71 | let tree = rf.items()[0].as_tree().await?; 72 | event_stream_from_tree(&tree).await 73 | } 74 | }(); 75 | // Turn Result into a Stream of Results 76 | match tmp.await { 77 | #[cfg(not(target_arch = "wasm32"))] 78 | Ok(s) => s.map(Ok).boxed(), 79 | #[cfg(target_arch = "wasm32")] 80 | Ok(s) => s.map(Ok).boxed_local(), 81 | Err(err) => stream::iter(vec![Err(err)]).boxed(), 82 | } 83 | } 84 | 85 | /// Main entry point for analyses running over ALICE's open 86 | /// data. Produces an iterator over events from an iterator over files 87 | /// (either local or remote). 88 | /// 89 | /// The necessary IO is done on a separate thread such that IO bound 90 | /// tasks do not interfere with the CPU bound tasks of the analysis 91 | /// itself. If an IO error is encountered the respective file will be 92 | /// skipped. 93 | /// 94 | /// This function is not available on the wasm32 target and must not 95 | /// be called from an asynchronous context itself. 
96 | #[cfg(not(target_arch = "wasm32"))] 97 | pub fn event_iterator_from_files(sources: I) -> impl Iterator 98 | where 99 | I: IntoIterator + Send + 'static, 100 | S: Into + Send, 101 | { 102 | use std::sync::mpsc::sync_channel; 103 | use std::thread::spawn; 104 | 105 | const BUFFERED_EVENTS: usize = 10; 106 | let (sender, receiver) = sync_channel(BUFFERED_EVENTS); 107 | spawn(|| { 108 | let rt = tokio::runtime::Runtime::new().expect("Failed to start IO runtime"); 109 | rt.block_on(async move { 110 | stream::iter(sources) 111 | .then(event_stream_from_esd_file) 112 | .flatten() 113 | .try_for_each(|event| async { 114 | // Errors if the receiving end has hung up 115 | sender.send(event).map_err(Into::into) 116 | }) 117 | .await 118 | }) 119 | .expect("Failed to start IO processing."); 120 | }); 121 | receiver.into_iter() 122 | } 123 | 124 | /// Create a stream of events found in the given files (local or 125 | /// remote). You probably want to use `event_iterator_from_files` 126 | /// instead unless you are a on the `wasm32` target. 
127 | pub fn event_stream_from_files(sources: SI) -> impl Stream> 128 | where 129 | SI: IntoIterator, 130 | S: Into + Send, 131 | { 132 | futures::stream::iter(sources) 133 | .map(Into::into) 134 | .then(event_stream_from_esd_file) 135 | .flatten() 136 | } 137 | 138 | #[cfg(test)] 139 | mod tests { 140 | 141 | use futures::{future, StreamExt}; 142 | use root_io::RootFile; 143 | 144 | use super::{default_event_filter, default_track_filter, event_stream_from_tree}; 145 | 146 | #[tokio::test] 147 | async fn test_filters() { 148 | let f = alice_open_data::test_file().unwrap(); 149 | let rf = RootFile::new(f).await.unwrap(); 150 | let t = rf.items()[0].as_tree().await.unwrap(); 151 | let events = event_stream_from_tree(&t).await.unwrap(); 152 | let mut cnt_evts = 0; 153 | let mut cnt_tracks = 0; 154 | let mut cnt_tracks_valid = 0; 155 | events 156 | .filter(|ev| future::ready(default_event_filter(ev))) 157 | .for_each(|ev| { 158 | cnt_evts += 1; 159 | cnt_tracks += ev.tracks().count(); 160 | if let Some(pv) = ev.primary_vertex() { 161 | cnt_tracks_valid += 162 | ev.tracks().filter(|t| default_track_filter(t, &pv)).count(); 163 | } 164 | future::ready(()) 165 | }) 166 | .await; 167 | assert_eq!(cnt_evts, 2); 168 | assert_eq!(cnt_tracks, 11958); 169 | assert_eq!(cnt_tracks_valid, 2773); 170 | } 171 | 172 | #[test] 173 | #[cfg(feature = "cpp")] 174 | fn rust_cpp_identical_many_files() { 175 | use super::dataset_cpp::DatasetIntoIter as DsIntoIter_cpp; 176 | use super::dataset_rust::DatasetIntoIter as DsIntoIter_rust; 177 | 178 | let n_files = 500; 179 | let rust_iter = alice_open_data::all_files_10h() 180 | .unwrap() 181 | .into_iter() 182 | .take(n_files) 183 | .map(|path| RootFile::new(path).expect("Failed to open file")) 184 | .map(|rf| rf.items()[0].as_tree().unwrap()) 185 | .flat_map(|tree| match DsIntoIter_rust::new(&tree) { 186 | Ok(s) => s, 187 | Err(err) => panic!("An error occured! 
Message: {}", err), 188 | }); 189 | let cpp_iter = alice_open_data::all_files_10h() 190 | .unwrap() 191 | .into_iter() 192 | .take(n_files) 193 | .flat_map(|path| match DsIntoIter_cpp::new(&path) { 194 | Ok(s) => [path.to_owned()].to_vec().into_iter().cycle().zip(s), 195 | Err(err) => panic!("An error occured! Message: {}", err), 196 | }); 197 | for (i, (rust_ev, (path, cpp_ev))) in rust_iter.zip(cpp_iter).enumerate() { 198 | // println!("{:?}", path); 199 | assert_eq!(rust_ev, cpp_ev, "Event {} differs in file {:?}", i, path); 200 | } 201 | } 202 | 203 | #[test] 204 | #[cfg(feature = "cpp")] 205 | fn rust_cpp_identical_funky_file_1() { 206 | use super::dataset_cpp::DatasetIntoIter as DsIntoIter_cpp; 207 | use super::dataset_rust::DatasetIntoIter as DsIntoIter_rust; 208 | 209 | let file = alice_open_data::all_files_10h() 210 | .unwrap() 211 | .into_iter() 212 | .find(|p| { 213 | p.to_str() 214 | .unwrap() 215 | // This file contains a bunch of "empty" baskets; i.e. baskets which claim to have events but are just zeros... 216 | .contains("10000139038001.770/AliESDs.root") 217 | }) 218 | .expect("Funky file not found"); 219 | let rust_iter = { 220 | let tree = RootFile::new(file).expect("Failed to open file").items()[0] 221 | .as_tree() 222 | .unwrap(); 223 | match DsIntoIter_rust::new(&tree) { 224 | Ok(s) => s, 225 | Err(err) => panic!("An error occured! Message: {}", err), 226 | } 227 | }; 228 | let cpp_iter = match DsIntoIter_cpp::new(&file) { 229 | Ok(s) => s, 230 | Err(err) => panic!("An error occured! 
Message: {}", err), 231 | }; 232 | for (rust_ev, cpp_ev) in rust_iter.zip(cpp_iter) { 233 | assert_eq!(rust_ev, cpp_ev); 234 | } 235 | } 236 | #[test] 237 | #[cfg(feature = "cpp")] 238 | fn rust_cpp_identical_funky_file_2() { 239 | use super::dataset_cpp::DatasetIntoIter as DsIntoIter_cpp; 240 | use super::dataset_rust::DatasetIntoIter as DsIntoIter_rust; 241 | let funkies = [ 242 | // This files has baskets which, after parsing, have 0 bytes :P 243 | "10000139038002.40/AliESDs.root", 244 | // events with 0 tracks at end of basket 245 | "10000139038001.310/AliESDs.root", 246 | ]; 247 | for funky in &funkies { 248 | let file = alice_open_data::all_files_10h() 249 | .unwrap() 250 | .into_iter() 251 | .find(|p| p.to_str().unwrap().contains(funky)) 252 | .expect("Funky file not found"); 253 | let mut rust_iter = { 254 | let tree = RootFile::new(file).expect("Failed to open file").items()[0] 255 | .as_tree() 256 | .unwrap(); 257 | match DsIntoIter_rust::new(&tree) { 258 | Ok(s) => s, 259 | Err(err) => panic!("An error occured! Message: {}", err), 260 | } 261 | }; 262 | let mut cpp_iter = match DsIntoIter_cpp::new(&file) { 263 | Ok(s) => s, 264 | Err(err) => panic!("An error occured! Message: {}", err), 265 | }; 266 | assert_eq!(rust_iter.count(), cpp_iter.count()); 267 | } 268 | for funky in &funkies { 269 | let file = alice_open_data::all_files_10h() 270 | .unwrap() 271 | .into_iter() 272 | .find(|p| p.to_str().unwrap().contains(funky)) 273 | .expect("Funky file not found"); 274 | let mut rust_iter = { 275 | let tree = RootFile::new(file).expect("Failed to open file").items()[0] 276 | .as_tree() 277 | .unwrap(); 278 | match DsIntoIter_rust::new(&tree) { 279 | Ok(s) => s, 280 | Err(err) => panic!("An error occured! Message: {}", err), 281 | } 282 | }; 283 | let mut cpp_iter = match DsIntoIter_cpp::new(&file) { 284 | Ok(s) => s, 285 | Err(err) => panic!("An error occured! 
Message: {}", err), 286 | }; 287 | for (_i, (rust_ev, cpp_ev)) in rust_iter.zip(cpp_iter).enumerate() { 288 | assert_eq!(rust_ev, cpp_ev); 289 | } 290 | } 291 | // let cpp_iter = match DsIntoIter_cpp::new(&file) { 292 | // Ok(s) => s, 293 | // Err(err) => panic!("An error occured! Message: {}", err) 294 | // }; 295 | // assert_eq!(rust_iter.count(), cpp_iter.count()); 296 | // for (i, (rust_ev, cpp_ev)) in rust_iter.zip(cpp_iter).enumerate() { 297 | // println!("{}", i); 298 | // assert_eq!(rust_ev, cpp_ev); 299 | // } 300 | } 301 | 302 | #[test] 303 | #[cfg(feature = "cpp")] 304 | fn bench_cpp() { 305 | let n_files = 50; 306 | use super::dataset_cpp::DatasetIntoIter; 307 | let _max_chi2 = alice_open_data::all_files_10h() 308 | .unwrap() 309 | .into_iter() 310 | .take(n_files) 311 | .flat_map(|path| match DatasetIntoIter::new(&path) { 312 | Ok(s) => s, 313 | Err(err) => panic!("An error occured! Message: {}", err), 314 | }) 315 | .flat_map(|event| event.tracks().map(|tr| tr.its_chi2).collect::>()) 316 | .fold(0.0, |max, chi2| if chi2 > max { chi2 } else { max }); 317 | } 318 | } 319 | -------------------------------------------------------------------------------- /malice/src/primary_vertex.rs: -------------------------------------------------------------------------------- 1 | /// The most likely position in the detector where the current event 2 | /// took place. The primary vertex is the most likely common origin of 3 | /// all the reconstructed tracks. 
4 | #[derive(Debug)] 5 | pub struct PrimaryVertex { 6 | /// `x` coordinate in the detector's reference frame 7 | pub x: f32, 8 | /// `y` coordinate in the detector's reference frame 9 | pub y: f32, 10 | /// `z` coordinate in the detector's reference frame; z is along the beam axis 11 | pub z: f32, 12 | /// Number of tracks contributed to this vertex 13 | pub n_contrib: i32, 14 | } 15 | -------------------------------------------------------------------------------- /malice/src/track.rs: -------------------------------------------------------------------------------- 1 | use std::f32::consts::PI; 2 | 3 | use wasm_bindgen::prelude::*; 4 | 5 | bitflags! { 6 | /// Clusters in the ITS associated with the current track 7 | /// See AliESDTrack::HasPointOnITSLayer 8 | #[wasm_bindgen] 9 | pub struct ItsClusters: u8 { 10 | const SPD_INNER = 1; 11 | const SPD_OUTER = 1 << 1; 12 | const SDD_INNER = 1 << 2; 13 | const SDD_OUTER = 1 << 3; 14 | const SSD_INNER = 1 << 4; 15 | const SSD_OUTER = 1 << 5; 16 | } 17 | } 18 | 19 | bitflags! { 20 | /// Various attributes of tracks.
21 | /// Flags are based on those found in AliRoot's AliVTrack.[h,cxx] 22 | #[wasm_bindgen] 23 | pub struct Flags: u64 { 24 | const ITS_IN = 0x1; 25 | const ITS_OUT = 0x2; 26 | const ITS_REFIT = 0x4; 27 | const ITS_PID = 0x8; 28 | const TPC_IN = 0x10; 29 | const TPC_OUT = 0x20; 30 | const TPC_REFIT = 0x40; 31 | const TPC_PID = 0x80; 32 | const TRD_IN = 0x100; 33 | const TRD_OUT = 0x200; 34 | const TRD_REFIT = 0x400; 35 | const TRD_PID = 0x800; 36 | const TOF_IN = 0x1000; 37 | const TOF_OUT = 0x2000; 38 | const TOF_REFIT = 0x4000; 39 | const TOF_PID = 0x8000; 40 | const HMPID_OUT = 0x0001_0000; 41 | const HMPID_PID = 0x0002_0000; 42 | const EMCAL_MATCH = 0x0004_0000; 43 | const TRD_BACKUP = 0x0008_0000; 44 | const TOF_MISMATCH = 0x0010_0000; 45 | const PHOS_MATCH = 0x0020_0000; 46 | const ITS_UPG = 0x0040_0000; 47 | const SKIP_FRIEND = 0x0080_0000; 48 | const GLOBAL_MERGE = 0x0100_0000; 49 | const MULT_IN_V0 = 0x0200_0000; 50 | const MULT_SEC = 0x0400_0000; 51 | const EMBEDDED = 0x0800_0000; 52 | const ITS_PURE_SA = 0x1000_0000; 53 | const TRDS_TOP = 0x2000_0000; 54 | const ESD_PID = 0x4000_0000; 55 | const TIME = 0x8000_0000; 56 | } 57 | } 58 | 59 | /// Probabilities of this track being of various particle types. These 60 | /// numbers stem from the "combined detector response" 61 | #[wasm_bindgen] 62 | #[derive(Debug, Clone, Copy, PartialEq)] 63 | pub struct PidProbabilities { 64 | pub electron: f32, 65 | pub muon: f32, 66 | pub pion: f32, 67 | pub kaon: f32, 68 | pub proton: f32, 69 | } 70 | 71 | /// A `Track` is a reconstruction of the trajectory of a particle traversing the detector. 
72 | #[wasm_bindgen] 73 | pub struct Track { 74 | pub(crate) x: f32, 75 | pub(crate) parameters: TrackParameters, 76 | pub(crate) alpha: f32, 77 | pub flags: Flags, 78 | pub its_chi2: f32, 79 | pub its_ncls: i8, 80 | pub its_clustermap: ItsClusters, 81 | pub(crate) tpc_chi2: f32, 82 | pub tpc_ncls: u16, 83 | pub pid_probabilities: PidProbabilities, 84 | } 85 | 86 | /// An obscure set of parameters which makes sense for the actual 87 | /// reconstruction of the tracks, but is a pain for subsequent 88 | /// analysis 89 | #[derive(Debug, Clone, Copy, PartialEq)] 90 | pub(crate) struct TrackParameters { 91 | // Tracks.fP[0] 92 | loc_y: f32, 93 | // Tracks.fP[1] 94 | loc_z: f32, 95 | // Tracks.fP[2] 96 | loc_sin: f32, 97 | // Tracks.fP[3] 98 | tang: f32, 99 | // Tracks.fP[4] 100 | one_over_pt: f32, 101 | } 102 | 103 | impl TrackParameters { 104 | /// In AliESD files, these parameters are saved in "Tracks.fP[5]" 105 | pub(crate) fn new(paras: &(f32, f32, f32, f32, f32)) -> TrackParameters { 106 | TrackParameters { 107 | loc_y: paras.0, 108 | loc_z: paras.1, 109 | loc_sin: paras.2, 110 | tang: paras.3, 111 | one_over_pt: paras.4, 112 | } 113 | } 114 | } 115 | 116 | #[wasm_bindgen] 117 | impl Track { 118 | /// Longitudinal (not boosted) angle of the `Track` 119 | pub fn theta(&self) -> f32 { 120 | 0.5 * PI - self.parameters.tang.atan() 121 | } 122 | 123 | /// Direction of a track in pseudorapidity `eta` 124 | pub fn eta(&self) -> f32 { 125 | -((0.5 * self.theta()).tan()).ln() 126 | } 127 | 128 | /// Azimuthal direction of the `Track` 129 | pub fn phi(&self) -> f32 { 130 | let mut phi = self.parameters.loc_sin.asin() + self.alpha; 131 | if phi < 0. { 132 | phi += 2. * PI; 133 | } else if phi >= 2. * PI { 134 | phi -= 2. * PI; 135 | } 136 | phi 137 | } 138 | 139 | /// Transverse momentum of the `Track` 140 | pub fn pt(&self) -> f32 { 141 | 1.0 / self.parameters.one_over_pt.abs() 142 | } 143 | 144 | /// Three-momentum (px, py, pz). Results for straight tracks are meaningless. 
145 | fn pxpypz(&self) -> (f32, f32, f32) { 146 | let pt = self.pt(); 147 | let cs = self.alpha.cos(); 148 | let sn = self.alpha.sin(); 149 | let r = ((1.0 - self.parameters.loc_sin) * (1.0 + self.parameters.loc_sin)).sqrt(); 150 | ( 151 | pt * (r * cs - self.parameters.loc_sin * sn), 152 | pt * (self.parameters.loc_sin * cs + r * sn), 153 | pt * self.parameters.tang, 154 | ) 155 | } 156 | 157 | pub fn px(&self) -> f32 { 158 | self.pxpypz().0 159 | } 160 | pub fn py(&self) -> f32 { 161 | self.pxpypz().1 162 | } 163 | pub fn pz(&self) -> f32 { 164 | self.pxpypz().2 165 | } 166 | 167 | /// Estimate the distance of closest approach of this track to a given point 168 | /// neglecting the track curvature. This returns the closest approach in the xy plane 169 | pub fn dca_to_point_xy(&self, x: f32, y: f32) -> f32 { 170 | let xv = x * self.alpha.cos() + y * self.alpha.sin(); 171 | let yv = -x * self.alpha.sin() + y * self.alpha.cos(); 172 | let diff_x = self.x - xv; 173 | let diff_y = self.parameters.loc_y - yv; 174 | let loc_sin = self.parameters.loc_sin; 175 | (diff_x * loc_sin - diff_y * ((1. - loc_sin) * (1. + loc_sin)).sqrt()).abs() 176 | } 177 | 178 | /// Distance of closest approach of this track in z. NOTE(review): this returns the signed offset `loc_z - z`, not an absolute value (unlike `dca_to_point_xy`, which takes `.abs()`) — confirm callers expect the sign convention 179 | pub fn dca_to_point_z(&self, z: f32) -> f32 { 180 | self.parameters.loc_z - z 181 | } 182 | 183 | /// Chi2 normalized to the number of clusters. This is a measure 184 | /// of how well the reconstruction fit the observed clusters 185 | pub fn tpc_chi2_per_cluster(&self) -> f32 { 186 | self.tpc_chi2 / f32::from(self.tpc_ncls) 187 | } 188 | 189 | /// Chi2 normalized to the number of clusters.
This is a measure 190 | /// of how well the reconstruction fit the observed clusters 191 | pub fn its_chi2_per_cluster(&self) -> f32 { 192 | self.its_chi2 / f32::from(self.its_ncls) 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /malice/src/utils.rs: -------------------------------------------------------------------------------- 1 | use crate::event::{Event, TriggerMask}; 2 | use crate::primary_vertex::PrimaryVertex; 3 | use crate::track::{Flags, ItsClusters, Track}; 4 | 5 | /// A simple but reasonable default event selection 6 | /// Returns true if the given event passes the recommended selection criterion 7 | pub fn default_event_filter(event: &Event) -> bool { 8 | // Check if the event has a reconstructed primary vertex 9 | let good_vertex = match event.primary_vertex() { 10 | // Primary vertex must be within +- 10cm 11 | // of the nominal interaction point along beam axis 12 | Some(pv) => pv.z.abs() < 10.0, 13 | None => false, 14 | }; 15 | good_vertex 16 | // Require some activity in the central region 17 | & (event.multiplicity() > 0.0) 18 | // Only use events which fired the minimum bias trigger 19 | & event.trigger_mask().contains(TriggerMask::MINIMUM_BIAS) 20 | } 21 | 22 | /// Applies a reasonable set of default track cuts returning `true` if 23 | /// the `track` is valid 24 | pub fn default_track_filter(tr: &Track, prime_vtx: &PrimaryVertex) -> bool { 25 | tr.flags.contains(Flags::ITS_REFIT) 26 | && tr.flags.contains(Flags::TPC_REFIT) 27 | // Distance of closest approach of this track to the primary 28 | // vertex in transverse plane [cm] 29 | && tr.dca_to_point_xy(prime_vtx.x, prime_vtx.y) < 2.4 30 | // Distance of closest approach of this track to the primary 31 | // vertex along beam axis [cm]. NOTE(review): dca_to_point_z returns a signed offset, so any large negative offset passes this `< 3.2` cut — confirm intended 32 | && tr.dca_to_point_z(prime_vtx.z) < 3.2 33 | // Restrict tracks to good TPC coverage 34 | && tr.eta().abs() < 0.9 35 | // Minimal pT cut off 36 | && tr.pt() > 0.15 37 | // Minimal number of clusters in
the TPC 38 | && tr.tpc_ncls > 70 39 | // Goodness of fit of this track wrt. the observed clusters; TPC 40 | && tr.tpc_chi2_per_cluster() <= 4.0 41 | // Goodness of fit of this track wrt. the observed clusters; ITS 42 | && tr.its_chi2_per_cluster() <= 36.0 43 | } 44 | 45 | /// So called hybrid tracks are sometimes used in order to achieve a 46 | /// more uniform distribution of tracks in eta and phi. This function 47 | /// cannot be used with the `default_track_filter` and might need more 48 | /// debugging. Use with care. 49 | pub fn is_hybrid_track(tr: &Track) -> bool { 50 | // SPD && ITS_REFIT 51 | tr.its_clustermap.intersects(ItsClusters::SPD_INNER | ItsClusters::SPD_OUTER) 52 | & tr.flags.contains(Flags::ITS_REFIT) || 53 | // !SPD && ITS_REFIT 54 | !tr.its_clustermap.intersects(ItsClusters::SPD_INNER | ItsClusters::SPD_OUTER) 55 | & tr.flags.contains(Flags::ITS_REFIT) || 56 | // !SPD && !ITS_REFIT 57 | !tr.its_clustermap.intersects(ItsClusters::SPD_INNER | ItsClusters::SPD_OUTER) 58 | & !tr.flags.contains(Flags::ITS_REFIT) 59 | } 60 | -------------------------------------------------------------------------------- /root-io/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "root-io" 3 | version = "0.3.0" 4 | authors = ["cbourjau "] 5 | exclude = ["*test_data/", "*.root"] 6 | description = "Reading of `.root` binary files which are commonly used in particle physics" 7 | repository = "https://github.com/cbourjau/alice-rs" 8 | readme = "README.md" 9 | keywords = ["root", "cern", "alice", "lhc", "physics"] 10 | categories = ["parser-implementations", "science", "data-structures"] 11 | license = "MPL-2.0" 12 | edition = "2021" 13 | 14 | [dependencies] 15 | alice-open-data = { workspace=true } 16 | bitflags = "1.0.0" 17 | failure = { workspace=true } 18 | flate2 = "^1" 19 | futures = { workspace=true } 20 | nom = { workspace=true } 21 | reqwest = { workspace=true } 22 | lzma-rs = "0.1.1" 23 | quote = 
"0.3.15" 24 | uuid = "0.8.2" 25 | lz4-compress = "0.1.1" 26 | regex = "1.8.1" 27 | 28 | 29 | [target.'cfg(target_arch = "wasm32")'.dependencies] 30 | wasm-bindgen-futures = "0.4.10" 31 | 32 | [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] 33 | criterion = "0.3" 34 | tokio = { workspace=true } 35 | 36 | [target.'cfg(target_arch = "wasm32")'.dev-dependencies] 37 | wasm-bindgen = "0.2.60" 38 | wasm-bindgen-test = "0.3.10" 39 | web-sys = {"version" = "0.3.4", "features" = [ 'console' ]} 40 | 41 | # Run with cargo bench --bench iter_branch -- --baseline pathbuf-in-container --color never 42 | [[bench]] 43 | name = "iter_branch" 44 | harness = false 45 | 46 | -------------------------------------------------------------------------------- /root-io/README.md: -------------------------------------------------------------------------------- 1 | # root-io 2 | 3 | [![Crates.io Version](https://img.shields.io/crates/v/root-io.svg)](https://crates.io/crates/root-io) 4 | 5 | 6 | ## Documentation 7 | https://docs.rs/root-io 8 | 9 | `root-io` provides basic support for reading data stored in binary `.root` files commonly used in particle physics experiments. This crate provides: 10 | 11 | - Core types and parsers to read the layout description of custom classes contained in a given file 12 | - Tools to generate `yaml` describing the streamed objects (aka. `TStreamerInfo`) 13 | - Tools to generate (buggy) `Rust` code as a starting point for a new parser 14 | - Set of types and parsers needed to read so-called `TTree`s 15 | 16 | The majority of the exposed API serves the latter point; striving to enable an easy iteration over data stored in `TTree`s. In particular, `root-io` supports reading `TBranches` (i.e. akin to "columns" of a database) with a variable number of elements in each entry (i.e. `TBranches` of `TClonesArray`). 17 | 18 | The `root-ls` crate utilizes this crate in a CLI to inspect a given root file and to deploy the code-gen tools.
19 | 20 | 21 | -------------------------------------------------------------------------------- /root-io/benches/iter_branch.rs: -------------------------------------------------------------------------------- 1 | extern crate alice_open_data; 2 | extern crate criterion; 3 | extern crate nom; 4 | extern crate root_io; 5 | 6 | use nom::number::complete::{be_f32, be_i32, be_u32}; 7 | 8 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 9 | use futures::StreamExt; 10 | use tokio::runtime::Runtime; 11 | 12 | use root_io::RootFile; 13 | 14 | fn fixed_size_branch() { 15 | let path = alice_open_data::test_file().unwrap(); 16 | 17 | let fut = async { 18 | let f = RootFile::new(path.as_path()) 19 | .await 20 | .expect("Failed to open file"); 21 | let t = f.items()[0].as_tree().await.unwrap(); 22 | let iter = t 23 | .branch_by_name("PrimaryVertex.AliVertex.fNContributors") 24 | .unwrap() 25 | .as_fixed_size_iterator(|i| be_i32(i)); 26 | iter.for_each(|el| async move { 27 | black_box(el); 28 | }) 29 | .await 30 | }; 31 | let rt = Runtime::new().unwrap(); 32 | rt.block_on(fut); 33 | } 34 | 35 | fn var_size_branch() { 36 | let fut = async { 37 | let path = alice_open_data::test_file().unwrap(); 38 | let f = RootFile::new(path.as_path()) 39 | .await 40 | .expect("Failed to open file"); 41 | let t = f.items()[0].as_tree().await.unwrap(); 42 | 43 | let track_counter: Vec<_> = t 44 | .branch_by_name("Tracks") 45 | .unwrap() 46 | .as_fixed_size_iterator(|i| be_u32(i)) 47 | .collect() 48 | .await; 49 | let iter = t 50 | .branch_by_name("Tracks.fX") 51 | .unwrap() 52 | .as_var_size_iterator(|i| be_f32(i), track_counter); 53 | iter.for_each(|el| async { 54 | black_box(el); 55 | }) 56 | .await 57 | }; 58 | let rt = Runtime::new().unwrap(); 59 | rt.block_on(fut); 60 | } 61 | 62 | /// Register the two branch-iteration benchmarks. Pass the functions
/// themselves to `Bencher::iter` so criterion actually executes them;
/// the previous `|| fixed_size_branch` closure merely returned the fn
/// item without calling it, so the benchmark measured nothing.
pub fn criterion_benchmark(c: &mut Criterion) { 63 | c.bench_function("fixed_size_branch", |b| b.iter(fixed_size_branch)); 64 | c.bench_function("var_size_branch", |b| b.iter(var_size_branch)); 65 | } 66 | 67 | criterion_group!(benches, criterion_benchmark); 68 | criterion_main!(benches); 69 | -------------------------------------------------------------------------------- /root-io/src/code_gen/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod rust; 2 | pub(crate) mod utils; 3 | -------------------------------------------------------------------------------- /root-io/src/code_gen/rust.rs: -------------------------------------------------------------------------------- 1 | /// Types to map out the inter-dependences of the streamed objects 2 | use quote::Tokens; 3 | 4 | pub(crate) trait ToRustType { 5 | fn type_doc(&self) -> Tokens { 6 | quote!() 7 | } 8 | fn type_name(&self) -> Tokens; 9 | } 10 | 11 | pub(crate) trait ToRustParser: ToRustType { 12 | /// The definition of the parser parsing this thing such that it can be used in-line 13 | fn to_inline_parser(&self) -> Tokens { 14 | quote! {#(self.parser_name())} 15 | } 16 | } 17 | 18 | pub(crate) trait ToNamedRustParser: ToRustParser { 19 | /// The name of the parser of this thing 20 | fn parser_name(&self) -> Tokens; 21 | 22 | /// The definition of the parser parsing this thing; May be blank if it is built-in 23 | fn to_named_parser(&self) -> Tokens; 24 | } 25 | 26 | pub(crate) trait ToRustStruct: ToRustType { 27 | fn to_struct(&self) -> Tokens; 28 | } 29 | -------------------------------------------------------------------------------- /root-io/src/code_gen/utils.rs: -------------------------------------------------------------------------------- 1 | /// Return true if the given type name represents a core type. 2 | /// Core types are differently implemented even if they are described 3 | /// in the `TStreamer`. Gotta love ROOT!
4 | pub(crate) fn type_is_core(name: &str) -> bool { 5 | match name { 6 | "TObject" | "TString" | "TNamed" | "TObjArray" | "TObjString" | "TList" => true, 7 | s => s.starts_with("TArray"), 8 | } 9 | } 10 | 11 | /// If necessary, annotate the given type name with a life time or replace with an alias 12 | pub(crate) fn alias_or_lifetime(t: &str) -> String { 13 | // Most core types do not need a life time specifier 14 | if type_is_core(t) && t != "TObjArray" { 15 | return t.to_string(); 16 | } 17 | // All non-core types get a life time 18 | // This is over zealous, but currently, I don't have a proper way 19 | // to check if a type has a member with a lifetime 20 | format!("{}<'s>", t) 21 | } 22 | 23 | pub(crate) fn sanitize(n: &str) -> String { 24 | let keywords = vec![ 25 | "as", "break", "const", "continue", "crate", "else", "enum", "extern", "false", "fn", 26 | "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref", 27 | "return", "Self", "self", "static", "struct", "super", "trait", "true", "type", "unsafe", 28 | "use", "where", "while", "abstract", "alignof", "become", "box", "do", "final", "macro", 29 | "offsetof", "override", "priv", "proc", "pure", "sizeof", "typeof", "unsized", "virtual", 30 | "yield", 31 | ]; 32 | if keywords.into_iter().any(|w| w == n) { 33 | format!("{}_", n) 34 | } else { 35 | n.to_string() 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /root-io/src/core/data_source.rs: -------------------------------------------------------------------------------- 1 | use std::fs::File; 2 | use std::io::{Read, Seek, SeekFrom}; 3 | #[cfg(not(target_arch = "wasm32"))] 4 | use std::path::Path; 5 | use std::path::PathBuf; 6 | 7 | use failure::Error; 8 | use reqwest::{ 9 | header::{RANGE, USER_AGENT}, 10 | Client, Url, 11 | }; 12 | 13 | /// The source from where the Root file is read. Construct it using 14 | /// `.into()` on a `Url` or `Path`. 
The latter is not availible for 15 | /// the `wasm32` target. 16 | #[derive(Debug, Clone)] 17 | pub struct Source(SourceInner); 18 | 19 | // This inner enum hides the differentiation between the local and 20 | // remote files from the public API 21 | #[derive(Debug, Clone)] 22 | enum SourceInner { 23 | /// A local source, i.e. a file on disc. 24 | Local(PathBuf), 25 | Remote { 26 | client: Client, 27 | url: Url, 28 | }, 29 | } 30 | 31 | impl Source { 32 | pub fn new>(thing: T) -> Self { 33 | thing.into() 34 | } 35 | 36 | pub async fn fetch(&self, start: u64, len: u64) -> Result, Error> { 37 | match &self.0 { 38 | SourceInner::Local(path) => { 39 | let mut f = File::open(path)?; 40 | f.seek(SeekFrom::Start(start))?; 41 | let mut buf = vec![0; len as usize]; 42 | f.read_exact(&mut buf)?; 43 | Ok(buf) 44 | } 45 | SourceInner::Remote { client, url } => { 46 | let rsp = client 47 | .get(url.clone()) 48 | .header(USER_AGENT, "alice-rs") 49 | .header(RANGE, format!("bytes={}-{}", start, start + len - 1)) 50 | .send() 51 | .await? 52 | .error_for_status()?; 53 | let bytes = rsp.bytes().await?; 54 | Ok(bytes.as_ref().to_vec()) 55 | } 56 | } 57 | } 58 | } 59 | 60 | impl From for Source { 61 | fn from(url: Url) -> Self { 62 | Self(SourceInner::Remote { 63 | client: Client::new(), 64 | url, 65 | }) 66 | } 67 | } 68 | 69 | // Disallow the construction of a local source object on wasm since 70 | // wasm does not have a (proper) file system. 
71 | #[cfg(not(target_arch = "wasm32"))] 72 | impl From<&Path> for Source { 73 | fn from(path: &Path) -> Self { 74 | path.to_path_buf().into() 75 | } 76 | } 77 | 78 | #[cfg(not(target_arch = "wasm32"))] 79 | impl From for Source { 80 | fn from(path_buf: PathBuf) -> Self { 81 | Self(SourceInner::Local(path_buf)) 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /root-io/src/core/file.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | use failure::Error; 4 | use nom::{ 5 | self, 6 | bytes::complete::tag, 7 | combinator::map, 8 | number::complete::{be_i16, be_i32, be_u128, be_u16, be_u32, be_u64, be_u8}, 9 | IResult, 10 | }; 11 | 12 | use uuid::Uuid; 13 | 14 | use crate::{ 15 | code_gen::rust::{ToNamedRustParser, ToRustStruct}, 16 | core::tstreamer::streamers, 17 | core::*, 18 | MAP_OFFSET, 19 | }; 20 | 21 | /// Size of serialized `FileHeader` in bytes 22 | const FILE_HEADER_SIZE: u64 = 75; 23 | 24 | /// Size of serialized TDirectory. Depending on the ROOT version this 25 | /// may use 32 or 64 bit pointers. This is the maximal (64 bit size). 26 | const TDIRECTORY_MAX_SIZE: u64 = 42; 27 | 28 | /// `RootFile` wraps the most basic information of a ROOT file. 
29 | #[derive(Debug)] 30 | pub struct RootFile { 31 | source: Source, 32 | hdr: FileHeader, 33 | items: Vec, 34 | } 35 | 36 | #[derive(Debug, PartialEq)] 37 | struct FileHeader { 38 | version: i32, 39 | begin: i32, 40 | end: u64, 41 | seek_free: u64, 42 | nbytes_free: i32, 43 | n_entries_free: i32, 44 | n_bytes_name: i32, 45 | pointer_size: u8, 46 | compression: i32, 47 | seek_info: SeekPointer, 48 | nbytes_info: i32, 49 | uuid: Uuid, 50 | seek_dir: SeekPointer, 51 | } 52 | 53 | #[derive(Debug, PartialEq)] 54 | pub struct Directory { 55 | version: i16, 56 | c_time: u32, 57 | m_time: u32, 58 | n_bytes_keys: i32, 59 | n_bytes_name: i32, 60 | seek_dir: SeekPointer, 61 | seek_parent: SeekPointer, 62 | seek_keys: SeekPointer, 63 | } 64 | 65 | /// Parse opening part of a root file 66 | fn file_header(i: &[u8]) -> IResult<&[u8], FileHeader> { 67 | fn version_dep_int(i: &[u8], is_64_bit: bool) -> IResult<&[u8], u64> { 68 | if is_64_bit { 69 | be_u64(i) 70 | } else { 71 | let (i, end) = be_u32(i)?; 72 | Ok((i, end as u64)) 73 | } 74 | } 75 | let (i, _) = tag("root")(i)?; 76 | let (i, version) = be_i32(i)?; 77 | let is_64_bit = version > 1000000; 78 | let (i, begin) = be_i32(i)?; 79 | let (i, end) = version_dep_int(i, is_64_bit)?; 80 | let (i, seek_free) = version_dep_int(i, is_64_bit)?; 81 | let (i, nbytes_free) = be_i32(i)?; 82 | let (i, n_entries_free) = be_i32(i)?; 83 | let (i, n_bytes_name) = be_i32(i)?; 84 | let (i, pointer_size) = be_u8(i)?; 85 | let (i, compression) = be_i32(i)?; 86 | let (i, seek_info) = version_dep_int(i, is_64_bit)?; 87 | let (i, nbytes_info) = be_i32(i)?; 88 | let (i, _uuid_version) = be_u16(i)?; 89 | let (i, uuid) = be_u128(i)?; 90 | 91 | let uuid = Uuid::from_u128(uuid); 92 | let seek_dir = (begin + n_bytes_name) as u64; 93 | Ok(( 94 | i, 95 | FileHeader { 96 | version, 97 | begin, 98 | end, 99 | seek_free, 100 | nbytes_free, 101 | n_entries_free, 102 | n_bytes_name, 103 | pointer_size, 104 | compression, 105 | seek_info, 106 | nbytes_info, 107 
| uuid, 108 | seek_dir, 109 | }, 110 | )) 111 | } 112 | 113 | /// Parse a file-pointer based on the version of the file 114 | fn versioned_pointer(input: &[u8], version: i16) -> nom::IResult<&[u8], u64> { 115 | if version > 1000 { 116 | be_u64(input) 117 | } else { 118 | map(be_i32, |val| val as u64)(input) 119 | } 120 | } 121 | 122 | /// Directory within a root file; exists on ever file 123 | fn directory(input: &[u8]) -> nom::IResult<&[u8], Directory> { 124 | let (input, version) = be_i16(input)?; 125 | let (input, c_time) = be_u32(input)?; 126 | let (input, m_time) = be_u32(input)?; 127 | let (input, n_bytes_keys) = be_i32(input)?; 128 | let (input, n_bytes_name) = be_i32(input)?; 129 | let (input, seek_dir) = versioned_pointer(input, version)?; 130 | let (input, seek_parent) = versioned_pointer(input, version)?; 131 | let (input, seek_keys) = versioned_pointer(input, version)?; 132 | Ok(( 133 | input, 134 | Directory { 135 | version, 136 | c_time, 137 | m_time, 138 | n_bytes_keys, 139 | n_bytes_name, 140 | seek_dir, 141 | seek_parent, 142 | seek_keys, 143 | }, 144 | )) 145 | } 146 | 147 | impl RootFile { 148 | /// Open a new ROOT file either from a `Url`, or from a `Path` 149 | /// (not available on `wasm32`). 
150 | pub async fn new>(source: S) -> Result { 151 | let source = source.into(); 152 | let hdr = source.fetch(0, FILE_HEADER_SIZE).await.and_then(|buf| { 153 | file_header(&buf) 154 | .map_err(|_| format_err!("Failed to parse file header")) 155 | .map(|(_i, o)| o) 156 | })?; 157 | // Jump to the TDirectory and parse it 158 | let dir = source 159 | .fetch(hdr.seek_dir, TDIRECTORY_MAX_SIZE) 160 | .await 161 | .and_then(|buf| { 162 | directory(&buf) 163 | .map_err(|_| format_err!("Failed to parse TDirectory")) 164 | .map(|(_i, o)| o) 165 | })?; 166 | let tkey_of_keys = source 167 | .fetch(dir.seek_keys, dir.n_bytes_keys as u64) 168 | .await 169 | .and_then(|buf| { 170 | tkey(&buf) 171 | .map_err(|_| format_err!("Failed to parse TKeys")) 172 | .map(|(_i, o)| o) 173 | })?; 174 | let keys = match tkey_headers(&tkey_of_keys.obj) { 175 | Ok((_, hdrs)) => Ok(hdrs), 176 | _ => Err(format_err!("Expected TKeyHeaders")), 177 | }?; 178 | let items = keys 179 | .iter() 180 | .map(|k_hdr| FileItem::new(k_hdr, source.clone())) 181 | .collect(); 182 | 183 | Ok(RootFile { source, hdr, items }) 184 | } 185 | 186 | pub async fn get_streamer_context(&self) -> Result { 187 | let seek_info_len = (self.hdr.nbytes_info + 4) as u64; 188 | let info_key = self 189 | .source 190 | .fetch(self.hdr.seek_info, seek_info_len) 191 | .await 192 | .map(|buf| tkey(&buf).unwrap().1)?; 193 | 194 | let key_len = info_key.hdr.key_len; 195 | Ok(Context { 196 | source: self.source.clone(), 197 | offset: key_len as u64 + MAP_OFFSET, 198 | s: info_key.obj, 199 | }) 200 | } 201 | 202 | /// Slice of the items contained in this file 203 | pub fn items(&self) -> &[FileItem] { 204 | &self.items 205 | } 206 | 207 | /// Translate the streamer info of this file to a YAML file 208 | pub async fn streamer_infos(&self) -> Result, Error> { 209 | let ctx = self.get_streamer_context().await?; 210 | let buf = ctx.s.as_slice(); 211 | let (_, streamer_vec) = 212 | streamers(buf, &ctx).map_err(|_| format_err!("Failed to parse 
TStreamers"))?; 213 | Ok(streamer_vec) 214 | } 215 | 216 | /// Translate the streamer info of this file to a YAML file 217 | pub async fn streamer_info_as_yaml(&self, s: &mut W) -> Result<(), Error> { 218 | for el in &self.streamer_infos().await? { 219 | writeln!(s, "{:#}", el.to_yaml())?; 220 | } 221 | Ok(()) 222 | } 223 | 224 | /// Generate Rust code from the streamer info of this file 225 | pub async fn streamer_info_as_rust(&self, s: &mut W) -> Result<(), Error> { 226 | // Add necessary imports at the top of the file 227 | writeln!( 228 | s, 229 | "{}", 230 | quote! { 231 | use std::marker::PhantomData; 232 | use nom::*; 233 | use parsers::*; 234 | use parsers::utils::*; 235 | use core_types::*; 236 | } 237 | )?; 238 | let streamer_infos = self.streamer_infos().await?; 239 | // generate structs 240 | for el in &streamer_infos { 241 | // The structs contain comments which introduce line breaks; i.e. readable 242 | writeln!(s, "{}", el.to_struct())?; 243 | } 244 | 245 | // generate parsers 246 | for el in &streamer_infos { 247 | // The parsers have no comments, but are ugly; We introduce some 248 | // Linebreaks here to not have rustfmt choke later (doing it later 249 | // is inconvinient since the comments in the structs might contain 250 | // the patterns 251 | let parsers = el.to_named_parser().to_string(); 252 | let parsers = parsers.replace(',', ",\n"); 253 | let parsers = parsers.replace(">>", ">>\n"); 254 | // macro names are generated as my_macro ! (...) by `quote` 255 | let parsers = parsers.replace(" ! 
(", "!("); 256 | writeln!(s, "{}", parsers)?; 257 | } 258 | Ok(()) 259 | } 260 | } 261 | 262 | #[cfg(all(test, not(target_arch = "wasm32")))] 263 | mod test { 264 | use super::*; 265 | use std::path::Path; 266 | 267 | use nom::multi::length_value; 268 | use reqwest::Url; 269 | use tokio; 270 | 271 | const SIMPLE_FILE_REMOTE: &str = 272 | "https://github.com/cbourjau/alice-rs/blob/master/root-io/src/test_data/simple.root?raw=true"; 273 | 274 | #[tokio::test] 275 | async fn read_cms_file_remote() { 276 | let url = "http://opendata.web.cern.ch/eos/opendata/cms/hidata/HIRun2010/HIAllPhysics/RECO/ZS-v2/0000/001DA267-7243-E011-B38F-001617C3B6CE.root"; 277 | let f = RootFile::new(Url::parse(url).unwrap()).await.unwrap(); 278 | let mut s = String::new(); 279 | f.streamer_info_as_yaml(&mut s).await.unwrap(); 280 | println!("{}", s); 281 | for item in f.items() { 282 | item.as_tree().await.unwrap(); 283 | } 284 | } 285 | 286 | async fn file_header_test(source: Source) { 287 | let hdr = source 288 | .fetch(0, FILE_HEADER_SIZE) 289 | .await 290 | .and_then(|buf| { 291 | file_header(&buf) 292 | .map_err(|_| format_err!("Failed to parse file header")) 293 | .map(|(_i, o)| o) 294 | }) 295 | .unwrap(); 296 | 297 | let should = FileHeader { 298 | version: 60600, 299 | begin: 100, 300 | end: 5614, 301 | seek_free: 5559, 302 | nbytes_free: 55, 303 | n_entries_free: 1, 304 | n_bytes_name: 58, 305 | pointer_size: 4, 306 | compression: 1, 307 | seek_info: 1117, 308 | nbytes_info: 4442, 309 | uuid: Uuid::from_u128(154703765255331693287451041600576143087), 310 | seek_dir: 158, 311 | }; 312 | assert_eq!(hdr, should); 313 | } 314 | 315 | #[tokio::test] 316 | async fn file_header_test_local() { 317 | let local = Source::new(Path::new("./src/test_data/simple.root")); 318 | file_header_test(local).await; 319 | } 320 | 321 | #[tokio::test] 322 | async fn file_header_test_remote() { 323 | let remote = Source::new(Url::parse(SIMPLE_FILE_REMOTE).unwrap()); 324 | file_header_test(remote).await; 325 
| } 326 | 327 | async fn directory_test(source: Source) { 328 | let hdr = source 329 | .fetch(0, FILE_HEADER_SIZE) 330 | .await 331 | .and_then(|buf| { 332 | file_header(&buf) 333 | .map_err(|_| format_err!("Failed to parse file header")) 334 | .map(|(_i, o)| o) 335 | }) 336 | .unwrap(); 337 | 338 | let dir = source 339 | .fetch(hdr.seek_dir, TDIRECTORY_MAX_SIZE) 340 | .await 341 | .and_then(|buf| { 342 | directory(&buf) 343 | .map_err(|_| format_err!("Failed to parse file header")) 344 | .map(|(_i, o)| o) 345 | }) 346 | .unwrap(); 347 | assert_eq!( 348 | dir, 349 | Directory { 350 | version: 5, 351 | c_time: 1418768412, 352 | m_time: 1418768412, 353 | n_bytes_keys: 96, 354 | n_bytes_name: 58, 355 | seek_dir: 100, 356 | // TODO: This should probably be an Option 357 | seek_parent: 0, 358 | seek_keys: 1021 359 | } 360 | ); 361 | } 362 | 363 | #[tokio::test] 364 | async fn directory_test_local() { 365 | let local = Path::new("./src/test_data/simple.root").into(); 366 | directory_test(local).await; 367 | } 368 | 369 | #[tokio::test] 370 | async fn directory_test_remote() { 371 | let remote = Source::new(Url::parse(SIMPLE_FILE_REMOTE).unwrap()); 372 | directory_test(remote).await; 373 | } 374 | 375 | async fn streamerinfo_test(source: Source) { 376 | let key = source 377 | .fetch(1117, 4446) 378 | .await 379 | .and_then(|buf| { 380 | tkey(&buf) 381 | .map_err(|_| format_err!("Failed to parse file header")) 382 | .map(|(_i, o)| o) 383 | }) 384 | .unwrap(); 385 | assert_eq!(key.hdr.obj_name, "StreamerInfo"); 386 | 387 | let key_len = key.hdr.key_len; 388 | let k_map_offset = 2; 389 | let context = Context { 390 | source: source.clone(), 391 | offset: (key_len + k_map_offset) as u64, 392 | s: key.obj, 393 | }; 394 | 395 | match length_value(checked_byte_count, |i| tlist(i, &context))(&context.s) { 396 | Ok((_, l)) => { 397 | assert_eq!(l.len(), 19); 398 | } 399 | Err(_e) => panic!("Not parsed as TList!"), 400 | }; 401 | } 402 | 403 | #[tokio::test] 404 | async fn 
streamerinfo_test_local() { 405 | let local = Path::new("./src/test_data/simple.root").into(); 406 | streamerinfo_test(local).await; 407 | } 408 | 409 | #[tokio::test] 410 | async fn streamerinfo_test_remote() { 411 | let remote = Url::parse( 412 | "https://github.com/cbourjau/alice-rs/blob/master/root-io/src/test_data/simple.root?raw=true") 413 | .unwrap() 414 | .into(); 415 | streamerinfo_test(remote).await; 416 | } 417 | } 418 | -------------------------------------------------------------------------------- /root-io/src/core/file_item.rs: -------------------------------------------------------------------------------- 1 | use failure::Error; 2 | use nom::multi::length_value; 3 | 4 | use crate::core::{checked_byte_count, decompress, Context, Source, TKeyHeader}; 5 | use crate::tree_reader::{ttree, Tree}; 6 | 7 | /// Describes a single item within this file (e.g. a `Tree`) 8 | #[derive(Debug)] 9 | pub struct FileItem { 10 | source: Source, 11 | tkey_hdr: TKeyHeader, 12 | } 13 | 14 | impl FileItem { 15 | /// New file item from the information in a TKeyHeader and the associated file 16 | pub(crate) fn new(tkey_hdr: &TKeyHeader, source: Source) -> FileItem { 17 | FileItem { 18 | source, 19 | tkey_hdr: tkey_hdr.to_owned(), 20 | } 21 | } 22 | 23 | /// Information about this file item in Human readable form 24 | pub fn verbose_info(&self) -> String { 25 | format!("{:#?}", self.tkey_hdr) 26 | } 27 | pub fn name(&self) -> String { 28 | format!( 29 | "`{}` of type `{}`", 30 | self.tkey_hdr.obj_name, self.tkey_hdr.class_name 31 | ) 32 | } 33 | 34 | async fn get_buffer(&self) -> Result, Error> { 35 | let start = self.tkey_hdr.seek_key + self.tkey_hdr.key_len as u64; 36 | let len = self.tkey_hdr.total_size - self.tkey_hdr.key_len as u32; 37 | let comp_buf = self.source.fetch(start, len as u64).await?; 38 | 39 | let buf = if self.tkey_hdr.total_size < self.tkey_hdr.uncomp_len { 40 | // Decompress the read buffer; buf is Vec 41 | let (_, buf) = 
decompress(comp_buf.as_slice()).unwrap(); 42 | buf 43 | } else { 44 | comp_buf 45 | }; 46 | Ok(buf) 47 | } 48 | 49 | pub(crate) async fn get_context<'s>(&self) -> Result { 50 | let buffer = self.get_buffer().await?; 51 | let k_map_offset = 2; 52 | Ok(Context { 53 | source: self.source.clone(), 54 | offset: (self.tkey_hdr.key_len + k_map_offset) as u64, 55 | s: buffer, 56 | }) 57 | } 58 | 59 | /// Parse this `FileItem` as a `Tree` 60 | pub async fn as_tree(&self) -> Result { 61 | let ctx = self.get_context().await?; 62 | let buf = ctx.s.as_slice(); 63 | 64 | let res = length_value(checked_byte_count, |i| ttree(i, &ctx))(buf); 65 | match res { 66 | Ok((_, obj)) => Ok(obj), 67 | Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => { 68 | Err(format_err!("Supplied parser failed! {:?}", e)) 69 | } 70 | _ => panic!(), 71 | } 72 | } 73 | } 74 | 75 | #[cfg(all(test, not(target_arch = "wasm32")))] 76 | mod tests { 77 | use crate::core::RootFile; 78 | use std::path::Path; 79 | 80 | #[tokio::test] 81 | async fn open_simple() { 82 | let path = Path::new("./src/test_data/simple.root"); 83 | let f = RootFile::new(path).await.expect("Failed to open file"); 84 | assert_eq!(f.items().len(), 1); 85 | assert_eq!(f.items()[0].tkey_hdr.obj_name, "tree"); 86 | // Only streamers; not rules 87 | assert_eq!(f.streamer_infos().await.unwrap().len(), 18); 88 | } 89 | 90 | #[tokio::test] 91 | #[cfg(not(target_arch = "wasm32"))] 92 | async fn open_esd() { 93 | use alice_open_data; 94 | let path = alice_open_data::test_file().unwrap(); 95 | 96 | let f = RootFile::new(path.as_path()) 97 | .await 98 | .expect("Failed to open file"); 99 | 100 | assert_eq!(f.items().len(), 2); 101 | assert_eq!(f.items()[0].tkey_hdr.obj_name, "esdTree"); 102 | assert_eq!(f.items()[1].tkey_hdr.obj_name, "HLTesdTree"); 103 | assert_eq!(f.streamer_infos().await.unwrap().len(), 87); 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /root-io/src/core/mod.rs: 
-------------------------------------------------------------------------------- 1 | //! This module contains the core structs and parsers needed to read 2 | //! the self-description of a root file. These parsers can be used to 3 | //! build new parsers using the [root-ls](https://github.com/cbourjau/alice-rs) cli. 4 | 5 | mod data_source; 6 | mod file; 7 | mod file_item; 8 | pub mod parsers; 9 | mod tkey; 10 | mod tstreamer; 11 | mod tstreamerinfo; 12 | mod typeid; 13 | pub mod types; 14 | 15 | pub(crate) use self::parsers::*; 16 | pub(crate) use self::tkey::*; 17 | pub(crate) use self::tstreamer::{tstreamer, TStreamer}; 18 | pub(crate) use self::tstreamerinfo::{tstreamerinfo, TStreamerInfo}; 19 | pub(crate) use self::typeid::*; 20 | pub(crate) use self::types::*; 21 | 22 | pub use self::data_source::Source; 23 | pub use self::file::RootFile; 24 | pub use self::file_item::FileItem; 25 | -------------------------------------------------------------------------------- /root-io/src/core/tkey.rs: -------------------------------------------------------------------------------- 1 | use nom::{ 2 | bytes::complete::take, combinator::map, multi::length_count, number::complete::*, IResult, 3 | }; 4 | 5 | use crate::core::*; 6 | 7 | #[derive(Debug, Clone)] 8 | #[allow(dead_code)] 9 | pub struct TKeyHeader { 10 | pub(crate) total_size: u32, 11 | version: u16, 12 | pub(crate) uncomp_len: u32, 13 | datime: u32, 14 | pub(crate) key_len: i16, 15 | cycle: i16, 16 | pub(crate) seek_key: SeekPointer, 17 | seek_pdir: SeekPointer, 18 | pub(crate) class_name: String, 19 | pub(crate) obj_name: String, 20 | obj_title: String, 21 | } 22 | 23 | /// A `TKey` wraps a streamed oject. The object is decompress when 24 | /// reading from disc if applicable. 
25 | #[derive(Debug)] 26 | pub struct TKey { 27 | pub(crate) hdr: TKeyHeader, 28 | pub(crate) obj: Vec, 29 | } 30 | 31 | /// Header of a TKey Usually, TKeys are followed up by their 32 | /// content, but there is one "index" in ever root file where only the 33 | /// TKey headers are stored for faster later `Seek`ing 34 | pub fn tkey_header(input: &[u8]) -> nom::IResult<&[u8], TKeyHeader> { 35 | let (input, total_size) = be_u32(input)?; 36 | let (input, version) = be_u16(input)?; 37 | let (input, uncomp_len) = be_u32(input)?; 38 | let (input, datime) = be_u32(input)?; 39 | let (input, key_len) = be_i16(input)?; 40 | let (input, cycle) = be_i16(input)?; 41 | let (input, seek_key) = seek_point(input, version)?; 42 | let (input, seek_pdir) = seek_point(input, version)?; 43 | let (input, class_name) = string(input)?; 44 | let (input, obj_name) = string(input)?; 45 | let (input, obj_title) = string(input)?; 46 | Ok(( 47 | input, 48 | TKeyHeader { 49 | total_size, 50 | version, 51 | uncomp_len, 52 | datime, 53 | key_len, 54 | cycle, 55 | seek_key, 56 | seek_pdir, 57 | class_name, 58 | obj_name, 59 | obj_title, 60 | }, 61 | )) 62 | } 63 | 64 | /// Parse a file-pointer based on the version of the file 65 | fn seek_point(input: &[u8], version: u16) -> nom::IResult<&[u8], u64> { 66 | if version > 1000 { 67 | be_u64(input) 68 | } else { 69 | map(be_u32, u64::from)(input) 70 | } 71 | } 72 | 73 | /// Parse a full TKey including its payload 74 | pub fn tkey(input: &[u8]) -> nom::IResult<&[u8], TKey> { 75 | let (input, hdr) = tkey_header(input)?; 76 | let (input, obj) = take(hdr.total_size - hdr.key_len as u32)(input)?; 77 | let obj = if hdr.uncomp_len as usize > obj.len() { 78 | decompress(obj).unwrap().1 79 | } else { 80 | obj.to_vec() 81 | }; 82 | Ok((input, TKey { hdr, obj })) 83 | } 84 | 85 | /// Special thing for the keylist in the file header 86 | pub(crate) fn tkey_headers(input: &[u8]) -> IResult<&[u8], Vec> { 87 | length_count(be_u32, tkey_header)(input) 88 | } 89 | 
-------------------------------------------------------------------------------- /root-io/src/core/tstreamerinfo.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | use nom::{combinator::eof, multi::length_value, number::complete::*, IResult}; 4 | 5 | use quote::*; 6 | 7 | use crate::{ 8 | code_gen::rust::{ToNamedRustParser, ToRustParser, ToRustStruct, ToRustType}, 9 | code_gen::utils::type_is_core, 10 | core::*, 11 | }; 12 | 13 | #[derive(Debug)] 14 | #[allow(dead_code)] 15 | pub struct TStreamerInfo { 16 | tstreamerinfo_ver: u16, 17 | named: TNamed, 18 | checksum: u32, 19 | new_class_version: u32, 20 | data_members: Vec, 21 | } 22 | 23 | /// Parse one `TStreamerInfo` object (as found in the `TList`) 24 | pub(crate) fn tstreamerinfo<'s>( 25 | i: &'s [u8], 26 | context: &'s Context, 27 | ) -> IResult<&'s [u8], TStreamerInfo> { 28 | let parse_members = |i| tobjarray(|raw_obj, _context| tstreamer(raw_obj), i, context); 29 | 30 | let (i, tstreamerinfo_ver) = be_u16(i)?; 31 | let (i, named) = length_value(checked_byte_count, tnamed)(i)?; 32 | let (i, checksum) = be_u32(i)?; 33 | let (i, new_class_version) = be_u32(i)?; 34 | let (i, _size_tobjarray_with_class_info) = checked_byte_count(i)?; 35 | let (i, _class_info_objarray) = classinfo(i)?; 36 | let (i, data_members) = length_value(checked_byte_count, parse_members)(i)?; 37 | let (i, _eof) = eof(i)?; 38 | Ok(( 39 | i, 40 | TStreamerInfo { 41 | tstreamerinfo_ver, 42 | named, 43 | checksum, 44 | new_class_version, 45 | data_members, 46 | }, 47 | )) 48 | } 49 | 50 | impl ToRustParser for TStreamerInfo { 51 | /// Generate a parser that can parse an an object described by this TStreamer 52 | #[rustfmt::skip::macros(do_parse)] 53 | fn to_inline_parser(&self) -> Tokens { 54 | if type_is_core(self.named.name.as_str()) { 55 | // Don't generate a parser if its a core type 56 | return quote!(#(self.named.name.to_lowercase())); 57 | } 58 | let struct_name = 
Ident::new(self.named.name.as_str()); 59 | let member_names: &Vec = 60 | &self.data_members.iter().map(|m| m.member_name()).collect(); 61 | let member_parsers: &Vec = &self 62 | .data_members 63 | .iter() 64 | .map(|m| m.to_inline_parser()) 65 | .collect(); 66 | quote! { 67 | do_parse!(ver: be_u16 >> 68 | #(#member_names : #member_parsers >> )* 69 | ({let phantom = PhantomData; 70 | #struct_name { 71 | phantom, 72 | ver, 73 | #(#member_names),* 74 | }}) 75 | )} 76 | } 77 | } 78 | 79 | impl ToNamedRustParser for TStreamerInfo { 80 | fn parser_name(&self) -> Tokens { 81 | let ret = Ident::new(self.named.name.to_lowercase()); 82 | quote!(#ret) 83 | } 84 | 85 | fn to_named_parser(&self) -> Tokens { 86 | if type_is_core(self.named.name.as_str()) { 87 | // Don't generate a parser if its a core type 88 | return quote! {}; 89 | } 90 | let parser_name = self.parser_name(); 91 | let parser = self.to_inline_parser(); 92 | let struct_name = self.type_name(); 93 | quote! { 94 | pub fn #parser_name<'s>(input: &'s[u8], context: &'s Context<'s>) 95 | -> IResult<&'s[u8], #struct_name<'s>> { 96 | value!(input, #parser) 97 | } 98 | } 99 | } 100 | } 101 | 102 | impl ToRustStruct for TStreamerInfo { 103 | /// Generate a struct corresponding to this TStreamerInfo 104 | fn to_struct(&self) -> Tokens { 105 | if type_is_core(self.named.name.as_str()) { 106 | return quote! {}; 107 | } 108 | let name = self.type_name(); 109 | let fields = &self.data_members; 110 | let ver_comment = self.type_doc(); 111 | quote! 
{ 112 | #[derive(Debug)] 113 | pub struct #name<'s> { 114 | /// Gurantee that this object does not outlive its underlying slice 115 | phantom: PhantomData<&'s[u8]>, 116 | #ver_comment 117 | ver: u16, 118 | #(#fields), * 119 | } 120 | } 121 | } 122 | } 123 | 124 | impl ToRustType for TStreamerInfo { 125 | fn type_doc(&self) -> Tokens { 126 | let ret = Ident::new("\n/// Version of the read layout\n"); 127 | quote!(#ret) 128 | } 129 | fn type_name(&self) -> Tokens { 130 | let ret = Ident::new(self.named.name.as_str()); 131 | quote!(#ret) 132 | } 133 | } 134 | 135 | impl TStreamerInfo { 136 | pub(crate) fn to_yaml(&self) -> String { 137 | if type_is_core(self.named.name.as_str()) { 138 | return "".to_string(); 139 | }; 140 | let mut s = "".to_string(); 141 | s += format!("{}:\n", self.named.name).as_str(); 142 | s += format!(" version: {}\n", self.new_class_version).as_str(); 143 | s += " members:\n"; 144 | for obj in &self.data_members { 145 | s += format!(" # {}\n", obj.member_comment()).as_str(); 146 | s += format!(" {}: {}\n", obj.member_name(), obj.type_name()).as_str(); 147 | } 148 | s += "\n"; 149 | s 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /root-io/src/core/typeid.rs: -------------------------------------------------------------------------------- 1 | use std::f64::consts::PI; 2 | 3 | use failure::Error; 4 | use quote::*; 5 | use regex::Regex; 6 | 7 | use crate::code_gen::rust::{ToRustParser, ToRustType}; 8 | 9 | /// Integer ID describing a streamed type in a `TStreamer` 10 | #[derive(Debug, Clone)] 11 | pub(crate) enum TypeID { 12 | InvalidOrCounter(i32), 13 | Primitive(PrimitiveID), 14 | Offset(PrimitiveID), 15 | Array(PrimitiveID), 16 | Base, 17 | Object, 18 | Named, 19 | TObject, 20 | Any, 21 | Objectp, 22 | ObjectP, 23 | String, 24 | AnyP, 25 | Stl, 26 | StlString, 27 | Streamer, 28 | Unknown(i32), 29 | } 30 | 31 | /// Type of a streamed STL container 32 | #[derive(Debug, Clone)] 33 | 
pub(crate) enum StlTypeID { 34 | Vector, 35 | Bitset, 36 | String, 37 | Map, 38 | MultiMap, 39 | } 40 | 41 | /// ID describing a primitive type. This is a subset (1..19) of the integers used for `TypeID`. 42 | #[derive(Debug, Clone)] 43 | pub(crate) enum PrimitiveID { 44 | KChar, // 1 => "i8" 45 | KShort, // 2 => "i16" 46 | KInt, // 3 => i32 47 | KCounter, // 6 => i32 48 | KLong, // 4 => "i64" 49 | KFloat, // 5 => "f32" 50 | KCharStar, // 7 => "&'s str" 51 | KDouble, // 8 => "f64" 52 | KDouble32(f64, f64, u32), // 9 => "f64" 53 | KLegacyChar, // 10 => unimplemented!() 54 | KUChar, // 11 => "u8" 55 | KUShort, // 12 => "u16" 56 | KUInt, // 13 => "u32" 57 | KULong, // 14 => "u64" 58 | KBits, // 15 => "u32" 59 | KLong64, // 16 => "i64" 60 | KULong64, // 17 => "u64" 61 | KBool, // 18 => "u8" 62 | KFloat16, // 19 => unimplemented!() 63 | } 64 | 65 | impl PrimitiveID { 66 | fn new(id: i32, comment_str: &str) -> Result { 67 | use PrimitiveID::*; 68 | Ok(match id { 69 | 1 => KChar, 70 | 2 => KShort, 71 | 3 => KInt, 72 | 6 => KCounter, 73 | 4 => KLong, 74 | 5 => KFloat, 75 | 7 => KCharStar, 76 | 8 => KDouble, 77 | 9 => { 78 | // https://root.cern/doc/master/classTBufferFile.html#acdff906aa 79 | let re = Regex::new(r"^(\s*\[\w+\]\s*)?\[([^,]+),([^,]+)(,([^,]+))?\]").unwrap(); 80 | match re.captures(comment_str) { 81 | Some(caps) => { 82 | let (min, max, nbits) = ( 83 | evaluate_range_element(&caps[2])?, 84 | evaluate_range_element(&caps[3])?, 85 | match caps.get(5) { 86 | Some(cap) => cap.as_str().trim().parse().map(|val| { 87 | if !(2..=32).contains(&val) { 88 | 32 89 | } else { 90 | val 91 | } 92 | })?, 93 | None => 32, 94 | }, 95 | ); 96 | let mod_min = { 97 | if min >= max && nbits < 15 { 98 | nbits as f64 + 0.1 99 | } else { 100 | min 101 | } 102 | }; 103 | 104 | KDouble32(mod_min, max, nbits) 105 | } 106 | // No range specified. This is a normal f32. 
107 | None => KFloat, 108 | } 109 | } 110 | 10 => KLegacyChar, 111 | 11 => KUChar, 112 | 12 => KUShort, 113 | 13 => KUInt, 114 | 14 => KULong, 115 | 15 => KBits, 116 | 16 => KLong64, 117 | 17 => KULong64, 118 | 18 => KBool, 119 | 19 => KFloat16, 120 | id => Err(format_err!("Invalid base type id {}", id))?, 121 | }) 122 | } 123 | } 124 | 125 | impl TypeID { 126 | pub(crate) fn new(id: i32, comment_str: &str) -> Result { 127 | use self::TypeID::*; 128 | Ok(match id { 129 | // -1 may mean that this branch / leaf has no data, or that it has an elements-per-entry array... 130 | -1 => InvalidOrCounter(id), 131 | 0 => Base, 132 | id @ 1..=19 => Primitive(PrimitiveID::new(id, comment_str)?), 133 | id @ 21..=39 => Offset(PrimitiveID::new(id - 20, comment_str)?), 134 | id @ 41..=59 => Array(PrimitiveID::new(id - 40, comment_str)?), 135 | 61 => Object, 136 | 62 => Any, 137 | 63 => Objectp, 138 | 64 => ObjectP, 139 | 65 => String, 140 | 66 => TObject, 141 | 67 => Named, 142 | 69 => AnyP, 143 | 300 => Stl, 144 | 365 => StlString, 145 | 500 => Streamer, 146 | id => Unknown(id), 147 | }) 148 | } 149 | } 150 | 151 | impl StlTypeID { 152 | pub(crate) fn new(id: i32) -> StlTypeID { 153 | match id { 154 | 1 => StlTypeID::Vector, 155 | 4 => StlTypeID::Map, 156 | 5 => StlTypeID::MultiMap, 157 | 8 => StlTypeID::Bitset, 158 | 365 => StlTypeID::String, 159 | _ => unimplemented!("`StlTypeID` {} not implemented.", id), 160 | } 161 | } 162 | } 163 | 164 | impl ToRustType for TypeID { 165 | fn type_name(&self) -> Tokens { 166 | use self::TypeID::*; 167 | let t = match self { 168 | Primitive(ref id) | Offset(ref id) => id.type_name().to_string(), 169 | Array(ref id) => format!("Vec<{}>", id.type_name()), 170 | // "kObjectP"; might be null! 
171 | ObjectP => "Option>".to_string(), 172 | String => "String".to_string(), 173 | // Some funky things which we just treat as byte strings for now 174 | Object | Stl | StlString | Streamer | Unknown(82) => "Vec".to_string(), 175 | Any => "Vec".to_string(), 176 | AnyP => "Vec".to_string(), 177 | InvalidOrCounter(-1) => "u32".to_string(), 178 | _ => panic!("{:?}: type not implemented, yet", self), 179 | }; 180 | let t = Ident::new(t); 181 | quote!(#t) 182 | } 183 | } 184 | 185 | impl ToRustParser for PrimitiveID { 186 | fn to_inline_parser(&self) -> Tokens { 187 | match self { 188 | PrimitiveID::KChar => quote! {nom::number::complete::be_i8}, 189 | PrimitiveID::KShort => quote! {nom::number::complete::be_i16}, 190 | PrimitiveID::KInt => quote! {nom::number::complete::be_i32}, 191 | PrimitiveID::KCounter => quote! {nom::number::complete::be_i32}, 192 | PrimitiveID::KLong => quote! {nom::number::complete::be_i64}, 193 | PrimitiveID::KFloat => quote! {nom::number::complete::be_f32}, 194 | PrimitiveID::KCharStar => quote! { c_string }, 195 | PrimitiveID::KDouble => quote! {nom::number::complete::be_f64}, 196 | // This one is nasty! Check the 197 | // TFileBuffer.cxx sources in ROOT and: 198 | // https://root.cern/root/html606/classTBufferFile.html#a44c2adb6fb1194ec999b84aed259e5bc 199 | // and 200 | // https://root.cern/root/html606/TStreamerElement_8cxx.html#a4d6c86845bee19cf28c93a531ec50f29 201 | PrimitiveID::KDouble32(min, max, nbits) => { 202 | quote!(parse_custom_mantissa(#min, #max, #nbits)) 203 | } 204 | PrimitiveID::KLegacyChar => unimplemented!("{:?}: type not implemented, yet", self), 205 | PrimitiveID::KUChar => quote! {nom::number::complete::be_u8}, 206 | PrimitiveID::KUShort => quote! {nom::number::complete::be_u16}, 207 | PrimitiveID::KUInt => quote! {nom::number::complete::be_u32}, 208 | PrimitiveID::KULong => quote! {nom::number::complete::be_u64}, 209 | PrimitiveID::KBits => quote! {nom::number::complete::be_u32}, 210 | PrimitiveID::KLong64 => quote! 
{nom::number::complete::be_i64}, 211 | PrimitiveID::KULong64 => quote! {nom::number::complete::be_u64}, 212 | PrimitiveID::KBool => quote! {nom::number::complete::be_u8}, 213 | PrimitiveID::KFloat16 => quote! {custom_float16}, 214 | } 215 | } 216 | } 217 | 218 | impl PrimitiveID { 219 | pub(crate) fn type_name_str(&self) -> &str { 220 | use PrimitiveID::*; 221 | match self { 222 | KChar => "i8", 223 | KShort => "i16", 224 | KInt => "i32", 225 | KCounter => "i32", 226 | KLong => "i64", 227 | KFloat => "f32", 228 | KCharStar => "&'s str", 229 | KDouble => "f64", 230 | KDouble32(_, _, _) => "f64", 231 | KLegacyChar => unimplemented!("{:?}: type not implemented", self), 232 | KUChar => "u8", 233 | KUShort => "u16", 234 | KUInt => "u32", 235 | KULong => "u64", 236 | KBits => "u32", 237 | KLong64 => "i64", 238 | KULong64 => "u64", 239 | KBool => "u8", 240 | KFloat16 => "f16", 241 | } 242 | } 243 | } 244 | 245 | impl ToRustType for PrimitiveID { 246 | fn type_name(&self) -> Tokens { 247 | let t = Ident::new(self.type_name_str()); 248 | quote!(#t) 249 | } 250 | } 251 | 252 | fn remove_whitespace(s: &str) -> String { 253 | s.chars().filter(|c| !c.is_whitespace()).collect() 254 | } 255 | 256 | /// Very primitve logic for evaluating comment ranges 257 | fn evaluate_range_element(comment_str: &str) -> Result { 258 | // Remove all whites spaces 259 | let comment_string = remove_whitespace(comment_str); 260 | let comment_str = comment_string.as_str(); 261 | 262 | // Is a simple float 263 | if let Ok(float) = comment_str.parse() { 264 | return Ok(float); 265 | } 266 | 267 | // Might contain "pi" 268 | let (negate, comment_str) = { 269 | match comment_str.strip_prefix('-') { 270 | Some(rest) => (true, rest), 271 | None => (false, comment_str), 272 | } 273 | }; 274 | let val = match comment_str { 275 | "2pi" | "2*pi" | "twopi" => 2. 
* PI, 276 | "pi/2" => PI / 2., 277 | "pi/4" => PI / 4., 278 | "pi" => PI, 279 | s => Err(format_err!("Unrecognized element in comment string {}", s))?, 280 | }; 281 | 282 | if negate { 283 | Ok(-val) 284 | } else { 285 | Ok(val) 286 | } 287 | } 288 | -------------------------------------------------------------------------------- /root-io/src/core/types.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | use crate::core::Source; 4 | 5 | use nom::HexDisplay; 6 | 7 | /// Absolute point in file to seek data 8 | pub(crate) type SeekPointer = u64; 9 | 10 | bitflags! { 11 | pub(crate) struct Flags: u32 { 12 | const BYTE_COUNT_MASK = 0x4000_0000; 13 | const BYTE_COUNT_VMASK = 0x4000; // 16384 14 | const CLASS_MASK = 0x8000_0000; 15 | const NEW_CLASSTAG = 0xFFFF_FFFF; 16 | } 17 | } 18 | bitflags! { 19 | pub(crate) struct TObjectFlags: u32 { 20 | const IS_ON_HEAP = 0x0100_0000; 21 | const IS_REFERENCED = 1 << 4; 22 | } 23 | } 24 | 25 | /// Used in `TStreamerInfo` 26 | /// Describes if the following entry is a new class or if it was 27 | /// already described. 28 | #[derive(Debug)] 29 | pub enum ClassInfo<'s> { 30 | /// Class name of the new class 31 | New(&'s str), 32 | /// Byte offset of new class tag in record, + 2; whatever... followed by object 33 | Exists(u32), 34 | /// Byte offset of new class tag in record, + 2; whatever... 
ref to object 35 | References(u32), 36 | } 37 | 38 | /// The most basic ROOT object from which almost everything inherits 39 | #[derive(Debug, Clone)] 40 | #[allow(dead_code)] 41 | pub struct TObject { 42 | pub(crate) ver: u16, 43 | pub(crate) id: u32, 44 | pub(crate) bits: TObjectFlags, 45 | } 46 | 47 | /// A ROOT object with a name and a title 48 | #[derive(Debug, Clone)] 49 | pub struct TNamed { 50 | // pub(crate) ver: u16, 51 | // pub(crate) tobj: TObject, 52 | pub name: String, 53 | pub title: String, 54 | } 55 | 56 | /// A type holding nothing but the original data and a class info object 57 | pub struct Raw<'s> { 58 | pub(crate) classinfo: &'s str, 59 | pub(crate) obj: &'s [u8], 60 | } 61 | 62 | /// The context from which we are currently parsing 63 | #[derive(Debug)] 64 | pub struct Context { 65 | /// Path to file of this context 66 | pub(crate) source: Source, 67 | /// Offset between the beginning of `s` and to where absolute 68 | /// positions in the buffer point (e.g. for class defs) 69 | /// Usually something like TKey-length + 4 70 | pub(crate) offset: u64, 71 | /// The full buffer we are working on 72 | pub(crate) s: Vec, 73 | } 74 | 75 | impl<'s> fmt::Debug for Raw<'s> { 76 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 77 | write!(f, "{} \n {}", self.classinfo, self.obj.to_hex(16)) 78 | } 79 | } 80 | 81 | // Types which are so far unused: 82 | // pub type TArrayD = Vec; 83 | // pub type TArrayI = Vec; 84 | // pub type TArrayL64 = Vec; 85 | -------------------------------------------------------------------------------- /root-io/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # Root-io 2 | //! This crate provides a way to retrieve data saved in the 3 | //! [ROOT](https://root.cern.ch/) binary format commonly used in 4 | //! particle physics experiments. This library provides the basic 5 | //! means to inspect and process the contents of arbitrary ROOT 6 | //! files. 
`Root-io` provides a simple mean to read 7 | //! data stored in so-called `TTrees`. The goal of this library is 8 | //! primarily to make the data [published](http://opendata.cern.ch/) 9 | //! by the ALICE collaboration accessible in pure Rust. An example of 10 | //! its usage for that purpose is demonstrated as an [example 11 | //! analysis](https://github.com/cbourjau/alice-rs/tree/master/examples/simple-analysis). 12 | //! 13 | //! The API surface is deliberately small to make the processing of said 14 | //! files as easy as possible. If you are looking for a particular 15 | //! parser chances have it that it exists but it is not marked as `pub`. 16 | #![allow(clippy::cognitive_complexity)] 17 | #![recursion_limit = "256"] 18 | #[macro_use] 19 | extern crate bitflags; 20 | extern crate nom; 21 | #[macro_use] 22 | extern crate quote; 23 | #[macro_use] 24 | extern crate failure; 25 | extern crate flate2; 26 | extern crate lzma_rs; 27 | extern crate reqwest; 28 | 29 | extern crate alice_open_data; 30 | 31 | // pub mod core_types; 32 | mod code_gen; 33 | pub mod core; 34 | pub mod test_utils; 35 | mod tests; 36 | pub mod tree_reader; 37 | 38 | // Contains the stream_zip macro 39 | pub mod utils; 40 | 41 | pub use crate::core::{FileItem, RootFile, Source}; 42 | 43 | /// Offset when using Context; should be in `Context`, maybe? 
44 | const MAP_OFFSET: u64 = 2; 45 | -------------------------------------------------------------------------------- /root-io/src/test_data/HZZ-lz4.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/HZZ-lz4.root -------------------------------------------------------------------------------- /root-io/src/test_data/HZZ-lzma.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/HZZ-lzma.root -------------------------------------------------------------------------------- /root-io/src/test_data/HZZ-uncompressed.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/HZZ-uncompressed.root -------------------------------------------------------------------------------- /root-io/src/test_data/HZZ-zlib.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/HZZ-zlib.root -------------------------------------------------------------------------------- /root-io/src/test_data/HZZ.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/HZZ.root -------------------------------------------------------------------------------- /root-io/src/test_data/README.md: -------------------------------------------------------------------------------- 1 | This directory contains binary ROOT files for testing purposes. 
They were primarily taken from the [uproot project](https://github.com/scikit-hep/uproot) and from the [ALICE public data](http://opendata.cern.ch). 2 | -------------------------------------------------------------------------------- /root-io/src/test_data/Zmumu-lz4.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/Zmumu-lz4.root -------------------------------------------------------------------------------- /root-io/src/test_data/Zmumu-lzma.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/Zmumu-lzma.root -------------------------------------------------------------------------------- /root-io/src/test_data/Zmumu-uncompressed.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/Zmumu-uncompressed.root -------------------------------------------------------------------------------- /root-io/src/test_data/Zmumu-zlib.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/Zmumu-zlib.root -------------------------------------------------------------------------------- /root-io/src/test_data/Zmumu.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/Zmumu.root -------------------------------------------------------------------------------- /root-io/src/test_data/foriter.root: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/foriter.root -------------------------------------------------------------------------------- /root-io/src/test_data/foriter2.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/foriter2.root -------------------------------------------------------------------------------- /root-io/src/test_data/mc10events.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/mc10events.root -------------------------------------------------------------------------------- /root-io/src/test_data/nesteddirs.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/nesteddirs.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-5.23.02-uncompressed.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-5.23.02-uncompressed.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-5.23.02-zlib.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-5.23.02-zlib.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-5.24.00-uncompressed.root: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-5.24.00-uncompressed.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-5.24.00-zlib.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-5.24.00-zlib.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-5.25.02-uncompressed.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-5.25.02-uncompressed.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-5.25.02-zlib.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-5.25.02-zlib.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-5.26.00-uncompressed.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-5.26.00-uncompressed.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-5.26.00-zlib.root: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-5.26.00-zlib.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-5.27.02-uncompressed.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-5.27.02-uncompressed.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-5.27.02-zlib.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-5.27.02-zlib.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-5.28.00-uncompressed.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-5.28.00-uncompressed.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-5.28.00-zlib.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-5.28.00-zlib.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-5.29.02-uncompressed.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-5.29.02-uncompressed.root 
-------------------------------------------------------------------------------- /root-io/src/test_data/sample-5.29.02-zlib.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-5.29.02-zlib.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-5.30.00-lzma.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-5.30.00-lzma.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-5.30.00-uncompressed.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-5.30.00-uncompressed.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-5.30.00-zlib.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-5.30.00-zlib.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-6.08.04-lzma.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-6.08.04-lzma.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-6.08.04-uncompressed.root: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-6.08.04-uncompressed.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-6.08.04-zlib.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-6.08.04-zlib.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-6.10.05-lz4.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-6.10.05-lz4.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-6.10.05-lzma.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-6.10.05-lzma.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-6.10.05-uncompressed.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-6.10.05-uncompressed.root -------------------------------------------------------------------------------- /root-io/src/test_data/sample-6.10.05-zlib.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/sample-6.10.05-zlib.root -------------------------------------------------------------------------------- 
/root-io/src/test_data/simple.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/simple.root -------------------------------------------------------------------------------- /root-io/src/test_data/small-evnt-tree-fullsplit.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/small-evnt-tree-fullsplit.root -------------------------------------------------------------------------------- /root-io/src/test_data/small-flat-tree.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbourjau/alice-rs/95cfac87b67aa6904ce6880919860c72ce33dd8b/root-io/src/test_data/small-flat-tree.root -------------------------------------------------------------------------------- /root-io/src/test_utils.rs: -------------------------------------------------------------------------------- 1 | #![cfg(test)] 2 | 3 | pub use cfg_gated::log; 4 | 5 | #[cfg(target_arch = "wasm32")] 6 | mod cfg_gated { 7 | use wasm_bindgen::JsValue; 8 | use web_sys; 9 | 10 | /// Print a debuggable object to the console 11 | pub fn log(thing: D) { 12 | let s = format!("{:?}", thing); 13 | web_sys::console::log_1(&JsValue::from_str(&s)); 14 | } 15 | } 16 | 17 | #[cfg(not(target_arch = "wasm32"))] 18 | mod cfg_gated { 19 | pub fn log(thing: D) { 20 | std::dbg!(thing); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /root-io/src/tests/basic_io.rs: -------------------------------------------------------------------------------- 1 | #![cfg(not(target_arch = "wasm32"))] 2 | 3 | use crate::core::*; 4 | use std::path::PathBuf; 5 | 6 | #[test] 7 | fn list_of_rules() { 8 | let s = &[ 9 | 64, 0, 1, 130, 255, 255, 
255, 255, 84, 76, 105, 115, 116, 0, 64, 0, 1, 116, 0, 5, 0, 1, 0, 10 | 0, 0, 0, 2, 0, 64, 0, 11, 108, 105, 115, 116, 79, 102, 82, 117, 108, 101, 115, 0, 0, 0, 2, 11 | 64, 0, 0, 182, 255, 255, 255, 255, 84, 79, 98, 106, 83, 116, 114, 105, 110, 103, 0, 64, 0, 12 | 0, 163, 0, 1, 0, 1, 0, 0, 0, 0, 3, 0, 0, 0, 150, 116, 121, 112, 101, 61, 114, 101, 97, 100, 13 | 32, 115, 111, 117, 114, 99, 101, 67, 108, 97, 115, 115, 61, 34, 84, 84, 114, 101, 101, 34, 14 | 32, 116, 97, 114, 103, 101, 116, 67, 108, 97, 115, 115, 61, 34, 84, 84, 114, 101, 101, 34, 15 | 32, 118, 101, 114, 115, 105, 111, 110, 61, 34, 91, 45, 49, 54, 93, 34, 32, 115, 111, 117, 16 | 114, 99, 101, 61, 34, 34, 32, 116, 97, 114, 103, 101, 116, 61, 34, 102, 68, 101, 102, 97, 17 | 117, 108, 116, 69, 110, 116, 114, 121, 79, 102, 102, 115, 101, 116, 76, 101, 110, 34, 32, 18 | 99, 111, 100, 101, 61, 34, 123, 32, 102, 68, 101, 102, 97, 117, 108, 116, 69, 110, 116, 19 | 114, 121, 79, 102, 102, 115, 101, 116, 76, 101, 110, 32, 61, 32, 49, 48, 48, 48, 59, 32, 20 | 125, 34, 32, 0, 64, 0, 0, 152, 128, 0, 55, 57, 64, 0, 0, 144, 0, 1, 0, 1, 0, 0, 0, 0, 3, 0, 21 | 0, 0, 131, 116, 121, 112, 101, 61, 114, 101, 97, 100, 32, 115, 111, 117, 114, 99, 101, 67, 22 | 108, 97, 115, 115, 61, 34, 84, 84, 114, 101, 101, 34, 32, 116, 97, 114, 103, 101, 116, 67, 23 | 108, 97, 115, 115, 61, 34, 84, 84, 114, 101, 101, 34, 32, 118, 101, 114, 115, 105, 111, 24 | 110, 61, 34, 91, 45, 49, 56, 93, 34, 32, 115, 111, 117, 114, 99, 101, 61, 34, 34, 32, 116, 25 | 97, 114, 103, 101, 116, 61, 34, 102, 78, 67, 108, 117, 115, 116, 101, 114, 82, 97, 110, 26 | 103, 101, 34, 32, 99, 111, 100, 101, 61, 34, 123, 32, 102, 78, 67, 108, 117, 115, 116, 101, 27 | 114, 82, 97, 110, 103, 101, 32, 61, 32, 48, 59, 32, 125, 34, 32, 0, 0, 28 | ]; 29 | // Should not be needed; just some dummy 30 | let context = Context { 31 | source: PathBuf::from("").into(), 32 | offset: 0, 33 | s: vec![], 34 | }; 35 | use nom::HexDisplay; 36 | println!("{}", s.to_hex(16)); 37 | let (_, 
(_name, obj)) = class_name_and_buffer(s, &context).unwrap(); 38 | println!("{}", obj.to_hex(16)); 39 | let (obj, _ci) = classinfo(obj).unwrap(); 40 | println!("{:?}", _ci); 41 | println!("{}", obj.to_hex(16)); 42 | // let (_obj, l) = tlist(obj, &context).unwrap(); 43 | // assert_eq!(l.name, "listOfRules"); 44 | // assert_eq!(l.len, 2); 45 | } 46 | -------------------------------------------------------------------------------- /root-io/src/tests/mod.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod basic_io; 3 | -------------------------------------------------------------------------------- /root-io/src/tree_reader/branch.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | use futures::prelude::*; 4 | use nom::{ 5 | combinator::verify, 6 | multi::{count, length_data, length_value}, 7 | number::complete::*, 8 | sequence::preceded, 9 | IResult, 10 | }; 11 | 12 | use crate::{ 13 | code_gen::rust::ToRustType, core::parsers::*, core::types::*, 14 | tree_reader::container::Container, tree_reader::leafs::TLeaf, 15 | }; 16 | 17 | /// A `TBranch` describes one "Column" of a `TTree` 18 | /// Even though this class is described in the `TStreamerInfo` of a ROOT 19 | /// file, it is hard coded in this library to provide a reliable API 20 | /// for working with `TTree`s 21 | #[derive(Debug, Clone)] 22 | #[allow(dead_code)] 23 | pub struct TBranch { 24 | /// The name of this object 25 | pub name: String, 26 | /// Compression level and algorithm 27 | fcompress: i32, 28 | /// Initial Size of Basket Buffer 29 | fbasketsize: i32, 30 | /// Initial Length of fEntryOffset table in the basket buffers 31 | fentryoffsetlen: i32, 32 | /// Last basket number written 33 | fwritebasket: i32, 34 | /// Current entry number (last one filled in this branch) 35 | fentrynumber: i64, 36 | /// Offset of this branch 37 | foffset: i32, 38 | /// Branch split level 39 | 
fsplitlevel: i32, 40 | /// Number of entries 41 | fentries: i64, 42 | /// Number of the first entry in this branch 43 | ffirstentry: i64, 44 | /// Total number of bytes in all leaves before compression 45 | ftotbytes: i64, 46 | /// Total number of bytes in all leaves after compression 47 | fzipbytes: i64, 48 | /// -> List of Branches of this branch 49 | fbranches: Vec, 50 | /// -> List of leaves of this branch (TODO: Parse to TLeafC/I/F..) 51 | fleaves: Vec, 52 | /// Table of first entry in each basket 53 | fbasketentry: Vec, 54 | containers: Vec, 55 | } 56 | 57 | impl TBranch { 58 | /// Return the endpoints of all sub-branches of this branch 59 | pub fn branches(&self) -> Vec<&TBranch> { 60 | let out: Vec<_> = self.fbranches.iter().flat_map(|b| b.branches()).collect(); 61 | if out.is_empty() { 62 | vec![self] 63 | } else { 64 | out 65 | } 66 | } 67 | 68 | /// Access to the `Containers` containing the data of this branch 69 | pub(crate) fn containers(&self) -> &[Container] { 70 | &self.containers 71 | } 72 | 73 | /// The name of this branch 74 | pub fn name(&self) -> String { 75 | self.name.to_owned() 76 | } 77 | 78 | /// The type(s) of the elements in this branch For some reason, 79 | /// there may be situations where a branch has several leaves and thus types. 80 | pub fn element_types(&self) -> Vec { 81 | self.fleaves 82 | .iter() 83 | .map(|l| l.type_name().to_string()) 84 | .collect() 85 | } 86 | 87 | /// Create an iterator over the data of a column (`TBranch`) with a 88 | /// constant number of element per entry (or at least not a 89 | /// variable number of entries which depends on an external list of 90 | /// indices. For the latter case see `as_var_size_iterator`). 
91 | /// 92 | /// # Example 93 | /// ``` 94 | /// extern crate failure; 95 | /// extern crate nom; 96 | /// extern crate root_io; 97 | /// use futures::StreamExt; 98 | /// 99 | /// use std::path::Path; 100 | /// use nom::number::complete::be_i32; 101 | /// 102 | /// use root_io::tree_reader::Tree; 103 | /// use root_io::RootFile; 104 | /// 105 | /// #[tokio::main] 106 | ///# async fn main 107 | /// 108 | ///# () { 109 | /// let path = Path::new("./src/test_data/simple.root"); 110 | /// let f = RootFile::new(path).await.expect("Failed to open file"); 111 | /// let tree = f.items()[0].as_tree().await.unwrap(); 112 | /// let numbers = tree 113 | /// .branch_by_name("one").unwrap() 114 | /// // Must pass parser as closure 115 | /// .as_fixed_size_iterator(|i| be_i32(i)); 116 | /// numbers.for_each(|n| async move { 117 | /// println!("All the numbers of this branch{:?}", n); 118 | /// }).await; 119 | ///# } 120 | /// ``` 121 | pub fn as_fixed_size_iterator(&self, p: P) -> impl Stream 122 | where 123 | P: Fn(&[u8]) -> IResult<&[u8], T>, 124 | { 125 | stream::iter(self.containers().to_owned()) 126 | .then(|basket| async move { basket.raw_data().await.unwrap() }) 127 | .map(move |(n_events_in_basket, buffer)| { 128 | // Parse the entire basket buffer; if something is left over its just junk 129 | let x = count(&p, n_events_in_basket as usize)(&buffer); 130 | let events = match x { 131 | Ok((_rest, output)) => output, 132 | Err(e) => panic!("Parser failed unexpectedly {:?}", e), 133 | }; 134 | stream::iter(events) 135 | }) 136 | .flatten() 137 | } 138 | 139 | /// Iterator over the data of a column (`TBranch`) with a variable 140 | /// number of elements per entry. 
See the file 141 | /// [`read_esd.rs`](https://github.com/cbourjau/root-io/blob/master/src/tests/read_esd.rs) 142 | /// in the repository for a comprehensive example 143 | pub fn as_var_size_iterator( 144 | &self, 145 | p: P, 146 | el_counter: Vec, 147 | ) -> impl Stream> 148 | where 149 | P: Fn(&[u8]) -> IResult<&[u8], T>, 150 | { 151 | let mut elems_per_event = el_counter.into_iter(); 152 | stream::iter(self.containers().to_owned()) 153 | .then(|basket| async move { basket.raw_data().await.unwrap() }) 154 | .map(move |(n_events_in_basket, buffer)| { 155 | let mut buffer = buffer.as_slice(); 156 | let mut events = Vec::with_capacity(n_events_in_basket as usize); 157 | for _ in 0..n_events_in_basket { 158 | if let Some(n_elems_in_event) = elems_per_event.next() { 159 | match count(&p, n_elems_in_event as usize)(buffer) { 160 | Ok((rest, output)) => { 161 | buffer = rest; 162 | events.push(output) 163 | } 164 | Err(e) => panic!("Parser failed unexpectedly {:?}", e), 165 | } 166 | } 167 | } 168 | stream::iter(events) 169 | }) 170 | .flatten() 171 | } 172 | } 173 | 174 | /// `TBranchElements` are a subclass of `TBranch` if the content is an Object 175 | /// We ignore the extra information for now and just parse the TBranch"Header" in either case 176 | pub fn tbranch_hdr<'s>(raw: &Raw<'s>, ctxt: &'s Context) -> IResult<&'s [u8], TBranch> { 177 | match raw.classinfo { 178 | "TBranchElement" | "TBranchObject" => { 179 | let (i, _ver) = be_u16(raw.obj)?; 180 | length_value(checked_byte_count, |i| tbranch(i, ctxt))(i) 181 | } 182 | "TBranch" => tbranch(raw.obj, ctxt), 183 | name => panic!("Unexpected Branch type {}", name), 184 | } 185 | } 186 | 187 | pub fn tbranch<'s>(i: &'s [u8], context: &'s Context) -> IResult<&'s [u8], TBranch> { 188 | let (i, _ver) = verify(be_u16, |v| [11, 12].contains(v))(i)?; 189 | let (i, tnamed) = length_value(checked_byte_count, tnamed)(i)?; 190 | let (i, _tattfill) = length_data(checked_byte_count)(i)?; 191 | let (i, fcompress) = be_i32(i)?; 
192 | let (i, fbasketsize) = be_i32(i)?; 193 | let (i, fentryoffsetlen) = be_i32(i)?; 194 | let (i, fwritebasket) = be_i32(i)?; 195 | let (i, fentrynumber) = be_i64(i)?; 196 | let (i, foffset) = be_i32(i)?; 197 | let (i, fmaxbaskets) = be_i32(i)?; 198 | let (i, fsplitlevel) = be_i32(i)?; 199 | let (i, fentries) = be_i64(i)?; 200 | let (i, ffirstentry) = be_i64(i)?; 201 | let (i, ftotbytes) = be_i64(i)?; 202 | let (i, fzipbytes) = be_i64(i)?; 203 | let (i, fbranches) = 204 | length_value(checked_byte_count, |i| tobjarray(tbranch_hdr, i, context))(i)?; 205 | let (i, fleaves) = length_value(checked_byte_count, |i| { 206 | tobjarray(TLeaf::parse_from_raw, i, context) 207 | })(i)?; 208 | let (i, fbaskets) = length_value(checked_byte_count, |i| { 209 | tobjarray(|r, _context| Ok((&[], r.obj)), i, context) 210 | })(i)?; 211 | let (i, fbasketbytes) = preceded(be_u8, count(be_i32, fmaxbaskets as usize))(i)?; 212 | let (i, fbasketentry) = preceded(be_u8, count(be_i64, fmaxbaskets as usize))(i)?; 213 | let (i, fbasketseek) = preceded(be_u8, count(be_u64, fmaxbaskets as usize))(i)?; 214 | let (i, ffilename) = string(i)?; 215 | 216 | let name = tnamed.name; 217 | let fbaskets = fbaskets 218 | .into_iter() 219 | .filter(|s| !s.is_empty()) 220 | .map(|s| Container::InMemory(s.to_vec())); 221 | let nbaskets = fwritebasket as usize; 222 | let fbasketbytes = fbasketbytes 223 | .into_iter() 224 | .take(nbaskets) 225 | .map(|val| val as usize); 226 | let fbasketentry = fbasketentry.into_iter().take(nbaskets).collect(); 227 | let fbasketseek = fbasketseek.into_iter().take(nbaskets); 228 | let source = if ffilename.is_empty() { 229 | context.source.to_owned() 230 | } else { 231 | unimplemented!("Root files referencing other Root files is not implemented") 232 | }; 233 | let containers_disk = fbasketseek 234 | .zip(fbasketbytes) 235 | .map(|(seek, len)| Container::OnDisk(source.clone(), seek, len as u64)); 236 | let containers = fbaskets.chain(containers_disk).collect(); 237 | Ok(( 238 | 
i, 239 | TBranch { 240 | name, 241 | fcompress, 242 | fbasketsize, 243 | fentryoffsetlen, 244 | fwritebasket, 245 | fentrynumber, 246 | foffset, 247 | fsplitlevel, 248 | fentries, 249 | ffirstentry, 250 | ftotbytes, 251 | fzipbytes, 252 | fbranches, 253 | fleaves, 254 | fbasketentry, 255 | containers, 256 | }, 257 | )) 258 | } 259 | -------------------------------------------------------------------------------- /root-io/src/tree_reader/container.rs: -------------------------------------------------------------------------------- 1 | use failure::Error; 2 | use nom::combinator::rest; 3 | use nom::number::complete::*; 4 | use nom::*; 5 | 6 | use crate::core::*; 7 | 8 | #[derive(Debug, Clone)] 9 | pub(crate) enum Container { 10 | /// Decompressed content of a `TBasket` 11 | InMemory(Vec), 12 | /// Filename, start byte, and len of a `TBasket` on disk 13 | OnDisk(Source, u64, u64), 14 | } 15 | 16 | impl Container { 17 | /// Return the number of entries and the data; reading it from disk if necessary 18 | pub(crate) async fn raw_data(self) -> Result<(u32, Vec), Error> { 19 | let buf = match self { 20 | Container::InMemory(buf) => buf, 21 | Container::OnDisk(source, seek, len) => source.fetch(seek, len).await?, 22 | }; 23 | match tbasket2vec(buf.as_slice()) { 24 | Ok((_, v)) => Ok(v), 25 | _ => Err(format_err!("tbasket2vec parser failed")), 26 | } 27 | } 28 | // /// For debugging: Try to find the file of this container. 
Out of luck if the container was inlined 29 | // pub(crate) fn file(&self) -> Option { 30 | // match *self { 31 | // // No file name available 32 | // Container::InMemory(_) => None, 33 | // Container::OnDisk(ref p, _, _) => Some(p.to_owned()) 34 | // } 35 | // } 36 | } 37 | 38 | /// Return a tuple indicating the number of elements in this basket 39 | /// and the content as a Vec 40 | fn tbasket2vec(input: &[u8]) -> IResult<&[u8], (u32, Vec)> { 41 | let (input, hdr) = tkey_header(input)?; 42 | let (input, _ver) = be_u16(input)?; 43 | let (input, _buf_size) = be_u32(input)?; 44 | let (input, _entry_size) = be_u32(input)?; 45 | let (input, n_entry_buf) = be_u32(input)?; 46 | let (input, last) = be_u32(input)?; 47 | let (input, _flag) = be_i8(input)?; 48 | let (input, buf) = rest(input)?; 49 | let buf = if hdr.uncomp_len as usize > buf.len() { 50 | decompress(buf).unwrap().1 51 | } else { 52 | buf.to_vec() 53 | }; 54 | // Not the whole buffer is filled, no, no, no, that 55 | // would be to easy! Its only filled up to `last`, 56 | // whereby we have to take the key_len into account... 
57 | let useful_bytes = (last - hdr.key_len as u32) as usize; 58 | Ok(( 59 | input, 60 | (n_entry_buf, buf.as_slice()[..useful_bytes].to_vec()), 61 | )) 62 | } 63 | 64 | #[cfg(test)] 65 | mod tests { 66 | use crate::core::tkey_header; 67 | use nom::*; 68 | use std::fs::File; 69 | use std::io::{BufReader, Read, Seek, SeekFrom}; 70 | 71 | use super::tbasket2vec; 72 | 73 | #[test] 74 | fn basket_simple() { 75 | let path = "./src/test_data/simple.root"; 76 | let f = File::open(path).unwrap(); 77 | let mut reader = BufReader::new(f); 78 | // Go to first basket 79 | reader.seek(SeekFrom::Start(218)).unwrap(); 80 | // size from fbasketbytes 81 | let mut buf = vec![0; 86]; 82 | // let mut buf = vec![0; 386]; 83 | reader.read_exact(&mut buf).unwrap(); 84 | 85 | println!("{}", buf.to_hex(16)); 86 | println!("{:?}", tkey_header(&buf)); 87 | // println!("{:#?}", tbasket(&buf, be_u32)); 88 | println!("{:#?}", tbasket2vec(&buf)); 89 | } 90 | 91 | // /// Test the first basket of the "Tracks.fP[5]" branch 92 | // #[test] 93 | // fn basket_esd() { 94 | // // This test is broken since the numbers were hardcoded for a specific file 95 | // use alice_open_data; 96 | // let path = alice_open_data::test_file().unwrap(); 97 | 98 | // let f = File::open(&path).unwrap(); 99 | // let mut reader = BufReader::new(f); 100 | // // Go to first basket 101 | // reader.seek(SeekFrom::Start(77881)).unwrap(); 102 | // // size from fbasketbytes 103 | // let mut buf = vec![0; 87125]; 104 | // reader.read_exact(&mut buf).unwrap(); 105 | 106 | // println!("{:?}", tkey_header(&buf).unwrap().1); 107 | // // println!("{:#?}", tbasket(&buf, |i| count!(i, be_f32, 15)).unwrap().1); 108 | // println!("{:#?}", tbasket2vec(&buf)); 109 | // } 110 | } 111 | -------------------------------------------------------------------------------- /root-io/src/tree_reader/leafs.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | use nom::{ 4 | 
combinator::{map_res, peek, verify}, 5 | multi::length_value, 6 | number::complete::*, 7 | IResult, 8 | }; 9 | 10 | use quote::{Ident, Tokens}; 11 | 12 | use crate::{code_gen::rust::ToRustType, core::*}; 13 | 14 | /// Parse a bool from a big endian u8 15 | fn be_bool(i: &[u8]) -> IResult<&[u8], bool> { 16 | let (i, byte) = verify(be_u8, |&byte| byte == 0 || byte == 1)(i)?; 17 | Ok((i, byte == 1)) 18 | } 19 | 20 | // Wrap everything once more to avoid exporting the enum variants. 21 | #[derive(Debug, Clone)] 22 | pub struct TLeaf { 23 | variant: TLeafVariant, 24 | } 25 | 26 | impl TLeaf { 27 | pub fn parse<'s>(i: &'s [u8], context: &'s Context, c_name: &str) -> IResult<&'s [u8], Self> { 28 | TLeafVariant::parse(i, context, c_name).map(|(i, var)| (i, Self { variant: var })) 29 | } 30 | 31 | // A helper function to get around some lifetime issues on the caller side 32 | pub(crate) fn parse_from_raw<'s>(raw: &Raw<'s>, ctxt: &'s Context) -> IResult<&'s [u8], Self> { 33 | Self::parse(raw.obj, ctxt, raw.classinfo) 34 | } 35 | } 36 | 37 | #[derive(Debug, Clone)] 38 | enum TLeafVariant { 39 | TLeafB(TLeafB), 40 | TLeafS(TLeafS), 41 | TLeafI(TLeafI), 42 | TLeafL(TLeafL), 43 | TLeafF(TLeafF), 44 | TLeafD(TLeafD), 45 | TLeafC(TLeafC), 46 | TLeafO(TLeafO), 47 | TLeafD32(TLeafD32), 48 | TLeafElement(TLeafElement), 49 | } 50 | 51 | impl TLeafVariant { 52 | fn parse<'s>(i: &'s [u8], context: &'s Context, c_name: &str) -> IResult<&'s [u8], Self> { 53 | match c_name { 54 | "TLeafB" => TLeafB::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafB(l))), 55 | "TLeafS" => TLeafS::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafS(l))), 56 | "TLeafI" => TLeafI::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafI(l))), 57 | "TLeafL" => TLeafL::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafL(l))), 58 | "TLeafF" => TLeafF::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafF(l))), 59 | "TLeafD" => TLeafD::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafD(l))),
60 | "TLeafC" => TLeafC::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafC(l))), 61 | "TLeafO" => TLeafO::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafO(l))), 62 | "TLeafD32" => TLeafD32::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafD32(l))), 63 | "TLeafElement" => { 64 | TLeafElement::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafElement(l))) 65 | } 66 | name => unimplemented!("Unexpected Leaf type {}", name), 67 | } 68 | } 69 | } 70 | 71 | macro_rules! make_tleaf_variant { 72 | // Usually the element size ish what we also use for min/max, but not always 73 | ($struct_name:ident, $field_type:ty, $parser:ident) => { 74 | make_tleaf_variant! {$struct_name, $field_type, $parser, std::mem::size_of::<$field_type>()} 75 | }; 76 | ($struct_name:ident, $field_type:ty, $parser:ident, $size_of_el:expr) => { 77 | #[derive(Debug, Clone)] 78 | #[allow(dead_code)] 79 | struct $struct_name { 80 | base: TLeafBase, 81 | fminimum: $field_type, 82 | fmaximum: $field_type, 83 | } 84 | impl $struct_name { 85 | fn parse<'s>(i: &'s [u8], context: &'s Context) -> IResult<&'s [u8], Self> { 86 | // All known descendens have version 1 87 | let (i, _) = verify(be_u16, |&ver| ver == 1)(i)?; 88 | let (i, base) = 89 | length_value(checked_byte_count, |i| TLeafBase::parse(i, context))(i)?; 90 | let (i, fminimum) = $parser(i)?; 91 | let (i, fmaximum) = $parser(i)?; 92 | let obj = Self { 93 | base, 94 | fminimum, 95 | fmaximum, 96 | }; 97 | obj.verify_consistency().unwrap(); 98 | Ok((i, obj)) 99 | } 100 | 101 | fn verify_consistency(&self) -> Result<(), String> { 102 | if self.base.flentype as usize != $size_of_el { 103 | return Err(String::from("Unexpected type length")); 104 | } 105 | if self.base.fisunsigned { 106 | // The minimum and maximum values are possibly wrong 107 | // return Err(String::from("Expected signed value")); 108 | } 109 | Ok(()) 110 | } 111 | } 112 | }; 113 | } 114 | 115 | make_tleaf_variant! {TLeafB, i8, be_i8} 116 | make_tleaf_variant! 
{TLeafS, i16, be_i16} 117 | make_tleaf_variant! {TLeafI, i32, be_i32} 118 | make_tleaf_variant! {TLeafL, i64, be_i64} 119 | make_tleaf_variant! {TLeafF, f32, be_f32} 120 | make_tleaf_variant! {TLeafD, f64, be_f64} 121 | // TLeafC has chars as elements 122 | make_tleaf_variant! {TLeafC, i32, be_i32, 1} 123 | make_tleaf_variant! {TLeafO, bool, be_bool} 124 | make_tleaf_variant! {TLeafD32, f32, be_f32} 125 | 126 | #[derive(Debug, Clone)] 127 | #[allow(dead_code)] 128 | struct TLeafElement { 129 | base: TLeafBase, 130 | fid: i32, 131 | ftype: TypeID, 132 | } 133 | 134 | impl TLeafElement { 135 | fn parse<'s>(i: &'s [u8], context: &'s Context) -> IResult<&'s [u8], Self> { 136 | let (i, _) = verify(be_u16, |&ver| ver == 1)(i)?; 137 | let (i, base) = length_value(checked_byte_count, |i| TLeafBase::parse(i, context))(i)?; 138 | let (i, fid) = be_i32(i)?; 139 | let (i, ftype) = map_res(be_i32, |id| TypeID::new(id, "FIXME!"))(i)?; 140 | Ok((i, Self { base, fid, ftype })) 141 | } 142 | } 143 | 144 | #[derive(Debug, Clone)] 145 | #[allow(dead_code)] 146 | struct TLeafBase { 147 | /// Version of the read layout 148 | ver: u16, 149 | /// The basis for a named object (name, title) 150 | tnamed: TNamed, 151 | /// Number of fixed length elements 152 | flen: i32, 153 | /// Number of bytes for this data type 154 | flentype: i32, 155 | /// Offset in ClonesArray object (if one) 156 | foffset: i32, 157 | /// (=kTRUE if leaf has a range, kFALSE otherwise) 158 | fisrange: bool, 159 | /// (=kTRUE if unsigned, kFALSE otherwise) 160 | fisunsigned: bool, 161 | /// Pointer to Leaf count if variable length (we do not own the counter) 162 | fleafcount: Option>, 163 | } 164 | 165 | impl TLeafBase { 166 | fn parse<'s>(i: &'s [u8], context: &'s Context) -> IResult<&'s [u8], Self> { 167 | let (i, ver) = be_u16(i)?; 168 | let (i, tnamed) = length_value(checked_byte_count, tnamed)(i)?; 169 | let (i, flen) = be_i32(i)?; 170 | let (i, flentype) = be_i32(i)?; 171 | let (i, foffset) = be_i32(i)?; 172 | 
let (i, fisrange) = be_bool(i)?; 173 | let (i, fisunsigned) = be_bool(i)?; 174 | let (i, fleafcount) = { 175 | if peek(be_u32)(i)?.1 == 0 { 176 | // Consume the bytes but we have no nested leafs 177 | be_u32(i).map(|(i, _)| (i, None))? 178 | } else { 179 | let (i, r) = raw(i, context)?; 180 | // We don't parse from the input buffer. TODO: Check 181 | // that we consumed all bytes 182 | let (_, tleaf) = TLeafVariant::parse(r.obj, context, r.classinfo)?; 183 | (i, Some(Box::new(tleaf))) 184 | } 185 | }; 186 | Ok(( 187 | i, 188 | Self { 189 | ver, 190 | tnamed, 191 | flen, 192 | flentype, 193 | foffset, 194 | fisrange, 195 | fisunsigned, 196 | fleafcount, 197 | }, 198 | )) 199 | } 200 | } 201 | 202 | /// If we have more than one element make this 203 | fn arrayfy_maybe(ty_name: &str, len: usize) -> Tokens { 204 | // not an array 205 | let t = Ident::new(ty_name); 206 | if len == 1 { 207 | quote! {#t} 208 | } else { 209 | // array 210 | quote! {[#t; #len]} 211 | } 212 | } 213 | 214 | impl ToRustType for TLeaf { 215 | fn type_name(&self) -> Tokens { 216 | use TLeafVariant::*; 217 | let (type_name, len) = match &self.variant { 218 | TLeafO(l) => ("bool", l.base.flen), 219 | TLeafB(l) => (if l.base.fisunsigned { "u8" } else { "i8" }, l.base.flen), 220 | TLeafS(l) => (if l.base.fisunsigned { "u16" } else { "i16" }, l.base.flen), 221 | TLeafI(l) => (if l.base.fisunsigned { "u32" } else { "i32" }, l.base.flen), 222 | TLeafL(l) => (if l.base.fisunsigned { "u64" } else { "i64" }, l.base.flen), 223 | TLeafF(l) => ("f32", l.base.flen), 224 | TLeafD(l) => ("f64", l.base.flen), 225 | TLeafC(l) => ("String", l.base.flen), 226 | l => todo!("{:?}", l), 227 | }; 228 | arrayfy_maybe(type_name, len as usize) 229 | } 230 | } 231 | -------------------------------------------------------------------------------- /root-io/src/tree_reader/mod.rs: -------------------------------------------------------------------------------- 1 | //! 
A convenience wrapper and needed parsers to work with ROOT's 2 | //! `TTree`s. A Tree may be thought of as a table where each row 3 | //! represents a particle collision. Each column may contain one or 4 | //! several elements per collision. This module provides two Iterator 5 | //! structs in order to iterate over these columns (`TBranches` in 6 | //! ROOT lingo). 7 | 8 | mod branch; 9 | mod container; 10 | mod leafs; 11 | mod tree; 12 | 13 | pub use self::tree::{ttree, Tree}; 14 | 15 | #[cfg(all(test, not(target_arch = "wasm32")))] 16 | mod tests { 17 | use std::path::PathBuf; 18 | use tokio; 19 | 20 | use crate::core::RootFile; 21 | 22 | #[tokio::test] 23 | async fn simple_tree() { 24 | let path = PathBuf::from("./src/test_data/simple.root"); 25 | let f = RootFile::new(path.as_path()) 26 | .await 27 | .expect("Failed to open file"); 28 | f.items()[0].as_tree().await.unwrap(); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /root-io/src/tree_reader/tree.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | use std::fmt::Debug; 3 | use std::ops::Deref; 4 | 5 | use failure::Error; 6 | use nom::{ 7 | combinator::{cond, peek, verify}, 8 | multi::{count, length_data, length_value}, 9 | number::complete::*, 10 | sequence::preceded, 11 | IResult, 12 | }; 13 | 14 | use crate::{ 15 | core::parsers::*, core::types::*, tree_reader::branch::tbranch_hdr, 16 | tree_reader::branch::TBranch, tree_reader::leafs::TLeaf, 17 | }; 18 | 19 | /// `TTree` potentially has members with very large `Vec` buffers 20 | /// The `Pointer` type is used to overwrite the default `Debug` trait 21 | /// for those members 22 | struct Pointer(pub Vec); 23 | 24 | impl Deref for Pointer { 25 | type Target = Vec; 26 | fn deref(&self) -> &Vec { 27 | &self.0 28 | } 29 | } 30 | 31 | impl fmt::Debug for Pointer { 32 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 33 | writeln!(f, "Buffer of {} 
bytes ", self.len()) 34 | } 35 | } 36 | 37 | /// A `Tree` is the default "container" for datasets in Root files The 38 | /// data is oranized in so-called branches. This type is exposed only 39 | /// for the purpose of creating `ColumnFixedIntoIter` and 40 | /// `ColumnVarIntoIter` objects from it. 41 | #[derive(Debug)] 42 | #[allow(dead_code)] 43 | pub struct Tree { 44 | /// Version of the read layout 45 | ver: u16, 46 | /// The basis for a named object (name, title) 47 | tnamed: TNamed, 48 | /// Number of entries 49 | fentries: i64, 50 | /// Total number of bytes in all branches before compression 51 | ftotbytes: i64, 52 | /// Total number of bytes in all branches after compression 53 | fzipbytes: i64, 54 | /// Number of autosaved bytes 55 | fsavedbytes: i64, 56 | /// Number of autoflushed bytes 57 | fflushedbytes: Option, 58 | /// Tree weight (see TTree::SetWeight) 59 | fweight: f64, 60 | /// Timer interval in milliseconds 61 | ftimerinterval: i32, 62 | /// Number of runs before prompting in Scan 63 | fscanfield: i32, 64 | /// Update frequency for EntryLoop 65 | fupdate: i32, 66 | /// Maximum number of entries in case of circular buffers 67 | fmaxentries: i64, 68 | /// Maximum number of entries to process 69 | fmaxentryloop: i64, 70 | /// Number of entries to estimate histogram limits 71 | festimate: i64, 72 | /// List of Branches 73 | pub(crate) fbranches: Vec, 74 | /// Direct pointers to individual branch leaves 75 | pub(crate) fleaves: Vec, 76 | /// List of aliases for expressions based on the tree branches. 
77 | faliases: Option>, 78 | /// Sorted index values 79 | findexvalues: Vec, 80 | /// Index of sorted values 81 | findex: Vec, 82 | /// Pointer to the tree Index (if any) 83 | ftreeindex: Option, 84 | /// pointer to list of friend elements 85 | ffriends: Option, 86 | /// pointer to a list of user objects associated to this Tree 87 | fuserinfo: Option, 88 | /// Branch supporting the TRefTable (if any) 89 | fbranchref: Option, 90 | } 91 | 92 | impl Tree { 93 | /// Get all branches of a tree (including nested ones) 94 | pub(crate) fn branches(&self) -> Vec<&TBranch> { 95 | self.fbranches 96 | .iter() 97 | .flat_map(|b| vec![b].into_iter().chain(b.branches().into_iter())) 98 | .collect() 99 | } 100 | /// Get all the branch names and types (including nested ones) of this tree 101 | /// The first element is the name, the second one is the type 102 | pub fn branch_names_and_types(&self) -> Vec<(String, Vec)> { 103 | self.fbranches 104 | .iter() 105 | .flat_map(|b| vec![b].into_iter().chain(b.branches().into_iter())) 106 | .map(|b| (b.name(), b.element_types())) 107 | .collect() 108 | } 109 | 110 | pub fn branch_by_name(&self, name: &str) -> Result<&TBranch, Error> { 111 | self.branches() 112 | .into_iter() 113 | .find(|b| b.name == name) 114 | .ok_or_else(|| { 115 | format_err!( 116 | "Branch {} not found in tree: \n {:#?}", 117 | name, 118 | self.branches() 119 | .iter() 120 | .map(|b| b.name.to_owned()) 121 | .collect::>() 122 | ) 123 | }) 124 | } 125 | } 126 | 127 | /// Parse a `Tree` from the given buffer. Usually used through `FileItem::parse_with`. 128 | pub fn ttree<'s>(i: &'s [u8], context: &'s Context) -> IResult<&'s [u8], Tree> { 129 | let _curried_raw = |i| raw(i, context); 130 | let none_or_u8_buf = |i: &'s [u8]| match peek(be_u32)(i)? 
{ 131 | (i, 0) => be_u32(i).map(|(i, _)| (i, None)), 132 | (i, _) => _curried_raw(i).map(|(i, r)| (i, Some(r.obj.to_vec()))), 133 | }; 134 | let grab_checked_byte_count = move |i| { 135 | length_data(|i| { 136 | let (i, cnt) = checked_byte_count(i)?; 137 | Ok((i, cnt)) 138 | })(i) 139 | }; 140 | let (i, ver) = verify(be_u16, |v| [16, 17, 18, 19].contains(v))(i)?; 141 | let (i, tnamed) = length_value(checked_byte_count, tnamed)(i)?; 142 | let (i, _tattline) = grab_checked_byte_count(i)?; 143 | let (i, _tattfill) = grab_checked_byte_count(i)?; 144 | let (i, _tattmarker) = grab_checked_byte_count(i)?; 145 | let (i, fentries) = be_i64(i)?; 146 | let (i, ftotbytes) = be_i64(i)?; 147 | let (i, fzipbytes) = be_i64(i)?; 148 | let (i, fsavedbytes) = be_i64(i)?; 149 | let (i, fflushedbytes) = cond(ver >= 18, be_i64)(i)?; 150 | let (i, fweight) = be_f64(i)?; 151 | let (i, ftimerinterval) = be_i32(i)?; 152 | let (i, fscanfield) = be_i32(i)?; 153 | let (i, fupdate) = be_i32(i)?; 154 | let (i, _fdefaultentryoffsetlen) = cond(ver >= 17, be_i32)(i)?; 155 | let (i, fnclusterrange) = cond(ver >= 19, be_i32)(i)?; 156 | let (i, fmaxentries) = be_i64(i)?; 157 | let (i, fmaxentryloop) = be_i64(i)?; 158 | let (i, _fmaxvirtualsize) = be_i64(i)?; 159 | let (i, _fautosave) = be_i64(i)?; 160 | let (i, _fautoflush) = cond(ver >= 18, be_i64)(i)?; 161 | let (i, festimate) = be_i64(i)?; 162 | let (i, _fclusterrangeend) = { 163 | if let Some(n_clst_range) = fnclusterrange { 164 | preceded(be_u8, count(be_i64, n_clst_range as usize))(i) 165 | .map(|(i, ends)| (i, Some(ends)))? 166 | } else { 167 | (i, None) 168 | } 169 | }; 170 | let (i, _fclustersize) = { 171 | if let Some(n_clst_range) = fnclusterrange { 172 | preceded(be_u8, count(be_i64, n_clst_range as usize))(i) 173 | .map(|(i, ends)| (i, Some(ends)))? 
174 | } else { 175 | (i, None) 176 | } 177 | }; 178 | let (i, fbranches) = 179 | length_value(checked_byte_count, |i| tobjarray(tbranch_hdr, i, context))(i)?; 180 | let (i, fleaves) = length_value(checked_byte_count, |i| { 181 | tobjarray(TLeaf::parse_from_raw, i, context) 182 | })(i)?; 183 | 184 | let (i, faliases) = none_or_u8_buf(i)?; 185 | let (i, findexvalues) = tarray(be_f64, i)?; 186 | let (i, findex) = tarray(be_i32, i)?; 187 | let (i, ftreeindex) = none_or_u8_buf(i)?; 188 | let (i, ffriends) = none_or_u8_buf(i)?; 189 | let (i, fuserinfo) = none_or_u8_buf(i)?; 190 | let (i, fbranchref) = none_or_u8_buf(i)?; 191 | let ftreeindex = ftreeindex.map(Pointer); 192 | let ffriends = ffriends.map(Pointer); 193 | let fuserinfo = fuserinfo.map(Pointer); 194 | let fbranchref = fbranchref.map(Pointer); 195 | Ok(( 196 | i, 197 | Tree { 198 | ver, 199 | tnamed, 200 | fentries, 201 | ftotbytes, 202 | fzipbytes, 203 | fsavedbytes, 204 | fflushedbytes, 205 | fweight, 206 | ftimerinterval, 207 | fscanfield, 208 | fupdate, 209 | fmaxentries, 210 | fmaxentryloop, 211 | festimate, 212 | fbranches, 213 | fleaves, 214 | faliases, 215 | findexvalues, 216 | findex, 217 | ftreeindex, 218 | ffriends, 219 | fuserinfo, 220 | fbranchref, 221 | }, 222 | )) 223 | } 224 | -------------------------------------------------------------------------------- /root-io/src/utils.rs: -------------------------------------------------------------------------------- 1 | /// Zip `n` streams together analogous to the `zip()` function. This 2 | /// is useful when iterating over multiple branches of a TTree. See 3 | /// the examples of this crate for the suggested use-case. 4 | #[macro_export] 5 | macro_rules! stream_zip { 6 | // @closure creates a tuple-flattening closure for .map() call. usage: 7 | // @closure partial_pattern => partial_tuple , rest , of , iterators 8 | // eg. 
izip!( @closure ((a, b), c) => (a, b, c) , dd , ee ) 9 | ( @closure $p:pat => $tup:expr ) => { 10 | |$p| $tup 11 | }; 12 | 13 | // The "b" identifier is a different identifier on each recursion level thanks to hygiene. 14 | ( @closure $p:pat => ( $($tup:tt)* ) , $_iter:expr $( , $tail:expr )* ) => { 15 | stream_zip!(@closure ($p, b) => ( $($tup)*, b ) $( , $tail )*) 16 | }; 17 | 18 | // unary 19 | ($first:expr $(,)*) => { 20 | $first 21 | }; 22 | 23 | // binary 24 | ($first:expr, $second:expr $(,)*) => { 25 | stream_zip!($first) 26 | .zip($second) 27 | }; 28 | 29 | // n-ary where n > 2 30 | ( $first:expr $( , $rest:expr )* $(,)* ) => { 31 | stream_zip!($first) 32 | $( 33 | .zip($rest) 34 | )* 35 | .map( 36 | stream_zip!(@closure a => (a) $( , $rest )*) 37 | ) 38 | }; 39 | } 40 | -------------------------------------------------------------------------------- /root-io/tests/high_level_io.rs: -------------------------------------------------------------------------------- 1 | #![cfg(all(test, not(target_arch = "wasm32")))] 2 | use std::path::PathBuf; 3 | 4 | use root_io::*; 5 | 6 | const TEST_FILES: &[&str] = &[ 7 | "./src/test_data/simple.root", 8 | "./src/test_data/HZZ.root", 9 | "./src/test_data/HZZ-lz4.root", 10 | // "./src/test_data/HZZ-lzma.root", 11 | "./src/test_data/sample-5.23.02-uncompressed.root", 12 | "./src/test_data/sample-5.23.02-zlib.root", 13 | "./src/test_data/sample-5.24.00-zlib.root", 14 | "./src/test_data/sample-5.23.02-uncompressed.root", 15 | "./src/test_data/sample-5.23.02-zlib.root", 16 | "./src/test_data/sample-5.24.00-uncompressed.root", 17 | "./src/test_data/sample-5.24.00-zlib.root", 18 | "./src/test_data/sample-5.25.02-uncompressed.root", 19 | "./src/test_data/sample-5.25.02-zlib.root", 20 | "./src/test_data/sample-5.26.00-uncompressed.root", 21 | "./src/test_data/sample-5.26.00-zlib.root", 22 | "./src/test_data/sample-5.27.02-uncompressed.root", 23 | "./src/test_data/sample-5.27.02-zlib.root", 24 | 
"./src/test_data/sample-5.28.00-uncompressed.root", 25 | "./src/test_data/sample-5.28.00-zlib.root", 26 | "./src/test_data/sample-5.29.02-uncompressed.root", 27 | "./src/test_data/sample-5.29.02-zlib.root", 28 | // "./src/test_data/sample-5.30.00-lzma.root", 29 | "./src/test_data/sample-5.30.00-uncompressed.root", 30 | "./src/test_data/sample-5.30.00-zlib.root", 31 | // "./src/test_data/sample-6.08.04-lzma.root", 32 | "./src/test_data/sample-6.08.04-uncompressed.root", 33 | "./src/test_data/sample-6.08.04-zlib.root", 34 | "./src/test_data/sample-6.10.05-lz4.root", 35 | // "./src/test_data/sample-6.10.05-lzma.root", 36 | "./src/test_data/sample-6.10.05-uncompressed.root", 37 | "./src/test_data/sample-6.10.05-zlib.root", 38 | "./src/test_data/small-flat-tree.root", 39 | "./src/test_data/Zmumu.root", 40 | "./src/test_data/Zmumu-lz4.root", 41 | // "./src/test_data/Zmumu-lzma.root", 42 | "./src/test_data/Zmumu-uncompressed.root", 43 | "./src/test_data/Zmumu-zlib.root", 44 | "./src/test_data/foriter.root", 45 | "./src/test_data/foriter2.root", 46 | "./src/test_data/mc10events.root", 47 | // Contains TStreamerSTLstring 48 | "./src/test_data/nesteddirs.root", 49 | "./src/test_data/small-evnt-tree-fullsplit.root", 50 | ]; 51 | 52 | fn local_paths() -> Vec { 53 | TEST_FILES.iter().map(PathBuf::from).collect() 54 | } 55 | 56 | #[cfg(not(target_arch = "wasm32"))] 57 | mod local { 58 | use super::*; 59 | 60 | #[tokio::test] 61 | async fn root_file_methods() { 62 | let paths = local_paths(); 63 | for p in paths { 64 | println!("{:?}", p); 65 | let f = RootFile::new(p.as_path()) 66 | .await 67 | .expect("Failed to open file"); 68 | let mut s = String::new(); 69 | f.streamer_info_as_yaml(&mut s).await.unwrap(); 70 | f.streamer_info_as_rust(&mut s).await.unwrap(); 71 | for item in f.items() { 72 | item.name(); 73 | if item.verbose_info().contains("TTree") { 74 | item.as_tree().await.unwrap(); 75 | } 76 | } 77 | } 78 | } 79 | 80 | #[tokio::test] 81 | async fn root_file_methods_esd() 
{ 82 | use alice_open_data; 83 | let paths = [alice_open_data::test_file().unwrap()]; 84 | for p in &paths { 85 | println!("{:?}", p); 86 | let f = RootFile::new(p.as_path()) 87 | .await 88 | .expect("Failed to open file"); 89 | let mut s = String::new(); 90 | f.streamer_info_as_yaml(&mut s).await.unwrap(); 91 | f.streamer_info_as_rust(&mut s).await.unwrap(); 92 | for item in f.items() { 93 | item.name(); 94 | item.verbose_info(); 95 | } 96 | } 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /root-io/tests/read_esd.rs: -------------------------------------------------------------------------------- 1 | use failure::Error; 2 | use futures::prelude::*; 3 | use nom::number::complete::*; 4 | use nom::sequence::tuple; 5 | 6 | use root_io::{ 7 | core::parsers::{parse_custom_mantissa, parse_tobjarray_of_tnameds}, 8 | stream_zip, 9 | tree_reader::Tree, 10 | RootFile, 11 | }; 12 | 13 | /// A model for the / a subset of the ESD data 14 | #[derive(Debug)] 15 | struct Model { 16 | primaryvertex_alivertex_fposition: (f32, f32, f32), 17 | primaryvertex_alivertex_fncontributors: i32, 18 | aliesdrun_frunnumber: i32, 19 | aliesdrun_ftriggerclasses: Vec, 20 | aliesdheader_ftriggermask: u64, 21 | tracks_fx: Vec, 22 | tracks_fp: Vec<(f32, f32, f32, f32, f32)>, 23 | tracks_falpha: Vec, 24 | tracks_fflags: Vec, 25 | tracks_fitschi2: Vec, 26 | tracks_fitsncls: Vec, 27 | tracks_fitsclustermap: Vec, 28 | tracks_ftpcncls: Vec, 29 | tracks_ftpcchi2: Vec, 30 | } 31 | 32 | impl Model { 33 | async fn stream_from_tree(t: &Tree) -> Result + '_, Error> { 34 | let track_counter: Vec<_> = t 35 | .branch_by_name("Tracks")? 36 | .as_fixed_size_iterator(|i| be_u32(i)) 37 | .collect::>() 38 | .await; 39 | let s = stream_zip!( 40 | t.branch_by_name("AliESDRun.fRunNumber")? 41 | .as_fixed_size_iterator(|i| be_i32(i)), 42 | t.branch_by_name("AliESDRun.fTriggerClasses")? 
43 | .as_fixed_size_iterator(parse_tobjarray_of_tnameds), 44 | t.branch_by_name("AliESDHeader.fTriggerMask")? 45 | .as_fixed_size_iterator(|i| be_u64(i)), 46 | t.branch_by_name("PrimaryVertex.AliVertex.fPosition[3]")? 47 | .as_fixed_size_iterator(|i| tuple((be_f32, be_f32, be_f32))(i)), 48 | t.branch_by_name("PrimaryVertex.AliVertex.fNContributors")? 49 | .as_fixed_size_iterator(|i| be_i32(i)), 50 | t.branch_by_name("Tracks.fX")? 51 | .as_var_size_iterator(|i| be_f32(i), track_counter.to_owned()), 52 | t.branch_by_name("Tracks.fP[5]")?.as_var_size_iterator( 53 | |i| tuple((be_f32, be_f32, be_f32, be_f32, be_f32))(i), 54 | track_counter.to_owned() 55 | ), 56 | t.branch_by_name("Tracks.fAlpha")? 57 | .as_var_size_iterator(|i| be_f32(i), track_counter.to_owned()), 58 | t.branch_by_name("Tracks.fFlags")? 59 | .as_var_size_iterator(|i| be_u64(i), track_counter.to_owned()), 60 | t.branch_by_name("Tracks.fITSchi2")? 61 | .as_var_size_iterator(|i| parse_custom_mantissa(i, 8), track_counter.to_owned()), 62 | t.branch_by_name("Tracks.fITSncls")? 63 | .as_var_size_iterator(|i| be_i8(i), track_counter.to_owned()), 64 | t.branch_by_name("Tracks.fITSClusterMap")? 65 | .as_var_size_iterator(|i| be_u8(i), track_counter.to_owned()), 66 | t.branch_by_name("Tracks.fTPCncls")? 67 | .as_var_size_iterator(|i| be_u16(i), track_counter.to_owned()), 68 | t.branch_by_name("Tracks.fTPCchi2")? 
69 | .as_var_size_iterator(|i| parse_custom_mantissa(i, 8), track_counter), 70 | ) 71 | .map( 72 | |( 73 | aliesdrun_frunnumber, 74 | aliesdrun_ftriggerclasses, 75 | aliesdheader_ftriggermask, 76 | primaryvertex_alivertex_fposition, 77 | primaryvertex_alivertex_fncontributors, 78 | tracks_fx, 79 | tracks_fp, 80 | tracks_falpha, 81 | tracks_fflags, 82 | tracks_fitschi2, 83 | tracks_fitsncls, 84 | tracks_fitsclustermap, 85 | tracks_ftpcncls, 86 | tracks_ftpcchi2, 87 | )| { 88 | Self { 89 | aliesdrun_frunnumber, 90 | aliesdrun_ftriggerclasses, 91 | aliesdheader_ftriggermask, 92 | primaryvertex_alivertex_fposition, 93 | primaryvertex_alivertex_fncontributors, 94 | tracks_fx, 95 | tracks_fp, 96 | tracks_falpha, 97 | tracks_fflags, 98 | tracks_fitschi2, 99 | tracks_fitsncls, 100 | tracks_fitsclustermap, 101 | tracks_ftpcchi2, 102 | tracks_ftpcncls, 103 | } 104 | }, 105 | ); 106 | Ok(s) 107 | } 108 | } 109 | 110 | #[cfg(target_arch = "wasm32")] 111 | mod wasm { 112 | use super::*; 113 | use reqwest::Url; 114 | use wasm_bindgen_test::*; 115 | 116 | wasm_bindgen_test_configure!(run_in_browser); 117 | 118 | #[wasm_bindgen_test(async)] 119 | async fn read_esd_wasm() { 120 | let files = [ 121 | RootFile::new( 122 | Url::parse("http://127.0.0.1:3030/opendata/eos/opendata/alice/2010/LHC10h/000139038/ESD/0001/AliESDs.root").unwrap() 123 | ).await.expect("Failed to open file"), 124 | ]; 125 | for f in &files { 126 | let t = f.items()[0].as_tree().await.unwrap(); 127 | test_branch_iterators(&t).await; 128 | } 129 | } 130 | } 131 | 132 | #[cfg(not(target_arch = "wasm32"))] 133 | mod x64 { 134 | use super::*; 135 | 136 | use reqwest::Url; 137 | 138 | const REMOTE_FILE: &str = 139 | "http://opendata.web.cern.ch/eos/opendata/alice/2010/LHC10h/000139038/ESD/0001/AliESDs.root"; 140 | 141 | #[tokio::test] 142 | async fn read_esd_local_and_remote() { 143 | let path = alice_open_data::test_file().unwrap(); 144 | let files = [ 145 | RootFile::new(path).await.expect("Failed to open file"), 
146 | RootFile::new(Url::parse(REMOTE_FILE).unwrap()) 147 | .await 148 | .expect("Failed to open file"), 149 | ]; 150 | for f in &files { 151 | let t = f.items()[0].as_tree().await.unwrap(); 152 | test_branch_iterators(&t).await; 153 | } 154 | } 155 | } 156 | 157 | async fn test_branch_iterators(tree: &Tree) { 158 | let mut schema_iter = Box::pin(Model::stream_from_tree(tree).await.unwrap()); 159 | 160 | let mut cnt = 0; 161 | let mut aliesdrun_frunnumber = 0; 162 | let mut aliesdheader_ftriggermask = 0; 163 | let mut primaryvertex_alivertex_fncontributors = 0; 164 | let mut tracks_fx: Vec = vec![]; 165 | let mut tracks_falpha: Vec = vec![]; 166 | let mut tracks_fflags: Vec = vec![]; 167 | let mut tracks_fitschi2: Vec = vec![]; 168 | let mut tracks_fitsncls: Vec = vec![]; 169 | let mut tracks_fitsclustermap: Vec = vec![]; 170 | let mut primaryvertex_alivertex_fposition: Vec<(f32, f32, f32)> = vec![]; 171 | let mut tracks_fp: Vec> = vec![]; 172 | let mut aliesdrun_ftriggerclasses: Vec = vec![]; 173 | let mut tracks_ftpcchi2: Vec = vec![]; 174 | let mut tracks_ftpcncls: Vec = vec![]; 175 | 176 | while let Some(event) = schema_iter.next().await { 177 | cnt += 1; 178 | aliesdrun_frunnumber += event.aliesdrun_frunnumber; 179 | aliesdheader_ftriggermask += event.aliesdheader_ftriggermask; 180 | primaryvertex_alivertex_fncontributors += event.primaryvertex_alivertex_fncontributors; 181 | tracks_fx.extend(event.tracks_fx.iter()); 182 | tracks_falpha.extend(event.tracks_falpha.iter()); 183 | tracks_fflags.extend(event.tracks_fflags.iter()); 184 | tracks_fitschi2.extend(event.tracks_fitschi2.iter()); 185 | tracks_fitsncls.extend(event.tracks_fitsncls.iter()); 186 | tracks_fitsclustermap.extend(event.tracks_fitsclustermap.iter()); 187 | tracks_ftpcchi2.extend(event.tracks_ftpcchi2.iter()); 188 | tracks_ftpcncls.extend(event.tracks_ftpcncls.iter()); 189 | primaryvertex_alivertex_fposition.push(event.primaryvertex_alivertex_fposition); 190 | tracks_fp.push(event.tracks_fp); 191 
| aliesdrun_ftriggerclasses.extend(event.aliesdrun_ftriggerclasses.into_iter()); 192 | } 193 | 194 | assert_eq!(cnt, 4); 195 | assert_eq!(aliesdrun_frunnumber, 556152); 196 | assert_eq!(aliesdheader_ftriggermask, 98); 197 | assert_eq!(primaryvertex_alivertex_fncontributors, 2746); 198 | assert_eq!(tracks_fx.iter().sum::(), -26.986227); 199 | assert_eq!(tracks_falpha.iter().sum::(), -199.63356); 200 | assert_eq!(tracks_fflags.iter().sum::(), 25876766546549); 201 | assert_eq!(tracks_fitschi2.iter().sum::(), 376158.6); 202 | assert_eq!( 203 | tracks_fitsncls.iter().map(|el| *el as i64).sum::(), 204 | 24783 205 | ); 206 | assert_eq!( 207 | tracks_fitsclustermap 208 | .iter() 209 | .map(|el| *el as u64) 210 | .sum::(), 211 | 293099 212 | ); 213 | assert_eq!(tracks_ftpcchi2.iter().sum::(), 2352277.0); 214 | assert_eq!( 215 | tracks_ftpcncls.iter().map(|el| *el as i64).sum::(), 216 | 984359 217 | ); 218 | 219 | assert_eq!( 220 | primaryvertex_alivertex_fposition 221 | .iter() 222 | .fold([0.0, 0.0, 0.0], |acc, el| { 223 | [acc[0] + el.0, acc[1] + el.1, acc[2] + el.2] 224 | }), 225 | [-0.006383737, 0.3380862, 2.938151] 226 | ); 227 | assert_eq!( 228 | tracks_fp.iter().flatten().fold(0.0, |acc, el| { 229 | acc + [el.0, el.1, el.2, el.3, el.4].iter().sum::() 230 | }), 231 | 39584.777 232 | ); 233 | 234 | // Just add up all the chars in the strings 235 | assert_eq!( 236 | aliesdrun_ftriggerclasses 237 | .iter() 238 | .map(|s| { s.chars().map(|c| c as u64).sum::() }) 239 | .sum::(), 240 | 109268 241 | ); 242 | } 243 | -------------------------------------------------------------------------------- /root-io/tests/read_simple.rs: -------------------------------------------------------------------------------- 1 | use std::pin::Pin; 2 | 3 | use failure::Error; 4 | use futures::{Stream, StreamExt}; 5 | use nom::number::complete::*; 6 | 7 | use root_io::{core::parsers::string, stream_zip, tree_reader::Tree, RootFile}; 8 | 9 | /// A model for the (or a subset) of the data. 
10 | /// This is the object which contains the data of one "event" 11 | #[derive(Debug)] 12 | #[allow(dead_code)] 13 | struct Model { 14 | one: i32, 15 | two: f32, 16 | three: String, 17 | } 18 | 19 | impl Model { 20 | fn stream_from_tree(t: Tree) -> Result>>, Error> { 21 | Ok(stream_zip!( 22 | t.branch_by_name("one")? 23 | .as_fixed_size_iterator(|i| be_i32(i)), 24 | t.branch_by_name("two")? 25 | .as_fixed_size_iterator(|i| be_f32(i)), 26 | t.branch_by_name("three")?.as_fixed_size_iterator(string) 27 | ) 28 | .map(|(one, two, three)| Self { one, two, three }) 29 | .boxed_local()) 30 | } 31 | } 32 | 33 | async fn read_simple(f: RootFile) { 34 | let t = f.items()[0].as_tree().await.unwrap(); 35 | let s = Model::stream_from_tree(t).unwrap(); 36 | s.for_each(|m| async move { 37 | println!("{:?}", m); 38 | }) 39 | .await 40 | } 41 | 42 | #[cfg(not(target_arch = "wasm32"))] 43 | mod x64 { 44 | use super::*; 45 | use std::path::Path; 46 | 47 | #[tokio::test] 48 | async fn read_simple_local() { 49 | let path = Path::new("./src/test_data/simple.root"); 50 | let f = RootFile::new(path).await.expect("Failed to open file"); 51 | read_simple(f).await; 52 | } 53 | } 54 | 55 | #[cfg(all(test, target_arch = "wasm32"))] 56 | mod wasm { 57 | wasm_bindgen_test_configure!(run_in_browser); 58 | use super::*; 59 | use reqwest::Url; 60 | use wasm_bindgen_test::{wasm_bindgen_test, wasm_bindgen_test_configure}; 61 | 62 | #[wasm_bindgen_test] 63 | async fn read_simple_remote() { 64 | let url = Url::parse("http://127.0.0.1:3030/github/cbourjau/alice-rs/master/root-io/src/test_data/simple.root").unwrap(); 65 | let f = RootFile::new(url) 66 | .await 67 | .expect("Failed to open remote file"); 68 | read_simple(f).await; 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /root-ls/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "root-ls" 3 | version = "0.2.0" 4 | authors = 
["cbourjau "] 5 | description = "CLI tool to inspect the content and layout of `.root` files" 6 | repository = "https://github.com/cbourjau/alice-rs" 7 | readme = "README.md" 8 | keywords = ["root", "cern", "alice", "lhc", "physics"] 9 | categories = ["science", "command-line-utilities"] 10 | license = "MPL-2.0" 11 | edition = "2021" 12 | 13 | [dependencies] 14 | clap = "2" 15 | failure = { workspace=true } 16 | root-io = { workspace=true } 17 | prettyplease = "0.1" 18 | syn = "1" 19 | tokio = { workspace=true } 20 | -------------------------------------------------------------------------------- /root-ls/README.md: -------------------------------------------------------------------------------- 1 | # root-ls 2 | 3 | [![Crates.io Version](https://img.shields.io/crates/v/root-ls.svg)](https://crates.io/crates/root-ls) 4 | 5 | 6 | A command line tool to inspect the types of objects contained in a `.root` file similar to ROOT's `TFile::ShowStreamerInfo()` function. However, `root-ls` is also able to produce (proably buggy) Rust code as a starting point to write a custom parser for the content of a file. If you are in that sort of business, you should take a look at the [`root-io`](https://crates.io/crates/root-io) crate. 7 | 8 | ## Installation 9 | 1. Get Rust via [rustup](https://rustup.rs/) 10 | 2. Install `root-ls` 11 | 12 | ``` bash 13 | cargo install root-ls 14 | ``` 15 | 16 | ## Usage 17 | - Dump the layout of the streamed objects as yaml 18 | ``` bash 19 | root-ls ./simple.root to-yaml 20 | ``` 21 | 22 | - Create rust structs and parsers for the objects in this file 23 | ``` bash 24 | root-ls ./simple.root to-rust 25 | 26 | ``` 27 | 28 | - Print a short summary of all the items in this file 29 | ``` bash 30 | root-ls ./simple.root to-rust inspect 31 | ``` 32 | 33 | - Dump all the info there is on one particular item. 
Not pretty, but most precise (especially with optional `-v`) 34 | ``` bash 35 | root-ls ./simple.root to-rust inspect --item-pos=0 -v 36 | ``` 37 | -------------------------------------------------------------------------------- /root-ls/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::env; 2 | 3 | use std::path::Path; 4 | 5 | use clap::{crate_version, value_t, App, AppSettings, Arg, ArgMatches, SubCommand}; 6 | use failure::Error; 7 | use root_io::RootFile; 8 | 9 | #[tokio::main] 10 | async fn main() { 11 | let matches = App::new("Inspect root files") 12 | .version(crate_version!()) 13 | .arg( 14 | Arg::with_name("INPUT") 15 | .help("Input .root file") 16 | .required(true) 17 | .index(1), 18 | ) 19 | .setting(AppSettings::VersionlessSubcommands) 20 | .subcommand( 21 | SubCommand::with_name("inspect") 22 | .about("Dump infromartion about the objects in this file") 23 | .args_from_usage( 24 | "--item-pos=[POS] 'Limit output to item at `pos`' 25 | -v 'Verbose output'", 26 | ), 27 | ) 28 | .subcommand( 29 | SubCommand::with_name("to-yaml").about("Output the StreamerInfo of this file as YAML"), // .arg_from_usage(" 'Output is written to this file'") 30 | ) 31 | .subcommand( 32 | SubCommand::with_name("to-rust") 33 | .about("Generate Rust structs and parsers form the StreamerInfo"), 34 | ) 35 | .get_matches(); 36 | let in_path = Path::new(matches.value_of("INPUT").unwrap()); 37 | let f = root_io::RootFile::new(in_path) 38 | .await 39 | .expect("Failed to open file"); 40 | 41 | if let Some(matches) = matches.subcommand_matches("inspect") { 42 | inspect_file(&f, matches).await; 43 | } else if matches.subcommand_matches("to-yaml").is_some() { 44 | sinfo_to_yaml(&f).await; 45 | } else if matches.subcommand_matches("to-rust").is_some() { 46 | to_rust(&f).await.unwrap(); 47 | } else { 48 | // Write help if no sub command is given 49 | println!("{}", matches.usage()); 50 | } 51 | } 52 | 53 | async fn inspect_file(f: 
&RootFile, sub_matches: &ArgMatches<'_>) { 54 | if sub_matches.is_present("item-pos") { 55 | let idx = value_t!(sub_matches.value_of("item-pos"), usize).unwrap(); 56 | // FIXME: This should not be specific for TTrees! 57 | let tree = f.items()[idx].as_tree().await.unwrap(); 58 | if sub_matches.is_present("v") { 59 | println!("{:#?}", tree); 60 | } else { 61 | for (name, types) in &tree.branch_names_and_types() { 62 | println!("{}: {:#?}", name, types); 63 | } 64 | } 65 | } else { 66 | println!("Items in file:"); 67 | for (i, item) in f.items().iter().enumerate() { 68 | if sub_matches.is_present("v") { 69 | println!("{}: {}", i, item.verbose_info()); 70 | } else { 71 | println!("{}: {}", i, item.name()); 72 | } 73 | } 74 | } 75 | } 76 | 77 | async fn sinfo_to_yaml(f: &RootFile) { 78 | let mut s = String::new(); 79 | match f.streamer_info_as_yaml(&mut s).await { 80 | Ok(_) => println!("{}", s), 81 | Err(e) => println!("Failed to create yaml. Error: {:?}", e), 82 | } 83 | } 84 | 85 | async fn to_rust(f: &RootFile) -> Result<(), Error> { 86 | let mut s = String::new(); 87 | f.streamer_info_as_rust(&mut s).await?; 88 | let tree = syn::parse_file(&s)?; 89 | println!("{}", prettyplease::unparse(&tree)); 90 | Ok(()) 91 | } 92 | --------------------------------------------------------------------------------