├── .gitignore
├── .gitattributes
├── tests
│   ├── references
│   │   ├── OneMessage.mcap
│   │   ├── OneMetadata.mcap
│   │   ├── demo.mcap
│   │   └── OneAttachment.mcap
│   ├── common.rs
│   ├── flush.rs
│   ├── compression.rs
│   ├── metadata.rs
│   ├── attachment.rs
│   ├── message.rs
│   └── round_trip.rs
├── README.md
├── LICENSE.md
├── Cargo.toml
├── examples
│   ├── common
│   │   └── logsetup.rs
│   ├── mcapcat
│   │   └── main.rs
│   ├── mcapcopy
│   │   └── main.rs
│   └── recover
│       └── main.rs
└── src
    ├── io_utils.rs
    ├── lib.rs
    ├── records.rs
    ├── write.rs
    └── read.rs

/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /Cargo.lock
3 | 
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.mcap filter=lfs diff=lfs merge=lfs -text
2 | 
--------------------------------------------------------------------------------
/tests/references/OneMessage.mcap:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:c92429a3aa64497c8855552a7e03b8401fe9da0e94c2d8e82d72c8d4341ffcaa
3 | size 190
4 | 
--------------------------------------------------------------------------------
/tests/references/OneMetadata.mcap:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:e254aad93777c23e224c2f800295f7573607e7841a4883916695d0a292786302
3 | size 116
4 | 
--------------------------------------------------------------------------------
/tests/references/demo.mcap:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:f878642b6fc15d2e771ce530252e6454de296e6d99b18748e6cd7d09eaa80598
3 | size 61497068
4 | 
--------------------------------------------------------------------------------
/tests/references/OneAttachment.mcap:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:816f7ef60af488cccbb67fbe7f2e3de2abf788793ba548993254140dd8a95acc
3 | size 153
4 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # mcap-rs
2 | 
3 | ...has been upstreamed into the [Foxglove MCAP repo](https://github.com/foxglove/mcap)!
4 | Use the [`mcap` crate](https://crates.io/crates/mcap) for the latest Rust MCAP goodness.
5 | 
--------------------------------------------------------------------------------
/tests/common.rs:
--------------------------------------------------------------------------------
1 | use std::fs;
2 | 
3 | use anyhow::{Context, Result};
4 | use camino::Utf8Path;
5 | use memmap::Mmap;
6 | 
7 | pub fn map_mcap<P: AsRef<Utf8Path>>(p: P) -> Result<Mmap> {
8 |     let p = p.as_ref();
9 |     let fd = fs::File::open(p).with_context(|| format!("Couldn't open {p}"))?;
10 |     unsafe { Mmap::map(&fd) }.with_context(|| format!("Couldn't map {p}"))
11 | }
12 | 
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Copyright © 2022 Anduril Industries
2 | 
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /tests/flush.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::*; 4 | 5 | use std::io::BufWriter; 6 | 7 | use anyhow::Result; 8 | use itertools::Itertools; 9 | use mcap_rs as mcap; 10 | use memmap::Mmap; 11 | use tempfile::tempfile; 12 | 13 | #[test] 14 | fn flush_and_cut_chunks() -> Result<()> { 15 | let mapped = map_mcap("tests/references/demo.mcap")?; 16 | 17 | let messages = mcap::MessageStream::new(&mapped)?; 18 | 19 | let mut tmp = tempfile()?; 20 | let mut writer = mcap::Writer::new(BufWriter::new(&mut tmp))?; 21 | 22 | for (i, m) in messages.enumerate() { 23 | writer.write(&m?)?; 24 | // Cut a new chunk every other message 25 | if i % 2 == 0 { 26 | writer.flush()?; 27 | } 28 | } 29 | drop(writer); 30 | 31 | let ours = unsafe { Mmap::map(&tmp) }?; 32 | 33 | // Compare the message stream of our MCAP to the reference one. 34 | // Regardless of the chunk boundaries, they should be the same. 35 | for (theirs, ours) in 36 | mcap::MessageStream::new(&mapped)?.zip_eq(mcap::MessageStream::new(&ours)?) 37 | { 38 | assert_eq!(ours?, theirs?) 39 | } 40 | 41 | Ok(()) 42 | } 43 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "mcap-rs" 3 | description = "A library for reading and writing Foxglove MCAP files" 4 | keywords = [ "foxglove", "mcap" ] 5 | categories = [ "science::robotics", "compression" ] 6 | repository = "https://github.com/anduril/mcap-rs" 7 | readme = "README.md" 8 | documentation = "https://docs.rs/mcap-rs" 9 | license = "Apache-2.0" 10 | version = "0.3.4" 11 | 12 | edition = "2021" 13 | 14 | # See the repo for the reference files 15 | # (all taken from github.com/foxglove/mcap FWIW) 16 | exclude = [ "/tests/references/" ] 17 | 18 | [profile.dev] 19 | opt-level = 3 # Profiling! 20 | 21 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 22 | 23 | [dependencies] 24 | binrw = "0.9" 25 | byteorder = "1.4" 26 | crc32fast = "1.3" 27 | log = "0.4" 28 | lz4 = "1.0" 29 | num_cpus = "1.13" 30 | paste = "1.0" 31 | thiserror = "1.0" 32 | enumset = "1.0.11" 33 | zstd = { version = "0.11", features = ["zstdmt"] } 34 | 35 | [dev-dependencies] 36 | anyhow = "1.0" 37 | atty = "0.2" 38 | camino = "1.0" 39 | clap = { version = "3.2", features = ["derive"]} 40 | itertools = "0.10" 41 | memmap = "0.7" 42 | rayon = "1.5" 43 | simplelog = "0.12" 44 | tempfile = "3.3" 45 | -------------------------------------------------------------------------------- /examples/common/logsetup.rs: -------------------------------------------------------------------------------- 1 | use anyhow::*; 2 | use simplelog::*; 3 | 4 | #[derive(clap::ArgEnum, Debug, Copy, Clone)] 5 | pub enum Color { 6 | Auto, 7 | Always, 8 | Never, 9 | } 10 | 11 | /// Set up simplelog to spit messages to stderr. 
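/// (A count of -v occurrences picks the level: 0 = warn, 1 = info, 2 = debug,
/// 3 or more = trace, per the match below.)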
12 | pub fn init_logger(verbosity: u8, color: Color) { 13 | let mut builder = ConfigBuilder::new(); 14 | // Shut a bunch of stuff off - we're just spitting to stderr. 15 | builder.set_location_level(LevelFilter::Trace); 16 | builder.set_target_level(LevelFilter::Off); 17 | builder.set_thread_level(LevelFilter::Off); 18 | builder.set_time_level(LevelFilter::Off); 19 | 20 | let level = match verbosity { 21 | 0 => LevelFilter::Warn, 22 | 1 => LevelFilter::Info, 23 | 2 => LevelFilter::Debug, 24 | _ => LevelFilter::Trace, 25 | }; 26 | 27 | let config = builder.build(); 28 | 29 | let color = match color { 30 | Color::Always => ColorChoice::AlwaysAnsi, 31 | Color::Auto => { 32 | if atty::is(atty::Stream::Stderr) { 33 | ColorChoice::Auto 34 | } else { 35 | ColorChoice::Never 36 | } 37 | } 38 | Color::Never => ColorChoice::Never, 39 | }; 40 | 41 | TermLogger::init(level, config.clone(), TerminalMode::Stderr, color) 42 | .or_else(|_| SimpleLogger::init(level, config)) 43 | .context("Couldn't init logger") 44 | .unwrap() 45 | } 46 | -------------------------------------------------------------------------------- /tests/compression.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::*; 4 | 5 | use std::io::BufWriter; 6 | 7 | use anyhow::Result; 8 | use itertools::Itertools; 9 | use mcap_rs as mcap; 10 | use memmap::Mmap; 11 | use tempfile::tempfile; 12 | 13 | fn round_trip(comp: Option) -> Result<()> { 14 | let mapped = map_mcap("tests/references/demo.mcap")?; 15 | 16 | let mut tmp = tempfile()?; 17 | let mut writer = mcap::WriteOptions::new() 18 | .compression(comp) 19 | .profile("fooey") 20 | .create(BufWriter::new(&mut tmp))?; 21 | 22 | for m in mcap::MessageStream::new(&mapped)? { 23 | // IRL, we'd add channels, then write messages to known channels, 24 | // which skips having to re-hash the channel and its schema each time. 25 | // But since here we'd need to do the same anyways... 26 | writer.write(&m?)?; 27 | } 28 | drop(writer); 29 | 30 | let ours = unsafe { Mmap::map(&tmp) }?; 31 | 32 | // Compare the message stream of our MCAP to the reference one. 33 | for (theirs, ours) in 34 | mcap::MessageStream::new(&mapped)?.zip_eq(mcap::MessageStream::new(&ours)?) 35 | { 36 | assert_eq!(ours?, theirs?) 37 | } 38 | 39 | Ok(()) 40 | } 41 | 42 | #[test] 43 | fn uncompressed_round_trip() -> Result<()> { 44 | round_trip(None) 45 | } 46 | 47 | #[test] 48 | fn zstd_round_trip() -> Result<()> { 49 | round_trip(Some(mcap::Compression::Zstd)) 50 | } 51 | 52 | #[test] 53 | fn lz4_round_trip() -> Result<()> { 54 | round_trip(Some(mcap::Compression::Lz4)) 55 | } 56 | -------------------------------------------------------------------------------- /examples/mcapcat/main.rs: -------------------------------------------------------------------------------- 1 | #[path = "../common/logsetup.rs"] 2 | mod logsetup; 3 | 4 | use std::{fs, process}; 5 | 6 | use anyhow::{Context, Result}; 7 | use camino::{Utf8Path, Utf8PathBuf}; 8 | use clap::Parser; 9 | use log::*; 10 | use mcap_rs as mcap; 11 | use memmap::Mmap; 12 | 13 | #[derive(Parser, Debug)] 14 | struct Args { 15 | /// Verbosity (-v, -vv, -vvv, etc.) 
16 |     #[clap(short, long, parse(from_occurrences))]
17 |     verbose: u8,
18 | 
19 |     #[clap(short, long, arg_enum, default_value = "auto")]
20 |     color: logsetup::Color,
21 | 
22 |     mcap: Utf8PathBuf,
23 | }
24 | 
25 | fn map_mcap(p: &Utf8Path) -> Result<Mmap> {
26 |     let fd = fs::File::open(p).context("Couldn't open MCAP file")?;
27 |     unsafe { Mmap::map(&fd) }.context("Couldn't map MCAP file")
28 | }
29 | 
30 | fn run() -> Result<()> {
31 |     let args = Args::parse();
32 |     logsetup::init_logger(args.verbose, args.color);
33 | 
34 |     let mapped = map_mcap(&args.mcap)?;
35 | 
36 |     for message in mcap::MessageStream::new(&mapped)? {
37 |         let message = message?;
38 |         let ts = message.publish_time;
39 |         println!(
40 |             "{} {} [{}] [{}]...",
41 |             ts,
42 |             message.channel.topic,
43 |             message
44 |                 .channel
45 |                 .schema
46 |                 .as_ref()
47 |                 .map(|s| s.name.as_str())
48 |                 .unwrap_or_default(),
49 |             message
50 |                 .data
51 |                 .iter()
52 |                 .take(10)
53 |                 .map(|b| b.to_string())
54 |                 .collect::<Vec<_>>()
55 |                 .join(" ")
56 |         );
57 |     }
58 | 
59 |     info!("{:#?}", mcap::Summary::read(&mapped)?);
60 |     Ok(())
61 | }
62 | 
63 | fn main() {
64 |     run().unwrap_or_else(|e| {
65 |         error!("{:?}", e);
66 |         process::exit(1);
67 |     });
68 | }
69 | 
--------------------------------------------------------------------------------
/src/io_utils.rs:
--------------------------------------------------------------------------------
1 | use std::io::{self, prelude::*};
2 | 
3 | use crc32fast::Hasher;
4 | 
5 | /// Counts how many bytes have been read and calculates a running CRC32
6 | pub struct CountingCrcReader<R> {
7 |     inner: R,
8 |     hasher: Hasher,
9 |     count: u64,
10 | }
11 | 
12 | impl<R: Read> CountingCrcReader<R> {
13 |     pub fn new(inner: R) -> Self {
14 |         Self {
15 |             inner,
16 |             hasher: Hasher::new(),
17 |             count: 0,
18 |         }
19 |     }
20 | 
21 |     pub fn position(&self) -> u64 {
22 |         self.count
23 |     }
24 | 
25 |     /// Consumes the reader and returns the checksum
26 |     pub fn finalize(self) -> u32 {
27 |         self.hasher.finalize()
28 |     }
29 | }
30 | 
31 | impl<R: Read> Read for CountingCrcReader<R> {
32 |     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
33 |         let res = self.inner.read(buf)?;
34 |         self.count += res as u64;
35 |         self.hasher.update(&buf[..res]);
36 |         Ok(res)
37 |     }
38 | }
39 | 
40 | pub struct CountingCrcWriter<W> {
41 |     inner: W,
42 |     hasher: Hasher,
43 |     count: u64,
44 | }
45 | 
46 | impl<W: Write> CountingCrcWriter<W> {
47 |     pub fn new(inner: W) -> Self {
48 |         Self {
49 |             inner,
50 |             hasher: Hasher::new(),
51 |             count: 0,
52 |         }
53 |     }
54 | 
55 |     pub fn position(&self) -> u64 {
56 |         self.count
57 |     }
58 | 
59 |     pub fn get_mut(&mut self) -> &mut W {
60 |         &mut self.inner
61 |     }
62 | 
63 |     /// Consumes the writer and returns the inner writer and the checksum
64 |     pub fn finalize(self) -> (W, u32) {
65 |         (self.inner, self.hasher.finalize())
66 |     }
67 | }
68 | 
69 | impl<W: Write> Write for CountingCrcWriter<W> {
70 |     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
71 |         let res = self.inner.write(buf)?;
72 |         self.count += res as u64;
73 |         self.hasher.update(&buf[..res]);
74 |         Ok(res)
75 |     }
76 | 
77 |     fn flush(&mut self) -> io::Result<()> {
78 |         self.inner.flush()
79 |     }
80 | }
81 | 
--------------------------------------------------------------------------------
/examples/mcapcopy/main.rs:
--------------------------------------------------------------------------------
1 | #[path = "../common/logsetup.rs"]
2 | mod logsetup;
3 | 
4 | use std::{fs, io::BufWriter};
5 | 
6 | use anyhow::{Context, Result};
7 | use camino::{Utf8Path, Utf8PathBuf};
8 | use clap::Parser;
9 | use log::*;
10 | use mcap_rs as mcap;
11 | use memmap::Mmap;
12
| 13 | #[derive(Parser, Debug)] 14 | struct Args { 15 | /// Verbosity (-v, -vv, -vvv, etc.) 16 | #[clap(short, long, parse(from_occurrences))] 17 | verbose: u8, 18 | 19 | #[clap(short, long, arg_enum, default_value = "auto")] 20 | color: logsetup::Color, 21 | 22 | mcap: Utf8PathBuf, 23 | } 24 | 25 | fn map_mcap(p: &Utf8Path) -> Result { 26 | let fd = fs::File::open(p).context("Couldn't open MCAP file")?; 27 | unsafe { Mmap::map(&fd) }.context("Couldn't map MCAP file") 28 | } 29 | 30 | fn run() -> Result<()> { 31 | let args = Args::parse(); 32 | logsetup::init_logger(args.verbose, args.color); 33 | 34 | let mapped = map_mcap(&args.mcap)?; 35 | 36 | let mut out = mcap::Writer::new(BufWriter::new(fs::File::create("out.mcap")?))?; 37 | 38 | for message in mcap::MessageStream::new(&mapped)? { 39 | let message = message?; 40 | let ts = message.publish_time; 41 | info!( 42 | "{} {} [{}] [{}]...", 43 | ts, 44 | message.channel.topic, 45 | message 46 | .channel 47 | .schema 48 | .as_ref() 49 | .map(|s| s.name.as_str()) 50 | .unwrap_or_default(), 51 | message 52 | .data 53 | .iter() 54 | .take(10) 55 | .map(|b| b.to_string()) 56 | .collect::>() 57 | .join(" ") 58 | ); 59 | 60 | // We can easily take each Message and write it as a quick and dirty example, 61 | // but in real code, we'd be much better off adding each channel to the writer, 62 | // then calling `write_to_known_channel()`. 63 | // This avoids having to rehash the channel (and its schema) on each `write()` 64 | // to figure out what its ID is. 65 | out.write(&message)?; 66 | } 67 | Ok(()) 68 | } 69 | 70 | fn main() { 71 | run().unwrap_or_else(|e| { 72 | error!("{:?}", e); 73 | std::process::exit(1); 74 | }); 75 | } 76 | -------------------------------------------------------------------------------- /tests/metadata.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::*; 4 | 5 | use std::io::BufWriter; 6 | 7 | use anyhow::Result; 8 | use mcap_rs as mcap; 9 | use memmap::Mmap; 10 | use tempfile::tempfile; 11 | 12 | #[test] 13 | fn smoke() -> Result<()> { 14 | let mapped = map_mcap("tests/references/OneMetadata.mcap")?; 15 | let metas = mcap::read::LinearReader::new(&mapped)? 
16 | .filter_map(|record| match record.unwrap() { 17 | mcap::records::Record::Metadata(m) => Some(m), 18 | _ => None, 19 | }) 20 | .collect::>(); 21 | 22 | assert_eq!(metas.len(), 1); 23 | 24 | let expected = mcap::records::Metadata { 25 | name: String::from("myMetadata"), 26 | metadata: [(String::from("foo"), String::from("bar"))].into(), 27 | }; 28 | 29 | assert_eq!(metas[0], expected); 30 | 31 | Ok(()) 32 | } 33 | 34 | #[test] 35 | fn round_trip() -> Result<()> { 36 | let mapped = map_mcap("tests/references/OneMetadata.mcap")?; 37 | let metas = 38 | mcap::read::LinearReader::new(&mapped)?.filter_map(|record| match record.unwrap() { 39 | mcap::records::Record::Metadata(m) => Some(m), 40 | _ => None, 41 | }); 42 | 43 | let mut tmp = tempfile()?; 44 | let mut writer = mcap::Writer::new(BufWriter::new(&mut tmp))?; 45 | 46 | for m in metas { 47 | writer.write_metadata(&m)?; 48 | } 49 | drop(writer); 50 | 51 | let ours = unsafe { Mmap::map(&tmp) }?; 52 | let summary = mcap::Summary::read(&ours)?; 53 | 54 | let expected_summary = Some(mcap::Summary { 55 | stats: Some(mcap::records::Statistics { 56 | metadata_count: 1, 57 | ..Default::default() 58 | }), 59 | metadata_indexes: vec![mcap::records::MetadataIndex { 60 | offset: 38, // Finicky - depends on the length of the library version string 61 | length: 41, 62 | name: String::from("myMetadata"), 63 | }], 64 | ..Default::default() 65 | }); 66 | assert_eq!(summary, expected_summary); 67 | 68 | let expected = mcap::records::Metadata { 69 | name: String::from("myMetadata"), 70 | metadata: [(String::from("foo"), String::from("bar"))].into(), 71 | }; 72 | 73 | assert_eq!( 74 | mcap::read::metadata(&ours, &summary.unwrap().metadata_indexes[0])?, 75 | expected 76 | ); 77 | 78 | Ok(()) 79 | } 80 | -------------------------------------------------------------------------------- /examples/recover/main.rs: -------------------------------------------------------------------------------- 1 | #[path = "../common/logsetup.rs"] 2 | mod logsetup; 3 | 4 | use std::{fs, io::BufWriter}; 5 | 6 | use anyhow::{ensure, Context, Result}; 7 | use camino::{Utf8Path, Utf8PathBuf}; 8 | use clap::Parser; 9 | use enumset::enum_set; 10 | use log::*; 11 | use mcap_rs as mcap; 12 | use memmap::Mmap; 13 | 14 | #[derive(Parser, Debug)] 15 | struct Args { 16 | /// Verbosity (-v, -vv, -vvv, etc.) 
17 | #[clap(short, long, parse(from_occurrences))] 18 | verbose: u8, 19 | 20 | #[clap(short, long, arg_enum, default_value = "auto")] 21 | color: logsetup::Color, 22 | 23 | #[clap(help = "input mcap file")] 24 | input: Utf8PathBuf, 25 | 26 | #[clap( 27 | short, 28 | long, 29 | help = "output mcap file, defaults to .recovered.mcap" 30 | )] 31 | output: Option, 32 | } 33 | 34 | fn map_mcap(p: &Utf8Path) -> Result { 35 | let fd = fs::File::open(p).context("Couldn't open MCAP file")?; 36 | unsafe { Mmap::map(&fd) }.context("Couldn't map MCAP file") 37 | } 38 | 39 | fn make_output_path(input: Utf8PathBuf) -> Result { 40 | use std::str::FromStr; 41 | let file_stem = input.file_stem().context("no file stem for input path")?; 42 | let output_path = Utf8PathBuf::from_str(file_stem)?.with_extension("recovered.mcap"); 43 | Ok(output_path) 44 | } 45 | 46 | fn run() -> Result<()> { 47 | let args = Args::parse(); 48 | logsetup::init_logger(args.verbose, args.color); 49 | debug!("{:?}", args); 50 | 51 | let mapped = map_mcap(&args.input)?; 52 | let output_path = args.output.unwrap_or(make_output_path(args.input)?); 53 | ensure!( 54 | !output_path.exists(), 55 | "output path {output_path} already exists" 56 | ); 57 | 58 | let mut out = mcap::Writer::new(BufWriter::new(fs::File::create(output_path)?))?; 59 | 60 | info!("recovering as many messages as possible..."); 61 | let mut recovered_count = 0; 62 | for maybe_message in mcap::MessageStream::new_with_options( 63 | &mapped, 64 | enum_set!(mcap::read::Options::IgnoreEndMagic), 65 | )? { 66 | match maybe_message { 67 | Ok(message) => { 68 | out.write(&message)?; 69 | recovered_count += 1; 70 | } 71 | Err(err) => { 72 | error!("{err} -- stopping"); 73 | break; 74 | } 75 | } 76 | } 77 | info!("recovered {} messages", recovered_count); 78 | Ok(()) 79 | } 80 | 81 | fn main() { 82 | run().unwrap_or_else(|e| { 83 | error!("{:?}", e); 84 | std::process::exit(1); 85 | }); 86 | } 87 | -------------------------------------------------------------------------------- /tests/attachment.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::*; 4 | 5 | use std::{borrow::Cow, io::BufWriter}; 6 | 7 | use anyhow::Result; 8 | use mcap_rs as mcap; 9 | use memmap::Mmap; 10 | use tempfile::tempfile; 11 | 12 | #[test] 13 | fn smoke() -> Result<()> { 14 | let mapped = map_mcap("tests/references/OneAttachment.mcap")?; 15 | let attachments = mcap::read::LinearReader::new(&mapped)? 
16 | .filter_map(|record| match record.unwrap() { 17 | mcap::records::Record::Attachment { header, data } => Some((header, data)), 18 | _ => None, 19 | }) 20 | .collect::>(); 21 | 22 | assert_eq!(attachments.len(), 1); 23 | 24 | let expected_header = mcap::records::AttachmentHeader { 25 | log_time: 2, 26 | create_time: 1, 27 | name: String::from("myFile"), 28 | content_type: String::from("application/octet-stream"), 29 | data_len: 3, 30 | }; 31 | 32 | assert_eq!(attachments[0].0, expected_header); 33 | assert_eq!(attachments[0].1, &[1, 2, 3]); 34 | 35 | Ok(()) 36 | } 37 | 38 | #[test] 39 | fn round_trip() -> Result<()> { 40 | let mapped = map_mcap("tests/references/OneAttachment.mcap")?; 41 | let attachments = 42 | mcap::read::LinearReader::new(&mapped)?.filter_map(|record| match record.unwrap() { 43 | mcap::records::Record::Attachment { header, data } => Some((header, data)), 44 | _ => None, 45 | }); 46 | 47 | let mut tmp = tempfile()?; 48 | let mut writer = mcap::Writer::new(BufWriter::new(&mut tmp))?; 49 | 50 | for (h, d) in attachments { 51 | let a = mcap::Attachment { 52 | log_time: h.log_time, 53 | create_time: h.create_time, 54 | content_type: h.content_type, 55 | name: h.name, 56 | data: Cow::Borrowed(d), 57 | }; 58 | writer.attach(&a)?; 59 | } 60 | drop(writer); 61 | 62 | let ours = unsafe { Mmap::map(&tmp) }?; 63 | let summary = mcap::Summary::read(&ours)?; 64 | 65 | let expected_summary = Some(mcap::Summary { 66 | stats: Some(mcap::records::Statistics { 67 | attachment_count: 1, 68 | ..Default::default() 69 | }), 70 | attachment_indexes: vec![mcap::records::AttachmentIndex { 71 | offset: 38, // Finicky - depends on the length of the library version string 72 | length: 78, 73 | log_time: 2, 74 | create_time: 1, 75 | data_size: 3, 76 | name: String::from("myFile"), 77 | content_type: String::from("application/octet-stream"), 78 | }], 79 | ..Default::default() 80 | }); 81 | assert_eq!(summary, expected_summary); 82 | 83 | let expected_attachment = mcap::Attachment { 84 | log_time: 2, 85 | create_time: 1, 86 | name: String::from("myFile"), 87 | content_type: String::from("application/octet-stream"), 88 | data: Cow::Borrowed(&[1, 2, 3]), 89 | }; 90 | 91 | assert_eq!( 92 | mcap::read::attachment(&ours, &summary.unwrap().attachment_indexes[0])?, 93 | expected_attachment 94 | ); 95 | 96 | Ok(()) 97 | } 98 | -------------------------------------------------------------------------------- /tests/message.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::*; 4 | 5 | use std::{borrow::Cow, io::BufWriter, sync::Arc}; 6 | 7 | use anyhow::Result; 8 | use mcap_rs as mcap; 9 | use memmap::Mmap; 10 | use tempfile::tempfile; 11 | 12 | #[test] 13 | fn smoke() -> Result<()> { 14 | let mapped = map_mcap("tests/references/OneMessage.mcap")?; 15 | let messages = mcap::MessageStream::new(&mapped)?.collect::>>()?; 16 | 17 | assert_eq!(messages.len(), 1); 18 | 19 | let expected = mcap::Message { 20 | channel: Arc::new(mcap::Channel { 21 | schema: Some(Arc::new(mcap::Schema { 22 | name: String::from("Example"), 23 | encoding: String::from("c"), 24 | data: Cow::Borrowed(&[4, 5, 6]), 25 | })), 26 | topic: String::from("example"), 27 | message_encoding: String::from("a"), 28 | metadata: [(String::from("foo"), String::from("bar"))].into(), 29 | }), 30 | sequence: 10, 31 | log_time: 2, 32 | publish_time: 1, 33 | data: Cow::Borrowed(&[1, 2, 3]), 34 | }; 35 | 36 | assert_eq!(messages[0], expected); 37 | 38 | Ok(()) 39 | } 40 | 41 | #[test] 42 | 
fn round_trip() -> Result<()> { 43 | let mapped = map_mcap("tests/references/OneMessage.mcap")?; 44 | let messages = mcap::MessageStream::new(&mapped)?; 45 | 46 | let mut tmp = tempfile()?; 47 | let mut writer = mcap::Writer::new(BufWriter::new(&mut tmp))?; 48 | 49 | for m in messages { 50 | writer.write(&m?)?; 51 | } 52 | drop(writer); 53 | 54 | let ours = unsafe { Mmap::map(&tmp) }?; 55 | let summary = mcap::Summary::read(&ours)?.unwrap(); 56 | 57 | let schema = Arc::new(mcap::Schema { 58 | name: String::from("Example"), 59 | encoding: String::from("c"), 60 | data: Cow::Borrowed(&[4, 5, 6]), 61 | }); 62 | 63 | let channel = Arc::new(mcap::Channel { 64 | schema: Some(schema.clone()), 65 | topic: String::from("example"), 66 | message_encoding: String::from("a"), 67 | metadata: [(String::from("foo"), String::from("bar"))].into(), 68 | }); 69 | 70 | let expected_summary = mcap::Summary { 71 | stats: Some(mcap::records::Statistics { 72 | message_count: 1, 73 | schema_count: 1, 74 | channel_count: 1, 75 | chunk_count: 1, 76 | message_start_time: 2, 77 | message_end_time: 2, 78 | channel_message_counts: [(0, 1)].into(), 79 | ..Default::default() 80 | }), 81 | channels: [(0, channel.clone())].into(), 82 | schemas: [(1, schema.clone())].into(), 83 | ..Default::default() 84 | }; 85 | // Don't assert the chunk indexes - their size is at the whim of compressors. 86 | assert_eq!(summary.stats, expected_summary.stats); 87 | assert_eq!(summary.channels, expected_summary.channels); 88 | assert_eq!(summary.schemas, expected_summary.schemas); 89 | assert_eq!( 90 | summary.attachment_indexes, 91 | expected_summary.attachment_indexes 92 | ); 93 | assert_eq!(summary.metadata_indexes, expected_summary.metadata_indexes); 94 | 95 | let expected = mcap::Message { 96 | channel, 97 | sequence: 10, 98 | log_time: 2, 99 | publish_time: 1, 100 | data: Cow::Borrowed(&[1, 2, 3]), 101 | }; 102 | 103 | assert_eq!( 104 | mcap::MessageStream::new(&ours)?.collect::>>()?, 105 | &[expected] 106 | ); 107 | 108 | Ok(()) 109 | } 110 | -------------------------------------------------------------------------------- /tests/round_trip.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::*; 4 | 5 | use std::io::BufWriter; 6 | 7 | use anyhow::Result; 8 | use itertools::Itertools; 9 | use mcap_rs as mcap; 10 | use memmap::Mmap; 11 | use rayon::prelude::*; 12 | use tempfile::tempfile; 13 | 14 | #[test] 15 | fn demo_round_trip() -> Result<()> { 16 | use mcap::records::op; 17 | 18 | let mapped = map_mcap("tests/references/demo.mcap")?; 19 | 20 | let messages = mcap::MessageStream::new(&mapped)?; 21 | 22 | let mut tmp = tempfile()?; 23 | let mut writer = mcap::Writer::new(BufWriter::new(&mut tmp))?; 24 | 25 | for m in messages { 26 | // IRL, we'd add channels, then write messages to known channels, 27 | // which skips having to re-hash the channel and its schema each time. 28 | // But since here we'd need to do the same anyways... 29 | writer.write(&m?)?; 30 | } 31 | drop(writer); 32 | 33 | let ours = unsafe { Mmap::map(&tmp) }?; 34 | 35 | // Compare the message stream of our MCAP to the reference one. 36 | for (theirs, ours) in 37 | mcap::MessageStream::new(&mapped)?.zip_eq(mcap::MessageStream::new(&ours)?) 38 | { 39 | assert_eq!(ours?, theirs?) 40 | } 41 | 42 | // We don't use them, but verify the summary offsets. 
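    // (Recap of the layout: the footer's summary_offset_start points at a run
    // of SummaryOffset records, each naming a group opcode and the byte range
    // [group_start, group_start + group_length) holding that group's records.)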
43 | let footer = mcap::read::footer(&ours)?; 44 | assert_ne!(footer.summary_offset_start, 0); 45 | 46 | const FOOTER_LEN: usize = 20 + 8 + 1; // 20 bytes + 8 byte len + 1 byte opcode 47 | let summary_offset_end = ours.len() - FOOTER_LEN - mcap::MAGIC.len(); 48 | 49 | for (i, rec) in mcap::read::LinearReader::sans_magic( 50 | &ours[footer.summary_offset_start as usize..summary_offset_end], 51 | ) 52 | .enumerate() 53 | { 54 | let offset = match rec { 55 | Ok(mcap::records::Record::SummaryOffset(sos)) => sos, 56 | wut => panic!("Expected summary offset, got {:?}", wut), 57 | }; 58 | 59 | // We expect these offsets in this (arbitrary) order: 60 | match (i, offset.group_opcode) { 61 | (0, op::SCHEMA) => (), 62 | (1, op::CHANNEL) => (), 63 | (2, op::CHUNK_INDEX) => (), 64 | (3, op::STATISTICS) => (), 65 | _ => panic!("Summary offset {i} was {offset:?}"), 66 | }; 67 | 68 | // We should be able to read each group from start to finish, 69 | // and the records should be the expected type. 70 | let group_start = offset.group_start as usize; 71 | let group_end = (offset.group_start + offset.group_length) as usize; 72 | for group_rec in mcap::read::LinearReader::sans_magic(&ours[group_start..group_end]) { 73 | match group_rec { 74 | Ok(rec) => assert_eq!(offset.group_opcode, rec.opcode()), 75 | wut => panic!("Expected op {}, got {:?}", offset.group_opcode, wut), 76 | } 77 | } 78 | } 79 | 80 | // Verify the summary and its connectivity. 81 | 82 | let summary = mcap::Summary::read(&ours)?.unwrap(); 83 | assert!(summary.attachment_indexes.is_empty()); 84 | assert!(summary.metadata_indexes.is_empty()); 85 | 86 | // EZ mode: Streamed chunks should match up with a file-level message stream. 87 | for (whole, by_chunk) in mcap::MessageStream::new(&ours)?.zip_eq( 88 | summary 89 | .chunk_indexes 90 | .iter() 91 | .flat_map(|ci| summary.stream_chunk(&ours, ci).unwrap()), 92 | ) { 93 | assert_eq!(whole?, by_chunk?); 94 | } 95 | 96 | // Hard mode: randomly access every message in the MCAP. 97 | // Yes, this is dumb and O(n^2). 98 | let mut messages = Vec::new(); 99 | 100 | for ci in &summary.chunk_indexes { 101 | let mut offsets_and_messages = summary 102 | .read_message_indexes(&ours, ci) 103 | .unwrap() 104 | // At least parallelize the dumb. 105 | .into_par_iter() 106 | .flat_map(|(_k, v)| v) 107 | .map(|e| (e.offset, summary.seek_message(&ours, ci, &e).unwrap())) 108 | .collect::>(); 109 | 110 | offsets_and_messages.sort_unstable_by_key(|im| im.0); 111 | 112 | for om in offsets_and_messages { 113 | messages.push(om.1); 114 | } 115 | } 116 | 117 | for (streamed, seeked) in mcap::MessageStream::new(&ours)?.zip_eq(messages.into_iter()) { 118 | assert_eq!(streamed?, seeked); 119 | } 120 | 121 | Ok(()) 122 | } 123 | 124 | #[test] 125 | fn demo_random_chunk_access() -> Result<()> { 126 | let mapped = map_mcap("tests/references/demo.mcap")?; 127 | 128 | let summary = mcap::Summary::read(&mapped)?.unwrap(); 129 | 130 | // Random access of the second chunk should match the stream of the whole file. 131 | let messages_in_first_chunk: usize = summary 132 | .read_message_indexes(&mapped, &summary.chunk_indexes[0])? 133 | .values() 134 | .map(|entries| entries.len()) 135 | .sum(); 136 | let messages_in_second_chunk: usize = summary 137 | .read_message_indexes(&mapped, &summary.chunk_indexes[1])? 138 | .values() 139 | .map(|entries| entries.len()) 140 | .sum(); 141 | 142 | for (whole, random) in mcap::MessageStream::new(&mapped)? 
143 | .skip(messages_in_first_chunk) 144 | .take(messages_in_second_chunk) 145 | .zip_eq(summary.stream_chunk(&mapped, &summary.chunk_indexes[1])?) 146 | { 147 | assert_eq!(whole?, random?); 148 | } 149 | 150 | // Let's poke around the message indexes 151 | let mut index_entries = summary 152 | .read_message_indexes(&mapped, &summary.chunk_indexes[1])? 153 | .values() 154 | .flatten() 155 | .copied() 156 | .collect::>(); 157 | 158 | index_entries.sort_unstable_by_key(|e| e.offset); 159 | 160 | // Do a big dumb n^2 seek of each message (dear god, don't ever actually do this) 161 | for (entry, message) in index_entries 162 | .iter() 163 | .zip_eq(summary.stream_chunk(&mapped, &summary.chunk_indexes[1])?) 164 | { 165 | let seeked = summary.seek_message(&mapped, &summary.chunk_indexes[1], entry)?; 166 | assert_eq!(seeked, message?); 167 | } 168 | 169 | Ok(()) 170 | } 171 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! A library for manipulating [Foxglove MCAP](https://github.com/foxglove/mcap) files, 2 | //! both reading: 3 | //! 4 | //! ```no_run 5 | //! use std::fs; 6 | //! 7 | //! use anyhow::{Context, Result}; 8 | //! use camino::Utf8Path; 9 | //! use memmap::Mmap; 10 | //! 11 | //! fn map_mcap>(p: P) -> Result { 12 | //! let fd = fs::File::open(p.as_ref()).context("Couldn't open MCAP file")?; 13 | //! unsafe { Mmap::map(&fd) }.context("Couldn't map MCAP file") 14 | //! } 15 | //! 16 | //! fn read_it() -> Result<()> { 17 | //! let mapped = map_mcap("in.mcap")?; 18 | //! 19 | //! for message in mcap_rs::MessageStream::new(&mapped)? { 20 | //! println!("{:?}", message?); 21 | //! // Or whatever else you'd like to do... 22 | //! } 23 | //! Ok(()) 24 | //! } 25 | //! ``` 26 | //! or writing: 27 | //! ```no_run 28 | //! use std::{collections::BTreeMap, fs, io::BufWriter}; 29 | //! 30 | //! use anyhow::Result; 31 | //! 32 | //! use mcap_rs::{Channel, records::MessageHeader, Writer}; 33 | //! 34 | //! fn write_it() -> Result<()> { 35 | //! // To set the profile or compression options, see mcap_rs::WriteOptions. 36 | //! let mut out = Writer::new( 37 | //! BufWriter::new(fs::File::create("out.mcap")?) 38 | //! )?; 39 | //! 40 | //! // Channels and schemas are automatically assigned ID as they're serialized, 41 | //! // and automatically deduplicated with `Arc` when deserialized. 42 | //! let my_channel = Channel { 43 | //! topic: String::from("cool stuff"), 44 | //! schema: None, 45 | //! message_encoding: String::from("application/octet-stream"), 46 | //! metadata: BTreeMap::default() 47 | //! }; 48 | //! 49 | //! let channel_id = out.add_channel(&my_channel)?; 50 | //! 51 | //! out.write_to_known_channel( 52 | //! &MessageHeader { 53 | //! channel_id, 54 | //! sequence: 25, 55 | //! log_time: 6, 56 | //! publish_time: 24 57 | //! }, 58 | //! &[1, 2, 3] 59 | //! )?; 60 | //! out.write_to_known_channel( 61 | //! &MessageHeader { 62 | //! channel_id, 63 | //! sequence: 32, 64 | //! log_time: 23, 65 | //! publish_time: 25 66 | //! }, 67 | //! &[3, 4, 5] 68 | //! )?; 69 | //! 70 | //! out.finish()?; 71 | //! 72 | //! Ok(()) 73 | //! } 74 | //! 
```
75 | 
76 | pub mod read;
77 | pub mod records;
78 | pub mod write;
79 | 
80 | mod io_utils;
81 | 
82 | use std::{borrow::Cow, collections::BTreeMap, fmt, sync::Arc};
83 | 
84 | use thiserror::Error;
85 | 
86 | #[derive(Debug, Error)]
87 | pub enum McapError {
88 |     #[error("Bad magic number")]
89 |     BadMagic,
90 |     #[error("Footer record couldn't be found at the end of the file, before the magic bytes")]
91 |     BadFooter,
92 |     #[error("Attachment CRC failed (expected {saved:08X}, got {calculated:08X})")]
93 |     BadAttachmentCrc { saved: u32, calculated: u32 },
94 |     #[error("Chunk CRC failed (expected {saved:08X}, got {calculated:08X})")]
95 |     BadChunkCrc { saved: u32, calculated: u32 },
96 |     #[error("Data section CRC failed (expected {saved:08X}, got {calculated:08X})")]
97 |     BadDataCrc { saved: u32, calculated: u32 },
98 |     #[error("Summary section CRC failed (expected {saved:08X}, got {calculated:08X})")]
99 |     BadSummaryCrc { saved: u32, calculated: u32 },
100 |     #[error("Index offset and length didn't point to the expected record type")]
101 |     BadIndex,
102 |     #[error("Channel `{0}` has multiple records that don't match.")]
103 |     ConflictingChannels(String),
104 |     #[error("Schema `{0}` has multiple records that don't match.")]
105 |     ConflictingSchemas(String),
106 |     #[error("Record parse failed")]
107 |     Parse(#[from] binrw::Error),
108 |     #[error("I/O error from writing, or reading a compression stream")]
109 |     Io(#[from] std::io::Error),
110 |     #[error("Schema has an ID of 0")]
111 |     InvalidSchemaId,
112 |     #[error("MCAP file ended in the middle of a record")]
113 |     UnexpectedEof,
114 |     #[error("Chunk ended in the middle of a record")]
115 |     UnexpectedEoc,
116 |     #[error("Message {0} referenced unknown channel {1}")]
117 |     UnknownChannel(u32, u16),
118 |     #[error("Channel `{0}` referenced unknown schema {1}")]
119 |     UnknownSchema(String, u16),
120 |     #[error("Found record with opcode {0:02X} in a chunk")]
121 |     UnexpectedChunkRecord(u8),
122 |     #[error("Unsupported compression format `{0}`")]
123 |     UnsupportedCompression(String),
124 | }
125 | 
126 | pub type McapResult<T> = Result<T, McapError>;
127 | 
128 | /// Magic bytes for the MCAP format
129 | pub const MAGIC: &[u8] = &[0x89, b'M', b'C', b'A', b'P', 0x30, b'\r', b'\n'];
130 | 
131 | /// Compression options for chunks of channels, schemas, and messages in an MCAP file
132 | #[derive(Debug, Copy, Clone, Default)]
133 | pub enum Compression {
134 |     #[default]
135 |     Zstd,
136 |     Lz4,
137 | }
138 | 
139 | /// Describes a schema used by one or more [Channel]s in an MCAP file
140 | ///
141 | /// The [`CoW`](std::borrow::Cow) can either borrow directly from the mapped file,
142 | /// or hold its own buffer if it was decompressed from a chunk.
143 | #[derive(Clone, PartialEq, Eq, Hash)]
144 | pub struct Schema<'a> {
145 |     pub name: String,
146 |     pub encoding: String,
147 |     pub data: Cow<'a, [u8]>,
148 | }
149 | 
150 | impl fmt::Debug for Schema<'_> {
151 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
152 |         f.debug_struct("Schema")
153 |             .field("name", &self.name)
154 |             .field("encoding", &self.encoding)
155 |             .finish_non_exhaustive()
156 |     }
157 | }
158 | 
159 | /// Describes a channel which [Message]s are published to in an MCAP file
160 | #[derive(Debug, Clone, PartialEq, Eq, Hash)]
161 | pub struct Channel<'a> {
162 |     pub topic: String,
163 |     pub schema: Option<Arc<Schema<'a>>>,
164 | 
165 |     pub message_encoding: String,
166 |     pub metadata: BTreeMap<String, String>,
167 | }
168 | 
169 | /// An event in an MCAP file, published to a [Channel]
170 | ///
171 | /// The [`CoW`](std::borrow::Cow) can either borrow directly from the mapped file,
172 | /// or hold its own buffer if it was decompressed from a chunk.
173 | #[derive(Debug, Clone, PartialEq, Eq)]
174 | pub struct Message<'a> {
175 |     pub channel: Arc<Channel<'a>>,
176 |     pub sequence: u32,
177 |     pub log_time: u64,
178 |     pub publish_time: u64,
179 |     pub data: Cow<'a, [u8]>,
180 | }
181 | 
182 | /// An attachment and its metadata in an MCAP file
183 | #[derive(Debug, PartialEq, Eq)]
184 | pub struct Attachment<'a> {
185 |     pub log_time: u64,
186 |     pub create_time: u64,
187 |     pub name: String,
188 |     pub content_type: String,
189 |     pub data: Cow<'a, [u8]>,
190 | }
191 | 
192 | pub use read::{MessageStream, Summary};
193 | pub use write::{WriteOptions, Writer};
194 | 
--------------------------------------------------------------------------------
/src/records.rs:
--------------------------------------------------------------------------------
1 | //! Raw records parsed from an MCAP file
2 | //!
3 | //! See <https://github.com/foxglove/mcap>
4 | //!
5 | //! You probably want to use higher-level interfaces, like
6 | //! [`Message`](crate::Message), [`Channel`](crate::Channel), and [`Schema`](crate::Schema),
7 | //! read from iterators like [`MessageStream`](crate::MessageStream).
8 | 
9 | use binrw::io::{Read, Seek, Write};
10 | use binrw::*;
11 | 
12 | use std::{
13 |     borrow::Cow,
14 |     collections::BTreeMap,
15 |     time::{Duration, SystemTime, UNIX_EPOCH},
16 | };
17 | 
18 | /// Opcodes for MCAP file records.
19 | ///
20 | /// "Records are identified by a single-byte opcode.
21 | /// Record opcodes in the range 0x01-0x7F are reserved for future MCAP format usage.
22 | /// 0x80-0xFF are reserved for application extensions and user proposals."
23 | pub mod op {
24 |     pub const HEADER: u8 = 0x01;
25 |     pub const FOOTER: u8 = 0x02;
26 |     pub const SCHEMA: u8 = 0x03;
27 |     pub const CHANNEL: u8 = 0x04;
28 |     pub const MESSAGE: u8 = 0x05;
29 |     pub const CHUNK: u8 = 0x06;
30 |     pub const MESSAGE_INDEX: u8 = 0x07;
31 |     pub const CHUNK_INDEX: u8 = 0x08;
32 |     pub const ATTACHMENT: u8 = 0x09;
33 |     pub const ATTACHMENT_INDEX: u8 = 0x0A;
34 |     pub const STATISTICS: u8 = 0x0B;
35 |     pub const METADATA: u8 = 0x0C;
36 |     pub const METADATA_INDEX: u8 = 0x0D;
37 |     pub const SUMMARY_OFFSET: u8 = 0x0E;
38 |     pub const END_OF_DATA: u8 = 0x0F;
39 | }
40 | 
41 | /// A raw record from an MCAP file.
42 | ///
43 | /// For records with large slices of binary data (schemas, messages, chunks...),
44 | /// we use a [`CoW`](std::borrow::Cow) that can either borrow directly from the mapped file,
45 | /// or hold its own buffer if it was decompressed from a chunk.
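/// Each variant corresponds to one of the [`op`] opcodes, and
/// [`Record::opcode`] below recovers it, which is handy when grouping records
/// by type, as the summary-offset checks in tests/round_trip.rs do.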
46 | #[derive(Debug)]
47 | pub enum Record<'a> {
48 |     Header(Header),
49 |     Footer(Footer),
50 |     Schema {
51 |         header: SchemaHeader,
52 |         data: Cow<'a, [u8]>,
53 |     },
54 |     Channel(Channel),
55 |     Message {
56 |         header: MessageHeader,
57 |         data: Cow<'a, [u8]>,
58 |     },
59 |     Chunk {
60 |         header: ChunkHeader,
61 |         data: &'a [u8],
62 |     },
63 |     MessageIndex(MessageIndex),
64 |     ChunkIndex(ChunkIndex),
65 |     Attachment {
66 |         header: AttachmentHeader,
67 |         data: &'a [u8],
68 |     },
69 |     AttachmentIndex(AttachmentIndex),
70 |     Statistics(Statistics),
71 |     Metadata(Metadata),
72 |     MetadataIndex(MetadataIndex),
73 |     SummaryOffset(SummaryOffset),
74 |     EndOfData(EndOfData),
75 |     /// A record of unknown type
76 |     Unknown {
77 |         opcode: u8,
78 |         data: Cow<'a, [u8]>,
79 |     },
80 | }
81 | 
82 | impl Record<'_> {
83 |     pub fn opcode(&self) -> u8 {
84 |         match &self {
85 |             Record::Header(_) => op::HEADER,
86 |             Record::Footer(_) => op::FOOTER,
87 |             Record::Schema { .. } => op::SCHEMA,
88 |             Record::Channel(_) => op::CHANNEL,
89 |             Record::Message { .. } => op::MESSAGE,
90 |             Record::Chunk { .. } => op::CHUNK,
91 |             Record::MessageIndex(_) => op::MESSAGE_INDEX,
92 |             Record::ChunkIndex(_) => op::CHUNK_INDEX,
93 |             Record::Attachment { .. } => op::ATTACHMENT,
94 |             Record::AttachmentIndex(_) => op::ATTACHMENT_INDEX,
95 |             Record::Statistics(_) => op::STATISTICS,
96 |             Record::Metadata(_) => op::METADATA,
97 |             Record::MetadataIndex(_) => op::METADATA_INDEX,
98 |             Record::SummaryOffset(_) => op::SUMMARY_OFFSET,
99 |             Record::EndOfData(_) => op::END_OF_DATA,
100 |             Record::Unknown { opcode, .. } => *opcode,
101 |         }
102 |     }
103 | }
104 | 
105 | #[binrw]
106 | #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
107 | struct McapString {
108 |     #[br(temp)]
109 |     #[bw(calc = inner.len() as u32)]
110 |     pub len: u32,
111 | 
112 |     #[br(count = len, try_map = String::from_utf8)]
113 |     #[bw(map = |s| s.as_bytes())]
114 |     pub inner: String,
115 | }
116 | 
117 | /// Avoids taking a copy to turn a String to an McapString for serialization
118 | fn write_string<W: Write + Seek>(
119 |     s: &String,
120 |     w: &mut W,
121 |     opts: &WriteOptions,
122 |     args: (),
123 | ) -> BinResult<()> {
124 |     (s.len() as u32).write_options(w, opts, args)?;
125 |     (s.as_bytes()).write_options(w, opts, args)?;
126 |     Ok(())
127 | }
128 | 
129 | fn parse_vec<T: BinRead<Args = ()>, R: Read + Seek>(
130 |     reader: &mut R,
131 |     ro: &ReadOptions,
132 |     args: (),
133 | ) -> BinResult<Vec<T>> {
134 |     let mut parsed = Vec::new();
135 | 
136 |     // Length of the map in BYTES, not records.
137 |     let byte_len: u32 = BinRead::read_options(reader, ro, args)?;
138 |     let pos = reader.stream_position()?;
139 | 
140 |     while (reader.stream_position()? - pos) < byte_len as u64 {
141 |         parsed.push(T::read_options(reader, ro, args)?);
142 |     }
143 | 
144 |     Ok(parsed)
145 | }
146 | 
147 | #[allow(clippy::ptr_arg)] // needed to match binrw macros
148 | fn write_vec<W: Write + Seek, T: BinWrite<Args = ()>>(
149 |     v: &Vec<T>,
150 |     w: &mut W,
151 |     opts: &WriteOptions,
152 |     args: (),
153 | ) -> BinResult<()> {
154 |     use std::io::SeekFrom;
155 | 
156 |     let start = w.stream_position()?;
157 |     (!0u32).write_options(w, opts, args)?; // Revisit...
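    // (That !0u32 is a placeholder length: we have a Write + Seek stream here,
    // so we write a dummy u32, serialize the records, then seek back below and
    // patch in the real byte count before returning.)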
158 |     for e in v.iter() {
159 |         e.write_options(w, opts, args)?;
160 |     }
161 |     let end = w.stream_position()?;
162 |     let data_len = end - start - 4;
163 |     w.seek(SeekFrom::Start(start))?;
164 |     (data_len as u32).write_options(w, opts, args)?;
165 |     assert_eq!(w.seek(SeekFrom::End(0))?, end);
166 |     Ok(())
167 | }
168 | 
169 | #[derive(Debug, Clone, Eq, PartialEq, BinRead, BinWrite)]
170 | pub struct Header {
171 |     #[br(map = |s: McapString| s.inner )]
172 |     #[bw(write_with = write_string)]
173 |     pub profile: String,
174 | 
175 |     #[br(map = |s: McapString| s.inner )]
176 |     #[bw(write_with = write_string)]
177 |     pub library: String,
178 | }
179 | 
180 | #[derive(Debug, Default, Clone, Copy, Eq, PartialEq, BinRead, BinWrite)]
181 | pub struct Footer {
182 |     pub summary_start: u64,
183 |     pub summary_offset_start: u64,
184 |     pub summary_crc: u32,
185 | }
186 | 
187 | #[derive(Debug, Clone, Eq, PartialEq, BinRead, BinWrite)]
188 | pub struct SchemaHeader {
189 |     pub id: u16,
190 | 
191 |     #[br(map = |s: McapString| s.inner )]
192 |     #[bw(write_with = write_string)]
193 |     pub name: String,
194 | 
195 |     #[br(map = |s: McapString| s.inner )]
196 |     #[bw(write_with = write_string)]
197 |     pub encoding: String,
198 | 
199 |     pub data_len: u32,
200 | }
201 | 
202 | fn parse_string_map<R: Read + Seek>(
203 |     reader: &mut R,
204 |     ro: &ReadOptions,
205 |     args: (),
206 | ) -> BinResult<BTreeMap<String, String>> {
207 |     let mut parsed = BTreeMap::new();
208 | 
209 |     // Length of the map in BYTES, not records.
210 |     let byte_len: u32 = BinRead::read_options(reader, ro, args)?;
211 |     let pos = reader.stream_position()?;
212 | 
213 |     while (reader.stream_position()? - pos) < byte_len as u64 {
214 |         let k = McapString::read_options(reader, ro, args)?;
215 |         let v = McapString::read_options(reader, ro, args)?;
216 |         if let Some(_prev) = parsed.insert(k.inner, v.inner) {
217 |             return Err(binrw::Error::Custom {
218 |                 pos,
219 |                 err: Box::new("Duplicate keys in map"),
220 |             });
221 |         }
222 |     }
223 | 
224 |     Ok(parsed)
225 | }
226 | 
227 | fn write_string_map<W: Write + Seek>(
228 |     s: &BTreeMap<String, String>,
229 |     w: &mut W,
230 |     opts: &WriteOptions,
231 |     args: (),
232 | ) -> BinResult<()> {
233 |     // Ugh: figure out total number of bytes to write:
234 |     let mut byte_len = 0;
235 |     for (k, v) in s {
236 |         byte_len += 8; // Four bytes each for lengths of key and value
237 |         byte_len += k.len();
238 |         byte_len += v.len();
239 |     }
240 | 
241 |     (byte_len as u32).write_options(w, opts, args)?;
242 |     let pos = w.stream_position()?;
243 | 
244 |     for (k, v) in s {
245 |         write_string(k, w, opts, args)?;
246 |         write_string(v, w, opts, args)?;
247 |     }
248 |     assert_eq!(w.stream_position()?, pos + byte_len as u64);
249 |     Ok(())
250 | }
251 | 
252 | fn write_int_map<K: BinWrite<Args = ()>, V: BinWrite<Args = ()>, W: Write + Seek>(
253 |     s: &BTreeMap<K, V>,
254 |     w: &mut W,
255 |     opts: &WriteOptions,
256 |     args: (),
257 | ) -> BinResult<()> {
258 |     // Ugh: figure out total number of bytes to write:
259 |     let mut byte_len = 0;
260 |     for _ in s.values() {
261 |         // Hack: We're assuming serialized size of the value is its in-memory size.
262 |         // For ints of all flavors, this should be true.
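        // (e.g., ChunkIndex::message_index_offsets is a BTreeMap<u16, u64>,
        // so each entry serializes to 2 + 8 = 10 bytes, matching the
        // size_of::<K>() + size_of::<V>() sum below.)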
263 |         byte_len += core::mem::size_of::<K>();
264 |         byte_len += core::mem::size_of::<V>();
265 |     }
266 | 
267 |     (byte_len as u32).write_options(w, opts, args)?;
268 |     let pos = w.stream_position()?;
269 | 
270 |     for (k, v) in s {
271 |         k.write_options(w, opts, args)?;
272 |         v.write_options(w, opts, args)?;
273 |     }
274 |     assert_eq!(w.stream_position()?, pos + byte_len as u64);
275 |     Ok(())
276 | }
277 | 
278 | fn parse_int_map<K, V, R>(reader: &mut R, ro: &ReadOptions, args: ()) -> BinResult<BTreeMap<K, V>>
279 | where
280 |     K: BinRead<Args = ()> + std::cmp::Ord,
281 |     V: BinRead<Args = ()>,
282 |     R: Read + Seek,
283 | {
284 |     let mut parsed = BTreeMap::new();
285 | 
286 |     // Length of the map in BYTES, not records.
287 |     let byte_len: u32 = BinRead::read_options(reader, ro, args)?;
288 |     let pos = reader.stream_position()?;
289 | 
290 |     while (reader.stream_position()? - pos) < byte_len as u64 {
291 |         let k = K::read_options(reader, ro, args)?;
292 |         let v = V::read_options(reader, ro, args)?;
293 |         if let Some(_prev) = parsed.insert(k, v) {
294 |             return Err(binrw::Error::Custom {
295 |                 pos,
296 |                 err: Box::new("Duplicate keys in map"),
297 |             });
298 |         }
299 |     }
300 | 
301 |     Ok(parsed)
302 | }
303 | 
304 | #[derive(Debug, Clone, Eq, PartialEq, BinRead, BinWrite)]
305 | pub struct Channel {
306 |     pub id: u16,
307 |     pub schema_id: u16,
308 | 
309 |     #[br(map = |s: McapString| s.inner )]
310 |     #[bw(write_with = write_string)]
311 |     pub topic: String,
312 | 
313 |     #[br(map = |s: McapString| s.inner )]
314 |     #[bw(write_with = write_string)]
315 |     pub message_encoding: String,
316 | 
317 |     #[br(parse_with = parse_string_map)]
318 |     #[bw(write_with = write_string_map)]
319 |     pub metadata: BTreeMap<String, String>,
320 | }
321 | 
322 | pub fn system_time_to_nanos(d: &SystemTime) -> u64 {
323 |     let ns = d.duration_since(UNIX_EPOCH).unwrap().as_nanos();
324 |     assert!(ns <= u64::MAX as u128);
325 |     ns as u64
326 | }
327 | 
328 | pub fn nanos_to_system_time(n: u64) -> SystemTime {
329 |     UNIX_EPOCH + Duration::from_nanos(n)
330 | }
331 | 
332 | #[derive(Debug, Copy, Clone, Eq, PartialEq, BinRead, BinWrite)]
333 | pub struct MessageHeader {
334 |     pub channel_id: u16,
335 |     pub sequence: u32,
336 | 
337 |     pub log_time: u64,
338 | 
339 |     pub publish_time: u64,
340 | }
341 | 
342 | #[derive(Debug, Clone, Eq, PartialEq, BinRead, BinWrite)]
343 | pub struct ChunkHeader {
344 |     pub message_start_time: u64,
345 | 
346 |     pub message_end_time: u64,
347 | 
348 |     pub uncompressed_size: u64,
349 | 
350 |     pub uncompressed_crc: u32,
351 | 
352 |     #[br(map = |s: McapString| s.inner )]
353 |     #[bw(write_with = write_string)]
354 |     pub compression: String,
355 | 
356 |     pub compressed_size: u64,
357 | }
358 | 
359 | #[derive(Debug, Clone, Copy, Eq, PartialEq, BinRead, BinWrite)]
360 | pub struct MessageIndexEntry {
361 |     pub log_time: u64,
362 | 
363 |     pub offset: u64,
364 | }
365 | 
366 | #[derive(Debug, Clone, Eq, PartialEq, BinRead, BinWrite)]
367 | pub struct MessageIndex {
368 |     pub channel_id: u16,
369 | 
370 |     #[br(parse_with = parse_vec)]
371 |     #[bw(write_with = write_vec)]
372 |     pub records: Vec<MessageIndexEntry>,
373 | }
374 | 
375 | #[derive(Debug, Clone, Eq, PartialEq, BinRead, BinWrite)]
376 | pub struct ChunkIndex {
377 |     pub message_start_time: u64,
378 | 
379 |     pub message_end_time: u64,
380 | 
381 |     pub chunk_start_offset: u64,
382 | 
383 |     pub chunk_length: u64,
384 | 
385 |     #[br(parse_with = parse_int_map)]
386 |     #[bw(write_with = write_int_map)]
387 |     pub message_index_offsets: BTreeMap<u16, u64>,
388 | 
389 |     pub message_index_length: u64,
390 | 
391 |     #[br(map = |s: McapString| s.inner )]
392 |     #[bw(write_with = write_string)]
393 | pub compression: String, 394 | 395 | pub compressed_size: u64, 396 | 397 | pub uncompressed_size: u64, 398 | } 399 | 400 | #[derive(Debug, Clone, Eq, PartialEq, BinRead, BinWrite)] 401 | pub struct AttachmentHeader { 402 | pub log_time: u64, 403 | 404 | pub create_time: u64, 405 | 406 | #[br(map = |s: McapString| s.inner )] 407 | #[bw(write_with = write_string)] 408 | pub name: String, 409 | 410 | #[br(map = |s: McapString| s.inner )] 411 | #[bw(write_with = write_string)] 412 | pub content_type: String, 413 | 414 | pub data_len: u64, 415 | } 416 | 417 | #[derive(Debug, Clone, Eq, PartialEq, BinRead, BinWrite)] 418 | pub struct AttachmentIndex { 419 | pub offset: u64, 420 | 421 | pub length: u64, 422 | 423 | pub log_time: u64, 424 | 425 | pub create_time: u64, 426 | 427 | pub data_size: u64, 428 | 429 | #[br(map = |s: McapString| s.inner )] 430 | #[bw(write_with = write_string)] 431 | pub name: String, 432 | 433 | #[br(map = |s: McapString| s.inner )] 434 | #[bw(write_with = write_string)] 435 | pub content_type: String, 436 | } 437 | 438 | #[derive(Debug, Default, Clone, Eq, PartialEq, BinRead, BinWrite)] 439 | pub struct Statistics { 440 | pub message_count: u64, 441 | pub schema_count: u16, 442 | pub channel_count: u32, 443 | pub attachment_count: u32, 444 | pub metadata_count: u32, 445 | pub chunk_count: u32, 446 | 447 | pub message_start_time: u64, 448 | 449 | pub message_end_time: u64, 450 | 451 | #[br(parse_with = parse_int_map)] 452 | #[bw(write_with = write_int_map)] 453 | pub channel_message_counts: BTreeMap, 454 | } 455 | 456 | #[derive(Debug, Clone, Eq, PartialEq, BinRead, BinWrite)] 457 | pub struct Metadata { 458 | #[br(map = |s: McapString| s.inner )] 459 | #[bw(write_with = write_string)] 460 | pub name: String, 461 | 462 | #[br(parse_with = parse_string_map)] 463 | #[bw(write_with = write_string_map)] 464 | pub metadata: BTreeMap, 465 | } 466 | 467 | #[derive(Debug, Clone, Eq, PartialEq, BinRead, BinWrite)] 468 | pub struct MetadataIndex { 469 | pub offset: u64, 470 | 471 | pub length: u64, 472 | 473 | #[br(map = |s: McapString| s.inner )] 474 | #[bw(write_with = write_string)] 475 | pub name: String, 476 | } 477 | 478 | #[derive(Debug, Clone, Copy, Eq, PartialEq, BinRead, BinWrite)] 479 | pub struct SummaryOffset { 480 | pub group_opcode: u8, 481 | pub group_start: u64, 482 | pub group_length: u64, 483 | } 484 | 485 | #[derive(Debug, Default, Clone, Copy, Eq, PartialEq, BinRead, BinWrite)] 486 | pub struct EndOfData { 487 | pub data_section_crc: u32, 488 | } 489 | 490 | #[cfg(test)] 491 | mod tests { 492 | use super::*; 493 | use std::io::Cursor; 494 | 495 | #[test] 496 | fn string_parse() { 497 | let ms: McapString = Cursor::new(b"\x04\0\0\0abcd").read_le().unwrap(); 498 | assert_eq!( 499 | ms, 500 | McapString { 501 | inner: String::from("abcd") 502 | } 503 | ); 504 | 505 | assert!(Cursor::new(b"\x05\0\0\0abcd") 506 | .read_le::() 507 | .is_err()); 508 | 509 | let mut written = Vec::new(); 510 | Cursor::new(&mut written) 511 | .write_le(&McapString { 512 | inner: String::from("hullo"), 513 | }) 514 | .unwrap(); 515 | assert_eq!(&written, b"\x05\0\0\0hullo"); 516 | } 517 | 518 | #[test] 519 | fn header_parse() { 520 | let expected = b"\x04\0\0\0abcd\x03\0\0\x00123"; 521 | 522 | let h: Header = Cursor::new(expected).read_le().unwrap(); 523 | assert_eq!(h.profile, "abcd"); 524 | assert_eq!(h.library, "123"); 525 | 526 | let mut written = Vec::new(); 527 | Cursor::new(&mut written).write_le(&h).unwrap(); 528 | assert_eq!(written, expected); 529 | } 530 | } 531 | 
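A quick aside on the time helpers above: `system_time_to_nanos` and
`nanos_to_system_time` are exact inverses for whole-nanosecond timestamps,
since MCAP stores times as u64 nanoseconds since the Unix epoch. A minimal
sketch (not part of the crate's test suite):

```rust
use std::time::{Duration, UNIX_EPOCH};

use mcap_rs::records::{nanos_to_system_time, system_time_to_nanos};

fn main() {
    // A whole-nanosecond timestamp round-trips losslessly.
    let t = UNIX_EPOCH + Duration::from_nanos(1_650_000_000_000_000_000);
    assert_eq!(system_time_to_nanos(&t), 1_650_000_000_000_000_000);
    assert_eq!(nanos_to_system_time(system_time_to_nanos(&t)), t);
}
```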
--------------------------------------------------------------------------------
/src/write.rs:
--------------------------------------------------------------------------------
1 | //! Write MCAP files
2 | 
3 | use std::{
4 |     borrow::Cow,
5 |     collections::{BTreeMap, HashMap},
6 |     io::{self, prelude::*, Cursor, SeekFrom},
7 | };
8 | 
9 | use binrw::prelude::*;
10 | use byteorder::{WriteBytesExt, LE};
11 | 
12 | use crate::{
13 |     io_utils::CountingCrcWriter,
14 |     records::{self, op, MessageHeader, Record},
15 |     Attachment, Channel, Compression, McapError, McapResult, Message, Schema, MAGIC,
16 | };
17 | 
18 | pub use records::Metadata;
19 | 
20 | enum WriteMode<W: Write + Seek> {
21 |     Raw(W),
22 |     Chunk(ChunkWriter<W>),
23 | }
24 | 
25 | fn op_and_len<W: Write>(w: &mut W, op: u8, len: usize) -> io::Result<()> {
26 |     w.write_u8(op)?;
27 |     w.write_u64::<LE>(len as u64)?;
28 |     Ok(())
29 | }
30 | 
31 | fn write_record<W: Write>(w: &mut W, r: &Record) -> io::Result<()> {
32 |     // Annoying: our stream isn't Seek if we're writing to a compressed chunk stream,
33 |     // so we need an intermediate buffer.
34 |     macro_rules! record {
35 |         ($op:expr, $b:ident) => {{
36 |             let mut rec_buf = Vec::new();
37 |             Cursor::new(&mut rec_buf).write_le($b).unwrap();
38 | 
39 |             op_and_len(w, $op, rec_buf.len())?;
40 |             w.write_all(&rec_buf)?;
41 |         }};
42 |     }
43 | 
44 |     macro_rules! header_and_data {
45 |         ($op:expr, $header:ident, $data:ident) => {{
46 |             let mut header_buf = Vec::new();
47 |             Cursor::new(&mut header_buf).write_le($header).unwrap();
48 | 
49 |             op_and_len(w, $op, header_buf.len() + $data.len())?;
50 |             w.write_all(&header_buf)?;
51 |             w.write_all($data)?;
52 |         }};
53 |     }
54 | 
55 |     match r {
56 |         Record::Header(h) => record!(op::HEADER, h),
57 |         Record::Footer(_) => {
58 |             unreachable!("Footer handles its own serialization because its CRC is self-referencing")
59 |         }
60 |         Record::Schema { header, data } => header_and_data!(op::SCHEMA, header, data),
61 |         Record::Channel(c) => record!(op::CHANNEL, c),
62 |         Record::Message { header, data } => header_and_data!(op::MESSAGE, header, data),
63 |         Record::Chunk { ..
} => {
64 |             unreachable!("Chunks handle their own serialization due to seeking shenanigans")
65 |         }
66 |         Record::MessageIndex(_) => {
67 |             unreachable!("MessageIndexes handle their own serialization to recycle the buffer between indexes")
68 |         }
69 |         Record::ChunkIndex(c) => record!(op::CHUNK_INDEX, c),
70 |         Record::Attachment { header, data } => {
71 |             assert_eq!(header.data_len, data.len() as u64);
72 | 
73 |             // Can't use header_and_data since we need to checksum those,
74 |             // but not the op and len
75 |             let mut header_buf = Vec::new();
76 |             Cursor::new(&mut header_buf).write_le(header).unwrap();
77 |             op_and_len(w, op::ATTACHMENT, header_buf.len() + data.len() + 4)?; // 4 for crc
78 | 
79 |             let mut checksummer = CountingCrcWriter::new(w);
80 |             checksummer.write_all(&header_buf)?;
81 |             checksummer.write_all(data)?;
82 |             let (w, crc) = checksummer.finalize();
83 |             w.write_u32::<LE>(crc)?;
84 |         }
85 |         Record::AttachmentIndex(ai) => record!(op::ATTACHMENT_INDEX, ai),
86 |         Record::Statistics(s) => record!(op::STATISTICS, s),
87 |         Record::Metadata(m) => record!(op::METADATA, m),
88 |         Record::MetadataIndex(mi) => record!(op::METADATA_INDEX, mi),
89 |         Record::SummaryOffset(so) => record!(op::SUMMARY_OFFSET, so),
90 |         Record::EndOfData(eod) => record!(op::END_OF_DATA, eod),
91 |         _ => todo!(),
92 |     };
93 |     Ok(())
94 | }
95 | 
96 | #[derive(Debug, Clone)]
97 | pub struct WriteOptions {
98 |     compression: Option<Compression>,
99 |     profile: String,
100 | }
101 | 
102 | impl Default for WriteOptions {
103 |     fn default() -> Self {
104 |         Self {
105 |             compression: Some(Compression::Zstd),
106 |             profile: String::new(),
107 |         }
108 |     }
109 | }
110 | 
111 | impl WriteOptions {
112 |     pub fn new() -> Self {
113 |         Self::default()
114 |     }
115 | 
116 |     pub fn compression(self, compression: Option<Compression>) -> Self {
117 |         Self {
118 |             compression,
119 |             ..self
120 |         }
121 |     }
122 | 
123 |     pub fn profile<S: Into<String>>(self, profile: S) -> Self {
124 |         Self {
125 |             profile: profile.into(),
126 |             ..self
127 |         }
128 |     }
129 | 
130 |     /// Creates a [`Writer`] which writes to `w` using the given options
131 |     pub fn create<'a, W: Write + Seek>(self, w: W) -> McapResult<Writer<'a, W>> {
132 |         Writer::with_options(w, self)
133 |     }
134 | }
135 | 
136 | /// Writes an MCAP file to the given [writer](Write).
137 | ///
138 | /// Users should call [`finish()`](Self::finish) to flush the stream
139 | /// and check for errors when done; otherwise the result will be unwrapped on drop.
140 | pub struct Writer<'a, W: Write + Seek> { 141 | writer: Option<WriteMode<W>>, 142 | compression: Option<Compression>, 143 | schemas: HashMap<Schema<'a>, u16>, 144 | channels: HashMap<Channel<'a>, u16>, 145 | stats: records::Statistics, 146 | chunk_indexes: Vec<records::ChunkIndex>, 147 | attachment_indexes: Vec<records::AttachmentIndex>, 148 | metadata_indexes: Vec<records::MetadataIndex>, 149 | } 150 | 151 | impl<'a, W: Write + Seek> Writer<'a, W> { 152 | pub fn new(writer: W) -> McapResult<Self> { 153 | Self::with_options(writer, WriteOptions::default()) 154 | } 155 | 156 | fn with_options(mut writer: W, opts: WriteOptions) -> McapResult<Self> { 157 | writer.write_all(MAGIC)?; 158 | 159 | write_record( 160 | &mut writer, 161 | &Record::Header(records::Header { 162 | profile: opts.profile, 163 | library: String::from("mcap-rs-") + env!("CARGO_PKG_VERSION"), 164 | }), 165 | )?; 166 | 167 | Ok(Self { 168 | writer: Some(WriteMode::Raw(writer)), 169 | compression: opts.compression, 170 | schemas: HashMap::new(), 171 | channels: HashMap::new(), 172 | stats: records::Statistics::default(), 173 | chunk_indexes: Vec::new(), 174 | attachment_indexes: Vec::new(), 175 | metadata_indexes: Vec::new(), 176 | }) 177 | } 178 | 179 | /// Adds a channel (and its provided schema, if any), returning its ID. 180 | /// 181 | /// Useful with subsequent calls to [`write_to_known_channel()`](Self::write_to_known_channel) 182 | pub fn add_channel(&mut self, chan: &Channel<'a>) -> McapResult<u16> { 183 | let schema_id = match &chan.schema { 184 | Some(s) => self.add_schema(s)?, 185 | None => 0, 186 | }; 187 | 188 | if let Some(id) = self.channels.get(chan) { 189 | return Ok(*id); 190 | } 191 | 192 | self.stats.channel_count += 1; 193 | 194 | let next_channel_id = self.channels.len() as u16; 195 | assert!(self 196 | .channels 197 | .insert(chan.clone(), next_channel_id) 198 | .is_none()); 199 | self.chunkin_time()? 200 | .write_channel(next_channel_id, schema_id, chan)?; 201 | Ok(next_channel_id) 202 | } 203 | 204 | fn add_schema(&mut self, schema: &Schema<'a>) -> McapResult<u16> { 205 | if let Some(id) = self.schemas.get(schema) { 206 | return Ok(*id); 207 | } 208 | 209 | self.stats.schema_count += 1; 210 | 211 | // Schema IDs cannot be zero, that's the sentinel value in a channel 212 | // for "no schema" 213 | let next_schema_id = self.schemas.len() as u16 + 1; 214 | assert!(self 215 | .schemas 216 | .insert(schema.clone(), next_schema_id) 217 | .is_none()); 218 | self.chunkin_time()?.write_schema(next_schema_id, schema)?; 219 | Ok(next_schema_id) 220 | } 221 | 222 | /// Write the given message (and its provided channel, if needed). 223 | pub fn write(&mut self, message: &Message<'a>) -> McapResult<()> { 224 | let channel_id = self.add_channel(&message.channel)?; 225 | let header = MessageHeader { 226 | channel_id, 227 | sequence: message.sequence, 228 | log_time: message.log_time, 229 | publish_time: message.publish_time, 230 | }; 231 | let data: &[u8] = &message.data; 232 | self.write_to_known_channel(&header, data) 233 | } 234 | 235 | /// Write a message to an added channel, given its ID. 236 | /// 237 | /// This skips hash lookups of the channel and schema if you already added them.
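///
/// A sketch of the intended pattern, using a made-up schema-less channel
/// (field names follow this crate's `Channel` and `MessageHeader`):
/// ```no_run
/// # fn demo<W: std::io::Write + std::io::Seek>(writer: &mut mcap_rs::Writer<W>) -> mcap_rs::McapResult<()> {
/// let channel_id = writer.add_channel(&mcap_rs::Channel {
///     topic: String::from("/chat"),
///     schema: None,
///     message_encoding: String::from("json"),
///     metadata: Default::default(),
/// })?;
/// for sequence in 0..10u32 {
///     let header = mcap_rs::records::MessageHeader {
///         channel_id,
///         sequence,
///         log_time: sequence as u64,
///         publish_time: sequence as u64,
///     };
///     writer.write_to_known_channel(&header, b"{}")?;
/// }
/// # Ok(())
/// # }
/// ```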
238 | pub fn write_to_known_channel( 239 | &mut self, 240 | header: &MessageHeader, 241 | data: &[u8], 242 | ) -> McapResult<()> { 243 | // The number of channels should be relatively small, 244 | // do a quick linear search to make sure we're not being given a bogus ID 245 | if !self.channels.values().any(|id| *id == header.channel_id) { 246 | return Err(McapError::UnknownChannel( 247 | header.sequence, 248 | header.channel_id, 249 | )); 250 | } 251 | 252 | self.stats.message_count += 1; 253 | self.stats.message_start_time = match self.stats.message_start_time { 254 | 0 => header.log_time, 255 | nz => nz.min(header.log_time), 256 | }; 257 | self.stats.message_end_time = match self.stats.message_end_time { 258 | 0 => header.log_time, 259 | nz => nz.max(header.log_time), 260 | }; 261 | *self 262 | .stats 263 | .channel_message_counts 264 | .entry(header.channel_id) 265 | .or_insert(0) += 1; 266 | 267 | self.chunkin_time()?.write_message(header, data)?; 268 | Ok(()) 269 | } 270 | 271 | pub fn attach(&mut self, attachment: &Attachment) -> McapResult<()> { 272 | self.stats.attachment_count += 1; 273 | 274 | let header = records::AttachmentHeader { 275 | log_time: attachment.log_time, 276 | create_time: attachment.create_time, 277 | name: attachment.name.clone(), 278 | content_type: attachment.content_type.clone(), 279 | data_len: attachment.data.len() as u64, 280 | }; 281 | 282 | // Attachments don't live in chunks. 283 | let w = self.finish_chunk()?; 284 | 285 | let offset = w.stream_position()?; 286 | 287 | write_record( 288 | w, 289 | &Record::Attachment { 290 | header, 291 | data: &attachment.data, 292 | }, 293 | )?; 294 | 295 | let length = w.stream_position()? - offset; 296 | self.attachment_indexes.push(records::AttachmentIndex { 297 | offset, 298 | length, 299 | log_time: attachment.log_time, 300 | create_time: attachment.create_time, 301 | data_size: attachment.data.len() as u64, 302 | name: attachment.name.clone(), 303 | content_type: attachment.content_type.clone(), 304 | }); 305 | 306 | Ok(()) 307 | } 308 | 309 | pub fn write_metadata(&mut self, metadata: &Metadata) -> McapResult<()> { 310 | self.stats.metadata_count += 1; 311 | 312 | let w = self.finish_chunk()?; 313 | let offset = w.stream_position()?; 314 | 315 | // Should we specialize this to avoid taking a clone of the map? 316 | write_record(w, &Record::Metadata(metadata.clone()))?; 317 | 318 | let length = w.stream_position()? - offset; 319 | 320 | self.metadata_indexes.push(records::MetadataIndex { 321 | offset, 322 | length, 323 | name: metadata.name.clone(), 324 | }); 325 | 326 | Ok(()) 327 | } 328 | 329 | /// Finishes the current chunk, if we have one, and flushes the underlying 330 | /// [writer](Write). 331 | /// 332 | /// We finish the chunk to guarantee that the file can be streamed by future 333 | /// readers at least up to this point. 334 | /// (The alternative is to just flush the writer mid-chunk. 335 | /// But if we did that, and then writing was suddenly interrupted afterwards, 336 | /// readers would have to try to recover a half-written chunk, 337 | /// probably with an unfinished compression stream.) 338 | /// 339 | /// Note that lossless compression schemes like LZ4 and Zstd improve 340 | /// as they go, so larger chunks will tend to have better compression. 341 | /// (Of course, this depends heavily on the entropy of what's being compressed! 342 | /// A stream of zeroes will compress great at any chunk size, and a stream 343 | /// of random data will compress terribly at any chunk size.)
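///
/// A sketch of flushing periodically to bound how much data an interrupted
/// process could lose (the every-100-messages interval here is arbitrary):
/// ```no_run
/// # fn demo<'a, W: std::io::Write + std::io::Seek>(
/// #     writer: &mut mcap_rs::Writer<'a, W>,
/// #     messages: &[mcap_rs::Message<'a>],
/// # ) -> mcap_rs::McapResult<()> {
/// for (i, m) in messages.iter().enumerate() {
///     writer.write(m)?;
///     if (i + 1) % 100 == 0 {
///         writer.flush()?; // Finishes the current chunk, then flushes the writer.
///     }
/// }
/// # Ok(())
/// # }
/// ```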
344 | pub fn flush(&mut self) -> McapResult<()> { 345 | self.finish_chunk()?.flush()?; 346 | Ok(()) 347 | } 348 | 349 | /// `.expect()` message when we go to write and self.writer is `None`, 350 | /// which should only happen when [`Writer::finish()`] was called. 351 | const WHERE_WRITER: &'static str = "Trying to write a record on a finished MCAP"; 352 | 353 | /// Starts a new chunk if we haven't done so already. 354 | fn chunkin_time(&mut self) -> McapResult<&mut ChunkWriter<W>> { 355 | // Some Rust trickery: we can't move the writer out of self.writer, 356 | // leave that empty for a bit, and then replace it with a ChunkWriter. 357 | // (That would leave it in an unspecified state if we bailed here!) 358 | // Instead, briefly swap it out for a null writer while we set up the chunker. 359 | // The writer will only be None if finish() was called. 360 | let prev_writer = self.writer.take().expect(Self::WHERE_WRITER); 361 | 362 | self.writer = Some(match prev_writer { 363 | WriteMode::Raw(w) => { 364 | // It's chunkin time. 365 | self.stats.chunk_count += 1; 366 | WriteMode::Chunk(ChunkWriter::new(w, self.compression)?) 367 | } 368 | chunk => chunk, 369 | }); 370 | 371 | match &mut self.writer { 372 | Some(WriteMode::Chunk(c)) => Ok(c), 373 | _ => unreachable!(), 374 | } 375 | } 376 | 377 | /// Finish the current chunk, if we have one. 378 | fn finish_chunk(&mut self) -> McapResult<&mut W> { 379 | // See above 380 | let prev_writer = self.writer.take().expect(Self::WHERE_WRITER); 381 | 382 | self.writer = Some(match prev_writer { 383 | WriteMode::Chunk(c) => { 384 | let (w, index) = c.finish()?; 385 | self.chunk_indexes.push(index); 386 | WriteMode::Raw(w) 387 | } 388 | raw => raw, 389 | }); 390 | 391 | match &mut self.writer { 392 | Some(WriteMode::Raw(w)) => Ok(w), 393 | _ => unreachable!(), 394 | } 395 | } 396 | 397 | /// Finishes any current chunk and writes out the rest of the file. 398 | /// 399 | /// Subsequent calls to other methods will panic. 400 | pub fn finish(&mut self) -> McapResult<()> { 401 | if self.writer.is_none() { 402 | // We already called finish(). 403 | // Maybe we're dropping after the user called it? 404 | return Ok(()); 405 | } 406 | 407 | // Finish any chunk we were working on and update stats, indexes, etc. 408 | self.finish_chunk()?; 409 | 410 | // Grab the writer - self.writer becoming None makes subsequent writes fail. 411 | let mut writer = match self.writer.take() { 412 | // We called finish_chunk() above, so we're back to raw writes for 413 | // the summary section. 414 | Some(WriteMode::Raw(w)) => w, 415 | _ => unreachable!(), 416 | }; 417 | let writer = &mut writer; 418 | 419 | // We're done with the data section! 420 | write_record(writer, &Record::EndOfData(records::EndOfData::default()))?; 421 | 422 | // Take all the data we need, swapping in empty containers. 423 | // Without this, we get yelled at for moving things out of a mutable ref 424 | // (&mut self). 425 | // (We could get around all this noise by having finish() take self, 426 | // but then it wouldn't be droppable _and_ finish...able.)
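// (std::mem::take would do the same job, since each of these fields
// implements Default; the explicit swaps below just spell out that
// self is left holding empty-but-valid containers.)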
427 | let mut stats = records::Statistics::default(); 428 | std::mem::swap(&mut stats, &mut self.stats); 429 | 430 | let mut chunk_indexes = Vec::new(); 431 | std::mem::swap(&mut chunk_indexes, &mut self.chunk_indexes); 432 | 433 | let mut attachment_indexes = Vec::new(); 434 | std::mem::swap(&mut attachment_indexes, &mut self.attachment_indexes); 435 | 436 | let mut metadata_indexes = Vec::new(); 437 | std::mem::swap(&mut metadata_indexes, &mut self.metadata_indexes); 438 | 439 | // Make some Schema and Channel lists for the summary section. 440 | // Be sure to grab schema IDs for the channels from the schema hash map before we drain it! 441 | struct ChannelSummary<'a> { 442 | channel: Channel<'a>, 443 | channel_id: u16, 444 | schema_id: u16, 445 | } 446 | 447 | let mut all_channels: Vec> = self 448 | .channels 449 | .drain() 450 | .map(|(channel, channel_id)| { 451 | let schema_id = match &channel.schema { 452 | Some(s) => *self.schemas.get(s).unwrap(), 453 | None => 0, 454 | }; 455 | 456 | ChannelSummary { 457 | channel, 458 | channel_id, 459 | schema_id, 460 | } 461 | }) 462 | .collect(); 463 | all_channels.sort_unstable_by_key(|cs| cs.channel_id); 464 | 465 | let mut all_schemas: Vec<(Schema<'_>, u16)> = self.schemas.drain().collect(); 466 | all_schemas.sort_unstable_by_key(|(_, v)| *v); 467 | 468 | let mut offsets = Vec::new(); 469 | 470 | let summary_start = writer.stream_position()?; 471 | 472 | // Let's get a CRC of the summary section. 473 | let mut ccw = CountingCrcWriter::new(writer); 474 | 475 | fn posit(ccw: &mut CountingCrcWriter) -> io::Result { 476 | ccw.get_mut().stream_position() 477 | } 478 | 479 | // Write all schemas. 480 | let schemas_start = summary_start; 481 | for (schema, id) in all_schemas { 482 | let header = records::SchemaHeader { 483 | id, 484 | name: schema.name, 485 | encoding: schema.encoding, 486 | data_len: schema.data.len() as u32, 487 | }; 488 | let data = schema.data; 489 | 490 | write_record(&mut ccw, &Record::Schema { header, data })?; 491 | } 492 | let schemas_end = posit(&mut ccw)?; 493 | if schemas_end - schemas_start > 0 { 494 | offsets.push(records::SummaryOffset { 495 | group_opcode: op::SCHEMA, 496 | group_start: schemas_start, 497 | group_length: schemas_end - schemas_start, 498 | }); 499 | } 500 | 501 | // Write all channels. 502 | let channels_start = schemas_end; 503 | for cs in all_channels { 504 | let rec = records::Channel { 505 | id: cs.channel_id, 506 | schema_id: cs.schema_id, 507 | topic: cs.channel.topic, 508 | message_encoding: cs.channel.message_encoding, 509 | metadata: cs.channel.metadata, 510 | }; 511 | write_record(&mut ccw, &Record::Channel(rec))?; 512 | } 513 | let channels_end = posit(&mut ccw)?; 514 | if channels_end - channels_start > 0 { 515 | offsets.push(records::SummaryOffset { 516 | group_opcode: op::CHANNEL, 517 | group_start: channels_start, 518 | group_length: channels_end - channels_start, 519 | }); 520 | } 521 | 522 | // Write all chunk indexes. 
523 | let chunk_indexes_start = channels_end; 524 | for index in chunk_indexes { 525 | write_record(&mut ccw, &Record::ChunkIndex(index))?; 526 | } 527 | let chunk_indexes_end = posit(&mut ccw)?; 528 | if chunk_indexes_end - chunk_indexes_start > 0 { 529 | offsets.push(records::SummaryOffset { 530 | group_opcode: op::CHUNK_INDEX, 531 | group_start: chunk_indexes_start, 532 | group_length: chunk_indexes_end - chunk_indexes_start, 533 | }); 534 | } 535 | 536 | // ...and attachment indexes 537 | let attachment_indexes_start = chunk_indexes_end; 538 | for index in attachment_indexes { 539 | write_record(&mut ccw, &Record::AttachmentIndex(index))?; 540 | } 541 | let attachment_indexes_end = posit(&mut ccw)?; 542 | if attachment_indexes_end - attachment_indexes_start > 0 { 543 | offsets.push(records::SummaryOffset { 544 | group_opcode: op::ATTACHMENT_INDEX, 545 | group_start: attachment_indexes_start, 546 | group_length: attachment_indexes_end - attachment_indexes_start, 547 | }); 548 | } 549 | 550 | // ...and metadata indexes 551 | let metadata_indexes_start = attachment_indexes_end; 552 | for index in metadata_indexes { 553 | write_record(&mut ccw, &Record::MetadataIndex(index))?; 554 | } 555 | let metadata_indexes_end = posit(&mut ccw)?; 556 | if metadata_indexes_end - metadata_indexes_start > 0 { 557 | offsets.push(records::SummaryOffset { 558 | group_opcode: op::METADATA_INDEX, 559 | group_start: metadata_indexes_start, 560 | group_length: metadata_indexes_end - metadata_indexes_start, 561 | }); 562 | } 563 | 564 | let stats_start = metadata_indexes_end; 565 | write_record(&mut ccw, &Record::Statistics(stats))?; 566 | let stats_end = posit(&mut ccw)?; 567 | assert!(stats_end > stats_start); 568 | offsets.push(records::SummaryOffset { 569 | group_opcode: op::STATISTICS, 570 | group_start: stats_start, 571 | group_length: stats_end - stats_start, 572 | }); 573 | 574 | // Write the summary offsets we've been accumulating 575 | let summary_offset_start = stats_end; 576 | for offset in offsets { 577 | write_record(&mut ccw, &Record::SummaryOffset(offset))?; 578 | } 579 | 580 | // Wat: the CRC in the footer _includes_ part of the footer. 
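// Concretely: the summary CRC we emit below covers everything from summary_start
// through the footer's opcode, length, and both offset fields - that is, everything
// except the four CRC bytes themselves and the trailing magic.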
581 | op_and_len(&mut ccw, op::FOOTER, 20)?; 582 | ccw.write_u64::(summary_start)?; 583 | ccw.write_u64::(summary_offset_start)?; 584 | 585 | let (writer, summary_crc) = ccw.finalize(); 586 | 587 | writer.write_u32::(summary_crc)?; 588 | 589 | writer.write_all(MAGIC)?; 590 | writer.flush()?; 591 | Ok(()) 592 | } 593 | } 594 | 595 | impl<'a, W: Write + Seek> Drop for Writer<'a, W> { 596 | fn drop(&mut self) { 597 | self.finish().unwrap() 598 | } 599 | } 600 | 601 | enum Compressor { 602 | Null(W), 603 | Zstd(zstd::Encoder<'static, W>), 604 | Lz4(lz4::Encoder), 605 | } 606 | 607 | impl Compressor { 608 | fn finish(self) -> io::Result { 609 | Ok(match self { 610 | Compressor::Null(w) => w, 611 | Compressor::Zstd(w) => w.finish()?, 612 | Compressor::Lz4(w) => { 613 | let (w, err) = w.finish(); 614 | err?; 615 | w 616 | } 617 | }) 618 | } 619 | } 620 | 621 | impl Write for Compressor { 622 | fn write(&mut self, buf: &[u8]) -> io::Result { 623 | match self { 624 | Compressor::Null(w) => w.write(buf), 625 | Compressor::Zstd(w) => w.write(buf), 626 | Compressor::Lz4(w) => w.write(buf), 627 | } 628 | } 629 | 630 | fn flush(&mut self) -> io::Result<()> { 631 | match self { 632 | Compressor::Null(w) => w.flush(), 633 | Compressor::Zstd(w) => w.flush(), 634 | Compressor::Lz4(w) => w.flush(), 635 | } 636 | } 637 | } 638 | 639 | struct ChunkWriter { 640 | header_start: u64, 641 | stream_start: u64, 642 | header: records::ChunkHeader, 643 | compressor: CountingCrcWriter>, 644 | indexes: BTreeMap>, 645 | } 646 | 647 | impl ChunkWriter { 648 | fn new(mut writer: W, compression: Option) -> McapResult { 649 | let header_start = writer.stream_position()?; 650 | 651 | op_and_len(&mut writer, op::CHUNK, !0)?; 652 | 653 | let compression_name = match compression { 654 | Some(Compression::Zstd) => "zstd", 655 | Some(Compression::Lz4) => "lz4", 656 | None => "", 657 | }; 658 | 659 | let header = records::ChunkHeader { 660 | message_start_time: 0, 661 | message_end_time: 0, 662 | uncompressed_size: !0, 663 | uncompressed_crc: !0, 664 | compression: String::from(compression_name), 665 | compressed_size: !0, 666 | }; 667 | 668 | writer.write_le(&header)?; 669 | let stream_start = writer.stream_position()?; 670 | 671 | let compressor = match compression { 672 | Some(Compression::Zstd) => { 673 | let mut enc = zstd::Encoder::new(writer, 0)?; 674 | enc.multithread(num_cpus::get_physical() as u32)?; 675 | Compressor::Zstd(enc) 676 | } 677 | Some(Compression::Lz4) => { 678 | let b = lz4::EncoderBuilder::new(); 679 | Compressor::Lz4(b.build(writer)?) 
680 | } 681 | None => Compressor::Null(writer), 682 | }; 683 | let compressor = CountingCrcWriter::new(compressor); 684 | Ok(Self { 685 | compressor, 686 | header_start, 687 | stream_start, 688 | header, 689 | indexes: BTreeMap::new(), 690 | }) 691 | } 692 | 693 | fn write_schema(&mut self, id: u16, schema: &Schema) -> McapResult<()> { 694 | let header = records::SchemaHeader { 695 | id, 696 | name: schema.name.clone(), 697 | encoding: schema.encoding.clone(), 698 | data_len: schema.data.len() as u32, 699 | }; 700 | write_record( 701 | &mut self.compressor, 702 | &Record::Schema { 703 | header, 704 | data: Cow::Borrowed(&schema.data), 705 | }, 706 | )?; 707 | Ok(()) 708 | } 709 | 710 | fn write_channel(&mut self, id: u16, schema_id: u16, chan: &Channel) -> McapResult<()> { 711 | assert_eq!(schema_id == 0, chan.schema.is_none()); 712 | 713 | let rec = records::Channel { 714 | id, 715 | schema_id, 716 | topic: chan.topic.clone(), 717 | message_encoding: chan.message_encoding.clone(), 718 | metadata: chan.metadata.clone(), 719 | }; 720 | 721 | write_record(&mut self.compressor, &Record::Channel(rec))?; 722 | Ok(()) 723 | } 724 | 725 | fn write_message(&mut self, header: &MessageHeader, data: &[u8]) -> McapResult<()> { 726 | // Update min/max time 727 | self.header.message_start_time = match self.header.message_start_time { 728 | 0 => header.log_time, 729 | nz => nz.min(header.log_time), 730 | }; 731 | self.header.message_end_time = match self.header.message_end_time { 732 | 0 => header.log_time, 733 | nz => nz.max(header.log_time), 734 | }; 735 | 736 | // Add an index for this message 737 | self.indexes 738 | .entry(header.channel_id) 739 | .or_default() 740 | .push(records::MessageIndexEntry { 741 | log_time: header.log_time, 742 | offset: self.compressor.position(), 743 | }); 744 | 745 | write_record( 746 | &mut self.compressor, 747 | &Record::Message { 748 | header: *header, 749 | data: Cow::Borrowed(data), 750 | }, 751 | )?; 752 | Ok(()) 753 | } 754 | 755 | fn finish(mut self) -> McapResult<(W, records::ChunkIndex)> { 756 | // Get the number of uncompressed bytes written and the CRC. 757 | self.header.uncompressed_size = self.compressor.position(); 758 | let (stream, crc) = self.compressor.finalize(); 759 | self.header.uncompressed_crc = crc; 760 | 761 | // Finalize the compression stream - it maintains an internal buffer. 762 | let mut writer = stream.finish()?; 763 | let end_of_stream = writer.stream_position()?; 764 | self.header.compressed_size = end_of_stream - self.stream_start; 765 | let record_size = (end_of_stream - self.header_start) as usize - 9; // 1 byte op, 8 byte len 766 | 767 | // Back up, write our finished header, then continue at the end of the stream. 768 | writer.seek(SeekFrom::Start(self.header_start))?; 769 | op_and_len(&mut writer, op::CHUNK, record_size)?; 770 | writer.write_le(&self.header)?; 771 | assert_eq!(self.stream_start, writer.stream_position()?); 772 | assert_eq!(writer.seek(SeekFrom::End(0))?, end_of_stream); 773 | 774 | // Write our message indexes 775 | let mut message_index_offsets: BTreeMap = BTreeMap::new(); 776 | 777 | let mut index_buf = Vec::new(); 778 | for (channel_id, records) in self.indexes { 779 | assert!(message_index_offsets 780 | .insert(channel_id, writer.stream_position()?) 
781 | .is_none()); 782 | index_buf.clear(); 783 | let index = records::MessageIndex { 784 | channel_id, 785 | records, 786 | }; 787 | 788 | Cursor::new(&mut index_buf).write_le(&index)?; 789 | op_and_len(&mut writer, op::MESSAGE_INDEX, index_buf.len())?; 790 | writer.write_all(&index_buf)?; 791 | } 792 | let end_of_indexes = writer.stream_position()?; 793 | 794 | let index = records::ChunkIndex { 795 | message_start_time: self.header.message_start_time, 796 | message_end_time: self.header.message_end_time, 797 | chunk_start_offset: self.header_start, 798 | chunk_length: end_of_stream - self.header_start, 799 | message_index_offsets, 800 | message_index_length: end_of_indexes - end_of_stream, 801 | compression: self.header.compression, 802 | compressed_size: self.header.compressed_size, 803 | uncompressed_size: self.header.uncompressed_size, 804 | }; 805 | 806 | Ok((writer, index)) 807 | } 808 | } 809 | -------------------------------------------------------------------------------- /src/read.rs: -------------------------------------------------------------------------------- 1 | //! Read MCAP files 2 | //! 3 | //! MCAPs are read from a byte slice instead of a [`Read`] trait object. 4 | //! This helps us avoid unnecessary copies, since [`Schema`]s and [`Message`]s 5 | //! can refer directly to their data. 6 | //! 7 | //! Consider [memory-mapping](https://docs.rs/memmap/0.7.0/memmap/struct.Mmap.html) 8 | //! the file - the OS will load (and cache!) it on-demand, without any 9 | //! further system calls. 10 | use std::{ 11 | borrow::Cow, 12 | collections::{BTreeMap, HashMap}, 13 | fmt, 14 | io::{self, prelude::*, Cursor}, 15 | sync::Arc, 16 | }; 17 | 18 | use binrw::prelude::*; 19 | use crc32fast::hash as crc32; 20 | use enumset::{enum_set, EnumSet, EnumSetType}; 21 | use log::*; 22 | 23 | use crate::{ 24 | io_utils::CountingCrcReader, 25 | records::{self, op, Record}, 26 | Attachment, Channel, McapError, McapResult, Message, Schema, MAGIC, 27 | }; 28 | 29 | /// Nonstandard reading options, e.g., 30 | /// to be more lenient when trying to recover incomplete/damaged files. 31 | /// 32 | /// More may be added in future releases. 33 | #[derive(EnumSetType, Debug)] 34 | pub enum Options { 35 | /// Don't require the MCAP file to end with its magic bytes. 36 | IgnoreEndMagic, 37 | } 38 | 39 | /// Scans a mapped MCAP file from start to end, returning each record. 40 | /// 41 | /// You probably want a [MessageStream] instead - this yields the raw records 42 | /// from the file without any postprocessing (decompressing chunks, etc.) 43 | /// and is mostly meant as a building block for higher-level readers. 44 | pub struct LinearReader<'a> { 45 | buf: &'a [u8], 46 | malformed: bool, 47 | } 48 | 49 | impl<'a> LinearReader<'a> { 50 | /// Create a reader for the given file, 51 | /// checking [`MAGIC`] bytes on both ends. 52 | pub fn new(buf: &'a [u8]) -> McapResult { 53 | Self::new_with_options(buf, enum_set!()) 54 | } 55 | 56 | /// Create a reader for the given file with special options. 
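///
/// For example, a sketch of scraping what we can out of a truncated file
/// (`mapped` is assumed to hold the file's bytes):
/// ```no_run
/// use enumset::enum_set;
/// use mcap_rs::read::{LinearReader, Options};
/// # fn demo(mapped: &[u8]) -> mcap_rs::McapResult<()> {
/// for record in LinearReader::new_with_options(mapped, enum_set!(Options::IgnoreEndMagic))? {
///     println!("{:?}", record?);
/// }
/// # Ok(())
/// # }
/// ```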
57 | pub fn new_with_options(buf: &'a [u8], options: EnumSet) -> McapResult { 58 | if !buf.starts_with(MAGIC) 59 | || (!options.contains(Options::IgnoreEndMagic) 60 | && (!buf.ends_with(MAGIC) || buf.len() < 2 * MAGIC.len())) 61 | { 62 | return Err(McapError::BadMagic); 63 | } 64 | let buf = &buf[MAGIC.len()..]; 65 | if buf.ends_with(MAGIC) { 66 | Ok(Self::sans_magic(&buf[0..buf.len() - MAGIC.len()])) 67 | } else { 68 | Ok(Self::sans_magic(buf)) 69 | } 70 | } 71 | 72 | /// Like [`new()`](Self::new), but assumes `buf` has the magic bytes sliced off. 73 | /// 74 | /// Useful for iterating through slices of an MCAP file instead of the whole thing. 75 | pub fn sans_magic(buf: &'a [u8]) -> Self { 76 | Self { 77 | buf, 78 | malformed: false, 79 | } 80 | } 81 | 82 | /// Returns the number of unprocessed bytes 83 | /// (sans the file's starting and ending magic) 84 | /// 85 | /// Used to calculate offsets for the data section et al. 86 | fn bytes_remaining(&self) -> usize { 87 | self.buf.len() 88 | } 89 | } 90 | 91 | impl<'a> Iterator for LinearReader<'a> { 92 | type Item = McapResult>; 93 | 94 | fn next(&mut self) -> Option { 95 | if self.buf.is_empty() { 96 | return None; 97 | } 98 | 99 | // After an unrecoverable error (due to something wonky in the file), 100 | // don't keep trying to walk it. 101 | if self.malformed { 102 | return None; 103 | } 104 | 105 | let record = match read_record_from_slice(&mut self.buf) { 106 | Ok(k) => k, 107 | Err(e) => { 108 | self.malformed = true; 109 | return Some(Err(e)); 110 | } 111 | }; 112 | 113 | Some(Ok(record)) 114 | } 115 | } 116 | 117 | /// Read a record and advance the slice 118 | fn read_record_from_slice<'a>(buf: &mut &'a [u8]) -> McapResult> { 119 | if buf.len() < 5 { 120 | warn!("Malformed MCAP - not enough space for record + length!"); 121 | return Err(McapError::UnexpectedEof); 122 | } 123 | 124 | let op = read_u8(buf); 125 | let len = read_u64(buf); 126 | 127 | if buf.len() < len as usize { 128 | warn!( 129 | "Malformed MCAP - record with length {len}, but only {} bytes remain", 130 | buf.len() 131 | ); 132 | return Err(McapError::UnexpectedEof); 133 | } 134 | 135 | let body = &buf[..len as usize]; 136 | debug!("slice: opcode {op:02X}, length {len}"); 137 | let record = read_record(op, body)?; 138 | trace!(" {:?}", record); 139 | 140 | *buf = &buf[len as usize..]; 141 | Ok(record) 142 | } 143 | 144 | /// Given a record's opcode and its slice, read it into a [Record] 145 | fn read_record(op: u8, body: &[u8]) -> McapResult> { 146 | macro_rules! 
record { 147 | ($b:ident) => {{ 148 | let mut cur = Cursor::new($b); 149 | let res = cur.read_le()?; 150 | assert_eq!($b.len() as u64, cur.position()); 151 | res 152 | }}; 153 | } 154 | 155 | Ok(match op { 156 | op::HEADER => Record::Header(record!(body)), 157 | op::FOOTER => Record::Footer(record!(body)), 158 | op::SCHEMA => { 159 | let mut c = Cursor::new(body); 160 | let header: records::SchemaHeader = c.read_le()?; 161 | let data = Cow::Borrowed(&body[c.position() as usize..]); 162 | if header.data_len != data.len() as u32 { 163 | warn!( 164 | "Schema {}'s data length doesn't match the total schema length", 165 | header.name 166 | ); 167 | } 168 | Record::Schema { header, data } 169 | } 170 | op::CHANNEL => Record::Channel(record!(body)), 171 | op::MESSAGE => { 172 | let mut c = Cursor::new(body); 173 | let header = c.read_le()?; 174 | let data = Cow::Borrowed(&body[c.position() as usize..]); 175 | Record::Message { header, data } 176 | } 177 | op::CHUNK => { 178 | let mut c = Cursor::new(body); 179 | let header: records::ChunkHeader = c.read_le()?; 180 | let data = &body[c.position() as usize..]; 181 | if header.compressed_size != data.len() as u64 { 182 | warn!("Chunk's compressed length doesn't match its header"); 183 | } 184 | Record::Chunk { header, data } 185 | } 186 | op::MESSAGE_INDEX => Record::MessageIndex(record!(body)), 187 | op::CHUNK_INDEX => Record::ChunkIndex(record!(body)), 188 | op::ATTACHMENT => { 189 | let mut c = Cursor::new(body); 190 | let header: records::AttachmentHeader = c.read_le()?; 191 | let data = &body[c.position() as usize..body.len() - 4]; 192 | if header.data_len != data.len() as u64 { 193 | warn!( 194 | "Attachment {}'s data length doesn't match the total schema length", 195 | header.name 196 | ); 197 | } 198 | let crc = Cursor::new(&body[body.len() - 4..]).read_le()?; 199 | 200 | // We usually leave CRCs to higher-level readers - 201 | // (ChunkReader, read_summary(), etc.) - but 202 | // 203 | // 1. We can trivially check it here without checking other records, 204 | // decompressing anything, or doing any other non-trivial work 205 | // 206 | // 2. Since the CRC depends on the serialized header, it doesn't make 207 | // much sense to have users check it. 208 | // (What would they do? lol reserialize the header?) 209 | if crc != 0 { 210 | let calculated = crc32(&body[..body.len() - 4]); 211 | if crc != calculated { 212 | return Err(McapError::BadAttachmentCrc { 213 | saved: crc, 214 | calculated, 215 | }); 216 | } 217 | } 218 | 219 | Record::Attachment { header, data } 220 | } 221 | op::ATTACHMENT_INDEX => Record::AttachmentIndex(record!(body)), 222 | op::STATISTICS => Record::Statistics(record!(body)), 223 | op::METADATA => Record::Metadata(record!(body)), 224 | op::METADATA_INDEX => Record::MetadataIndex(record!(body)), 225 | op::SUMMARY_OFFSET => Record::SummaryOffset(record!(body)), 226 | op::END_OF_DATA => Record::EndOfData(record!(body)), 227 | opcode => Record::Unknown { 228 | opcode, 229 | data: Cow::Borrowed(body), 230 | }, 231 | }) 232 | } 233 | 234 | enum ChunkDecompressor<'a> { 235 | Null(LinearReader<'a>), 236 | Compressed(Option>>), 237 | } 238 | 239 | /// Streams records out of a [Chunk](Record::Chunk), decompressing as needed. 
240 | pub struct ChunkReader<'a> { 241 | header: records::ChunkHeader, 242 | decompressor: ChunkDecompressor<'a>, 243 | } 244 | 245 | impl<'a> ChunkReader<'a> { 246 | pub fn new(header: records::ChunkHeader, data: &'a [u8]) -> McapResult<Self> { 247 | let decompressor = match header.compression.as_str() { 248 | "zstd" => ChunkDecompressor::Compressed(Some(CountingCrcReader::new(Box::new( 249 | zstd::Decoder::new(data)?, 250 | )))), 251 | "lz4" => ChunkDecompressor::Compressed(Some(CountingCrcReader::new(Box::new( 252 | lz4::Decoder::new(data)?, 253 | )))), 254 | "" => { 255 | if header.uncompressed_size != header.compressed_size { 256 | warn!( 257 | "Chunk is uncompressed, but claims different compressed/uncompressed lengths" 258 | ); 259 | } 260 | 261 | if header.uncompressed_crc != 0 { 262 | let calculated = crc32(data); 263 | if header.uncompressed_crc != calculated { 264 | return Err(McapError::BadChunkCrc { 265 | saved: header.uncompressed_crc, 266 | calculated, 267 | }); 268 | } 269 | } 270 | 271 | ChunkDecompressor::Null(LinearReader::sans_magic(data)) 272 | } 273 | wat => return Err(McapError::UnsupportedCompression(wat.to_string())), 274 | }; 275 | 276 | Ok(Self { 277 | header, 278 | decompressor, 279 | }) 280 | } 281 | } 282 | 283 | impl<'a> Iterator for ChunkReader<'a> { 284 | type Item = McapResult<Record<'a>>; 285 | 286 | fn next(&mut self) -> Option<Self::Item> { 287 | match &mut self.decompressor { 288 | ChunkDecompressor::Null(r) => r.next(), 289 | ChunkDecompressor::Compressed(stream) => { 290 | // If we consumed the stream last time to get the CRC, 291 | // or because of an error, we're done. 292 | if stream.is_none() { 293 | return None; 294 | } 295 | 296 | let s = stream.as_mut().unwrap(); 297 | 298 | let record = match read_record_from_chunk_stream(s) { 299 | Ok(k) => k, 300 | Err(e) => { 301 | *stream = None; // Don't try to recover. 302 | return Some(Err(e)); 303 | } 304 | }; 305 | 306 | // If we've read all there is to read... 307 | if s.position() >= self.header.uncompressed_size { 308 | // Get the CRC. 309 | let calculated = stream.take().unwrap().finalize(); 310 | 311 | // If the header stored a CRC 312 | // and it doesn't match what we have, complain. 313 | if self.header.uncompressed_crc != 0 314 | && self.header.uncompressed_crc != calculated 315 | { 316 | return Some(Err(McapError::BadChunkCrc { 317 | saved: self.header.uncompressed_crc, 318 | calculated, 319 | })); 320 | } 321 | // All good! 322 | } 323 | 324 | Some(Ok(record)) 325 | } 326 | } 327 | } 328 | } 329 | 330 | /// Like [read_record_from_slice], but for a decompression stream 331 | fn read_record_from_chunk_stream<'a, R: Read>(r: &mut R) -> McapResult<Record<'a>> { 332 | // We can't use binrw because compression streams aren't seekable. 333 | // byteorder time! 334 | use byteorder::{ReadBytesExt, LE}; 335 | 336 | let op = r.read_u8()?; 337 | let len = r.read_u64::<LE>()?; 338 | 339 | debug!("chunk: opcode {op:02X}, length {len}"); 340 | let record = match op { 341 | op::SCHEMA => { 342 | let mut record = Vec::new(); 343 | r.take(len).read_to_end(&mut record)?; 344 | if len as usize != record.len() { 345 | return Err(McapError::UnexpectedEoc); 346 | } 347 | 348 | let mut c = Cursor::new(&record); 349 | let header: records::SchemaHeader = c.read_le()?; 350 | 351 | let header_end = c.position(); 352 | 353 | // Should we rotate and shrink instead?
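// (split_off returns bytes [header_end, len) as their own Vec and leaves
// the already-parsed header bytes behind in `record`, so only the schema
// data is kept once `record` goes out of scope.)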
354 | let data = record.split_off(header_end as usize); 355 | 356 | if header.data_len as usize != data.len() { 357 | warn!( 358 | "Schema {}'s data length doesn't match the total schema length", 359 | header.name 360 | ); 361 | } 362 | Record::Schema { 363 | header, 364 | data: Cow::Owned(data), 365 | } 366 | } 367 | op::CHANNEL => { 368 | let mut record = Vec::new(); 369 | r.take(len).read_to_end(&mut record)?; 370 | if len as usize != record.len() { 371 | return Err(McapError::UnexpectedEoc); 372 | } 373 | 374 | let mut c = Cursor::new(&record); 375 | let channel: records::Channel = c.read_le()?; 376 | 377 | if c.position() != record.len() as u64 { 378 | warn!( 379 | "Channel {}'s length doesn't match its record length", 380 | channel.topic 381 | ); 382 | } 383 | 384 | Record::Channel(channel) 385 | } 386 | op::MESSAGE => { 387 | // Optimization: messages are the mainstay of the file, 388 | // so allocate the header and the data separately to avoid having 389 | // to split them up or move them around later. 390 | // Fortunately, message headers are fixed length. 391 | const HEADER_LEN: u64 = 22; 392 | 393 | let mut header_buf = Vec::new(); 394 | r.take(HEADER_LEN).read_to_end(&mut header_buf)?; 395 | if header_buf.len() as u64 != HEADER_LEN { 396 | return Err(McapError::UnexpectedEoc); 397 | } 398 | let header: records::MessageHeader = Cursor::new(header_buf).read_le()?; 399 | 400 | let mut data = Vec::new(); 401 | r.take(len - HEADER_LEN).read_to_end(&mut data)?; 402 | if data.len() as u64 != len - HEADER_LEN { 403 | return Err(McapError::UnexpectedEoc); 404 | } 405 | 406 | Record::Message { 407 | header, 408 | data: Cow::Owned(data), 409 | } 410 | } 411 | wut => return Err(McapError::UnexpectedChunkRecord(wut)), 412 | }; 413 | trace!(" {:?}", record); 414 | Ok(record) 415 | } 416 | 417 | /// Like [`LinearReader`], but unpacks chunks' records into its stream 418 | pub struct ChunkFlattener<'a> { 419 | top_level: LinearReader<'a>, 420 | dechunk: Option>, 421 | malformed: bool, 422 | } 423 | 424 | impl<'a> ChunkFlattener<'a> { 425 | pub fn new(buf: &'a [u8]) -> McapResult { 426 | Self::new_with_options(buf, enum_set!()) 427 | } 428 | 429 | pub fn new_with_options(buf: &'a [u8], options: EnumSet) -> McapResult { 430 | let top_level = LinearReader::new_with_options(buf, options)?; 431 | Ok(Self { 432 | top_level, 433 | dechunk: None, 434 | malformed: false, 435 | }) 436 | } 437 | 438 | fn bytes_remaining(&self) -> usize { 439 | self.top_level.bytes_remaining() 440 | } 441 | } 442 | 443 | impl<'a> Iterator for ChunkFlattener<'a> { 444 | type Item = McapResult>; 445 | 446 | fn next(&mut self) -> Option { 447 | if self.malformed { 448 | return None; 449 | } 450 | 451 | let n: Option = loop { 452 | // If we're reading from a chunk, do that until it returns None. 453 | if let Some(d) = &mut self.dechunk { 454 | match d.next() { 455 | Some(d) => break Some(d), 456 | None => self.dechunk = None, 457 | } 458 | } 459 | // Fall through - if we didn't extract a record from a chunk 460 | // (or that chunk ended), move on to the next top-level record. 461 | match self.top_level.next() { 462 | // If it's a chunk, get a new chunk reader going... 463 | Some(Ok(Record::Chunk { header, data })) => { 464 | self.dechunk = match ChunkReader::new(header, data) { 465 | Ok(d) => Some(d), 466 | Err(e) => break Some(Err(e)), 467 | }; 468 | // ...then continue the loop to get the first item from the chunk. 469 | } 470 | // If it's not a chunk, just yield it. 
471 | not_a_chunk => break not_a_chunk, 472 | } 473 | }; 474 | 475 | // Give up on errors 476 | if matches!(n, Some(Err(_))) { 477 | self.malformed = true; 478 | } 479 | n 480 | } 481 | } 482 | 483 | /// Parses schemas and channels and wires them together 484 | #[derive(Debug, Default)] 485 | struct ChannelAccumulator<'a> { 486 | schemas: HashMap<u16, Arc<Schema<'a>>>, 487 | channels: HashMap<u16, Arc<Channel<'a>>>, 488 | } 489 | 490 | impl<'a> ChannelAccumulator<'a> { 491 | fn add_schema(&mut self, header: records::SchemaHeader, data: Cow<'a, [u8]>) -> McapResult<()> { 492 | if header.id == 0 { 493 | return Err(McapError::InvalidSchemaId); 494 | } 495 | 496 | let schema = Arc::new(Schema { 497 | name: header.name.clone(), 498 | encoding: header.encoding, 499 | data, 500 | }); 501 | 502 | if let Some(preexisting) = self.schemas.insert(header.id, schema.clone()) { 503 | // Oh boy, we have this schema already. 504 | // It had better be identical. 505 | if schema != preexisting { 506 | return Err(McapError::ConflictingSchemas(header.name)); 507 | } 508 | } 509 | Ok(()) 510 | } 511 | 512 | fn add_channel(&mut self, chan: records::Channel) -> McapResult<()> { 513 | // The schema ID can be 0 for "no schema", 514 | // or must reference some previously-read schema. 515 | let schema = if chan.schema_id == 0 { 516 | None 517 | } else { 518 | match self.schemas.get(&chan.schema_id) { 519 | Some(s) => Some(s.clone()), 520 | None => { 521 | return Err(McapError::UnknownSchema(chan.topic, chan.schema_id)); 522 | } 523 | } 524 | }; 525 | 526 | let channel = Arc::new(Channel { 527 | topic: chan.topic.clone(), 528 | schema, 529 | message_encoding: chan.message_encoding, 530 | metadata: chan.metadata, 531 | }); 532 | if let Some(preexisting) = self.channels.insert(chan.id, channel.clone()) { 533 | // Oh boy, we have this channel already. 534 | // It had better be identical. 535 | if preexisting != channel { 536 | return Err(McapError::ConflictingChannels(chan.topic)); 537 | } 538 | } 539 | Ok(()) 540 | } 541 | 542 | fn get(&self, chan_id: u16) -> Option<Arc<Channel<'a>>> { 543 | self.channels.get(&chan_id).cloned() 544 | } 545 | } 546 | 547 | /// Reads all messages from the MCAP file, in the order they were written, and 548 | /// performs needed validation (CRCs, etc.) as we go. 549 | /// 550 | /// This stops at the end of the data section and does not read the summary. 551 | /// 552 | /// Because tying the lifetime of each message to the underlying MCAP memory map 553 | /// makes it very difficult to send between threads or use in async land, 554 | /// and because we assume _most_ MCAP files have _most_ messages in compressed chunks, 555 | /// yielded [`Message`](crate::Message)s have unbounded lifetimes. 556 | /// For messages we've decompressed into their own buffers, this is free! 557 | /// For uncompressed messages, we take a copy of the message's data.
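///
/// A minimal read loop (a sketch; `mapped` is assumed to be the bytes of a
/// memory-mapped MCAP file):
/// ```no_run
/// # fn demo(mapped: &[u8]) -> mcap_rs::McapResult<()> {
/// for message in mcap_rs::MessageStream::new(mapped)? {
///     let message = message?;
///     println!("{}: {} bytes", message.channel.topic, message.data.len());
/// }
/// # Ok(())
/// # }
/// ```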
558 | pub struct MessageStream<'a> { 559 | full_file: &'a [u8], 560 | records: ChunkFlattener<'a>, 561 | done: bool, 562 | channeler: ChannelAccumulator<'static>, 563 | } 564 | 565 | impl<'a> MessageStream<'a> { 566 | pub fn new(buf: &'a [u8]) -> McapResult { 567 | Self::new_with_options(buf, enum_set!()) 568 | } 569 | 570 | pub fn new_with_options(buf: &'a [u8], options: EnumSet) -> McapResult { 571 | let full_file = buf; 572 | let records = ChunkFlattener::new_with_options(buf, options)?; 573 | 574 | Ok(Self { 575 | full_file, 576 | records, 577 | done: false, 578 | channeler: ChannelAccumulator::default(), 579 | }) 580 | } 581 | } 582 | 583 | impl<'a> Iterator for MessageStream<'a> { 584 | type Item = McapResult>; 585 | 586 | fn next(&mut self) -> Option { 587 | if self.done { 588 | return None; 589 | } 590 | 591 | let n = loop { 592 | // Let's start with a working record. 593 | let record = match self.records.next() { 594 | Some(Ok(rec)) => rec, 595 | Some(Err(e)) => break Some(Err(e)), 596 | None => break None, 597 | }; 598 | 599 | match record { 600 | // Insert schemas into self so we know when subsequent channels reference them. 601 | Record::Schema { header, data } => { 602 | let data = Cow::Owned(data.into_owned()); 603 | if let Err(e) = self.channeler.add_schema(header, data) { 604 | break Some(Err(e)); 605 | } 606 | } 607 | 608 | // Insert channels into self so we know when subsequent messages reference them. 609 | Record::Channel(chan) => { 610 | if let Err(e) = self.channeler.add_channel(chan) { 611 | break Some(Err(e)); 612 | } 613 | } 614 | 615 | Record::Message { header, data } => { 616 | // Messages must have a previously-read channel. 617 | let channel = match self.channeler.get(header.channel_id) { 618 | Some(c) => c, 619 | None => { 620 | break Some(Err(McapError::UnknownChannel( 621 | header.sequence, 622 | header.channel_id, 623 | ))) 624 | } 625 | }; 626 | 627 | let m = Message { 628 | channel, 629 | sequence: header.sequence, 630 | log_time: header.log_time, 631 | publish_time: header.publish_time, 632 | data: Cow::Owned(data.into_owned()), 633 | }; 634 | break Some(Ok(m)); 635 | } 636 | 637 | // If it's EOD, do unholy things to calculate the CRC. 638 | Record::EndOfData(end) => { 639 | if end.data_section_crc != 0 { 640 | // This is terrible. Less math with less magic numbers, please. 641 | let data_section_len = (self.full_file.len() - MAGIC.len() * 2) // Actual working area 642 | - self.records.bytes_remaining(); 643 | 644 | let data_section = 645 | &self.full_file[MAGIC.len()..MAGIC.len() + data_section_len]; 646 | let calculated = crc32(data_section); 647 | if end.data_section_crc != calculated { 648 | break Some(Err(McapError::BadDataCrc { 649 | saved: end.data_section_crc, 650 | calculated, 651 | })); 652 | } 653 | } 654 | break None; // We're done at any rate. 655 | } 656 | _skip => {} 657 | }; 658 | }; 659 | 660 | if !matches!(n, Some(Ok(_))) { 661 | self.done = true; 662 | } 663 | n 664 | } 665 | } 666 | 667 | const FOOTER_LEN: usize = 20 + 8 + 1; // 20 bytes + 8 byte len + 1 byte opcode 668 | 669 | /// Read the MCAP footer. 670 | /// 671 | /// You'd probably prefer to use [`Summary::read`] to parse the whole summary, 672 | /// then index into the rest of the file with 673 | /// [`Summary::stream_chunk`], [`attachment`], [`metadata`], etc. 
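///
/// A sketch of that indexed-reading flow:
/// ```no_run
/// use mcap_rs::read::Summary;
/// # fn demo(mapped: &[u8]) -> mcap_rs::McapResult<()> {
/// if let Some(summary) = Summary::read(mapped)? {
///     for index in &summary.chunk_indexes {
///         for message in summary.stream_chunk(mapped, index)? {
///             println!("{}", message?.log_time);
///         }
///     }
/// }
/// # Ok(())
/// # }
/// ```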
674 | pub fn footer(mcap: &[u8]) -> McapResult<records::Footer> { 675 | if mcap.len() < MAGIC.len() * 2 + FOOTER_LEN { 676 | return Err(McapError::UnexpectedEof); 677 | } 678 | 679 | if !mcap.starts_with(MAGIC) || !mcap.ends_with(MAGIC) { 680 | return Err(McapError::BadMagic); 681 | } 682 | 683 | let footer_buf = &mcap[mcap.len() - MAGIC.len() - FOOTER_LEN..]; 684 | 685 | match LinearReader::sans_magic(footer_buf).next() { 686 | Some(Ok(Record::Footer(f))) => Ok(f), 687 | _ => Err(McapError::BadFooter), 688 | } 689 | } 690 | 691 | /// Indexes of an MCAP file parsed from its (optional) summary section 692 | #[derive(Default, Eq, PartialEq)] 693 | pub struct Summary<'a> { 694 | pub stats: Option<records::Statistics>, 695 | /// Maps channel IDs to their channel 696 | pub channels: HashMap<u16, Arc<Channel<'a>>>, 697 | /// Maps schema IDs to their schema 698 | pub schemas: HashMap<u16, Arc<Schema<'a>>>, 699 | pub chunk_indexes: Vec<records::ChunkIndex>, 700 | pub attachment_indexes: Vec<records::AttachmentIndex>, 701 | pub metadata_indexes: Vec<records::MetadataIndex>, 702 | } 703 | 704 | impl fmt::Debug for Summary<'_> { 705 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 706 | // Keep the actual maps as HashMaps for constant-time lookups, 707 | // but order everything up before debug printing it here. 708 | let channels = self.channels.iter().collect::<BTreeMap<_, _>>(); 709 | let schemas = self.schemas.iter().collect::<BTreeMap<_, _>>(); 710 | 711 | f.debug_struct("Summary") 712 | .field("stats", &self.stats) 713 | .field("channels", &channels) 714 | .field("schemas", &schemas) 715 | .field("chunk_indexes", &self.chunk_indexes) 716 | .field("attachment_indexes", &self.attachment_indexes) 717 | .field("metadata_indexes", &self.metadata_indexes) 718 | .finish() 719 | } 720 | } 721 | 722 | impl<'a> Summary<'a> { 723 | /// Read the summary section of the given mapped MCAP file, if it has one. 724 | pub fn read(mcap: &'a [u8]) -> McapResult<Option<Summary<'a>>> { 725 | let foot = footer(mcap)?; 726 | 727 | // A summary start offset of 0 means there's no summary. 728 | if foot.summary_start == 0 { 729 | return Ok(None); 730 | } 731 | 732 | if foot.summary_crc != 0 { 733 | // The checksum covers the entire summary _except_ itself, including other footer bytes. 734 | let calculated = 735 | crc32(&mcap[foot.summary_start as usize..mcap.len() - MAGIC.len() - 4]); 736 | if foot.summary_crc != calculated { 737 | return Err(McapError::BadSummaryCrc { 738 | saved: foot.summary_crc, 739 | calculated, 740 | }); 741 | } 742 | } 743 | 744 | let mut summary = Summary::default(); 745 | let mut channeler = ChannelAccumulator::default(); 746 | 747 | let summary_end = match foot.summary_offset_start { 748 | 0 => mcap.len() - MAGIC.len() - FOOTER_LEN, 749 | sos => sos as usize, 750 | }; 751 | let summary_buf = &mcap[foot.summary_start as usize..summary_end]; 752 | 753 | for record in LinearReader::sans_magic(summary_buf) { 754 | match record? { 755 | Record::Statistics(s) => { 756 | if summary.stats.is_some() { 757 | warn!("Multiple statistics records found in summary"); 758 | } 759 | summary.stats = Some(s); 760 | } 761 | Record::Schema { header, data } => channeler.add_schema(header, data)?, 762 | Record::Channel(c) => channeler.add_channel(c)?, 763 | Record::ChunkIndex(c) => summary.chunk_indexes.push(c), 764 | Record::AttachmentIndex(a) => summary.attachment_indexes.push(a), 765 | Record::MetadataIndex(i) => summary.metadata_indexes.push(i), 766 | _ => {} 767 | }; 768 | } 769 | 770 | summary.schemas = channeler.schemas; 771 | summary.channels = channeler.channels; 772 | 773 | Ok(Some(summary)) 774 | } 775 | 776 | /// Stream messages from the chunk with the given index.
777 | /// 778 | /// To avoid having to read all preceding chunks first, 779 | /// channels and their schemas are pulled from this summary. 780 | pub fn stream_chunk( 781 | &self, 782 | mcap: &'a [u8], 783 | index: &records::ChunkIndex, 784 | ) -> McapResult>> + '_> { 785 | let end = (index.chunk_start_offset + index.chunk_length) as usize; 786 | if mcap.len() < end { 787 | return Err(McapError::BadIndex); 788 | } 789 | 790 | // Get the chunk (as a header and its data) out of the file at the given offset. 791 | let mut reader = LinearReader::sans_magic(&mcap[index.chunk_start_offset as usize..end]); 792 | let (h, d) = match reader.next().ok_or(McapError::BadIndex)? { 793 | Ok(records::Record::Chunk { header, data }) => (header, data), 794 | Ok(_other_record) => return Err(McapError::BadIndex), 795 | Err(e) => return Err(e), 796 | }; 797 | 798 | if reader.next().is_some() { 799 | // Wut - multiple records in the given slice? 800 | return Err(McapError::BadIndex); 801 | } 802 | 803 | // Now let's stream messages out of the chunk. 804 | let messages = ChunkReader::new(h, d)?.filter_map(|record| match record { 805 | Ok(records::Record::Message { header, data }) => { 806 | // Correlate the message to its channel from this summary. 807 | let channel = match self.channels.get(&header.channel_id) { 808 | Some(c) => c.clone(), 809 | None => { 810 | return Some(Err(McapError::UnknownChannel( 811 | header.sequence, 812 | header.channel_id, 813 | ))); 814 | } 815 | }; 816 | 817 | let m = Message { 818 | channel, 819 | sequence: header.sequence, 820 | log_time: header.log_time, 821 | publish_time: header.publish_time, 822 | data, 823 | }; 824 | 825 | Some(Ok(m)) 826 | } 827 | // We don't care about other chunk records (channels, schemas) - 828 | // we should have them from &self already. 829 | Ok(_other_record) => None, 830 | // We do care about errors, though. 831 | Err(e) => Some(Err(e)), 832 | }); 833 | 834 | Ok(messages) 835 | } 836 | 837 | /// Read the mesage indexes for the given indexed chunk. 838 | /// 839 | /// Channels and their schemas are pulled from this summary. 840 | /// The offsets in each [`MessageIndexEntry`](records::MessageIndexEntry) 841 | /// is relative to the decompressed contents of the given chunk. 842 | pub fn read_message_indexes( 843 | &self, 844 | mcap: &[u8], 845 | index: &records::ChunkIndex, 846 | ) -> McapResult, Vec>> { 847 | if index.message_index_offsets.is_empty() { 848 | // Message indexing is optional... should we be more descriptive here? 849 | return Err(McapError::BadIndex); 850 | } 851 | 852 | let mut indexes = HashMap::new(); 853 | 854 | for (channel_id, offset) in &index.message_index_offsets { 855 | let offset = *offset as usize; 856 | 857 | // Message indexes are at least 15 bytes: 858 | // 1 byte opcode, 8 byte length, 2 byte channel ID, 4 byte array len 859 | if mcap.len() < offset + 15 { 860 | return Err(McapError::BadIndex); 861 | } 862 | 863 | // Get the MessageIndex out of the file at the given offset. 864 | let mut reader = LinearReader::sans_magic(&mcap[offset..]); 865 | let index = match reader.next().ok_or(McapError::BadIndex)? 
{ 866 | Ok(records::Record::MessageIndex(i)) => i, 867 | Ok(_other_record) => return Err(McapError::BadIndex), 868 | Err(e) => return Err(e), 869 | }; 870 | 871 | // The channel ID from the chunk index and the message index should match 872 | if *channel_id != index.channel_id { 873 | return Err(McapError::BadIndex); 874 | } 875 | 876 | let channel = match self.channels.get(&index.channel_id) { 877 | Some(c) => c, 878 | None => { 879 | return Err(McapError::UnknownChannel( 880 | 0, // We don't have a message sequence num yet. 881 | index.channel_id, 882 | )); 883 | } 884 | }; 885 | 886 | if indexes.insert(channel.clone(), index.records).is_some() { 887 | return Err(McapError::ConflictingChannels(channel.topic.clone())); 888 | } 889 | } 890 | 891 | Ok(indexes) 892 | } 893 | 894 | /// Seek to the given message in the given indexed chunk. 895 | /// 896 | /// If you're interested in more than a single message from the chunk, 897 | /// filtering [`Summary::stream_chunk`] is probably a better bet. 898 | /// Compressed chunks aren't random access - 899 | /// this decompresses everything in the chunk before 900 | /// [`message.offset`](records::MessageIndexEntry::offset) and throws it away. 901 | pub fn seek_message( 902 | &self, 903 | mcap: &'a [u8], 904 | index: &records::ChunkIndex, 905 | message: &records::MessageIndexEntry, 906 | ) -> McapResult { 907 | // Get the chunk (as a header and its data) out of the file at the given offset. 908 | let end = (index.chunk_start_offset + index.chunk_length) as usize; 909 | if mcap.len() < end { 910 | return Err(McapError::BadIndex); 911 | } 912 | 913 | let mut reader = LinearReader::sans_magic(&mcap[index.chunk_start_offset as usize..end]); 914 | let (h, d) = match reader.next().ok_or(McapError::BadIndex)? { 915 | Ok(records::Record::Chunk { header, data }) => (header, data), 916 | Ok(_other_record) => return Err(McapError::BadIndex), 917 | Err(e) => return Err(e), 918 | }; 919 | 920 | if reader.next().is_some() { 921 | // Wut - multiple records in the given slice? 922 | return Err(McapError::BadIndex); 923 | } 924 | 925 | let mut chunk_reader = ChunkReader::new(h, d)?; 926 | 927 | // Do unspeakable things to seek to the message. 928 | match &mut chunk_reader.decompressor { 929 | ChunkDecompressor::Null(reader) => { 930 | // Skip messages until we're at the offset. 931 | while reader.bytes_remaining() as u64 > index.uncompressed_size - message.offset { 932 | match reader.next() { 933 | Some(Ok(_)) => {} 934 | Some(Err(e)) => return Err(e), 935 | None => return Err(McapError::BadIndex), 936 | }; 937 | } 938 | // Be exact! 939 | if reader.bytes_remaining() as u64 != index.uncompressed_size - message.offset { 940 | return Err(McapError::BadIndex); 941 | } 942 | } 943 | ChunkDecompressor::Compressed(maybe_read) => { 944 | let reader = maybe_read.as_mut().unwrap(); 945 | // Decompress offset bytes, which should put us at the message we want. 946 | io::copy(&mut reader.take(message.offset), &mut io::sink())?; 947 | } 948 | } 949 | 950 | // Now let's get our message. 951 | match chunk_reader.next() { 952 | Some(Ok(records::Record::Message { header, data })) => { 953 | // Correlate the message to its channel from this summary. 
954 | let channel = match self.channels.get(&header.channel_id) { 955 | Some(c) => c.clone(), 956 | None => { 957 | return Err(McapError::UnknownChannel( 958 | header.sequence, 959 | header.channel_id, 960 | )); 961 | } 962 | }; 963 | 964 | let m = Message { 965 | channel, 966 | sequence: header.sequence, 967 | log_time: header.log_time, 968 | publish_time: header.publish_time, 969 | data, 970 | }; 971 | 972 | Ok(m) 973 | } 974 | // The index told us this was a message... 975 | Some(Ok(_other_record)) => Err(McapError::BadIndex), 976 | Some(Err(e)) => Err(e), 977 | None => Err(McapError::BadIndex), 978 | } 979 | } 980 | } 981 | 982 | /// Read the attachment with the given index. 983 | pub fn attachment<'a>( 984 | mcap: &'a [u8], 985 | index: &records::AttachmentIndex, 986 | ) -> McapResult<Attachment<'a>> { 987 | let end = (index.offset + index.length) as usize; 988 | if mcap.len() < end { 989 | return Err(McapError::BadIndex); 990 | } 991 | 992 | let mut reader = LinearReader::sans_magic(&mcap[index.offset as usize..end]); 993 | let (h, d) = match reader.next().ok_or(McapError::BadIndex)? { 994 | Ok(records::Record::Attachment { header, data }) => (header, data), 995 | Ok(_other_record) => return Err(McapError::BadIndex), 996 | Err(e) => return Err(e), 997 | }; 998 | 999 | if reader.next().is_some() { 1000 | // Wut - multiple records in the given slice? 1001 | return Err(McapError::BadIndex); 1002 | } 1003 | 1004 | Ok(Attachment { 1005 | log_time: h.log_time, 1006 | create_time: h.create_time, 1007 | name: h.name, 1008 | content_type: h.content_type, 1009 | data: Cow::Borrowed(d), 1010 | }) 1011 | } 1012 | 1013 | /// Read the metadata with the given index. 1014 | pub fn metadata(mcap: &[u8], index: &records::MetadataIndex) -> McapResult<records::Metadata> { 1015 | let end = (index.offset + index.length) as usize; 1016 | if mcap.len() < end { 1017 | return Err(McapError::BadIndex); 1018 | } 1019 | 1020 | let mut reader = LinearReader::sans_magic(&mcap[index.offset as usize..end]); 1021 | let m = match reader.next().ok_or(McapError::BadIndex)? { 1022 | Ok(records::Record::Metadata(m)) => m, 1023 | Ok(_other_record) => return Err(McapError::BadIndex), 1024 | Err(e) => return Err(e), 1025 | }; 1026 | 1027 | if reader.next().is_some() { 1028 | // Wut - multiple records in the given slice? 1029 | return Err(McapError::BadIndex); 1030 | } 1031 | 1032 | Ok(m) 1033 | } 1034 | 1035 | // All of the following panic if they walk off the back of the data block; 1036 | // callers are assumed to have made sure they got enough bytes back with 1037 | // `validate_response()` 1038 | 1039 | /// Builds a `read_<type>(&mut buf)` function that reads a given type 1040 | /// off the buffer and advances it the appropriate number of bytes. 1041 | macro_rules! reader { 1042 | ($type:ty) => { 1043 | paste::paste! { 1044 | #[inline] 1045 | fn [<read_ $type>](block: &mut &[u8]) -> $type { 1046 | const SIZE: usize = std::mem::size_of::<$type>(); 1047 | let res = $type::from_le_bytes( 1048 | block[0..SIZE].try_into().unwrap() 1049 | ); 1050 | *block = &block[SIZE..]; 1051 | res 1052 | } 1053 | } 1054 | }; 1055 | } 1056 | 1057 | reader!(u8); 1058 | reader!(u64); 1059 | 1060 | #[cfg(test)] 1061 | mod test { 1062 | use super::*; 1063 | 1064 | // Can we read a file that's only magic?
1065 | // (Probably considered malformed by the spec, but let's not panic on user input) 1066 | 1067 | #[test] 1068 | fn only_two_magics() { 1069 | let two_magics = MAGIC.repeat(2); 1070 | let mut reader = LinearReader::new(&two_magics).unwrap(); 1071 | assert!(reader.next().is_none()); 1072 | } 1073 | 1074 | #[test] 1075 | fn only_one_magic() { 1076 | assert!(matches!(LinearReader::new(MAGIC), Err(McapError::BadMagic))); 1077 | } 1078 | 1079 | #[test] 1080 | fn only_two_magic_with_ignore_end_magic() { 1081 | let two_magics = MAGIC.repeat(2); 1082 | let mut reader = 1083 | LinearReader::new_with_options(&two_magics, enum_set!(Options::IgnoreEndMagic)) 1084 | .unwrap(); 1085 | assert!(reader.next().is_none()); 1086 | } 1087 | 1088 | #[test] 1089 | fn only_one_magic_with_ignore_end_magic() { 1090 | let mut reader = 1091 | LinearReader::new_with_options(MAGIC, enum_set!(Options::IgnoreEndMagic)).unwrap(); 1092 | assert!(reader.next().is_none()); 1093 | } 1094 | } 1095 | --------------------------------------------------------------------------------