├── .gitignore
├── .gitattributes
├── tests
│   ├── references
│   │   ├── OneMessage.mcap
│   │   ├── OneMetadata.mcap
│   │   ├── demo.mcap
│   │   └── OneAttachment.mcap
│   ├── common.rs
│   ├── flush.rs
│   ├── compression.rs
│   ├── metadata.rs
│   ├── attachment.rs
│   ├── message.rs
│   └── round_trip.rs
├── README.md
├── LICENSE.md
├── Cargo.toml
├── examples
│   ├── common
│   │   └── logsetup.rs
│   ├── mcapcat
│   │   └── main.rs
│   ├── mcapcopy
│   │   └── main.rs
│   └── recover
│       └── main.rs
└── src
    ├── io_utils.rs
    ├── lib.rs
    ├── records.rs
    ├── write.rs
    └── read.rs

/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /Cargo.lock
3 | 
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.mcap filter=lfs diff=lfs merge=lfs -text
2 | 
--------------------------------------------------------------------------------
/tests/references/OneMessage.mcap:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:c92429a3aa64497c8855552a7e03b8401fe9da0e94c2d8e82d72c8d4341ffcaa
3 | size 190
4 | 
--------------------------------------------------------------------------------
/tests/references/OneMetadata.mcap:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:e254aad93777c23e224c2f800295f7573607e7841a4883916695d0a292786302
3 | size 116
4 | 
--------------------------------------------------------------------------------
/tests/references/demo.mcap:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:f878642b6fc15d2e771ce530252e6454de296e6d99b18748e6cd7d09eaa80598
3 | size 61497068
4 | 
--------------------------------------------------------------------------------
/tests/references/OneAttachment.mcap:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:816f7ef60af488cccbb67fbe7f2e3de2abf788793ba548993254140dd8a95acc
3 | size 153
4 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # mcap-rs
2 | 
3 | ...has been upstreamed into the [Foxglove MCAP repo](https://github.com/foxglove/mcap)!
4 | Use the [`mcap` crate](https://crates.io/crates/mcap) for the latest Rust MCAP goodness.
5 | 
--------------------------------------------------------------------------------
/tests/common.rs:
--------------------------------------------------------------------------------
1 | use std::fs;
2 | 
3 | use anyhow::{Context, Result};
4 | use camino::Utf8Path;
5 | use memmap::Mmap;
6 | 
7 | pub fn map_mcap<P: AsRef<Utf8Path>>(p: P) -> Result<Mmap> {
8 |     let p = p.as_ref();
9 |     let fd = fs::File::open(p).with_context(|| format!("Couldn't open {p}"))?;
10 |     unsafe { Mmap::map(&fd) }.with_context(|| format!("Couldn't map {p}"))
11 | }
12 | 
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Copyright © 2022 Anduril Industries
2 | 
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /tests/flush.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::*; 4 | 5 | use std::io::BufWriter; 6 | 7 | use anyhow::Result; 8 | use itertools::Itertools; 9 | use mcap_rs as mcap; 10 | use memmap::Mmap; 11 | use tempfile::tempfile; 12 | 13 | #[test] 14 | fn flush_and_cut_chunks() -> Result<()> { 15 | let mapped = map_mcap("tests/references/demo.mcap")?; 16 | 17 | let messages = mcap::MessageStream::new(&mapped)?; 18 | 19 | let mut tmp = tempfile()?; 20 | let mut writer = mcap::Writer::new(BufWriter::new(&mut tmp))?; 21 | 22 | for (i, m) in messages.enumerate() { 23 | writer.write(&m?)?; 24 | // Cut a new chunk every other message 25 | if i % 2 == 0 { 26 | writer.flush()?; 27 | } 28 | } 29 | drop(writer); 30 | 31 | let ours = unsafe { Mmap::map(&tmp) }?; 32 | 33 | // Compare the message stream of our MCAP to the reference one. 34 | // Regardless of the chunk boundaries, they should be the same. 35 | for (theirs, ours) in 36 | mcap::MessageStream::new(&mapped)?.zip_eq(mcap::MessageStream::new(&ours)?) 37 | { 38 | assert_eq!(ours?, theirs?) 39 | } 40 | 41 | Ok(()) 42 | } 43 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "mcap-rs" 3 | description = "A library for reading and writing Foxglove MCAP files" 4 | keywords = [ "foxglove", "mcap" ] 5 | categories = [ "science::robotics", "compression" ] 6 | repository = "https://github.com/anduril/mcap-rs" 7 | readme = "README.md" 8 | documentation = "https://docs.rs/mcap-rs" 9 | license = "Apache-2.0" 10 | version = "0.3.4" 11 | 12 | edition = "2021" 13 | 14 | # See the repo for the reference files 15 | # (all taken from github.com/foxglove/mcap FWIW) 16 | exclude = [ "/tests/references/" ] 17 | 18 | [profile.dev] 19 | opt-level = 3 # Profiling! 20 | 21 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 22 | 23 | [dependencies] 24 | binrw = "0.9" 25 | byteorder = "1.4" 26 | crc32fast = "1.3" 27 | log = "0.4" 28 | lz4 = "1.0" 29 | num_cpus = "1.13" 30 | paste = "1.0" 31 | thiserror = "1.0" 32 | enumset = "1.0.11" 33 | zstd = { version = "0.11", features = ["zstdmt"] } 34 | 35 | [dev-dependencies] 36 | anyhow = "1.0" 37 | atty = "0.2" 38 | camino = "1.0" 39 | clap = { version = "3.2", features = ["derive"]} 40 | itertools = "0.10" 41 | memmap = "0.7" 42 | rayon = "1.5" 43 | simplelog = "0.12" 44 | tempfile = "3.3" 45 | -------------------------------------------------------------------------------- /examples/common/logsetup.rs: -------------------------------------------------------------------------------- 1 | use anyhow::*; 2 | use simplelog::*; 3 | 4 | #[derive(clap::ArgEnum, Debug, Copy, Clone)] 5 | pub enum Color { 6 | Auto, 7 | Always, 8 | Never, 9 | } 10 | 11 | /// Set up simplelog to spit messages to stderr. 
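/// (A count of -v occurrences picks the level: 0 = warn, 1 = info, 2 = debug,
/// 3 or more = trace, per the match below.)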
12 | pub fn init_logger(verbosity: u8, color: Color) { 13 | let mut builder = ConfigBuilder::new(); 14 | // Shut a bunch of stuff off - we're just spitting to stderr. 15 | builder.set_location_level(LevelFilter::Trace); 16 | builder.set_target_level(LevelFilter::Off); 17 | builder.set_thread_level(LevelFilter::Off); 18 | builder.set_time_level(LevelFilter::Off); 19 | 20 | let level = match verbosity { 21 | 0 => LevelFilter::Warn, 22 | 1 => LevelFilter::Info, 23 | 2 => LevelFilter::Debug, 24 | _ => LevelFilter::Trace, 25 | }; 26 | 27 | let config = builder.build(); 28 | 29 | let color = match color { 30 | Color::Always => ColorChoice::AlwaysAnsi, 31 | Color::Auto => { 32 | if atty::is(atty::Stream::Stderr) { 33 | ColorChoice::Auto 34 | } else { 35 | ColorChoice::Never 36 | } 37 | } 38 | Color::Never => ColorChoice::Never, 39 | }; 40 | 41 | TermLogger::init(level, config.clone(), TerminalMode::Stderr, color) 42 | .or_else(|_| SimpleLogger::init(level, config)) 43 | .context("Couldn't init logger") 44 | .unwrap() 45 | } 46 | -------------------------------------------------------------------------------- /tests/compression.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::*; 4 | 5 | use std::io::BufWriter; 6 | 7 | use anyhow::Result; 8 | use itertools::Itertools; 9 | use mcap_rs as mcap; 10 | use memmap::Mmap; 11 | use tempfile::tempfile; 12 | 13 | fn round_trip(comp: Option) -> Result<()> { 14 | let mapped = map_mcap("tests/references/demo.mcap")?; 15 | 16 | let mut tmp = tempfile()?; 17 | let mut writer = mcap::WriteOptions::new() 18 | .compression(comp) 19 | .profile("fooey") 20 | .create(BufWriter::new(&mut tmp))?; 21 | 22 | for m in mcap::MessageStream::new(&mapped)? { 23 | // IRL, we'd add channels, then write messages to known channels, 24 | // which skips having to re-hash the channel and its schema each time. 25 | // But since here we'd need to do the same anyways... 26 | writer.write(&m?)?; 27 | } 28 | drop(writer); 29 | 30 | let ours = unsafe { Mmap::map(&tmp) }?; 31 | 32 | // Compare the message stream of our MCAP to the reference one. 33 | for (theirs, ours) in 34 | mcap::MessageStream::new(&mapped)?.zip_eq(mcap::MessageStream::new(&ours)?) 35 | { 36 | assert_eq!(ours?, theirs?) 37 | } 38 | 39 | Ok(()) 40 | } 41 | 42 | #[test] 43 | fn uncompressed_round_trip() -> Result<()> { 44 | round_trip(None) 45 | } 46 | 47 | #[test] 48 | fn zstd_round_trip() -> Result<()> { 49 | round_trip(Some(mcap::Compression::Zstd)) 50 | } 51 | 52 | #[test] 53 | fn lz4_round_trip() -> Result<()> { 54 | round_trip(Some(mcap::Compression::Lz4)) 55 | } 56 | -------------------------------------------------------------------------------- /examples/mcapcat/main.rs: -------------------------------------------------------------------------------- 1 | #[path = "../common/logsetup.rs"] 2 | mod logsetup; 3 | 4 | use std::{fs, process}; 5 | 6 | use anyhow::{Context, Result}; 7 | use camino::{Utf8Path, Utf8PathBuf}; 8 | use clap::Parser; 9 | use log::*; 10 | use mcap_rs as mcap; 11 | use memmap::Mmap; 12 | 13 | #[derive(Parser, Debug)] 14 | struct Args { 15 | /// Verbosity (-v, -vv, -vvv, etc.) 
16 |     #[clap(short, long, parse(from_occurrences))]
17 |     verbose: u8,
18 | 
19 |     #[clap(short, long, arg_enum, default_value = "auto")]
20 |     color: logsetup::Color,
21 | 
22 |     mcap: Utf8PathBuf,
23 | }
24 | 
25 | fn map_mcap(p: &Utf8Path) -> Result<Mmap> {
26 |     let fd = fs::File::open(p).context("Couldn't open MCAP file")?;
27 |     unsafe { Mmap::map(&fd) }.context("Couldn't map MCAP file")
28 | }
29 | 
30 | fn run() -> Result<()> {
31 |     let args = Args::parse();
32 |     logsetup::init_logger(args.verbose, args.color);
33 | 
34 |     let mapped = map_mcap(&args.mcap)?;
35 | 
36 |     for message in mcap::MessageStream::new(&mapped)? {
37 |         let message = message?;
38 |         let ts = message.publish_time;
39 |         println!(
40 |             "{} {} [{}] [{}]...",
41 |             ts,
42 |             message.channel.topic,
43 |             message
44 |                 .channel
45 |                 .schema
46 |                 .as_ref()
47 |                 .map(|s| s.name.as_str())
48 |                 .unwrap_or_default(),
49 |             message
50 |                 .data
51 |                 .iter()
52 |                 .take(10)
53 |                 .map(|b| b.to_string())
54 |                 .collect::<Vec<_>>()
55 |                 .join(" ")
56 |         );
57 |     }
58 | 
59 |     info!("{:#?}", mcap::Summary::read(&mapped)?);
60 |     Ok(())
61 | }
62 | 
63 | fn main() {
64 |     run().unwrap_or_else(|e| {
65 |         error!("{:?}", e);
66 |         process::exit(1);
67 |     });
68 | }
69 | 
--------------------------------------------------------------------------------
/src/io_utils.rs:
--------------------------------------------------------------------------------
1 | use std::io::{self, prelude::*};
2 | 
3 | use crc32fast::Hasher;
4 | 
5 | /// Counts how many bytes have been read and calculates a running CRC32
6 | pub struct CountingCrcReader<R> {
7 |     inner: R,
8 |     hasher: Hasher,
9 |     count: u64,
10 | }
11 | 
12 | impl<R: Read> CountingCrcReader<R> {
13 |     pub fn new(inner: R) -> Self {
14 |         Self {
15 |             inner,
16 |             hasher: Hasher::new(),
17 |             count: 0,
18 |         }
19 |     }
20 | 
21 |     pub fn position(&self) -> u64 {
22 |         self.count
23 |     }
24 | 
25 |     /// Consumes the reader and returns the checksum
26 |     pub fn finalize(self) -> u32 {
27 |         self.hasher.finalize()
28 |     }
29 | }
30 | 
31 | impl<R: Read> Read for CountingCrcReader<R> {
32 |     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
33 |         let res = self.inner.read(buf)?;
34 |         self.count += res as u64;
35 |         self.hasher.update(&buf[..res]);
36 |         Ok(res)
37 |     }
38 | }
39 | 
40 | pub struct CountingCrcWriter<W> {
41 |     inner: W,
42 |     hasher: Hasher,
43 |     count: u64,
44 | }
45 | 
46 | impl<W: Write> CountingCrcWriter<W> {
47 |     pub fn new(inner: W) -> Self {
48 |         Self {
49 |             inner,
50 |             hasher: Hasher::new(),
51 |             count: 0,
52 |         }
53 |     }
54 | 
55 |     pub fn position(&self) -> u64 {
56 |         self.count
57 |     }
58 | 
59 |     pub fn get_mut(&mut self) -> &mut W {
60 |         &mut self.inner
61 |     }
62 | 
63 |     /// Consumes the writer and returns the inner writer and the checksum
64 |     pub fn finalize(self) -> (W, u32) {
65 |         (self.inner, self.hasher.finalize())
66 |     }
67 | }
68 | 
69 | impl<W: Write> Write for CountingCrcWriter<W> {
70 |     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
71 |         let res = self.inner.write(buf)?;
72 |         self.count += res as u64;
73 |         self.hasher.update(&buf[..res]);
74 |         Ok(res)
75 |     }
76 | 
77 |     fn flush(&mut self) -> io::Result<()> {
78 |         self.inner.flush()
79 |     }
80 | }
81 | 
--------------------------------------------------------------------------------
/examples/mcapcopy/main.rs:
--------------------------------------------------------------------------------
1 | #[path = "../common/logsetup.rs"]
2 | mod logsetup;
3 | 
4 | use std::{fs, io::BufWriter};
5 | 
6 | use anyhow::{Context, Result};
7 | use camino::{Utf8Path, Utf8PathBuf};
8 | use clap::Parser;
9 | use log::*;
10 | use mcap_rs as mcap;
11 | use memmap::Mmap;
12
| 13 | #[derive(Parser, Debug)] 14 | struct Args { 15 | /// Verbosity (-v, -vv, -vvv, etc.) 16 | #[clap(short, long, parse(from_occurrences))] 17 | verbose: u8, 18 | 19 | #[clap(short, long, arg_enum, default_value = "auto")] 20 | color: logsetup::Color, 21 | 22 | mcap: Utf8PathBuf, 23 | } 24 | 25 | fn map_mcap(p: &Utf8Path) -> Result { 26 | let fd = fs::File::open(p).context("Couldn't open MCAP file")?; 27 | unsafe { Mmap::map(&fd) }.context("Couldn't map MCAP file") 28 | } 29 | 30 | fn run() -> Result<()> { 31 | let args = Args::parse(); 32 | logsetup::init_logger(args.verbose, args.color); 33 | 34 | let mapped = map_mcap(&args.mcap)?; 35 | 36 | let mut out = mcap::Writer::new(BufWriter::new(fs::File::create("out.mcap")?))?; 37 | 38 | for message in mcap::MessageStream::new(&mapped)? { 39 | let message = message?; 40 | let ts = message.publish_time; 41 | info!( 42 | "{} {} [{}] [{}]...", 43 | ts, 44 | message.channel.topic, 45 | message 46 | .channel 47 | .schema 48 | .as_ref() 49 | .map(|s| s.name.as_str()) 50 | .unwrap_or_default(), 51 | message 52 | .data 53 | .iter() 54 | .take(10) 55 | .map(|b| b.to_string()) 56 | .collect::>() 57 | .join(" ") 58 | ); 59 | 60 | // We can easily take each Message and write it as a quick and dirty example, 61 | // but in real code, we'd be much better off adding each channel to the writer, 62 | // then calling `write_to_known_channel()`. 63 | // This avoids having to rehash the channel (and its schema) on each `write()` 64 | // to figure out what its ID is. 65 | out.write(&message)?; 66 | } 67 | Ok(()) 68 | } 69 | 70 | fn main() { 71 | run().unwrap_or_else(|e| { 72 | error!("{:?}", e); 73 | std::process::exit(1); 74 | }); 75 | } 76 | -------------------------------------------------------------------------------- /tests/metadata.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::*; 4 | 5 | use std::io::BufWriter; 6 | 7 | use anyhow::Result; 8 | use mcap_rs as mcap; 9 | use memmap::Mmap; 10 | use tempfile::tempfile; 11 | 12 | #[test] 13 | fn smoke() -> Result<()> { 14 | let mapped = map_mcap("tests/references/OneMetadata.mcap")?; 15 | let metas = mcap::read::LinearReader::new(&mapped)? 
16 | .filter_map(|record| match record.unwrap() { 17 | mcap::records::Record::Metadata(m) => Some(m), 18 | _ => None, 19 | }) 20 | .collect::>(); 21 | 22 | assert_eq!(metas.len(), 1); 23 | 24 | let expected = mcap::records::Metadata { 25 | name: String::from("myMetadata"), 26 | metadata: [(String::from("foo"), String::from("bar"))].into(), 27 | }; 28 | 29 | assert_eq!(metas[0], expected); 30 | 31 | Ok(()) 32 | } 33 | 34 | #[test] 35 | fn round_trip() -> Result<()> { 36 | let mapped = map_mcap("tests/references/OneMetadata.mcap")?; 37 | let metas = 38 | mcap::read::LinearReader::new(&mapped)?.filter_map(|record| match record.unwrap() { 39 | mcap::records::Record::Metadata(m) => Some(m), 40 | _ => None, 41 | }); 42 | 43 | let mut tmp = tempfile()?; 44 | let mut writer = mcap::Writer::new(BufWriter::new(&mut tmp))?; 45 | 46 | for m in metas { 47 | writer.write_metadata(&m)?; 48 | } 49 | drop(writer); 50 | 51 | let ours = unsafe { Mmap::map(&tmp) }?; 52 | let summary = mcap::Summary::read(&ours)?; 53 | 54 | let expected_summary = Some(mcap::Summary { 55 | stats: Some(mcap::records::Statistics { 56 | metadata_count: 1, 57 | ..Default::default() 58 | }), 59 | metadata_indexes: vec![mcap::records::MetadataIndex { 60 | offset: 38, // Finicky - depends on the length of the library version string 61 | length: 41, 62 | name: String::from("myMetadata"), 63 | }], 64 | ..Default::default() 65 | }); 66 | assert_eq!(summary, expected_summary); 67 | 68 | let expected = mcap::records::Metadata { 69 | name: String::from("myMetadata"), 70 | metadata: [(String::from("foo"), String::from("bar"))].into(), 71 | }; 72 | 73 | assert_eq!( 74 | mcap::read::metadata(&ours, &summary.unwrap().metadata_indexes[0])?, 75 | expected 76 | ); 77 | 78 | Ok(()) 79 | } 80 | -------------------------------------------------------------------------------- /examples/recover/main.rs: -------------------------------------------------------------------------------- 1 | #[path = "../common/logsetup.rs"] 2 | mod logsetup; 3 | 4 | use std::{fs, io::BufWriter}; 5 | 6 | use anyhow::{ensure, Context, Result}; 7 | use camino::{Utf8Path, Utf8PathBuf}; 8 | use clap::Parser; 9 | use enumset::enum_set; 10 | use log::*; 11 | use mcap_rs as mcap; 12 | use memmap::Mmap; 13 | 14 | #[derive(Parser, Debug)] 15 | struct Args { 16 | /// Verbosity (-v, -vv, -vvv, etc.) 
17 | #[clap(short, long, parse(from_occurrences))] 18 | verbose: u8, 19 | 20 | #[clap(short, long, arg_enum, default_value = "auto")] 21 | color: logsetup::Color, 22 | 23 | #[clap(help = "input mcap file")] 24 | input: Utf8PathBuf, 25 | 26 | #[clap( 27 | short, 28 | long, 29 | help = "output mcap file, defaults to .recovered.mcap" 30 | )] 31 | output: Option, 32 | } 33 | 34 | fn map_mcap(p: &Utf8Path) -> Result { 35 | let fd = fs::File::open(p).context("Couldn't open MCAP file")?; 36 | unsafe { Mmap::map(&fd) }.context("Couldn't map MCAP file") 37 | } 38 | 39 | fn make_output_path(input: Utf8PathBuf) -> Result { 40 | use std::str::FromStr; 41 | let file_stem = input.file_stem().context("no file stem for input path")?; 42 | let output_path = Utf8PathBuf::from_str(file_stem)?.with_extension("recovered.mcap"); 43 | Ok(output_path) 44 | } 45 | 46 | fn run() -> Result<()> { 47 | let args = Args::parse(); 48 | logsetup::init_logger(args.verbose, args.color); 49 | debug!("{:?}", args); 50 | 51 | let mapped = map_mcap(&args.input)?; 52 | let output_path = args.output.unwrap_or(make_output_path(args.input)?); 53 | ensure!( 54 | !output_path.exists(), 55 | "output path {output_path} already exists" 56 | ); 57 | 58 | let mut out = mcap::Writer::new(BufWriter::new(fs::File::create(output_path)?))?; 59 | 60 | info!("recovering as many messages as possible..."); 61 | let mut recovered_count = 0; 62 | for maybe_message in mcap::MessageStream::new_with_options( 63 | &mapped, 64 | enum_set!(mcap::read::Options::IgnoreEndMagic), 65 | )? { 66 | match maybe_message { 67 | Ok(message) => { 68 | out.write(&message)?; 69 | recovered_count += 1; 70 | } 71 | Err(err) => { 72 | error!("{err} -- stopping"); 73 | break; 74 | } 75 | } 76 | } 77 | info!("recovered {} messages", recovered_count); 78 | Ok(()) 79 | } 80 | 81 | fn main() { 82 | run().unwrap_or_else(|e| { 83 | error!("{:?}", e); 84 | std::process::exit(1); 85 | }); 86 | } 87 | -------------------------------------------------------------------------------- /tests/attachment.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::*; 4 | 5 | use std::{borrow::Cow, io::BufWriter}; 6 | 7 | use anyhow::Result; 8 | use mcap_rs as mcap; 9 | use memmap::Mmap; 10 | use tempfile::tempfile; 11 | 12 | #[test] 13 | fn smoke() -> Result<()> { 14 | let mapped = map_mcap("tests/references/OneAttachment.mcap")?; 15 | let attachments = mcap::read::LinearReader::new(&mapped)? 
16 | .filter_map(|record| match record.unwrap() { 17 | mcap::records::Record::Attachment { header, data } => Some((header, data)), 18 | _ => None, 19 | }) 20 | .collect::>(); 21 | 22 | assert_eq!(attachments.len(), 1); 23 | 24 | let expected_header = mcap::records::AttachmentHeader { 25 | log_time: 2, 26 | create_time: 1, 27 | name: String::from("myFile"), 28 | content_type: String::from("application/octet-stream"), 29 | data_len: 3, 30 | }; 31 | 32 | assert_eq!(attachments[0].0, expected_header); 33 | assert_eq!(attachments[0].1, &[1, 2, 3]); 34 | 35 | Ok(()) 36 | } 37 | 38 | #[test] 39 | fn round_trip() -> Result<()> { 40 | let mapped = map_mcap("tests/references/OneAttachment.mcap")?; 41 | let attachments = 42 | mcap::read::LinearReader::new(&mapped)?.filter_map(|record| match record.unwrap() { 43 | mcap::records::Record::Attachment { header, data } => Some((header, data)), 44 | _ => None, 45 | }); 46 | 47 | let mut tmp = tempfile()?; 48 | let mut writer = mcap::Writer::new(BufWriter::new(&mut tmp))?; 49 | 50 | for (h, d) in attachments { 51 | let a = mcap::Attachment { 52 | log_time: h.log_time, 53 | create_time: h.create_time, 54 | content_type: h.content_type, 55 | name: h.name, 56 | data: Cow::Borrowed(d), 57 | }; 58 | writer.attach(&a)?; 59 | } 60 | drop(writer); 61 | 62 | let ours = unsafe { Mmap::map(&tmp) }?; 63 | let summary = mcap::Summary::read(&ours)?; 64 | 65 | let expected_summary = Some(mcap::Summary { 66 | stats: Some(mcap::records::Statistics { 67 | attachment_count: 1, 68 | ..Default::default() 69 | }), 70 | attachment_indexes: vec![mcap::records::AttachmentIndex { 71 | offset: 38, // Finicky - depends on the length of the library version string 72 | length: 78, 73 | log_time: 2, 74 | create_time: 1, 75 | data_size: 3, 76 | name: String::from("myFile"), 77 | content_type: String::from("application/octet-stream"), 78 | }], 79 | ..Default::default() 80 | }); 81 | assert_eq!(summary, expected_summary); 82 | 83 | let expected_attachment = mcap::Attachment { 84 | log_time: 2, 85 | create_time: 1, 86 | name: String::from("myFile"), 87 | content_type: String::from("application/octet-stream"), 88 | data: Cow::Borrowed(&[1, 2, 3]), 89 | }; 90 | 91 | assert_eq!( 92 | mcap::read::attachment(&ours, &summary.unwrap().attachment_indexes[0])?, 93 | expected_attachment 94 | ); 95 | 96 | Ok(()) 97 | } 98 | -------------------------------------------------------------------------------- /tests/message.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::*; 4 | 5 | use std::{borrow::Cow, io::BufWriter, sync::Arc}; 6 | 7 | use anyhow::Result; 8 | use mcap_rs as mcap; 9 | use memmap::Mmap; 10 | use tempfile::tempfile; 11 | 12 | #[test] 13 | fn smoke() -> Result<()> { 14 | let mapped = map_mcap("tests/references/OneMessage.mcap")?; 15 | let messages = mcap::MessageStream::new(&mapped)?.collect::>>()?; 16 | 17 | assert_eq!(messages.len(), 1); 18 | 19 | let expected = mcap::Message { 20 | channel: Arc::new(mcap::Channel { 21 | schema: Some(Arc::new(mcap::Schema { 22 | name: String::from("Example"), 23 | encoding: String::from("c"), 24 | data: Cow::Borrowed(&[4, 5, 6]), 25 | })), 26 | topic: String::from("example"), 27 | message_encoding: String::from("a"), 28 | metadata: [(String::from("foo"), String::from("bar"))].into(), 29 | }), 30 | sequence: 10, 31 | log_time: 2, 32 | publish_time: 1, 33 | data: Cow::Borrowed(&[1, 2, 3]), 34 | }; 35 | 36 | assert_eq!(messages[0], expected); 37 | 38 | Ok(()) 39 | } 40 | 41 | #[test] 42 | 
fn round_trip() -> Result<()> { 43 | let mapped = map_mcap("tests/references/OneMessage.mcap")?; 44 | let messages = mcap::MessageStream::new(&mapped)?; 45 | 46 | let mut tmp = tempfile()?; 47 | let mut writer = mcap::Writer::new(BufWriter::new(&mut tmp))?; 48 | 49 | for m in messages { 50 | writer.write(&m?)?; 51 | } 52 | drop(writer); 53 | 54 | let ours = unsafe { Mmap::map(&tmp) }?; 55 | let summary = mcap::Summary::read(&ours)?.unwrap(); 56 | 57 | let schema = Arc::new(mcap::Schema { 58 | name: String::from("Example"), 59 | encoding: String::from("c"), 60 | data: Cow::Borrowed(&[4, 5, 6]), 61 | }); 62 | 63 | let channel = Arc::new(mcap::Channel { 64 | schema: Some(schema.clone()), 65 | topic: String::from("example"), 66 | message_encoding: String::from("a"), 67 | metadata: [(String::from("foo"), String::from("bar"))].into(), 68 | }); 69 | 70 | let expected_summary = mcap::Summary { 71 | stats: Some(mcap::records::Statistics { 72 | message_count: 1, 73 | schema_count: 1, 74 | channel_count: 1, 75 | chunk_count: 1, 76 | message_start_time: 2, 77 | message_end_time: 2, 78 | channel_message_counts: [(0, 1)].into(), 79 | ..Default::default() 80 | }), 81 | channels: [(0, channel.clone())].into(), 82 | schemas: [(1, schema.clone())].into(), 83 | ..Default::default() 84 | }; 85 | // Don't assert the chunk indexes - their size is at the whim of compressors. 86 | assert_eq!(summary.stats, expected_summary.stats); 87 | assert_eq!(summary.channels, expected_summary.channels); 88 | assert_eq!(summary.schemas, expected_summary.schemas); 89 | assert_eq!( 90 | summary.attachment_indexes, 91 | expected_summary.attachment_indexes 92 | ); 93 | assert_eq!(summary.metadata_indexes, expected_summary.metadata_indexes); 94 | 95 | let expected = mcap::Message { 96 | channel, 97 | sequence: 10, 98 | log_time: 2, 99 | publish_time: 1, 100 | data: Cow::Borrowed(&[1, 2, 3]), 101 | }; 102 | 103 | assert_eq!( 104 | mcap::MessageStream::new(&ours)?.collect::>>()?, 105 | &[expected] 106 | ); 107 | 108 | Ok(()) 109 | } 110 | -------------------------------------------------------------------------------- /tests/round_trip.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::*; 4 | 5 | use std::io::BufWriter; 6 | 7 | use anyhow::Result; 8 | use itertools::Itertools; 9 | use mcap_rs as mcap; 10 | use memmap::Mmap; 11 | use rayon::prelude::*; 12 | use tempfile::tempfile; 13 | 14 | #[test] 15 | fn demo_round_trip() -> Result<()> { 16 | use mcap::records::op; 17 | 18 | let mapped = map_mcap("tests/references/demo.mcap")?; 19 | 20 | let messages = mcap::MessageStream::new(&mapped)?; 21 | 22 | let mut tmp = tempfile()?; 23 | let mut writer = mcap::Writer::new(BufWriter::new(&mut tmp))?; 24 | 25 | for m in messages { 26 | // IRL, we'd add channels, then write messages to known channels, 27 | // which skips having to re-hash the channel and its schema each time. 28 | // But since here we'd need to do the same anyways... 29 | writer.write(&m?)?; 30 | } 31 | drop(writer); 32 | 33 | let ours = unsafe { Mmap::map(&tmp) }?; 34 | 35 | // Compare the message stream of our MCAP to the reference one. 36 | for (theirs, ours) in 37 | mcap::MessageStream::new(&mapped)?.zip_eq(mcap::MessageStream::new(&ours)?) 38 | { 39 | assert_eq!(ours?, theirs?) 40 | } 41 | 42 | // We don't use them, but verify the summary offsets. 
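    // (Recap of the layout: the footer's summary_offset_start points at a run
    // of SummaryOffset records, each naming a group opcode and the byte range
    // [group_start, group_start + group_length) holding that group's records.)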
43 | let footer = mcap::read::footer(&ours)?; 44 | assert_ne!(footer.summary_offset_start, 0); 45 | 46 | const FOOTER_LEN: usize = 20 + 8 + 1; // 20 bytes + 8 byte len + 1 byte opcode 47 | let summary_offset_end = ours.len() - FOOTER_LEN - mcap::MAGIC.len(); 48 | 49 | for (i, rec) in mcap::read::LinearReader::sans_magic( 50 | &ours[footer.summary_offset_start as usize..summary_offset_end], 51 | ) 52 | .enumerate() 53 | { 54 | let offset = match rec { 55 | Ok(mcap::records::Record::SummaryOffset(sos)) => sos, 56 | wut => panic!("Expected summary offset, got {:?}", wut), 57 | }; 58 | 59 | // We expect these offsets in this (arbitrary) order: 60 | match (i, offset.group_opcode) { 61 | (0, op::SCHEMA) => (), 62 | (1, op::CHANNEL) => (), 63 | (2, op::CHUNK_INDEX) => (), 64 | (3, op::STATISTICS) => (), 65 | _ => panic!("Summary offset {i} was {offset:?}"), 66 | }; 67 | 68 | // We should be able to read each group from start to finish, 69 | // and the records should be the expected type. 70 | let group_start = offset.group_start as usize; 71 | let group_end = (offset.group_start + offset.group_length) as usize; 72 | for group_rec in mcap::read::LinearReader::sans_magic(&ours[group_start..group_end]) { 73 | match group_rec { 74 | Ok(rec) => assert_eq!(offset.group_opcode, rec.opcode()), 75 | wut => panic!("Expected op {}, got {:?}", offset.group_opcode, wut), 76 | } 77 | } 78 | } 79 | 80 | // Verify the summary and its connectivity. 81 | 82 | let summary = mcap::Summary::read(&ours)?.unwrap(); 83 | assert!(summary.attachment_indexes.is_empty()); 84 | assert!(summary.metadata_indexes.is_empty()); 85 | 86 | // EZ mode: Streamed chunks should match up with a file-level message stream. 87 | for (whole, by_chunk) in mcap::MessageStream::new(&ours)?.zip_eq( 88 | summary 89 | .chunk_indexes 90 | .iter() 91 | .flat_map(|ci| summary.stream_chunk(&ours, ci).unwrap()), 92 | ) { 93 | assert_eq!(whole?, by_chunk?); 94 | } 95 | 96 | // Hard mode: randomly access every message in the MCAP. 97 | // Yes, this is dumb and O(n^2). 98 | let mut messages = Vec::new(); 99 | 100 | for ci in &summary.chunk_indexes { 101 | let mut offsets_and_messages = summary 102 | .read_message_indexes(&ours, ci) 103 | .unwrap() 104 | // At least parallelize the dumb. 105 | .into_par_iter() 106 | .flat_map(|(_k, v)| v) 107 | .map(|e| (e.offset, summary.seek_message(&ours, ci, &e).unwrap())) 108 | .collect::>(); 109 | 110 | offsets_and_messages.sort_unstable_by_key(|im| im.0); 111 | 112 | for om in offsets_and_messages { 113 | messages.push(om.1); 114 | } 115 | } 116 | 117 | for (streamed, seeked) in mcap::MessageStream::new(&ours)?.zip_eq(messages.into_iter()) { 118 | assert_eq!(streamed?, seeked); 119 | } 120 | 121 | Ok(()) 122 | } 123 | 124 | #[test] 125 | fn demo_random_chunk_access() -> Result<()> { 126 | let mapped = map_mcap("tests/references/demo.mcap")?; 127 | 128 | let summary = mcap::Summary::read(&mapped)?.unwrap(); 129 | 130 | // Random access of the second chunk should match the stream of the whole file. 131 | let messages_in_first_chunk: usize = summary 132 | .read_message_indexes(&mapped, &summary.chunk_indexes[0])? 133 | .values() 134 | .map(|entries| entries.len()) 135 | .sum(); 136 | let messages_in_second_chunk: usize = summary 137 | .read_message_indexes(&mapped, &summary.chunk_indexes[1])? 138 | .values() 139 | .map(|entries| entries.len()) 140 | .sum(); 141 | 142 | for (whole, random) in mcap::MessageStream::new(&mapped)? 
143 | .skip(messages_in_first_chunk) 144 | .take(messages_in_second_chunk) 145 | .zip_eq(summary.stream_chunk(&mapped, &summary.chunk_indexes[1])?) 146 | { 147 | assert_eq!(whole?, random?); 148 | } 149 | 150 | // Let's poke around the message indexes 151 | let mut index_entries = summary 152 | .read_message_indexes(&mapped, &summary.chunk_indexes[1])? 153 | .values() 154 | .flatten() 155 | .copied() 156 | .collect::>(); 157 | 158 | index_entries.sort_unstable_by_key(|e| e.offset); 159 | 160 | // Do a big dumb n^2 seek of each message (dear god, don't ever actually do this) 161 | for (entry, message) in index_entries 162 | .iter() 163 | .zip_eq(summary.stream_chunk(&mapped, &summary.chunk_indexes[1])?) 164 | { 165 | let seeked = summary.seek_message(&mapped, &summary.chunk_indexes[1], entry)?; 166 | assert_eq!(seeked, message?); 167 | } 168 | 169 | Ok(()) 170 | } 171 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! A library for manipulating [Foxglove MCAP](https://github.com/foxglove/mcap) files, 2 | //! both reading: 3 | //! 4 | //! ```no_run 5 | //! use std::fs; 6 | //! 7 | //! use anyhow::{Context, Result}; 8 | //! use camino::Utf8Path; 9 | //! use memmap::Mmap; 10 | //! 11 | //! fn map_mcap>(p: P) -> Result { 12 | //! let fd = fs::File::open(p.as_ref()).context("Couldn't open MCAP file")?; 13 | //! unsafe { Mmap::map(&fd) }.context("Couldn't map MCAP file") 14 | //! } 15 | //! 16 | //! fn read_it() -> Result<()> { 17 | //! let mapped = map_mcap("in.mcap")?; 18 | //! 19 | //! for message in mcap_rs::MessageStream::new(&mapped)? { 20 | //! println!("{:?}", message?); 21 | //! // Or whatever else you'd like to do... 22 | //! } 23 | //! Ok(()) 24 | //! } 25 | //! ``` 26 | //! or writing: 27 | //! ```no_run 28 | //! use std::{collections::BTreeMap, fs, io::BufWriter}; 29 | //! 30 | //! use anyhow::Result; 31 | //! 32 | //! use mcap_rs::{Channel, records::MessageHeader, Writer}; 33 | //! 34 | //! fn write_it() -> Result<()> { 35 | //! // To set the profile or compression options, see mcap_rs::WriteOptions. 36 | //! let mut out = Writer::new( 37 | //! BufWriter::new(fs::File::create("out.mcap")?) 38 | //! )?; 39 | //! 40 | //! // Channels and schemas are automatically assigned ID as they're serialized, 41 | //! // and automatically deduplicated with `Arc` when deserialized. 42 | //! let my_channel = Channel { 43 | //! topic: String::from("cool stuff"), 44 | //! schema: None, 45 | //! message_encoding: String::from("application/octet-stream"), 46 | //! metadata: BTreeMap::default() 47 | //! }; 48 | //! 49 | //! let channel_id = out.add_channel(&my_channel)?; 50 | //! 51 | //! out.write_to_known_channel( 52 | //! &MessageHeader { 53 | //! channel_id, 54 | //! sequence: 25, 55 | //! log_time: 6, 56 | //! publish_time: 24 57 | //! }, 58 | //! &[1, 2, 3] 59 | //! )?; 60 | //! out.write_to_known_channel( 61 | //! &MessageHeader { 62 | //! channel_id, 63 | //! sequence: 32, 64 | //! log_time: 23, 65 | //! publish_time: 25 66 | //! }, 67 | //! &[3, 4, 5] 68 | //! )?; 69 | //! 70 | //! out.finish()?; 71 | //! 72 | //! Ok(()) 73 | //! } 74 | //! 
```
75 | 
76 | pub mod read;
77 | pub mod records;
78 | pub mod write;
79 | 
80 | mod io_utils;
81 | 
82 | use std::{borrow::Cow, collections::BTreeMap, fmt, sync::Arc};
83 | 
84 | use thiserror::Error;
85 | 
86 | #[derive(Debug, Error)]
87 | pub enum McapError {
88 |     #[error("Bad magic number")]
89 |     BadMagic,
90 |     #[error("Footer record couldn't be found at the end of the file, before the magic bytes")]
91 |     BadFooter,
92 |     #[error("Attachment CRC failed (expected {saved:08X}, got {calculated:08X})")]
93 |     BadAttachmentCrc { saved: u32, calculated: u32 },
94 |     #[error("Chunk CRC failed (expected {saved:08X}, got {calculated:08X})")]
95 |     BadChunkCrc { saved: u32, calculated: u32 },
96 |     #[error("Data section CRC failed (expected {saved:08X}, got {calculated:08X})")]
97 |     BadDataCrc { saved: u32, calculated: u32 },
98 |     #[error("Summary section CRC failed (expected {saved:08X}, got {calculated:08X})")]
99 |     BadSummaryCrc { saved: u32, calculated: u32 },
100 |     #[error("Index offset and length didn't point to the expected record type")]
101 |     BadIndex,
102 |     #[error("Channel `{0}` has multiple records that don't match.")]
103 |     ConflictingChannels(String),
104 |     #[error("Schema `{0}` has multiple records that don't match.")]
105 |     ConflictingSchemas(String),
106 |     #[error("Record parse failed")]
107 |     Parse(#[from] binrw::Error),
108 |     #[error("I/O error from writing, or reading a compression stream")]
109 |     Io(#[from] std::io::Error),
110 |     #[error("Schema has an ID of 0")]
111 |     InvalidSchemaId,
112 |     #[error("MCAP file ended in the middle of a record")]
113 |     UnexpectedEof,
114 |     #[error("Chunk ended in the middle of a record")]
115 |     UnexpectedEoc,
116 |     #[error("Message {0} referenced unknown channel {1}")]
117 |     UnknownChannel(u32, u16),
118 |     #[error("Channel `{0}` referenced unknown schema {1}")]
119 |     UnknownSchema(String, u16),
120 |     #[error("Found record with opcode {0:02X} in a chunk")]
121 |     UnexpectedChunkRecord(u8),
122 |     #[error("Unsupported compression format `{0}`")]
123 |     UnsupportedCompression(String),
124 | }
125 | 
126 | pub type McapResult<T> = Result<T, McapError>;
127 | 
128 | /// Magic bytes for the MCAP format
129 | pub const MAGIC: &[u8] = &[0x89, b'M', b'C', b'A', b'P', 0x30, b'\r', b'\n'];
130 | 
131 | /// Compression options for chunks of channels, schemas, and messages in an MCAP file
132 | #[derive(Debug, Copy, Clone, Default)]
133 | pub enum Compression {
134 |     #[default]
135 |     Zstd,
136 |     Lz4,
137 | }
138 | 
139 | /// Describes a schema used by one or more [Channel]s in an MCAP file
140 | ///
141 | /// The [`CoW`](std::borrow::Cow) can either borrow directly from the mapped file,
142 | /// or hold its own buffer if it was decompressed from a chunk.
143 | #[derive(Clone, PartialEq, Eq, Hash)]
144 | pub struct Schema<'a> {
145 |     pub name: String,
146 |     pub encoding: String,
147 |     pub data: Cow<'a, [u8]>,
148 | }
149 | 
150 | impl fmt::Debug for Schema<'_> {
151 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
152 |         f.debug_struct("Schema")
153 |             .field("name", &self.name)
154 |             .field("encoding", &self.encoding)
155 |             .finish_non_exhaustive()
156 |     }
157 | }
158 | 
159 | /// Describes a channel which [Message]s are published to in an MCAP file
160 | #[derive(Debug, Clone, PartialEq, Eq, Hash)]
161 | pub struct Channel<'a> {
162 |     pub topic: String,
163 |     pub schema: Option<Arc<Schema<'a>>>,
164 | 
165 |     pub message_encoding: String,
166 |     pub metadata: BTreeMap<String, String>,
167 | }
168 | 
169 | /// An event in an MCAP file, published to a [Channel]
170 | ///
171 | /// The [`CoW`](std::borrow::Cow) can either borrow directly from the mapped file,
172 | /// or hold its own buffer if it was decompressed from a chunk.
173 | #[derive(Debug, Clone, PartialEq, Eq)]
174 | pub struct Message<'a> {
175 |     pub channel: Arc<Channel<'a>>,
176 |     pub sequence: u32,
177 |     pub log_time: u64,
178 |     pub publish_time: u64,
179 |     pub data: Cow<'a, [u8]>,
180 | }
181 | 
182 | /// An attachment and its metadata in an MCAP file
183 | #[derive(Debug, PartialEq, Eq)]
184 | pub struct Attachment<'a> {
185 |     pub log_time: u64,
186 |     pub create_time: u64,
187 |     pub name: String,
188 |     pub content_type: String,
189 |     pub data: Cow<'a, [u8]>,
190 | }
191 | 
192 | pub use read::{MessageStream, Summary};
193 | pub use write::{WriteOptions, Writer};
194 | 
--------------------------------------------------------------------------------
/src/records.rs:
--------------------------------------------------------------------------------
1 | //! Raw records parsed from an MCAP file
2 | //!
3 | //! See <https://github.com/foxglove/mcap>
4 | //!
5 | //! You probably want to use higher-level interfaces, like
6 | //! [`Message`](crate::Message), [`Channel`](crate::Channel), and [`Schema`](crate::Schema),
7 | //! read from iterators like [`MessageStream`](crate::MessageStream).
8 | 
9 | use binrw::io::{Read, Seek, Write};
10 | use binrw::*;
11 | 
12 | use std::{
13 |     borrow::Cow,
14 |     collections::BTreeMap,
15 |     time::{Duration, SystemTime, UNIX_EPOCH},
16 | };
17 | 
18 | /// Opcodes for MCAP file records.
19 | ///
20 | /// "Records are identified by a single-byte opcode.
21 | /// Record opcodes in the range 0x01-0x7F are reserved for future MCAP format usage.
22 | /// 0x80-0xFF are reserved for application extensions and user proposals."
23 | pub mod op {
24 |     pub const HEADER: u8 = 0x01;
25 |     pub const FOOTER: u8 = 0x02;
26 |     pub const SCHEMA: u8 = 0x03;
27 |     pub const CHANNEL: u8 = 0x04;
28 |     pub const MESSAGE: u8 = 0x05;
29 |     pub const CHUNK: u8 = 0x06;
30 |     pub const MESSAGE_INDEX: u8 = 0x07;
31 |     pub const CHUNK_INDEX: u8 = 0x08;
32 |     pub const ATTACHMENT: u8 = 0x09;
33 |     pub const ATTACHMENT_INDEX: u8 = 0x0A;
34 |     pub const STATISTICS: u8 = 0x0B;
35 |     pub const METADATA: u8 = 0x0C;
36 |     pub const METADATA_INDEX: u8 = 0x0D;
37 |     pub const SUMMARY_OFFSET: u8 = 0x0E;
38 |     pub const END_OF_DATA: u8 = 0x0F;
39 | }
40 | 
41 | /// A raw record from an MCAP file.
42 | ///
43 | /// For records with large slices of binary data (schemas, messages, chunks...),
44 | /// we use a [`CoW`](std::borrow::Cow) that can either borrow directly from the mapped file,
45 | /// or hold its own buffer if it was decompressed from a chunk.
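/// Each variant corresponds to one of the [`op`] opcodes, and
/// [`Record::opcode`] below recovers it, which is handy when grouping records
/// by type, as the summary-offset checks in tests/round_trip.rs do.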
46 | #[derive(Debug)]
47 | pub enum Record<'a> {
48 |     Header(Header),
49 |     Footer(Footer),
50 |     Schema {
51 |         header: SchemaHeader,
52 |         data: Cow<'a, [u8]>,
53 |     },
54 |     Channel(Channel),
55 |     Message {
56 |         header: MessageHeader,
57 |         data: Cow<'a, [u8]>,
58 |     },
59 |     Chunk {
60 |         header: ChunkHeader,
61 |         data: &'a [u8],
62 |     },
63 |     MessageIndex(MessageIndex),
64 |     ChunkIndex(ChunkIndex),
65 |     Attachment {
66 |         header: AttachmentHeader,
67 |         data: &'a [u8],
68 |     },
69 |     AttachmentIndex(AttachmentIndex),
70 |     Statistics(Statistics),
71 |     Metadata(Metadata),
72 |     MetadataIndex(MetadataIndex),
73 |     SummaryOffset(SummaryOffset),
74 |     EndOfData(EndOfData),
75 |     /// A record of unknown type
76 |     Unknown {
77 |         opcode: u8,
78 |         data: Cow<'a, [u8]>,
79 |     },
80 | }
81 | 
82 | impl Record<'_> {
83 |     pub fn opcode(&self) -> u8 {
84 |         match &self {
85 |             Record::Header(_) => op::HEADER,
86 |             Record::Footer(_) => op::FOOTER,
87 |             Record::Schema { .. } => op::SCHEMA,
88 |             Record::Channel(_) => op::CHANNEL,
89 |             Record::Message { .. } => op::MESSAGE,
90 |             Record::Chunk { .. } => op::CHUNK,
91 |             Record::MessageIndex(_) => op::MESSAGE_INDEX,
92 |             Record::ChunkIndex(_) => op::CHUNK_INDEX,
93 |             Record::Attachment { .. } => op::ATTACHMENT,
94 |             Record::AttachmentIndex(_) => op::ATTACHMENT_INDEX,
95 |             Record::Statistics(_) => op::STATISTICS,
96 |             Record::Metadata(_) => op::METADATA,
97 |             Record::MetadataIndex(_) => op::METADATA_INDEX,
98 |             Record::SummaryOffset(_) => op::SUMMARY_OFFSET,
99 |             Record::EndOfData(_) => op::END_OF_DATA,
100 |             Record::Unknown { opcode, .. } => *opcode,
101 |         }
102 |     }
103 | }
104 | 
105 | #[binrw]
106 | #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
107 | struct McapString {
108 |     #[br(temp)]
109 |     #[bw(calc = inner.len() as u32)]
110 |     pub len: u32,
111 | 
112 |     #[br(count = len, try_map = String::from_utf8)]
113 |     #[bw(map = |s| s.as_bytes())]
114 |     pub inner: String,
115 | }
116 | 
117 | /// Avoids taking a copy to turn a String to an McapString for serialization
118 | fn write_string<W: Write + Seek>(
119 |     s: &String,
120 |     w: &mut W,
121 |     opts: &WriteOptions,
122 |     args: (),
123 | ) -> BinResult<()> {
124 |     (s.len() as u32).write_options(w, opts, args)?;
125 |     (s.as_bytes()).write_options(w, opts, args)?;
126 |     Ok(())
127 | }
128 | 
129 | fn parse_vec<T: BinRead<Args = ()>, R: Read + Seek>(
130 |     reader: &mut R,
131 |     ro: &ReadOptions,
132 |     args: (),
133 | ) -> BinResult<Vec<T>> {
134 |     let mut parsed = Vec::new();
135 | 
136 |     // Length of the map in BYTES, not records.
137 |     let byte_len: u32 = BinRead::read_options(reader, ro, args)?;
138 |     let pos = reader.stream_position()?;
139 | 
140 |     while (reader.stream_position()? - pos) < byte_len as u64 {
141 |         parsed.push(T::read_options(reader, ro, args)?);
142 |     }
143 | 
144 |     Ok(parsed)
145 | }
146 | 
147 | #[allow(clippy::ptr_arg)] // needed to match binrw macros
148 | fn write_vec<W: Write + Seek, T: BinWrite<Args = ()>>(
149 |     v: &Vec<T>,
150 |     w: &mut W,
151 |     opts: &WriteOptions,
152 |     args: (),
153 | ) -> BinResult<()> {
154 |     use std::io::SeekFrom;
155 | 
156 |     let start = w.stream_position()?;
157 |     (!0u32).write_options(w, opts, args)?; // Revisit...
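    // (That !0u32 is a placeholder length: we have a Write + Seek stream here,
    // so we write a dummy u32, serialize the records, then seek back below and
    // patch in the real byte count before returning.)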
158 |     for e in v.iter() {
159 |         e.write_options(w, opts, args)?;
160 |     }
161 |     let end = w.stream_position()?;
162 |     let data_len = end - start - 4;
163 |     w.seek(SeekFrom::Start(start))?;
164 |     (data_len as u32).write_options(w, opts, args)?;
165 |     assert_eq!(w.seek(SeekFrom::End(0))?, end);
166 |     Ok(())
167 | }
168 | 
169 | #[derive(Debug, Clone, Eq, PartialEq, BinRead, BinWrite)]
170 | pub struct Header {
171 |     #[br(map = |s: McapString| s.inner )]
172 |     #[bw(write_with = write_string)]
173 |     pub profile: String,
174 | 
175 |     #[br(map = |s: McapString| s.inner )]
176 |     #[bw(write_with = write_string)]
177 |     pub library: String,
178 | }
179 | 
180 | #[derive(Debug, Default, Clone, Copy, Eq, PartialEq, BinRead, BinWrite)]
181 | pub struct Footer {
182 |     pub summary_start: u64,
183 |     pub summary_offset_start: u64,
184 |     pub summary_crc: u32,
185 | }
186 | 
187 | #[derive(Debug, Clone, Eq, PartialEq, BinRead, BinWrite)]
188 | pub struct SchemaHeader {
189 |     pub id: u16,
190 | 
191 |     #[br(map = |s: McapString| s.inner )]
192 |     #[bw(write_with = write_string)]
193 |     pub name: String,
194 | 
195 |     #[br(map = |s: McapString| s.inner )]
196 |     #[bw(write_with = write_string)]
197 |     pub encoding: String,
198 | 
199 |     pub data_len: u32,
200 | }
201 | 
202 | fn parse_string_map<R: Read + Seek>(
203 |     reader: &mut R,
204 |     ro: &ReadOptions,
205 |     args: (),
206 | ) -> BinResult<BTreeMap<String, String>> {
207 |     let mut parsed = BTreeMap::new();
208 | 
209 |     // Length of the map in BYTES, not records.
210 |     let byte_len: u32 = BinRead::read_options(reader, ro, args)?;
211 |     let pos = reader.stream_position()?;
212 | 
213 |     while (reader.stream_position()? - pos) < byte_len as u64 {
214 |         let k = McapString::read_options(reader, ro, args)?;
215 |         let v = McapString::read_options(reader, ro, args)?;
216 |         if let Some(_prev) = parsed.insert(k.inner, v.inner) {
217 |             return Err(binrw::Error::Custom {
218 |                 pos,
219 |                 err: Box::new("Duplicate keys in map"),
220 |             });
221 |         }
222 |     }
223 | 
224 |     Ok(parsed)
225 | }
226 | 
227 | fn write_string_map<W: Write + Seek>(
228 |     s: &BTreeMap<String, String>,
229 |     w: &mut W,
230 |     opts: &WriteOptions,
231 |     args: (),
232 | ) -> BinResult<()> {
233 |     // Ugh: figure out total number of bytes to write:
234 |     let mut byte_len = 0;
235 |     for (k, v) in s {
236 |         byte_len += 8; // Four bytes each for lengths of key and value
237 |         byte_len += k.len();
238 |         byte_len += v.len();
239 |     }
240 | 
241 |     (byte_len as u32).write_options(w, opts, args)?;
242 |     let pos = w.stream_position()?;
243 | 
244 |     for (k, v) in s {
245 |         write_string(k, w, opts, args)?;
246 |         write_string(v, w, opts, args)?;
247 |     }
248 |     assert_eq!(w.stream_position()?, pos + byte_len as u64);
249 |     Ok(())
250 | }
251 | 
252 | fn write_int_map<K: BinWrite<Args = ()>, V: BinWrite<Args = ()>, W: Write + Seek>(
253 |     s: &BTreeMap<K, V>,
254 |     w: &mut W,
255 |     opts: &WriteOptions,
256 |     args: (),
257 | ) -> BinResult<()> {
258 |     // Ugh: figure out total number of bytes to write:
259 |     let mut byte_len = 0;
260 |     for _ in s.values() {
261 |         // Hack: We're assuming serialized size of the value is its in-memory size.
262 |         // For ints of all flavors, this should be true.
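        // (e.g., ChunkIndex::message_index_offsets is a BTreeMap<u16, u64>,
        // so each entry serializes to 2 + 8 = 10 bytes, matching the
        // size_of::<K>() + size_of::<V>() sum below.)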
263 |         byte_len += core::mem::size_of::<K>();
264 |         byte_len += core::mem::size_of::<V>();
265 |     }
266 | 
267 |     (byte_len as u32).write_options(w, opts, args)?;
268 |     let pos = w.stream_position()?;
269 | 
270 |     for (k, v) in s {
271 |         k.write_options(w, opts, args)?;
272 |         v.write_options(w, opts, args)?;
273 |     }
274 |     assert_eq!(w.stream_position()?, pos + byte_len as u64);
275 |     Ok(())
276 | }
277 | 
278 | fn parse_int_map<K, V, R>(reader: &mut R, ro: &ReadOptions, args: ()) -> BinResult<BTreeMap<K, V>>
279 | where
280 |     K: BinRead<Args = ()> + std::cmp::Ord,
281 |     V: BinRead<Args = ()>,
282 |     R: Read + Seek,
283 | {
284 |     let mut parsed = BTreeMap::new();
285 | 
286 |     // Length of the map in BYTES, not records.
287 |     let byte_len: u32 = BinRead::read_options(reader, ro, args)?;
288 |     let pos = reader.stream_position()?;
289 | 
290 |     while (reader.stream_position()? - pos) < byte_len as u64 {
291 |         let k = K::read_options(reader, ro, args)?;
292 |         let v = V::read_options(reader, ro, args)?;
293 |         if let Some(_prev) = parsed.insert(k, v) {
294 |             return Err(binrw::Error::Custom {
295 |                 pos,
296 |                 err: Box::new("Duplicate keys in map"),
297 |             });
298 |         }
299 |     }
300 | 
301 |     Ok(parsed)
302 | }
303 | 
304 | #[derive(Debug, Clone, Eq, PartialEq, BinRead, BinWrite)]
305 | pub struct Channel {
306 |     pub id: u16,
307 |     pub schema_id: u16,
308 | 
309 |     #[br(map = |s: McapString| s.inner )]
310 |     #[bw(write_with = write_string)]
311 |     pub topic: String,
312 | 
313 |     #[br(map = |s: McapString| s.inner )]
314 |     #[bw(write_with = write_string)]
315 |     pub message_encoding: String,
316 | 
317 |     #[br(parse_with = parse_string_map)]
318 |     #[bw(write_with = write_string_map)]
319 |     pub metadata: BTreeMap<String, String>,
320 | }
321 | 
322 | pub fn system_time_to_nanos(d: &SystemTime) -> u64 {
323 |     let ns = d.duration_since(UNIX_EPOCH).unwrap().as_nanos();
324 |     assert!(ns <= u64::MAX as u128);
325 |     ns as u64
326 | }
327 | 
328 | pub fn nanos_to_system_time(n: u64) -> SystemTime {
329 |     UNIX_EPOCH + Duration::from_nanos(n)
330 | }
331 | 
332 | #[derive(Debug, Copy, Clone, Eq, PartialEq, BinRead, BinWrite)]
333 | pub struct MessageHeader {
334 |     pub channel_id: u16,
335 |     pub sequence: u32,
336 | 
337 |     pub log_time: u64,
338 | 
339 |     pub publish_time: u64,
340 | }
341 | 
342 | #[derive(Debug, Clone, Eq, PartialEq, BinRead, BinWrite)]
343 | pub struct ChunkHeader {
344 |     pub message_start_time: u64,
345 | 
346 |     pub message_end_time: u64,
347 | 
348 |     pub uncompressed_size: u64,
349 | 
350 |     pub uncompressed_crc: u32,
351 | 
352 |     #[br(map = |s: McapString| s.inner )]
353 |     #[bw(write_with = write_string)]
354 |     pub compression: String,
355 | 
356 |     pub compressed_size: u64,
357 | }
358 | 
359 | #[derive(Debug, Clone, Copy, Eq, PartialEq, BinRead, BinWrite)]
360 | pub struct MessageIndexEntry {
361 |     pub log_time: u64,
362 | 
363 |     pub offset: u64,
364 | }
365 | 
366 | #[derive(Debug, Clone, Eq, PartialEq, BinRead, BinWrite)]
367 | pub struct MessageIndex {
368 |     pub channel_id: u16,
369 | 
370 |     #[br(parse_with = parse_vec)]
371 |     #[bw(write_with = write_vec)]
372 |     pub records: Vec<MessageIndexEntry>,
373 | }
374 | 
375 | #[derive(Debug, Clone, Eq, PartialEq, BinRead, BinWrite)]
376 | pub struct ChunkIndex {
377 |     pub message_start_time: u64,
378 | 
379 |     pub message_end_time: u64,
380 | 
381 |     pub chunk_start_offset: u64,
382 | 
383 |     pub chunk_length: u64,
384 | 
385 |     #[br(parse_with = parse_int_map)]
386 |     #[bw(write_with = write_int_map)]
387 |     pub message_index_offsets: BTreeMap<u16, u64>,
388 | 
389 |     pub message_index_length: u64,
390 | 
391 |     #[br(map = |s: McapString| s.inner )]
392 |     #[bw(write_with = write_string)]
393 | pub compression: String, 394 | 395 | pub compressed_size: u64, 396 | 397 | pub uncompressed_size: u64, 398 | } 399 | 400 | #[derive(Debug, Clone, Eq, PartialEq, BinRead, BinWrite)] 401 | pub struct AttachmentHeader { 402 | pub log_time: u64, 403 | 404 | pub create_time: u64, 405 | 406 | #[br(map = |s: McapString| s.inner )] 407 | #[bw(write_with = write_string)] 408 | pub name: String, 409 | 410 | #[br(map = |s: McapString| s.inner )] 411 | #[bw(write_with = write_string)] 412 | pub content_type: String, 413 | 414 | pub data_len: u64, 415 | } 416 | 417 | #[derive(Debug, Clone, Eq, PartialEq, BinRead, BinWrite)] 418 | pub struct AttachmentIndex { 419 | pub offset: u64, 420 | 421 | pub length: u64, 422 | 423 | pub log_time: u64, 424 | 425 | pub create_time: u64, 426 | 427 | pub data_size: u64, 428 | 429 | #[br(map = |s: McapString| s.inner )] 430 | #[bw(write_with = write_string)] 431 | pub name: String, 432 | 433 | #[br(map = |s: McapString| s.inner )] 434 | #[bw(write_with = write_string)] 435 | pub content_type: String, 436 | } 437 | 438 | #[derive(Debug, Default, Clone, Eq, PartialEq, BinRead, BinWrite)] 439 | pub struct Statistics { 440 | pub message_count: u64, 441 | pub schema_count: u16, 442 | pub channel_count: u32, 443 | pub attachment_count: u32, 444 | pub metadata_count: u32, 445 | pub chunk_count: u32, 446 | 447 | pub message_start_time: u64, 448 | 449 | pub message_end_time: u64, 450 | 451 | #[br(parse_with = parse_int_map)] 452 | #[bw(write_with = write_int_map)] 453 | pub channel_message_counts: BTreeMap, 454 | } 455 | 456 | #[derive(Debug, Clone, Eq, PartialEq, BinRead, BinWrite)] 457 | pub struct Metadata { 458 | #[br(map = |s: McapString| s.inner )] 459 | #[bw(write_with = write_string)] 460 | pub name: String, 461 | 462 | #[br(parse_with = parse_string_map)] 463 | #[bw(write_with = write_string_map)] 464 | pub metadata: BTreeMap, 465 | } 466 | 467 | #[derive(Debug, Clone, Eq, PartialEq, BinRead, BinWrite)] 468 | pub struct MetadataIndex { 469 | pub offset: u64, 470 | 471 | pub length: u64, 472 | 473 | #[br(map = |s: McapString| s.inner )] 474 | #[bw(write_with = write_string)] 475 | pub name: String, 476 | } 477 | 478 | #[derive(Debug, Clone, Copy, Eq, PartialEq, BinRead, BinWrite)] 479 | pub struct SummaryOffset { 480 | pub group_opcode: u8, 481 | pub group_start: u64, 482 | pub group_length: u64, 483 | } 484 | 485 | #[derive(Debug, Default, Clone, Copy, Eq, PartialEq, BinRead, BinWrite)] 486 | pub struct EndOfData { 487 | pub data_section_crc: u32, 488 | } 489 | 490 | #[cfg(test)] 491 | mod tests { 492 | use super::*; 493 | use std::io::Cursor; 494 | 495 | #[test] 496 | fn string_parse() { 497 | let ms: McapString = Cursor::new(b"\x04\0\0\0abcd").read_le().unwrap(); 498 | assert_eq!( 499 | ms, 500 | McapString { 501 | inner: String::from("abcd") 502 | } 503 | ); 504 | 505 | assert!(Cursor::new(b"\x05\0\0\0abcd") 506 | .read_le::() 507 | .is_err()); 508 | 509 | let mut written = Vec::new(); 510 | Cursor::new(&mut written) 511 | .write_le(&McapString { 512 | inner: String::from("hullo"), 513 | }) 514 | .unwrap(); 515 | assert_eq!(&written, b"\x05\0\0\0hullo"); 516 | } 517 | 518 | #[test] 519 | fn header_parse() { 520 | let expected = b"\x04\0\0\0abcd\x03\0\0\x00123"; 521 | 522 | let h: Header = Cursor::new(expected).read_le().unwrap(); 523 | assert_eq!(h.profile, "abcd"); 524 | assert_eq!(h.library, "123"); 525 | 526 | let mut written = Vec::new(); 527 | Cursor::new(&mut written).write_le(&h).unwrap(); 528 | assert_eq!(written, expected); 529 | } 530 | } 531 | 
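A quick aside on the time helpers above: `system_time_to_nanos` and
`nanos_to_system_time` are exact inverses for whole-nanosecond timestamps,
since MCAP stores times as u64 nanoseconds since the Unix epoch. A minimal
sketch (not part of the crate's test suite):

```rust
use std::time::{Duration, UNIX_EPOCH};

use mcap_rs::records::{nanos_to_system_time, system_time_to_nanos};

fn main() {
    // A whole-nanosecond timestamp round-trips losslessly.
    let t = UNIX_EPOCH + Duration::from_nanos(1_650_000_000_000_000_000);
    assert_eq!(system_time_to_nanos(&t), 1_650_000_000_000_000_000);
    assert_eq!(nanos_to_system_time(system_time_to_nanos(&t)), t);
}
```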
--------------------------------------------------------------------------------
/src/write.rs:
--------------------------------------------------------------------------------
1 | //! Write MCAP files
2 | 
3 | use std::{
4 |     borrow::Cow,
5 |     collections::{BTreeMap, HashMap},
6 |     io::{self, prelude::*, Cursor, SeekFrom},
7 | };
8 | 
9 | use binrw::prelude::*;
10 | use byteorder::{WriteBytesExt, LE};
11 | 
12 | use crate::{
13 |     io_utils::CountingCrcWriter,
14 |     records::{self, op, MessageHeader, Record},
15 |     Attachment, Channel, Compression, McapError, McapResult, Message, Schema, MAGIC,
16 | };
17 | 
18 | pub use records::Metadata;
19 | 
20 | enum WriteMode<W: Write + Seek> {
21 |     Raw(W),
22 |     Chunk(ChunkWriter<W>),
23 | }
24 | 
25 | fn op_and_len<W: Write>(w: &mut W, op: u8, len: usize) -> io::Result<()> {
26 |     w.write_u8(op)?;
27 |     w.write_u64::<LE>(len as u64)?;
28 |     Ok(())
29 | }
30 | 
31 | fn write_record<W: Write>(w: &mut W, r: &Record) -> io::Result<()> {
32 |     // Annoying: our stream isn't Seek if we're writing to a compressed chunk stream,
33 |     // so we need an intermediate buffer.
34 |     macro_rules! record {
35 |         ($op:expr, $b:ident) => {{
36 |             let mut rec_buf = Vec::new();
37 |             Cursor::new(&mut rec_buf).write_le($b).unwrap();
38 | 
39 |             op_and_len(w, $op, rec_buf.len())?;
40 |             w.write_all(&rec_buf)?;
41 |         }};
42 |     }
43 | 
44 |     macro_rules! header_and_data {
45 |         ($op:expr, $header:ident, $data:ident) => {{
46 |             let mut header_buf = Vec::new();
47 |             Cursor::new(&mut header_buf).write_le($header).unwrap();
48 | 
49 |             op_and_len(w, $op, header_buf.len() + $data.len())?;
50 |             w.write_all(&header_buf)?;
51 |             w.write_all($data)?;
52 |         }};
53 |     }
54 | 
55 |     match r {
56 |         Record::Header(h) => record!(op::HEADER, h),
57 |         Record::Footer(_) => {
58 |             unreachable!("Footer handles its own serialization because its CRC is self-referencing")
59 |         }
60 |         Record::Schema { header, data } => header_and_data!(op::SCHEMA, header, data),
61 |         Record::Channel(c) => record!(op::CHANNEL, c),
62 |         Record::Message { header, data } => header_and_data!(op::MESSAGE, header, data),
63 |         Record::Chunk { ..
} => {
64 |             unreachable!("Chunks handle their own serialization due to seeking shenanigans")
65 |         }
66 |         Record::MessageIndex(_) => {
67 |             unreachable!("MessageIndexes handle their own serialization to recycle the buffer between indexes")
68 |         }
69 |         Record::ChunkIndex(c) => record!(op::CHUNK_INDEX, c),
70 |         Record::Attachment { header, data } => {
71 |             assert_eq!(header.data_len, data.len() as u64);
72 | 
73 |             // Can't use header_and_data since we need to checksum those,
74 |             // but not the op and len
75 |             let mut header_buf = Vec::new();
76 |             Cursor::new(&mut header_buf).write_le(header).unwrap();
77 |             op_and_len(w, op::ATTACHMENT, header_buf.len() + data.len() + 4)?; // 4 for crc
78 | 
79 |             let mut checksummer = CountingCrcWriter::new(w);
80 |             checksummer.write_all(&header_buf)?;
81 |             checksummer.write_all(data)?;
82 |             let (w, crc) = checksummer.finalize();
83 |             w.write_u32::<LE>(crc)?;
84 |         }
85 |         Record::AttachmentIndex(ai) => record!(op::ATTACHMENT_INDEX, ai),
86 |         Record::Statistics(s) => record!(op::STATISTICS, s),
87 |         Record::Metadata(m) => record!(op::METADATA, m),
88 |         Record::MetadataIndex(mi) => record!(op::METADATA_INDEX, mi),
89 |         Record::SummaryOffset(so) => record!(op::SUMMARY_OFFSET, so),
90 |         Record::EndOfData(eod) => record!(op::END_OF_DATA, eod),
91 |         _ => todo!(),
92 |     };
93 |     Ok(())
94 | }
95 | 
96 | #[derive(Debug, Clone)]
97 | pub struct WriteOptions {
98 |     compression: Option<Compression>,
99 |     profile: String,
100 | }
101 | 
102 | impl Default for WriteOptions {
103 |     fn default() -> Self {
104 |         Self {
105 |             compression: Some(Compression::Zstd),
106 |             profile: String::new(),
107 |         }
108 |     }
109 | }
110 | 
111 | impl WriteOptions {
112 |     pub fn new() -> Self {
113 |         Self::default()
114 |     }
115 | 
116 |     pub fn compression(self, compression: Option<Compression>) -> Self {
117 |         Self {
118 |             compression,
119 |             ..self
120 |         }
121 |     }
122 | 
123 |     pub fn profile<S: Into<String>>(self, profile: S) -> Self {
124 |         Self {
125 |             profile: profile.into(),
126 |             ..self
127 |         }
128 |     }
129 | 
130 |     /// Creates a [`Writer`] which writes to `w` using the given options
131 |     pub fn create<'a, W: Write + Seek>(self, w: W) -> McapResult<Writer<'a, W>> {
132 |         Writer::with_options(w, self)
133 |     }
134 | }
135 | 
136 | /// Writes an MCAP file to the given [writer](Write).
137 | ///
138 | /// Users should call [`finish()`](Self::finish) to flush the stream
139 | /// and check for errors when done; otherwise the result will be unwrapped on drop.
140 | pub struct Writer<'a, W: Write + Seek> { 141 | writer: Option<WriteMode<W>>, 142 | compression: Option<Compression>, 143 | schemas: HashMap<Schema<'a>, u16>, 144 | channels: HashMap<Channel<'a>, u16>, 145 | stats: records::Statistics, 146 | chunk_indexes: Vec<records::ChunkIndex>, 147 | attachment_indexes: Vec<records::AttachmentIndex>, 148 | metadata_indexes: Vec<records::MetadataIndex>, 149 | } 150 | 151 | impl<'a, W: Write + Seek> Writer<'a, W> { 152 | pub fn new(writer: W) -> McapResult<Self> { 153 | Self::with_options(writer, WriteOptions::default()) 154 | } 155 | 156 | fn with_options(mut writer: W, opts: WriteOptions) -> McapResult<Self> { 157 | writer.write_all(MAGIC)?; 158 | 159 | write_record( 160 | &mut writer, 161 | &Record::Header(records::Header { 162 | profile: opts.profile, 163 | library: String::from("mcap-rs-") + env!("CARGO_PKG_VERSION"), 164 | }), 165 | )?; 166 | 167 | Ok(Self { 168 | writer: Some(WriteMode::Raw(writer)), 169 | compression: opts.compression, 170 | schemas: HashMap::new(), 171 | channels: HashMap::new(), 172 | stats: records::Statistics::default(), 173 | chunk_indexes: Vec::new(), 174 | attachment_indexes: Vec::new(), 175 | metadata_indexes: Vec::new(), 176 | }) 177 | } 178 | 179 | /// Adds a channel (and its provided schema, if any), returning its ID. 180 | /// 181 | /// Useful with subsequent calls to [`write_to_known_channel()`](Self::write_to_known_channel) 182 | pub fn add_channel(&mut self, chan: &Channel<'a>) -> McapResult<u16> { 183 | let schema_id = match &chan.schema { 184 | Some(s) => self.add_schema(s)?, 185 | None => 0, 186 | }; 187 | 188 | if let Some(id) = self.channels.get(chan) { 189 | return Ok(*id); 190 | } 191 | 192 | self.stats.channel_count += 1; 193 | 194 | let next_channel_id = self.channels.len() as u16; 195 | assert!(self 196 | .channels 197 | .insert(chan.clone(), next_channel_id) 198 | .is_none()); 199 | self.chunkin_time()? 200 | .write_channel(next_channel_id, schema_id, chan)?; 201 | Ok(next_channel_id) 202 | } 203 | 204 | fn add_schema(&mut self, schema: &Schema<'a>) -> McapResult<u16> { 205 | if let Some(id) = self.schemas.get(schema) { 206 | return Ok(*id); 207 | } 208 | 209 | self.stats.schema_count += 1; 210 | 211 | // Schema IDs cannot be zero, that's the sentinel value in a channel 212 | // for "no schema" 213 | let next_schema_id = self.schemas.len() as u16 + 1; 214 | assert!(self 215 | .schemas 216 | .insert(schema.clone(), next_schema_id) 217 | .is_none()); 218 | self.chunkin_time()?.write_schema(next_schema_id, schema)?; 219 | Ok(next_schema_id) 220 | } 221 | 222 | /// Write the given message (and its provided channel, if needed). 223 | pub fn write(&mut self, message: &Message<'a>) -> McapResult<()> { 224 | let channel_id = self.add_channel(&message.channel)?; 225 | let header = MessageHeader { 226 | channel_id, 227 | sequence: message.sequence, 228 | log_time: message.log_time, 229 | publish_time: message.publish_time, 230 | }; 231 | let data: &[u8] = &message.data; 232 | self.write_to_known_channel(&header, data) 233 | } 234 | 235 | /// Write a message to an added channel, given its ID. 236 | /// 237 | /// This skips hash lookups of the channel and schema if you already added them.
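///
/// A sketch of the intended pattern, using a made-up schema-less channel
/// (field names follow this crate's `Channel` and `MessageHeader`):
/// ```no_run
/// # fn demo<W: std::io::Write + std::io::Seek>(writer: &mut mcap_rs::Writer<W>) -> mcap_rs::McapResult<()> {
/// let channel_id = writer.add_channel(&mcap_rs::Channel {
///     topic: String::from("/chat"),
///     schema: None,
///     message_encoding: String::from("json"),
///     metadata: Default::default(),
/// })?;
/// for sequence in 0..10u32 {
///     let header = mcap_rs::records::MessageHeader {
///         channel_id,
///         sequence,
///         log_time: sequence as u64,
///         publish_time: sequence as u64,
///     };
///     writer.write_to_known_channel(&header, b"{}")?;
/// }
/// # Ok(())
/// # }
/// ```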
238 | pub fn write_to_known_channel( 239 | &mut self, 240 | header: &MessageHeader, 241 | data: &[u8], 242 | ) -> McapResult<()> { 243 | // The number of channels should be relatively small, 244 | // do a quick linear search to make sure we're not being given a bogus ID 245 | if !self.channels.values().any(|id| *id == header.channel_id) { 246 | return Err(McapError::UnknownChannel( 247 | header.sequence, 248 | header.channel_id, 249 | )); 250 | } 251 | 252 | self.stats.message_count += 1; 253 | self.stats.message_start_time = match self.stats.message_start_time { 254 | 0 => header.log_time, 255 | nz => nz.min(header.log_time), 256 | }; 257 | self.stats.message_end_time = match self.stats.message_end_time { 258 | 0 => header.log_time, 259 | nz => nz.max(header.log_time), 260 | }; 261 | *self 262 | .stats 263 | .channel_message_counts 264 | .entry(header.channel_id) 265 | .or_insert(0) += 1; 266 | 267 | self.chunkin_time()?.write_message(header, data)?; 268 | Ok(()) 269 | } 270 | 271 | pub fn attach(&mut self, attachment: &Attachment) -> McapResult<()> { 272 | self.stats.attachment_count += 1; 273 | 274 | let header = records::AttachmentHeader { 275 | log_time: attachment.log_time, 276 | create_time: attachment.create_time, 277 | name: attachment.name.clone(), 278 | content_type: attachment.content_type.clone(), 279 | data_len: attachment.data.len() as u64, 280 | }; 281 | 282 | // Attachments don't live in chunks. 283 | let w = self.finish_chunk()?; 284 | 285 | let offset = w.stream_position()?; 286 | 287 | write_record( 288 | w, 289 | &Record::Attachment { 290 | header, 291 | data: &attachment.data, 292 | }, 293 | )?; 294 | 295 | let length = w.stream_position()? - offset; 296 | self.attachment_indexes.push(records::AttachmentIndex { 297 | offset, 298 | length, 299 | log_time: attachment.log_time, 300 | create_time: attachment.create_time, 301 | data_size: attachment.data.len() as u64, 302 | name: attachment.name.clone(), 303 | content_type: attachment.content_type.clone(), 304 | }); 305 | 306 | Ok(()) 307 | } 308 | 309 | pub fn write_metadata(&mut self, metadata: &Metadata) -> McapResult<()> { 310 | self.stats.metadata_count += 1; 311 | 312 | let w = self.finish_chunk()?; 313 | let offset = w.stream_position()?; 314 | 315 | // Should we specialize this to avoid taking a clone of the map? 316 | write_record(w, &Record::Metadata(metadata.clone()))?; 317 | 318 | let length = w.stream_position()? - offset; 319 | 320 | self.metadata_indexes.push(records::MetadataIndex { 321 | offset, 322 | length, 323 | name: metadata.name.clone(), 324 | }); 325 | 326 | Ok(()) 327 | } 328 | 329 | /// Finishes the current chunk, if we have one, and flushes the underlying 330 | /// [writer](Write). 331 | /// 332 | /// We finish the chunk to guarantee that the file can be streamed by future 333 | /// readers at least up to this point. 334 | /// (The alternative is to just flush the writer mid-chunk. 335 | /// But if we did that, and then writing was suddenly interrupted afterwards, 336 | /// readers would have to try to recover a half-written chunk, 337 | /// probably with an unfinished compression stream.) 338 | /// 339 | /// Note that lossless compression schemes like LZ4 and Zstd improve 340 | /// as they go, so larger chunks will tend to have better compression. 341 | /// (Of course, this depends heavily on the entropy of what's being compressed! 342 | /// A stream of zeroes will compress great at any chunk size, and a stream 343 | /// of random data will compress terribly at any chunk size.)
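///
/// A sketch of flushing periodically to bound how much data an interrupted
/// process could lose (the every-100-messages interval here is arbitrary):
/// ```no_run
/// # fn demo<'a, W: std::io::Write + std::io::Seek>(
/// #     writer: &mut mcap_rs::Writer<'a, W>,
/// #     messages: &[mcap_rs::Message<'a>],
/// # ) -> mcap_rs::McapResult<()> {
/// for (i, m) in messages.iter().enumerate() {
///     writer.write(m)?;
///     if (i + 1) % 100 == 0 {
///         writer.flush()?; // Finishes the current chunk, then flushes the writer.
///     }
/// }
/// # Ok(())
/// # }
/// ```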
344 | pub fn flush(&mut self) -> McapResult<()> { 345 | self.finish_chunk()?.flush()?; 346 | Ok(()) 347 | } 348 | 349 | /// `.expect()` message when we go to write and self.writer is `None`, 350 | /// which should only happen when [`Writer::finish()`] was called. 351 | const WHERE_WRITER: &'static str = "Trying to write a record on a finished MCAP"; 352 | 353 | /// Starts a new chunk if we haven't done so already. 354 | fn chunkin_time(&mut self) -> McapResult<&mut ChunkWriter<W>> { 355 | // Some Rust trickery: we can't move the writer out of self.writer, 356 | // leave that empty for a bit, and then replace it with a ChunkWriter. 357 | // (That would leave it in an unspecified state if we bailed here!) 358 | // Instead, briefly swap it out for a null writer while we set up the chunker. 359 | // The writer will only be None if finish() was called. 360 | let prev_writer = self.writer.take().expect(Self::WHERE_WRITER); 361 | 362 | self.writer = Some(match prev_writer { 363 | WriteMode::Raw(w) => { 364 | // It's chunkin time. 365 | self.stats.chunk_count += 1; 366 | WriteMode::Chunk(ChunkWriter::new(w, self.compression)?) 367 | } 368 | chunk => chunk, 369 | }); 370 | 371 | match &mut self.writer { 372 | Some(WriteMode::Chunk(c)) => Ok(c), 373 | _ => unreachable!(), 374 | } 375 | } 376 | 377 | /// Finish the current chunk, if we have one. 378 | fn finish_chunk(&mut self) -> McapResult<&mut W> { 379 | // See above 380 | let prev_writer = self.writer.take().expect(Self::WHERE_WRITER); 381 | 382 | self.writer = Some(match prev_writer { 383 | WriteMode::Chunk(c) => { 384 | let (w, index) = c.finish()?; 385 | self.chunk_indexes.push(index); 386 | WriteMode::Raw(w) 387 | } 388 | raw => raw, 389 | }); 390 | 391 | match &mut self.writer { 392 | Some(WriteMode::Raw(w)) => Ok(w), 393 | _ => unreachable!(), 394 | } 395 | } 396 | 397 | /// Finishes any current chunk and writes out the rest of the file. 398 | /// 399 | /// Subsequent calls to other methods will panic. 400 | pub fn finish(&mut self) -> McapResult<()> { 401 | if self.writer.is_none() { 402 | // We already called finish(). 403 | // Maybe we're dropping after the user called it? 404 | return Ok(()); 405 | } 406 | 407 | // Finish any chunk we were working on and update stats, indexes, etc. 408 | self.finish_chunk()?; 409 | 410 | // Grab the writer - self.writer becoming None makes subsequent writes fail. 411 | let mut writer = match self.writer.take() { 412 | // We called finish_chunk() above, so we're back to raw writes for 413 | // the summary section. 414 | Some(WriteMode::Raw(w)) => w, 415 | _ => unreachable!(), 416 | }; 417 | let writer = &mut writer; 418 | 419 | // We're done with the data section! 420 | write_record(writer, &Record::EndOfData(records::EndOfData::default()))?; 421 | 422 | // Take all the data we need, swapping in empty containers. 423 | // Without this, we get yelled at for moving things out of a mutable ref 424 | // (&mut self). 425 | // (We could get around all this noise by having finish() take self, 426 | // but then it wouldn't be droppable _and_ finish...able.)
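// (std::mem::take would do the same job, since each of these fields
// implements Default; the explicit swaps below just spell out that
// self is left holding empty-but-valid containers.)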
427 | let mut stats = records::Statistics::default(); 428 | std::mem::swap(&mut stats, &mut self.stats); 429 | 430 | let mut chunk_indexes = Vec::new(); 431 | std::mem::swap(&mut chunk_indexes, &mut self.chunk_indexes); 432 | 433 | let mut attachment_indexes = Vec::new(); 434 | std::mem::swap(&mut attachment_indexes, &mut self.attachment_indexes); 435 | 436 | let mut metadata_indexes = Vec::new(); 437 | std::mem::swap(&mut metadata_indexes, &mut self.metadata_indexes); 438 | 439 | // Make some Schema and Channel lists for the summary section. 440 | // Be sure to grab schema IDs for the channels from the schema hash map before we drain it! 441 | struct ChannelSummary<'a> { 442 | channel: Channel<'a>, 443 | channel_id: u16, 444 | schema_id: u16, 445 | } 446 | 447 | let mut all_channels: Vec> = self 448 | .channels 449 | .drain() 450 | .map(|(channel, channel_id)| { 451 | let schema_id = match &channel.schema { 452 | Some(s) => *self.schemas.get(s).unwrap(), 453 | None => 0, 454 | }; 455 | 456 | ChannelSummary { 457 | channel, 458 | channel_id, 459 | schema_id, 460 | } 461 | }) 462 | .collect(); 463 | all_channels.sort_unstable_by_key(|cs| cs.channel_id); 464 | 465 | let mut all_schemas: Vec<(Schema<'_>, u16)> = self.schemas.drain().collect(); 466 | all_schemas.sort_unstable_by_key(|(_, v)| *v); 467 | 468 | let mut offsets = Vec::new(); 469 | 470 | let summary_start = writer.stream_position()?; 471 | 472 | // Let's get a CRC of the summary section. 473 | let mut ccw = CountingCrcWriter::new(writer); 474 | 475 | fn posit(ccw: &mut CountingCrcWriter) -> io::Result { 476 | ccw.get_mut().stream_position() 477 | } 478 | 479 | // Write all schemas. 480 | let schemas_start = summary_start; 481 | for (schema, id) in all_schemas { 482 | let header = records::SchemaHeader { 483 | id, 484 | name: schema.name, 485 | encoding: schema.encoding, 486 | data_len: schema.data.len() as u32, 487 | }; 488 | let data = schema.data; 489 | 490 | write_record(&mut ccw, &Record::Schema { header, data })?; 491 | } 492 | let schemas_end = posit(&mut ccw)?; 493 | if schemas_end - schemas_start > 0 { 494 | offsets.push(records::SummaryOffset { 495 | group_opcode: op::SCHEMA, 496 | group_start: schemas_start, 497 | group_length: schemas_end - schemas_start, 498 | }); 499 | } 500 | 501 | // Write all channels. 502 | let channels_start = schemas_end; 503 | for cs in all_channels { 504 | let rec = records::Channel { 505 | id: cs.channel_id, 506 | schema_id: cs.schema_id, 507 | topic: cs.channel.topic, 508 | message_encoding: cs.channel.message_encoding, 509 | metadata: cs.channel.metadata, 510 | }; 511 | write_record(&mut ccw, &Record::Channel(rec))?; 512 | } 513 | let channels_end = posit(&mut ccw)?; 514 | if channels_end - channels_start > 0 { 515 | offsets.push(records::SummaryOffset { 516 | group_opcode: op::CHANNEL, 517 | group_start: channels_start, 518 | group_length: channels_end - channels_start, 519 | }); 520 | } 521 | 522 | // Write all chunk indexes. 
523 | let chunk_indexes_start = channels_end; 524 | for index in chunk_indexes { 525 | write_record(&mut ccw, &Record::ChunkIndex(index))?; 526 | } 527 | let chunk_indexes_end = posit(&mut ccw)?; 528 | if chunk_indexes_end - chunk_indexes_start > 0 { 529 | offsets.push(records::SummaryOffset { 530 | group_opcode: op::CHUNK_INDEX, 531 | group_start: chunk_indexes_start, 532 | group_length: chunk_indexes_end - chunk_indexes_start, 533 | }); 534 | } 535 | 536 | // ...and attachment indexes 537 | let attachment_indexes_start = chunk_indexes_end; 538 | for index in attachment_indexes { 539 | write_record(&mut ccw, &Record::AttachmentIndex(index))?; 540 | } 541 | let attachment_indexes_end = posit(&mut ccw)?; 542 | if attachment_indexes_end - attachment_indexes_start > 0 { 543 | offsets.push(records::SummaryOffset { 544 | group_opcode: op::ATTACHMENT_INDEX, 545 | group_start: attachment_indexes_start, 546 | group_length: attachment_indexes_end - attachment_indexes_start, 547 | }); 548 | } 549 | 550 | // ...and metadata indexes 551 | let metadata_indexes_start = attachment_indexes_end; 552 | for index in metadata_indexes { 553 | write_record(&mut ccw, &Record::MetadataIndex(index))?; 554 | } 555 | let metadata_indexes_end = posit(&mut ccw)?; 556 | if metadata_indexes_end - metadata_indexes_start > 0 { 557 | offsets.push(records::SummaryOffset { 558 | group_opcode: op::METADATA_INDEX, 559 | group_start: metadata_indexes_start, 560 | group_length: metadata_indexes_end - metadata_indexes_start, 561 | }); 562 | } 563 | 564 | let stats_start = metadata_indexes_end; 565 | write_record(&mut ccw, &Record::Statistics(stats))?; 566 | let stats_end = posit(&mut ccw)?; 567 | assert!(stats_end > stats_start); 568 | offsets.push(records::SummaryOffset { 569 | group_opcode: op::STATISTICS, 570 | group_start: stats_start, 571 | group_length: stats_end - stats_start, 572 | }); 573 | 574 | // Write the summary offsets we've been accumulating 575 | let summary_offset_start = stats_end; 576 | for offset in offsets { 577 | write_record(&mut ccw, &Record::SummaryOffset(offset))?; 578 | } 579 | 580 | // Wat: the CRC in the footer _includes_ part of the footer. 
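// Concretely: the summary CRC we emit below covers everything from summary_start
// through the footer's opcode, length, and both offset fields - that is, everything
// except the four CRC bytes themselves and the trailing magic.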
581 | op_and_len(&mut ccw, op::FOOTER, 20)?; 582 | ccw.write_u64::(summary_start)?; 583 | ccw.write_u64::(summary_offset_start)?; 584 | 585 | let (writer, summary_crc) = ccw.finalize(); 586 | 587 | writer.write_u32::(summary_crc)?; 588 | 589 | writer.write_all(MAGIC)?; 590 | writer.flush()?; 591 | Ok(()) 592 | } 593 | } 594 | 595 | impl<'a, W: Write + Seek> Drop for Writer<'a, W> { 596 | fn drop(&mut self) { 597 | self.finish().unwrap() 598 | } 599 | } 600 | 601 | enum Compressor { 602 | Null(W), 603 | Zstd(zstd::Encoder<'static, W>), 604 | Lz4(lz4::Encoder), 605 | } 606 | 607 | impl Compressor { 608 | fn finish(self) -> io::Result { 609 | Ok(match self { 610 | Compressor::Null(w) => w, 611 | Compressor::Zstd(w) => w.finish()?, 612 | Compressor::Lz4(w) => { 613 | let (w, err) = w.finish(); 614 | err?; 615 | w 616 | } 617 | }) 618 | } 619 | } 620 | 621 | impl Write for Compressor { 622 | fn write(&mut self, buf: &[u8]) -> io::Result { 623 | match self { 624 | Compressor::Null(w) => w.write(buf), 625 | Compressor::Zstd(w) => w.write(buf), 626 | Compressor::Lz4(w) => w.write(buf), 627 | } 628 | } 629 | 630 | fn flush(&mut self) -> io::Result<()> { 631 | match self { 632 | Compressor::Null(w) => w.flush(), 633 | Compressor::Zstd(w) => w.flush(), 634 | Compressor::Lz4(w) => w.flush(), 635 | } 636 | } 637 | } 638 | 639 | struct ChunkWriter { 640 | header_start: u64, 641 | stream_start: u64, 642 | header: records::ChunkHeader, 643 | compressor: CountingCrcWriter>, 644 | indexes: BTreeMap>, 645 | } 646 | 647 | impl ChunkWriter { 648 | fn new(mut writer: W, compression: Option) -> McapResult { 649 | let header_start = writer.stream_position()?; 650 | 651 | op_and_len(&mut writer, op::CHUNK, !0)?; 652 | 653 | let compression_name = match compression { 654 | Some(Compression::Zstd) => "zstd", 655 | Some(Compression::Lz4) => "lz4", 656 | None => "", 657 | }; 658 | 659 | let header = records::ChunkHeader { 660 | message_start_time: 0, 661 | message_end_time: 0, 662 | uncompressed_size: !0, 663 | uncompressed_crc: !0, 664 | compression: String::from(compression_name), 665 | compressed_size: !0, 666 | }; 667 | 668 | writer.write_le(&header)?; 669 | let stream_start = writer.stream_position()?; 670 | 671 | let compressor = match compression { 672 | Some(Compression::Zstd) => { 673 | let mut enc = zstd::Encoder::new(writer, 0)?; 674 | enc.multithread(num_cpus::get_physical() as u32)?; 675 | Compressor::Zstd(enc) 676 | } 677 | Some(Compression::Lz4) => { 678 | let b = lz4::EncoderBuilder::new(); 679 | Compressor::Lz4(b.build(writer)?) 
680 | } 681 | None => Compressor::Null(writer), 682 | }; 683 | let compressor = CountingCrcWriter::new(compressor); 684 | Ok(Self { 685 | compressor, 686 | header_start, 687 | stream_start, 688 | header, 689 | indexes: BTreeMap::new(), 690 | }) 691 | } 692 | 693 | fn write_schema(&mut self, id: u16, schema: &Schema) -> McapResult<()> { 694 | let header = records::SchemaHeader { 695 | id, 696 | name: schema.name.clone(), 697 | encoding: schema.encoding.clone(), 698 | data_len: schema.data.len() as u32, 699 | }; 700 | write_record( 701 | &mut self.compressor, 702 | &Record::Schema { 703 | header, 704 | data: Cow::Borrowed(&schema.data), 705 | }, 706 | )?; 707 | Ok(()) 708 | } 709 | 710 | fn write_channel(&mut self, id: u16, schema_id: u16, chan: &Channel) -> McapResult<()> { 711 | assert_eq!(schema_id == 0, chan.schema.is_none()); 712 | 713 | let rec = records::Channel { 714 | id, 715 | schema_id, 716 | topic: chan.topic.clone(), 717 | message_encoding: chan.message_encoding.clone(), 718 | metadata: chan.metadata.clone(), 719 | }; 720 | 721 | write_record(&mut self.compressor, &Record::Channel(rec))?; 722 | Ok(()) 723 | } 724 | 725 | fn write_message(&mut self, header: &MessageHeader, data: &[u8]) -> McapResult<()> { 726 | // Update min/max time 727 | self.header.message_start_time = match self.header.message_start_time { 728 | 0 => header.log_time, 729 | nz => nz.min(header.log_time), 730 | }; 731 | self.header.message_end_time = match self.header.message_end_time { 732 | 0 => header.log_time, 733 | nz => nz.max(header.log_time), 734 | }; 735 | 736 | // Add an index for this message 737 | self.indexes 738 | .entry(header.channel_id) 739 | .or_default() 740 | .push(records::MessageIndexEntry { 741 | log_time: header.log_time, 742 | offset: self.compressor.position(), 743 | }); 744 | 745 | write_record( 746 | &mut self.compressor, 747 | &Record::Message { 748 | header: *header, 749 | data: Cow::Borrowed(data), 750 | }, 751 | )?; 752 | Ok(()) 753 | } 754 | 755 | fn finish(mut self) -> McapResult<(W, records::ChunkIndex)> { 756 | // Get the number of uncompressed bytes written and the CRC. 757 | self.header.uncompressed_size = self.compressor.position(); 758 | let (stream, crc) = self.compressor.finalize(); 759 | self.header.uncompressed_crc = crc; 760 | 761 | // Finalize the compression stream - it maintains an internal buffer. 762 | let mut writer = stream.finish()?; 763 | let end_of_stream = writer.stream_position()?; 764 | self.header.compressed_size = end_of_stream - self.stream_start; 765 | let record_size = (end_of_stream - self.header_start) as usize - 9; // 1 byte op, 8 byte len 766 | 767 | // Back up, write our finished header, then continue at the end of the stream. 768 | writer.seek(SeekFrom::Start(self.header_start))?; 769 | op_and_len(&mut writer, op::CHUNK, record_size)?; 770 | writer.write_le(&self.header)?; 771 | assert_eq!(self.stream_start, writer.stream_position()?); 772 | assert_eq!(writer.seek(SeekFrom::End(0))?, end_of_stream); 773 | 774 | // Write our message indexes 775 | let mut message_index_offsets: BTreeMap = BTreeMap::new(); 776 | 777 | let mut index_buf = Vec::new(); 778 | for (channel_id, records) in self.indexes { 779 | assert!(message_index_offsets 780 | .insert(channel_id, writer.stream_position()?) 
781 | .is_none()); 782 | index_buf.clear(); 783 | let index = records::MessageIndex { 784 | channel_id, 785 | records, 786 | }; 787 | 788 | Cursor::new(&mut index_buf).write_le(&index)?; 789 | op_and_len(&mut writer, op::MESSAGE_INDEX, index_buf.len())?; 790 | writer.write_all(&index_buf)?; 791 | } 792 | let end_of_indexes = writer.stream_position()?; 793 | 794 | let index = records::ChunkIndex { 795 | message_start_time: self.header.message_start_time, 796 | message_end_time: self.header.message_end_time, 797 | chunk_start_offset: self.header_start, 798 | chunk_length: end_of_stream - self.header_start, 799 | message_index_offsets, 800 | message_index_length: end_of_indexes - end_of_stream, 801 | compression: self.header.compression, 802 | compressed_size: self.header.compressed_size, 803 | uncompressed_size: self.header.uncompressed_size, 804 | }; 805 | 806 | Ok((writer, index)) 807 | } 808 | } 809 | -------------------------------------------------------------------------------- /src/read.rs: -------------------------------------------------------------------------------- 1 | //! Read MCAP files 2 | //! 3 | //! MCAPs are read from a byte slice instead of a [`Read`] trait object. 4 | //! This helps us avoid unnecessary copies, since [`Schema`]s and [`Message`]s 5 | //! can refer directly to their data. 6 | //! 7 | //! Consider [memory-mapping](https://docs.rs/memmap/0.7.0/memmap/struct.Mmap.html) 8 | //! the file - the OS will load (and cache!) it on-demand, without any 9 | //! further system calls. 10 | use std::{ 11 | borrow::Cow, 12 | collections::{BTreeMap, HashMap}, 13 | fmt, 14 | io::{self, prelude::*, Cursor}, 15 | sync::Arc, 16 | }; 17 | 18 | use binrw::prelude::*; 19 | use crc32fast::hash as crc32; 20 | use enumset::{enum_set, EnumSet, EnumSetType}; 21 | use log::*; 22 | 23 | use crate::{ 24 | io_utils::CountingCrcReader, 25 | records::{self, op, Record}, 26 | Attachment, Channel, McapError, McapResult, Message, Schema, MAGIC, 27 | }; 28 | 29 | /// Nonstandard reading options, e.g., 30 | /// to be more lenient when trying to recover incomplete/damaged files. 31 | /// 32 | /// More may be added in future releases. 33 | #[derive(EnumSetType, Debug)] 34 | pub enum Options { 35 | /// Don't require the MCAP file to end with its magic bytes. 36 | IgnoreEndMagic, 37 | } 38 | 39 | /// Scans a mapped MCAP file from start to end, returning each record. 40 | /// 41 | /// You probably want a [MessageStream] instead - this yields the raw records 42 | /// from the file without any postprocessing (decompressing chunks, etc.) 43 | /// and is mostly meant as a building block for higher-level readers. 44 | pub struct LinearReader<'a> { 45 | buf: &'a [u8], 46 | malformed: bool, 47 | } 48 | 49 | impl<'a> LinearReader<'a> { 50 | /// Create a reader for the given file, 51 | /// checking [`MAGIC`] bytes on both ends. 52 | pub fn new(buf: &'a [u8]) -> McapResult { 53 | Self::new_with_options(buf, enum_set!()) 54 | } 55 | 56 | /// Create a reader for the given file with special options. 
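///
/// For example, a sketch of scraping what we can out of a truncated file
/// (`mapped` is assumed to hold the file's bytes):
/// ```no_run
/// use enumset::enum_set;
/// use mcap_rs::read::{LinearReader, Options};
/// # fn demo(mapped: &[u8]) -> mcap_rs::McapResult<()> {
/// for record in LinearReader::new_with_options(mapped, enum_set!(Options::IgnoreEndMagic))? {
///     println!("{:?}", record?);
/// }
/// # Ok(())
/// # }
/// ```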
57 | pub fn new_with_options(buf: &'a [u8], options: EnumSet) -> McapResult { 58 | if !buf.starts_with(MAGIC) 59 | || (!options.contains(Options::IgnoreEndMagic) 60 | && (!buf.ends_with(MAGIC) || buf.len() < 2 * MAGIC.len())) 61 | { 62 | return Err(McapError::BadMagic); 63 | } 64 | let buf = &buf[MAGIC.len()..]; 65 | if buf.ends_with(MAGIC) { 66 | Ok(Self::sans_magic(&buf[0..buf.len() - MAGIC.len()])) 67 | } else { 68 | Ok(Self::sans_magic(buf)) 69 | } 70 | } 71 | 72 | /// Like [`new()`](Self::new), but assumes `buf` has the magic bytes sliced off. 73 | /// 74 | /// Useful for iterating through slices of an MCAP file instead of the whole thing. 75 | pub fn sans_magic(buf: &'a [u8]) -> Self { 76 | Self { 77 | buf, 78 | malformed: false, 79 | } 80 | } 81 | 82 | /// Returns the number of unprocessed bytes 83 | /// (sans the file's starting and ending magic) 84 | /// 85 | /// Used to calculate offsets for the data section et al. 86 | fn bytes_remaining(&self) -> usize { 87 | self.buf.len() 88 | } 89 | } 90 | 91 | impl<'a> Iterator for LinearReader<'a> { 92 | type Item = McapResult>; 93 | 94 | fn next(&mut self) -> Option { 95 | if self.buf.is_empty() { 96 | return None; 97 | } 98 | 99 | // After an unrecoverable error (due to something wonky in the file), 100 | // don't keep trying to walk it. 101 | if self.malformed { 102 | return None; 103 | } 104 | 105 | let record = match read_record_from_slice(&mut self.buf) { 106 | Ok(k) => k, 107 | Err(e) => { 108 | self.malformed = true; 109 | return Some(Err(e)); 110 | } 111 | }; 112 | 113 | Some(Ok(record)) 114 | } 115 | } 116 | 117 | /// Read a record and advance the slice 118 | fn read_record_from_slice<'a>(buf: &mut &'a [u8]) -> McapResult> { 119 | if buf.len() < 5 { 120 | warn!("Malformed MCAP - not enough space for record + length!"); 121 | return Err(McapError::UnexpectedEof); 122 | } 123 | 124 | let op = read_u8(buf); 125 | let len = read_u64(buf); 126 | 127 | if buf.len() < len as usize { 128 | warn!( 129 | "Malformed MCAP - record with length {len}, but only {} bytes remain", 130 | buf.len() 131 | ); 132 | return Err(McapError::UnexpectedEof); 133 | } 134 | 135 | let body = &buf[..len as usize]; 136 | debug!("slice: opcode {op:02X}, length {len}"); 137 | let record = read_record(op, body)?; 138 | trace!(" {:?}", record); 139 | 140 | *buf = &buf[len as usize..]; 141 | Ok(record) 142 | } 143 | 144 | /// Given a record's opcode and its slice, read it into a [Record] 145 | fn read_record(op: u8, body: &[u8]) -> McapResult> { 146 | macro_rules! 
record { 147 | ($b:ident) => {{ 148 | let mut cur = Cursor::new($b); 149 | let res = cur.read_le()?; 150 | assert_eq!($b.len() as u64, cur.position()); 151 | res 152 | }}; 153 | } 154 | 155 | Ok(match op { 156 | op::HEADER => Record::Header(record!(body)), 157 | op::FOOTER => Record::Footer(record!(body)), 158 | op::SCHEMA => { 159 | let mut c = Cursor::new(body); 160 | let header: records::SchemaHeader = c.read_le()?; 161 | let data = Cow::Borrowed(&body[c.position() as usize..]); 162 | if header.data_len != data.len() as u32 { 163 | warn!( 164 | "Schema {}'s data length doesn't match the total schema length", 165 | header.name 166 | ); 167 | } 168 | Record::Schema { header, data } 169 | } 170 | op::CHANNEL => Record::Channel(record!(body)), 171 | op::MESSAGE => { 172 | let mut c = Cursor::new(body); 173 | let header = c.read_le()?; 174 | let data = Cow::Borrowed(&body[c.position() as usize..]); 175 | Record::Message { header, data } 176 | } 177 | op::CHUNK => { 178 | let mut c = Cursor::new(body); 179 | let header: records::ChunkHeader = c.read_le()?; 180 | let data = &body[c.position() as usize..]; 181 | if header.compressed_size != data.len() as u64 { 182 | warn!("Chunk's compressed length doesn't match its header"); 183 | } 184 | Record::Chunk { header, data } 185 | } 186 | op::MESSAGE_INDEX => Record::MessageIndex(record!(body)), 187 | op::CHUNK_INDEX => Record::ChunkIndex(record!(body)), 188 | op::ATTACHMENT => { 189 | let mut c = Cursor::new(body); 190 | let header: records::AttachmentHeader = c.read_le()?; 191 | let data = &body[c.position() as usize..body.len() - 4]; 192 | if header.data_len != data.len() as u64 { 193 | warn!( 194 | "Attachment {}'s data length doesn't match the total schema length", 195 | header.name 196 | ); 197 | } 198 | let crc = Cursor::new(&body[body.len() - 4..]).read_le()?; 199 | 200 | // We usually leave CRCs to higher-level readers - 201 | // (ChunkReader, read_summary(), etc.) - but 202 | // 203 | // 1. We can trivially check it here without checking other records, 204 | // decompressing anything, or doing any other non-trivial work 205 | // 206 | // 2. Since the CRC depends on the serialized header, it doesn't make 207 | // much sense to have users check it. 208 | // (What would they do? lol reserialize the header?) 209 | if crc != 0 { 210 | let calculated = crc32(&body[..body.len() - 4]); 211 | if crc != calculated { 212 | return Err(McapError::BadAttachmentCrc { 213 | saved: crc, 214 | calculated, 215 | }); 216 | } 217 | } 218 | 219 | Record::Attachment { header, data } 220 | } 221 | op::ATTACHMENT_INDEX => Record::AttachmentIndex(record!(body)), 222 | op::STATISTICS => Record::Statistics(record!(body)), 223 | op::METADATA => Record::Metadata(record!(body)), 224 | op::METADATA_INDEX => Record::MetadataIndex(record!(body)), 225 | op::SUMMARY_OFFSET => Record::SummaryOffset(record!(body)), 226 | op::END_OF_DATA => Record::EndOfData(record!(body)), 227 | opcode => Record::Unknown { 228 | opcode, 229 | data: Cow::Borrowed(body), 230 | }, 231 | }) 232 | } 233 | 234 | enum ChunkDecompressor<'a> { 235 | Null(LinearReader<'a>), 236 | Compressed(Option>>), 237 | } 238 | 239 | /// Streams records out of a [Chunk](Record::Chunk), decompressing as needed. 
240 | pub struct ChunkReader<'a> { 241 | header: records::ChunkHeader, 242 | decompressor: ChunkDecompressor<'a>, 243 | } 244 | 245 | impl<'a> ChunkReader<'a> { 246 | pub fn new(header: records::ChunkHeader, data: &'a [u8]) -> McapResult<Self> { 247 | let decompressor = match header.compression.as_str() { 248 | "zstd" => ChunkDecompressor::Compressed(Some(CountingCrcReader::new(Box::new( 249 | zstd::Decoder::new(data)?, 250 | )))), 251 | "lz4" => ChunkDecompressor::Compressed(Some(CountingCrcReader::new(Box::new( 252 | lz4::Decoder::new(data)?, 253 | )))), 254 | "" => { 255 | if header.uncompressed_size != header.compressed_size { 256 | warn!( 257 | "Chunk is uncompressed, but claims different compressed/uncompressed lengths" 258 | ); 259 | } 260 | 261 | if header.uncompressed_crc != 0 { 262 | let calculated = crc32(data); 263 | if header.uncompressed_crc != calculated { 264 | return Err(McapError::BadChunkCrc { 265 | saved: header.uncompressed_crc, 266 | calculated, 267 | }); 268 | } 269 | } 270 | 271 | ChunkDecompressor::Null(LinearReader::sans_magic(data)) 272 | } 273 | wat => return Err(McapError::UnsupportedCompression(wat.to_string())), 274 | }; 275 | 276 | Ok(Self { 277 | header, 278 | decompressor, 279 | }) 280 | } 281 | } 282 | 283 | impl<'a> Iterator for ChunkReader<'a> { 284 | type Item = McapResult<Record<'a>>; 285 | 286 | fn next(&mut self) -> Option<Self::Item> { 287 | match &mut self.decompressor { 288 | ChunkDecompressor::Null(r) => r.next(), 289 | ChunkDecompressor::Compressed(stream) => { 290 | // If we consumed the stream last time to get the CRC, 291 | // or because of an error, we're done. 292 | if stream.is_none() { 293 | return None; 294 | } 295 | 296 | let s = stream.as_mut().unwrap(); 297 | 298 | let record = match read_record_from_chunk_stream(s) { 299 | Ok(k) => k, 300 | Err(e) => { 301 | *stream = None; // Don't try to recover. 302 | return Some(Err(e)); 303 | } 304 | }; 305 | 306 | // If we've read all there is to read... 307 | if s.position() >= self.header.uncompressed_size { 308 | // Get the CRC. 309 | let calculated = stream.take().unwrap().finalize(); 310 | 311 | // If the header stored a CRC 312 | // and it doesn't match what we have, complain. 313 | if self.header.uncompressed_crc != 0 314 | && self.header.uncompressed_crc != calculated 315 | { 316 | return Some(Err(McapError::BadChunkCrc { 317 | saved: self.header.uncompressed_crc, 318 | calculated, 319 | })); 320 | } 321 | // All good! 322 | } 323 | 324 | Some(Ok(record)) 325 | } 326 | } 327 | } 328 | } 329 | 330 | /// Like [read_record_from_slice], but for a decompression stream 331 | fn read_record_from_chunk_stream<'a, R: Read>(r: &mut R) -> McapResult<Record<'a>> { 332 | // We can't use binrw because compression streams aren't seekable. 333 | // byteorder time! 334 | use byteorder::{ReadBytesExt, LE}; 335 | 336 | let op = r.read_u8()?; 337 | let len = r.read_u64::<LE>()?; 338 | 339 | debug!("chunk: opcode {op:02X}, length {len}"); 340 | let record = match op { 341 | op::SCHEMA => { 342 | let mut record = Vec::new(); 343 | r.take(len).read_to_end(&mut record)?; 344 | if len as usize != record.len() { 345 | return Err(McapError::UnexpectedEoc); 346 | } 347 | 348 | let mut c = Cursor::new(&record); 349 | let header: records::SchemaHeader = c.read_le()?; 350 | 351 | let header_end = c.position(); 352 | 353 | // Should we rotate and shrink instead?
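// (split_off returns bytes [header_end, len) as their own Vec and leaves
// the already-parsed header bytes behind in `record`, so only the schema
// data is kept once `record` goes out of scope.)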
354 | let data = record.split_off(header_end as usize); 355 | 356 | if header.data_len as usize != data.len() { 357 | warn!( 358 | "Schema {}'s data length doesn't match the total schema length", 359 | header.name 360 | ); 361 | } 362 | Record::Schema { 363 | header, 364 | data: Cow::Owned(data), 365 | } 366 | } 367 | op::CHANNEL => { 368 | let mut record = Vec::new(); 369 | r.take(len).read_to_end(&mut record)?; 370 | if len as usize != record.len() { 371 | return Err(McapError::UnexpectedEoc); 372 | } 373 | 374 | let mut c = Cursor::new(&record); 375 | let channel: records::Channel = c.read_le()?; 376 | 377 | if c.position() != record.len() as u64 { 378 | warn!( 379 | "Channel {}'s length doesn't match its record length", 380 | channel.topic 381 | ); 382 | } 383 | 384 | Record::Channel(channel) 385 | } 386 | op::MESSAGE => { 387 | // Optimization: messages are the mainstay of the file, 388 | // so allocate the header and the data separately to avoid having 389 | // to split them up or move them around later. 390 | // Fortunately, message headers are fixed length. 391 | const HEADER_LEN: u64 = 22; 392 | 393 | let mut header_buf = Vec::new(); 394 | r.take(HEADER_LEN).read_to_end(&mut header_buf)?; 395 | if header_buf.len() as u64 != HEADER_LEN { 396 | return Err(McapError::UnexpectedEoc); 397 | } 398 | let header: records::MessageHeader = Cursor::new(header_buf).read_le()?; 399 | 400 | let mut data = Vec::new(); 401 | r.take(len - HEADER_LEN).read_to_end(&mut data)?; 402 | if data.len() as u64 != len - HEADER_LEN { 403 | return Err(McapError::UnexpectedEoc); 404 | } 405 | 406 | Record::Message { 407 | header, 408 | data: Cow::Owned(data), 409 | } 410 | } 411 | wut => return Err(McapError::UnexpectedChunkRecord(wut)), 412 | }; 413 | trace!(" {:?}", record); 414 | Ok(record) 415 | } 416 | 417 | /// Like [`LinearReader`], but unpacks chunks' records into its stream 418 | pub struct ChunkFlattener<'a> { 419 | top_level: LinearReader<'a>, 420 | dechunk: Option>, 421 | malformed: bool, 422 | } 423 | 424 | impl<'a> ChunkFlattener<'a> { 425 | pub fn new(buf: &'a [u8]) -> McapResult { 426 | Self::new_with_options(buf, enum_set!()) 427 | } 428 | 429 | pub fn new_with_options(buf: &'a [u8], options: EnumSet) -> McapResult { 430 | let top_level = LinearReader::new_with_options(buf, options)?; 431 | Ok(Self { 432 | top_level, 433 | dechunk: None, 434 | malformed: false, 435 | }) 436 | } 437 | 438 | fn bytes_remaining(&self) -> usize { 439 | self.top_level.bytes_remaining() 440 | } 441 | } 442 | 443 | impl<'a> Iterator for ChunkFlattener<'a> { 444 | type Item = McapResult>; 445 | 446 | fn next(&mut self) -> Option { 447 | if self.malformed { 448 | return None; 449 | } 450 | 451 | let n: Option = loop { 452 | // If we're reading from a chunk, do that until it returns None. 453 | if let Some(d) = &mut self.dechunk { 454 | match d.next() { 455 | Some(d) => break Some(d), 456 | None => self.dechunk = None, 457 | } 458 | } 459 | // Fall through - if we didn't extract a record from a chunk 460 | // (or that chunk ended), move on to the next top-level record. 461 | match self.top_level.next() { 462 | // If it's a chunk, get a new chunk reader going... 463 | Some(Ok(Record::Chunk { header, data })) => { 464 | self.dechunk = match ChunkReader::new(header, data) { 465 | Ok(d) => Some(d), 466 | Err(e) => break Some(Err(e)), 467 | }; 468 | // ...then continue the loop to get the first item from the chunk. 469 | } 470 | // If it's not a chunk, just yield it. 
471 | not_a_chunk => break not_a_chunk, 472 | } 473 | }; 474 | 475 | // Give up on errors 476 | if matches!(n, Some(Err(_))) { 477 | self.malformed = true; 478 | } 479 | n 480 | } 481 | } 482 | 483 | /// Parses schemas and channels and wires them together 484 | #[derive(Debug, Default)] 485 | struct ChannelAccumulator<'a> { 486 | schemas: HashMap<u16, Arc<Schema<'a>>>, 487 | channels: HashMap<u16, Arc<Channel<'a>>>, 488 | } 489 | 490 | impl<'a> ChannelAccumulator<'a> { 491 | fn add_schema(&mut self, header: records::SchemaHeader, data: Cow<'a, [u8]>) -> McapResult<()> { 492 | if header.id == 0 { 493 | return Err(McapError::InvalidSchemaId); 494 | } 495 | 496 | let schema = Arc::new(Schema { 497 | name: header.name.clone(), 498 | encoding: header.encoding, 499 | data, 500 | }); 501 | 502 | if let Some(preexisting) = self.schemas.insert(header.id, schema.clone()) { 503 | // Oh boy, we have this schema already. 504 | // It had better be identical. 505 | if schema != preexisting { 506 | return Err(McapError::ConflictingSchemas(header.name)); 507 | } 508 | } 509 | Ok(()) 510 | } 511 | 512 | fn add_channel(&mut self, chan: records::Channel) -> McapResult<()> { 513 | // The schema ID can be 0 for "no schema", 514 | // or must reference some previously-read schema. 515 | let schema = if chan.schema_id == 0 { 516 | None 517 | } else { 518 | match self.schemas.get(&chan.schema_id) { 519 | Some(s) => Some(s.clone()), 520 | None => { 521 | return Err(McapError::UnknownSchema(chan.topic, chan.schema_id)); 522 | } 523 | } 524 | }; 525 | 526 | let channel = Arc::new(Channel { 527 | topic: chan.topic.clone(), 528 | schema, 529 | message_encoding: chan.message_encoding, 530 | metadata: chan.metadata, 531 | }); 532 | if let Some(preexisting) = self.channels.insert(chan.id, channel.clone()) { 533 | // Oh boy, we have this channel already. 534 | // It had better be identical. 535 | if preexisting != channel { 536 | return Err(McapError::ConflictingChannels(chan.topic)); 537 | } 538 | } 539 | Ok(()) 540 | } 541 | 542 | fn get(&self, chan_id: u16) -> Option<Arc<Channel<'a>>> { 543 | self.channels.get(&chan_id).cloned() 544 | } 545 | } 546 | 547 | /// Reads all messages from the MCAP file, in the order they were written, and 548 | /// performs needed validation (CRCs, etc.) as we go. 549 | /// 550 | /// This stops at the end of the data section and does not read the summary. 551 | /// 552 | /// Because tying the lifetime of each message to the underlying MCAP memory map 553 | /// makes it very difficult to send between threads or use in async land, 554 | /// and because we assume _most_ MCAP files have _most_ messages in compressed chunks, 555 | /// yielded [`Message`](crate::Message)s have unbounded lifetimes. 556 | /// For messages we've decompressed into their own buffers, this is free! 557 | /// For uncompressed messages, we take a copy of the message's data.
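///
/// A minimal read loop (a sketch; `mapped` is assumed to be the bytes of a
/// memory-mapped MCAP file):
/// ```no_run
/// # fn demo(mapped: &[u8]) -> mcap_rs::McapResult<()> {
/// for message in mcap_rs::MessageStream::new(mapped)? {
///     let message = message?;
///     println!("{}: {} bytes", message.channel.topic, message.data.len());
/// }
/// # Ok(())
/// # }
/// ```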
558 | pub struct MessageStream<'a> { 559 | full_file: &'a [u8], 560 | records: ChunkFlattener<'a>, 561 | done: bool, 562 | channeler: ChannelAccumulator<'static>, 563 | } 564 | 565 | impl<'a> MessageStream<'a> { 566 | pub fn new(buf: &'a [u8]) -> McapResult { 567 | Self::new_with_options(buf, enum_set!()) 568 | } 569 | 570 | pub fn new_with_options(buf: &'a [u8], options: EnumSet) -> McapResult { 571 | let full_file = buf; 572 | let records = ChunkFlattener::new_with_options(buf, options)?; 573 | 574 | Ok(Self { 575 | full_file, 576 | records, 577 | done: false, 578 | channeler: ChannelAccumulator::default(), 579 | }) 580 | } 581 | } 582 | 583 | impl<'a> Iterator for MessageStream<'a> { 584 | type Item = McapResult>; 585 | 586 | fn next(&mut self) -> Option { 587 | if self.done { 588 | return None; 589 | } 590 | 591 | let n = loop { 592 | // Let's start with a working record. 593 | let record = match self.records.next() { 594 | Some(Ok(rec)) => rec, 595 | Some(Err(e)) => break Some(Err(e)), 596 | None => break None, 597 | }; 598 | 599 | match record { 600 | // Insert schemas into self so we know when subsequent channels reference them. 601 | Record::Schema { header, data } => { 602 | let data = Cow::Owned(data.into_owned()); 603 | if let Err(e) = self.channeler.add_schema(header, data) { 604 | break Some(Err(e)); 605 | } 606 | } 607 | 608 | // Insert channels into self so we know when subsequent messages reference them. 609 | Record::Channel(chan) => { 610 | if let Err(e) = self.channeler.add_channel(chan) { 611 | break Some(Err(e)); 612 | } 613 | } 614 | 615 | Record::Message { header, data } => { 616 | // Messages must have a previously-read channel. 617 | let channel = match self.channeler.get(header.channel_id) { 618 | Some(c) => c, 619 | None => { 620 | break Some(Err(McapError::UnknownChannel( 621 | header.sequence, 622 | header.channel_id, 623 | ))) 624 | } 625 | }; 626 | 627 | let m = Message { 628 | channel, 629 | sequence: header.sequence, 630 | log_time: header.log_time, 631 | publish_time: header.publish_time, 632 | data: Cow::Owned(data.into_owned()), 633 | }; 634 | break Some(Ok(m)); 635 | } 636 | 637 | // If it's EOD, do unholy things to calculate the CRC. 638 | Record::EndOfData(end) => { 639 | if end.data_section_crc != 0 { 640 | // This is terrible. Less math with less magic numbers, please. 641 | let data_section_len = (self.full_file.len() - MAGIC.len() * 2) // Actual working area 642 | - self.records.bytes_remaining(); 643 | 644 | let data_section = 645 | &self.full_file[MAGIC.len()..MAGIC.len() + data_section_len]; 646 | let calculated = crc32(data_section); 647 | if end.data_section_crc != calculated { 648 | break Some(Err(McapError::BadDataCrc { 649 | saved: end.data_section_crc, 650 | calculated, 651 | })); 652 | } 653 | } 654 | break None; // We're done at any rate. 655 | } 656 | _skip => {} 657 | }; 658 | }; 659 | 660 | if !matches!(n, Some(Ok(_))) { 661 | self.done = true; 662 | } 663 | n 664 | } 665 | } 666 | 667 | const FOOTER_LEN: usize = 20 + 8 + 1; // 20 bytes + 8 byte len + 1 byte opcode 668 | 669 | /// Read the MCAP footer. 670 | /// 671 | /// You'd probably prefer to use [`Summary::read`] to parse the whole summary, 672 | /// then index into the rest of the file with 673 | /// [`Summary::stream_chunk`], [`attachment`], [`metadata`], etc. 
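///
/// A sketch of that indexed-reading flow:
/// ```no_run
/// use mcap_rs::read::Summary;
/// # fn demo(mapped: &[u8]) -> mcap_rs::McapResult<()> {
/// if let Some(summary) = Summary::read(mapped)? {
///     for index in &summary.chunk_indexes {
///         for message in summary.stream_chunk(mapped, index)? {
///             println!("{}", message?.log_time);
///         }
///     }
/// }
/// # Ok(())
/// # }
/// ```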
674 | pub fn footer(mcap: &[u8]) -> McapResult<records::Footer> { 675 | if mcap.len() < MAGIC.len() * 2 + FOOTER_LEN { 676 | return Err(McapError::UnexpectedEof); 677 | } 678 | 679 | if !mcap.starts_with(MAGIC) || !mcap.ends_with(MAGIC) { 680 | return Err(McapError::BadMagic); 681 | } 682 | 683 | let footer_buf = &mcap[mcap.len() - MAGIC.len() - FOOTER_LEN..]; 684 | 685 | match LinearReader::sans_magic(footer_buf).next() { 686 | Some(Ok(Record::Footer(f))) => Ok(f), 687 | _ => Err(McapError::BadFooter), 688 | } 689 | } 690 | 691 | /// Indexes of an MCAP file parsed from its (optional) summary section 692 | #[derive(Default, Eq, PartialEq)] 693 | pub struct Summary<'a> { 694 | pub stats: Option<records::Statistics>, 695 | /// Maps channel IDs to their channel 696 | pub channels: HashMap<u16, Arc<Channel<'a>>>, 697 | /// Maps schema IDs to their schema 698 | pub schemas: HashMap<u16, Arc<Schema<'a>>>, 699 | pub chunk_indexes: Vec<records::ChunkIndex>, 700 | pub attachment_indexes: Vec<records::AttachmentIndex>, 701 | pub metadata_indexes: Vec<records::MetadataIndex>, 702 | } 703 | 704 | impl fmt::Debug for Summary<'_> { 705 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 706 | // Keep the actual maps as HashMaps for constant-time lookups, 707 | // but order everything up before debug printing it here. 708 | let channels = self.channels.iter().collect::<BTreeMap<_, _>>(); 709 | let schemas = self.schemas.iter().collect::<BTreeMap<_, _>>(); 710 | 711 | f.debug_struct("Summary") 712 | .field("stats", &self.stats) 713 | .field("channels", &channels) 714 | .field("schemas", &schemas) 715 | .field("chunk_indexes", &self.chunk_indexes) 716 | .field("attachment_indexes", &self.attachment_indexes) 717 | .field("metadata_indexes", &self.metadata_indexes) 718 | .finish() 719 | } 720 | } 721 | 722 | impl<'a> Summary<'a> { 723 | /// Read the summary section of the given mapped MCAP file, if it has one. 724 | pub fn read(mcap: &'a [u8]) -> McapResult<Option<Summary<'a>>> { 725 | let foot = footer(mcap)?; 726 | 727 | // A summary start offset of 0 means there's no summary. 728 | if foot.summary_start == 0 { 729 | return Ok(None); 730 | } 731 | 732 | if foot.summary_crc != 0 { 733 | // The checksum covers the entire summary _except_ itself, including other footer bytes. 734 | let calculated = 735 | crc32(&mcap[foot.summary_start as usize..mcap.len() - MAGIC.len() - 4]); 736 | if foot.summary_crc != calculated { 737 | return Err(McapError::BadSummaryCrc { 738 | saved: foot.summary_crc, 739 | calculated, 740 | }); 741 | } 742 | } 743 | 744 | let mut summary = Summary::default(); 745 | let mut channeler = ChannelAccumulator::default(); 746 | 747 | let summary_end = match foot.summary_offset_start { 748 | 0 => mcap.len() - MAGIC.len() - FOOTER_LEN, 749 | sos => sos as usize, 750 | }; 751 | let summary_buf = &mcap[foot.summary_start as usize..summary_end]; 752 | 753 | for record in LinearReader::sans_magic(summary_buf) { 754 | match record? { 755 | Record::Statistics(s) => { 756 | if summary.stats.is_some() { 757 | warn!("Multiple statistics records found in summary"); 758 | } 759 | summary.stats = Some(s); 760 | } 761 | Record::Schema { header, data } => channeler.add_schema(header, data)?, 762 | Record::Channel(c) => channeler.add_channel(c)?, 763 | Record::ChunkIndex(c) => summary.chunk_indexes.push(c), 764 | Record::AttachmentIndex(a) => summary.attachment_indexes.push(a), 765 | Record::MetadataIndex(i) => summary.metadata_indexes.push(i), 766 | _ => {} 767 | }; 768 | } 769 | 770 | summary.schemas = channeler.schemas; 771 | summary.channels = channeler.channels; 772 | 773 | Ok(Some(summary)) 774 | } 775 | 776 | /// Stream messages from the chunk with the given index.
777 | /// 778 | /// To avoid having to read all preceding chunks first, 779 | /// channels and their schemas are pulled from this summary. 780 | pub fn stream_chunk( 781 | &self, 782 | mcap: &'a [u8], 783 | index: &records::ChunkIndex, 784 | ) -> McapResult>> + '_> { 785 | let end = (index.chunk_start_offset + index.chunk_length) as usize; 786 | if mcap.len() < end { 787 | return Err(McapError::BadIndex); 788 | } 789 | 790 | // Get the chunk (as a header and its data) out of the file at the given offset. 791 | let mut reader = LinearReader::sans_magic(&mcap[index.chunk_start_offset as usize..end]); 792 | let (h, d) = match reader.next().ok_or(McapError::BadIndex)? { 793 | Ok(records::Record::Chunk { header, data }) => (header, data), 794 | Ok(_other_record) => return Err(McapError::BadIndex), 795 | Err(e) => return Err(e), 796 | }; 797 | 798 | if reader.next().is_some() { 799 | // Wut - multiple records in the given slice? 800 | return Err(McapError::BadIndex); 801 | } 802 | 803 | // Now let's stream messages out of the chunk. 804 | let messages = ChunkReader::new(h, d)?.filter_map(|record| match record { 805 | Ok(records::Record::Message { header, data }) => { 806 | // Correlate the message to its channel from this summary. 807 | let channel = match self.channels.get(&header.channel_id) { 808 | Some(c) => c.clone(), 809 | None => { 810 | return Some(Err(McapError::UnknownChannel( 811 | header.sequence, 812 | header.channel_id, 813 | ))); 814 | } 815 | }; 816 | 817 | let m = Message { 818 | channel, 819 | sequence: header.sequence, 820 | log_time: header.log_time, 821 | publish_time: header.publish_time, 822 | data, 823 | }; 824 | 825 | Some(Ok(m)) 826 | } 827 | // We don't care about other chunk records (channels, schemas) - 828 | // we should have them from &self already. 829 | Ok(_other_record) => None, 830 | // We do care about errors, though. 831 | Err(e) => Some(Err(e)), 832 | }); 833 | 834 | Ok(messages) 835 | } 836 | 837 | /// Read the mesage indexes for the given indexed chunk. 838 | /// 839 | /// Channels and their schemas are pulled from this summary. 840 | /// The offsets in each [`MessageIndexEntry`](records::MessageIndexEntry) 841 | /// is relative to the decompressed contents of the given chunk. 842 | pub fn read_message_indexes( 843 | &self, 844 | mcap: &[u8], 845 | index: &records::ChunkIndex, 846 | ) -> McapResult, Vec>> { 847 | if index.message_index_offsets.is_empty() { 848 | // Message indexing is optional... should we be more descriptive here? 849 | return Err(McapError::BadIndex); 850 | } 851 | 852 | let mut indexes = HashMap::new(); 853 | 854 | for (channel_id, offset) in &index.message_index_offsets { 855 | let offset = *offset as usize; 856 | 857 | // Message indexes are at least 15 bytes: 858 | // 1 byte opcode, 8 byte length, 2 byte channel ID, 4 byte array len 859 | if mcap.len() < offset + 15 { 860 | return Err(McapError::BadIndex); 861 | } 862 | 863 | // Get the MessageIndex out of the file at the given offset. 864 | let mut reader = LinearReader::sans_magic(&mcap[offset..]); 865 | let index = match reader.next().ok_or(McapError::BadIndex)? 
{ 866 | Ok(records::Record::MessageIndex(i)) => i, 867 | Ok(_other_record) => return Err(McapError::BadIndex), 868 | Err(e) => return Err(e), 869 | }; 870 | 871 | // The channel ID from the chunk index and the message index should match 872 | if *channel_id != index.channel_id { 873 | return Err(McapError::BadIndex); 874 | } 875 | 876 | let channel = match self.channels.get(&index.channel_id) { 877 | Some(c) => c, 878 | None => { 879 | return Err(McapError::UnknownChannel( 880 | 0, // We don't have a message sequence num yet. 881 | index.channel_id, 882 | )); 883 | } 884 | }; 885 | 886 | if indexes.insert(channel.clone(), index.records).is_some() { 887 | return Err(McapError::ConflictingChannels(channel.topic.clone())); 888 | } 889 | } 890 | 891 | Ok(indexes) 892 | } 893 | 894 | /// Seek to the given message in the given indexed chunk. 895 | /// 896 | /// If you're interested in more than a single message from the chunk, 897 | /// filtering [`Summary::stream_chunk`] is probably a better bet. 898 | /// Compressed chunks aren't random access - 899 | /// this decompresses everything in the chunk before 900 | /// [`message.offset`](records::MessageIndexEntry::offset) and throws it away. 901 | pub fn seek_message( 902 | &self, 903 | mcap: &'a [u8], 904 | index: &records::ChunkIndex, 905 | message: &records::MessageIndexEntry, 906 | ) -> McapResult { 907 | // Get the chunk (as a header and its data) out of the file at the given offset. 908 | let end = (index.chunk_start_offset + index.chunk_length) as usize; 909 | if mcap.len() < end { 910 | return Err(McapError::BadIndex); 911 | } 912 | 913 | let mut reader = LinearReader::sans_magic(&mcap[index.chunk_start_offset as usize..end]); 914 | let (h, d) = match reader.next().ok_or(McapError::BadIndex)? { 915 | Ok(records::Record::Chunk { header, data }) => (header, data), 916 | Ok(_other_record) => return Err(McapError::BadIndex), 917 | Err(e) => return Err(e), 918 | }; 919 | 920 | if reader.next().is_some() { 921 | // Wut - multiple records in the given slice? 922 | return Err(McapError::BadIndex); 923 | } 924 | 925 | let mut chunk_reader = ChunkReader::new(h, d)?; 926 | 927 | // Do unspeakable things to seek to the message. 928 | match &mut chunk_reader.decompressor { 929 | ChunkDecompressor::Null(reader) => { 930 | // Skip messages until we're at the offset. 931 | while reader.bytes_remaining() as u64 > index.uncompressed_size - message.offset { 932 | match reader.next() { 933 | Some(Ok(_)) => {} 934 | Some(Err(e)) => return Err(e), 935 | None => return Err(McapError::BadIndex), 936 | }; 937 | } 938 | // Be exact! 939 | if reader.bytes_remaining() as u64 != index.uncompressed_size - message.offset { 940 | return Err(McapError::BadIndex); 941 | } 942 | } 943 | ChunkDecompressor::Compressed(maybe_read) => { 944 | let reader = maybe_read.as_mut().unwrap(); 945 | // Decompress offset bytes, which should put us at the message we want. 946 | io::copy(&mut reader.take(message.offset), &mut io::sink())?; 947 | } 948 | } 949 | 950 | // Now let's get our message. 951 | match chunk_reader.next() { 952 | Some(Ok(records::Record::Message { header, data })) => { 953 | // Correlate the message to its channel from this summary. 
954 | let channel = match self.channels.get(&header.channel_id) { 955 | Some(c) => c.clone(), 956 | None => { 957 | return Err(McapError::UnknownChannel( 958 | header.sequence, 959 | header.channel_id, 960 | )); 961 | } 962 | }; 963 | 964 | let m = Message { 965 | channel, 966 | sequence: header.sequence, 967 | log_time: header.log_time, 968 | publish_time: header.publish_time, 969 | data, 970 | }; 971 | 972 | Ok(m) 973 | } 974 | // The index told us this was a message... 975 | Some(Ok(_other_record)) => Err(McapError::BadIndex), 976 | Some(Err(e)) => Err(e), 977 | None => Err(McapError::BadIndex), 978 | } 979 | } 980 | } 981 | 982 | /// Read the attachment with the given index. 983 | pub fn attachment<'a>( 984 | mcap: &'a [u8], 985 | index: &records::AttachmentIndex, 986 | ) -> McapResult<Attachment<'a>> { 987 | let end = (index.offset + index.length) as usize; 988 | if mcap.len() < end { 989 | return Err(McapError::BadIndex); 990 | } 991 | 992 | let mut reader = LinearReader::sans_magic(&mcap[index.offset as usize..end]); 993 | let (h, d) = match reader.next().ok_or(McapError::BadIndex)? { 994 | Ok(records::Record::Attachment { header, data }) => (header, data), 995 | Ok(_other_record) => return Err(McapError::BadIndex), 996 | Err(e) => return Err(e), 997 | }; 998 | 999 | if reader.next().is_some() { 1000 | // Wut - multiple records in the given slice? 1001 | return Err(McapError::BadIndex); 1002 | } 1003 | 1004 | Ok(Attachment { 1005 | log_time: h.log_time, 1006 | create_time: h.create_time, 1007 | name: h.name, 1008 | content_type: h.content_type, 1009 | data: Cow::Borrowed(d), 1010 | }) 1011 | } 1012 | 1013 | /// Read the metadata with the given index. 1014 | pub fn metadata(mcap: &[u8], index: &records::MetadataIndex) -> McapResult<records::Metadata> { 1015 | let end = (index.offset + index.length) as usize; 1016 | if mcap.len() < end { 1017 | return Err(McapError::BadIndex); 1018 | } 1019 | 1020 | let mut reader = LinearReader::sans_magic(&mcap[index.offset as usize..end]); 1021 | let m = match reader.next().ok_or(McapError::BadIndex)? { 1022 | Ok(records::Record::Metadata(m)) => m, 1023 | Ok(_other_record) => return Err(McapError::BadIndex), 1024 | Err(e) => return Err(e), 1025 | }; 1026 | 1027 | if reader.next().is_some() { 1028 | // Wut - multiple records in the given slice? 1029 | return Err(McapError::BadIndex); 1030 | } 1031 | 1032 | Ok(m) 1033 | } 1034 | 1035 | // All of the following panic if they walk off the back of the data block; 1036 | // callers are assumed to have made sure they got enough bytes back with 1037 | // `validate_response()` 1038 | 1039 | /// Builds a `read_<type>(&mut buf)` function that reads a given type 1040 | /// off the buffer and advances it the appropriate number of bytes. 1041 | macro_rules! reader { 1042 | ($type:ty) => { 1043 | paste::paste! { 1044 | #[inline] 1045 | fn [<read_ $type>](block: &mut &[u8]) -> $type { 1046 | const SIZE: usize = std::mem::size_of::<$type>(); 1047 | let res = $type::from_le_bytes( 1048 | block[0..SIZE].try_into().unwrap() 1049 | ); 1050 | *block = &block[SIZE..]; 1051 | res 1052 | } 1053 | } 1054 | }; 1055 | } 1056 | 1057 | reader!(u8); 1058 | reader!(u64); 1059 | 1060 | #[cfg(test)] 1061 | mod test { 1062 | use super::*; 1063 | 1064 | // Can we read a file that's only magic?
1065 | // (Probably considered malformed by the spec, but let's not panic on user input) 1066 | 1067 | #[test] 1068 | fn only_two_magics() { 1069 | let two_magics = MAGIC.repeat(2); 1070 | let mut reader = LinearReader::new(&two_magics).unwrap(); 1071 | assert!(reader.next().is_none()); 1072 | } 1073 | 1074 | #[test] 1075 | fn only_one_magic() { 1076 | assert!(matches!(LinearReader::new(MAGIC), Err(McapError::BadMagic))); 1077 | } 1078 | 1079 | #[test] 1080 | fn only_two_magic_with_ignore_end_magic() { 1081 | let two_magics = MAGIC.repeat(2); 1082 | let mut reader = 1083 | LinearReader::new_with_options(&two_magics, enum_set!(Options::IgnoreEndMagic)) 1084 | .unwrap(); 1085 | assert!(reader.next().is_none()); 1086 | } 1087 | 1088 | #[test] 1089 | fn only_one_magic_with_ignore_end_magic() { 1090 | let mut reader = 1091 | LinearReader::new_with_options(MAGIC, enum_set!(Options::IgnoreEndMagic)).unwrap(); 1092 | assert!(reader.next().is_none()); 1093 | } 1094 | } 1095 | --------------------------------------------------------------------------------